Diffstat (limited to 'fs')
43 files changed, 991 insertions(+), 470 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -625,7 +625,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	/*
 	 * Add a completion event to the ring buffer. Must be done holding
-	 * ctx->ctx_lock to prevent other code from messing with the tail
+	 * ctx->completion_lock to prevent other code from messing with the tail
	 * pointer since we might be called from irq context.
	 */
	spin_lock_irqsave(&ctx->completion_lock, flags);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 431b6a04ebfd..bb43ce081d6e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1562,6 +1562,7 @@ static const struct address_space_operations def_blk_aops = {
 	.writepages = generic_writepages,
 	.releasepage = blkdev_releasepage,
 	.direct_IO = blkdev_direct_IO,
+	.is_dirty_writeback = buffer_check_dirty_writeback,
 };
 
 const struct file_operations def_blk_fops = {
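
The hunk above wires the new, optional is_dirty_writeback address_space operation into the block-device mapping; a caller is expected to fall back to the generic page flags when a mapping does not install the hook. A rough userspace C sketch of that optional-callback dispatch (the struct and function names here are illustrative stand-ins, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    struct page { bool dirty_flag, writeback_flag; };

    /* ops table with an optional hook, as in address_space_operations */
    struct a_ops {
        void (*is_dirty_writeback)(struct page *, bool *dirty, bool *writeback);
    };

    static void caller_check(const struct a_ops *ops, struct page *page,
                             bool *dirty, bool *writeback)
    {
        /* default: trust the page flags */
        *dirty = page->dirty_flag;
        *writeback = page->writeback_flag;

        /* let the owner refine the answer if it installed a hook */
        if (ops->is_dirty_writeback)
            ops->is_dirty_writeback(page, dirty, writeback);
    }

    static void always_clean(struct page *p, bool *d, bool *w)
    {
        (void)p;
        *d = false;    /* this backend knows better than the flags */
        *w = false;
    }

    int main(void)
    {
        struct a_ops with_hook = { .is_dirty_writeback = always_clean };
        struct a_ops without_hook = { 0 };
        struct page pg = { .dirty_flag = true, .writeback_flag = false };
        bool d, w;

        caller_check(&with_hook, &pg, &d, &w);
        printf("with hook:    dirty=%d writeback=%d\n", d, w);
        caller_check(&without_hook, &pg, &d, &w);
        printf("without hook: dirty=%d writeback=%d\n", d, w);
        return 0;
    }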
diff --git a/fs/buffer.c b/fs/buffer.c
index f93392e2df12..4d7433534f5c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -83,6 +83,40 @@ void unlock_buffer(struct buffer_head *bh)
 EXPORT_SYMBOL(unlock_buffer);
 
 /*
+ * Returns if the page has dirty or writeback buffers. If all the buffers
+ * are unlocked and clean then the PageDirty information is stale. If
+ * any of the pages are locked, it is assumed they are locked for IO.
+ */
+void buffer_check_dirty_writeback(struct page *page,
+				     bool *dirty, bool *writeback)
+{
+	struct buffer_head *head, *bh;
+	*dirty = false;
+	*writeback = false;
+
+	BUG_ON(!PageLocked(page));
+
+	if (!page_has_buffers(page))
+		return;
+
+	if (PageWriteback(page))
+		*writeback = true;
+
+	head = page_buffers(page);
+	bh = head;
+	do {
+		if (buffer_locked(bh))
+			*writeback = true;
+
+		if (buffer_dirty(bh))
+			*dirty = true;
+
+		bh = bh->b_this_page;
+	} while (bh != head);
+}
+EXPORT_SYMBOL(buffer_check_dirty_writeback);
+
+/*
  * Block until a buffer comes unlocked. This doesn't stop it
  * from becoming locked again - you have to lock it yourself
  * if you want to preserve its state.
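
buffer_check_dirty_writeback() above walks the page's buffer ring, a circular singly linked list closed through b_this_page, OR-ing each buffer's state into the two result flags. A self-contained userspace sketch of that ring walk, with plain structs standing in for buffer_head:

    #include <stdbool.h>
    #include <stdio.h>

    /* minimal stand-in for buffer_head: a circular, singly linked ring */
    struct buf {
        bool locked, dirty;
        struct buf *next;        /* corresponds to bh->b_this_page */
    };

    /* same aggregation as buffer_check_dirty_writeback(): visit every
     * buffer on the ring exactly once, OR-ing the per-buffer state */
    static void check_dirty_writeback(struct buf *head,
                                      bool *dirty, bool *writeback)
    {
        struct buf *b = head;

        *dirty = false;
        *writeback = false;
        do {
            if (b->locked)
                *writeback = true;    /* locked is taken to mean "in flight" */
            if (b->dirty)
                *dirty = true;
            b = b->next;
        } while (b != head);
    }

    int main(void)
    {
        struct buf a = { .dirty = true }, b = { .locked = true }, c = { 0 };
        bool d, w;

        a.next = &b; b.next = &c; c.next = &a;    /* close the ring */
        check_dirty_writeback(&a, &d, &w);
        printf("dirty=%d writeback=%d\n", d, w);
        return 0;
    }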
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 317f9ee9c991..ebaff368120d 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -12,6 +12,7 @@
 #include <linux/mount.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/swap.h>
 #include "internal.h"
 
 /*
@@ -227,8 +228,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
  */
 static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
 					    struct fscache_retrieval *op,
-					    struct page *netpage,
-					    struct pagevec *pagevec)
+					    struct page *netpage)
 {
 	struct cachefiles_one_read *monitor;
 	struct address_space *bmapping;
@@ -237,8 +237,6 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
 
 	_enter("");
 
-	pagevec_reinit(pagevec);
-
 	_debug("read back %p{%lu,%d}",
 	       netpage, netpage->index, page_count(netpage));
 
@@ -283,9 +281,7 @@ installed_new_backing_page:
 	backpage = newpage;
 	newpage = NULL;
 
-	page_cache_get(backpage);
-	pagevec_add(pagevec, backpage);
-	__pagevec_lru_add_file(pagevec);
+	lru_cache_add_file(backpage);
 
 read_backing_page:
 	ret = bmapping->a_ops->readpage(NULL, backpage);
@@ -452,8 +448,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
 	if (block) {
 		/* submit the apparently valid page to the backing fs to be
 		 * read from disk */
-		ret = cachefiles_read_backing_file_one(object, op, page,
-						       &pagevec);
+		ret = cachefiles_read_backing_file_one(object, op, page);
 	} else if (cachefiles_has_space(cache, 0, 1) == 0) {
 		/* there's space in the cache we can use */
 		fscache_mark_page_cached(op, page);
@@ -482,14 +477,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 {
 	struct cachefiles_one_read *monitor = NULL;
 	struct address_space *bmapping = object->backer->d_inode->i_mapping;
-	struct pagevec lru_pvec;
 	struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
 	int ret = 0;
 
 	_enter("");
 
-	pagevec_init(&lru_pvec, 0);
-
 	list_for_each_entry_safe(netpage, _n, list, lru) {
 		list_del(&netpage->lru);
 
@@ -534,9 +526,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 		backpage = newpage;
 		newpage = NULL;
 
-		page_cache_get(backpage);
-		if (!pagevec_add(&lru_pvec, backpage))
-			__pagevec_lru_add_file(&lru_pvec);
+		lru_cache_add_file(backpage);
 
 	reread_backing_page:
 		ret = bmapping->a_ops->readpage(NULL, backpage);
@@ -559,9 +549,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 			goto nomem;
 		}
 
-		page_cache_get(netpage);
-		if (!pagevec_add(&lru_pvec, netpage))
-			__pagevec_lru_add_file(&lru_pvec);
+		lru_cache_add_file(netpage);
 
 		/* install a monitor */
 		page_cache_get(netpage);
@@ -643,9 +631,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 
 		fscache_mark_page_cached(op, netpage);
 
-		page_cache_get(netpage);
-		if (!pagevec_add(&lru_pvec, netpage))
-			__pagevec_lru_add_file(&lru_pvec);
+		lru_cache_add_file(netpage);
 
 		/* the netpage is unlocked and marked up to date here */
 		fscache_end_io(op, netpage, 0);
@@ -661,8 +647,6 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 
 out:
	/* tidy up */
-	pagevec_lru_add_file(&lru_pvec);
-
	if (newpage)
		page_cache_release(newpage);
	if (netpage)
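
Every removed cachefiles block was open-coding the same batching idiom: take a page reference, collect it in a fixed-size pagevec, flush the batch to the LRU when pagevec_add() reports it full, and drain the remainder at the "out:" label. lru_cache_add_file() batches internally, so each call site collapses to one line. A toy C sketch of the batch-and-flush pattern being removed (integers stand in for struct page pointers):

    #include <stdio.h>

    #define PVEC_SIZE 14    /* the real struct pagevec holds 14 pages */

    struct pvec {
        int nr;
        int pages[PVEC_SIZE];
    };

    static void pvec_flush(struct pvec *pv)
    {
        for (int i = 0; i < pv->nr; i++)
            printf("add page %d to LRU\n", pv->pages[i]);
        pv->nr = 0;
    }

    /* returns remaining space, so "if (!pvec_add(...)) pvec_flush(...)"
     * works like the pagevec_add()/__pagevec_lru_add_file() pairing */
    static int pvec_add(struct pvec *pv, int page)
    {
        pv->pages[pv->nr++] = page;
        return PVEC_SIZE - pv->nr;
    }

    int main(void)
    {
        struct pvec pv = { 0 };

        for (int page = 0; page < 30; page++)
            if (!pvec_add(&pv, page))
                pvec_flush(&pv);
        pvec_flush(&pv);    /* the final drain at "out:" */
        return 0;
    }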
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 2b6cb23dd14e..1d1c41f1014d 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -203,7 +203,7 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 	mutex_lock(&buffer->mutex);
 	len = fill_write_buffer(buffer, buf, count);
 	if (len > 0)
-		len = flush_write_buffer(file->f_path.dentry, buffer, count);
+		len = flush_write_buffer(file->f_path.dentry, buffer, len);
 	if (len > 0)
 		*ppos += len;
 	mutex_unlock(&buffer->mutex);
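
This one-word configfs fix matters because fill_write_buffer() may consume fewer than count bytes; flushing count would hand stale buffer contents past len to the item's store method. A minimal userspace illustration of the bug, with a hypothetical fill function rather than the configfs one:

    #include <stdio.h>
    #include <string.h>

    /* pretend only the first line of the input fits the buffer */
    static int fill_write_buffer(char *dst, const char *src, size_t count)
    {
        size_t len = strcspn(src, "\n");

        if (len > count)
            len = count;
        memcpy(dst, src, len);
        return (int)len;
    }

    int main(void)
    {
        char buffer[64] = "XXXXXXXXXXXXXXXX";    /* stale contents */
        const char *input = "new-value\nleftover";
        int len = fill_write_buffer(buffer, input, strlen(input));

        printf("flush len   -> %.*s\n", len, buffer);                /* correct */
        printf("flush count -> %.*s\n", (int)strlen(input), buffer); /* stale tail */
        return 0;
    }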
diff --git a/fs/coredump.c b/fs/coredump.c
index dafafbafa731..72f816d6cad9 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -45,69 +45,79 @@
 #include <trace/events/sched.h>
 
 int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
 unsigned int core_pipe_limit;
+char core_pattern[CORENAME_MAX_SIZE] = "core";
+static int core_name_size = CORENAME_MAX_SIZE;
 
 struct core_name {
 	char *corename;
 	int used, size;
 };
-static atomic_t call_count = ATOMIC_INIT(1);
 
 /* The maximal length of core_pattern is also specified in sysctl.c */
 
-static int expand_corename(struct core_name *cn)
+static int expand_corename(struct core_name *cn, int size)
 {
-	char *old_corename = cn->corename;
-
-	cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
-	cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
+	char *corename = krealloc(cn->corename, size, GFP_KERNEL);
 
-	if (!cn->corename) {
-		kfree(old_corename);
+	if (!corename)
 		return -ENOMEM;
-	}
 
+	if (size > core_name_size) /* racy but harmless */
+		core_name_size = size;
+
+	cn->size = ksize(corename);
+	cn->corename = corename;
 	return 0;
 }
 
+static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg)
+{
+	int free, need;
+
+again:
+	free = cn->size - cn->used;
+	need = vsnprintf(cn->corename + cn->used, free, fmt, arg);
+	if (need < free) {
+		cn->used += need;
+		return 0;
+	}
+
+	if (!expand_corename(cn, cn->size + need - free + 1))
+		goto again;
+
+	return -ENOMEM;
+}
+
 static int cn_printf(struct core_name *cn, const char *fmt, ...)
 {
-	char *cur;
-	int need;
-	int ret;
 	va_list arg;
+	int ret;
 
 	va_start(arg, fmt);
-	need = vsnprintf(NULL, 0, fmt, arg);
+	ret = cn_vprintf(cn, fmt, arg);
 	va_end(arg);
 
-	if (likely(need < cn->size - cn->used - 1))
-		goto out_printf;
+	return ret;
+}
 
-	ret = expand_corename(cn);
-	if (ret)
-		goto expand_fail;
+static int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
+{
+	int cur = cn->used;
+	va_list arg;
+	int ret;
 
-out_printf:
-	cur = cn->corename + cn->used;
 	va_start(arg, fmt);
-	vsnprintf(cur, need + 1, fmt, arg);
+	ret = cn_vprintf(cn, fmt, arg);
 	va_end(arg);
-	cn->used += need;
-	return 0;
 
-expand_fail:
+	for (; cur < cn->used; ++cur) {
+		if (cn->corename[cur] == '/')
+			cn->corename[cur] = '!';
+	}
 	return ret;
 }
 
-static void cn_escape(char *str)
-{
-	for (; *str; str++)
-		if (*str == '/')
-			*str = '!';
-}
-
 static int cn_print_exe_file(struct core_name *cn)
 {
 	struct file *exe_file;
@@ -115,12 +125,8 @@ static int cn_print_exe_file(struct core_name *cn)
 	int ret;
 
 	exe_file = get_mm_exe_file(current->mm);
-	if (!exe_file) {
-		char *commstart = cn->corename + cn->used;
-		ret = cn_printf(cn, "%s (path unknown)", current->comm);
-		cn_escape(commstart);
-		return ret;
-	}
+	if (!exe_file)
+		return cn_esc_printf(cn, "%s (path unknown)", current->comm);
 
 	pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
 	if (!pathbuf) {
@@ -134,9 +140,7 @@ static int cn_print_exe_file(struct core_name *cn)
 		goto free_buf;
 	}
 
-	cn_escape(path);
-
-	ret = cn_printf(cn, "%s", path);
+	ret = cn_esc_printf(cn, "%s", path);
 
 free_buf:
 	kfree(pathbuf);
@@ -157,19 +161,19 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
 	int pid_in_pattern = 0;
 	int err = 0;
 
-	cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
-	cn->corename = kmalloc(cn->size, GFP_KERNEL);
 	cn->used = 0;
-
-	if (!cn->corename)
+	cn->corename = NULL;
+	if (expand_corename(cn, core_name_size))
 		return -ENOMEM;
+	cn->corename[0] = '\0';
+
+	if (ispipe)
+		++pat_ptr;
 
 	/* Repeat as long as we have more pattern to process and more output
 	   space */
 	while (*pat_ptr) {
 		if (*pat_ptr != '%') {
-			if (*pat_ptr == 0)
-				goto out;
 			err = cn_printf(cn, "%c", *pat_ptr++);
 		} else {
 			switch (*++pat_ptr) {
@@ -210,22 +214,16 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
 				break;
 			}
 			/* hostname */
-			case 'h': {
-				char *namestart = cn->corename + cn->used;
+			case 'h':
 				down_read(&uts_sem);
-				err = cn_printf(cn, "%s",
+				err = cn_esc_printf(cn, "%s",
 					      utsname()->nodename);
 				up_read(&uts_sem);
-				cn_escape(namestart);
 				break;
-			}
 			/* executable */
-			case 'e': {
-				char *commstart = cn->corename + cn->used;
-				err = cn_printf(cn, "%s", current->comm);
-				cn_escape(commstart);
+			case 'e':
+				err = cn_esc_printf(cn, "%s", current->comm);
 				break;
-			}
 			case 'E':
 				err = cn_print_exe_file(cn);
 				break;
@@ -244,6 +242,7 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
 		return err;
 	}
 
+out:
 	/* Backward compatibility with core_uses_pid:
 	 *
 	 * If core_pattern does not include a %p (as is the default)
@@ -254,7 +253,6 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
 		if (err)
 			return err;
 	}
-out:
 	return ispipe;
 }
 
@@ -549,7 +547,7 @@ void do_coredump(siginfo_t *siginfo)
 	if (ispipe < 0) {
 		printk(KERN_WARNING "format_corename failed\n");
 		printk(KERN_WARNING "Aborting core\n");
-		goto fail_corename;
+		goto fail_unlock;
 	}
 
 	if (cprm.limit == 1) {
@@ -584,7 +582,7 @@ void do_coredump(siginfo_t *siginfo)
 			goto fail_dropcount;
 		}
 
-		helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
+		helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
 		if (!helper_argv) {
 			printk(KERN_WARNING "%s failed to allocate memory\n",
 			       __func__);
@@ -601,7 +599,7 @@ void do_coredump(siginfo_t *siginfo)
 
 		argv_free(helper_argv);
 		if (retval) {
-			printk(KERN_INFO "Core dump to %s pipe failed\n",
+			printk(KERN_INFO "Core dump to |%s pipe failed\n",
 			       cn.corename);
 			goto close_fail;
 		}
@@ -669,7 +667,6 @@ fail_dropcount:
 	atomic_dec(&core_dump_count);
 fail_unlock:
 	kfree(cn.corename);
-fail_corename:
 	coredump_finish(mm, core_dumped);
 	revert_creds(old_cred);
 fail_creds:
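
The rewritten cn_vprintf() relies on vsnprintf() returning the length the output would have needed, so one krealloc()-and-retry always suffices. The same pattern ports directly to userspace; this sketch drops the kernel-only details (ksize(), the core_name_size bookkeeping) and restarts the va_list on each attempt, which standard C requires:

    #include <stdarg.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct core_name { char *corename; int used, size; };

    static int expand_corename(struct core_name *cn, int size)
    {
        char *p = realloc(cn->corename, size);

        if (!p)
            return -1;
        cn->corename = p;
        cn->size = size;
        return 0;
    }

    /* grow-and-retry append: vsnprintf() reports the needed room */
    static int cn_printf(struct core_name *cn, const char *fmt, ...)
    {
        va_list arg;
        int free_space, need;

    again:
        free_space = cn->size - cn->used;
        va_start(arg, fmt);
        need = vsnprintf(cn->corename + cn->used, free_space, fmt, arg);
        va_end(arg);
        if (need < free_space) {
            cn->used += need;
            return 0;
        }
        if (!expand_corename(cn, cn->size + need - free_space + 1))
            goto again;
        return -1;
    }

    int main(void)
    {
        struct core_name cn = { 0 };

        if (expand_corename(&cn, 8))
            return 1;
        cn.corename[0] = '\0';
        cn_printf(&cn, "core.%s.%d", "a-rather-long-executable-name", 12345);
        printf("%s (size grew to %d)\n", cn.corename, cn.size);
        free(cn.corename);
        return 0;
    }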
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0cff4434880d..9ad17b15b454 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1977,8 +1977,8 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
 			return -EINVAL;
 		if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
 			return -EFAULT;
-		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
-		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+		sigsaved = current->blocked;
+		set_current_blocked(&ksigmask);
 	}
 
 	error = sys_epoll_wait(epfd, events, maxevents, timeout);
@@ -1995,7 +1995,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
 			       sizeof(sigsaved));
 			set_restore_sigmask();
 		} else
-			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+			set_current_blocked(&sigsaved);
 	}
 
 	return error;
@@ -2022,8 +2022,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 		if (copy_from_user(&csigmask, sigmask, sizeof(csigmask)))
 			return -EFAULT;
 		sigset_from_compat(&ksigmask, &csigmask);
-		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
-		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+		sigsaved = current->blocked;
+		set_current_blocked(&ksigmask);
 	}
 
 	err = sys_epoll_wait(epfd, events, maxevents, timeout);
@@ -2040,7 +2040,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 			       sizeof(sigsaved));
 			set_restore_sigmask();
 		} else
-			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+			set_current_blocked(&sigsaved);
 	}
 
 	return err;
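
Both epoll_pwait variants keep the same shape: save the current blocked mask into sigsaved, install the caller's mask for the duration of the wait, then either defer restoration via set_restore_sigmask() or restore immediately. The change swaps raw sigprocmask() calls for set_current_blocked(), which strips SIGKILL/SIGSTOP itself (hence the explicit sigdelsetmask() disappears). As a loose userspace analogue of the save/install/restore shape only:

    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        sigset_t newmask, saved;

        sigemptyset(&newmask);
        sigaddset(&newmask, SIGINT);    /* block SIGINT during the "wait" */

        sigprocmask(SIG_SETMASK, &newmask, &saved);  /* sigsaved = blocked */
        printf("SIGINT blocked; pretending to wait...\n");
        sleep(1);                                    /* the wait */
        sigprocmask(SIG_SETMASK, &saved, NULL);      /* restore */
        printf("original mask restored\n");
        return 0;
    }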
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -932,6 +932,7 @@ static int de_thread(struct task_struct *tsk)
 		 * also take its birthdate (always earlier than our own).
 		 */
 		tsk->start_time = leader->start_time;
+		tsk->real_start_time = leader->real_start_time;
 
 		BUG_ON(!same_thread_group(leader, tsk));
 		BUG_ON(has_group_leader_pid(tsk));
@@ -947,9 +948,8 @@ static int de_thread(struct task_struct *tsk)
 		 * Note: The old leader also uses this pid until release_task
 		 * is called. Odd but simple and correct.
 		 */
-		detach_pid(tsk, PIDTYPE_PID);
 		tsk->pid = leader->pid;
-		attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
+		change_pid(tsk, PIDTYPE_PID, task_pid(leader));
 		transfer_pid(leader, tsk, PIDTYPE_PGID);
 		transfer_pid(leader, tsk, PIDTYPE_SID);
 
@@ -1465,7 +1465,6 @@ static int do_execve_common(const char *filename,
 	struct files_struct *displaced;
 	bool clear_in_exec;
 	int retval;
-	const struct cred *cred = current_cred();
 
 	/*
 	 * We move the actual failure in case of RLIMIT_NPROC excess from
@@ -1474,7 +1473,7 @@ static int do_execve_common(const char *filename,
 	 * whether NPROC limit is still exceeded.
 	 */
 	if ((current->flags & PF_NPROC_EXCEEDED) &&
-	    atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) {
+	    atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
 		retval = -EAGAIN;
 		goto out_ret;
 	}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f67668f724ba..2bd85486b879 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1985,6 +1985,7 @@ static const struct address_space_operations ext3_ordered_aops = {
 	.direct_IO = ext3_direct_IO,
 	.migratepage = buffer_migrate_page,
 	.is_partially_uptodate = block_is_partially_uptodate,
+	.is_dirty_writeback = buffer_check_dirty_writeback,
 	.error_remove_page = generic_error_remove_page,
 };
 
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 359d307b5507..628e22a5a543 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -30,7 +30,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
 		va_start(args, fmt);
 		vaf.fmt = fmt;
 		vaf.va = &args;
-		printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf);
+		fat_msg(sb, KERN_ERR, "error, %pV", &vaf);
 		va_end(args);
 	}
 
@@ -38,8 +38,7 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
 		panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id);
 	else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
 		sb->s_flags |= MS_RDONLY;
-		printk(KERN_ERR "FAT-fs (%s): Filesystem has been "
-		       "set read-only\n", sb->s_id);
+		fat_msg(sb, KERN_ERR, "Filesystem has been set read-only");
 	}
 }
 EXPORT_SYMBOL_GPL(__fat_fs_error);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9a0cdde14a08..0b578598c6ac 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -785,7 +785,7 @@ static const struct super_operations fuse_super_operations = {
 static void sanitize_global_limit(unsigned *limit)
 {
 	if (*limit == 0)
-		*limit = ((num_physpages << PAGE_SHIFT) >> 13) /
+		*limit = ((totalram_pages << PAGE_SHIFT) >> 13) /
 			 sizeof(struct fuse_req);
 
 	if (*limit >= 1 << 16)
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index fc90ab11c340..4338ff32959d 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -69,7 +69,7 @@ static char *dentry_name(struct dentry *dentry, int extra)
 	struct dentry *parent;
 	char *root, *name;
 	const char *seg_name;
-	int len, seg_len;
+	int len, seg_len, root_len;
 
 	len = 0;
 	parent = dentry;
@@ -81,7 +81,8 @@ static char *dentry_name(struct dentry *dentry, int extra)
 	}
 
 	root = "proc";
-	len += strlen(root);
+	root_len = strlen(root);
+	len += root_len;
 	name = kmalloc(len + extra + 1, GFP_KERNEL);
 	if (name == NULL)
 		return NULL;
@@ -91,7 +92,7 @@ static char *dentry_name(struct dentry *dentry, int extra)
 	while (parent->d_parent != parent) {
 		if (is_pid(parent)) {
 			seg_name = "pid";
-			seg_len = strlen("pid");
+			seg_len = strlen(seg_name);
 		}
 		else {
 			seg_name = parent->d_name.name;
@@ -100,10 +101,10 @@ static char *dentry_name(struct dentry *dentry, int extra)
 
 		len -= seg_len + 1;
 		name[len] = '/';
-		strncpy(&name[len + 1], seg_name, seg_len);
+		memcpy(&name[len + 1], seg_name, seg_len);
 		parent = parent->d_parent;
 	}
-	strncpy(name, root, strlen(root));
+	memcpy(name, root, root_len);
 	return name;
 }
 
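
The hppfs conversions are safe because every copy length is computed with strlen() on the source immediately beforehand, so strncpy()'s zero-padding and missing-terminator semantics contribute nothing; memcpy() states the intent exactly, with the terminator handled explicitly where one is needed. For instance:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char name[16];
        const char *seg = "pid";
        size_t seg_len = strlen(seg);

        /* when the copy length IS the source length, strncpy(dst, src, n)
         * degenerates to memcpy and only obscures that no '\0' is copied */
        memcpy(name, seg, seg_len);
        name[seg_len] = '\0';
        printf("%s\n", name);
        return 0;
    }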
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index a2aa97d45670..10d6c41aecad 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -305,7 +305,7 @@ static int lockd_start_svc(struct svc_serv *serv)
 	svc_sock_update_bufs(serv);
 	serv->sv_maxconn = nlm_max_connections;
 
-	nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name);
+	nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name);
 	if (IS_ERR(nlmsvc_task)) {
 		error = PTR_ERR(nlmsvc_task);
 		printk(KERN_WARNING
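
kthread_run() forwards its trailing arguments to a printf-style name formatter, so passing serv->sv_name directly lets any '%' in the name be parsed as a conversion specifier; routing it through a literal "%s" closes that off. The same fix recurs in the nfs hunks below, and it is the same bug class as printf(s) versus printf("%s", s):

    #include <stdio.h>

    int main(void)
    {
        const char *sv_name = "lockd-%d";    /* a name containing '%' */
        char out[64];

        /* wrong: the name itself becomes the format string, so "%d"
         * would consume a garbage argument (undefined behaviour):
         *     snprintf(out, sizeof(out), sv_name);
         */

        /* right: equivalent of kthread_run(..., "%s", serv->sv_name) */
        snprintf(out, sizeof(out), "%s", sv_name);
        printf("%s\n", out);
        return 0;
    }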
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index ee24df5af1f9..3c5dd55d284c 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -117,7 +117,7 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EINVAL;
 	/* we do not support files bigger than 4GB... We eventually
 	   supports just 4GB... */
-	if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff
+	if (vma_pages(vma) + vma->vm_pgoff
 	   > (1U << (32 - PAGE_SHIFT)))
 		return -EFBIG;
 
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index cff089a412c7..da6a43d19aa3 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -211,7 +211,6 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
 	struct svc_rqst *rqstp;
 	int (*callback_svc)(void *vrqstp);
 	struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
-	char svc_name[12];
 	int ret;
 
 	nfs_callback_bc_serv(minorversion, xprt, serv);
@@ -235,10 +234,10 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
 
 	svc_sock_update_bufs(serv);
 
-	sprintf(svc_name, "nfsv4.%u-svc", minorversion);
 	cb_info->serv = serv;
 	cb_info->rqst = rqstp;
-	cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name);
+	cb_info->task = kthread_run(callback_svc, cb_info->rqst,
+				    "nfsv4.%u-svc", minorversion);
 	if (IS_ERR(cb_info->task)) {
 		ret = PTR_ERR(cb_info->task);
 		svc_exit_thread(cb_info->rqst);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5d051419527b..d7ed697133f0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,6 +33,7 @@
 #include <linux/pagevec.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/swap.h>
 #include <linux/sched.h>
 #include <linux/kmemleak.h>
 #include <linux/xattr.h>
@@ -1758,7 +1759,6 @@ EXPORT_SYMBOL_GPL(nfs_unlink);
  */
 int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
-	struct pagevec lru_pvec;
 	struct page *page;
 	char *kaddr;
 	struct iattr attr;
@@ -1798,11 +1798,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 	 * No big deal if we can't add this page to the page cache here.
 	 * READLINK will get the missing page from the server if needed.
 	 */
-	pagevec_init(&lru_pvec, 0);
-	if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
+	if (!add_to_page_cache_lru(page, dentry->d_inode->i_mapping, 0,
 					GFP_KERNEL)) {
-		pagevec_add(&lru_pvec, page);
-		pagevec_lru_add_file(&lru_pvec);
 		SetPageUptodate(page);
 		unlock_page(page);
 	} else
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6b4a79f4ad1d..94e94bd11aae 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -495,6 +495,35 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
 	return nfs_fscache_release_page(page, gfp);
 }
 
+static void nfs_check_dirty_writeback(struct page *page,
+				bool *dirty, bool *writeback)
+{
+	struct nfs_inode *nfsi;
+	struct address_space *mapping = page_file_mapping(page);
+
+	if (!mapping || PageSwapCache(page))
+		return;
+
+	/*
+	 * Check if an unstable page is currently being committed and
+	 * if so, have the VM treat it as if the page is under writeback
+	 * so it will not block due to pages that will shortly be freeable.
+	 */
+	nfsi = NFS_I(mapping->host);
+	if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) {
+		*writeback = true;
+		return;
+	}
+
+	/*
+	 * If PagePrivate() is set, then the page is not freeable and as the
+	 * inode is not being committed, it's not going to be cleaned in the
+	 * near future so treat it as dirty
+	 */
+	if (PagePrivate(page))
+		*dirty = true;
+}
+
 /*
  * Attempt to clear the private state associated with a page when an error
  * occurs that requires the cached contents of an inode to be written back or
@@ -542,6 +571,7 @@ const struct address_space_operations nfs_file_aops = {
 	.direct_IO = nfs_direct_IO,
 	.migratepage = nfs_migrate_page,
 	.launder_page = nfs_launder_page,
+	.is_dirty_writeback = nfs_check_dirty_writeback,
 	.error_remove_page = generic_error_remove_page,
 #ifdef CONFIG_NFS_SWAP
 	.swap_activate = nfs_swap_activate,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ff10b4aa534c..55418811a55a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1194,7 +1194,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
 	snprintf(buf, sizeof(buf), "%s-manager",
 			rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
 	rcu_read_unlock();
-	task = kthread_run(nfs4_run_state_manager, clp, buf);
+	task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
 	if (IS_ERR(task)) {
 		printk(KERN_ERR "%s: kthread_run: %ld\n",
 			__func__, PTR_ERR(task));
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index eed4d7b26249..741fd02e0444 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -398,6 +398,69 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
 }
 
 /**
+ * nilfs_palloc_count_desc_blocks - count descriptor blocks number
+ * @inode: inode of metadata file using this allocator
+ * @desc_blocks: descriptor blocks number [out]
+ */
+static int nilfs_palloc_count_desc_blocks(struct inode *inode,
+					    unsigned long *desc_blocks)
+{
+	unsigned long blknum;
+	int ret;
+
+	ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum);
+	if (likely(!ret))
+		*desc_blocks = DIV_ROUND_UP(
+			blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block);
+	return ret;
+}
+
+/**
+ * nilfs_palloc_mdt_file_can_grow - check potential opportunity for
+ *					MDT file growing
+ * @inode: inode of metadata file using this allocator
+ * @desc_blocks: known current descriptor blocks count
+ */
+static inline bool nilfs_palloc_mdt_file_can_grow(struct inode *inode,
+						    unsigned long desc_blocks)
+{
+	return (nilfs_palloc_groups_per_desc_block(inode) * desc_blocks) <
+			nilfs_palloc_groups_count(inode);
+}
+
+/**
+ * nilfs_palloc_count_max_entries - count max number of entries that can be
+ *					described by descriptor blocks count
+ * @inode: inode of metadata file using this allocator
+ * @nused: current number of used entries
+ * @nmaxp: max number of entries [out]
+ */
+int nilfs_palloc_count_max_entries(struct inode *inode, u64 nused, u64 *nmaxp)
+{
+	unsigned long desc_blocks = 0;
+	u64 entries_per_desc_block, nmax;
+	int err;
+
+	err = nilfs_palloc_count_desc_blocks(inode, &desc_blocks);
+	if (unlikely(err))
+		return err;
+
+	entries_per_desc_block = (u64)nilfs_palloc_entries_per_group(inode) *
+				nilfs_palloc_groups_per_desc_block(inode);
+	nmax = entries_per_desc_block * desc_blocks;
+
+	if (nused == nmax &&
+			nilfs_palloc_mdt_file_can_grow(inode, desc_blocks))
+		nmax += entries_per_desc_block;
+
+	if (nused > nmax)
+		return -ERANGE;
+
+	*nmaxp = nmax;
+	return 0;
+}
+
+/**
 * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
 * @inode: inode of metadata file using this allocator
 * @req: nilfs_palloc_req structure exchanged for the allocation
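
nilfs_palloc_count_max_entries() turns the allocator geometry into an inode ceiling: entries per group, times groups per descriptor block, times the number of descriptor blocks, with one extra descriptor block's worth granted when the file is exactly full but can still grow. A standalone sketch of that arithmetic with made-up geometry constants (the real values come from the nilfs_palloc_* accessors and a bmap lookup):

    #include <inttypes.h>
    #include <stdio.h>

    /* toy numbers standing in for the nilfs_palloc_* accessors */
    #define ENTRIES_PER_GROUP       16384ULL
    #define GROUPS_PER_DESC_BLOCK   512ULL

    static int count_max_entries(uint64_t nused, unsigned long desc_blocks,
                                 int can_grow, uint64_t *nmaxp)
    {
        uint64_t per_desc = ENTRIES_PER_GROUP * GROUPS_PER_DESC_BLOCK;
        uint64_t nmax = per_desc * desc_blocks;

        if (nused == nmax && can_grow)
            nmax += per_desc;    /* room for one more descriptor block */
        if (nused > nmax)
            return -1;           /* -ERANGE in the kernel */
        *nmaxp = nmax;
        return 0;
    }

    int main(void)
    {
        uint64_t nmax;

        if (!count_max_entries(8000000, 1, 1, &nmax))
            printf("max inodes: %" PRIu64 "\n", nmax);
        return 0;
    }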
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index fb7238100548..4bd6451b5703 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -48,6 +48,8 @@ int nilfs_palloc_get_entry_block(struct inode *, __u64, int,
 void *nilfs_palloc_block_get_entry(const struct inode *, __u64,
 				   const struct buffer_head *, void *);
 
+int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *);
+
 /**
  * nilfs_palloc_req - persistent allocator request and reply
  * @pr_entry_nr: entry number (vblocknr or inode number)
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index d8e65bde083c..6548c7851b48 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -160,6 +160,28 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
 }
 
 /**
+ * nilfs_ifile_count_free_inodes - calculate free inodes count
+ * @ifile: ifile inode
+ * @nmaxinodes: current maximum of available inodes count [out]
+ * @nfreeinodes: free inodes count [out]
+ */
+int nilfs_ifile_count_free_inodes(struct inode *ifile,
+				    u64 *nmaxinodes, u64 *nfreeinodes)
+{
+	u64 nused;
+	int err;
+
+	*nmaxinodes = 0;
+	*nfreeinodes = 0;
+
+	nused = atomic64_read(&NILFS_I(ifile)->i_root->inodes_count);
+	err = nilfs_palloc_count_max_entries(ifile, nused, nmaxinodes);
+	if (likely(!err))
+		*nfreeinodes = *nmaxinodes - nused;
+	return err;
+}
+
+/**
 * nilfs_ifile_read - read or get ifile inode
 * @sb: super block instance
 * @root: root object
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index 59b6f2b51df6..679674d13372 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -49,6 +49,8 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
 int nilfs_ifile_delete_inode(struct inode *, ino_t);
 int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
 
+int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *);
+
 int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
 		     size_t inode_size, struct nilfs_inode *raw_inode,
 		     struct inode **inodep);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index bccfec8343c5..b1a5277cfd18 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -54,7 +54,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n)
 
 	inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
 	if (root)
-		atomic_add(n, &root->blocks_count);
+		atomic64_add(n, &root->blocks_count);
 }
 
 void nilfs_inode_sub_blocks(struct inode *inode, int n)
@@ -63,7 +63,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n)
 
 	inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
 	if (root)
-		atomic_sub(n, &root->blocks_count);
+		atomic64_sub(n, &root->blocks_count);
 }
 
 /**
@@ -369,7 +369,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 		goto failed_ifile_create_inode;
 	/* reference count of i_bh inherits from nilfs_mdt_read_block() */
 
-	atomic_inc(&root->inodes_count);
+	atomic64_inc(&root->inodes_count);
 	inode_init_owner(inode, dir, mode);
 	inode->i_ino = ino;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -801,7 +801,7 @@ void nilfs_evict_inode(struct inode *inode)
 
 	ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
 	if (!ret)
-		atomic_dec(&ii->i_root->inodes_count);
+		atomic64_dec(&ii->i_root->inodes_count);
 
 	nilfs_clear_inode(inode);
 
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index a5752a589932..bd88a7461063 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -835,9 +835,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
 		raw_cp->cp_snapshot_list.ssl_next = 0;
 		raw_cp->cp_snapshot_list.ssl_prev = 0;
 		raw_cp->cp_inodes_count =
-			cpu_to_le64(atomic_read(&sci->sc_root->inodes_count));
+			cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
 		raw_cp->cp_blocks_count =
-			cpu_to_le64(atomic_read(&sci->sc_root->blocks_count));
+			cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
 		raw_cp->cp_nblk_inc =
 			cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
 		raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index c7d1f9f18b09..1427de5ebf4d 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -554,8 +554,10 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
 	if (err)
 		goto failed_bh;
 
-	atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count));
-	atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count));
+	atomic64_set(&root->inodes_count,
+		     le64_to_cpu(raw_cp->cp_inodes_count));
+	atomic64_set(&root->blocks_count,
+		     le64_to_cpu(raw_cp->cp_blocks_count));
 
 	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
 
@@ -609,6 +611,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	unsigned long overhead;
 	unsigned long nrsvblocks;
 	sector_t nfreeblocks;
+	u64 nmaxinodes, nfreeinodes;
 	int err;
 
 	/*
@@ -633,14 +636,34 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	if (unlikely(err))
 		return err;
 
+	err = nilfs_ifile_count_free_inodes(root->ifile,
+					&nmaxinodes, &nfreeinodes);
+	if (unlikely(err)) {
+		printk(KERN_WARNING
+			"NILFS warning: fail to count free inodes: err %d.\n",
+			err);
+		if (err == -ERANGE) {
+			/*
+			 * If nilfs_palloc_count_max_entries() returns
+			 * -ERANGE error code then we simply treat
+			 * curent inodes count as maximum possible and
+			 * zero as free inodes value.
+			 */
+			nmaxinodes = atomic64_read(&root->inodes_count);
+			nfreeinodes = 0;
+			err = 0;
+		} else
+			return err;
+	}
+
 	buf->f_type = NILFS_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = blocks - overhead;
 	buf->f_bfree = nfreeblocks;
 	buf->f_bavail = (buf->f_bfree >= nrsvblocks) ?
 		(buf->f_bfree - nrsvblocks) : 0;
-	buf->f_files = atomic_read(&root->inodes_count);
-	buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */
+	buf->f_files = nmaxinodes;
+	buf->f_ffree = nfreeinodes;
 	buf->f_namelen = NILFS_NAME_LEN;
 	buf->f_fsid.val[0] = (u32)id;
 	buf->f_fsid.val[1] = (u32)(id >> 32);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 41e6a04a561f..94c451ce6d24 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -764,8 +764,8 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno)
 	new->ifile = NULL;
 	new->nilfs = nilfs;
 	atomic_set(&new->count, 1);
-	atomic_set(&new->inodes_count, 0);
-	atomic_set(&new->blocks_count, 0);
+	atomic64_set(&new->inodes_count, 0);
+	atomic64_set(&new->blocks_count, 0);
 
 	rb_link_node(&new->rb_node, parent, p);
 	rb_insert_color(&new->rb_node, &nilfs->ns_cptree);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index be1267a34cea..de8cc53b4a5c 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -241,8 +241,8 @@ struct nilfs_root {
 	struct the_nilfs *nilfs;
 	struct inode *ifile;
 
-	atomic_t inodes_count;
-	atomic_t blocks_count;
+	atomic64_t inodes_count;
+	atomic64_t blocks_count;
 };
 
 /* Special checkpoint number */
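
The nilfs2 changes above widen inodes_count and blocks_count from atomic_t (32-bit) to atomic64_t: a checkpoint on a large volume can track more than 2^32 blocks, at which point a 32-bit counter silently wraps and statfs reports garbage. A C11 userspace sketch of the failure mode (not kernel atomics):

    #include <inttypes.h>
    #include <stdatomic.h>
    #include <stdio.h>

    int main(void)
    {
        /* a 32-bit counter wraps once the count no longer fits... */
        atomic_uint small = 4294967295u;
        /* ...a 64-bit counter keeps going, which is the point of the
         * atomic_t -> atomic64_t switch for the per-root counters */
        atomic_ullong big = 4294967295ull;

        atomic_fetch_add(&small, 1);
        atomic_fetch_add(&big, 1);
        printf("32-bit: %u\n", atomic_load(&small));    /* 0: wrapped */
        printf("64-bit: %llu\n", atomic_load(&big));    /* 4294967296 */
        return 0;
    }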
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b8a9d87231b1..17e6bdde96c5 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5655,7 +5655,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 					       &ref_tree, NULL);
 		if (ret) {
 			mlog_errno(ret);
-			goto out;
+			goto bail;
 		}
 
 		ret = ocfs2_prepare_refcount_change_for_del(inode,
@@ -5666,7 +5666,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 							       &extra_blocks);
 		if (ret < 0) {
 			mlog_errno(ret);
-			goto out;
+			goto bail;
 		}
 	}
 
@@ -5674,7 +5674,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
 					      extra_blocks);
 	if (ret) {
 		mlog_errno(ret);
-		return ret;
+		goto bail;
 	}
 
 	mutex_lock(&tl_inode->i_mutex);
@@ -5734,7 +5734,7 @@ out_commit:
 	ocfs2_commit_trans(osb, handle);
 out:
 	mutex_unlock(&tl_inode->i_mutex);
-
+bail:
 	if (meta_ac)
 		ocfs2_free_alloc_context(meta_ac);
 
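
The ocfs2 fix above adds a second cleanup label so that failures occurring before mutex_lock() free meta_ac without running the unlock at "out:"; the labels form an ordered ladder where each entry point skips the teardown for resources not yet taken. A compact userspace sketch of the pattern, with malloc/free and a print standing in for the allocator context and the lock:

    #include <stdio.h>
    #include <stdlib.h>

    static int do_work(int fail_early)
    {
        int ret = 0;
        void *meta_ac = malloc(32);    /* ocfs2_lock_allocators() */

        if (!meta_ac)
            return -1;

        if (fail_early) {
            ret = -1;
            goto bail;                 /* not locked yet: skip "out" */
        }

        printf("locked\n");            /* mutex_lock(&tl_inode->i_mutex) */
        /* ... work that, on failure, would jump to out: ... */
    /* out: */
        printf("unlocked\n");          /* mutex_unlock(...) */
    bail:
        free(meta_ac);                 /* ocfs2_free_alloc_context() */
        return ret;
    }

    int main(void)
    {
        do_work(0);
        do_work(1);
        return 0;
    }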
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 42252bf64b51..5c1c864e81cc 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -176,7 +176,7 @@ static void o2hb_dead_threshold_set(unsigned int threshold) | |||
176 | } | 176 | } |
177 | } | 177 | } |
178 | 178 | ||
179 | static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) | 179 | static int o2hb_global_heartbeat_mode_set(unsigned int hb_mode) |
180 | { | 180 | { |
181 | int ret = -1; | 181 | int ret = -1; |
182 | 182 | ||
@@ -500,7 +500,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, | |||
500 | } | 500 | } |
501 | 501 | ||
502 | atomic_inc(&write_wc->wc_num_reqs); | 502 | atomic_inc(&write_wc->wc_num_reqs); |
503 | submit_bio(WRITE, bio); | 503 | submit_bio(WRITE_SYNC, bio); |
504 | 504 | ||
505 | status = 0; | 505 | status = 0; |
506 | bail: | 506 | bail: |
@@ -2271,7 +2271,7 @@ ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, | |||
2271 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) | 2271 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) |
2272 | continue; | 2272 | continue; |
2273 | 2273 | ||
2274 | ret = o2hb_global_hearbeat_mode_set(i); | 2274 | ret = o2hb_global_heartbeat_mode_set(i); |
2275 | if (!ret) | 2275 | if (!ret) |
2276 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", | 2276 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", |
2277 | o2hb_heartbeat_mode_desc[i]); | 2277 | o2hb_heartbeat_mode_desc[i]); |
@@ -2304,7 +2304,7 @@ static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { | |||
2304 | NULL, | 2304 | NULL, |
2305 | }; | 2305 | }; |
2306 | 2306 | ||
2307 | static struct configfs_item_operations o2hb_hearbeat_group_item_ops = { | 2307 | static struct configfs_item_operations o2hb_heartbeat_group_item_ops = { |
2308 | .show_attribute = o2hb_heartbeat_group_show, | 2308 | .show_attribute = o2hb_heartbeat_group_show, |
2309 | .store_attribute = o2hb_heartbeat_group_store, | 2309 | .store_attribute = o2hb_heartbeat_group_store, |
2310 | }; | 2310 | }; |
@@ -2316,7 +2316,7 @@ static struct configfs_group_operations o2hb_heartbeat_group_group_ops = { | |||
2316 | 2316 | ||
2317 | static struct config_item_type o2hb_heartbeat_group_type = { | 2317 | static struct config_item_type o2hb_heartbeat_group_type = { |
2318 | .ct_group_ops = &o2hb_heartbeat_group_group_ops, | 2318 | .ct_group_ops = &o2hb_heartbeat_group_group_ops, |
2319 | .ct_item_ops = &o2hb_hearbeat_group_item_ops, | 2319 | .ct_item_ops = &o2hb_heartbeat_group_item_ops, |
2320 | .ct_attrs = o2hb_heartbeat_group_attrs, | 2320 | .ct_attrs = o2hb_heartbeat_group_attrs, |
2321 | .ct_owner = THIS_MODULE, | 2321 | .ct_owner = THIS_MODULE, |
2322 | }; | 2322 | }; |
@@ -2389,6 +2389,9 @@ static int o2hb_region_pin(const char *region_uuid) | |||
2389 | assert_spin_locked(&o2hb_live_lock); | 2389 | assert_spin_locked(&o2hb_live_lock); |
2390 | 2390 | ||
2391 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | 2391 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
2392 | if (reg->hr_item_dropped) | ||
2393 | continue; | ||
2394 | |||
2392 | uuid = config_item_name(®->hr_item); | 2395 | uuid = config_item_name(®->hr_item); |
2393 | 2396 | ||
2394 | /* local heartbeat */ | 2397 | /* local heartbeat */ |
@@ -2439,6 +2442,9 @@ static void o2hb_region_unpin(const char *region_uuid) | |||
2439 | assert_spin_locked(&o2hb_live_lock); | 2442 | assert_spin_locked(&o2hb_live_lock); |
2440 | 2443 | ||
2441 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | 2444 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
2445 | if (reg->hr_item_dropped) | ||
2446 | continue; | ||
2447 | |||
2442 | uuid = config_item_name(®->hr_item); | 2448 | uuid = config_item_name(®->hr_item); |
2443 | if (region_uuid) { | 2449 | if (region_uuid) { |
2444 | if (strcmp(region_uuid, uuid)) | 2450 | if (strcmp(region_uuid, uuid)) |
@@ -2654,6 +2660,9 @@ int o2hb_get_all_regions(char *region_uuids, u8 max_regions) | |||
2654 | 2660 | ||
2655 | p = region_uuids; | 2661 | p = region_uuids; |
2656 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | 2662 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { |
2663 | if (reg->hr_item_dropped) | ||
2664 | continue; | ||
2665 | |||
2657 | mlog(0, "Region: %s\n", config_item_name(®->hr_item)); | 2666 | mlog(0, "Region: %s\n", config_item_name(®->hr_item)); |
2658 | if (numregs < max_regions) { | 2667 | if (numregs < max_regions) { |
2659 | memcpy(p, config_item_name(®->hr_item), | 2668 | memcpy(p, config_item_name(®->hr_item), |
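The hr_item_dropped checks added to all three list walkers above skip regions whose configfs item is already being torn down, so pinning, unpinning, and enumeration (all under o2hb_live_lock) never touch a half-dead entry. A plain C sketch of the guarded-walk shape, with a hypothetical region type:

#include <stdio.h>

struct region { const char *uuid; int dropped; struct region *next; };

static void walk(struct region *head)
{
    struct region *r;

    for (r = head; r; r = r->next) {
        if (r->dropped)
            continue;   /* mirrors the new 'if (reg->hr_item_dropped)' */
        printf("Region: %s\n", r->uuid);
    }
}

int main(void)
{
    struct region c = { "uuid-c", 0, NULL };
    struct region b = { "uuid-b", 1, &c };  /* being dropped: skipped */
    struct region a = { "uuid-a", 0, &b };

    walk(&a);
    return 0;
}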
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index c19897d0fe14..1ec141e758d7 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c | |||
@@ -264,7 +264,7 @@ void o2quo_hb_still_up(u8 node) | |||
264 | /* This is analogous to hb_up. as a node's connection comes up we delay the | 264 | /* This is analogous to hb_up. as a node's connection comes up we delay the |
265 | * quorum decision until we see it heartbeating. the hold will be dropped in | 265 | * quorum decision until we see it heartbeating. the hold will be dropped in |
266 | * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if | 266 | * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if |
267 | * it's already heartbeating we we might be dropping a hold that conn_up got. | 267 | * it's already heartbeating we might be dropping a hold that conn_up got. |
268 | * */ | 268 | * */ |
269 | void o2quo_conn_up(u8 node) | 269 | void o2quo_conn_up(u8 node) |
270 | { | 270 | { |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aa88bd8bcedc..d644dc611425 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -406,6 +406,9 @@ static void sc_kref_release(struct kref *kref) | |||
406 | sc->sc_node = NULL; | 406 | sc->sc_node = NULL; |
407 | 407 | ||
408 | o2net_debug_del_sc(sc); | 408 | o2net_debug_del_sc(sc); |
409 | |||
410 | if (sc->sc_page) | ||
411 | __free_page(sc->sc_page); | ||
409 | kfree(sc); | 412 | kfree(sc); |
410 | } | 413 | } |
411 | 414 | ||
@@ -630,19 +633,19 @@ static void o2net_state_change(struct sock *sk) | |||
630 | state_change = sc->sc_state_change; | 633 | state_change = sc->sc_state_change; |
631 | 634 | ||
632 | switch(sk->sk_state) { | 635 | switch(sk->sk_state) { |
633 | /* ignore connecting sockets as they make progress */ | 636 | /* ignore connecting sockets as they make progress */ |
634 | case TCP_SYN_SENT: | 637 | case TCP_SYN_SENT: |
635 | case TCP_SYN_RECV: | 638 | case TCP_SYN_RECV: |
636 | break; | 639 | break; |
637 | case TCP_ESTABLISHED: | 640 | case TCP_ESTABLISHED: |
638 | o2net_sc_queue_work(sc, &sc->sc_connect_work); | 641 | o2net_sc_queue_work(sc, &sc->sc_connect_work); |
639 | break; | 642 | break; |
640 | default: | 643 | default: |
641 | printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT | 644 | printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT |
642 | " shutdown, state %d\n", | 645 | " shutdown, state %d\n", |
643 | SC_NODEF_ARGS(sc), sk->sk_state); | 646 | SC_NODEF_ARGS(sc), sk->sk_state); |
644 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 647 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
645 | break; | 648 | break; |
646 | } | 649 | } |
647 | out: | 650 | out: |
648 | read_unlock(&sk->sk_callback_lock); | 651 | read_unlock(&sk->sk_callback_lock); |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 975810b98492..47e67c2d228f 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
@@ -178,6 +178,7 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, | |||
178 | lock->ml.node); | 178 | lock->ml.node); |
179 | } | 179 | } |
180 | } else { | 180 | } else { |
181 | status = DLM_NORMAL; | ||
181 | dlm_lock_get(lock); | 182 | dlm_lock_get(lock); |
182 | list_add_tail(&lock->list, &res->blocked); | 183 | list_add_tail(&lock->list, &res->blocked); |
183 | kick_thread = 1; | 184 | kick_thread = 1; |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index e68588e6b1e8..773bd32bfd8c 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -55,9 +55,6 @@ | |||
55 | static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); | 55 | static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); |
56 | 56 | ||
57 | static int dlm_recovery_thread(void *data); | 57 | static int dlm_recovery_thread(void *data); |
58 | void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); | ||
59 | int dlm_launch_recovery_thread(struct dlm_ctxt *dlm); | ||
60 | void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); | ||
61 | static int dlm_do_recovery(struct dlm_ctxt *dlm); | 58 | static int dlm_do_recovery(struct dlm_ctxt *dlm); |
62 | 59 | ||
63 | static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); | 60 | static int dlm_pick_recovery_master(struct dlm_ctxt *dlm); |
@@ -789,7 +786,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
789 | u8 dead_node) | 786 | u8 dead_node) |
790 | { | 787 | { |
791 | struct dlm_lock_request lr; | 788 | struct dlm_lock_request lr; |
792 | enum dlm_status ret; | 789 | int ret; |
793 | 790 | ||
794 | mlog(0, "\n"); | 791 | mlog(0, "\n"); |
795 | 792 | ||
@@ -802,7 +799,6 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
802 | lr.dead_node = dead_node; | 799 | lr.dead_node = dead_node; |
803 | 800 | ||
804 | // send message | 801 | // send message |
805 | ret = DLM_NOLOCKMGR; | ||
806 | ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, | 802 | ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, |
807 | &lr, sizeof(lr), request_from, NULL); | 803 | &lr, sizeof(lr), request_from, NULL); |
808 | 804 | ||
@@ -2696,6 +2692,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, | |||
2696 | dlm->name, br->node_idx, br->dead_node, | 2692 | dlm->name, br->node_idx, br->dead_node, |
2697 | dlm->reco.dead_node, dlm->reco.new_master); | 2693 | dlm->reco.dead_node, dlm->reco.new_master); |
2698 | spin_unlock(&dlm->spinlock); | 2694 | spin_unlock(&dlm->spinlock); |
2695 | dlm_put(dlm); | ||
2699 | return -EAGAIN; | 2696 | return -EAGAIN; |
2700 | } | 2697 | } |
2701 | spin_unlock(&dlm->spinlock); | 2698 | spin_unlock(&dlm->spinlock); |
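The added dlm_put() balances the reference the handler takes on entry (via dlm_grab()), which the early -EAGAIN return previously leaked. A toy sketch of the invariant, with hypothetical obj_get()/obj_put() helpers:

#include <stdio.h>

struct obj { int refs; };

static void obj_get(struct obj *o) { o->refs++; }
static void obj_put(struct obj *o) { o->refs--; }

static int handler(struct obj *o, int busy)
{
    obj_get(o);
    if (busy) {
        obj_put(o);     /* the drop the patch adds before -EAGAIN */
        return -11;     /* -EAGAIN */
    }
    /* ... normal request processing ... */
    obj_put(o);
    return 0;
}

int main(void)
{
    struct obj o = { 0 };

    handler(&o, 1);
    printf("refs after busy path: %d\n", o.refs);   /* 0: balanced */
    return 0;
}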
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index a3385b63ff5e..96f9ac237e86 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -200,7 +200,6 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb); | |||
200 | 200 | ||
201 | static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) | 201 | static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) |
202 | { | 202 | { |
203 | atomic_set(&osb->needs_checkpoint, 1); | ||
204 | wake_up(&osb->checkpoint_event); | 203 | wake_up(&osb->checkpoint_event); |
205 | } | 204 | } |
206 | 205 | ||
@@ -538,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, | |||
538 | extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); | 537 | extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); |
539 | 538 | ||
540 | return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + | 539 | return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + |
541 | ocfs2_quota_trans_credits(sb); | 540 | ocfs2_quota_trans_credits(sb) + bits_wanted; |
542 | } | 541 | } |
543 | 542 | ||
544 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) | 543 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) |
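The one-line journal.h fix folds bits_wanted into the extend-credit estimate, presumably so the transaction also covers each block being allocated. The shape of the arithmetic, with made-up component values:

#include <stdio.h>

/* Mirrors the fixed ocfs2_calc_extend_credits() sum; all inputs
 * here are illustrative, not real ocfs2 values. */
static int calc_extend_credits(int bitmap_blocks, int sysfile_bitmap_blocks,
                               int extent_blocks, int quota_credits,
                               int bits_wanted)
{
    /* the patch appends bits_wanted to the previous four terms */
    return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
           quota_credits + bits_wanted;
}

int main(void)
{
    printf("credits: %d\n", calc_extend_credits(2, 1, 3, 4, 8));
    return 0;
}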
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b4a5cdf9dbc5..be3f8676a438 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -522,7 +522,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, | |||
522 | 522 | ||
523 | fe->i_last_eb_blk = 0; | 523 | fe->i_last_eb_blk = 0; |
524 | strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); | 524 | strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); |
525 | le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL); | 525 | fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL); |
526 | fe->i_atime = fe->i_ctime = fe->i_mtime = | 526 | fe->i_atime = fe->i_ctime = fe->i_mtime = |
527 | cpu_to_le64(CURRENT_TIME.tv_sec); | 527 | cpu_to_le64(CURRENT_TIME.tv_sec); |
528 | fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = | 528 | fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = |
@@ -773,7 +773,7 @@ static int ocfs2_remote_dentry_delete(struct dentry *dentry) | |||
773 | return ret; | 773 | return ret; |
774 | } | 774 | } |
775 | 775 | ||
776 | static inline int inode_is_unlinkable(struct inode *inode) | 776 | static inline int ocfs2_inode_is_unlinkable(struct inode *inode) |
777 | { | 777 | { |
778 | if (S_ISDIR(inode->i_mode)) { | 778 | if (S_ISDIR(inode->i_mode)) { |
779 | if (inode->i_nlink == 2) | 779 | if (inode->i_nlink == 2) |
@@ -791,6 +791,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
791 | { | 791 | { |
792 | int status; | 792 | int status; |
793 | int child_locked = 0; | 793 | int child_locked = 0; |
794 | bool is_unlinkable = false; | ||
794 | struct inode *inode = dentry->d_inode; | 795 | struct inode *inode = dentry->d_inode; |
795 | struct inode *orphan_dir = NULL; | 796 | struct inode *orphan_dir = NULL; |
796 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | 797 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); |
@@ -865,7 +866,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
865 | goto leave; | 866 | goto leave; |
866 | } | 867 | } |
867 | 868 | ||
868 | if (inode_is_unlinkable(inode)) { | 869 | if (ocfs2_inode_is_unlinkable(inode)) { |
869 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | 870 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, |
870 | OCFS2_I(inode)->ip_blkno, | 871 | OCFS2_I(inode)->ip_blkno, |
871 | orphan_name, &orphan_insert); | 872 | orphan_name, &orphan_insert); |
@@ -873,6 +874,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
873 | mlog_errno(status); | 874 | mlog_errno(status); |
874 | goto leave; | 875 | goto leave; |
875 | } | 876 | } |
877 | is_unlinkable = true; | ||
876 | } | 878 | } |
877 | 879 | ||
878 | handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb)); | 880 | handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb)); |
@@ -892,15 +894,6 @@ static int ocfs2_unlink(struct inode *dir, | |||
892 | 894 | ||
893 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 895 | fe = (struct ocfs2_dinode *) fe_bh->b_data; |
894 | 896 | ||
895 | if (inode_is_unlinkable(inode)) { | ||
896 | status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name, | ||
897 | &orphan_insert, orphan_dir); | ||
898 | if (status < 0) { | ||
899 | mlog_errno(status); | ||
900 | goto leave; | ||
901 | } | ||
902 | } | ||
903 | |||
904 | /* delete the name from the parent dir */ | 897 | /* delete the name from the parent dir */ |
905 | status = ocfs2_delete_entry(handle, dir, &lookup); | 898 | status = ocfs2_delete_entry(handle, dir, &lookup); |
906 | if (status < 0) { | 899 | if (status < 0) { |
@@ -923,6 +916,14 @@ static int ocfs2_unlink(struct inode *dir, | |||
923 | mlog_errno(status); | 916 | mlog_errno(status); |
924 | if (S_ISDIR(inode->i_mode)) | 917 | if (S_ISDIR(inode->i_mode)) |
925 | inc_nlink(dir); | 918 | inc_nlink(dir); |
919 | goto leave; | ||
920 | } | ||
921 | |||
922 | if (is_unlinkable) { | ||
923 | status = ocfs2_orphan_add(osb, handle, inode, fe_bh, | ||
924 | orphan_name, &orphan_insert, orphan_dir); | ||
925 | if (status < 0) | ||
926 | mlog_errno(status); | ||
926 | } | 927 | } |
927 | 928 | ||
928 | leave: | 929 | leave: |
@@ -2012,6 +2013,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
2012 | goto leave; | 2013 | goto leave; |
2013 | } | 2014 | } |
2014 | 2015 | ||
2016 | /* | ||
2017 | * We're going to journal the change of i_flags and i_orphaned_slot. | ||
2018 | * It's safe anyway, though some callers may duplicate the journaling. | ||
2019 | * Journaling within the func just makes the logic look more | ||
2020 | * straightforward. | ||
2021 | */ | ||
2022 | status = ocfs2_journal_access_di(handle, | ||
2023 | INODE_CACHE(inode), | ||
2024 | fe_bh, | ||
2025 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2026 | if (status < 0) { | ||
2027 | mlog_errno(status); | ||
2028 | goto leave; | ||
2029 | } | ||
2030 | |||
2015 | /* we're a cluster, and nlink can change on disk from | 2031 | /* we're a cluster, and nlink can change on disk from |
2016 | * underneath us... */ | 2032 | * underneath us... */ |
2017 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; | 2033 | orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; |
@@ -2026,25 +2042,10 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
2026 | orphan_dir_bh, lookup); | 2042 | orphan_dir_bh, lookup); |
2027 | if (status < 0) { | 2043 | if (status < 0) { |
2028 | mlog_errno(status); | 2044 | mlog_errno(status); |
2029 | goto leave; | 2045 | goto rollback; |
2030 | } | ||
2031 | |||
2032 | /* | ||
2033 | * We're going to journal the change of i_flags and i_orphaned_slot. | ||
2034 | * It's safe anyway, though some callers may duplicate the journaling. | ||
2035 | * Journaling within the func just makes the logic look more | ||
2036 | * straightforward. | ||
2037 | */ | ||
2038 | status = ocfs2_journal_access_di(handle, | ||
2039 | INODE_CACHE(inode), | ||
2040 | fe_bh, | ||
2041 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2042 | if (status < 0) { | ||
2043 | mlog_errno(status); | ||
2044 | goto leave; | ||
2045 | } | 2046 | } |
2046 | 2047 | ||
2047 | le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); | 2048 | fe->i_flags |= cpu_to_le32(OCFS2_ORPHANED_FL); |
2048 | OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; | 2049 | OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR; |
2049 | 2050 | ||
2050 | /* Record which orphan dir our inode now resides | 2051 | /* Record which orphan dir our inode now resides |
@@ -2057,11 +2058,16 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
2057 | trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno, | 2058 | trace_ocfs2_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno, |
2058 | osb->slot_num); | 2059 | osb->slot_num); |
2059 | 2060 | ||
2061 | rollback: | ||
2062 | if (status < 0) { | ||
2063 | if (S_ISDIR(inode->i_mode)) | ||
2064 | ocfs2_add_links_count(orphan_fe, -1); | ||
2065 | set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe)); | ||
2066 | } | ||
2067 | |||
2060 | leave: | 2068 | leave: |
2061 | brelse(orphan_dir_bh); | 2069 | brelse(orphan_dir_bh); |
2062 | 2070 | ||
2063 | if (status) | ||
2064 | mlog_errno(status); | ||
2065 | return status; | 2071 | return status; |
2066 | } | 2072 | } |
2067 | 2073 | ||
@@ -2434,7 +2440,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
2434 | } | 2440 | } |
2435 | 2441 | ||
2436 | di = (struct ocfs2_dinode *)di_bh->b_data; | 2442 | di = (struct ocfs2_dinode *)di_bh->b_data; |
2437 | le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); | 2443 | di->i_flags &= ~cpu_to_le32(OCFS2_ORPHANED_FL); |
2438 | di->i_orphaned_slot = 0; | 2444 | di->i_orphaned_slot = 0; |
2439 | set_nlink(inode, 1); | 2445 | set_nlink(inode, 1); |
2440 | ocfs2_set_links_count(di, inode->i_nlink); | 2446 | ocfs2_set_links_count(di, inode->i_nlink); |
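The le32_add_cpu() to bitwise-OR/AND conversions in this file reflect that i_flags is a bit mask, not a counter: an arithmetic add corrupts the field whenever the flag is already set (and a subtract whenever it is already clear), whereas OR and AND-NOT are idempotent. A minimal userspace sketch (VALID_FL is a stand-in bit, not the real OCFS2 constant):

#include <stdint.h>
#include <stdio.h>

#define VALID_FL 0x1u   /* stand-in flag bit */

int main(void)
{
    uint32_t flags = VALID_FL;              /* bit already set */

    uint32_t by_add = flags + VALID_FL;     /* 0x2: the bit is lost */
    uint32_t by_or  = flags | VALID_FL;     /* 0x1: idempotent */

    printf("add: %#x  or: %#x\n", by_add, by_or);
    return 0;
}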
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d355e6e36b36..3a903470c794 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -347,7 +347,6 @@ struct ocfs2_super | |||
347 | struct task_struct *recovery_thread_task; | 347 | struct task_struct *recovery_thread_task; |
348 | int disable_recovery; | 348 | int disable_recovery; |
349 | wait_queue_head_t checkpoint_event; | 349 | wait_queue_head_t checkpoint_event; |
350 | atomic_t needs_checkpoint; | ||
351 | struct ocfs2_journal *journal; | 350 | struct ocfs2_journal *journal; |
352 | unsigned long osb_commit_interval; | 351 | unsigned long osb_commit_interval; |
353 | 352 | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b7e74b580c0f..5397c07ce608 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -1422,7 +1422,7 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
1422 | int status; | 1422 | int status; |
1423 | /* there is a really tiny chance the journal calls could fail, | 1423 | /* there is a really tiny chance the journal calls could fail, |
1424 | * but we wouldn't want inconsistent blocks in *any* case. */ | 1424 | * but we wouldn't want inconsistent blocks in *any* case. */ |
1425 | u64 fe_ptr, bg_ptr, prev_bg_ptr; | 1425 | u64 bg_ptr, prev_bg_ptr; |
1426 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | 1426 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; |
1427 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; | 1427 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; |
1428 | struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; | 1428 | struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; |
@@ -1437,51 +1437,44 @@ static int ocfs2_relink_block_group(handle_t *handle, | |||
1437 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | 1437 | (unsigned long long)le64_to_cpu(bg->bg_blkno), |
1438 | (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); | 1438 | (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); |
1439 | 1439 | ||
1440 | fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); | ||
1441 | bg_ptr = le64_to_cpu(bg->bg_next_group); | 1440 | bg_ptr = le64_to_cpu(bg->bg_next_group); |
1442 | prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); | 1441 | prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); |
1443 | 1442 | ||
1444 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), | 1443 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
1445 | prev_bg_bh, | 1444 | prev_bg_bh, |
1446 | OCFS2_JOURNAL_ACCESS_WRITE); | 1445 | OCFS2_JOURNAL_ACCESS_WRITE); |
1447 | if (status < 0) { | 1446 | if (status < 0) |
1448 | mlog_errno(status); | 1447 | goto out; |
1449 | goto out_rollback; | ||
1450 | } | ||
1451 | 1448 | ||
1452 | prev_bg->bg_next_group = bg->bg_next_group; | 1449 | prev_bg->bg_next_group = bg->bg_next_group; |
1453 | ocfs2_journal_dirty(handle, prev_bg_bh); | 1450 | ocfs2_journal_dirty(handle, prev_bg_bh); |
1454 | 1451 | ||
1455 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), | 1452 | status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), |
1456 | bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); | 1453 | bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
1457 | if (status < 0) { | 1454 | if (status < 0) |
1458 | mlog_errno(status); | 1455 | goto out_rollback_prev_bg; |
1459 | goto out_rollback; | ||
1460 | } | ||
1461 | 1456 | ||
1462 | bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; | 1457 | bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; |
1463 | ocfs2_journal_dirty(handle, bg_bh); | 1458 | ocfs2_journal_dirty(handle, bg_bh); |
1464 | 1459 | ||
1465 | status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), | 1460 | status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), |
1466 | fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); | 1461 | fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
1467 | if (status < 0) { | 1462 | if (status < 0) |
1468 | mlog_errno(status); | 1463 | goto out_rollback_bg; |
1469 | goto out_rollback; | ||
1470 | } | ||
1471 | 1464 | ||
1472 | fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; | 1465 | fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; |
1473 | ocfs2_journal_dirty(handle, fe_bh); | 1466 | ocfs2_journal_dirty(handle, fe_bh); |
1474 | 1467 | ||
1475 | out_rollback: | 1468 | out: |
1476 | if (status < 0) { | 1469 | if (status < 0) |
1477 | fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); | ||
1478 | bg->bg_next_group = cpu_to_le64(bg_ptr); | ||
1479 | prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); | ||
1480 | } | ||
1481 | |||
1482 | if (status) | ||
1483 | mlog_errno(status); | 1470 | mlog_errno(status); |
1484 | return status; | 1471 | return status; |
1472 | |||
1473 | out_rollback_bg: | ||
1474 | bg->bg_next_group = cpu_to_le64(bg_ptr); | ||
1475 | out_rollback_prev_bg: | ||
1476 | prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); | ||
1477 | goto out; | ||
1485 | } | 1478 | } |
1486 | 1479 | ||
1487 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, | 1480 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, |
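The restructured error path in ocfs2_relink_block_group() swaps one catch-all rollback for cascading labels that unwind only the steps that actually completed, in reverse order. A self-contained sketch of the idiom; step() and undo() are hypothetical stand-ins for the journal-access-plus-modify pairs:

#include <stdio.h>

static int step(const char *name, int fail)
{
    if (fail)
        return -1;
    printf("do %s\n", name);
    return 0;
}

static void undo(const char *name)
{
    printf("undo %s\n", name);
}

static int relink(int fail_at)
{
    int status;

    status = step("prev_bg", fail_at == 1);
    if (status < 0)
        goto out;                   /* nothing to undo yet */
    status = step("bg", fail_at == 2);
    if (status < 0)
        goto out_rollback_prev_bg;
    status = step("fe", fail_at == 3);
    if (status < 0)
        goto out_rollback_bg;
out:
    return status;

out_rollback_bg:                    /* labels fall through: undo in  */
    undo("bg");                     /* reverse order of the steps    */
out_rollback_prev_bg:               /* that actually completed       */
    undo("prev_bg");
    goto out;
}

int main(void)
{
    relink(3);  /* fails at the third step: undoes bg, then prev_bg */
    return 0;
}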
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 01b85165552b..854d80955bf8 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -286,10 +286,9 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) | |||
286 | spin_unlock(&osb->osb_lock); | 286 | spin_unlock(&osb->osb_lock); |
287 | 287 | ||
288 | out += snprintf(buf + out, len - out, | 288 | out += snprintf(buf + out, len - out, |
289 | "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", | 289 | "%10s => Pid: %d Interval: %lu\n", "Commit", |
290 | (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), | 290 | (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), |
291 | osb->osb_commit_interval, | 291 | osb->osb_commit_interval); |
292 | atomic_read(&osb->needs_checkpoint)); | ||
293 | 292 | ||
294 | out += snprintf(buf + out, len - out, | 293 | out += snprintf(buf + out, len - out, |
295 | "%10s => State: %d TxnId: %lu NumTxns: %d\n", | 294 | "%10s => State: %d TxnId: %lu NumTxns: %d\n", |
@@ -2154,7 +2153,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2154 | } | 2153 | } |
2155 | 2154 | ||
2156 | init_waitqueue_head(&osb->checkpoint_event); | 2155 | init_waitqueue_head(&osb->checkpoint_event); |
2157 | atomic_set(&osb->needs_checkpoint, 0); | ||
2158 | 2156 | ||
2159 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 2157 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
2160 | 2158 | ||
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2e3ea308c144..317ef0abccbb 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -2751,7 +2751,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, | |||
2751 | { | 2751 | { |
2752 | int ret; | 2752 | int ret; |
2753 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 2753 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
2754 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
2755 | struct ocfs2_xa_loc loc; | 2754 | struct ocfs2_xa_loc loc; |
2756 | 2755 | ||
2757 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) | 2756 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) |
@@ -2759,13 +2758,6 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, | |||
2759 | 2758 | ||
2760 | down_write(&oi->ip_alloc_sem); | 2759 | down_write(&oi->ip_alloc_sem); |
2761 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | 2760 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { |
2762 | if (!ocfs2_xattr_has_space_inline(inode, di)) { | ||
2763 | ret = -ENOSPC; | ||
2764 | goto out; | ||
2765 | } | ||
2766 | } | ||
2767 | |||
2768 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | ||
2769 | ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); | 2761 | ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt); |
2770 | if (ret) { | 2762 | if (ret) { |
2771 | if (ret != -ENOSPC) | 2763 | if (ret != -ENOSPC) |
@@ -6499,6 +6491,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) | |||
6499 | } | 6491 | } |
6500 | 6492 | ||
6501 | new_oi = OCFS2_I(args->new_inode); | 6493 | new_oi = OCFS2_I(args->new_inode); |
6494 | /* | ||
6495 | * Adjust extent record count to reserve space for extended attribute. | ||
6496 | * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). | ||
6497 | */ | ||
6498 | if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && | ||
6499 | !(ocfs2_inode_is_fast_symlink(args->new_inode))) { | ||
6500 | struct ocfs2_extent_list *el = &new_di->id2.i_list; | ||
6501 | le16_add_cpu(&el->l_count, -(inline_size / | ||
6502 | sizeof(struct ocfs2_extent_rec))); | ||
6503 | } | ||
6502 | spin_lock(&new_oi->ip_lock); | 6504 | spin_lock(&new_oi->ip_lock); |
6503 | new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; | 6505 | new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; |
6504 | new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); | 6506 | new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); |
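The reflink hunk above shrinks the new inode's extent list by however many extent records the inline xattr area displaces. A worked example of that arithmetic; the on-disk struct ocfs2_extent_rec is 16 bytes, and the other values are hypothetical:

#include <stdio.h>

int main(void)
{
    int inline_size = 256;  /* hypothetical inline xattr area, bytes */
    int rec_size = 16;      /* sizeof(struct ocfs2_extent_rec) on disk */
    int l_count = 227;      /* hypothetical extent record count */

    /* mirrors: le16_add_cpu(&el->l_count, -(inline_size / rec_size)) */
    l_count -= inline_size / rec_size;
    printf("l_count after reserving xattr space: %d\n", l_count);
    return 0;
}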
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 0a22194e5d58..06ea155e1a59 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -408,7 +408,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) | |||
408 | prpsinfo.pr_zomb = 0; | 408 | prpsinfo.pr_zomb = 0; |
409 | 409 | ||
410 | strcpy(prpsinfo.pr_fname, "vmlinux"); | 410 | strcpy(prpsinfo.pr_fname, "vmlinux"); |
411 | strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ); | 411 | strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs)); |
412 | 412 | ||
413 | nhdr->p_filesz += notesize(¬es[1]); | 413 | nhdr->p_filesz += notesize(¬es[1]); |
414 | bufp = storenote(¬es[1], bufp); | 414 | bufp = storenote(¬es[1], bufp); |
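The kcore.c change matters because strncpy() leaves pr_psargs unterminated whenever saved_command_line is at least ELF_PRARGSZ bytes long, while strlcpy() always terminates and truncates. A userspace sketch of the difference; my_strlcpy() is a local stand-in, since strlcpy() is a kernel/BSD API that glibc lacks:

#include <stdio.h>
#include <string.h>

/* Minimal strlcpy() equivalent: always NUL-terminates when size > 0. */
static size_t my_strlcpy(char *dst, const char *src, size_t size)
{
    size_t len = strlen(src);

    if (size) {
        size_t n = len < size - 1 ? len : size - 1;
        memcpy(dst, src, n);
        dst[n] = '\0';
    }
    return len;
}

int main(void)
{
    char a[8], b[8];
    const char *long_args = "a command line longer than 8 bytes";

    strncpy(a, long_args, sizeof(a));    /* fills all 8 bytes, no NUL */
    my_strlcpy(b, long_args, sizeof(b)); /* copies 7 bytes plus NUL */

    printf("strlcpy result: \"%s\"\n", b);  /* safe to print */
    /* printing 'a' with %s here would read past the buffer */
    (void)a;
    return 0;
}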
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3e636d864d56..dbf61f6174f0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/rmap.h> | 11 | #include <linux/rmap.h> |
12 | #include <linux/swap.h> | 12 | #include <linux/swap.h> |
13 | #include <linux/swapops.h> | 13 | #include <linux/swapops.h> |
14 | #include <linux/mmu_notifier.h> | ||
14 | 15 | ||
15 | #include <asm/elf.h> | 16 | #include <asm/elf.h> |
16 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
@@ -688,10 +689,58 @@ const struct file_operations proc_tid_smaps_operations = { | |||
688 | .release = seq_release_private, | 689 | .release = seq_release_private, |
689 | }; | 690 | }; |
690 | 691 | ||
692 | /* | ||
693 | * We do not want to have constant page-shift bits sitting in | ||
694 | * pagemap entries and are about to reuse them some time soon. | ||
695 | * | ||
696 | * Here's the "migration strategy": | ||
697 | * 1. when the system boots these bits remain what they are, | ||
698 | * but a warning about future change is printed in log; | ||
699 | * 2. once anyone clears soft-dirty bits via clear_refs file, | ||
700 | * these flag is set to denote, that user is aware of the | ||
701 | * new API and those page-shift bits change their meaning. | ||
702 | * The respective warning is printed in dmesg; | ||
703 | * 3. In a couple of releases we will remove all the mentions | ||
704 | * of page-shift in pagemap entries. | ||
705 | */ | ||
706 | |||
707 | static bool soft_dirty_cleared __read_mostly; | ||
708 | |||
709 | enum clear_refs_types { | ||
710 | CLEAR_REFS_ALL = 1, | ||
711 | CLEAR_REFS_ANON, | ||
712 | CLEAR_REFS_MAPPED, | ||
713 | CLEAR_REFS_SOFT_DIRTY, | ||
714 | CLEAR_REFS_LAST, | ||
715 | }; | ||
716 | |||
717 | struct clear_refs_private { | ||
718 | struct vm_area_struct *vma; | ||
719 | enum clear_refs_types type; | ||
720 | }; | ||
721 | |||
722 | static inline void clear_soft_dirty(struct vm_area_struct *vma, | ||
723 | unsigned long addr, pte_t *pte) | ||
724 | { | ||
725 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
726 | /* | ||
727 | * The soft-dirty tracker uses #PF-s to catch writes | ||
728 | * to pages, so write-protect the pte as well. See the | ||
729 | * Documentation/vm/soft-dirty.txt for full description | ||
730 | * of how soft-dirty works. | ||
731 | */ | ||
732 | pte_t ptent = *pte; | ||
733 | ptent = pte_wrprotect(ptent); | ||
734 | ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); | ||
735 | set_pte_at(vma->vm_mm, addr, pte, ptent); | ||
736 | #endif | ||
737 | } | ||
738 | |||
691 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | 739 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, |
692 | unsigned long end, struct mm_walk *walk) | 740 | unsigned long end, struct mm_walk *walk) |
693 | { | 741 | { |
694 | struct vm_area_struct *vma = walk->private; | 742 | struct clear_refs_private *cp = walk->private; |
743 | struct vm_area_struct *vma = cp->vma; | ||
695 | pte_t *pte, ptent; | 744 | pte_t *pte, ptent; |
696 | spinlock_t *ptl; | 745 | spinlock_t *ptl; |
697 | struct page *page; | 746 | struct page *page; |
@@ -706,6 +755,11 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
706 | if (!pte_present(ptent)) | 755 | if (!pte_present(ptent)) |
707 | continue; | 756 | continue; |
708 | 757 | ||
758 | if (cp->type == CLEAR_REFS_SOFT_DIRTY) { | ||
759 | clear_soft_dirty(vma, addr, pte); | ||
760 | continue; | ||
761 | } | ||
762 | |||
709 | page = vm_normal_page(vma, addr, ptent); | 763 | page = vm_normal_page(vma, addr, ptent); |
710 | if (!page) | 764 | if (!page) |
711 | continue; | 765 | continue; |
@@ -719,10 +773,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
719 | return 0; | 773 | return 0; |
720 | } | 774 | } |
721 | 775 | ||
722 | #define CLEAR_REFS_ALL 1 | ||
723 | #define CLEAR_REFS_ANON 2 | ||
724 | #define CLEAR_REFS_MAPPED 3 | ||
725 | |||
726 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 776 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
727 | size_t count, loff_t *ppos) | 777 | size_t count, loff_t *ppos) |
728 | { | 778 | { |
@@ -730,7 +780,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
730 | char buffer[PROC_NUMBUF]; | 780 | char buffer[PROC_NUMBUF]; |
731 | struct mm_struct *mm; | 781 | struct mm_struct *mm; |
732 | struct vm_area_struct *vma; | 782 | struct vm_area_struct *vma; |
733 | int type; | 783 | enum clear_refs_types type; |
784 | int itype; | ||
734 | int rv; | 785 | int rv; |
735 | 786 | ||
736 | memset(buffer, 0, sizeof(buffer)); | 787 | memset(buffer, 0, sizeof(buffer)); |
@@ -738,23 +789,37 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
738 | count = sizeof(buffer) - 1; | 789 | count = sizeof(buffer) - 1; |
739 | if (copy_from_user(buffer, buf, count)) | 790 | if (copy_from_user(buffer, buf, count)) |
740 | return -EFAULT; | 791 | return -EFAULT; |
741 | rv = kstrtoint(strstrip(buffer), 10, &type); | 792 | rv = kstrtoint(strstrip(buffer), 10, &itype); |
742 | if (rv < 0) | 793 | if (rv < 0) |
743 | return rv; | 794 | return rv; |
744 | if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) | 795 | type = (enum clear_refs_types)itype; |
796 | if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) | ||
745 | return -EINVAL; | 797 | return -EINVAL; |
798 | |||
799 | if (type == CLEAR_REFS_SOFT_DIRTY) { | ||
800 | soft_dirty_cleared = true; | ||
801 | pr_warn_once("The pagemap bits 55-60 has changed their meaning! " | ||
802 | "See the linux/Documentation/vm/pagemap.txt for details.\n"); | ||
803 | } | ||
804 | |||
746 | task = get_proc_task(file_inode(file)); | 805 | task = get_proc_task(file_inode(file)); |
747 | if (!task) | 806 | if (!task) |
748 | return -ESRCH; | 807 | return -ESRCH; |
749 | mm = get_task_mm(task); | 808 | mm = get_task_mm(task); |
750 | if (mm) { | 809 | if (mm) { |
810 | struct clear_refs_private cp = { | ||
811 | .type = type, | ||
812 | }; | ||
751 | struct mm_walk clear_refs_walk = { | 813 | struct mm_walk clear_refs_walk = { |
752 | .pmd_entry = clear_refs_pte_range, | 814 | .pmd_entry = clear_refs_pte_range, |
753 | .mm = mm, | 815 | .mm = mm, |
816 | .private = &cp, | ||
754 | }; | 817 | }; |
755 | down_read(&mm->mmap_sem); | 818 | down_read(&mm->mmap_sem); |
819 | if (type == CLEAR_REFS_SOFT_DIRTY) | ||
820 | mmu_notifier_invalidate_range_start(mm, 0, -1); | ||
756 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 821 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
757 | clear_refs_walk.private = vma; | 822 | cp.vma = vma; |
758 | if (is_vm_hugetlb_page(vma)) | 823 | if (is_vm_hugetlb_page(vma)) |
759 | continue; | 824 | continue; |
760 | /* | 825 | /* |
@@ -773,6 +838,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
773 | walk_page_range(vma->vm_start, vma->vm_end, | 838 | walk_page_range(vma->vm_start, vma->vm_end, |
774 | &clear_refs_walk); | 839 | &clear_refs_walk); |
775 | } | 840 | } |
841 | if (type == CLEAR_REFS_SOFT_DIRTY) | ||
842 | mmu_notifier_invalidate_range_end(mm, 0, -1); | ||
776 | flush_tlb_mm(mm); | 843 | flush_tlb_mm(mm); |
777 | up_read(&mm->mmap_sem); | 844 | up_read(&mm->mmap_sem); |
778 | mmput(mm); | 845 | mmput(mm); |
@@ -794,6 +861,7 @@ typedef struct { | |||
794 | struct pagemapread { | 861 | struct pagemapread { |
795 | int pos, len; | 862 | int pos, len; |
796 | pagemap_entry_t *buffer; | 863 | pagemap_entry_t *buffer; |
864 | bool v2; | ||
797 | }; | 865 | }; |
798 | 866 | ||
799 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | 867 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) |
@@ -807,14 +875,17 @@ struct pagemapread { | |||
807 | #define PM_PSHIFT_BITS 6 | 875 | #define PM_PSHIFT_BITS 6 |
808 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) | 876 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) |
809 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) | 877 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) |
810 | #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) | 878 | #define __PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) |
811 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) | 879 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) |
812 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) | 880 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) |
881 | /* in "new" pagemap pshift bits are occupied with more status bits */ | ||
882 | #define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT)) | ||
813 | 883 | ||
884 | #define __PM_SOFT_DIRTY (1LL) | ||
814 | #define PM_PRESENT PM_STATUS(4LL) | 885 | #define PM_PRESENT PM_STATUS(4LL) |
815 | #define PM_SWAP PM_STATUS(2LL) | 886 | #define PM_SWAP PM_STATUS(2LL) |
816 | #define PM_FILE PM_STATUS(1LL) | 887 | #define PM_FILE PM_STATUS(1LL) |
817 | #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) | 888 | #define PM_NOT_PRESENT(v2) PM_STATUS2(v2, 0) |
818 | #define PM_END_OF_BUFFER 1 | 889 | #define PM_END_OF_BUFFER 1 |
819 | 890 | ||
820 | static inline pagemap_entry_t make_pme(u64 val) | 891 | static inline pagemap_entry_t make_pme(u64 val) |
@@ -837,7 +908,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
837 | struct pagemapread *pm = walk->private; | 908 | struct pagemapread *pm = walk->private; |
838 | unsigned long addr; | 909 | unsigned long addr; |
839 | int err = 0; | 910 | int err = 0; |
840 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | 911 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
841 | 912 | ||
842 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 913 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
843 | err = add_to_pagemap(addr, &pme, pm); | 914 | err = add_to_pagemap(addr, &pme, pm); |
@@ -847,11 +918,12 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
847 | return err; | 918 | return err; |
848 | } | 919 | } |
849 | 920 | ||
850 | static void pte_to_pagemap_entry(pagemap_entry_t *pme, | 921 | static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
851 | struct vm_area_struct *vma, unsigned long addr, pte_t pte) | 922 | struct vm_area_struct *vma, unsigned long addr, pte_t pte) |
852 | { | 923 | { |
853 | u64 frame, flags; | 924 | u64 frame, flags; |
854 | struct page *page = NULL; | 925 | struct page *page = NULL; |
926 | int flags2 = 0; | ||
855 | 927 | ||
856 | if (pte_present(pte)) { | 928 | if (pte_present(pte)) { |
857 | frame = pte_pfn(pte); | 929 | frame = pte_pfn(pte); |
@@ -866,19 +938,21 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, | |||
866 | if (is_migration_entry(entry)) | 938 | if (is_migration_entry(entry)) |
867 | page = migration_entry_to_page(entry); | 939 | page = migration_entry_to_page(entry); |
868 | } else { | 940 | } else { |
869 | *pme = make_pme(PM_NOT_PRESENT); | 941 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
870 | return; | 942 | return; |
871 | } | 943 | } |
872 | 944 | ||
873 | if (page && !PageAnon(page)) | 945 | if (page && !PageAnon(page)) |
874 | flags |= PM_FILE; | 946 | flags |= PM_FILE; |
947 | if (pte_soft_dirty(pte)) | ||
948 | flags2 |= __PM_SOFT_DIRTY; | ||
875 | 949 | ||
876 | *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags); | 950 | *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); |
877 | } | 951 | } |
878 | 952 | ||
879 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 953 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
880 | static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | 954 | static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
881 | pmd_t pmd, int offset) | 955 | pmd_t pmd, int offset, int pmd_flags2) |
882 | { | 956 | { |
883 | /* | 957 | /* |
884 | * Currently pmd for thp is always present because thp can not be | 958 | * Currently pmd for thp is always present because thp can not be |
@@ -887,13 +961,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | |||
887 | */ | 961 | */ |
888 | if (pmd_present(pmd)) | 962 | if (pmd_present(pmd)) |
889 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | 963 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) |
890 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | 964 | | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); |
891 | else | 965 | else |
892 | *pme = make_pme(PM_NOT_PRESENT); | 966 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
893 | } | 967 | } |
894 | #else | 968 | #else |
895 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | 969 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
896 | pmd_t pmd, int offset) | 970 | pmd_t pmd, int offset, int pmd_flags2) |
897 | { | 971 | { |
898 | } | 972 | } |
899 | #endif | 973 | #endif |
@@ -905,17 +979,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
905 | struct pagemapread *pm = walk->private; | 979 | struct pagemapread *pm = walk->private; |
906 | pte_t *pte; | 980 | pte_t *pte; |
907 | int err = 0; | 981 | int err = 0; |
908 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | 982 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
909 | 983 | ||
910 | /* find the first VMA at or above 'addr' */ | 984 | /* find the first VMA at or above 'addr' */ |
911 | vma = find_vma(walk->mm, addr); | 985 | vma = find_vma(walk->mm, addr); |
912 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { | 986 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { |
987 | int pmd_flags2; | ||
988 | |||
989 | pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); | ||
913 | for (; addr != end; addr += PAGE_SIZE) { | 990 | for (; addr != end; addr += PAGE_SIZE) { |
914 | unsigned long offset; | 991 | unsigned long offset; |
915 | 992 | ||
916 | offset = (addr & ~PAGEMAP_WALK_MASK) >> | 993 | offset = (addr & ~PAGEMAP_WALK_MASK) >> |
917 | PAGE_SHIFT; | 994 | PAGE_SHIFT; |
918 | thp_pmd_to_pagemap_entry(&pme, *pmd, offset); | 995 | thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2); |
919 | err = add_to_pagemap(addr, &pme, pm); | 996 | err = add_to_pagemap(addr, &pme, pm); |
920 | if (err) | 997 | if (err) |
921 | break; | 998 | break; |
@@ -932,7 +1009,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
932 | * and need a new, higher one */ | 1009 | * and need a new, higher one */ |
933 | if (vma && (addr >= vma->vm_end)) { | 1010 | if (vma && (addr >= vma->vm_end)) { |
934 | vma = find_vma(walk->mm, addr); | 1011 | vma = find_vma(walk->mm, addr); |
935 | pme = make_pme(PM_NOT_PRESENT); | 1012 | pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
936 | } | 1013 | } |
937 | 1014 | ||
938 | /* check that 'vma' actually covers this address, | 1015 | /* check that 'vma' actually covers this address, |
@@ -940,7 +1017,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
940 | if (vma && (vma->vm_start <= addr) && | 1017 | if (vma && (vma->vm_start <= addr) && |
941 | !is_vm_hugetlb_page(vma)) { | 1018 | !is_vm_hugetlb_page(vma)) { |
942 | pte = pte_offset_map(pmd, addr); | 1019 | pte = pte_offset_map(pmd, addr); |
943 | pte_to_pagemap_entry(&pme, vma, addr, *pte); | 1020 | pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); |
944 | /* unmap before userspace copy */ | 1021 | /* unmap before userspace copy */ |
945 | pte_unmap(pte); | 1022 | pte_unmap(pte); |
946 | } | 1023 | } |
@@ -955,14 +1032,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
955 | } | 1032 | } |
956 | 1033 | ||
957 | #ifdef CONFIG_HUGETLB_PAGE | 1034 | #ifdef CONFIG_HUGETLB_PAGE |
958 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, | 1035 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
959 | pte_t pte, int offset) | 1036 | pte_t pte, int offset) |
960 | { | 1037 | { |
961 | if (pte_present(pte)) | 1038 | if (pte_present(pte)) |
962 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | 1039 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) |
963 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | 1040 | | PM_STATUS2(pm->v2, 0) | PM_PRESENT); |
964 | else | 1041 | else |
965 | *pme = make_pme(PM_NOT_PRESENT); | 1042 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
966 | } | 1043 | } |
967 | 1044 | ||
968 | /* This function walks within one hugetlb entry in the single call */ | 1045 | /* This function walks within one hugetlb entry in the single call */ |
@@ -976,7 +1053,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
976 | 1053 | ||
977 | for (; addr != end; addr += PAGE_SIZE) { | 1054 | for (; addr != end; addr += PAGE_SIZE) { |
978 | int offset = (addr & ~hmask) >> PAGE_SHIFT; | 1055 | int offset = (addr & ~hmask) >> PAGE_SHIFT; |
979 | huge_pte_to_pagemap_entry(&pme, *pte, offset); | 1056 | huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); |
980 | err = add_to_pagemap(addr, &pme, pm); | 1057 | err = add_to_pagemap(addr, &pme, pm); |
981 | if (err) | 1058 | if (err) |
982 | return err; | 1059 | return err; |
@@ -1038,6 +1115,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
1038 | if (!count) | 1115 | if (!count) |
1039 | goto out_task; | 1116 | goto out_task; |
1040 | 1117 | ||
1118 | pm.v2 = soft_dirty_cleared; | ||
1041 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); | 1119 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); |
1042 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); | 1120 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); |
1043 | ret = -ENOMEM; | 1121 | ret = -ENOMEM; |
@@ -1110,9 +1188,18 @@ out: | |||
1110 | return ret; | 1188 | return ret; |
1111 | } | 1189 | } |
1112 | 1190 | ||
1191 | static int pagemap_open(struct inode *inode, struct file *file) | ||
1192 | { | ||
1193 | pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " | ||
1194 | "to stop being page-shift some time soon. See the " | ||
1195 | "linux/Documentation/vm/pagemap.txt for details.\n"); | ||
1196 | return 0; | ||
1197 | } | ||
1198 | |||
1113 | const struct file_operations proc_pagemap_operations = { | 1199 | const struct file_operations proc_pagemap_operations = { |
1114 | .llseek = mem_lseek, /* borrow this */ | 1200 | .llseek = mem_lseek, /* borrow this */ |
1115 | .read = pagemap_read, | 1201 | .read = pagemap_read, |
1202 | .open = pagemap_open, | ||
1116 | }; | 1203 | }; |
1117 | #endif /* CONFIG_PROC_PAGE_MONITOR */ | 1204 | #endif /* CONFIG_PROC_PAGE_MONITOR */ |
1118 | 1205 | ||
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 9610ac772d7e..061894625903 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c | |||
@@ -20,8 +20,7 @@ static int uptime_proc_show(struct seq_file *m, void *v) | |||
20 | for_each_possible_cpu(i) | 20 | for_each_possible_cpu(i) |
21 | idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; | 21 | idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; |
22 | 22 | ||
23 | do_posix_clock_monotonic_gettime(&uptime); | 23 | get_monotonic_boottime(&uptime); |
24 | monotonic_to_bootbased(&uptime); | ||
25 | nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; | 24 | nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; |
26 | idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); | 25 | idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); |
27 | idle.tv_nsec = rem; | 26 | idle.tv_nsec = rem; |
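The uptime.c hunk collapses the do_posix_clock_monotonic_gettime() plus monotonic_to_bootbased() pair into a single get_monotonic_boottime() call; the property it preserves is that boot-based time keeps advancing across suspend, unlike plain monotonic time. The same distinction is visible from userspace (a small sketch; older glibc needs -lrt for clock_gettime):

#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>

int main(void)
{
    struct timespec mono, boot;

    clock_gettime(CLOCK_MONOTONIC, &mono);
    clock_gettime(CLOCK_BOOTTIME, &boot);   /* counts suspended time too */

    printf("monotonic: %lld.%09ld\n", (long long)mono.tv_sec, mono.tv_nsec);
    printf("boottime:  %lld.%09ld\n", (long long)boot.tv_sec, boot.tv_nsec);
    return 0;
}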
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 17f7e080d7ff..28503172f2e4 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/crash_dump.h> | 21 | #include <linux/crash_dump.h> |
22 | #include <linux/list.h> | 22 | #include <linux/list.h> |
23 | #include <linux/vmalloc.h> | ||
23 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
24 | #include <asm/io.h> | 25 | #include <asm/io.h> |
25 | #include "internal.h" | 26 | #include "internal.h" |
@@ -32,6 +33,10 @@ static LIST_HEAD(vmcore_list); | |||
32 | /* Stores the pointer to the buffer containing kernel elf core headers. */ | 33 | /* Stores the pointer to the buffer containing kernel elf core headers. */ |
33 | static char *elfcorebuf; | 34 | static char *elfcorebuf; |
34 | static size_t elfcorebuf_sz; | 35 | static size_t elfcorebuf_sz; |
36 | static size_t elfcorebuf_sz_orig; | ||
37 | |||
38 | static char *elfnotes_buf; | ||
39 | static size_t elfnotes_sz; | ||
35 | 40 | ||
36 | /* Total size of vmcore file. */ | 41 | /* Total size of vmcore file. */ |
37 | static u64 vmcore_size; | 42 | static u64 vmcore_size; |
@@ -118,27 +123,6 @@ static ssize_t read_from_oldmem(char *buf, size_t count, | |||
118 | return read; | 123 | return read; |
119 | } | 124 | } |
120 | 125 | ||
121 | /* Maps vmcore file offset to respective physical address in memory. */ | ||
122 | static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list, | ||
123 | struct vmcore **m_ptr) | ||
124 | { | ||
125 | struct vmcore *m; | ||
126 | u64 paddr; | ||
127 | |||
128 | list_for_each_entry(m, vc_list, list) { | ||
129 | u64 start, end; | ||
130 | start = m->offset; | ||
131 | end = m->offset + m->size - 1; | ||
132 | if (offset >= start && offset <= end) { | ||
133 | paddr = m->paddr + offset - start; | ||
134 | *m_ptr = m; | ||
135 | return paddr; | ||
136 | } | ||
137 | } | ||
138 | *m_ptr = NULL; | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | /* Read from the ELF header and then the crash dump. On error, negative value is | 126 | /* Read from the ELF header and then the crash dump. On error, negative value is |
143 | * returned, otherwise the number of bytes read is returned. | 127 | * returned, otherwise the number of bytes read is returned. |
144 | */ | 128 | */ |
@@ -147,8 +131,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
147 | { | 131 | { |
148 | ssize_t acc = 0, tmp; | 132 | ssize_t acc = 0, tmp; |
149 | size_t tsz; | 133 | size_t tsz; |
150 | u64 start, nr_bytes; | 134 | u64 start; |
151 | struct vmcore *curr_m = NULL; | 135 | struct vmcore *m = NULL; |
152 | 136 | ||
153 | if (buflen == 0 || *fpos >= vmcore_size) | 137 | if (buflen == 0 || *fpos >= vmcore_size) |
154 | return 0; | 138 | return 0; |
@@ -159,9 +143,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
159 | 143 | ||
160 | /* Read ELF core header */ | 144 | /* Read ELF core header */ |
161 | if (*fpos < elfcorebuf_sz) { | 145 | if (*fpos < elfcorebuf_sz) { |
162 | tsz = elfcorebuf_sz - *fpos; | 146 | tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); |
163 | if (buflen < tsz) | ||
164 | tsz = buflen; | ||
165 | if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) | 147 | if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) |
166 | return -EFAULT; | 148 | return -EFAULT; |
167 | buflen -= tsz; | 149 | buflen -= tsz; |
@@ -174,39 +156,161 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
174 | return acc; | 156 | return acc; |
175 | } | 157 | } |
176 | 158 | ||
177 | start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); | 159 | /* Read Elf note segment */ |
178 | if (!curr_m) | 160 | if (*fpos < elfcorebuf_sz + elfnotes_sz) { |
179 | return -EINVAL; | 161 | void *kaddr; |
180 | |||
181 | while (buflen) { | ||
182 | tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK)); | ||
183 | 162 | ||
184 | /* Calculate left bytes in current memory segment. */ | 163 | tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); |
185 | nr_bytes = (curr_m->size - (start - curr_m->paddr)); | 164 | kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; |
186 | if (tsz > nr_bytes) | 165 | if (copy_to_user(buffer, kaddr, tsz)) |
187 | tsz = nr_bytes; | 166 | return -EFAULT; |
188 | |||
189 | tmp = read_from_oldmem(buffer, tsz, &start, 1); | ||
190 | if (tmp < 0) | ||
191 | return tmp; | ||
192 | buflen -= tsz; | 167 | buflen -= tsz; |
193 | *fpos += tsz; | 168 | *fpos += tsz; |
194 | buffer += tsz; | 169 | buffer += tsz; |
195 | acc += tsz; | 170 | acc += tsz; |
196 | if (start >= (curr_m->paddr + curr_m->size)) { | 171 | |
197 | if (curr_m->list.next == &vmcore_list) | 172 | /* leave now if filled buffer already */ |
198 | return acc; /*EOF*/ | 173 | if (buflen == 0) |
199 | curr_m = list_entry(curr_m->list.next, | 174 | return acc; |
200 | struct vmcore, list); | 175 | } |
201 | start = curr_m->paddr; | 176 | |
177 | list_for_each_entry(m, &vmcore_list, list) { | ||
178 | if (*fpos < m->offset + m->size) { | ||
179 | tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); | ||
180 | start = m->paddr + *fpos - m->offset; | ||
181 | tmp = read_from_oldmem(buffer, tsz, &start, 1); | ||
182 | if (tmp < 0) | ||
183 | return tmp; | ||
184 | buflen -= tsz; | ||
185 | *fpos += tsz; | ||
186 | buffer += tsz; | ||
187 | acc += tsz; | ||
188 | |||
189 | /* leave now if filled buffer already */ | ||
190 | if (buflen == 0) | ||
191 | return acc; | ||
202 | } | 192 | } |
203 | } | 193 | } |
194 | |||
204 | return acc; | 195 | return acc; |
205 | } | 196 | } |
206 | 197 | ||
198 | /** | ||
199 | * alloc_elfnotes_buf - allocate buffer for ELF note segment in | ||
200 | * vmalloc memory | ||
201 | * | ||
202 | * @notes_sz: size of buffer | ||
203 | * | ||
204 | * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap | ||
205 | * the buffer to user-space by means of remap_vmalloc_range(). | ||
206 | * | ||
207 | * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is | ||
208 | * disabled and there's no need to allow users to mmap the buffer. | ||
209 | */ | ||
210 | static inline char *alloc_elfnotes_buf(size_t notes_sz) | ||
211 | { | ||
212 | #ifdef CONFIG_MMU | ||
213 | return vmalloc_user(notes_sz); | ||
214 | #else | ||
215 | return vzalloc(notes_sz); | ||
216 | #endif | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is | ||
221 | * essential for mmap_vmcore() in order to map physically | ||
222 | * non-contiguous objects (ELF header, ELF note segment and memory | ||
223 | * regions in the 1st kernel pointed to by PT_LOAD entries) into | ||
224 | * virtually contiguous user-space in ELF layout. | ||
225 | */ | ||
226 | #ifdef CONFIG_MMU | ||
227 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | ||
228 | { | ||
229 | size_t size = vma->vm_end - vma->vm_start; | ||
230 | u64 start, end, len, tsz; | ||
231 | struct vmcore *m; | ||
232 | |||
233 | start = (u64)vma->vm_pgoff << PAGE_SHIFT; | ||
234 | end = start + size; | ||
235 | |||
236 | if (size > vmcore_size || end > vmcore_size) | ||
237 | return -EINVAL; | ||
238 | |||
239 | if (vma->vm_flags & (VM_WRITE | VM_EXEC)) | ||
240 | return -EPERM; | ||
241 | |||
242 | vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); | ||
243 | vma->vm_flags |= VM_MIXEDMAP; | ||
244 | |||
245 | len = 0; | ||
246 | |||
247 | if (start < elfcorebuf_sz) { | ||
248 | u64 pfn; | ||
249 | |||
250 | tsz = min(elfcorebuf_sz - (size_t)start, size); | ||
251 | pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT; | ||
252 | if (remap_pfn_range(vma, vma->vm_start, pfn, tsz, | ||
253 | vma->vm_page_prot)) | ||
254 | return -EAGAIN; | ||
255 | size -= tsz; | ||
256 | start += tsz; | ||
257 | len += tsz; | ||
258 | |||
259 | if (size == 0) | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | if (start < elfcorebuf_sz + elfnotes_sz) { | ||
264 | void *kaddr; | ||
265 | |||
266 | tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); | ||
267 | kaddr = elfnotes_buf + start - elfcorebuf_sz; | ||
268 | if (remap_vmalloc_range_partial(vma, vma->vm_start + len, | ||
269 | kaddr, tsz)) | ||
270 | goto fail; | ||
271 | size -= tsz; | ||
272 | start += tsz; | ||
273 | len += tsz; | ||
274 | |||
275 | if (size == 0) | ||
276 | return 0; | ||
277 | } | ||
278 | |||
279 | list_for_each_entry(m, &vmcore_list, list) { | ||
280 | if (start < m->offset + m->size) { | ||
281 | u64 paddr = 0; | ||
282 | |||
283 | tsz = min_t(size_t, m->offset + m->size - start, size); | ||
284 | paddr = m->paddr + start - m->offset; | ||
285 | if (remap_pfn_range(vma, vma->vm_start + len, | ||
286 | paddr >> PAGE_SHIFT, tsz, | ||
287 | vma->vm_page_prot)) | ||
288 | goto fail; | ||
289 | size -= tsz; | ||
290 | start += tsz; | ||
291 | len += tsz; | ||
292 | |||
293 | if (size == 0) | ||
294 | return 0; | ||
295 | } | ||
296 | } | ||
297 | |||
298 | return 0; | ||
299 | fail: | ||
300 | do_munmap(vma->vm_mm, vma->vm_start, len); | ||
301 | return -EAGAIN; | ||
302 | } | ||
303 | #else | ||
304 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | ||
305 | { | ||
306 | return -ENOSYS; | ||
307 | } | ||
308 | #endif | ||
309 | |||
207 | static const struct file_operations proc_vmcore_operations = { | 310 | static const struct file_operations proc_vmcore_operations = { |
208 | .read = read_vmcore, | 311 | .read = read_vmcore, |
209 | .llseek = default_llseek, | 312 | .llseek = default_llseek, |
313 | .mmap = mmap_vmcore, | ||
210 | }; | 314 | }; |
211 | 315 | ||
212 | static struct vmcore* __init get_new_element(void) | 316 | static struct vmcore* __init get_new_element(void) |
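The rewritten read path above drops map_offset_to_paddr() and instead treats the file as a fixed concatenation: the ELF header buffer, then the merged note buffer, then the PT_LOAD regions in ascending offset order. A standalone sketch of the offset resolution, with hypothetical sizes:

#include <stdint.h>
#include <stdio.h>

struct region { uint64_t offset, paddr, size; };

/* Resolve a file offset to a physical address, assuming regions are
 * sorted by ascending file offset as in vmcore_list. Returns the
 * region index, or -1 if the offset is served from the in-kernel
 * header/note buffers or lies past EOF. */
static int resolve(uint64_t fpos, uint64_t hdr_sz, uint64_t notes_sz,
                   const struct region *r, int nr, uint64_t *paddr)
{
    int i;

    if (fpos < hdr_sz + notes_sz)
        return -1;                  /* elfcorebuf / elfnotes_buf range */
    for (i = 0; i < nr; i++)
        if (fpos < r[i].offset + r[i].size) {
            *paddr = r[i].paddr + fpos - r[i].offset;
            return i;
        }
    return -1;                      /* past EOF */
}

int main(void)
{
    /* hypothetical layout: 4 KiB header + 4 KiB notes + one region */
    struct region r[] = { { 8192, 0x100000, 65536 } };
    uint64_t paddr;

    if (resolve(10000, 4096, 4096, r, 1, &paddr) >= 0)
        printf("fpos 10000 -> paddr %#llx\n", (unsigned long long)paddr);
    return 0;
}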
@@ -214,61 +318,40 @@ static struct vmcore* __init get_new_element(void) | |||
214 | return kzalloc(sizeof(struct vmcore), GFP_KERNEL); | 318 | return kzalloc(sizeof(struct vmcore), GFP_KERNEL); |
215 | } | 319 | } |
216 | 320 | ||
217 | static u64 __init get_vmcore_size_elf64(char *elfptr) | 321 | static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz, |
322 | struct list_head *vc_list) | ||
218 | { | 323 | { |
219 | int i; | ||
220 | u64 size; | ||
221 | Elf64_Ehdr *ehdr_ptr; | ||
222 | Elf64_Phdr *phdr_ptr; | ||
223 | |||
224 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | ||
225 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); | ||
226 | size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr)); | ||
227 | for (i = 0; i < ehdr_ptr->e_phnum; i++) { | ||
228 | size += phdr_ptr->p_memsz; | ||
229 | phdr_ptr++; | ||
230 | } | ||
231 | return size; | ||
232 | } | ||
233 | |||
234 | static u64 __init get_vmcore_size_elf32(char *elfptr) | ||
235 | { | ||
236 | int i; | ||
237 | u64 size; | 324 | u64 size; |
238 | Elf32_Ehdr *ehdr_ptr; | 325 | struct vmcore *m; |
239 | Elf32_Phdr *phdr_ptr; | ||
240 | 326 | ||
241 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 327 | size = elfsz + elfnotesegsz; |
242 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); | 328 | list_for_each_entry(m, vc_list, list) { |
243 | size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr)); | 329 | size += m->size; |
244 | for (i = 0; i < ehdr_ptr->e_phnum; i++) { | ||
245 | size += phdr_ptr->p_memsz; | ||
246 | phdr_ptr++; | ||
247 | } | 330 | } |
248 | return size; | 331 | return size; |
249 | } | 332 | } |
250 | 333 | ||
251 | /* Merges all the PT_NOTE headers into one. */ | 334 | /** |
252 | static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | 335 | * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry |
253 | struct list_head *vc_list) | 336 | * |
337 | * @ehdr_ptr: ELF header | ||
338 | * | ||
339 | * This function updates the p_memsz member of each PT_NOTE entry in | ||
340 | * the program header table pointed to by @ehdr_ptr to the real size | ||
341 | * of its ELF note segment. | ||
342 | */ | ||
343 | static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) | ||
254 | { | 344 | { |
255 | int i, nr_ptnote=0, rc=0; | 345 | int i, rc=0; |
256 | char *tmp; | 346 | Elf64_Phdr *phdr_ptr; |
257 | Elf64_Ehdr *ehdr_ptr; | ||
258 | Elf64_Phdr phdr, *phdr_ptr; | ||
259 | Elf64_Nhdr *nhdr_ptr; | 347 | Elf64_Nhdr *nhdr_ptr; |
260 | u64 phdr_sz = 0, note_off; | ||
261 | 348 | ||
262 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 349 | phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); |
263 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); | ||
264 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 350 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
265 | int j; | ||
266 | void *notes_section; | 351 | void *notes_section; |
267 | struct vmcore *new; | ||
268 | u64 offset, max_sz, sz, real_sz = 0; | 352 | u64 offset, max_sz, sz, real_sz = 0; |
269 | if (phdr_ptr->p_type != PT_NOTE) | 353 | if (phdr_ptr->p_type != PT_NOTE) |
270 | continue; | 354 | continue; |
271 | nr_ptnote++; | ||
272 | max_sz = phdr_ptr->p_memsz; | 355 | max_sz = phdr_ptr->p_memsz; |
273 | offset = phdr_ptr->p_offset; | 356 | offset = phdr_ptr->p_offset; |
274 | notes_section = kmalloc(max_sz, GFP_KERNEL); | 357 | notes_section = kmalloc(max_sz, GFP_KERNEL); |
@@ -280,7 +363,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
280 | return rc; | 363 | return rc; |
281 | } | 364 | } |
282 | nhdr_ptr = notes_section; | 365 | nhdr_ptr = notes_section; |
283 | for (j = 0; j < max_sz; j += sz) { | 366 | while (real_sz < max_sz) { |
284 | if (nhdr_ptr->n_namesz == 0) | 367 | if (nhdr_ptr->n_namesz == 0) |
285 | break; | 368 | break; |
286 | sz = sizeof(Elf64_Nhdr) + | 369 | sz = sizeof(Elf64_Nhdr) + |
@@ -289,26 +372,122 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
289 | real_sz += sz; | 372 | real_sz += sz; |
290 | nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); | 373 | nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); |
291 | } | 374 | } |
292 | |||
293 | /* Add this contiguous chunk of notes section to vmcore list.*/ | ||
294 | new = get_new_element(); | ||
295 | if (!new) { | ||
296 | kfree(notes_section); | ||
297 | return -ENOMEM; | ||
298 | } | ||
299 | new->paddr = phdr_ptr->p_offset; | ||
300 | new->size = real_sz; | ||
301 | list_add_tail(&new->list, vc_list); | ||
302 | phdr_sz += real_sz; | ||
303 | kfree(notes_section); | 375 | kfree(notes_section); |
376 | phdr_ptr->p_memsz = real_sz; | ||
377 | } | ||
378 | |||
379 | return 0; | ||
380 | } | ||
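
The real_sz accumulation above walks the raw note data entry by entry: each entry is a fixed Elf64_Nhdr followed by its name and descriptor, each padded to a 4-byte boundary. A standalone sketch of that per-entry arithmetic; the "CORE"/NT_PRSTATUS numbers are illustrative for x86-64:

    #include <elf.h>
    #include <stdio.h>

    static unsigned long note_entry_size(const Elf64_Nhdr *nhdr)
    {
            /* header + 4-byte-aligned name + 4-byte-aligned descriptor,
             * matching the sz computation in the loop above */
            return sizeof(*nhdr) +
                   ((nhdr->n_namesz + 3UL) & ~3UL) +
                   ((nhdr->n_descsz + 3UL) & ~3UL);
    }

    int main(void)
    {
            /* e.g. a "CORE" NT_PRSTATUS note: 5-byte name, 336-byte desc */
            Elf64_Nhdr n = { .n_namesz = 5, .n_descsz = 336, .n_type = 1 };

            printf("%lu\n", note_entry_size(&n)); /* 12 + 8 + 336 = 356 */
            return 0;
    }

The while (real_sz < max_sz) form also fixes the old loop, which stepped a byte counter j by sz and could misjudge the remaining space; termination now depends only on the accumulated real size and the n_namesz == 0 sentinel.
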
381 | |||
382 | /** | ||
383 | * get_note_number_and_size_elf64 - get the number of PT_NOTE program | ||
384 | * headers and the sum of the real sizes of their ELF note segment | ||
385 | * headers and data. | ||
386 | * | ||
387 | * @ehdr_ptr: ELF header | ||
388 | * @nr_ptnote: buffer for the number of PT_NOTE program headers | ||
389 | * @sz_ptnote: buffer for size of unique PT_NOTE program header | ||
390 | * | ||
391 | * This function is used to merge multiple PT_NOTE program headers | ||
392 | * into a single unique one. The resulting unique entry will have | ||
393 | * @sz_ptnote in its phdr->p_memsz. | ||
394 | * | ||
395 | * It is assumed that the PT_NOTE program headers pointed to by | ||
396 | * @ehdr_ptr have already been updated by update_note_header_size_elf64 | ||
397 | * and that each PT_NOTE program header has the actual ELF note | ||
398 | * segment size in its p_memsz member. | ||
399 | */ | ||
400 | static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr, | ||
401 | int *nr_ptnote, u64 *sz_ptnote) | ||
402 | { | ||
403 | int i; | ||
404 | Elf64_Phdr *phdr_ptr; | ||
405 | |||
406 | *nr_ptnote = *sz_ptnote = 0; | ||
407 | |||
408 | phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); | ||
409 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
410 | if (phdr_ptr->p_type != PT_NOTE) | ||
411 | continue; | ||
412 | *nr_ptnote += 1; | ||
413 | *sz_ptnote += phdr_ptr->p_memsz; | ||
414 | } | ||
415 | |||
416 | return 0; | ||
417 | } | ||
418 | |||
419 | /** | ||
420 | * copy_notes_elf64 - copy ELF note segments into a given buffer | ||
421 | * | ||
422 | * @ehdr_ptr: ELF header | ||
423 | * @notes_buf: buffer into which ELF note segments are copied | ||
424 | * | ||
425 | * This function is used to copy the ELF note segments of the 1st | ||
426 | * kernel into the buffer @notes_buf in the 2nd kernel. It is assumed | ||
427 | * that the size of the buffer @notes_buf is equal to or larger than | ||
428 | * the sum of the real ELF note segment headers and data. | ||
429 | * | ||
430 | * It is assumed that the PT_NOTE program headers pointed to by | ||
431 | * @ehdr_ptr have already been updated by update_note_header_size_elf64 | ||
432 | * and that each PT_NOTE program header has the actual ELF note | ||
433 | * segment size in its p_memsz member. | ||
434 | */ | ||
435 | static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf) | ||
436 | { | ||
437 | int i, rc=0; | ||
438 | Elf64_Phdr *phdr_ptr; | ||
439 | |||
440 | phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1); | ||
441 | |||
442 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
443 | u64 offset; | ||
444 | if (phdr_ptr->p_type != PT_NOTE) | ||
445 | continue; | ||
446 | offset = phdr_ptr->p_offset; | ||
447 | rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); | ||
448 | if (rc < 0) | ||
449 | return rc; | ||
450 | notes_buf += phdr_ptr->p_memsz; | ||
304 | } | 451 | } |
305 | 452 | ||
453 | return 0; | ||
454 | } | ||
455 | |||
456 | /* Merges all the PT_NOTE headers into one. */ | ||
457 | static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | ||
458 | char **notes_buf, size_t *notes_sz) | ||
459 | { | ||
460 | int i, nr_ptnote=0, rc=0; | ||
461 | char *tmp; | ||
462 | Elf64_Ehdr *ehdr_ptr; | ||
463 | Elf64_Phdr phdr; | ||
464 | u64 phdr_sz = 0, note_off; | ||
465 | |||
466 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | ||
467 | |||
468 | rc = update_note_header_size_elf64(ehdr_ptr); | ||
469 | if (rc < 0) | ||
470 | return rc; | ||
471 | |||
472 | rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz); | ||
473 | if (rc < 0) | ||
474 | return rc; | ||
475 | |||
476 | *notes_sz = roundup(phdr_sz, PAGE_SIZE); | ||
477 | *notes_buf = alloc_elfnotes_buf(*notes_sz); | ||
478 | if (!*notes_buf) | ||
479 | return -ENOMEM; | ||
480 | |||
481 | rc = copy_notes_elf64(ehdr_ptr, *notes_buf); | ||
482 | if (rc < 0) | ||
483 | return rc; | ||
484 | |||
306 | /* Prepare merged PT_NOTE program header. */ | 485 | /* Prepare merged PT_NOTE program header. */ |
307 | phdr.p_type = PT_NOTE; | 486 | phdr.p_type = PT_NOTE; |
308 | phdr.p_flags = 0; | 487 | phdr.p_flags = 0; |
309 | note_off = sizeof(Elf64_Ehdr) + | 488 | note_off = sizeof(Elf64_Ehdr) + |
310 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); | 489 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); |
311 | phdr.p_offset = note_off; | 490 | phdr.p_offset = roundup(note_off, PAGE_SIZE); |
312 | phdr.p_vaddr = phdr.p_paddr = 0; | 491 | phdr.p_vaddr = phdr.p_paddr = 0; |
313 | phdr.p_filesz = phdr.p_memsz = phdr_sz; | 492 | phdr.p_filesz = phdr.p_memsz = phdr_sz; |
314 | phdr.p_align = 0; | 493 | phdr.p_align = 0; |
@@ -322,6 +501,8 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
322 | i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); | 501 | i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); |
323 | *elfsz = *elfsz - i; | 502 | *elfsz = *elfsz - i; |
324 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); | 503 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); |
504 | memset(elfptr + *elfsz, 0, i); | ||
505 | *elfsz = roundup(*elfsz, PAGE_SIZE); | ||
325 | 506 | ||
326 | /* Modify e_phnum to reflect merged headers. */ | 507 | /* Modify e_phnum to reflect merged headers. */ |
327 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; | 508 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; |
@@ -329,27 +510,27 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
329 | return 0; | 510 | return 0; |
330 | } | 511 | } |
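
After the merge there is exactly one PT_NOTE header left, and both the header blob and the merged note data are padded out to page boundaries so mmap_vmcore() can hand out whole pages. A sketch of the resulting offsets with illustrative counts (5 program headers, 3 of them PT_NOTE, 4 KiB pages):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define ROUNDUP(x, a) ((((x) + (a) - 1) / (a)) * (a))

    int main(void)
    {
            unsigned long e_phnum = 5, nr_ptnote = 3;
            unsigned long note_off = 64 /* sizeof(Elf64_Ehdr) */ +
                    (e_phnum - nr_ptnote + 1) * 56 /* sizeof(Elf64_Phdr) */;

            printf("raw header blob : %lu bytes\n", note_off); /*  232 */
            printf("merged notes at : %lu\n",
                   ROUNDUP(note_off, PAGE_SIZE));              /* 4096 */
            return 0;
    }

This is why phdr.p_offset is now roundup(note_off, PAGE_SIZE), and why the tail of the header buffer freed by the merge is zeroed before *elfsz itself is rounded up to a page.
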
331 | 512 | ||
332 | /* Merges all the PT_NOTE headers into one. */ | 513 | /** |
333 | static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | 514 | * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry |
334 | struct list_head *vc_list) | 515 | * |
516 | * @ehdr_ptr: ELF header | ||
517 | * | ||
518 | * This function updates the p_memsz member of each PT_NOTE entry in | ||
519 | * the program header table pointed to by @ehdr_ptr to the real size | ||
520 | * of its ELF note segment. | ||
521 | */ | ||
522 | static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) | ||
335 | { | 523 | { |
336 | int i, nr_ptnote=0, rc=0; | 524 | int i, rc=0; |
337 | char *tmp; | 525 | Elf32_Phdr *phdr_ptr; |
338 | Elf32_Ehdr *ehdr_ptr; | ||
339 | Elf32_Phdr phdr, *phdr_ptr; | ||
340 | Elf32_Nhdr *nhdr_ptr; | 526 | Elf32_Nhdr *nhdr_ptr; |
341 | u64 phdr_sz = 0, note_off; | ||
342 | 527 | ||
343 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 528 | phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); |
344 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); | ||
345 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 529 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
346 | int j; | ||
347 | void *notes_section; | 530 | void *notes_section; |
348 | struct vmcore *new; | ||
349 | u64 offset, max_sz, sz, real_sz = 0; | 531 | u64 offset, max_sz, sz, real_sz = 0; |
350 | if (phdr_ptr->p_type != PT_NOTE) | 532 | if (phdr_ptr->p_type != PT_NOTE) |
351 | continue; | 533 | continue; |
352 | nr_ptnote++; | ||
353 | max_sz = phdr_ptr->p_memsz; | 534 | max_sz = phdr_ptr->p_memsz; |
354 | offset = phdr_ptr->p_offset; | 535 | offset = phdr_ptr->p_offset; |
355 | notes_section = kmalloc(max_sz, GFP_KERNEL); | 536 | notes_section = kmalloc(max_sz, GFP_KERNEL); |
@@ -361,7 +542,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
361 | return rc; | 542 | return rc; |
362 | } | 543 | } |
363 | nhdr_ptr = notes_section; | 544 | nhdr_ptr = notes_section; |
364 | for (j = 0; j < max_sz; j += sz) { | 545 | while (real_sz < max_sz) { |
365 | if (nhdr_ptr->n_namesz == 0) | 546 | if (nhdr_ptr->n_namesz == 0) |
366 | break; | 547 | break; |
367 | sz = sizeof(Elf32_Nhdr) + | 548 | sz = sizeof(Elf32_Nhdr) + |
@@ -370,26 +551,122 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
370 | real_sz += sz; | 551 | real_sz += sz; |
371 | nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); | 552 | nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); |
372 | } | 553 | } |
373 | |||
374 | /* Add this contiguous chunk of notes section to vmcore list.*/ | ||
375 | new = get_new_element(); | ||
376 | if (!new) { | ||
377 | kfree(notes_section); | ||
378 | return -ENOMEM; | ||
379 | } | ||
380 | new->paddr = phdr_ptr->p_offset; | ||
381 | new->size = real_sz; | ||
382 | list_add_tail(&new->list, vc_list); | ||
383 | phdr_sz += real_sz; | ||
384 | kfree(notes_section); | 554 | kfree(notes_section); |
555 | phdr_ptr->p_memsz = real_sz; | ||
556 | } | ||
557 | |||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | /** | ||
562 | * get_note_number_and_size_elf32 - get the number of PT_NOTE program | ||
563 | * headers and the sum of the real sizes of their ELF note segment | ||
564 | * headers and data. | ||
565 | * | ||
566 | * @ehdr_ptr: ELF header | ||
567 | * @nr_ptnote: buffer for the number of PT_NOTE program headers | ||
568 | * @sz_ptnote: buffer for size of unique PT_NOTE program header | ||
569 | * | ||
570 | * This function is used to merge multiple PT_NOTE program headers | ||
571 | * into a single unique one. The resulting unique entry will have | ||
572 | * @sz_ptnote in its phdr->p_memsz. | ||
573 | * | ||
574 | * It is assumed that the PT_NOTE program headers pointed to by | ||
575 | * @ehdr_ptr have already been updated by update_note_header_size_elf32 | ||
576 | * and that each PT_NOTE program header has the actual ELF note | ||
577 | * segment size in its p_memsz member. | ||
578 | */ | ||
579 | static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr, | ||
580 | int *nr_ptnote, u64 *sz_ptnote) | ||
581 | { | ||
582 | int i; | ||
583 | Elf32_Phdr *phdr_ptr; | ||
584 | |||
585 | *nr_ptnote = *sz_ptnote = 0; | ||
586 | |||
587 | phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); | ||
588 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
589 | if (phdr_ptr->p_type != PT_NOTE) | ||
590 | continue; | ||
591 | *nr_ptnote += 1; | ||
592 | *sz_ptnote += phdr_ptr->p_memsz; | ||
593 | } | ||
594 | |||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | /** | ||
599 | * copy_notes_elf32 - copy ELF note segments into a given buffer | ||
600 | * | ||
601 | * @ehdr_ptr: ELF header | ||
602 | * @notes_buf: buffer into which ELF note segments are copied | ||
603 | * | ||
604 | * This function is used to copy the ELF note segments of the 1st | ||
605 | * kernel into the buffer @notes_buf in the 2nd kernel. It is assumed | ||
606 | * that the size of the buffer @notes_buf is equal to or larger than | ||
607 | * the sum of the real ELF note segment headers and data. | ||
608 | * | ||
609 | * It is assumed that the PT_NOTE program headers pointed to by | ||
610 | * @ehdr_ptr have already been updated by update_note_header_size_elf32 | ||
611 | * and that each PT_NOTE program header has the actual ELF note | ||
612 | * segment size in its p_memsz member. | ||
613 | */ | ||
614 | static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf) | ||
615 | { | ||
616 | int i, rc=0; | ||
617 | Elf32_Phdr *phdr_ptr; | ||
618 | |||
619 | phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1); | ||
620 | |||
621 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
622 | u64 offset; | ||
623 | if (phdr_ptr->p_type != PT_NOTE) | ||
624 | continue; | ||
625 | offset = phdr_ptr->p_offset; | ||
626 | rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); | ||
627 | if (rc < 0) | ||
628 | return rc; | ||
629 | notes_buf += phdr_ptr->p_memsz; | ||
385 | } | 630 | } |
386 | 631 | ||
632 | return 0; | ||
633 | } | ||
634 | |||
635 | /* Merges all the PT_NOTE headers into one. */ | ||
636 | static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | ||
637 | char **notes_buf, size_t *notes_sz) | ||
638 | { | ||
639 | int i, nr_ptnote=0, rc=0; | ||
640 | char *tmp; | ||
641 | Elf32_Ehdr *ehdr_ptr; | ||
642 | Elf32_Phdr phdr; | ||
643 | u64 phdr_sz = 0, note_off; | ||
644 | |||
645 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | ||
646 | |||
647 | rc = update_note_header_size_elf32(ehdr_ptr); | ||
648 | if (rc < 0) | ||
649 | return rc; | ||
650 | |||
651 | rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz); | ||
652 | if (rc < 0) | ||
653 | return rc; | ||
654 | |||
655 | *notes_sz = roundup(phdr_sz, PAGE_SIZE); | ||
656 | *notes_buf = alloc_elfnotes_buf(*notes_sz); | ||
657 | if (!*notes_buf) | ||
658 | return -ENOMEM; | ||
659 | |||
660 | rc = copy_notes_elf32(ehdr_ptr, *notes_buf); | ||
661 | if (rc < 0) | ||
662 | return rc; | ||
663 | |||
387 | /* Prepare merged PT_NOTE program header. */ | 664 | /* Prepare merged PT_NOTE program header. */ |
388 | phdr.p_type = PT_NOTE; | 665 | phdr.p_type = PT_NOTE; |
389 | phdr.p_flags = 0; | 666 | phdr.p_flags = 0; |
390 | note_off = sizeof(Elf32_Ehdr) + | 667 | note_off = sizeof(Elf32_Ehdr) + |
391 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); | 668 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); |
392 | phdr.p_offset = note_off; | 669 | phdr.p_offset = roundup(note_off, PAGE_SIZE); |
393 | phdr.p_vaddr = phdr.p_paddr = 0; | 670 | phdr.p_vaddr = phdr.p_paddr = 0; |
394 | phdr.p_filesz = phdr.p_memsz = phdr_sz; | 671 | phdr.p_filesz = phdr.p_memsz = phdr_sz; |
395 | phdr.p_align = 0; | 672 | phdr.p_align = 0; |
@@ -403,6 +680,8 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
403 | i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); | 680 | i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); |
404 | *elfsz = *elfsz - i; | 681 | *elfsz = *elfsz - i; |
405 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); | 682 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); |
683 | memset(elfptr + *elfsz, 0, i); | ||
684 | *elfsz = roundup(*elfsz, PAGE_SIZE); | ||
406 | 685 | ||
407 | /* Modify e_phnum to reflect merged headers. */ | 686 | /* Modify e_phnum to reflect merged headers. */ |
408 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; | 687 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; |
@@ -414,6 +693,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
414 | * the new offset fields of exported program headers. */ | 693 | * the new offset fields of exported program headers. */ |
415 | static int __init process_ptload_program_headers_elf64(char *elfptr, | 694 | static int __init process_ptload_program_headers_elf64(char *elfptr, |
416 | size_t elfsz, | 695 | size_t elfsz, |
696 | size_t elfnotes_sz, | ||
417 | struct list_head *vc_list) | 697 | struct list_head *vc_list) |
418 | { | 698 | { |
419 | int i; | 699 | int i; |
@@ -425,32 +705,38 @@ static int __init process_ptload_program_headers_elf64(char *elfptr, | |||
425 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 705 | ehdr_ptr = (Elf64_Ehdr *)elfptr; |
426 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ | 706 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ |
427 | 707 | ||
428 | /* First program header is PT_NOTE header. */ | 708 | /* Skip Elf header, program headers and Elf note segment. */ |
429 | vmcore_off = sizeof(Elf64_Ehdr) + | 709 | vmcore_off = elfsz + elfnotes_sz; |
430 | (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) + | ||
431 | phdr_ptr->p_memsz; /* Note sections */ | ||
432 | 710 | ||
433 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 711 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
712 | u64 paddr, start, end, size; | ||
713 | |||
434 | if (phdr_ptr->p_type != PT_LOAD) | 714 | if (phdr_ptr->p_type != PT_LOAD) |
435 | continue; | 715 | continue; |
436 | 716 | ||
717 | paddr = phdr_ptr->p_offset; | ||
718 | start = rounddown(paddr, PAGE_SIZE); | ||
719 | end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); | ||
720 | size = end - start; | ||
721 | |||
437 | /* Add this contiguous chunk of memory to vmcore list.*/ | 722 | /* Add this contiguous chunk of memory to vmcore list.*/ |
438 | new = get_new_element(); | 723 | new = get_new_element(); |
439 | if (!new) | 724 | if (!new) |
440 | return -ENOMEM; | 725 | return -ENOMEM; |
441 | new->paddr = phdr_ptr->p_offset; | 726 | new->paddr = start; |
442 | new->size = phdr_ptr->p_memsz; | 727 | new->size = size; |
443 | list_add_tail(&new->list, vc_list); | 728 | list_add_tail(&new->list, vc_list); |
444 | 729 | ||
445 | /* Update the program header offset. */ | 730 | /* Update the program header offset. */ |
446 | phdr_ptr->p_offset = vmcore_off; | 731 | phdr_ptr->p_offset = vmcore_off + (paddr - start); |
447 | vmcore_off = vmcore_off + phdr_ptr->p_memsz; | 732 | vmcore_off = vmcore_off + size; |
448 | } | 733 | } |
449 | return 0; | 734 | return 0; |
450 | } | 735 | } |
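
Because remap_pfn_range() works in whole pages, each PT_LOAD chunk recorded on vmcore_list is expanded to page boundaries, while the exported p_offset keeps pointing at the original first byte inside the expanded chunk. A worked example with an illustrative, unaligned segment:

    #include <stdio.h>

    #define PAGE_SIZE 4096ULL
    #define ROUNDDOWN(x, a) (((x) / (a)) * (a))
    #define ROUNDUP(x, a)   ((((x) + (a) - 1) / (a)) * (a))

    int main(void)
    {
            /* hypothetical PT_LOAD: starts at 0x100234, 10000 bytes long */
            unsigned long long paddr = 0x100234, memsz = 10000;
            unsigned long long start = ROUNDDOWN(paddr, PAGE_SIZE);
            unsigned long long end = ROUNDUP(paddr + memsz, PAGE_SIZE);
            unsigned long long vmcore_off = 3 * PAGE_SIZE; /* headers+notes */

            printf("chunk : %#llx..%#llx (%llu bytes)\n",
                   start, end, end - start);      /* 0x100000..0x103000 */
            printf("p_offset = %#llx\n",
                   vmcore_off + (paddr - start)); /* 0x3000 + 0x234 */
            return 0;
    }

The same rounding is applied in the elf32 variant below; only the header types differ.
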
451 | 736 | ||
452 | static int __init process_ptload_program_headers_elf32(char *elfptr, | 737 | static int __init process_ptload_program_headers_elf32(char *elfptr, |
453 | size_t elfsz, | 738 | size_t elfsz, |
739 | size_t elfnotes_sz, | ||
454 | struct list_head *vc_list) | 740 | struct list_head *vc_list) |
455 | { | 741 | { |
456 | int i; | 742 | int i; |
@@ -462,43 +748,44 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, | |||
462 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 748 | ehdr_ptr = (Elf32_Ehdr *)elfptr; |
463 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ | 749 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ |
464 | 750 | ||
465 | /* First program header is PT_NOTE header. */ | 751 | /* Skip Elf header, program headers and Elf note segment. */ |
466 | vmcore_off = sizeof(Elf32_Ehdr) + | 752 | vmcore_off = elfsz + elfnotes_sz; |
467 | (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) + | ||
468 | phdr_ptr->p_memsz; /* Note sections */ | ||
469 | 753 | ||
470 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 754 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
755 | u64 paddr, start, end, size; | ||
756 | |||
471 | if (phdr_ptr->p_type != PT_LOAD) | 757 | if (phdr_ptr->p_type != PT_LOAD) |
472 | continue; | 758 | continue; |
473 | 759 | ||
760 | paddr = phdr_ptr->p_offset; | ||
761 | start = rounddown(paddr, PAGE_SIZE); | ||
762 | end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); | ||
763 | size = end - start; | ||
764 | |||
474 | /* Add this contiguous chunk of memory to vmcore list.*/ | 765 | /* Add this contiguous chunk of memory to vmcore list.*/ |
475 | new = get_new_element(); | 766 | new = get_new_element(); |
476 | if (!new) | 767 | if (!new) |
477 | return -ENOMEM; | 768 | return -ENOMEM; |
478 | new->paddr = phdr_ptr->p_offset; | 769 | new->paddr = start; |
479 | new->size = phdr_ptr->p_memsz; | 770 | new->size = size; |
480 | list_add_tail(&new->list, vc_list); | 771 | list_add_tail(&new->list, vc_list); |
481 | 772 | ||
482 | /* Update the program header offset */ | 773 | /* Update the program header offset */ |
483 | phdr_ptr->p_offset = vmcore_off; | 774 | phdr_ptr->p_offset = vmcore_off + (paddr - start); |
484 | vmcore_off = vmcore_off + phdr_ptr->p_memsz; | 775 | vmcore_off = vmcore_off + size; |
485 | } | 776 | } |
486 | return 0; | 777 | return 0; |
487 | } | 778 | } |
488 | 779 | ||
489 | /* Sets offset fields of vmcore elements. */ | 780 | /* Sets offset fields of vmcore elements. */ |
490 | static void __init set_vmcore_list_offsets_elf64(char *elfptr, | 781 | static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz, |
491 | struct list_head *vc_list) | 782 | struct list_head *vc_list) |
492 | { | 783 | { |
493 | loff_t vmcore_off; | 784 | loff_t vmcore_off; |
494 | Elf64_Ehdr *ehdr_ptr; | ||
495 | struct vmcore *m; | 785 | struct vmcore *m; |
496 | 786 | ||
497 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 787 | /* Skip Elf header, program headers and Elf note segment. */ |
498 | 788 | vmcore_off = elfsz + elfnotes_sz; | |
499 | /* Skip Elf header and program headers. */ | ||
500 | vmcore_off = sizeof(Elf64_Ehdr) + | ||
501 | (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr); | ||
502 | 789 | ||
503 | list_for_each_entry(m, vc_list, list) { | 790 | list_for_each_entry(m, vc_list, list) { |
504 | m->offset = vmcore_off; | 791 | m->offset = vmcore_off; |
@@ -506,24 +793,12 @@ static void __init set_vmcore_list_offsets_elf64(char *elfptr, | |||
506 | } | 793 | } |
507 | } | 794 | } |
508 | 795 | ||
509 | /* Sets offset fields of vmcore elements. */ | 796 | static void free_elfcorebuf(void) |
510 | static void __init set_vmcore_list_offsets_elf32(char *elfptr, | ||
511 | struct list_head *vc_list) | ||
512 | { | 797 | { |
513 | loff_t vmcore_off; | 798 | free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig)); |
514 | Elf32_Ehdr *ehdr_ptr; | 799 | elfcorebuf = NULL; |
515 | struct vmcore *m; | 800 | vfree(elfnotes_buf); |
516 | 801 | elfnotes_buf = NULL; | |
517 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | ||
518 | |||
519 | /* Skip Elf header and program headers. */ | ||
520 | vmcore_off = sizeof(Elf32_Ehdr) + | ||
521 | (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr); | ||
522 | |||
523 | list_for_each_entry(m, vc_list, list) { | ||
524 | m->offset = vmcore_off; | ||
525 | vmcore_off += m->size; | ||
526 | } | ||
527 | } | 802 | } |
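
free_elfcorebuf() is also why the patch tracks elfcorebuf_sz_orig separately: elfcorebuf_sz shrinks and gets page-rounded during the merge, but free_pages() must be given the same order that __get_free_pages() allocated. A userspace re-derivation of that order computation, assuming 4 KiB pages (get_order() itself is the kernel helper):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    static int order_for(unsigned long size)
    {
            unsigned long pages =
                    (size + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
            int order = 0;

            while ((1UL << order) < pages)
                    order++;
            return order;
    }

    int main(void)
    {
            /* e.g. a 9 KiB header blob occupies 3 pages -> order 2,
             * i.e. a 4-page block, allocated and freed as one unit */
            printf("order = %d\n", order_for(9 * 1024));
            return 0;
    }

Both pointers are set to NULL afterwards, so a later vmcore_cleanup() call cannot double-free them.
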
528 | 803 | ||
529 | static int __init parse_crash_elf64_headers(void) | 804 | static int __init parse_crash_elf64_headers(void) |
@@ -554,31 +829,32 @@ static int __init parse_crash_elf64_headers(void) | |||
554 | } | 829 | } |
555 | 830 | ||
556 | /* Read in all elf headers. */ | 831 | /* Read in all elf headers. */ |
557 | elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr); | 832 | elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) + |
558 | elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); | 833 | ehdr.e_phnum * sizeof(Elf64_Phdr); |
834 | elfcorebuf_sz = elfcorebuf_sz_orig; | ||
835 | elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
836 | get_order(elfcorebuf_sz_orig)); | ||
559 | if (!elfcorebuf) | 837 | if (!elfcorebuf) |
560 | return -ENOMEM; | 838 | return -ENOMEM; |
561 | addr = elfcorehdr_addr; | 839 | addr = elfcorehdr_addr; |
562 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); | 840 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); |
563 | if (rc < 0) { | 841 | if (rc < 0) |
564 | kfree(elfcorebuf); | 842 | goto fail; |
565 | return rc; | ||
566 | } | ||
567 | 843 | ||
568 | /* Merge all PT_NOTE headers into one. */ | 844 | /* Merge all PT_NOTE headers into one. */ |
569 | rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list); | 845 | rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, |
570 | if (rc) { | 846 | &elfnotes_buf, &elfnotes_sz); |
571 | kfree(elfcorebuf); | 847 | if (rc) |
572 | return rc; | 848 | goto fail; |
573 | } | ||
574 | rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, | 849 | rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, |
575 | &vmcore_list); | 850 | elfnotes_sz, &vmcore_list); |
576 | if (rc) { | 851 | if (rc) |
577 | kfree(elfcorebuf); | 852 | goto fail; |
578 | return rc; | 853 | set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); |
579 | } | ||
580 | set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list); | ||
581 | return 0; | 854 | return 0; |
855 | fail: | ||
856 | free_elfcorebuf(); | ||
857 | return rc; | ||
582 | } | 858 | } |
583 | 859 | ||
584 | static int __init parse_crash_elf32_headers(void) | 860 | static int __init parse_crash_elf32_headers(void) |
@@ -609,31 +885,31 @@ static int __init parse_crash_elf32_headers(void) | |||
609 | } | 885 | } |
610 | 886 | ||
611 | /* Read in all elf headers. */ | 887 | /* Read in all elf headers. */ |
612 | elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); | 888 | elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); |
613 | elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); | 889 | elfcorebuf_sz = elfcorebuf_sz_orig; |
890 | elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
891 | get_order(elfcorebuf_sz_orig)); | ||
614 | if (!elfcorebuf) | 892 | if (!elfcorebuf) |
615 | return -ENOMEM; | 893 | return -ENOMEM; |
616 | addr = elfcorehdr_addr; | 894 | addr = elfcorehdr_addr; |
617 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); | 895 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); |
618 | if (rc < 0) { | 896 | if (rc < 0) |
619 | kfree(elfcorebuf); | 897 | goto fail; |
620 | return rc; | ||
621 | } | ||
622 | 898 | ||
623 | /* Merge all PT_NOTE headers into one. */ | 899 | /* Merge all PT_NOTE headers into one. */ |
624 | rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list); | 900 | rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, |
625 | if (rc) { | 901 | &elfnotes_buf, &elfnotes_sz); |
626 | kfree(elfcorebuf); | 902 | if (rc) |
627 | return rc; | 903 | goto fail; |
628 | } | ||
629 | rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, | 904 | rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, |
630 | &vmcore_list); | 905 | elfnotes_sz, &vmcore_list); |
631 | if (rc) { | 906 | if (rc) |
632 | kfree(elfcorebuf); | 907 | goto fail; |
633 | return rc; | 908 | set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); |
634 | } | ||
635 | set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list); | ||
636 | return 0; | 909 | return 0; |
910 | fail: | ||
911 | free_elfcorebuf(); | ||
912 | return rc; | ||
637 | } | 913 | } |
638 | 914 | ||
639 | static int __init parse_crash_elf_headers(void) | 915 | static int __init parse_crash_elf_headers(void) |
@@ -655,20 +931,19 @@ static int __init parse_crash_elf_headers(void) | |||
655 | rc = parse_crash_elf64_headers(); | 931 | rc = parse_crash_elf64_headers(); |
656 | if (rc) | 932 | if (rc) |
657 | return rc; | 933 | return rc; |
658 | |||
659 | /* Determine vmcore size. */ | ||
660 | vmcore_size = get_vmcore_size_elf64(elfcorebuf); | ||
661 | } else if (e_ident[EI_CLASS] == ELFCLASS32) { | 934 | } else if (e_ident[EI_CLASS] == ELFCLASS32) { |
662 | rc = parse_crash_elf32_headers(); | 935 | rc = parse_crash_elf32_headers(); |
663 | if (rc) | 936 | if (rc) |
664 | return rc; | 937 | return rc; |
665 | |||
666 | /* Determine vmcore size. */ | ||
667 | vmcore_size = get_vmcore_size_elf32(elfcorebuf); | ||
668 | } else { | 938 | } else { |
669 | pr_warn("Warning: Core image elf header is not sane\n"); | 939 | pr_warn("Warning: Core image elf header is not sane\n"); |
670 | return -EINVAL; | 940 | return -EINVAL; |
671 | } | 941 | } |
942 | |||
943 | /* Determine vmcore size. */ | ||
944 | vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz, | ||
945 | &vmcore_list); | ||
946 | |||
672 | return 0; | 947 | return 0; |
673 | } | 948 | } |
674 | 949 | ||
@@ -711,7 +986,6 @@ void vmcore_cleanup(void) | |||
711 | list_del(&m->list); | 986 | list_del(&m->list); |
712 | kfree(m); | 987 | kfree(m); |
713 | } | 988 | } |
714 | kfree(elfcorebuf); | 989 | free_elfcorebuf(); |
715 | elfcorebuf = NULL; | ||
716 | } | 990 | } |
717 | EXPORT_SYMBOL_GPL(vmcore_cleanup); | 991 | EXPORT_SYMBOL_GPL(vmcore_cleanup); |