author    Vivek Goyal <vgoyal@redhat.com>    2014-08-08 17:25:57 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2014-08-08 18:57:32 -0400
commit    cb1052581e2bddd6096544f3f944f4e7fdad4c7f (patch)
tree      c802781e0c67685bfe062d9a04d09cdb4ff82aea /kernel/kexec.c
parent    f0895685c7fd8c938c91a9d8a6f7c11f22df58d2 (diff)
kexec: implementation of new syscall kexec_file_load
The previous patch provided the interface definition, and this patch provides the implementation of the new syscall.

Previously, the segment list was prepared in user space. Now user space just passes a kernel fd, an initrd fd and a command line, and the kernel creates the segment list internally.

This patch contains the generic part of the code. Actual segment preparation and loading is done by the arch- and image-specific loader, which comes in the next patch.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
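For orientation, a minimal user-space sketch of invoking the new syscall (not part of this commit). It relies only on the SYSCALL_DEFINE5 signature in the diff below; that the installed headers provide __NR_kexec_file_load, and that /boot holds a suitable kernel and initrd, are assumptions:

/*
 * Hypothetical caller of kexec_file_load, NOT part of this patch.
 * Flags are left at 0, so both the kernel and the initrd are loaded.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        const char cmdline[] = "root=/dev/sda1 ro";   /* NUL counted below */
        int kernel_fd, initrd_fd;
        long ret;

        kernel_fd = open("/boot/vmlinuz", O_RDONLY);      /* assumed path */
        initrd_fd = open("/boot/initrd.img", O_RDONLY);   /* assumed path */
        if (kernel_fd < 0 || initrd_fd < 0) {
                perror("open");
                return 1;
        }

        /* cmdline_len must count the trailing NUL; the kernel checks it */
        ret = syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
                      sizeof(cmdline), cmdline, 0UL);
        if (ret)
                perror("kexec_file_load");

        close(kernel_fd);
        close(initrd_fd);
        return ret ? 1 : 0;
}

The caller needs CAP_SYS_BOOT, per the permission check at the top of the syscall body in the diff.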
Diffstat (limited to 'kernel/kexec.c')
-rw-r--r--  kernel/kexec.c  483
1 file changed, 478 insertions(+), 5 deletions(-)
diff --git a/kernel/kexec.c b/kernel/kexec.c
index ec4386c1b94f..9b46219254dd 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
  * Version 2. See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt) "kexec: " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
@@ -327,6 +329,221 @@ out_free_image:
         return ret;
 }
 
+static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
+{
+        struct fd f = fdget(fd);
+        int ret;
+        struct kstat stat;
+        loff_t pos;
+        ssize_t bytes = 0;
+
+        if (!f.file)
+                return -EBADF;
+
+        ret = vfs_getattr(&f.file->f_path, &stat);
+        if (ret)
+                goto out;
+
+        if (stat.size > INT_MAX) {
+                ret = -EFBIG;
+                goto out;
+        }
+
+        /* Don't hand 0 to vmalloc, it whines. */
+        if (stat.size == 0) {
+                ret = -EINVAL;
+                goto out;
+        }
+
+        *buf = vmalloc(stat.size);
+        if (!*buf) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        pos = 0;
+        while (pos < stat.size) {
+                bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
+                                    stat.size - pos);
+                if (bytes < 0) {
+                        vfree(*buf);
+                        ret = bytes;
+                        goto out;
+                }
+
+                if (bytes == 0)
+                        break;
+                pos += bytes;
+        }
+
+        if (pos != stat.size) {
+                ret = -EBADF;
+                vfree(*buf);
+                goto out;
+        }
+
+        *buf_len = pos;
+out:
+        fdput(f);
+        return ret;
+}
+
+/* Architectures can provide this probe function */
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+                                         unsigned long buf_len)
+{
+        return -ENOEXEC;
+}
+
+void * __weak arch_kexec_kernel_image_load(struct kimage *image)
+{
+        return ERR_PTR(-ENOEXEC);
+}
+
+void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Free up memory used by kernel, initrd, and command line. This is temporary
+ * memory allocation which is not needed any more after these buffers have
+ * been loaded into separate segments and have been copied elsewhere.
+ */
+static void kimage_file_post_load_cleanup(struct kimage *image)
+{
+        vfree(image->kernel_buf);
+        image->kernel_buf = NULL;
+
+        vfree(image->initrd_buf);
+        image->initrd_buf = NULL;
+
+        kfree(image->cmdline_buf);
+        image->cmdline_buf = NULL;
+
+        /* See if architecture has anything to cleanup post load */
+        arch_kimage_file_post_load_cleanup(image);
+}
+
+/*
+ * In file mode, the list of segments is prepared by the kernel. Copy relevant
+ * data from user space, do error checking, prepare the segment list.
+ */
+static int
+kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
+                             const char __user *cmdline_ptr,
+                             unsigned long cmdline_len, unsigned flags)
+{
+        int ret = 0;
+        void *ldata;
+
+        ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
+                                &image->kernel_buf_len);
+        if (ret)
+                return ret;
+
+        /* Call arch image probe handlers */
+        ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
+                                            image->kernel_buf_len);
+
+        if (ret)
+                goto out;
+
+        /* It is possible that no initramfs is being loaded */
+        if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
+                ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
+                                        &image->initrd_buf_len);
+                if (ret)
+                        goto out;
+        }
+
+        if (cmdline_len) {
+                image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
+                if (!image->cmdline_buf) {
+                        ret = -ENOMEM;
+                        goto out;
+                }
+
+                ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
+                                     cmdline_len);
+                if (ret) {
+                        ret = -EFAULT;
+                        goto out;
+                }
+
+                image->cmdline_buf_len = cmdline_len;
+
+                /* command line should be a string with last byte null */
+                if (image->cmdline_buf[cmdline_len - 1] != '\0') {
+                        ret = -EINVAL;
+                        goto out;
+                }
+        }
+
+        /* Call arch image load handlers */
+        ldata = arch_kexec_kernel_image_load(image);
+
+        if (IS_ERR(ldata)) {
+                ret = PTR_ERR(ldata);
+                goto out;
+        }
+
+        image->image_loader_data = ldata;
+out:
+        /* In case of error, free up all allocated memory in this function */
+        if (ret)
+                kimage_file_post_load_cleanup(image);
+        return ret;
+}
+
+static int
+kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
+                       int initrd_fd, const char __user *cmdline_ptr,
+                       unsigned long cmdline_len, unsigned long flags)
+{
+        int ret;
+        struct kimage *image;
+
+        image = do_kimage_alloc_init();
+        if (!image)
+                return -ENOMEM;
+
+        image->file_mode = 1;
+
+        ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
+                                           cmdline_ptr, cmdline_len, flags);
+        if (ret)
+                goto out_free_image;
+
+        ret = sanity_check_segment_list(image);
+        if (ret)
+                goto out_free_post_load_bufs;
+
+        ret = -ENOMEM;
+        image->control_code_page = kimage_alloc_control_pages(image,
+                                        get_order(KEXEC_CONTROL_PAGE_SIZE));
+        if (!image->control_code_page) {
+                pr_err("Could not allocate control_code_buffer\n");
+                goto out_free_post_load_bufs;
+        }
+
+        image->swap_page = kimage_alloc_control_pages(image, 0);
+        if (!image->swap_page) {
+                pr_err("Could not allocate swap buffer\n");
+                goto out_free_control_pages;
+        }
+
+        *rimage = image;
+        return 0;
+out_free_control_pages:
+        kimage_free_page_list(&image->control_pages);
+out_free_post_load_bufs:
+        kimage_file_post_load_cleanup(image);
+        kfree(image->image_loader_data);
+out_free_image:
+        kfree(image);
+        return ret;
+}
+
 static int kimage_is_destination_range(struct kimage *image,
                                        unsigned long start,
                                        unsigned long end)
@@ -644,6 +861,16 @@ static void kimage_free(struct kimage *image)
 
         /* Free the kexec control pages... */
         kimage_free_page_list(&image->control_pages);
+
+        kfree(image->image_loader_data);
+
+        /*
+         * Free up any temporary buffers allocated. This can hit if an
+         * error occurred much later, after buffer allocation.
+         */
+        if (image->file_mode)
+                kimage_file_post_load_cleanup(image);
+
         kfree(image);
 }
 
@@ -772,10 +999,14 @@ static int kimage_load_normal_segment(struct kimage *image,
         unsigned long maddr;
         size_t ubytes, mbytes;
         int result;
-        unsigned char __user *buf;
+        unsigned char __user *buf = NULL;
+        unsigned char *kbuf = NULL;
 
         result = 0;
-        buf = segment->buf;
+        if (image->file_mode)
+                kbuf = segment->kbuf;
+        else
+                buf = segment->buf;
         ubytes = segment->bufsz;
         mbytes = segment->memsz;
         maddr = segment->mem;
@@ -807,7 +1038,11 @@ static int kimage_load_normal_segment(struct kimage *image,
                                  PAGE_SIZE - (maddr & ~PAGE_MASK));
                 uchunk = min(ubytes, mchunk);
 
-                result = copy_from_user(ptr, buf, uchunk);
+                /* For file based kexec, source pages are in kernel memory */
+                if (image->file_mode)
+                        memcpy(ptr, kbuf, uchunk);
+                else
+                        result = copy_from_user(ptr, buf, uchunk);
                 kunmap(page);
                 if (result) {
                         result = -EFAULT;
@@ -815,7 +1050,10 @@ static int kimage_load_normal_segment(struct kimage *image,
                 }
                 ubytes -= uchunk;
                 maddr += mchunk;
-                buf += mchunk;
+                if (image->file_mode)
+                        kbuf += mchunk;
+                else
+                        buf += mchunk;
                 mbytes -= mchunk;
         }
 out:
@@ -1062,7 +1300,72 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
                 unsigned long, cmdline_len, const char __user *, cmdline_ptr,
                 unsigned long, flags)
 {
-        return -ENOSYS;
+        int ret = 0, i;
+        struct kimage **dest_image, *image;
+
+        /* We only trust the superuser with rebooting the system. */
+        if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+                return -EPERM;
+
+        /* Make sure we have a legal set of flags */
+        if (flags != (flags & KEXEC_FILE_FLAGS))
+                return -EINVAL;
+
+        image = NULL;
+
+        if (!mutex_trylock(&kexec_mutex))
+                return -EBUSY;
+
+        dest_image = &kexec_image;
+        if (flags & KEXEC_FILE_ON_CRASH)
+                dest_image = &kexec_crash_image;
+
+        if (flags & KEXEC_FILE_UNLOAD)
+                goto exchange;
+
+        /*
+         * In case of crash, the new kernel gets loaded in a reserved region.
+         * It is the same memory where an old crash kernel might be loaded.
+         * Free any current crash dump kernel before we corrupt it.
+         */
+        if (flags & KEXEC_FILE_ON_CRASH)
+                kimage_free(xchg(&kexec_crash_image, NULL));
+
+        ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
+                                     cmdline_len, flags);
+        if (ret)
+                goto out;
+
+        ret = machine_kexec_prepare(image);
+        if (ret)
+                goto out;
+
+        for (i = 0; i < image->nr_segments; i++) {
+                struct kexec_segment *ksegment;
+
+                ksegment = &image->segment[i];
+                pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+                         i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+                         ksegment->memsz);
+
+                ret = kimage_load_segment(image, &image->segment[i]);
+                if (ret)
+                        goto out;
+        }
+
+        kimage_terminate(image);
+
+        /*
+         * Free up any temporary buffers allocated which are not needed
+         * after the image has been loaded.
+         */
+        kimage_file_post_load_cleanup(image);
+exchange:
+        image = xchg(dest_image, image);
+out:
+        mutex_unlock(&kexec_mutex);
+        kimage_free(image);
+        return ret;
 }
 
 void crash_kexec(struct pt_regs *regs)
@@ -1620,6 +1923,176 @@ static int __init crash_save_vmcoreinfo_init(void)
 
 subsys_initcall(crash_save_vmcoreinfo_init);
 
+static int __kexec_add_segment(struct kimage *image, char *buf,
+                               unsigned long bufsz, unsigned long mem,
+                               unsigned long memsz)
+{
+        struct kexec_segment *ksegment;
+
+        ksegment = &image->segment[image->nr_segments];
+        ksegment->kbuf = buf;
+        ksegment->bufsz = bufsz;
+        ksegment->mem = mem;
+        ksegment->memsz = memsz;
+        image->nr_segments++;
+
+        return 0;
+}
+
+static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
+                                    struct kexec_buf *kbuf)
+{
+        struct kimage *image = kbuf->image;
+        unsigned long temp_start, temp_end;
+
+        temp_end = min(end, kbuf->buf_max);
+        temp_start = temp_end - kbuf->memsz;
+
+        do {
+                /* align down start */
+                temp_start = temp_start & (~(kbuf->buf_align - 1));
+
+                if (temp_start < start || temp_start < kbuf->buf_min)
+                        return 0;
+
+                temp_end = temp_start + kbuf->memsz - 1;
+
+                /*
+                 * Make sure this does not conflict with any of the existing
+                 * segments.
+                 */
+                if (kimage_is_destination_range(image, temp_start, temp_end)) {
+                        temp_start = temp_start - PAGE_SIZE;
+                        continue;
+                }
+
+                /* We found a suitable memory range */
+                break;
+        } while (1);
+
+        /* If we are here, we found a suitable memory range */
+        __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+                            kbuf->memsz);
+
+        /* Success, stop navigating through remaining System RAM ranges */
+        return 1;
+}
+
+static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
+                                     struct kexec_buf *kbuf)
+{
+        struct kimage *image = kbuf->image;
+        unsigned long temp_start, temp_end;
+
+        temp_start = max(start, kbuf->buf_min);
+
+        do {
+                temp_start = ALIGN(temp_start, kbuf->buf_align);
+                temp_end = temp_start + kbuf->memsz - 1;
+
+                if (temp_end > end || temp_end > kbuf->buf_max)
+                        return 0;
+                /*
+                 * Make sure this does not conflict with any of the existing
+                 * segments.
+                 */
+                if (kimage_is_destination_range(image, temp_start, temp_end)) {
+                        temp_start = temp_start + PAGE_SIZE;
+                        continue;
+                }
+
+                /* We found a suitable memory range */
+                break;
+        } while (1);
+
+        /* If we are here, we found a suitable memory range */
+        __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+                            kbuf->memsz);
+
+        /* Success, stop navigating through remaining System RAM ranges */
+        return 1;
+}
+
+static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
+{
+        struct kexec_buf *kbuf = (struct kexec_buf *)arg;
+        unsigned long sz = end - start + 1;
+
+        /* Returning 0 will take us to the next memory range */
+        if (sz < kbuf->memsz)
+                return 0;
+
+        if (end < kbuf->buf_min || start > kbuf->buf_max)
+                return 0;
+
+        /*
+         * Allocate memory top down within the ram range. Otherwise, bottom
+         * up allocation.
+         */
+        if (kbuf->top_down)
+                return locate_mem_hole_top_down(start, end, kbuf);
+        return locate_mem_hole_bottom_up(start, end, kbuf);
+}
+
+/*
+ * Helper function for placing a buffer in a kexec segment. This assumes
+ * that kexec_mutex is held.
+ */
+int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
+                     unsigned long memsz, unsigned long buf_align,
+                     unsigned long buf_min, unsigned long buf_max,
+                     bool top_down, unsigned long *load_addr)
+{
+
+        struct kexec_segment *ksegment;
+        struct kexec_buf buf, *kbuf;
+        int ret;
+
+        /* Currently adding a segment this way is allowed only in file mode */
+        if (!image->file_mode)
+                return -EINVAL;
+
+        if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+                return -EINVAL;
+
+        /*
+         * Make sure we are not trying to add a buffer after allocating
+         * control pages. All segments need to be placed first before
+         * any control pages are allocated, as the control page allocation
+         * logic goes through the list of segments to make sure there are
+         * no destination overlaps.
+         */
+        if (!list_empty(&image->control_pages)) {
+                WARN_ON(1);
+                return -EINVAL;
+        }
+
+        memset(&buf, 0, sizeof(struct kexec_buf));
+        kbuf = &buf;
+        kbuf->image = image;
+        kbuf->buffer = buffer;
+        kbuf->bufsz = bufsz;
+
+        kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
+        kbuf->buf_align = max(buf_align, PAGE_SIZE);
+        kbuf->buf_min = buf_min;
+        kbuf->buf_max = buf_max;
+        kbuf->top_down = top_down;
+
+        /* Walk the RAM ranges and allocate a suitable range for the buffer */
+        ret = walk_system_ram_res(0, -1, kbuf, locate_mem_hole_callback);
+        if (ret != 1) {
+                /* A suitable memory range could not be found for the buffer */
+                return -EADDRNOTAVAIL;
+        }
+
+        /* Found a suitable memory range */
+        ksegment = &image->segment[image->nr_segments - 1];
+        *load_addr = ksegment->mem;
+        return 0;
+}
+
+
 /*
  * Move into place and start executing a preloaded standalone
  * executable. If nothing was preloaded return an error.
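For context beyond this diff: a minimal sketch of how an arch/image-specific loader (the kind the next patch in the series supplies) might place its buffers with kexec_add_buffer(). Only the kexec_add_buffer() signature and the image->kernel_buf/initrd_buf fields come from this patch; the function name, the 16M floor, and the 4G ceiling are hypothetical illustration values:

/*
 * Hypothetical loader fragment, NOT part of this patch. Addresses and
 * alignment values are made up for illustration.
 */
static int example_arch_load(struct kimage *image)
{
        unsigned long kernel_load_addr, initrd_load_addr;
        int ret;

        /* Place the kernel buffer above 16M, page aligned, bottom up */
        ret = kexec_add_buffer(image, image->kernel_buf,
                               image->kernel_buf_len, image->kernel_buf_len,
                               PAGE_SIZE, 0x1000000UL, ULONG_MAX, false,
                               &kernel_load_addr);
        if (ret)
                return ret;

        /* Place the initrd below 4G, searching the RAM ranges top down */
        ret = kexec_add_buffer(image, image->initrd_buf,
                               image->initrd_buf_len, image->initrd_buf_len,
                               PAGE_SIZE, 0x1000000UL, 0xffffffffUL, true,
                               &initrd_load_addr);
        if (ret)
                return ret;

        pr_debug("kernel at 0x%lx, initrd at 0x%lx\n",
                 kernel_load_addr, initrd_load_addr);
        return 0;
}

Note that buf_align is implicitly assumed to be a power of two: locate_mem_hole_top_down() aligns down with temp_start & ~(buf_align - 1), which only works for power-of-two alignments. And since memsz is rounded up with ALIGN(memsz, PAGE_SIZE), every placed segment occupies at least one page of the destination range.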