165 files changed, 4768 insertions, 4353 deletions
@@ -2992,6 +2992,10 @@ S: 2200 Mission College Blvd | |||
2992 | S: Santa Clara, CA 95052 | 2992 | S: Santa Clara, CA 95052 |
2993 | S: USA | 2993 | S: USA |
2994 | 2994 | ||
2995 | N: Anil Ravindranath | ||
2996 | E: anil_ravindranath@pmc-sierra.com | ||
2997 | D: PMC-Sierra MaxRAID driver | ||
2998 | |||
2995 | N: Eric S. Raymond | 2999 | N: Eric S. Raymond |
2996 | E: esr@thyrsus.com | 3000 | E: esr@thyrsus.com |
2997 | W: http://www.tuxedo.org/~esr/ | 3001 | W: http://www.tuxedo.org/~esr/ |
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index 081c49777abb..6a5e2a102a45 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -14,6 +14,8 @@ hugetlbpage.txt | |||
14 | - a brief summary of hugetlbpage support in the Linux kernel. | 14 | - a brief summary of hugetlbpage support in the Linux kernel. |
15 | hwpoison.txt | 15 | hwpoison.txt |
16 | - explains what hwpoison is | 16 | - explains what hwpoison is |
17 | idle_page_tracking.txt | ||
18 | - description of the idle page tracking feature. | ||
17 | ksm.txt | 19 | ksm.txt |
18 | - how to use the Kernel Samepage Merging feature. | 20 | - how to use the Kernel Samepage Merging feature. |
19 | numa | 21 | numa |
diff --git a/Documentation/vm/idle_page_tracking.txt b/Documentation/vm/idle_page_tracking.txt
new file mode 100644
index 000000000000..85dcc3bb85dc
--- /dev/null
+++ b/Documentation/vm/idle_page_tracking.txt
@@ -0,0 +1,98 @@ | |||
1 | MOTIVATION | ||
2 | |||
3 | The idle page tracking feature allows one to track which memory pages are being | ||
4 | accessed by a workload and which are idle. This information can be useful for | ||
5 | estimating the workload's working set size, which, in turn, can be taken into | ||
6 | account when configuring the workload parameters, setting memory cgroup limits, | ||
7 | or deciding where to place the workload within a compute cluster. | ||
8 | |||
9 | It is enabled by CONFIG_IDLE_PAGE_TRACKING=y. | ||
10 | |||
11 | USER API | ||
12 | |||
13 | The idle page tracking API is located at /sys/kernel/mm/page_idle. Currently, | ||
14 | it consists of a single read-write file, /sys/kernel/mm/page_idle/bitmap. | ||
15 | |||
16 | The file implements a bitmap where each bit corresponds to a memory page. The | ||
17 | bitmap is represented by an array of 8-byte integers, and the page at PFN #i is | ||
18 | mapped to bit #i%64 of array element #i/64; the byte order is native. When a bit is | ||
19 | set, the corresponding page is idle. | ||
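For example, the index arithmetic described above works out as follows (a minimal C sketch; the helper names are illustrative, not an existing API):

  #include <stdint.h>

  /* PFN #i lives in 8-byte word #i/64 of the bitmap, at bit #i%64. */
  static inline uint64_t page_idle_word_offset(uint64_t pfn)
  {
      return (pfn / 64) * 8;       /* byte offset of the containing word */
  }

  static inline uint64_t page_idle_bit_mask(uint64_t pfn)
  {
      return 1ULL << (pfn % 64);   /* bit within that word */
  }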
20 | |||
21 | A page is considered idle if it has not been accessed since it was marked idle | ||
22 | (for more details on what "accessed" actually means see the IMPLEMENTATION | ||
23 | DETAILS section). To mark a page idle one has to set the bit corresponding to | ||
24 | the page by writing to the file. A value written to the file is OR-ed with the | ||
25 | current bitmap value. | ||
26 | |||
27 | Only accesses to user memory pages are tracked. These are pages mapped to a | ||
28 | process address space, page cache and buffer pages, swap cache pages. For other | ||
29 | page types (e.g. SLAB pages) an attempt to mark a page idle is silently ignored, | ||
30 | and hence such pages are never reported idle. | ||
31 | |||
32 | For huge pages the idle flag is set only on the head page, so one has to read | ||
33 | /proc/kpageflags in order to correctly count idle huge pages. | ||
34 | |||
35 | Reading from or writing to /sys/kernel/mm/page_idle/bitmap will return | ||
36 | -EINVAL if you are not starting the read/write on an 8-byte boundary, or | ||
37 | if the size of the read/write is not a multiple of 8 bytes. Writing to | ||
38 | this file beyond max PFN will return -ENXIO. | ||
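Putting the OR-ing semantics and the alignment rules together, a single page can be marked idle with one aligned 8-byte write. A minimal sketch, assuming CONFIG_IDLE_PAGE_TRACKING=y and sufficient privileges (the helper name is made up):

  #include <fcntl.h>
  #include <stdint.h>
  #include <unistd.h>

  /* Mark the page at @pfn idle.  Written values are OR-ed into the bitmap,
   * so a word with a single bit set touches only that page.  The offset and
   * size are both multiples of 8 bytes, as required. */
  static int mark_pfn_idle(int bitmap_fd, uint64_t pfn)
  {
      uint64_t word = 1ULL << (pfn % 64);

      if (pwrite(bitmap_fd, &word, sizeof(word), (pfn / 64) * 8) != sizeof(word))
          return -1;               /* errno is ENXIO past the max PFN */
      return 0;
  }

The file descriptor would come from open("/sys/kernel/mm/page_idle/bitmap", O_RDWR).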
39 | |||
40 | That said, in order to estimate the number of pages that are not used by a | ||
41 | workload one should: | ||
42 | |||
43 | 1. Mark all the workload's pages as idle by setting corresponding bits in | ||
44 | /sys/kernel/mm/page_idle/bitmap. The pages can be found by reading | ||
45 | /proc/pid/pagemap if the workload is represented by a process, or by | ||
46 | filtering out alien pages using /proc/kpagecgroup in case the workload is | ||
47 | placed in a memory cgroup. | ||
48 | |||
49 | 2. Wait until the workload accesses its working set. | ||
50 | |||
51 | 3. Read /sys/kernel/mm/page_idle/bitmap and count the number of bits set. If | ||
52 | one wants to ignore certain types of pages, e.g. mlocked pages since they | ||
53 | are not reclaimable, he or she can filter them out using /proc/kpageflags. | ||
54 | |||
55 | See Documentation/vm/pagemap.txt for more information about /proc/pid/pagemap, | ||
56 | /proc/kpageflags, and /proc/kpagecgroup. | ||
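A rough sketch of step 3 is shown below: it counts how many pages in a PFN range are still idle. The range here is a placeholder; real PFNs would be gathered from /proc/pid/pagemap or filtered through /proc/kpagecgroup as described above, and the range is assumed to be a multiple of 64 PFNs so that whole bitmap words can be read.

  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
      /* Placeholder range; substitute PFNs that belong to the workload. */
      uint64_t start_pfn = 0x100000, end_pfn = 0x110000;
      uint64_t pfn, word, idle = 0;
      int fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDONLY);

      if (fd < 0)
          return 1;
      /* One 8-byte word covers 64 consecutive PFNs. */
      for (pfn = start_pfn; pfn < end_pfn; pfn += 64) {
          if (pread(fd, &word, sizeof(word), (pfn / 64) * 8) != sizeof(word))
              break;
          idle += __builtin_popcountll(word);
      }
      printf("%llu pages still idle\n", (unsigned long long)idle);
      close(fd);
      return 0;
  }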
57 | |||
58 | IMPLEMENTATION DETAILS | ||
59 | |||
60 | The kernel internally keeps track of accesses to user memory pages in order to | ||
61 | reclaim unreferenced pages first on memory shortage conditions. A page is | ||
62 | considered referenced if it has been recently accessed via a process address | ||
63 | space, in which case one or more PTEs it is mapped to will have the Accessed bit | ||
64 | set, or marked accessed explicitly by the kernel (see mark_page_accessed()). The | ||
65 | latter happens when: | ||
66 | |||
67 | - a userspace process reads or writes a page using a system call (e.g. read(2) | ||
68 | or write(2)) | ||
69 | |||
70 | - a page that is used for storing filesystem buffers is read or written, | ||
71 | because a process needs filesystem metadata stored in it (e.g. lists a | ||
72 | directory tree) | ||
73 | |||
74 | - a page is accessed by a device driver using get_user_pages() | ||
75 | |||
76 | When a dirty page is written to swap or disk as a result of memory reclaim or | ||
77 | exceeding the dirty memory limit, it is not marked referenced. | ||
78 | |||
79 | The idle memory tracking feature adds a new page flag, the Idle flag. This flag | ||
80 | is set manually, by writing to /sys/kernel/mm/page_idle/bitmap (see the USER API | ||
81 | section), and cleared automatically whenever a page is referenced as defined | ||
82 | above. | ||
83 | |||
84 | When a page is marked idle, the Accessed bit must be cleared in all PTEs it is | ||
85 | mapped to, otherwise we will not be able to detect accesses to the page coming | ||
86 | from a process address space. To avoid interference with the reclaimer, which, | ||
87 | as noted above, uses the Accessed bit to promote actively referenced pages, one | ||
88 | more page flag is introduced, the Young flag. When the PTE Accessed bit is | ||
89 | cleared as a result of setting or updating a page's Idle flag, the Young flag | ||
90 | is set on the page. The reclaimer treats the Young flag as an extra PTE | ||
91 | Accessed bit and therefore will consider such a page as referenced. | ||
92 | |||
93 | Since the idle memory tracking feature is based on the memory reclaimer logic, | ||
94 | it only works with pages that are on an LRU list, other pages are silently | ||
95 | ignored. That means it will ignore a user memory page if it is isolated, but | ||
96 | since there are usually not many of them, it should not affect the overall | ||
97 | result noticeably. In order not to stall scanning of the idle page bitmap, | ||
98 | locked pages may be skipped too. | ||
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
index 3cd38438242a..0e1e55588b59 100644
--- a/Documentation/vm/pagemap.txt
+++ b/Documentation/vm/pagemap.txt
@@ -5,7 +5,7 @@ pagemap is a new (as of 2.6.25) set of interfaces in the kernel that allow | |||
5 | userspace programs to examine the page tables and related information by | 5 | userspace programs to examine the page tables and related information by |
6 | reading files in /proc. | 6 | reading files in /proc. |
7 | 7 | ||
8 | There are three components to pagemap: | 8 | There are four components to pagemap: |
9 | 9 | ||
10 | * /proc/pid/pagemap. This file lets a userspace process find out which | 10 | * /proc/pid/pagemap. This file lets a userspace process find out which |
11 | physical frame each virtual page is mapped to. It contains one 64-bit | 11 | physical frame each virtual page is mapped to. It contains one 64-bit |
@@ -70,6 +70,11 @@ There are three components to pagemap: | |||
70 | 22. THP | 70 | 22. THP |
71 | 23. BALLOON | 71 | 23. BALLOON |
72 | 24. ZERO_PAGE | 72 | 24. ZERO_PAGE |
73 | 25. IDLE | ||
74 | |||
75 | * /proc/kpagecgroup. This file contains a 64-bit inode number of the | ||
76 | memory cgroup each page is charged to, indexed by PFN. Only available when | ||
77 | CONFIG_MEMCG is set. | ||
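Since /proc/kpagecgroup is a flat array of 64-bit values indexed by PFN, a per-page lookup is just an 8-byte pread at offset pfn * 8. A small illustrative sketch (the helper name is made up):

  #include <fcntl.h>
  #include <stdint.h>
  #include <unistd.h>

  /* Return the memory cgroup inode number the page at @pfn is charged to,
   * or 0 on error.  The result can be compared against the inode of a
   * cgroup directory (obtained with stat(2)) to filter pages by cgroup. */
  static uint64_t pfn_to_memcg_inode(uint64_t pfn)
  {
      uint64_t ino = 0;
      int fd = open("/proc/kpagecgroup", O_RDONLY);

      if (fd < 0)
          return 0;
      if (pread(fd, &ino, sizeof(ino), pfn * sizeof(ino)) != sizeof(ino))
          ino = 0;
      close(fd);
      return ino;
  }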
73 | 78 | ||
74 | Short descriptions to the page flags: | 79 | Short descriptions to the page flags: |
75 | 80 | ||
@@ -116,6 +121,12 @@ Short descriptions to the page flags: | |||
116 | 24. ZERO_PAGE | 121 | 24. ZERO_PAGE |
117 | zero page for pfn_zero or huge_zero page | 122 | zero page for pfn_zero or huge_zero page |
118 | 123 | ||
124 | 25. IDLE | ||
125 | page has not been accessed since it was marked idle (see | ||
126 | Documentation/vm/idle_page_tracking.txt). Note that this flag may be | ||
127 | stale in case the page was accessed via a PTE. To make sure the flag | ||
128 | is up-to-date one has to read /sys/kernel/mm/page_idle/bitmap first. | ||
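/proc/kpageflags is likewise one 64-bit flag word per PFN, so the IDLE bit (flag number 25 in the list above) can be tested as in this minimal sketch (helper name made up):

  #include <fcntl.h>
  #include <stdint.h>
  #include <unistd.h>

  #define KPF_IDLE 25    /* flag number from the list above */

  /* Returns 1 if the page at @pfn is flagged idle, 0 if not, -1 on error.
   * As noted above, read /sys/kernel/mm/page_idle/bitmap first so the
   * flag reported here is not stale. */
  static int pfn_idle_flag(uint64_t pfn)
  {
      uint64_t flags;
      int fd = open("/proc/kpageflags", O_RDONLY);
      int ret = -1;

      if (fd < 0)
          return -1;
      if (pread(fd, &flags, sizeof(flags), pfn * sizeof(flags)) == sizeof(flags))
          ret = !!(flags & (1ULL << KPF_IDLE));
      close(fd);
      return ret;
  }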
129 | |||
119 | [IO related page flags] | 130 | [IO related page flags] |
120 | 1. ERROR IO error occurred | 131 | 1. ERROR IO error occurred |
121 | 3. UPTODATE page has up-to-date data | 132 | 3. UPTODATE page has up-to-date data |
diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt
index 8458c0861e4e..89fff7d611cc 100644
--- a/Documentation/vm/zswap.txt
+++ b/Documentation/vm/zswap.txt
@@ -32,7 +32,7 @@ can also be enabled and disabled at runtime using the sysfs interface. | |||
32 | An example command to enable zswap at runtime, assuming sysfs is mounted | 32 | An example command to enable zswap at runtime, assuming sysfs is mounted |
33 | at /sys, is: | 33 | at /sys, is: |
34 | 34 | ||
35 | echo 1 > /sys/modules/zswap/parameters/enabled | 35 | echo 1 > /sys/module/zswap/parameters/enabled |
36 | 36 | ||
37 | When zswap is disabled at runtime it will stop storing pages that are | 37 | When zswap is disabled at runtime it will stop storing pages that are |
38 | being swapped out. However, it will _not_ immediately write out or fault | 38 | being swapped out. However, it will _not_ immediately write out or fault |
@@ -49,14 +49,26 @@ Zswap receives pages for compression through the Frontswap API and is able to | |||
49 | evict pages from its own compressed pool on an LRU basis and write them back to | 49 | evict pages from its own compressed pool on an LRU basis and write them back to |
50 | the backing swap device in the case that the compressed pool is full. | 50 | the backing swap device in the case that the compressed pool is full. |
51 | 51 | ||
52 | Zswap makes use of zbud for the managing the compressed memory pool. Each | 52 | Zswap makes use of zpool for the managing the compressed memory pool. Each |
53 | allocation in zbud is not directly accessible by address. Rather, a handle is | 53 | allocation in zpool is not directly accessible by address. Rather, a handle is |
54 | returned by the allocation routine and that handle must be mapped before being | 54 | returned by the allocation routine and that handle must be mapped before being |
55 | accessed. The compressed memory pool grows on demand and shrinks as compressed | 55 | accessed. The compressed memory pool grows on demand and shrinks as compressed |
56 | pages are freed. The pool is not preallocated. | 56 | pages are freed. The pool is not preallocated. By default, a zpool of type |
57 | zbud is created, but it can be selected at boot time by setting the "zpool" | ||
58 | attribute, e.g. zswap.zpool=zbud. It can also be changed at runtime using the | ||
59 | sysfs "zpool" attribute, e.g. | ||
60 | |||
61 | echo zbud > /sys/module/zswap/parameters/zpool | ||
62 | |||
63 | The zbud type zpool allocates exactly 1 page to store 2 compressed pages, which | ||
64 | means the compression ratio will always be 2:1 or worse (because of half-full | ||
65 | zbud pages). The zsmalloc type zpool has a more complex compressed page | ||
66 | storage method, and it can achieve greater storage densities. However, | ||
67 | zsmalloc does not implement compressed page eviction, so once zswap fills it | ||
68 | cannot evict the oldest page, it can only reject new pages. | ||
57 | 69 | ||
58 | When a swap page is passed from frontswap to zswap, zswap maintains a mapping | 70 | When a swap page is passed from frontswap to zswap, zswap maintains a mapping |
59 | of the swap entry, a combination of the swap type and swap offset, to the zbud | 71 | of the swap entry, a combination of the swap type and swap offset, to the zpool |
60 | handle that references that compressed swap page. This mapping is achieved | 72 | handle that references that compressed swap page. This mapping is achieved |
61 | with a red-black tree per swap type. The swap offset is the search key for the | 73 | with a red-black tree per swap type. The swap offset is the search key for the |
62 | tree nodes. | 74 | tree nodes. |
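A sketch of what such a lookup can look like, using the kernel's rbtree API (the structure and function names here are illustrative and need not match mm/zswap.c exactly):

  #include <linux/rbtree.h>
  #include <linux/types.h>

  /* One node per stored page: maps a swap offset to a zpool handle. */
  struct zswap_entry {
      struct rb_node rbnode;
      pgoff_t offset;           /* search key: the swap offset */
      unsigned long handle;     /* zpool handle of the compressed page */
  };

  /* Look up the entry for @offset in the per-swap-type tree @root. */
  static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
  {
      struct rb_node *node = root->rb_node;

      while (node) {
          struct zswap_entry *entry = rb_entry(node, struct zswap_entry, rbnode);

          if (offset < entry->offset)
              node = node->rb_left;
          else if (offset > entry->offset)
              node = node->rb_right;
          else
              return entry;
      }
      return NULL;
  }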
@@ -74,9 +86,17 @@ controlled policy: | |||
74 | * max_pool_percent - The maximum percentage of memory that the compressed | 86 | * max_pool_percent - The maximum percentage of memory that the compressed |
75 | pool can occupy. | 87 | pool can occupy. |
76 | 88 | ||
77 | Zswap allows the compressor to be selected at kernel boot time by setting the | 89 | The default compressor is lzo, but it can be selected at boot time by setting |
78 | “compressor” attribute. The default compressor is lzo. e.g. | 90 | the “compressor” attribute, e.g. zswap.compressor=lzo. It can also be changed |
79 | zswap.compressor=deflate | 91 | at runtime using the sysfs "compressor" attribute, e.g. |
92 | |||
93 | echo lzo > /sys/module/zswap/parameters/compressor | ||
94 | |||
95 | When the zpool and/or compressor parameter is changed at runtime, any existing | ||
96 | compressed pages are not modified; they are left in their own zpool. When a | ||
97 | request is made for a page in an old zpool, it is uncompressed using its | ||
98 | original compressor. Once all pages are removed from an old zpool, the zpool | ||
99 | and its compressor are freed. | ||
80 | 100 | ||
81 | A debugfs interface is provided for various statistic about pool size, number | 101 | A debugfs interface is provided for various statistic about pool size, number |
82 | of pages stored, and various counters for the reasons pages are rejected. | 102 | of pages stored, and various counters for the reasons pages are rejected. |
diff --git a/MAINTAINERS b/MAINTAINERS
index 67a4443daed9..310da4295c70 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8199,10 +8199,9 @@ F: drivers/hwmon/pmbus/ | |||
8199 | F: include/linux/i2c/pmbus.h | 8199 | F: include/linux/i2c/pmbus.h |
8200 | 8200 | ||
8201 | PMC SIERRA MaxRAID DRIVER | 8201 | PMC SIERRA MaxRAID DRIVER |
8202 | M: Anil Ravindranath <anil_ravindranath@pmc-sierra.com> | ||
8203 | L: linux-scsi@vger.kernel.org | 8202 | L: linux-scsi@vger.kernel.org |
8204 | W: http://www.pmc-sierra.com/ | 8203 | W: http://www.pmc-sierra.com/ |
8205 | S: Supported | 8204 | S: Orphan |
8206 | F: drivers/scsi/pmcraid.* | 8205 | F: drivers/scsi/pmcraid.* |
8207 | 8206 | ||
8208 | PMC SIERRA PM8001 DRIVER | 8207 | PMC SIERRA PM8001 DRIVER |
diff --git a/arch/Kconfig b/arch/Kconfig
index 8f3564930580..4e949e58b192 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -2,6 +2,9 @@ | |||
2 | # General architecture dependent options | 2 | # General architecture dependent options |
3 | # | 3 | # |
4 | 4 | ||
5 | config KEXEC_CORE | ||
6 | bool | ||
7 | |||
5 | config OPROFILE | 8 | config OPROFILE |
6 | tristate "OProfile system profiling" | 9 | tristate "OProfile system profiling" |
7 | depends on PROFILING | 10 | depends on PROFILING |
diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h
index dfa32f061320..72a8ca7796d9 100644
--- a/arch/alpha/include/asm/dma-mapping.h
+++ b/arch/alpha/include/asm/dma-mapping.h
@@ -12,42 +12,6 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
12 | 12 | ||
13 | #include <asm-generic/dma-mapping-common.h> | 13 | #include <asm-generic/dma-mapping-common.h> |
14 | 14 | ||
15 | #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) | ||
16 | |||
17 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
18 | dma_addr_t *dma_handle, gfp_t gfp, | ||
19 | struct dma_attrs *attrs) | ||
20 | { | ||
21 | return get_dma_ops(dev)->alloc(dev, size, dma_handle, gfp, attrs); | ||
22 | } | ||
23 | |||
24 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) | ||
25 | |||
26 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
27 | void *vaddr, dma_addr_t dma_handle, | ||
28 | struct dma_attrs *attrs) | ||
29 | { | ||
30 | get_dma_ops(dev)->free(dev, size, vaddr, dma_handle, attrs); | ||
31 | } | ||
32 | |||
33 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
34 | { | ||
35 | return get_dma_ops(dev)->mapping_error(dev, dma_addr); | ||
36 | } | ||
37 | |||
38 | static inline int dma_supported(struct device *dev, u64 mask) | ||
39 | { | ||
40 | return get_dma_ops(dev)->dma_supported(dev, mask); | ||
41 | } | ||
42 | |||
43 | static inline int dma_set_mask(struct device *dev, u64 mask) | ||
44 | { | ||
45 | return get_dma_ops(dev)->set_dma_mask(dev, mask); | ||
46 | } | ||
47 | |||
48 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
49 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
50 | |||
51 | #define dma_cache_sync(dev, va, size, dir) ((void)0) | 15 | #define dma_cache_sync(dev, va, size, dir) ((void)0) |
52 | 16 | ||
53 | #endif /* _ALPHA_DMA_MAPPING_H */ | 17 | #endif /* _ALPHA_DMA_MAPPING_H */ |
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index df24b76f9246..2b1f4a1e9272 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -166,15 +166,6 @@ static int alpha_noop_supported(struct device *dev, u64 mask) | |||
166 | return mask < 0x00ffffffUL ? 0 : 1; | 166 | return mask < 0x00ffffffUL ? 0 : 1; |
167 | } | 167 | } |
168 | 168 | ||
169 | static int alpha_noop_set_mask(struct device *dev, u64 mask) | ||
170 | { | ||
171 | if (!dev->dma_mask || !dma_supported(dev, mask)) | ||
172 | return -EIO; | ||
173 | |||
174 | *dev->dma_mask = mask; | ||
175 | return 0; | ||
176 | } | ||
177 | |||
178 | struct dma_map_ops alpha_noop_ops = { | 169 | struct dma_map_ops alpha_noop_ops = { |
179 | .alloc = alpha_noop_alloc_coherent, | 170 | .alloc = alpha_noop_alloc_coherent, |
180 | .free = alpha_noop_free_coherent, | 171 | .free = alpha_noop_free_coherent, |
@@ -182,7 +173,6 @@ struct dma_map_ops alpha_noop_ops = { | |||
182 | .map_sg = alpha_noop_map_sg, | 173 | .map_sg = alpha_noop_map_sg, |
183 | .mapping_error = alpha_noop_mapping_error, | 174 | .mapping_error = alpha_noop_mapping_error, |
184 | .dma_supported = alpha_noop_supported, | 175 | .dma_supported = alpha_noop_supported, |
185 | .set_dma_mask = alpha_noop_set_mask, | ||
186 | }; | 176 | }; |
187 | 177 | ||
188 | struct dma_map_ops *dma_ops = &alpha_noop_ops; | 178 | struct dma_map_ops *dma_ops = &alpha_noop_ops; |
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index eddee7720343..8969bf2dfe3a 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -939,16 +939,6 @@ static int alpha_pci_mapping_error(struct device *dev, dma_addr_t dma_addr) | |||
939 | return dma_addr == 0; | 939 | return dma_addr == 0; |
940 | } | 940 | } |
941 | 941 | ||
942 | static int alpha_pci_set_mask(struct device *dev, u64 mask) | ||
943 | { | ||
944 | if (!dev->dma_mask || | ||
945 | !pci_dma_supported(alpha_gendev_to_pci(dev), mask)) | ||
946 | return -EIO; | ||
947 | |||
948 | *dev->dma_mask = mask; | ||
949 | return 0; | ||
950 | } | ||
951 | |||
952 | struct dma_map_ops alpha_pci_ops = { | 942 | struct dma_map_ops alpha_pci_ops = { |
953 | .alloc = alpha_pci_alloc_coherent, | 943 | .alloc = alpha_pci_alloc_coherent, |
954 | .free = alpha_pci_free_coherent, | 944 | .free = alpha_pci_free_coherent, |
@@ -958,7 +948,6 @@ struct dma_map_ops alpha_pci_ops = { | |||
958 | .unmap_sg = alpha_pci_unmap_sg, | 948 | .unmap_sg = alpha_pci_unmap_sg, |
959 | .mapping_error = alpha_pci_mapping_error, | 949 | .mapping_error = alpha_pci_mapping_error, |
960 | .dma_supported = alpha_pci_supported, | 950 | .dma_supported = alpha_pci_supported, |
961 | .set_dma_mask = alpha_pci_set_mask, | ||
962 | }; | 951 | }; |
963 | 952 | ||
964 | struct dma_map_ops *dma_ops = &alpha_pci_ops; | 953 | struct dma_map_ops *dma_ops = &alpha_pci_ops; |
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0d1b717e1eca..72ad724c67ae 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2020,6 +2020,7 @@ config KEXEC | |||
2020 | bool "Kexec system call (EXPERIMENTAL)" | 2020 | bool "Kexec system call (EXPERIMENTAL)" |
2021 | depends on (!SMP || PM_SLEEP_SMP) | 2021 | depends on (!SMP || PM_SLEEP_SMP) |
2022 | depends on !CPU_V7M | 2022 | depends on !CPU_V7M |
2023 | select KEXEC_CORE | ||
2023 | help | 2024 | help |
2024 | kexec is a system call that implements the ability to shutdown your | 2025 | kexec is a system call that implements the ability to shutdown your |
2025 | current kernel, and to start another kernel. It is like a reboot | 2026 | current kernel, and to start another kernel. It is like a reboot |
diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index bd245d34952d..a0765e7ed6c7 100644
--- a/arch/arm/boot/compressed/decompress.c
+++ b/arch/arm/boot/compressed/decompress.c
@@ -57,5 +57,5 @@ extern char * strstr(const char * s1, const char *s2); | |||
57 | 57 | ||
58 | int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) | 58 | int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) |
59 | { | 59 | { |
60 | return decompress(input, len, NULL, NULL, output, NULL, error); | 60 | return __decompress(input, len, NULL, NULL, output, 0, NULL, error); |
61 | } | 61 | } |
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index a68b9d8a71fe..ccb3aa64640d 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/dma-attrs.h> | 8 | #include <linux/dma-attrs.h> |
9 | #include <linux/dma-debug.h> | 9 | #include <linux/dma-debug.h> |
10 | 10 | ||
11 | #include <asm-generic/dma-coherent.h> | ||
12 | #include <asm/memory.h> | 11 | #include <asm/memory.h> |
13 | 12 | ||
14 | #include <xen/xen.h> | 13 | #include <xen/xen.h> |
@@ -39,12 +38,15 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) | |||
39 | dev->archdata.dma_ops = ops; | 38 | dev->archdata.dma_ops = ops; |
40 | } | 39 | } |
41 | 40 | ||
42 | #include <asm-generic/dma-mapping-common.h> | 41 | #define HAVE_ARCH_DMA_SUPPORTED 1 |
42 | extern int dma_supported(struct device *dev, u64 mask); | ||
43 | 43 | ||
44 | static inline int dma_set_mask(struct device *dev, u64 mask) | 44 | /* |
45 | { | 45 | * Note that while the generic code provides dummy dma_{alloc,free}_noncoherent |
46 | return get_dma_ops(dev)->set_dma_mask(dev, mask); | 46 | * implementations, we don't provide a dma_cache_sync function so drivers using |
47 | } | 47 | * this API are highlighted with build warnings. |
48 | */ | ||
49 | #include <asm-generic/dma-mapping-common.h> | ||
48 | 50 | ||
49 | #ifdef __arch_page_to_dma | 51 | #ifdef __arch_page_to_dma |
50 | #error Please update to __arch_pfn_to_dma | 52 | #error Please update to __arch_pfn_to_dma |
@@ -167,32 +169,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | |||
167 | 169 | ||
168 | static inline void dma_mark_clean(void *addr, size_t size) { } | 170 | static inline void dma_mark_clean(void *addr, size_t size) { } |
169 | 171 | ||
170 | /* | ||
171 | * DMA errors are defined by all-bits-set in the DMA address. | ||
172 | */ | ||
173 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
174 | { | ||
175 | debug_dma_mapping_error(dev, dma_addr); | ||
176 | return dma_addr == DMA_ERROR_CODE; | ||
177 | } | ||
178 | |||
179 | /* | ||
180 | * Dummy noncoherent implementation. We don't provide a dma_cache_sync | ||
181 | * function so drivers using this API are highlighted with build warnings. | ||
182 | */ | ||
183 | static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, | ||
184 | dma_addr_t *handle, gfp_t gfp) | ||
185 | { | ||
186 | return NULL; | ||
187 | } | ||
188 | |||
189 | static inline void dma_free_noncoherent(struct device *dev, size_t size, | ||
190 | void *cpu_addr, dma_addr_t handle) | ||
191 | { | ||
192 | } | ||
193 | |||
194 | extern int dma_supported(struct device *dev, u64 mask); | ||
195 | |||
196 | extern int arm_dma_set_mask(struct device *dev, u64 dma_mask); | 172 | extern int arm_dma_set_mask(struct device *dev, u64 dma_mask); |
197 | 173 | ||
198 | /** | 174 | /** |
@@ -209,21 +185,6 @@ extern int arm_dma_set_mask(struct device *dev, u64 dma_mask); | |||
209 | extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, | 185 | extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, |
210 | gfp_t gfp, struct dma_attrs *attrs); | 186 | gfp_t gfp, struct dma_attrs *attrs); |
211 | 187 | ||
212 | #define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) | ||
213 | |||
214 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
215 | dma_addr_t *dma_handle, gfp_t flag, | ||
216 | struct dma_attrs *attrs) | ||
217 | { | ||
218 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
219 | void *cpu_addr; | ||
220 | BUG_ON(!ops); | ||
221 | |||
222 | cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); | ||
223 | debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); | ||
224 | return cpu_addr; | ||
225 | } | ||
226 | |||
227 | /** | 188 | /** |
228 | * arm_dma_free - free memory allocated by arm_dma_alloc | 189 | * arm_dma_free - free memory allocated by arm_dma_alloc |
229 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 190 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
@@ -241,19 +202,6 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, | |||
241 | extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, | 202 | extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, |
242 | dma_addr_t handle, struct dma_attrs *attrs); | 203 | dma_addr_t handle, struct dma_attrs *attrs); |
243 | 204 | ||
244 | #define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL) | ||
245 | |||
246 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
247 | void *cpu_addr, dma_addr_t dma_handle, | ||
248 | struct dma_attrs *attrs) | ||
249 | { | ||
250 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
251 | BUG_ON(!ops); | ||
252 | |||
253 | debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); | ||
254 | ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
255 | } | ||
256 | |||
257 | /** | 205 | /** |
258 | * arm_dma_mmap - map a coherent DMA allocation into user space | 206 | * arm_dma_mmap - map a coherent DMA allocation into user space |
259 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices | 207 | * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices |
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index bf35abcc7d59..e62604384945 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -676,10 +676,6 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, | |||
676 | gfp_t gfp, struct dma_attrs *attrs) | 676 | gfp_t gfp, struct dma_attrs *attrs) |
677 | { | 677 | { |
678 | pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); | 678 | pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); |
679 | void *memory; | ||
680 | |||
681 | if (dma_alloc_from_coherent(dev, size, handle, &memory)) | ||
682 | return memory; | ||
683 | 679 | ||
684 | return __dma_alloc(dev, size, handle, gfp, prot, false, | 680 | return __dma_alloc(dev, size, handle, gfp, prot, false, |
685 | attrs, __builtin_return_address(0)); | 681 | attrs, __builtin_return_address(0)); |
@@ -688,11 +684,6 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, | |||
688 | static void *arm_coherent_dma_alloc(struct device *dev, size_t size, | 684 | static void *arm_coherent_dma_alloc(struct device *dev, size_t size, |
689 | dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) | 685 | dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) |
690 | { | 686 | { |
691 | void *memory; | ||
692 | |||
693 | if (dma_alloc_from_coherent(dev, size, handle, &memory)) | ||
694 | return memory; | ||
695 | |||
696 | return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true, | 687 | return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true, |
697 | attrs, __builtin_return_address(0)); | 688 | attrs, __builtin_return_address(0)); |
698 | } | 689 | } |
@@ -752,9 +743,6 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, | |||
752 | struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); | 743 | struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); |
753 | bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); | 744 | bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); |
754 | 745 | ||
755 | if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) | ||
756 | return; | ||
757 | |||
758 | size = PAGE_ALIGN(size); | 746 | size = PAGE_ALIGN(size); |
759 | 747 | ||
760 | if (nommu()) { | 748 | if (nommu()) { |
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index f0d6d0bfe55c..cfdb34bedbcd 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -22,8 +22,6 @@ | |||
22 | #include <linux/types.h> | 22 | #include <linux/types.h> |
23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
24 | 24 | ||
25 | #include <asm-generic/dma-coherent.h> | ||
26 | |||
27 | #include <xen/xen.h> | 25 | #include <xen/xen.h> |
28 | #include <asm/xen/hypervisor.h> | 26 | #include <asm/xen/hypervisor.h> |
29 | 27 | ||
@@ -86,28 +84,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) | |||
86 | return (phys_addr_t)dev_addr; | 84 | return (phys_addr_t)dev_addr; |
87 | } | 85 | } |
88 | 86 | ||
89 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dev_addr) | ||
90 | { | ||
91 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
92 | debug_dma_mapping_error(dev, dev_addr); | ||
93 | return ops->mapping_error(dev, dev_addr); | ||
94 | } | ||
95 | |||
96 | static inline int dma_supported(struct device *dev, u64 mask) | ||
97 | { | ||
98 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
99 | return ops->dma_supported(dev, mask); | ||
100 | } | ||
101 | |||
102 | static inline int dma_set_mask(struct device *dev, u64 mask) | ||
103 | { | ||
104 | if (!dev->dma_mask || !dma_supported(dev, mask)) | ||
105 | return -EIO; | ||
106 | *dev->dma_mask = mask; | ||
107 | |||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | 87 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) |
112 | { | 88 | { |
113 | if (!dev->dma_mask) | 89 | if (!dev->dma_mask) |
@@ -120,50 +96,5 @@ static inline void dma_mark_clean(void *addr, size_t size) | |||
120 | { | 96 | { |
121 | } | 97 | } |
122 | 98 | ||
123 | #define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) | ||
124 | #define dma_free_coherent(d, s, h, f) dma_free_attrs(d, s, h, f, NULL) | ||
125 | |||
126 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
127 | dma_addr_t *dma_handle, gfp_t flags, | ||
128 | struct dma_attrs *attrs) | ||
129 | { | ||
130 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
131 | void *vaddr; | ||
132 | |||
133 | if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr)) | ||
134 | return vaddr; | ||
135 | |||
136 | vaddr = ops->alloc(dev, size, dma_handle, flags, attrs); | ||
137 | debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr); | ||
138 | return vaddr; | ||
139 | } | ||
140 | |||
141 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
142 | void *vaddr, dma_addr_t dev_addr, | ||
143 | struct dma_attrs *attrs) | ||
144 | { | ||
145 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
146 | |||
147 | if (dma_release_from_coherent(dev, get_order(size), vaddr)) | ||
148 | return; | ||
149 | |||
150 | debug_dma_free_coherent(dev, size, vaddr, dev_addr); | ||
151 | ops->free(dev, size, vaddr, dev_addr, attrs); | ||
152 | } | ||
153 | |||
154 | /* | ||
155 | * There is no dma_cache_sync() implementation, so just return NULL here. | ||
156 | */ | ||
157 | static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, | ||
158 | dma_addr_t *handle, gfp_t flags) | ||
159 | { | ||
160 | return NULL; | ||
161 | } | ||
162 | |||
163 | static inline void dma_free_noncoherent(struct device *dev, size_t size, | ||
164 | void *cpu_addr, dma_addr_t handle) | ||
165 | { | ||
166 | } | ||
167 | |||
168 | #endif /* __KERNEL__ */ | 99 | #endif /* __KERNEL__ */ |
169 | #endif /* __ASM_DMA_MAPPING_H */ | 100 | #endif /* __ASM_DMA_MAPPING_H */ |
diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c
index 704274127c07..c4f2cfcb117b 100644
--- a/arch/h8300/boot/compressed/misc.c
+++ b/arch/h8300/boot/compressed/misc.c
@@ -70,5 +70,5 @@ void decompress_kernel(void) | |||
70 | free_mem_ptr = (unsigned long)&_end; | 70 | free_mem_ptr = (unsigned long)&_end; |
71 | free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; | 71 | free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; |
72 | 72 | ||
73 | decompress(input_data, input_len, NULL, NULL, output, NULL, error); | 73 | __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); |
74 | } | 74 | } |
diff --git a/arch/h8300/include/asm/dma-mapping.h b/arch/h8300/include/asm/dma-mapping.h
index 6e67a90902f2..d9b5b806afe6 100644
--- a/arch/h8300/include/asm/dma-mapping.h
+++ b/arch/h8300/include/asm/dma-mapping.h
@@ -1,8 +1,6 @@ | |||
1 | #ifndef _H8300_DMA_MAPPING_H | 1 | #ifndef _H8300_DMA_MAPPING_H |
2 | #define _H8300_DMA_MAPPING_H | 2 | #define _H8300_DMA_MAPPING_H |
3 | 3 | ||
4 | #include <asm-generic/dma-coherent.h> | ||
5 | |||
6 | extern struct dma_map_ops h8300_dma_map_ops; | 4 | extern struct dma_map_ops h8300_dma_map_ops; |
7 | 5 | ||
8 | static inline struct dma_map_ops *get_dma_ops(struct device *dev) | 6 | static inline struct dma_map_ops *get_dma_ops(struct device *dev) |
@@ -12,46 +10,4 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
12 | 10 | ||
13 | #include <asm-generic/dma-mapping-common.h> | 11 | #include <asm-generic/dma-mapping-common.h> |
14 | 12 | ||
15 | static inline int dma_supported(struct device *dev, u64 mask) | ||
16 | { | ||
17 | return 0; | ||
18 | } | ||
19 | |||
20 | static inline int dma_set_mask(struct device *dev, u64 mask) | ||
21 | { | ||
22 | return 0; | ||
23 | } | ||
24 | |||
25 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
26 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
27 | |||
28 | #define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) | ||
29 | |||
30 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
31 | dma_addr_t *dma_handle, gfp_t flag, | ||
32 | struct dma_attrs *attrs) | ||
33 | { | ||
34 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
35 | void *memory; | ||
36 | |||
37 | memory = ops->alloc(dev, size, dma_handle, flag, attrs); | ||
38 | return memory; | ||
39 | } | ||
40 | |||
41 | #define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL) | ||
42 | |||
43 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
44 | void *cpu_addr, dma_addr_t dma_handle, | ||
45 | struct dma_attrs *attrs) | ||
46 | { | ||
47 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
48 | |||
49 | ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
50 | } | ||
51 | |||
52 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | #endif | 13 | #endif |
diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h
index 16965427f6b4..268fde8a4575 100644
--- a/arch/hexagon/include/asm/dma-mapping.h
+++ b/arch/hexagon/include/asm/dma-mapping.h
@@ -31,12 +31,10 @@ | |||
31 | 31 | ||
32 | struct device; | 32 | struct device; |
33 | extern int bad_dma_address; | 33 | extern int bad_dma_address; |
34 | #define DMA_ERROR_CODE bad_dma_address | ||
34 | 35 | ||
35 | extern struct dma_map_ops *dma_ops; | 36 | extern struct dma_map_ops *dma_ops; |
36 | 37 | ||
37 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
38 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
39 | |||
40 | static inline struct dma_map_ops *get_dma_ops(struct device *dev) | 38 | static inline struct dma_map_ops *get_dma_ops(struct device *dev) |
41 | { | 39 | { |
42 | if (unlikely(dev == NULL)) | 40 | if (unlikely(dev == NULL)) |
@@ -45,8 +43,8 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
45 | return dma_ops; | 43 | return dma_ops; |
46 | } | 44 | } |
47 | 45 | ||
46 | #define HAVE_ARCH_DMA_SUPPORTED 1 | ||
48 | extern int dma_supported(struct device *dev, u64 mask); | 47 | extern int dma_supported(struct device *dev, u64 mask); |
49 | extern int dma_set_mask(struct device *dev, u64 mask); | ||
50 | extern int dma_is_consistent(struct device *dev, dma_addr_t dma_handle); | 48 | extern int dma_is_consistent(struct device *dev, dma_addr_t dma_handle); |
51 | extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 49 | extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
52 | enum dma_data_direction direction); | 50 | enum dma_data_direction direction); |
@@ -60,47 +58,4 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | |||
60 | return addr + size - 1 <= *dev->dma_mask; | 58 | return addr + size - 1 <= *dev->dma_mask; |
61 | } | 59 | } |
62 | 60 | ||
63 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
64 | { | ||
65 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
66 | |||
67 | if (dma_ops->mapping_error) | ||
68 | return dma_ops->mapping_error(dev, dma_addr); | ||
69 | |||
70 | return (dma_addr == bad_dma_address); | ||
71 | } | ||
72 | |||
73 | #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) | ||
74 | |||
75 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
76 | dma_addr_t *dma_handle, gfp_t flag, | ||
77 | struct dma_attrs *attrs) | ||
78 | { | ||
79 | void *ret; | ||
80 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
81 | |||
82 | BUG_ON(!dma_ops); | ||
83 | |||
84 | ret = ops->alloc(dev, size, dma_handle, flag, attrs); | ||
85 | |||
86 | debug_dma_alloc_coherent(dev, size, *dma_handle, ret); | ||
87 | |||
88 | return ret; | ||
89 | } | ||
90 | |||
91 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) | ||
92 | |||
93 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
94 | void *cpu_addr, dma_addr_t dma_handle, | ||
95 | struct dma_attrs *attrs) | ||
96 | { | ||
97 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
98 | |||
99 | BUG_ON(!dma_ops); | ||
100 | |||
101 | dma_ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
102 | |||
103 | debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); | ||
104 | } | ||
105 | |||
106 | #endif | 61 | #endif |
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
index b74f9bae31a3..9e3ddf792bd3 100644
--- a/arch/hexagon/kernel/dma.c
+++ b/arch/hexagon/kernel/dma.c
@@ -44,17 +44,6 @@ int dma_supported(struct device *dev, u64 mask) | |||
44 | } | 44 | } |
45 | EXPORT_SYMBOL(dma_supported); | 45 | EXPORT_SYMBOL(dma_supported); |
46 | 46 | ||
47 | int dma_set_mask(struct device *dev, u64 mask) | ||
48 | { | ||
49 | if (!dev->dma_mask || !dma_supported(dev, mask)) | ||
50 | return -EIO; | ||
51 | |||
52 | *dev->dma_mask = mask; | ||
53 | |||
54 | return 0; | ||
55 | } | ||
56 | EXPORT_SYMBOL(dma_set_mask); | ||
57 | |||
58 | static struct gen_pool *coherent_pool; | 47 | static struct gen_pool *coherent_pool; |
59 | 48 | ||
60 | 49 | ||
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 42a91a7aa2b0..eb0249e37981 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -518,6 +518,7 @@ source "drivers/sn/Kconfig" | |||
518 | config KEXEC | 518 | config KEXEC |
519 | bool "kexec system call" | 519 | bool "kexec system call" |
520 | depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) | 520 | depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) |
521 | select KEXEC_CORE | ||
521 | help | 522 | help |
522 | kexec is a system call that implements the ability to shutdown your | 523 | kexec is a system call that implements the ability to shutdown your |
523 | current kernel, and to start another kernel. It is like a reboot | 524 | current kernel, and to start another kernel. It is like a reboot |
diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
index cf3ab7e784b5..9beccf8010bd 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -23,60 +23,10 @@ extern void machvec_dma_sync_single(struct device *, dma_addr_t, size_t, | |||
23 | extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int, | 23 | extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int, |
24 | enum dma_data_direction); | 24 | enum dma_data_direction); |
25 | 25 | ||
26 | #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) | ||
27 | |||
28 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
29 | dma_addr_t *daddr, gfp_t gfp, | ||
30 | struct dma_attrs *attrs) | ||
31 | { | ||
32 | struct dma_map_ops *ops = platform_dma_get_ops(dev); | ||
33 | void *caddr; | ||
34 | |||
35 | caddr = ops->alloc(dev, size, daddr, gfp, attrs); | ||
36 | debug_dma_alloc_coherent(dev, size, *daddr, caddr); | ||
37 | return caddr; | ||
38 | } | ||
39 | |||
40 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) | ||
41 | |||
42 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
43 | void *caddr, dma_addr_t daddr, | ||
44 | struct dma_attrs *attrs) | ||
45 | { | ||
46 | struct dma_map_ops *ops = platform_dma_get_ops(dev); | ||
47 | debug_dma_free_coherent(dev, size, caddr, daddr); | ||
48 | ops->free(dev, size, caddr, daddr, attrs); | ||
49 | } | ||
50 | |||
51 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
52 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
53 | |||
54 | #define get_dma_ops(dev) platform_dma_get_ops(dev) | 26 | #define get_dma_ops(dev) platform_dma_get_ops(dev) |
55 | 27 | ||
56 | #include <asm-generic/dma-mapping-common.h> | 28 | #include <asm-generic/dma-mapping-common.h> |
57 | 29 | ||
58 | static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr) | ||
59 | { | ||
60 | struct dma_map_ops *ops = platform_dma_get_ops(dev); | ||
61 | debug_dma_mapping_error(dev, daddr); | ||
62 | return ops->mapping_error(dev, daddr); | ||
63 | } | ||
64 | |||
65 | static inline int dma_supported(struct device *dev, u64 mask) | ||
66 | { | ||
67 | struct dma_map_ops *ops = platform_dma_get_ops(dev); | ||
68 | return ops->dma_supported(dev, mask); | ||
69 | } | ||
70 | |||
71 | static inline int | ||
72 | dma_set_mask (struct device *dev, u64 mask) | ||
73 | { | ||
74 | if (!dev->dma_mask || !dma_supported(dev, mask)) | ||
75 | return -EIO; | ||
76 | *dev->dma_mask = mask; | ||
77 | return 0; | ||
78 | } | ||
79 | |||
80 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | 30 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) |
81 | { | 31 | { |
82 | if (!dev->dma_mask) | 32 | if (!dev->dma_mask) |
diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c
index 28a09529f206..3a7692745868 100644
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -86,6 +86,7 @@ decompress_kernel(int mmu_on, unsigned char *zimage_data, | |||
86 | free_mem_end_ptr = free_mem_ptr + BOOT_HEAP_SIZE; | 86 | free_mem_end_ptr = free_mem_ptr + BOOT_HEAP_SIZE; |
87 | 87 | ||
88 | puts("\nDecompressing Linux... "); | 88 | puts("\nDecompressing Linux... "); |
89 | decompress(input_data, input_len, NULL, NULL, output_data, NULL, error); | 89 | __decompress(input_data, input_len, NULL, NULL, output_data, 0, |
90 | NULL, error); | ||
90 | puts("done.\nBooting the kernel.\n"); | 91 | puts("done.\nBooting the kernel.\n"); |
91 | } | 92 | } |
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 2dd8f63bfbbb..498b567f007b 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -95,6 +95,7 @@ config MMU_SUN3 | |||
95 | config KEXEC | 95 | config KEXEC |
96 | bool "kexec system call" | 96 | bool "kexec system call" |
97 | depends on M68KCLASSIC | 97 | depends on M68KCLASSIC |
98 | select KEXEC_CORE | ||
98 | help | 99 | help |
99 | kexec is a system call that implements the ability to shutdown your | 100 | kexec is a system call that implements the ability to shutdown your |
100 | current kernel, and to start another kernel. It is like a reboot | 101 | current kernel, and to start another kernel. It is like a reboot |
diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h
index ab353723076a..24b12970c9cf 100644
--- a/arch/microblaze/include/asm/dma-mapping.h
+++ b/arch/microblaze/include/asm/dma-mapping.h
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/dma-debug.h> | 27 | #include <linux/dma-debug.h> |
28 | #include <linux/dma-attrs.h> | 28 | #include <linux/dma-attrs.h> |
29 | #include <asm/io.h> | 29 | #include <asm/io.h> |
30 | #include <asm-generic/dma-coherent.h> | ||
31 | #include <asm/cacheflush.h> | 30 | #include <asm/cacheflush.h> |
32 | 31 | ||
33 | #define DMA_ERROR_CODE (~(dma_addr_t)0x0) | 32 | #define DMA_ERROR_CODE (~(dma_addr_t)0x0) |
@@ -45,31 +44,6 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
45 | return &dma_direct_ops; | 44 | return &dma_direct_ops; |
46 | } | 45 | } |
47 | 46 | ||
48 | static inline int dma_supported(struct device *dev, u64 mask) | ||
49 | { | ||
50 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
51 | |||
52 | if (unlikely(!ops)) | ||
53 | return 0; | ||
54 | if (!ops->dma_supported) | ||
55 | return 1; | ||
56 | return ops->dma_supported(dev, mask); | ||
57 | } | ||
58 | |||
59 | static inline int dma_set_mask(struct device *dev, u64 dma_mask) | ||
60 | { | ||
61 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
62 | |||
63 | if (unlikely(ops == NULL)) | ||
64 | return -EIO; | ||
65 | if (ops->set_dma_mask) | ||
66 | return ops->set_dma_mask(dev, dma_mask); | ||
67 | if (!dev->dma_mask || !dma_supported(dev, dma_mask)) | ||
68 | return -EIO; | ||
69 | *dev->dma_mask = dma_mask; | ||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | #include <asm-generic/dma-mapping-common.h> | 47 | #include <asm-generic/dma-mapping-common.h> |
74 | 48 | ||
75 | static inline void __dma_sync(unsigned long paddr, | 49 | static inline void __dma_sync(unsigned long paddr, |
@@ -88,50 +62,6 @@ static inline void __dma_sync(unsigned long paddr, | |||
88 | } | 62 | } |
89 | } | 63 | } |
90 | 64 | ||
91 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
92 | { | ||
93 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
94 | |||
95 | debug_dma_mapping_error(dev, dma_addr); | ||
96 | if (ops->mapping_error) | ||
97 | return ops->mapping_error(dev, dma_addr); | ||
98 | |||
99 | return (dma_addr == DMA_ERROR_CODE); | ||
100 | } | ||
101 | |||
102 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
103 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
104 | |||
105 | #define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) | ||
106 | |||
107 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
108 | dma_addr_t *dma_handle, gfp_t flag, | ||
109 | struct dma_attrs *attrs) | ||
110 | { | ||
111 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
112 | void *memory; | ||
113 | |||
114 | BUG_ON(!ops); | ||
115 | |||
116 | memory = ops->alloc(dev, size, dma_handle, flag, attrs); | ||
117 | |||
118 | debug_dma_alloc_coherent(dev, size, *dma_handle, memory); | ||
119 | return memory; | ||
120 | } | ||
121 | |||
122 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d, s, c, h, NULL) | ||
123 | |||
124 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
125 | void *cpu_addr, dma_addr_t dma_handle, | ||
126 | struct dma_attrs *attrs) | ||
127 | { | ||
128 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
129 | |||
130 | BUG_ON(!ops); | ||
131 | debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); | ||
132 | ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
133 | } | ||
134 | |||
135 | static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 65 | static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
136 | enum dma_data_direction direction) | 66 | enum dma_data_direction direction) |
137 | { | 67 | { |
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 752acca8de1f..e3aa5b0b4ef1 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2597,6 +2597,7 @@ source "kernel/Kconfig.preempt" | |||
2597 | 2597 | ||
2598 | config KEXEC | 2598 | config KEXEC |
2599 | bool "Kexec system call" | 2599 | bool "Kexec system call" |
2600 | select KEXEC_CORE | ||
2600 | help | 2601 | help |
2601 | kexec is a system call that implements the ability to shutdown your | 2602 | kexec is a system call that implements the ability to shutdown your |
2602 | current kernel, and to start another kernel. It is like a reboot | 2603 | current kernel, and to start another kernel. It is like a reboot |
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index 54831069a206..080cd53bac36 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -111,8 +111,8 @@ void decompress_kernel(unsigned long boot_heap_start) | |||
111 | puts("\n"); | 111 | puts("\n"); |
112 | 112 | ||
113 | /* Decompress the kernel with according algorithm */ | 113 | /* Decompress the kernel with according algorithm */ |
114 | decompress((char *)zimage_start, zimage_size, 0, 0, | 114 | __decompress((char *)zimage_start, zimage_size, 0, 0, |
115 | (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, error); | 115 | (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error); |
116 | 116 | ||
117 | /* FIXME: should we flush cache here? */ | 117 | /* FIXME: should we flush cache here? */ |
118 | puts("Now, booting the kernel...\n"); | 118 | puts("Now, booting the kernel...\n"); |
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index d8960d46417b..2cd45f5f9481 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -161,9 +161,6 @@ static void *octeon_dma_alloc_coherent(struct device *dev, size_t size, | |||
161 | { | 161 | { |
162 | void *ret; | 162 | void *ret; |
163 | 163 | ||
164 | if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) | ||
165 | return ret; | ||
166 | |||
167 | /* ignore region specifiers */ | 164 | /* ignore region specifiers */ |
168 | gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); | 165 | gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); |
169 | 166 | ||
@@ -194,11 +191,6 @@ static void *octeon_dma_alloc_coherent(struct device *dev, size_t size, | |||
194 | static void octeon_dma_free_coherent(struct device *dev, size_t size, | 191 | static void octeon_dma_free_coherent(struct device *dev, size_t size, |
195 | void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) | 192 | void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) |
196 | { | 193 | { |
197 | int order = get_order(size); | ||
198 | |||
199 | if (dma_release_from_coherent(dev, order, vaddr)) | ||
200 | return; | ||
201 | |||
202 | swiotlb_free_coherent(dev, size, vaddr, dma_handle); | 194 | swiotlb_free_coherent(dev, size, vaddr, dma_handle); |
203 | } | 195 | } |
204 | 196 | ||
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index 360b3387182a..e604f760c4a0 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -4,7 +4,6 @@ | |||
4 | #include <linux/scatterlist.h> | 4 | #include <linux/scatterlist.h> |
5 | #include <asm/dma-coherence.h> | 5 | #include <asm/dma-coherence.h> |
6 | #include <asm/cache.h> | 6 | #include <asm/cache.h> |
7 | #include <asm-generic/dma-coherent.h> | ||
8 | 7 | ||
9 | #ifndef CONFIG_SGI_IP27 /* Kludge to fix 2.6.39 build for IP27 */ | 8 | #ifndef CONFIG_SGI_IP27 /* Kludge to fix 2.6.39 build for IP27 */ |
10 | #include <dma-coherence.h> | 9 | #include <dma-coherence.h> |
@@ -32,73 +31,7 @@ static inline void dma_mark_clean(void *addr, size_t size) {} | |||
32 | 31 | ||
33 | #include <asm-generic/dma-mapping-common.h> | 32 | #include <asm-generic/dma-mapping-common.h> |
34 | 33 | ||
35 | static inline int dma_supported(struct device *dev, u64 mask) | ||
36 | { | ||
37 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
38 | return ops->dma_supported(dev, mask); | ||
39 | } | ||
40 | |||
41 | static inline int dma_mapping_error(struct device *dev, u64 mask) | ||
42 | { | ||
43 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
44 | |||
45 | debug_dma_mapping_error(dev, mask); | ||
46 | return ops->mapping_error(dev, mask); | ||
47 | } | ||
48 | |||
49 | static inline int | ||
50 | dma_set_mask(struct device *dev, u64 mask) | ||
51 | { | ||
52 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
53 | |||
54 | if(!dev->dma_mask || !dma_supported(dev, mask)) | ||
55 | return -EIO; | ||
56 | |||
57 | if (ops->set_dma_mask) | ||
58 | return ops->set_dma_mask(dev, mask); | ||
59 | |||
60 | *dev->dma_mask = mask; | ||
61 | |||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 34 | extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
66 | enum dma_data_direction direction); | 35 | enum dma_data_direction direction); |
67 | 36 | ||
68 | #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) | ||
69 | |||
70 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
71 | dma_addr_t *dma_handle, gfp_t gfp, | ||
72 | struct dma_attrs *attrs) | ||
73 | { | ||
74 | void *ret; | ||
75 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
76 | |||
77 | ret = ops->alloc(dev, size, dma_handle, gfp, attrs); | ||
78 | |||
79 | debug_dma_alloc_coherent(dev, size, *dma_handle, ret); | ||
80 | |||
81 | return ret; | ||
82 | } | ||
83 | |||
84 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) | ||
85 | |||
86 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
87 | void *vaddr, dma_addr_t dma_handle, | ||
88 | struct dma_attrs *attrs) | ||
89 | { | ||
90 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
91 | |||
92 | ops->free(dev, size, vaddr, dma_handle, attrs); | ||
93 | |||
94 | debug_dma_free_coherent(dev, size, vaddr, dma_handle); | ||
95 | } | ||
96 | |||
97 | |||
98 | void *dma_alloc_noncoherent(struct device *dev, size_t size, | ||
99 | dma_addr_t *dma_handle, gfp_t flag); | ||
100 | |||
101 | void dma_free_noncoherent(struct device *dev, size_t size, | ||
102 | void *vaddr, dma_addr_t dma_handle); | ||
103 | |||
104 | #endif /* _ASM_DMA_MAPPING_H */ | 37 | #endif /* _ASM_DMA_MAPPING_H */ |
diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c
index 2c6b989c1bc4..4ffa6fc81c8f 100644
--- a/arch/mips/loongson64/common/dma-swiotlb.c
+++ b/arch/mips/loongson64/common/dma-swiotlb.c
@@ -14,9 +14,6 @@ static void *loongson_dma_alloc_coherent(struct device *dev, size_t size, | |||
14 | { | 14 | { |
15 | void *ret; | 15 | void *ret; |
16 | 16 | ||
17 | if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) | ||
18 | return ret; | ||
19 | |||
20 | /* ignore region specifiers */ | 17 | /* ignore region specifiers */ |
21 | gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); | 18 | gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); |
22 | 19 | ||
@@ -46,11 +43,6 @@ static void *loongson_dma_alloc_coherent(struct device *dev, size_t size, | |||
46 | static void loongson_dma_free_coherent(struct device *dev, size_t size, | 43 | static void loongson_dma_free_coherent(struct device *dev, size_t size, |
47 | void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) | 44 | void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) |
48 | { | 45 | { |
49 | int order = get_order(size); | ||
50 | |||
51 | if (dma_release_from_coherent(dev, order, vaddr)) | ||
52 | return; | ||
53 | |||
54 | swiotlb_free_coherent(dev, size, vaddr, dma_handle); | 46 | swiotlb_free_coherent(dev, size, vaddr, dma_handle); |
55 | } | 47 | } |
56 | 48 | ||
@@ -93,6 +85,9 @@ static void loongson_dma_sync_sg_for_device(struct device *dev, | |||
93 | 85 | ||
94 | static int loongson_dma_set_mask(struct device *dev, u64 mask) | 86 | static int loongson_dma_set_mask(struct device *dev, u64 mask) |
95 | { | 87 | { |
88 | if (!dev->dma_mask || !dma_supported(dev, mask)) | ||
89 | return -EIO; | ||
90 | |||
96 | if (mask > DMA_BIT_MASK(loongson_sysconf.dma_mask_bits)) { | 91 | if (mask > DMA_BIT_MASK(loongson_sysconf.dma_mask_bits)) { |
97 | *dev->dma_mask = DMA_BIT_MASK(loongson_sysconf.dma_mask_bits); | 92 | *dev->dma_mask = DMA_BIT_MASK(loongson_sysconf.dma_mask_bits); |
98 | return -EIO; | 93 | return -EIO; |
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 8f23cf08f4ba..a914dc1cb6d1 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c | |||
@@ -112,7 +112,7 @@ static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) | |||
112 | return gfp | dma_flag; | 112 | return gfp | dma_flag; |
113 | } | 113 | } |
114 | 114 | ||
115 | void *dma_alloc_noncoherent(struct device *dev, size_t size, | 115 | static void *mips_dma_alloc_noncoherent(struct device *dev, size_t size, |
116 | dma_addr_t * dma_handle, gfp_t gfp) | 116 | dma_addr_t * dma_handle, gfp_t gfp) |
117 | { | 117 | { |
118 | void *ret; | 118 | void *ret; |
@@ -128,7 +128,6 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size, | |||
128 | 128 | ||
129 | return ret; | 129 | return ret; |
130 | } | 130 | } |
131 | EXPORT_SYMBOL(dma_alloc_noncoherent); | ||
132 | 131 | ||
133 | static void *mips_dma_alloc_coherent(struct device *dev, size_t size, | 132 | static void *mips_dma_alloc_coherent(struct device *dev, size_t size, |
134 | dma_addr_t * dma_handle, gfp_t gfp, struct dma_attrs *attrs) | 133 | dma_addr_t * dma_handle, gfp_t gfp, struct dma_attrs *attrs) |
@@ -137,8 +136,12 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size, | |||
137 | struct page *page = NULL; | 136 | struct page *page = NULL; |
138 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | 137 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; |
139 | 138 | ||
140 | if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) | 139 | /* |
141 | return ret; | 140 | * XXX: seems like the coherent and non-coherent implementations could |
141 | * be consolidated. | ||
142 | */ | ||
143 | if (dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) | ||
144 | return mips_dma_alloc_noncoherent(dev, size, dma_handle, gfp); | ||
142 | 145 | ||
143 | gfp = massage_gfp_flags(dev, gfp); | 146 | gfp = massage_gfp_flags(dev, gfp); |
144 | 147 | ||
@@ -164,24 +167,24 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size, | |||
164 | } | 167 | } |
165 | 168 | ||
166 | 169 | ||
167 | void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, | 170 | static void mips_dma_free_noncoherent(struct device *dev, size_t size, |
168 | dma_addr_t dma_handle) | 171 | void *vaddr, dma_addr_t dma_handle) |
169 | { | 172 | { |
170 | plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL); | 173 | plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL); |
171 | free_pages((unsigned long) vaddr, get_order(size)); | 174 | free_pages((unsigned long) vaddr, get_order(size)); |
172 | } | 175 | } |
173 | EXPORT_SYMBOL(dma_free_noncoherent); | ||
174 | 176 | ||
175 | static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr, | 177 | static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr, |
176 | dma_addr_t dma_handle, struct dma_attrs *attrs) | 178 | dma_addr_t dma_handle, struct dma_attrs *attrs) |
177 | { | 179 | { |
178 | unsigned long addr = (unsigned long) vaddr; | 180 | unsigned long addr = (unsigned long) vaddr; |
179 | int order = get_order(size); | ||
180 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | 181 | unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; |
181 | struct page *page = NULL; | 182 | struct page *page = NULL; |
182 | 183 | ||
183 | if (dma_release_from_coherent(dev, order, vaddr)) | 184 | if (dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) { |
185 | mips_dma_free_noncoherent(dev, size, vaddr, dma_handle); | ||
184 | return; | 186 | return; |
187 | } | ||
185 | 188 | ||
186 | plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL); | 189 | plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL); |
187 | 190 | ||
diff --git a/arch/mips/netlogic/common/nlm-dma.c b/arch/mips/netlogic/common/nlm-dma.c index f3d4ae87abc7..3758715d4ab6 100644 --- a/arch/mips/netlogic/common/nlm-dma.c +++ b/arch/mips/netlogic/common/nlm-dma.c | |||
@@ -47,11 +47,6 @@ static char *nlm_swiotlb; | |||
47 | static void *nlm_dma_alloc_coherent(struct device *dev, size_t size, | 47 | static void *nlm_dma_alloc_coherent(struct device *dev, size_t size, |
48 | dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) | 48 | dma_addr_t *dma_handle, gfp_t gfp, struct dma_attrs *attrs) |
49 | { | 49 | { |
50 | void *ret; | ||
51 | |||
52 | if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) | ||
53 | return ret; | ||
54 | |||
55 | /* ignore region specifiers */ | 50 | /* ignore region specifiers */ |
56 | gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); | 51 | gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); |
57 | 52 | ||
@@ -69,11 +64,6 @@ static void *nlm_dma_alloc_coherent(struct device *dev, size_t size, | |||
69 | static void nlm_dma_free_coherent(struct device *dev, size_t size, | 64 | static void nlm_dma_free_coherent(struct device *dev, size_t size, |
70 | void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) | 65 | void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) |
71 | { | 66 | { |
72 | int order = get_order(size); | ||
73 | |||
74 | if (dma_release_from_coherent(dev, order, vaddr)) | ||
75 | return; | ||
76 | |||
77 | swiotlb_free_coherent(dev, size, vaddr, dma_handle); | 67 | swiotlb_free_coherent(dev, size, vaddr, dma_handle); |
78 | } | 68 | } |
79 | 69 | ||
diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h index fab8628e1b6e..413bfcf86384 100644 --- a/arch/openrisc/include/asm/dma-mapping.h +++ b/arch/openrisc/include/asm/dma-mapping.h | |||
@@ -23,7 +23,6 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/dma-debug.h> | 25 | #include <linux/dma-debug.h> |
26 | #include <asm-generic/dma-coherent.h> | ||
27 | #include <linux/kmemcheck.h> | 26 | #include <linux/kmemcheck.h> |
28 | #include <linux/dma-mapping.h> | 27 | #include <linux/dma-mapping.h> |
29 | 28 | ||
@@ -36,75 +35,13 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
36 | return &or1k_dma_map_ops; | 35 | return &or1k_dma_map_ops; |
37 | } | 36 | } |
38 | 37 | ||
39 | #include <asm-generic/dma-mapping-common.h> | 38 | #define HAVE_ARCH_DMA_SUPPORTED 1 |
40 | |||
41 | #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) | ||
42 | |||
43 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
44 | dma_addr_t *dma_handle, gfp_t gfp, | ||
45 | struct dma_attrs *attrs) | ||
46 | { | ||
47 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
48 | void *memory; | ||
49 | |||
50 | memory = ops->alloc(dev, size, dma_handle, gfp, attrs); | ||
51 | |||
52 | debug_dma_alloc_coherent(dev, size, *dma_handle, memory); | ||
53 | |||
54 | return memory; | ||
55 | } | ||
56 | |||
57 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) | ||
58 | |||
59 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
60 | void *cpu_addr, dma_addr_t dma_handle, | ||
61 | struct dma_attrs *attrs) | ||
62 | { | ||
63 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
64 | |||
65 | debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); | ||
66 | |||
67 | ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
68 | } | ||
69 | |||
70 | static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, | ||
71 | dma_addr_t *dma_handle, gfp_t gfp) | ||
72 | { | ||
73 | struct dma_attrs attrs; | ||
74 | |||
75 | dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs); | ||
76 | |||
77 | return dma_alloc_attrs(dev, size, dma_handle, gfp, &attrs); | ||
78 | } | ||
79 | |||
80 | static inline void dma_free_noncoherent(struct device *dev, size_t size, | ||
81 | void *cpu_addr, dma_addr_t dma_handle) | ||
82 | { | ||
83 | struct dma_attrs attrs; | ||
84 | |||
85 | dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs); | ||
86 | |||
87 | dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); | ||
88 | } | ||
89 | |||
90 | static inline int dma_supported(struct device *dev, u64 dma_mask) | 39 | static inline int dma_supported(struct device *dev, u64 dma_mask) |
91 | { | 40 | { |
92 | /* Support 32 bit DMA mask exclusively */ | 41 | /* Support 32 bit DMA mask exclusively */ |
93 | return dma_mask == DMA_BIT_MASK(32); | 42 | return dma_mask == DMA_BIT_MASK(32); |
94 | } | 43 | } |
95 | 44 | ||
96 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | 45 | #include <asm-generic/dma-mapping-common.h> |
97 | { | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | static inline int dma_set_mask(struct device *dev, u64 dma_mask) | ||
102 | { | ||
103 | if (!dev->dma_mask || !dma_supported(dev, dma_mask)) | ||
104 | return -EIO; | ||
105 | |||
106 | *dev->dma_mask = dma_mask; | ||
107 | 46 | ||
108 | return 0; | ||
109 | } | ||
110 | #endif /* __ASM_OPENRISC_DMA_MAPPING_H */ | 47 | #endif /* __ASM_OPENRISC_DMA_MAPPING_H */ |
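openrisc keeps only its trivial dma_supported() and announces it with HAVE_ARCH_DMA_SUPPORTED before including the common header. The corresponding guard in <asm-generic/dma-mapping-common.h> is presumably along these lines (a sketch, defaulting to the dma_map_ops callback when the architecture does not override it):

	#ifndef HAVE_ARCH_DMA_SUPPORTED
	static inline int dma_supported(struct device *dev, u64 mask)
	{
		struct dma_map_ops *ops = get_dma_ops(dev);

		if (!ops)
			return 0;
		if (!ops->dma_supported)
			return 1;	/* no callback: assume the mask is acceptable */
		return ops->dma_supported(dev, mask);
	}
	#endif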
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b447918b9e2c..9a7057ec2154 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig | |||
@@ -420,6 +420,7 @@ config PPC64_SUPPORTS_MEMORY_FAILURE | |||
420 | config KEXEC | 420 | config KEXEC |
421 | bool "kexec system call" | 421 | bool "kexec system call" |
422 | depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) | 422 | depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) |
423 | select KEXEC_CORE | ||
423 | help | 424 | help |
424 | kexec is a system call that implements the ability to shutdown your | 425 | kexec is a system call that implements the ability to shutdown your |
425 | current kernel, and to start another kernel. It is like a reboot | 426 | current kernel, and to start another kernel. It is like a reboot |
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 710f60e380e0..7f522c021dc3 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h | |||
@@ -18,7 +18,9 @@ | |||
18 | #include <asm/io.h> | 18 | #include <asm/io.h> |
19 | #include <asm/swiotlb.h> | 19 | #include <asm/swiotlb.h> |
20 | 20 | ||
21 | #ifdef CONFIG_PPC64 | ||
21 | #define DMA_ERROR_CODE (~(dma_addr_t)0x0) | 22 | #define DMA_ERROR_CODE (~(dma_addr_t)0x0) |
23 | #endif | ||
22 | 24 | ||
23 | /* Some dma direct funcs must be visible for use in other dma_ops */ | 25 | /* Some dma direct funcs must be visible for use in other dma_ops */ |
24 | extern void *__dma_direct_alloc_coherent(struct device *dev, size_t size, | 26 | extern void *__dma_direct_alloc_coherent(struct device *dev, size_t size, |
@@ -120,71 +122,14 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) | |||
120 | /* this will be removed soon */ | 122 | /* this will be removed soon */ |
121 | #define flush_write_buffers() | 123 | #define flush_write_buffers() |
122 | 124 | ||
123 | #include <asm-generic/dma-mapping-common.h> | 125 | #define HAVE_ARCH_DMA_SET_MASK 1 |
124 | 126 | extern int dma_set_mask(struct device *dev, u64 dma_mask); | |
125 | static inline int dma_supported(struct device *dev, u64 mask) | ||
126 | { | ||
127 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
128 | 127 | ||
129 | if (unlikely(dma_ops == NULL)) | 128 | #include <asm-generic/dma-mapping-common.h> |
130 | return 0; | ||
131 | if (dma_ops->dma_supported == NULL) | ||
132 | return 1; | ||
133 | return dma_ops->dma_supported(dev, mask); | ||
134 | } | ||
135 | 129 | ||
136 | extern int dma_set_mask(struct device *dev, u64 dma_mask); | ||
137 | extern int __dma_set_mask(struct device *dev, u64 dma_mask); | 130 | extern int __dma_set_mask(struct device *dev, u64 dma_mask); |
138 | extern u64 __dma_get_required_mask(struct device *dev); | 131 | extern u64 __dma_get_required_mask(struct device *dev); |
139 | 132 | ||
140 | #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) | ||
141 | |||
142 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
143 | dma_addr_t *dma_handle, gfp_t flag, | ||
144 | struct dma_attrs *attrs) | ||
145 | { | ||
146 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
147 | void *cpu_addr; | ||
148 | |||
149 | BUG_ON(!dma_ops); | ||
150 | |||
151 | cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs); | ||
152 | |||
153 | debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); | ||
154 | |||
155 | return cpu_addr; | ||
156 | } | ||
157 | |||
158 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) | ||
159 | |||
160 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
161 | void *cpu_addr, dma_addr_t dma_handle, | ||
162 | struct dma_attrs *attrs) | ||
163 | { | ||
164 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
165 | |||
166 | BUG_ON(!dma_ops); | ||
167 | |||
168 | debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); | ||
169 | |||
170 | dma_ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
171 | } | ||
172 | |||
173 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
174 | { | ||
175 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
176 | |||
177 | debug_dma_mapping_error(dev, dma_addr); | ||
178 | if (dma_ops->mapping_error) | ||
179 | return dma_ops->mapping_error(dev, dma_addr); | ||
180 | |||
181 | #ifdef CONFIG_PPC64 | ||
182 | return (dma_addr == DMA_ERROR_CODE); | ||
183 | #else | ||
184 | return 0; | ||
185 | #endif | ||
186 | } | ||
187 | |||
188 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | 133 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) |
189 | { | 134 | { |
190 | #ifdef CONFIG_SWIOTLB | 135 | #ifdef CONFIG_SWIOTLB |
@@ -210,9 +155,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) | |||
210 | return daddr - get_dma_offset(dev); | 155 | return daddr - get_dma_offset(dev); |
211 | } | 156 | } |
212 | 157 | ||
213 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
214 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
215 | |||
216 | #define ARCH_HAS_DMA_MMAP_COHERENT | 158 | #define ARCH_HAS_DMA_MMAP_COHERENT |
217 | 159 | ||
218 | static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 160 | static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
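powerpc keeps its own dma_set_mask() for per-bus hooks and says so with HAVE_ARCH_DMA_SET_MASK, which is why the common header is now included after the macro. A sketch of the generic fallback that the macro suppresses, assuming it mirrors the per-arch copies deleted elsewhere in this series (including the optional set_dma_mask callback in struct dma_map_ops):

	#ifndef HAVE_ARCH_DMA_SET_MASK
	static inline int dma_set_mask(struct device *dev, u64 mask)
	{
		struct dma_map_ops *ops = get_dma_ops(dev);

		if (ops->set_dma_mask)
			return ops->set_dma_mask(dev, mask);

		if (!dev->dma_mask || !dma_supported(dev, mask))
			return -EIO;

		*dev->dma_mask = mask;
		return 0;
	}
	#endif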
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4827870f7a6d..1d57000b1b24 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
@@ -48,6 +48,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC | |||
48 | 48 | ||
49 | config KEXEC | 49 | config KEXEC |
50 | def_bool y | 50 | def_bool y |
51 | select KEXEC_CORE | ||
51 | 52 | ||
52 | config AUDIT_ARCH | 53 | config AUDIT_ARCH |
53 | def_bool y | 54 | def_bool y |
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 42506b371b74..4da604ebf6fd 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c | |||
@@ -167,7 +167,7 @@ unsigned long decompress_kernel(void) | |||
167 | #endif | 167 | #endif |
168 | 168 | ||
169 | puts("Uncompressing Linux... "); | 169 | puts("Uncompressing Linux... "); |
170 | decompress(input_data, input_len, NULL, NULL, output, NULL, error); | 170 | __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); |
171 | puts("Ok, booting the kernel.\n"); | 171 | puts("Ok, booting the kernel.\n"); |
172 | return (unsigned long) output; | 172 | return (unsigned long) output; |
173 | } | 173 | } |
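s390, sh, unicore32 and x86 all switch their boot decompressors from decompress() to __decompress(), which takes the size of the output buffer as an additional argument; callers that cannot know it (s390, sh, unicore32) pass 0, while x86 passes output_len. A hedged sketch of the call signature as inferred from these call sites rather than from lib/decompress_*.c itself; parameter names are illustrative:

	/* Sketch of the updated decompressor entry point used in this series. */
	int __decompress(unsigned char *buf, long in_len,
			 long (*fill)(void *dst, unsigned long size),
			 long (*flush)(void *src, unsigned long size),
			 unsigned char *out_buf,
			 long out_len,		/* new: output bound, 0 if unknown */
			 long *in_pos,
			 void (*error)(char *msg));

The extra bound lets single-shot decompressors avoid writing past the destination buffer instead of trusting the caller blindly.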
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index 9d395961e713..b3fd54d93dd2 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h | |||
@@ -18,27 +18,13 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
18 | return &s390_dma_ops; | 18 | return &s390_dma_ops; |
19 | } | 19 | } |
20 | 20 | ||
21 | extern int dma_set_mask(struct device *dev, u64 mask); | ||
22 | |||
23 | static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 21 | static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
24 | enum dma_data_direction direction) | 22 | enum dma_data_direction direction) |
25 | { | 23 | { |
26 | } | 24 | } |
27 | 25 | ||
28 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
29 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
30 | |||
31 | #include <asm-generic/dma-mapping-common.h> | 26 | #include <asm-generic/dma-mapping-common.h> |
32 | 27 | ||
33 | static inline int dma_supported(struct device *dev, u64 mask) | ||
34 | { | ||
35 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
36 | |||
37 | if (dma_ops->dma_supported == NULL) | ||
38 | return 1; | ||
39 | return dma_ops->dma_supported(dev, mask); | ||
40 | } | ||
41 | |||
42 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | 28 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) |
43 | { | 29 | { |
44 | if (!dev->dma_mask) | 30 | if (!dev->dma_mask) |
@@ -46,45 +32,4 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | |||
46 | return addr + size - 1 <= *dev->dma_mask; | 32 | return addr + size - 1 <= *dev->dma_mask; |
47 | } | 33 | } |
48 | 34 | ||
49 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
50 | { | ||
51 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
52 | |||
53 | debug_dma_mapping_error(dev, dma_addr); | ||
54 | if (dma_ops->mapping_error) | ||
55 | return dma_ops->mapping_error(dev, dma_addr); | ||
56 | return dma_addr == DMA_ERROR_CODE; | ||
57 | } | ||
58 | |||
59 | #define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) | ||
60 | |||
61 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
62 | dma_addr_t *dma_handle, gfp_t flags, | ||
63 | struct dma_attrs *attrs) | ||
64 | { | ||
65 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
66 | void *cpu_addr; | ||
67 | |||
68 | BUG_ON(!ops); | ||
69 | |||
70 | cpu_addr = ops->alloc(dev, size, dma_handle, flags, attrs); | ||
71 | debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); | ||
72 | |||
73 | return cpu_addr; | ||
74 | } | ||
75 | |||
76 | #define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL) | ||
77 | |||
78 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
79 | void *cpu_addr, dma_addr_t dma_handle, | ||
80 | struct dma_attrs *attrs) | ||
81 | { | ||
82 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
83 | |||
84 | BUG_ON(!ops); | ||
85 | |||
86 | debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); | ||
87 | ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
88 | } | ||
89 | |||
90 | #endif /* _ASM_S390_DMA_MAPPING_H */ | 35 | #endif /* _ASM_S390_DMA_MAPPING_H */ |
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 42b76580c8b8..37505b8b4093 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c | |||
@@ -262,16 +262,6 @@ out: | |||
262 | spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); | 262 | spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); |
263 | } | 263 | } |
264 | 264 | ||
265 | int dma_set_mask(struct device *dev, u64 mask) | ||
266 | { | ||
267 | if (!dev->dma_mask || !dma_supported(dev, mask)) | ||
268 | return -EIO; | ||
269 | |||
270 | *dev->dma_mask = mask; | ||
271 | return 0; | ||
272 | } | ||
273 | EXPORT_SYMBOL_GPL(dma_set_mask); | ||
274 | |||
275 | static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, | 265 | static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, |
276 | unsigned long offset, size_t size, | 266 | unsigned long offset, size_t size, |
277 | enum dma_data_direction direction, | 267 | enum dma_data_direction direction, |
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 50057fed819d..d514df7e04dd 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig | |||
@@ -602,6 +602,7 @@ source kernel/Kconfig.hz | |||
602 | config KEXEC | 602 | config KEXEC |
603 | bool "kexec system call (EXPERIMENTAL)" | 603 | bool "kexec system call (EXPERIMENTAL)" |
604 | depends on SUPERH32 && MMU | 604 | depends on SUPERH32 && MMU |
605 | select KEXEC_CORE | ||
605 | help | 606 | help |
606 | kexec is a system call that implements the ability to shutdown your | 607 | kexec is a system call that implements the ability to shutdown your |
607 | current kernel, and to start another kernel. It is like a reboot | 608 | current kernel, and to start another kernel. It is like a reboot |
diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c index 95470a472d2c..208a9753ab38 100644 --- a/arch/sh/boot/compressed/misc.c +++ b/arch/sh/boot/compressed/misc.c | |||
@@ -132,7 +132,7 @@ void decompress_kernel(void) | |||
132 | 132 | ||
133 | puts("Uncompressing Linux... "); | 133 | puts("Uncompressing Linux... "); |
134 | cache_control(CACHE_ENABLE); | 134 | cache_control(CACHE_ENABLE); |
135 | decompress(input_data, input_len, NULL, NULL, output, NULL, error); | 135 | __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); |
136 | cache_control(CACHE_DISABLE); | 136 | cache_control(CACHE_DISABLE); |
137 | puts("Ok, booting the kernel.\n"); | 137 | puts("Ok, booting the kernel.\n"); |
138 | } | 138 | } |
diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index b437f2c780b8..a3745a3fe029 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h | |||
@@ -9,86 +9,13 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
9 | return dma_ops; | 9 | return dma_ops; |
10 | } | 10 | } |
11 | 11 | ||
12 | #include <asm-generic/dma-coherent.h> | 12 | #define DMA_ERROR_CODE 0 |
13 | #include <asm-generic/dma-mapping-common.h> | ||
14 | |||
15 | static inline int dma_supported(struct device *dev, u64 mask) | ||
16 | { | ||
17 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
18 | |||
19 | if (ops->dma_supported) | ||
20 | return ops->dma_supported(dev, mask); | ||
21 | |||
22 | return 1; | ||
23 | } | ||
24 | |||
25 | static inline int dma_set_mask(struct device *dev, u64 mask) | ||
26 | { | ||
27 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
28 | 13 | ||
29 | if (!dev->dma_mask || !dma_supported(dev, mask)) | 14 | #include <asm-generic/dma-mapping-common.h> |
30 | return -EIO; | ||
31 | if (ops->set_dma_mask) | ||
32 | return ops->set_dma_mask(dev, mask); | ||
33 | |||
34 | *dev->dma_mask = mask; | ||
35 | |||
36 | return 0; | ||
37 | } | ||
38 | 15 | ||
39 | void dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 16 | void dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
40 | enum dma_data_direction dir); | 17 | enum dma_data_direction dir); |
41 | 18 | ||
42 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
43 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
44 | |||
45 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
46 | { | ||
47 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
48 | |||
49 | debug_dma_mapping_error(dev, dma_addr); | ||
50 | if (ops->mapping_error) | ||
51 | return ops->mapping_error(dev, dma_addr); | ||
52 | |||
53 | return dma_addr == 0; | ||
54 | } | ||
55 | |||
56 | #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) | ||
57 | |||
58 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
59 | dma_addr_t *dma_handle, gfp_t gfp, | ||
60 | struct dma_attrs *attrs) | ||
61 | { | ||
62 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
63 | void *memory; | ||
64 | |||
65 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) | ||
66 | return memory; | ||
67 | if (!ops->alloc) | ||
68 | return NULL; | ||
69 | |||
70 | memory = ops->alloc(dev, size, dma_handle, gfp, attrs); | ||
71 | debug_dma_alloc_coherent(dev, size, *dma_handle, memory); | ||
72 | |||
73 | return memory; | ||
74 | } | ||
75 | |||
76 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) | ||
77 | |||
78 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
79 | void *vaddr, dma_addr_t dma_handle, | ||
80 | struct dma_attrs *attrs) | ||
81 | { | ||
82 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
83 | |||
84 | if (dma_release_from_coherent(dev, get_order(size), vaddr)) | ||
85 | return; | ||
86 | |||
87 | debug_dma_free_coherent(dev, size, vaddr, dma_handle); | ||
88 | if (ops->free) | ||
89 | ops->free(dev, size, vaddr, dma_handle, attrs); | ||
90 | } | ||
91 | |||
92 | /* arch/sh/mm/consistent.c */ | 19 | /* arch/sh/mm/consistent.c */ |
93 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 20 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
94 | dma_addr_t *dma_addr, gfp_t flag, | 21 | dma_addr_t *dma_addr, gfp_t flag, |
diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 7e064c68c5ec..a21da597b0b5 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h | |||
@@ -7,11 +7,9 @@ | |||
7 | 7 | ||
8 | #define DMA_ERROR_CODE (~(dma_addr_t)0x0) | 8 | #define DMA_ERROR_CODE (~(dma_addr_t)0x0) |
9 | 9 | ||
10 | #define HAVE_ARCH_DMA_SUPPORTED 1 | ||
10 | int dma_supported(struct device *dev, u64 mask); | 11 | int dma_supported(struct device *dev, u64 mask); |
11 | 12 | ||
12 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
13 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
14 | |||
15 | static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 13 | static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
16 | enum dma_data_direction dir) | 14 | enum dma_data_direction dir) |
17 | { | 15 | { |
@@ -39,39 +37,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
39 | return dma_ops; | 37 | return dma_ops; |
40 | } | 38 | } |
41 | 39 | ||
42 | #include <asm-generic/dma-mapping-common.h> | 40 | #define HAVE_ARCH_DMA_SET_MASK 1 |
43 | |||
44 | #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) | ||
45 | |||
46 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
47 | dma_addr_t *dma_handle, gfp_t flag, | ||
48 | struct dma_attrs *attrs) | ||
49 | { | ||
50 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
51 | void *cpu_addr; | ||
52 | |||
53 | cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); | ||
54 | debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); | ||
55 | return cpu_addr; | ||
56 | } | ||
57 | |||
58 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) | ||
59 | |||
60 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
61 | void *cpu_addr, dma_addr_t dma_handle, | ||
62 | struct dma_attrs *attrs) | ||
63 | { | ||
64 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
65 | |||
66 | debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); | ||
67 | ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
68 | } | ||
69 | |||
70 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
71 | { | ||
72 | debug_dma_mapping_error(dev, dma_addr); | ||
73 | return (dma_addr == DMA_ERROR_CODE); | ||
74 | } | ||
75 | 41 | ||
76 | static inline int dma_set_mask(struct device *dev, u64 mask) | 42 | static inline int dma_set_mask(struct device *dev, u64 mask) |
77 | { | 43 | { |
@@ -86,4 +52,6 @@ static inline int dma_set_mask(struct device *dev, u64 mask) | |||
86 | return -EINVAL; | 52 | return -EINVAL; |
87 | } | 53 | } |
88 | 54 | ||
55 | #include <asm-generic/dma-mapping-common.h> | ||
56 | |||
89 | #endif | 57 | #endif |
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 2ba12d761723..106c21bd7f44 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig | |||
@@ -205,6 +205,7 @@ source "kernel/Kconfig.hz" | |||
205 | 205 | ||
206 | config KEXEC | 206 | config KEXEC |
207 | bool "kexec system call" | 207 | bool "kexec system call" |
208 | select KEXEC_CORE | ||
208 | ---help--- | 209 | ---help--- |
209 | kexec is a system call that implements the ability to shutdown your | 210 | kexec is a system call that implements the ability to shutdown your |
210 | current kernel, and to start another kernel. It is like a reboot | 211 | current kernel, and to start another kernel. It is like a reboot |
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 1eae359d8315..96ac6cce4a32 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h | |||
@@ -59,8 +59,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) | |||
59 | 59 | ||
60 | static inline void dma_mark_clean(void *addr, size_t size) {} | 60 | static inline void dma_mark_clean(void *addr, size_t size) {} |
61 | 61 | ||
62 | #include <asm-generic/dma-mapping-common.h> | ||
63 | |||
64 | static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) | 62 | static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) |
65 | { | 63 | { |
66 | dev->archdata.dma_ops = ops; | 64 | dev->archdata.dma_ops = ops; |
@@ -74,18 +72,9 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | |||
74 | return addr + size - 1 <= *dev->dma_mask; | 72 | return addr + size - 1 <= *dev->dma_mask; |
75 | } | 73 | } |
76 | 74 | ||
77 | static inline int | 75 | #define HAVE_ARCH_DMA_SET_MASK 1 |
78 | dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
79 | { | ||
80 | debug_dma_mapping_error(dev, dma_addr); | ||
81 | return get_dma_ops(dev)->mapping_error(dev, dma_addr); | ||
82 | } | ||
83 | 76 | ||
84 | static inline int | 77 | #include <asm-generic/dma-mapping-common.h> |
85 | dma_supported(struct device *dev, u64 mask) | ||
86 | { | ||
87 | return get_dma_ops(dev)->dma_supported(dev, mask); | ||
88 | } | ||
89 | 78 | ||
90 | static inline int | 79 | static inline int |
91 | dma_set_mask(struct device *dev, u64 mask) | 80 | dma_set_mask(struct device *dev, u64 mask) |
@@ -116,36 +105,6 @@ dma_set_mask(struct device *dev, u64 mask) | |||
116 | return 0; | 105 | return 0; |
117 | } | 106 | } |
118 | 107 | ||
119 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
120 | dma_addr_t *dma_handle, gfp_t flag, | ||
121 | struct dma_attrs *attrs) | ||
122 | { | ||
123 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
124 | void *cpu_addr; | ||
125 | |||
126 | cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs); | ||
127 | |||
128 | debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); | ||
129 | |||
130 | return cpu_addr; | ||
131 | } | ||
132 | |||
133 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
134 | void *cpu_addr, dma_addr_t dma_handle, | ||
135 | struct dma_attrs *attrs) | ||
136 | { | ||
137 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
138 | |||
139 | debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); | ||
140 | |||
141 | dma_ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
142 | } | ||
143 | |||
144 | #define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) | ||
145 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) | ||
146 | #define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL) | ||
147 | #define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL) | ||
148 | |||
149 | /* | 108 | /* |
150 | * dma_alloc_noncoherent() is #defined to return coherent memory, | 109 | * dma_alloc_noncoherent() is #defined to return coherent memory, |
151 | * so there's no need to do any flushing here. | 110 | * so there's no need to do any flushing here. |
diff --git a/arch/unicore32/boot/compressed/misc.c b/arch/unicore32/boot/compressed/misc.c index 176d5bda3559..5c65dfee278c 100644 --- a/arch/unicore32/boot/compressed/misc.c +++ b/arch/unicore32/boot/compressed/misc.c | |||
@@ -119,8 +119,8 @@ unsigned long decompress_kernel(unsigned long output_start, | |||
119 | output_ptr = get_unaligned_le32(tmp); | 119 | output_ptr = get_unaligned_le32(tmp); |
120 | 120 | ||
121 | arch_decomp_puts("Uncompressing Linux..."); | 121 | arch_decomp_puts("Uncompressing Linux..."); |
122 | decompress(input_data, input_data_end - input_data, NULL, NULL, | 122 | __decompress(input_data, input_data_end - input_data, NULL, NULL, |
123 | output_data, NULL, error); | 123 | output_data, 0, NULL, error); |
124 | arch_decomp_puts(" done, booting the kernel.\n"); | 124 | arch_decomp_puts(" done, booting the kernel.\n"); |
125 | return output_ptr; | 125 | return output_ptr; |
126 | } | 126 | } |
diff --git a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h index 366460a81796..8140e053ccd3 100644 --- a/arch/unicore32/include/asm/dma-mapping.h +++ b/arch/unicore32/include/asm/dma-mapping.h | |||
@@ -18,8 +18,6 @@ | |||
18 | #include <linux/scatterlist.h> | 18 | #include <linux/scatterlist.h> |
19 | #include <linux/swiotlb.h> | 19 | #include <linux/swiotlb.h> |
20 | 20 | ||
21 | #include <asm-generic/dma-coherent.h> | ||
22 | |||
23 | #include <asm/memory.h> | 21 | #include <asm/memory.h> |
24 | #include <asm/cacheflush.h> | 22 | #include <asm/cacheflush.h> |
25 | 23 | ||
@@ -30,26 +28,6 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
30 | return &swiotlb_dma_map_ops; | 28 | return &swiotlb_dma_map_ops; |
31 | } | 29 | } |
32 | 30 | ||
33 | static inline int dma_supported(struct device *dev, u64 mask) | ||
34 | { | ||
35 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
36 | |||
37 | if (unlikely(dma_ops == NULL)) | ||
38 | return 0; | ||
39 | |||
40 | return dma_ops->dma_supported(dev, mask); | ||
41 | } | ||
42 | |||
43 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
44 | { | ||
45 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
46 | |||
47 | if (dma_ops->mapping_error) | ||
48 | return dma_ops->mapping_error(dev, dma_addr); | ||
49 | |||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | #include <asm-generic/dma-mapping-common.h> | 31 | #include <asm-generic/dma-mapping-common.h> |
54 | 32 | ||
55 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) | 33 | static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) |
@@ -72,41 +50,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) | |||
72 | 50 | ||
73 | static inline void dma_mark_clean(void *addr, size_t size) {} | 51 | static inline void dma_mark_clean(void *addr, size_t size) {} |
74 | 52 | ||
75 | static inline int dma_set_mask(struct device *dev, u64 dma_mask) | ||
76 | { | ||
77 | if (!dev->dma_mask || !dma_supported(dev, dma_mask)) | ||
78 | return -EIO; | ||
79 | |||
80 | *dev->dma_mask = dma_mask; | ||
81 | |||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) | ||
86 | |||
87 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
88 | dma_addr_t *dma_handle, gfp_t flag, | ||
89 | struct dma_attrs *attrs) | ||
90 | { | ||
91 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
92 | |||
93 | return dma_ops->alloc(dev, size, dma_handle, flag, attrs); | ||
94 | } | ||
95 | |||
96 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) | ||
97 | |||
98 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
99 | void *cpu_addr, dma_addr_t dma_handle, | ||
100 | struct dma_attrs *attrs) | ||
101 | { | ||
102 | struct dma_map_ops *dma_ops = get_dma_ops(dev); | ||
103 | |||
104 | dma_ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
105 | } | ||
106 | |||
107 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
108 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
109 | |||
110 | static inline void dma_cache_sync(struct device *dev, void *vaddr, | 53 | static inline void dma_cache_sync(struct device *dev, void *vaddr, |
111 | size_t size, enum dma_data_direction direction) | 54 | size_t size, enum dma_data_direction direction) |
112 | { | 55 | { |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cc0d73eac047..7aef2d52daa0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1754,6 +1754,7 @@ source kernel/Kconfig.hz | |||
1754 | 1754 | ||
1755 | config KEXEC | 1755 | config KEXEC |
1756 | bool "kexec system call" | 1756 | bool "kexec system call" |
1757 | select KEXEC_CORE | ||
1757 | ---help--- | 1758 | ---help--- |
1758 | kexec is a system call that implements the ability to shutdown your | 1759 | kexec is a system call that implements the ability to shutdown your |
1759 | current kernel, and to start another kernel. It is like a reboot | 1760 | current kernel, and to start another kernel. It is like a reboot |
@@ -1770,8 +1771,8 @@ config KEXEC | |||
1770 | 1771 | ||
1771 | config KEXEC_FILE | 1772 | config KEXEC_FILE |
1772 | bool "kexec file based system call" | 1773 | bool "kexec file based system call" |
1774 | select KEXEC_CORE | ||
1773 | select BUILD_BIN2C | 1775 | select BUILD_BIN2C |
1774 | depends on KEXEC | ||
1775 | depends on X86_64 | 1776 | depends on X86_64 |
1776 | depends on CRYPTO=y | 1777 | depends on CRYPTO=y |
1777 | depends on CRYPTO_SHA256=y | 1778 | depends on CRYPTO_SHA256=y |
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index f63797942bb5..79dac1758e7c 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c | |||
@@ -448,7 +448,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, | |||
448 | #endif | 448 | #endif |
449 | 449 | ||
450 | debug_putstr("\nDecompressing Linux... "); | 450 | debug_putstr("\nDecompressing Linux... "); |
451 | decompress(input_data, input_len, NULL, NULL, output, NULL, error); | 451 | __decompress(input_data, input_len, NULL, NULL, output, output_len, |
452 | NULL, error); | ||
452 | parse_elf(output); | 453 | parse_elf(output); |
453 | /* | 454 | /* |
454 | * 32-bit always performs relocations. 64-bit relocations are only | 455 | * 32-bit always performs relocations. 64-bit relocations are only |
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 16ef02596db2..2d6b309c8e9a 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
@@ -414,7 +414,7 @@ xloadflags: | |||
414 | # define XLF23 0 | 414 | # define XLF23 0 |
415 | #endif | 415 | #endif |
416 | 416 | ||
417 | #if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC) | 417 | #if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC_CORE) |
418 | # define XLF4 XLF_EFI_KEXEC | 418 | # define XLF4 XLF_EFI_KEXEC |
419 | #else | 419 | #else |
420 | # define XLF4 0 | 420 | # define XLF4 0 |
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 26a46f44e298..b160c0c6baed 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c | |||
@@ -277,7 +277,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma) | |||
277 | { | 277 | { |
278 | return "[vsyscall]"; | 278 | return "[vsyscall]"; |
279 | } | 279 | } |
280 | static struct vm_operations_struct gate_vma_ops = { | 280 | static const struct vm_operations_struct gate_vma_ops = { |
281 | .name = gate_vma_name, | 281 | .name = gate_vma_name, |
282 | }; | 282 | }; |
283 | static struct vm_area_struct gate_vma = { | 283 | static struct vm_area_struct gate_vma = { |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 1f5b7287d1ad..953b7263f844 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/dma-attrs.h> | 12 | #include <linux/dma-attrs.h> |
13 | #include <asm/io.h> | 13 | #include <asm/io.h> |
14 | #include <asm/swiotlb.h> | 14 | #include <asm/swiotlb.h> |
15 | #include <asm-generic/dma-coherent.h> | ||
16 | #include <linux/dma-contiguous.h> | 15 | #include <linux/dma-contiguous.h> |
17 | 16 | ||
18 | #ifdef CONFIG_ISA | 17 | #ifdef CONFIG_ISA |
@@ -41,24 +40,13 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
41 | #endif | 40 | #endif |
42 | } | 41 | } |
43 | 42 | ||
44 | #include <asm-generic/dma-mapping-common.h> | 43 | bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); |
45 | 44 | #define arch_dma_alloc_attrs arch_dma_alloc_attrs | |
46 | /* Make sure we keep the same behaviour */ | ||
47 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
48 | { | ||
49 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
50 | debug_dma_mapping_error(dev, dma_addr); | ||
51 | if (ops->mapping_error) | ||
52 | return ops->mapping_error(dev, dma_addr); | ||
53 | |||
54 | return (dma_addr == DMA_ERROR_CODE); | ||
55 | } | ||
56 | |||
57 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
58 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
59 | 45 | ||
46 | #define HAVE_ARCH_DMA_SUPPORTED 1 | ||
60 | extern int dma_supported(struct device *hwdev, u64 mask); | 47 | extern int dma_supported(struct device *hwdev, u64 mask); |
61 | extern int dma_set_mask(struct device *dev, u64 mask); | 48 | |
49 | #include <asm-generic/dma-mapping-common.h> | ||
62 | 50 | ||
63 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 51 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
64 | dma_addr_t *dma_addr, gfp_t flag, | 52 | dma_addr_t *dma_addr, gfp_t flag, |
@@ -125,16 +113,4 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) | |||
125 | return gfp; | 113 | return gfp; |
126 | } | 114 | } |
127 | 115 | ||
128 | #define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) | ||
129 | |||
130 | void * | ||
131 | dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, | ||
132 | gfp_t gfp, struct dma_attrs *attrs); | ||
133 | |||
134 | #define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) | ||
135 | |||
136 | void dma_free_attrs(struct device *dev, size_t size, | ||
137 | void *vaddr, dma_addr_t bus, | ||
138 | struct dma_attrs *attrs); | ||
139 | |||
140 | #endif | 116 | #endif |
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 32ce71375b21..b130d59406fb 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h | |||
@@ -29,7 +29,7 @@ extern void show_trace(struct task_struct *t, struct pt_regs *regs, | |||
29 | extern void __show_regs(struct pt_regs *regs, int all); | 29 | extern void __show_regs(struct pt_regs *regs, int all); |
30 | extern unsigned long oops_begin(void); | 30 | extern unsigned long oops_begin(void); |
31 | extern void oops_end(unsigned long, struct pt_regs *, int signr); | 31 | extern void oops_end(unsigned long, struct pt_regs *, int signr); |
32 | #ifdef CONFIG_KEXEC | 32 | #ifdef CONFIG_KEXEC_CORE |
33 | extern int in_crash_kexec; | 33 | extern int in_crash_kexec; |
34 | #else | 34 | #else |
35 | /* no crash dump is ever in progress if no crash kernel can be kexec'd */ | 35 | /* no crash dump is ever in progress if no crash kernel can be kexec'd */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9ffdf25e5b86..b1b78ffe01d0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -71,8 +71,8 @@ obj-$(CONFIG_LIVEPATCH) += livepatch.o | |||
71 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o | 71 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o |
72 | obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o | 72 | obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o |
73 | obj-$(CONFIG_X86_TSC) += trace_clock.o | 73 | obj-$(CONFIG_X86_TSC) += trace_clock.o |
74 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | 74 | obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o |
75 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 75 | obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o |
76 | obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o | 76 | obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o |
77 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 77 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
78 | obj-y += kprobes/ | 78 | obj-y += kprobes/ |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 49487b488061..2c7aafa70702 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -200,7 +200,7 @@ static void kvm_setup_secondary_clock(void) | |||
200 | * kind of shutdown from our side, we unregister the clock by writting anything | 200 | * kind of shutdown from our side, we unregister the clock by writting anything |
201 | * that does not have the 'enable' bit set in the msr | 201 | * that does not have the 'enable' bit set in the msr |
202 | */ | 202 | */ |
203 | #ifdef CONFIG_KEXEC | 203 | #ifdef CONFIG_KEXEC_CORE |
204 | static void kvm_crash_shutdown(struct pt_regs *regs) | 204 | static void kvm_crash_shutdown(struct pt_regs *regs) |
205 | { | 205 | { |
206 | native_write_msr(msr_kvm_system_time, 0, 0); | 206 | native_write_msr(msr_kvm_system_time, 0, 0); |
@@ -259,7 +259,7 @@ void __init kvmclock_init(void) | |||
259 | x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; | 259 | x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; |
260 | x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; | 260 | x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; |
261 | machine_ops.shutdown = kvm_shutdown; | 261 | machine_ops.shutdown = kvm_shutdown; |
262 | #ifdef CONFIG_KEXEC | 262 | #ifdef CONFIG_KEXEC_CORE |
263 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 263 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
264 | #endif | 264 | #endif |
265 | kvm_get_preset_lpj(); | 265 | kvm_get_preset_lpj(); |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 353972c1946c..84b8ef82a159 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -58,17 +58,6 @@ EXPORT_SYMBOL(x86_dma_fallback_dev); | |||
58 | /* Number of entries preallocated for DMA-API debugging */ | 58 | /* Number of entries preallocated for DMA-API debugging */ |
59 | #define PREALLOC_DMA_DEBUG_ENTRIES 65536 | 59 | #define PREALLOC_DMA_DEBUG_ENTRIES 65536 |
60 | 60 | ||
61 | int dma_set_mask(struct device *dev, u64 mask) | ||
62 | { | ||
63 | if (!dev->dma_mask || !dma_supported(dev, mask)) | ||
64 | return -EIO; | ||
65 | |||
66 | *dev->dma_mask = mask; | ||
67 | |||
68 | return 0; | ||
69 | } | ||
70 | EXPORT_SYMBOL(dma_set_mask); | ||
71 | |||
72 | void __init pci_iommu_alloc(void) | 61 | void __init pci_iommu_alloc(void) |
73 | { | 62 | { |
74 | struct iommu_table_entry *p; | 63 | struct iommu_table_entry *p; |
@@ -140,50 +129,19 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, | |||
140 | free_pages((unsigned long)vaddr, get_order(size)); | 129 | free_pages((unsigned long)vaddr, get_order(size)); |
141 | } | 130 | } |
142 | 131 | ||
143 | void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, | 132 | bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) |
144 | gfp_t gfp, struct dma_attrs *attrs) | ||
145 | { | 133 | { |
146 | struct dma_map_ops *ops = get_dma_ops(dev); | 134 | *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); |
147 | void *memory; | 135 | *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); |
148 | |||
149 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | ||
150 | |||
151 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) | ||
152 | return memory; | ||
153 | |||
154 | if (!dev) | ||
155 | dev = &x86_dma_fallback_dev; | ||
156 | |||
157 | if (!is_device_dma_capable(dev)) | ||
158 | return NULL; | ||
159 | |||
160 | if (!ops->alloc) | ||
161 | return NULL; | ||
162 | |||
163 | memory = ops->alloc(dev, size, dma_handle, | ||
164 | dma_alloc_coherent_gfp_flags(dev, gfp), attrs); | ||
165 | debug_dma_alloc_coherent(dev, size, *dma_handle, memory); | ||
166 | |||
167 | return memory; | ||
168 | } | ||
169 | EXPORT_SYMBOL(dma_alloc_attrs); | ||
170 | |||
171 | void dma_free_attrs(struct device *dev, size_t size, | ||
172 | void *vaddr, dma_addr_t bus, | ||
173 | struct dma_attrs *attrs) | ||
174 | { | ||
175 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
176 | |||
177 | WARN_ON(irqs_disabled()); /* for portability */ | ||
178 | 136 | ||
179 | if (dma_release_from_coherent(dev, get_order(size), vaddr)) | 137 | if (!*dev) |
180 | return; | 138 | *dev = &x86_dma_fallback_dev; |
139 | if (!is_device_dma_capable(*dev)) | ||
140 | return false; | ||
141 | return true; | ||
181 | 142 | ||
182 | debug_dma_free_coherent(dev, size, vaddr, bus); | ||
183 | if (ops->free) | ||
184 | ops->free(dev, size, vaddr, bus, attrs); | ||
185 | } | 143 | } |
186 | EXPORT_SYMBOL(dma_free_attrs); | 144 | EXPORT_SYMBOL(arch_dma_alloc_attrs); |
187 | 145 | ||
188 | /* | 146 | /* |
189 | * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel | 147 | * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel |
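On x86 the open-coded dma_alloc_attrs()/dma_free_attrs() are replaced by the generic ones plus arch_dma_alloc_attrs(), which keeps only the x86-specific pieces: gfp-flag massaging, the x86_dma_fallback_dev substitution and the DMA-capability check. For symmetry with the allocation sketch earlier, the generic free path presumably reduces to roughly:

	#define dma_free_coherent(d, s, c, h)	dma_free_attrs(d, s, c, h, NULL)

	static inline void dma_free_attrs(struct device *dev, size_t size,
					  void *cpu_addr, dma_addr_t dma_handle,
					  struct dma_attrs *attrs)
	{
		struct dma_map_ops *ops = get_dma_ops(dev);

		BUG_ON(!ops);
		WARN_ON(irqs_disabled());	/* freeing coherent memory may sleep */

		/* Per-device coherent pools are released without touching dma_ops. */
		if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
			return;

		if (!ops->free)
			return;

		debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
		ops->free(dev, size, cpu_addr, dma_handle, attrs);
	}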
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 86db4bcd7ce5..02693dd9a079 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -673,7 +673,7 @@ struct machine_ops machine_ops = { | |||
673 | .emergency_restart = native_machine_emergency_restart, | 673 | .emergency_restart = native_machine_emergency_restart, |
674 | .restart = native_machine_restart, | 674 | .restart = native_machine_restart, |
675 | .halt = native_machine_halt, | 675 | .halt = native_machine_halt, |
676 | #ifdef CONFIG_KEXEC | 676 | #ifdef CONFIG_KEXEC_CORE |
677 | .crash_shutdown = native_machine_crash_shutdown, | 677 | .crash_shutdown = native_machine_crash_shutdown, |
678 | #endif | 678 | #endif |
679 | }; | 679 | }; |
@@ -703,7 +703,7 @@ void machine_halt(void) | |||
703 | machine_ops.halt(); | 703 | machine_ops.halt(); |
704 | } | 704 | } |
705 | 705 | ||
706 | #ifdef CONFIG_KEXEC | 706 | #ifdef CONFIG_KEXEC_CORE |
707 | void machine_crash_shutdown(struct pt_regs *regs) | 707 | void machine_crash_shutdown(struct pt_regs *regs) |
708 | { | 708 | { |
709 | machine_ops.crash_shutdown(regs); | 709 | machine_ops.crash_shutdown(regs); |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index baadbf90a7c5..fdb7f2a2d328 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -478,7 +478,7 @@ static void __init memblock_x86_reserve_range_setup_data(void) | |||
478 | * --------- Crashkernel reservation ------------------------------ | 478 | * --------- Crashkernel reservation ------------------------------ |
479 | */ | 479 | */ |
480 | 480 | ||
481 | #ifdef CONFIG_KEXEC | 481 | #ifdef CONFIG_KEXEC_CORE |
482 | 482 | ||
483 | /* | 483 | /* |
484 | * Keep the crash kernel below this limit. On 32 bits earlier kernels | 484 | * Keep the crash kernel below this limit. On 32 bits earlier kernels |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 00bf300fd846..74e4bf11f562 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -364,7 +364,7 @@ INIT_PER_CPU(irq_stack_union); | |||
364 | 364 | ||
365 | #endif /* CONFIG_X86_32 */ | 365 | #endif /* CONFIG_X86_32 */ |
366 | 366 | ||
367 | #ifdef CONFIG_KEXEC | 367 | #ifdef CONFIG_KEXEC_CORE |
368 | #include <asm/kexec.h> | 368 | #include <asm/kexec.h> |
369 | 369 | ||
370 | . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | 370 | . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 148ea2016022..d01986832afc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -1264,7 +1264,7 @@ static void vmcs_load(struct vmcs *vmcs) | |||
1264 | vmcs, phys_addr); | 1264 | vmcs, phys_addr); |
1265 | } | 1265 | } |
1266 | 1266 | ||
1267 | #ifdef CONFIG_KEXEC | 1267 | #ifdef CONFIG_KEXEC_CORE |
1268 | /* | 1268 | /* |
1269 | * This bitmap is used to indicate whether the vmclear | 1269 | * This bitmap is used to indicate whether the vmclear |
1270 | * operation is enabled on all cpus. All disabled by | 1270 | * operation is enabled on all cpus. All disabled by |
@@ -1302,7 +1302,7 @@ static void crash_vmclear_local_loaded_vmcss(void) | |||
1302 | #else | 1302 | #else |
1303 | static inline void crash_enable_local_vmclear(int cpu) { } | 1303 | static inline void crash_enable_local_vmclear(int cpu) { } |
1304 | static inline void crash_disable_local_vmclear(int cpu) { } | 1304 | static inline void crash_disable_local_vmclear(int cpu) { } |
1305 | #endif /* CONFIG_KEXEC */ | 1305 | #endif /* CONFIG_KEXEC_CORE */ |
1306 | 1306 | ||
1307 | static void __loaded_vmcs_clear(void *arg) | 1307 | static void __loaded_vmcs_clear(void *arg) |
1308 | { | 1308 | { |
@@ -10411,7 +10411,7 @@ static int __init vmx_init(void) | |||
10411 | if (r) | 10411 | if (r) |
10412 | return r; | 10412 | return r; |
10413 | 10413 | ||
10414 | #ifdef CONFIG_KEXEC | 10414 | #ifdef CONFIG_KEXEC_CORE |
10415 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, | 10415 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, |
10416 | crash_vmclear_local_loaded_vmcss); | 10416 | crash_vmclear_local_loaded_vmcss); |
10417 | #endif | 10417 | #endif |
@@ -10421,7 +10421,7 @@ static int __init vmx_init(void) | |||
10421 | 10421 | ||
10422 | static void __exit vmx_exit(void) | 10422 | static void __exit vmx_exit(void) |
10423 | { | 10423 | { |
10424 | #ifdef CONFIG_KEXEC | 10424 | #ifdef CONFIG_KEXEC_CORE |
10425 | RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); | 10425 | RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); |
10426 | synchronize_rcu(); | 10426 | synchronize_rcu(); |
10427 | #endif | 10427 | #endif |
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index db1b0bc5017c..134948b0926f 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c | |||
@@ -42,58 +42,21 @@ static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm) | |||
42 | */ | 42 | */ |
43 | static unsigned long mpx_mmap(unsigned long len) | 43 | static unsigned long mpx_mmap(unsigned long len) |
44 | { | 44 | { |
45 | unsigned long ret; | ||
46 | unsigned long addr, pgoff; | ||
47 | struct mm_struct *mm = current->mm; | 45 | struct mm_struct *mm = current->mm; |
48 | vm_flags_t vm_flags; | 46 | unsigned long addr, populate; |
49 | struct vm_area_struct *vma; | ||
50 | 47 | ||
51 | /* Only bounds table can be allocated here */ | 48 | /* Only bounds table can be allocated here */ |
52 | if (len != mpx_bt_size_bytes(mm)) | 49 | if (len != mpx_bt_size_bytes(mm)) |
53 | return -EINVAL; | 50 | return -EINVAL; |
54 | 51 | ||
55 | down_write(&mm->mmap_sem); | 52 | down_write(&mm->mmap_sem); |
56 | 53 | addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE, | |
57 | /* Too many mappings? */ | 54 | MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate); |
58 | if (mm->map_count > sysctl_max_map_count) { | ||
59 | ret = -ENOMEM; | ||
60 | goto out; | ||
61 | } | ||
62 | |||
63 | /* Obtain the address to map to. we verify (or select) it and ensure | ||
64 | * that it represents a valid section of the address space. | ||
65 | */ | ||
66 | addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE); | ||
67 | if (addr & ~PAGE_MASK) { | ||
68 | ret = addr; | ||
69 | goto out; | ||
70 | } | ||
71 | |||
72 | vm_flags = VM_READ | VM_WRITE | VM_MPX | | ||
73 | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; | ||
74 | |||
75 | /* Set pgoff according to addr for anon_vma */ | ||
76 | pgoff = addr >> PAGE_SHIFT; | ||
77 | |||
78 | ret = mmap_region(NULL, addr, len, vm_flags, pgoff); | ||
79 | if (IS_ERR_VALUE(ret)) | ||
80 | goto out; | ||
81 | |||
82 | vma = find_vma(mm, ret); | ||
83 | if (!vma) { | ||
84 | ret = -ENOMEM; | ||
85 | goto out; | ||
86 | } | ||
87 | |||
88 | if (vm_flags & VM_LOCKED) { | ||
89 | up_write(&mm->mmap_sem); | ||
90 | mm_populate(ret, len); | ||
91 | return ret; | ||
92 | } | ||
93 | |||
94 | out: | ||
95 | up_write(&mm->mmap_sem); | 55 | up_write(&mm->mmap_sem); |
96 | return ret; | 56 | if (populate) |
57 | mm_populate(addr, populate); | ||
58 | |||
59 | return addr; | ||
97 | } | 60 | } |
98 | 61 | ||
99 | enum reg_type { | 62 | enum reg_type { |
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index e4308fe6afe8..1db84c0758b7 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -650,7 +650,7 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) | |||
650 | 650 | ||
651 | static void __init save_runtime_map(void) | 651 | static void __init save_runtime_map(void) |
652 | { | 652 | { |
653 | #ifdef CONFIG_KEXEC | 653 | #ifdef CONFIG_KEXEC_CORE |
654 | efi_memory_desc_t *md; | 654 | efi_memory_desc_t *md; |
655 | void *tmp, *p, *q = NULL; | 655 | void *tmp, *p, *q = NULL; |
656 | int count = 0; | 656 | int count = 0; |
@@ -748,7 +748,7 @@ static void * __init efi_map_regions(int *count, int *pg_shift) | |||
748 | 748 | ||
749 | static void __init kexec_enter_virtual_mode(void) | 749 | static void __init kexec_enter_virtual_mode(void) |
750 | { | 750 | { |
751 | #ifdef CONFIG_KEXEC | 751 | #ifdef CONFIG_KEXEC_CORE |
752 | efi_memory_desc_t *md; | 752 | efi_memory_desc_t *md; |
753 | void *p; | 753 | void *p; |
754 | 754 | ||
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 020c101c255f..5c9f63fa6abf 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c | |||
@@ -492,7 +492,7 @@ static void uv_nmi_touch_watchdogs(void) | |||
492 | touch_nmi_watchdog(); | 492 | touch_nmi_watchdog(); |
493 | } | 493 | } |
494 | 494 | ||
495 | #if defined(CONFIG_KEXEC) | 495 | #if defined(CONFIG_KEXEC_CORE) |
496 | static atomic_t uv_nmi_kexec_failed; | 496 | static atomic_t uv_nmi_kexec_failed; |
497 | static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) | 497 | static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) |
498 | { | 498 | { |
@@ -519,13 +519,13 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) | |||
519 | uv_nmi_sync_exit(0); | 519 | uv_nmi_sync_exit(0); |
520 | } | 520 | } |
521 | 521 | ||
522 | #else /* !CONFIG_KEXEC */ | 522 | #else /* !CONFIG_KEXEC_CORE */ |
523 | static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) | 523 | static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) |
524 | { | 524 | { |
525 | if (master) | 525 | if (master) |
526 | pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n"); | 526 | pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n"); |
527 | } | 527 | } |
528 | #endif /* !CONFIG_KEXEC */ | 528 | #endif /* !CONFIG_KEXEC_CORE */ |
529 | 529 | ||
530 | #ifdef CONFIG_KGDB | 530 | #ifdef CONFIG_KGDB |
531 | #ifdef CONFIG_KGDB_KDB | 531 | #ifdef CONFIG_KGDB_KDB |
diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index f01cb3044e50..4427f38b634e 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h | |||
@@ -32,66 +32,6 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
32 | 32 | ||
33 | #include <asm-generic/dma-mapping-common.h> | 33 | #include <asm-generic/dma-mapping-common.h> |
34 | 34 | ||
35 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) | ||
36 | #define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL) | ||
37 | #define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) | ||
38 | #define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL) | ||
39 | |||
40 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
41 | dma_addr_t *dma_handle, gfp_t gfp, | ||
42 | struct dma_attrs *attrs) | ||
43 | { | ||
44 | void *ret; | ||
45 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
46 | |||
47 | if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) | ||
48 | return ret; | ||
49 | |||
50 | ret = ops->alloc(dev, size, dma_handle, gfp, attrs); | ||
51 | debug_dma_alloc_coherent(dev, size, *dma_handle, ret); | ||
52 | |||
53 | return ret; | ||
54 | } | ||
55 | |||
56 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
57 | void *vaddr, dma_addr_t dma_handle, | ||
58 | struct dma_attrs *attrs) | ||
59 | { | ||
60 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
61 | |||
62 | if (dma_release_from_coherent(dev, get_order(size), vaddr)) | ||
63 | return; | ||
64 | |||
65 | ops->free(dev, size, vaddr, dma_handle, attrs); | ||
66 | debug_dma_free_coherent(dev, size, vaddr, dma_handle); | ||
67 | } | ||
68 | |||
69 | static inline int | ||
70 | dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
71 | { | ||
72 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
73 | |||
74 | debug_dma_mapping_error(dev, dma_addr); | ||
75 | return ops->mapping_error(dev, dma_addr); | ||
76 | } | ||
77 | |||
78 | static inline int | ||
79 | dma_supported(struct device *dev, u64 mask) | ||
80 | { | ||
81 | return 1; | ||
82 | } | ||
83 | |||
84 | static inline int | ||
85 | dma_set_mask(struct device *dev, u64 mask) | ||
86 | { | ||
87 | if(!dev->dma_mask || !dma_supported(dev, mask)) | ||
88 | return -EIO; | ||
89 | |||
90 | *dev->dma_mask = mask; | ||
91 | |||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | void dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 35 | void dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
96 | enum dma_data_direction direction); | 36 | enum dma_data_direction direction); |
97 | 37 | ||
diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 6607f3c6ace1..a39e85f9efa9 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c | |||
@@ -2834,7 +2834,7 @@ static int binder_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
2834 | return VM_FAULT_SIGBUS; | 2834 | return VM_FAULT_SIGBUS; |
2835 | } | 2835 | } |
2836 | 2836 | ||
2837 | static struct vm_operations_struct binder_vm_ops = { | 2837 | static const struct vm_operations_struct binder_vm_ops = { |
2838 | .open = binder_vma_open, | 2838 | .open = binder_vma_open, |
2839 | .close = binder_vma_close, | 2839 | .close = binder_vma_close, |
2840 | .fault = binder_vm_fault, | 2840 | .fault = binder_vm_fault, |
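The binder change is the first of many identical constifications in this series: a vm_operations_struct is only ever read once registered, so marking it const lets it live in .rodata. A minimal sketch of the pattern, assuming hypothetical driver callbacks (my_vma_fault and my_vm_ops are not from the patch):

#include <linux/mm.h>

static int my_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	/* driver-specific fault handling would go here (assumed) */
	return VM_FAULT_SIGBUS;
}

/* const: never written after initialization, so the table can live in .rodata */
static const struct vm_operations_struct my_vm_ops = {
	.fault = my_vma_fault,
};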
diff --git a/drivers/crypto/qat/qat_common/adf_transport_debug.c b/drivers/crypto/qat/qat_common/adf_transport_debug.c index e41986967294..52340b9bb387 100644 --- a/drivers/crypto/qat/qat_common/adf_transport_debug.c +++ b/drivers/crypto/qat/qat_common/adf_transport_debug.c | |||
@@ -86,9 +86,7 @@ static int adf_ring_show(struct seq_file *sfile, void *v) | |||
86 | { | 86 | { |
87 | struct adf_etr_ring_data *ring = sfile->private; | 87 | struct adf_etr_ring_data *ring = sfile->private; |
88 | struct adf_etr_bank_data *bank = ring->bank; | 88 | struct adf_etr_bank_data *bank = ring->bank; |
89 | uint32_t *msg = v; | ||
90 | void __iomem *csr = ring->bank->csr_addr; | 89 | void __iomem *csr = ring->bank->csr_addr; |
91 | int i, x; | ||
92 | 90 | ||
93 | if (v == SEQ_START_TOKEN) { | 91 | if (v == SEQ_START_TOKEN) { |
94 | int head, tail, empty; | 92 | int head, tail, empty; |
@@ -113,18 +111,8 @@ static int adf_ring_show(struct seq_file *sfile, void *v) | |||
113 | seq_puts(sfile, "----------- Ring data ------------\n"); | 111 | seq_puts(sfile, "----------- Ring data ------------\n"); |
114 | return 0; | 112 | return 0; |
115 | } | 113 | } |
116 | seq_printf(sfile, "%p:", msg); | 114 | seq_hex_dump(sfile, "", DUMP_PREFIX_ADDRESS, 32, 4, |
117 | x = 0; | 115 | v, ADF_MSG_SIZE_TO_BYTES(ring->msg_size), false); |
118 | i = 0; | ||
119 | for (; i < (ADF_MSG_SIZE_TO_BYTES(ring->msg_size) >> 2); i++) { | ||
120 | seq_printf(sfile, " %08X", *(msg + i)); | ||
121 | if ((ADF_MSG_SIZE_TO_BYTES(ring->msg_size) >> 2) != i + 1 && | ||
122 | (++x == 8)) { | ||
123 | seq_printf(sfile, "\n%p:", msg + i + 1); | ||
124 | x = 0; | ||
125 | } | ||
126 | } | ||
127 | seq_puts(sfile, "\n"); | ||
128 | return 0; | 116 | return 0; |
129 | } | 117 | } |
130 | 118 | ||
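The qat debugfs conversion above is one of several in this series that replace an open-coded hex-dump loop with the new seq_hex_dump() helper. A minimal sketch of the call in a seq_file show callback; the callback name, buffer and length are assumptions, while the helper and the DUMP_PREFIX_* constants are the real kernel API:

#include <linux/seq_file.h>
#include <linux/printk.h>

static int my_ring_show(struct seq_file *sfile, void *v)
{
	const void *msg = v;	/* record to dump (assumed) */
	size_t len = 64;	/* record size in bytes (assumed) */

	/* address-prefixed rows, 32 bytes per row in 4-byte groups, no ASCII column */
	seq_hex_dump(sfile, "", DUMP_PREFIX_ADDRESS, 32, 4, msg, len, false);
	return 0;
}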
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 54071c148340..84533e02fbf8 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig | |||
@@ -43,7 +43,7 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE | |||
43 | 43 | ||
44 | config EFI_RUNTIME_MAP | 44 | config EFI_RUNTIME_MAP |
45 | bool "Export efi runtime maps to sysfs" | 45 | bool "Export efi runtime maps to sysfs" |
46 | depends on X86 && EFI && KEXEC | 46 | depends on X86 && EFI && KEXEC_CORE |
47 | default y | 47 | default y |
48 | help | 48 | help |
49 | Export efi runtime memory maps to /sys/firmware/efi/runtime-map. | 49 | Export efi runtime memory maps to /sys/firmware/efi/runtime-map. |
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 6394547cf67a..860062ef8814 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c | |||
@@ -125,7 +125,7 @@ static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
125 | } | 125 | } |
126 | } | 126 | } |
127 | 127 | ||
128 | static struct vm_operations_struct vgem_gem_vm_ops = { | 128 | static const struct vm_operations_struct vgem_gem_vm_ops = { |
129 | .fault = vgem_gem_fault, | 129 | .fault = vgem_gem_fault, |
130 | .open = drm_gem_vm_open, | 130 | .open = drm_gem_vm_open, |
131 | .close = drm_gem_vm_close, | 131 | .close = drm_gem_vm_close, |
diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c index d04643f9548b..95638df73d1c 100644 --- a/drivers/hsi/clients/cmt_speech.c +++ b/drivers/hsi/clients/cmt_speech.c | |||
@@ -1110,7 +1110,7 @@ static int cs_char_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1110 | return 0; | 1110 | return 0; |
1111 | } | 1111 | } |
1112 | 1112 | ||
1113 | static struct vm_operations_struct cs_char_vm_ops = { | 1113 | static const struct vm_operations_struct cs_char_vm_ops = { |
1114 | .fault = cs_char_vma_fault, | 1114 | .fault = cs_char_vma_fault, |
1115 | }; | 1115 | }; |
1116 | 1116 | ||
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 725881890c4a..e449e394963f 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c | |||
@@ -908,7 +908,7 @@ static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
908 | return 0; | 908 | return 0; |
909 | } | 909 | } |
910 | 910 | ||
911 | static struct vm_operations_struct qib_file_vm_ops = { | 911 | static const struct vm_operations_struct qib_file_vm_ops = { |
912 | .fault = qib_file_vma_fault, | 912 | .fault = qib_file_vma_fault, |
913 | }; | 913 | }; |
914 | 914 | ||
diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c index 146cf29a2e1d..34927b700b0e 100644 --- a/drivers/infiniband/hw/qib/qib_mmap.c +++ b/drivers/infiniband/hw/qib/qib_mmap.c | |||
@@ -75,7 +75,7 @@ static void qib_vma_close(struct vm_area_struct *vma) | |||
75 | kref_put(&ip->ref, qib_release_mmap_info); | 75 | kref_put(&ip->ref, qib_release_mmap_info); |
76 | } | 76 | } |
77 | 77 | ||
78 | static struct vm_operations_struct qib_vm_ops = { | 78 | static const struct vm_operations_struct qib_vm_ops = { |
79 | .open = qib_vma_open, | 79 | .open = qib_vma_open, |
80 | .close = qib_vma_close, | 80 | .close = qib_vma_close, |
81 | }; | 81 | }; |
diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index f09c5f17a42f..de2474e1132d 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c | |||
@@ -872,7 +872,7 @@ static void omap_vout_vm_close(struct vm_area_struct *vma) | |||
872 | vout->mmap_count--; | 872 | vout->mmap_count--; |
873 | } | 873 | } |
874 | 874 | ||
875 | static struct vm_operations_struct omap_vout_vm_ops = { | 875 | static const struct vm_operations_struct omap_vout_vm_ops = { |
876 | .open = omap_vout_vm_open, | 876 | .open = omap_vout_vm_open, |
877 | .close = omap_vout_vm_close, | 877 | .close = omap_vout_vm_close, |
878 | }; | 878 | }; |
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index c49d244265ec..70e62d6a3231 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c | |||
@@ -418,7 +418,7 @@ static void genwqe_vma_close(struct vm_area_struct *vma) | |||
418 | kfree(dma_map); | 418 | kfree(dma_map); |
419 | } | 419 | } |
420 | 420 | ||
421 | static struct vm_operations_struct genwqe_vma_ops = { | 421 | static const struct vm_operations_struct genwqe_vma_ops = { |
422 | .open = genwqe_vma_open, | 422 | .open = genwqe_vma_open, |
423 | .close = genwqe_vma_close, | 423 | .close = genwqe_vma_close, |
424 | }; | 424 | }; |
diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index 613ca2b2527b..d1a1e160ef31 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c | |||
@@ -156,6 +156,12 @@ static const struct file_operations fops_vring = { | |||
156 | .llseek = seq_lseek, | 156 | .llseek = seq_lseek, |
157 | }; | 157 | }; |
158 | 158 | ||
159 | static void wil_seq_hexdump(struct seq_file *s, void *p, int len, | ||
160 | const char *prefix) | ||
161 | { | ||
162 | seq_hex_dump(s, prefix, DUMP_PREFIX_NONE, 16, 1, p, len, false); | ||
163 | } | ||
164 | |||
159 | static void wil_print_ring(struct seq_file *s, const char *prefix, | 165 | static void wil_print_ring(struct seq_file *s, const char *prefix, |
160 | void __iomem *off) | 166 | void __iomem *off) |
161 | { | 167 | { |
@@ -212,8 +218,6 @@ static void wil_print_ring(struct seq_file *s, const char *prefix, | |||
212 | le16_to_cpu(hdr.seq), len, | 218 | le16_to_cpu(hdr.seq), len, |
213 | le16_to_cpu(hdr.type), hdr.flags); | 219 | le16_to_cpu(hdr.type), hdr.flags); |
214 | if (len <= MAX_MBOXITEM_SIZE) { | 220 | if (len <= MAX_MBOXITEM_SIZE) { |
215 | int n = 0; | ||
216 | char printbuf[16 * 3 + 2]; | ||
217 | unsigned char databuf[MAX_MBOXITEM_SIZE]; | 221 | unsigned char databuf[MAX_MBOXITEM_SIZE]; |
218 | void __iomem *src = wmi_buffer(wil, d.addr) + | 222 | void __iomem *src = wmi_buffer(wil, d.addr) + |
219 | sizeof(struct wil6210_mbox_hdr); | 223 | sizeof(struct wil6210_mbox_hdr); |
@@ -223,16 +227,7 @@ static void wil_print_ring(struct seq_file *s, const char *prefix, | |||
223 | * reading header | 227 | * reading header |
224 | */ | 228 | */ |
225 | wil_memcpy_fromio_32(databuf, src, len); | 229 | wil_memcpy_fromio_32(databuf, src, len); |
226 | while (n < len) { | 230 | wil_seq_hexdump(s, databuf, len, " : "); |
227 | int l = min(len - n, 16); | ||
228 | |||
229 | hex_dump_to_buffer(databuf + n, l, | ||
230 | 16, 1, printbuf, | ||
231 | sizeof(printbuf), | ||
232 | false); | ||
233 | seq_printf(s, " : %s\n", printbuf); | ||
234 | n += l; | ||
235 | } | ||
236 | } | 231 | } |
237 | } else { | 232 | } else { |
238 | seq_puts(s, "\n"); | 233 | seq_puts(s, "\n"); |
@@ -867,22 +862,6 @@ static const struct file_operations fops_wmi = { | |||
867 | .open = simple_open, | 862 | .open = simple_open, |
868 | }; | 863 | }; |
869 | 864 | ||
870 | static void wil_seq_hexdump(struct seq_file *s, void *p, int len, | ||
871 | const char *prefix) | ||
872 | { | ||
873 | char printbuf[16 * 3 + 2]; | ||
874 | int i = 0; | ||
875 | |||
876 | while (i < len) { | ||
877 | int l = min(len - i, 16); | ||
878 | |||
879 | hex_dump_to_buffer(p + i, l, 16, 1, printbuf, | ||
880 | sizeof(printbuf), false); | ||
881 | seq_printf(s, "%s%s\n", prefix, printbuf); | ||
882 | i += l; | ||
883 | } | ||
884 | } | ||
885 | |||
886 | static void wil_seq_print_skb(struct seq_file *s, struct sk_buff *skb) | 865 | static void wil_seq_print_skb(struct seq_file *s, struct sk_buff *skb) |
887 | { | 866 | { |
888 | int i = 0; | 867 | int i = 0; |
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 02ff84fcfa61..957b42198328 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c | |||
@@ -1103,16 +1103,9 @@ static int ccio_proc_bitmap_info(struct seq_file *m, void *p) | |||
1103 | struct ioc *ioc = ioc_list; | 1103 | struct ioc *ioc = ioc_list; |
1104 | 1104 | ||
1105 | while (ioc != NULL) { | 1105 | while (ioc != NULL) { |
1106 | u32 *res_ptr = (u32 *)ioc->res_map; | 1106 | seq_hex_dump(m, " ", DUMP_PREFIX_NONE, 32, 4, ioc->res_map, |
1107 | int j; | 1107 | ioc->res_size, false); |
1108 | 1108 | seq_putc(m, '\n'); | |
1109 | for (j = 0; j < (ioc->res_size / sizeof(u32)); j++) { | ||
1110 | if ((j & 7) == 0) | ||
1111 | seq_puts(m, "\n "); | ||
1112 | seq_printf(m, "%08x", *res_ptr); | ||
1113 | res_ptr++; | ||
1114 | } | ||
1115 | seq_puts(m, "\n\n"); | ||
1116 | ioc = ioc->next; | 1109 | ioc = ioc->next; |
1117 | break; /* XXX - remove me */ | 1110 | break; /* XXX - remove me */ |
1118 | } | 1111 | } |
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index f1441e466c06..225049b492e5 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c | |||
@@ -1854,14 +1854,9 @@ sba_proc_bitmap_info(struct seq_file *m, void *p) | |||
1854 | { | 1854 | { |
1855 | struct sba_device *sba_dev = sba_list; | 1855 | struct sba_device *sba_dev = sba_list; |
1856 | struct ioc *ioc = &sba_dev->ioc[0]; /* FIXME: Multi-IOC support! */ | 1856 | struct ioc *ioc = &sba_dev->ioc[0]; /* FIXME: Multi-IOC support! */ |
1857 | unsigned int *res_ptr = (unsigned int *)ioc->res_map; | ||
1858 | int i; | ||
1859 | 1857 | ||
1860 | for (i = 0; i < (ioc->res_size/sizeof(unsigned int)); ++i, ++res_ptr) { | 1858 | seq_hex_dump(m, " ", DUMP_PREFIX_NONE, 32, 4, ioc->res_map, |
1861 | if ((i & 7) == 0) | 1859 | ioc->res_size, false); |
1862 | seq_puts(m, "\n "); | ||
1863 | seq_printf(m, " %08x", *res_ptr); | ||
1864 | } | ||
1865 | seq_putc(m, '\n'); | 1860 | seq_putc(m, '\n'); |
1866 | 1861 | ||
1867 | return 0; | 1862 | return 0; |
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 52a880ca1768..dd652f2ae03d 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c | |||
@@ -467,7 +467,7 @@ static void pci_device_shutdown(struct device *dev) | |||
467 | pci_msi_shutdown(pci_dev); | 467 | pci_msi_shutdown(pci_dev); |
468 | pci_msix_shutdown(pci_dev); | 468 | pci_msix_shutdown(pci_dev); |
469 | 469 | ||
470 | #ifdef CONFIG_KEXEC | 470 | #ifdef CONFIG_KEXEC_CORE |
471 | /* | 471 | /* |
472 | * If this is a kexec reboot, turn off Bus Master bit on the | 472 | * If this is a kexec reboot, turn off Bus Master bit on the |
473 | * device to tell it to not continue to do DMA. Don't touch | 473 | * device to tell it to not continue to do DMA. Don't touch |
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 01bf1f5cf2e9..4eb45546a3aa 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c | |||
@@ -1206,16 +1206,8 @@ static void sprinthx(unsigned char *title, struct seq_file *m, | |||
1206 | static void sprinthx4(unsigned char *title, struct seq_file *m, | 1206 | static void sprinthx4(unsigned char *title, struct seq_file *m, |
1207 | unsigned int *array, unsigned int len) | 1207 | unsigned int *array, unsigned int len) |
1208 | { | 1208 | { |
1209 | int r; | ||
1210 | |||
1211 | seq_printf(m, "\n%s\n", title); | 1209 | seq_printf(m, "\n%s\n", title); |
1212 | for (r = 0; r < len; r++) { | 1210 | seq_hex_dump(m, " ", DUMP_PREFIX_NONE, 32, 4, array, len, false); |
1213 | if ((r % 8) == 0) | ||
1214 | seq_printf(m, " "); | ||
1215 | seq_printf(m, "%08X ", array[r]); | ||
1216 | if ((r % 8) == 7) | ||
1217 | seq_putc(m, '\n'); | ||
1218 | } | ||
1219 | seq_putc(m, '\n'); | 1211 | seq_putc(m, '\n'); |
1220 | } | 1212 | } |
1221 | 1213 | ||
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index eec878e183f5..217aa537c4eb 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c | |||
@@ -997,7 +997,7 @@ static void ion_vm_close(struct vm_area_struct *vma) | |||
997 | mutex_unlock(&buffer->lock); | 997 | mutex_unlock(&buffer->lock); |
998 | } | 998 | } |
999 | 999 | ||
1000 | static struct vm_operations_struct ion_vma_ops = { | 1000 | static const struct vm_operations_struct ion_vma_ops = { |
1001 | .open = ion_vm_open, | 1001 | .open = ion_vm_open, |
1002 | .close = ion_vm_close, | 1002 | .close = ion_vm_close, |
1003 | .fault = ion_vm_fault, | 1003 | .fault = ion_vm_fault, |
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index fd54d098ab02..0e8a45102933 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c | |||
@@ -2156,7 +2156,7 @@ static void comedi_vm_close(struct vm_area_struct *area) | |||
2156 | comedi_buf_map_put(bm); | 2156 | comedi_buf_map_put(bm); |
2157 | } | 2157 | } |
2158 | 2158 | ||
2159 | static struct vm_operations_struct comedi_vm_ops = { | 2159 | static const struct vm_operations_struct comedi_vm_ops = { |
2160 | .open = comedi_vm_open, | 2160 | .open = comedi_vm_open, |
2161 | .close = comedi_vm_close, | 2161 | .close = comedi_vm_close, |
2162 | }; | 2162 | }; |
diff --git a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c index 4f0cbb54d4db..d3af01c94a58 100644 --- a/drivers/video/fbdev/omap2/omapfb/omapfb-main.c +++ b/drivers/video/fbdev/omap2/omapfb/omapfb-main.c | |||
@@ -1091,7 +1091,7 @@ static void mmap_user_close(struct vm_area_struct *vma) | |||
1091 | omapfb_put_mem_region(rg); | 1091 | omapfb_put_mem_region(rg); |
1092 | } | 1092 | } |
1093 | 1093 | ||
1094 | static struct vm_operations_struct mmap_user_ops = { | 1094 | static const struct vm_operations_struct mmap_user_ops = { |
1095 | .open = mmap_user_open, | 1095 | .open = mmap_user_open, |
1096 | .close = mmap_user_close, | 1096 | .close = mmap_user_close, |
1097 | }; | 1097 | }; |
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 14370df9ac1c..4547a91bca67 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c | |||
@@ -494,7 +494,7 @@ static void gntalloc_vma_close(struct vm_area_struct *vma) | |||
494 | mutex_unlock(&gref_mutex); | 494 | mutex_unlock(&gref_mutex); |
495 | } | 495 | } |
496 | 496 | ||
497 | static struct vm_operations_struct gntalloc_vmops = { | 497 | static const struct vm_operations_struct gntalloc_vmops = { |
498 | .open = gntalloc_vma_open, | 498 | .open = gntalloc_vma_open, |
499 | .close = gntalloc_vma_close, | 499 | .close = gntalloc_vma_close, |
500 | }; | 500 | }; |
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 0dbb222daaf1..2ea0b3b2a91d 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c | |||
@@ -433,7 +433,7 @@ static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma, | |||
433 | return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT]; | 433 | return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT]; |
434 | } | 434 | } |
435 | 435 | ||
436 | static struct vm_operations_struct gntdev_vmops = { | 436 | static const struct vm_operations_struct gntdev_vmops = { |
437 | .open = gntdev_vma_open, | 437 | .open = gntdev_vma_open, |
438 | .close = gntdev_vma_close, | 438 | .close = gntdev_vma_close, |
439 | .find_special_page = gntdev_vma_find_special_page, | 439 | .find_special_page = gntdev_vma_find_special_page, |
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index c6deb87c5c69..5e9adac928e6 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c | |||
@@ -414,7 +414,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) | |||
414 | return 0; | 414 | return 0; |
415 | } | 415 | } |
416 | 416 | ||
417 | static struct vm_operations_struct privcmd_vm_ops; | 417 | static const struct vm_operations_struct privcmd_vm_ops; |
418 | 418 | ||
419 | static long privcmd_ioctl_mmap_batch(void __user *udata, int version) | 419 | static long privcmd_ioctl_mmap_batch(void __user *udata, int version) |
420 | { | 420 | { |
@@ -605,7 +605,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
605 | return VM_FAULT_SIGBUS; | 605 | return VM_FAULT_SIGBUS; |
606 | } | 606 | } |
607 | 607 | ||
608 | static struct vm_operations_struct privcmd_vm_ops = { | 608 | static const struct vm_operations_struct privcmd_vm_ops = { |
609 | .close = privcmd_close, | 609 | .close = privcmd_close, |
610 | .fault = privcmd_fault | 610 | .fault = privcmd_fault |
611 | }; | 611 | }; |
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index d757a3e610c6..79bc4933b13e 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c | |||
@@ -311,9 +311,6 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, | |||
311 | */ | 311 | */ |
312 | flags &= ~(__GFP_DMA | __GFP_HIGHMEM); | 312 | flags &= ~(__GFP_DMA | __GFP_HIGHMEM); |
313 | 313 | ||
314 | if (dma_alloc_from_coherent(hwdev, size, dma_handle, &ret)) | ||
315 | return ret; | ||
316 | |||
317 | /* On ARM this function returns an ioremap'ped virtual address for | 314 | /* On ARM this function returns an ioremap'ped virtual address for |
318 | * which virt_to_phys doesn't return the corresponding physical | 315 | * which virt_to_phys doesn't return the corresponding physical |
319 | * address. In fact on ARM virt_to_phys only works for kernel direct | 316 | * address. In fact on ARM virt_to_phys only works for kernel direct |
@@ -356,9 +353,6 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, | |||
356 | phys_addr_t phys; | 353 | phys_addr_t phys; |
357 | u64 dma_mask = DMA_BIT_MASK(32); | 354 | u64 dma_mask = DMA_BIT_MASK(32); |
358 | 355 | ||
359 | if (dma_release_from_coherent(hwdev, order, vaddr)) | ||
360 | return; | ||
361 | |||
362 | if (hwdev && hwdev->coherent_dma_mask) | 356 | if (hwdev && hwdev->coherent_dma_mask) |
363 | dma_mask = hwdev->coherent_dma_mask; | 357 | dma_mask = hwdev->coherent_dma_mask; |
364 | 358 | ||
diff --git a/fs/affs/super.c b/fs/affs/super.c index 3f89c9e05b40..5b50c4ca43a7 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/writeback.h> | 20 | #include <linux/writeback.h> |
21 | #include <linux/blkdev.h> | ||
21 | #include "affs.h" | 22 | #include "affs.h" |
22 | 23 | ||
23 | static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); | 24 | static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); |
@@ -352,18 +353,19 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) | |||
352 | * blocks, we will have to change it. | 353 | * blocks, we will have to change it. |
353 | */ | 354 | */ |
354 | 355 | ||
355 | size = sb->s_bdev->bd_inode->i_size >> 9; | 356 | size = i_size_read(sb->s_bdev->bd_inode) >> 9; |
356 | pr_debug("initial blocksize=%d, #blocks=%d\n", 512, size); | 357 | pr_debug("initial blocksize=%d, #blocks=%d\n", 512, size); |
357 | 358 | ||
358 | affs_set_blocksize(sb, PAGE_SIZE); | 359 | affs_set_blocksize(sb, PAGE_SIZE); |
359 | /* Try to find root block. Its location depends on the block size. */ | 360 | /* Try to find root block. Its location depends on the block size. */ |
360 | 361 | ||
361 | i = 512; | 362 | i = bdev_logical_block_size(sb->s_bdev); |
362 | j = 4096; | 363 | j = PAGE_SIZE; |
363 | if (blocksize > 0) { | 364 | if (blocksize > 0) { |
364 | i = j = blocksize; | 365 | i = j = blocksize; |
365 | size = size / (blocksize / 512); | 366 | size = size / (blocksize / 512); |
366 | } | 367 | } |
368 | |||
367 | for (blocksize = i; blocksize <= j; blocksize <<= 1, size >>= 1) { | 369 | for (blocksize = i; blocksize <= j; blocksize <<= 1, size >>= 1) { |
368 | sbi->s_root_block = root_block; | 370 | sbi->s_root_block = root_block; |
369 | if (root_block < 0) | 371 | if (root_block < 0) |
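The affs change replaces a raw bd_inode->i_size dereference and the hard-coded 512/4096 bounds with the block-layer accessors. A sketch of that sizing logic under assumed names (affs_like_block_count is not from the patch); i_size_read() and bdev_logical_block_size() are the real helpers:

#include <linux/fs.h>
#include <linux/blkdev.h>

static u64 affs_like_block_count(struct super_block *sb, unsigned int blocksize)
{
	/* device size in 512-byte sectors; i_size_read() is safe on 32-bit SMP */
	u64 sectors = i_size_read(sb->s_bdev->bd_inode) >> 9;

	/* a filesystem block cannot be smaller than the device's logical block */
	if (blocksize < bdev_logical_block_size(sb->s_bdev))
		return 0;

	return sectors / (blocksize / 512);
}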
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 890c50971a69..a268abfe60ac 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1593,7 +1593,7 @@ out: | |||
1593 | return err; | 1593 | return err; |
1594 | } | 1594 | } |
1595 | 1595 | ||
1596 | static struct vm_operations_struct ceph_vmops = { | 1596 | static const struct vm_operations_struct ceph_vmops = { |
1597 | .fault = ceph_filemap_fault, | 1597 | .fault = ceph_filemap_fault, |
1598 | .page_mkwrite = ceph_page_mkwrite, | 1598 | .page_mkwrite = ceph_page_mkwrite, |
1599 | }; | 1599 | }; |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3f50cee79df9..e2a6af1508af 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -3216,7 +3216,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
3216 | return VM_FAULT_LOCKED; | 3216 | return VM_FAULT_LOCKED; |
3217 | } | 3217 | } |
3218 | 3218 | ||
3219 | static struct vm_operations_struct cifs_file_vm_ops = { | 3219 | static const struct vm_operations_struct cifs_file_vm_ops = { |
3220 | .fault = filemap_fault, | 3220 | .fault = filemap_fault, |
3221 | .map_pages = filemap_map_pages, | 3221 | .map_pages = filemap_map_pages, |
3222 | .page_mkwrite = cifs_page_mkwrite, | 3222 | .page_mkwrite = cifs_page_mkwrite, |
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index 9b1ffaa0572e..f6c6c8adbc01 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c | |||
@@ -353,7 +353,7 @@ int venus_readlink(struct super_block *sb, struct CodaFid *fid, | |||
353 | char *result; | 353 | char *result; |
354 | 354 | ||
355 | insize = max_t(unsigned int, | 355 | insize = max_t(unsigned int, |
356 | INSIZE(readlink), OUTSIZE(readlink)+ *length + 1); | 356 | INSIZE(readlink), OUTSIZE(readlink)+ *length); |
357 | UPARG(CODA_READLINK); | 357 | UPARG(CODA_READLINK); |
358 | 358 | ||
359 | inp->coda_readlink.VFid = *fid; | 359 | inp->coda_readlink.VFid = *fid; |
@@ -361,8 +361,8 @@ int venus_readlink(struct super_block *sb, struct CodaFid *fid, | |||
361 | error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); | 361 | error = coda_upcall(coda_vcp(sb), insize, &outsize, inp); |
362 | if (!error) { | 362 | if (!error) { |
363 | retlen = outp->coda_readlink.count; | 363 | retlen = outp->coda_readlink.count; |
364 | if ( retlen > *length ) | 364 | if (retlen >= *length) |
365 | retlen = *length; | 365 | retlen = *length - 1; |
366 | *length = retlen; | 366 | *length = retlen; |
367 | result = (char *)outp + (long)outp->coda_readlink.data; | 367 | result = (char *)outp + (long)outp->coda_readlink.data; |
368 | memcpy(buffer, result, retlen); | 368 | memcpy(buffer, result, retlen); |
diff --git a/fs/coredump.c b/fs/coredump.c index c5ecde6f3eed..a8f75640ac86 100644 --- a/fs/coredump.c +++ b/fs/coredump.c | |||
@@ -513,10 +513,10 @@ void do_coredump(const siginfo_t *siginfo) | |||
513 | const struct cred *old_cred; | 513 | const struct cred *old_cred; |
514 | struct cred *cred; | 514 | struct cred *cred; |
515 | int retval = 0; | 515 | int retval = 0; |
516 | int flag = 0; | ||
517 | int ispipe; | 516 | int ispipe; |
518 | struct files_struct *displaced; | 517 | struct files_struct *displaced; |
519 | bool need_nonrelative = false; | 518 | /* require nonrelative corefile path and be extra careful */ |
519 | bool need_suid_safe = false; | ||
520 | bool core_dumped = false; | 520 | bool core_dumped = false; |
521 | static atomic_t core_dump_count = ATOMIC_INIT(0); | 521 | static atomic_t core_dump_count = ATOMIC_INIT(0); |
522 | struct coredump_params cprm = { | 522 | struct coredump_params cprm = { |
@@ -550,9 +550,8 @@ void do_coredump(const siginfo_t *siginfo) | |||
550 | */ | 550 | */ |
551 | if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { | 551 | if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { |
552 | /* Setuid core dump mode */ | 552 | /* Setuid core dump mode */ |
553 | flag = O_EXCL; /* Stop rewrite attacks */ | ||
554 | cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ | 553 | cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ |
555 | need_nonrelative = true; | 554 | need_suid_safe = true; |
556 | } | 555 | } |
557 | 556 | ||
558 | retval = coredump_wait(siginfo->si_signo, &core_state); | 557 | retval = coredump_wait(siginfo->si_signo, &core_state); |
@@ -633,7 +632,7 @@ void do_coredump(const siginfo_t *siginfo) | |||
633 | if (cprm.limit < binfmt->min_coredump) | 632 | if (cprm.limit < binfmt->min_coredump) |
634 | goto fail_unlock; | 633 | goto fail_unlock; |
635 | 634 | ||
636 | if (need_nonrelative && cn.corename[0] != '/') { | 635 | if (need_suid_safe && cn.corename[0] != '/') { |
637 | printk(KERN_WARNING "Pid %d(%s) can only dump core "\ | 636 | printk(KERN_WARNING "Pid %d(%s) can only dump core "\ |
638 | "to fully qualified path!\n", | 637 | "to fully qualified path!\n", |
639 | task_tgid_vnr(current), current->comm); | 638 | task_tgid_vnr(current), current->comm); |
@@ -641,8 +640,35 @@ void do_coredump(const siginfo_t *siginfo) | |||
641 | goto fail_unlock; | 640 | goto fail_unlock; |
642 | } | 641 | } |
643 | 642 | ||
643 | /* | ||
644 | * Unlink the file if it exists unless this is a SUID | ||
645 | * binary - in that case, we're running around with root | ||
646 | * privs and don't want to unlink another user's coredump. | ||
647 | */ | ||
648 | if (!need_suid_safe) { | ||
649 | mm_segment_t old_fs; | ||
650 | |||
651 | old_fs = get_fs(); | ||
652 | set_fs(KERNEL_DS); | ||
653 | /* | ||
654 | * If it doesn't exist, that's fine. If there's some | ||
655 | * other problem, we'll catch it at the filp_open(). | ||
656 | */ | ||
657 | (void) sys_unlink((const char __user *)cn.corename); | ||
658 | set_fs(old_fs); | ||
659 | } | ||
660 | |||
661 | /* | ||
662 | * There is a race between unlinking and creating the | ||
663 | * file, but if that causes an EEXIST here, that's | ||
664 | * fine - another process raced with us while creating | ||
665 | * the corefile, and the other process won. To userspace, | ||
666 | * what matters is that at least one of the two processes | ||
667 | * writes its coredump successfully, not which one. | ||
668 | */ | ||
644 | cprm.file = filp_open(cn.corename, | 669 | cprm.file = filp_open(cn.corename, |
645 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | 670 | O_CREAT | 2 | O_NOFOLLOW | |
671 | O_LARGEFILE | O_EXCL, | ||
646 | 0600); | 672 | 0600); |
647 | if (IS_ERR(cprm.file)) | 673 | if (IS_ERR(cprm.file)) |
648 | goto fail_unlock; | 674 | goto fail_unlock; |
@@ -659,11 +685,15 @@ void do_coredump(const siginfo_t *siginfo) | |||
659 | if (!S_ISREG(inode->i_mode)) | 685 | if (!S_ISREG(inode->i_mode)) |
660 | goto close_fail; | 686 | goto close_fail; |
661 | /* | 687 | /* |
662 | * Dont allow local users get cute and trick others to coredump | 688 | * Don't dump core if the filesystem changed owner or mode |
663 | * into their pre-created files. | 689 | * of the file during file creation. This is an issue when |
690 | * a process dumps core while its cwd is e.g. on a vfat | ||
691 | * filesystem. | ||
664 | */ | 692 | */ |
665 | if (!uid_eq(inode->i_uid, current_fsuid())) | 693 | if (!uid_eq(inode->i_uid, current_fsuid())) |
666 | goto close_fail; | 694 | goto close_fail; |
695 | if ((inode->i_mode & 0677) != 0600) | ||
696 | goto close_fail; | ||
667 | if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) | 697 | if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) |
668 | goto close_fail; | 698 | goto close_fail; |
669 | if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) | 699 | if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) |
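The coredump rework always opens the corefile with O_EXCL and, in the non-setuid case, does a best-effort unlink first so a stale core does not block the dump. Passing the kernel-space path to sys_unlink() requires the set_fs(KERNEL_DS) dance, sketched below with a hypothetical helper (unlink_existing_core and corename are assumptions):

#include <linux/syscalls.h>
#include <linux/uaccess.h>

static void unlink_existing_core(const char *corename)
{
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);	/* allow sys_unlink() to take a kernel pointer */
	(void) sys_unlink((const char __user *)corename);
	set_fs(old_fs);
}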
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index d3fa6bd9503e..221719eac5de 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c | |||
@@ -288,7 +288,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) | |||
288 | page_cache_release(page); | 288 | page_cache_release(page); |
289 | goto fail; | 289 | goto fail; |
290 | } | 290 | } |
291 | page_cache_release(page); | ||
292 | node->page[i] = page; | 291 | node->page[i] = page; |
293 | } | 292 | } |
294 | 293 | ||
@@ -398,11 +397,11 @@ node_error: | |||
398 | 397 | ||
399 | void hfs_bnode_free(struct hfs_bnode *node) | 398 | void hfs_bnode_free(struct hfs_bnode *node) |
400 | { | 399 | { |
401 | //int i; | 400 | int i; |
402 | 401 | ||
403 | //for (i = 0; i < node->tree->pages_per_bnode; i++) | 402 | for (i = 0; i < node->tree->pages_per_bnode; i++) |
404 | // if (node->page[i]) | 403 | if (node->page[i]) |
405 | // page_cache_release(node->page[i]); | 404 | page_cache_release(node->page[i]); |
406 | kfree(node); | 405 | kfree(node); |
407 | } | 406 | } |
408 | 407 | ||
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 9f4ee7f52026..6fc766df0461 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c | |||
@@ -131,13 +131,16 @@ skip: | |||
131 | hfs_bnode_write(node, entry, data_off + key_len, entry_len); | 131 | hfs_bnode_write(node, entry, data_off + key_len, entry_len); |
132 | hfs_bnode_dump(node); | 132 | hfs_bnode_dump(node); |
133 | 133 | ||
134 | if (new_node) { | 134 | /* |
135 | /* update parent key if we inserted a key | 135 | * update parent key if we inserted a key |
136 | * at the start of the first node | 136 | * at the start of the node and it is not the new node |
137 | */ | 137 | */ |
138 | if (!rec && new_node != node) | 138 | if (!rec && new_node != node) { |
139 | hfs_brec_update_parent(fd); | 139 | hfs_bnode_read_key(node, fd->search_key, data_off + size); |
140 | hfs_brec_update_parent(fd); | ||
141 | } | ||
140 | 142 | ||
143 | if (new_node) { | ||
141 | hfs_bnode_put(fd->bnode); | 144 | hfs_bnode_put(fd->bnode); |
142 | if (!new_node->parent) { | 145 | if (!new_node->parent) { |
143 | hfs_btree_inc_height(tree); | 146 | hfs_btree_inc_height(tree); |
@@ -166,9 +169,6 @@ skip: | |||
166 | goto again; | 169 | goto again; |
167 | } | 170 | } |
168 | 171 | ||
169 | if (!rec) | ||
170 | hfs_brec_update_parent(fd); | ||
171 | |||
172 | return 0; | 172 | return 0; |
173 | } | 173 | } |
174 | 174 | ||
@@ -366,6 +366,8 @@ again: | |||
366 | if (IS_ERR(parent)) | 366 | if (IS_ERR(parent)) |
367 | return PTR_ERR(parent); | 367 | return PTR_ERR(parent); |
368 | __hfs_brec_find(parent, fd); | 368 | __hfs_brec_find(parent, fd); |
369 | if (fd->record < 0) | ||
370 | return -ENOENT; | ||
369 | hfs_bnode_dump(parent); | 371 | hfs_bnode_dump(parent); |
370 | rec = fd->record; | 372 | rec = fd->record; |
371 | 373 | ||
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 759708fd9331..63924662aaf3 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c | |||
@@ -454,7 +454,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) | |||
454 | page_cache_release(page); | 454 | page_cache_release(page); |
455 | goto fail; | 455 | goto fail; |
456 | } | 456 | } |
457 | page_cache_release(page); | ||
458 | node->page[i] = page; | 457 | node->page[i] = page; |
459 | } | 458 | } |
460 | 459 | ||
@@ -566,13 +565,11 @@ node_error: | |||
566 | 565 | ||
567 | void hfs_bnode_free(struct hfs_bnode *node) | 566 | void hfs_bnode_free(struct hfs_bnode *node) |
568 | { | 567 | { |
569 | #if 0 | ||
570 | int i; | 568 | int i; |
571 | 569 | ||
572 | for (i = 0; i < node->tree->pages_per_bnode; i++) | 570 | for (i = 0; i < node->tree->pages_per_bnode; i++) |
573 | if (node->page[i]) | 571 | if (node->page[i]) |
574 | page_cache_release(node->page[i]); | 572 | page_cache_release(node->page[i]); |
575 | #endif | ||
576 | kfree(node); | 573 | kfree(node); |
577 | } | 574 | } |
578 | 575 | ||
diff --git a/fs/namei.c b/fs/namei.c index 29b927938b8c..726d211db484 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -2438,7 +2438,7 @@ done: | |||
2438 | 2438 | ||
2439 | /** | 2439 | /** |
2440 | * path_mountpoint - look up a path to be umounted | 2440 | * path_mountpoint - look up a path to be umounted |
2441 | * @nameidata: lookup context | 2441 | * @nd: lookup context |
2442 | * @flags: lookup flags | 2442 | * @flags: lookup flags |
2443 | * @path: pointer to container for result | 2443 | * @path: pointer to container for result |
2444 | * | 2444 | * |
diff --git a/fs/proc/base.c b/fs/proc/base.c index aa50d1ac28fc..b25eee4cead5 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1230,10 +1230,9 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1230 | size_t count, loff_t *ppos) | 1230 | size_t count, loff_t *ppos) |
1231 | { | 1231 | { |
1232 | struct inode * inode = file_inode(file); | 1232 | struct inode * inode = file_inode(file); |
1233 | char *page, *tmp; | ||
1234 | ssize_t length; | ||
1235 | uid_t loginuid; | 1233 | uid_t loginuid; |
1236 | kuid_t kloginuid; | 1234 | kuid_t kloginuid; |
1235 | int rv; | ||
1237 | 1236 | ||
1238 | rcu_read_lock(); | 1237 | rcu_read_lock(); |
1239 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { | 1238 | if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { |
@@ -1242,46 +1241,28 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
1242 | } | 1241 | } |
1243 | rcu_read_unlock(); | 1242 | rcu_read_unlock(); |
1244 | 1243 | ||
1245 | if (count >= PAGE_SIZE) | ||
1246 | count = PAGE_SIZE - 1; | ||
1247 | |||
1248 | if (*ppos != 0) { | 1244 | if (*ppos != 0) { |
1249 | /* No partial writes. */ | 1245 | /* No partial writes. */ |
1250 | return -EINVAL; | 1246 | return -EINVAL; |
1251 | } | 1247 | } |
1252 | page = (char*)__get_free_page(GFP_TEMPORARY); | ||
1253 | if (!page) | ||
1254 | return -ENOMEM; | ||
1255 | length = -EFAULT; | ||
1256 | if (copy_from_user(page, buf, count)) | ||
1257 | goto out_free_page; | ||
1258 | |||
1259 | page[count] = '\0'; | ||
1260 | loginuid = simple_strtoul(page, &tmp, 10); | ||
1261 | if (tmp == page) { | ||
1262 | length = -EINVAL; | ||
1263 | goto out_free_page; | ||
1264 | 1248 | ||
1265 | } | 1249 | rv = kstrtou32_from_user(buf, count, 10, &loginuid); |
1250 | if (rv < 0) | ||
1251 | return rv; | ||
1266 | 1252 | ||
1267 | /* is userspace trying to explicitly UNSET the loginuid? */ | 1253 | /* is userspace trying to explicitly UNSET the loginuid? */ |
1268 | if (loginuid == AUDIT_UID_UNSET) { | 1254 | if (loginuid == AUDIT_UID_UNSET) { |
1269 | kloginuid = INVALID_UID; | 1255 | kloginuid = INVALID_UID; |
1270 | } else { | 1256 | } else { |
1271 | kloginuid = make_kuid(file->f_cred->user_ns, loginuid); | 1257 | kloginuid = make_kuid(file->f_cred->user_ns, loginuid); |
1272 | if (!uid_valid(kloginuid)) { | 1258 | if (!uid_valid(kloginuid)) |
1273 | length = -EINVAL; | 1259 | return -EINVAL; |
1274 | goto out_free_page; | ||
1275 | } | ||
1276 | } | 1260 | } |
1277 | 1261 | ||
1278 | length = audit_set_loginuid(kloginuid); | 1262 | rv = audit_set_loginuid(kloginuid); |
1279 | if (likely(length == 0)) | 1263 | if (rv < 0) |
1280 | length = count; | 1264 | return rv; |
1281 | 1265 | return count; | |
1282 | out_free_page: | ||
1283 | free_page((unsigned long) page); | ||
1284 | return length; | ||
1285 | } | 1266 | } |
1286 | 1267 | ||
1287 | static const struct file_operations proc_loginuid_operations = { | 1268 | static const struct file_operations proc_loginuid_operations = { |
@@ -1335,8 +1316,9 @@ static ssize_t proc_fault_inject_write(struct file * file, | |||
1335 | const char __user * buf, size_t count, loff_t *ppos) | 1316 | const char __user * buf, size_t count, loff_t *ppos) |
1336 | { | 1317 | { |
1337 | struct task_struct *task; | 1318 | struct task_struct *task; |
1338 | char buffer[PROC_NUMBUF], *end; | 1319 | char buffer[PROC_NUMBUF]; |
1339 | int make_it_fail; | 1320 | int make_it_fail; |
1321 | int rv; | ||
1340 | 1322 | ||
1341 | if (!capable(CAP_SYS_RESOURCE)) | 1323 | if (!capable(CAP_SYS_RESOURCE)) |
1342 | return -EPERM; | 1324 | return -EPERM; |
@@ -1345,9 +1327,9 @@ static ssize_t proc_fault_inject_write(struct file * file, | |||
1345 | count = sizeof(buffer) - 1; | 1327 | count = sizeof(buffer) - 1; |
1346 | if (copy_from_user(buffer, buf, count)) | 1328 | if (copy_from_user(buffer, buf, count)) |
1347 | return -EFAULT; | 1329 | return -EFAULT; |
1348 | make_it_fail = simple_strtol(strstrip(buffer), &end, 0); | 1330 | rv = kstrtoint(strstrip(buffer), 0, &make_it_fail); |
1349 | if (*end) | 1331 | if (rv < 0) |
1350 | return -EINVAL; | 1332 | return rv; |
1351 | if (make_it_fail < 0 || make_it_fail > 1) | 1333 | if (make_it_fail < 0 || make_it_fail > 1) |
1352 | return -EINVAL; | 1334 | return -EINVAL; |
1353 | 1335 | ||
@@ -1836,8 +1818,6 @@ end_instantiate: | |||
1836 | return dir_emit(ctx, name, len, 1, DT_UNKNOWN); | 1818 | return dir_emit(ctx, name, len, 1, DT_UNKNOWN); |
1837 | } | 1819 | } |
1838 | 1820 | ||
1839 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
1840 | |||
1841 | /* | 1821 | /* |
1842 | * dname_to_vma_addr - maps a dentry name into two unsigned longs | 1822 | * dname_to_vma_addr - maps a dentry name into two unsigned longs |
1843 | * which represent vma start and end addresses. | 1823 | * which represent vma start and end addresses. |
@@ -1864,11 +1844,6 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) | |||
1864 | if (flags & LOOKUP_RCU) | 1844 | if (flags & LOOKUP_RCU) |
1865 | return -ECHILD; | 1845 | return -ECHILD; |
1866 | 1846 | ||
1867 | if (!capable(CAP_SYS_ADMIN)) { | ||
1868 | status = -EPERM; | ||
1869 | goto out_notask; | ||
1870 | } | ||
1871 | |||
1872 | inode = d_inode(dentry); | 1847 | inode = d_inode(dentry); |
1873 | task = get_proc_task(inode); | 1848 | task = get_proc_task(inode); |
1874 | if (!task) | 1849 | if (!task) |
@@ -1957,6 +1932,29 @@ struct map_files_info { | |||
1957 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ | 1932 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ |
1958 | }; | 1933 | }; |
1959 | 1934 | ||
1935 | /* | ||
1936 | * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the | ||
1937 | * symlinks may be used to bypass permissions on ancestor directories in the | ||
1938 | * path to the file in question. | ||
1939 | */ | ||
1940 | static const char * | ||
1941 | proc_map_files_follow_link(struct dentry *dentry, void **cookie) | ||
1942 | { | ||
1943 | if (!capable(CAP_SYS_ADMIN)) | ||
1944 | return ERR_PTR(-EPERM); | ||
1945 | |||
1946 | return proc_pid_follow_link(dentry, NULL); | ||
1947 | } | ||
1948 | |||
1949 | /* | ||
1950 | * Identical to proc_pid_link_inode_operations except for follow_link() | ||
1951 | */ | ||
1952 | static const struct inode_operations proc_map_files_link_inode_operations = { | ||
1953 | .readlink = proc_pid_readlink, | ||
1954 | .follow_link = proc_map_files_follow_link, | ||
1955 | .setattr = proc_setattr, | ||
1956 | }; | ||
1957 | |||
1960 | static int | 1958 | static int |
1961 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | 1959 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, |
1962 | struct task_struct *task, const void *ptr) | 1960 | struct task_struct *task, const void *ptr) |
@@ -1972,7 +1970,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | |||
1972 | ei = PROC_I(inode); | 1970 | ei = PROC_I(inode); |
1973 | ei->op.proc_get_link = proc_map_files_get_link; | 1971 | ei->op.proc_get_link = proc_map_files_get_link; |
1974 | 1972 | ||
1975 | inode->i_op = &proc_pid_link_inode_operations; | 1973 | inode->i_op = &proc_map_files_link_inode_operations; |
1976 | inode->i_size = 64; | 1974 | inode->i_size = 64; |
1977 | inode->i_mode = S_IFLNK; | 1975 | inode->i_mode = S_IFLNK; |
1978 | 1976 | ||
@@ -1996,10 +1994,6 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, | |||
1996 | int result; | 1994 | int result; |
1997 | struct mm_struct *mm; | 1995 | struct mm_struct *mm; |
1998 | 1996 | ||
1999 | result = -EPERM; | ||
2000 | if (!capable(CAP_SYS_ADMIN)) | ||
2001 | goto out; | ||
2002 | |||
2003 | result = -ENOENT; | 1997 | result = -ENOENT; |
2004 | task = get_proc_task(dir); | 1998 | task = get_proc_task(dir); |
2005 | if (!task) | 1999 | if (!task) |
@@ -2053,10 +2047,6 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) | |||
2053 | struct map_files_info *p; | 2047 | struct map_files_info *p; |
2054 | int ret; | 2048 | int ret; |
2055 | 2049 | ||
2056 | ret = -EPERM; | ||
2057 | if (!capable(CAP_SYS_ADMIN)) | ||
2058 | goto out; | ||
2059 | |||
2060 | ret = -ENOENT; | 2050 | ret = -ENOENT; |
2061 | task = get_proc_task(file_inode(file)); | 2051 | task = get_proc_task(file_inode(file)); |
2062 | if (!task) | 2052 | if (!task) |
@@ -2245,7 +2235,6 @@ static const struct file_operations proc_timers_operations = { | |||
2245 | .llseek = seq_lseek, | 2235 | .llseek = seq_lseek, |
2246 | .release = seq_release_private, | 2236 | .release = seq_release_private, |
2247 | }; | 2237 | }; |
2248 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | ||
2249 | 2238 | ||
2250 | static int proc_pident_instantiate(struct inode *dir, | 2239 | static int proc_pident_instantiate(struct inode *dir, |
2251 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 2240 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
@@ -2481,32 +2470,20 @@ static ssize_t proc_coredump_filter_write(struct file *file, | |||
2481 | { | 2470 | { |
2482 | struct task_struct *task; | 2471 | struct task_struct *task; |
2483 | struct mm_struct *mm; | 2472 | struct mm_struct *mm; |
2484 | char buffer[PROC_NUMBUF], *end; | ||
2485 | unsigned int val; | 2473 | unsigned int val; |
2486 | int ret; | 2474 | int ret; |
2487 | int i; | 2475 | int i; |
2488 | unsigned long mask; | 2476 | unsigned long mask; |
2489 | 2477 | ||
2490 | ret = -EFAULT; | 2478 | ret = kstrtouint_from_user(buf, count, 0, &val); |
2491 | memset(buffer, 0, sizeof(buffer)); | 2479 | if (ret < 0) |
2492 | if (count > sizeof(buffer) - 1) | 2480 | return ret; |
2493 | count = sizeof(buffer) - 1; | ||
2494 | if (copy_from_user(buffer, buf, count)) | ||
2495 | goto out_no_task; | ||
2496 | |||
2497 | ret = -EINVAL; | ||
2498 | val = (unsigned int)simple_strtoul(buffer, &end, 0); | ||
2499 | if (*end == '\n') | ||
2500 | end++; | ||
2501 | if (end - buffer == 0) | ||
2502 | goto out_no_task; | ||
2503 | 2481 | ||
2504 | ret = -ESRCH; | 2482 | ret = -ESRCH; |
2505 | task = get_proc_task(file_inode(file)); | 2483 | task = get_proc_task(file_inode(file)); |
2506 | if (!task) | 2484 | if (!task) |
2507 | goto out_no_task; | 2485 | goto out_no_task; |
2508 | 2486 | ||
2509 | ret = end - buffer; | ||
2510 | mm = get_task_mm(task); | 2487 | mm = get_task_mm(task); |
2511 | if (!mm) | 2488 | if (!mm) |
2512 | goto out_no_mm; | 2489 | goto out_no_mm; |
@@ -2522,7 +2499,9 @@ static ssize_t proc_coredump_filter_write(struct file *file, | |||
2522 | out_no_mm: | 2499 | out_no_mm: |
2523 | put_task_struct(task); | 2500 | put_task_struct(task); |
2524 | out_no_task: | 2501 | out_no_task: |
2525 | return ret; | 2502 | if (ret < 0) |
2503 | return ret; | ||
2504 | return count; | ||
2526 | } | 2505 | } |
2527 | 2506 | ||
2528 | static const struct file_operations proc_coredump_filter_operations = { | 2507 | static const struct file_operations proc_coredump_filter_operations = { |
@@ -2744,9 +2723,7 @@ static const struct inode_operations proc_task_inode_operations; | |||
2744 | static const struct pid_entry tgid_base_stuff[] = { | 2723 | static const struct pid_entry tgid_base_stuff[] = { |
2745 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), | 2724 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), |
2746 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 2725 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
2747 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
2748 | DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), | 2726 | DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), |
2749 | #endif | ||
2750 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), | 2727 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
2751 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), | 2728 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), |
2752 | #ifdef CONFIG_NET | 2729 | #ifdef CONFIG_NET |
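The /proc write handlers above drop their temporary buffers and simple_strtoul() parsing in favour of the kstrto*_from_user() helpers, which copy from userspace and parse in one step, rejecting trailing garbage. A minimal sketch of the resulting handler shape; the function name and what is done with the value are assumptions, kstrtouint_from_user() is the real helper:

#include <linux/kernel.h>
#include <linux/fs.h>

static ssize_t my_proc_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	unsigned int val;
	int rv;

	if (*ppos != 0)
		return -EINVAL;		/* no partial writes */

	/* returns 0 on success, -EINVAL/-ERANGE on malformed or out-of-range input */
	rv = kstrtouint_from_user(buf, count, 0, &val);
	if (rv < 0)
		return rv;

	/* ... apply val ... */
	return count;
}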
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index e5dee5c3188e..ff3ffc76a937 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -26,7 +26,7 @@ | |||
26 | 26 | ||
27 | #include "internal.h" | 27 | #include "internal.h" |
28 | 28 | ||
29 | static DEFINE_SPINLOCK(proc_subdir_lock); | 29 | static DEFINE_RWLOCK(proc_subdir_lock); |
30 | 30 | ||
31 | static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) | 31 | static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) |
32 | { | 32 | { |
@@ -172,9 +172,9 @@ static int xlate_proc_name(const char *name, struct proc_dir_entry **ret, | |||
172 | { | 172 | { |
173 | int rv; | 173 | int rv; |
174 | 174 | ||
175 | spin_lock(&proc_subdir_lock); | 175 | read_lock(&proc_subdir_lock); |
176 | rv = __xlate_proc_name(name, ret, residual); | 176 | rv = __xlate_proc_name(name, ret, residual); |
177 | spin_unlock(&proc_subdir_lock); | 177 | read_unlock(&proc_subdir_lock); |
178 | return rv; | 178 | return rv; |
179 | } | 179 | } |
180 | 180 | ||
@@ -231,11 +231,11 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, | |||
231 | { | 231 | { |
232 | struct inode *inode; | 232 | struct inode *inode; |
233 | 233 | ||
234 | spin_lock(&proc_subdir_lock); | 234 | read_lock(&proc_subdir_lock); |
235 | de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); | 235 | de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); |
236 | if (de) { | 236 | if (de) { |
237 | pde_get(de); | 237 | pde_get(de); |
238 | spin_unlock(&proc_subdir_lock); | 238 | read_unlock(&proc_subdir_lock); |
239 | inode = proc_get_inode(dir->i_sb, de); | 239 | inode = proc_get_inode(dir->i_sb, de); |
240 | if (!inode) | 240 | if (!inode) |
241 | return ERR_PTR(-ENOMEM); | 241 | return ERR_PTR(-ENOMEM); |
@@ -243,7 +243,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, | |||
243 | d_add(dentry, inode); | 243 | d_add(dentry, inode); |
244 | return NULL; | 244 | return NULL; |
245 | } | 245 | } |
246 | spin_unlock(&proc_subdir_lock); | 246 | read_unlock(&proc_subdir_lock); |
247 | return ERR_PTR(-ENOENT); | 247 | return ERR_PTR(-ENOENT); |
248 | } | 248 | } |
249 | 249 | ||
@@ -270,12 +270,12 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, | |||
270 | if (!dir_emit_dots(file, ctx)) | 270 | if (!dir_emit_dots(file, ctx)) |
271 | return 0; | 271 | return 0; |
272 | 272 | ||
273 | spin_lock(&proc_subdir_lock); | 273 | read_lock(&proc_subdir_lock); |
274 | de = pde_subdir_first(de); | 274 | de = pde_subdir_first(de); |
275 | i = ctx->pos - 2; | 275 | i = ctx->pos - 2; |
276 | for (;;) { | 276 | for (;;) { |
277 | if (!de) { | 277 | if (!de) { |
278 | spin_unlock(&proc_subdir_lock); | 278 | read_unlock(&proc_subdir_lock); |
279 | return 0; | 279 | return 0; |
280 | } | 280 | } |
281 | if (!i) | 281 | if (!i) |
@@ -287,19 +287,19 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file, | |||
287 | do { | 287 | do { |
288 | struct proc_dir_entry *next; | 288 | struct proc_dir_entry *next; |
289 | pde_get(de); | 289 | pde_get(de); |
290 | spin_unlock(&proc_subdir_lock); | 290 | read_unlock(&proc_subdir_lock); |
291 | if (!dir_emit(ctx, de->name, de->namelen, | 291 | if (!dir_emit(ctx, de->name, de->namelen, |
292 | de->low_ino, de->mode >> 12)) { | 292 | de->low_ino, de->mode >> 12)) { |
293 | pde_put(de); | 293 | pde_put(de); |
294 | return 0; | 294 | return 0; |
295 | } | 295 | } |
296 | spin_lock(&proc_subdir_lock); | 296 | read_lock(&proc_subdir_lock); |
297 | ctx->pos++; | 297 | ctx->pos++; |
298 | next = pde_subdir_next(de); | 298 | next = pde_subdir_next(de); |
299 | pde_put(de); | 299 | pde_put(de); |
300 | de = next; | 300 | de = next; |
301 | } while (de); | 301 | } while (de); |
302 | spin_unlock(&proc_subdir_lock); | 302 | read_unlock(&proc_subdir_lock); |
303 | return 1; | 303 | return 1; |
304 | } | 304 | } |
305 | 305 | ||
@@ -338,16 +338,16 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
338 | if (ret) | 338 | if (ret) |
339 | return ret; | 339 | return ret; |
340 | 340 | ||
341 | spin_lock(&proc_subdir_lock); | 341 | write_lock(&proc_subdir_lock); |
342 | dp->parent = dir; | 342 | dp->parent = dir; |
343 | if (pde_subdir_insert(dir, dp) == false) { | 343 | if (pde_subdir_insert(dir, dp) == false) { |
344 | WARN(1, "proc_dir_entry '%s/%s' already registered\n", | 344 | WARN(1, "proc_dir_entry '%s/%s' already registered\n", |
345 | dir->name, dp->name); | 345 | dir->name, dp->name); |
346 | spin_unlock(&proc_subdir_lock); | 346 | write_unlock(&proc_subdir_lock); |
347 | proc_free_inum(dp->low_ino); | 347 | proc_free_inum(dp->low_ino); |
348 | return -EEXIST; | 348 | return -EEXIST; |
349 | } | 349 | } |
350 | spin_unlock(&proc_subdir_lock); | 350 | write_unlock(&proc_subdir_lock); |
351 | 351 | ||
352 | return 0; | 352 | return 0; |
353 | } | 353 | } |
@@ -549,9 +549,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) | |||
549 | const char *fn = name; | 549 | const char *fn = name; |
550 | unsigned int len; | 550 | unsigned int len; |
551 | 551 | ||
552 | spin_lock(&proc_subdir_lock); | 552 | write_lock(&proc_subdir_lock); |
553 | if (__xlate_proc_name(name, &parent, &fn) != 0) { | 553 | if (__xlate_proc_name(name, &parent, &fn) != 0) { |
554 | spin_unlock(&proc_subdir_lock); | 554 | write_unlock(&proc_subdir_lock); |
555 | return; | 555 | return; |
556 | } | 556 | } |
557 | len = strlen(fn); | 557 | len = strlen(fn); |
@@ -559,7 +559,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) | |||
559 | de = pde_subdir_find(parent, fn, len); | 559 | de = pde_subdir_find(parent, fn, len); |
560 | if (de) | 560 | if (de) |
561 | rb_erase(&de->subdir_node, &parent->subdir); | 561 | rb_erase(&de->subdir_node, &parent->subdir); |
562 | spin_unlock(&proc_subdir_lock); | 562 | write_unlock(&proc_subdir_lock); |
563 | if (!de) { | 563 | if (!de) { |
564 | WARN(1, "name '%s'\n", name); | 564 | WARN(1, "name '%s'\n", name); |
565 | return; | 565 | return; |
@@ -583,16 +583,16 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) | |||
583 | const char *fn = name; | 583 | const char *fn = name; |
584 | unsigned int len; | 584 | unsigned int len; |
585 | 585 | ||
586 | spin_lock(&proc_subdir_lock); | 586 | write_lock(&proc_subdir_lock); |
587 | if (__xlate_proc_name(name, &parent, &fn) != 0) { | 587 | if (__xlate_proc_name(name, &parent, &fn) != 0) { |
588 | spin_unlock(&proc_subdir_lock); | 588 | write_unlock(&proc_subdir_lock); |
589 | return -ENOENT; | 589 | return -ENOENT; |
590 | } | 590 | } |
591 | len = strlen(fn); | 591 | len = strlen(fn); |
592 | 592 | ||
593 | root = pde_subdir_find(parent, fn, len); | 593 | root = pde_subdir_find(parent, fn, len); |
594 | if (!root) { | 594 | if (!root) { |
595 | spin_unlock(&proc_subdir_lock); | 595 | write_unlock(&proc_subdir_lock); |
596 | return -ENOENT; | 596 | return -ENOENT; |
597 | } | 597 | } |
598 | rb_erase(&root->subdir_node, &parent->subdir); | 598 | rb_erase(&root->subdir_node, &parent->subdir); |
@@ -605,7 +605,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) | |||
605 | de = next; | 605 | de = next; |
606 | continue; | 606 | continue; |
607 | } | 607 | } |
608 | spin_unlock(&proc_subdir_lock); | 608 | write_unlock(&proc_subdir_lock); |
609 | 609 | ||
610 | proc_entry_rundown(de); | 610 | proc_entry_rundown(de); |
611 | next = de->parent; | 611 | next = de->parent; |
@@ -616,7 +616,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) | |||
616 | break; | 616 | break; |
617 | pde_put(de); | 617 | pde_put(de); |
618 | 618 | ||
619 | spin_lock(&proc_subdir_lock); | 619 | write_lock(&proc_subdir_lock); |
620 | de = next; | 620 | de = next; |
621 | } | 621 | } |
622 | pde_put(root); | 622 | pde_put(root); |
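The change above converts proc_subdir_lock from a spinlock to a reader/writer lock: lookup and readdir paths now take it shared and can run in parallel, while registration and removal keep exclusive access. A minimal sketch of the pattern, with example_lock and the placeholder bodies standing in for the real rbtree operations:

#include <linux/spinlock.h>

static DEFINE_RWLOCK(example_lock);

static void example_lookup(void)
{
	read_lock(&example_lock);	/* readers may run concurrently */
	/* ... walk the rbtree of child entries ... */
	read_unlock(&example_lock);
}

static void example_register(void)
{
	write_lock(&example_lock);	/* writers exclude everyone */
	/* ... insert into or erase from the rbtree ... */
	write_unlock(&example_lock);
}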
diff --git a/fs/proc/page.c b/fs/proc/page.c index 7eee2d8b97d9..93484034a03d 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
@@ -9,12 +9,16 @@ | |||
9 | #include <linux/proc_fs.h> | 9 | #include <linux/proc_fs.h> |
10 | #include <linux/seq_file.h> | 10 | #include <linux/seq_file.h> |
11 | #include <linux/hugetlb.h> | 11 | #include <linux/hugetlb.h> |
12 | #include <linux/memcontrol.h> | ||
13 | #include <linux/mmu_notifier.h> | ||
14 | #include <linux/page_idle.h> | ||
12 | #include <linux/kernel-page-flags.h> | 15 | #include <linux/kernel-page-flags.h> |
13 | #include <asm/uaccess.h> | 16 | #include <asm/uaccess.h> |
14 | #include "internal.h" | 17 | #include "internal.h" |
15 | 18 | ||
16 | #define KPMSIZE sizeof(u64) | 19 | #define KPMSIZE sizeof(u64) |
17 | #define KPMMASK (KPMSIZE - 1) | 20 | #define KPMMASK (KPMSIZE - 1) |
21 | #define KPMBITS (KPMSIZE * BITS_PER_BYTE) | ||
18 | 22 | ||
19 | /* /proc/kpagecount - an array exposing page counts | 23 | /* /proc/kpagecount - an array exposing page counts |
20 | * | 24 | * |
@@ -54,6 +58,8 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, | |||
54 | pfn++; | 58 | pfn++; |
55 | out++; | 59 | out++; |
56 | count -= KPMSIZE; | 60 | count -= KPMSIZE; |
61 | |||
62 | cond_resched(); | ||
57 | } | 63 | } |
58 | 64 | ||
59 | *ppos += (char __user *)out - buf; | 65 | *ppos += (char __user *)out - buf; |
@@ -146,6 +152,9 @@ u64 stable_page_flags(struct page *page) | |||
146 | if (PageBalloon(page)) | 152 | if (PageBalloon(page)) |
147 | u |= 1 << KPF_BALLOON; | 153 | u |= 1 << KPF_BALLOON; |
148 | 154 | ||
155 | if (page_is_idle(page)) | ||
156 | u |= 1 << KPF_IDLE; | ||
157 | |||
149 | u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); | 158 | u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); |
150 | 159 | ||
151 | u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); | 160 | u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); |
@@ -212,6 +221,8 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, | |||
212 | pfn++; | 221 | pfn++; |
213 | out++; | 222 | out++; |
214 | count -= KPMSIZE; | 223 | count -= KPMSIZE; |
224 | |||
225 | cond_resched(); | ||
215 | } | 226 | } |
216 | 227 | ||
217 | *ppos += (char __user *)out - buf; | 228 | *ppos += (char __user *)out - buf; |
@@ -225,10 +236,64 @@ static const struct file_operations proc_kpageflags_operations = { | |||
225 | .read = kpageflags_read, | 236 | .read = kpageflags_read, |
226 | }; | 237 | }; |
227 | 238 | ||
239 | #ifdef CONFIG_MEMCG | ||
240 | static ssize_t kpagecgroup_read(struct file *file, char __user *buf, | ||
241 | size_t count, loff_t *ppos) | ||
242 | { | ||
243 | u64 __user *out = (u64 __user *)buf; | ||
244 | struct page *ppage; | ||
245 | unsigned long src = *ppos; | ||
246 | unsigned long pfn; | ||
247 | ssize_t ret = 0; | ||
248 | u64 ino; | ||
249 | |||
250 | pfn = src / KPMSIZE; | ||
251 | count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); | ||
252 | if (src & KPMMASK || count & KPMMASK) | ||
253 | return -EINVAL; | ||
254 | |||
255 | while (count > 0) { | ||
256 | if (pfn_valid(pfn)) | ||
257 | ppage = pfn_to_page(pfn); | ||
258 | else | ||
259 | ppage = NULL; | ||
260 | |||
261 | if (ppage) | ||
262 | ino = page_cgroup_ino(ppage); | ||
263 | else | ||
264 | ino = 0; | ||
265 | |||
266 | if (put_user(ino, out)) { | ||
267 | ret = -EFAULT; | ||
268 | break; | ||
269 | } | ||
270 | |||
271 | pfn++; | ||
272 | out++; | ||
273 | count -= KPMSIZE; | ||
274 | |||
275 | cond_resched(); | ||
276 | } | ||
277 | |||
278 | *ppos += (char __user *)out - buf; | ||
279 | if (!ret) | ||
280 | ret = (char __user *)out - buf; | ||
281 | return ret; | ||
282 | } | ||
283 | |||
284 | static const struct file_operations proc_kpagecgroup_operations = { | ||
285 | .llseek = mem_lseek, | ||
286 | .read = kpagecgroup_read, | ||
287 | }; | ||
288 | #endif /* CONFIG_MEMCG */ | ||
289 | |||
228 | static int __init proc_page_init(void) | 290 | static int __init proc_page_init(void) |
229 | { | 291 | { |
230 | proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); | 292 | proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); |
231 | proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); | 293 | proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); |
294 | #ifdef CONFIG_MEMCG | ||
295 | proc_create("kpagecgroup", S_IRUSR, NULL, &proc_kpagecgroup_operations); | ||
296 | #endif | ||
232 | return 0; | 297 | return 0; |
233 | } | 298 | } |
234 | fs_initcall(proc_page_init); | 299 | fs_initcall(proc_page_init); |
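With CONFIG_MEMCG, the new /proc/kpagecgroup file exports one u64 per PFN holding the inode number of the memory cgroup the page is charged to, and the KPMMASK check above rejects reads that are not 8-byte aligned. A small user-space sketch under those assumptions (the PFN is an arbitrary example):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned long pfn = 0x1000;	/* example PFN, assumption */
	uint64_t ino;
	int fd = open("/proc/kpagecgroup", O_RDONLY);

	if (fd < 0)
		return 1;
	/* offset and length must both be multiples of sizeof(u64) */
	if (pread(fd, &ino, sizeof(ino), pfn * sizeof(ino)) != sizeof(ino)) {
		close(fd);
		return 1;
	}
	printf("pfn %lu -> memcg inode %llu\n", pfn, (unsigned long long)ino);
	close(fd);
	return 0;
}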
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 41f1a50c10c9..e2d46adb54b4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/swap.h> | 13 | #include <linux/swap.h> |
14 | #include <linux/swapops.h> | 14 | #include <linux/swapops.h> |
15 | #include <linux/mmu_notifier.h> | 15 | #include <linux/mmu_notifier.h> |
16 | #include <linux/page_idle.h> | ||
16 | 17 | ||
17 | #include <asm/elf.h> | 18 | #include <asm/elf.h> |
18 | #include <asm/uaccess.h> | 19 | #include <asm/uaccess.h> |
@@ -459,7 +460,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, | |||
459 | 460 | ||
460 | mss->resident += size; | 461 | mss->resident += size; |
461 | /* Accumulate the size in pages that have been accessed. */ | 462 | /* Accumulate the size in pages that have been accessed. */ |
462 | if (young || PageReferenced(page)) | 463 | if (young || page_is_young(page) || PageReferenced(page)) |
463 | mss->referenced += size; | 464 | mss->referenced += size; |
464 | mapcount = page_mapcount(page); | 465 | mapcount = page_mapcount(page); |
465 | if (mapcount >= 2) { | 466 | if (mapcount >= 2) { |
@@ -807,6 +808,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
807 | 808 | ||
808 | /* Clear accessed and referenced bits. */ | 809 | /* Clear accessed and referenced bits. */ |
809 | pmdp_test_and_clear_young(vma, addr, pmd); | 810 | pmdp_test_and_clear_young(vma, addr, pmd); |
811 | test_and_clear_page_young(page); | ||
810 | ClearPageReferenced(page); | 812 | ClearPageReferenced(page); |
811 | out: | 813 | out: |
812 | spin_unlock(ptl); | 814 | spin_unlock(ptl); |
@@ -834,6 +836,7 @@ out: | |||
834 | 836 | ||
835 | /* Clear accessed and referenced bits. */ | 837 | /* Clear accessed and referenced bits. */ |
836 | ptep_test_and_clear_young(vma, addr, pte); | 838 | ptep_test_and_clear_young(vma, addr, pte); |
839 | test_and_clear_page_young(page); | ||
837 | ClearPageReferenced(page); | 840 | ClearPageReferenced(page); |
838 | } | 841 | } |
839 | pte_unmap_unlock(pte - 1, ptl); | 842 | pte_unmap_unlock(pte - 1, ptl); |
diff --git a/fs/seq_file.c b/fs/seq_file.c index ce9e39fd5daf..263b125dbcf4 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/cred.h> | 13 | #include <linux/cred.h> |
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/printk.h> | ||
15 | 16 | ||
16 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
17 | #include <asm/page.h> | 18 | #include <asm/page.h> |
@@ -773,6 +774,47 @@ void seq_pad(struct seq_file *m, char c) | |||
773 | } | 774 | } |
774 | EXPORT_SYMBOL(seq_pad); | 775 | EXPORT_SYMBOL(seq_pad); |
775 | 776 | ||
777 | /* A complete analogue of print_hex_dump() */ | ||
778 | void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, | ||
779 | int rowsize, int groupsize, const void *buf, size_t len, | ||
780 | bool ascii) | ||
781 | { | ||
782 | const u8 *ptr = buf; | ||
783 | int i, linelen, remaining = len; | ||
784 | int ret; | ||
785 | |||
786 | if (rowsize != 16 && rowsize != 32) | ||
787 | rowsize = 16; | ||
788 | |||
789 | for (i = 0; i < len && !seq_has_overflowed(m); i += rowsize) { | ||
790 | linelen = min(remaining, rowsize); | ||
791 | remaining -= rowsize; | ||
792 | |||
793 | switch (prefix_type) { | ||
794 | case DUMP_PREFIX_ADDRESS: | ||
795 | seq_printf(m, "%s%p: ", prefix_str, ptr + i); | ||
796 | break; | ||
797 | case DUMP_PREFIX_OFFSET: | ||
798 | seq_printf(m, "%s%.8x: ", prefix_str, i); | ||
799 | break; | ||
800 | default: | ||
801 | seq_printf(m, "%s", prefix_str); | ||
802 | break; | ||
803 | } | ||
804 | |||
805 | ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, | ||
806 | m->buf + m->count, m->size - m->count, | ||
807 | ascii); | ||
808 | if (ret >= m->size - m->count) { | ||
809 | seq_set_overflow(m); | ||
810 | } else { | ||
811 | m->count += ret; | ||
812 | seq_putc(m, '\n'); | ||
813 | } | ||
814 | } | ||
815 | } | ||
816 | EXPORT_SYMBOL(seq_hex_dump); | ||
817 | |||
776 | struct list_head *seq_list_start(struct list_head *head, loff_t pos) | 818 | struct list_head *seq_list_start(struct list_head *head, loff_t pos) |
777 | { | 819 | { |
778 | struct list_head *lh; | 820 | struct list_head *lh; |
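seq_hex_dump() is described above as a seq_file analogue of print_hex_dump(), taking the same prefix, row and group parameters. A hedged sketch of a show callback using it (the function and data are illustrative only):

#include <linux/printk.h>
#include <linux/seq_file.h>

static int example_show(struct seq_file *m, void *v)
{
	static const u8 data[32] = { 0xde, 0xad, 0xbe, 0xef };

	seq_puts(m, "raw data:\n");
	/* 16 bytes per row, 1-byte groups, offset prefix, ASCII column */
	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 16, 1,
		     data, sizeof(data), true);
	return 0;
}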
diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index 940d5ec122c9..b1bc954eccf3 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/scatterlist.h> | 6 | #include <linux/scatterlist.h> |
7 | #include <linux/dma-debug.h> | 7 | #include <linux/dma-debug.h> |
8 | #include <linux/dma-attrs.h> | 8 | #include <linux/dma-attrs.h> |
9 | #include <asm-generic/dma-coherent.h> | ||
9 | 10 | ||
10 | static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, | 11 | static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, |
11 | size_t size, | 12 | size_t size, |
@@ -237,4 +238,121 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, | |||
237 | 238 | ||
238 | #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, NULL) | 239 | #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, NULL) |
239 | 240 | ||
241 | #ifndef arch_dma_alloc_attrs | ||
242 | #define arch_dma_alloc_attrs(dev, flag) (true) | ||
243 | #endif | ||
244 | |||
245 | static inline void *dma_alloc_attrs(struct device *dev, size_t size, | ||
246 | dma_addr_t *dma_handle, gfp_t flag, | ||
247 | struct dma_attrs *attrs) | ||
248 | { | ||
249 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
250 | void *cpu_addr; | ||
251 | |||
252 | BUG_ON(!ops); | ||
253 | |||
254 | if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr)) | ||
255 | return cpu_addr; | ||
256 | |||
257 | if (!arch_dma_alloc_attrs(&dev, &flag)) | ||
258 | return NULL; | ||
259 | if (!ops->alloc) | ||
260 | return NULL; | ||
261 | |||
262 | cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); | ||
263 | debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); | ||
264 | return cpu_addr; | ||
265 | } | ||
266 | |||
267 | static inline void dma_free_attrs(struct device *dev, size_t size, | ||
268 | void *cpu_addr, dma_addr_t dma_handle, | ||
269 | struct dma_attrs *attrs) | ||
270 | { | ||
271 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
272 | |||
273 | BUG_ON(!ops); | ||
274 | WARN_ON(irqs_disabled()); | ||
275 | |||
276 | if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) | ||
277 | return; | ||
278 | |||
279 | if (!ops->free) | ||
280 | return; | ||
281 | |||
282 | debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); | ||
283 | ops->free(dev, size, cpu_addr, dma_handle, attrs); | ||
284 | } | ||
285 | |||
286 | static inline void *dma_alloc_coherent(struct device *dev, size_t size, | ||
287 | dma_addr_t *dma_handle, gfp_t flag) | ||
288 | { | ||
289 | return dma_alloc_attrs(dev, size, dma_handle, flag, NULL); | ||
290 | } | ||
291 | |||
292 | static inline void dma_free_coherent(struct device *dev, size_t size, | ||
293 | void *cpu_addr, dma_addr_t dma_handle) | ||
294 | { | ||
295 | return dma_free_attrs(dev, size, cpu_addr, dma_handle, NULL); | ||
296 | } | ||
297 | |||
298 | static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, | ||
299 | dma_addr_t *dma_handle, gfp_t gfp) | ||
300 | { | ||
301 | DEFINE_DMA_ATTRS(attrs); | ||
302 | |||
303 | dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs); | ||
304 | return dma_alloc_attrs(dev, size, dma_handle, gfp, &attrs); | ||
305 | } | ||
306 | |||
307 | static inline void dma_free_noncoherent(struct device *dev, size_t size, | ||
308 | void *cpu_addr, dma_addr_t dma_handle) | ||
309 | { | ||
310 | DEFINE_DMA_ATTRS(attrs); | ||
311 | |||
312 | dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs); | ||
313 | dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); | ||
314 | } | ||
315 | |||
316 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
317 | { | ||
318 | debug_dma_mapping_error(dev, dma_addr); | ||
319 | |||
320 | if (get_dma_ops(dev)->mapping_error) | ||
321 | return get_dma_ops(dev)->mapping_error(dev, dma_addr); | ||
322 | |||
323 | #ifdef DMA_ERROR_CODE | ||
324 | return dma_addr == DMA_ERROR_CODE; | ||
325 | #else | ||
326 | return 0; | ||
327 | #endif | ||
328 | } | ||
329 | |||
330 | #ifndef HAVE_ARCH_DMA_SUPPORTED | ||
331 | static inline int dma_supported(struct device *dev, u64 mask) | ||
332 | { | ||
333 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
334 | |||
335 | if (!ops) | ||
336 | return 0; | ||
337 | if (!ops->dma_supported) | ||
338 | return 1; | ||
339 | return ops->dma_supported(dev, mask); | ||
340 | } | ||
341 | #endif | ||
342 | |||
343 | #ifndef HAVE_ARCH_DMA_SET_MASK | ||
344 | static inline int dma_set_mask(struct device *dev, u64 mask) | ||
345 | { | ||
346 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
347 | |||
348 | if (ops->set_dma_mask) | ||
349 | return ops->set_dma_mask(dev, mask); | ||
350 | |||
351 | if (!dev->dma_mask || !dma_supported(dev, mask)) | ||
352 | return -EIO; | ||
353 | *dev->dma_mask = mask; | ||
354 | return 0; | ||
355 | } | ||
356 | #endif | ||
357 | |||
240 | #endif | 358 | #endif |
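These additions move the dma_alloc_coherent()/dma_free_coherent() family into the generic header, trying per-device coherent memory first and falling back to the architecture's dma_map_ops. A sketch of a hypothetical driver relying on the generic wrappers (names and sizes are illustrative):

#include <linux/dma-mapping.h>

static void *example_buf;
static dma_addr_t example_dma;

static int example_probe(struct device *dev)
{
	/* Uses the generic dma_set_mask() above unless the architecture
	 * defines HAVE_ARCH_DMA_SET_MASK. */
	if (dma_set_mask(dev, DMA_BIT_MASK(32)))
		return -EIO;

	/* Ends up in dma_alloc_attrs(): coherent pool first, then
	 * ops->alloc(). */
	example_buf = dma_alloc_coherent(dev, PAGE_SIZE, &example_dma,
					 GFP_KERNEL);
	return example_buf ? 0 : -ENOMEM;
}

static void example_remove(struct device *dev)
{
	dma_free_coherent(dev, PAGE_SIZE, example_buf, example_dma);
}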
diff --git a/include/linux/kexec.h b/include/linux/kexec.h index b63218f68c4b..d140b1e9faa7 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h | |||
@@ -16,7 +16,7 @@ | |||
16 | 16 | ||
17 | #include <uapi/linux/kexec.h> | 17 | #include <uapi/linux/kexec.h> |
18 | 18 | ||
19 | #ifdef CONFIG_KEXEC | 19 | #ifdef CONFIG_KEXEC_CORE |
20 | #include <linux/list.h> | 20 | #include <linux/list.h> |
21 | #include <linux/linkage.h> | 21 | #include <linux/linkage.h> |
22 | #include <linux/compat.h> | 22 | #include <linux/compat.h> |
@@ -318,13 +318,24 @@ int crash_shrink_memory(unsigned long new_size); | |||
318 | size_t crash_get_memory_size(void); | 318 | size_t crash_get_memory_size(void); |
319 | void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); | 319 | void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); |
320 | 320 | ||
321 | #else /* !CONFIG_KEXEC */ | 321 | int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, |
322 | unsigned long buf_len); | ||
323 | void * __weak arch_kexec_kernel_image_load(struct kimage *image); | ||
324 | int __weak arch_kimage_file_post_load_cleanup(struct kimage *image); | ||
325 | int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, | ||
326 | unsigned long buf_len); | ||
327 | int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, | ||
328 | Elf_Shdr *sechdrs, unsigned int relsec); | ||
329 | int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | ||
330 | unsigned int relsec); | ||
331 | |||
332 | #else /* !CONFIG_KEXEC_CORE */ | ||
322 | struct pt_regs; | 333 | struct pt_regs; |
323 | struct task_struct; | 334 | struct task_struct; |
324 | static inline void crash_kexec(struct pt_regs *regs) { } | 335 | static inline void crash_kexec(struct pt_regs *regs) { } |
325 | static inline int kexec_should_crash(struct task_struct *p) { return 0; } | 336 | static inline int kexec_should_crash(struct task_struct *p) { return 0; } |
326 | #define kexec_in_progress false | 337 | #define kexec_in_progress false |
327 | #endif /* CONFIG_KEXEC */ | 338 | #endif /* CONFIG_KEXEC_CORE */ |
328 | 339 | ||
329 | #endif /* !defined(__ASSEBMLY__) */ | 340 | #endif /* !defined(__ASSEBMLY__) */ |
330 | 341 | ||
diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 0555cc66a15b..fcfd2bf14d3f 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h | |||
@@ -85,8 +85,6 @@ enum umh_disable_depth { | |||
85 | UMH_DISABLED, | 85 | UMH_DISABLED, |
86 | }; | 86 | }; |
87 | 87 | ||
88 | extern void usermodehelper_init(void); | ||
89 | |||
90 | extern int __usermodehelper_disable(enum umh_disable_depth depth); | 88 | extern int __usermodehelper_disable(enum umh_disable_depth depth); |
91 | extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth); | 89 | extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth); |
92 | 90 | ||
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d92b80b63c5c..ad800e62cb7a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -305,11 +305,9 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); | |||
305 | struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); | 305 | struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); |
306 | 306 | ||
307 | bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); | 307 | bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); |
308 | |||
309 | struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); | ||
310 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); | 308 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); |
311 | |||
312 | struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); | 309 | struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); |
310 | |||
313 | static inline | 311 | static inline |
314 | struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ | 312 | struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){ |
315 | return css ? container_of(css, struct mem_cgroup, css) : NULL; | 313 | return css ? container_of(css, struct mem_cgroup, css) : NULL; |
@@ -345,6 +343,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, | |||
345 | } | 343 | } |
346 | 344 | ||
347 | struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); | 345 | struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); |
346 | ino_t page_cgroup_ino(struct page *page); | ||
348 | 347 | ||
349 | static inline bool mem_cgroup_disabled(void) | 348 | static inline bool mem_cgroup_disabled(void) |
350 | { | 349 | { |
@@ -555,11 +554,6 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, | |||
555 | return &zone->lruvec; | 554 | return &zone->lruvec; |
556 | } | 555 | } |
557 | 556 | ||
558 | static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) | ||
559 | { | ||
560 | return NULL; | ||
561 | } | ||
562 | |||
563 | static inline bool mm_match_cgroup(struct mm_struct *mm, | 557 | static inline bool mm_match_cgroup(struct mm_struct *mm, |
564 | struct mem_cgroup *memcg) | 558 | struct mem_cgroup *memcg) |
565 | { | 559 | { |
diff --git a/include/linux/mm.h b/include/linux/mm.h index f25a957bf0ab..fda728e3c27d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1873,11 +1873,19 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo | |||
1873 | 1873 | ||
1874 | extern unsigned long mmap_region(struct file *file, unsigned long addr, | 1874 | extern unsigned long mmap_region(struct file *file, unsigned long addr, |
1875 | unsigned long len, vm_flags_t vm_flags, unsigned long pgoff); | 1875 | unsigned long len, vm_flags_t vm_flags, unsigned long pgoff); |
1876 | extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | 1876 | extern unsigned long do_mmap(struct file *file, unsigned long addr, |
1877 | unsigned long len, unsigned long prot, unsigned long flags, | 1877 | unsigned long len, unsigned long prot, unsigned long flags, |
1878 | unsigned long pgoff, unsigned long *populate); | 1878 | vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate); |
1879 | extern int do_munmap(struct mm_struct *, unsigned long, size_t); | 1879 | extern int do_munmap(struct mm_struct *, unsigned long, size_t); |
1880 | 1880 | ||
1881 | static inline unsigned long | ||
1882 | do_mmap_pgoff(struct file *file, unsigned long addr, | ||
1883 | unsigned long len, unsigned long prot, unsigned long flags, | ||
1884 | unsigned long pgoff, unsigned long *populate) | ||
1885 | { | ||
1886 | return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate); | ||
1887 | } | ||
1888 | |||
1881 | #ifdef CONFIG_MMU | 1889 | #ifdef CONFIG_MMU |
1882 | extern int __mm_populate(unsigned long addr, unsigned long len, | 1890 | extern int __mm_populate(unsigned long addr, unsigned long len, |
1883 | int ignore_errors); | 1891 | int ignore_errors); |
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 61cd67f4d788..a1a210d59961 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h | |||
@@ -66,6 +66,16 @@ struct mmu_notifier_ops { | |||
66 | unsigned long end); | 66 | unsigned long end); |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * clear_young is a lightweight version of clear_flush_young. Like the | ||
70 | * latter, it is supposed to test-and-clear the young/accessed bitflag | ||
71 | * in the secondary pte, but it may omit flushing the secondary tlb. | ||
72 | */ | ||
73 | int (*clear_young)(struct mmu_notifier *mn, | ||
74 | struct mm_struct *mm, | ||
75 | unsigned long start, | ||
76 | unsigned long end); | ||
77 | |||
78 | /* | ||
69 | * test_young is called to check the young/accessed bitflag in | 79 | * test_young is called to check the young/accessed bitflag in |
70 | * the secondary pte. This is used to know if the page is | 80 | * the secondary pte. This is used to know if the page is |
71 | * frequently used without actually clearing the flag or tearing | 81 | * frequently used without actually clearing the flag or tearing |
@@ -203,6 +213,9 @@ extern void __mmu_notifier_release(struct mm_struct *mm); | |||
203 | extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, | 213 | extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, |
204 | unsigned long start, | 214 | unsigned long start, |
205 | unsigned long end); | 215 | unsigned long end); |
216 | extern int __mmu_notifier_clear_young(struct mm_struct *mm, | ||
217 | unsigned long start, | ||
218 | unsigned long end); | ||
206 | extern int __mmu_notifier_test_young(struct mm_struct *mm, | 219 | extern int __mmu_notifier_test_young(struct mm_struct *mm, |
207 | unsigned long address); | 220 | unsigned long address); |
208 | extern void __mmu_notifier_change_pte(struct mm_struct *mm, | 221 | extern void __mmu_notifier_change_pte(struct mm_struct *mm, |
@@ -231,6 +244,15 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, | |||
231 | return 0; | 244 | return 0; |
232 | } | 245 | } |
233 | 246 | ||
247 | static inline int mmu_notifier_clear_young(struct mm_struct *mm, | ||
248 | unsigned long start, | ||
249 | unsigned long end) | ||
250 | { | ||
251 | if (mm_has_notifiers(mm)) | ||
252 | return __mmu_notifier_clear_young(mm, start, end); | ||
253 | return 0; | ||
254 | } | ||
255 | |||
234 | static inline int mmu_notifier_test_young(struct mm_struct *mm, | 256 | static inline int mmu_notifier_test_young(struct mm_struct *mm, |
235 | unsigned long address) | 257 | unsigned long address) |
236 | { | 258 | { |
@@ -311,6 +333,28 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) | |||
311 | __young; \ | 333 | __young; \ |
312 | }) | 334 | }) |
313 | 335 | ||
336 | #define ptep_clear_young_notify(__vma, __address, __ptep) \ | ||
337 | ({ \ | ||
338 | int __young; \ | ||
339 | struct vm_area_struct *___vma = __vma; \ | ||
340 | unsigned long ___address = __address; \ | ||
341 | __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\ | ||
342 | __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ | ||
343 | ___address + PAGE_SIZE); \ | ||
344 | __young; \ | ||
345 | }) | ||
346 | |||
347 | #define pmdp_clear_young_notify(__vma, __address, __pmdp) \ | ||
348 | ({ \ | ||
349 | int __young; \ | ||
350 | struct vm_area_struct *___vma = __vma; \ | ||
351 | unsigned long ___address = __address; \ | ||
352 | __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\ | ||
353 | __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ | ||
354 | ___address + PMD_SIZE); \ | ||
355 | __young; \ | ||
356 | }) | ||
357 | |||
314 | #define ptep_clear_flush_notify(__vma, __address, __ptep) \ | 358 | #define ptep_clear_flush_notify(__vma, __address, __ptep) \ |
315 | ({ \ | 359 | ({ \ |
316 | unsigned long ___addr = __address & PAGE_MASK; \ | 360 | unsigned long ___addr = __address & PAGE_MASK; \ |
@@ -427,6 +471,8 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) | |||
427 | 471 | ||
428 | #define ptep_clear_flush_young_notify ptep_clear_flush_young | 472 | #define ptep_clear_flush_young_notify ptep_clear_flush_young |
429 | #define pmdp_clear_flush_young_notify pmdp_clear_flush_young | 473 | #define pmdp_clear_flush_young_notify pmdp_clear_flush_young |
474 | #define ptep_clear_young_notify ptep_test_and_clear_young | ||
475 | #define pmdp_clear_young_notify pmdp_test_and_clear_young | ||
430 | #define ptep_clear_flush_notify ptep_clear_flush | 476 | #define ptep_clear_flush_notify ptep_clear_flush |
431 | #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush | 477 | #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush |
432 | #define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear | 478 | #define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear |
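The clear_young notifier and the *_clear_young_notify() wrappers let an ageing walk test-and-clear the accessed bit in both the primary page table and any secondary mappings (e.g. a hypervisor's) without forcing a secondary TLB flush. A hedged sketch of a caller, with the surrounding page-table walk elided:

#include <linux/mm.h>
#include <linux/mmu_notifier.h>

static bool example_page_referenced(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *pte)
{
	/* Young in either the primary pte or a secondary mapping counts
	 * as a reference; neither path flushes the secondary TLB. */
	return ptep_clear_young_notify(vma, addr, pte);
}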
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 41c93844fb1d..416509e26d6d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -109,6 +109,10 @@ enum pageflags { | |||
109 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 109 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
110 | PG_compound_lock, | 110 | PG_compound_lock, |
111 | #endif | 111 | #endif |
112 | #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) | ||
113 | PG_young, | ||
114 | PG_idle, | ||
115 | #endif | ||
112 | __NR_PAGEFLAGS, | 116 | __NR_PAGEFLAGS, |
113 | 117 | ||
114 | /* Filesystems */ | 118 | /* Filesystems */ |
@@ -289,6 +293,13 @@ PAGEFLAG_FALSE(HWPoison) | |||
289 | #define __PG_HWPOISON 0 | 293 | #define __PG_HWPOISON 0 |
290 | #endif | 294 | #endif |
291 | 295 | ||
296 | #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) | ||
297 | TESTPAGEFLAG(Young, young) | ||
298 | SETPAGEFLAG(Young, young) | ||
299 | TESTCLEARFLAG(Young, young) | ||
300 | PAGEFLAG(Idle, idle) | ||
301 | #endif | ||
302 | |||
292 | /* | 303 | /* |
293 | * On an anonymous page mapped into a user virtual memory area, | 304 | * On an anonymous page mapped into a user virtual memory area, |
294 | * page->mapping points to its anon_vma, not to a struct address_space; | 305 | * page->mapping points to its anon_vma, not to a struct address_space; |
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index c42981cd99aa..17f118a82854 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h | |||
@@ -26,6 +26,10 @@ enum page_ext_flags { | |||
26 | PAGE_EXT_DEBUG_POISON, /* Page is poisoned */ | 26 | PAGE_EXT_DEBUG_POISON, /* Page is poisoned */ |
27 | PAGE_EXT_DEBUG_GUARD, | 27 | PAGE_EXT_DEBUG_GUARD, |
28 | PAGE_EXT_OWNER, | 28 | PAGE_EXT_OWNER, |
29 | #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) | ||
30 | PAGE_EXT_YOUNG, | ||
31 | PAGE_EXT_IDLE, | ||
32 | #endif | ||
29 | }; | 33 | }; |
30 | 34 | ||
31 | /* | 35 | /* |
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h new file mode 100644 index 000000000000..bf268fa92c5b --- /dev/null +++ b/include/linux/page_idle.h | |||
@@ -0,0 +1,110 @@ | |||
1 | #ifndef _LINUX_MM_PAGE_IDLE_H | ||
2 | #define _LINUX_MM_PAGE_IDLE_H | ||
3 | |||
4 | #include <linux/bitops.h> | ||
5 | #include <linux/page-flags.h> | ||
6 | #include <linux/page_ext.h> | ||
7 | |||
8 | #ifdef CONFIG_IDLE_PAGE_TRACKING | ||
9 | |||
10 | #ifdef CONFIG_64BIT | ||
11 | static inline bool page_is_young(struct page *page) | ||
12 | { | ||
13 | return PageYoung(page); | ||
14 | } | ||
15 | |||
16 | static inline void set_page_young(struct page *page) | ||
17 | { | ||
18 | SetPageYoung(page); | ||
19 | } | ||
20 | |||
21 | static inline bool test_and_clear_page_young(struct page *page) | ||
22 | { | ||
23 | return TestClearPageYoung(page); | ||
24 | } | ||
25 | |||
26 | static inline bool page_is_idle(struct page *page) | ||
27 | { | ||
28 | return PageIdle(page); | ||
29 | } | ||
30 | |||
31 | static inline void set_page_idle(struct page *page) | ||
32 | { | ||
33 | SetPageIdle(page); | ||
34 | } | ||
35 | |||
36 | static inline void clear_page_idle(struct page *page) | ||
37 | { | ||
38 | ClearPageIdle(page); | ||
39 | } | ||
40 | #else /* !CONFIG_64BIT */ | ||
41 | /* | ||
42 | * If there is not enough space to store Idle and Young bits in page flags, use | ||
43 | * page ext flags instead. | ||
44 | */ | ||
45 | extern struct page_ext_operations page_idle_ops; | ||
46 | |||
47 | static inline bool page_is_young(struct page *page) | ||
48 | { | ||
49 | return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); | ||
50 | } | ||
51 | |||
52 | static inline void set_page_young(struct page *page) | ||
53 | { | ||
54 | set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); | ||
55 | } | ||
56 | |||
57 | static inline bool test_and_clear_page_young(struct page *page) | ||
58 | { | ||
59 | return test_and_clear_bit(PAGE_EXT_YOUNG, | ||
60 | &lookup_page_ext(page)->flags); | ||
61 | } | ||
62 | |||
63 | static inline bool page_is_idle(struct page *page) | ||
64 | { | ||
65 | return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); | ||
66 | } | ||
67 | |||
68 | static inline void set_page_idle(struct page *page) | ||
69 | { | ||
70 | set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); | ||
71 | } | ||
72 | |||
73 | static inline void clear_page_idle(struct page *page) | ||
74 | { | ||
75 | clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); | ||
76 | } | ||
77 | #endif /* CONFIG_64BIT */ | ||
78 | |||
79 | #else /* !CONFIG_IDLE_PAGE_TRACKING */ | ||
80 | |||
81 | static inline bool page_is_young(struct page *page) | ||
82 | { | ||
83 | return false; | ||
84 | } | ||
85 | |||
86 | static inline void set_page_young(struct page *page) | ||
87 | { | ||
88 | } | ||
89 | |||
90 | static inline bool test_and_clear_page_young(struct page *page) | ||
91 | { | ||
92 | return false; | ||
93 | } | ||
94 | |||
95 | static inline bool page_is_idle(struct page *page) | ||
96 | { | ||
97 | return false; | ||
98 | } | ||
99 | |||
100 | static inline void set_page_idle(struct page *page) | ||
101 | { | ||
102 | } | ||
103 | |||
104 | static inline void clear_page_idle(struct page *page) | ||
105 | { | ||
106 | } | ||
107 | |||
108 | #endif /* CONFIG_IDLE_PAGE_TRACKING */ | ||
109 | |||
110 | #endif /* _LINUX_MM_PAGE_IDLE_H */ | ||
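A hedged sketch of how a caller might combine these helpers: mark a page idle, and later treat either a set young flag or a cleared idle flag as evidence the page was accessed (the function names are illustrative, not part of the API):

#include <linux/page_idle.h>

static void example_mark_idle(struct page *page)
{
	set_page_idle(page);
}

static bool example_was_accessed(struct page *page)
{
	/* The young flag stands in for a cleared pte accessed bit. */
	return page_is_young(page) || !page_is_idle(page);
}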
diff --git a/include/linux/poison.h b/include/linux/poison.h index 2110a81c5e2a..317e16de09e5 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h | |||
@@ -19,8 +19,8 @@ | |||
19 | * under normal circumstances, used to verify that nobody uses | 19 | * under normal circumstances, used to verify that nobody uses |
20 | * non-initialized list entries. | 20 | * non-initialized list entries. |
21 | */ | 21 | */ |
22 | #define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) | 22 | #define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) |
23 | #define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) | 23 | #define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) |
24 | 24 | ||
25 | /********** include/linux/timer.h **********/ | 25 | /********** include/linux/timer.h **********/ |
26 | /* | 26 | /* |
@@ -69,10 +69,6 @@ | |||
69 | #define ATM_POISON_FREE 0x12 | 69 | #define ATM_POISON_FREE 0x12 |
70 | #define ATM_POISON 0xdeadbeef | 70 | #define ATM_POISON 0xdeadbeef |
71 | 71 | ||
72 | /********** net/ **********/ | ||
73 | #define NEIGHBOR_DEAD 0xdeadbeef | ||
74 | #define NETFILTER_LINK_POISON 0xdead57ac | ||
75 | |||
76 | /********** kernel/mutexes **********/ | 72 | /********** kernel/mutexes **********/ |
77 | #define MUTEX_DEBUG_INIT 0x11 | 73 | #define MUTEX_DEBUG_INIT 0x11 |
78 | #define MUTEX_DEBUG_FREE 0x22 | 74 | #define MUTEX_DEBUG_FREE 0x22 |
@@ -83,7 +79,4 @@ | |||
83 | /********** security/ **********/ | 79 | /********** security/ **********/ |
84 | #define KEY_DESTROY 0xbd | 80 | #define KEY_DESTROY 0xbd |
85 | 81 | ||
86 | /********** sound/oss/ **********/ | ||
87 | #define OSS_POISON_FREE 0xAB | ||
88 | |||
89 | #endif | 82 | #endif |
diff --git a/include/linux/printk.h b/include/linux/printk.h index a6298b27ac99..9729565c25ff 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h | |||
@@ -404,10 +404,10 @@ do { \ | |||
404 | static DEFINE_RATELIMIT_STATE(_rs, \ | 404 | static DEFINE_RATELIMIT_STATE(_rs, \ |
405 | DEFAULT_RATELIMIT_INTERVAL, \ | 405 | DEFAULT_RATELIMIT_INTERVAL, \ |
406 | DEFAULT_RATELIMIT_BURST); \ | 406 | DEFAULT_RATELIMIT_BURST); \ |
407 | DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ | 407 | DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt)); \ |
408 | if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ | 408 | if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ |
409 | __ratelimit(&_rs)) \ | 409 | __ratelimit(&_rs)) \ |
410 | __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__); \ | 410 | __dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__); \ |
411 | } while (0) | 411 | } while (0) |
412 | #elif defined(DEBUG) | 412 | #elif defined(DEBUG) |
413 | #define pr_debug_ratelimited(fmt, ...) \ | 413 | #define pr_debug_ratelimited(fmt, ...) \ |
@@ -456,11 +456,17 @@ static inline void print_hex_dump_bytes(const char *prefix_str, int prefix_type, | |||
456 | groupsize, buf, len, ascii) \ | 456 | groupsize, buf, len, ascii) \ |
457 | dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ | 457 | dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ |
458 | groupsize, buf, len, ascii) | 458 | groupsize, buf, len, ascii) |
459 | #else | 459 | #elif defined(DEBUG) |
460 | #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ | 460 | #define print_hex_dump_debug(prefix_str, prefix_type, rowsize, \ |
461 | groupsize, buf, len, ascii) \ | 461 | groupsize, buf, len, ascii) \ |
462 | print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ | 462 | print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, rowsize, \ |
463 | groupsize, buf, len, ascii) | 463 | groupsize, buf, len, ascii) |
464 | #endif /* defined(CONFIG_DYNAMIC_DEBUG) */ | 464 | #else |
465 | static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type, | ||
466 | int rowsize, int groupsize, | ||
467 | const void *buf, size_t len, bool ascii) | ||
468 | { | ||
469 | } | ||
470 | #endif | ||
465 | 471 | ||
466 | #endif | 472 | #endif |
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index d4c7271382cb..adeadbd6d7bf 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h | |||
@@ -122,6 +122,10 @@ int seq_write(struct seq_file *seq, const void *data, size_t len); | |||
122 | __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); | 122 | __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); |
123 | __printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); | 123 | __printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); |
124 | 124 | ||
125 | void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, | ||
126 | int rowsize, int groupsize, const void *buf, size_t len, | ||
127 | bool ascii); | ||
128 | |||
125 | int seq_path(struct seq_file *, const struct path *, const char *); | 129 | int seq_path(struct seq_file *, const struct path *, const char *); |
126 | int seq_file_path(struct seq_file *, struct file *, const char *); | 130 | int seq_file_path(struct seq_file *, struct file *, const char *); |
127 | int seq_dentry(struct seq_file *, struct dentry *, const char *); | 131 | int seq_dentry(struct seq_file *, struct dentry *, const char *); |
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 71f711db4500..dabe643eb5fa 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h | |||
@@ -48,24 +48,24 @@ static inline int string_unescape_any_inplace(char *buf) | |||
48 | #define ESCAPE_HEX 0x20 | 48 | #define ESCAPE_HEX 0x20 |
49 | 49 | ||
50 | int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, | 50 | int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, |
51 | unsigned int flags, const char *esc); | 51 | unsigned int flags, const char *only); |
52 | 52 | ||
53 | static inline int string_escape_mem_any_np(const char *src, size_t isz, | 53 | static inline int string_escape_mem_any_np(const char *src, size_t isz, |
54 | char *dst, size_t osz, const char *esc) | 54 | char *dst, size_t osz, const char *only) |
55 | { | 55 | { |
56 | return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, esc); | 56 | return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, only); |
57 | } | 57 | } |
58 | 58 | ||
59 | static inline int string_escape_str(const char *src, char *dst, size_t sz, | 59 | static inline int string_escape_str(const char *src, char *dst, size_t sz, |
60 | unsigned int flags, const char *esc) | 60 | unsigned int flags, const char *only) |
61 | { | 61 | { |
62 | return string_escape_mem(src, strlen(src), dst, sz, flags, esc); | 62 | return string_escape_mem(src, strlen(src), dst, sz, flags, only); |
63 | } | 63 | } |
64 | 64 | ||
65 | static inline int string_escape_str_any_np(const char *src, char *dst, | 65 | static inline int string_escape_str_any_np(const char *src, char *dst, |
66 | size_t sz, const char *esc) | 66 | size_t sz, const char *only) |
67 | { | 67 | { |
68 | return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, esc); | 68 | return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, only); |
69 | } | 69 | } |
70 | 70 | ||
71 | #endif | 71 | #endif |
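The rename from 'esc' to 'only' reflects how the last argument is used: when non-empty it restricts escaping to the listed characters. A small sketch under that reading (buffer size and strings are arbitrary):

#include <linux/printk.h>
#include <linux/string_helpers.h>

static void example_escape(void)
{
	char dst[64];
	int n;

	/* Only '\t' and '\n' are rewritten as "\t" and "\n"; other
	 * bytes are copied through unchanged. */
	n = string_escape_str("a\tb\nc", dst, sizeof(dst),
			      ESCAPE_SPECIAL, "\t\n");
	pr_info("escaped form needs %d bytes\n", n);
}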
diff --git a/include/linux/zpool.h b/include/linux/zpool.h index c924a28d9805..42f8ec992452 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h | |||
@@ -36,6 +36,8 @@ enum zpool_mapmode { | |||
36 | ZPOOL_MM_DEFAULT = ZPOOL_MM_RW | 36 | ZPOOL_MM_DEFAULT = ZPOOL_MM_RW |
37 | }; | 37 | }; |
38 | 38 | ||
39 | bool zpool_has_pool(char *type); | ||
40 | |||
39 | struct zpool *zpool_create_pool(char *type, char *name, | 41 | struct zpool *zpool_create_pool(char *type, char *name, |
40 | gfp_t gfp, const struct zpool_ops *ops); | 42 | gfp_t gfp, const struct zpool_ops *ops); |
41 | 43 | ||
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h index a6c4962e5d46..5da5f8751ce7 100644 --- a/include/uapi/linux/kernel-page-flags.h +++ b/include/uapi/linux/kernel-page-flags.h | |||
@@ -33,6 +33,7 @@ | |||
33 | #define KPF_THP 22 | 33 | #define KPF_THP 22 |
34 | #define KPF_BALLOON 23 | 34 | #define KPF_BALLOON 23 |
35 | #define KPF_ZERO_PAGE 24 | 35 | #define KPF_ZERO_PAGE 24 |
36 | #define KPF_IDLE 25 | ||
36 | 37 | ||
37 | 38 | ||
38 | #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */ | 39 | #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */ |
diff --git a/init/initramfs.c b/init/initramfs.c index ad1bd7787bbb..b32ad7d97ac9 100644 --- a/init/initramfs.c +++ b/init/initramfs.c | |||
@@ -526,14 +526,14 @@ extern unsigned long __initramfs_size; | |||
526 | 526 | ||
527 | static void __init free_initrd(void) | 527 | static void __init free_initrd(void) |
528 | { | 528 | { |
529 | #ifdef CONFIG_KEXEC | 529 | #ifdef CONFIG_KEXEC_CORE |
530 | unsigned long crashk_start = (unsigned long)__va(crashk_res.start); | 530 | unsigned long crashk_start = (unsigned long)__va(crashk_res.start); |
531 | unsigned long crashk_end = (unsigned long)__va(crashk_res.end); | 531 | unsigned long crashk_end = (unsigned long)__va(crashk_res.end); |
532 | #endif | 532 | #endif |
533 | if (do_retain_initrd) | 533 | if (do_retain_initrd) |
534 | goto skip; | 534 | goto skip; |
535 | 535 | ||
536 | #ifdef CONFIG_KEXEC | 536 | #ifdef CONFIG_KEXEC_CORE |
537 | /* | 537 | /* |
538 | * If the initrd region is overlapped with crashkernel reserved region, | 538 | * If the initrd region is overlapped with crashkernel reserved region, |
539 | * free only memory that is not part of crashkernel region. | 539 | * free only memory that is not part of crashkernel region. |
diff --git a/init/main.c b/init/main.c index 56506553d4d8..9e64d7097f1a 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -877,7 +877,6 @@ static void __init do_initcalls(void) | |||
877 | static void __init do_basic_setup(void) | 877 | static void __init do_basic_setup(void) |
878 | { | 878 | { |
879 | cpuset_init_smp(); | 879 | cpuset_init_smp(); |
880 | usermodehelper_init(); | ||
881 | shmem_init(); | 880 | shmem_init(); |
882 | driver_init(); | 881 | driver_init(); |
883 | init_irq_proc(); | 882 | init_irq_proc(); |
diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 2b491590ebab..71f448e5e927 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c | |||
@@ -123,7 +123,7 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) | |||
123 | size_t len = src->m_ts; | 123 | size_t len = src->m_ts; |
124 | size_t alen; | 124 | size_t alen; |
125 | 125 | ||
126 | BUG_ON(dst == NULL); | 126 | WARN_ON(dst == NULL); |
127 | if (src->m_ts > dst->m_ts) | 127 | if (src->m_ts > dst->m_ts) |
128 | return ERR_PTR(-EINVAL); | 128 | return ERR_PTR(-EINVAL); |
129 | 129 | ||
@@ -159,7 +159,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) | |||
159 | * We raced in the idr lookup or with shm_destroy(). Either way, the | 159 | * We raced in the idr lookup or with shm_destroy(). Either way, the |
160 | * ID is busted. | 160 | * ID is busted. |
161 | */ | 161 | */ |
162 | BUG_ON(IS_ERR(ipcp)); | 162 | WARN_ON(IS_ERR(ipcp)); |
163 | 163 | ||
164 | return container_of(ipcp, struct shmid_kernel, shm_perm); | 164 | return container_of(ipcp, struct shmid_kernel, shm_perm); |
165 | } | 165 | } |
@@ -393,7 +393,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) | |||
393 | return ret; | 393 | return ret; |
394 | sfd->vm_ops = vma->vm_ops; | 394 | sfd->vm_ops = vma->vm_ops; |
395 | #ifdef CONFIG_MMU | 395 | #ifdef CONFIG_MMU |
396 | BUG_ON(!sfd->vm_ops->fault); | 396 | WARN_ON(!sfd->vm_ops->fault); |
397 | #endif | 397 | #endif |
398 | vma->vm_ops = &shm_vm_ops; | 398 | vma->vm_ops = &shm_vm_ops; |
399 | shm_open(vma); | 399 | shm_open(vma); |
diff --git a/kernel/Makefile b/kernel/Makefile index e0d7587e7684..d4988410b410 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -49,7 +49,9 @@ obj-$(CONFIG_MODULES) += module.o | |||
49 | obj-$(CONFIG_MODULE_SIG) += module_signing.o | 49 | obj-$(CONFIG_MODULE_SIG) += module_signing.o |
50 | obj-$(CONFIG_KALLSYMS) += kallsyms.o | 50 | obj-$(CONFIG_KALLSYMS) += kallsyms.o |
51 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o | 51 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o |
52 | obj-$(CONFIG_KEXEC_CORE) += kexec_core.o | ||
52 | obj-$(CONFIG_KEXEC) += kexec.o | 53 | obj-$(CONFIG_KEXEC) += kexec.o |
54 | obj-$(CONFIG_KEXEC_FILE) += kexec_file.o | ||
53 | obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o | 55 | obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o |
54 | obj-$(CONFIG_COMPAT) += compat.o | 56 | obj-$(CONFIG_COMPAT) += compat.o |
55 | obj-$(CONFIG_CGROUPS) += cgroup.o | 57 | obj-$(CONFIG_CGROUPS) += cgroup.o |
diff --git a/kernel/cred.c b/kernel/cred.c index ec1c07667ec1..71179a09c1d6 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -20,11 +20,16 @@ | |||
20 | #include <linux/cn_proc.h> | 20 | #include <linux/cn_proc.h> |
21 | 21 | ||
22 | #if 0 | 22 | #if 0 |
23 | #define kdebug(FMT, ...) \ | 23 | #define kdebug(FMT, ...) \ |
24 | printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__) | 24 | printk("[%-5.5s%5u] " FMT "\n", \ |
25 | current->comm, current->pid, ##__VA_ARGS__) | ||
25 | #else | 26 | #else |
26 | #define kdebug(FMT, ...) \ | 27 | #define kdebug(FMT, ...) \ |
27 | no_printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__) | 28 | do { \ |
29 | if (0) \ | ||
30 | no_printk("[%-5.5s%5u] " FMT "\n", \ | ||
31 | current->comm, current->pid, ##__VA_ARGS__); \ | ||
32 | } while (0) | ||
28 | #endif | 33 | #endif |
29 | 34 | ||
30 | static struct kmem_cache *cred_jar; | 35 | static struct kmem_cache *cred_jar; |
diff --git a/kernel/events/core.c b/kernel/events/core.c index e8183895691c..f548f69c4299 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -9094,7 +9094,7 @@ static void perf_event_init_cpu(int cpu) | |||
9094 | mutex_unlock(&swhash->hlist_mutex); | 9094 | mutex_unlock(&swhash->hlist_mutex); |
9095 | } | 9095 | } |
9096 | 9096 | ||
9097 | #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC | 9097 | #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE |
9098 | static void __perf_event_exit_context(void *__info) | 9098 | static void __perf_event_exit_context(void *__info) |
9099 | { | 9099 | { |
9100 | struct remove_event re = { .detach_group = true }; | 9100 | struct remove_event re = { .detach_group = true }; |
diff --git a/kernel/extable.c b/kernel/extable.c index c98f926277a8..e820ccee9846 100644 --- a/kernel/extable.c +++ b/kernel/extable.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #include <linux/ftrace.h> | 18 | #include <linux/ftrace.h> |
19 | #include <linux/memory.h> | 19 | #include <linux/memory.h> |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/ftrace.h> | ||
22 | #include <linux/mutex.h> | 21 | #include <linux/mutex.h> |
23 | #include <linux/init.h> | 22 | #include <linux/init.h> |
24 | 23 | ||
diff --git a/kernel/kexec.c b/kernel/kexec.c index a785c1015e25..4c5edc357923 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -1,156 +1,22 @@ | |||
1 | /* | 1 | /* |
2 | * kexec.c - kexec system call | 2 | * kexec.c - kexec_load system call |
3 | * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> | 3 | * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> |
4 | * | 4 | * |
5 | * This source code is licensed under the GNU General Public License, | 5 | * This source code is licensed under the GNU General Public License, |
6 | * Version 2. See the file COPYING for more details. | 6 | * Version 2. See the file COPYING for more details. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #define pr_fmt(fmt) "kexec: " fmt | ||
10 | |||
11 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
12 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
13 | #include <linux/file.h> | 11 | #include <linux/file.h> |
14 | #include <linux/slab.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/kexec.h> | 12 | #include <linux/kexec.h> |
17 | #include <linux/mutex.h> | 13 | #include <linux/mutex.h> |
18 | #include <linux/list.h> | 14 | #include <linux/list.h> |
19 | #include <linux/highmem.h> | ||
20 | #include <linux/syscalls.h> | 15 | #include <linux/syscalls.h> |
21 | #include <linux/reboot.h> | ||
22 | #include <linux/ioport.h> | ||
23 | #include <linux/hardirq.h> | ||
24 | #include <linux/elf.h> | ||
25 | #include <linux/elfcore.h> | ||
26 | #include <linux/utsname.h> | ||
27 | #include <linux/numa.h> | ||
28 | #include <linux/suspend.h> | ||
29 | #include <linux/device.h> | ||
30 | #include <linux/freezer.h> | ||
31 | #include <linux/pm.h> | ||
32 | #include <linux/cpu.h> | ||
33 | #include <linux/console.h> | ||
34 | #include <linux/vmalloc.h> | 16 | #include <linux/vmalloc.h> |
35 | #include <linux/swap.h> | 17 | #include <linux/slab.h> |
36 | #include <linux/syscore_ops.h> | ||
37 | #include <linux/compiler.h> | ||
38 | #include <linux/hugetlb.h> | ||
39 | |||
40 | #include <asm/page.h> | ||
41 | #include <asm/uaccess.h> | ||
42 | #include <asm/io.h> | ||
43 | #include <asm/sections.h> | ||
44 | |||
45 | #include <crypto/hash.h> | ||
46 | #include <crypto/sha.h> | ||
47 | |||
48 | /* Per cpu memory for storing cpu states in case of system crash. */ | ||
49 | note_buf_t __percpu *crash_notes; | ||
50 | |||
51 | /* vmcoreinfo stuff */ | ||
52 | static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; | ||
53 | u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; | ||
54 | size_t vmcoreinfo_size; | ||
55 | size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); | ||
56 | |||
57 | /* Flag to indicate we are going to kexec a new kernel */ | ||
58 | bool kexec_in_progress = false; | ||
59 | |||
60 | /* | ||
61 | * Declare these symbols weak so that if architecture provides a purgatory, | ||
62 | * these will be overridden. | ||
63 | */ | ||
64 | char __weak kexec_purgatory[0]; | ||
65 | size_t __weak kexec_purgatory_size = 0; | ||
66 | |||
67 | #ifdef CONFIG_KEXEC_FILE | ||
68 | static int kexec_calculate_store_digests(struct kimage *image); | ||
69 | #endif | ||
70 | |||
71 | /* Location of the reserved area for the crash kernel */ | ||
72 | struct resource crashk_res = { | ||
73 | .name = "Crash kernel", | ||
74 | .start = 0, | ||
75 | .end = 0, | ||
76 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
77 | }; | ||
78 | struct resource crashk_low_res = { | ||
79 | .name = "Crash kernel", | ||
80 | .start = 0, | ||
81 | .end = 0, | ||
82 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
83 | }; | ||
84 | |||
85 | int kexec_should_crash(struct task_struct *p) | ||
86 | { | ||
87 | /* | ||
88 | * If crash_kexec_post_notifiers is enabled, don't run | ||
89 | * crash_kexec() here yet, which must be run after panic | ||
90 | * notifiers in panic(). | ||
91 | */ | ||
92 | if (crash_kexec_post_notifiers) | ||
93 | return 0; | ||
94 | /* | ||
95 | * There are 4 panic() calls in do_exit() path, each of which | ||
96 | * corresponds to each of these 4 conditions. | ||
97 | */ | ||
98 | if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) | ||
99 | return 1; | ||
100 | return 0; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * When kexec transitions to the new kernel there is a one-to-one | ||
105 | * mapping between physical and virtual addresses. On processors | ||
106 | * where you can disable the MMU this is trivial, and easy. For | ||
107 | * others it is still a simple predictable page table to setup. | ||
108 | * | ||
109 | * In that environment kexec copies the new kernel to its final | ||
110 | * resting place. This means I can only support memory whose | ||
111 | * physical address can fit in an unsigned long. In particular | ||
112 | * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. | ||
113 | * If the assembly stub has more restrictive requirements | ||
114 | * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be | ||
115 | * defined more restrictively in <asm/kexec.h>. | ||
116 | * | ||
117 | * The code for the transition from the current kernel to the | ||
118 | * new kernel is placed in the control_code_buffer, whose size | ||
119 | * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single | ||
120 | * page of memory is necessary, but some architectures require more. | ||
121 | * Because this memory must be identity mapped in the transition from | ||
122 | * virtual to physical addresses it must live in the range | ||
123 | * 0 - TASK_SIZE, as only the user space mappings are arbitrarily | ||
124 | * modifiable. | ||
125 | * | ||
126 | * The assembly stub in the control code buffer is passed a linked list | ||
127 | * of descriptor pages detailing the source pages of the new kernel, | ||
128 | * and the destination addresses of those source pages. As this data | ||
129 | * structure is not used in the context of the current OS, it must | ||
130 | * be self-contained. | ||
131 | * | ||
132 | * The code has been made to work with highmem pages and will use a | ||
133 | * destination page in its final resting place (if it happens | ||
134 | * to allocate it). The end product of this is that most of the | ||
135 | * physical address space, and most of RAM can be used. | ||
136 | * | ||
137 | * Future directions include: | ||
138 | * - allocating a page table with the control code buffer identity | ||
139 | * mapped, to simplify machine_kexec and make kexec_on_panic more | ||
140 | * reliable. | ||
141 | */ | ||
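To make the descriptor list described above concrete, here is a minimal userspace sketch, assuming (for illustration only) that role flags occupy the low bits of page-aligned physical addresses; the *_SKETCH constants and values are hypothetical stand-ins for the kernel's IND_* flags, and chained indirection pages are omitted.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE_SKETCH       4096UL
#define IND_DESTINATION_SKETCH 0x1   /* "copies start landing here" */
#define IND_INDIRECTION_SKETCH 0x2   /* would chain to another page of entries; unused in this flat sketch */
#define IND_DONE_SKETCH        0x4   /* end of list */
#define IND_SOURCE_SKETCH      0x8   /* a page to copy */

int main(void)
{
	/* A tiny, flat "indirection list": one destination, two sources, done. */
	uint64_t entries[] = {
		0x100000 | IND_DESTINATION_SKETCH,
		0x200000 | IND_SOURCE_SKETCH,
		0x201000 | IND_SOURCE_SKETCH,
		IND_DONE_SKETCH,
	};
	uint64_t dest = 0;

	for (size_t i = 0; !(entries[i] & IND_DONE_SKETCH); i++) {
		uint64_t addr = entries[i] & ~(PAGE_SIZE_SKETCH - 1);

		if (entries[i] & IND_DESTINATION_SKETCH)
			dest = addr;                  /* later copies land here */
		else if (entries[i] & IND_SOURCE_SKETCH) {
			printf("copy page 0x%llx -> 0x%llx\n",
			       (unsigned long long)addr,
			       (unsigned long long)dest);
			dest += PAGE_SIZE_SKETCH;     /* next page goes after it */
		}
	}
	return 0;
}

Packing the role into otherwise-unused low bits keeps each descriptor to a single word, which is what lets the assembly stub walk the list with no other state.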
142 | |||
143 | /* | ||
144 | * KIMAGE_NO_DEST is an impossible destination address..., for | ||
145 | * allocating pages whose destination address we do not care about. | ||
146 | */ | ||
147 | #define KIMAGE_NO_DEST (-1UL) | ||
148 | 18 | ||
149 | static int kimage_is_destination_range(struct kimage *image, | 19 | #include "kexec_internal.h" |
150 | unsigned long start, unsigned long end); | ||
151 | static struct page *kimage_alloc_page(struct kimage *image, | ||
152 | gfp_t gfp_mask, | ||
153 | unsigned long dest); | ||
154 | 20 | ||
155 | static int copy_user_segment_list(struct kimage *image, | 21 | static int copy_user_segment_list(struct kimage *image, |
156 | unsigned long nr_segments, | 22 | unsigned long nr_segments, |
@@ -169,125 +35,6 @@ static int copy_user_segment_list(struct kimage *image, | |||
169 | return ret; | 35 | return ret; |
170 | } | 36 | } |
171 | 37 | ||
172 | static int sanity_check_segment_list(struct kimage *image) | ||
173 | { | ||
174 | int result, i; | ||
175 | unsigned long nr_segments = image->nr_segments; | ||
176 | |||
177 | /* | ||
178 | * Verify we have good destination addresses. The caller is | ||
179 | * responsible for making certain we don't attempt to load | ||
180 | * the new image into invalid or reserved areas of RAM. This | ||
181 | * just verifies it is an address we can use. | ||
182 | * | ||
183 | * Since the kernel does everything in page-size chunks, ensure | ||
184 | * the destination addresses are page aligned. Too many | ||
185 | * special cases crop up when we don't do this. The most | ||
186 | * insidious is getting overlapping destination addresses | ||
187 | * simply because addresses are changed to page size | ||
188 | * granularity. | ||
189 | */ | ||
190 | result = -EADDRNOTAVAIL; | ||
191 | for (i = 0; i < nr_segments; i++) { | ||
192 | unsigned long mstart, mend; | ||
193 | |||
194 | mstart = image->segment[i].mem; | ||
195 | mend = mstart + image->segment[i].memsz; | ||
196 | if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) | ||
197 | return result; | ||
198 | if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) | ||
199 | return result; | ||
200 | } | ||
201 | |||
202 | /* Verify our destination addresses do not overlap. | ||
203 | * If we allowed overlapping destination addresses | ||
204 | * through, very weird things can happen with no | ||
205 | * easy explanation as one segment stops on another. | ||
206 | */ | ||
207 | result = -EINVAL; | ||
208 | for (i = 0; i < nr_segments; i++) { | ||
209 | unsigned long mstart, mend; | ||
210 | unsigned long j; | ||
211 | |||
212 | mstart = image->segment[i].mem; | ||
213 | mend = mstart + image->segment[i].memsz; | ||
214 | for (j = 0; j < i; j++) { | ||
215 | unsigned long pstart, pend; | ||
216 | pstart = image->segment[j].mem; | ||
217 | pend = pstart + image->segment[j].memsz; | ||
218 | /* Do the segments overlap ? */ | ||
219 | if ((mend > pstart) && (mstart < pend)) | ||
220 | return result; | ||
221 | } | ||
222 | } | ||
223 | |||
224 | /* Ensure our buffer sizes are strictly less than | ||
225 | * our memory sizes. This should always be the case, | ||
226 | * and it is easier to check up front than to be surprised | ||
227 | * later on. | ||
228 | */ | ||
229 | result = -EINVAL; | ||
230 | for (i = 0; i < nr_segments; i++) { | ||
231 | if (image->segment[i].bufsz > image->segment[i].memsz) | ||
232 | return result; | ||
233 | } | ||
234 | |||
235 | /* | ||
236 | * Verify we have good destination addresses. Normally | ||
237 | * the caller is responsible for making certain we don't | ||
238 | * attempt to load the new image into invalid or reserved | ||
239 | * areas of RAM. But crash kernels are preloaded into a | ||
240 | * reserved area of ram. We must ensure the addresses | ||
241 | * are in the reserved area otherwise preloading the | ||
242 | * kernel could corrupt things. | ||
243 | */ | ||
244 | |||
245 | if (image->type == KEXEC_TYPE_CRASH) { | ||
246 | result = -EADDRNOTAVAIL; | ||
247 | for (i = 0; i < nr_segments; i++) { | ||
248 | unsigned long mstart, mend; | ||
249 | |||
250 | mstart = image->segment[i].mem; | ||
251 | mend = mstart + image->segment[i].memsz - 1; | ||
252 | /* Ensure we are within the crash kernel limits */ | ||
253 | if ((mstart < crashk_res.start) || | ||
254 | (mend > crashk_res.end)) | ||
255 | return result; | ||
256 | } | ||
257 | } | ||
258 | |||
259 | return 0; | ||
260 | } | ||
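The overlap test used above is the usual half-open interval check; a tiny standalone illustration (not kernel code):

#include <stdio.h>

/* Two half-open ranges [mstart, mend) and [pstart, pend) intersect
 * exactly when each one starts before the other ends. */
static int overlaps(unsigned long mstart, unsigned long mend,
		    unsigned long pstart, unsigned long pend)
{
	return (mend > pstart) && (mstart < pend);
}

int main(void)
{
	printf("%d\n", overlaps(0x1000, 0x3000, 0x2000, 0x4000)); /* 1: overlap */
	printf("%d\n", overlaps(0x1000, 0x2000, 0x2000, 0x4000)); /* 0: only touch */
	return 0;
}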
261 | |||
262 | static struct kimage *do_kimage_alloc_init(void) | ||
263 | { | ||
264 | struct kimage *image; | ||
265 | |||
266 | /* Allocate a controlling structure */ | ||
267 | image = kzalloc(sizeof(*image), GFP_KERNEL); | ||
268 | if (!image) | ||
269 | return NULL; | ||
270 | |||
271 | image->head = 0; | ||
272 | image->entry = &image->head; | ||
273 | image->last_entry = &image->head; | ||
274 | image->control_page = ~0; /* By default this does not apply */ | ||
275 | image->type = KEXEC_TYPE_DEFAULT; | ||
276 | |||
277 | /* Initialize the list of control pages */ | ||
278 | INIT_LIST_HEAD(&image->control_pages); | ||
279 | |||
280 | /* Initialize the list of destination pages */ | ||
281 | INIT_LIST_HEAD(&image->dest_pages); | ||
282 | |||
283 | /* Initialize the list of unusable pages */ | ||
284 | INIT_LIST_HEAD(&image->unusable_pages); | ||
285 | |||
286 | return image; | ||
287 | } | ||
288 | |||
289 | static void kimage_free_page_list(struct list_head *list); | ||
290 | |||
291 | static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, | 38 | static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, |
292 | unsigned long nr_segments, | 39 | unsigned long nr_segments, |
293 | struct kexec_segment __user *segments, | 40 | struct kexec_segment __user *segments, |
@@ -354,873 +101,6 @@ out_free_image: | |||
354 | return ret; | 101 | return ret; |
355 | } | 102 | } |
356 | 103 | ||
357 | #ifdef CONFIG_KEXEC_FILE | ||
358 | static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len) | ||
359 | { | ||
360 | struct fd f = fdget(fd); | ||
361 | int ret; | ||
362 | struct kstat stat; | ||
363 | loff_t pos; | ||
364 | ssize_t bytes = 0; | ||
365 | |||
366 | if (!f.file) | ||
367 | return -EBADF; | ||
368 | |||
369 | ret = vfs_getattr(&f.file->f_path, &stat); | ||
370 | if (ret) | ||
371 | goto out; | ||
372 | |||
373 | if (stat.size > INT_MAX) { | ||
374 | ret = -EFBIG; | ||
375 | goto out; | ||
376 | } | ||
377 | |||
378 | /* Don't hand 0 to vmalloc, it whines. */ | ||
379 | if (stat.size == 0) { | ||
380 | ret = -EINVAL; | ||
381 | goto out; | ||
382 | } | ||
383 | |||
384 | *buf = vmalloc(stat.size); | ||
385 | if (!*buf) { | ||
386 | ret = -ENOMEM; | ||
387 | goto out; | ||
388 | } | ||
389 | |||
390 | pos = 0; | ||
391 | while (pos < stat.size) { | ||
392 | bytes = kernel_read(f.file, pos, (char *)(*buf) + pos, | ||
393 | stat.size - pos); | ||
394 | if (bytes < 0) { | ||
395 | vfree(*buf); | ||
396 | ret = bytes; | ||
397 | goto out; | ||
398 | } | ||
399 | |||
400 | if (bytes == 0) | ||
401 | break; | ||
402 | pos += bytes; | ||
403 | } | ||
404 | |||
405 | if (pos != stat.size) { | ||
406 | ret = -EBADF; | ||
407 | vfree(*buf); | ||
408 | goto out; | ||
409 | } | ||
410 | |||
411 | *buf_len = pos; | ||
412 | out: | ||
413 | fdput(f); | ||
414 | return ret; | ||
415 | } | ||
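For comparison, a hedged userspace analogue of the read-until-complete loop above, using open()/fstat()/read() in place of fdget()/vfs_getattr()/kernel_read(); the helper name and error handling are simplified assumptions, not kernel code.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>

/* Read a whole regular file into a malloc'd buffer, or return NULL. */
static void *slurp(const char *path, size_t *len)
{
	struct stat st;
	char *buf = NULL;
	size_t pos = 0;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return NULL;
	if (fstat(fd, &st) == 0 && st.st_size > 0 &&
	    (buf = malloc(st.st_size)) != NULL) {
		while (pos < (size_t)st.st_size) {
			ssize_t n = read(fd, buf + pos, st.st_size - pos);

			if (n <= 0) {		/* error or unexpected EOF: give up */
				free(buf);
				buf = NULL;
				break;
			}
			pos += n;
		}
	}
	close(fd);
	*len = pos;
	return buf;
}

int main(int argc, char **argv)
{
	size_t len = 0;
	void *buf = argc > 1 ? slurp(argv[1], &len) : NULL;

	if (buf)
		printf("read %zu bytes from %s\n", len, argv[1]);
	free(buf);
	return 0;
}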
416 | |||
417 | /* Architectures can provide this probe function */ | ||
418 | int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, | ||
419 | unsigned long buf_len) | ||
420 | { | ||
421 | return -ENOEXEC; | ||
422 | } | ||
423 | |||
424 | void * __weak arch_kexec_kernel_image_load(struct kimage *image) | ||
425 | { | ||
426 | return ERR_PTR(-ENOEXEC); | ||
427 | } | ||
428 | |||
429 | void __weak arch_kimage_file_post_load_cleanup(struct kimage *image) | ||
430 | { | ||
431 | } | ||
432 | |||
433 | int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, | ||
434 | unsigned long buf_len) | ||
435 | { | ||
436 | return -EKEYREJECTED; | ||
437 | } | ||
438 | |||
439 | /* Apply relocations of type RELA */ | ||
440 | int __weak | ||
441 | arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | ||
442 | unsigned int relsec) | ||
443 | { | ||
444 | pr_err("RELA relocation unsupported.\n"); | ||
445 | return -ENOEXEC; | ||
446 | } | ||
447 | |||
448 | /* Apply relocations of type REL */ | ||
449 | int __weak | ||
450 | arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | ||
451 | unsigned int relsec) | ||
452 | { | ||
453 | pr_err("REL relocation unsupported.\n"); | ||
454 | return -ENOEXEC; | ||
455 | } | ||
456 | |||
457 | /* | ||
458 | * Free up memory used by the kernel, initrd, and command line. These are | ||
459 | * temporary allocations that are no longer needed once the buffers | ||
460 | * have been loaded into separate segments and copied elsewhere. | ||
461 | */ | ||
462 | static void kimage_file_post_load_cleanup(struct kimage *image) | ||
463 | { | ||
464 | struct purgatory_info *pi = &image->purgatory_info; | ||
465 | |||
466 | vfree(image->kernel_buf); | ||
467 | image->kernel_buf = NULL; | ||
468 | |||
469 | vfree(image->initrd_buf); | ||
470 | image->initrd_buf = NULL; | ||
471 | |||
472 | kfree(image->cmdline_buf); | ||
473 | image->cmdline_buf = NULL; | ||
474 | |||
475 | vfree(pi->purgatory_buf); | ||
476 | pi->purgatory_buf = NULL; | ||
477 | |||
478 | vfree(pi->sechdrs); | ||
479 | pi->sechdrs = NULL; | ||
480 | |||
481 | /* See if architecture has anything to cleanup post load */ | ||
482 | arch_kimage_file_post_load_cleanup(image); | ||
483 | |||
484 | /* | ||
485 | * Above call should have called into bootloader to free up | ||
486 | * any data stored in kimage->image_loader_data. It should | ||
487 | * be ok now to free it up. | ||
488 | */ | ||
489 | kfree(image->image_loader_data); | ||
490 | image->image_loader_data = NULL; | ||
491 | } | ||
492 | |||
493 | /* | ||
494 | * In file mode, the list of segments is prepared by the kernel. Copy relevant | ||
495 | * data from user space, do error checking, prepare segment list | ||
496 | */ | ||
497 | static int | ||
498 | kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, | ||
499 | const char __user *cmdline_ptr, | ||
500 | unsigned long cmdline_len, unsigned flags) | ||
501 | { | ||
502 | int ret = 0; | ||
503 | void *ldata; | ||
504 | |||
505 | ret = copy_file_from_fd(kernel_fd, &image->kernel_buf, | ||
506 | &image->kernel_buf_len); | ||
507 | if (ret) | ||
508 | return ret; | ||
509 | |||
510 | /* Call arch image probe handlers */ | ||
511 | ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, | ||
512 | image->kernel_buf_len); | ||
513 | |||
514 | if (ret) | ||
515 | goto out; | ||
516 | |||
517 | #ifdef CONFIG_KEXEC_VERIFY_SIG | ||
518 | ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf, | ||
519 | image->kernel_buf_len); | ||
520 | if (ret) { | ||
521 | pr_debug("kernel signature verification failed.\n"); | ||
522 | goto out; | ||
523 | } | ||
524 | pr_debug("kernel signature verification successful.\n"); | ||
525 | #endif | ||
526 | /* It is possible that no initramfs is being loaded */ | ||
527 | if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { | ||
528 | ret = copy_file_from_fd(initrd_fd, &image->initrd_buf, | ||
529 | &image->initrd_buf_len); | ||
530 | if (ret) | ||
531 | goto out; | ||
532 | } | ||
533 | |||
534 | if (cmdline_len) { | ||
535 | image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL); | ||
536 | if (!image->cmdline_buf) { | ||
537 | ret = -ENOMEM; | ||
538 | goto out; | ||
539 | } | ||
540 | |||
541 | ret = copy_from_user(image->cmdline_buf, cmdline_ptr, | ||
542 | cmdline_len); | ||
543 | if (ret) { | ||
544 | ret = -EFAULT; | ||
545 | goto out; | ||
546 | } | ||
547 | |||
548 | image->cmdline_buf_len = cmdline_len; | ||
549 | |||
550 | /* command line should be a string with last byte null */ | ||
551 | if (image->cmdline_buf[cmdline_len - 1] != '\0') { | ||
552 | ret = -EINVAL; | ||
553 | goto out; | ||
554 | } | ||
555 | } | ||
556 | |||
557 | /* Call arch image load handlers */ | ||
558 | ldata = arch_kexec_kernel_image_load(image); | ||
559 | |||
560 | if (IS_ERR(ldata)) { | ||
561 | ret = PTR_ERR(ldata); | ||
562 | goto out; | ||
563 | } | ||
564 | |||
565 | image->image_loader_data = ldata; | ||
566 | out: | ||
567 | /* In case of error, free up all allocated memory in this function */ | ||
568 | if (ret) | ||
569 | kimage_file_post_load_cleanup(image); | ||
570 | return ret; | ||
571 | } | ||
572 | |||
573 | static int | ||
574 | kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, | ||
575 | int initrd_fd, const char __user *cmdline_ptr, | ||
576 | unsigned long cmdline_len, unsigned long flags) | ||
577 | { | ||
578 | int ret; | ||
579 | struct kimage *image; | ||
580 | bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH; | ||
581 | |||
582 | image = do_kimage_alloc_init(); | ||
583 | if (!image) | ||
584 | return -ENOMEM; | ||
585 | |||
586 | image->file_mode = 1; | ||
587 | |||
588 | if (kexec_on_panic) { | ||
589 | /* Enable special crash kernel control page alloc policy. */ | ||
590 | image->control_page = crashk_res.start; | ||
591 | image->type = KEXEC_TYPE_CRASH; | ||
592 | } | ||
593 | |||
594 | ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd, | ||
595 | cmdline_ptr, cmdline_len, flags); | ||
596 | if (ret) | ||
597 | goto out_free_image; | ||
598 | |||
599 | ret = sanity_check_segment_list(image); | ||
600 | if (ret) | ||
601 | goto out_free_post_load_bufs; | ||
602 | |||
603 | ret = -ENOMEM; | ||
604 | image->control_code_page = kimage_alloc_control_pages(image, | ||
605 | get_order(KEXEC_CONTROL_PAGE_SIZE)); | ||
606 | if (!image->control_code_page) { | ||
607 | pr_err("Could not allocate control_code_buffer\n"); | ||
608 | goto out_free_post_load_bufs; | ||
609 | } | ||
610 | |||
611 | if (!kexec_on_panic) { | ||
612 | image->swap_page = kimage_alloc_control_pages(image, 0); | ||
613 | if (!image->swap_page) { | ||
614 | pr_err("Could not allocate swap buffer\n"); | ||
615 | goto out_free_control_pages; | ||
616 | } | ||
617 | } | ||
618 | |||
619 | *rimage = image; | ||
620 | return 0; | ||
621 | out_free_control_pages: | ||
622 | kimage_free_page_list(&image->control_pages); | ||
623 | out_free_post_load_bufs: | ||
624 | kimage_file_post_load_cleanup(image); | ||
625 | out_free_image: | ||
626 | kfree(image); | ||
627 | return ret; | ||
628 | } | ||
629 | #else /* CONFIG_KEXEC_FILE */ | ||
630 | static inline void kimage_file_post_load_cleanup(struct kimage *image) { } | ||
631 | #endif /* CONFIG_KEXEC_FILE */ | ||
632 | |||
633 | static int kimage_is_destination_range(struct kimage *image, | ||
634 | unsigned long start, | ||
635 | unsigned long end) | ||
636 | { | ||
637 | unsigned long i; | ||
638 | |||
639 | for (i = 0; i < image->nr_segments; i++) { | ||
640 | unsigned long mstart, mend; | ||
641 | |||
642 | mstart = image->segment[i].mem; | ||
643 | mend = mstart + image->segment[i].memsz; | ||
644 | if ((end > mstart) && (start < mend)) | ||
645 | return 1; | ||
646 | } | ||
647 | |||
648 | return 0; | ||
649 | } | ||
650 | |||
651 | static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) | ||
652 | { | ||
653 | struct page *pages; | ||
654 | |||
655 | pages = alloc_pages(gfp_mask, order); | ||
656 | if (pages) { | ||
657 | unsigned int count, i; | ||
658 | pages->mapping = NULL; | ||
659 | set_page_private(pages, order); | ||
660 | count = 1 << order; | ||
661 | for (i = 0; i < count; i++) | ||
662 | SetPageReserved(pages + i); | ||
663 | } | ||
664 | |||
665 | return pages; | ||
666 | } | ||
667 | |||
668 | static void kimage_free_pages(struct page *page) | ||
669 | { | ||
670 | unsigned int order, count, i; | ||
671 | |||
672 | order = page_private(page); | ||
673 | count = 1 << order; | ||
674 | for (i = 0; i < count; i++) | ||
675 | ClearPageReserved(page + i); | ||
676 | __free_pages(page, order); | ||
677 | } | ||
678 | |||
679 | static void kimage_free_page_list(struct list_head *list) | ||
680 | { | ||
681 | struct list_head *pos, *next; | ||
682 | |||
683 | list_for_each_safe(pos, next, list) { | ||
684 | struct page *page; | ||
685 | |||
686 | page = list_entry(pos, struct page, lru); | ||
687 | list_del(&page->lru); | ||
688 | kimage_free_pages(page); | ||
689 | } | ||
690 | } | ||
691 | |||
692 | static struct page *kimage_alloc_normal_control_pages(struct kimage *image, | ||
693 | unsigned int order) | ||
694 | { | ||
695 | /* Control pages are special, they are the intermediaries | ||
696 | * that are needed while we copy the rest of the pages | ||
697 | * to their final resting place. As such they must | ||
698 | * not conflict with either the destination addresses | ||
699 | * or memory the kernel is already using. | ||
700 | * | ||
701 | * The only case where we really need more than one of | ||
702 | * these are for architectures where we cannot disable | ||
703 | * the MMU and must instead generate an identity mapped | ||
704 | * page table for all of the memory. | ||
705 | * | ||
706 | * At worst this runs in O(N) of the image size. | ||
707 | */ | ||
708 | struct list_head extra_pages; | ||
709 | struct page *pages; | ||
710 | unsigned int count; | ||
711 | |||
712 | count = 1 << order; | ||
713 | INIT_LIST_HEAD(&extra_pages); | ||
714 | |||
715 | /* Loop while I can allocate a page and the page allocated | ||
716 | * is a destination page. | ||
717 | */ | ||
718 | do { | ||
719 | unsigned long pfn, epfn, addr, eaddr; | ||
720 | |||
721 | pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); | ||
722 | if (!pages) | ||
723 | break; | ||
724 | pfn = page_to_pfn(pages); | ||
725 | epfn = pfn + count; | ||
726 | addr = pfn << PAGE_SHIFT; | ||
727 | eaddr = epfn << PAGE_SHIFT; | ||
728 | if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || | ||
729 | kimage_is_destination_range(image, addr, eaddr)) { | ||
730 | list_add(&pages->lru, &extra_pages); | ||
731 | pages = NULL; | ||
732 | } | ||
733 | } while (!pages); | ||
734 | |||
735 | if (pages) { | ||
736 | /* Remember the allocated page... */ | ||
737 | list_add(&pages->lru, &image->control_pages); | ||
738 | |||
739 | /* Because the page is already in its destination | ||
740 | * location we will never allocate another page at | ||
741 | * that address. Therefore kimage_alloc_pages | ||
742 | * will not return it (again) and we don't need | ||
743 | * to give it an entry in image->segment[]. | ||
744 | */ | ||
745 | } | ||
746 | /* Deal with the destination pages I have inadvertently allocated. | ||
747 | * | ||
748 | * Ideally I would convert multi-page allocations into single | ||
749 | * page allocations, and add everything to image->dest_pages. | ||
750 | * | ||
751 | * For now it is simpler to just free the pages. | ||
752 | */ | ||
753 | kimage_free_page_list(&extra_pages); | ||
754 | |||
755 | return pages; | ||
756 | } | ||
757 | |||
758 | static struct page *kimage_alloc_crash_control_pages(struct kimage *image, | ||
759 | unsigned int order) | ||
760 | { | ||
761 | /* Control pages are special, they are the intermediaries | ||
762 | * that are needed while we copy the rest of the pages | ||
763 | * to their final resting place. As such they must | ||
764 | * not conflict with either the destination addresses | ||
765 | * or memory the kernel is already using. | ||
766 | * | ||
767 | * Control pages are also the only pages we must allocate | ||
768 | * when loading a crash kernel. All of the other pages | ||
769 | * are specified by the segments and we just memcpy | ||
770 | * into them directly. | ||
771 | * | ||
772 | * The only case where we really need more than one of | ||
773 | * these are for architectures where we cannot disable | ||
774 | * the MMU and must instead generate an identity mapped | ||
775 | * page table for all of the memory. | ||
776 | * | ||
777 | * Given the low demand this implements a very simple | ||
778 | * allocator that finds the first hole of the appropriate | ||
779 | * size in the reserved memory region, and allocates all | ||
780 | * of the memory up to and including the hole. | ||
781 | */ | ||
782 | unsigned long hole_start, hole_end, size; | ||
783 | struct page *pages; | ||
784 | |||
785 | pages = NULL; | ||
786 | size = (1 << order) << PAGE_SHIFT; | ||
787 | hole_start = (image->control_page + (size - 1)) & ~(size - 1); | ||
788 | hole_end = hole_start + size - 1; | ||
789 | while (hole_end <= crashk_res.end) { | ||
790 | unsigned long i; | ||
791 | |||
792 | if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) | ||
793 | break; | ||
794 | /* See if I overlap any of the segments */ | ||
795 | for (i = 0; i < image->nr_segments; i++) { | ||
796 | unsigned long mstart, mend; | ||
797 | |||
798 | mstart = image->segment[i].mem; | ||
799 | mend = mstart + image->segment[i].memsz - 1; | ||
800 | if ((hole_end >= mstart) && (hole_start <= mend)) { | ||
801 | /* Advance the hole to the end of the segment */ | ||
802 | hole_start = (mend + (size - 1)) & ~(size - 1); | ||
803 | hole_end = hole_start + size - 1; | ||
804 | break; | ||
805 | } | ||
806 | } | ||
807 | /* If I don't overlap any segments I have found my hole! */ | ||
808 | if (i == image->nr_segments) { | ||
809 | pages = pfn_to_page(hole_start >> PAGE_SHIFT); | ||
810 | break; | ||
811 | } | ||
812 | } | ||
813 | if (pages) | ||
814 | image->control_page = hole_end; | ||
815 | |||
816 | return pages; | ||
817 | } | ||
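The hole search above rounds the candidate start up to the allocation size, which must be a power of two for the mask trick to work; a small worked sketch with illustrative values:

#include <stdio.h>

int main(void)
{
	unsigned long size = 2UL << 12;			/* order 1: two 4 KiB pages */
	unsigned long control_page = 0x1000123UL;	/* arbitrary starting point */
	unsigned long hole_start = (control_page + (size - 1)) & ~(size - 1);

	printf("size=0x%lx start=0x%lx -> hole_start=0x%lx\n",
	       size, control_page, hole_start);		/* hole_start=0x1002000 */
	return 0;
}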
818 | |||
819 | |||
820 | struct page *kimage_alloc_control_pages(struct kimage *image, | ||
821 | unsigned int order) | ||
822 | { | ||
823 | struct page *pages = NULL; | ||
824 | |||
825 | switch (image->type) { | ||
826 | case KEXEC_TYPE_DEFAULT: | ||
827 | pages = kimage_alloc_normal_control_pages(image, order); | ||
828 | break; | ||
829 | case KEXEC_TYPE_CRASH: | ||
830 | pages = kimage_alloc_crash_control_pages(image, order); | ||
831 | break; | ||
832 | } | ||
833 | |||
834 | return pages; | ||
835 | } | ||
836 | |||
837 | static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) | ||
838 | { | ||
839 | if (*image->entry != 0) | ||
840 | image->entry++; | ||
841 | |||
842 | if (image->entry == image->last_entry) { | ||
843 | kimage_entry_t *ind_page; | ||
844 | struct page *page; | ||
845 | |||
846 | page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); | ||
847 | if (!page) | ||
848 | return -ENOMEM; | ||
849 | |||
850 | ind_page = page_address(page); | ||
851 | *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; | ||
852 | image->entry = ind_page; | ||
853 | image->last_entry = ind_page + | ||
854 | ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); | ||
855 | } | ||
856 | *image->entry = entry; | ||
857 | image->entry++; | ||
858 | *image->entry = 0; | ||
859 | |||
860 | return 0; | ||
861 | } | ||
862 | |||
863 | static int kimage_set_destination(struct kimage *image, | ||
864 | unsigned long destination) | ||
865 | { | ||
866 | int result; | ||
867 | |||
868 | destination &= PAGE_MASK; | ||
869 | result = kimage_add_entry(image, destination | IND_DESTINATION); | ||
870 | |||
871 | return result; | ||
872 | } | ||
873 | |||
874 | |||
875 | static int kimage_add_page(struct kimage *image, unsigned long page) | ||
876 | { | ||
877 | int result; | ||
878 | |||
879 | page &= PAGE_MASK; | ||
880 | result = kimage_add_entry(image, page | IND_SOURCE); | ||
881 | |||
882 | return result; | ||
883 | } | ||
884 | |||
885 | |||
886 | static void kimage_free_extra_pages(struct kimage *image) | ||
887 | { | ||
888 | /* Walk through and free any extra destination pages I may have */ | ||
889 | kimage_free_page_list(&image->dest_pages); | ||
890 | |||
891 | /* Walk through and free any unusable pages I have cached */ | ||
892 | kimage_free_page_list(&image->unusable_pages); | ||
893 | |||
894 | } | ||
895 | static void kimage_terminate(struct kimage *image) | ||
896 | { | ||
897 | if (*image->entry != 0) | ||
898 | image->entry++; | ||
899 | |||
900 | *image->entry = IND_DONE; | ||
901 | } | ||
902 | |||
903 | #define for_each_kimage_entry(image, ptr, entry) \ | ||
904 | for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ | ||
905 | ptr = (entry & IND_INDIRECTION) ? \ | ||
906 | phys_to_virt((entry & PAGE_MASK)) : ptr + 1) | ||
907 | |||
908 | static void kimage_free_entry(kimage_entry_t entry) | ||
909 | { | ||
910 | struct page *page; | ||
911 | |||
912 | page = pfn_to_page(entry >> PAGE_SHIFT); | ||
913 | kimage_free_pages(page); | ||
914 | } | ||
915 | |||
916 | static void kimage_free(struct kimage *image) | ||
917 | { | ||
918 | kimage_entry_t *ptr, entry; | ||
919 | kimage_entry_t ind = 0; | ||
920 | |||
921 | if (!image) | ||
922 | return; | ||
923 | |||
924 | kimage_free_extra_pages(image); | ||
925 | for_each_kimage_entry(image, ptr, entry) { | ||
926 | if (entry & IND_INDIRECTION) { | ||
927 | /* Free the previous indirection page */ | ||
928 | if (ind & IND_INDIRECTION) | ||
929 | kimage_free_entry(ind); | ||
930 | /* Save this indirection page until we are | ||
931 | * done with it. | ||
932 | */ | ||
933 | ind = entry; | ||
934 | } else if (entry & IND_SOURCE) | ||
935 | kimage_free_entry(entry); | ||
936 | } | ||
937 | /* Free the final indirection page */ | ||
938 | if (ind & IND_INDIRECTION) | ||
939 | kimage_free_entry(ind); | ||
940 | |||
941 | /* Handle any machine specific cleanup */ | ||
942 | machine_kexec_cleanup(image); | ||
943 | |||
944 | /* Free the kexec control pages... */ | ||
945 | kimage_free_page_list(&image->control_pages); | ||
946 | |||
947 | /* | ||
948 | * Free up any temporary buffers allocated. This might be hit if an | ||
949 | * error occurred much later, after buffer allocation. | ||
950 | */ | ||
951 | if (image->file_mode) | ||
952 | kimage_file_post_load_cleanup(image); | ||
953 | |||
954 | kfree(image); | ||
955 | } | ||
956 | |||
957 | static kimage_entry_t *kimage_dst_used(struct kimage *image, | ||
958 | unsigned long page) | ||
959 | { | ||
960 | kimage_entry_t *ptr, entry; | ||
961 | unsigned long destination = 0; | ||
962 | |||
963 | for_each_kimage_entry(image, ptr, entry) { | ||
964 | if (entry & IND_DESTINATION) | ||
965 | destination = entry & PAGE_MASK; | ||
966 | else if (entry & IND_SOURCE) { | ||
967 | if (page == destination) | ||
968 | return ptr; | ||
969 | destination += PAGE_SIZE; | ||
970 | } | ||
971 | } | ||
972 | |||
973 | return NULL; | ||
974 | } | ||
975 | |||
976 | static struct page *kimage_alloc_page(struct kimage *image, | ||
977 | gfp_t gfp_mask, | ||
978 | unsigned long destination) | ||
979 | { | ||
980 | /* | ||
981 | * Here we implement safeguards to ensure that a source page | ||
982 | * is not copied to its destination page before the data on | ||
983 | * the destination page is no longer useful. | ||
984 | * | ||
985 | * To do this we maintain the invariant that a source page is | ||
986 | * either its own destination page, or it is not a | ||
987 | * destination page at all. | ||
988 | * | ||
989 | * That is slightly stronger than required, but the proof | ||
991 | * that no problems will occur is trivial, and the | ||
992 | * implementation is simple to verify. | ||
992 | * | ||
993 | * When allocating all pages normally this algorithm will run | ||
994 | * in O(N) time, but in the worst case it will run in O(N^2) | ||
995 | * time. If the runtime is a problem the data structures can | ||
996 | * be fixed. | ||
997 | */ | ||
998 | struct page *page; | ||
999 | unsigned long addr; | ||
1000 | |||
1001 | /* | ||
1002 | * Walk through the list of destination pages, and see if I | ||
1003 | * have a match. | ||
1004 | */ | ||
1005 | list_for_each_entry(page, &image->dest_pages, lru) { | ||
1006 | addr = page_to_pfn(page) << PAGE_SHIFT; | ||
1007 | if (addr == destination) { | ||
1008 | list_del(&page->lru); | ||
1009 | return page; | ||
1010 | } | ||
1011 | } | ||
1012 | page = NULL; | ||
1013 | while (1) { | ||
1014 | kimage_entry_t *old; | ||
1015 | |||
1016 | /* Allocate a page, if we run out of memory give up */ | ||
1017 | page = kimage_alloc_pages(gfp_mask, 0); | ||
1018 | if (!page) | ||
1019 | return NULL; | ||
1020 | /* If the page cannot be used, file it away */ | ||
1021 | if (page_to_pfn(page) > | ||
1022 | (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { | ||
1023 | list_add(&page->lru, &image->unusable_pages); | ||
1024 | continue; | ||
1025 | } | ||
1026 | addr = page_to_pfn(page) << PAGE_SHIFT; | ||
1027 | |||
1028 | /* If it is the destination page we want, use it */ | ||
1029 | if (addr == destination) | ||
1030 | break; | ||
1031 | |||
1032 | /* If the page is not a destination page use it */ | ||
1033 | if (!kimage_is_destination_range(image, addr, | ||
1034 | addr + PAGE_SIZE)) | ||
1035 | break; | ||
1036 | |||
1037 | /* | ||
1038 | * I know that the page is someone's destination page. | ||
1039 | * See if there is already a source page for this | ||
1040 | * destination page. And if so swap the source pages. | ||
1041 | */ | ||
1042 | old = kimage_dst_used(image, addr); | ||
1043 | if (old) { | ||
1044 | /* If so move it */ | ||
1045 | unsigned long old_addr; | ||
1046 | struct page *old_page; | ||
1047 | |||
1048 | old_addr = *old & PAGE_MASK; | ||
1049 | old_page = pfn_to_page(old_addr >> PAGE_SHIFT); | ||
1050 | copy_highpage(page, old_page); | ||
1051 | *old = addr | (*old & ~PAGE_MASK); | ||
1052 | |||
1053 | /* The old page I have found cannot be a | ||
1054 | * destination page, so return it if its | ||
1055 | * gfp_flags honor the ones passed in. | ||
1056 | */ | ||
1057 | if (!(gfp_mask & __GFP_HIGHMEM) && | ||
1058 | PageHighMem(old_page)) { | ||
1059 | kimage_free_pages(old_page); | ||
1060 | continue; | ||
1061 | } | ||
1062 | addr = old_addr; | ||
1063 | page = old_page; | ||
1064 | break; | ||
1065 | } else { | ||
1066 | /* Place the page on the destination list; I | ||
1067 | * will use it later. | ||
1068 | */ | ||
1069 | list_add(&page->lru, &image->dest_pages); | ||
1070 | } | ||
1071 | } | ||
1072 | |||
1073 | return page; | ||
1074 | } | ||
1075 | |||
1076 | static int kimage_load_normal_segment(struct kimage *image, | ||
1077 | struct kexec_segment *segment) | ||
1078 | { | ||
1079 | unsigned long maddr; | ||
1080 | size_t ubytes, mbytes; | ||
1081 | int result; | ||
1082 | unsigned char __user *buf = NULL; | ||
1083 | unsigned char *kbuf = NULL; | ||
1084 | |||
1085 | result = 0; | ||
1086 | if (image->file_mode) | ||
1087 | kbuf = segment->kbuf; | ||
1088 | else | ||
1089 | buf = segment->buf; | ||
1090 | ubytes = segment->bufsz; | ||
1091 | mbytes = segment->memsz; | ||
1092 | maddr = segment->mem; | ||
1093 | |||
1094 | result = kimage_set_destination(image, maddr); | ||
1095 | if (result < 0) | ||
1096 | goto out; | ||
1097 | |||
1098 | while (mbytes) { | ||
1099 | struct page *page; | ||
1100 | char *ptr; | ||
1101 | size_t uchunk, mchunk; | ||
1102 | |||
1103 | page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); | ||
1104 | if (!page) { | ||
1105 | result = -ENOMEM; | ||
1106 | goto out; | ||
1107 | } | ||
1108 | result = kimage_add_page(image, page_to_pfn(page) | ||
1109 | << PAGE_SHIFT); | ||
1110 | if (result < 0) | ||
1111 | goto out; | ||
1112 | |||
1113 | ptr = kmap(page); | ||
1114 | /* Start with a clear page */ | ||
1115 | clear_page(ptr); | ||
1116 | ptr += maddr & ~PAGE_MASK; | ||
1117 | mchunk = min_t(size_t, mbytes, | ||
1118 | PAGE_SIZE - (maddr & ~PAGE_MASK)); | ||
1119 | uchunk = min(ubytes, mchunk); | ||
1120 | |||
1121 | /* For file based kexec, source pages are in kernel memory */ | ||
1122 | if (image->file_mode) | ||
1123 | memcpy(ptr, kbuf, uchunk); | ||
1124 | else | ||
1125 | result = copy_from_user(ptr, buf, uchunk); | ||
1126 | kunmap(page); | ||
1127 | if (result) { | ||
1128 | result = -EFAULT; | ||
1129 | goto out; | ||
1130 | } | ||
1131 | ubytes -= uchunk; | ||
1132 | maddr += mchunk; | ||
1133 | if (image->file_mode) | ||
1134 | kbuf += mchunk; | ||
1135 | else | ||
1136 | buf += mchunk; | ||
1137 | mbytes -= mchunk; | ||
1138 | } | ||
1139 | out: | ||
1140 | return result; | ||
1141 | } | ||
1142 | |||
1143 | static int kimage_load_crash_segment(struct kimage *image, | ||
1144 | struct kexec_segment *segment) | ||
1145 | { | ||
1146 | /* For crash dump kernels we simply copy the data from | ||
1147 | * user space to its destination. | ||
1148 | * We do things a page at a time for the sake of kmap. | ||
1149 | */ | ||
1150 | unsigned long maddr; | ||
1151 | size_t ubytes, mbytes; | ||
1152 | int result; | ||
1153 | unsigned char __user *buf = NULL; | ||
1154 | unsigned char *kbuf = NULL; | ||
1155 | |||
1156 | result = 0; | ||
1157 | if (image->file_mode) | ||
1158 | kbuf = segment->kbuf; | ||
1159 | else | ||
1160 | buf = segment->buf; | ||
1161 | ubytes = segment->bufsz; | ||
1162 | mbytes = segment->memsz; | ||
1163 | maddr = segment->mem; | ||
1164 | while (mbytes) { | ||
1165 | struct page *page; | ||
1166 | char *ptr; | ||
1167 | size_t uchunk, mchunk; | ||
1168 | |||
1169 | page = pfn_to_page(maddr >> PAGE_SHIFT); | ||
1170 | if (!page) { | ||
1171 | result = -ENOMEM; | ||
1172 | goto out; | ||
1173 | } | ||
1174 | ptr = kmap(page); | ||
1175 | ptr += maddr & ~PAGE_MASK; | ||
1176 | mchunk = min_t(size_t, mbytes, | ||
1177 | PAGE_SIZE - (maddr & ~PAGE_MASK)); | ||
1178 | uchunk = min(ubytes, mchunk); | ||
1179 | if (mchunk > uchunk) { | ||
1180 | /* Zero the trailing part of the page */ | ||
1181 | memset(ptr + uchunk, 0, mchunk - uchunk); | ||
1182 | } | ||
1183 | |||
1184 | /* For file based kexec, source pages are in kernel memory */ | ||
1185 | if (image->file_mode) | ||
1186 | memcpy(ptr, kbuf, uchunk); | ||
1187 | else | ||
1188 | result = copy_from_user(ptr, buf, uchunk); | ||
1189 | kexec_flush_icache_page(page); | ||
1190 | kunmap(page); | ||
1191 | if (result) { | ||
1192 | result = -EFAULT; | ||
1193 | goto out; | ||
1194 | } | ||
1195 | ubytes -= uchunk; | ||
1196 | maddr += mchunk; | ||
1197 | if (image->file_mode) | ||
1198 | kbuf += mchunk; | ||
1199 | else | ||
1200 | buf += mchunk; | ||
1201 | mbytes -= mchunk; | ||
1202 | } | ||
1203 | out: | ||
1204 | return result; | ||
1205 | } | ||
1206 | |||
1207 | static int kimage_load_segment(struct kimage *image, | ||
1208 | struct kexec_segment *segment) | ||
1209 | { | ||
1210 | int result = -ENOMEM; | ||
1211 | |||
1212 | switch (image->type) { | ||
1213 | case KEXEC_TYPE_DEFAULT: | ||
1214 | result = kimage_load_normal_segment(image, segment); | ||
1215 | break; | ||
1216 | case KEXEC_TYPE_CRASH: | ||
1217 | result = kimage_load_crash_segment(image, segment); | ||
1218 | break; | ||
1219 | } | ||
1220 | |||
1221 | return result; | ||
1222 | } | ||
1223 | |||
1224 | /* | 104 | /* |
1225 | * Exec Kernel system call: for obvious reasons only root may call it. | 105 | * Exec Kernel system call: for obvious reasons only root may call it. |
1226 | * | 106 | * |
@@ -1241,11 +121,6 @@ static int kimage_load_segment(struct kimage *image, | |||
1241 | * kexec does not sync, or unmount filesystems so if you need | 121 | * kexec does not sync, or unmount filesystems so if you need |
1242 | * that to happen you need to do that yourself. | 122 | * that to happen you need to do that yourself. |
1243 | */ | 123 | */ |
1244 | struct kimage *kexec_image; | ||
1245 | struct kimage *kexec_crash_image; | ||
1246 | int kexec_load_disabled; | ||
1247 | |||
1248 | static DEFINE_MUTEX(kexec_mutex); | ||
1249 | 124 | ||
1250 | SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, | 125 | SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, |
1251 | struct kexec_segment __user *, segments, unsigned long, flags) | 126 | struct kexec_segment __user *, segments, unsigned long, flags) |
@@ -1340,18 +215,6 @@ out: | |||
1340 | return result; | 215 | return result; |
1341 | } | 216 | } |
1342 | 217 | ||
1343 | /* | ||
1344 | * Add and remove page tables for crashkernel memory | ||
1345 | * | ||
1346 | * Provide an empty default implementation here -- architecture | ||
1347 | * code may override this | ||
1348 | */ | ||
1349 | void __weak crash_map_reserved_pages(void) | ||
1350 | {} | ||
1351 | |||
1352 | void __weak crash_unmap_reserved_pages(void) | ||
1353 | {} | ||
1354 | |||
1355 | #ifdef CONFIG_COMPAT | 218 | #ifdef CONFIG_COMPAT |
1356 | COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, | 219 | COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, |
1357 | compat_ulong_t, nr_segments, | 220 | compat_ulong_t, nr_segments, |
@@ -1390,1391 +253,3 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, | |||
1390 | return sys_kexec_load(entry, nr_segments, ksegments, flags); | 253 | return sys_kexec_load(entry, nr_segments, ksegments, flags); |
1391 | } | 254 | } |
1392 | #endif | 255 | #endif |
1393 | |||
1394 | #ifdef CONFIG_KEXEC_FILE | ||
1395 | SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, | ||
1396 | unsigned long, cmdline_len, const char __user *, cmdline_ptr, | ||
1397 | unsigned long, flags) | ||
1398 | { | ||
1399 | int ret = 0, i; | ||
1400 | struct kimage **dest_image, *image; | ||
1401 | |||
1402 | /* We only trust the superuser with rebooting the system. */ | ||
1403 | if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) | ||
1404 | return -EPERM; | ||
1405 | |||
1406 | /* Make sure we have a legal set of flags */ | ||
1407 | if (flags != (flags & KEXEC_FILE_FLAGS)) | ||
1408 | return -EINVAL; | ||
1409 | |||
1410 | image = NULL; | ||
1411 | |||
1412 | if (!mutex_trylock(&kexec_mutex)) | ||
1413 | return -EBUSY; | ||
1414 | |||
1415 | dest_image = &kexec_image; | ||
1416 | if (flags & KEXEC_FILE_ON_CRASH) | ||
1417 | dest_image = &kexec_crash_image; | ||
1418 | |||
1419 | if (flags & KEXEC_FILE_UNLOAD) | ||
1420 | goto exchange; | ||
1421 | |||
1422 | /* | ||
1423 | * In case of a crash, the new kernel gets loaded in the reserved region. It is | ||
1424 | * the same memory where the old crash kernel might be loaded. Free any | ||
1425 | * current crash dump kernel before we corrupt it. | ||
1426 | */ | ||
1427 | if (flags & KEXEC_FILE_ON_CRASH) | ||
1428 | kimage_free(xchg(&kexec_crash_image, NULL)); | ||
1429 | |||
1430 | ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr, | ||
1431 | cmdline_len, flags); | ||
1432 | if (ret) | ||
1433 | goto out; | ||
1434 | |||
1435 | ret = machine_kexec_prepare(image); | ||
1436 | if (ret) | ||
1437 | goto out; | ||
1438 | |||
1439 | ret = kexec_calculate_store_digests(image); | ||
1440 | if (ret) | ||
1441 | goto out; | ||
1442 | |||
1443 | for (i = 0; i < image->nr_segments; i++) { | ||
1444 | struct kexec_segment *ksegment; | ||
1445 | |||
1446 | ksegment = &image->segment[i]; | ||
1447 | pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n", | ||
1448 | i, ksegment->buf, ksegment->bufsz, ksegment->mem, | ||
1449 | ksegment->memsz); | ||
1450 | |||
1451 | ret = kimage_load_segment(image, &image->segment[i]); | ||
1452 | if (ret) | ||
1453 | goto out; | ||
1454 | } | ||
1455 | |||
1456 | kimage_terminate(image); | ||
1457 | |||
1458 | /* | ||
1459 | * Free up any temporary buffers allocated which are not needed | ||
1460 | * after image has been loaded | ||
1461 | */ | ||
1462 | kimage_file_post_load_cleanup(image); | ||
1463 | exchange: | ||
1464 | image = xchg(dest_image, image); | ||
1465 | out: | ||
1466 | mutex_unlock(&kexec_mutex); | ||
1467 | kimage_free(image); | ||
1468 | return ret; | ||
1469 | } | ||
1470 | |||
1471 | #endif /* CONFIG_KEXEC_FILE */ | ||
1472 | |||
1473 | void crash_kexec(struct pt_regs *regs) | ||
1474 | { | ||
1475 | /* Take the kexec_mutex here to prevent sys_kexec_load | ||
1476 | * running on one cpu from replacing the crash kernel | ||
1477 | * we are using after a panic on a different cpu. | ||
1478 | * | ||
1479 | * If the crash kernel was not located in a fixed area | ||
1480 | * of memory the xchg(&kexec_crash_image) would be | ||
1481 | * sufficient. But since I reuse the memory... | ||
1482 | */ | ||
1483 | if (mutex_trylock(&kexec_mutex)) { | ||
1484 | if (kexec_crash_image) { | ||
1485 | struct pt_regs fixed_regs; | ||
1486 | |||
1487 | crash_setup_regs(&fixed_regs, regs); | ||
1488 | crash_save_vmcoreinfo(); | ||
1489 | machine_crash_shutdown(&fixed_regs); | ||
1490 | machine_kexec(kexec_crash_image); | ||
1491 | } | ||
1492 | mutex_unlock(&kexec_mutex); | ||
1493 | } | ||
1494 | } | ||
1495 | |||
1496 | size_t crash_get_memory_size(void) | ||
1497 | { | ||
1498 | size_t size = 0; | ||
1499 | mutex_lock(&kexec_mutex); | ||
1500 | if (crashk_res.end != crashk_res.start) | ||
1501 | size = resource_size(&crashk_res); | ||
1502 | mutex_unlock(&kexec_mutex); | ||
1503 | return size; | ||
1504 | } | ||
1505 | |||
1506 | void __weak crash_free_reserved_phys_range(unsigned long begin, | ||
1507 | unsigned long end) | ||
1508 | { | ||
1509 | unsigned long addr; | ||
1510 | |||
1511 | for (addr = begin; addr < end; addr += PAGE_SIZE) | ||
1512 | free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); | ||
1513 | } | ||
1514 | |||
1515 | int crash_shrink_memory(unsigned long new_size) | ||
1516 | { | ||
1517 | int ret = 0; | ||
1518 | unsigned long start, end; | ||
1519 | unsigned long old_size; | ||
1520 | struct resource *ram_res; | ||
1521 | |||
1522 | mutex_lock(&kexec_mutex); | ||
1523 | |||
1524 | if (kexec_crash_image) { | ||
1525 | ret = -ENOENT; | ||
1526 | goto unlock; | ||
1527 | } | ||
1528 | start = crashk_res.start; | ||
1529 | end = crashk_res.end; | ||
1530 | old_size = (end == 0) ? 0 : end - start + 1; | ||
1531 | if (new_size >= old_size) { | ||
1532 | ret = (new_size == old_size) ? 0 : -EINVAL; | ||
1533 | goto unlock; | ||
1534 | } | ||
1535 | |||
1536 | ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); | ||
1537 | if (!ram_res) { | ||
1538 | ret = -ENOMEM; | ||
1539 | goto unlock; | ||
1540 | } | ||
1541 | |||
1542 | start = roundup(start, KEXEC_CRASH_MEM_ALIGN); | ||
1543 | end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); | ||
1544 | |||
1545 | crash_map_reserved_pages(); | ||
1546 | crash_free_reserved_phys_range(end, crashk_res.end); | ||
1547 | |||
1548 | if ((start == end) && (crashk_res.parent != NULL)) | ||
1549 | release_resource(&crashk_res); | ||
1550 | |||
1551 | ram_res->start = end; | ||
1552 | ram_res->end = crashk_res.end; | ||
1553 | ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; | ||
1554 | ram_res->name = "System RAM"; | ||
1555 | |||
1556 | crashk_res.end = end - 1; | ||
1557 | |||
1558 | insert_resource(&iomem_resource, ram_res); | ||
1559 | crash_unmap_reserved_pages(); | ||
1560 | |||
1561 | unlock: | ||
1562 | mutex_unlock(&kexec_mutex); | ||
1563 | return ret; | ||
1564 | } | ||
1565 | |||
1566 | static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, | ||
1567 | size_t data_len) | ||
1568 | { | ||
1569 | struct elf_note note; | ||
1570 | |||
1571 | note.n_namesz = strlen(name) + 1; | ||
1572 | note.n_descsz = data_len; | ||
1573 | note.n_type = type; | ||
1574 | memcpy(buf, ¬e, sizeof(note)); | ||
1575 | buf += (sizeof(note) + 3)/4; | ||
1576 | memcpy(buf, name, note.n_namesz); | ||
1577 | buf += (note.n_namesz + 3)/4; | ||
1578 | memcpy(buf, data, note.n_descsz); | ||
1579 | buf += (note.n_descsz + 3)/4; | ||
1580 | |||
1581 | return buf; | ||
1582 | } | ||
1583 | |||
1584 | static void final_note(u32 *buf) | ||
1585 | { | ||
1586 | struct elf_note note; | ||
1587 | |||
1588 | note.n_namesz = 0; | ||
1589 | note.n_descsz = 0; | ||
1590 | note.n_type = 0; | ||
1591 | memcpy(buf, ¬e, sizeof(note)); | ||
1592 | } | ||
1593 | |||
1594 | void crash_save_cpu(struct pt_regs *regs, int cpu) | ||
1595 | { | ||
1596 | struct elf_prstatus prstatus; | ||
1597 | u32 *buf; | ||
1598 | |||
1599 | if ((cpu < 0) || (cpu >= nr_cpu_ids)) | ||
1600 | return; | ||
1601 | |||
1602 | /* Using ELF notes here is opportunistic. | ||
1603 | * I need a well defined structure format | ||
1604 | * for the data I pass, and I need tags | ||
1605 | * on the data to indicate what information I have | ||
1606 | * squirrelled away. ELF notes happen to provide | ||
1607 | * all of that, so there is no need to invent something new. | ||
1608 | */ | ||
1609 | buf = (u32 *)per_cpu_ptr(crash_notes, cpu); | ||
1610 | if (!buf) | ||
1611 | return; | ||
1612 | memset(&prstatus, 0, sizeof(prstatus)); | ||
1613 | prstatus.pr_pid = current->pid; | ||
1614 | elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); | ||
1615 | buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, | ||
1616 | &prstatus, sizeof(prstatus)); | ||
1617 | final_note(buf); | ||
1618 | } | ||
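For orientation, append_elf_note() above packs a header of three u32s, then the name, then the descriptor, each padded to a 4-byte boundary. A standalone sketch of that arithmetic; the descriptor size is an illustrative placeholder, not a definitive value:

#include <stdio.h>

/* Round a byte count up to the next multiple of four. */
static size_t round4(size_t n)
{
	return (n + 3) & ~(size_t)3;
}

int main(void)
{
	size_t hdr = 3 * sizeof(unsigned int);	/* n_namesz, n_descsz, n_type */
	size_t namesz = sizeof("CORE");		/* includes the trailing NUL: 5 */
	size_t descsz = 336;			/* illustrative register-dump size */

	printf("note occupies %zu bytes\n",
	       hdr + round4(namesz) + round4(descsz));	/* 12 + 8 + 336 = 356 */
	return 0;
}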
1619 | |||
1620 | static int __init crash_notes_memory_init(void) | ||
1621 | { | ||
1622 | /* Allocate memory for saving cpu registers. */ | ||
1623 | crash_notes = alloc_percpu(note_buf_t); | ||
1624 | if (!crash_notes) { | ||
1625 | pr_warn("Kexec: Memory allocation for saving cpu register states failed\n"); | ||
1626 | return -ENOMEM; | ||
1627 | } | ||
1628 | return 0; | ||
1629 | } | ||
1630 | subsys_initcall(crash_notes_memory_init); | ||
1631 | |||
1632 | |||
1633 | /* | ||
1634 | * parsing the "crashkernel" commandline | ||
1635 | * | ||
1636 | * this code is intended to be called from architecture specific code | ||
1637 | */ | ||
1638 | |||
1639 | |||
1640 | /* | ||
1641 | * This function parses command lines in the format | ||
1642 | * | ||
1643 | * crashkernel=ramsize-range:size[,...][@offset] | ||
1644 | * | ||
1645 | * The function returns 0 on success and -EINVAL on failure. | ||
1646 | */ | ||
1647 | static int __init parse_crashkernel_mem(char *cmdline, | ||
1648 | unsigned long long system_ram, | ||
1649 | unsigned long long *crash_size, | ||
1650 | unsigned long long *crash_base) | ||
1651 | { | ||
1652 | char *cur = cmdline, *tmp; | ||
1653 | |||
1654 | /* for each entry of the comma-separated list */ | ||
1655 | do { | ||
1656 | unsigned long long start, end = ULLONG_MAX, size; | ||
1657 | |||
1658 | /* get the start of the range */ | ||
1659 | start = memparse(cur, &tmp); | ||
1660 | if (cur == tmp) { | ||
1661 | pr_warn("crashkernel: Memory value expected\n"); | ||
1662 | return -EINVAL; | ||
1663 | } | ||
1664 | cur = tmp; | ||
1665 | if (*cur != '-') { | ||
1666 | pr_warn("crashkernel: '-' expected\n"); | ||
1667 | return -EINVAL; | ||
1668 | } | ||
1669 | cur++; | ||
1670 | |||
1671 | /* if no ':' is here, then we read the end */ | ||
1672 | if (*cur != ':') { | ||
1673 | end = memparse(cur, &tmp); | ||
1674 | if (cur == tmp) { | ||
1675 | pr_warn("crashkernel: Memory value expected\n"); | ||
1676 | return -EINVAL; | ||
1677 | } | ||
1678 | cur = tmp; | ||
1679 | if (end <= start) { | ||
1680 | pr_warn("crashkernel: end <= start\n"); | ||
1681 | return -EINVAL; | ||
1682 | } | ||
1683 | } | ||
1684 | |||
1685 | if (*cur != ':') { | ||
1686 | pr_warn("crashkernel: ':' expected\n"); | ||
1687 | return -EINVAL; | ||
1688 | } | ||
1689 | cur++; | ||
1690 | |||
1691 | size = memparse(cur, &tmp); | ||
1692 | if (cur == tmp) { | ||
1693 | pr_warn("Memory value expected\n"); | ||
1694 | return -EINVAL; | ||
1695 | } | ||
1696 | cur = tmp; | ||
1697 | if (size >= system_ram) { | ||
1698 | pr_warn("crashkernel: invalid size\n"); | ||
1699 | return -EINVAL; | ||
1700 | } | ||
1701 | |||
1702 | /* match ? */ | ||
1703 | if (system_ram >= start && system_ram < end) { | ||
1704 | *crash_size = size; | ||
1705 | break; | ||
1706 | } | ||
1707 | } while (*cur++ == ','); | ||
1708 | |||
1709 | if (*crash_size > 0) { | ||
1710 | while (*cur && *cur != ' ' && *cur != '@') | ||
1711 | cur++; | ||
1712 | if (*cur == '@') { | ||
1713 | cur++; | ||
1714 | *crash_base = memparse(cur, &tmp); | ||
1715 | if (cur == tmp) { | ||
1716 | pr_warn("Memory value expected after '@'\n"); | ||
1717 | return -EINVAL; | ||
1718 | } | ||
1719 | } | ||
1720 | } | ||
1721 | |||
1722 | return 0; | ||
1723 | } | ||
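To make the extended syntax concrete: crashkernel=512M-2G:64M,2G-:128M reserves 64 MiB when total RAM lies in [512M, 2G) and 128 MiB from 2 GiB upward. A hypothetical standalone sketch of that range matching (hard-coded table instead of parsing; not the kernel code):

#include <stdio.h>

struct ck_range {
	unsigned long long start, end, size;
};

int main(void)
{
	/* crashkernel=512M-2G:64M,2G-:128M */
	const struct ck_range tbl[] = {
		{ 512ULL << 20, 2ULL << 30, 64ULL << 20 },
		{ 2ULL << 30,   ~0ULL,      128ULL << 20 },	/* open-ended range */
	};
	unsigned long long system_ram = 4ULL << 30;	/* a 4 GiB machine */
	unsigned long long crash_size = 0;

	for (size_t i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
		if (system_ram >= tbl[i].start && system_ram < tbl[i].end) {
			crash_size = tbl[i].size;
			break;
		}
	}
	printf("reserve %llu MiB\n", crash_size >> 20);	/* prints 128 */
	return 0;
}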
1724 | |||
1725 | /* | ||
1726 | * This function parses "simple" (old) crashkernel command lines like | ||
1727 | * | ||
1728 | * crashkernel=size[@offset] | ||
1729 | * | ||
1730 | * It returns 0 on success and -EINVAL on failure. | ||
1731 | */ | ||
1732 | static int __init parse_crashkernel_simple(char *cmdline, | ||
1733 | unsigned long long *crash_size, | ||
1734 | unsigned long long *crash_base) | ||
1735 | { | ||
1736 | char *cur = cmdline; | ||
1737 | |||
1738 | *crash_size = memparse(cmdline, &cur); | ||
1739 | if (cmdline == cur) { | ||
1740 | pr_warn("crashkernel: memory value expected\n"); | ||
1741 | return -EINVAL; | ||
1742 | } | ||
1743 | |||
1744 | if (*cur == '@') | ||
1745 | *crash_base = memparse(cur+1, &cur); | ||
1746 | else if (*cur != ' ' && *cur != '\0') { | ||
1747 | pr_warn("crashkernel: unrecognized char\n"); | ||
1748 | return -EINVAL; | ||
1749 | } | ||
1750 | |||
1751 | return 0; | ||
1752 | } | ||
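As a concrete example, crashkernel=128M@16M requests 128 MiB reserved at physical address 16 MiB. A hedged userspace sketch of parsing that form, where strtoull plus a simplified K/M/G suffix handler stands in for the kernel's memparse():

#include <stdio.h>
#include <stdlib.h>

/* Parse a size with an optional K/M/G suffix (simplified assumption). */
static unsigned long long parse_size(const char *s, char **end)
{
	unsigned long long v = strtoull(s, end, 0);

	switch (**end) {
	case 'G': v <<= 10; /* fall through */
	case 'M': v <<= 10; /* fall through */
	case 'K': v <<= 10; (*end)++; break;
	}
	return v;
}

int main(void)
{
	char *cur;
	const char *cmdline = "128M@16M";
	unsigned long long size = parse_size(cmdline, &cur);
	unsigned long long base = (*cur == '@') ? parse_size(cur + 1, &cur) : 0;

	printf("size=%llu MiB base=0x%llx\n", size >> 20, base);
	return 0;
}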
1753 | |||
1754 | #define SUFFIX_HIGH 0 | ||
1755 | #define SUFFIX_LOW 1 | ||
1756 | #define SUFFIX_NULL 2 | ||
1757 | static __initdata char *suffix_tbl[] = { | ||
1758 | [SUFFIX_HIGH] = ",high", | ||
1759 | [SUFFIX_LOW] = ",low", | ||
1760 | [SUFFIX_NULL] = NULL, | ||
1761 | }; | ||
1762 | |||
1763 | /* | ||
1764 | * This function parses "suffix" crashkernel command lines like | ||
1765 | * | ||
1766 | * crashkernel=size,[high|low] | ||
1767 | * | ||
1768 | * It returns 0 on success and -EINVAL on failure. | ||
1769 | */ | ||
1770 | static int __init parse_crashkernel_suffix(char *cmdline, | ||
1771 | unsigned long long *crash_size, | ||
1772 | const char *suffix) | ||
1773 | { | ||
1774 | char *cur = cmdline; | ||
1775 | |||
1776 | *crash_size = memparse(cmdline, &cur); | ||
1777 | if (cmdline == cur) { | ||
1778 | pr_warn("crashkernel: memory value expected\n"); | ||
1779 | return -EINVAL; | ||
1780 | } | ||
1781 | |||
1782 | /* check with suffix */ | ||
1783 | if (strncmp(cur, suffix, strlen(suffix))) { | ||
1784 | pr_warn("crashkernel: unrecognized char\n"); | ||
1785 | return -EINVAL; | ||
1786 | } | ||
1787 | cur += strlen(suffix); | ||
1788 | if (*cur != ' ' && *cur != '\0') { | ||
1789 | pr_warn("crashkernel: unrecognized char\n"); | ||
1790 | return -EINVAL; | ||
1791 | } | ||
1792 | |||
1793 | return 0; | ||
1794 | } | ||
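Concretely, crashkernel=256M,high picks the ",high" variant. A small standalone illustration of the suffix comparison (not the kernel code; strcspn stands in for where memparse would stop):

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *cur = "256M,high";
	const char *suffix = ",high";
	const char *p = cur + strcspn(cur, ",");	/* size part ends here */

	printf("%s\n", strncmp(p, suffix, strlen(suffix)) == 0 ?
	       "suffix matched" : "unrecognized suffix");
	return 0;
}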
1795 | |||
1796 | static __init char *get_last_crashkernel(char *cmdline, | ||
1797 | const char *name, | ||
1798 | const char *suffix) | ||
1799 | { | ||
1800 | char *p = cmdline, *ck_cmdline = NULL; | ||
1801 | |||
1802 | /* find crashkernel and use the last one if there are more */ | ||
1803 | p = strstr(p, name); | ||
1804 | while (p) { | ||
1805 | char *end_p = strchr(p, ' '); | ||
1806 | char *q; | ||
1807 | |||
1808 | if (!end_p) | ||
1809 | end_p = p + strlen(p); | ||
1810 | |||
1811 | if (!suffix) { | ||
1812 | int i; | ||
1813 | |||
1814 | /* skip the one with any known suffix */ | ||
1815 | for (i = 0; suffix_tbl[i]; i++) { | ||
1816 | q = end_p - strlen(suffix_tbl[i]); | ||
1817 | if (!strncmp(q, suffix_tbl[i], | ||
1818 | strlen(suffix_tbl[i]))) | ||
1819 | goto next; | ||
1820 | } | ||
1821 | ck_cmdline = p; | ||
1822 | } else { | ||
1823 | q = end_p - strlen(suffix); | ||
1824 | if (!strncmp(q, suffix, strlen(suffix))) | ||
1825 | ck_cmdline = p; | ||
1826 | } | ||
1827 | next: | ||
1828 | p = strstr(p+1, name); | ||
1829 | } | ||
1830 | |||
1831 | if (!ck_cmdline) | ||
1832 | return NULL; | ||
1833 | |||
1834 | return ck_cmdline; | ||
1835 | } | ||
1836 | |||
1837 | static int __init __parse_crashkernel(char *cmdline, | ||
1838 | unsigned long long system_ram, | ||
1839 | unsigned long long *crash_size, | ||
1840 | unsigned long long *crash_base, | ||
1841 | const char *name, | ||
1842 | const char *suffix) | ||
1843 | { | ||
1844 | char *first_colon, *first_space; | ||
1845 | char *ck_cmdline; | ||
1846 | |||
1847 | BUG_ON(!crash_size || !crash_base); | ||
1848 | *crash_size = 0; | ||
1849 | *crash_base = 0; | ||
1850 | |||
1851 | ck_cmdline = get_last_crashkernel(cmdline, name, suffix); | ||
1852 | |||
1853 | if (!ck_cmdline) | ||
1854 | return -EINVAL; | ||
1855 | |||
1856 | ck_cmdline += strlen(name); | ||
1857 | |||
1858 | if (suffix) | ||
1859 | return parse_crashkernel_suffix(ck_cmdline, crash_size, | ||
1860 | suffix); | ||
1861 | /* | ||
1862 | * if the commandline contains a ':', then that's the extended | ||
1863 | * syntax -- if not, it must be the classic syntax | ||
1864 | */ | ||
1865 | first_colon = strchr(ck_cmdline, ':'); | ||
1866 | first_space = strchr(ck_cmdline, ' '); | ||
1867 | if (first_colon && (!first_space || first_colon < first_space)) | ||
1868 | return parse_crashkernel_mem(ck_cmdline, system_ram, | ||
1869 | crash_size, crash_base); | ||
1870 | |||
1871 | return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); | ||
1872 | } | ||
1873 | |||
1874 | /* | ||
1875 | * This function is the entry point for command line parsing and should be | ||
1876 | * called from the arch-specific code. | ||
1877 | */ | ||
1878 | int __init parse_crashkernel(char *cmdline, | ||
1879 | unsigned long long system_ram, | ||
1880 | unsigned long long *crash_size, | ||
1881 | unsigned long long *crash_base) | ||
1882 | { | ||
1883 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1884 | "crashkernel=", NULL); | ||
1885 | } | ||
1886 | |||
1887 | int __init parse_crashkernel_high(char *cmdline, | ||
1888 | unsigned long long system_ram, | ||
1889 | unsigned long long *crash_size, | ||
1890 | unsigned long long *crash_base) | ||
1891 | { | ||
1892 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1893 | "crashkernel=", suffix_tbl[SUFFIX_HIGH]); | ||
1894 | } | ||
1895 | |||
1896 | int __init parse_crashkernel_low(char *cmdline, | ||
1897 | unsigned long long system_ram, | ||
1898 | unsigned long long *crash_size, | ||
1899 | unsigned long long *crash_base) | ||
1900 | { | ||
1901 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1902 | "crashkernel=", suffix_tbl[SUFFIX_LOW]); | ||
1903 | } | ||
1904 | |||
1905 | static void update_vmcoreinfo_note(void) | ||
1906 | { | ||
1907 | u32 *buf = vmcoreinfo_note; | ||
1908 | |||
1909 | if (!vmcoreinfo_size) | ||
1910 | return; | ||
1911 | buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, | ||
1912 | vmcoreinfo_size); | ||
1913 | final_note(buf); | ||
1914 | } | ||
1915 | |||
1916 | void crash_save_vmcoreinfo(void) | ||
1917 | { | ||
1918 | vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); | ||
1919 | update_vmcoreinfo_note(); | ||
1920 | } | ||
1921 | |||
1922 | void vmcoreinfo_append_str(const char *fmt, ...) | ||
1923 | { | ||
1924 | va_list args; | ||
1925 | char buf[0x50]; | ||
1926 | size_t r; | ||
1927 | |||
1928 | va_start(args, fmt); | ||
1929 | r = vscnprintf(buf, sizeof(buf), fmt, args); | ||
1930 | va_end(args); | ||
1931 | |||
1932 | r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); | ||
1933 | |||
1934 | memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); | ||
1935 | |||
1936 | vmcoreinfo_size += r; | ||
1937 | } | ||
1938 | |||
1939 | /* | ||
1940 | * provide an empty default implementation here -- architecture | ||
1941 | * code may override this | ||
1942 | */ | ||
1943 | void __weak arch_crash_save_vmcoreinfo(void) | ||
1944 | {} | ||
1945 | |||
1946 | unsigned long __weak paddr_vmcoreinfo_note(void) | ||
1947 | { | ||
1948 | return __pa((unsigned long)(char *)&vmcoreinfo_note); | ||
1949 | } | ||
1950 | |||
1951 | static int __init crash_save_vmcoreinfo_init(void) | ||
1952 | { | ||
1953 | VMCOREINFO_OSRELEASE(init_uts_ns.name.release); | ||
1954 | VMCOREINFO_PAGESIZE(PAGE_SIZE); | ||
1955 | |||
1956 | VMCOREINFO_SYMBOL(init_uts_ns); | ||
1957 | VMCOREINFO_SYMBOL(node_online_map); | ||
1958 | #ifdef CONFIG_MMU | ||
1959 | VMCOREINFO_SYMBOL(swapper_pg_dir); | ||
1960 | #endif | ||
1961 | VMCOREINFO_SYMBOL(_stext); | ||
1962 | VMCOREINFO_SYMBOL(vmap_area_list); | ||
1963 | |||
1964 | #ifndef CONFIG_NEED_MULTIPLE_NODES | ||
1965 | VMCOREINFO_SYMBOL(mem_map); | ||
1966 | VMCOREINFO_SYMBOL(contig_page_data); | ||
1967 | #endif | ||
1968 | #ifdef CONFIG_SPARSEMEM | ||
1969 | VMCOREINFO_SYMBOL(mem_section); | ||
1970 | VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); | ||
1971 | VMCOREINFO_STRUCT_SIZE(mem_section); | ||
1972 | VMCOREINFO_OFFSET(mem_section, section_mem_map); | ||
1973 | #endif | ||
1974 | VMCOREINFO_STRUCT_SIZE(page); | ||
1975 | VMCOREINFO_STRUCT_SIZE(pglist_data); | ||
1976 | VMCOREINFO_STRUCT_SIZE(zone); | ||
1977 | VMCOREINFO_STRUCT_SIZE(free_area); | ||
1978 | VMCOREINFO_STRUCT_SIZE(list_head); | ||
1979 | VMCOREINFO_SIZE(nodemask_t); | ||
1980 | VMCOREINFO_OFFSET(page, flags); | ||
1981 | VMCOREINFO_OFFSET(page, _count); | ||
1982 | VMCOREINFO_OFFSET(page, mapping); | ||
1983 | VMCOREINFO_OFFSET(page, lru); | ||
1984 | VMCOREINFO_OFFSET(page, _mapcount); | ||
1985 | VMCOREINFO_OFFSET(page, private); | ||
1986 | VMCOREINFO_OFFSET(pglist_data, node_zones); | ||
1987 | VMCOREINFO_OFFSET(pglist_data, nr_zones); | ||
1988 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
1989 | VMCOREINFO_OFFSET(pglist_data, node_mem_map); | ||
1990 | #endif | ||
1991 | VMCOREINFO_OFFSET(pglist_data, node_start_pfn); | ||
1992 | VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); | ||
1993 | VMCOREINFO_OFFSET(pglist_data, node_id); | ||
1994 | VMCOREINFO_OFFSET(zone, free_area); | ||
1995 | VMCOREINFO_OFFSET(zone, vm_stat); | ||
1996 | VMCOREINFO_OFFSET(zone, spanned_pages); | ||
1997 | VMCOREINFO_OFFSET(free_area, free_list); | ||
1998 | VMCOREINFO_OFFSET(list_head, next); | ||
1999 | VMCOREINFO_OFFSET(list_head, prev); | ||
2000 | VMCOREINFO_OFFSET(vmap_area, va_start); | ||
2001 | VMCOREINFO_OFFSET(vmap_area, list); | ||
2002 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); | ||
2003 | log_buf_kexec_setup(); | ||
2004 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); | ||
2005 | VMCOREINFO_NUMBER(NR_FREE_PAGES); | ||
2006 | VMCOREINFO_NUMBER(PG_lru); | ||
2007 | VMCOREINFO_NUMBER(PG_private); | ||
2008 | VMCOREINFO_NUMBER(PG_swapcache); | ||
2009 | VMCOREINFO_NUMBER(PG_slab); | ||
2010 | #ifdef CONFIG_MEMORY_FAILURE | ||
2011 | VMCOREINFO_NUMBER(PG_hwpoison); | ||
2012 | #endif | ||
2013 | VMCOREINFO_NUMBER(PG_head_mask); | ||
2014 | VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); | ||
2015 | #ifdef CONFIG_HUGETLBFS | ||
2016 | VMCOREINFO_SYMBOL(free_huge_page); | ||
2017 | #endif | ||
2018 | |||
2019 | arch_crash_save_vmcoreinfo(); | ||
2020 | update_vmcoreinfo_note(); | ||
2021 | |||
2022 | return 0; | ||
2023 | } | ||
2024 | |||
2025 | subsys_initcall(crash_save_vmcoreinfo_init); | ||
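Each VMCOREINFO_* invocation above appends one "KEY=value" text line to vmcoreinfo_data through vmcoreinfo_append_str(); that text is what dump tools such as makedumpfile and crash parse out of the note. A sketch of what two of the macros roughly expand to (the real definitions live in include/linux/kexec.h):

/* Roughly: */
#define VMCOREINFO_SYMBOL(name) \
	vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)

#define VMCOREINFO_OFFSET(name, field) \
	vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
			      (unsigned long)offsetof(struct name, field))

/* so VMCOREINFO_OFFSET(page, lru) ends up as a line such as
 * "OFFSET(page.lru)=32" in the note's descriptor. */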
2026 | |||
2027 | #ifdef CONFIG_KEXEC_FILE | ||
2028 | static int locate_mem_hole_top_down(unsigned long start, unsigned long end, | ||
2029 | struct kexec_buf *kbuf) | ||
2030 | { | ||
2031 | struct kimage *image = kbuf->image; | ||
2032 | unsigned long temp_start, temp_end; | ||
2033 | |||
2034 | temp_end = min(end, kbuf->buf_max); | ||
2035 | temp_start = temp_end - kbuf->memsz; | ||
2036 | |||
2037 | do { | ||
2038 | /* align down start */ | ||
2039 | temp_start = temp_start & (~(kbuf->buf_align - 1)); | ||
2040 | |||
2041 | if (temp_start < start || temp_start < kbuf->buf_min) | ||
2042 | return 0; | ||
2043 | |||
2044 | temp_end = temp_start + kbuf->memsz - 1; | ||
2045 | |||
2046 | /* | ||
2047 | * Make sure this does not conflict with any of the existing | ||
2048 | * segments | ||
2049 | */ | ||
2050 | if (kimage_is_destination_range(image, temp_start, temp_end)) { | ||
2051 | temp_start = temp_start - PAGE_SIZE; | ||
2052 | continue; | ||
2053 | } | ||
2054 | |||
2055 | /* We found a suitable memory range */ | ||
2056 | break; | ||
2057 | } while (1); | ||
2058 | |||
2059 | /* If we are here, we found a suitable memory range */ | ||
2060 | kbuf->mem = temp_start; | ||
2061 | |||
2062 | /* Success, stop navigating through remaining System RAM ranges */ | ||
2063 | return 1; | ||
2064 | } | ||
2065 | |||
2066 | static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, | ||
2067 | struct kexec_buf *kbuf) | ||
2068 | { | ||
2069 | struct kimage *image = kbuf->image; | ||
2070 | unsigned long temp_start, temp_end; | ||
2071 | |||
2072 | temp_start = max(start, kbuf->buf_min); | ||
2073 | |||
2074 | do { | ||
2075 | temp_start = ALIGN(temp_start, kbuf->buf_align); | ||
2076 | temp_end = temp_start + kbuf->memsz - 1; | ||
2077 | |||
2078 | if (temp_end > end || temp_end > kbuf->buf_max) | ||
2079 | return 0; | ||
2080 | /* | ||
2081 | * Make sure this does not conflict with any of existing | ||
2082 | * segments | ||
2083 | */ | ||
2084 | if (kimage_is_destination_range(image, temp_start, temp_end)) { | ||
2085 | temp_start = temp_start + PAGE_SIZE; | ||
2086 | continue; | ||
2087 | } | ||
2088 | |||
2089 | /* We found a suitable memory range */ | ||
2090 | break; | ||
2091 | } while (1); | ||
2092 | |||
2093 | /* If we are here, we found a suitable memory range */ | ||
2094 | kbuf->mem = temp_start; | ||
2095 | |||
2096 | /* Success, stop navigating through remaining System RAM ranges */ | ||
2097 | return 1; | ||
2098 | } | ||
2099 | |||
2100 | static int locate_mem_hole_callback(u64 start, u64 end, void *arg) | ||
2101 | { | ||
2102 | struct kexec_buf *kbuf = (struct kexec_buf *)arg; | ||
2103 | unsigned long sz = end - start + 1; | ||
2104 | |||
2105 | /* Returning 0 will take us to the next memory range */ | ||
2106 | if (sz < kbuf->memsz) | ||
2107 | return 0; | ||
2108 | |||
2109 | if (end < kbuf->buf_min || start > kbuf->buf_max) | ||
2110 | return 0; | ||
2111 | |||
2112 | /* | ||
2113 | * Allocate memory top down within the RAM range. Otherwise bottom up | ||
2114 | * allocation. | ||
2115 | */ | ||
2116 | if (kbuf->top_down) | ||
2117 | return locate_mem_hole_top_down(start, end, kbuf); | ||
2118 | return locate_mem_hole_bottom_up(start, end, kbuf); | ||
2119 | } | ||
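Both search helpers rely on the usual power-of-two alignment tricks; a worked example, assuming buf_align = 16 MiB (0x1000000):

/*
 *   top-down:   temp_start & ~(buf_align - 1)  rounds a candidate down,
 *               e.g. 0x7f234000 -> 0x7f000000
 *   bottom-up:  ALIGN(temp_start, buf_align)   rounds a candidate up,
 *               e.g. 0x7f234000 -> 0x80000000
 *
 * When the candidate overlaps an already-placed segment it slides by one
 * PAGE_SIZE (down or up respectively) and the loop retries, until the
 * [buf_min, buf_max] window or the current RAM range is exhausted.
 */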
2120 | |||
2121 | /* | ||
2122 | * Helper function for placing a buffer in a kexec segment. This assumes | ||
2123 | * that kexec_mutex is held. | ||
2124 | */ | ||
2125 | int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, | ||
2126 | unsigned long memsz, unsigned long buf_align, | ||
2127 | unsigned long buf_min, unsigned long buf_max, | ||
2128 | bool top_down, unsigned long *load_addr) | ||
2129 | { | ||
2130 | |||
2131 | struct kexec_segment *ksegment; | ||
2132 | struct kexec_buf buf, *kbuf; | ||
2133 | int ret; | ||
2134 | |||
2135 | /* Currently adding segment this way is allowed only in file mode */ | ||
2136 | if (!image->file_mode) | ||
2137 | return -EINVAL; | ||
2138 | |||
2139 | if (image->nr_segments >= KEXEC_SEGMENT_MAX) | ||
2140 | return -EINVAL; | ||
2141 | |||
2142 | /* | ||
2143 | * Make sure we are not trying to add a buffer after allocating | ||
2144 | * control pages. All segments need to be placed first before | ||
2145 | * any control pages are allocated, because the control page | ||
2146 | * allocation logic goes through the list of segments to make sure | ||
2147 | * there are no destination overlaps. | ||
2148 | */ | ||
2149 | if (!list_empty(&image->control_pages)) { | ||
2150 | WARN_ON(1); | ||
2151 | return -EINVAL; | ||
2152 | } | ||
2153 | |||
2154 | memset(&buf, 0, sizeof(struct kexec_buf)); | ||
2155 | kbuf = &buf; | ||
2156 | kbuf->image = image; | ||
2157 | kbuf->buffer = buffer; | ||
2158 | kbuf->bufsz = bufsz; | ||
2159 | |||
2160 | kbuf->memsz = ALIGN(memsz, PAGE_SIZE); | ||
2161 | kbuf->buf_align = max(buf_align, PAGE_SIZE); | ||
2162 | kbuf->buf_min = buf_min; | ||
2163 | kbuf->buf_max = buf_max; | ||
2164 | kbuf->top_down = top_down; | ||
2165 | |||
2166 | /* Walk the RAM ranges and allocate a suitable range for the buffer */ | ||
2167 | if (image->type == KEXEC_TYPE_CRASH) | ||
2168 | ret = walk_iomem_res("Crash kernel", | ||
2169 | IORESOURCE_MEM | IORESOURCE_BUSY, | ||
2170 | crashk_res.start, crashk_res.end, kbuf, | ||
2171 | locate_mem_hole_callback); | ||
2172 | else | ||
2173 | ret = walk_system_ram_res(0, -1, kbuf, | ||
2174 | locate_mem_hole_callback); | ||
2175 | if (ret != 1) { | ||
2176 | /* A suitable memory range could not be found for buffer */ | ||
2177 | return -EADDRNOTAVAIL; | ||
2178 | } | ||
2179 | |||
2180 | /* Found a suitable memory range */ | ||
2181 | ksegment = &image->segment[image->nr_segments]; | ||
2182 | ksegment->kbuf = kbuf->buffer; | ||
2183 | ksegment->bufsz = kbuf->bufsz; | ||
2184 | ksegment->mem = kbuf->mem; | ||
2185 | ksegment->memsz = kbuf->memsz; | ||
2186 | image->nr_segments++; | ||
2187 | *load_addr = ksegment->mem; | ||
2188 | return 0; | ||
2189 | } | ||
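A hedged usage sketch of the helper above, the way an architecture's kexec_file loader might call it; the buffer (blob, blob_len) and the address limits are illustrative, not taken from any in-tree loader:

unsigned long load_addr;
int ret;

ret = kexec_add_buffer(image, blob, blob_len,	/* data and its length          */
		       blob_len,		/* memsz, rounded up to a page  */
		       PAGE_SIZE,		/* minimum alignment            */
		       16 * 1024 * 1024,	/* buf_min: above 16 MiB        */
		       0xffffffffUL,		/* buf_max: below 4 GiB         */
		       true,			/* search top-down              */
		       &load_addr);
if (ret)
	return ret;	/* -EADDRNOTAVAIL when no hole was found */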
2190 | |||
2191 | /* Calculate and store the digest of segments */ | ||
2192 | static int kexec_calculate_store_digests(struct kimage *image) | ||
2193 | { | ||
2194 | struct crypto_shash *tfm; | ||
2195 | struct shash_desc *desc; | ||
2196 | int ret = 0, i, j, zero_buf_sz, sha_region_sz; | ||
2197 | size_t desc_size, nullsz; | ||
2198 | char *digest; | ||
2199 | void *zero_buf; | ||
2200 | struct kexec_sha_region *sha_regions; | ||
2201 | struct purgatory_info *pi = &image->purgatory_info; | ||
2202 | |||
2203 | zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT); | ||
2204 | zero_buf_sz = PAGE_SIZE; | ||
2205 | |||
2206 | tfm = crypto_alloc_shash("sha256", 0, 0); | ||
2207 | if (IS_ERR(tfm)) { | ||
2208 | ret = PTR_ERR(tfm); | ||
2209 | goto out; | ||
2210 | } | ||
2211 | |||
2212 | desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); | ||
2213 | desc = kzalloc(desc_size, GFP_KERNEL); | ||
2214 | if (!desc) { | ||
2215 | ret = -ENOMEM; | ||
2216 | goto out_free_tfm; | ||
2217 | } | ||
2218 | |||
2219 | sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region); | ||
2220 | sha_regions = vzalloc(sha_region_sz); | ||
2221 | if (!sha_regions) | ||
2222 | goto out_free_desc; | ||
2223 | |||
2224 | desc->tfm = tfm; | ||
2225 | desc->flags = 0; | ||
2226 | |||
2227 | ret = crypto_shash_init(desc); | ||
2228 | if (ret < 0) | ||
2229 | goto out_free_sha_regions; | ||
2230 | |||
2231 | digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); | ||
2232 | if (!digest) { | ||
2233 | ret = -ENOMEM; | ||
2234 | goto out_free_sha_regions; | ||
2235 | } | ||
2236 | |||
2237 | for (j = i = 0; i < image->nr_segments; i++) { | ||
2238 | struct kexec_segment *ksegment; | ||
2239 | |||
2240 | ksegment = &image->segment[i]; | ||
2241 | /* | ||
2242 | * Skip purgatory as it will be modified once we put digest | ||
2243 | * info in purgatory. | ||
2244 | */ | ||
2245 | if (ksegment->kbuf == pi->purgatory_buf) | ||
2246 | continue; | ||
2247 | |||
2248 | ret = crypto_shash_update(desc, ksegment->kbuf, | ||
2249 | ksegment->bufsz); | ||
2250 | if (ret) | ||
2251 | break; | ||
2252 | |||
2253 | /* | ||
2254 | * Assume rest of the buffer is filled with zero and | ||
2255 | * update digest accordingly. | ||
2256 | */ | ||
2257 | nullsz = ksegment->memsz - ksegment->bufsz; | ||
2258 | while (nullsz) { | ||
2259 | unsigned long bytes = nullsz; | ||
2260 | |||
2261 | if (bytes > zero_buf_sz) | ||
2262 | bytes = zero_buf_sz; | ||
2263 | ret = crypto_shash_update(desc, zero_buf, bytes); | ||
2264 | if (ret) | ||
2265 | break; | ||
2266 | nullsz -= bytes; | ||
2267 | } | ||
2268 | |||
2269 | if (ret) | ||
2270 | break; | ||
2271 | |||
2272 | sha_regions[j].start = ksegment->mem; | ||
2273 | sha_regions[j].len = ksegment->memsz; | ||
2274 | j++; | ||
2275 | } | ||
2276 | |||
2277 | if (!ret) { | ||
2278 | ret = crypto_shash_final(desc, digest); | ||
2279 | if (ret) | ||
2280 | goto out_free_digest; | ||
2281 | ret = kexec_purgatory_get_set_symbol(image, "sha_regions", | ||
2282 | sha_regions, sha_region_sz, 0); | ||
2283 | if (ret) | ||
2284 | goto out_free_digest; | ||
2285 | |||
2286 | ret = kexec_purgatory_get_set_symbol(image, "sha256_digest", | ||
2287 | digest, SHA256_DIGEST_SIZE, 0); | ||
2288 | if (ret) | ||
2289 | goto out_free_digest; | ||
2290 | } | ||
2291 | |||
2292 | out_free_digest: | ||
2293 | kfree(digest); | ||
2294 | out_free_sha_regions: | ||
2295 | vfree(sha_regions); | ||
2296 | out_free_desc: | ||
2297 | kfree(desc); | ||
2298 | out_free_tfm: | ||
2299 | kfree(tfm); | ||
2300 | out: | ||
2301 | return ret; | ||
2302 | } | ||
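The sha_regions/sha256_digest values stored above are consumed on the other side of the reboot: before jumping to the new kernel, purgatory re-hashes every recorded region and refuses to continue on a mismatch. A conceptual sketch only; the real code lives in the architecture's purgatory (e.g. arch/x86/purgatory/) with its own freestanding SHA-256 helpers, and the function and helper names here are illustrative:

static int verify_sha256_digest_sketch(void)
{
	struct sha256_ctx ctx;			/* illustrative type */
	u8 digest[SHA256_DIGEST_SIZE];
	size_t i;

	sha256_start(&ctx);
	for (i = 0; i < KEXEC_SEGMENT_MAX && sha_regions[i].len; i++)
		sha256_update(&ctx, (void *)sha_regions[i].start,
			      sha_regions[i].len);
	sha256_finish(&ctx, digest);

	/* sha256_digest was written by kexec_purgatory_get_set_symbol() */
	return memcmp(digest, sha256_digest, sizeof(digest)) ? 1 : 0;
}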
2303 | |||
2304 | /* Actually load purgatory. Lot of code taken from kexec-tools */ | ||
2305 | static int __kexec_load_purgatory(struct kimage *image, unsigned long min, | ||
2306 | unsigned long max, int top_down) | ||
2307 | { | ||
2308 | struct purgatory_info *pi = &image->purgatory_info; | ||
2309 | unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad; | ||
2310 | unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset; | ||
2311 | unsigned char *buf_addr, *src; | ||
2312 | int i, ret = 0, entry_sidx = -1; | ||
2313 | const Elf_Shdr *sechdrs_c; | ||
2314 | Elf_Shdr *sechdrs = NULL; | ||
2315 | void *purgatory_buf = NULL; | ||
2316 | |||
2317 | /* | ||
2318 | * sechdrs_c points to the section headers in purgatory, which are read | ||
2319 | * only. No modifications allowed. | ||
2320 | */ | ||
2321 | sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff; | ||
2322 | |||
2323 | /* | ||
2324 | * We cannot modify sechdrs_c[] and its fields. It is read only. | ||
2325 | * Copy it over to a local copy where one can store some temporary | ||
2326 | * data and free it at the end. We need to modify ->sh_addr and | ||
2327 | * ->sh_offset fields to keep track of permanent and temporary | ||
2328 | * locations of sections. | ||
2329 | */ | ||
2330 | sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr)); | ||
2331 | if (!sechdrs) | ||
2332 | return -ENOMEM; | ||
2333 | |||
2334 | memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr)); | ||
2335 | |||
2336 | /* | ||
2337 | * We seem to have multiple copies of sections. The first copy is the | ||
2338 | * one embedded in the kernel in a read-only section. Some of these sections | ||
2339 | * will be copied to a temporary buffer and relocated. And these | ||
2340 | * sections will finally be copied to their final destination at | ||
2341 | * segment load time. | ||
2342 | * | ||
2343 | * Use ->sh_offset to reflect section address in memory. It will | ||
2344 | * point to original read only copy if section is not allocatable. | ||
2345 | * Otherwise it will point to temporary copy which will be relocated. | ||
2346 | * | ||
2347 | * Use ->sh_addr to contain final address of the section where it | ||
2348 | * will go during execution time. | ||
2349 | */ | ||
2350 | for (i = 0; i < pi->ehdr->e_shnum; i++) { | ||
2351 | if (sechdrs[i].sh_type == SHT_NOBITS) | ||
2352 | continue; | ||
2353 | |||
2354 | sechdrs[i].sh_offset = (unsigned long)pi->ehdr + | ||
2355 | sechdrs[i].sh_offset; | ||
2356 | } | ||
2357 | |||
2358 | /* | ||
2359 | * Identify entry point section and make entry relative to section | ||
2360 | * start. | ||
2361 | */ | ||
2362 | entry = pi->ehdr->e_entry; | ||
2363 | for (i = 0; i < pi->ehdr->e_shnum; i++) { | ||
2364 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | ||
2365 | continue; | ||
2366 | |||
2367 | if (!(sechdrs[i].sh_flags & SHF_EXECINSTR)) | ||
2368 | continue; | ||
2369 | |||
2370 | /* Make entry section relative */ | ||
2371 | if (sechdrs[i].sh_addr <= pi->ehdr->e_entry && | ||
2372 | ((sechdrs[i].sh_addr + sechdrs[i].sh_size) > | ||
2373 | pi->ehdr->e_entry)) { | ||
2374 | entry_sidx = i; | ||
2375 | entry -= sechdrs[i].sh_addr; | ||
2376 | break; | ||
2377 | } | ||
2378 | } | ||
2379 | |||
2380 | /* Determine how much memory is needed to load relocatable object. */ | ||
2381 | buf_align = 1; | ||
2382 | bss_align = 1; | ||
2383 | buf_sz = 0; | ||
2384 | bss_sz = 0; | ||
2385 | |||
2386 | for (i = 0; i < pi->ehdr->e_shnum; i++) { | ||
2387 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | ||
2388 | continue; | ||
2389 | |||
2390 | align = sechdrs[i].sh_addralign; | ||
2391 | if (sechdrs[i].sh_type != SHT_NOBITS) { | ||
2392 | if (buf_align < align) | ||
2393 | buf_align = align; | ||
2394 | buf_sz = ALIGN(buf_sz, align); | ||
2395 | buf_sz += sechdrs[i].sh_size; | ||
2396 | } else { | ||
2397 | /* bss section */ | ||
2398 | if (bss_align < align) | ||
2399 | bss_align = align; | ||
2400 | bss_sz = ALIGN(bss_sz, align); | ||
2401 | bss_sz += sechdrs[i].sh_size; | ||
2402 | } | ||
2403 | } | ||
2404 | |||
2405 | /* Determine the bss padding required to align bss properly */ | ||
2406 | bss_pad = 0; | ||
2407 | if (buf_sz & (bss_align - 1)) | ||
2408 | bss_pad = bss_align - (buf_sz & (bss_align - 1)); | ||
2409 | |||
2410 | memsz = buf_sz + bss_pad + bss_sz; | ||
2411 | |||
2412 | /* Allocate buffer for purgatory */ | ||
2413 | purgatory_buf = vzalloc(buf_sz); | ||
2414 | if (!purgatory_buf) { | ||
2415 | ret = -ENOMEM; | ||
2416 | goto out; | ||
2417 | } | ||
2418 | |||
2419 | if (buf_align < bss_align) | ||
2420 | buf_align = bss_align; | ||
2421 | |||
2422 | /* Add buffer to segment list */ | ||
2423 | ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz, | ||
2424 | buf_align, min, max, top_down, | ||
2425 | &pi->purgatory_load_addr); | ||
2426 | if (ret) | ||
2427 | goto out; | ||
2428 | |||
2429 | /* Load SHF_ALLOC sections */ | ||
2430 | buf_addr = purgatory_buf; | ||
2431 | load_addr = curr_load_addr = pi->purgatory_load_addr; | ||
2432 | bss_addr = load_addr + buf_sz + bss_pad; | ||
2433 | |||
2434 | for (i = 0; i < pi->ehdr->e_shnum; i++) { | ||
2435 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | ||
2436 | continue; | ||
2437 | |||
2438 | align = sechdrs[i].sh_addralign; | ||
2439 | if (sechdrs[i].sh_type != SHT_NOBITS) { | ||
2440 | curr_load_addr = ALIGN(curr_load_addr, align); | ||
2441 | offset = curr_load_addr - load_addr; | ||
2442 | /* We already modified ->sh_offset to keep src addr */ | ||
2443 | src = (char *) sechdrs[i].sh_offset; | ||
2444 | memcpy(buf_addr + offset, src, sechdrs[i].sh_size); | ||
2445 | |||
2446 | /* Store load address and source address of section */ | ||
2447 | sechdrs[i].sh_addr = curr_load_addr; | ||
2448 | |||
2449 | /* | ||
2450 | * This section got copied to temporary buffer. Update | ||
2451 | * ->sh_offset accordingly. | ||
2452 | */ | ||
2453 | sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset); | ||
2454 | |||
2455 | /* Advance to the next address */ | ||
2456 | curr_load_addr += sechdrs[i].sh_size; | ||
2457 | } else { | ||
2458 | bss_addr = ALIGN(bss_addr, align); | ||
2459 | sechdrs[i].sh_addr = bss_addr; | ||
2460 | bss_addr += sechdrs[i].sh_size; | ||
2461 | } | ||
2462 | } | ||
2463 | |||
2464 | /* Update entry point based on load address of text section */ | ||
2465 | if (entry_sidx >= 0) | ||
2466 | entry += sechdrs[entry_sidx].sh_addr; | ||
2467 | |||
2468 | /* Make kernel jump to purgatory after shutdown */ | ||
2469 | image->start = entry; | ||
2470 | |||
2471 | /* Used later to get/set symbol values */ | ||
2472 | pi->sechdrs = sechdrs; | ||
2473 | |||
2474 | /* | ||
2475 | * Used later to identify which segment is the purgatory and | ||
2476 | * exclude it from checksumming. | ||
2477 | */ | ||
2478 | pi->purgatory_buf = purgatory_buf; | ||
2479 | return ret; | ||
2480 | out: | ||
2481 | vfree(sechdrs); | ||
2482 | vfree(purgatory_buf); | ||
2483 | return ret; | ||
2484 | } | ||
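The net effect of the bookkeeping above is that every SHF_ALLOC section ends up carrying two addresses, which is what the symbol helpers further down rely on:

/*
 *   sechdrs[i].sh_offset - where the bytes live right now: inside the
 *                          temporary purgatory_buf copy, so they can
 *                          still be patched (relocations, symbol writes)
 *   sechdrs[i].sh_addr   - where the bytes will live once the segment is
 *                          loaded, i.e. their address at execution time
 *
 * For a symbol 'sym' in section sym->st_shndx this gives:
 *   patch it now at      : sechdrs[st_shndx].sh_offset + sym->st_value
 *   it executes later at : sechdrs[st_shndx].sh_addr   + sym->st_value
 */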
2485 | |||
2486 | static int kexec_apply_relocations(struct kimage *image) | ||
2487 | { | ||
2488 | int i, ret; | ||
2489 | struct purgatory_info *pi = &image->purgatory_info; | ||
2490 | Elf_Shdr *sechdrs = pi->sechdrs; | ||
2491 | |||
2492 | /* Apply relocations */ | ||
2493 | for (i = 0; i < pi->ehdr->e_shnum; i++) { | ||
2494 | Elf_Shdr *section, *symtab; | ||
2495 | |||
2496 | if (sechdrs[i].sh_type != SHT_RELA && | ||
2497 | sechdrs[i].sh_type != SHT_REL) | ||
2498 | continue; | ||
2499 | |||
2500 | /* | ||
2501 | * For section of type SHT_RELA/SHT_REL, | ||
2502 | * ->sh_link contains section header index of associated | ||
2503 | * symbol table. And ->sh_info contains section header | ||
2504 | * index of section to which relocations apply. | ||
2505 | */ | ||
2506 | if (sechdrs[i].sh_info >= pi->ehdr->e_shnum || | ||
2507 | sechdrs[i].sh_link >= pi->ehdr->e_shnum) | ||
2508 | return -ENOEXEC; | ||
2509 | |||
2510 | section = &sechdrs[sechdrs[i].sh_info]; | ||
2511 | symtab = &sechdrs[sechdrs[i].sh_link]; | ||
2512 | |||
2513 | if (!(section->sh_flags & SHF_ALLOC)) | ||
2514 | continue; | ||
2515 | |||
2516 | /* | ||
2517 | * symtab->sh_link contains the section header index of the associated | ||
2518 | * string table. | ||
2519 | */ | ||
2520 | if (symtab->sh_link >= pi->ehdr->e_shnum) | ||
2521 | /* Invalid section number? */ | ||
2522 | continue; | ||
2523 | |||
2524 | /* | ||
2525 | * Respective architecture needs to provide support for applying | ||
2526 | * relocations of type SHT_RELA/SHT_REL. | ||
2527 | */ | ||
2528 | if (sechdrs[i].sh_type == SHT_RELA) | ||
2529 | ret = arch_kexec_apply_relocations_add(pi->ehdr, | ||
2530 | sechdrs, i); | ||
2531 | else if (sechdrs[i].sh_type == SHT_REL) | ||
2532 | ret = arch_kexec_apply_relocations(pi->ehdr, | ||
2533 | sechdrs, i); | ||
2534 | if (ret) | ||
2535 | return ret; | ||
2536 | } | ||
2537 | |||
2538 | return 0; | ||
2539 | } | ||
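A rough, hedged sketch of what the per-architecture hook does for a single RELA entry; this is not the in-tree code (see arch/x86/kernel/machine_kexec_64.c for the real implementation, which handles more relocation types and the error cases):

static void apply_one_rela_sketch(Elf64_Rela *rel, void *location,
				  unsigned long address, unsigned long value)
{
	/*
	 * location: where the instruction/data can be patched now
	 *           (derived from the section's ->sh_offset)
	 * address:  where that same spot will sit at execution time
	 *           (derived from the section's ->sh_addr)
	 * value:    final value of the referenced symbol plus r_addend
	 */
	switch (ELF64_R_TYPE(rel->r_info)) {
	case R_X86_64_64:
		*(u64 *)location = value;		/* absolute 64-bit  */
		break;
	case R_X86_64_32S:
		*(s32 *)location = value;		/* absolute, signed */
		break;
	case R_X86_64_PC32:
		*(u32 *)location = value - address;	/* PC-relative      */
		break;
	}
}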
2540 | |||
2541 | /* Load relocatable purgatory object and relocate it appropriately */ | ||
2542 | int kexec_load_purgatory(struct kimage *image, unsigned long min, | ||
2543 | unsigned long max, int top_down, | ||
2544 | unsigned long *load_addr) | ||
2545 | { | ||
2546 | struct purgatory_info *pi = &image->purgatory_info; | ||
2547 | int ret; | ||
2548 | |||
2549 | if (kexec_purgatory_size <= 0) | ||
2550 | return -EINVAL; | ||
2551 | |||
2552 | if (kexec_purgatory_size < sizeof(Elf_Ehdr)) | ||
2553 | return -ENOEXEC; | ||
2554 | |||
2555 | pi->ehdr = (Elf_Ehdr *)kexec_purgatory; | ||
2556 | |||
2557 | if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0 | ||
2558 | || pi->ehdr->e_type != ET_REL | ||
2559 | || !elf_check_arch(pi->ehdr) | ||
2560 | || pi->ehdr->e_shentsize != sizeof(Elf_Shdr)) | ||
2561 | return -ENOEXEC; | ||
2562 | |||
2563 | if (pi->ehdr->e_shoff >= kexec_purgatory_size | ||
2564 | || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) > | ||
2565 | kexec_purgatory_size - pi->ehdr->e_shoff)) | ||
2566 | return -ENOEXEC; | ||
2567 | |||
2568 | ret = __kexec_load_purgatory(image, min, max, top_down); | ||
2569 | if (ret) | ||
2570 | return ret; | ||
2571 | |||
2572 | ret = kexec_apply_relocations(image); | ||
2573 | if (ret) | ||
2574 | goto out; | ||
2575 | |||
2576 | *load_addr = pi->purgatory_load_addr; | ||
2577 | return 0; | ||
2578 | out: | ||
2579 | vfree(pi->sechdrs); | ||
2580 | vfree(pi->purgatory_buf); | ||
2581 | return ret; | ||
2582 | } | ||
2583 | |||
2584 | static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi, | ||
2585 | const char *name) | ||
2586 | { | ||
2587 | Elf_Sym *syms; | ||
2588 | Elf_Shdr *sechdrs; | ||
2589 | Elf_Ehdr *ehdr; | ||
2590 | int i, k; | ||
2591 | const char *strtab; | ||
2592 | |||
2593 | if (!pi->sechdrs || !pi->ehdr) | ||
2594 | return NULL; | ||
2595 | |||
2596 | sechdrs = pi->sechdrs; | ||
2597 | ehdr = pi->ehdr; | ||
2598 | |||
2599 | for (i = 0; i < ehdr->e_shnum; i++) { | ||
2600 | if (sechdrs[i].sh_type != SHT_SYMTAB) | ||
2601 | continue; | ||
2602 | |||
2603 | if (sechdrs[i].sh_link >= ehdr->e_shnum) | ||
2604 | /* Invalid strtab section number */ | ||
2605 | continue; | ||
2606 | strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset; | ||
2607 | syms = (Elf_Sym *)sechdrs[i].sh_offset; | ||
2608 | |||
2609 | /* Go through symbols for a match */ | ||
2610 | for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { | ||
2611 | if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) | ||
2612 | continue; | ||
2613 | |||
2614 | if (strcmp(strtab + syms[k].st_name, name) != 0) | ||
2615 | continue; | ||
2616 | |||
2617 | if (syms[k].st_shndx == SHN_UNDEF || | ||
2618 | syms[k].st_shndx >= ehdr->e_shnum) { | ||
2619 | pr_debug("Symbol: %s has bad section index %d.\n", | ||
2620 | name, syms[k].st_shndx); | ||
2621 | return NULL; | ||
2622 | } | ||
2623 | |||
2624 | /* Found the symbol we are looking for */ | ||
2625 | return &syms[k]; | ||
2626 | } | ||
2627 | } | ||
2628 | |||
2629 | return NULL; | ||
2630 | } | ||
2631 | |||
2632 | void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name) | ||
2633 | { | ||
2634 | struct purgatory_info *pi = &image->purgatory_info; | ||
2635 | Elf_Sym *sym; | ||
2636 | Elf_Shdr *sechdr; | ||
2637 | |||
2638 | sym = kexec_purgatory_find_symbol(pi, name); | ||
2639 | if (!sym) | ||
2640 | return ERR_PTR(-EINVAL); | ||
2641 | |||
2642 | sechdr = &pi->sechdrs[sym->st_shndx]; | ||
2643 | |||
2644 | /* | ||
2645 | * Returns the address where symbol will finally be loaded after | ||
2646 | * kexec_load_segment() | ||
2647 | */ | ||
2648 | return (void *)(sechdr->sh_addr + sym->st_value); | ||
2649 | } | ||
2650 | |||
2651 | /* | ||
2652 | * Get or set value of a symbol. If "get_value" is true, symbol value is | ||
2653 | * returned in buf otherwise symbol value is set based on value in buf. | ||
2654 | */ | ||
2655 | int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, | ||
2656 | void *buf, unsigned int size, bool get_value) | ||
2657 | { | ||
2658 | Elf_Sym *sym; | ||
2659 | Elf_Shdr *sechdrs; | ||
2660 | struct purgatory_info *pi = &image->purgatory_info; | ||
2661 | char *sym_buf; | ||
2662 | |||
2663 | sym = kexec_purgatory_find_symbol(pi, name); | ||
2664 | if (!sym) | ||
2665 | return -EINVAL; | ||
2666 | |||
2667 | if (sym->st_size != size) { | ||
2668 | pr_err("symbol %s size mismatch: expected %lu actual %u\n", | ||
2669 | name, (unsigned long)sym->st_size, size); | ||
2670 | return -EINVAL; | ||
2671 | } | ||
2672 | |||
2673 | sechdrs = pi->sechdrs; | ||
2674 | |||
2675 | if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { | ||
2676 | pr_err("symbol %s is in a bss section. Cannot %s\n", name, | ||
2677 | get_value ? "get" : "set"); | ||
2678 | return -EINVAL; | ||
2679 | } | ||
2680 | |||
2681 | sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset + | ||
2682 | sym->st_value; | ||
2683 | |||
2684 | if (get_value) | ||
2685 | memcpy((void *)buf, sym_buf, size); | ||
2686 | else | ||
2687 | memcpy((void *)sym_buf, buf, size); | ||
2688 | |||
2689 | return 0; | ||
2690 | } | ||
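A hedged usage sketch of the helper above: this is how a loader hands a value to a global variable that purgatory exports. The symbol name "arg_from_loader" is made up for the sketch; real loaders use their own symbols (stack and entry addresses, the sha_regions/sha256_digest seen earlier, and so on):

unsigned long arg = 0xdeadbeef;		/* illustrative value */
int ret;

/* get_value == false, so the purgatory-side variable is written */
ret = kexec_purgatory_get_set_symbol(image, "arg_from_loader",
				     &arg, sizeof(arg), false);
if (ret)
	pr_err("could not set purgatory symbol\n");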
2691 | #endif /* CONFIG_KEXEC_FILE */ | ||
2692 | |||
2693 | /* | ||
2694 | * Move into place and start executing a preloaded standalone | ||
2695 | * executable. If nothing was preloaded return an error. | ||
2696 | */ | ||
2697 | int kernel_kexec(void) | ||
2698 | { | ||
2699 | int error = 0; | ||
2700 | |||
2701 | if (!mutex_trylock(&kexec_mutex)) | ||
2702 | return -EBUSY; | ||
2703 | if (!kexec_image) { | ||
2704 | error = -EINVAL; | ||
2705 | goto Unlock; | ||
2706 | } | ||
2707 | |||
2708 | #ifdef CONFIG_KEXEC_JUMP | ||
2709 | if (kexec_image->preserve_context) { | ||
2710 | lock_system_sleep(); | ||
2711 | pm_prepare_console(); | ||
2712 | error = freeze_processes(); | ||
2713 | if (error) { | ||
2714 | error = -EBUSY; | ||
2715 | goto Restore_console; | ||
2716 | } | ||
2717 | suspend_console(); | ||
2718 | error = dpm_suspend_start(PMSG_FREEZE); | ||
2719 | if (error) | ||
2720 | goto Resume_console; | ||
2721 | /* At this point, dpm_suspend_start() has been called, | ||
2722 | * but *not* dpm_suspend_end(). We *must* call | ||
2723 | * dpm_suspend_end() now. Otherwise, drivers for | ||
2724 | * some devices (e.g. interrupt controllers) become | ||
2725 | * desynchronized with the actual state of the | ||
2726 | * hardware at resume time, and evil weirdness ensues. | ||
2727 | */ | ||
2728 | error = dpm_suspend_end(PMSG_FREEZE); | ||
2729 | if (error) | ||
2730 | goto Resume_devices; | ||
2731 | error = disable_nonboot_cpus(); | ||
2732 | if (error) | ||
2733 | goto Enable_cpus; | ||
2734 | local_irq_disable(); | ||
2735 | error = syscore_suspend(); | ||
2736 | if (error) | ||
2737 | goto Enable_irqs; | ||
2738 | } else | ||
2739 | #endif | ||
2740 | { | ||
2741 | kexec_in_progress = true; | ||
2742 | kernel_restart_prepare(NULL); | ||
2743 | migrate_to_reboot_cpu(); | ||
2744 | |||
2745 | /* | ||
2746 | * migrate_to_reboot_cpu() disables CPU hotplug assuming that | ||
2747 | * no further code needs to use CPU hotplug (which is true in | ||
2748 | * the reboot case). However, the kexec path depends on using | ||
2749 | * CPU hotplug again; so re-enable it here. | ||
2750 | */ | ||
2751 | cpu_hotplug_enable(); | ||
2752 | pr_emerg("Starting new kernel\n"); | ||
2753 | machine_shutdown(); | ||
2754 | } | ||
2755 | |||
2756 | machine_kexec(kexec_image); | ||
2757 | |||
2758 | #ifdef CONFIG_KEXEC_JUMP | ||
2759 | if (kexec_image->preserve_context) { | ||
2760 | syscore_resume(); | ||
2761 | Enable_irqs: | ||
2762 | local_irq_enable(); | ||
2763 | Enable_cpus: | ||
2764 | enable_nonboot_cpus(); | ||
2765 | dpm_resume_start(PMSG_RESTORE); | ||
2766 | Resume_devices: | ||
2767 | dpm_resume_end(PMSG_RESTORE); | ||
2768 | Resume_console: | ||
2769 | resume_console(); | ||
2770 | thaw_processes(); | ||
2771 | Restore_console: | ||
2772 | pm_restore_console(); | ||
2773 | unlock_system_sleep(); | ||
2774 | } | ||
2775 | #endif | ||
2776 | |||
2777 | Unlock: | ||
2778 | mutex_unlock(&kexec_mutex); | ||
2779 | return error; | ||
2780 | } | ||
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c new file mode 100644 index 000000000000..201b45327804 --- /dev/null +++ b/kernel/kexec_core.c | |||
@@ -0,0 +1,1534 @@ | |||
1 | /* | ||
2 | * kexec.c - kexec system call core code. | ||
3 | * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> | ||
4 | * | ||
5 | * This source code is licensed under the GNU General Public License, | ||
6 | * Version 2. See the file COPYING for more details. | ||
7 | */ | ||
8 | |||
9 | #define pr_fmt(fmt) "kexec: " fmt | ||
10 | |||
11 | #include <linux/capability.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/file.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/kexec.h> | ||
17 | #include <linux/mutex.h> | ||
18 | #include <linux/list.h> | ||
19 | #include <linux/highmem.h> | ||
20 | #include <linux/syscalls.h> | ||
21 | #include <linux/reboot.h> | ||
22 | #include <linux/ioport.h> | ||
23 | #include <linux/hardirq.h> | ||
24 | #include <linux/elf.h> | ||
25 | #include <linux/elfcore.h> | ||
26 | #include <linux/utsname.h> | ||
27 | #include <linux/numa.h> | ||
28 | #include <linux/suspend.h> | ||
29 | #include <linux/device.h> | ||
30 | #include <linux/freezer.h> | ||
31 | #include <linux/pm.h> | ||
32 | #include <linux/cpu.h> | ||
33 | #include <linux/uaccess.h> | ||
34 | #include <linux/io.h> | ||
35 | #include <linux/console.h> | ||
36 | #include <linux/vmalloc.h> | ||
37 | #include <linux/swap.h> | ||
38 | #include <linux/syscore_ops.h> | ||
39 | #include <linux/compiler.h> | ||
40 | #include <linux/hugetlb.h> | ||
41 | |||
42 | #include <asm/page.h> | ||
43 | #include <asm/sections.h> | ||
44 | |||
45 | #include <crypto/hash.h> | ||
46 | #include <crypto/sha.h> | ||
47 | #include "kexec_internal.h" | ||
48 | |||
49 | DEFINE_MUTEX(kexec_mutex); | ||
50 | |||
51 | /* Per cpu memory for storing cpu states in case of system crash. */ | ||
52 | note_buf_t __percpu *crash_notes; | ||
53 | |||
54 | /* vmcoreinfo stuff */ | ||
55 | static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; | ||
56 | u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; | ||
57 | size_t vmcoreinfo_size; | ||
58 | size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); | ||
59 | |||
60 | /* Flag to indicate we are going to kexec a new kernel */ | ||
61 | bool kexec_in_progress = false; | ||
62 | |||
63 | |||
64 | /* Location of the reserved area for the crash kernel */ | ||
65 | struct resource crashk_res = { | ||
66 | .name = "Crash kernel", | ||
67 | .start = 0, | ||
68 | .end = 0, | ||
69 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
70 | }; | ||
71 | struct resource crashk_low_res = { | ||
72 | .name = "Crash kernel", | ||
73 | .start = 0, | ||
74 | .end = 0, | ||
75 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
76 | }; | ||
77 | |||
78 | int kexec_should_crash(struct task_struct *p) | ||
79 | { | ||
80 | /* | ||
81 | * If crash_kexec_post_notifiers is enabled, don't run | ||
82 | * crash_kexec() here yet, which must be run after panic | ||
83 | * notifiers in panic(). | ||
84 | */ | ||
85 | if (crash_kexec_post_notifiers) | ||
86 | return 0; | ||
87 | /* | ||
88 | * There are 4 panic() calls in do_exit() path, each of which | ||
89 | * corresponds to each of these 4 conditions. | ||
90 | */ | ||
91 | if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) | ||
92 | return 1; | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * When kexec transitions to the new kernel there is a one-to-one | ||
98 | * mapping between physical and virtual addresses. On processors | ||
99 | * where you can disable the MMU this is trivial, and easy. For | ||
100 | * others it is still a simple predictable page table to setup. | ||
101 | * | ||
102 | * In that environment kexec copies the new kernel to its final | ||
103 | * resting place. This means I can only support memory whose | ||
104 | * physical address can fit in an unsigned long. In particular | ||
105 | * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. | ||
106 | * If the assembly stub has more restrictive requirements | ||
107 | * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be | ||
108 | * defined more restrictively in <asm/kexec.h>. | ||
109 | * | ||
110 | * The code for the transition from the current kernel to | ||
111 | * the new kernel is placed in the control_code_buffer, whose size | ||
112 | * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single | ||
113 | * page of memory is necessary, but some architectures require more. | ||
114 | * Because this memory must be identity mapped in the transition from | ||
115 | * virtual to physical addresses it must live in the range | ||
116 | * 0 - TASK_SIZE, as only the user space mappings are arbitrarily | ||
117 | * modifiable. | ||
118 | * | ||
119 | * The assembly stub in the control code buffer is passed a linked list | ||
120 | * of descriptor pages detailing the source pages of the new kernel, | ||
121 | * and the destination addresses of those source pages. As this data | ||
122 | * structure is not used in the context of the current OS, it must | ||
123 | * be self-contained. | ||
124 | * | ||
125 | * The code has been made to work with highmem pages and will use a | ||
126 | * destination page in its final resting place (if it happens | ||
127 | * to allocate it). The end product of this is that most of the | ||
128 | * physical address space, and most of RAM can be used. | ||
129 | * | ||
130 | * Future directions include: | ||
131 | * - allocating a page table with the control code buffer identity | ||
132 | * mapped, to simplify machine_kexec and make kexec_on_panic more | ||
133 | * reliable. | ||
134 | */ | ||
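Concretely, the "linked list of descriptor pages" described above is built from kimage_entry_t values: page-aligned physical addresses with an IND_* flag in the low bits (the flags come from the kexec headers). A sketch of how the assembly stub reads the stream:

/*
 *   dest | IND_DESTINATION  - start copying to physical address 'dest'
 *   src  | IND_SOURCE       - copy one page from 'src'; the implicit
 *                             destination then advances by PAGE_SIZE
 *   next | IND_INDIRECTION  - continue reading entries from page 'next'
 *   IND_DONE                - end of the list
 *
 * kimage_add_entry()/kimage_terminate() below emit exactly this stream,
 * chaining in a fresh indirection page whenever the current one fills up.
 */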
135 | |||
136 | /* | ||
137 | * KIMAGE_NO_DEST is an impossible destination address, used for | ||
138 | * allocating pages whose destination address we do not care about. | ||
139 | */ | ||
140 | #define KIMAGE_NO_DEST (-1UL) | ||
141 | |||
142 | static struct page *kimage_alloc_page(struct kimage *image, | ||
143 | gfp_t gfp_mask, | ||
144 | unsigned long dest); | ||
145 | |||
146 | int sanity_check_segment_list(struct kimage *image) | ||
147 | { | ||
148 | int result, i; | ||
149 | unsigned long nr_segments = image->nr_segments; | ||
150 | |||
151 | /* | ||
152 | * Verify we have good destination addresses. The caller is | ||
153 | * responsible for making certain we don't attempt to load | ||
154 | * the new image into invalid or reserved areas of RAM. This | ||
155 | * just verifies it is an address we can use. | ||
156 | * | ||
157 | * Since the kernel does everything in page size chunks ensure | ||
158 | * the destination addresses are page aligned. Too many | ||
159 | * special cases crop up when we don't do this. The most | ||
160 | * insidious is getting overlapping destination addresses | ||
161 | * simply because addresses are changed to page size | ||
162 | * granularity. | ||
163 | */ | ||
164 | result = -EADDRNOTAVAIL; | ||
165 | for (i = 0; i < nr_segments; i++) { | ||
166 | unsigned long mstart, mend; | ||
167 | |||
168 | mstart = image->segment[i].mem; | ||
169 | mend = mstart + image->segment[i].memsz; | ||
170 | if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) | ||
171 | return result; | ||
172 | if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) | ||
173 | return result; | ||
174 | } | ||
175 | |||
176 | /* Verify our destination addresses do not overlap. | ||
177 | * If we allowed overlapping destination addresses | ||
178 | * through, very weird things can happen with no | ||
179 | * easy explanation as one segment stops on another. | ||
180 | */ | ||
181 | result = -EINVAL; | ||
182 | for (i = 0; i < nr_segments; i++) { | ||
183 | unsigned long mstart, mend; | ||
184 | unsigned long j; | ||
185 | |||
186 | mstart = image->segment[i].mem; | ||
187 | mend = mstart + image->segment[i].memsz; | ||
188 | for (j = 0; j < i; j++) { | ||
189 | unsigned long pstart, pend; | ||
190 | |||
191 | pstart = image->segment[j].mem; | ||
192 | pend = pstart + image->segment[j].memsz; | ||
193 | /* Do the segments overlap ? */ | ||
194 | if ((mend > pstart) && (mstart < pend)) | ||
195 | return result; | ||
196 | } | ||
197 | } | ||
198 | |||
199 | /* Ensure our buffer sizes do not exceed | ||
200 | * our memory sizes. This should always be the case, | ||
201 | * and it is easier to check up front than to be surprised | ||
202 | * later on. | ||
203 | */ | ||
204 | result = -EINVAL; | ||
205 | for (i = 0; i < nr_segments; i++) { | ||
206 | if (image->segment[i].bufsz > image->segment[i].memsz) | ||
207 | return result; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Verify we have good destination addresses. Normally | ||
212 | * the caller is responsible for making certain we don't | ||
213 | * attempt to load the new image into invalid or reserved | ||
214 | * areas of RAM. But crash kernels are preloaded into a | ||
215 | * reserved area of ram. We must ensure the addresses | ||
216 | * are in the reserved area otherwise preloading the | ||
217 | * kernel could corrupt things. | ||
218 | */ | ||
219 | |||
220 | if (image->type == KEXEC_TYPE_CRASH) { | ||
221 | result = -EADDRNOTAVAIL; | ||
222 | for (i = 0; i < nr_segments; i++) { | ||
223 | unsigned long mstart, mend; | ||
224 | |||
225 | mstart = image->segment[i].mem; | ||
226 | mend = mstart + image->segment[i].memsz - 1; | ||
227 | /* Ensure we are within the crash kernel limits */ | ||
228 | if ((mstart < crashk_res.start) || | ||
229 | (mend > crashk_res.end)) | ||
230 | return result; | ||
231 | } | ||
232 | } | ||
233 | |||
234 | return 0; | ||
235 | } | ||
236 | |||
237 | struct kimage *do_kimage_alloc_init(void) | ||
238 | { | ||
239 | struct kimage *image; | ||
240 | |||
241 | /* Allocate a controlling structure */ | ||
242 | image = kzalloc(sizeof(*image), GFP_KERNEL); | ||
243 | if (!image) | ||
244 | return NULL; | ||
245 | |||
246 | image->head = 0; | ||
247 | image->entry = &image->head; | ||
248 | image->last_entry = &image->head; | ||
249 | image->control_page = ~0; /* By default this does not apply */ | ||
250 | image->type = KEXEC_TYPE_DEFAULT; | ||
251 | |||
252 | /* Initialize the list of control pages */ | ||
253 | INIT_LIST_HEAD(&image->control_pages); | ||
254 | |||
255 | /* Initialize the list of destination pages */ | ||
256 | INIT_LIST_HEAD(&image->dest_pages); | ||
257 | |||
258 | /* Initialize the list of unusable pages */ | ||
259 | INIT_LIST_HEAD(&image->unusable_pages); | ||
260 | |||
261 | return image; | ||
262 | } | ||
263 | |||
264 | int kimage_is_destination_range(struct kimage *image, | ||
265 | unsigned long start, | ||
266 | unsigned long end) | ||
267 | { | ||
268 | unsigned long i; | ||
269 | |||
270 | for (i = 0; i < image->nr_segments; i++) { | ||
271 | unsigned long mstart, mend; | ||
272 | |||
273 | mstart = image->segment[i].mem; | ||
274 | mend = mstart + image->segment[i].memsz; | ||
275 | if ((end > mstart) && (start < mend)) | ||
276 | return 1; | ||
277 | } | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) | ||
283 | { | ||
284 | struct page *pages; | ||
285 | |||
286 | pages = alloc_pages(gfp_mask, order); | ||
287 | if (pages) { | ||
288 | unsigned int count, i; | ||
289 | |||
290 | pages->mapping = NULL; | ||
291 | set_page_private(pages, order); | ||
292 | count = 1 << order; | ||
293 | for (i = 0; i < count; i++) | ||
294 | SetPageReserved(pages + i); | ||
295 | } | ||
296 | |||
297 | return pages; | ||
298 | } | ||
299 | |||
300 | static void kimage_free_pages(struct page *page) | ||
301 | { | ||
302 | unsigned int order, count, i; | ||
303 | |||
304 | order = page_private(page); | ||
305 | count = 1 << order; | ||
306 | for (i = 0; i < count; i++) | ||
307 | ClearPageReserved(page + i); | ||
308 | __free_pages(page, order); | ||
309 | } | ||
310 | |||
311 | void kimage_free_page_list(struct list_head *list) | ||
312 | { | ||
313 | struct list_head *pos, *next; | ||
314 | |||
315 | list_for_each_safe(pos, next, list) { | ||
316 | struct page *page; | ||
317 | |||
318 | page = list_entry(pos, struct page, lru); | ||
319 | list_del(&page->lru); | ||
320 | kimage_free_pages(page); | ||
321 | } | ||
322 | } | ||
323 | |||
324 | static struct page *kimage_alloc_normal_control_pages(struct kimage *image, | ||
325 | unsigned int order) | ||
326 | { | ||
327 | /* Control pages are special; they are the intermediaries | ||
328 | * that are needed while we copy the rest of the pages | ||
329 | * to their final resting place. As such they must | ||
330 | * not conflict with either the destination addresses | ||
331 | * or memory the kernel is already using. | ||
332 | * | ||
333 | * The only case where we really need more than one of | ||
334 | * these is for architectures where we cannot disable | ||
335 | * the MMU and must instead generate an identity mapped | ||
336 | * page table for all of the memory. | ||
337 | * | ||
338 | * At worst this runs in O(N) of the image size. | ||
339 | */ | ||
340 | struct list_head extra_pages; | ||
341 | struct page *pages; | ||
342 | unsigned int count; | ||
343 | |||
344 | count = 1 << order; | ||
345 | INIT_LIST_HEAD(&extra_pages); | ||
346 | |||
347 | /* Loop while I can allocate a page and the page allocated | ||
348 | * is a destination page. | ||
349 | */ | ||
350 | do { | ||
351 | unsigned long pfn, epfn, addr, eaddr; | ||
352 | |||
353 | pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); | ||
354 | if (!pages) | ||
355 | break; | ||
356 | pfn = page_to_pfn(pages); | ||
357 | epfn = pfn + count; | ||
358 | addr = pfn << PAGE_SHIFT; | ||
359 | eaddr = epfn << PAGE_SHIFT; | ||
360 | if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || | ||
361 | kimage_is_destination_range(image, addr, eaddr)) { | ||
362 | list_add(&pages->lru, &extra_pages); | ||
363 | pages = NULL; | ||
364 | } | ||
365 | } while (!pages); | ||
366 | |||
367 | if (pages) { | ||
368 | /* Remember the allocated page... */ | ||
369 | list_add(&pages->lru, &image->control_pages); | ||
370 | |||
371 | /* Because the page is already in its destination | ||
372 | * location we will never allocate another page at | ||
373 | * that address. Therefore kimage_alloc_pages | ||
374 | * will not return it (again) and we don't need | ||
375 | * to give it an entry in image->segment[]. | ||
376 | */ | ||
377 | } | ||
378 | /* Deal with the destination pages I have inadvertently allocated. | ||
379 | * | ||
380 | * Ideally I would convert multi-page allocations into single | ||
381 | * page allocations, and add everything to image->dest_pages. | ||
382 | * | ||
383 | * For now it is simpler to just free the pages. | ||
384 | */ | ||
385 | kimage_free_page_list(&extra_pages); | ||
386 | |||
387 | return pages; | ||
388 | } | ||
389 | |||
390 | static struct page *kimage_alloc_crash_control_pages(struct kimage *image, | ||
391 | unsigned int order) | ||
392 | { | ||
393 | /* Control pages are special; they are the intermediaries | ||
394 | * that are needed while we copy the rest of the pages | ||
395 | * to their final resting place. As such they must | ||
396 | * not conflict with either the destination addresses | ||
397 | * or memory the kernel is already using. | ||
398 | * | ||
399 | * Control pages are also the only pages we must allocate | ||
400 | * when loading a crash kernel. All of the other pages | ||
401 | * are specified by the segments and we just memcpy | ||
402 | * into them directly. | ||
403 | * | ||
404 | * The only case where we really need more than one of | ||
405 | * these is for architectures where we cannot disable | ||
406 | * the MMU and must instead generate an identity mapped | ||
407 | * page table for all of the memory. | ||
408 | * | ||
409 | * Given the low demand this implements a very simple | ||
410 | * allocator that finds the first hole of the appropriate | ||
411 | * size in the reserved memory region, and allocates all | ||
412 | * of the memory up to and including the hole. | ||
413 | */ | ||
414 | unsigned long hole_start, hole_end, size; | ||
415 | struct page *pages; | ||
416 | |||
417 | pages = NULL; | ||
418 | size = (1 << order) << PAGE_SHIFT; | ||
419 | hole_start = (image->control_page + (size - 1)) & ~(size - 1); | ||
420 | hole_end = hole_start + size - 1; | ||
421 | while (hole_end <= crashk_res.end) { | ||
422 | unsigned long i; | ||
423 | |||
424 | if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) | ||
425 | break; | ||
426 | /* See if I overlap any of the segments */ | ||
427 | for (i = 0; i < image->nr_segments; i++) { | ||
428 | unsigned long mstart, mend; | ||
429 | |||
430 | mstart = image->segment[i].mem; | ||
431 | mend = mstart + image->segment[i].memsz - 1; | ||
432 | if ((hole_end >= mstart) && (hole_start <= mend)) { | ||
433 | /* Advance the hole to the end of the segment */ | ||
434 | hole_start = (mend + (size - 1)) & ~(size - 1); | ||
435 | hole_end = hole_start + size - 1; | ||
436 | break; | ||
437 | } | ||
438 | } | ||
439 | /* If I don't overlap any segments I have found my hole! */ | ||
440 | if (i == image->nr_segments) { | ||
441 | pages = pfn_to_page(hole_start >> PAGE_SHIFT); | ||
442 | image->control_page = hole_end; | ||
443 | break; | ||
444 | } | ||
445 | } | ||
446 | |||
447 | return pages; | ||
448 | } | ||
449 | |||
450 | |||
451 | struct page *kimage_alloc_control_pages(struct kimage *image, | ||
452 | unsigned int order) | ||
453 | { | ||
454 | struct page *pages = NULL; | ||
455 | |||
456 | switch (image->type) { | ||
457 | case KEXEC_TYPE_DEFAULT: | ||
458 | pages = kimage_alloc_normal_control_pages(image, order); | ||
459 | break; | ||
460 | case KEXEC_TYPE_CRASH: | ||
461 | pages = kimage_alloc_crash_control_pages(image, order); | ||
462 | break; | ||
463 | } | ||
464 | |||
465 | return pages; | ||
466 | } | ||
467 | |||
468 | static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) | ||
469 | { | ||
470 | if (*image->entry != 0) | ||
471 | image->entry++; | ||
472 | |||
473 | if (image->entry == image->last_entry) { | ||
474 | kimage_entry_t *ind_page; | ||
475 | struct page *page; | ||
476 | |||
477 | page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); | ||
478 | if (!page) | ||
479 | return -ENOMEM; | ||
480 | |||
481 | ind_page = page_address(page); | ||
482 | *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; | ||
483 | image->entry = ind_page; | ||
484 | image->last_entry = ind_page + | ||
485 | ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); | ||
486 | } | ||
487 | *image->entry = entry; | ||
488 | image->entry++; | ||
489 | *image->entry = 0; | ||
490 | |||
491 | return 0; | ||
492 | } | ||
493 | |||
494 | static int kimage_set_destination(struct kimage *image, | ||
495 | unsigned long destination) | ||
496 | { | ||
497 | int result; | ||
498 | |||
499 | destination &= PAGE_MASK; | ||
500 | result = kimage_add_entry(image, destination | IND_DESTINATION); | ||
501 | |||
502 | return result; | ||
503 | } | ||
504 | |||
505 | |||
506 | static int kimage_add_page(struct kimage *image, unsigned long page) | ||
507 | { | ||
508 | int result; | ||
509 | |||
510 | page &= PAGE_MASK; | ||
511 | result = kimage_add_entry(image, page | IND_SOURCE); | ||
512 | |||
513 | return result; | ||
514 | } | ||
515 | |||
516 | |||
517 | static void kimage_free_extra_pages(struct kimage *image) | ||
518 | { | ||
519 | /* Walk through and free any extra destination pages I may have */ | ||
520 | kimage_free_page_list(&image->dest_pages); | ||
521 | |||
522 | /* Walk through and free any unusable pages I have cached */ | ||
523 | kimage_free_page_list(&image->unusable_pages); | ||
524 | |||
525 | } | ||
526 | void kimage_terminate(struct kimage *image) | ||
527 | { | ||
528 | if (*image->entry != 0) | ||
529 | image->entry++; | ||
530 | |||
531 | *image->entry = IND_DONE; | ||
532 | } | ||
533 | |||
534 | #define for_each_kimage_entry(image, ptr, entry) \ | ||
535 | for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ | ||
536 | ptr = (entry & IND_INDIRECTION) ? \ | ||
537 | phys_to_virt((entry & PAGE_MASK)) : ptr + 1) | ||
538 | |||
539 | static void kimage_free_entry(kimage_entry_t entry) | ||
540 | { | ||
541 | struct page *page; | ||
542 | |||
543 | page = pfn_to_page(entry >> PAGE_SHIFT); | ||
544 | kimage_free_pages(page); | ||
545 | } | ||
546 | |||
547 | void kimage_free(struct kimage *image) | ||
548 | { | ||
549 | kimage_entry_t *ptr, entry; | ||
550 | kimage_entry_t ind = 0; | ||
551 | |||
552 | if (!image) | ||
553 | return; | ||
554 | |||
555 | kimage_free_extra_pages(image); | ||
556 | for_each_kimage_entry(image, ptr, entry) { | ||
557 | if (entry & IND_INDIRECTION) { | ||
558 | /* Free the previous indirection page */ | ||
559 | if (ind & IND_INDIRECTION) | ||
560 | kimage_free_entry(ind); | ||
561 | /* Save this indirection page until we are | ||
562 | * done with it. | ||
563 | */ | ||
564 | ind = entry; | ||
565 | } else if (entry & IND_SOURCE) | ||
566 | kimage_free_entry(entry); | ||
567 | } | ||
568 | /* Free the final indirection page */ | ||
569 | if (ind & IND_INDIRECTION) | ||
570 | kimage_free_entry(ind); | ||
571 | |||
572 | /* Handle any machine specific cleanup */ | ||
573 | machine_kexec_cleanup(image); | ||
574 | |||
575 | /* Free the kexec control pages... */ | ||
576 | kimage_free_page_list(&image->control_pages); | ||
577 | |||
578 | /* | ||
579 | * Free up any temporary buffers allocated. This might hit if | ||
580 | * error occurred much later after buffer allocation. | ||
581 | */ | ||
582 | if (image->file_mode) | ||
583 | kimage_file_post_load_cleanup(image); | ||
584 | |||
585 | kfree(image); | ||
586 | } | ||
587 | |||
588 | static kimage_entry_t *kimage_dst_used(struct kimage *image, | ||
589 | unsigned long page) | ||
590 | { | ||
591 | kimage_entry_t *ptr, entry; | ||
592 | unsigned long destination = 0; | ||
593 | |||
594 | for_each_kimage_entry(image, ptr, entry) { | ||
595 | if (entry & IND_DESTINATION) | ||
596 | destination = entry & PAGE_MASK; | ||
597 | else if (entry & IND_SOURCE) { | ||
598 | if (page == destination) | ||
599 | return ptr; | ||
600 | destination += PAGE_SIZE; | ||
601 | } | ||
602 | } | ||
603 | |||
604 | return NULL; | ||
605 | } | ||
606 | |||
607 | static struct page *kimage_alloc_page(struct kimage *image, | ||
608 | gfp_t gfp_mask, | ||
609 | unsigned long destination) | ||
610 | { | ||
611 | /* | ||
612 | * Here we implement safeguards to ensure that a source page | ||
613 | * is not copied to its destination page before the data on | ||
614 | * the destination page is no longer useful. | ||
615 | * | ||
616 | * To do this we maintain the invariant that a source page is | ||
617 | * either its own destination page, or it is not a | ||
618 | * destination page at all. | ||
619 | * | ||
620 | * That is slightly stronger than required, but the proof | ||
621 | * that no problems will occur is trivial, and the | ||
622 | * implementation is simply to verify. | ||
623 | * | ||
624 | * When allocating all pages normally this algorithm will run | ||
625 | * in O(N) time, but in the worst case it will run in O(N^2) | ||
626 | * time. If the runtime is a problem the data structures can | ||
627 | * be fixed. | ||
628 | */ | ||
629 | struct page *page; | ||
630 | unsigned long addr; | ||
631 | |||
632 | /* | ||
633 | * Walk through the list of destination pages, and see if I | ||
634 | * have a match. | ||
635 | */ | ||
636 | list_for_each_entry(page, &image->dest_pages, lru) { | ||
637 | addr = page_to_pfn(page) << PAGE_SHIFT; | ||
638 | if (addr == destination) { | ||
639 | list_del(&page->lru); | ||
640 | return page; | ||
641 | } | ||
642 | } | ||
643 | page = NULL; | ||
644 | while (1) { | ||
645 | kimage_entry_t *old; | ||
646 | |||
647 | /* Allocate a page, if we run out of memory give up */ | ||
648 | page = kimage_alloc_pages(gfp_mask, 0); | ||
649 | if (!page) | ||
650 | return NULL; | ||
651 | /* If the page cannot be used, file it away */ | ||
652 | if (page_to_pfn(page) > | ||
653 | (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { | ||
654 | list_add(&page->lru, &image->unusable_pages); | ||
655 | continue; | ||
656 | } | ||
657 | addr = page_to_pfn(page) << PAGE_SHIFT; | ||
658 | |||
659 | /* If it is the destination page we want, use it */ | ||
660 | if (addr == destination) | ||
661 | break; | ||
662 | |||
663 | /* If the page is not a destination page use it */ | ||
664 | if (!kimage_is_destination_range(image, addr, | ||
665 | addr + PAGE_SIZE)) | ||
666 | break; | ||
667 | |||
668 | /* | ||
669 | * I know that the page is someone's destination page. | ||
670 | * See if there is already a source page for this | ||
671 | * destination page. And if so swap the source pages. | ||
672 | */ | ||
673 | old = kimage_dst_used(image, addr); | ||
674 | if (old) { | ||
675 | /* If so move it */ | ||
676 | unsigned long old_addr; | ||
677 | struct page *old_page; | ||
678 | |||
679 | old_addr = *old & PAGE_MASK; | ||
680 | old_page = pfn_to_page(old_addr >> PAGE_SHIFT); | ||
681 | copy_highpage(page, old_page); | ||
682 | *old = addr | (*old & ~PAGE_MASK); | ||
683 | |||
684 | /* The old page I have found cannot be a | ||
685 | * destination page, so return it if its | ||
686 | * gfp_flags honor the ones passed in. | ||
687 | */ | ||
688 | if (!(gfp_mask & __GFP_HIGHMEM) && | ||
689 | PageHighMem(old_page)) { | ||
690 | kimage_free_pages(old_page); | ||
691 | continue; | ||
692 | } | ||
693 | addr = old_addr; | ||
694 | page = old_page; | ||
695 | break; | ||
696 | } | ||
697 | /* Place the page on the destination list, to be used later */ | ||
698 | list_add(&page->lru, &image->dest_pages); | ||
699 | } | ||
700 | |||
701 | return page; | ||
702 | } | ||
703 | |||
704 | static int kimage_load_normal_segment(struct kimage *image, | ||
705 | struct kexec_segment *segment) | ||
706 | { | ||
707 | unsigned long maddr; | ||
708 | size_t ubytes, mbytes; | ||
709 | int result; | ||
710 | unsigned char __user *buf = NULL; | ||
711 | unsigned char *kbuf = NULL; | ||
712 | |||
713 | result = 0; | ||
714 | if (image->file_mode) | ||
715 | kbuf = segment->kbuf; | ||
716 | else | ||
717 | buf = segment->buf; | ||
718 | ubytes = segment->bufsz; | ||
719 | mbytes = segment->memsz; | ||
720 | maddr = segment->mem; | ||
721 | |||
722 | result = kimage_set_destination(image, maddr); | ||
723 | if (result < 0) | ||
724 | goto out; | ||
725 | |||
726 | while (mbytes) { | ||
727 | struct page *page; | ||
728 | char *ptr; | ||
729 | size_t uchunk, mchunk; | ||
730 | |||
731 | page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); | ||
732 | if (!page) { | ||
733 | result = -ENOMEM; | ||
734 | goto out; | ||
735 | } | ||
736 | result = kimage_add_page(image, page_to_pfn(page) | ||
737 | << PAGE_SHIFT); | ||
738 | if (result < 0) | ||
739 | goto out; | ||
740 | |||
741 | ptr = kmap(page); | ||
742 | /* Start with a clear page */ | ||
743 | clear_page(ptr); | ||
744 | ptr += maddr & ~PAGE_MASK; | ||
745 | mchunk = min_t(size_t, mbytes, | ||
746 | PAGE_SIZE - (maddr & ~PAGE_MASK)); | ||
747 | uchunk = min(ubytes, mchunk); | ||
748 | |||
749 | /* For file based kexec, source pages are in kernel memory */ | ||
750 | if (image->file_mode) | ||
751 | memcpy(ptr, kbuf, uchunk); | ||
752 | else | ||
753 | result = copy_from_user(ptr, buf, uchunk); | ||
754 | kunmap(page); | ||
755 | if (result) { | ||
756 | result = -EFAULT; | ||
757 | goto out; | ||
758 | } | ||
759 | ubytes -= uchunk; | ||
760 | maddr += mchunk; | ||
761 | if (image->file_mode) | ||
762 | kbuf += mchunk; | ||
763 | else | ||
764 | buf += mchunk; | ||
765 | mbytes -= mchunk; | ||
766 | } | ||
767 | out: | ||
768 | return result; | ||
769 | } | ||
770 | |||
771 | static int kimage_load_crash_segment(struct kimage *image, | ||
772 | struct kexec_segment *segment) | ||
773 | { | ||
774 | /* For crash dump kernels we simply copy the data from | ||
775 | * user space to its destination. | ||
776 | * We do things a page at a time for the sake of kmap. | ||
777 | */ | ||
778 | unsigned long maddr; | ||
779 | size_t ubytes, mbytes; | ||
780 | int result; | ||
781 | unsigned char __user *buf = NULL; | ||
782 | unsigned char *kbuf = NULL; | ||
783 | |||
784 | result = 0; | ||
785 | if (image->file_mode) | ||
786 | kbuf = segment->kbuf; | ||
787 | else | ||
788 | buf = segment->buf; | ||
789 | ubytes = segment->bufsz; | ||
790 | mbytes = segment->memsz; | ||
791 | maddr = segment->mem; | ||
792 | while (mbytes) { | ||
793 | struct page *page; | ||
794 | char *ptr; | ||
795 | size_t uchunk, mchunk; | ||
796 | |||
797 | page = pfn_to_page(maddr >> PAGE_SHIFT); | ||
798 | if (!page) { | ||
799 | result = -ENOMEM; | ||
800 | goto out; | ||
801 | } | ||
802 | ptr = kmap(page); | ||
803 | ptr += maddr & ~PAGE_MASK; | ||
804 | mchunk = min_t(size_t, mbytes, | ||
805 | PAGE_SIZE - (maddr & ~PAGE_MASK)); | ||
806 | uchunk = min(ubytes, mchunk); | ||
807 | if (mchunk > uchunk) { | ||
808 | /* Zero the trailing part of the page */ | ||
809 | memset(ptr + uchunk, 0, mchunk - uchunk); | ||
810 | } | ||
811 | |||
812 | /* For file based kexec, source pages are in kernel memory */ | ||
813 | if (image->file_mode) | ||
814 | memcpy(ptr, kbuf, uchunk); | ||
815 | else | ||
816 | result = copy_from_user(ptr, buf, uchunk); | ||
817 | kexec_flush_icache_page(page); | ||
818 | kunmap(page); | ||
819 | if (result) { | ||
820 | result = -EFAULT; | ||
821 | goto out; | ||
822 | } | ||
823 | ubytes -= uchunk; | ||
824 | maddr += mchunk; | ||
825 | if (image->file_mode) | ||
826 | kbuf += mchunk; | ||
827 | else | ||
828 | buf += mchunk; | ||
829 | mbytes -= mchunk; | ||
830 | } | ||
831 | out: | ||
832 | return result; | ||
833 | } | ||
834 | |||
835 | int kimage_load_segment(struct kimage *image, | ||
836 | struct kexec_segment *segment) | ||
837 | { | ||
838 | int result = -ENOMEM; | ||
839 | |||
840 | switch (image->type) { | ||
841 | case KEXEC_TYPE_DEFAULT: | ||
842 | result = kimage_load_normal_segment(image, segment); | ||
843 | break; | ||
844 | case KEXEC_TYPE_CRASH: | ||
845 | result = kimage_load_crash_segment(image, segment); | ||
846 | break; | ||
847 | } | ||
848 | |||
849 | return result; | ||
850 | } | ||
851 | |||
852 | struct kimage *kexec_image; | ||
853 | struct kimage *kexec_crash_image; | ||
854 | int kexec_load_disabled; | ||
855 | |||
856 | void crash_kexec(struct pt_regs *regs) | ||
857 | { | ||
858 | /* Take the kexec_mutex here to prevent sys_kexec_load | ||
859 | * running on one cpu from replacing the crash kernel | ||
860 | * we are using after a panic on a different cpu. | ||
861 | * | ||
862 | * If the crash kernel was not located in a fixed area | ||
863 | * of memory the xchg(&kexec_crash_image) would be | ||
864 | * sufficient. But since I reuse the memory... | ||
865 | */ | ||
866 | if (mutex_trylock(&kexec_mutex)) { | ||
867 | if (kexec_crash_image) { | ||
868 | struct pt_regs fixed_regs; | ||
869 | |||
870 | crash_setup_regs(&fixed_regs, regs); | ||
871 | crash_save_vmcoreinfo(); | ||
872 | machine_crash_shutdown(&fixed_regs); | ||
873 | machine_kexec(kexec_crash_image); | ||
874 | } | ||
875 | mutex_unlock(&kexec_mutex); | ||
876 | } | ||
877 | } | ||
878 | |||
879 | size_t crash_get_memory_size(void) | ||
880 | { | ||
881 | size_t size = 0; | ||
882 | |||
883 | mutex_lock(&kexec_mutex); | ||
884 | if (crashk_res.end != crashk_res.start) | ||
885 | size = resource_size(&crashk_res); | ||
886 | mutex_unlock(&kexec_mutex); | ||
887 | return size; | ||
888 | } | ||
889 | |||
890 | void __weak crash_free_reserved_phys_range(unsigned long begin, | ||
891 | unsigned long end) | ||
892 | { | ||
893 | unsigned long addr; | ||
894 | |||
895 | for (addr = begin; addr < end; addr += PAGE_SIZE) | ||
896 | free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); | ||
897 | } | ||
898 | |||
899 | int crash_shrink_memory(unsigned long new_size) | ||
900 | { | ||
901 | int ret = 0; | ||
902 | unsigned long start, end; | ||
903 | unsigned long old_size; | ||
904 | struct resource *ram_res; | ||
905 | |||
906 | mutex_lock(&kexec_mutex); | ||
907 | |||
908 | if (kexec_crash_image) { | ||
909 | ret = -ENOENT; | ||
910 | goto unlock; | ||
911 | } | ||
912 | start = crashk_res.start; | ||
913 | end = crashk_res.end; | ||
914 | old_size = (end == 0) ? 0 : end - start + 1; | ||
915 | if (new_size >= old_size) { | ||
916 | ret = (new_size == old_size) ? 0 : -EINVAL; | ||
917 | goto unlock; | ||
918 | } | ||
919 | |||
920 | ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); | ||
921 | if (!ram_res) { | ||
922 | ret = -ENOMEM; | ||
923 | goto unlock; | ||
924 | } | ||
925 | |||
926 | start = roundup(start, KEXEC_CRASH_MEM_ALIGN); | ||
927 | end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); | ||
928 | |||
929 | crash_map_reserved_pages(); | ||
930 | crash_free_reserved_phys_range(end, crashk_res.end); | ||
931 | |||
932 | if ((start == end) && (crashk_res.parent != NULL)) | ||
933 | release_resource(&crashk_res); | ||
934 | |||
935 | ram_res->start = end; | ||
936 | ram_res->end = crashk_res.end; | ||
937 | ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; | ||
938 | ram_res->name = "System RAM"; | ||
939 | |||
940 | crashk_res.end = end - 1; | ||
941 | |||
942 | insert_resource(&iomem_resource, ram_res); | ||
943 | crash_unmap_reserved_pages(); | ||
944 | |||
945 | unlock: | ||
946 | mutex_unlock(&kexec_mutex); | ||
947 | return ret; | ||
948 | } | ||
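As an aside on how crash_shrink_memory() is reached at run time: it backs the /sys/kernel/kexec_crash_size attribute, so the reservation can be trimmed from user space. A minimal, hedged user-space sketch (the halving below is purely illustrative):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/kexec_crash_size", "r+");
		unsigned long long size;

		if (!f) {
			perror("kexec_crash_size");
			return 1;
		}
		if (fscanf(f, "%llu", &size) != 1) {	/* current reservation in bytes */
			fclose(f);
			return 1;
		}
		printf("crashkernel reservation: %llu bytes\n", size);

		/* Only shrinking is allowed; growing fails with -EINVAL, and the
		 * write fails with -ENOENT while a crash kernel is loaded. */
		fseek(f, 0, SEEK_SET);
		fprintf(f, "%llu\n", size / 2);
		fclose(f);
		return 0;
	}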
949 | |||
950 | static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, | ||
951 | size_t data_len) | ||
952 | { | ||
953 | struct elf_note note; | ||
954 | |||
955 | note.n_namesz = strlen(name) + 1; | ||
956 | note.n_descsz = data_len; | ||
957 | note.n_type = type; | ||
958 | memcpy(buf, ¬e, sizeof(note)); | ||
959 | buf += (sizeof(note) + 3)/4; | ||
960 | memcpy(buf, name, note.n_namesz); | ||
961 | buf += (note.n_namesz + 3)/4; | ||
962 | memcpy(buf, data, note.n_descsz); | ||
963 | buf += (note.n_descsz + 3)/4; | ||
964 | |||
965 | return buf; | ||
966 | } | ||
967 | |||
968 | static void final_note(u32 *buf) | ||
969 | { | ||
970 | struct elf_note note; | ||
971 | |||
972 | note.n_namesz = 0; | ||
973 | note.n_descsz = 0; | ||
974 | note.n_type = 0; | ||
975 | memcpy(buf, ¬e, sizeof(note)); | ||
976 | } | ||
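A small, hedged user-space illustration of the 4-byte-padded note layout that append_elf_note() emits; the descsz value below is hypothetical:

	#include <stdio.h>

	int main(void)
	{
		unsigned int hdrsz  = 3 * sizeof(unsigned int);	/* n_namesz, n_descsz, n_type */
		unsigned int namesz = sizeof("CORE");		/* 5, includes the NUL */
		unsigned int descsz = 336;			/* e.g. an elf_prstatus, hypothetical */
		unsigned int words;

		/* Each piece advances the u32 cursor by (len + 3) / 4 words,
		 * i.e. its byte length rounded up to a multiple of 4. */
		words = (hdrsz + 3) / 4 + (namesz + 3) / 4 + (descsz + 3) / 4;

		printf("note occupies %u bytes\n", words * 4);	/* 12 + 8 + 336 = 356 */
		return 0;
	}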
977 | |||
978 | void crash_save_cpu(struct pt_regs *regs, int cpu) | ||
979 | { | ||
980 | struct elf_prstatus prstatus; | ||
981 | u32 *buf; | ||
982 | |||
983 | if ((cpu < 0) || (cpu >= nr_cpu_ids)) | ||
984 | return; | ||
985 | |||
986 | /* Using ELF notes here is opportunistic. | ||
987 | * I need a well defined structure format | ||
988 | * for the data I pass, and I need tags | ||
989 | * on the data to indicate what information I have | ||
990 | * squirrelled away. ELF notes happen to provide | ||
991 | * all of that, so there is no need to invent something new. | ||
992 | */ | ||
993 | buf = (u32 *)per_cpu_ptr(crash_notes, cpu); | ||
994 | if (!buf) | ||
995 | return; | ||
996 | memset(&prstatus, 0, sizeof(prstatus)); | ||
997 | prstatus.pr_pid = current->pid; | ||
998 | elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); | ||
999 | buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, | ||
1000 | &prstatus, sizeof(prstatus)); | ||
1001 | final_note(buf); | ||
1002 | } | ||
1003 | |||
1004 | static int __init crash_notes_memory_init(void) | ||
1005 | { | ||
1006 | /* Allocate memory for saving cpu registers. */ | ||
1007 | size_t size, align; | ||
1008 | |||
1009 | /* | ||
1010 | * crash_notes could be allocated across 2 vmalloc pages when percpu | ||
1011 | * is vmalloc based. vmalloc doesn't guarantee that 2 contiguous vmalloc | ||
1012 | * pages are also on 2 contiguous physical pages. In this case the | ||
1013 | * 2nd part of crash_notes in 2nd page could be lost since only the | ||
1014 | * starting address and size of crash_notes are exported through sysfs. | ||
1015 | * Here round up the size of crash_notes to the nearest power of two | ||
1016 | * and pass it to __alloc_percpu as align value. This can make sure | ||
1017 | * crash_notes is allocated inside one physical page. | ||
1018 | */ | ||
1019 | size = sizeof(note_buf_t); | ||
1020 | align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE); | ||
1021 | |||
1022 | /* | ||
1023 | * Break the compile if size is bigger than PAGE_SIZE, since crash_notes | ||
1024 | * would then definitely span 2 pages. | ||
1025 | */ | ||
1026 | BUILD_BUG_ON(size > PAGE_SIZE); | ||
1027 | |||
1028 | crash_notes = __alloc_percpu(size, align); | ||
1029 | if (!crash_notes) { | ||
1030 | pr_warn("Kexec: Memory allocation for saving cpu register states failed\n"); | ||
1031 | return -ENOMEM; | ||
1032 | } | ||
1033 | return 0; | ||
1034 | } | ||
1035 | subsys_initcall(crash_notes_memory_init); | ||
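A hedged user-space sketch of the alignment argument in the comment above: if the allocation's alignment is rounded up to a power of two that is at least the object size (and no larger than a page), the object cannot straddle a page boundary. The size used here is hypothetical, and roundup_pow_of_two() is re-implemented for illustration:

	#include <stdio.h>

	static unsigned long roundup_pow_of_two_ul(unsigned long x)
	{
		unsigned long r = 1;

		while (r < x)
			r <<= 1;
		return r;
	}

	int main(void)
	{
		unsigned long size = 1808;	/* hypothetical sizeof(note_buf_t) */
		unsigned long page = 4096;
		unsigned long align = roundup_pow_of_two_ul(size);

		if (align > page)
			align = page;

		/* An allocation of `size` bytes at an `align`-aligned address cannot
		 * cross a page boundary, because align >= size and align divides page. */
		printf("size=%lu align=%lu\n", size, align);	/* 1808 -> 2048 */
		return 0;
	}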
1036 | |||
1037 | |||
1038 | /* | ||
1039 | * parsing the "crashkernel" commandline | ||
1040 | * | ||
1041 | * this code is intended to be called from architecture specific code | ||
1042 | */ | ||
1043 | |||
1044 | |||
1045 | /* | ||
1046 | * This function parses command lines in the format | ||
1047 | * | ||
1048 | * crashkernel=ramsize-range:size[,...][@offset] | ||
1049 | * | ||
1050 | * The function returns 0 on success and -EINVAL on failure. | ||
1051 | */ | ||
1052 | static int __init parse_crashkernel_mem(char *cmdline, | ||
1053 | unsigned long long system_ram, | ||
1054 | unsigned long long *crash_size, | ||
1055 | unsigned long long *crash_base) | ||
1056 | { | ||
1057 | char *cur = cmdline, *tmp; | ||
1058 | |||
1059 | /* for each entry of the comma-separated list */ | ||
1060 | do { | ||
1061 | unsigned long long start, end = ULLONG_MAX, size; | ||
1062 | |||
1063 | /* get the start of the range */ | ||
1064 | start = memparse(cur, &tmp); | ||
1065 | if (cur == tmp) { | ||
1066 | pr_warn("crashkernel: Memory value expected\n"); | ||
1067 | return -EINVAL; | ||
1068 | } | ||
1069 | cur = tmp; | ||
1070 | if (*cur != '-') { | ||
1071 | pr_warn("crashkernel: '-' expected\n"); | ||
1072 | return -EINVAL; | ||
1073 | } | ||
1074 | cur++; | ||
1075 | |||
1076 | /* if no ':' is here, then we read the end */ | ||
1077 | if (*cur != ':') { | ||
1078 | end = memparse(cur, &tmp); | ||
1079 | if (cur == tmp) { | ||
1080 | pr_warn("crashkernel: Memory value expected\n"); | ||
1081 | return -EINVAL; | ||
1082 | } | ||
1083 | cur = tmp; | ||
1084 | if (end <= start) { | ||
1085 | pr_warn("crashkernel: end <= start\n"); | ||
1086 | return -EINVAL; | ||
1087 | } | ||
1088 | } | ||
1089 | |||
1090 | if (*cur != ':') { | ||
1091 | pr_warn("crashkernel: ':' expected\n"); | ||
1092 | return -EINVAL; | ||
1093 | } | ||
1094 | cur++; | ||
1095 | |||
1096 | size = memparse(cur, &tmp); | ||
1097 | if (cur == tmp) { | ||
1098 | pr_warn("Memory value expected\n"); | ||
1099 | return -EINVAL; | ||
1100 | } | ||
1101 | cur = tmp; | ||
1102 | if (size >= system_ram) { | ||
1103 | pr_warn("crashkernel: invalid size\n"); | ||
1104 | return -EINVAL; | ||
1105 | } | ||
1106 | |||
1107 | /* match ? */ | ||
1108 | if (system_ram >= start && system_ram < end) { | ||
1109 | *crash_size = size; | ||
1110 | break; | ||
1111 | } | ||
1112 | } while (*cur++ == ','); | ||
1113 | |||
1114 | if (*crash_size > 0) { | ||
1115 | while (*cur && *cur != ' ' && *cur != '@') | ||
1116 | cur++; | ||
1117 | if (*cur == '@') { | ||
1118 | cur++; | ||
1119 | *crash_base = memparse(cur, &tmp); | ||
1120 | if (cur == tmp) { | ||
1121 | pr_warn("Memory value expected after '@'\n"); | ||
1122 | return -EINVAL; | ||
1123 | } | ||
1124 | } | ||
1125 | } | ||
1126 | |||
1127 | return 0; | ||
1128 | } | ||
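A hedged user-space re-implementation of the range-matching rule above (it does not call the kernel parser): with the hypothetical option crashkernel=512M-2G:64M,2G-:128M@16M and 4 GiB of RAM, the second entry matches, so 128 MiB is reserved and the trailing "@16M" supplies the base:

	#include <stdio.h>

	struct ck_range { unsigned long long start, end, size; };

	int main(void)
	{
		/* crashkernel=512M-2G:64M,2G-:128M@16M, expressed as parsed ranges */
		struct ck_range tbl[] = {
			{ 512ULL << 20, 2ULL << 30,  64ULL << 20 },
			{   2ULL << 30, ~0ULL,      128ULL << 20 },
		};
		unsigned long long system_ram = 4ULL << 30;	/* 4 GiB, hypothetical */
		unsigned long long crash_size = 0;
		unsigned int i;

		for (i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
			/* Same rule as parse_crashkernel_mem(): first matching range wins */
			if (system_ram >= tbl[i].start && system_ram < tbl[i].end) {
				crash_size = tbl[i].size;
				break;
			}
		}
		printf("reserved %llu MiB for the crash kernel\n", crash_size >> 20);
		return 0;	/* prints 128 for the 4 GiB example */
	}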
1129 | |||
1130 | /* | ||
1131 | * This function parses "simple" (old) crashkernel command lines like | ||
1132 | * | ||
1133 | * crashkernel=size[@offset] | ||
1134 | * | ||
1135 | * It returns 0 on success and -EINVAL on failure. | ||
1136 | */ | ||
1137 | static int __init parse_crashkernel_simple(char *cmdline, | ||
1138 | unsigned long long *crash_size, | ||
1139 | unsigned long long *crash_base) | ||
1140 | { | ||
1141 | char *cur = cmdline; | ||
1142 | |||
1143 | *crash_size = memparse(cmdline, &cur); | ||
1144 | if (cmdline == cur) { | ||
1145 | pr_warn("crashkernel: memory value expected\n"); | ||
1146 | return -EINVAL; | ||
1147 | } | ||
1148 | |||
1149 | if (*cur == '@') | ||
1150 | *crash_base = memparse(cur+1, &cur); | ||
1151 | else if (*cur != ' ' && *cur != '\0') { | ||
1152 | pr_warn("crashkernel: unrecognized char\n"); | ||
1153 | return -EINVAL; | ||
1154 | } | ||
1155 | |||
1156 | return 0; | ||
1157 | } | ||
1158 | |||
1159 | #define SUFFIX_HIGH 0 | ||
1160 | #define SUFFIX_LOW 1 | ||
1161 | #define SUFFIX_NULL 2 | ||
1162 | static __initdata char *suffix_tbl[] = { | ||
1163 | [SUFFIX_HIGH] = ",high", | ||
1164 | [SUFFIX_LOW] = ",low", | ||
1165 | [SUFFIX_NULL] = NULL, | ||
1166 | }; | ||
1167 | |||
1168 | /* | ||
1169 | * This function parses "suffix" crashkernel command lines like | ||
1170 | * | ||
1171 | * crashkernel=size,[high|low] | ||
1172 | * | ||
1173 | * It returns 0 on success and -EINVAL on failure. | ||
1174 | */ | ||
1175 | static int __init parse_crashkernel_suffix(char *cmdline, | ||
1176 | unsigned long long *crash_size, | ||
1177 | const char *suffix) | ||
1178 | { | ||
1179 | char *cur = cmdline; | ||
1180 | |||
1181 | *crash_size = memparse(cmdline, &cur); | ||
1182 | if (cmdline == cur) { | ||
1183 | pr_warn("crashkernel: memory value expected\n"); | ||
1184 | return -EINVAL; | ||
1185 | } | ||
1186 | |||
1187 | /* check with suffix */ | ||
1188 | if (strncmp(cur, suffix, strlen(suffix))) { | ||
1189 | pr_warn("crashkernel: unrecognized char\n"); | ||
1190 | return -EINVAL; | ||
1191 | } | ||
1192 | cur += strlen(suffix); | ||
1193 | if (*cur != ' ' && *cur != '\0') { | ||
1194 | pr_warn("crashkernel: unrecognized char\n"); | ||
1195 | return -EINVAL; | ||
1196 | } | ||
1197 | |||
1198 | return 0; | ||
1199 | } | ||
1200 | |||
1201 | static __init char *get_last_crashkernel(char *cmdline, | ||
1202 | const char *name, | ||
1203 | const char *suffix) | ||
1204 | { | ||
1205 | char *p = cmdline, *ck_cmdline = NULL; | ||
1206 | |||
1207 | /* find crashkernel and use the last one if there are more */ | ||
1208 | p = strstr(p, name); | ||
1209 | while (p) { | ||
1210 | char *end_p = strchr(p, ' '); | ||
1211 | char *q; | ||
1212 | |||
1213 | if (!end_p) | ||
1214 | end_p = p + strlen(p); | ||
1215 | |||
1216 | if (!suffix) { | ||
1217 | int i; | ||
1218 | |||
1219 | /* skip the one with any known suffix */ | ||
1220 | for (i = 0; suffix_tbl[i]; i++) { | ||
1221 | q = end_p - strlen(suffix_tbl[i]); | ||
1222 | if (!strncmp(q, suffix_tbl[i], | ||
1223 | strlen(suffix_tbl[i]))) | ||
1224 | goto next; | ||
1225 | } | ||
1226 | ck_cmdline = p; | ||
1227 | } else { | ||
1228 | q = end_p - strlen(suffix); | ||
1229 | if (!strncmp(q, suffix, strlen(suffix))) | ||
1230 | ck_cmdline = p; | ||
1231 | } | ||
1232 | next: | ||
1233 | p = strstr(p+1, name); | ||
1234 | } | ||
1235 | |||
1236 | if (!ck_cmdline) | ||
1237 | return NULL; | ||
1238 | |||
1239 | return ck_cmdline; | ||
1240 | } | ||
1241 | |||
1242 | static int __init __parse_crashkernel(char *cmdline, | ||
1243 | unsigned long long system_ram, | ||
1244 | unsigned long long *crash_size, | ||
1245 | unsigned long long *crash_base, | ||
1246 | const char *name, | ||
1247 | const char *suffix) | ||
1248 | { | ||
1249 | char *first_colon, *first_space; | ||
1250 | char *ck_cmdline; | ||
1251 | |||
1252 | BUG_ON(!crash_size || !crash_base); | ||
1253 | *crash_size = 0; | ||
1254 | *crash_base = 0; | ||
1255 | |||
1256 | ck_cmdline = get_last_crashkernel(cmdline, name, suffix); | ||
1257 | |||
1258 | if (!ck_cmdline) | ||
1259 | return -EINVAL; | ||
1260 | |||
1261 | ck_cmdline += strlen(name); | ||
1262 | |||
1263 | if (suffix) | ||
1264 | return parse_crashkernel_suffix(ck_cmdline, crash_size, | ||
1265 | suffix); | ||
1266 | /* | ||
1267 | * if the commandline contains a ':', then that's the extended | ||
1268 | * syntax -- if not, it must be the classic syntax | ||
1269 | */ | ||
1270 | first_colon = strchr(ck_cmdline, ':'); | ||
1271 | first_space = strchr(ck_cmdline, ' '); | ||
1272 | if (first_colon && (!first_space || first_colon < first_space)) | ||
1273 | return parse_crashkernel_mem(ck_cmdline, system_ram, | ||
1274 | crash_size, crash_base); | ||
1275 | |||
1276 | return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); | ||
1277 | } | ||
1278 | |||
1279 | /* | ||
1280 | * This function is the entry point for command line parsing and should be | ||
1281 | * called from the arch-specific code. | ||
1282 | */ | ||
1283 | int __init parse_crashkernel(char *cmdline, | ||
1284 | unsigned long long system_ram, | ||
1285 | unsigned long long *crash_size, | ||
1286 | unsigned long long *crash_base) | ||
1287 | { | ||
1288 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1289 | "crashkernel=", NULL); | ||
1290 | } | ||
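A hedged kernel-side sketch (not part of this patch) of how an architecture's crashkernel reservation path typically calls this entry point; the actual reservation step is elided:

	#include <linux/init.h>
	#include <linux/kexec.h>
	#include <linux/memblock.h>

	static void __init example_reserve_crashkernel(void)
	{
		unsigned long long crash_size, crash_base;
		int ret;

		ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
					&crash_size, &crash_base);
		if (ret || !crash_size)
			return;		/* nothing (valid) requested on the command line */

		/*
		 * ... reserve [crash_base, crash_base + crash_size) in memblock and
		 * fill in crashk_res.start/end (arch specific) ...
		 */
	}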
1291 | |||
1292 | int __init parse_crashkernel_high(char *cmdline, | ||
1293 | unsigned long long system_ram, | ||
1294 | unsigned long long *crash_size, | ||
1295 | unsigned long long *crash_base) | ||
1296 | { | ||
1297 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1298 | "crashkernel=", suffix_tbl[SUFFIX_HIGH]); | ||
1299 | } | ||
1300 | |||
1301 | int __init parse_crashkernel_low(char *cmdline, | ||
1302 | unsigned long long system_ram, | ||
1303 | unsigned long long *crash_size, | ||
1304 | unsigned long long *crash_base) | ||
1305 | { | ||
1306 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1307 | "crashkernel=", suffix_tbl[SUFFIX_LOW]); | ||
1308 | } | ||
1309 | |||
1310 | static void update_vmcoreinfo_note(void) | ||
1311 | { | ||
1312 | u32 *buf = vmcoreinfo_note; | ||
1313 | |||
1314 | if (!vmcoreinfo_size) | ||
1315 | return; | ||
1316 | buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, | ||
1317 | vmcoreinfo_size); | ||
1318 | final_note(buf); | ||
1319 | } | ||
1320 | |||
1321 | void crash_save_vmcoreinfo(void) | ||
1322 | { | ||
1323 | vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); | ||
1324 | update_vmcoreinfo_note(); | ||
1325 | } | ||
1326 | |||
1327 | void vmcoreinfo_append_str(const char *fmt, ...) | ||
1328 | { | ||
1329 | va_list args; | ||
1330 | char buf[0x50]; | ||
1331 | size_t r; | ||
1332 | |||
1333 | va_start(args, fmt); | ||
1334 | r = vscnprintf(buf, sizeof(buf), fmt, args); | ||
1335 | va_end(args); | ||
1336 | |||
1337 | r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); | ||
1338 | |||
1339 | memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); | ||
1340 | |||
1341 | vmcoreinfo_size += r; | ||
1342 | } | ||
1343 | |||
1344 | /* | ||
1345 | * provide an empty default implementation here -- architecture | ||
1346 | * code may override this | ||
1347 | */ | ||
1348 | void __weak arch_crash_save_vmcoreinfo(void) | ||
1349 | {} | ||
1350 | |||
1351 | unsigned long __weak paddr_vmcoreinfo_note(void) | ||
1352 | { | ||
1353 | return __pa((unsigned long)(char *)&vmcoreinfo_note); | ||
1354 | } | ||
1355 | |||
1356 | static int __init crash_save_vmcoreinfo_init(void) | ||
1357 | { | ||
1358 | VMCOREINFO_OSRELEASE(init_uts_ns.name.release); | ||
1359 | VMCOREINFO_PAGESIZE(PAGE_SIZE); | ||
1360 | |||
1361 | VMCOREINFO_SYMBOL(init_uts_ns); | ||
1362 | VMCOREINFO_SYMBOL(node_online_map); | ||
1363 | #ifdef CONFIG_MMU | ||
1364 | VMCOREINFO_SYMBOL(swapper_pg_dir); | ||
1365 | #endif | ||
1366 | VMCOREINFO_SYMBOL(_stext); | ||
1367 | VMCOREINFO_SYMBOL(vmap_area_list); | ||
1368 | |||
1369 | #ifndef CONFIG_NEED_MULTIPLE_NODES | ||
1370 | VMCOREINFO_SYMBOL(mem_map); | ||
1371 | VMCOREINFO_SYMBOL(contig_page_data); | ||
1372 | #endif | ||
1373 | #ifdef CONFIG_SPARSEMEM | ||
1374 | VMCOREINFO_SYMBOL(mem_section); | ||
1375 | VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); | ||
1376 | VMCOREINFO_STRUCT_SIZE(mem_section); | ||
1377 | VMCOREINFO_OFFSET(mem_section, section_mem_map); | ||
1378 | #endif | ||
1379 | VMCOREINFO_STRUCT_SIZE(page); | ||
1380 | VMCOREINFO_STRUCT_SIZE(pglist_data); | ||
1381 | VMCOREINFO_STRUCT_SIZE(zone); | ||
1382 | VMCOREINFO_STRUCT_SIZE(free_area); | ||
1383 | VMCOREINFO_STRUCT_SIZE(list_head); | ||
1384 | VMCOREINFO_SIZE(nodemask_t); | ||
1385 | VMCOREINFO_OFFSET(page, flags); | ||
1386 | VMCOREINFO_OFFSET(page, _count); | ||
1387 | VMCOREINFO_OFFSET(page, mapping); | ||
1388 | VMCOREINFO_OFFSET(page, lru); | ||
1389 | VMCOREINFO_OFFSET(page, _mapcount); | ||
1390 | VMCOREINFO_OFFSET(page, private); | ||
1391 | VMCOREINFO_OFFSET(pglist_data, node_zones); | ||
1392 | VMCOREINFO_OFFSET(pglist_data, nr_zones); | ||
1393 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
1394 | VMCOREINFO_OFFSET(pglist_data, node_mem_map); | ||
1395 | #endif | ||
1396 | VMCOREINFO_OFFSET(pglist_data, node_start_pfn); | ||
1397 | VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); | ||
1398 | VMCOREINFO_OFFSET(pglist_data, node_id); | ||
1399 | VMCOREINFO_OFFSET(zone, free_area); | ||
1400 | VMCOREINFO_OFFSET(zone, vm_stat); | ||
1401 | VMCOREINFO_OFFSET(zone, spanned_pages); | ||
1402 | VMCOREINFO_OFFSET(free_area, free_list); | ||
1403 | VMCOREINFO_OFFSET(list_head, next); | ||
1404 | VMCOREINFO_OFFSET(list_head, prev); | ||
1405 | VMCOREINFO_OFFSET(vmap_area, va_start); | ||
1406 | VMCOREINFO_OFFSET(vmap_area, list); | ||
1407 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); | ||
1408 | log_buf_kexec_setup(); | ||
1409 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); | ||
1410 | VMCOREINFO_NUMBER(NR_FREE_PAGES); | ||
1411 | VMCOREINFO_NUMBER(PG_lru); | ||
1412 | VMCOREINFO_NUMBER(PG_private); | ||
1413 | VMCOREINFO_NUMBER(PG_swapcache); | ||
1414 | VMCOREINFO_NUMBER(PG_slab); | ||
1415 | #ifdef CONFIG_MEMORY_FAILURE | ||
1416 | VMCOREINFO_NUMBER(PG_hwpoison); | ||
1417 | #endif | ||
1418 | VMCOREINFO_NUMBER(PG_head_mask); | ||
1419 | VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); | ||
1420 | #ifdef CONFIG_X86 | ||
1421 | VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE); | ||
1422 | #endif | ||
1423 | #ifdef CONFIG_HUGETLBFS | ||
1424 | VMCOREINFO_SYMBOL(free_huge_page); | ||
1425 | #endif | ||
1426 | |||
1427 | arch_crash_save_vmcoreinfo(); | ||
1428 | update_vmcoreinfo_note(); | ||
1429 | |||
1430 | return 0; | ||
1431 | } | ||
1432 | |||
1433 | subsys_initcall(crash_save_vmcoreinfo_init); | ||
1434 | |||
1435 | /* | ||
1436 | * Move into place and start executing a preloaded standalone | ||
1437 | * executable. If nothing was preloaded return an error. | ||
1438 | */ | ||
1439 | int kernel_kexec(void) | ||
1440 | { | ||
1441 | int error = 0; | ||
1442 | |||
1443 | if (!mutex_trylock(&kexec_mutex)) | ||
1444 | return -EBUSY; | ||
1445 | if (!kexec_image) { | ||
1446 | error = -EINVAL; | ||
1447 | goto Unlock; | ||
1448 | } | ||
1449 | |||
1450 | #ifdef CONFIG_KEXEC_JUMP | ||
1451 | if (kexec_image->preserve_context) { | ||
1452 | lock_system_sleep(); | ||
1453 | pm_prepare_console(); | ||
1454 | error = freeze_processes(); | ||
1455 | if (error) { | ||
1456 | error = -EBUSY; | ||
1457 | goto Restore_console; | ||
1458 | } | ||
1459 | suspend_console(); | ||
1460 | error = dpm_suspend_start(PMSG_FREEZE); | ||
1461 | if (error) | ||
1462 | goto Resume_console; | ||
1463 | /* At this point, dpm_suspend_start() has been called, | ||
1464 | * but *not* dpm_suspend_end(). We *must* call | ||
1465 | * dpm_suspend_end() now. Otherwise, drivers for | ||
1466 | * some devices (e.g. interrupt controllers) become | ||
1467 | * desynchronized with the actual state of the | ||
1468 | * hardware at resume time, and evil weirdness ensues. | ||
1469 | */ | ||
1470 | error = dpm_suspend_end(PMSG_FREEZE); | ||
1471 | if (error) | ||
1472 | goto Resume_devices; | ||
1473 | error = disable_nonboot_cpus(); | ||
1474 | if (error) | ||
1475 | goto Enable_cpus; | ||
1476 | local_irq_disable(); | ||
1477 | error = syscore_suspend(); | ||
1478 | if (error) | ||
1479 | goto Enable_irqs; | ||
1480 | } else | ||
1481 | #endif | ||
1482 | { | ||
1483 | kexec_in_progress = true; | ||
1484 | kernel_restart_prepare(NULL); | ||
1485 | migrate_to_reboot_cpu(); | ||
1486 | |||
1487 | /* | ||
1488 | * migrate_to_reboot_cpu() disables CPU hotplug assuming that | ||
1489 | * no further code needs to use CPU hotplug (which is true in | ||
1490 | * the reboot case). However, the kexec path depends on using | ||
1491 | * CPU hotplug again; so re-enable it here. | ||
1492 | */ | ||
1493 | cpu_hotplug_enable(); | ||
1494 | pr_emerg("Starting new kernel\n"); | ||
1495 | machine_shutdown(); | ||
1496 | } | ||
1497 | |||
1498 | machine_kexec(kexec_image); | ||
1499 | |||
1500 | #ifdef CONFIG_KEXEC_JUMP | ||
1501 | if (kexec_image->preserve_context) { | ||
1502 | syscore_resume(); | ||
1503 | Enable_irqs: | ||
1504 | local_irq_enable(); | ||
1505 | Enable_cpus: | ||
1506 | enable_nonboot_cpus(); | ||
1507 | dpm_resume_start(PMSG_RESTORE); | ||
1508 | Resume_devices: | ||
1509 | dpm_resume_end(PMSG_RESTORE); | ||
1510 | Resume_console: | ||
1511 | resume_console(); | ||
1512 | thaw_processes(); | ||
1513 | Restore_console: | ||
1514 | pm_restore_console(); | ||
1515 | unlock_system_sleep(); | ||
1516 | } | ||
1517 | #endif | ||
1518 | |||
1519 | Unlock: | ||
1520 | mutex_unlock(&kexec_mutex); | ||
1521 | return error; | ||
1522 | } | ||
1523 | |||
1524 | /* | ||
1525 | * Add and remove page tables for crashkernel memory | ||
1526 | * | ||
1527 | * Provide an empty default implementation here -- architecture | ||
1528 | * code may override this | ||
1529 | */ | ||
1530 | void __weak crash_map_reserved_pages(void) | ||
1531 | {} | ||
1532 | |||
1533 | void __weak crash_unmap_reserved_pages(void) | ||
1534 | {} | ||
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c new file mode 100644 index 000000000000..6a9a3f2a0e8e --- /dev/null +++ b/kernel/kexec_file.c | |||
@@ -0,0 +1,1045 @@ | |||
1 | /* | ||
2 | * kexec: kexec_file_load system call | ||
3 | * | ||
4 | * Copyright (C) 2014 Red Hat Inc. | ||
5 | * Authors: | ||
6 | * Vivek Goyal <vgoyal@redhat.com> | ||
7 | * | ||
8 | * This source code is licensed under the GNU General Public License, | ||
9 | * Version 2. See the file COPYING for more details. | ||
10 | */ | ||
11 | |||
12 | #include <linux/capability.h> | ||
13 | #include <linux/mm.h> | ||
14 | #include <linux/file.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/kexec.h> | ||
17 | #include <linux/mutex.h> | ||
18 | #include <linux/list.h> | ||
19 | #include <crypto/hash.h> | ||
20 | #include <crypto/sha.h> | ||
21 | #include <linux/syscalls.h> | ||
22 | #include <linux/vmalloc.h> | ||
23 | #include "kexec_internal.h" | ||
24 | |||
25 | /* | ||
26 | * Declare these symbols weak so that if architecture provides a purgatory, | ||
27 | * these will be overridden. | ||
28 | */ | ||
29 | char __weak kexec_purgatory[0]; | ||
30 | size_t __weak kexec_purgatory_size = 0; | ||
31 | |||
32 | static int kexec_calculate_store_digests(struct kimage *image); | ||
33 | |||
34 | static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len) | ||
35 | { | ||
36 | struct fd f = fdget(fd); | ||
37 | int ret; | ||
38 | struct kstat stat; | ||
39 | loff_t pos; | ||
40 | ssize_t bytes = 0; | ||
41 | |||
42 | if (!f.file) | ||
43 | return -EBADF; | ||
44 | |||
45 | ret = vfs_getattr(&f.file->f_path, &stat); | ||
46 | if (ret) | ||
47 | goto out; | ||
48 | |||
49 | if (stat.size > INT_MAX) { | ||
50 | ret = -EFBIG; | ||
51 | goto out; | ||
52 | } | ||
53 | |||
54 | /* Don't hand 0 to vmalloc, it whines. */ | ||
55 | if (stat.size == 0) { | ||
56 | ret = -EINVAL; | ||
57 | goto out; | ||
58 | } | ||
59 | |||
60 | *buf = vmalloc(stat.size); | ||
61 | if (!*buf) { | ||
62 | ret = -ENOMEM; | ||
63 | goto out; | ||
64 | } | ||
65 | |||
66 | pos = 0; | ||
67 | while (pos < stat.size) { | ||
68 | bytes = kernel_read(f.file, pos, (char *)(*buf) + pos, | ||
69 | stat.size - pos); | ||
70 | if (bytes < 0) { | ||
71 | vfree(*buf); | ||
72 | ret = bytes; | ||
73 | goto out; | ||
74 | } | ||
75 | |||
76 | if (bytes == 0) | ||
77 | break; | ||
78 | pos += bytes; | ||
79 | } | ||
80 | |||
81 | if (pos != stat.size) { | ||
82 | ret = -EBADF; | ||
83 | vfree(*buf); | ||
84 | goto out; | ||
85 | } | ||
86 | |||
87 | *buf_len = pos; | ||
88 | out: | ||
89 | fdput(f); | ||
90 | return ret; | ||
91 | } | ||
92 | |||
93 | /* Architectures can provide this probe function */ | ||
94 | int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, | ||
95 | unsigned long buf_len) | ||
96 | { | ||
97 | return -ENOEXEC; | ||
98 | } | ||
99 | |||
100 | void * __weak arch_kexec_kernel_image_load(struct kimage *image) | ||
101 | { | ||
102 | return ERR_PTR(-ENOEXEC); | ||
103 | } | ||
104 | |||
105 | int __weak arch_kimage_file_post_load_cleanup(struct kimage *image) | ||
106 | { | ||
107 | return -EINVAL; | ||
108 | } | ||
109 | |||
110 | int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, | ||
111 | unsigned long buf_len) | ||
112 | { | ||
113 | return -EKEYREJECTED; | ||
114 | } | ||
115 | |||
116 | /* Apply relocations of type RELA */ | ||
117 | int __weak | ||
118 | arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | ||
119 | unsigned int relsec) | ||
120 | { | ||
121 | pr_err("RELA relocation unsupported.\n"); | ||
122 | return -ENOEXEC; | ||
123 | } | ||
124 | |||
125 | /* Apply relocations of type REL */ | ||
126 | int __weak | ||
127 | arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | ||
128 | unsigned int relsec) | ||
129 | { | ||
130 | pr_err("REL relocation unsupported.\n"); | ||
131 | return -ENOEXEC; | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * Free up memory used by kernel, initrd, and command line. These are | ||
136 | * temporary allocations which are not needed any more after these buffers | ||
137 | * have been loaded into separate segments and have been copied elsewhere. | ||
138 | */ | ||
139 | void kimage_file_post_load_cleanup(struct kimage *image) | ||
140 | { | ||
141 | struct purgatory_info *pi = &image->purgatory_info; | ||
142 | |||
143 | vfree(image->kernel_buf); | ||
144 | image->kernel_buf = NULL; | ||
145 | |||
146 | vfree(image->initrd_buf); | ||
147 | image->initrd_buf = NULL; | ||
148 | |||
149 | kfree(image->cmdline_buf); | ||
150 | image->cmdline_buf = NULL; | ||
151 | |||
152 | vfree(pi->purgatory_buf); | ||
153 | pi->purgatory_buf = NULL; | ||
154 | |||
155 | vfree(pi->sechdrs); | ||
156 | pi->sechdrs = NULL; | ||
157 | |||
158 | /* See if architecture has anything to cleanup post load */ | ||
159 | arch_kimage_file_post_load_cleanup(image); | ||
160 | |||
161 | /* | ||
162 | * Above call should have called into bootloader to free up | ||
163 | * any data stored in kimage->image_loader_data. It should | ||
164 | * be ok now to free it up. | ||
165 | */ | ||
166 | kfree(image->image_loader_data); | ||
167 | image->image_loader_data = NULL; | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * In file mode the list of segments is prepared by the kernel. Copy the | ||
172 | * relevant data from user space, do error checking, prepare the segment list. | ||
173 | */ | ||
174 | static int | ||
175 | kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, | ||
176 | const char __user *cmdline_ptr, | ||
177 | unsigned long cmdline_len, unsigned flags) | ||
178 | { | ||
179 | int ret = 0; | ||
180 | void *ldata; | ||
181 | |||
182 | ret = copy_file_from_fd(kernel_fd, &image->kernel_buf, | ||
183 | &image->kernel_buf_len); | ||
184 | if (ret) | ||
185 | return ret; | ||
186 | |||
187 | /* Call arch image probe handlers */ | ||
188 | ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, | ||
189 | image->kernel_buf_len); | ||
190 | |||
191 | if (ret) | ||
192 | goto out; | ||
193 | |||
194 | #ifdef CONFIG_KEXEC_VERIFY_SIG | ||
195 | ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf, | ||
196 | image->kernel_buf_len); | ||
197 | if (ret) { | ||
198 | pr_debug("kernel signature verification failed.\n"); | ||
199 | goto out; | ||
200 | } | ||
201 | pr_debug("kernel signature verification successful.\n"); | ||
202 | #endif | ||
203 | /* It is possible that no initramfs is being loaded */ | ||
204 | if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { | ||
205 | ret = copy_file_from_fd(initrd_fd, &image->initrd_buf, | ||
206 | &image->initrd_buf_len); | ||
207 | if (ret) | ||
208 | goto out; | ||
209 | } | ||
210 | |||
211 | if (cmdline_len) { | ||
212 | image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL); | ||
213 | if (!image->cmdline_buf) { | ||
214 | ret = -ENOMEM; | ||
215 | goto out; | ||
216 | } | ||
217 | |||
218 | ret = copy_from_user(image->cmdline_buf, cmdline_ptr, | ||
219 | cmdline_len); | ||
220 | if (ret) { | ||
221 | ret = -EFAULT; | ||
222 | goto out; | ||
223 | } | ||
224 | |||
225 | image->cmdline_buf_len = cmdline_len; | ||
226 | |||
227 | /* command line should be a string with last byte null */ | ||
228 | if (image->cmdline_buf[cmdline_len - 1] != '\0') { | ||
229 | ret = -EINVAL; | ||
230 | goto out; | ||
231 | } | ||
232 | } | ||
233 | |||
234 | /* Call arch image load handlers */ | ||
235 | ldata = arch_kexec_kernel_image_load(image); | ||
236 | |||
237 | if (IS_ERR(ldata)) { | ||
238 | ret = PTR_ERR(ldata); | ||
239 | goto out; | ||
240 | } | ||
241 | |||
242 | image->image_loader_data = ldata; | ||
243 | out: | ||
244 | /* In case of error, free up all allocated memory in this function */ | ||
245 | if (ret) | ||
246 | kimage_file_post_load_cleanup(image); | ||
247 | return ret; | ||
248 | } | ||
249 | |||
250 | static int | ||
251 | kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, | ||
252 | int initrd_fd, const char __user *cmdline_ptr, | ||
253 | unsigned long cmdline_len, unsigned long flags) | ||
254 | { | ||
255 | int ret; | ||
256 | struct kimage *image; | ||
257 | bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH; | ||
258 | |||
259 | image = do_kimage_alloc_init(); | ||
260 | if (!image) | ||
261 | return -ENOMEM; | ||
262 | |||
263 | image->file_mode = 1; | ||
264 | |||
265 | if (kexec_on_panic) { | ||
266 | /* Enable special crash kernel control page alloc policy. */ | ||
267 | image->control_page = crashk_res.start; | ||
268 | image->type = KEXEC_TYPE_CRASH; | ||
269 | } | ||
270 | |||
271 | ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd, | ||
272 | cmdline_ptr, cmdline_len, flags); | ||
273 | if (ret) | ||
274 | goto out_free_image; | ||
275 | |||
276 | ret = sanity_check_segment_list(image); | ||
277 | if (ret) | ||
278 | goto out_free_post_load_bufs; | ||
279 | |||
280 | ret = -ENOMEM; | ||
281 | image->control_code_page = kimage_alloc_control_pages(image, | ||
282 | get_order(KEXEC_CONTROL_PAGE_SIZE)); | ||
283 | if (!image->control_code_page) { | ||
284 | pr_err("Could not allocate control_code_buffer\n"); | ||
285 | goto out_free_post_load_bufs; | ||
286 | } | ||
287 | |||
288 | if (!kexec_on_panic) { | ||
289 | image->swap_page = kimage_alloc_control_pages(image, 0); | ||
290 | if (!image->swap_page) { | ||
291 | pr_err("Could not allocate swap buffer\n"); | ||
292 | goto out_free_control_pages; | ||
293 | } | ||
294 | } | ||
295 | |||
296 | *rimage = image; | ||
297 | return 0; | ||
298 | out_free_control_pages: | ||
299 | kimage_free_page_list(&image->control_pages); | ||
300 | out_free_post_load_bufs: | ||
301 | kimage_file_post_load_cleanup(image); | ||
302 | out_free_image: | ||
303 | kfree(image); | ||
304 | return ret; | ||
305 | } | ||
306 | |||
307 | SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, | ||
308 | unsigned long, cmdline_len, const char __user *, cmdline_ptr, | ||
309 | unsigned long, flags) | ||
310 | { | ||
311 | int ret = 0, i; | ||
312 | struct kimage **dest_image, *image; | ||
313 | |||
314 | /* We only trust the superuser with rebooting the system. */ | ||
315 | if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) | ||
316 | return -EPERM; | ||
317 | |||
318 | /* Make sure we have a legal set of flags */ | ||
319 | if (flags != (flags & KEXEC_FILE_FLAGS)) | ||
320 | return -EINVAL; | ||
321 | |||
322 | image = NULL; | ||
323 | |||
324 | if (!mutex_trylock(&kexec_mutex)) | ||
325 | return -EBUSY; | ||
326 | |||
327 | dest_image = &kexec_image; | ||
328 | if (flags & KEXEC_FILE_ON_CRASH) | ||
329 | dest_image = &kexec_crash_image; | ||
330 | |||
331 | if (flags & KEXEC_FILE_UNLOAD) | ||
332 | goto exchange; | ||
333 | |||
334 | /* | ||
335 | * In case of crash, the new kernel gets loaded in a reserved region. It | ||
336 | * is the same memory where an old crash kernel might be loaded. Free any | ||
337 | * current crash dump kernel before we corrupt it. | ||
338 | */ | ||
339 | if (flags & KEXEC_FILE_ON_CRASH) | ||
340 | kimage_free(xchg(&kexec_crash_image, NULL)); | ||
341 | |||
342 | ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr, | ||
343 | cmdline_len, flags); | ||
344 | if (ret) | ||
345 | goto out; | ||
346 | |||
347 | ret = machine_kexec_prepare(image); | ||
348 | if (ret) | ||
349 | goto out; | ||
350 | |||
351 | ret = kexec_calculate_store_digests(image); | ||
352 | if (ret) | ||
353 | goto out; | ||
354 | |||
355 | for (i = 0; i < image->nr_segments; i++) { | ||
356 | struct kexec_segment *ksegment; | ||
357 | |||
358 | ksegment = &image->segment[i]; | ||
359 | pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n", | ||
360 | i, ksegment->buf, ksegment->bufsz, ksegment->mem, | ||
361 | ksegment->memsz); | ||
362 | |||
363 | ret = kimage_load_segment(image, &image->segment[i]); | ||
364 | if (ret) | ||
365 | goto out; | ||
366 | } | ||
367 | |||
368 | kimage_terminate(image); | ||
369 | |||
370 | /* | ||
371 | * Free up any temporary buffers allocated which are not needed | ||
372 | * after image has been loaded | ||
373 | */ | ||
374 | kimage_file_post_load_cleanup(image); | ||
375 | exchange: | ||
376 | image = xchg(dest_image, image); | ||
377 | out: | ||
378 | mutex_unlock(&kexec_mutex); | ||
379 | kimage_free(image); | ||
380 | return ret; | ||
381 | } | ||
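A hedged user-space sketch of invoking this system call; it assumes __NR_kexec_file_load is defined by the installed headers, and the file paths are placeholders:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		const char *cmdline = "console=ttyS0 root=/dev/sda1";
		int kernel_fd = open("/boot/vmlinuz", O_RDONLY);	/* placeholder path */
		int initrd_fd = open("/boot/initrd.img", O_RDONLY);	/* placeholder path */
		long ret;

		if (kernel_fd < 0 || initrd_fd < 0) {
			perror("open");
			return 1;
		}

		/* cmdline_len must count the terminating NUL byte (see the check above). */
		ret = syscall(__NR_kexec_file_load, kernel_fd, initrd_fd,
			      strlen(cmdline) + 1, cmdline, 0UL);
		if (ret)
			perror("kexec_file_load");
		return ret ? 1 : 0;
	}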
382 | |||
383 | static int locate_mem_hole_top_down(unsigned long start, unsigned long end, | ||
384 | struct kexec_buf *kbuf) | ||
385 | { | ||
386 | struct kimage *image = kbuf->image; | ||
387 | unsigned long temp_start, temp_end; | ||
388 | |||
389 | temp_end = min(end, kbuf->buf_max); | ||
390 | temp_start = temp_end - kbuf->memsz; | ||
391 | |||
392 | do { | ||
393 | /* align down start */ | ||
394 | temp_start = temp_start & (~(kbuf->buf_align - 1)); | ||
395 | |||
396 | if (temp_start < start || temp_start < kbuf->buf_min) | ||
397 | return 0; | ||
398 | |||
399 | temp_end = temp_start + kbuf->memsz - 1; | ||
400 | |||
401 | /* | ||
402 | * Make sure this does not conflict with any of existing | ||
403 | * segments | ||
404 | */ | ||
405 | if (kimage_is_destination_range(image, temp_start, temp_end)) { | ||
406 | temp_start = temp_start - PAGE_SIZE; | ||
407 | continue; | ||
408 | } | ||
409 | |||
410 | /* We found a suitable memory range */ | ||
411 | break; | ||
412 | } while (1); | ||
413 | |||
414 | /* If we are here, we found a suitable memory range */ | ||
415 | kbuf->mem = temp_start; | ||
416 | |||
417 | /* Success, stop navigating through remaining System RAM ranges */ | ||
418 | return 1; | ||
419 | } | ||
420 | |||
421 | static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, | ||
422 | struct kexec_buf *kbuf) | ||
423 | { | ||
424 | struct kimage *image = kbuf->image; | ||
425 | unsigned long temp_start, temp_end; | ||
426 | |||
427 | temp_start = max(start, kbuf->buf_min); | ||
428 | |||
429 | do { | ||
430 | temp_start = ALIGN(temp_start, kbuf->buf_align); | ||
431 | temp_end = temp_start + kbuf->memsz - 1; | ||
432 | |||
433 | if (temp_end > end || temp_end > kbuf->buf_max) | ||
434 | return 0; | ||
435 | /* | ||
436 | * Make sure this does not conflict with any of existing | ||
437 | * segments | ||
438 | */ | ||
439 | if (kimage_is_destination_range(image, temp_start, temp_end)) { | ||
440 | temp_start = temp_start + PAGE_SIZE; | ||
441 | continue; | ||
442 | } | ||
443 | |||
444 | /* We found a suitable memory range */ | ||
445 | break; | ||
446 | } while (1); | ||
447 | |||
448 | /* If we are here, we found a suitable memory range */ | ||
449 | kbuf->mem = temp_start; | ||
450 | |||
451 | /* Success, stop navigating through remaining System RAM ranges */ | ||
452 | return 1; | ||
453 | } | ||
454 | |||
455 | static int locate_mem_hole_callback(u64 start, u64 end, void *arg) | ||
456 | { | ||
457 | struct kexec_buf *kbuf = (struct kexec_buf *)arg; | ||
458 | unsigned long sz = end - start + 1; | ||
459 | |||
460 | /* Returning 0 will take us to the next memory range */ | ||
461 | if (sz < kbuf->memsz) | ||
462 | return 0; | ||
463 | |||
464 | if (end < kbuf->buf_min || start > kbuf->buf_max) | ||
465 | return 0; | ||
466 | |||
467 | /* | ||
468 | * Allocate memory top down within the RAM range. Otherwise allocate | ||
469 | * bottom up. | ||
470 | */ | ||
471 | if (kbuf->top_down) | ||
472 | return locate_mem_hole_top_down(start, end, kbuf); | ||
473 | return locate_mem_hole_bottom_up(start, end, kbuf); | ||
474 | } | ||
475 | |||
476 | /* | ||
477 | * Helper function for placing a buffer in a kexec segment. This assumes | ||
478 | * that kexec_mutex is held. | ||
479 | */ | ||
480 | int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, | ||
481 | unsigned long memsz, unsigned long buf_align, | ||
482 | unsigned long buf_min, unsigned long buf_max, | ||
483 | bool top_down, unsigned long *load_addr) | ||
484 | { | ||
485 | |||
486 | struct kexec_segment *ksegment; | ||
487 | struct kexec_buf buf, *kbuf; | ||
488 | int ret; | ||
489 | |||
490 | /* Currently adding segment this way is allowed only in file mode */ | ||
491 | if (!image->file_mode) | ||
492 | return -EINVAL; | ||
493 | |||
494 | if (image->nr_segments >= KEXEC_SEGMENT_MAX) | ||
495 | return -EINVAL; | ||
496 | |||
497 | /* | ||
498 | * Make sure we are not trying to add a buffer after allocating | ||
499 | * control pages. All segments need to be placed before any control | ||
500 | * pages are allocated, because the control page allocation logic | ||
501 | * goes through the list of segments to make sure there are no | ||
502 | * destination overlaps. | ||
503 | */ | ||
504 | if (!list_empty(&image->control_pages)) { | ||
505 | WARN_ON(1); | ||
506 | return -EINVAL; | ||
507 | } | ||
508 | |||
509 | memset(&buf, 0, sizeof(struct kexec_buf)); | ||
510 | kbuf = &buf; | ||
511 | kbuf->image = image; | ||
512 | kbuf->buffer = buffer; | ||
513 | kbuf->bufsz = bufsz; | ||
514 | |||
515 | kbuf->memsz = ALIGN(memsz, PAGE_SIZE); | ||
516 | kbuf->buf_align = max(buf_align, PAGE_SIZE); | ||
517 | kbuf->buf_min = buf_min; | ||
518 | kbuf->buf_max = buf_max; | ||
519 | kbuf->top_down = top_down; | ||
520 | |||
521 | /* Walk the RAM ranges and allocate a suitable range for the buffer */ | ||
522 | if (image->type == KEXEC_TYPE_CRASH) | ||
523 | ret = walk_iomem_res("Crash kernel", | ||
524 | IORESOURCE_MEM | IORESOURCE_BUSY, | ||
525 | crashk_res.start, crashk_res.end, kbuf, | ||
526 | locate_mem_hole_callback); | ||
527 | else | ||
528 | ret = walk_system_ram_res(0, -1, kbuf, | ||
529 | locate_mem_hole_callback); | ||
530 | if (ret != 1) { | ||
531 | /* A suitable memory range could not be found for buffer */ | ||
532 | return -EADDRNOTAVAIL; | ||
533 | } | ||
534 | |||
535 | /* Found a suitable memory range */ | ||
536 | ksegment = &image->segment[image->nr_segments]; | ||
537 | ksegment->kbuf = kbuf->buffer; | ||
538 | ksegment->bufsz = kbuf->bufsz; | ||
539 | ksegment->mem = kbuf->mem; | ||
540 | ksegment->memsz = kbuf->memsz; | ||
541 | image->nr_segments++; | ||
542 | *load_addr = ksegment->mem; | ||
543 | return 0; | ||
544 | } | ||
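A hedged kernel-side sketch of how a hypothetical arch image loader might place its kernel buffer with this helper; the buf_min/buf_max values are illustrative only:

	#include <linux/kexec.h>
	#include <linux/mm.h>

	static int example_place_kernel(struct kimage *image)
	{
		unsigned long kernel_load_addr;
		int ret;

		ret = kexec_add_buffer(image, image->kernel_buf, image->kernel_buf_len,
				       ALIGN(image->kernel_buf_len, PAGE_SIZE),
				       PAGE_SIZE,	/* buf_align: at least one page */
				       0x100000,	/* buf_min: hypothetical 1 MiB floor */
				       ULONG_MAX,	/* buf_max: no upper bound */
				       true,		/* top_down placement */
				       &kernel_load_addr);
		if (ret)
			return ret;

		pr_debug("kernel segment placed at 0x%lx\n", kernel_load_addr);
		return 0;
	}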
545 | |||
546 | /* Calculate and store the digest of segments */ | ||
547 | static int kexec_calculate_store_digests(struct kimage *image) | ||
548 | { | ||
549 | struct crypto_shash *tfm; | ||
550 | struct shash_desc *desc; | ||
551 | int ret = 0, i, j, zero_buf_sz, sha_region_sz; | ||
552 | size_t desc_size, nullsz; | ||
553 | char *digest; | ||
554 | void *zero_buf; | ||
555 | struct kexec_sha_region *sha_regions; | ||
556 | struct purgatory_info *pi = &image->purgatory_info; | ||
557 | |||
558 | zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT); | ||
559 | zero_buf_sz = PAGE_SIZE; | ||
560 | |||
561 | tfm = crypto_alloc_shash("sha256", 0, 0); | ||
562 | if (IS_ERR(tfm)) { | ||
563 | ret = PTR_ERR(tfm); | ||
564 | goto out; | ||
565 | } | ||
566 | |||
567 | desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); | ||
568 | desc = kzalloc(desc_size, GFP_KERNEL); | ||
569 | if (!desc) { | ||
570 | ret = -ENOMEM; | ||
571 | goto out_free_tfm; | ||
572 | } | ||
573 | |||
574 | sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region); | ||
575 | sha_regions = vzalloc(sha_region_sz); | ||
576 | if (!sha_regions) | ||
577 | goto out_free_desc; | ||
578 | |||
579 | desc->tfm = tfm; | ||
580 | desc->flags = 0; | ||
581 | |||
582 | ret = crypto_shash_init(desc); | ||
583 | if (ret < 0) | ||
584 | goto out_free_sha_regions; | ||
585 | |||
586 | digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); | ||
587 | if (!digest) { | ||
588 | ret = -ENOMEM; | ||
589 | goto out_free_sha_regions; | ||
590 | } | ||
591 | |||
592 | for (j = i = 0; i < image->nr_segments; i++) { | ||
593 | struct kexec_segment *ksegment; | ||
594 | |||
595 | ksegment = &image->segment[i]; | ||
596 | /* | ||
597 | * Skip purgatory as it will be modified once we put digest | ||
598 | * info in purgatory. | ||
599 | */ | ||
600 | if (ksegment->kbuf == pi->purgatory_buf) | ||
601 | continue; | ||
602 | |||
603 | ret = crypto_shash_update(desc, ksegment->kbuf, | ||
604 | ksegment->bufsz); | ||
605 | if (ret) | ||
606 | break; | ||
607 | |||
608 | /* | ||
609 | * Assume rest of the buffer is filled with zero and | ||
610 | * update digest accordingly. | ||
611 | */ | ||
612 | nullsz = ksegment->memsz - ksegment->bufsz; | ||
613 | while (nullsz) { | ||
614 | unsigned long bytes = nullsz; | ||
615 | |||
616 | if (bytes > zero_buf_sz) | ||
617 | bytes = zero_buf_sz; | ||
618 | ret = crypto_shash_update(desc, zero_buf, bytes); | ||
619 | if (ret) | ||
620 | break; | ||
621 | nullsz -= bytes; | ||
622 | } | ||
623 | |||
624 | if (ret) | ||
625 | break; | ||
626 | |||
627 | sha_regions[j].start = ksegment->mem; | ||
628 | sha_regions[j].len = ksegment->memsz; | ||
629 | j++; | ||
630 | } | ||
631 | |||
632 | if (!ret) { | ||
633 | ret = crypto_shash_final(desc, digest); | ||
634 | if (ret) | ||
635 | goto out_free_digest; | ||
636 | ret = kexec_purgatory_get_set_symbol(image, "sha_regions", | ||
637 | sha_regions, sha_region_sz, 0); | ||
638 | if (ret) | ||
639 | goto out_free_digest; | ||
640 | |||
641 | ret = kexec_purgatory_get_set_symbol(image, "sha256_digest", | ||
642 | digest, SHA256_DIGEST_SIZE, 0); | ||
643 | if (ret) | ||
644 | goto out_free_digest; | ||
645 | } | ||
646 | |||
647 | out_free_digest: | ||
648 | kfree(digest); | ||
649 | out_free_sha_regions: | ||
650 | vfree(sha_regions); | ||
651 | out_free_desc: | ||
652 | kfree(desc); | ||
653 | out_free_tfm: | ||
654 | kfree(tfm); | ||
655 | out: | ||
656 | return ret; | ||
657 | } | ||
658 | |||
659 | /* Actually load purgatory. Lot of code taken from kexec-tools */ | ||
660 | static int __kexec_load_purgatory(struct kimage *image, unsigned long min, | ||
661 | unsigned long max, int top_down) | ||
662 | { | ||
663 | struct purgatory_info *pi = &image->purgatory_info; | ||
664 | unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad; | ||
665 | unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset; | ||
666 | unsigned char *buf_addr, *src; | ||
667 | int i, ret = 0, entry_sidx = -1; | ||
668 | const Elf_Shdr *sechdrs_c; | ||
669 | Elf_Shdr *sechdrs = NULL; | ||
670 | void *purgatory_buf = NULL; | ||
671 | |||
672 | /* | ||
673 | * sechdrs_c points to the section headers in purgatory and is read | ||
674 | * only. No modifications allowed. | ||
675 | */ | ||
676 | sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff; | ||
677 | |||
678 | /* | ||
679 | * We cannot modify sechdrs_c[] and its fields. It is read only. | ||
680 | * Copy it over to a local copy where one can store some temporary | ||
681 | * data and free it at the end. We need to modify ->sh_addr and | ||
682 | * ->sh_offset fields to keep track of permanent and temporary | ||
683 | * locations of sections. | ||
684 | */ | ||
685 | sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr)); | ||
686 | if (!sechdrs) | ||
687 | return -ENOMEM; | ||
688 | |||
689 | memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr)); | ||
690 | |||
691 | /* | ||
692 | * We seem to have multiple copies of sections. The first copy is the | ||
693 | * one embedded in the kernel in a read-only section. Some of these | ||
694 | * sections will be copied to a temporary buffer and relocated, and | ||
695 | * these sections will finally be copied to their final destination | ||
696 | * at segment load time. | ||
697 | * | ||
698 | * Use ->sh_offset to reflect section address in memory. It will | ||
699 | * point to original read only copy if section is not allocatable. | ||
700 | * Otherwise it will point to temporary copy which will be relocated. | ||
701 | * | ||
702 | * Use ->sh_addr to contain final address of the section where it | ||
703 | * will go during execution time. | ||
704 | */ | ||
705 | for (i = 0; i < pi->ehdr->e_shnum; i++) { | ||
706 | if (sechdrs[i].sh_type == SHT_NOBITS) | ||
707 | continue; | ||
708 | |||
709 | sechdrs[i].sh_offset = (unsigned long)pi->ehdr + | ||
710 | sechdrs[i].sh_offset; | ||
711 | } | ||
712 | |||
713 | /* | ||
714 | * Identify entry point section and make entry relative to section | ||
715 | * start. | ||
716 | */ | ||
717 | entry = pi->ehdr->e_entry; | ||
718 | for (i = 0; i < pi->ehdr->e_shnum; i++) { | ||
719 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | ||
720 | continue; | ||
721 | |||
722 | if (!(sechdrs[i].sh_flags & SHF_EXECINSTR)) | ||
723 | continue; | ||
724 | |||
725 | /* Make entry section relative */ | ||
726 | if (sechdrs[i].sh_addr <= pi->ehdr->e_entry && | ||
727 | ((sechdrs[i].sh_addr + sechdrs[i].sh_size) > | ||
728 | pi->ehdr->e_entry)) { | ||
729 | entry_sidx = i; | ||
730 | entry -= sechdrs[i].sh_addr; | ||
731 | break; | ||
732 | } | ||
733 | } | ||
734 | |||
735 | /* Determine how much memory is needed to load relocatable object. */ | ||
736 | buf_align = 1; | ||
737 | bss_align = 1; | ||
738 | buf_sz = 0; | ||
739 | bss_sz = 0; | ||
740 | |||
741 | for (i = 0; i < pi->ehdr->e_shnum; i++) { | ||
742 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | ||
743 | continue; | ||
744 | |||
745 | align = sechdrs[i].sh_addralign; | ||
746 | if (sechdrs[i].sh_type != SHT_NOBITS) { | ||
747 | if (buf_align < align) | ||
748 | buf_align = align; | ||
749 | buf_sz = ALIGN(buf_sz, align); | ||
750 | buf_sz += sechdrs[i].sh_size; | ||
751 | } else { | ||
752 | /* bss section */ | ||
753 | if (bss_align < align) | ||
754 | bss_align = align; | ||
755 | bss_sz = ALIGN(bss_sz, align); | ||
756 | bss_sz += sechdrs[i].sh_size; | ||
757 | } | ||
758 | } | ||
759 | |||
760 | /* Determine the bss padding required to align bss properly */ | ||
761 | bss_pad = 0; | ||
762 | if (buf_sz & (bss_align - 1)) | ||
763 | bss_pad = bss_align - (buf_sz & (bss_align - 1)); | ||
764 | |||
765 | memsz = buf_sz + bss_pad + bss_sz; | ||
766 | |||
767 | /* Allocate buffer for purgatory */ | ||
768 | purgatory_buf = vzalloc(buf_sz); | ||
769 | if (!purgatory_buf) { | ||
770 | ret = -ENOMEM; | ||
771 | goto out; | ||
772 | } | ||
773 | |||
774 | if (buf_align < bss_align) | ||
775 | buf_align = bss_align; | ||
776 | |||
777 | /* Add buffer to segment list */ | ||
778 | ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz, | ||
779 | buf_align, min, max, top_down, | ||
780 | &pi->purgatory_load_addr); | ||
781 | if (ret) | ||
782 | goto out; | ||
783 | |||
784 | /* Load SHF_ALLOC sections */ | ||
785 | buf_addr = purgatory_buf; | ||
786 | load_addr = curr_load_addr = pi->purgatory_load_addr; | ||
787 | bss_addr = load_addr + buf_sz + bss_pad; | ||
788 | |||
789 | for (i = 0; i < pi->ehdr->e_shnum; i++) { | ||
790 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | ||
791 | continue; | ||
792 | |||
793 | align = sechdrs[i].sh_addralign; | ||
794 | if (sechdrs[i].sh_type != SHT_NOBITS) { | ||
795 | curr_load_addr = ALIGN(curr_load_addr, align); | ||
796 | offset = curr_load_addr - load_addr; | ||
797 | /* We already modified ->sh_offset to keep the src addr */ | ||
798 | src = (char *) sechdrs[i].sh_offset; | ||
799 | memcpy(buf_addr + offset, src, sechdrs[i].sh_size); | ||
800 | |||
801 | /* Store load address and source address of section */ | ||
802 | sechdrs[i].sh_addr = curr_load_addr; | ||
803 | |||
804 | /* | ||
805 | * This section got copied to temporary buffer. Update | ||
806 | * ->sh_offset accordingly. | ||
807 | */ | ||
808 | sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset); | ||
809 | |||
810 | /* Advance to the next address */ | ||
811 | curr_load_addr += sechdrs[i].sh_size; | ||
812 | } else { | ||
813 | bss_addr = ALIGN(bss_addr, align); | ||
814 | sechdrs[i].sh_addr = bss_addr; | ||
815 | bss_addr += sechdrs[i].sh_size; | ||
816 | } | ||
817 | } | ||
818 | |||
819 | /* Update entry point based on load address of text section */ | ||
820 | if (entry_sidx >= 0) | ||
821 | entry += sechdrs[entry_sidx].sh_addr; | ||
822 | |||
823 | /* Make kernel jump to purgatory after shutdown */ | ||
824 | image->start = entry; | ||
825 | |||
826 | /* Used later to get/set symbol values */ | ||
827 | pi->sechdrs = sechdrs; | ||
828 | |||
829 | /* | ||
830 | * Used later to identify which section is purgatory and skip it | ||
831 | * from checksumming. | ||
832 | */ | ||
833 | pi->purgatory_buf = purgatory_buf; | ||
834 | return ret; | ||
835 | out: | ||
836 | vfree(sechdrs); | ||
837 | vfree(purgatory_buf); | ||
838 | return ret; | ||
839 | } | ||
840 | |||
841 | static int kexec_apply_relocations(struct kimage *image) | ||
842 | { | ||
843 | int i, ret; | ||
844 | struct purgatory_info *pi = &image->purgatory_info; | ||
845 | Elf_Shdr *sechdrs = pi->sechdrs; | ||
846 | |||
847 | /* Apply relocations */ | ||
848 | for (i = 0; i < pi->ehdr->e_shnum; i++) { | ||
849 | Elf_Shdr *section, *symtab; | ||
850 | |||
851 | if (sechdrs[i].sh_type != SHT_RELA && | ||
852 | sechdrs[i].sh_type != SHT_REL) | ||
853 | continue; | ||
854 | |||
855 | /* | ||
856 | * For sections of type SHT_RELA/SHT_REL, ->sh_link contains the | ||
857 | * section header index of the associated symbol table, and | ||
858 | * ->sh_info contains the section header index of the section to | ||
859 | * which the relocations apply. | ||
860 | */ | ||
861 | if (sechdrs[i].sh_info >= pi->ehdr->e_shnum || | ||
862 | sechdrs[i].sh_link >= pi->ehdr->e_shnum) | ||
863 | return -ENOEXEC; | ||
864 | |||
865 | section = &sechdrs[sechdrs[i].sh_info]; | ||
866 | symtab = &sechdrs[sechdrs[i].sh_link]; | ||
867 | |||
868 | if (!(section->sh_flags & SHF_ALLOC)) | ||
869 | continue; | ||
870 | |||
871 | /* | ||
872 | * symtab->sh_link contains the section header index of the | ||
873 | * associated string table. | ||
874 | */ | ||
875 | if (symtab->sh_link >= pi->ehdr->e_shnum) | ||
876 | /* Invalid section number? */ | ||
877 | continue; | ||
878 | |||
879 | /* | ||
880 | * The respective architecture needs to provide support for applying | ||
881 | * relocations of type SHT_RELA/SHT_REL. | ||
882 | */ | ||
883 | if (sechdrs[i].sh_type == SHT_RELA) | ||
884 | ret = arch_kexec_apply_relocations_add(pi->ehdr, | ||
885 | sechdrs, i); | ||
886 | else if (sechdrs[i].sh_type == SHT_REL) | ||
887 | ret = arch_kexec_apply_relocations(pi->ehdr, | ||
888 | sechdrs, i); | ||
889 | if (ret) | ||
890 | return ret; | ||
891 | } | ||
892 | |||
893 | return 0; | ||
894 | } | ||
895 | |||
896 | /* Load relocatable purgatory object and relocate it appropriately */ | ||
897 | int kexec_load_purgatory(struct kimage *image, unsigned long min, | ||
898 | unsigned long max, int top_down, | ||
899 | unsigned long *load_addr) | ||
900 | { | ||
901 | struct purgatory_info *pi = &image->purgatory_info; | ||
902 | int ret; | ||
903 | |||
904 | if (kexec_purgatory_size <= 0) | ||
905 | return -EINVAL; | ||
906 | |||
907 | if (kexec_purgatory_size < sizeof(Elf_Ehdr)) | ||
908 | return -ENOEXEC; | ||
909 | |||
910 | pi->ehdr = (Elf_Ehdr *)kexec_purgatory; | ||
911 | |||
912 | if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0 | ||
913 | || pi->ehdr->e_type != ET_REL | ||
914 | || !elf_check_arch(pi->ehdr) | ||
915 | || pi->ehdr->e_shentsize != sizeof(Elf_Shdr)) | ||
916 | return -ENOEXEC; | ||
917 | |||
918 | if (pi->ehdr->e_shoff >= kexec_purgatory_size | ||
919 | || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) > | ||
920 | kexec_purgatory_size - pi->ehdr->e_shoff)) | ||
921 | return -ENOEXEC; | ||
922 | |||
923 | ret = __kexec_load_purgatory(image, min, max, top_down); | ||
924 | if (ret) | ||
925 | return ret; | ||
926 | |||
927 | ret = kexec_apply_relocations(image); | ||
928 | if (ret) | ||
929 | goto out; | ||
930 | |||
931 | *load_addr = pi->purgatory_load_addr; | ||
932 | return 0; | ||
933 | out: | ||
934 | vfree(pi->sechdrs); | ||
935 | vfree(pi->purgatory_buf); | ||
936 | return ret; | ||
937 | } | ||
938 | |||
939 | static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi, | ||
940 | const char *name) | ||
941 | { | ||
942 | Elf_Sym *syms; | ||
943 | Elf_Shdr *sechdrs; | ||
944 | Elf_Ehdr *ehdr; | ||
945 | int i, k; | ||
946 | const char *strtab; | ||
947 | |||
948 | if (!pi->sechdrs || !pi->ehdr) | ||
949 | return NULL; | ||
950 | |||
951 | sechdrs = pi->sechdrs; | ||
952 | ehdr = pi->ehdr; | ||
953 | |||
954 | for (i = 0; i < ehdr->e_shnum; i++) { | ||
955 | if (sechdrs[i].sh_type != SHT_SYMTAB) | ||
956 | continue; | ||
957 | |||
958 | if (sechdrs[i].sh_link >= ehdr->e_shnum) | ||
959 | /* Invalid strtab section number */ | ||
960 | continue; | ||
961 | strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset; | ||
962 | syms = (Elf_Sym *)sechdrs[i].sh_offset; | ||
963 | |||
964 | /* Go through symbols for a match */ | ||
965 | for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { | ||
966 | if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) | ||
967 | continue; | ||
968 | |||
969 | if (strcmp(strtab + syms[k].st_name, name) != 0) | ||
970 | continue; | ||
971 | |||
972 | if (syms[k].st_shndx == SHN_UNDEF || | ||
973 | syms[k].st_shndx >= ehdr->e_shnum) { | ||
974 | pr_debug("Symbol: %s has bad section index %d.\n", | ||
975 | name, syms[k].st_shndx); | ||
976 | return NULL; | ||
977 | } | ||
978 | |||
979 | /* Found the symbol we are looking for */ | ||
980 | return &syms[k]; | ||
981 | } | ||
982 | } | ||
983 | |||
984 | return NULL; | ||
985 | } | ||
986 | |||
987 | void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name) | ||
988 | { | ||
989 | struct purgatory_info *pi = &image->purgatory_info; | ||
990 | Elf_Sym *sym; | ||
991 | Elf_Shdr *sechdr; | ||
992 | |||
993 | sym = kexec_purgatory_find_symbol(pi, name); | ||
994 | if (!sym) | ||
995 | return ERR_PTR(-EINVAL); | ||
996 | |||
997 | sechdr = &pi->sechdrs[sym->st_shndx]; | ||
998 | |||
999 | /* | ||
1000 | * Returns the address where the symbol will finally be loaded after | ||
1001 | * kexec_load_segment() | ||
1002 | */ | ||
1003 | return (void *)(sechdr->sh_addr + sym->st_value); | ||
1004 | } | ||
1005 | |||
1006 | /* | ||
1007 | * Get or set value of a symbol. If "get_value" is true, symbol value is | ||
1008 | * returned in buf otherwise symbol value is set based on value in buf. | ||
1009 | */ | ||
1010 | int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, | ||
1011 | void *buf, unsigned int size, bool get_value) | ||
1012 | { | ||
1013 | Elf_Sym *sym; | ||
1014 | Elf_Shdr *sechdrs; | ||
1015 | struct purgatory_info *pi = &image->purgatory_info; | ||
1016 | char *sym_buf; | ||
1017 | |||
1018 | sym = kexec_purgatory_find_symbol(pi, name); | ||
1019 | if (!sym) | ||
1020 | return -EINVAL; | ||
1021 | |||
1022 | if (sym->st_size != size) { | ||
1023 | pr_err("symbol %s size mismatch: expected %lu actual %u\n", | ||
1024 | name, (unsigned long)sym->st_size, size); | ||
1025 | return -EINVAL; | ||
1026 | } | ||
1027 | |||
1028 | sechdrs = pi->sechdrs; | ||
1029 | |||
1030 | if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) { | ||
1031 | pr_err("symbol %s is in a bss section. Cannot %s\n", name, | ||
1032 | get_value ? "get" : "set"); | ||
1033 | return -EINVAL; | ||
1034 | } | ||
1035 | |||
1036 | sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset + | ||
1037 | sym->st_value; | ||
1038 | |||
1039 | if (get_value) | ||
1040 | memcpy((void *)buf, sym_buf, size); | ||
1041 | else | ||
1042 | memcpy((void *)sym_buf, buf, size); | ||
1043 | |||
1044 | return 0; | ||
1045 | } | ||
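
The purgatory helpers above are driven from an architecture's kexec_file loader. A minimal sketch of that flow, using only the functions defined in this file; the search window, the "entry64" symbol name and the use of image->start are illustrative assumptions, not taken from any particular architecture:

    #include <linux/kexec.h>

    /* Sketch only: the limits and symbol name below are made up. */
    static int example_arch_load(struct kimage *image, unsigned long kernel_entry)
    {
            unsigned long purgatory_load_addr;
            int ret;

            /* Load purgatory anywhere in [0, ULONG_MAX), searching top-down. */
            ret = kexec_load_purgatory(image, 0, ULONG_MAX, 1, &purgatory_load_addr);
            if (ret)
                    return ret;

            /* Patch a global variable inside the loaded purgatory image. */
            ret = kexec_purgatory_get_set_symbol(image, "entry64", &kernel_entry,
                                                 sizeof(kernel_entry), false);
            if (ret)
                    return ret;

            /* Enter the new kernel through purgatory (illustrative). */
            image->start = purgatory_load_addr;
            return 0;
    }
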
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h new file mode 100644 index 000000000000..e4392a698ad4 --- /dev/null +++ b/kernel/kexec_internal.h | |||
@@ -0,0 +1,22 @@ | |||
1 | #ifndef LINUX_KEXEC_INTERNAL_H | ||
2 | #define LINUX_KEXEC_INTERNAL_H | ||
3 | |||
4 | #include <linux/kexec.h> | ||
5 | |||
6 | struct kimage *do_kimage_alloc_init(void); | ||
7 | int sanity_check_segment_list(struct kimage *image); | ||
8 | void kimage_free_page_list(struct list_head *list); | ||
9 | void kimage_free(struct kimage *image); | ||
10 | int kimage_load_segment(struct kimage *image, struct kexec_segment *segment); | ||
11 | void kimage_terminate(struct kimage *image); | ||
12 | int kimage_is_destination_range(struct kimage *image, | ||
13 | unsigned long start, unsigned long end); | ||
14 | |||
15 | extern struct mutex kexec_mutex; | ||
16 | |||
17 | #ifdef CONFIG_KEXEC_FILE | ||
18 | void kimage_file_post_load_cleanup(struct kimage *image); | ||
19 | #else /* CONFIG_KEXEC_FILE */ | ||
20 | static inline void kimage_file_post_load_cleanup(struct kimage *image) { } | ||
21 | #endif /* CONFIG_KEXEC_FILE */ | ||
22 | #endif /* LINUX_KEXEC_INTERNAL_H */ | ||
diff --git a/kernel/kmod.c b/kernel/kmod.c index 2777f40a9c7b..da98d0593de2 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -45,8 +45,6 @@ | |||
45 | 45 | ||
46 | extern int max_threads; | 46 | extern int max_threads; |
47 | 47 | ||
48 | static struct workqueue_struct *khelper_wq; | ||
49 | |||
50 | #define CAP_BSET (void *)1 | 48 | #define CAP_BSET (void *)1 |
51 | #define CAP_PI (void *)2 | 49 | #define CAP_PI (void *)2 |
52 | 50 | ||
@@ -114,10 +112,11 @@ out: | |||
114 | * @...: arguments as specified in the format string | 112 | * @...: arguments as specified in the format string |
115 | * | 113 | * |
116 | * Load a module using the user mode module loader. The function returns | 114 | * Load a module using the user mode module loader. The function returns |
117 | * zero on success or a negative errno code on failure. Note that a | 115 | * zero on success or a negative errno code or positive exit code from |
118 | * successful module load does not mean the module did not then unload | 116 | * "modprobe" on failure. Note that a successful module load does not mean |
119 | * and exit on an error of its own. Callers must check that the service | 117 | * the module did not then unload and exit on an error of its own. Callers |
120 | * they requested is now available not blindly invoke it. | 118 | * must check that the service they requested is now available not blindly |
119 | * invoke it. | ||
121 | * | 120 | * |
122 | * If module auto-loading support is disabled then this function | 121 | * If module auto-loading support is disabled then this function |
123 | * becomes a no-operation. | 122 | * becomes a no-operation. |
@@ -213,7 +212,7 @@ static void umh_complete(struct subprocess_info *sub_info) | |||
213 | /* | 212 | /* |
214 | * This is the task which runs the usermode application | 213 | * This is the task which runs the usermode application |
215 | */ | 214 | */ |
216 | static int ____call_usermodehelper(void *data) | 215 | static int call_usermodehelper_exec_async(void *data) |
217 | { | 216 | { |
218 | struct subprocess_info *sub_info = data; | 217 | struct subprocess_info *sub_info = data; |
219 | struct cred *new; | 218 | struct cred *new; |
@@ -223,12 +222,9 @@ static int ____call_usermodehelper(void *data) | |||
223 | flush_signal_handlers(current, 1); | 222 | flush_signal_handlers(current, 1); |
224 | spin_unlock_irq(¤t->sighand->siglock); | 223 | spin_unlock_irq(¤t->sighand->siglock); |
225 | 224 | ||
226 | /* We can run anywhere, unlike our parent keventd(). */ | ||
227 | set_cpus_allowed_ptr(current, cpu_all_mask); | ||
228 | |||
229 | /* | 225 | /* |
230 | * Our parent is keventd, which runs with elevated scheduling priority. | 226 | * Our parent (unbound workqueue) runs with elevated scheduling |
231 | * Avoid propagating that into the userspace child. | 227 | * priority. Avoid propagating that into the userspace child. |
232 | */ | 228 | */ |
233 | set_user_nice(current, 0); | 229 | set_user_nice(current, 0); |
234 | 230 | ||
@@ -258,7 +254,10 @@ static int ____call_usermodehelper(void *data) | |||
258 | (const char __user *const __user *)sub_info->envp); | 254 | (const char __user *const __user *)sub_info->envp); |
259 | out: | 255 | out: |
260 | sub_info->retval = retval; | 256 | sub_info->retval = retval; |
261 | /* wait_for_helper() will call umh_complete if UHM_WAIT_PROC. */ | 257 | /* |
258 | * call_usermodehelper_exec_sync() will call umh_complete | ||
259 | * if UHM_WAIT_PROC. | ||
260 | */ | ||
262 | if (!(sub_info->wait & UMH_WAIT_PROC)) | 261 | if (!(sub_info->wait & UMH_WAIT_PROC)) |
263 | umh_complete(sub_info); | 262 | umh_complete(sub_info); |
264 | if (!retval) | 263 | if (!retval) |
@@ -266,15 +265,14 @@ out: | |||
266 | do_exit(0); | 265 | do_exit(0); |
267 | } | 266 | } |
268 | 267 | ||
269 | /* Keventd can't block, but this (a child) can. */ | 268 | /* Handles UMH_WAIT_PROC. */ |
270 | static int wait_for_helper(void *data) | 269 | static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) |
271 | { | 270 | { |
272 | struct subprocess_info *sub_info = data; | ||
273 | pid_t pid; | 271 | pid_t pid; |
274 | 272 | ||
275 | /* If SIGCLD is ignored sys_wait4 won't populate the status. */ | 273 | /* If SIGCLD is ignored sys_wait4 won't populate the status. */ |
276 | kernel_sigaction(SIGCHLD, SIG_DFL); | 274 | kernel_sigaction(SIGCHLD, SIG_DFL); |
277 | pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD); | 275 | pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); |
278 | if (pid < 0) { | 276 | if (pid < 0) { |
279 | sub_info->retval = pid; | 277 | sub_info->retval = pid; |
280 | } else { | 278 | } else { |
@@ -282,44 +280,60 @@ static int wait_for_helper(void *data) | |||
282 | /* | 280 | /* |
283 | * Normally it is bogus to call wait4() from in-kernel because | 281 | * Normally it is bogus to call wait4() from in-kernel because |
284 | * wait4() wants to write the exit code to a userspace address. | 282 | * wait4() wants to write the exit code to a userspace address. |
285 | * But wait_for_helper() always runs as keventd, and put_user() | 283 | * But call_usermodehelper_exec_sync() always runs as kernel |
286 | * to a kernel address works OK for kernel threads, due to their | 284 | * thread (workqueue) and put_user() to a kernel address works |
287 | * having an mm_segment_t which spans the entire address space. | 285 | * OK for kernel threads, due to their having an mm_segment_t |
286 | * which spans the entire address space. | ||
288 | * | 287 | * |
289 | * Thus the __user pointer cast is valid here. | 288 | * Thus the __user pointer cast is valid here. |
290 | */ | 289 | */ |
291 | sys_wait4(pid, (int __user *)&ret, 0, NULL); | 290 | sys_wait4(pid, (int __user *)&ret, 0, NULL); |
292 | 291 | ||
293 | /* | 292 | /* |
294 | * If ret is 0, either ____call_usermodehelper failed and the | 293 | * If ret is 0, either call_usermodehelper_exec_async failed and |
295 | * real error code is already in sub_info->retval or | 294 | * the real error code is already in sub_info->retval or |
296 | * sub_info->retval is 0 anyway, so don't mess with it then. | 295 | * sub_info->retval is 0 anyway, so don't mess with it then. |
297 | */ | 296 | */ |
298 | if (ret) | 297 | if (ret) |
299 | sub_info->retval = ret; | 298 | sub_info->retval = ret; |
300 | } | 299 | } |
301 | 300 | ||
301 | /* Restore default kernel sig handler */ | ||
302 | kernel_sigaction(SIGCHLD, SIG_IGN); | ||
303 | |||
302 | umh_complete(sub_info); | 304 | umh_complete(sub_info); |
303 | do_exit(0); | ||
304 | } | 305 | } |
305 | 306 | ||
306 | /* This is run by khelper thread */ | 307 | /* |
307 | static void __call_usermodehelper(struct work_struct *work) | 308 | * We need to create the usermodehelper kernel thread from a task that is affine |
309 | * to an optimized set of CPUs (or nohz housekeeping ones) such that they | ||
310 | * inherit a widest affinity irrespective of call_usermodehelper() callers with | ||
311 | * possibly reduced affinity (eg: per-cpu workqueues). We don't want | ||
312 | * usermodehelper targets to contend a busy CPU. | ||
313 | * | ||
314 | * Unbound workqueues provide such wide affinity and allow to block on | ||
315 | * UMH_WAIT_PROC requests without blocking pending requests (up to some limit). | ||
316 | * | ||
317 | * Besides, workqueues provide the privilege level that the caller might not | ||
318 | * have to perform the usermodehelper request. | ||
319 | * | ||
320 | */ | ||
321 | static void call_usermodehelper_exec_work(struct work_struct *work) | ||
308 | { | 322 | { |
309 | struct subprocess_info *sub_info = | 323 | struct subprocess_info *sub_info = |
310 | container_of(work, struct subprocess_info, work); | 324 | container_of(work, struct subprocess_info, work); |
311 | pid_t pid; | ||
312 | 325 | ||
313 | if (sub_info->wait & UMH_WAIT_PROC) | 326 | if (sub_info->wait & UMH_WAIT_PROC) { |
314 | pid = kernel_thread(wait_for_helper, sub_info, | 327 | call_usermodehelper_exec_sync(sub_info); |
315 | CLONE_FS | CLONE_FILES | SIGCHLD); | 328 | } else { |
316 | else | 329 | pid_t pid; |
317 | pid = kernel_thread(____call_usermodehelper, sub_info, | ||
318 | SIGCHLD); | ||
319 | 330 | ||
320 | if (pid < 0) { | 331 | pid = kernel_thread(call_usermodehelper_exec_async, sub_info, |
321 | sub_info->retval = pid; | 332 | SIGCHLD); |
322 | umh_complete(sub_info); | 333 | if (pid < 0) { |
334 | sub_info->retval = pid; | ||
335 | umh_complete(sub_info); | ||
336 | } | ||
323 | } | 337 | } |
324 | } | 338 | } |
325 | 339 | ||
@@ -509,7 +523,7 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, | |||
509 | if (!sub_info) | 523 | if (!sub_info) |
510 | goto out; | 524 | goto out; |
511 | 525 | ||
512 | INIT_WORK(&sub_info->work, __call_usermodehelper); | 526 | INIT_WORK(&sub_info->work, call_usermodehelper_exec_work); |
513 | sub_info->path = path; | 527 | sub_info->path = path; |
514 | sub_info->argv = argv; | 528 | sub_info->argv = argv; |
515 | sub_info->envp = envp; | 529 | sub_info->envp = envp; |
@@ -531,8 +545,8 @@ EXPORT_SYMBOL(call_usermodehelper_setup); | |||
531 | * from interrupt context. | 545 | * from interrupt context. |
532 | * | 546 | * |
533 | * Runs a user-space application. The application is started | 547 | * Runs a user-space application. The application is started |
534 | * asynchronously if wait is not set, and runs as a child of keventd. | 548 | * asynchronously if wait is not set, and runs as a child of system workqueues. |
535 | * (ie. it runs with full root capabilities). | 549 | * (ie. it runs with full root capabilities and optimized affinity). |
536 | */ | 550 | */ |
537 | int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) | 551 | int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) |
538 | { | 552 | { |
@@ -544,7 +558,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) | |||
544 | return -EINVAL; | 558 | return -EINVAL; |
545 | } | 559 | } |
546 | helper_lock(); | 560 | helper_lock(); |
547 | if (!khelper_wq || usermodehelper_disabled) { | 561 | if (usermodehelper_disabled) { |
548 | retval = -EBUSY; | 562 | retval = -EBUSY; |
549 | goto out; | 563 | goto out; |
550 | } | 564 | } |
@@ -556,7 +570,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) | |||
556 | sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done; | 570 | sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done; |
557 | sub_info->wait = wait; | 571 | sub_info->wait = wait; |
558 | 572 | ||
559 | queue_work(khelper_wq, &sub_info->work); | 573 | queue_work(system_unbound_wq, &sub_info->work); |
560 | if (wait == UMH_NO_WAIT) /* task has freed sub_info */ | 574 | if (wait == UMH_NO_WAIT) /* task has freed sub_info */ |
561 | goto unlock; | 575 | goto unlock; |
562 | 576 | ||
@@ -686,9 +700,3 @@ struct ctl_table usermodehelper_table[] = { | |||
686 | }, | 700 | }, |
687 | { } | 701 | { } |
688 | }; | 702 | }; |
689 | |||
690 | void __init usermodehelper_init(void) | ||
691 | { | ||
692 | khelper_wq = create_singlethread_workqueue("khelper"); | ||
693 | BUG_ON(!khelper_wq); | ||
694 | } | ||
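
With khelper gone, a usermode helper request is just a work item queued on system_unbound_wq, and callers keep using the same entry points. A hedged sketch of a caller (the helper path and arguments are invented for illustration):

    #include <linux/kmod.h>

    static int example_run_helper(void)
    {
            char *argv[] = { "/sbin/example-helper", "--oneshot", NULL };
            char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };

            /*
             * UMH_WAIT_PROC blocks until the helper exits; the request is now
             * handled by call_usermodehelper_exec_sync() running on an unbound
             * workqueue rather than by the old khelper thread.
             */
            return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
    }
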
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 6683ccef9fff..e83b26464061 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
@@ -90,7 +90,7 @@ static ssize_t profiling_store(struct kobject *kobj, | |||
90 | KERNEL_ATTR_RW(profiling); | 90 | KERNEL_ATTR_RW(profiling); |
91 | #endif | 91 | #endif |
92 | 92 | ||
93 | #ifdef CONFIG_KEXEC | 93 | #ifdef CONFIG_KEXEC_CORE |
94 | static ssize_t kexec_loaded_show(struct kobject *kobj, | 94 | static ssize_t kexec_loaded_show(struct kobject *kobj, |
95 | struct kobj_attribute *attr, char *buf) | 95 | struct kobj_attribute *attr, char *buf) |
96 | { | 96 | { |
@@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj, | |||
134 | } | 134 | } |
135 | KERNEL_ATTR_RO(vmcoreinfo); | 135 | KERNEL_ATTR_RO(vmcoreinfo); |
136 | 136 | ||
137 | #endif /* CONFIG_KEXEC */ | 137 | #endif /* CONFIG_KEXEC_CORE */ |
138 | 138 | ||
139 | /* whether file capabilities are enabled */ | 139 | /* whether file capabilities are enabled */ |
140 | static ssize_t fscaps_show(struct kobject *kobj, | 140 | static ssize_t fscaps_show(struct kobject *kobj, |
@@ -196,7 +196,7 @@ static struct attribute * kernel_attrs[] = { | |||
196 | #ifdef CONFIG_PROFILING | 196 | #ifdef CONFIG_PROFILING |
197 | &profiling_attr.attr, | 197 | &profiling_attr.attr, |
198 | #endif | 198 | #endif |
199 | #ifdef CONFIG_KEXEC | 199 | #ifdef CONFIG_KEXEC_CORE |
200 | &kexec_loaded_attr.attr, | 200 | &kexec_loaded_attr.attr, |
201 | &kexec_crash_loaded_attr.attr, | 201 | &kexec_crash_loaded_attr.attr, |
202 | &kexec_crash_size_attr.attr, | 202 | &kexec_crash_size_attr.attr, |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index cf8c24203368..8f0324ef72ab 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
@@ -835,7 +835,7 @@ const struct file_operations kmsg_fops = { | |||
835 | .release = devkmsg_release, | 835 | .release = devkmsg_release, |
836 | }; | 836 | }; |
837 | 837 | ||
838 | #ifdef CONFIG_KEXEC | 838 | #ifdef CONFIG_KEXEC_CORE |
839 | /* | 839 | /* |
840 | * This appends the listed symbols to /proc/vmcore | 840 | * This appends the listed symbols to /proc/vmcore |
841 | * | 841 | * |
diff --git a/kernel/reboot.c b/kernel/reboot.c index d20c85d9f8c0..bd30a973fe94 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c | |||
@@ -346,7 +346,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
346 | kernel_restart(buffer); | 346 | kernel_restart(buffer); |
347 | break; | 347 | break; |
348 | 348 | ||
349 | #ifdef CONFIG_KEXEC | 349 | #ifdef CONFIG_KEXEC_CORE |
350 | case LINUX_REBOOT_CMD_KEXEC: | 350 | case LINUX_REBOOT_CMD_KEXEC: |
351 | ret = kernel_kexec(); | 351 | ret = kernel_kexec(); |
352 | break; | 352 | break; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 19b62b522158..e69201d8094e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -621,7 +621,7 @@ static struct ctl_table kern_table[] = { | |||
621 | .proc_handler = proc_dointvec, | 621 | .proc_handler = proc_dointvec, |
622 | }, | 622 | }, |
623 | #endif | 623 | #endif |
624 | #ifdef CONFIG_KEXEC | 624 | #ifdef CONFIG_KEXEC_CORE |
625 | { | 625 | { |
626 | .procname = "kexec_load_disabled", | 626 | .procname = "kexec_load_disabled", |
627 | .data = &kexec_load_disabled, | 627 | .data = &kexec_load_disabled, |
@@ -1995,7 +1995,7 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, | |||
1995 | int val = *valp; | 1995 | int val = *valp; |
1996 | if (val < 0) { | 1996 | if (val < 0) { |
1997 | *negp = true; | 1997 | *negp = true; |
1998 | *lvalp = (unsigned long)-val; | 1998 | *lvalp = -(unsigned long)val; |
1999 | } else { | 1999 | } else { |
2000 | *negp = false; | 2000 | *negp = false; |
2001 | *lvalp = (unsigned long)val; | 2001 | *lvalp = (unsigned long)val; |
@@ -2201,7 +2201,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, | |||
2201 | int val = *valp; | 2201 | int val = *valp; |
2202 | if (val < 0) { | 2202 | if (val < 0) { |
2203 | *negp = true; | 2203 | *negp = true; |
2204 | *lvalp = (unsigned long)-val; | 2204 | *lvalp = -(unsigned long)val; |
2205 | } else { | 2205 | } else { |
2206 | *negp = false; | 2206 | *negp = false; |
2207 | *lvalp = (unsigned long)val; | 2207 | *lvalp = (unsigned long)val; |
@@ -2436,7 +2436,7 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, | |||
2436 | unsigned long lval; | 2436 | unsigned long lval; |
2437 | if (val < 0) { | 2437 | if (val < 0) { |
2438 | *negp = true; | 2438 | *negp = true; |
2439 | lval = (unsigned long)-val; | 2439 | lval = -(unsigned long)val; |
2440 | } else { | 2440 | } else { |
2441 | *negp = false; | 2441 | *negp = false; |
2442 | lval = (unsigned long)val; | 2442 | lval = (unsigned long)val; |
@@ -2459,7 +2459,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp | |||
2459 | unsigned long lval; | 2459 | unsigned long lval; |
2460 | if (val < 0) { | 2460 | if (val < 0) { |
2461 | *negp = true; | 2461 | *negp = true; |
2462 | lval = (unsigned long)-val; | 2462 | lval = -(unsigned long)val; |
2463 | } else { | 2463 | } else { |
2464 | *negp = false; | 2464 | *negp = false; |
2465 | lval = (unsigned long)val; | 2465 | lval = (unsigned long)val; |
@@ -2484,7 +2484,7 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, | |||
2484 | unsigned long lval; | 2484 | unsigned long lval; |
2485 | if (val < 0) { | 2485 | if (val < 0) { |
2486 | *negp = true; | 2486 | *negp = true; |
2487 | lval = (unsigned long)-val; | 2487 | lval = -(unsigned long)val; |
2488 | } else { | 2488 | } else { |
2489 | *negp = false; | 2489 | *negp = false; |
2490 | lval = (unsigned long)val; | 2490 | lval = (unsigned long)val; |
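
The repeated `(unsigned long)-val` to `-(unsigned long)val` change avoids negating INT_MIN in signed arithmetic, which overflows and is undefined behaviour; converting first and negating in unsigned arithmetic is well defined and yields the same magnitude. A standalone illustration (plain userspace C, not part of the patch):

    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
            int val = INT_MIN;

            /* (unsigned long)-val would negate INT_MIN as an int first: undefined. */
            unsigned long magnitude = -(unsigned long)val;  /* convert, then negate */

            printf("|INT_MIN| = %lu\n", magnitude);         /* prints 2147483648 */
            return 0;
    }
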
diff --git a/lib/bitmap.c b/lib/bitmap.c index a578a0189199..814814397cce 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c | |||
@@ -367,7 +367,8 @@ int __bitmap_parse(const char *buf, unsigned int buflen, | |||
367 | 367 | ||
368 | nchunks = nbits = totaldigits = c = 0; | 368 | nchunks = nbits = totaldigits = c = 0; |
369 | do { | 369 | do { |
370 | chunk = ndigits = 0; | 370 | chunk = 0; |
371 | ndigits = totaldigits; | ||
371 | 372 | ||
372 | /* Get the next chunk of the bitmap */ | 373 | /* Get the next chunk of the bitmap */ |
373 | while (buflen) { | 374 | while (buflen) { |
@@ -406,9 +407,9 @@ int __bitmap_parse(const char *buf, unsigned int buflen, | |||
406 | return -EOVERFLOW; | 407 | return -EOVERFLOW; |
407 | 408 | ||
408 | chunk = (chunk << 4) | hex_to_bin(c); | 409 | chunk = (chunk << 4) | hex_to_bin(c); |
409 | ndigits++; totaldigits++; | 410 | totaldigits++; |
410 | } | 411 | } |
411 | if (ndigits == 0) | 412 | if (ndigits == totaldigits) |
412 | return -EINVAL; | 413 | return -EINVAL; |
413 | if (nchunks == 0 && chunk == 0) | 414 | if (nchunks == 0 && chunk == 0) |
414 | continue; | 415 | continue; |
@@ -505,7 +506,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, | |||
505 | int nmaskbits) | 506 | int nmaskbits) |
506 | { | 507 | { |
507 | unsigned a, b; | 508 | unsigned a, b; |
508 | int c, old_c, totaldigits; | 509 | int c, old_c, totaldigits, ndigits; |
509 | const char __user __force *ubuf = (const char __user __force *)buf; | 510 | const char __user __force *ubuf = (const char __user __force *)buf; |
510 | int at_start, in_range; | 511 | int at_start, in_range; |
511 | 512 | ||
@@ -515,6 +516,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, | |||
515 | at_start = 1; | 516 | at_start = 1; |
516 | in_range = 0; | 517 | in_range = 0; |
517 | a = b = 0; | 518 | a = b = 0; |
519 | ndigits = totaldigits; | ||
518 | 520 | ||
519 | /* Get the next cpu# or a range of cpu#'s */ | 521 | /* Get the next cpu# or a range of cpu#'s */ |
520 | while (buflen) { | 522 | while (buflen) { |
@@ -528,23 +530,27 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, | |||
528 | if (isspace(c)) | 530 | if (isspace(c)) |
529 | continue; | 531 | continue; |
530 | 532 | ||
531 | /* | ||
532 | * If the last character was a space and the current | ||
533 | * character isn't '\0', we've got embedded whitespace. | ||
534 | * This is a no-no, so throw an error. | ||
535 | */ | ||
536 | if (totaldigits && c && isspace(old_c)) | ||
537 | return -EINVAL; | ||
538 | |||
539 | /* A '\0' or a ',' signal the end of a cpu# or range */ | 533 | /* A '\0' or a ',' signal the end of a cpu# or range */ |
540 | if (c == '\0' || c == ',') | 534 | if (c == '\0' || c == ',') |
541 | break; | 535 | break; |
536 | /* | ||
537 | * whitespace between digits is not allowed, | ||
538 | * but leading or trailing whitespace is ok. | ||
539 | * when old_c is whitespace, | ||
540 | * if totaldigits == ndigits, the whitespace is leading. | ||
541 | * trailing whitespace never reaches this check, because | ||
542 | * c would have been ',' or '\0' and the break above | ||
543 | * would already have ended this iteration of the loop. | ||
544 | */ | ||
545 | if ((totaldigits != ndigits) && isspace(old_c)) | ||
546 | return -EINVAL; | ||
542 | 547 | ||
543 | if (c == '-') { | 548 | if (c == '-') { |
544 | if (at_start || in_range) | 549 | if (at_start || in_range) |
545 | return -EINVAL; | 550 | return -EINVAL; |
546 | b = 0; | 551 | b = 0; |
547 | in_range = 1; | 552 | in_range = 1; |
553 | at_start = 1; | ||
548 | continue; | 554 | continue; |
549 | } | 555 | } |
550 | 556 | ||
@@ -557,15 +563,18 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, | |||
557 | at_start = 0; | 563 | at_start = 0; |
558 | totaldigits++; | 564 | totaldigits++; |
559 | } | 565 | } |
566 | if (ndigits == totaldigits) | ||
567 | continue; | ||
568 | /* if there is no digit after '-', it's wrong */ | ||
569 | if (at_start && in_range) | ||
570 | return -EINVAL; | ||
560 | if (!(a <= b)) | 571 | if (!(a <= b)) |
561 | return -EINVAL; | 572 | return -EINVAL; |
562 | if (b >= nmaskbits) | 573 | if (b >= nmaskbits) |
563 | return -ERANGE; | 574 | return -ERANGE; |
564 | if (!at_start) { | 575 | while (a <= b) { |
565 | while (a <= b) { | 576 | set_bit(a, maskp); |
566 | set_bit(a, maskp); | 577 | a++; |
567 | a++; | ||
568 | } | ||
569 | } | 578 | } |
570 | } while (buflen && c == ','); | 579 | } while (buflen && c == ','); |
571 | return 0; | 580 | return 0; |
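
After these fixes __bitmap_parselist() rejects a dangling '-' with no digit after it and tolerates leading or trailing whitespace around each term, while embedded whitespace still fails. A hedged kernel-style sketch of the usual caller pattern:

    #include <linux/bitmap.h>

    static int example_parse_cpu_list(const char *arg)
    {
            DECLARE_BITMAP(mask, 64);
            int ret;

            /* "0-3,8" sets bits 0..3 and 8; "1-" now fails with -EINVAL. */
            ret = bitmap_parselist(arg, mask, 64);
            if (ret)
                    return ret;

            return bitmap_weight(mask, 64);  /* number of bits set */
    }
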
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 6dd0335ea61b..0234361b24b8 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c | |||
@@ -743,12 +743,12 @@ exit_0: | |||
743 | } | 743 | } |
744 | 744 | ||
745 | #ifdef PREBOOT | 745 | #ifdef PREBOOT |
746 | STATIC int INIT decompress(unsigned char *buf, long len, | 746 | STATIC int INIT __decompress(unsigned char *buf, long len, |
747 | long (*fill)(void*, unsigned long), | 747 | long (*fill)(void*, unsigned long), |
748 | long (*flush)(void*, unsigned long), | 748 | long (*flush)(void*, unsigned long), |
749 | unsigned char *outbuf, | 749 | unsigned char *outbuf, long olen, |
750 | long *pos, | 750 | long *pos, |
751 | void(*error)(char *x)) | 751 | void (*error)(char *x)) |
752 | { | 752 | { |
753 | return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error); | 753 | return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error); |
754 | } | 754 | } |
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index d4c7891635ec..555c06bf20da 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c | |||
@@ -1,4 +1,5 @@ | |||
1 | #ifdef STATIC | 1 | #ifdef STATIC |
2 | #define PREBOOT | ||
2 | /* Pre-boot environment: included */ | 3 | /* Pre-boot environment: included */ |
3 | 4 | ||
4 | /* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots | 5 | /* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots |
@@ -33,23 +34,23 @@ static long INIT nofill(void *buffer, unsigned long len) | |||
33 | } | 34 | } |
34 | 35 | ||
35 | /* Included from initramfs et al code */ | 36 | /* Included from initramfs et al code */ |
36 | STATIC int INIT gunzip(unsigned char *buf, long len, | 37 | STATIC int INIT __gunzip(unsigned char *buf, long len, |
37 | long (*fill)(void*, unsigned long), | 38 | long (*fill)(void*, unsigned long), |
38 | long (*flush)(void*, unsigned long), | 39 | long (*flush)(void*, unsigned long), |
39 | unsigned char *out_buf, | 40 | unsigned char *out_buf, long out_len, |
40 | long *pos, | 41 | long *pos, |
41 | void(*error)(char *x)) { | 42 | void(*error)(char *x)) { |
42 | u8 *zbuf; | 43 | u8 *zbuf; |
43 | struct z_stream_s *strm; | 44 | struct z_stream_s *strm; |
44 | int rc; | 45 | int rc; |
45 | size_t out_len; | ||
46 | 46 | ||
47 | rc = -1; | 47 | rc = -1; |
48 | if (flush) { | 48 | if (flush) { |
49 | out_len = 0x8000; /* 32 K */ | 49 | out_len = 0x8000; /* 32 K */ |
50 | out_buf = malloc(out_len); | 50 | out_buf = malloc(out_len); |
51 | } else { | 51 | } else { |
52 | out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */ | 52 | if (!out_len) |
53 | out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */ | ||
53 | } | 54 | } |
54 | if (!out_buf) { | 55 | if (!out_buf) { |
55 | error("Out of memory while allocating output buffer"); | 56 | error("Out of memory while allocating output buffer"); |
@@ -181,4 +182,24 @@ gunzip_nomem1: | |||
181 | return rc; /* returns Z_OK (0) if successful */ | 182 | return rc; /* returns Z_OK (0) if successful */ |
182 | } | 183 | } |
183 | 184 | ||
184 | #define decompress gunzip | 185 | #ifndef PREBOOT |
186 | STATIC int INIT gunzip(unsigned char *buf, long len, | ||
187 | long (*fill)(void*, unsigned long), | ||
188 | long (*flush)(void*, unsigned long), | ||
189 | unsigned char *out_buf, | ||
190 | long *pos, | ||
191 | void (*error)(char *x)) | ||
192 | { | ||
193 | return __gunzip(buf, len, fill, flush, out_buf, 0, pos, error); | ||
194 | } | ||
195 | #else | ||
196 | STATIC int INIT __decompress(unsigned char *buf, long len, | ||
197 | long (*fill)(void*, unsigned long), | ||
198 | long (*flush)(void*, unsigned long), | ||
199 | unsigned char *out_buf, long out_len, | ||
200 | long *pos, | ||
201 | void (*error)(char *x)) | ||
202 | { | ||
203 | return __gunzip(buf, len, fill, flush, out_buf, out_len, pos, error); | ||
204 | } | ||
205 | #endif | ||
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 40f66ebe57b7..036fc882cd72 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c | |||
@@ -196,12 +196,12 @@ exit_0: | |||
196 | } | 196 | } |
197 | 197 | ||
198 | #ifdef PREBOOT | 198 | #ifdef PREBOOT |
199 | STATIC int INIT decompress(unsigned char *buf, long in_len, | 199 | STATIC int INIT __decompress(unsigned char *buf, long in_len, |
200 | long (*fill)(void*, unsigned long), | 200 | long (*fill)(void*, unsigned long), |
201 | long (*flush)(void*, unsigned long), | 201 | long (*flush)(void*, unsigned long), |
202 | unsigned char *output, | 202 | unsigned char *output, long out_len, |
203 | long *posp, | 203 | long *posp, |
204 | void(*error)(char *x) | 204 | void (*error)(char *x) |
205 | ) | 205 | ) |
206 | { | 206 | { |
207 | return unlz4(buf, in_len - 4, fill, flush, output, posp, error); | 207 | return unlz4(buf, in_len - 4, fill, flush, output, posp, error); |
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 0be83af62b88..ed7a1fd819f2 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c | |||
@@ -620,7 +620,7 @@ STATIC inline int INIT unlzma(unsigned char *buf, long in_len, | |||
620 | 620 | ||
621 | num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); | 621 | num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); |
622 | p = (uint16_t *) large_malloc(num_probs * sizeof(*p)); | 622 | p = (uint16_t *) large_malloc(num_probs * sizeof(*p)); |
623 | if (p == 0) | 623 | if (p == NULL) |
624 | goto exit_2; | 624 | goto exit_2; |
625 | num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); | 625 | num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); |
626 | for (i = 0; i < num_probs; i++) | 626 | for (i = 0; i < num_probs; i++) |
@@ -667,13 +667,12 @@ exit_0: | |||
667 | } | 667 | } |
668 | 668 | ||
669 | #ifdef PREBOOT | 669 | #ifdef PREBOOT |
670 | STATIC int INIT decompress(unsigned char *buf, long in_len, | 670 | STATIC int INIT __decompress(unsigned char *buf, long in_len, |
671 | long (*fill)(void*, unsigned long), | 671 | long (*fill)(void*, unsigned long), |
672 | long (*flush)(void*, unsigned long), | 672 | long (*flush)(void*, unsigned long), |
673 | unsigned char *output, | 673 | unsigned char *output, long out_len, |
674 | long *posp, | 674 | long *posp, |
675 | void(*error)(char *x) | 675 | void (*error)(char *x)) |
676 | ) | ||
677 | { | 676 | { |
678 | return unlzma(buf, in_len - 4, fill, flush, output, posp, error); | 677 | return unlzma(buf, in_len - 4, fill, flush, output, posp, error); |
679 | } | 678 | } |
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c index b94a31bdd87d..f4c158e3a022 100644 --- a/lib/decompress_unlzo.c +++ b/lib/decompress_unlzo.c | |||
@@ -31,6 +31,7 @@ | |||
31 | */ | 31 | */ |
32 | 32 | ||
33 | #ifdef STATIC | 33 | #ifdef STATIC |
34 | #define PREBOOT | ||
34 | #include "lzo/lzo1x_decompress_safe.c" | 35 | #include "lzo/lzo1x_decompress_safe.c" |
35 | #else | 36 | #else |
36 | #include <linux/decompress/unlzo.h> | 37 | #include <linux/decompress/unlzo.h> |
@@ -287,4 +288,14 @@ exit: | |||
287 | return ret; | 288 | return ret; |
288 | } | 289 | } |
289 | 290 | ||
290 | #define decompress unlzo | 291 | #ifdef PREBOOT |
292 | STATIC int INIT __decompress(unsigned char *buf, long len, | ||
293 | long (*fill)(void*, unsigned long), | ||
294 | long (*flush)(void*, unsigned long), | ||
295 | unsigned char *out_buf, long olen, | ||
296 | long *pos, | ||
297 | void (*error)(char *x)) | ||
298 | { | ||
299 | return unlzo(buf, len, fill, flush, out_buf, pos, error); | ||
300 | } | ||
301 | #endif | ||
diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index b07a78340e9d..25d59a95bd66 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c | |||
@@ -394,4 +394,14 @@ error_alloc_state: | |||
394 | * This macro is used by architecture-specific files to decompress | 394 | * This macro is used by architecture-specific files to decompress |
395 | * the kernel image. | 395 | * the kernel image. |
396 | */ | 396 | */ |
397 | #define decompress unxz | 397 | #ifdef XZ_PREBOOT |
398 | STATIC int INIT __decompress(unsigned char *buf, long len, | ||
399 | long (*fill)(void*, unsigned long), | ||
400 | long (*flush)(void*, unsigned long), | ||
401 | unsigned char *out_buf, long olen, | ||
402 | long *pos, | ||
403 | void (*error)(char *x)) | ||
404 | { | ||
405 | return unxz(buf, len, fill, flush, out_buf, pos, error); | ||
406 | } | ||
407 | #endif | ||
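
Every pre-boot decompressor now exports the same __decompress() entry point with an explicit output-length argument, instead of aliasing a per-algorithm function to a `decompress` macro. A sketch of how an architecture's boot stub might call it once this series is in place (buffer names are illustrative, and error() is assumed to be the platform's fatal-error hook):

    /* Pre-boot environment: the decompressor source is #included, no kernel headers. */
    static void error(char *x)
    {
            /* platform specific: print x and halt */
    }

    static void example_decompress_kernel(unsigned char *input, long input_len,
                                          unsigned char *output, long output_len)
    {
            /*
             * fill/flush are NULL because the whole compressed image is already
             * in memory; out_len lets the gzip path bound in-place output instead
             * of assuming "no limit".
             */
            __decompress(input, input_len, NULL, NULL, output, output_len, NULL, error);
    }
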
diff --git a/lib/kstrtox.c b/lib/kstrtox.c index ec8da78df9be..94be244e8441 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c | |||
@@ -152,7 +152,7 @@ int kstrtoll(const char *s, unsigned int base, long long *res) | |||
152 | rv = _kstrtoull(s + 1, base, &tmp); | 152 | rv = _kstrtoull(s + 1, base, &tmp); |
153 | if (rv < 0) | 153 | if (rv < 0) |
154 | return rv; | 154 | return rv; |
155 | if ((long long)(-tmp) >= 0) | 155 | if ((long long)-tmp > 0) |
156 | return -ERANGE; | 156 | return -ERANGE; |
157 | *res = -tmp; | 157 | *res = -tmp; |
158 | } else { | 158 | } else { |
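
Changing the overflow test from `>= 0` to `> 0` is what makes "-0" parse as 0 while still accepting LLONG_MIN and rejecting anything below it (see the test-kstrtox update further down). The boundary behaviour, shown as plain C on a two's-complement target:

    #include <stdio.h>

    /* Mirror of the fixed check; tmp is the parsed absolute value. */
    static int negative_fits(unsigned long long tmp)
    {
            return !((long long)-tmp > 0);          /* 1: -tmp is representable */
    }

    int main(void)
    {
            printf("%d\n", negative_fits(0));                       /* "-0"       -> 1 */
            printf("%d\n", negative_fits(9223372036854775807ULL));  /* -LLONG_MAX -> 1 */
            printf("%d\n", negative_fits(9223372036854775808ULL));  /*  LLONG_MIN -> 1 */
            printf("%d\n", negative_fits(9223372036854775809ULL));  /*  overflow  -> 0 */
            return 0;
    }
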
diff --git a/lib/string_helpers.c b/lib/string_helpers.c index c98ae818eb4e..54036ce2e2dd 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c | |||
@@ -410,7 +410,7 @@ static bool escape_hex(unsigned char c, char **dst, char *end) | |||
410 | * @dst: destination buffer (escaped) | 410 | * @dst: destination buffer (escaped) |
411 | * @osz: destination buffer size | 411 | * @osz: destination buffer size |
412 | * @flags: combination of the flags (bitwise OR): | 412 | * @flags: combination of the flags (bitwise OR): |
413 | * %ESCAPE_SPACE: | 413 | * %ESCAPE_SPACE: (special white space, not space itself) |
414 | * '\f' - form feed | 414 | * '\f' - form feed |
415 | * '\n' - new line | 415 | * '\n' - new line |
416 | * '\r' - carriage return | 416 | * '\r' - carriage return |
@@ -432,16 +432,18 @@ static bool escape_hex(unsigned char c, char **dst, char *end) | |||
432 | * all previous together | 432 | * all previous together |
433 | * %ESCAPE_HEX: | 433 | * %ESCAPE_HEX: |
434 | * '\xHH' - byte with hexadecimal value HH (2 digits) | 434 | * '\xHH' - byte with hexadecimal value HH (2 digits) |
435 | * @esc: NULL-terminated string of characters any of which, if found in | 435 | * @only: NULL-terminated string containing characters used to limit |
436 | * the source, has to be escaped | 436 | * the selected escape class. If characters are included in @only |
437 | * that would not normally be escaped by the classes selected | ||
438 | * in @flags, they will be copied to @dst unescaped. | ||
437 | * | 439 | * |
438 | * Description: | 440 | * Description: |
439 | * The process of escaping byte buffer includes several parts. They are applied | 441 | * The process of escaping byte buffer includes several parts. They are applied |
440 | * in the following sequence. | 442 | * in the following sequence. |
441 | * 1. The character is matched to the printable class, if asked, and in | 443 | * 1. The character is matched to the printable class, if asked, and in |
442 | * case of match it passes through to the output. | 444 | * case of match it passes through to the output. |
443 | * 2. The character is not matched to the one from @esc string and thus | 445 | * 2. The character is not matched to the one from @only string and thus |
444 | * must go as is to the output. | 446 | * must go as-is to the output. |
445 | * 3. The character is checked if it falls into the class given by @flags. | 447 | * 3. The character is checked if it falls into the class given by @flags. |
446 | * %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any | 448 | * %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any |
447 | * character. Note that they actually can't go together, otherwise | 449 | * character. Note that they actually can't go together, otherwise |
@@ -458,11 +460,11 @@ static bool escape_hex(unsigned char c, char **dst, char *end) | |||
458 | * dst for a '\0' terminator if and only if ret < osz. | 460 | * dst for a '\0' terminator if and only if ret < osz. |
459 | */ | 461 | */ |
460 | int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, | 462 | int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, |
461 | unsigned int flags, const char *esc) | 463 | unsigned int flags, const char *only) |
462 | { | 464 | { |
463 | char *p = dst; | 465 | char *p = dst; |
464 | char *end = p + osz; | 466 | char *end = p + osz; |
465 | bool is_dict = esc && *esc; | 467 | bool is_dict = only && *only; |
466 | 468 | ||
467 | while (isz--) { | 469 | while (isz--) { |
468 | unsigned char c = *src++; | 470 | unsigned char c = *src++; |
@@ -471,7 +473,7 @@ int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, | |||
471 | * Apply rules in the following sequence: | 473 | * Apply rules in the following sequence: |
472 | * - the character is printable, when @flags has | 474 | * - the character is printable, when @flags has |
473 | * %ESCAPE_NP bit set | 475 | * %ESCAPE_NP bit set |
474 | * - the @esc string is supplied and does not contain a | 476 | * - the @only string is supplied and does not contain a |
475 | * character under question | 477 | * character under question |
476 | * - the character doesn't fall into a class of symbols | 478 | * - the character doesn't fall into a class of symbols |
477 | * defined by given @flags | 479 | * defined by given @flags |
@@ -479,7 +481,7 @@ int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, | |||
479 | * output buffer. | 481 | * output buffer. |
480 | */ | 482 | */ |
481 | if ((flags & ESCAPE_NP && isprint(c)) || | 483 | if ((flags & ESCAPE_NP && isprint(c)) || |
482 | (is_dict && !strchr(esc, c))) { | 484 | (is_dict && !strchr(only, c))) { |
483 | /* do nothing */ | 485 | /* do nothing */ |
484 | } else { | 486 | } else { |
485 | if (flags & ESCAPE_SPACE && escape_space(c, &p, end)) | 487 | if (flags & ESCAPE_SPACE && escape_space(c, &p, end)) |
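
The renamed @only argument narrows the escaping rather than extending it: only bytes listed there remain candidates for the classes selected in @flags, everything else is copied through. A hedged kernel-style sketch:

    #include <linux/string.h>
    #include <linux/string_helpers.h>

    /*
     * Escape only newlines and tabs; all other bytes pass through untouched
     * because they are not listed in @only.
     */
    static int example_escape(const char *src, char *dst, size_t dst_size)
    {
            return string_escape_mem(src, strlen(src), dst, dst_size,
                                     ESCAPE_SPACE, "\n\t");
    }
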
diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c index 4137bca5f8e8..f355f67169b6 100644 --- a/lib/test-kstrtox.c +++ b/lib/test-kstrtox.c | |||
@@ -260,6 +260,7 @@ static void __init test_kstrtoll_ok(void) | |||
260 | {"4294967297", 10, 4294967297LL}, | 260 | {"4294967297", 10, 4294967297LL}, |
261 | {"9223372036854775807", 10, 9223372036854775807LL}, | 261 | {"9223372036854775807", 10, 9223372036854775807LL}, |
262 | 262 | ||
263 | {"-0", 10, 0LL}, | ||
263 | {"-1", 10, -1LL}, | 264 | {"-1", 10, -1LL}, |
264 | {"-2", 10, -2LL}, | 265 | {"-2", 10, -2LL}, |
265 | {"-9223372036854775808", 10, LLONG_MIN}, | 266 | {"-9223372036854775808", 10, LLONG_MIN}, |
@@ -277,11 +278,6 @@ static void __init test_kstrtoll_fail(void) | |||
277 | {"-9223372036854775809", 10}, | 278 | {"-9223372036854775809", 10}, |
278 | {"-18446744073709551614", 10}, | 279 | {"-18446744073709551614", 10}, |
279 | {"-18446744073709551615", 10}, | 280 | {"-18446744073709551615", 10}, |
280 | /* negative zero isn't an integer in Linux */ | ||
281 | {"-0", 0}, | ||
282 | {"-0", 8}, | ||
283 | {"-0", 10}, | ||
284 | {"-0", 16}, | ||
285 | /* sign is first character if any */ | 281 | /* sign is first character if any */ |
286 | {"-+1", 0}, | 282 | {"-+1", 0}, |
287 | {"-+1", 8}, | 283 | {"-+1", 8}, |
diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 098c08eddfab..c1efb1b61017 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c | |||
@@ -65,7 +65,7 @@ static noinline void __init kmalloc_node_oob_right(void) | |||
65 | kfree(ptr); | 65 | kfree(ptr); |
66 | } | 66 | } |
67 | 67 | ||
68 | static noinline void __init kmalloc_large_oob_rigth(void) | 68 | static noinline void __init kmalloc_large_oob_right(void) |
69 | { | 69 | { |
70 | char *ptr; | 70 | char *ptr; |
71 | size_t size = KMALLOC_MAX_CACHE_SIZE + 10; | 71 | size_t size = KMALLOC_MAX_CACHE_SIZE + 10; |
@@ -114,7 +114,7 @@ static noinline void __init kmalloc_oob_krealloc_less(void) | |||
114 | kfree(ptr1); | 114 | kfree(ptr1); |
115 | return; | 115 | return; |
116 | } | 116 | } |
117 | ptr2[size1] = 'x'; | 117 | ptr2[size2] = 'x'; |
118 | kfree(ptr2); | 118 | kfree(ptr2); |
119 | } | 119 | } |
120 | 120 | ||
@@ -259,7 +259,7 @@ static int __init kmalloc_tests_init(void) | |||
259 | kmalloc_oob_right(); | 259 | kmalloc_oob_right(); |
260 | kmalloc_oob_left(); | 260 | kmalloc_oob_left(); |
261 | kmalloc_node_oob_right(); | 261 | kmalloc_node_oob_right(); |
262 | kmalloc_large_oob_rigth(); | 262 | kmalloc_large_oob_right(); |
263 | kmalloc_oob_krealloc_more(); | 263 | kmalloc_oob_krealloc_more(); |
264 | kmalloc_oob_krealloc_less(); | 264 | kmalloc_oob_krealloc_less(); |
265 | kmalloc_oob_16(); | 265 | kmalloc_oob_16(); |
diff --git a/lib/zlib_deflate/deftree.c b/lib/zlib_deflate/deftree.c index ddf348299f24..9b1756b12743 100644 --- a/lib/zlib_deflate/deftree.c +++ b/lib/zlib_deflate/deftree.c | |||
@@ -35,6 +35,7 @@ | |||
35 | /* #include "deflate.h" */ | 35 | /* #include "deflate.h" */ |
36 | 36 | ||
37 | #include <linux/zutil.h> | 37 | #include <linux/zutil.h> |
38 | #include <linux/bitrev.h> | ||
38 | #include "defutil.h" | 39 | #include "defutil.h" |
39 | 40 | ||
40 | #ifdef DEBUG_ZLIB | 41 | #ifdef DEBUG_ZLIB |
@@ -146,7 +147,6 @@ static void send_all_trees (deflate_state *s, int lcodes, int dcodes, | |||
146 | static void compress_block (deflate_state *s, ct_data *ltree, | 147 | static void compress_block (deflate_state *s, ct_data *ltree, |
147 | ct_data *dtree); | 148 | ct_data *dtree); |
148 | static void set_data_type (deflate_state *s); | 149 | static void set_data_type (deflate_state *s); |
149 | static unsigned bi_reverse (unsigned value, int length); | ||
150 | static void bi_windup (deflate_state *s); | 150 | static void bi_windup (deflate_state *s); |
151 | static void bi_flush (deflate_state *s); | 151 | static void bi_flush (deflate_state *s); |
152 | static void copy_block (deflate_state *s, char *buf, unsigned len, | 152 | static void copy_block (deflate_state *s, char *buf, unsigned len, |
@@ -284,7 +284,7 @@ static void tr_static_init(void) | |||
284 | /* The static distance tree is trivial: */ | 284 | /* The static distance tree is trivial: */ |
285 | for (n = 0; n < D_CODES; n++) { | 285 | for (n = 0; n < D_CODES; n++) { |
286 | static_dtree[n].Len = 5; | 286 | static_dtree[n].Len = 5; |
287 | static_dtree[n].Code = bi_reverse((unsigned)n, 5); | 287 | static_dtree[n].Code = bitrev32((u32)n) >> (32 - 5); |
288 | } | 288 | } |
289 | static_init_done = 1; | 289 | static_init_done = 1; |
290 | } | 290 | } |
@@ -520,7 +520,7 @@ static void gen_codes( | |||
520 | int len = tree[n].Len; | 520 | int len = tree[n].Len; |
521 | if (len == 0) continue; | 521 | if (len == 0) continue; |
522 | /* Now reverse the bits */ | 522 | /* Now reverse the bits */ |
523 | tree[n].Code = bi_reverse(next_code[len]++, len); | 523 | tree[n].Code = bitrev32((u32)(next_code[len]++)) >> (32 - len); |
524 | 524 | ||
525 | Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", | 525 | Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", |
526 | n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); | 526 | n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); |
diff --git a/lib/zlib_deflate/defutil.h b/lib/zlib_deflate/defutil.h index b640b6402e99..a8c370897c9f 100644 --- a/lib/zlib_deflate/defutil.h +++ b/lib/zlib_deflate/defutil.h | |||
@@ -293,22 +293,6 @@ void zlib_tr_stored_type_only (deflate_state *); | |||
293 | } | 293 | } |
294 | 294 | ||
295 | /* =========================================================================== | 295 | /* =========================================================================== |
296 | * Reverse the first len bits of a code, using straightforward code (a faster | ||
297 | * method would use a table) | ||
298 | * IN assertion: 1 <= len <= 15 | ||
299 | */ | ||
300 | static inline unsigned bi_reverse(unsigned code, /* the value to invert */ | ||
301 | int len) /* its bit length */ | ||
302 | { | ||
303 | register unsigned res = 0; | ||
304 | do { | ||
305 | res |= code & 1; | ||
306 | code >>= 1, res <<= 1; | ||
307 | } while (--len > 0); | ||
308 | return res >> 1; | ||
309 | } | ||
310 | |||
311 | /* =========================================================================== | ||
312 | * Flush the bit buffer, keeping at most 7 bits in it. | 296 | * Flush the bit buffer, keeping at most 7 bits in it. |
313 | */ | 297 | */ |
314 | static inline void bi_flush(deflate_state *s) | 298 | static inline void bi_flush(deflate_state *s) |
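
Dropping bi_reverse() in favour of bitrev32() relies on the identity that reversing the low len bits of a code equals reversing all 32 bits and shifting right by 32 - len, provided the code fits in len bits (which Huffman codes do). A quick userspace check of that identity, with rev32() standing in for the kernel's bitrev32():

    #include <assert.h>
    #include <stdio.h>

    /* The removed helper: reverse the low len bits of code. */
    static unsigned bi_reverse(unsigned code, int len)
    {
            unsigned res = 0;
            do {
                    res |= code & 1;
                    code >>= 1, res <<= 1;
            } while (--len > 0);
            return res >> 1;
    }

    /* Plain 32-bit bit reversal, standing in for the kernel's bitrev32(). */
    static unsigned rev32(unsigned v)
    {
            unsigned r = 0;
            int i;

            for (i = 0; i < 32; i++)
                    r |= ((v >> i) & 1u) << (31 - i);
            return r;
    }

    int main(void)
    {
            unsigned n;
            int len;

            for (n = 0; n < (1u << 15); n++)
                    for (len = 1; len <= 15; len++)
                            if ((n >> len) == 0)    /* code must fit in len bits */
                                    assert(bi_reverse(n, len) == (rev32(n) >> (32 - len)));
            printf("bi_reverse(n, len) == bitrev32(n) >> (32 - len) for all tested codes\n");
            return 0;
    }
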
diff --git a/mm/Kconfig b/mm/Kconfig index 3a4070f5ab79..6413d027c0b2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -649,6 +649,18 @@ config DEFERRED_STRUCT_PAGE_INIT | |||
649 | processes running early in the lifetime of the system until kswapd | 649 | processes running early in the lifetime of the system until kswapd |
650 | finishes the initialisation. | 650 | finishes the initialisation. |
651 | 651 | ||
652 | config IDLE_PAGE_TRACKING | ||
653 | bool "Enable idle page tracking" | ||
654 | depends on SYSFS && MMU | ||
655 | select PAGE_EXTENSION if !64BIT | ||
656 | help | ||
657 | This feature allows estimating the number of user pages that have | ||
658 | not been touched during a given period of time. This information can | ||
659 | be useful to tune memory cgroup limits and/or for job placement | ||
660 | within a compute cluster. | ||
661 | |||
662 | See Documentation/vm/idle_page_tracking.txt for more details. | ||
663 | |||
652 | config ZONE_DEVICE | 664 | config ZONE_DEVICE |
653 | bool "Device memory (pmem, etc...) hotplug support" if EXPERT | 665 | bool "Device memory (pmem, etc...) hotplug support" if EXPERT |
654 | default !ZONE_DMA | 666 | default !ZONE_DMA |
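
The interface this option enables is a per-PFN bitmap under /sys/kernel/mm/page_idle/. A hedged userspace sketch of the estimation the help text describes: mark a PFN range idle, let the workload run, then count how many pages stayed idle (error handling trimmed; the PFN range and sleep interval are illustrative):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            uint64_t pfn = 0x100000, npages = 64 * 512;     /* illustrative range */
            uint64_t word = ~0ULL, idle = 0, i;
            int fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);

            /* One 8-byte word covers 64 pages; set all bits to mark them idle. */
            for (i = pfn; i < pfn + npages; i += 64)
                    pwrite(fd, &word, sizeof(word), i / 64 * 8);

            sleep(60);                                      /* let the workload run */

            for (i = pfn; i < pfn + npages; i += 64) {
                    pread(fd, &word, sizeof(word), i / 64 * 8);
                    idle += __builtin_popcountll(word);     /* pages still idle */
            }
            printf("%llu of %llu pages stayed idle\n",
                   (unsigned long long)idle, (unsigned long long)npages);
            close(fd);
            return 0;
    }
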
diff --git a/mm/Makefile b/mm/Makefile index b424d5e5b6ff..56f8eed73f1a 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -79,3 +79,4 @@ obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o | |||
79 | obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o | 79 | obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o |
80 | obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o | 80 | obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o |
81 | obj-$(CONFIG_USERFAULTFD) += userfaultfd.o | 81 | obj-$(CONFIG_USERFAULTFD) += userfaultfd.o |
82 | obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o | ||
diff --git a/mm/debug.c b/mm/debug.c index 76089ddf99ea..6c1b3ea61bfd 100644 --- a/mm/debug.c +++ b/mm/debug.c | |||
@@ -48,6 +48,10 @@ static const struct trace_print_flags pageflag_names[] = { | |||
48 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 48 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
49 | {1UL << PG_compound_lock, "compound_lock" }, | 49 | {1UL << PG_compound_lock, "compound_lock" }, |
50 | #endif | 50 | #endif |
51 | #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) | ||
52 | {1UL << PG_young, "young" }, | ||
53 | {1UL << PG_idle, "idle" }, | ||
54 | #endif | ||
51 | }; | 55 | }; |
52 | 56 | ||
53 | static void dump_flags(unsigned long flags, | 57 | static void dump_flags(unsigned long flags, |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b16279cbd91d..4b06b8db9df2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/migrate.h> | 25 | #include <linux/migrate.h> |
26 | #include <linux/hashtable.h> | 26 | #include <linux/hashtable.h> |
27 | #include <linux/userfaultfd_k.h> | 27 | #include <linux/userfaultfd_k.h> |
28 | #include <linux/page_idle.h> | ||
28 | 29 | ||
29 | #include <asm/tlb.h> | 30 | #include <asm/tlb.h> |
30 | #include <asm/pgalloc.h> | 31 | #include <asm/pgalloc.h> |
@@ -1757,6 +1758,11 @@ static void __split_huge_page_refcount(struct page *page, | |||
1757 | /* clear PageTail before overwriting first_page */ | 1758 | /* clear PageTail before overwriting first_page */ |
1758 | smp_wmb(); | 1759 | smp_wmb(); |
1759 | 1760 | ||
1761 | if (page_is_young(page)) | ||
1762 | set_page_young(page_tail); | ||
1763 | if (page_is_idle(page)) | ||
1764 | set_page_idle(page_tail); | ||
1765 | |||
1760 | /* | 1766 | /* |
1761 | * __split_huge_page_splitting() already set the | 1767 | * __split_huge_page_splitting() already set the |
1762 | * splitting bit in all pmd that could map this | 1768 | * splitting bit in all pmd that could map this |
@@ -2262,7 +2268,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, | |||
2262 | VM_BUG_ON_PAGE(PageLRU(page), page); | 2268 | VM_BUG_ON_PAGE(PageLRU(page), page); |
2263 | 2269 | ||
2264 | /* If there is no mapped pte young don't collapse the page */ | 2270 | /* If there is no mapped pte young don't collapse the page */ |
2265 | if (pte_young(pteval) || PageReferenced(page) || | 2271 | if (pte_young(pteval) || |
2272 | page_is_young(page) || PageReferenced(page) || | ||
2266 | mmu_notifier_test_young(vma->vm_mm, address)) | 2273 | mmu_notifier_test_young(vma->vm_mm, address)) |
2267 | referenced = true; | 2274 | referenced = true; |
2268 | } | 2275 | } |
@@ -2693,7 +2700,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
2693 | */ | 2700 | */ |
2694 | if (page_count(page) != 1 + !!PageSwapCache(page)) | 2701 | if (page_count(page) != 1 + !!PageSwapCache(page)) |
2695 | goto out_unmap; | 2702 | goto out_unmap; |
2696 | if (pte_young(pteval) || PageReferenced(page) || | 2703 | if (pte_young(pteval) || |
2704 | page_is_young(page) || PageReferenced(page) || | ||
2697 | mmu_notifier_test_young(vma->vm_mm, address)) | 2705 | mmu_notifier_test_young(vma->vm_mm, address)) |
2698 | referenced = true; | 2706 | referenced = true; |
2699 | } | 2707 | } |
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index aeba0edd6e44..9d26fd9fefe4 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c | |||
@@ -45,12 +45,9 @@ static int hwpoison_inject(void *data, u64 val) | |||
45 | /* | 45 | /* |
46 | * do a racy check with elevated page count, to make sure PG_hwpoison | 46 | * do a racy check with elevated page count, to make sure PG_hwpoison |
47 | * will only be set for the targeted owner (or on a free page). | 47 | * will only be set for the targeted owner (or on a free page). |
48 | * We temporarily take page lock for try_get_mem_cgroup_from_page(). | ||
49 | * memory_failure() will redo the check reliably inside page lock. | 48 | * memory_failure() will redo the check reliably inside page lock. |
50 | */ | 49 | */ |
51 | lock_page(hpage); | ||
52 | err = hwpoison_filter(hpage); | 50 | err = hwpoison_filter(hpage); |
53 | unlock_page(hpage); | ||
54 | if (err) | 51 | if (err) |
55 | goto put_out; | 52 | goto put_out; |
56 | 53 | ||
@@ -126,7 +123,7 @@ static int pfn_inject_init(void) | |||
126 | if (!dentry) | 123 | if (!dentry) |
127 | goto fail; | 124 | goto fail; |
128 | 125 | ||
129 | #ifdef CONFIG_MEMCG_SWAP | 126 | #ifdef CONFIG_MEMCG |
130 | dentry = debugfs_create_u64("corrupt-filter-memcg", 0600, | 127 | dentry = debugfs_create_u64("corrupt-filter-memcg", 0600, |
131 | hwpoison_dir, &hwpoison_filter_memcg); | 128 | hwpoison_dir, &hwpoison_filter_memcg); |
132 | if (!dentry) | 129 | if (!dentry) |
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index f532f6a37b55..77191eccdc6f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c | |||
@@ -302,23 +302,14 @@ static void hex_dump_object(struct seq_file *seq, | |||
302 | struct kmemleak_object *object) | 302 | struct kmemleak_object *object) |
303 | { | 303 | { |
304 | const u8 *ptr = (const u8 *)object->pointer; | 304 | const u8 *ptr = (const u8 *)object->pointer; |
305 | int i, len, remaining; | 305 | size_t len; |
306 | unsigned char linebuf[HEX_ROW_SIZE * 5]; | ||
307 | 306 | ||
308 | /* limit the number of lines to HEX_MAX_LINES */ | 307 | /* limit the number of lines to HEX_MAX_LINES */ |
309 | remaining = len = | 308 | len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE); |
310 | min(object->size, (size_t)(HEX_MAX_LINES * HEX_ROW_SIZE)); | 309 | |
311 | 310 | seq_printf(seq, " hex dump (first %zu bytes):\n", len); | |
312 | seq_printf(seq, " hex dump (first %d bytes):\n", len); | 311 | seq_hex_dump(seq, " ", DUMP_PREFIX_NONE, HEX_ROW_SIZE, |
313 | for (i = 0; i < len; i += HEX_ROW_SIZE) { | 312 | HEX_GROUP_SIZE, ptr, len, HEX_ASCII); |
314 | int linelen = min(remaining, HEX_ROW_SIZE); | ||
315 | |||
316 | remaining -= HEX_ROW_SIZE; | ||
317 | hex_dump_to_buffer(ptr + i, linelen, HEX_ROW_SIZE, | ||
318 | HEX_GROUP_SIZE, linebuf, sizeof(linebuf), | ||
319 | HEX_ASCII); | ||
320 | seq_printf(seq, " %s\n", linebuf); | ||
321 | } | ||
322 | } | 313 | } |
323 | 314 | ||
324 | /* | 315 | /* |
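
seq_hex_dump() is the seq_file counterpart of hex_dump_to_buffer(), so hand-rolled dump loops like the one removed here collapse into a single call. A hedged sketch of the same pattern in some other show method (the surrounding file and its 64-byte cap are assumptions, not part of this patch):

    #include <linux/kernel.h>
    #include <linux/seq_file.h>

    /* Dump at most the first 64 bytes of an object into a seq_file. */
    static void example_show_blob(struct seq_file *seq, const void *buf, size_t size)
    {
            size_t len = min_t(size_t, size, 64);

            seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
            seq_hex_dump(seq, "    ", DUMP_PREFIX_OFFSET, 16, 1, buf, len, true);
    }
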
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1742a2db89c7..6ddaeba34e09 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -441,6 +441,34 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page) | |||
441 | return &memcg->css; | 441 | return &memcg->css; |
442 | } | 442 | } |
443 | 443 | ||
444 | /** | ||
445 | * page_cgroup_ino - return inode number of the memcg a page is charged to | ||
446 | * @page: the page | ||
447 | * | ||
448 | * Look up the closest online ancestor of the memory cgroup @page is charged to | ||
449 | * and return its inode number or 0 if @page is not charged to any cgroup. It | ||
450 | * is safe to call this function without holding a reference to @page. | ||
451 | * | ||
452 | * Note, this function is inherently racy, because there is nothing to prevent | ||
453 | * the cgroup inode from getting torn down and potentially reallocated a moment | ||
454 | * after page_cgroup_ino() returns, so it should only be used by callers that | ||
455 | * do not care (such as procfs interfaces). | ||
456 | */ | ||
457 | ino_t page_cgroup_ino(struct page *page) | ||
458 | { | ||
459 | struct mem_cgroup *memcg; | ||
460 | unsigned long ino = 0; | ||
461 | |||
462 | rcu_read_lock(); | ||
463 | memcg = READ_ONCE(page->mem_cgroup); | ||
464 | while (memcg && !(memcg->css.flags & CSS_ONLINE)) | ||
465 | memcg = parent_mem_cgroup(memcg); | ||
466 | if (memcg) | ||
467 | ino = cgroup_ino(memcg->css.cgroup); | ||
468 | rcu_read_unlock(); | ||
469 | return ino; | ||
470 | } | ||
471 | |||
444 | static struct mem_cgroup_per_zone * | 472 | static struct mem_cgroup_per_zone * |
445 | mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page) | 473 | mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page) |
446 | { | 474 | { |
@@ -2071,40 +2099,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) | |||
2071 | css_put_many(&memcg->css, nr_pages); | 2099 | css_put_many(&memcg->css, nr_pages); |
2072 | } | 2100 | } |
2073 | 2101 | ||
2074 | /* | ||
2075 | * try_get_mem_cgroup_from_page - look up page's memcg association | ||
2076 | * @page: the page | ||
2077 | * | ||
2078 | * Look up, get a css reference, and return the memcg that owns @page. | ||
2079 | * | ||
2080 | * The page must be locked to prevent racing with swap-in and page | ||
2081 | * cache charges. If coming from an unlocked page table, the caller | ||
2082 | * must ensure the page is on the LRU or this can race with charging. | ||
2083 | */ | ||
2084 | struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) | ||
2085 | { | ||
2086 | struct mem_cgroup *memcg; | ||
2087 | unsigned short id; | ||
2088 | swp_entry_t ent; | ||
2089 | |||
2090 | VM_BUG_ON_PAGE(!PageLocked(page), page); | ||
2091 | |||
2092 | memcg = page->mem_cgroup; | ||
2093 | if (memcg) { | ||
2094 | if (!css_tryget_online(&memcg->css)) | ||
2095 | memcg = NULL; | ||
2096 | } else if (PageSwapCache(page)) { | ||
2097 | ent.val = page_private(page); | ||
2098 | id = lookup_swap_cgroup_id(ent); | ||
2099 | rcu_read_lock(); | ||
2100 | memcg = mem_cgroup_from_id(id); | ||
2101 | if (memcg && !css_tryget_online(&memcg->css)) | ||
2102 | memcg = NULL; | ||
2103 | rcu_read_unlock(); | ||
2104 | } | ||
2105 | return memcg; | ||
2106 | } | ||
2107 | |||
2108 | static void lock_page_lru(struct page *page, int *isolated) | 2102 | static void lock_page_lru(struct page *page, int *isolated) |
2109 | { | 2103 | { |
2110 | struct zone *zone = page_zone(page); | 2104 | struct zone *zone = page_zone(page); |
@@ -5301,8 +5295,20 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, | |||
5301 | * the page lock, which serializes swap cache removal, which | 5295 | * the page lock, which serializes swap cache removal, which |
5302 | * in turn serializes uncharging. | 5296 | * in turn serializes uncharging. |
5303 | */ | 5297 | */ |
5298 | VM_BUG_ON_PAGE(!PageLocked(page), page); | ||
5304 | if (page->mem_cgroup) | 5299 | if (page->mem_cgroup) |
5305 | goto out; | 5300 | goto out; |
5301 | |||
5302 | if (do_swap_account) { | ||
5303 | swp_entry_t ent = { .val = page_private(page), }; | ||
5304 | unsigned short id = lookup_swap_cgroup_id(ent); | ||
5305 | |||
5306 | rcu_read_lock(); | ||
5307 | memcg = mem_cgroup_from_id(id); | ||
5308 | if (memcg && !css_tryget_online(&memcg->css)) | ||
5309 | memcg = NULL; | ||
5310 | rcu_read_unlock(); | ||
5311 | } | ||
5306 | } | 5312 | } |
5307 | 5313 | ||
5308 | if (PageTransHuge(page)) { | 5314 | if (PageTransHuge(page)) { |
@@ -5310,8 +5316,6 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, | |||
5310 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); | 5316 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); |
5311 | } | 5317 | } |
5312 | 5318 | ||
5313 | if (do_swap_account && PageSwapCache(page)) | ||
5314 | memcg = try_get_mem_cgroup_from_page(page); | ||
5315 | if (!memcg) | 5319 | if (!memcg) |
5316 | memcg = get_mem_cgroup_from_mm(mm); | 5320 | memcg = get_mem_cgroup_from_mm(mm); |
5317 | 5321 | ||
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index eeda6485e76c..95882692e747 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -130,27 +130,15 @@ static int hwpoison_filter_flags(struct page *p) | |||
130 | * can only guarantee that the page either belongs to the memcg tasks, or is | 130 | * can only guarantee that the page either belongs to the memcg tasks, or is |
131 | * a freed page. | 131 | * a freed page. |
132 | */ | 132 | */ |
133 | #ifdef CONFIG_MEMCG_SWAP | 133 | #ifdef CONFIG_MEMCG |
134 | u64 hwpoison_filter_memcg; | 134 | u64 hwpoison_filter_memcg; |
135 | EXPORT_SYMBOL_GPL(hwpoison_filter_memcg); | 135 | EXPORT_SYMBOL_GPL(hwpoison_filter_memcg); |
136 | static int hwpoison_filter_task(struct page *p) | 136 | static int hwpoison_filter_task(struct page *p) |
137 | { | 137 | { |
138 | struct mem_cgroup *mem; | ||
139 | struct cgroup_subsys_state *css; | ||
140 | unsigned long ino; | ||
141 | |||
142 | if (!hwpoison_filter_memcg) | 138 | if (!hwpoison_filter_memcg) |
143 | return 0; | 139 | return 0; |
144 | 140 | ||
145 | mem = try_get_mem_cgroup_from_page(p); | 141 | if (page_cgroup_ino(p) != hwpoison_filter_memcg) |
146 | if (!mem) | ||
147 | return -EINVAL; | ||
148 | |||
149 | css = &mem->css; | ||
150 | ino = cgroup_ino(css->cgroup); | ||
151 | css_put(css); | ||
152 | |||
153 | if (ino != hwpoison_filter_memcg) | ||
154 | return -EINVAL; | 142 | return -EINVAL; |
155 | 143 | ||
156 | return 0; | 144 | return 0; |
diff --git a/mm/memory.c b/mm/memory.c index 6cd0b2160401..9cb27470fee9 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -3233,7 +3233,7 @@ out: | |||
3233 | static int create_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma, | 3233 | static int create_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma, |
3234 | unsigned long address, pmd_t *pmd, unsigned int flags) | 3234 | unsigned long address, pmd_t *pmd, unsigned int flags) |
3235 | { | 3235 | { |
3236 | if (!vma->vm_ops) | 3236 | if (vma_is_anonymous(vma)) |
3237 | return do_huge_pmd_anonymous_page(mm, vma, address, pmd, flags); | 3237 | return do_huge_pmd_anonymous_page(mm, vma, address, pmd, flags); |
3238 | if (vma->vm_ops->pmd_fault) | 3238 | if (vma->vm_ops->pmd_fault) |
3239 | return vma->vm_ops->pmd_fault(vma, address, pmd, flags); | 3239 | return vma->vm_ops->pmd_fault(vma, address, pmd, flags); |
@@ -3244,7 +3244,7 @@ static int wp_huge_pmd(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3244 | unsigned long address, pmd_t *pmd, pmd_t orig_pmd, | 3244 | unsigned long address, pmd_t *pmd, pmd_t orig_pmd, |
3245 | unsigned int flags) | 3245 | unsigned int flags) |
3246 | { | 3246 | { |
3247 | if (!vma->vm_ops) | 3247 | if (vma_is_anonymous(vma)) |
3248 | return do_huge_pmd_wp_page(mm, vma, address, pmd, orig_pmd); | 3248 | return do_huge_pmd_wp_page(mm, vma, address, pmd, orig_pmd); |
3249 | if (vma->vm_ops->pmd_fault) | 3249 | if (vma->vm_ops->pmd_fault) |
3250 | return vma->vm_ops->pmd_fault(vma, address, pmd, flags); | 3250 | return vma->vm_ops->pmd_fault(vma, address, pmd, flags); |
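create_huge_pmd() and wp_huge_pmd() now test vma_is_anonymous() instead of open-coding !vma->vm_ops. The helper itself is introduced elsewhere in this series and is not visible in this hunk; it is presumably a thin inline along these lines, so the behavior here is unchanged:

/* Assumed definition of the helper used above; at this point in the
 * series it should be equivalent to the old !vma->vm_ops check. */
static inline bool vma_is_anonymous(struct vm_area_struct *vma)
{
	return !vma->vm_ops;
}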
diff --git a/mm/migrate.c b/mm/migrate.c index 02ce25df16c2..c3cb566af3e2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/gfp.h> | 37 | #include <linux/gfp.h> |
38 | #include <linux/balloon_compaction.h> | 38 | #include <linux/balloon_compaction.h> |
39 | #include <linux/mmu_notifier.h> | 39 | #include <linux/mmu_notifier.h> |
40 | #include <linux/page_idle.h> | ||
40 | 41 | ||
41 | #include <asm/tlbflush.h> | 42 | #include <asm/tlbflush.h> |
42 | 43 | ||
@@ -524,6 +525,11 @@ void migrate_page_copy(struct page *newpage, struct page *page) | |||
524 | __set_page_dirty_nobuffers(newpage); | 525 | __set_page_dirty_nobuffers(newpage); |
525 | } | 526 | } |
526 | 527 | ||
528 | if (page_is_young(page)) | ||
529 | set_page_young(newpage); | ||
530 | if (page_is_idle(page)) | ||
531 | set_page_idle(newpage); | ||
532 | |||
527 | /* | 533 | /* |
528 | * Copy NUMA information to the new page, to prevent over-eager | 534 | * Copy NUMA information to the new page, to prevent over-eager |
529 | * future migrations of this same page. | 535 | * future migrations of this same page. |
@@ -612,6 +612,8 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm, | |||
612 | void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, | 612 | void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, |
613 | struct rb_node **rb_link, struct rb_node *rb_parent) | 613 | struct rb_node **rb_link, struct rb_node *rb_parent) |
614 | { | 614 | { |
615 | WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops"); | ||
616 | |||
615 | /* Update tracking information for the gap following the new vma. */ | 617 | /* Update tracking information for the gap following the new vma. */ |
616 | if (vma->vm_next) | 618 | if (vma->vm_next) |
617 | vma_gap_update(vma->vm_next); | 619 | vma_gap_update(vma->vm_next); |
@@ -1260,14 +1262,12 @@ static inline int mlock_future_check(struct mm_struct *mm, | |||
1260 | /* | 1262 | /* |
1261 | * The caller must hold down_write(¤t->mm->mmap_sem). | 1263 | * The caller must hold down_write(¤t->mm->mmap_sem). |
1262 | */ | 1264 | */ |
1263 | 1265 | unsigned long do_mmap(struct file *file, unsigned long addr, | |
1264 | unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | ||
1265 | unsigned long len, unsigned long prot, | 1266 | unsigned long len, unsigned long prot, |
1266 | unsigned long flags, unsigned long pgoff, | 1267 | unsigned long flags, vm_flags_t vm_flags, |
1267 | unsigned long *populate) | 1268 | unsigned long pgoff, unsigned long *populate) |
1268 | { | 1269 | { |
1269 | struct mm_struct *mm = current->mm; | 1270 | struct mm_struct *mm = current->mm; |
1270 | vm_flags_t vm_flags; | ||
1271 | 1271 | ||
1272 | *populate = 0; | 1272 | *populate = 0; |
1273 | 1273 | ||
@@ -1311,7 +1311,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
1311 | * to. we assume access permissions have been handled by the open | 1311 | * to. we assume access permissions have been handled by the open |
1312 | * of the memory object, so we don't do any here. | 1312 | * of the memory object, so we don't do any here. |
1313 | */ | 1313 | */ |
1314 | vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) | | 1314 | vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) | |
1315 | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; | 1315 | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; |
1316 | 1316 | ||
1317 | if (flags & MAP_LOCKED) | 1317 | if (flags & MAP_LOCKED) |
@@ -1638,6 +1638,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr, | |||
1638 | */ | 1638 | */ |
1639 | WARN_ON_ONCE(addr != vma->vm_start); | 1639 | WARN_ON_ONCE(addr != vma->vm_start); |
1640 | 1640 | ||
1641 | /* All file mappings must have ->vm_ops set */ | ||
1642 | if (!vma->vm_ops) { | ||
1643 | static const struct vm_operations_struct dummy_ops = {}; | ||
1644 | vma->vm_ops = &dummy_ops; | ||
1645 | } | ||
1646 | |||
1641 | addr = vma->vm_start; | 1647 | addr = vma->vm_start; |
1642 | vm_flags = vma->vm_flags; | 1648 | vm_flags = vma->vm_flags; |
1643 | } else if (vm_flags & VM_SHARED) { | 1649 | } else if (vm_flags & VM_SHARED) { |
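do_mmap() now takes an explicit vm_flags argument that is OR-ed into the bits derived from prot and flags, so a caller can force extra VM_* bits at mmap time. Existing callers presumably go through a do_mmap_pgoff() wrapper that passes 0 and therefore keeps the old behavior; a sketch of that assumed wrapper (it would live in a header, not in this file):

/* Assumed compatibility wrapper: same semantics as the old
 * do_mmap_pgoff(), i.e. no caller-forced vm_flags. */
static inline unsigned long do_mmap_pgoff(struct file *file,
		unsigned long addr, unsigned long len, unsigned long prot,
		unsigned long flags, unsigned long pgoff,
		unsigned long *populate)
{
	return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
}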
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 3b9b3d0741b2..5fbdd367bbed 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c | |||
@@ -123,6 +123,23 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, | |||
123 | return young; | 123 | return young; |
124 | } | 124 | } |
125 | 125 | ||
126 | int __mmu_notifier_clear_young(struct mm_struct *mm, | ||
127 | unsigned long start, | ||
128 | unsigned long end) | ||
129 | { | ||
130 | struct mmu_notifier *mn; | ||
131 | int young = 0, id; | ||
132 | |||
133 | id = srcu_read_lock(&srcu); | ||
134 | hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { | ||
135 | if (mn->ops->clear_young) | ||
136 | young |= mn->ops->clear_young(mn, mm, start, end); | ||
137 | } | ||
138 | srcu_read_unlock(&srcu, id); | ||
139 | |||
140 | return young; | ||
141 | } | ||
142 | |||
126 | int __mmu_notifier_test_young(struct mm_struct *mm, | 143 | int __mmu_notifier_test_young(struct mm_struct *mm, |
127 | unsigned long address) | 144 | unsigned long address) |
128 | { | 145 | { |
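__mmu_notifier_clear_young() mirrors __mmu_notifier_clear_flush_young() but dispatches to the new ->clear_young() callback, so idle page tracking can clear the secondary-MMU accessed bit without forcing a TLB flush. The header wrappers that ptep_clear_young_notify() and pmdp_clear_young_notify() rely on in mm/page_idle.c are not shown here; presumably they follow the existing pattern:

/* Assumed header-side wrapper, following the clear_flush_young pattern. */
static inline int mmu_notifier_clear_young(struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	if (mm_has_notifiers(mm))
		return __mmu_notifier_clear_young(mm, start, end);
	return 0;
}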
diff --git a/mm/nommu.c b/mm/nommu.c index 1cc0709fcaa5..ab14a2014dea 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -1233,18 +1233,19 @@ enomem: | |||
1233 | /* | 1233 | /* |
1234 | * handle mapping creation for uClinux | 1234 | * handle mapping creation for uClinux |
1235 | */ | 1235 | */ |
1236 | unsigned long do_mmap_pgoff(struct file *file, | 1236 | unsigned long do_mmap(struct file *file, |
1237 | unsigned long addr, | 1237 | unsigned long addr, |
1238 | unsigned long len, | 1238 | unsigned long len, |
1239 | unsigned long prot, | 1239 | unsigned long prot, |
1240 | unsigned long flags, | 1240 | unsigned long flags, |
1241 | unsigned long pgoff, | 1241 | vm_flags_t vm_flags, |
1242 | unsigned long *populate) | 1242 | unsigned long pgoff, |
1243 | unsigned long *populate) | ||
1243 | { | 1244 | { |
1244 | struct vm_area_struct *vma; | 1245 | struct vm_area_struct *vma; |
1245 | struct vm_region *region; | 1246 | struct vm_region *region; |
1246 | struct rb_node *rb; | 1247 | struct rb_node *rb; |
1247 | unsigned long capabilities, vm_flags, result; | 1248 | unsigned long capabilities, result; |
1248 | int ret; | 1249 | int ret; |
1249 | 1250 | ||
1250 | *populate = 0; | 1251 | *populate = 0; |
@@ -1262,7 +1263,7 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
1262 | 1263 | ||
1263 | /* we've determined that we can make the mapping, now translate what we | 1264 | /* we've determined that we can make the mapping, now translate what we |
1264 | * now know into VMA flags */ | 1265 | * now know into VMA flags */ |
1265 | vm_flags = determine_vm_flags(file, prot, flags, capabilities); | 1266 | vm_flags |= determine_vm_flags(file, prot, flags, capabilities); |
1266 | 1267 | ||
1267 | /* we're going to need to record the mapping */ | 1268 | /* we're going to need to record the mapping */ |
1268 | region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); | 1269 | region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); |
diff --git a/mm/page_ext.c b/mm/page_ext.c index d86fd2f5353f..292ca7b8debd 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/vmalloc.h> | 6 | #include <linux/vmalloc.h> |
7 | #include <linux/kmemleak.h> | 7 | #include <linux/kmemleak.h> |
8 | #include <linux/page_owner.h> | 8 | #include <linux/page_owner.h> |
9 | #include <linux/page_idle.h> | ||
9 | 10 | ||
10 | /* | 11 | /* |
11 | * struct page extension | 12 | * struct page extension |
@@ -59,6 +60,9 @@ static struct page_ext_operations *page_ext_ops[] = { | |||
59 | #ifdef CONFIG_PAGE_OWNER | 60 | #ifdef CONFIG_PAGE_OWNER |
60 | &page_owner_ops, | 61 | &page_owner_ops, |
61 | #endif | 62 | #endif |
63 | #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) | ||
64 | &page_idle_ops, | ||
65 | #endif | ||
62 | }; | 66 | }; |
63 | 67 | ||
64 | static unsigned long total_usage; | 68 | static unsigned long total_usage; |
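page_idle_ops is only registered when CONFIG_64BIT is unset: 64-bit kernels have spare page flags for the young/idle bits, while 32-bit kernels keep them in the per-page page_ext entry that this array describes. A sketch of what the 32-bit accessors used throughout this patch are assumed to look like (the real definitions live in a header, not in this hunk; the bit names are illustrative):

/* Assumed !CONFIG_64BIT flavor of the idle-bit helpers, backed by
 * page_ext instead of struct page flags. */
static inline bool page_is_idle(struct page *page)
{
	return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
}

static inline void set_page_idle(struct page *page)
{
	set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
}

static inline void clear_page_idle(struct page *page)
{
	clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags);
}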
diff --git a/mm/page_idle.c b/mm/page_idle.c new file mode 100644 index 000000000000..d5dd79041484 --- /dev/null +++ b/mm/page_idle.c | |||
@@ -0,0 +1,232 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/bootmem.h> | ||
3 | #include <linux/fs.h> | ||
4 | #include <linux/sysfs.h> | ||
5 | #include <linux/kobject.h> | ||
6 | #include <linux/mm.h> | ||
7 | #include <linux/mmzone.h> | ||
8 | #include <linux/pagemap.h> | ||
9 | #include <linux/rmap.h> | ||
10 | #include <linux/mmu_notifier.h> | ||
11 | #include <linux/page_ext.h> | ||
12 | #include <linux/page_idle.h> | ||
13 | |||
14 | #define BITMAP_CHUNK_SIZE sizeof(u64) | ||
15 | #define BITMAP_CHUNK_BITS (BITMAP_CHUNK_SIZE * BITS_PER_BYTE) | ||
16 | |||
17 | /* | ||
18 | * Idle page tracking only considers user memory pages, for other types of | ||
19 | * pages the idle flag is always unset and an attempt to set it is silently | ||
20 | * ignored. | ||
21 | * | ||
22 | * We treat a page as a user memory page if it is on an LRU list, because it is | ||
23 | * always safe to pass such a page to rmap_walk(), which is essential for idle | ||
24 | * page tracking. With such an indicator of user pages we can skip isolated | ||
25 | * pages, but since there are not usually many of them, it will hardly affect | ||
26 | * the overall result. | ||
27 | * | ||
28 | * This function tries to get a user memory page by pfn as described above. | ||
29 | */ | ||
30 | static struct page *page_idle_get_page(unsigned long pfn) | ||
31 | { | ||
32 | struct page *page; | ||
33 | struct zone *zone; | ||
34 | |||
35 | if (!pfn_valid(pfn)) | ||
36 | return NULL; | ||
37 | |||
38 | page = pfn_to_page(pfn); | ||
39 | if (!page || !PageLRU(page) || | ||
40 | !get_page_unless_zero(page)) | ||
41 | return NULL; | ||
42 | |||
43 | zone = page_zone(page); | ||
44 | spin_lock_irq(&zone->lru_lock); | ||
45 | if (unlikely(!PageLRU(page))) { | ||
46 | put_page(page); | ||
47 | page = NULL; | ||
48 | } | ||
49 | spin_unlock_irq(&zone->lru_lock); | ||
50 | return page; | ||
51 | } | ||
52 | |||
53 | static int page_idle_clear_pte_refs_one(struct page *page, | ||
54 | struct vm_area_struct *vma, | ||
55 | unsigned long addr, void *arg) | ||
56 | { | ||
57 | struct mm_struct *mm = vma->vm_mm; | ||
58 | spinlock_t *ptl; | ||
59 | pmd_t *pmd; | ||
60 | pte_t *pte; | ||
61 | bool referenced = false; | ||
62 | |||
63 | if (unlikely(PageTransHuge(page))) { | ||
64 | pmd = page_check_address_pmd(page, mm, addr, | ||
65 | PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl); | ||
66 | if (pmd) { | ||
67 | referenced = pmdp_clear_young_notify(vma, addr, pmd); | ||
68 | spin_unlock(ptl); | ||
69 | } | ||
70 | } else { | ||
71 | pte = page_check_address(page, mm, addr, &ptl, 0); | ||
72 | if (pte) { | ||
73 | referenced = ptep_clear_young_notify(vma, addr, pte); | ||
74 | pte_unmap_unlock(pte, ptl); | ||
75 | } | ||
76 | } | ||
77 | if (referenced) { | ||
78 | clear_page_idle(page); | ||
79 | /* | ||
80 | * We cleared the referenced bit in a mapping to this page. To | ||
81 | * avoid interference with page reclaim, mark it young so that | ||
82 | * page_referenced() will return > 0. | ||
83 | */ | ||
84 | set_page_young(page); | ||
85 | } | ||
86 | return SWAP_AGAIN; | ||
87 | } | ||
88 | |||
89 | static void page_idle_clear_pte_refs(struct page *page) | ||
90 | { | ||
91 | /* | ||
92 | * Since rwc.arg is unused, rwc is effectively immutable, so we | ||
93 | * can make it static const to save some cycles and stack. | ||
94 | */ | ||
95 | static const struct rmap_walk_control rwc = { | ||
96 | .rmap_one = page_idle_clear_pte_refs_one, | ||
97 | .anon_lock = page_lock_anon_vma_read, | ||
98 | }; | ||
99 | bool need_lock; | ||
100 | |||
101 | if (!page_mapped(page) || | ||
102 | !page_rmapping(page)) | ||
103 | return; | ||
104 | |||
105 | need_lock = !PageAnon(page) || PageKsm(page); | ||
106 | if (need_lock && !trylock_page(page)) | ||
107 | return; | ||
108 | |||
109 | rmap_walk(page, (struct rmap_walk_control *)&rwc); | ||
110 | |||
111 | if (need_lock) | ||
112 | unlock_page(page); | ||
113 | } | ||
114 | |||
115 | static ssize_t page_idle_bitmap_read(struct file *file, struct kobject *kobj, | ||
116 | struct bin_attribute *attr, char *buf, | ||
117 | loff_t pos, size_t count) | ||
118 | { | ||
119 | u64 *out = (u64 *)buf; | ||
120 | struct page *page; | ||
121 | unsigned long pfn, end_pfn; | ||
122 | int bit; | ||
123 | |||
124 | if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE) | ||
125 | return -EINVAL; | ||
126 | |||
127 | pfn = pos * BITS_PER_BYTE; | ||
128 | if (pfn >= max_pfn) | ||
129 | return 0; | ||
130 | |||
131 | end_pfn = pfn + count * BITS_PER_BYTE; | ||
132 | if (end_pfn > max_pfn) | ||
133 | end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); | ||
134 | |||
135 | for (; pfn < end_pfn; pfn++) { | ||
136 | bit = pfn % BITMAP_CHUNK_BITS; | ||
137 | if (!bit) | ||
138 | *out = 0ULL; | ||
139 | page = page_idle_get_page(pfn); | ||
140 | if (page) { | ||
141 | if (page_is_idle(page)) { | ||
142 | /* | ||
143 | * The page might have been referenced via a | ||
144 | * pte, in which case it is not idle. Clear | ||
145 | * refs and recheck. | ||
146 | */ | ||
147 | page_idle_clear_pte_refs(page); | ||
148 | if (page_is_idle(page)) | ||
149 | *out |= 1ULL << bit; | ||
150 | } | ||
151 | put_page(page); | ||
152 | } | ||
153 | if (bit == BITMAP_CHUNK_BITS - 1) | ||
154 | out++; | ||
155 | cond_resched(); | ||
156 | } | ||
157 | return (char *)out - buf; | ||
158 | } | ||
159 | |||
160 | static ssize_t page_idle_bitmap_write(struct file *file, struct kobject *kobj, | ||
161 | struct bin_attribute *attr, char *buf, | ||
162 | loff_t pos, size_t count) | ||
163 | { | ||
164 | const u64 *in = (u64 *)buf; | ||
165 | struct page *page; | ||
166 | unsigned long pfn, end_pfn; | ||
167 | int bit; | ||
168 | |||
169 | if (pos % BITMAP_CHUNK_SIZE || count % BITMAP_CHUNK_SIZE) | ||
170 | return -EINVAL; | ||
171 | |||
172 | pfn = pos * BITS_PER_BYTE; | ||
173 | if (pfn >= max_pfn) | ||
174 | return -ENXIO; | ||
175 | |||
176 | end_pfn = pfn + count * BITS_PER_BYTE; | ||
177 | if (end_pfn > max_pfn) | ||
178 | end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS); | ||
179 | |||
180 | for (; pfn < end_pfn; pfn++) { | ||
181 | bit = pfn % BITMAP_CHUNK_BITS; | ||
182 | if ((*in >> bit) & 1) { | ||
183 | page = page_idle_get_page(pfn); | ||
184 | if (page) { | ||
185 | page_idle_clear_pte_refs(page); | ||
186 | set_page_idle(page); | ||
187 | put_page(page); | ||
188 | } | ||
189 | } | ||
190 | if (bit == BITMAP_CHUNK_BITS - 1) | ||
191 | in++; | ||
192 | cond_resched(); | ||
193 | } | ||
194 | return (char *)in - buf; | ||
195 | } | ||
196 | |||
197 | static struct bin_attribute page_idle_bitmap_attr = | ||
198 | __BIN_ATTR(bitmap, S_IRUSR | S_IWUSR, | ||
199 | page_idle_bitmap_read, page_idle_bitmap_write, 0); | ||
200 | |||
201 | static struct bin_attribute *page_idle_bin_attrs[] = { | ||
202 | &page_idle_bitmap_attr, | ||
203 | NULL, | ||
204 | }; | ||
205 | |||
206 | static struct attribute_group page_idle_attr_group = { | ||
207 | .bin_attrs = page_idle_bin_attrs, | ||
208 | .name = "page_idle", | ||
209 | }; | ||
210 | |||
211 | #ifndef CONFIG_64BIT | ||
212 | static bool need_page_idle(void) | ||
213 | { | ||
214 | return true; | ||
215 | } | ||
216 | struct page_ext_operations page_idle_ops = { | ||
217 | .need = need_page_idle, | ||
218 | }; | ||
219 | #endif | ||
220 | |||
221 | static int __init page_idle_init(void) | ||
222 | { | ||
223 | int err; | ||
224 | |||
225 | err = sysfs_create_group(mm_kobj, &page_idle_attr_group); | ||
226 | if (err) { | ||
227 | pr_err("page_idle: register sysfs failed\n"); | ||
228 | return err; | ||
229 | } | ||
230 | return 0; | ||
231 | } | ||
232 | subsys_initcall(page_idle_init); | ||
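The two handlers above require the file offset and length to be multiples of 8 bytes, map byte offset pos to pfn = pos * 8, and pack pfn i into bit i % 64 of 64-bit chunk i / 64, so chunk n sits at file offset n * 8 and covers pfns n * 64 .. n * 64 + 63. A minimal userspace sketch (hypothetical helper, error handling mostly omitted) that marks one chunk of pages idle and later re-reads it:

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

/* Hypothetical user of /sys/kernel/mm/page_idle/bitmap: returns 1 if all
 * 64 pages of the given chunk stayed idle, 0 otherwise. */
static int chunk_stayed_idle(unsigned long chunk)
{
	uint64_t bits = ~0ULL;	/* set all 64 idle bits in this chunk */
	int fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);

	if (fd < 0)
		return -1;
	pwrite(fd, &bits, sizeof(bits), chunk * sizeof(bits));
	/* ... let the workload run for the sampling interval ... */
	pread(fd, &bits, sizeof(bits), chunk * sizeof(bits));
	close(fd);
	return bits == ~0ULL;
}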
@@ -59,6 +59,7 @@ | |||
59 | #include <linux/migrate.h> | 59 | #include <linux/migrate.h> |
60 | #include <linux/hugetlb.h> | 60 | #include <linux/hugetlb.h> |
61 | #include <linux/backing-dev.h> | 61 | #include <linux/backing-dev.h> |
62 | #include <linux/page_idle.h> | ||
62 | 63 | ||
63 | #include <asm/tlbflush.h> | 64 | #include <asm/tlbflush.h> |
64 | 65 | ||
@@ -886,6 +887,11 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma, | |||
886 | pte_unmap_unlock(pte, ptl); | 887 | pte_unmap_unlock(pte, ptl); |
887 | } | 888 | } |
888 | 889 | ||
890 | if (referenced) | ||
891 | clear_page_idle(page); | ||
892 | if (test_and_clear_page_young(page)) | ||
893 | referenced++; | ||
894 | |||
889 | if (referenced) { | 895 | if (referenced) { |
890 | pra->referenced++; | 896 | pra->referenced++; |
891 | pra->vm_flags |= vma->vm_flags; | 897 | pra->vm_flags |= vma->vm_flags; |
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
33 | #include <linux/uio.h> | 33 | #include <linux/uio.h> |
34 | #include <linux/hugetlb.h> | 34 | #include <linux/hugetlb.h> |
35 | #include <linux/page_idle.h> | ||
35 | 36 | ||
36 | #include "internal.h" | 37 | #include "internal.h" |
37 | 38 | ||
@@ -622,6 +623,8 @@ void mark_page_accessed(struct page *page) | |||
622 | } else if (!PageReferenced(page)) { | 623 | } else if (!PageReferenced(page)) { |
623 | SetPageReferenced(page); | 624 | SetPageReferenced(page); |
624 | } | 625 | } |
626 | if (page_is_idle(page)) | ||
627 | clear_page_idle(page); | ||
625 | } | 628 | } |
626 | EXPORT_SYMBOL(mark_page_accessed); | 629 | EXPORT_SYMBOL(mark_page_accessed); |
627 | 630 | ||
diff --git a/mm/zpool.c b/mm/zpool.c index 68d2dd8ed2d8..8f670d3e8706 100644 --- a/mm/zpool.c +++ b/mm/zpool.c | |||
@@ -100,6 +100,39 @@ static void zpool_put_driver(struct zpool_driver *driver) | |||
100 | } | 100 | } |
101 | 101 | ||
102 | /** | 102 | /** |
103 | * zpool_has_pool() - Check if the pool driver is available | ||
104 | * @type The type of the zpool to check (e.g. zbud, zsmalloc) | ||
105 | * | ||
106 | * This checks if the @type pool driver is available. This will try to load | ||
107 | * the requested module, if needed, but there is no guarantee the module will | ||
108 | * still be loaded and available immediately after calling. If this returns | ||
109 | * true, the caller should assume the pool is available, but must be prepared | ||
110 | * to handle @zpool_create_pool() returning failure. However, if this | ||
111 | * returns false, the caller should assume the requested pool type is not | ||
112 | * available; either the requested pool type module does not exist, or could | ||
113 | * not be loaded, and calling @zpool_create_pool() with the pool type will | ||
114 | * fail. | ||
115 | * | ||
116 | * Returns: true if @type pool is available, false if not | ||
117 | */ | ||
118 | bool zpool_has_pool(char *type) | ||
119 | { | ||
120 | struct zpool_driver *driver = zpool_get_driver(type); | ||
121 | |||
122 | if (!driver) { | ||
123 | request_module("zpool-%s", type); | ||
124 | driver = zpool_get_driver(type); | ||
125 | } | ||
126 | |||
127 | if (!driver) | ||
128 | return false; | ||
129 | |||
130 | zpool_put_driver(driver); | ||
131 | return true; | ||
132 | } | ||
133 | EXPORT_SYMBOL(zpool_has_pool); | ||
134 | |||
135 | /** | ||
103 | * zpool_create_pool() - Create a new zpool | 136 | * zpool_create_pool() - Create a new zpool |
104 | * @type The type of the zpool to create (e.g. zbud, zsmalloc) | 137 | * @type The type of the zpool to create (e.g. zbud, zsmalloc) |
105 | * @name The name of the zpool (e.g. zram0, zswap) | 138 | * @name The name of the zpool (e.g. zram0, zswap) |
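zpool_has_pool() is advisory: it may modprobe the driver, but nothing pins it, so a later zpool_create_pool() can still fail and callers must handle both steps, roughly:

/* Sketch of the expected caller pattern; the pool type, name and gfp
 * mask are illustrative. */
if (!zpool_has_pool("zbud"))
	return -ENODEV;		/* driver absent and not loadable */

pool = zpool_create_pool("zbud", "example", GFP_KERNEL, NULL);
if (!pool)
	return -ENOMEM;		/* e.g. raced with module unload */

__zswap_pool_create_fallback() later in this patch follows exactly this check-then-create shape.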
diff --git a/mm/zswap.c b/mm/zswap.c index 48a1d081e2a5..4043df7c672f 100644 --- a/mm/zswap.c +++ b/mm/zswap.c | |||
@@ -80,85 +80,54 @@ static u64 zswap_duplicate_entry; | |||
80 | static bool zswap_enabled; | 80 | static bool zswap_enabled; |
81 | module_param_named(enabled, zswap_enabled, bool, 0644); | 81 | module_param_named(enabled, zswap_enabled, bool, 0644); |
82 | 82 | ||
83 | /* Compressor to be used by zswap (fixed at boot for now) */ | 83 | /* Crypto compressor to use */ |
84 | #define ZSWAP_COMPRESSOR_DEFAULT "lzo" | 84 | #define ZSWAP_COMPRESSOR_DEFAULT "lzo" |
85 | static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; | 85 | static char zswap_compressor[CRYPTO_MAX_ALG_NAME] = ZSWAP_COMPRESSOR_DEFAULT; |
86 | module_param_named(compressor, zswap_compressor, charp, 0444); | 86 | static struct kparam_string zswap_compressor_kparam = { |
87 | 87 | .string = zswap_compressor, | |
88 | /* The maximum percentage of memory that the compressed pool can occupy */ | 88 | .maxlen = sizeof(zswap_compressor), |
89 | static unsigned int zswap_max_pool_percent = 20; | 89 | }; |
90 | module_param_named(max_pool_percent, | 90 | static int zswap_compressor_param_set(const char *, |
91 | zswap_max_pool_percent, uint, 0644); | 91 | const struct kernel_param *); |
92 | static struct kernel_param_ops zswap_compressor_param_ops = { | ||
93 | .set = zswap_compressor_param_set, | ||
94 | .get = param_get_string, | ||
95 | }; | ||
96 | module_param_cb(compressor, &zswap_compressor_param_ops, | ||
97 | &zswap_compressor_kparam, 0644); | ||
92 | 98 | ||
93 | /* Compressed storage to use */ | 99 | /* Compressed storage zpool to use */ |
94 | #define ZSWAP_ZPOOL_DEFAULT "zbud" | 100 | #define ZSWAP_ZPOOL_DEFAULT "zbud" |
95 | static char *zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; | 101 | static char zswap_zpool_type[32 /* arbitrary */] = ZSWAP_ZPOOL_DEFAULT; |
96 | module_param_named(zpool, zswap_zpool_type, charp, 0444); | 102 | static struct kparam_string zswap_zpool_kparam = { |
103 | .string = zswap_zpool_type, | ||
104 | .maxlen = sizeof(zswap_zpool_type), | ||
105 | }; | ||
106 | static int zswap_zpool_param_set(const char *, const struct kernel_param *); | ||
107 | static struct kernel_param_ops zswap_zpool_param_ops = { | ||
108 | .set = zswap_zpool_param_set, | ||
109 | .get = param_get_string, | ||
110 | }; | ||
111 | module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_kparam, 0644); | ||
97 | 112 | ||
98 | /* zpool is shared by all of zswap backend */ | 113 | /* The maximum percentage of memory that the compressed pool can occupy */ |
99 | static struct zpool *zswap_pool; | 114 | static unsigned int zswap_max_pool_percent = 20; |
115 | module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644); | ||
100 | 116 | ||
101 | /********************************* | 117 | /********************************* |
102 | * compression functions | 118 | * data structures |
103 | **********************************/ | 119 | **********************************/ |
104 | /* per-cpu compression transforms */ | ||
105 | static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms; | ||
106 | 120 | ||
107 | enum comp_op { | 121 | struct zswap_pool { |
108 | ZSWAP_COMPOP_COMPRESS, | 122 | struct zpool *zpool; |
109 | ZSWAP_COMPOP_DECOMPRESS | 123 | struct crypto_comp * __percpu *tfm; |
124 | struct kref kref; | ||
125 | struct list_head list; | ||
126 | struct rcu_head rcu_head; | ||
127 | struct notifier_block notifier; | ||
128 | char tfm_name[CRYPTO_MAX_ALG_NAME]; | ||
110 | }; | 129 | }; |
111 | 130 | ||
112 | static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen, | ||
113 | u8 *dst, unsigned int *dlen) | ||
114 | { | ||
115 | struct crypto_comp *tfm; | ||
116 | int ret; | ||
117 | |||
118 | tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu()); | ||
119 | switch (op) { | ||
120 | case ZSWAP_COMPOP_COMPRESS: | ||
121 | ret = crypto_comp_compress(tfm, src, slen, dst, dlen); | ||
122 | break; | ||
123 | case ZSWAP_COMPOP_DECOMPRESS: | ||
124 | ret = crypto_comp_decompress(tfm, src, slen, dst, dlen); | ||
125 | break; | ||
126 | default: | ||
127 | ret = -EINVAL; | ||
128 | } | ||
129 | |||
130 | put_cpu(); | ||
131 | return ret; | ||
132 | } | ||
133 | |||
134 | static int __init zswap_comp_init(void) | ||
135 | { | ||
136 | if (!crypto_has_comp(zswap_compressor, 0, 0)) { | ||
137 | pr_info("%s compressor not available\n", zswap_compressor); | ||
138 | /* fall back to default compressor */ | ||
139 | zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; | ||
140 | if (!crypto_has_comp(zswap_compressor, 0, 0)) | ||
141 | /* can't even load the default compressor */ | ||
142 | return -ENODEV; | ||
143 | } | ||
144 | pr_info("using %s compressor\n", zswap_compressor); | ||
145 | |||
146 | /* alloc percpu transforms */ | ||
147 | zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *); | ||
148 | if (!zswap_comp_pcpu_tfms) | ||
149 | return -ENOMEM; | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | static void __init zswap_comp_exit(void) | ||
154 | { | ||
155 | /* free percpu transforms */ | ||
156 | free_percpu(zswap_comp_pcpu_tfms); | ||
157 | } | ||
158 | |||
159 | /********************************* | ||
160 | * data structures | ||
161 | **********************************/ | ||
162 | /* | 131 | /* |
163 | * struct zswap_entry | 132 | * struct zswap_entry |
164 | * | 133 | * |
@@ -166,22 +135,24 @@ static void __init zswap_comp_exit(void) | |||
166 | * page within zswap. | 135 | * page within zswap. |
167 | * | 136 | * |
168 | * rbnode - links the entry into red-black tree for the appropriate swap type | 137 | * rbnode - links the entry into red-black tree for the appropriate swap type |
138 | * offset - the swap offset for the entry. Index into the red-black tree. | ||
169 | * refcount - the number of outstanding references to the entry. This is needed | 139 | * refcount - the number of outstanding references to the entry. This is needed |
170 | * to protect against premature freeing of the entry by | 140 | * to protect against premature freeing of the entry by |
171 | * concurrent calls to load, invalidate, and writeback. The lock | 141 | * concurrent calls to load, invalidate, and writeback. The lock |
172 | * for the zswap_tree structure that contains the entry must | 142 | * for the zswap_tree structure that contains the entry must |
173 | * be held while changing the refcount. Since the lock must | 143 | * be held while changing the refcount. Since the lock must |
174 | * be held, there is no reason to also make refcount atomic. | 144 | * be held, there is no reason to also make refcount atomic. |
175 | * offset - the swap offset for the entry. Index into the red-black tree. | ||
176 | * handle - zpool allocation handle that stores the compressed page data | ||
177 | * length - the length in bytes of the compressed page data. Needed during | 145 | * length - the length in bytes of the compressed page data. Needed during |
178 | * decompression | 146 | * decompression |
147 | * pool - the zswap_pool the entry's data is in | ||
148 | * handle - zpool allocation handle that stores the compressed page data | ||
179 | */ | 149 | */ |
180 | struct zswap_entry { | 150 | struct zswap_entry { |
181 | struct rb_node rbnode; | 151 | struct rb_node rbnode; |
182 | pgoff_t offset; | 152 | pgoff_t offset; |
183 | int refcount; | 153 | int refcount; |
184 | unsigned int length; | 154 | unsigned int length; |
155 | struct zswap_pool *pool; | ||
185 | unsigned long handle; | 156 | unsigned long handle; |
186 | }; | 157 | }; |
187 | 158 | ||
@@ -201,6 +172,51 @@ struct zswap_tree { | |||
201 | 172 | ||
202 | static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; | 173 | static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; |
203 | 174 | ||
175 | /* RCU-protected iteration */ | ||
176 | static LIST_HEAD(zswap_pools); | ||
177 | /* protects zswap_pools list modification */ | ||
178 | static DEFINE_SPINLOCK(zswap_pools_lock); | ||
179 | |||
180 | /* used by param callback function */ | ||
181 | static bool zswap_init_started; | ||
182 | |||
183 | /********************************* | ||
184 | * helpers and fwd declarations | ||
185 | **********************************/ | ||
186 | |||
187 | #define zswap_pool_debug(msg, p) \ | ||
188 | pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ | ||
189 | zpool_get_type((p)->zpool)) | ||
190 | |||
191 | static int zswap_writeback_entry(struct zpool *pool, unsigned long handle); | ||
192 | static int zswap_pool_get(struct zswap_pool *pool); | ||
193 | static void zswap_pool_put(struct zswap_pool *pool); | ||
194 | |||
195 | static const struct zpool_ops zswap_zpool_ops = { | ||
196 | .evict = zswap_writeback_entry | ||
197 | }; | ||
198 | |||
199 | static bool zswap_is_full(void) | ||
200 | { | ||
201 | return totalram_pages * zswap_max_pool_percent / 100 < | ||
202 | DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); | ||
203 | } | ||
204 | |||
205 | static void zswap_update_total_size(void) | ||
206 | { | ||
207 | struct zswap_pool *pool; | ||
208 | u64 total = 0; | ||
209 | |||
210 | rcu_read_lock(); | ||
211 | |||
212 | list_for_each_entry_rcu(pool, &zswap_pools, list) | ||
213 | total += zpool_get_total_size(pool->zpool); | ||
214 | |||
215 | rcu_read_unlock(); | ||
216 | |||
217 | zswap_pool_total_size = total; | ||
218 | } | ||
219 | |||
204 | /********************************* | 220 | /********************************* |
205 | * zswap entry functions | 221 | * zswap entry functions |
206 | **********************************/ | 222 | **********************************/ |
@@ -294,10 +310,11 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry) | |||
294 | */ | 310 | */ |
295 | static void zswap_free_entry(struct zswap_entry *entry) | 311 | static void zswap_free_entry(struct zswap_entry *entry) |
296 | { | 312 | { |
297 | zpool_free(zswap_pool, entry->handle); | 313 | zpool_free(entry->pool->zpool, entry->handle); |
314 | zswap_pool_put(entry->pool); | ||
298 | zswap_entry_cache_free(entry); | 315 | zswap_entry_cache_free(entry); |
299 | atomic_dec(&zswap_stored_pages); | 316 | atomic_dec(&zswap_stored_pages); |
300 | zswap_pool_total_size = zpool_get_total_size(zswap_pool); | 317 | zswap_update_total_size(); |
301 | } | 318 | } |
302 | 319 | ||
303 | /* caller must hold the tree lock */ | 320 | /* caller must hold the tree lock */ |
@@ -339,35 +356,21 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root, | |||
339 | **********************************/ | 356 | **********************************/ |
340 | static DEFINE_PER_CPU(u8 *, zswap_dstmem); | 357 | static DEFINE_PER_CPU(u8 *, zswap_dstmem); |
341 | 358 | ||
342 | static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu) | 359 | static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu) |
343 | { | 360 | { |
344 | struct crypto_comp *tfm; | ||
345 | u8 *dst; | 361 | u8 *dst; |
346 | 362 | ||
347 | switch (action) { | 363 | switch (action) { |
348 | case CPU_UP_PREPARE: | 364 | case CPU_UP_PREPARE: |
349 | tfm = crypto_alloc_comp(zswap_compressor, 0, 0); | ||
350 | if (IS_ERR(tfm)) { | ||
351 | pr_err("can't allocate compressor transform\n"); | ||
352 | return NOTIFY_BAD; | ||
353 | } | ||
354 | *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm; | ||
355 | dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); | 365 | dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); |
356 | if (!dst) { | 366 | if (!dst) { |
357 | pr_err("can't allocate compressor buffer\n"); | 367 | pr_err("can't allocate compressor buffer\n"); |
358 | crypto_free_comp(tfm); | ||
359 | *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL; | ||
360 | return NOTIFY_BAD; | 368 | return NOTIFY_BAD; |
361 | } | 369 | } |
362 | per_cpu(zswap_dstmem, cpu) = dst; | 370 | per_cpu(zswap_dstmem, cpu) = dst; |
363 | break; | 371 | break; |
364 | case CPU_DEAD: | 372 | case CPU_DEAD: |
365 | case CPU_UP_CANCELED: | 373 | case CPU_UP_CANCELED: |
366 | tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu); | ||
367 | if (tfm) { | ||
368 | crypto_free_comp(tfm); | ||
369 | *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL; | ||
370 | } | ||
371 | dst = per_cpu(zswap_dstmem, cpu); | 374 | dst = per_cpu(zswap_dstmem, cpu); |
372 | kfree(dst); | 375 | kfree(dst); |
373 | per_cpu(zswap_dstmem, cpu) = NULL; | 376 | per_cpu(zswap_dstmem, cpu) = NULL; |
@@ -378,43 +381,398 @@ static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu) | |||
378 | return NOTIFY_OK; | 381 | return NOTIFY_OK; |
379 | } | 382 | } |
380 | 383 | ||
381 | static int zswap_cpu_notifier(struct notifier_block *nb, | 384 | static int zswap_cpu_dstmem_notifier(struct notifier_block *nb, |
382 | unsigned long action, void *pcpu) | 385 | unsigned long action, void *pcpu) |
383 | { | 386 | { |
384 | unsigned long cpu = (unsigned long)pcpu; | 387 | return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu); |
385 | return __zswap_cpu_notifier(action, cpu); | ||
386 | } | 388 | } |
387 | 389 | ||
388 | static struct notifier_block zswap_cpu_notifier_block = { | 390 | static struct notifier_block zswap_dstmem_notifier = { |
389 | .notifier_call = zswap_cpu_notifier | 391 | .notifier_call = zswap_cpu_dstmem_notifier, |
390 | }; | 392 | }; |
391 | 393 | ||
392 | static int __init zswap_cpu_init(void) | 394 | static int __init zswap_cpu_dstmem_init(void) |
395 | { | ||
396 | unsigned long cpu; | ||
397 | |||
398 | cpu_notifier_register_begin(); | ||
399 | for_each_online_cpu(cpu) | ||
400 | if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) == | ||
401 | NOTIFY_BAD) | ||
402 | goto cleanup; | ||
403 | __register_cpu_notifier(&zswap_dstmem_notifier); | ||
404 | cpu_notifier_register_done(); | ||
405 | return 0; | ||
406 | |||
407 | cleanup: | ||
408 | for_each_online_cpu(cpu) | ||
409 | __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu); | ||
410 | cpu_notifier_register_done(); | ||
411 | return -ENOMEM; | ||
412 | } | ||
413 | |||
414 | static void zswap_cpu_dstmem_destroy(void) | ||
415 | { | ||
416 | unsigned long cpu; | ||
417 | |||
418 | cpu_notifier_register_begin(); | ||
419 | for_each_online_cpu(cpu) | ||
420 | __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu); | ||
421 | __unregister_cpu_notifier(&zswap_dstmem_notifier); | ||
422 | cpu_notifier_register_done(); | ||
423 | } | ||
424 | |||
425 | static int __zswap_cpu_comp_notifier(struct zswap_pool *pool, | ||
426 | unsigned long action, unsigned long cpu) | ||
427 | { | ||
428 | struct crypto_comp *tfm; | ||
429 | |||
430 | switch (action) { | ||
431 | case CPU_UP_PREPARE: | ||
432 | if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu))) | ||
433 | break; | ||
434 | tfm = crypto_alloc_comp(pool->tfm_name, 0, 0); | ||
435 | if (IS_ERR_OR_NULL(tfm)) { | ||
436 | pr_err("could not alloc crypto comp %s : %ld\n", | ||
437 | pool->tfm_name, PTR_ERR(tfm)); | ||
438 | return NOTIFY_BAD; | ||
439 | } | ||
440 | *per_cpu_ptr(pool->tfm, cpu) = tfm; | ||
441 | break; | ||
442 | case CPU_DEAD: | ||
443 | case CPU_UP_CANCELED: | ||
444 | tfm = *per_cpu_ptr(pool->tfm, cpu); | ||
445 | if (!IS_ERR_OR_NULL(tfm)) | ||
446 | crypto_free_comp(tfm); | ||
447 | *per_cpu_ptr(pool->tfm, cpu) = NULL; | ||
448 | break; | ||
449 | default: | ||
450 | break; | ||
451 | } | ||
452 | return NOTIFY_OK; | ||
453 | } | ||
454 | |||
455 | static int zswap_cpu_comp_notifier(struct notifier_block *nb, | ||
456 | unsigned long action, void *pcpu) | ||
457 | { | ||
458 | unsigned long cpu = (unsigned long)pcpu; | ||
459 | struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier); | ||
460 | |||
461 | return __zswap_cpu_comp_notifier(pool, action, cpu); | ||
462 | } | ||
463 | |||
464 | static int zswap_cpu_comp_init(struct zswap_pool *pool) | ||
393 | { | 465 | { |
394 | unsigned long cpu; | 466 | unsigned long cpu; |
395 | 467 | ||
468 | memset(&pool->notifier, 0, sizeof(pool->notifier)); | ||
469 | pool->notifier.notifier_call = zswap_cpu_comp_notifier; | ||
470 | |||
396 | cpu_notifier_register_begin(); | 471 | cpu_notifier_register_begin(); |
397 | for_each_online_cpu(cpu) | 472 | for_each_online_cpu(cpu) |
398 | if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK) | 473 | if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) == |
474 | NOTIFY_BAD) | ||
399 | goto cleanup; | 475 | goto cleanup; |
400 | __register_cpu_notifier(&zswap_cpu_notifier_block); | 476 | __register_cpu_notifier(&pool->notifier); |
401 | cpu_notifier_register_done(); | 477 | cpu_notifier_register_done(); |
402 | return 0; | 478 | return 0; |
403 | 479 | ||
404 | cleanup: | 480 | cleanup: |
405 | for_each_online_cpu(cpu) | 481 | for_each_online_cpu(cpu) |
406 | __zswap_cpu_notifier(CPU_UP_CANCELED, cpu); | 482 | __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu); |
407 | cpu_notifier_register_done(); | 483 | cpu_notifier_register_done(); |
408 | return -ENOMEM; | 484 | return -ENOMEM; |
409 | } | 485 | } |
410 | 486 | ||
487 | static void zswap_cpu_comp_destroy(struct zswap_pool *pool) | ||
488 | { | ||
489 | unsigned long cpu; | ||
490 | |||
491 | cpu_notifier_register_begin(); | ||
492 | for_each_online_cpu(cpu) | ||
493 | __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu); | ||
494 | __unregister_cpu_notifier(&pool->notifier); | ||
495 | cpu_notifier_register_done(); | ||
496 | } | ||
497 | |||
411 | /********************************* | 498 | /********************************* |
412 | * helpers | 499 | * pool functions |
413 | **********************************/ | 500 | **********************************/ |
414 | static bool zswap_is_full(void) | 501 | |
502 | static struct zswap_pool *__zswap_pool_current(void) | ||
415 | { | 503 | { |
416 | return totalram_pages * zswap_max_pool_percent / 100 < | 504 | struct zswap_pool *pool; |
417 | DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); | 505 | |
506 | pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list); | ||
507 | WARN_ON(!pool); | ||
508 | |||
509 | return pool; | ||
510 | } | ||
511 | |||
512 | static struct zswap_pool *zswap_pool_current(void) | ||
513 | { | ||
514 | assert_spin_locked(&zswap_pools_lock); | ||
515 | |||
516 | return __zswap_pool_current(); | ||
517 | } | ||
518 | |||
519 | static struct zswap_pool *zswap_pool_current_get(void) | ||
520 | { | ||
521 | struct zswap_pool *pool; | ||
522 | |||
523 | rcu_read_lock(); | ||
524 | |||
525 | pool = __zswap_pool_current(); | ||
526 | if (!pool || !zswap_pool_get(pool)) | ||
527 | pool = NULL; | ||
528 | |||
529 | rcu_read_unlock(); | ||
530 | |||
531 | return pool; | ||
532 | } | ||
533 | |||
534 | static struct zswap_pool *zswap_pool_last_get(void) | ||
535 | { | ||
536 | struct zswap_pool *pool, *last = NULL; | ||
537 | |||
538 | rcu_read_lock(); | ||
539 | |||
540 | list_for_each_entry_rcu(pool, &zswap_pools, list) | ||
541 | last = pool; | ||
542 | if (!WARN_ON(!last) && !zswap_pool_get(last)) | ||
543 | last = NULL; | ||
544 | |||
545 | rcu_read_unlock(); | ||
546 | |||
547 | return last; | ||
548 | } | ||
549 | |||
550 | static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) | ||
551 | { | ||
552 | struct zswap_pool *pool; | ||
553 | |||
554 | assert_spin_locked(&zswap_pools_lock); | ||
555 | |||
556 | list_for_each_entry_rcu(pool, &zswap_pools, list) { | ||
557 | if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name))) | ||
558 | continue; | ||
559 | if (strncmp(zpool_get_type(pool->zpool), type, | ||
560 | sizeof(zswap_zpool_type))) | ||
561 | continue; | ||
562 | /* if we can't get it, it's about to be destroyed */ | ||
563 | if (!zswap_pool_get(pool)) | ||
564 | continue; | ||
565 | return pool; | ||
566 | } | ||
567 | |||
568 | return NULL; | ||
569 | } | ||
570 | |||
571 | static struct zswap_pool *zswap_pool_create(char *type, char *compressor) | ||
572 | { | ||
573 | struct zswap_pool *pool; | ||
574 | gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; | ||
575 | |||
576 | pool = kzalloc(sizeof(*pool), GFP_KERNEL); | ||
577 | if (!pool) { | ||
578 | pr_err("pool alloc failed\n"); | ||
579 | return NULL; | ||
580 | } | ||
581 | |||
582 | pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops); | ||
583 | if (!pool->zpool) { | ||
584 | pr_err("%s zpool not available\n", type); | ||
585 | goto error; | ||
586 | } | ||
587 | pr_debug("using %s zpool\n", zpool_get_type(pool->zpool)); | ||
588 | |||
589 | strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); | ||
590 | pool->tfm = alloc_percpu(struct crypto_comp *); | ||
591 | if (!pool->tfm) { | ||
592 | pr_err("percpu alloc failed\n"); | ||
593 | goto error; | ||
594 | } | ||
595 | |||
596 | if (zswap_cpu_comp_init(pool)) | ||
597 | goto error; | ||
598 | pr_debug("using %s compressor\n", pool->tfm_name); | ||
599 | |||
600 | /* being the current pool takes 1 ref; this func expects the | ||
601 | * caller to always add the new pool as the current pool | ||
602 | */ | ||
603 | kref_init(&pool->kref); | ||
604 | INIT_LIST_HEAD(&pool->list); | ||
605 | |||
606 | zswap_pool_debug("created", pool); | ||
607 | |||
608 | return pool; | ||
609 | |||
610 | error: | ||
611 | free_percpu(pool->tfm); | ||
612 | if (pool->zpool) | ||
613 | zpool_destroy_pool(pool->zpool); | ||
614 | kfree(pool); | ||
615 | return NULL; | ||
616 | } | ||
617 | |||
618 | static struct zswap_pool *__zswap_pool_create_fallback(void) | ||
619 | { | ||
620 | if (!crypto_has_comp(zswap_compressor, 0, 0)) { | ||
621 | pr_err("compressor %s not available, using default %s\n", | ||
622 | zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT); | ||
623 | strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT, | ||
624 | sizeof(zswap_compressor)); | ||
625 | } | ||
626 | if (!zpool_has_pool(zswap_zpool_type)) { | ||
627 | pr_err("zpool %s not available, using default %s\n", | ||
628 | zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT); | ||
629 | strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT, | ||
630 | sizeof(zswap_zpool_type)); | ||
631 | } | ||
632 | |||
633 | return zswap_pool_create(zswap_zpool_type, zswap_compressor); | ||
634 | } | ||
635 | |||
636 | static void zswap_pool_destroy(struct zswap_pool *pool) | ||
637 | { | ||
638 | zswap_pool_debug("destroying", pool); | ||
639 | |||
640 | zswap_cpu_comp_destroy(pool); | ||
641 | free_percpu(pool->tfm); | ||
642 | zpool_destroy_pool(pool->zpool); | ||
643 | kfree(pool); | ||
644 | } | ||
645 | |||
646 | static int __must_check zswap_pool_get(struct zswap_pool *pool) | ||
647 | { | ||
648 | return kref_get_unless_zero(&pool->kref); | ||
649 | } | ||
650 | |||
651 | static void __zswap_pool_release(struct rcu_head *head) | ||
652 | { | ||
653 | struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head); | ||
654 | |||
655 | /* nobody should have been able to get a kref... */ | ||
656 | WARN_ON(kref_get_unless_zero(&pool->kref)); | ||
657 | |||
658 | /* pool is now off zswap_pools list and has no references. */ | ||
659 | zswap_pool_destroy(pool); | ||
660 | } | ||
661 | |||
662 | static void __zswap_pool_empty(struct kref *kref) | ||
663 | { | ||
664 | struct zswap_pool *pool; | ||
665 | |||
666 | pool = container_of(kref, typeof(*pool), kref); | ||
667 | |||
668 | spin_lock(&zswap_pools_lock); | ||
669 | |||
670 | WARN_ON(pool == zswap_pool_current()); | ||
671 | |||
672 | list_del_rcu(&pool->list); | ||
673 | call_rcu(&pool->rcu_head, __zswap_pool_release); | ||
674 | |||
675 | spin_unlock(&zswap_pools_lock); | ||
676 | } | ||
677 | |||
678 | static void zswap_pool_put(struct zswap_pool *pool) | ||
679 | { | ||
680 | kref_put(&pool->kref, __zswap_pool_empty); | ||
681 | } | ||
682 | |||
683 | /********************************* | ||
684 | * param callbacks | ||
685 | **********************************/ | ||
686 | |||
687 | static int __zswap_param_set(const char *val, const struct kernel_param *kp, | ||
688 | char *type, char *compressor) | ||
689 | { | ||
690 | struct zswap_pool *pool, *put_pool = NULL; | ||
691 | char str[kp->str->maxlen], *s; | ||
692 | int ret; | ||
693 | |||
694 | /* | ||
695 | * kp is either zswap_zpool_kparam or zswap_compressor_kparam, defined | ||
696 | * at the top of this file, so maxlen is CRYPTO_MAX_ALG_NAME (64) or | ||
697 | * 32 (arbitrary). | ||
698 | */ | ||
699 | strlcpy(str, val, kp->str->maxlen); | ||
700 | s = strim(str); | ||
701 | |||
702 | /* if this is load-time (pre-init) param setting, | ||
703 | * don't create a pool; that's done during init. | ||
704 | */ | ||
705 | if (!zswap_init_started) | ||
706 | return param_set_copystring(s, kp); | ||
707 | |||
708 | /* no change required */ | ||
709 | if (!strncmp(kp->str->string, s, kp->str->maxlen)) | ||
710 | return 0; | ||
711 | |||
712 | if (!type) { | ||
713 | type = s; | ||
714 | if (!zpool_has_pool(type)) { | ||
715 | pr_err("zpool %s not available\n", type); | ||
716 | return -ENOENT; | ||
717 | } | ||
718 | } else if (!compressor) { | ||
719 | compressor = s; | ||
720 | if (!crypto_has_comp(compressor, 0, 0)) { | ||
721 | pr_err("compressor %s not available\n", compressor); | ||
722 | return -ENOENT; | ||
723 | } | ||
724 | } | ||
725 | |||
726 | spin_lock(&zswap_pools_lock); | ||
727 | |||
728 | pool = zswap_pool_find_get(type, compressor); | ||
729 | if (pool) { | ||
730 | zswap_pool_debug("using existing", pool); | ||
731 | list_del_rcu(&pool->list); | ||
732 | } else { | ||
733 | spin_unlock(&zswap_pools_lock); | ||
734 | pool = zswap_pool_create(type, compressor); | ||
735 | spin_lock(&zswap_pools_lock); | ||
736 | } | ||
737 | |||
738 | if (pool) | ||
739 | ret = param_set_copystring(s, kp); | ||
740 | else | ||
741 | ret = -EINVAL; | ||
742 | |||
743 | if (!ret) { | ||
744 | put_pool = zswap_pool_current(); | ||
745 | list_add_rcu(&pool->list, &zswap_pools); | ||
746 | } else if (pool) { | ||
747 | /* add the possibly pre-existing pool to the end of the pools | ||
748 | * list; if it's new (and empty) then it'll be removed and | ||
749 | * destroyed by the put after we drop the lock | ||
750 | */ | ||
751 | list_add_tail_rcu(&pool->list, &zswap_pools); | ||
752 | put_pool = pool; | ||
753 | } | ||
754 | |||
755 | spin_unlock(&zswap_pools_lock); | ||
756 | |||
757 | /* drop the ref from either the old current pool, | ||
758 | * or the new pool we failed to add | ||
759 | */ | ||
760 | if (put_pool) | ||
761 | zswap_pool_put(put_pool); | ||
762 | |||
763 | return ret; | ||
764 | } | ||
765 | |||
766 | static int zswap_compressor_param_set(const char *val, | ||
767 | const struct kernel_param *kp) | ||
768 | { | ||
769 | return __zswap_param_set(val, kp, zswap_zpool_type, NULL); | ||
770 | } | ||
771 | |||
772 | static int zswap_zpool_param_set(const char *val, | ||
773 | const struct kernel_param *kp) | ||
774 | { | ||
775 | return __zswap_param_set(val, kp, NULL, zswap_compressor); | ||
418 | } | 776 | } |
419 | 777 | ||
420 | /********************************* | 778 | /********************************* |
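Because both parameters are registered with module_param_cb() and mode 0644, they stay writable after boot; __zswap_param_set() then validates the new type or compressor, creates or reuses a matching zswap_pool, and rotates it to the head of zswap_pools while the old current pool is dropped via zswap_pool_put(). A hypothetical userspace illustration of switching the compressor at runtime (assumes zswap is present and the named crypto compressor is available, which the callback checks with crypto_has_comp()):

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper: write the new compressor name to the zswap
 * module parameter exposed under /sys/module/zswap/parameters/. */
static int set_zswap_compressor(const char *name)
{
	int fd = open("/sys/module/zswap/parameters/compressor", O_WRONLY);
	int ret = 0;

	if (fd < 0)
		return -1;
	if (write(fd, name, strlen(name)) < 0)
		ret = -1;
	close(fd);
	return ret;
}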
@@ -477,6 +835,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) | |||
477 | pgoff_t offset; | 835 | pgoff_t offset; |
478 | struct zswap_entry *entry; | 836 | struct zswap_entry *entry; |
479 | struct page *page; | 837 | struct page *page; |
838 | struct crypto_comp *tfm; | ||
480 | u8 *src, *dst; | 839 | u8 *src, *dst; |
481 | unsigned int dlen; | 840 | unsigned int dlen; |
482 | int ret; | 841 | int ret; |
@@ -517,13 +876,15 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) | |||
517 | case ZSWAP_SWAPCACHE_NEW: /* page is locked */ | 876 | case ZSWAP_SWAPCACHE_NEW: /* page is locked */ |
518 | /* decompress */ | 877 | /* decompress */ |
519 | dlen = PAGE_SIZE; | 878 | dlen = PAGE_SIZE; |
520 | src = (u8 *)zpool_map_handle(zswap_pool, entry->handle, | 879 | src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle, |
521 | ZPOOL_MM_RO) + sizeof(struct zswap_header); | 880 | ZPOOL_MM_RO) + sizeof(struct zswap_header); |
522 | dst = kmap_atomic(page); | 881 | dst = kmap_atomic(page); |
523 | ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, | 882 | tfm = *get_cpu_ptr(entry->pool->tfm); |
524 | entry->length, dst, &dlen); | 883 | ret = crypto_comp_decompress(tfm, src, entry->length, |
884 | dst, &dlen); | ||
885 | put_cpu_ptr(entry->pool->tfm); | ||
525 | kunmap_atomic(dst); | 886 | kunmap_atomic(dst); |
526 | zpool_unmap_handle(zswap_pool, entry->handle); | 887 | zpool_unmap_handle(entry->pool->zpool, entry->handle); |
527 | BUG_ON(ret); | 888 | BUG_ON(ret); |
528 | BUG_ON(dlen != PAGE_SIZE); | 889 | BUG_ON(dlen != PAGE_SIZE); |
529 | 890 | ||
@@ -572,6 +933,22 @@ end: | |||
572 | return ret; | 933 | return ret; |
573 | } | 934 | } |
574 | 935 | ||
936 | static int zswap_shrink(void) | ||
937 | { | ||
938 | struct zswap_pool *pool; | ||
939 | int ret; | ||
940 | |||
941 | pool = zswap_pool_last_get(); | ||
942 | if (!pool) | ||
943 | return -ENOENT; | ||
944 | |||
945 | ret = zpool_shrink(pool->zpool, 1, NULL); | ||
946 | |||
947 | zswap_pool_put(pool); | ||
948 | |||
949 | return ret; | ||
950 | } | ||
951 | |||
575 | /********************************* | 952 | /********************************* |
576 | * frontswap hooks | 953 | * frontswap hooks |
577 | **********************************/ | 954 | **********************************/ |
@@ -581,6 +958,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, | |||
581 | { | 958 | { |
582 | struct zswap_tree *tree = zswap_trees[type]; | 959 | struct zswap_tree *tree = zswap_trees[type]; |
583 | struct zswap_entry *entry, *dupentry; | 960 | struct zswap_entry *entry, *dupentry; |
961 | struct crypto_comp *tfm; | ||
584 | int ret; | 962 | int ret; |
585 | unsigned int dlen = PAGE_SIZE, len; | 963 | unsigned int dlen = PAGE_SIZE, len; |
586 | unsigned long handle; | 964 | unsigned long handle; |
@@ -596,7 +974,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, | |||
596 | /* reclaim space if needed */ | 974 | /* reclaim space if needed */ |
597 | if (zswap_is_full()) { | 975 | if (zswap_is_full()) { |
598 | zswap_pool_limit_hit++; | 976 | zswap_pool_limit_hit++; |
599 | if (zpool_shrink(zswap_pool, 1, NULL)) { | 977 | if (zswap_shrink()) { |
600 | zswap_reject_reclaim_fail++; | 978 | zswap_reject_reclaim_fail++; |
601 | ret = -ENOMEM; | 979 | ret = -ENOMEM; |
602 | goto reject; | 980 | goto reject; |
@@ -611,33 +989,42 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, | |||
611 | goto reject; | 989 | goto reject; |
612 | } | 990 | } |
613 | 991 | ||
992 | /* if entry is successfully added, it keeps the reference */ | ||
993 | entry->pool = zswap_pool_current_get(); | ||
994 | if (!entry->pool) { | ||
995 | ret = -EINVAL; | ||
996 | goto freepage; | ||
997 | } | ||
998 | |||
614 | /* compress */ | 999 | /* compress */ |
615 | dst = get_cpu_var(zswap_dstmem); | 1000 | dst = get_cpu_var(zswap_dstmem); |
1001 | tfm = *get_cpu_ptr(entry->pool->tfm); | ||
616 | src = kmap_atomic(page); | 1002 | src = kmap_atomic(page); |
617 | ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen); | 1003 | ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen); |
618 | kunmap_atomic(src); | 1004 | kunmap_atomic(src); |
1005 | put_cpu_ptr(entry->pool->tfm); | ||
619 | if (ret) { | 1006 | if (ret) { |
620 | ret = -EINVAL; | 1007 | ret = -EINVAL; |
621 | goto freepage; | 1008 | goto put_dstmem; |
622 | } | 1009 | } |
623 | 1010 | ||
624 | /* store */ | 1011 | /* store */ |
625 | len = dlen + sizeof(struct zswap_header); | 1012 | len = dlen + sizeof(struct zswap_header); |
626 | ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN, | 1013 | ret = zpool_malloc(entry->pool->zpool, len, |
627 | &handle); | 1014 | __GFP_NORETRY | __GFP_NOWARN, &handle); |
628 | if (ret == -ENOSPC) { | 1015 | if (ret == -ENOSPC) { |
629 | zswap_reject_compress_poor++; | 1016 | zswap_reject_compress_poor++; |
630 | goto freepage; | 1017 | goto put_dstmem; |
631 | } | 1018 | } |
632 | if (ret) { | 1019 | if (ret) { |
633 | zswap_reject_alloc_fail++; | 1020 | zswap_reject_alloc_fail++; |
634 | goto freepage; | 1021 | goto put_dstmem; |
635 | } | 1022 | } |
636 | zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW); | 1023 | zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW); |
637 | zhdr->swpentry = swp_entry(type, offset); | 1024 | zhdr->swpentry = swp_entry(type, offset); |
638 | buf = (u8 *)(zhdr + 1); | 1025 | buf = (u8 *)(zhdr + 1); |
639 | memcpy(buf, dst, dlen); | 1026 | memcpy(buf, dst, dlen); |
640 | zpool_unmap_handle(zswap_pool, handle); | 1027 | zpool_unmap_handle(entry->pool->zpool, handle); |
641 | put_cpu_var(zswap_dstmem); | 1028 | put_cpu_var(zswap_dstmem); |
642 | 1029 | ||
643 | /* populate entry */ | 1030 | /* populate entry */ |
@@ -660,12 +1047,14 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, | |||
660 | 1047 | ||
661 | /* update stats */ | 1048 | /* update stats */ |
662 | atomic_inc(&zswap_stored_pages); | 1049 | atomic_inc(&zswap_stored_pages); |
663 | zswap_pool_total_size = zpool_get_total_size(zswap_pool); | 1050 | zswap_update_total_size(); |
664 | 1051 | ||
665 | return 0; | 1052 | return 0; |
666 | 1053 | ||
667 | freepage: | 1054 | put_dstmem: |
668 | put_cpu_var(zswap_dstmem); | 1055 | put_cpu_var(zswap_dstmem); |
1056 | zswap_pool_put(entry->pool); | ||
1057 | freepage: | ||
669 | zswap_entry_cache_free(entry); | 1058 | zswap_entry_cache_free(entry); |
670 | reject: | 1059 | reject: |
671 | return ret; | 1060 | return ret; |
@@ -680,6 +1069,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, | |||
680 | { | 1069 | { |
681 | struct zswap_tree *tree = zswap_trees[type]; | 1070 | struct zswap_tree *tree = zswap_trees[type]; |
682 | struct zswap_entry *entry; | 1071 | struct zswap_entry *entry; |
1072 | struct crypto_comp *tfm; | ||
683 | u8 *src, *dst; | 1073 | u8 *src, *dst; |
684 | unsigned int dlen; | 1074 | unsigned int dlen; |
685 | int ret; | 1075 | int ret; |
@@ -696,13 +1086,14 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, | |||
696 | 1086 | ||
697 | /* decompress */ | 1087 | /* decompress */ |
698 | dlen = PAGE_SIZE; | 1088 | dlen = PAGE_SIZE; |
699 | src = (u8 *)zpool_map_handle(zswap_pool, entry->handle, | 1089 | src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle, |
700 | ZPOOL_MM_RO) + sizeof(struct zswap_header); | 1090 | ZPOOL_MM_RO) + sizeof(struct zswap_header); |
701 | dst = kmap_atomic(page); | 1091 | dst = kmap_atomic(page); |
702 | ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length, | 1092 | tfm = *get_cpu_ptr(entry->pool->tfm); |
703 | dst, &dlen); | 1093 | ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen); |
1094 | put_cpu_ptr(entry->pool->tfm); | ||
704 | kunmap_atomic(dst); | 1095 | kunmap_atomic(dst); |
705 | zpool_unmap_handle(zswap_pool, entry->handle); | 1096 | zpool_unmap_handle(entry->pool->zpool, entry->handle); |
706 | BUG_ON(ret); | 1097 | BUG_ON(ret); |
707 | 1098 | ||
708 | spin_lock(&tree->lock); | 1099 | spin_lock(&tree->lock); |
@@ -755,10 +1146,6 @@ static void zswap_frontswap_invalidate_area(unsigned type) | |||
755 | zswap_trees[type] = NULL; | 1146 | zswap_trees[type] = NULL; |
756 | } | 1147 | } |
757 | 1148 | ||
758 | static const struct zpool_ops zswap_zpool_ops = { | ||
759 | .evict = zswap_writeback_entry | ||
760 | }; | ||
761 | |||
762 | static void zswap_frontswap_init(unsigned type) | 1149 | static void zswap_frontswap_init(unsigned type) |
763 | { | 1150 | { |
764 | struct zswap_tree *tree; | 1151 | struct zswap_tree *tree; |
@@ -839,49 +1226,40 @@ static void __exit zswap_debugfs_exit(void) { } | |||
839 | **********************************/ | 1226 | **********************************/ |
840 | static int __init init_zswap(void) | 1227 | static int __init init_zswap(void) |
841 | { | 1228 | { |
842 | gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; | 1229 | struct zswap_pool *pool; |
843 | 1230 | ||
844 | pr_info("loading zswap\n"); | 1231 | zswap_init_started = true; |
845 | |||
846 | zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp, | ||
847 | &zswap_zpool_ops); | ||
848 | if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) { | ||
849 | pr_info("%s zpool not available\n", zswap_zpool_type); | ||
850 | zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; | ||
851 | zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp, | ||
852 | &zswap_zpool_ops); | ||
853 | } | ||
854 | if (!zswap_pool) { | ||
855 | pr_err("%s zpool not available\n", zswap_zpool_type); | ||
856 | pr_err("zpool creation failed\n"); | ||
857 | goto error; | ||
858 | } | ||
859 | pr_info("using %s pool\n", zswap_zpool_type); | ||
860 | 1232 | ||
861 | if (zswap_entry_cache_create()) { | 1233 | if (zswap_entry_cache_create()) { |
862 | pr_err("entry cache creation failed\n"); | 1234 | pr_err("entry cache creation failed\n"); |
863 | goto cachefail; | 1235 | goto cache_fail; |
864 | } | 1236 | } |
865 | if (zswap_comp_init()) { | 1237 | |
866 | pr_err("compressor initialization failed\n"); | 1238 | if (zswap_cpu_dstmem_init()) { |
867 | goto compfail; | 1239 | pr_err("dstmem alloc failed\n"); |
1240 | goto dstmem_fail; | ||
868 | } | 1241 | } |
869 | if (zswap_cpu_init()) { | 1242 | |
870 | pr_err("per-cpu initialization failed\n"); | 1243 | pool = __zswap_pool_create_fallback(); |
871 | goto pcpufail; | 1244 | if (!pool) { |
1245 | pr_err("pool creation failed\n"); | ||
1246 | goto pool_fail; | ||
872 | } | 1247 | } |
1248 | pr_info("loaded using pool %s/%s\n", pool->tfm_name, | ||
1249 | zpool_get_type(pool->zpool)); | ||
1250 | |||
1251 | list_add(&pool->list, &zswap_pools); | ||
873 | 1252 | ||
874 | frontswap_register_ops(&zswap_frontswap_ops); | 1253 | frontswap_register_ops(&zswap_frontswap_ops); |
875 | if (zswap_debugfs_init()) | 1254 | if (zswap_debugfs_init()) |
876 | pr_warn("debugfs initialization failed\n"); | 1255 | pr_warn("debugfs initialization failed\n"); |
877 | return 0; | 1256 | return 0; |
878 | pcpufail: | 1257 | |
879 | zswap_comp_exit(); | 1258 | pool_fail: |
880 | compfail: | 1259 | zswap_cpu_dstmem_destroy(); |
1260 | dstmem_fail: | ||
881 | zswap_entry_cache_destroy(); | 1261 | zswap_entry_cache_destroy(); |
882 | cachefail: | 1262 | cache_fail: |
883 | zpool_destroy_pool(zswap_pool); | ||
884 | error: | ||
885 | return -ENOMEM; | 1263 | return -ENOMEM; |
886 | } | 1264 | } |
887 | /* must be late so crypto has time to come up */ | 1265 | /* must be late so crypto has time to come up */ |
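The zswap hunks above replace the old single global compressor with a per-pool, per-CPU crypto_comp transform: both the store and load paths now take the transform from entry->pool with get_cpu_ptr() and drop it once the (de)compression is done. A minimal sketch of that pattern, not taken from the patch and using made-up demo_* names:

#include <linux/crypto.h>
#include <linux/percpu.h>

struct demo_pool {
	struct crypto_comp * __percpu *tfm;	/* one transform per CPU */
};

static int demo_compress(struct demo_pool *pool, const u8 *src,
			 unsigned int slen, u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *tfm;
	int ret;

	/* pins this CPU's transform and disables preemption */
	tfm = *get_cpu_ptr(pool->tfm);
	ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
	put_cpu_ptr(pool->tfm);

	return ret;
}

In the patch itself the pool reference is taken earlier via zswap_pool_current_get(), and, as the comment in the store path notes, the entry keeps that reference once it is added to the tree.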
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a51ca0e5beef..f2a1131b2f8b 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl | |||
@@ -264,6 +264,7 @@ our $Sparse = qr{ | |||
264 | __kernel| | 264 | __kernel| |
265 | __force| | 265 | __force| |
266 | __iomem| | 266 | __iomem| |
267 | __pmem| | ||
267 | __must_check| | 268 | __must_check| |
268 | __init_refok| | 269 | __init_refok| |
269 | __kprobes| | 270 | __kprobes| |
@@ -584,7 +585,7 @@ our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*}; | |||
584 | our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)}; | 585 | our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)}; |
585 | 586 | ||
586 | our $declaration_macros = qr{(?x: | 587 | our $declaration_macros = qr{(?x: |
587 | (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,2}\s*\(| | 588 | (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,6}\s*\(| |
588 | (?:$Storage\s+)?LIST_HEAD\s*\(| | 589 | (?:$Storage\s+)?LIST_HEAD\s*\(| |
589 | (?:$Storage\s+)?${Type}\s+uninitialized_var\s*\( | 590 | (?:$Storage\s+)?${Type}\s+uninitialized_var\s*\( |
590 | )}; | 591 | )}; |
@@ -1953,9 +1954,9 @@ sub process { | |||
1953 | our $clean = 1; | 1954 | our $clean = 1; |
1954 | my $signoff = 0; | 1955 | my $signoff = 0; |
1955 | my $is_patch = 0; | 1956 | my $is_patch = 0; |
1956 | |||
1957 | my $in_header_lines = $file ? 0 : 1; | 1957 | my $in_header_lines = $file ? 0 : 1; |
1958 | my $in_commit_log = 0; #Scanning lines before patch | 1958 | my $in_commit_log = 0; #Scanning lines before patch |
1959 | my $commit_log_possible_stack_dump = 0; | ||
1959 | my $commit_log_long_line = 0; | 1960 | my $commit_log_long_line = 0; |
1960 | my $commit_log_has_diff = 0; | 1961 | my $commit_log_has_diff = 0; |
1961 | my $reported_maintainer_file = 0; | 1962 | my $reported_maintainer_file = 0; |
@@ -2166,11 +2167,15 @@ sub process { | |||
2166 | if ($showfile) { | 2167 | if ($showfile) { |
2167 | $prefix = "$realfile:$realline: " | 2168 | $prefix = "$realfile:$realline: " |
2168 | } elsif ($emacs) { | 2169 | } elsif ($emacs) { |
2169 | $prefix = "$filename:$linenr: "; | 2170 | if ($file) { |
2171 | $prefix = "$filename:$realline: "; | ||
2172 | } else { | ||
2173 | $prefix = "$filename:$linenr: "; | ||
2174 | } | ||
2170 | } | 2175 | } |
2171 | 2176 | ||
2172 | if ($found_file) { | 2177 | if ($found_file) { |
2173 | if ($realfile =~ m@^(drivers/net/|net/)@) { | 2178 | if ($realfile =~ m@^(?:drivers/net/|net/|drivers/staging/)@) { |
2174 | $check = 1; | 2179 | $check = 1; |
2175 | } else { | 2180 | } else { |
2176 | $check = $check_orig; | 2181 | $check = $check_orig; |
@@ -2310,16 +2315,42 @@ sub process { | |||
2310 | 2315 | ||
2311 | # Check for line lengths > 75 in commit log, warn once | 2316 | # Check for line lengths > 75 in commit log, warn once |
2312 | if ($in_commit_log && !$commit_log_long_line && | 2317 | if ($in_commit_log && !$commit_log_long_line && |
2313 | length($line) > 75) { | 2318 | length($line) > 75 && |
2319 | !($line =~ /^\s*[a-zA-Z0-9_\/\.]+\s+\|\s+\d+/ || | ||
2320 | # file delta changes | ||
2321 | $line =~ /^\s*(?:[\w\.\-]+\/)++[\w\.\-]+:/ || | ||
2322 | # filename then : | ||
2323 | $line =~ /^\s*(?:Fixes:|Link:)/i || | ||
2324 | # A Fixes: or Link: line | ||
2325 | $commit_log_possible_stack_dump)) { | ||
2314 | WARN("COMMIT_LOG_LONG_LINE", | 2326 | WARN("COMMIT_LOG_LONG_LINE", |
2315 | "Possible unwrapped commit description (prefer a maximum 75 chars per line)\n" . $herecurr); | 2327 | "Possible unwrapped commit description (prefer a maximum 75 chars per line)\n" . $herecurr); |
2316 | $commit_log_long_line = 1; | 2328 | $commit_log_long_line = 1; |
2317 | } | 2329 | } |
2318 | 2330 | ||
2331 | # Check if the commit log is in a possible stack dump | ||
2332 | if ($in_commit_log && !$commit_log_possible_stack_dump && | ||
2333 | ($line =~ /^\s*(?:WARNING:|BUG:)/ || | ||
2334 | $line =~ /^\s*\[\s*\d+\.\d{6,6}\s*\]/ || | ||
2335 | # timestamp | ||
2336 | $line =~ /^\s*\[\<[0-9a-fA-F]{8,}\>\]/)) { | ||
2337 | # stack dump address | ||
2338 | $commit_log_possible_stack_dump = 1; | ||
2339 | } | ||
2340 | |||
2341 | # Reset possible stack dump if a blank line is found | ||
2342 | if ($in_commit_log && $commit_log_possible_stack_dump && | ||
2343 | $line =~ /^\s*$/) { | ||
2344 | $commit_log_possible_stack_dump = 0; | ||
2345 | } | ||
2346 | |||
2319 | # Check for git id commit length and improperly formed commit descriptions | 2347 | # Check for git id commit length and improperly formed commit descriptions |
2320 | if ($in_commit_log && $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i) { | 2348 | if ($in_commit_log && |
2321 | my $init_char = $1; | 2349 | ($line =~ /\bcommit\s+[0-9a-f]{5,}\b/i || |
2322 | my $orig_commit = lc($2); | 2350 | ($line =~ /\b[0-9a-f]{12,40}\b/i && |
2351 | $line !~ /\bfixes:\s*[0-9a-f]{12,40}/i))) { | ||
2352 | my $init_char = "c"; | ||
2353 | my $orig_commit = ""; | ||
2323 | my $short = 1; | 2354 | my $short = 1; |
2324 | my $long = 0; | 2355 | my $long = 0; |
2325 | my $case = 1; | 2356 | my $case = 1; |
@@ -2330,6 +2361,13 @@ sub process { | |||
2330 | my $orig_desc = "commit description"; | 2361 | my $orig_desc = "commit description"; |
2331 | my $description = ""; | 2362 | my $description = ""; |
2332 | 2363 | ||
2364 | if ($line =~ /\b(c)ommit\s+([0-9a-f]{5,})\b/i) { | ||
2365 | $init_char = $1; | ||
2366 | $orig_commit = lc($2); | ||
2367 | } elsif ($line =~ /\b([0-9a-f]{12,40})\b/i) { | ||
2368 | $orig_commit = lc($1); | ||
2369 | } | ||
2370 | |||
2333 | $short = 0 if ($line =~ /\bcommit\s+[0-9a-f]{12,40}/i); | 2371 | $short = 0 if ($line =~ /\bcommit\s+[0-9a-f]{12,40}/i); |
2334 | $long = 1 if ($line =~ /\bcommit\s+[0-9a-f]{41,}/i); | 2372 | $long = 1 if ($line =~ /\bcommit\s+[0-9a-f]{41,}/i); |
2335 | $space = 0 if ($line =~ /\bcommit [0-9a-f]/i); | 2373 | $space = 0 if ($line =~ /\bcommit [0-9a-f]/i); |
@@ -2738,6 +2776,8 @@ sub process { | |||
2738 | } | 2776 | } |
2739 | } | 2777 | } |
2740 | 2778 | ||
2779 | # Block comment styles | ||
2780 | # Networking with an initial /* | ||
2741 | if ($realfile =~ m@^(drivers/net/|net/)@ && | 2781 | if ($realfile =~ m@^(drivers/net/|net/)@ && |
2742 | $prevrawline =~ /^\+[ \t]*\/\*[ \t]*$/ && | 2782 | $prevrawline =~ /^\+[ \t]*\/\*[ \t]*$/ && |
2743 | $rawline =~ /^\+[ \t]*\*/ && | 2783 | $rawline =~ /^\+[ \t]*\*/ && |
@@ -2746,22 +2786,23 @@ sub process { | |||
2746 | "networking block comments don't use an empty /* line, use /* Comment...\n" . $hereprev); | 2786 | "networking block comments don't use an empty /* line, use /* Comment...\n" . $hereprev); |
2747 | } | 2787 | } |
2748 | 2788 | ||
2749 | if ($realfile =~ m@^(drivers/net/|net/)@ && | 2789 | # Block comments use * on subsequent lines |
2750 | $prevrawline =~ /^\+[ \t]*\/\*/ && #starting /* | 2790 | if ($prevline =~ /$;[ \t]*$/ && #ends in comment |
2791 | $prevrawline =~ /^\+.*?\/\*/ && #starting /* | ||
2751 | $prevrawline !~ /\*\/[ \t]*$/ && #no trailing */ | 2792 | $prevrawline !~ /\*\/[ \t]*$/ && #no trailing */ |
2752 | $rawline =~ /^\+/ && #line is new | 2793 | $rawline =~ /^\+/ && #line is new |
2753 | $rawline !~ /^\+[ \t]*\*/) { #no leading * | 2794 | $rawline !~ /^\+[ \t]*\*/) { #no leading * |
2754 | WARN("NETWORKING_BLOCK_COMMENT_STYLE", | 2795 | WARN("BLOCK_COMMENT_STYLE", |
2755 | "networking block comments start with * on subsequent lines\n" . $hereprev); | 2796 | "Block comments use * on subsequent lines\n" . $hereprev); |
2756 | } | 2797 | } |
2757 | 2798 | ||
2758 | if ($realfile =~ m@^(drivers/net/|net/)@ && | 2799 | # Block comments use */ on trailing lines |
2759 | $rawline !~ m@^\+[ \t]*\*/[ \t]*$@ && #trailing */ | 2800 | if ($rawline !~ m@^\+[ \t]*\*/[ \t]*$@ && #trailing */ |
2760 | $rawline !~ m@^\+.*/\*.*\*/[ \t]*$@ && #inline /*...*/ | 2801 | $rawline !~ m@^\+.*/\*.*\*/[ \t]*$@ && #inline /*...*/ |
2761 | $rawline !~ m@^\+.*\*{2,}/[ \t]*$@ && #trailing **/ | 2802 | $rawline !~ m@^\+.*\*{2,}/[ \t]*$@ && #trailing **/ |
2762 | $rawline =~ m@^\+[ \t]*.+\*\/[ \t]*$@) { #non blank */ | 2803 | $rawline =~ m@^\+[ \t]*.+\*\/[ \t]*$@) { #non blank */ |
2763 | WARN("NETWORKING_BLOCK_COMMENT_STYLE", | 2804 | WARN("BLOCK_COMMENT_STYLE", |
2764 | "networking block comments put the trailing */ on a separate line\n" . $herecurr); | 2805 | "Block comments use a trailing */ on a separate line\n" . $herecurr); |
2765 | } | 2806 | } |
2766 | 2807 | ||
2767 | # check for missing blank lines after struct/union declarations | 2808 | # check for missing blank lines after struct/union declarations |
@@ -3067,15 +3108,22 @@ sub process { | |||
3067 | 3108 | ||
3068 | substr($s, 0, length($c), ''); | 3109 | substr($s, 0, length($c), ''); |
3069 | 3110 | ||
3070 | # Make sure we remove the line prefixes as we have | 3111 | # remove inline comments |
3071 | # none on the first line, and are going to readd them | 3112 | $s =~ s/$;/ /g; |
3072 | # where necessary. | 3113 | $c =~ s/$;/ /g; |
3073 | $s =~ s/\n./\n/gs; | ||
3074 | 3114 | ||
3075 | # Find out how long the conditional actually is. | 3115 | # Find out how long the conditional actually is. |
3076 | my @newlines = ($c =~ /\n/gs); | 3116 | my @newlines = ($c =~ /\n/gs); |
3077 | my $cond_lines = 1 + $#newlines; | 3117 | my $cond_lines = 1 + $#newlines; |
3078 | 3118 | ||
3119 | # Make sure we remove the line prefixes as we have | ||
3120 | # none on the first line, and are going to readd them | ||
3121 | # where necessary. | ||
3122 | $s =~ s/\n./\n/gs; | ||
3123 | while ($s =~ /\n\s+\\\n/) { | ||
3124 | $cond_lines += $s =~ s/\n\s+\\\n/\n/g; | ||
3125 | } | ||
3126 | |||
3079 | # We want to check the first line inside the block | 3127 | # We want to check the first line inside the block |
3080 | # starting at the end of the conditional, so remove: | 3128 | # starting at the end of the conditional, so remove: |
3081 | # 1) any blank line termination | 3129 | # 1) any blank line termination |
@@ -3141,8 +3189,10 @@ sub process { | |||
3141 | 3189 | ||
3142 | #print "line<$line> prevline<$prevline> indent<$indent> sindent<$sindent> check<$check> continuation<$continuation> s<$s> cond_lines<$cond_lines> stat_real<$stat_real> stat<$stat>\n"; | 3190 | #print "line<$line> prevline<$prevline> indent<$indent> sindent<$sindent> check<$check> continuation<$continuation> s<$s> cond_lines<$cond_lines> stat_real<$stat_real> stat<$stat>\n"; |
3143 | 3191 | ||
3144 | if ($check && (($sindent % 8) != 0 || | 3192 | if ($check && $s ne '' && |
3145 | ($sindent <= $indent && $s ne ''))) { | 3193 | (($sindent % 8) != 0 || |
3194 | ($sindent < $indent) || | ||
3195 | ($sindent > $indent + 8))) { | ||
3146 | WARN("SUSPECT_CODE_INDENT", | 3196 | WARN("SUSPECT_CODE_INDENT", |
3147 | "suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n"); | 3197 | "suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n"); |
3148 | } | 3198 | } |
@@ -3439,13 +3489,15 @@ sub process { | |||
3439 | } | 3489 | } |
3440 | } | 3490 | } |
3441 | 3491 | ||
3442 | # # no BUG() or BUG_ON() | 3492 | # avoid BUG() or BUG_ON() |
3443 | # if ($line =~ /\b(BUG|BUG_ON)\b/) { | 3493 | if ($line =~ /\b(?:BUG|BUG_ON)\b/) { |
3444 | # print "Try to use WARN_ON & Recovery code rather than BUG() or BUG_ON()\n"; | 3494 | my $msg_type = \&WARN; |
3445 | # print "$herecurr"; | 3495 | $msg_type = \&CHK if ($file); |
3446 | # $clean = 0; | 3496 | &{$msg_type}("AVOID_BUG", |
3447 | # } | 3497 | "Avoid crashing the kernel - try using WARN_ON & recovery code rather than BUG() or BUG_ON()\n" . $herecurr); |
3498 | } | ||
3448 | 3499 | ||
3500 | # avoid LINUX_VERSION_CODE | ||
3449 | if ($line =~ /\bLINUX_VERSION_CODE\b/) { | 3501 | if ($line =~ /\bLINUX_VERSION_CODE\b/) { |
3450 | WARN("LINUX_VERSION_CODE", | 3502 | WARN("LINUX_VERSION_CODE", |
3451 | "LINUX_VERSION_CODE should be avoided, code should be for the version to which it is merged\n" . $herecurr); | 3503 | "LINUX_VERSION_CODE should be avoided, code should be for the version to which it is merged\n" . $herecurr); |
@@ -3520,7 +3572,7 @@ sub process { | |||
3520 | # function brace can't be on same line, except for #defines of do while, | 3572 | # function brace can't be on same line, except for #defines of do while, |
3521 | # or if closed on same line | 3573 | # or if closed on same line |
3522 | if (($line=~/$Type\s*$Ident\(.*\).*\s*{/) and | 3574 | if (($line=~/$Type\s*$Ident\(.*\).*\s*{/) and |
3523 | !($line=~/\#\s*define.*do\s{/) and !($line=~/}/)) { | 3575 | !($line=~/\#\s*define.*do\s\{/) and !($line=~/}/)) { |
3524 | if (ERROR("OPEN_BRACE", | 3576 | if (ERROR("OPEN_BRACE", |
3525 | "open brace '{' following function declarations go on the next line\n" . $herecurr) && | 3577 | "open brace '{' following function declarations go on the next line\n" . $herecurr) && |
3526 | $fix) { | 3578 | $fix) { |
@@ -4032,8 +4084,8 @@ sub process { | |||
4032 | ## } | 4084 | ## } |
4033 | 4085 | ||
4034 | #need space before brace following if, while, etc | 4086 | #need space before brace following if, while, etc |
4035 | if (($line =~ /\(.*\){/ && $line !~ /\($Type\){/) || | 4087 | if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\){/) || |
4036 | $line =~ /do{/) { | 4088 | $line =~ /do\{/) { |
4037 | if (ERROR("SPACING", | 4089 | if (ERROR("SPACING", |
4038 | "space required before the open brace '{'\n" . $herecurr) && | 4090 | "space required before the open brace '{'\n" . $herecurr) && |
4039 | $fix) { | 4091 | $fix) { |
@@ -4179,6 +4231,35 @@ sub process { | |||
4179 | } | 4231 | } |
4180 | } | 4232 | } |
4181 | 4233 | ||
4234 | # comparisons with a constant or upper case identifier on the left | ||
4235 | # avoid cases like "foo + BAR < baz" | ||
4236 | # only fix matches surrounded by parentheses to avoid incorrect | ||
4237 | # conversions like "FOO < baz() + 5" being "misfixed" to "baz() > FOO + 5" | ||
4238 | if ($^V && $^V ge 5.10.0 && | ||
4239 | $line =~ /^\+(.*)\b($Constant|[A-Z_][A-Z0-9_]*)\s*($Compare)\s*($LvalOrFunc)/) { | ||
4240 | my $lead = $1; | ||
4241 | my $const = $2; | ||
4242 | my $comp = $3; | ||
4243 | my $to = $4; | ||
4244 | my $newcomp = $comp; | ||
4245 | if ($lead !~ /$Operators\s*$/ && | ||
4246 | $to !~ /^(?:Constant|[A-Z_][A-Z0-9_]*)$/ && | ||
4247 | WARN("CONSTANT_COMPARISON", | ||
4248 | "Comparisons should place the constant on the right side of the test\n" . $herecurr) && | ||
4249 | $fix) { | ||
4250 | if ($comp eq "<") { | ||
4251 | $newcomp = ">"; | ||
4252 | } elsif ($comp eq "<=") { | ||
4253 | $newcomp = ">="; | ||
4254 | } elsif ($comp eq ">") { | ||
4255 | $newcomp = "<"; | ||
4256 | } elsif ($comp eq ">=") { | ||
4257 | $newcomp = "<="; | ||
4258 | } | ||
4259 | $fixed[$fixlinenr] =~ s/\(\s*\Q$const\E\s*$Compare\s*\Q$to\E\s*\)/($to $newcomp $const)/; | ||
4260 | } | ||
4261 | } | ||
4262 | |||
4182 | # Return of what appears to be an errno should normally be negative | 4263 | # Return of what appears to be an errno should normally be negative |
4183 | if ($sline =~ /\breturn(?:\s*\(+\s*|\s+)(E[A-Z]+)(?:\s*\)+\s*|\s*)[;:,]/) { | 4264 | if ($sline =~ /\breturn(?:\s*\(+\s*|\s+)(E[A-Z]+)(?:\s*\)+\s*|\s*)[;:,]/) { |
4184 | my $name = $1; | 4265 | my $name = $1; |
@@ -4480,7 +4561,7 @@ sub process { | |||
4480 | $dstat !~ /^for\s*$Constant$/ && # for (...) | 4561 | $dstat !~ /^for\s*$Constant$/ && # for (...) |
4481 | $dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) bar() | 4562 | $dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) bar() |
4482 | $dstat !~ /^do\s*{/ && # do {... | 4563 | $dstat !~ /^do\s*{/ && # do {... |
4483 | $dstat !~ /^\({/ && # ({... | 4564 | $dstat !~ /^\(\{/ && # ({... |
4484 | $ctx !~ /^.\s*#\s*define\s+TRACE_(?:SYSTEM|INCLUDE_FILE|INCLUDE_PATH)\b/) | 4565 | $ctx !~ /^.\s*#\s*define\s+TRACE_(?:SYSTEM|INCLUDE_FILE|INCLUDE_PATH)\b/) |
4485 | { | 4566 | { |
4486 | $ctx =~ s/\n*$//; | 4567 | $ctx =~ s/\n*$//; |
@@ -4789,16 +4870,20 @@ sub process { | |||
4789 | "Consecutive strings are generally better as a single string\n" . $herecurr); | 4870 | "Consecutive strings are generally better as a single string\n" . $herecurr); |
4790 | } | 4871 | } |
4791 | 4872 | ||
4792 | # check for %L{u,d,i} in strings | 4873 | # check for %L{u,d,i} and 0x%[udi] in strings |
4793 | my $string; | 4874 | my $string; |
4794 | while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) { | 4875 | while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) { |
4795 | $string = substr($rawline, $-[1], $+[1] - $-[1]); | 4876 | $string = substr($rawline, $-[1], $+[1] - $-[1]); |
4796 | $string =~ s/%%/__/g; | 4877 | $string =~ s/%%/__/g; |
4797 | if ($string =~ /(?<!%)%L[udi]/) { | 4878 | if ($string =~ /(?<!%)%[\*\d\.\$]*L[udi]/) { |
4798 | WARN("PRINTF_L", | 4879 | WARN("PRINTF_L", |
4799 | "\%Ld/%Lu are not-standard C, use %lld/%llu\n" . $herecurr); | 4880 | "\%Ld/%Lu are not-standard C, use %lld/%llu\n" . $herecurr); |
4800 | last; | 4881 | last; |
4801 | } | 4882 | } |
4883 | if ($string =~ /0x%[\*\d\.\$\Llzth]*[udi]/) { | ||
4884 | ERROR("PRINTF_0xDECIMAL", | ||
4885 | "Prefixing 0x with decimal output is defective\n" . $herecurr); | ||
4886 | } | ||
4802 | } | 4887 | } |
4803 | 4888 | ||
4804 | # check for line continuations in quoted strings with odd counts of " | 4889 | # check for line continuations in quoted strings with odd counts of " |
@@ -4816,10 +4901,34 @@ sub process { | |||
4816 | 4901 | ||
4817 | # check for needless "if (<foo>) fn(<foo>)" uses | 4902 | # check for needless "if (<foo>) fn(<foo>)" uses |
4818 | if ($prevline =~ /\bif\s*\(\s*($Lval)\s*\)/) { | 4903 | if ($prevline =~ /\bif\s*\(\s*($Lval)\s*\)/) { |
4819 | my $expr = '\s*\(\s*' . quotemeta($1) . '\s*\)\s*;'; | 4904 | my $tested = quotemeta($1); |
4820 | if ($line =~ /\b(kfree|usb_free_urb|debugfs_remove(?:_recursive)?)$expr/) { | 4905 | my $expr = '\s*\(\s*' . $tested . '\s*\)\s*;'; |
4821 | WARN('NEEDLESS_IF', | 4906 | if ($line =~ /\b(kfree|usb_free_urb|debugfs_remove(?:_recursive)?|(?:kmem_cache|mempool|dma_pool)_destroy)$expr/) { |
4822 | "$1(NULL) is safe and this check is probably not required\n" . $hereprev); | 4907 | my $func = $1; |
4908 | if (WARN('NEEDLESS_IF', | ||
4909 | "$func(NULL) is safe and this check is probably not required\n" . $hereprev) && | ||
4910 | $fix) { | ||
4911 | my $do_fix = 1; | ||
4912 | my $leading_tabs = ""; | ||
4913 | my $new_leading_tabs = ""; | ||
4914 | if ($lines[$linenr - 2] =~ /^\+(\t*)if\s*\(\s*$tested\s*\)\s*$/) { | ||
4915 | $leading_tabs = $1; | ||
4916 | } else { | ||
4917 | $do_fix = 0; | ||
4918 | } | ||
4919 | if ($lines[$linenr - 1] =~ /^\+(\t+)$func\s*\(\s*$tested\s*\)\s*;\s*$/) { | ||
4920 | $new_leading_tabs = $1; | ||
4921 | if (length($leading_tabs) + 1 ne length($new_leading_tabs)) { | ||
4922 | $do_fix = 0; | ||
4923 | } | ||
4924 | } else { | ||
4925 | $do_fix = 0; | ||
4926 | } | ||
4927 | if ($do_fix) { | ||
4928 | fix_delete_line($fixlinenr - 1, $prevrawline); | ||
4929 | $fixed[$fixlinenr] =~ s/^\+$new_leading_tabs/\+$leading_tabs/; | ||
4930 | } | ||
4931 | } | ||
4823 | } | 4932 | } |
4824 | } | 4933 | } |
4825 | 4934 | ||
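As an illustration of two of the checkpatch.pl changes above (the code below is hypothetical, not from this patch): the new CONSTANT_COMPARISON warning fires on parenthesized comparisons with the constant or upper-case identifier on the left, and NEEDLESS_IF now also covers the kmem_cache/mempool/dma_pool _destroy() helpers, both with --fix support:

/* flagged: constant on the left of the comparison, needless NULL check */
if (MAX_ENTRIES < count)
	return -EINVAL;
if (pool)
	mempool_destroy(pool);

/* what checkpatch --fix turns this into */
if (count > MAX_ENTRIES)
	return -EINVAL;
mempool_destroy(pool);

MAX_ENTRIES, count and pool are placeholder names; mempool_destroy(NULL) being safe is exactly why the check treats the surrounding if as redundant.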
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 3d2201413028..5bed7716f8ab 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c | |||
@@ -472,7 +472,7 @@ static int sel_mmap_policy_fault(struct vm_area_struct *vma, | |||
472 | return 0; | 472 | return 0; |
473 | } | 473 | } |
474 | 474 | ||
475 | static struct vm_operations_struct sel_mmap_policy_ops = { | 475 | static const struct vm_operations_struct sel_mmap_policy_ops = { |
476 | .fault = sel_mmap_policy_fault, | 476 | .fault = sel_mmap_policy_fault, |
477 | .page_mkwrite = sel_mmap_policy_fault, | 477 | .page_mkwrite = sel_mmap_policy_fault, |
478 | }; | 478 | }; |
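The single selinuxfs change above constifies an ops table. A generic sketch of the pattern (demo_* names are illustrative, signatures as of this kernel version): a vm_operations_struct that is never written after build time can be declared const so it lands in read-only data.

#include <linux/mm.h>

static int demo_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;		/* placeholder fault handler */
}

static const struct vm_operations_struct demo_vm_ops = {
	.fault		= demo_fault,
	.page_mkwrite	= demo_fault,
};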
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4662a8877f6c..a25a73147f71 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -397,6 +397,36 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | |||
397 | return young; | 397 | return young; |
398 | } | 398 | } |
399 | 399 | ||
400 | static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, | ||
401 | struct mm_struct *mm, | ||
402 | unsigned long start, | ||
403 | unsigned long end) | ||
404 | { | ||
405 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | ||
406 | int young, idx; | ||
407 | |||
408 | idx = srcu_read_lock(&kvm->srcu); | ||
409 | spin_lock(&kvm->mmu_lock); | ||
410 | /* | ||
411 | * Even though we do not flush TLB, this will still adversely | ||
412 | * affect performance on pre-Haswell Intel EPT, where there is | ||
413 | * no EPT Access Bit to clear so that we have to tear down EPT | ||
414 | * tables instead. If we find this unacceptable, we can always | ||
415 | * add a parameter to kvm_age_hva so that it effectively doesn't | ||
416 | * do anything on clear_young. | ||
417 | * | ||
418 | * Also note that currently we never issue secondary TLB flushes | ||
419 | * from clear_young, leaving this job up to the regular system | ||
420 | * cadence. If we find this inaccurate, we might come up with a | ||
421 | * more sophisticated heuristic later. | ||
422 | */ | ||
423 | young = kvm_age_hva(kvm, start, end); | ||
424 | spin_unlock(&kvm->mmu_lock); | ||
425 | srcu_read_unlock(&kvm->srcu, idx); | ||
426 | |||
427 | return young; | ||
428 | } | ||
429 | |||
400 | static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn, | 430 | static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn, |
401 | struct mm_struct *mm, | 431 | struct mm_struct *mm, |
402 | unsigned long address) | 432 | unsigned long address) |
@@ -429,6 +459,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | |||
429 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, | 459 | .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, |
430 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, | 460 | .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, |
431 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, | 461 | .clear_flush_young = kvm_mmu_notifier_clear_flush_young, |
462 | .clear_young = kvm_mmu_notifier_clear_young, | ||
432 | .test_young = kvm_mmu_notifier_test_young, | 463 | .test_young = kvm_mmu_notifier_test_young, |
433 | .change_pte = kvm_mmu_notifier_change_pte, | 464 | .change_pte = kvm_mmu_notifier_change_pte, |
434 | .release = kvm_mmu_notifier_release, | 465 | .release = kvm_mmu_notifier_release, |
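The new clear_young notifier lets the primary MM age pages mapped through a secondary MMU without the TLB flush that clear_flush_young implies; the comment in the hunk above spells out the trade-off on pre-Haswell Intel EPT. A minimal sketch of what another secondary-MMU user would have to provide (demo_* names are hypothetical; the callback returns non-zero if anything in the range had been referenced):

#include <linux/mmu_notifier.h>

static int demo_clear_young(struct mmu_notifier *mn, struct mm_struct *mm,
			    unsigned long start, unsigned long end)
{
	int young = 0;

	/*
	 * Walk this driver's own mappings of [start, end), clear their
	 * accessed state and note whether any of them had been used.
	 * No secondary TLB flush is issued here, by design.
	 */
	return young;
}

static const struct mmu_notifier_ops demo_mn_ops = {
	.clear_young	= demo_clear_young,
};

Such an ops table would be attached to an mm with mmu_notifier_register(), the same way KVM hooks in kvm_mmu_notifier_ops above.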