Diffstat (limited to 'fs/proc')
-rw-r--r--   fs/proc/base.c        |  462
-rw-r--r--   fs/proc/fd.c          |  114
-rw-r--r--   fs/proc/generic.c     |  100
-rw-r--r--   fs/proc/internal.h    |   10
-rw-r--r--   fs/proc/kcore.c       |    2
-rw-r--r--   fs/proc/namespaces.c  |   87
-rw-r--r--   fs/proc/proc_net.c    |    9
-rw-r--r--   fs/proc/proc_sysctl.c |   78
-rw-r--r--   fs/proc/root.c        |   19
-rw-r--r--   fs/proc/task_mmu.c    |  168
-rw-r--r--   fs/proc/uptime.c      |    3
-rw-r--r--   fs/proc/vmcore.c      |  694
12 files changed, 950 insertions, 796 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c3834dad09b3..1485e38daaa3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1681,46 +1681,34 @@ const struct dentry_operations pid_dentry_operations = | |||
1681 | * reported by readdir in sync with the inode numbers reported | 1681 | * reported by readdir in sync with the inode numbers reported |
1682 | * by stat. | 1682 | * by stat. |
1683 | */ | 1683 | */ |
1684 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 1684 | bool proc_fill_cache(struct file *file, struct dir_context *ctx, |
1685 | const char *name, int len, | 1685 | const char *name, int len, |
1686 | instantiate_t instantiate, struct task_struct *task, const void *ptr) | 1686 | instantiate_t instantiate, struct task_struct *task, const void *ptr) |
1687 | { | 1687 | { |
1688 | struct dentry *child, *dir = filp->f_path.dentry; | 1688 | struct dentry *child, *dir = file->f_path.dentry; |
1689 | struct qstr qname = QSTR_INIT(name, len); | ||
1689 | struct inode *inode; | 1690 | struct inode *inode; |
1690 | struct qstr qname; | 1691 | unsigned type; |
1691 | ino_t ino = 0; | 1692 | ino_t ino; |
1692 | unsigned type = DT_UNKNOWN; | ||
1693 | |||
1694 | qname.name = name; | ||
1695 | qname.len = len; | ||
1696 | qname.hash = full_name_hash(name, len); | ||
1697 | 1693 | ||
1698 | child = d_lookup(dir, &qname); | 1694 | child = d_hash_and_lookup(dir, &qname); |
1699 | if (!child) { | 1695 | if (!child) { |
1700 | struct dentry *new; | 1696 | child = d_alloc(dir, &qname); |
1701 | new = d_alloc(dir, &qname); | 1697 | if (!child) |
1702 | if (new) { | 1698 | goto end_instantiate; |
1703 | child = instantiate(dir->d_inode, new, task, ptr); | 1699 | if (instantiate(dir->d_inode, child, task, ptr) < 0) { |
1704 | if (child) | 1700 | dput(child); |
1705 | dput(new); | 1701 | goto end_instantiate; |
1706 | else | ||
1707 | child = new; | ||
1708 | } | 1702 | } |
1709 | } | 1703 | } |
1710 | if (!child || IS_ERR(child) || !child->d_inode) | ||
1711 | goto end_instantiate; | ||
1712 | inode = child->d_inode; | 1704 | inode = child->d_inode; |
1713 | if (inode) { | 1705 | ino = inode->i_ino; |
1714 | ino = inode->i_ino; | 1706 | type = inode->i_mode >> 12; |
1715 | type = inode->i_mode >> 12; | ||
1716 | } | ||
1717 | dput(child); | 1707 | dput(child); |
1708 | return dir_emit(ctx, name, len, ino, type); | ||
1709 | |||
1718 | end_instantiate: | 1710 | end_instantiate: |
1719 | if (!ino) | 1711 | return dir_emit(ctx, name, len, 1, DT_UNKNOWN); |
1720 | ino = find_inode_number(dir, &qname); | ||
1721 | if (!ino) | ||
1722 | ino = 1; | ||
1723 | return filldir(dirent, name, len, filp->f_pos, ino, type); | ||
1724 | } | 1712 | } |
1725 | 1713 | ||
1726 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1714 | #ifdef CONFIG_CHECKPOINT_RESTORE |
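Besides returning bool and emitting through dir_emit(), the new proc_fill_cache() drops the open-coded qstr hashing in favour of d_hash_and_lookup(), which computes the hash and performs the dcache lookup in one call. A hedged sketch of the equivalence, using the qname already built with QSTR_INIT() above:

	/* before: hash by hand, then look up */
	qname.hash = full_name_hash(name, len);
	child = d_lookup(dir, &qname);

	/* after: one helper hashes (honouring the parent's ->d_hash(), if any)
	 * and then performs the lookup */
	child = d_hash_and_lookup(dir, &qname);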
@@ -1846,7 +1834,7 @@ struct map_files_info { | |||
1846 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ | 1834 | unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ |
1847 | }; | 1835 | }; |
1848 | 1836 | ||
1849 | static struct dentry * | 1837 | static int |
1850 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | 1838 | proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, |
1851 | struct task_struct *task, const void *ptr) | 1839 | struct task_struct *task, const void *ptr) |
1852 | { | 1840 | { |
@@ -1856,7 +1844,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | |||
1856 | 1844 | ||
1857 | inode = proc_pid_make_inode(dir->i_sb, task); | 1845 | inode = proc_pid_make_inode(dir->i_sb, task); |
1858 | if (!inode) | 1846 | if (!inode) |
1859 | return ERR_PTR(-ENOENT); | 1847 | return -ENOENT; |
1860 | 1848 | ||
1861 | ei = PROC_I(inode); | 1849 | ei = PROC_I(inode); |
1862 | ei->op.proc_get_link = proc_map_files_get_link; | 1850 | ei->op.proc_get_link = proc_map_files_get_link; |
@@ -1873,7 +1861,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, | |||
1873 | d_set_d_op(dentry, &tid_map_files_dentry_operations); | 1861 | d_set_d_op(dentry, &tid_map_files_dentry_operations); |
1874 | d_add(dentry, inode); | 1862 | d_add(dentry, inode); |
1875 | 1863 | ||
1876 | return NULL; | 1864 | return 0; |
1877 | } | 1865 | } |
1878 | 1866 | ||
1879 | static struct dentry *proc_map_files_lookup(struct inode *dir, | 1867 | static struct dentry *proc_map_files_lookup(struct inode *dir, |
@@ -1882,23 +1870,23 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, | |||
1882 | unsigned long vm_start, vm_end; | 1870 | unsigned long vm_start, vm_end; |
1883 | struct vm_area_struct *vma; | 1871 | struct vm_area_struct *vma; |
1884 | struct task_struct *task; | 1872 | struct task_struct *task; |
1885 | struct dentry *result; | 1873 | int result; |
1886 | struct mm_struct *mm; | 1874 | struct mm_struct *mm; |
1887 | 1875 | ||
1888 | result = ERR_PTR(-EPERM); | 1876 | result = -EPERM; |
1889 | if (!capable(CAP_SYS_ADMIN)) | 1877 | if (!capable(CAP_SYS_ADMIN)) |
1890 | goto out; | 1878 | goto out; |
1891 | 1879 | ||
1892 | result = ERR_PTR(-ENOENT); | 1880 | result = -ENOENT; |
1893 | task = get_proc_task(dir); | 1881 | task = get_proc_task(dir); |
1894 | if (!task) | 1882 | if (!task) |
1895 | goto out; | 1883 | goto out; |
1896 | 1884 | ||
1897 | result = ERR_PTR(-EACCES); | 1885 | result = -EACCES; |
1898 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 1886 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
1899 | goto out_put_task; | 1887 | goto out_put_task; |
1900 | 1888 | ||
1901 | result = ERR_PTR(-ENOENT); | 1889 | result = -ENOENT; |
1902 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) | 1890 | if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) |
1903 | goto out_put_task; | 1891 | goto out_put_task; |
1904 | 1892 | ||
@@ -1921,7 +1909,7 @@ out_no_vma: | |||
1921 | out_put_task: | 1909 | out_put_task: |
1922 | put_task_struct(task); | 1910 | put_task_struct(task); |
1923 | out: | 1911 | out: |
1924 | return result; | 1912 | return ERR_PTR(result); |
1925 | } | 1913 | } |
1926 | 1914 | ||
1927 | static const struct inode_operations proc_map_files_inode_operations = { | 1915 | static const struct inode_operations proc_map_files_inode_operations = { |
@@ -1931,14 +1919,15 @@ static const struct inode_operations proc_map_files_inode_operations = { | |||
1931 | }; | 1919 | }; |
1932 | 1920 | ||
1933 | static int | 1921 | static int |
1934 | proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | 1922 | proc_map_files_readdir(struct file *file, struct dir_context *ctx) |
1935 | { | 1923 | { |
1936 | struct dentry *dentry = filp->f_path.dentry; | ||
1937 | struct inode *inode = dentry->d_inode; | ||
1938 | struct vm_area_struct *vma; | 1924 | struct vm_area_struct *vma; |
1939 | struct task_struct *task; | 1925 | struct task_struct *task; |
1940 | struct mm_struct *mm; | 1926 | struct mm_struct *mm; |
1941 | ino_t ino; | 1927 | unsigned long nr_files, pos, i; |
1928 | struct flex_array *fa = NULL; | ||
1929 | struct map_files_info info; | ||
1930 | struct map_files_info *p; | ||
1942 | int ret; | 1931 | int ret; |
1943 | 1932 | ||
1944 | ret = -EPERM; | 1933 | ret = -EPERM; |
@@ -1946,7 +1935,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
1946 | goto out; | 1935 | goto out; |
1947 | 1936 | ||
1948 | ret = -ENOENT; | 1937 | ret = -ENOENT; |
1949 | task = get_proc_task(inode); | 1938 | task = get_proc_task(file_inode(file)); |
1950 | if (!task) | 1939 | if (!task) |
1951 | goto out; | 1940 | goto out; |
1952 | 1941 | ||
@@ -1955,91 +1944,73 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
1955 | goto out_put_task; | 1944 | goto out_put_task; |
1956 | 1945 | ||
1957 | ret = 0; | 1946 | ret = 0; |
1958 | switch (filp->f_pos) { | 1947 | if (!dir_emit_dots(file, ctx)) |
1959 | case 0: | 1948 | goto out_put_task; |
1960 | ino = inode->i_ino; | ||
1961 | if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) | ||
1962 | goto out_put_task; | ||
1963 | filp->f_pos++; | ||
1964 | case 1: | ||
1965 | ino = parent_ino(dentry); | ||
1966 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
1967 | goto out_put_task; | ||
1968 | filp->f_pos++; | ||
1969 | default: | ||
1970 | { | ||
1971 | unsigned long nr_files, pos, i; | ||
1972 | struct flex_array *fa = NULL; | ||
1973 | struct map_files_info info; | ||
1974 | struct map_files_info *p; | ||
1975 | |||
1976 | mm = get_task_mm(task); | ||
1977 | if (!mm) | ||
1978 | goto out_put_task; | ||
1979 | down_read(&mm->mmap_sem); | ||
1980 | 1949 | ||
1981 | nr_files = 0; | 1950 | mm = get_task_mm(task); |
1951 | if (!mm) | ||
1952 | goto out_put_task; | ||
1953 | down_read(&mm->mmap_sem); | ||
1982 | 1954 | ||
1983 | /* | 1955 | nr_files = 0; |
1984 | * We need two passes here: | ||
1985 | * | ||
1986 | * 1) Collect vmas of mapped files with mmap_sem taken | ||
1987 | * 2) Release mmap_sem and instantiate entries | ||
1988 | * | ||
1989 | * otherwise we get lockdep complained, since filldir() | ||
1990 | * routine might require mmap_sem taken in might_fault(). | ||
1991 | */ | ||
1992 | 1956 | ||
1993 | for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { | 1957 | /* |
1994 | if (vma->vm_file && ++pos > filp->f_pos) | 1958 | * We need two passes here: |
1995 | nr_files++; | 1959 | * |
1996 | } | 1960 | * 1) Collect vmas of mapped files with mmap_sem taken |
1961 | * 2) Release mmap_sem and instantiate entries | ||
1962 | * | ||
1963 | * otherwise we get lockdep complained, since filldir() | ||
1964 | * routine might require mmap_sem taken in might_fault(). | ||
1965 | */ | ||
1997 | 1966 | ||
1998 | if (nr_files) { | 1967 | for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { |
1999 | fa = flex_array_alloc(sizeof(info), nr_files, | 1968 | if (vma->vm_file && ++pos > ctx->pos) |
2000 | GFP_KERNEL); | 1969 | nr_files++; |
2001 | if (!fa || flex_array_prealloc(fa, 0, nr_files, | 1970 | } |
2002 | GFP_KERNEL)) { | 1971 | |
2003 | ret = -ENOMEM; | 1972 | if (nr_files) { |
2004 | if (fa) | 1973 | fa = flex_array_alloc(sizeof(info), nr_files, |
2005 | flex_array_free(fa); | 1974 | GFP_KERNEL); |
2006 | up_read(&mm->mmap_sem); | 1975 | if (!fa || flex_array_prealloc(fa, 0, nr_files, |
2007 | mmput(mm); | 1976 | GFP_KERNEL)) { |
2008 | goto out_put_task; | 1977 | ret = -ENOMEM; |
2009 | } | 1978 | if (fa) |
2010 | for (i = 0, vma = mm->mmap, pos = 2; vma; | 1979 | flex_array_free(fa); |
2011 | vma = vma->vm_next) { | 1980 | up_read(&mm->mmap_sem); |
2012 | if (!vma->vm_file) | 1981 | mmput(mm); |
2013 | continue; | 1982 | goto out_put_task; |
2014 | if (++pos <= filp->f_pos) | ||
2015 | continue; | ||
2016 | |||
2017 | info.mode = vma->vm_file->f_mode; | ||
2018 | info.len = snprintf(info.name, | ||
2019 | sizeof(info.name), "%lx-%lx", | ||
2020 | vma->vm_start, vma->vm_end); | ||
2021 | if (flex_array_put(fa, i++, &info, GFP_KERNEL)) | ||
2022 | BUG(); | ||
2023 | } | ||
2024 | } | 1983 | } |
2025 | up_read(&mm->mmap_sem); | 1984 | for (i = 0, vma = mm->mmap, pos = 2; vma; |
2026 | 1985 | vma = vma->vm_next) { | |
2027 | for (i = 0; i < nr_files; i++) { | 1986 | if (!vma->vm_file) |
2028 | p = flex_array_get(fa, i); | 1987 | continue; |
2029 | ret = proc_fill_cache(filp, dirent, filldir, | 1988 | if (++pos <= ctx->pos) |
2030 | p->name, p->len, | 1989 | continue; |
2031 | proc_map_files_instantiate, | 1990 | |
2032 | task, | 1991 | info.mode = vma->vm_file->f_mode; |
2033 | (void *)(unsigned long)p->mode); | 1992 | info.len = snprintf(info.name, |
2034 | if (ret) | 1993 | sizeof(info.name), "%lx-%lx", |
2035 | break; | 1994 | vma->vm_start, vma->vm_end); |
2036 | filp->f_pos++; | 1995 | if (flex_array_put(fa, i++, &info, GFP_KERNEL)) |
1996 | BUG(); | ||
2037 | } | 1997 | } |
2038 | if (fa) | ||
2039 | flex_array_free(fa); | ||
2040 | mmput(mm); | ||
2041 | } | 1998 | } |
1999 | up_read(&mm->mmap_sem); | ||
2000 | |||
2001 | for (i = 0; i < nr_files; i++) { | ||
2002 | p = flex_array_get(fa, i); | ||
2003 | if (!proc_fill_cache(file, ctx, | ||
2004 | p->name, p->len, | ||
2005 | proc_map_files_instantiate, | ||
2006 | task, | ||
2007 | (void *)(unsigned long)p->mode)) | ||
2008 | break; | ||
2009 | ctx->pos++; | ||
2042 | } | 2010 | } |
2011 | if (fa) | ||
2012 | flex_array_free(fa); | ||
2013 | mmput(mm); | ||
2043 | 2014 | ||
2044 | out_put_task: | 2015 | out_put_task: |
2045 | put_task_struct(task); | 2016 | put_task_struct(task); |
@@ -2049,7 +2020,7 @@ out: | |||
2049 | 2020 | ||
2050 | static const struct file_operations proc_map_files_operations = { | 2021 | static const struct file_operations proc_map_files_operations = { |
2051 | .read = generic_read_dir, | 2022 | .read = generic_read_dir, |
2052 | .readdir = proc_map_files_readdir, | 2023 | .iterate = proc_map_files_readdir, |
2053 | .llseek = default_llseek, | 2024 | .llseek = default_llseek, |
2054 | }; | 2025 | }; |
2055 | 2026 | ||
@@ -2152,13 +2123,12 @@ static const struct file_operations proc_timers_operations = { | |||
2152 | }; | 2123 | }; |
2153 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | 2124 | #endif /* CONFIG_CHECKPOINT_RESTORE */ |
2154 | 2125 | ||
2155 | static struct dentry *proc_pident_instantiate(struct inode *dir, | 2126 | static int proc_pident_instantiate(struct inode *dir, |
2156 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 2127 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
2157 | { | 2128 | { |
2158 | const struct pid_entry *p = ptr; | 2129 | const struct pid_entry *p = ptr; |
2159 | struct inode *inode; | 2130 | struct inode *inode; |
2160 | struct proc_inode *ei; | 2131 | struct proc_inode *ei; |
2161 | struct dentry *error = ERR_PTR(-ENOENT); | ||
2162 | 2132 | ||
2163 | inode = proc_pid_make_inode(dir->i_sb, task); | 2133 | inode = proc_pid_make_inode(dir->i_sb, task); |
2164 | if (!inode) | 2134 | if (!inode) |
@@ -2177,9 +2147,9 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, | |||
2177 | d_add(dentry, inode); | 2147 | d_add(dentry, inode); |
2178 | /* Close the race of the process dying before we return the dentry */ | 2148 | /* Close the race of the process dying before we return the dentry */ |
2179 | if (pid_revalidate(dentry, 0)) | 2149 | if (pid_revalidate(dentry, 0)) |
2180 | error = NULL; | 2150 | return 0; |
2181 | out: | 2151 | out: |
2182 | return error; | 2152 | return -ENOENT; |
2183 | } | 2153 | } |
2184 | 2154 | ||
2185 | static struct dentry *proc_pident_lookup(struct inode *dir, | 2155 | static struct dentry *proc_pident_lookup(struct inode *dir, |
@@ -2187,11 +2157,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
2187 | const struct pid_entry *ents, | 2157 | const struct pid_entry *ents, |
2188 | unsigned int nents) | 2158 | unsigned int nents) |
2189 | { | 2159 | { |
2190 | struct dentry *error; | 2160 | int error; |
2191 | struct task_struct *task = get_proc_task(dir); | 2161 | struct task_struct *task = get_proc_task(dir); |
2192 | const struct pid_entry *p, *last; | 2162 | const struct pid_entry *p, *last; |
2193 | 2163 | ||
2194 | error = ERR_PTR(-ENOENT); | 2164 | error = -ENOENT; |
2195 | 2165 | ||
2196 | if (!task) | 2166 | if (!task) |
2197 | goto out_no_task; | 2167 | goto out_no_task; |
@@ -2214,70 +2184,33 @@ static struct dentry *proc_pident_lookup(struct inode *dir, | |||
2214 | out: | 2184 | out: |
2215 | put_task_struct(task); | 2185 | put_task_struct(task); |
2216 | out_no_task: | 2186 | out_no_task: |
2217 | return error; | 2187 | return ERR_PTR(error); |
2218 | } | ||
2219 | |||
2220 | static int proc_pident_fill_cache(struct file *filp, void *dirent, | ||
2221 | filldir_t filldir, struct task_struct *task, const struct pid_entry *p) | ||
2222 | { | ||
2223 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, | ||
2224 | proc_pident_instantiate, task, p); | ||
2225 | } | 2188 | } |
2226 | 2189 | ||
2227 | static int proc_pident_readdir(struct file *filp, | 2190 | static int proc_pident_readdir(struct file *file, struct dir_context *ctx, |
2228 | void *dirent, filldir_t filldir, | ||
2229 | const struct pid_entry *ents, unsigned int nents) | 2191 | const struct pid_entry *ents, unsigned int nents) |
2230 | { | 2192 | { |
2231 | int i; | 2193 | struct task_struct *task = get_proc_task(file_inode(file)); |
2232 | struct dentry *dentry = filp->f_path.dentry; | 2194 | const struct pid_entry *p; |
2233 | struct inode *inode = dentry->d_inode; | ||
2234 | struct task_struct *task = get_proc_task(inode); | ||
2235 | const struct pid_entry *p, *last; | ||
2236 | ino_t ino; | ||
2237 | int ret; | ||
2238 | 2195 | ||
2239 | ret = -ENOENT; | ||
2240 | if (!task) | 2196 | if (!task) |
2241 | goto out_no_task; | 2197 | return -ENOENT; |
2242 | 2198 | ||
2243 | ret = 0; | 2199 | if (!dir_emit_dots(file, ctx)) |
2244 | i = filp->f_pos; | 2200 | goto out; |
2245 | switch (i) { | 2201 | |
2246 | case 0: | 2202 | if (ctx->pos >= nents + 2) |
2247 | ino = inode->i_ino; | 2203 | goto out; |
2248 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
2249 | goto out; | ||
2250 | i++; | ||
2251 | filp->f_pos++; | ||
2252 | /* fall through */ | ||
2253 | case 1: | ||
2254 | ino = parent_ino(dentry); | ||
2255 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | ||
2256 | goto out; | ||
2257 | i++; | ||
2258 | filp->f_pos++; | ||
2259 | /* fall through */ | ||
2260 | default: | ||
2261 | i -= 2; | ||
2262 | if (i >= nents) { | ||
2263 | ret = 1; | ||
2264 | goto out; | ||
2265 | } | ||
2266 | p = ents + i; | ||
2267 | last = &ents[nents - 1]; | ||
2268 | while (p <= last) { | ||
2269 | if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) | ||
2270 | goto out; | ||
2271 | filp->f_pos++; | ||
2272 | p++; | ||
2273 | } | ||
2274 | } | ||
2275 | 2204 | ||
2276 | ret = 1; | 2205 | for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) { |
2206 | if (!proc_fill_cache(file, ctx, p->name, p->len, | ||
2207 | proc_pident_instantiate, task, p)) | ||
2208 | break; | ||
2209 | ctx->pos++; | ||
2210 | } | ||
2277 | out: | 2211 | out: |
2278 | put_task_struct(task); | 2212 | put_task_struct(task); |
2279 | out_no_task: | 2213 | return 0; |
2280 | return ret; | ||
2281 | } | 2214 | } |
2282 | 2215 | ||
2283 | #ifdef CONFIG_SECURITY | 2216 | #ifdef CONFIG_SECURITY |
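The table-driven readdir above relies on a fixed mapping between ctx->pos and the pid_entry array: positions 0 and 1 are the dot entries, positions 2 through nents+1 index the table, and anything beyond that means the directory is exhausted. A small hypothetical helper, not in the patch, stating the same arithmetic:

static const struct pid_entry *pident_entry_for_pos(const struct pid_entry *ents,
						    unsigned int nents, loff_t pos)
{
	/* pos 0 and 1 are "." and ".."; table entries start at pos 2 */
	if (pos < 2 || pos >= nents + 2)
		return NULL;
	return ents + (pos - 2);
}

So with nents == 3, a getdents() call resumed at pos 4 continues from ents[2], and pos 5 returns nothing further.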
@@ -2362,16 +2295,15 @@ static const struct pid_entry attr_dir_stuff[] = { | |||
2362 | REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), | 2295 | REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), |
2363 | }; | 2296 | }; |
2364 | 2297 | ||
2365 | static int proc_attr_dir_readdir(struct file * filp, | 2298 | static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) |
2366 | void * dirent, filldir_t filldir) | ||
2367 | { | 2299 | { |
2368 | return proc_pident_readdir(filp,dirent,filldir, | 2300 | return proc_pident_readdir(file, ctx, |
2369 | attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); | 2301 | attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); |
2370 | } | 2302 | } |
2371 | 2303 | ||
2372 | static const struct file_operations proc_attr_dir_operations = { | 2304 | static const struct file_operations proc_attr_dir_operations = { |
2373 | .read = generic_read_dir, | 2305 | .read = generic_read_dir, |
2374 | .readdir = proc_attr_dir_readdir, | 2306 | .iterate = proc_attr_dir_readdir, |
2375 | .llseek = default_llseek, | 2307 | .llseek = default_llseek, |
2376 | }; | 2308 | }; |
2377 | 2309 | ||
@@ -2725,16 +2657,15 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2725 | #endif | 2657 | #endif |
2726 | }; | 2658 | }; |
2727 | 2659 | ||
2728 | static int proc_tgid_base_readdir(struct file * filp, | 2660 | static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) |
2729 | void * dirent, filldir_t filldir) | ||
2730 | { | 2661 | { |
2731 | return proc_pident_readdir(filp,dirent,filldir, | 2662 | return proc_pident_readdir(file, ctx, |
2732 | tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); | 2663 | tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); |
2733 | } | 2664 | } |
2734 | 2665 | ||
2735 | static const struct file_operations proc_tgid_base_operations = { | 2666 | static const struct file_operations proc_tgid_base_operations = { |
2736 | .read = generic_read_dir, | 2667 | .read = generic_read_dir, |
2737 | .readdir = proc_tgid_base_readdir, | 2668 | .iterate = proc_tgid_base_readdir, |
2738 | .llseek = default_llseek, | 2669 | .llseek = default_llseek, |
2739 | }; | 2670 | }; |
2740 | 2671 | ||
@@ -2836,11 +2767,10 @@ void proc_flush_task(struct task_struct *task) | |||
2836 | } | 2767 | } |
2837 | } | 2768 | } |
2838 | 2769 | ||
2839 | static struct dentry *proc_pid_instantiate(struct inode *dir, | 2770 | static int proc_pid_instantiate(struct inode *dir, |
2840 | struct dentry * dentry, | 2771 | struct dentry * dentry, |
2841 | struct task_struct *task, const void *ptr) | 2772 | struct task_struct *task, const void *ptr) |
2842 | { | 2773 | { |
2843 | struct dentry *error = ERR_PTR(-ENOENT); | ||
2844 | struct inode *inode; | 2774 | struct inode *inode; |
2845 | 2775 | ||
2846 | inode = proc_pid_make_inode(dir->i_sb, task); | 2776 | inode = proc_pid_make_inode(dir->i_sb, task); |
@@ -2860,14 +2790,14 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, | |||
2860 | d_add(dentry, inode); | 2790 | d_add(dentry, inode); |
2861 | /* Close the race of the process dying before we return the dentry */ | 2791 | /* Close the race of the process dying before we return the dentry */ |
2862 | if (pid_revalidate(dentry, 0)) | 2792 | if (pid_revalidate(dentry, 0)) |
2863 | error = NULL; | 2793 | return 0; |
2864 | out: | 2794 | out: |
2865 | return error; | 2795 | return -ENOENT; |
2866 | } | 2796 | } |
2867 | 2797 | ||
2868 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) | 2798 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) |
2869 | { | 2799 | { |
2870 | struct dentry *result = NULL; | 2800 | int result = 0; |
2871 | struct task_struct *task; | 2801 | struct task_struct *task; |
2872 | unsigned tgid; | 2802 | unsigned tgid; |
2873 | struct pid_namespace *ns; | 2803 | struct pid_namespace *ns; |
@@ -2888,7 +2818,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign | |||
2888 | result = proc_pid_instantiate(dir, dentry, task, NULL); | 2818 | result = proc_pid_instantiate(dir, dentry, task, NULL); |
2889 | put_task_struct(task); | 2819 | put_task_struct(task); |
2890 | out: | 2820 | out: |
2891 | return result; | 2821 | return ERR_PTR(result); |
2892 | } | 2822 | } |
2893 | 2823 | ||
2894 | /* | 2824 | /* |
@@ -2936,58 +2866,42 @@ retry: | |||
2936 | 2866 | ||
2937 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) | 2867 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) |
2938 | 2868 | ||
2939 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
2940 | struct tgid_iter iter) | ||
2941 | { | ||
2942 | char name[PROC_NUMBUF]; | ||
2943 | int len = snprintf(name, sizeof(name), "%d", iter.tgid); | ||
2944 | return proc_fill_cache(filp, dirent, filldir, name, len, | ||
2945 | proc_pid_instantiate, iter.task, NULL); | ||
2946 | } | ||
2947 | |||
2948 | static int fake_filldir(void *buf, const char *name, int namelen, | ||
2949 | loff_t offset, u64 ino, unsigned d_type) | ||
2950 | { | ||
2951 | return 0; | ||
2952 | } | ||
2953 | |||
2954 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 2869 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
2955 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2870 | int proc_pid_readdir(struct file *file, struct dir_context *ctx) |
2956 | { | 2871 | { |
2957 | struct tgid_iter iter; | 2872 | struct tgid_iter iter; |
2958 | struct pid_namespace *ns; | 2873 | struct pid_namespace *ns = file->f_dentry->d_sb->s_fs_info; |
2959 | filldir_t __filldir; | 2874 | loff_t pos = ctx->pos; |
2960 | loff_t pos = filp->f_pos; | ||
2961 | 2875 | ||
2962 | if (pos >= PID_MAX_LIMIT + TGID_OFFSET) | 2876 | if (pos >= PID_MAX_LIMIT + TGID_OFFSET) |
2963 | goto out; | 2877 | return 0; |
2964 | 2878 | ||
2965 | if (pos == TGID_OFFSET - 1) { | 2879 | if (pos == TGID_OFFSET - 1) { |
2966 | if (proc_fill_cache(filp, dirent, filldir, "self", 4, | 2880 | struct inode *inode = ns->proc_self->d_inode; |
2967 | NULL, NULL, NULL) < 0) | 2881 | if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) |
2968 | goto out; | 2882 | return 0; |
2969 | iter.tgid = 0; | 2883 | iter.tgid = 0; |
2970 | } else { | 2884 | } else { |
2971 | iter.tgid = pos - TGID_OFFSET; | 2885 | iter.tgid = pos - TGID_OFFSET; |
2972 | } | 2886 | } |
2973 | iter.task = NULL; | 2887 | iter.task = NULL; |
2974 | ns = filp->f_dentry->d_sb->s_fs_info; | ||
2975 | for (iter = next_tgid(ns, iter); | 2888 | for (iter = next_tgid(ns, iter); |
2976 | iter.task; | 2889 | iter.task; |
2977 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 2890 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
2978 | if (has_pid_permissions(ns, iter.task, 2)) | 2891 | char name[PROC_NUMBUF]; |
2979 | __filldir = filldir; | 2892 | int len; |
2980 | else | 2893 | if (!has_pid_permissions(ns, iter.task, 2)) |
2981 | __filldir = fake_filldir; | 2894 | continue; |
2982 | 2895 | ||
2983 | filp->f_pos = iter.tgid + TGID_OFFSET; | 2896 | len = snprintf(name, sizeof(name), "%d", iter.tgid); |
2984 | if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { | 2897 | ctx->pos = iter.tgid + TGID_OFFSET; |
2898 | if (!proc_fill_cache(file, ctx, name, len, | ||
2899 | proc_pid_instantiate, iter.task, NULL)) { | ||
2985 | put_task_struct(iter.task); | 2900 | put_task_struct(iter.task); |
2986 | goto out; | 2901 | return 0; |
2987 | } | 2902 | } |
2988 | } | 2903 | } |
2989 | filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; | 2904 | ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; |
2990 | out: | ||
2991 | return 0; | 2905 | return 0; |
2992 | } | 2906 | } |
2993 | 2907 | ||
@@ -3075,11 +2989,10 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3075 | #endif | 2989 | #endif |
3076 | }; | 2990 | }; |
3077 | 2991 | ||
3078 | static int proc_tid_base_readdir(struct file * filp, | 2992 | static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) |
3079 | void * dirent, filldir_t filldir) | ||
3080 | { | 2993 | { |
3081 | return proc_pident_readdir(filp,dirent,filldir, | 2994 | return proc_pident_readdir(file, ctx, |
3082 | tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); | 2995 | tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); |
3083 | } | 2996 | } |
3084 | 2997 | ||
3085 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 2998 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
@@ -3090,7 +3003,7 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den | |||
3090 | 3003 | ||
3091 | static const struct file_operations proc_tid_base_operations = { | 3004 | static const struct file_operations proc_tid_base_operations = { |
3092 | .read = generic_read_dir, | 3005 | .read = generic_read_dir, |
3093 | .readdir = proc_tid_base_readdir, | 3006 | .iterate = proc_tid_base_readdir, |
3094 | .llseek = default_llseek, | 3007 | .llseek = default_llseek, |
3095 | }; | 3008 | }; |
3096 | 3009 | ||
@@ -3100,10 +3013,9 @@ static const struct inode_operations proc_tid_base_inode_operations = { | |||
3100 | .setattr = proc_setattr, | 3013 | .setattr = proc_setattr, |
3101 | }; | 3014 | }; |
3102 | 3015 | ||
3103 | static struct dentry *proc_task_instantiate(struct inode *dir, | 3016 | static int proc_task_instantiate(struct inode *dir, |
3104 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 3017 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
3105 | { | 3018 | { |
3106 | struct dentry *error = ERR_PTR(-ENOENT); | ||
3107 | struct inode *inode; | 3019 | struct inode *inode; |
3108 | inode = proc_pid_make_inode(dir->i_sb, task); | 3020 | inode = proc_pid_make_inode(dir->i_sb, task); |
3109 | 3021 | ||
@@ -3122,14 +3034,14 @@ static struct dentry *proc_task_instantiate(struct inode *dir, | |||
3122 | d_add(dentry, inode); | 3034 | d_add(dentry, inode); |
3123 | /* Close the race of the process dying before we return the dentry */ | 3035 | /* Close the race of the process dying before we return the dentry */ |
3124 | if (pid_revalidate(dentry, 0)) | 3036 | if (pid_revalidate(dentry, 0)) |
3125 | error = NULL; | 3037 | return 0; |
3126 | out: | 3038 | out: |
3127 | return error; | 3039 | return -ENOENT; |
3128 | } | 3040 | } |
3129 | 3041 | ||
3130 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) | 3042 | static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) |
3131 | { | 3043 | { |
3132 | struct dentry *result = ERR_PTR(-ENOENT); | 3044 | int result = -ENOENT; |
3133 | struct task_struct *task; | 3045 | struct task_struct *task; |
3134 | struct task_struct *leader = get_proc_task(dir); | 3046 | struct task_struct *leader = get_proc_task(dir); |
3135 | unsigned tid; | 3047 | unsigned tid; |
@@ -3159,7 +3071,7 @@ out_drop_task: | |||
3159 | out: | 3071 | out: |
3160 | put_task_struct(leader); | 3072 | put_task_struct(leader); |
3161 | out_no_task: | 3073 | out_no_task: |
3162 | return result; | 3074 | return ERR_PTR(result); |
3163 | } | 3075 | } |
3164 | 3076 | ||
3165 | /* | 3077 | /* |
@@ -3231,30 +3143,16 @@ static struct task_struct *next_tid(struct task_struct *start) | |||
3231 | return pos; | 3143 | return pos; |
3232 | } | 3144 | } |
3233 | 3145 | ||
3234 | static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
3235 | struct task_struct *task, int tid) | ||
3236 | { | ||
3237 | char name[PROC_NUMBUF]; | ||
3238 | int len = snprintf(name, sizeof(name), "%d", tid); | ||
3239 | return proc_fill_cache(filp, dirent, filldir, name, len, | ||
3240 | proc_task_instantiate, task, NULL); | ||
3241 | } | ||
3242 | |||
3243 | /* for the /proc/TGID/task/ directories */ | 3146 | /* for the /proc/TGID/task/ directories */ |
3244 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) | 3147 | static int proc_task_readdir(struct file *file, struct dir_context *ctx) |
3245 | { | 3148 | { |
3246 | struct dentry *dentry = filp->f_path.dentry; | ||
3247 | struct inode *inode = dentry->d_inode; | ||
3248 | struct task_struct *leader = NULL; | 3149 | struct task_struct *leader = NULL; |
3249 | struct task_struct *task; | 3150 | struct task_struct *task = get_proc_task(file_inode(file)); |
3250 | int retval = -ENOENT; | ||
3251 | ino_t ino; | ||
3252 | int tid; | ||
3253 | struct pid_namespace *ns; | 3151 | struct pid_namespace *ns; |
3152 | int tid; | ||
3254 | 3153 | ||
3255 | task = get_proc_task(inode); | ||
3256 | if (!task) | 3154 | if (!task) |
3257 | goto out_no_task; | 3155 | return -ENOENT; |
3258 | rcu_read_lock(); | 3156 | rcu_read_lock(); |
3259 | if (pid_alive(task)) { | 3157 | if (pid_alive(task)) { |
3260 | leader = task->group_leader; | 3158 | leader = task->group_leader; |
@@ -3263,46 +3161,36 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi | |||
3263 | rcu_read_unlock(); | 3161 | rcu_read_unlock(); |
3264 | put_task_struct(task); | 3162 | put_task_struct(task); |
3265 | if (!leader) | 3163 | if (!leader) |
3266 | goto out_no_task; | 3164 | return -ENOENT; |
3267 | retval = 0; | ||
3268 | 3165 | ||
3269 | switch ((unsigned long)filp->f_pos) { | 3166 | if (!dir_emit_dots(file, ctx)) |
3270 | case 0: | 3167 | goto out; |
3271 | ino = inode->i_ino; | ||
3272 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) | ||
3273 | goto out; | ||
3274 | filp->f_pos++; | ||
3275 | /* fall through */ | ||
3276 | case 1: | ||
3277 | ino = parent_ino(dentry); | ||
3278 | if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) | ||
3279 | goto out; | ||
3280 | filp->f_pos++; | ||
3281 | /* fall through */ | ||
3282 | } | ||
3283 | 3168 | ||
3284 | /* f_version caches the tgid value that the last readdir call couldn't | 3169 | /* f_version caches the tgid value that the last readdir call couldn't |
3285 | * return. lseek aka telldir automagically resets f_version to 0. | 3170 | * return. lseek aka telldir automagically resets f_version to 0. |
3286 | */ | 3171 | */ |
3287 | ns = filp->f_dentry->d_sb->s_fs_info; | 3172 | ns = file->f_dentry->d_sb->s_fs_info; |
3288 | tid = (int)filp->f_version; | 3173 | tid = (int)file->f_version; |
3289 | filp->f_version = 0; | 3174 | file->f_version = 0; |
3290 | for (task = first_tid(leader, tid, filp->f_pos - 2, ns); | 3175 | for (task = first_tid(leader, tid, ctx->pos - 2, ns); |
3291 | task; | 3176 | task; |
3292 | task = next_tid(task), filp->f_pos++) { | 3177 | task = next_tid(task), ctx->pos++) { |
3178 | char name[PROC_NUMBUF]; | ||
3179 | int len; | ||
3293 | tid = task_pid_nr_ns(task, ns); | 3180 | tid = task_pid_nr_ns(task, ns); |
3294 | if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { | 3181 | len = snprintf(name, sizeof(name), "%d", tid); |
3182 | if (!proc_fill_cache(file, ctx, name, len, | ||
3183 | proc_task_instantiate, task, NULL)) { | ||
3295 | /* returning this tgid failed, save it as the first | 3184 | /* returning this tgid failed, save it as the first |
3296 | * pid for the next readir call */ | 3185 | * pid for the next readir call */ |
3297 | filp->f_version = (u64)tid; | 3186 | file->f_version = (u64)tid; |
3298 | put_task_struct(task); | 3187 | put_task_struct(task); |
3299 | break; | 3188 | break; |
3300 | } | 3189 | } |
3301 | } | 3190 | } |
3302 | out: | 3191 | out: |
3303 | put_task_struct(leader); | 3192 | put_task_struct(leader); |
3304 | out_no_task: | 3193 | return 0; |
3305 | return retval; | ||
3306 | } | 3194 | } |
3307 | 3195 | ||
3308 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 3196 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
@@ -3328,6 +3216,6 @@ static const struct inode_operations proc_task_inode_operations = { | |||
3328 | 3216 | ||
3329 | static const struct file_operations proc_task_operations = { | 3217 | static const struct file_operations proc_task_operations = { |
3330 | .read = generic_read_dir, | 3218 | .read = generic_read_dir, |
3331 | .readdir = proc_task_readdir, | 3219 | .iterate = proc_task_readdir, |
3332 | .llseek = default_llseek, | 3220 | .llseek = default_llseek, |
3333 | }; | 3221 | }; |
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index d7a4a28ef630..75f2890abbd8 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -167,11 +167,10 @@ static int proc_fd_link(struct dentry *dentry, struct path *path) | |||
167 | return ret; | 167 | return ret; |
168 | } | 168 | } |
169 | 169 | ||
170 | static struct dentry * | 170 | static int |
171 | proc_fd_instantiate(struct inode *dir, struct dentry *dentry, | 171 | proc_fd_instantiate(struct inode *dir, struct dentry *dentry, |
172 | struct task_struct *task, const void *ptr) | 172 | struct task_struct *task, const void *ptr) |
173 | { | 173 | { |
174 | struct dentry *error = ERR_PTR(-ENOENT); | ||
175 | unsigned fd = (unsigned long)ptr; | 174 | unsigned fd = (unsigned long)ptr; |
176 | struct proc_inode *ei; | 175 | struct proc_inode *ei; |
177 | struct inode *inode; | 176 | struct inode *inode; |
@@ -194,9 +193,9 @@ proc_fd_instantiate(struct inode *dir, struct dentry *dentry, | |||
194 | 193 | ||
195 | /* Close the race of the process dying before we return the dentry */ | 194 | /* Close the race of the process dying before we return the dentry */ |
196 | if (tid_fd_revalidate(dentry, 0)) | 195 | if (tid_fd_revalidate(dentry, 0)) |
197 | error = NULL; | 196 | return 0; |
198 | out: | 197 | out: |
199 | return error; | 198 | return -ENOENT; |
200 | } | 199 | } |
201 | 200 | ||
202 | static struct dentry *proc_lookupfd_common(struct inode *dir, | 201 | static struct dentry *proc_lookupfd_common(struct inode *dir, |
@@ -204,7 +203,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, | |||
204 | instantiate_t instantiate) | 203 | instantiate_t instantiate) |
205 | { | 204 | { |
206 | struct task_struct *task = get_proc_task(dir); | 205 | struct task_struct *task = get_proc_task(dir); |
207 | struct dentry *result = ERR_PTR(-ENOENT); | 206 | int result = -ENOENT; |
208 | unsigned fd = name_to_int(dentry); | 207 | unsigned fd = name_to_int(dentry); |
209 | 208 | ||
210 | if (!task) | 209 | if (!task) |
@@ -216,77 +215,61 @@ static struct dentry *proc_lookupfd_common(struct inode *dir, | |||
216 | out: | 215 | out: |
217 | put_task_struct(task); | 216 | put_task_struct(task); |
218 | out_no_task: | 217 | out_no_task: |
219 | return result; | 218 | return ERR_PTR(result); |
220 | } | 219 | } |
221 | 220 | ||
222 | static int proc_readfd_common(struct file * filp, void * dirent, | 221 | static int proc_readfd_common(struct file *file, struct dir_context *ctx, |
223 | filldir_t filldir, instantiate_t instantiate) | 222 | instantiate_t instantiate) |
224 | { | 223 | { |
225 | struct dentry *dentry = filp->f_path.dentry; | 224 | struct task_struct *p = get_proc_task(file_inode(file)); |
226 | struct inode *inode = dentry->d_inode; | ||
227 | struct task_struct *p = get_proc_task(inode); | ||
228 | struct files_struct *files; | 225 | struct files_struct *files; |
229 | unsigned int fd, ino; | 226 | unsigned int fd; |
230 | int retval; | ||
231 | 227 | ||
232 | retval = -ENOENT; | ||
233 | if (!p) | 228 | if (!p) |
234 | goto out_no_task; | 229 | return -ENOENT; |
235 | retval = 0; | ||
236 | |||
237 | fd = filp->f_pos; | ||
238 | switch (fd) { | ||
239 | case 0: | ||
240 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
241 | goto out; | ||
242 | filp->f_pos++; | ||
243 | case 1: | ||
244 | ino = parent_ino(dentry); | ||
245 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
246 | goto out; | ||
247 | filp->f_pos++; | ||
248 | default: | ||
249 | files = get_files_struct(p); | ||
250 | if (!files) | ||
251 | goto out; | ||
252 | rcu_read_lock(); | ||
253 | for (fd = filp->f_pos - 2; | ||
254 | fd < files_fdtable(files)->max_fds; | ||
255 | fd++, filp->f_pos++) { | ||
256 | char name[PROC_NUMBUF]; | ||
257 | int len; | ||
258 | int rv; | ||
259 | |||
260 | if (!fcheck_files(files, fd)) | ||
261 | continue; | ||
262 | rcu_read_unlock(); | ||
263 | 230 | ||
264 | len = snprintf(name, sizeof(name), "%d", fd); | 231 | if (!dir_emit_dots(file, ctx)) |
265 | rv = proc_fill_cache(filp, dirent, filldir, | 232 | goto out; |
266 | name, len, instantiate, p, | 233 | if (!dir_emit_dots(file, ctx)) |
267 | (void *)(unsigned long)fd); | 234 | goto out; |
268 | if (rv < 0) | 235 | files = get_files_struct(p); |
269 | goto out_fd_loop; | 236 | if (!files) |
270 | rcu_read_lock(); | 237 | goto out; |
271 | } | 238 | |
272 | rcu_read_unlock(); | 239 | rcu_read_lock(); |
273 | out_fd_loop: | 240 | for (fd = ctx->pos - 2; |
274 | put_files_struct(files); | 241 | fd < files_fdtable(files)->max_fds; |
242 | fd++, ctx->pos++) { | ||
243 | char name[PROC_NUMBUF]; | ||
244 | int len; | ||
245 | |||
246 | if (!fcheck_files(files, fd)) | ||
247 | continue; | ||
248 | rcu_read_unlock(); | ||
249 | |||
250 | len = snprintf(name, sizeof(name), "%d", fd); | ||
251 | if (!proc_fill_cache(file, ctx, | ||
252 | name, len, instantiate, p, | ||
253 | (void *)(unsigned long)fd)) | ||
254 | goto out_fd_loop; | ||
255 | rcu_read_lock(); | ||
275 | } | 256 | } |
257 | rcu_read_unlock(); | ||
258 | out_fd_loop: | ||
259 | put_files_struct(files); | ||
276 | out: | 260 | out: |
277 | put_task_struct(p); | 261 | put_task_struct(p); |
278 | out_no_task: | 262 | return 0; |
279 | return retval; | ||
280 | } | 263 | } |
281 | 264 | ||
282 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) | 265 | static int proc_readfd(struct file *file, struct dir_context *ctx) |
283 | { | 266 | { |
284 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); | 267 | return proc_readfd_common(file, ctx, proc_fd_instantiate); |
285 | } | 268 | } |
286 | 269 | ||
287 | const struct file_operations proc_fd_operations = { | 270 | const struct file_operations proc_fd_operations = { |
288 | .read = generic_read_dir, | 271 | .read = generic_read_dir, |
289 | .readdir = proc_readfd, | 272 | .iterate = proc_readfd, |
290 | .llseek = default_llseek, | 273 | .llseek = default_llseek, |
291 | }; | 274 | }; |
292 | 275 | ||
@@ -316,11 +299,10 @@ const struct inode_operations proc_fd_inode_operations = { | |||
316 | .setattr = proc_setattr, | 299 | .setattr = proc_setattr, |
317 | }; | 300 | }; |
318 | 301 | ||
319 | static struct dentry * | 302 | static int |
320 | proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, | 303 | proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, |
321 | struct task_struct *task, const void *ptr) | 304 | struct task_struct *task, const void *ptr) |
322 | { | 305 | { |
323 | struct dentry *error = ERR_PTR(-ENOENT); | ||
324 | unsigned fd = (unsigned long)ptr; | 306 | unsigned fd = (unsigned long)ptr; |
325 | struct proc_inode *ei; | 307 | struct proc_inode *ei; |
326 | struct inode *inode; | 308 | struct inode *inode; |
@@ -340,9 +322,9 @@ proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry, | |||
340 | 322 | ||
341 | /* Close the race of the process dying before we return the dentry */ | 323 | /* Close the race of the process dying before we return the dentry */ |
342 | if (tid_fd_revalidate(dentry, 0)) | 324 | if (tid_fd_revalidate(dentry, 0)) |
343 | error = NULL; | 325 | return 0; |
344 | out: | 326 | out: |
345 | return error; | 327 | return -ENOENT; |
346 | } | 328 | } |
347 | 329 | ||
348 | static struct dentry * | 330 | static struct dentry * |
@@ -351,9 +333,9 @@ proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) | |||
351 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); | 333 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); |
352 | } | 334 | } |
353 | 335 | ||
354 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) | 336 | static int proc_readfdinfo(struct file *file, struct dir_context *ctx) |
355 | { | 337 | { |
356 | return proc_readfd_common(filp, dirent, filldir, | 338 | return proc_readfd_common(file, ctx, |
357 | proc_fdinfo_instantiate); | 339 | proc_fdinfo_instantiate); |
358 | } | 340 | } |
359 | 341 | ||
@@ -364,6 +346,6 @@ const struct inode_operations proc_fdinfo_inode_operations = { | |||
364 | 346 | ||
365 | const struct file_operations proc_fdinfo_operations = { | 347 | const struct file_operations proc_fdinfo_operations = { |
366 | .read = generic_read_dir, | 348 | .read = generic_read_dir, |
367 | .readdir = proc_readfdinfo, | 349 | .iterate = proc_readfdinfo, |
368 | .llseek = default_llseek, | 350 | .llseek = default_llseek, |
369 | }; | 351 | }; |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a2596afffae6..94441a407337 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -233,76 +233,52 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, | |||
233 | * value of the readdir() call, as long as it's non-negative | 233 | * value of the readdir() call, as long as it's non-negative |
234 | * for success.. | 234 | * for success.. |
235 | */ | 235 | */ |
236 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, | 236 | int proc_readdir_de(struct proc_dir_entry *de, struct file *file, |
237 | filldir_t filldir) | 237 | struct dir_context *ctx) |
238 | { | 238 | { |
239 | unsigned int ino; | ||
240 | int i; | 239 | int i; |
241 | struct inode *inode = file_inode(filp); | ||
242 | int ret = 0; | ||
243 | |||
244 | ino = inode->i_ino; | ||
245 | i = filp->f_pos; | ||
246 | switch (i) { | ||
247 | case 0: | ||
248 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
249 | goto out; | ||
250 | i++; | ||
251 | filp->f_pos++; | ||
252 | /* fall through */ | ||
253 | case 1: | ||
254 | if (filldir(dirent, "..", 2, i, | ||
255 | parent_ino(filp->f_path.dentry), | ||
256 | DT_DIR) < 0) | ||
257 | goto out; | ||
258 | i++; | ||
259 | filp->f_pos++; | ||
260 | /* fall through */ | ||
261 | default: | ||
262 | spin_lock(&proc_subdir_lock); | ||
263 | de = de->subdir; | ||
264 | i -= 2; | ||
265 | for (;;) { | ||
266 | if (!de) { | ||
267 | ret = 1; | ||
268 | spin_unlock(&proc_subdir_lock); | ||
269 | goto out; | ||
270 | } | ||
271 | if (!i) | ||
272 | break; | ||
273 | de = de->next; | ||
274 | i--; | ||
275 | } | ||
276 | 240 | ||
277 | do { | 241 | if (!dir_emit_dots(file, ctx)) |
278 | struct proc_dir_entry *next; | 242 | return 0; |
279 | 243 | ||
280 | /* filldir passes info to user space */ | 244 | spin_lock(&proc_subdir_lock); |
281 | pde_get(de); | 245 | de = de->subdir; |
282 | spin_unlock(&proc_subdir_lock); | 246 | i = ctx->pos - 2; |
283 | if (filldir(dirent, de->name, de->namelen, filp->f_pos, | 247 | for (;;) { |
284 | de->low_ino, de->mode >> 12) < 0) { | 248 | if (!de) { |
285 | pde_put(de); | ||
286 | goto out; | ||
287 | } | ||
288 | spin_lock(&proc_subdir_lock); | ||
289 | filp->f_pos++; | ||
290 | next = de->next; | ||
291 | pde_put(de); | ||
292 | de = next; | ||
293 | } while (de); | ||
294 | spin_unlock(&proc_subdir_lock); | 249 | spin_unlock(&proc_subdir_lock); |
250 | return 0; | ||
251 | } | ||
252 | if (!i) | ||
253 | break; | ||
254 | de = de->next; | ||
255 | i--; | ||
295 | } | 256 | } |
296 | ret = 1; | 257 | |
297 | out: | 258 | do { |
298 | return ret; | 259 | struct proc_dir_entry *next; |
260 | pde_get(de); | ||
261 | spin_unlock(&proc_subdir_lock); | ||
262 | if (!dir_emit(ctx, de->name, de->namelen, | ||
263 | de->low_ino, de->mode >> 12)) { | ||
264 | pde_put(de); | ||
265 | return 0; | ||
266 | } | ||
267 | spin_lock(&proc_subdir_lock); | ||
268 | ctx->pos++; | ||
269 | next = de->next; | ||
270 | pde_put(de); | ||
271 | de = next; | ||
272 | } while (de); | ||
273 | spin_unlock(&proc_subdir_lock); | ||
274 | return 0; | ||
299 | } | 275 | } |
300 | 276 | ||
301 | int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) | 277 | int proc_readdir(struct file *file, struct dir_context *ctx) |
302 | { | 278 | { |
303 | struct inode *inode = file_inode(filp); | 279 | struct inode *inode = file_inode(file); |
304 | 280 | ||
305 | return proc_readdir_de(PDE(inode), filp, dirent, filldir); | 281 | return proc_readdir_de(PDE(inode), file, ctx); |
306 | } | 282 | } |
307 | 283 | ||
308 | /* | 284 | /* |
@@ -313,7 +289,7 @@ int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
313 | static const struct file_operations proc_dir_operations = { | 289 | static const struct file_operations proc_dir_operations = { |
314 | .llseek = generic_file_llseek, | 290 | .llseek = generic_file_llseek, |
315 | .read = generic_read_dir, | 291 | .read = generic_read_dir, |
316 | .readdir = proc_readdir, | 292 | .iterate = proc_readdir, |
317 | }; | 293 | }; |
318 | 294 | ||
319 | /* | 295 | /* |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index d600fb098b6a..651d09a11dde 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -165,14 +165,14 @@ extern int proc_setattr(struct dentry *, struct iattr *); | |||
165 | extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *); | 165 | extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *); |
166 | extern int pid_revalidate(struct dentry *, unsigned int); | 166 | extern int pid_revalidate(struct dentry *, unsigned int); |
167 | extern int pid_delete_dentry(const struct dentry *); | 167 | extern int pid_delete_dentry(const struct dentry *); |
168 | extern int proc_pid_readdir(struct file *, void *, filldir_t); | 168 | extern int proc_pid_readdir(struct file *, struct dir_context *); |
169 | extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int); | 169 | extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int); |
170 | extern loff_t mem_lseek(struct file *, loff_t, int); | 170 | extern loff_t mem_lseek(struct file *, loff_t, int); |
171 | 171 | ||
172 | /* Lookups */ | 172 | /* Lookups */ |
173 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | 173 | typedef int instantiate_t(struct inode *, struct dentry *, |
174 | struct task_struct *, const void *); | 174 | struct task_struct *, const void *); |
175 | extern int proc_fill_cache(struct file *, void *, filldir_t, const char *, int, | 175 | extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int, |
176 | instantiate_t, struct task_struct *, const void *); | 176 | instantiate_t, struct task_struct *, const void *); |
177 | 177 | ||
178 | /* | 178 | /* |
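The internal.h hunk records the new contract for instantiate_t: callbacks now return 0 on success or a negative errno instead of a dentry pointer; lookup callers wrap the error with ERR_PTR(), and readdir callers fall back to emitting the name with a dummy inode number. A hedged skeleton of a conforming callback; all names are illustrative except proc_pid_make_inode(), which is declared just above:

static int demo_instantiate(struct inode *dir, struct dentry *dentry,
			    struct task_struct *task, const void *ptr)
{
	struct inode *inode = proc_pid_make_inode(dir->i_sb, task);

	if (!inode)
		return -ENOENT;
	inode->i_mode = S_IFREG | S_IRUGO;
	d_add(dentry, inode);
	return 0;	/* 0 or -errno, never a dentry pointer */
}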
@@ -183,8 +183,8 @@ extern spinlock_t proc_subdir_lock; | |||
183 | extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); | 183 | extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); |
184 | extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, | 184 | extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, |
185 | struct dentry *); | 185 | struct dentry *); |
186 | extern int proc_readdir(struct file *, void *, filldir_t); | 186 | extern int proc_readdir(struct file *, struct dir_context *); |
187 | extern int proc_readdir_de(struct proc_dir_entry *, struct file *, void *, filldir_t); | 187 | extern int proc_readdir_de(struct proc_dir_entry *, struct file *, struct dir_context *); |
188 | 188 | ||
189 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) | 189 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) |
190 | { | 190 | { |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0a22194e5d58..06ea155e1a59 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -408,7 +408,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) | |||
408 | prpsinfo.pr_zomb = 0; | 408 | prpsinfo.pr_zomb = 0; |
409 | 409 | ||
410 | strcpy(prpsinfo.pr_fname, "vmlinux"); | 410 | strcpy(prpsinfo.pr_fname, "vmlinux"); |
411 | strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ); | 411 | strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs)); |
412 | 412 | ||
413 | nhdr->p_filesz += notesize(¬es[1]); | 413 | nhdr->p_filesz += notesize(¬es[1]); |
414 | bufp = storenote(¬es[1], bufp); | 414 | bufp = storenote(¬es[1], bufp); |
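Unlike the readdir conversion, the kcore hunk is about string termination: strncpy() does not NUL-terminate when the source is at least as long as the destination, so a long saved_command_line could leave pr_psargs unterminated, whereas strlcpy() always terminates, truncating if necessary. Illustrative behaviour with a deliberately small buffer:

	char buf[8];

	strncpy(buf, "a-very-long-command-line", sizeof(buf));
	/* buf holds 8 bytes of the source and no terminating NUL */

	strlcpy(buf, "a-very-long-command-line", sizeof(buf));
	/* buf == "a-very-" with the NUL placed in buf[7] */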
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 54bdc6701e9f..49a7fff2e83a 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -187,13 +187,12 @@ static const struct inode_operations proc_ns_link_inode_operations = { | |||
187 | .setattr = proc_setattr, | 187 | .setattr = proc_setattr, |
188 | }; | 188 | }; |
189 | 189 | ||
190 | static struct dentry *proc_ns_instantiate(struct inode *dir, | 190 | static int proc_ns_instantiate(struct inode *dir, |
191 | struct dentry *dentry, struct task_struct *task, const void *ptr) | 191 | struct dentry *dentry, struct task_struct *task, const void *ptr) |
192 | { | 192 | { |
193 | const struct proc_ns_operations *ns_ops = ptr; | 193 | const struct proc_ns_operations *ns_ops = ptr; |
194 | struct inode *inode; | 194 | struct inode *inode; |
195 | struct proc_inode *ei; | 195 | struct proc_inode *ei; |
196 | struct dentry *error = ERR_PTR(-ENOENT); | ||
197 | 196 | ||
198 | inode = proc_pid_make_inode(dir->i_sb, task); | 197 | inode = proc_pid_make_inode(dir->i_sb, task); |
199 | if (!inode) | 198 | if (!inode) |
@@ -208,90 +207,52 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
208 | d_add(dentry, inode); | 207 | d_add(dentry, inode); |
209 | /* Close the race of the process dying before we return the dentry */ | 208 | /* Close the race of the process dying before we return the dentry */ |
210 | if (pid_revalidate(dentry, 0)) | 209 | if (pid_revalidate(dentry, 0)) |
211 | error = NULL; | 210 | return 0; |
212 | out: | 211 | out: |
213 | return error; | 212 | return -ENOENT; |
214 | } | ||
215 | |||
216 | static int proc_ns_fill_cache(struct file *filp, void *dirent, | ||
217 | filldir_t filldir, struct task_struct *task, | ||
218 | const struct proc_ns_operations *ops) | ||
219 | { | ||
220 | return proc_fill_cache(filp, dirent, filldir, | ||
221 | ops->name, strlen(ops->name), | ||
222 | proc_ns_instantiate, task, ops); | ||
223 | } | 213 | } |
224 | 214 | ||
225 | static int proc_ns_dir_readdir(struct file *filp, void *dirent, | 215 | static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) |
226 | filldir_t filldir) | ||
227 | { | 216 | { |
228 | int i; | 217 | struct task_struct *task = get_proc_task(file_inode(file)); |
229 | struct dentry *dentry = filp->f_path.dentry; | ||
230 | struct inode *inode = dentry->d_inode; | ||
231 | struct task_struct *task = get_proc_task(inode); | ||
232 | const struct proc_ns_operations **entry, **last; | 218 | const struct proc_ns_operations **entry, **last; |
233 | ino_t ino; | ||
234 | int ret; | ||
235 | 219 | ||
236 | ret = -ENOENT; | ||
237 | if (!task) | 220 | if (!task) |
238 | goto out_no_task; | 221 | return -ENOENT; |
239 | 222 | ||
240 | ret = 0; | 223 | if (!dir_emit_dots(file, ctx)) |
241 | i = filp->f_pos; | 224 | goto out; |
242 | switch (i) { | 225 | if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries)) |
243 | case 0: | 226 | goto out; |
244 | ino = inode->i_ino; | 227 | entry = ns_entries + (ctx->pos - 2); |
245 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | 228 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; |
246 | goto out; | 229 | while (entry <= last) { |
247 | i++; | 230 | const struct proc_ns_operations *ops = *entry; |
248 | filp->f_pos++; | 231 | if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name), |
249 | /* fall through */ | 232 | proc_ns_instantiate, task, ops)) |
250 | case 1: | 233 | break; |
251 | ino = parent_ino(dentry); | 234 | ctx->pos++; |
252 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | 235 | entry++; |
253 | goto out; | ||
254 | i++; | ||
255 | filp->f_pos++; | ||
256 | /* fall through */ | ||
257 | default: | ||
258 | i -= 2; | ||
259 | if (i >= ARRAY_SIZE(ns_entries)) { | ||
260 | ret = 1; | ||
261 | goto out; | ||
262 | } | ||
263 | entry = ns_entries + i; | ||
264 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | ||
265 | while (entry <= last) { | ||
266 | if (proc_ns_fill_cache(filp, dirent, filldir, | ||
267 | task, *entry) < 0) | ||
268 | goto out; | ||
269 | filp->f_pos++; | ||
270 | entry++; | ||
271 | } | ||
272 | } | 236 | } |
273 | |||
274 | ret = 1; | ||
275 | out: | 237 | out: |
276 | put_task_struct(task); | 238 | put_task_struct(task); |
277 | out_no_task: | 239 | return 0; |
278 | return ret; | ||
279 | } | 240 | } |
280 | 241 | ||
281 | const struct file_operations proc_ns_dir_operations = { | 242 | const struct file_operations proc_ns_dir_operations = { |
282 | .read = generic_read_dir, | 243 | .read = generic_read_dir, |
283 | .readdir = proc_ns_dir_readdir, | 244 | .iterate = proc_ns_dir_readdir, |
284 | }; | 245 | }; |
285 | 246 | ||
286 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, | 247 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, |
287 | struct dentry *dentry, unsigned int flags) | 248 | struct dentry *dentry, unsigned int flags) |
288 | { | 249 | { |
289 | struct dentry *error; | 250 | int error; |
290 | struct task_struct *task = get_proc_task(dir); | 251 | struct task_struct *task = get_proc_task(dir); |
291 | const struct proc_ns_operations **entry, **last; | 252 | const struct proc_ns_operations **entry, **last; |
292 | unsigned int len = dentry->d_name.len; | 253 | unsigned int len = dentry->d_name.len; |
293 | 254 | ||
294 | error = ERR_PTR(-ENOENT); | 255 | error = -ENOENT; |
295 | 256 | ||
296 | if (!task) | 257 | if (!task) |
297 | goto out_no_task; | 258 | goto out_no_task; |
@@ -310,7 +271,7 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, | |||
310 | out: | 271 | out: |
311 | put_task_struct(task); | 272 | put_task_struct(task); |
312 | out_no_task: | 273 | out_no_task: |
313 | return error; | 274 | return ERR_PTR(error); |
314 | } | 275 | } |
315 | 276 | ||
316 | const struct inode_operations proc_ns_dir_inode_operations = { | 277 | const struct inode_operations proc_ns_dir_inode_operations = { |
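For context on the readdir conversion in proc_ns_dir_readdir() above: under the iterate interface, "." and ".." come from dir_emit_dots(), every other name goes through dir_emit() instead of a filldir callback, and ctx->pos is the only cursor the VFS remembers between calls. A minimal sketch of that pattern, assuming <linux/fs.h>; the example_names table, the DT_REG type and the inode number 1 are illustrative, not taken from this patch:

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/string.h>

static const char * const example_names[] = { "net", "uts", "ipc" };

static int example_iterate(struct file *file, struct dir_context *ctx)
{
	unsigned int i;

	if (!dir_emit_dots(file, ctx))		/* emits "." and ".." as needed */
		return 0;
	if (ctx->pos >= 2 + ARRAY_SIZE(example_names))
		return 0;			/* already past the last entry */
	for (i = ctx->pos - 2; i < ARRAY_SIZE(example_names); i++) {
		if (!dir_emit(ctx, example_names[i], strlen(example_names[i]),
			      1, DT_REG))	/* caller's buffer is full */
			return 0;
		ctx->pos++;			/* resume here on the next call */
	}
	return 0;
}

Returning 0 with ctx->pos left where emission stopped is what lets a partially filled getdents buffer resume cleanly; the converted handlers in this series follow that shape.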
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 986e83220d56..4677bb7dc7c2 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
@@ -160,16 +160,15 @@ const struct inode_operations proc_net_inode_operations = { | |||
160 | .getattr = proc_tgid_net_getattr, | 160 | .getattr = proc_tgid_net_getattr, |
161 | }; | 161 | }; |
162 | 162 | ||
163 | static int proc_tgid_net_readdir(struct file *filp, void *dirent, | 163 | static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx) |
164 | filldir_t filldir) | ||
165 | { | 164 | { |
166 | int ret; | 165 | int ret; |
167 | struct net *net; | 166 | struct net *net; |
168 | 167 | ||
169 | ret = -EINVAL; | 168 | ret = -EINVAL; |
170 | net = get_proc_task_net(file_inode(filp)); | 169 | net = get_proc_task_net(file_inode(file)); |
171 | if (net != NULL) { | 170 | if (net != NULL) { |
172 | ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); | 171 | ret = proc_readdir_de(net->proc_net, file, ctx); |
173 | put_net(net); | 172 | put_net(net); |
174 | } | 173 | } |
175 | return ret; | 174 | return ret; |
@@ -178,7 +177,7 @@ static int proc_tgid_net_readdir(struct file *filp, void *dirent, | |||
178 | const struct file_operations proc_net_operations = { | 177 | const struct file_operations proc_net_operations = { |
179 | .llseek = generic_file_llseek, | 178 | .llseek = generic_file_llseek, |
180 | .read = generic_read_dir, | 179 | .read = generic_read_dir, |
181 | .readdir = proc_tgid_net_readdir, | 180 | .iterate = proc_tgid_net_readdir, |
182 | }; | 181 | }; |
183 | 182 | ||
184 | static __net_init int proc_net_ns_init(struct net *net) | 183 | static __net_init int proc_net_ns_init(struct net *net) |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ac05f33a0dde..71290463a1d3 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -573,12 +573,12 @@ out: | |||
573 | return ret; | 573 | return ret; |
574 | } | 574 | } |
575 | 575 | ||
576 | static int proc_sys_fill_cache(struct file *filp, void *dirent, | 576 | static bool proc_sys_fill_cache(struct file *file, |
577 | filldir_t filldir, | 577 | struct dir_context *ctx, |
578 | struct ctl_table_header *head, | 578 | struct ctl_table_header *head, |
579 | struct ctl_table *table) | 579 | struct ctl_table *table) |
580 | { | 580 | { |
581 | struct dentry *child, *dir = filp->f_path.dentry; | 581 | struct dentry *child, *dir = file->f_path.dentry; |
582 | struct inode *inode; | 582 | struct inode *inode; |
583 | struct qstr qname; | 583 | struct qstr qname; |
584 | ino_t ino = 0; | 584 | ino_t ino = 0; |
@@ -595,38 +595,38 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, | |||
595 | inode = proc_sys_make_inode(dir->d_sb, head, table); | 595 | inode = proc_sys_make_inode(dir->d_sb, head, table); |
596 | if (!inode) { | 596 | if (!inode) { |
597 | dput(child); | 597 | dput(child); |
598 | return -ENOMEM; | 598 | return false; |
599 | } else { | 599 | } else { |
600 | d_set_d_op(child, &proc_sys_dentry_operations); | 600 | d_set_d_op(child, &proc_sys_dentry_operations); |
601 | d_add(child, inode); | 601 | d_add(child, inode); |
602 | } | 602 | } |
603 | } else { | 603 | } else { |
604 | return -ENOMEM; | 604 | return false; |
605 | } | 605 | } |
606 | } | 606 | } |
607 | inode = child->d_inode; | 607 | inode = child->d_inode; |
608 | ino = inode->i_ino; | 608 | ino = inode->i_ino; |
609 | type = inode->i_mode >> 12; | 609 | type = inode->i_mode >> 12; |
610 | dput(child); | 610 | dput(child); |
611 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); | 611 | return dir_emit(ctx, qname.name, qname.len, ino, type); |
612 | } | 612 | } |
613 | 613 | ||
614 | static int proc_sys_link_fill_cache(struct file *filp, void *dirent, | 614 | static bool proc_sys_link_fill_cache(struct file *file, |
615 | filldir_t filldir, | 615 | struct dir_context *ctx, |
616 | struct ctl_table_header *head, | 616 | struct ctl_table_header *head, |
617 | struct ctl_table *table) | 617 | struct ctl_table *table) |
618 | { | 618 | { |
619 | int err, ret = 0; | 619 | bool ret = true; |
620 | head = sysctl_head_grab(head); | 620 | head = sysctl_head_grab(head); |
621 | 621 | ||
622 | if (S_ISLNK(table->mode)) { | 622 | if (S_ISLNK(table->mode)) { |
623 | /* It is not an error if we cannot follow the link; ignore it */ | 623 | /* It is not an error if we cannot follow the link; ignore it */
624 | err = sysctl_follow_link(&head, &table, current->nsproxy); | 624 | int err = sysctl_follow_link(&head, &table, current->nsproxy); |
625 | if (err) | 625 | if (err) |
626 | goto out; | 626 | goto out; |
627 | } | 627 | } |
628 | 628 | ||
629 | ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); | 629 | ret = proc_sys_fill_cache(file, ctx, head, table); |
630 | out: | 630 | out: |
631 | sysctl_head_finish(head); | 631 | sysctl_head_finish(head); |
632 | return ret; | 632 | return ret; |
@@ -634,67 +634,50 @@ out: | |||
634 | 634 | ||
635 | static int scan(struct ctl_table_header *head, ctl_table *table, | 635 | static int scan(struct ctl_table_header *head, ctl_table *table, |
636 | unsigned long *pos, struct file *file, | 636 | unsigned long *pos, struct file *file, |
637 | void *dirent, filldir_t filldir) | 637 | struct dir_context *ctx) |
638 | { | 638 | { |
639 | int res; | 639 | bool res; |
640 | 640 | ||
641 | if ((*pos)++ < file->f_pos) | 641 | if ((*pos)++ < ctx->pos) |
642 | return 0; | 642 | return true; |
643 | 643 | ||
644 | if (unlikely(S_ISLNK(table->mode))) | 644 | if (unlikely(S_ISLNK(table->mode))) |
645 | res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); | 645 | res = proc_sys_link_fill_cache(file, ctx, head, table); |
646 | else | 646 | else |
647 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); | 647 | res = proc_sys_fill_cache(file, ctx, head, table); |
648 | 648 | ||
649 | if (res == 0) | 649 | if (res) |
650 | file->f_pos = *pos; | 650 | ctx->pos = *pos; |
651 | 651 | ||
652 | return res; | 652 | return res; |
653 | } | 653 | } |
654 | 654 | ||
655 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | 655 | static int proc_sys_readdir(struct file *file, struct dir_context *ctx) |
656 | { | 656 | { |
657 | struct dentry *dentry = filp->f_path.dentry; | 657 | struct ctl_table_header *head = grab_header(file_inode(file)); |
658 | struct inode *inode = dentry->d_inode; | ||
659 | struct ctl_table_header *head = grab_header(inode); | ||
660 | struct ctl_table_header *h = NULL; | 658 | struct ctl_table_header *h = NULL; |
661 | struct ctl_table *entry; | 659 | struct ctl_table *entry; |
662 | struct ctl_dir *ctl_dir; | 660 | struct ctl_dir *ctl_dir; |
663 | unsigned long pos; | 661 | unsigned long pos; |
664 | int ret = -EINVAL; | ||
665 | 662 | ||
666 | if (IS_ERR(head)) | 663 | if (IS_ERR(head)) |
667 | return PTR_ERR(head); | 664 | return PTR_ERR(head); |
668 | 665 | ||
669 | ctl_dir = container_of(head, struct ctl_dir, header); | 666 | ctl_dir = container_of(head, struct ctl_dir, header); |
670 | 667 | ||
671 | ret = 0; | 668 | if (!dir_emit_dots(file, ctx)) |
672 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ | 669 | return 0; |
673 | if (filp->f_pos == 0) { | 670 | |
674 | if (filldir(dirent, ".", 1, filp->f_pos, | ||
675 | inode->i_ino, DT_DIR) < 0) | ||
676 | goto out; | ||
677 | filp->f_pos++; | ||
678 | } | ||
679 | if (filp->f_pos == 1) { | ||
680 | if (filldir(dirent, "..", 2, filp->f_pos, | ||
681 | parent_ino(dentry), DT_DIR) < 0) | ||
682 | goto out; | ||
683 | filp->f_pos++; | ||
684 | } | ||
685 | pos = 2; | 671 | pos = 2; |
686 | 672 | ||
687 | for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { | 673 | for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { |
688 | ret = scan(h, entry, &pos, filp, dirent, filldir); | 674 | if (!scan(h, entry, &pos, file, ctx)) { |
689 | if (ret) { | ||
690 | sysctl_head_finish(h); | 675 | sysctl_head_finish(h); |
691 | break; | 676 | break; |
692 | } | 677 | } |
693 | } | 678 | } |
694 | ret = 1; | ||
695 | out: | ||
696 | sysctl_head_finish(head); | 679 | sysctl_head_finish(head); |
697 | return ret; | 680 | return 0; |
698 | } | 681 | } |
699 | 682 | ||
700 | static int proc_sys_permission(struct inode *inode, int mask) | 683 | static int proc_sys_permission(struct inode *inode, int mask) |
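The scan() rewrite above keeps the old cursor convention, just expressed with booleans: a running counter numbers every candidate entry, entries numbered below ctx->pos were already returned on an earlier call and are skipped, and ctx->pos only advances once dir_emit() has accepted the name. A hedged sketch of that bookkeeping (emit_one is a made-up helper, not part of the patch):

#include <linux/fs.h>
#include <linux/string.h>

static bool emit_one(struct dir_context *ctx, unsigned long *pos,
		     const char *name, u64 ino)
{
	if ((*pos)++ < ctx->pos)	/* already returned on an earlier call */
		return true;
	if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
		return false;		/* user buffer is full, stop the walk */
	ctx->pos = *pos;		/* record progress only after a successful emit */
	return true;
}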
@@ -769,7 +752,7 @@ static const struct file_operations proc_sys_file_operations = { | |||
769 | 752 | ||
770 | static const struct file_operations proc_sys_dir_file_operations = { | 753 | static const struct file_operations proc_sys_dir_file_operations = { |
771 | .read = generic_read_dir, | 754 | .read = generic_read_dir, |
772 | .readdir = proc_sys_readdir, | 755 | .iterate = proc_sys_readdir, |
773 | .llseek = generic_file_llseek, | 756 | .llseek = generic_file_llseek, |
774 | }; | 757 | }; |
775 | 758 | ||
@@ -813,15 +796,16 @@ static int sysctl_is_seen(struct ctl_table_header *p) | |||
813 | return res; | 796 | return res; |
814 | } | 797 | } |
815 | 798 | ||
816 | static int proc_sys_compare(const struct dentry *parent, | 799 | static int proc_sys_compare(const struct dentry *parent, const struct dentry *dentry, |
817 | const struct inode *pinode, | ||
818 | const struct dentry *dentry, const struct inode *inode, | ||
819 | unsigned int len, const char *str, const struct qstr *name) | 800 | unsigned int len, const char *str, const struct qstr *name) |
820 | { | 801 | { |
821 | struct ctl_table_header *head; | 802 | struct ctl_table_header *head; |
803 | struct inode *inode; | ||
804 | |||
822 | /* Although proc doesn't have negative dentries, rcu-walk means | 805 | /* Although proc doesn't have negative dentries, rcu-walk means |
823 | * that inode here can be NULL */ | 806 | * that inode here can be NULL */ |
824 | /* AV: can it, indeed? */ | 807 | /* AV: can it, indeed? */ |
808 | inode = ACCESS_ONCE(dentry->d_inode); | ||
825 | if (!inode) | 809 | if (!inode) |
826 | return 1; | 810 | return 1; |
827 | if (name->len != len) | 811 | if (name->len != len) |
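The proc_sys_compare() change above tracks the dcache prototype that drops the inode arguments, so an RCU-walk-safe comparator now loads the inode from the dentry itself, exactly once, and treats a NULL inode as a mismatch. An illustrative comparator under the new signature (a generic sketch, not proc's, which additionally checks sysctl visibility through the header):

#include <linux/dcache.h>
#include <linux/compiler.h>
#include <linux/string.h>

static int example_d_compare(const struct dentry *parent,
			     const struct dentry *dentry,
			     unsigned int len, const char *str,
			     const struct qstr *name)
{
	struct inode *inode = ACCESS_ONCE(dentry->d_inode);

	if (!inode)
		return 1;		/* treat as a mismatch; lookup falls back */
	if (name->len != len)
		return 1;
	return memcmp(name->name, str, len);	/* 0 means the names match */
}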
diff --git a/fs/proc/root.c b/fs/proc/root.c index 41a6ea93f486..229e366598da 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -202,21 +202,14 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr | |||
202 | return proc_pid_lookup(dir, dentry, flags); | 202 | return proc_pid_lookup(dir, dentry, flags); |
203 | } | 203 | } |
204 | 204 | ||
205 | static int proc_root_readdir(struct file * filp, | 205 | static int proc_root_readdir(struct file *file, struct dir_context *ctx) |
206 | void * dirent, filldir_t filldir) | ||
207 | { | 206 | { |
208 | unsigned int nr = filp->f_pos; | 207 | if (ctx->pos < FIRST_PROCESS_ENTRY) { |
209 | int ret; | 208 | proc_readdir(file, ctx); |
210 | 209 | ctx->pos = FIRST_PROCESS_ENTRY; | |
211 | if (nr < FIRST_PROCESS_ENTRY) { | ||
212 | int error = proc_readdir(filp, dirent, filldir); | ||
213 | if (error <= 0) | ||
214 | return error; | ||
215 | filp->f_pos = FIRST_PROCESS_ENTRY; | ||
216 | } | 210 | } |
217 | 211 | ||
218 | ret = proc_pid_readdir(filp, dirent, filldir); | 212 | return proc_pid_readdir(file, ctx); |
219 | return ret; | ||
220 | } | 213 | } |
221 | 214 | ||
222 | /* | 215 | /* |
@@ -226,7 +219,7 @@ static int proc_root_readdir(struct file * filp, | |||
226 | */ | 219 | */ |
227 | static const struct file_operations proc_root_operations = { | 220 | static const struct file_operations proc_root_operations = { |
228 | .read = generic_read_dir, | 221 | .read = generic_read_dir, |
229 | .readdir = proc_root_readdir, | 222 | .iterate = proc_root_readdir, |
230 | .llseek = default_llseek, | 223 | .llseek = default_llseek, |
231 | }; | 224 | }; |
232 | 225 | ||
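The simplified proc_root_readdir() above composes two enumerations over one dir_context: non-PID entries live below FIRST_PROCESS_ENTRY, and clamping ctx->pos to that boundary keeps the PID iterator out of positions it does not own. A self-contained sketch of the same composition; EXAMPLE_BOUNDARY and both helpers are hypothetical stand-ins, not code from this patch:

#include <linux/fs.h>

#define EXAMPLE_BOUNDARY 256		/* plays the FIRST_PROCESS_ENTRY role */

static int emit_static_entries(struct file *file, struct dir_context *ctx)
{
	return dir_emit_dots(file, ctx);	/* stand-in for the fixed entries */
}

static int emit_dynamic_entries(struct file *file, struct dir_context *ctx)
{
	return 0;				/* stand-in for the per-task entries */
}

static int composed_iterate(struct file *file, struct dir_context *ctx)
{
	if (ctx->pos < EXAMPLE_BOUNDARY) {
		emit_static_entries(file, ctx);
		ctx->pos = EXAMPLE_BOUNDARY;	/* second half starts at a known offset */
	}
	return emit_dynamic_entries(file, ctx);
}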
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3e636d864d56..107d026f5d6e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/rmap.h> | 11 | #include <linux/rmap.h> |
12 | #include <linux/swap.h> | 12 | #include <linux/swap.h> |
13 | #include <linux/swapops.h> | 13 | #include <linux/swapops.h> |
14 | #include <linux/mmu_notifier.h> | ||
14 | 15 | ||
15 | #include <asm/elf.h> | 16 | #include <asm/elf.h> |
16 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
@@ -688,10 +689,66 @@ const struct file_operations proc_tid_smaps_operations = { | |||
688 | .release = seq_release_private, | 689 | .release = seq_release_private, |
689 | }; | 690 | }; |
690 | 691 | ||
692 | /* | ||
693 | * We do not want to have constant page-shift bits sitting in | ||
694 | * pagemap entries and are about to reuse them some time soon. | ||
695 | * | ||
696 | * Here's the "migration strategy": | ||
697 | * 1. when the system boots these bits remain what they are, | ||
698 | * but a warning about future change is printed in log; | ||
699 | * 2. once anyone clears soft-dirty bits via clear_refs file, | ||
700 | * these flag is set to denote, that user is aware of the | ||
701 | * new API and those page-shift bits change their meaning. | ||
702 | * The respective warning is printed in dmesg; | ||
703 | * 3. In a couple of releases we will remove all the mentions | ||
704 | * of page-shift in pagemap entries. | ||
705 | */ | ||
706 | |||
707 | static bool soft_dirty_cleared __read_mostly; | ||
708 | |||
709 | enum clear_refs_types { | ||
710 | CLEAR_REFS_ALL = 1, | ||
711 | CLEAR_REFS_ANON, | ||
712 | CLEAR_REFS_MAPPED, | ||
713 | CLEAR_REFS_SOFT_DIRTY, | ||
714 | CLEAR_REFS_LAST, | ||
715 | }; | ||
716 | |||
717 | struct clear_refs_private { | ||
718 | struct vm_area_struct *vma; | ||
719 | enum clear_refs_types type; | ||
720 | }; | ||
721 | |||
722 | static inline void clear_soft_dirty(struct vm_area_struct *vma, | ||
723 | unsigned long addr, pte_t *pte) | ||
724 | { | ||
725 | #ifdef CONFIG_MEM_SOFT_DIRTY | ||
726 | /* | ||
727 | * The soft-dirty tracker uses #PF-s to catch writes | ||
728 | * to pages, so write-protect the pte as well. See the | ||
729 | * Documentation/vm/soft-dirty.txt for full description | ||
730 | * of how soft-dirty works. | ||
731 | */ | ||
732 | pte_t ptent = *pte; | ||
733 | |||
734 | if (pte_present(ptent)) { | ||
735 | ptent = pte_wrprotect(ptent); | ||
736 | ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); | ||
737 | } else if (is_swap_pte(ptent)) { | ||
738 | ptent = pte_swp_clear_soft_dirty(ptent); | ||
739 | } else if (pte_file(ptent)) { | ||
740 | ptent = pte_file_clear_soft_dirty(ptent); | ||
741 | } | ||
742 | |||
743 | set_pte_at(vma->vm_mm, addr, pte, ptent); | ||
744 | #endif | ||
745 | } | ||
746 | |||
691 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | 747 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, |
692 | unsigned long end, struct mm_walk *walk) | 748 | unsigned long end, struct mm_walk *walk) |
693 | { | 749 | { |
694 | struct vm_area_struct *vma = walk->private; | 750 | struct clear_refs_private *cp = walk->private; |
751 | struct vm_area_struct *vma = cp->vma; | ||
695 | pte_t *pte, ptent; | 752 | pte_t *pte, ptent; |
696 | spinlock_t *ptl; | 753 | spinlock_t *ptl; |
697 | struct page *page; | 754 | struct page *page; |
@@ -703,6 +760,12 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
703 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 760 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
704 | for (; addr != end; pte++, addr += PAGE_SIZE) { | 761 | for (; addr != end; pte++, addr += PAGE_SIZE) { |
705 | ptent = *pte; | 762 | ptent = *pte; |
763 | |||
764 | if (cp->type == CLEAR_REFS_SOFT_DIRTY) { | ||
765 | clear_soft_dirty(vma, addr, pte); | ||
766 | continue; | ||
767 | } | ||
768 | |||
706 | if (!pte_present(ptent)) | 769 | if (!pte_present(ptent)) |
707 | continue; | 770 | continue; |
708 | 771 | ||
@@ -719,10 +782,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | |||
719 | return 0; | 782 | return 0; |
720 | } | 783 | } |
721 | 784 | ||
722 | #define CLEAR_REFS_ALL 1 | ||
723 | #define CLEAR_REFS_ANON 2 | ||
724 | #define CLEAR_REFS_MAPPED 3 | ||
725 | |||
726 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | 785 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, |
727 | size_t count, loff_t *ppos) | 786 | size_t count, loff_t *ppos) |
728 | { | 787 | { |
@@ -730,7 +789,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
730 | char buffer[PROC_NUMBUF]; | 789 | char buffer[PROC_NUMBUF]; |
731 | struct mm_struct *mm; | 790 | struct mm_struct *mm; |
732 | struct vm_area_struct *vma; | 791 | struct vm_area_struct *vma; |
733 | int type; | 792 | enum clear_refs_types type; |
793 | int itype; | ||
734 | int rv; | 794 | int rv; |
735 | 795 | ||
736 | memset(buffer, 0, sizeof(buffer)); | 796 | memset(buffer, 0, sizeof(buffer)); |
@@ -738,23 +798,37 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
738 | count = sizeof(buffer) - 1; | 798 | count = sizeof(buffer) - 1; |
739 | if (copy_from_user(buffer, buf, count)) | 799 | if (copy_from_user(buffer, buf, count)) |
740 | return -EFAULT; | 800 | return -EFAULT; |
741 | rv = kstrtoint(strstrip(buffer), 10, &type); | 801 | rv = kstrtoint(strstrip(buffer), 10, &itype); |
742 | if (rv < 0) | 802 | if (rv < 0) |
743 | return rv; | 803 | return rv; |
744 | if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) | 804 | type = (enum clear_refs_types)itype; |
805 | if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) | ||
745 | return -EINVAL; | 806 | return -EINVAL; |
807 | |||
808 | if (type == CLEAR_REFS_SOFT_DIRTY) { | ||
809 | soft_dirty_cleared = true; | ||
810 | pr_warn_once("The pagemap bits 55-60 have changed their meaning! " | ||
811 | "See the linux/Documentation/vm/pagemap.txt for details.\n"); | ||
812 | } | ||
813 | |||
746 | task = get_proc_task(file_inode(file)); | 814 | task = get_proc_task(file_inode(file)); |
747 | if (!task) | 815 | if (!task) |
748 | return -ESRCH; | 816 | return -ESRCH; |
749 | mm = get_task_mm(task); | 817 | mm = get_task_mm(task); |
750 | if (mm) { | 818 | if (mm) { |
819 | struct clear_refs_private cp = { | ||
820 | .type = type, | ||
821 | }; | ||
751 | struct mm_walk clear_refs_walk = { | 822 | struct mm_walk clear_refs_walk = { |
752 | .pmd_entry = clear_refs_pte_range, | 823 | .pmd_entry = clear_refs_pte_range, |
753 | .mm = mm, | 824 | .mm = mm, |
825 | .private = &cp, | ||
754 | }; | 826 | }; |
755 | down_read(&mm->mmap_sem); | 827 | down_read(&mm->mmap_sem); |
828 | if (type == CLEAR_REFS_SOFT_DIRTY) | ||
829 | mmu_notifier_invalidate_range_start(mm, 0, -1); | ||
756 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 830 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
757 | clear_refs_walk.private = vma; | 831 | cp.vma = vma; |
758 | if (is_vm_hugetlb_page(vma)) | 832 | if (is_vm_hugetlb_page(vma)) |
759 | continue; | 833 | continue; |
760 | /* | 834 | /* |
@@ -773,6 +847,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
773 | walk_page_range(vma->vm_start, vma->vm_end, | 847 | walk_page_range(vma->vm_start, vma->vm_end, |
774 | &clear_refs_walk); | 848 | &clear_refs_walk); |
775 | } | 849 | } |
850 | if (type == CLEAR_REFS_SOFT_DIRTY) | ||
851 | mmu_notifier_invalidate_range_end(mm, 0, -1); | ||
776 | flush_tlb_mm(mm); | 852 | flush_tlb_mm(mm); |
777 | up_read(&mm->mmap_sem); | 853 | up_read(&mm->mmap_sem); |
778 | mmput(mm); | 854 | mmput(mm); |
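Taken together, the CLEAR_REFS_SOFT_DIRTY plumbing above is driven entirely from userspace: writing "4" to /proc/PID/clear_refs write-protects the task's PTEs and clears the soft-dirty bits, after which any write by the task shows up again in its pagemap entries. A small userspace sketch, assuming the soft-dirty flag is reported in bit 55 of a pagemap entry once the new layout is in effect (per Documentation/vm/soft-dirty.txt); the PID and address handling is illustrative:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

static int clear_soft_dirty_bits(pid_t pid)
{
	char path[64];
	int fd;

	snprintf(path, sizeof(path), "/proc/%d/clear_refs", pid);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	/* "4" selects CLEAR_REFS_SOFT_DIRTY: write-protect PTEs, clear the bits */
	if (write(fd, "4", 1) != 1) {
		close(fd);
		return -1;
	}
	return close(fd);
}

static int page_is_soft_dirty(pid_t pid, unsigned long vaddr)
{
	char path[64];
	uint64_t entry;
	int fd, ret = -1;

	snprintf(path, sizeof(path), "/proc/%d/pagemap", pid);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	if (pread(fd, &entry, sizeof(entry),
		  (vaddr / getpagesize()) * sizeof(entry)) == sizeof(entry))
		ret = (entry >> 55) & 1;	/* soft-dirty flag (assumed bit 55) */
	close(fd);
	return ret;
}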
@@ -792,14 +868,15 @@ typedef struct { | |||
792 | } pagemap_entry_t; | 868 | } pagemap_entry_t; |
793 | 869 | ||
794 | struct pagemapread { | 870 | struct pagemapread { |
795 | int pos, len; | 871 | int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ |
796 | pagemap_entry_t *buffer; | 872 | pagemap_entry_t *buffer; |
873 | bool v2; | ||
797 | }; | 874 | }; |
798 | 875 | ||
799 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | 876 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) |
800 | #define PAGEMAP_WALK_MASK (PMD_MASK) | 877 | #define PAGEMAP_WALK_MASK (PMD_MASK) |
801 | 878 | ||
802 | #define PM_ENTRY_BYTES sizeof(u64) | 879 | #define PM_ENTRY_BYTES sizeof(pagemap_entry_t) |
803 | #define PM_STATUS_BITS 3 | 880 | #define PM_STATUS_BITS 3 |
804 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) | 881 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) |
805 | #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) | 882 | #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) |
@@ -807,14 +884,17 @@ struct pagemapread { | |||
807 | #define PM_PSHIFT_BITS 6 | 884 | #define PM_PSHIFT_BITS 6 |
808 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) | 885 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) |
809 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) | 886 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) |
810 | #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) | 887 | #define __PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) |
811 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) | 888 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) |
812 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) | 889 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) |
890 | /* in "new" pagemap pshift bits are occupied with more status bits */ | ||
891 | #define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT)) | ||
813 | 892 | ||
893 | #define __PM_SOFT_DIRTY (1LL) | ||
814 | #define PM_PRESENT PM_STATUS(4LL) | 894 | #define PM_PRESENT PM_STATUS(4LL) |
815 | #define PM_SWAP PM_STATUS(2LL) | 895 | #define PM_SWAP PM_STATUS(2LL) |
816 | #define PM_FILE PM_STATUS(1LL) | 896 | #define PM_FILE PM_STATUS(1LL) |
817 | #define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT) | 897 | #define PM_NOT_PRESENT(v2) PM_STATUS2(v2, 0) |
818 | #define PM_END_OF_BUFFER 1 | 898 | #define PM_END_OF_BUFFER 1 |
819 | 899 | ||
820 | static inline pagemap_entry_t make_pme(u64 val) | 900 | static inline pagemap_entry_t make_pme(u64 val) |
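For reference, the bit positions implied by the macros above: PM_STATUS occupies bits 61-63 (present, swap, file), the former page-shift field occupies bits 55-60, and the PFN or swap information sits in bits 0-54; once the "v2" layout is active, __PM_SOFT_DIRTY lands in bit 55. A userspace-style decode sketch (the struct and field names are illustrative):

#include <stdbool.h>
#include <stdint.h>

struct pagemap_bits {
	bool present, swapped, file_or_shared, soft_dirty;
	uint64_t pfn;
};

static struct pagemap_bits decode_pagemap(uint64_t e, bool v2)
{
	struct pagemap_bits b = {
		.present        = e & (1ULL << 63),	/* PM_PRESENT */
		.swapped        = e & (1ULL << 62),	/* PM_SWAP */
		.file_or_shared = e & (1ULL << 61),	/* PM_FILE */
		/* in the "v2" layout bit 55 carries __PM_SOFT_DIRTY;
		 * in the old layout bits 55-60 still hold PAGE_SHIFT */
		.soft_dirty     = v2 && (e & (1ULL << 55)),
		.pfn            = e & ((1ULL << 55) - 1),	/* PM_PFRAME_MASK */
	};
	return b;
}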
@@ -837,7 +917,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
837 | struct pagemapread *pm = walk->private; | 917 | struct pagemapread *pm = walk->private; |
838 | unsigned long addr; | 918 | unsigned long addr; |
839 | int err = 0; | 919 | int err = 0; |
840 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | 920 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
841 | 921 | ||
842 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 922 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
843 | err = add_to_pagemap(addr, &pme, pm); | 923 | err = add_to_pagemap(addr, &pme, pm); |
@@ -847,38 +927,43 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, | |||
847 | return err; | 927 | return err; |
848 | } | 928 | } |
849 | 929 | ||
850 | static void pte_to_pagemap_entry(pagemap_entry_t *pme, | 930 | static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
851 | struct vm_area_struct *vma, unsigned long addr, pte_t pte) | 931 | struct vm_area_struct *vma, unsigned long addr, pte_t pte) |
852 | { | 932 | { |
853 | u64 frame, flags; | 933 | u64 frame, flags; |
854 | struct page *page = NULL; | 934 | struct page *page = NULL; |
935 | int flags2 = 0; | ||
855 | 936 | ||
856 | if (pte_present(pte)) { | 937 | if (pte_present(pte)) { |
857 | frame = pte_pfn(pte); | 938 | frame = pte_pfn(pte); |
858 | flags = PM_PRESENT; | 939 | flags = PM_PRESENT; |
859 | page = vm_normal_page(vma, addr, pte); | 940 | page = vm_normal_page(vma, addr, pte); |
860 | } else if (is_swap_pte(pte)) { | 941 | } else if (is_swap_pte(pte)) { |
861 | swp_entry_t entry = pte_to_swp_entry(pte); | 942 | swp_entry_t entry; |
862 | 943 | if (pte_swp_soft_dirty(pte)) | |
944 | flags2 |= __PM_SOFT_DIRTY; | ||
945 | entry = pte_to_swp_entry(pte); | ||
863 | frame = swp_type(entry) | | 946 | frame = swp_type(entry) | |
864 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); | 947 | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); |
865 | flags = PM_SWAP; | 948 | flags = PM_SWAP; |
866 | if (is_migration_entry(entry)) | 949 | if (is_migration_entry(entry)) |
867 | page = migration_entry_to_page(entry); | 950 | page = migration_entry_to_page(entry); |
868 | } else { | 951 | } else { |
869 | *pme = make_pme(PM_NOT_PRESENT); | 952 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
870 | return; | 953 | return; |
871 | } | 954 | } |
872 | 955 | ||
873 | if (page && !PageAnon(page)) | 956 | if (page && !PageAnon(page)) |
874 | flags |= PM_FILE; | 957 | flags |= PM_FILE; |
958 | if (pte_soft_dirty(pte)) | ||
959 | flags2 |= __PM_SOFT_DIRTY; | ||
875 | 960 | ||
876 | *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags); | 961 | *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); |
877 | } | 962 | } |
878 | 963 | ||
879 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 964 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
880 | static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | 965 | static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
881 | pmd_t pmd, int offset) | 966 | pmd_t pmd, int offset, int pmd_flags2) |
882 | { | 967 | { |
883 | /* | 968 | /* |
884 | * Currently pmd for thp is always present because thp can not be | 969 | * Currently pmd for thp is always present because thp can not be |
@@ -887,13 +972,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | |||
887 | */ | 972 | */ |
888 | if (pmd_present(pmd)) | 973 | if (pmd_present(pmd)) |
889 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | 974 | *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) |
890 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | 975 | | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); |
891 | else | 976 | else |
892 | *pme = make_pme(PM_NOT_PRESENT); | 977 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
893 | } | 978 | } |
894 | #else | 979 | #else |
895 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, | 980 | static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
896 | pmd_t pmd, int offset) | 981 | pmd_t pmd, int offset, int pmd_flags2) |
897 | { | 982 | { |
898 | } | 983 | } |
899 | #endif | 984 | #endif |
@@ -905,17 +990,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
905 | struct pagemapread *pm = walk->private; | 990 | struct pagemapread *pm = walk->private; |
906 | pte_t *pte; | 991 | pte_t *pte; |
907 | int err = 0; | 992 | int err = 0; |
908 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); | 993 | pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
909 | 994 | ||
910 | /* find the first VMA at or above 'addr' */ | 995 | /* find the first VMA at or above 'addr' */ |
911 | vma = find_vma(walk->mm, addr); | 996 | vma = find_vma(walk->mm, addr); |
912 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { | 997 | if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { |
998 | int pmd_flags2; | ||
999 | |||
1000 | pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); | ||
913 | for (; addr != end; addr += PAGE_SIZE) { | 1001 | for (; addr != end; addr += PAGE_SIZE) { |
914 | unsigned long offset; | 1002 | unsigned long offset; |
915 | 1003 | ||
916 | offset = (addr & ~PAGEMAP_WALK_MASK) >> | 1004 | offset = (addr & ~PAGEMAP_WALK_MASK) >> |
917 | PAGE_SHIFT; | 1005 | PAGE_SHIFT; |
918 | thp_pmd_to_pagemap_entry(&pme, *pmd, offset); | 1006 | thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2); |
919 | err = add_to_pagemap(addr, &pme, pm); | 1007 | err = add_to_pagemap(addr, &pme, pm); |
920 | if (err) | 1008 | if (err) |
921 | break; | 1009 | break; |
@@ -932,7 +1020,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
932 | * and need a new, higher one */ | 1020 | * and need a new, higher one */ |
933 | if (vma && (addr >= vma->vm_end)) { | 1021 | if (vma && (addr >= vma->vm_end)) { |
934 | vma = find_vma(walk->mm, addr); | 1022 | vma = find_vma(walk->mm, addr); |
935 | pme = make_pme(PM_NOT_PRESENT); | 1023 | pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
936 | } | 1024 | } |
937 | 1025 | ||
938 | /* check that 'vma' actually covers this address, | 1026 | /* check that 'vma' actually covers this address, |
@@ -940,7 +1028,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
940 | if (vma && (vma->vm_start <= addr) && | 1028 | if (vma && (vma->vm_start <= addr) && |
941 | !is_vm_hugetlb_page(vma)) { | 1029 | !is_vm_hugetlb_page(vma)) { |
942 | pte = pte_offset_map(pmd, addr); | 1030 | pte = pte_offset_map(pmd, addr); |
943 | pte_to_pagemap_entry(&pme, vma, addr, *pte); | 1031 | pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); |
944 | /* unmap before userspace copy */ | 1032 | /* unmap before userspace copy */ |
945 | pte_unmap(pte); | 1033 | pte_unmap(pte); |
946 | } | 1034 | } |
@@ -955,14 +1043,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
955 | } | 1043 | } |
956 | 1044 | ||
957 | #ifdef CONFIG_HUGETLB_PAGE | 1045 | #ifdef CONFIG_HUGETLB_PAGE |
958 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, | 1046 | static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, |
959 | pte_t pte, int offset) | 1047 | pte_t pte, int offset) |
960 | { | 1048 | { |
961 | if (pte_present(pte)) | 1049 | if (pte_present(pte)) |
962 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | 1050 | *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) |
963 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); | 1051 | | PM_STATUS2(pm->v2, 0) | PM_PRESENT); |
964 | else | 1052 | else |
965 | *pme = make_pme(PM_NOT_PRESENT); | 1053 | *pme = make_pme(PM_NOT_PRESENT(pm->v2)); |
966 | } | 1054 | } |
967 | 1055 | ||
968 | /* This function walks within one hugetlb entry in the single call */ | 1056 | /* This function walks within one hugetlb entry in the single call */ |
@@ -976,7 +1064,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
976 | 1064 | ||
977 | for (; addr != end; addr += PAGE_SIZE) { | 1065 | for (; addr != end; addr += PAGE_SIZE) { |
978 | int offset = (addr & ~hmask) >> PAGE_SHIFT; | 1066 | int offset = (addr & ~hmask) >> PAGE_SHIFT; |
979 | huge_pte_to_pagemap_entry(&pme, *pte, offset); | 1067 | huge_pte_to_pagemap_entry(&pme, pm, *pte, offset); |
980 | err = add_to_pagemap(addr, &pme, pm); | 1068 | err = add_to_pagemap(addr, &pme, pm); |
981 | if (err) | 1069 | if (err) |
982 | return err; | 1070 | return err; |
@@ -1038,8 +1126,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
1038 | if (!count) | 1126 | if (!count) |
1039 | goto out_task; | 1127 | goto out_task; |
1040 | 1128 | ||
1041 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); | 1129 | pm.v2 = soft_dirty_cleared; |
1042 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); | 1130 | pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); |
1131 | pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY); | ||
1043 | ret = -ENOMEM; | 1132 | ret = -ENOMEM; |
1044 | if (!pm.buffer) | 1133 | if (!pm.buffer) |
1045 | goto out_task; | 1134 | goto out_task; |
@@ -1110,9 +1199,18 @@ out: | |||
1110 | return ret; | 1199 | return ret; |
1111 | } | 1200 | } |
1112 | 1201 | ||
1202 | static int pagemap_open(struct inode *inode, struct file *file) | ||
1203 | { | ||
1204 | pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " | ||
1205 | "to stop being page-shift some time soon. See the " | ||
1206 | "linux/Documentation/vm/pagemap.txt for details.\n"); | ||
1207 | return 0; | ||
1208 | } | ||
1209 | |||
1113 | const struct file_operations proc_pagemap_operations = { | 1210 | const struct file_operations proc_pagemap_operations = { |
1114 | .llseek = mem_lseek, /* borrow this */ | 1211 | .llseek = mem_lseek, /* borrow this */ |
1115 | .read = pagemap_read, | 1212 | .read = pagemap_read, |
1213 | .open = pagemap_open, | ||
1116 | }; | 1214 | }; |
1117 | #endif /* CONFIG_PROC_PAGE_MONITOR */ | 1215 | #endif /* CONFIG_PROC_PAGE_MONITOR */ |
1118 | 1216 | ||
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 9610ac772d7e..061894625903 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c | |||
@@ -20,8 +20,7 @@ static int uptime_proc_show(struct seq_file *m, void *v) | |||
20 | for_each_possible_cpu(i) | 20 | for_each_possible_cpu(i) |
21 | idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; | 21 | idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; |
22 | 22 | ||
23 | do_posix_clock_monotonic_gettime(&uptime); | 23 | get_monotonic_boottime(&uptime); |
24 | monotonic_to_bootbased(&uptime); | ||
25 | nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; | 24 | nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; |
26 | idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); | 25 | idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); |
27 | idle.tv_nsec = rem; | 26 | idle.tv_nsec = rem; |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 17f7e080d7ff..a1a16eb97c7b 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/crash_dump.h> | 21 | #include <linux/crash_dump.h> |
22 | #include <linux/list.h> | 22 | #include <linux/list.h> |
23 | #include <linux/vmalloc.h> | ||
23 | #include <asm/uaccess.h> | 24 | #include <asm/uaccess.h> |
24 | #include <asm/io.h> | 25 | #include <asm/io.h> |
25 | #include "internal.h" | 26 | #include "internal.h" |
@@ -32,6 +33,10 @@ static LIST_HEAD(vmcore_list); | |||
32 | /* Stores the pointer to the buffer containing kernel elf core headers. */ | 33 | /* Stores the pointer to the buffer containing kernel elf core headers. */ |
33 | static char *elfcorebuf; | 34 | static char *elfcorebuf; |
34 | static size_t elfcorebuf_sz; | 35 | static size_t elfcorebuf_sz; |
36 | static size_t elfcorebuf_sz_orig; | ||
37 | |||
38 | static char *elfnotes_buf; | ||
39 | static size_t elfnotes_sz; | ||
35 | 40 | ||
36 | /* Total size of vmcore file. */ | 41 | /* Total size of vmcore file. */ |
37 | static u64 vmcore_size; | 42 | static u64 vmcore_size; |
@@ -118,27 +123,6 @@ static ssize_t read_from_oldmem(char *buf, size_t count, | |||
118 | return read; | 123 | return read; |
119 | } | 124 | } |
120 | 125 | ||
121 | /* Maps vmcore file offset to respective physical address in memroy. */ | ||
122 | static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list, | ||
123 | struct vmcore **m_ptr) | ||
124 | { | ||
125 | struct vmcore *m; | ||
126 | u64 paddr; | ||
127 | |||
128 | list_for_each_entry(m, vc_list, list) { | ||
129 | u64 start, end; | ||
130 | start = m->offset; | ||
131 | end = m->offset + m->size - 1; | ||
132 | if (offset >= start && offset <= end) { | ||
133 | paddr = m->paddr + offset - start; | ||
134 | *m_ptr = m; | ||
135 | return paddr; | ||
136 | } | ||
137 | } | ||
138 | *m_ptr = NULL; | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | /* Read from the ELF header and then the crash dump. On error, a negative value is | 126 | /* Read from the ELF header and then the crash dump. On error, a negative value is
143 | * returned; otherwise the number of bytes read is returned. | 127 | * returned; otherwise the number of bytes read is returned.
144 | */ | 128 | */ |
@@ -147,8 +131,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
147 | { | 131 | { |
148 | ssize_t acc = 0, tmp; | 132 | ssize_t acc = 0, tmp; |
149 | size_t tsz; | 133 | size_t tsz; |
150 | u64 start, nr_bytes; | 134 | u64 start; |
151 | struct vmcore *curr_m = NULL; | 135 | struct vmcore *m = NULL; |
152 | 136 | ||
153 | if (buflen == 0 || *fpos >= vmcore_size) | 137 | if (buflen == 0 || *fpos >= vmcore_size) |
154 | return 0; | 138 | return 0; |
@@ -159,9 +143,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
159 | 143 | ||
160 | /* Read ELF core header */ | 144 | /* Read ELF core header */ |
161 | if (*fpos < elfcorebuf_sz) { | 145 | if (*fpos < elfcorebuf_sz) { |
162 | tsz = elfcorebuf_sz - *fpos; | 146 | tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen); |
163 | if (buflen < tsz) | ||
164 | tsz = buflen; | ||
165 | if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) | 147 | if (copy_to_user(buffer, elfcorebuf + *fpos, tsz)) |
166 | return -EFAULT; | 148 | return -EFAULT; |
167 | buflen -= tsz; | 149 | buflen -= tsz; |
@@ -174,39 +156,161 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
174 | return acc; | 156 | return acc; |
175 | } | 157 | } |
176 | 158 | ||
177 | start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m); | 159 | /* Read Elf note segment */ |
178 | if (!curr_m) | 160 | if (*fpos < elfcorebuf_sz + elfnotes_sz) { |
179 | return -EINVAL; | 161 | void *kaddr; |
180 | |||
181 | while (buflen) { | ||
182 | tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK)); | ||
183 | 162 | ||
184 | /* Calculate left bytes in current memory segment. */ | 163 | tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen); |
185 | nr_bytes = (curr_m->size - (start - curr_m->paddr)); | 164 | kaddr = elfnotes_buf + *fpos - elfcorebuf_sz; |
186 | if (tsz > nr_bytes) | 165 | if (copy_to_user(buffer, kaddr, tsz)) |
187 | tsz = nr_bytes; | 166 | return -EFAULT; |
188 | |||
189 | tmp = read_from_oldmem(buffer, tsz, &start, 1); | ||
190 | if (tmp < 0) | ||
191 | return tmp; | ||
192 | buflen -= tsz; | 167 | buflen -= tsz; |
193 | *fpos += tsz; | 168 | *fpos += tsz; |
194 | buffer += tsz; | 169 | buffer += tsz; |
195 | acc += tsz; | 170 | acc += tsz; |
196 | if (start >= (curr_m->paddr + curr_m->size)) { | 171 | |
197 | if (curr_m->list.next == &vmcore_list) | 172 | /* leave now if filled buffer already */ |
198 | return acc; /*EOF*/ | 173 | if (buflen == 0) |
199 | curr_m = list_entry(curr_m->list.next, | 174 | return acc; |
200 | struct vmcore, list); | 175 | } |
201 | start = curr_m->paddr; | 176 | |
177 | list_for_each_entry(m, &vmcore_list, list) { | ||
178 | if (*fpos < m->offset + m->size) { | ||
179 | tsz = min_t(size_t, m->offset + m->size - *fpos, buflen); | ||
180 | start = m->paddr + *fpos - m->offset; | ||
181 | tmp = read_from_oldmem(buffer, tsz, &start, 1); | ||
182 | if (tmp < 0) | ||
183 | return tmp; | ||
184 | buflen -= tsz; | ||
185 | *fpos += tsz; | ||
186 | buffer += tsz; | ||
187 | acc += tsz; | ||
188 | |||
189 | /* leave now if filled buffer already */ | ||
190 | if (buflen == 0) | ||
191 | return acc; | ||
202 | } | 192 | } |
203 | } | 193 | } |
194 | |||
204 | return acc; | 195 | return acc; |
205 | } | 196 | } |
206 | 197 | ||
198 | /** | ||
199 | * alloc_elfnotes_buf - allocate buffer for ELF note segment in | ||
200 | * vmalloc memory | ||
201 | * | ||
202 | * @notes_sz: size of buffer | ||
203 | * | ||
204 | * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap | ||
205 | * the buffer to user-space by means of remap_vmalloc_range(). | ||
206 | * | ||
207 | * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is | ||
208 | * disabled and there's no need to allow users to mmap the buffer. | ||
209 | */ | ||
210 | static inline char *alloc_elfnotes_buf(size_t notes_sz) | ||
211 | { | ||
212 | #ifdef CONFIG_MMU | ||
213 | return vmalloc_user(notes_sz); | ||
214 | #else | ||
215 | return vzalloc(notes_sz); | ||
216 | #endif | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is | ||
221 | * essential for mmap_vmcore() in order to map physically | ||
222 | * non-contiguous objects (ELF header, ELF note segment and memory | ||
223 | * regions in the 1st kernel pointed to by PT_LOAD entries) into | ||
224 | * virtually contiguous user-space in ELF layout. | ||
225 | */ | ||
226 | #if defined(CONFIG_MMU) && !defined(CONFIG_S390) | ||
227 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | ||
228 | { | ||
229 | size_t size = vma->vm_end - vma->vm_start; | ||
230 | u64 start, end, len, tsz; | ||
231 | struct vmcore *m; | ||
232 | |||
233 | start = (u64)vma->vm_pgoff << PAGE_SHIFT; | ||
234 | end = start + size; | ||
235 | |||
236 | if (size > vmcore_size || end > vmcore_size) | ||
237 | return -EINVAL; | ||
238 | |||
239 | if (vma->vm_flags & (VM_WRITE | VM_EXEC)) | ||
240 | return -EPERM; | ||
241 | |||
242 | vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); | ||
243 | vma->vm_flags |= VM_MIXEDMAP; | ||
244 | |||
245 | len = 0; | ||
246 | |||
247 | if (start < elfcorebuf_sz) { | ||
248 | u64 pfn; | ||
249 | |||
250 | tsz = min(elfcorebuf_sz - (size_t)start, size); | ||
251 | pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT; | ||
252 | if (remap_pfn_range(vma, vma->vm_start, pfn, tsz, | ||
253 | vma->vm_page_prot)) | ||
254 | return -EAGAIN; | ||
255 | size -= tsz; | ||
256 | start += tsz; | ||
257 | len += tsz; | ||
258 | |||
259 | if (size == 0) | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | if (start < elfcorebuf_sz + elfnotes_sz) { | ||
264 | void *kaddr; | ||
265 | |||
266 | tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); | ||
267 | kaddr = elfnotes_buf + start - elfcorebuf_sz; | ||
268 | if (remap_vmalloc_range_partial(vma, vma->vm_start + len, | ||
269 | kaddr, tsz)) | ||
270 | goto fail; | ||
271 | size -= tsz; | ||
272 | start += tsz; | ||
273 | len += tsz; | ||
274 | |||
275 | if (size == 0) | ||
276 | return 0; | ||
277 | } | ||
278 | |||
279 | list_for_each_entry(m, &vmcore_list, list) { | ||
280 | if (start < m->offset + m->size) { | ||
281 | u64 paddr = 0; | ||
282 | |||
283 | tsz = min_t(size_t, m->offset + m->size - start, size); | ||
284 | paddr = m->paddr + start - m->offset; | ||
285 | if (remap_pfn_range(vma, vma->vm_start + len, | ||
286 | paddr >> PAGE_SHIFT, tsz, | ||
287 | vma->vm_page_prot)) | ||
288 | goto fail; | ||
289 | size -= tsz; | ||
290 | start += tsz; | ||
291 | len += tsz; | ||
292 | |||
293 | if (size == 0) | ||
294 | return 0; | ||
295 | } | ||
296 | } | ||
297 | |||
298 | return 0; | ||
299 | fail: | ||
300 | do_munmap(vma->vm_mm, vma->vm_start, len); | ||
301 | return -EAGAIN; | ||
302 | } | ||
303 | #else | ||
304 | static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) | ||
305 | { | ||
306 | return -ENOSYS; | ||
307 | } | ||
308 | #endif | ||
309 | |||
207 | static const struct file_operations proc_vmcore_operations = { | 310 | static const struct file_operations proc_vmcore_operations = { |
208 | .read = read_vmcore, | 311 | .read = read_vmcore, |
209 | .llseek = default_llseek, | 312 | .llseek = default_llseek, |
313 | .mmap = mmap_vmcore, | ||
210 | }; | 314 | }; |
211 | 315 | ||
212 | static struct vmcore* __init get_new_element(void) | 316 | static struct vmcore* __init get_new_element(void) |
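The rewritten read_vmcore() and the new mmap_vmcore() above both walk the same three-part file layout: the page-aligned ELF header copy in elfcorebuf, then the page-aligned merged note buffer, then each old-memory chunk at its m->offset. A sketch of that offset resolution; the enum, the stand-in region struct and the function name are illustrative, not part of the patch:

#include <linux/list.h>
#include <linux/types.h>

struct vmcore_region {			/* illustrative stand-in for struct vmcore */
	struct list_head list;
	unsigned long long paddr;
	unsigned long long size;
	loff_t offset;
};

enum vmcore_src { SRC_ELF_HDR, SRC_NOTES, SRC_OLDMEM, SRC_EOF };

static enum vmcore_src resolve_offset(u64 fpos, size_t hdr_sz, size_t notes_sz,
				      struct list_head *regions,
				      struct vmcore_region **out)
{
	struct vmcore_region *m;

	if (fpos < hdr_sz)
		return SRC_ELF_HDR;		/* served from elfcorebuf */
	if (fpos < hdr_sz + notes_sz)
		return SRC_NOTES;		/* served from the vmalloc'd note buffer */
	list_for_each_entry(m, regions, list) {
		if (fpos < m->offset + m->size) {
			*out = m;		/* served via read_from_oldmem() */
			return SRC_OLDMEM;
		}
	}
	return SRC_EOF;
}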
@@ -214,61 +318,40 @@ static struct vmcore* __init get_new_element(void) | |||
214 | return kzalloc(sizeof(struct vmcore), GFP_KERNEL); | 318 | return kzalloc(sizeof(struct vmcore), GFP_KERNEL); |
215 | } | 319 | } |
216 | 320 | ||
217 | static u64 __init get_vmcore_size_elf64(char *elfptr) | 321 | static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz, |
322 | struct list_head *vc_list) | ||
218 | { | 323 | { |
219 | int i; | ||
220 | u64 size; | ||
221 | Elf64_Ehdr *ehdr_ptr; | ||
222 | Elf64_Phdr *phdr_ptr; | ||
223 | |||
224 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | ||
225 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); | ||
226 | size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr)); | ||
227 | for (i = 0; i < ehdr_ptr->e_phnum; i++) { | ||
228 | size += phdr_ptr->p_memsz; | ||
229 | phdr_ptr++; | ||
230 | } | ||
231 | return size; | ||
232 | } | ||
233 | |||
234 | static u64 __init get_vmcore_size_elf32(char *elfptr) | ||
235 | { | ||
236 | int i; | ||
237 | u64 size; | 324 | u64 size; |
238 | Elf32_Ehdr *ehdr_ptr; | 325 | struct vmcore *m; |
239 | Elf32_Phdr *phdr_ptr; | ||
240 | 326 | ||
241 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 327 | size = elfsz + elfnotesegsz; |
242 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); | 328 | list_for_each_entry(m, vc_list, list) { |
243 | size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr)); | 329 | size += m->size; |
244 | for (i = 0; i < ehdr_ptr->e_phnum; i++) { | ||
245 | size += phdr_ptr->p_memsz; | ||
246 | phdr_ptr++; | ||
247 | } | 330 | } |
248 | return size; | 331 | return size; |
249 | } | 332 | } |
250 | 333 | ||
251 | /* Merges all the PT_NOTE headers into one. */ | 334 | /** |
252 | static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | 335 | * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry |
253 | struct list_head *vc_list) | 336 | * |
337 | * @ehdr_ptr: ELF header | ||
338 | * | ||
339 | * This function updates p_memsz member of each PT_NOTE entry in the | ||
340 | * program header table pointed to by @ehdr_ptr to real size of ELF | ||
341 | * note segment. | ||
342 | */ | ||
343 | static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) | ||
254 | { | 344 | { |
255 | int i, nr_ptnote=0, rc=0; | 345 | int i, rc=0; |
256 | char *tmp; | 346 | Elf64_Phdr *phdr_ptr; |
257 | Elf64_Ehdr *ehdr_ptr; | ||
258 | Elf64_Phdr phdr, *phdr_ptr; | ||
259 | Elf64_Nhdr *nhdr_ptr; | 347 | Elf64_Nhdr *nhdr_ptr; |
260 | u64 phdr_sz = 0, note_off; | ||
261 | 348 | ||
262 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 349 | phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); |
263 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); | ||
264 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 350 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
265 | int j; | ||
266 | void *notes_section; | 351 | void *notes_section; |
267 | struct vmcore *new; | ||
268 | u64 offset, max_sz, sz, real_sz = 0; | 352 | u64 offset, max_sz, sz, real_sz = 0; |
269 | if (phdr_ptr->p_type != PT_NOTE) | 353 | if (phdr_ptr->p_type != PT_NOTE) |
270 | continue; | 354 | continue; |
271 | nr_ptnote++; | ||
272 | max_sz = phdr_ptr->p_memsz; | 355 | max_sz = phdr_ptr->p_memsz; |
273 | offset = phdr_ptr->p_offset; | 356 | offset = phdr_ptr->p_offset; |
274 | notes_section = kmalloc(max_sz, GFP_KERNEL); | 357 | notes_section = kmalloc(max_sz, GFP_KERNEL); |
@@ -280,7 +363,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
280 | return rc; | 363 | return rc; |
281 | } | 364 | } |
282 | nhdr_ptr = notes_section; | 365 | nhdr_ptr = notes_section; |
283 | for (j = 0; j < max_sz; j += sz) { | 366 | while (real_sz < max_sz) { |
284 | if (nhdr_ptr->n_namesz == 0) | 367 | if (nhdr_ptr->n_namesz == 0) |
285 | break; | 368 | break; |
286 | sz = sizeof(Elf64_Nhdr) + | 369 | sz = sizeof(Elf64_Nhdr) + |
@@ -289,26 +372,122 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
289 | real_sz += sz; | 372 | real_sz += sz; |
290 | nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); | 373 | nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); |
291 | } | 374 | } |
292 | |||
293 | /* Add this contiguous chunk of notes section to vmcore list.*/ | ||
294 | new = get_new_element(); | ||
295 | if (!new) { | ||
296 | kfree(notes_section); | ||
297 | return -ENOMEM; | ||
298 | } | ||
299 | new->paddr = phdr_ptr->p_offset; | ||
300 | new->size = real_sz; | ||
301 | list_add_tail(&new->list, vc_list); | ||
302 | phdr_sz += real_sz; | ||
303 | kfree(notes_section); | 375 | kfree(notes_section); |
376 | phdr_ptr->p_memsz = real_sz; | ||
377 | } | ||
378 | |||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | /** | ||
383 | * get_note_number_and_size_elf64 - get the number of PT_NOTE program | ||
384 | * headers and sum of real size of their ELF note segment headers and | ||
385 | * data. | ||
386 | * | ||
387 | * @ehdr_ptr: ELF header | ||
388 | * @nr_ptnote: buffer for the number of PT_NOTE program headers | ||
389 | * @sz_ptnote: buffer for size of unique PT_NOTE program header | ||
390 | * | ||
391 | * This function is used to merge multiple PT_NOTE program headers | ||
392 | * into a unique single one. The resulting unique entry will have | ||
393 | * @sz_ptnote in its phdr->p_memsz. | ||
394 | * | ||
395 | * It is assumed that program headers with PT_NOTE type pointed to by | ||
396 | * @ehdr_ptr have already been updated by update_note_header_size_elf64 | ||
397 | * and each of the PT_NOTE program headers has the actual ELF note segment | ||
398 | * size in its p_memsz member. | ||
399 | */ | ||
400 | static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr, | ||
401 | int *nr_ptnote, u64 *sz_ptnote) | ||
402 | { | ||
403 | int i; | ||
404 | Elf64_Phdr *phdr_ptr; | ||
405 | |||
406 | *nr_ptnote = *sz_ptnote = 0; | ||
407 | |||
408 | phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1); | ||
409 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
410 | if (phdr_ptr->p_type != PT_NOTE) | ||
411 | continue; | ||
412 | *nr_ptnote += 1; | ||
413 | *sz_ptnote += phdr_ptr->p_memsz; | ||
414 | } | ||
415 | |||
416 | return 0; | ||
417 | } | ||
418 | |||
419 | /** | ||
420 | * copy_notes_elf64 - copy ELF note segments in a given buffer | ||
421 | * | ||
422 | * @ehdr_ptr: ELF header | ||
423 | * @notes_buf: buffer into which ELF note segments are copied | ||
424 | * | ||
425 | * This function is used to copy ELF note segment in the 1st kernel | ||
426 | * into the buffer @notes_buf in the 2nd kernel. It is assumed that | ||
427 | * size of the buffer @notes_buf is equal to or larger than sum of the | ||
428 | * real ELF note segment headers and data. | ||
429 | * | ||
430 | * It is assumed that program headers with PT_NOTE type pointed to by | ||
431 | * @ehdr_ptr have already been updated by update_note_header_size_elf64 | ||
432 | * and each of the PT_NOTE program headers has the actual ELF note segment | ||
433 | * size in its p_memsz member. | ||
434 | */ | ||
435 | static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf) | ||
436 | { | ||
437 | int i, rc=0; | ||
438 | Elf64_Phdr *phdr_ptr; | ||
439 | |||
440 | phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1); | ||
441 | |||
442 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
443 | u64 offset; | ||
444 | if (phdr_ptr->p_type != PT_NOTE) | ||
445 | continue; | ||
446 | offset = phdr_ptr->p_offset; | ||
447 | rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); | ||
448 | if (rc < 0) | ||
449 | return rc; | ||
450 | notes_buf += phdr_ptr->p_memsz; | ||
304 | } | 451 | } |
305 | 452 | ||
453 | return 0; | ||
454 | } | ||
455 | |||
456 | /* Merges all the PT_NOTE headers into one. */ | ||
457 | static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | ||
458 | char **notes_buf, size_t *notes_sz) | ||
459 | { | ||
460 | int i, nr_ptnote=0, rc=0; | ||
461 | char *tmp; | ||
462 | Elf64_Ehdr *ehdr_ptr; | ||
463 | Elf64_Phdr phdr; | ||
464 | u64 phdr_sz = 0, note_off; | ||
465 | |||
466 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | ||
467 | |||
468 | rc = update_note_header_size_elf64(ehdr_ptr); | ||
469 | if (rc < 0) | ||
470 | return rc; | ||
471 | |||
472 | rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz); | ||
473 | if (rc < 0) | ||
474 | return rc; | ||
475 | |||
476 | *notes_sz = roundup(phdr_sz, PAGE_SIZE); | ||
477 | *notes_buf = alloc_elfnotes_buf(*notes_sz); | ||
478 | if (!*notes_buf) | ||
479 | return -ENOMEM; | ||
480 | |||
481 | rc = copy_notes_elf64(ehdr_ptr, *notes_buf); | ||
482 | if (rc < 0) | ||
483 | return rc; | ||
484 | |||
306 | /* Prepare merged PT_NOTE program header. */ | 485 | /* Prepare merged PT_NOTE program header. */ |
307 | phdr.p_type = PT_NOTE; | 486 | phdr.p_type = PT_NOTE; |
308 | phdr.p_flags = 0; | 487 | phdr.p_flags = 0; |
309 | note_off = sizeof(Elf64_Ehdr) + | 488 | note_off = sizeof(Elf64_Ehdr) + |
310 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); | 489 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); |
311 | phdr.p_offset = note_off; | 490 | phdr.p_offset = roundup(note_off, PAGE_SIZE); |
312 | phdr.p_vaddr = phdr.p_paddr = 0; | 491 | phdr.p_vaddr = phdr.p_paddr = 0; |
313 | phdr.p_filesz = phdr.p_memsz = phdr_sz; | 492 | phdr.p_filesz = phdr.p_memsz = phdr_sz; |
314 | phdr.p_align = 0; | 493 | phdr.p_align = 0; |
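The `while (real_sz < max_sz)` loop used by update_note_header_size_elf64() above (and by its ELF32 counterpart later) measures a PT_NOTE segment record by record: each record is an Elf64_Nhdr followed by its name and descriptor, each padded to a 4-byte boundary, and a record with n_namesz == 0 terminates the walk. A standalone sketch of the same arithmetic, assuming the usual 4-byte note alignment; it compiles against the userspace <elf.h>:

#include <elf.h>
#include <stddef.h>

static size_t elf64_notes_real_size(const void *seg, size_t max_sz)
{
	size_t real_sz = 0;

	while (real_sz < max_sz) {
		const Elf64_Nhdr *n =
			(const Elf64_Nhdr *)((const char *)seg + real_sz);
		size_t sz;

		if (n->n_namesz == 0)		/* terminating empty record */
			break;
		sz = sizeof(*n) +
		     ((n->n_namesz + 3) & ~3UL) +	/* roundup(namesz, 4) */
		     ((n->n_descsz + 3) & ~3UL);	/* roundup(descsz, 4) */
		real_sz += sz;
	}
	return real_sz;
}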
@@ -322,6 +501,8 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
322 | i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); | 501 | i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); |
323 | *elfsz = *elfsz - i; | 502 | *elfsz = *elfsz - i; |
324 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); | 503 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); |
504 | memset(elfptr + *elfsz, 0, i); | ||
505 | *elfsz = roundup(*elfsz, PAGE_SIZE); | ||
325 | 506 | ||
326 | /* Modify e_phnum to reflect merged headers. */ | 507 | /* Modify e_phnum to reflect merged headers. */ |
327 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; | 508 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; |
@@ -329,27 +510,27 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz, | |||
329 | return 0; | 510 | return 0; |
330 | } | 511 | } |
331 | 512 | ||
332 | /* Merges all the PT_NOTE headers into one. */ | 513 | /** |
333 | static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | 514 | * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry |
334 | struct list_head *vc_list) | 515 | * |
516 | * @ehdr_ptr: ELF header | ||
517 | * | ||
518 | * This function updates p_memsz member of each PT_NOTE entry in the | ||
519 | * program header table pointed to by @ehdr_ptr to real size of ELF | ||
520 | * note segment. | ||
521 | */ | ||
522 | static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) | ||
335 | { | 523 | { |
336 | int i, nr_ptnote=0, rc=0; | 524 | int i, rc=0; |
337 | char *tmp; | 525 | Elf32_Phdr *phdr_ptr; |
338 | Elf32_Ehdr *ehdr_ptr; | ||
339 | Elf32_Phdr phdr, *phdr_ptr; | ||
340 | Elf32_Nhdr *nhdr_ptr; | 526 | Elf32_Nhdr *nhdr_ptr; |
341 | u64 phdr_sz = 0, note_off; | ||
342 | 527 | ||
343 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 528 | phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); |
344 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); | ||
345 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 529 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
346 | int j; | ||
347 | void *notes_section; | 530 | void *notes_section; |
348 | struct vmcore *new; | ||
349 | u64 offset, max_sz, sz, real_sz = 0; | 531 | u64 offset, max_sz, sz, real_sz = 0; |
350 | if (phdr_ptr->p_type != PT_NOTE) | 532 | if (phdr_ptr->p_type != PT_NOTE) |
351 | continue; | 533 | continue; |
352 | nr_ptnote++; | ||
353 | max_sz = phdr_ptr->p_memsz; | 534 | max_sz = phdr_ptr->p_memsz; |
354 | offset = phdr_ptr->p_offset; | 535 | offset = phdr_ptr->p_offset; |
355 | notes_section = kmalloc(max_sz, GFP_KERNEL); | 536 | notes_section = kmalloc(max_sz, GFP_KERNEL); |
@@ -361,7 +542,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
361 | return rc; | 542 | return rc; |
362 | } | 543 | } |
363 | nhdr_ptr = notes_section; | 544 | nhdr_ptr = notes_section; |
364 | for (j = 0; j < max_sz; j += sz) { | 545 | while (real_sz < max_sz) { |
365 | if (nhdr_ptr->n_namesz == 0) | 546 | if (nhdr_ptr->n_namesz == 0) |
366 | break; | 547 | break; |
367 | sz = sizeof(Elf32_Nhdr) + | 548 | sz = sizeof(Elf32_Nhdr) + |
@@ -370,26 +551,122 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
370 | real_sz += sz; | 551 | real_sz += sz; |
371 | nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); | 552 | nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); |
372 | } | 553 | } |
373 | |||
374 | /* Add this contiguous chunk of notes section to vmcore list.*/ | ||
375 | new = get_new_element(); | ||
376 | if (!new) { | ||
377 | kfree(notes_section); | ||
378 | return -ENOMEM; | ||
379 | } | ||
380 | new->paddr = phdr_ptr->p_offset; | ||
381 | new->size = real_sz; | ||
382 | list_add_tail(&new->list, vc_list); | ||
383 | phdr_sz += real_sz; | ||
384 | kfree(notes_section); | 554 | kfree(notes_section); |
555 | phdr_ptr->p_memsz = real_sz; | ||
556 | } | ||
557 | |||
558 | return 0; | ||
559 | } | ||
560 | |||
561 | /** | ||
562 | * get_note_number_and_size_elf32 - get the number of PT_NOTE program | ||
563 | * headers and sum of real size of their ELF note segment headers and | ||
564 | * data. | ||
565 | * | ||
566 | * @ehdr_ptr: ELF header | ||
567 | * @nr_ptnote: buffer for the number of PT_NOTE program headers | ||
568 | * @sz_ptnote: buffer for size of unique PT_NOTE program header | ||
569 | * | ||
570 | * This function is used to merge multiple PT_NOTE program headers | ||
571 | * into a single unique one. The resulting unique entry will have | ||
572 | * @sz_ptnote in its phdr->p_memsz member. | ||
573 | * | ||
574 | * It is assumed that the PT_NOTE program headers pointed to by | ||
575 | * @ehdr_ptr have already been updated by update_note_header_size_elf32 | ||
576 | * and that each PT_NOTE program header has the actual ELF note | ||
577 | * segment size in its p_memsz member. | ||
578 | */ | ||
579 | static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr, | ||
580 | int *nr_ptnote, u64 *sz_ptnote) | ||
581 | { | ||
582 | int i; | ||
583 | Elf32_Phdr *phdr_ptr; | ||
584 | |||
585 | *nr_ptnote = *sz_ptnote = 0; | ||
586 | |||
587 | phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1); | ||
588 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
589 | if (phdr_ptr->p_type != PT_NOTE) | ||
590 | continue; | ||
591 | *nr_ptnote += 1; | ||
592 | *sz_ptnote += phdr_ptr->p_memsz; | ||
593 | } | ||
594 | |||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | /** | ||
599 | * copy_notes_elf32 - copy ELF note segments into a given buffer | ||
600 | * | ||
601 | * @ehdr_ptr: ELF header | ||
602 | * @notes_buf: buffer into which ELF note segments are copied | ||
603 | * | ||
604 | * This function is used to copy the ELF note segments in the 1st | ||
605 | * kernel into the buffer @notes_buf in the 2nd kernel. It is assumed | ||
606 | * that the size of the buffer @notes_buf is equal to or larger than | ||
607 | * the sum of the real ELF note segment headers and data. | ||
608 | * | ||
609 | * It is assumed that the PT_NOTE program headers pointed to by | ||
610 | * @ehdr_ptr have already been updated by update_note_header_size_elf32 | ||
611 | * and that each PT_NOTE program header has the actual ELF note | ||
612 | * segment size in its p_memsz member. | ||
613 | */ | ||
614 | static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf) | ||
615 | { | ||
616 | int i, rc=0; | ||
617 | Elf32_Phdr *phdr_ptr; | ||
618 | |||
619 | phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1); | ||
620 | |||
621 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | ||
622 | u64 offset; | ||
623 | if (phdr_ptr->p_type != PT_NOTE) | ||
624 | continue; | ||
625 | offset = phdr_ptr->p_offset; | ||
626 | rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0); | ||
627 | if (rc < 0) | ||
628 | return rc; | ||
629 | notes_buf += phdr_ptr->p_memsz; | ||
385 | } | 630 | } |
386 | 631 | ||
632 | return 0; | ||
633 | } | ||
634 | |||
635 | /* Merges all the PT_NOTE headers into one. */ | ||
636 | static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | ||
637 | char **notes_buf, size_t *notes_sz) | ||
638 | { | ||
639 | int i, nr_ptnote=0, rc=0; | ||
640 | char *tmp; | ||
641 | Elf32_Ehdr *ehdr_ptr; | ||
642 | Elf32_Phdr phdr; | ||
643 | u64 phdr_sz = 0, note_off; | ||
644 | |||
645 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | ||
646 | |||
647 | rc = update_note_header_size_elf32(ehdr_ptr); | ||
648 | if (rc < 0) | ||
649 | return rc; | ||
650 | |||
651 | rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz); | ||
652 | if (rc < 0) | ||
653 | return rc; | ||
654 | |||
655 | *notes_sz = roundup(phdr_sz, PAGE_SIZE); | ||
656 | *notes_buf = alloc_elfnotes_buf(*notes_sz); | ||
657 | if (!*notes_buf) | ||
658 | return -ENOMEM; | ||
659 | |||
660 | rc = copy_notes_elf32(ehdr_ptr, *notes_buf); | ||
661 | if (rc < 0) | ||
662 | return rc; | ||
663 | |||
387 | /* Prepare merged PT_NOTE program header. */ | 664 | /* Prepare merged PT_NOTE program header. */ |
388 | phdr.p_type = PT_NOTE; | 665 | phdr.p_type = PT_NOTE; |
389 | phdr.p_flags = 0; | 666 | phdr.p_flags = 0; |
390 | note_off = sizeof(Elf32_Ehdr) + | 667 | note_off = sizeof(Elf32_Ehdr) + |
391 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); | 668 | (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); |
392 | phdr.p_offset = note_off; | 669 | phdr.p_offset = roundup(note_off, PAGE_SIZE); |
393 | phdr.p_vaddr = phdr.p_paddr = 0; | 670 | phdr.p_vaddr = phdr.p_paddr = 0; |
394 | phdr.p_filesz = phdr.p_memsz = phdr_sz; | 671 | phdr.p_filesz = phdr.p_memsz = phdr_sz; |
395 | phdr.p_align = 0; | 672 | phdr.p_align = 0; |
@@ -403,6 +680,8 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
403 | i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); | 680 | i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); |
404 | *elfsz = *elfsz - i; | 681 | *elfsz = *elfsz - i; |
405 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); | 682 | memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); |
683 | memset(elfptr + *elfsz, 0, i); | ||
684 | *elfsz = roundup(*elfsz, PAGE_SIZE); | ||
406 | 685 | ||
407 | /* Modify e_phnum to reflect merged headers. */ | 686 | /* Modify e_phnum to reflect merged headers. */ |
408 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; | 687 | ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; |
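As a back-of-the-envelope illustration of the merged-header arithmetic in the hunk above (the surviving program header count, and the note data offset that is now rounded up to a page), here is a standalone sketch; the e_phnum/nr_ptnote values and PAGE_SIZE are assumptions chosen for the example.

/* layout_demo.c - illustrative sketch only, not kernel code */
#include <elf.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define ROUNDUP(x, a) ((((x) + (a) - 1) / (a)) * (a))

int main(void)
{
	unsigned int e_phnum = 6;	/* assumed: program headers in the old kernel's ELF header */
	unsigned int nr_ptnote = 4;	/* assumed: how many of them are PT_NOTE */

	/* All PT_NOTE entries collapse into a single merged one. */
	unsigned int new_phnum = e_phnum - nr_ptnote + 1;

	/*
	 * The merged note data lands after the ELF header and the surviving
	 * program headers, rounded up to a page boundary as in the patch.
	 */
	unsigned long note_off = sizeof(Elf32_Ehdr) + new_phnum * sizeof(Elf32_Phdr);
	unsigned long p_offset = ROUNDUP(note_off, PAGE_SIZE);

	printf("e_phnum %u -> %u, merged note data at file offset %lu\n",
	       e_phnum, new_phnum, p_offset);
	return 0;
}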
@@ -414,6 +693,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz, | |||
414 | * the new offset fields of exported program headers. */ | 693 | * the new offset fields of exported program headers. */ |
415 | static int __init process_ptload_program_headers_elf64(char *elfptr, | 694 | static int __init process_ptload_program_headers_elf64(char *elfptr, |
416 | size_t elfsz, | 695 | size_t elfsz, |
696 | size_t elfnotes_sz, | ||
417 | struct list_head *vc_list) | 697 | struct list_head *vc_list) |
418 | { | 698 | { |
419 | int i; | 699 | int i; |
@@ -425,32 +705,38 @@ static int __init process_ptload_program_headers_elf64(char *elfptr, | |||
425 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 705 | ehdr_ptr = (Elf64_Ehdr *)elfptr; |
426 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ | 706 | phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ |
427 | 707 | ||
428 | /* First program header is PT_NOTE header. */ | 708 | /* Skip Elf header, program headers and Elf note segment. */ |
429 | vmcore_off = sizeof(Elf64_Ehdr) + | 709 | vmcore_off = elfsz + elfnotes_sz; |
430 | (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) + | ||
431 | phdr_ptr->p_memsz; /* Note sections */ | ||
432 | 710 | ||
433 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 711 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
712 | u64 paddr, start, end, size; | ||
713 | |||
434 | if (phdr_ptr->p_type != PT_LOAD) | 714 | if (phdr_ptr->p_type != PT_LOAD) |
435 | continue; | 715 | continue; |
436 | 716 | ||
717 | paddr = phdr_ptr->p_offset; | ||
718 | start = rounddown(paddr, PAGE_SIZE); | ||
719 | end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); | ||
720 | size = end - start; | ||
721 | |||
437 | /* Add this contiguous chunk of memory to vmcore list.*/ | 722 | /* Add this contiguous chunk of memory to vmcore list.*/ |
438 | new = get_new_element(); | 723 | new = get_new_element(); |
439 | if (!new) | 724 | if (!new) |
440 | return -ENOMEM; | 725 | return -ENOMEM; |
441 | new->paddr = phdr_ptr->p_offset; | 726 | new->paddr = start; |
442 | new->size = phdr_ptr->p_memsz; | 727 | new->size = size; |
443 | list_add_tail(&new->list, vc_list); | 728 | list_add_tail(&new->list, vc_list); |
444 | 729 | ||
445 | /* Update the program header offset. */ | 730 | /* Update the program header offset. */ |
446 | phdr_ptr->p_offset = vmcore_off; | 731 | phdr_ptr->p_offset = vmcore_off + (paddr - start); |
447 | vmcore_off = vmcore_off + phdr_ptr->p_memsz; | 732 | vmcore_off = vmcore_off + size; |
448 | } | 733 | } |
449 | return 0; | 734 | return 0; |
450 | } | 735 | } |
451 | 736 | ||
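The page-alignment added to the PT_LOAD loop can be seen in isolation in the following standalone sketch: the chunk recorded on vmcore_list is widened to whole pages, while the exported p_offset still points at the original data within the first page. All input values are made-up examples.

/* ptload_align_demo.c - illustrative sketch only, not kernel code */
#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define ROUNDDOWN(x, a)	(((x) / (a)) * (a))
#define ROUNDUP(x, a)	((((x) + (a) - 1) / (a)) * (a))

int main(void)
{
	/* Assumed example values for one PT_LOAD program header. */
	unsigned long long paddr = 0x100234;	/* p_offset: physical start of the chunk */
	unsigned long long memsz = 0x1800;	/* p_memsz */
	unsigned long long vmcore_off = 0x3000;	/* headers + notes already laid out */

	/* The chunk added to vmcore_list covers whole pages... */
	unsigned long long start = ROUNDDOWN(paddr, PAGE_SIZE);
	unsigned long long end = ROUNDUP(paddr + memsz, PAGE_SIZE);
	unsigned long long size = end - start;

	/* ...but the exported offset keeps the sub-page displacement. */
	unsigned long long p_offset = vmcore_off + (paddr - start);

	printf("pages [%#llx, %#llx) size %#llx, exported p_offset %#llx\n",
	       start, end, size, p_offset);
	return 0;
}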
452 | static int __init process_ptload_program_headers_elf32(char *elfptr, | 737 | static int __init process_ptload_program_headers_elf32(char *elfptr, |
453 | size_t elfsz, | 738 | size_t elfsz, |
739 | size_t elfnotes_sz, | ||
454 | struct list_head *vc_list) | 740 | struct list_head *vc_list) |
455 | { | 741 | { |
456 | int i; | 742 | int i; |
@@ -462,43 +748,44 @@ static int __init process_ptload_program_headers_elf32(char *elfptr, | |||
462 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | 748 | ehdr_ptr = (Elf32_Ehdr *)elfptr; |
463 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ | 749 | phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ |
464 | 750 | ||
465 | /* First program header is PT_NOTE header. */ | 751 | /* Skip Elf header, program headers and Elf note segment. */ |
466 | vmcore_off = sizeof(Elf32_Ehdr) + | 752 | vmcore_off = elfsz + elfnotes_sz; |
467 | (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) + | ||
468 | phdr_ptr->p_memsz; /* Note sections */ | ||
469 | 753 | ||
470 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { | 754 | for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { |
755 | u64 paddr, start, end, size; | ||
756 | |||
471 | if (phdr_ptr->p_type != PT_LOAD) | 757 | if (phdr_ptr->p_type != PT_LOAD) |
472 | continue; | 758 | continue; |
473 | 759 | ||
760 | paddr = phdr_ptr->p_offset; | ||
761 | start = rounddown(paddr, PAGE_SIZE); | ||
762 | end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE); | ||
763 | size = end - start; | ||
764 | |||
474 | /* Add this contiguous chunk of memory to vmcore list.*/ | 765 | /* Add this contiguous chunk of memory to vmcore list.*/ |
475 | new = get_new_element(); | 766 | new = get_new_element(); |
476 | if (!new) | 767 | if (!new) |
477 | return -ENOMEM; | 768 | return -ENOMEM; |
478 | new->paddr = phdr_ptr->p_offset; | 769 | new->paddr = start; |
479 | new->size = phdr_ptr->p_memsz; | 770 | new->size = size; |
480 | list_add_tail(&new->list, vc_list); | 771 | list_add_tail(&new->list, vc_list); |
481 | 772 | ||
482 | /* Update the program header offset */ | 773 | /* Update the program header offset */ |
483 | phdr_ptr->p_offset = vmcore_off; | 774 | phdr_ptr->p_offset = vmcore_off + (paddr - start); |
484 | vmcore_off = vmcore_off + phdr_ptr->p_memsz; | 775 | vmcore_off = vmcore_off + size; |
485 | } | 776 | } |
486 | return 0; | 777 | return 0; |
487 | } | 778 | } |
488 | 779 | ||
489 | /* Sets offset fields of vmcore elements. */ | 780 | /* Sets offset fields of vmcore elements. */ |
490 | static void __init set_vmcore_list_offsets_elf64(char *elfptr, | 781 | static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz, |
491 | struct list_head *vc_list) | 782 | struct list_head *vc_list) |
492 | { | 783 | { |
493 | loff_t vmcore_off; | 784 | loff_t vmcore_off; |
494 | Elf64_Ehdr *ehdr_ptr; | ||
495 | struct vmcore *m; | 785 | struct vmcore *m; |
496 | 786 | ||
497 | ehdr_ptr = (Elf64_Ehdr *)elfptr; | 787 | /* Skip Elf header, program headers and Elf note segment. */ |
498 | 788 | vmcore_off = elfsz + elfnotes_sz; | |
499 | /* Skip Elf header and program headers. */ | ||
500 | vmcore_off = sizeof(Elf64_Ehdr) + | ||
501 | (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr); | ||
502 | 789 | ||
503 | list_for_each_entry(m, vc_list, list) { | 790 | list_for_each_entry(m, vc_list, list) { |
504 | m->offset = vmcore_off; | 791 | m->offset = vmcore_off; |
@@ -506,24 +793,12 @@ static void __init set_vmcore_list_offsets_elf64(char *elfptr, | |||
506 | } | 793 | } |
507 | } | 794 | } |
508 | 795 | ||
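A short standalone sketch of what the unified set_vmcore_list_offsets() computes: offsets now start after the (page-aligned) ELF headers plus the note buffer and accumulate chunk by chunk. The sizes below are arbitrary example values.

/* offsets_demo.c - illustrative sketch only, not kernel code */
#include <stdio.h>

int main(void)
{
	/* Assumed: page-aligned header size, note buffer size, three chunks. */
	unsigned long elfcorebuf_sz = 4096, elfnotes_sz = 8192;
	unsigned long chunk_size[] = { 0x200000, 0x100000, 0x40000 };
	unsigned long vmcore_off = elfcorebuf_sz + elfnotes_sz;

	for (int i = 0; i < 3; i++) {
		/* Each chunk starts in the file where the previous one ended. */
		printf("chunk %d: offset %#lx, size %#lx\n",
		       i, vmcore_off, chunk_size[i]);
		vmcore_off += chunk_size[i];
	}
	return 0;
}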
509 | /* Sets offset fields of vmcore elements. */ | 796 | static void free_elfcorebuf(void) |
510 | static void __init set_vmcore_list_offsets_elf32(char *elfptr, | ||
511 | struct list_head *vc_list) | ||
512 | { | 797 | { |
513 | loff_t vmcore_off; | 798 | free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig)); |
514 | Elf32_Ehdr *ehdr_ptr; | 799 | elfcorebuf = NULL; |
515 | struct vmcore *m; | 800 | vfree(elfnotes_buf); |
516 | 801 | elfnotes_buf = NULL; | |
517 | ehdr_ptr = (Elf32_Ehdr *)elfptr; | ||
518 | |||
519 | /* Skip Elf header and program headers. */ | ||
520 | vmcore_off = sizeof(Elf32_Ehdr) + | ||
521 | (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr); | ||
522 | |||
523 | list_for_each_entry(m, vc_list, list) { | ||
524 | m->offset = vmcore_off; | ||
525 | vmcore_off += m->size; | ||
526 | } | ||
527 | } | 802 | } |
528 | 803 | ||
529 | static int __init parse_crash_elf64_headers(void) | 804 | static int __init parse_crash_elf64_headers(void) |
@@ -554,31 +829,32 @@ static int __init parse_crash_elf64_headers(void) | |||
554 | } | 829 | } |
555 | 830 | ||
556 | /* Read in all elf headers. */ | 831 | /* Read in all elf headers. */ |
557 | elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr); | 832 | elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) + |
558 | elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); | 833 | ehdr.e_phnum * sizeof(Elf64_Phdr); |
834 | elfcorebuf_sz = elfcorebuf_sz_orig; | ||
835 | elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
836 | get_order(elfcorebuf_sz_orig)); | ||
559 | if (!elfcorebuf) | 837 | if (!elfcorebuf) |
560 | return -ENOMEM; | 838 | return -ENOMEM; |
561 | addr = elfcorehdr_addr; | 839 | addr = elfcorehdr_addr; |
562 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); | 840 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); |
563 | if (rc < 0) { | 841 | if (rc < 0) |
564 | kfree(elfcorebuf); | 842 | goto fail; |
565 | return rc; | ||
566 | } | ||
567 | 843 | ||
568 | /* Merge all PT_NOTE headers into one. */ | 844 | /* Merge all PT_NOTE headers into one. */ |
569 | rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list); | 845 | rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, |
570 | if (rc) { | 846 | &elfnotes_buf, &elfnotes_sz); |
571 | kfree(elfcorebuf); | 847 | if (rc) |
572 | return rc; | 848 | goto fail; |
573 | } | ||
574 | rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, | 849 | rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz, |
575 | &vmcore_list); | 850 | elfnotes_sz, &vmcore_list); |
576 | if (rc) { | 851 | if (rc) |
577 | kfree(elfcorebuf); | 852 | goto fail; |
578 | return rc; | 853 | set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); |
579 | } | ||
580 | set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list); | ||
581 | return 0; | 854 | return 0; |
855 | fail: | ||
856 | free_elfcorebuf(); | ||
857 | return rc; | ||
582 | } | 858 | } |
583 | 859 | ||
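The kmalloc() of the headers is replaced above by a zeroed, page-order allocation sized with get_order(). As a rough userspace analog (an assumption-laden sketch, not kernel code), the order is the smallest one whose 2^order pages hold at least the requested size; the 40-header example is invented.

/* order_demo.c - illustrative sketch only, not kernel code */
#include <elf.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Smallest order such that (1 << order) pages cover size bytes. */
static int order_for(unsigned long size)
{
	unsigned long pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
	int order = 0;

	while ((1UL << order) < pages)
		order++;
	return order;
}

int main(void)
{
	/* Assumed: one Elf64 header plus 40 program headers. */
	unsigned long elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) + 40 * sizeof(Elf64_Phdr);
	int order = order_for(elfcorebuf_sz_orig);

	printf("%lu bytes -> order %d (%lu bytes allocated)\n",
	       elfcorebuf_sz_orig, order, (PAGE_SIZE << order));
	return 0;
}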
584 | static int __init parse_crash_elf32_headers(void) | 860 | static int __init parse_crash_elf32_headers(void) |
@@ -609,31 +885,31 @@ static int __init parse_crash_elf32_headers(void) | |||
609 | } | 885 | } |
610 | 886 | ||
611 | /* Read in all elf headers. */ | 887 | /* Read in all elf headers. */ |
612 | elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); | 888 | elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); |
613 | elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL); | 889 | elfcorebuf_sz = elfcorebuf_sz_orig; |
890 | elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
891 | get_order(elfcorebuf_sz_orig)); | ||
614 | if (!elfcorebuf) | 892 | if (!elfcorebuf) |
615 | return -ENOMEM; | 893 | return -ENOMEM; |
616 | addr = elfcorehdr_addr; | 894 | addr = elfcorehdr_addr; |
617 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0); | 895 | rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0); |
618 | if (rc < 0) { | 896 | if (rc < 0) |
619 | kfree(elfcorebuf); | 897 | goto fail; |
620 | return rc; | ||
621 | } | ||
622 | 898 | ||
623 | /* Merge all PT_NOTE headers into one. */ | 899 | /* Merge all PT_NOTE headers into one. */ |
624 | rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list); | 900 | rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, |
625 | if (rc) { | 901 | &elfnotes_buf, &elfnotes_sz); |
626 | kfree(elfcorebuf); | 902 | if (rc) |
627 | return rc; | 903 | goto fail; |
628 | } | ||
629 | rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, | 904 | rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz, |
630 | &vmcore_list); | 905 | elfnotes_sz, &vmcore_list); |
631 | if (rc) { | 906 | if (rc) |
632 | kfree(elfcorebuf); | 907 | goto fail; |
633 | return rc; | 908 | set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list); |
634 | } | ||
635 | set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list); | ||
636 | return 0; | 909 | return 0; |
910 | fail: | ||
911 | free_elfcorebuf(); | ||
912 | return rc; | ||
637 | } | 913 | } |
638 | 914 | ||
639 | static int __init parse_crash_elf_headers(void) | 915 | static int __init parse_crash_elf_headers(void) |
@@ -655,20 +931,19 @@ static int __init parse_crash_elf_headers(void) | |||
655 | rc = parse_crash_elf64_headers(); | 931 | rc = parse_crash_elf64_headers(); |
656 | if (rc) | 932 | if (rc) |
657 | return rc; | 933 | return rc; |
658 | |||
659 | /* Determine vmcore size. */ | ||
660 | vmcore_size = get_vmcore_size_elf64(elfcorebuf); | ||
661 | } else if (e_ident[EI_CLASS] == ELFCLASS32) { | 934 | } else if (e_ident[EI_CLASS] == ELFCLASS32) { |
662 | rc = parse_crash_elf32_headers(); | 935 | rc = parse_crash_elf32_headers(); |
663 | if (rc) | 936 | if (rc) |
664 | return rc; | 937 | return rc; |
665 | |||
666 | /* Determine vmcore size. */ | ||
667 | vmcore_size = get_vmcore_size_elf32(elfcorebuf); | ||
668 | } else { | 938 | } else { |
669 | pr_warn("Warning: Core image elf header is not sane\n"); | 939 | pr_warn("Warning: Core image elf header is not sane\n"); |
670 | return -EINVAL; | 940 | return -EINVAL; |
671 | } | 941 | } |
942 | |||
943 | /* Determine vmcore size. */ | ||
944 | vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz, | ||
945 | &vmcore_list); | ||
946 | |||
672 | return 0; | 947 | return 0; |
673 | } | 948 | } |
674 | 949 | ||
@@ -711,7 +986,6 @@ void vmcore_cleanup(void) | |||
711 | list_del(&m->list); | 986 | list_del(&m->list); |
712 | kfree(m); | 987 | kfree(m); |
713 | } | 988 | } |
714 | kfree(elfcorebuf); | 989 | free_elfcorebuf(); |
715 | elfcorebuf = NULL; | ||
716 | } | 990 | } |
717 | EXPORT_SYMBOL_GPL(vmcore_cleanup); | 991 | EXPORT_SYMBOL_GPL(vmcore_cleanup); |