diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/devicetree/bindings/powerpc/fsl/mpic-msgr.txt | 63 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/powerpc/fsl/mpic.txt | 22 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt | 6 | ||||
-rw-r--r-- | Documentation/filesystems/debugfs.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 6 | ||||
-rw-r--r-- | Documentation/filesystems/qnx6.txt | 174 | ||||
-rw-r--r-- | Documentation/ioctl/ioctl-number.txt | 1 | ||||
-rw-r--r-- | Documentation/networking/dns_resolver.txt | 4 | ||||
-rw-r--r-- | Documentation/powerpc/firmware-assisted-dump.txt | 270 | ||||
-rw-r--r-- | Documentation/powerpc/mpc52xx.txt | 12 | ||||
-rw-r--r-- | Documentation/powerpc/phyp-assisted-dump.txt | 127 | ||||
-rw-r--r-- | Documentation/security/00-INDEX | 2 | ||||
-rw-r--r-- | Documentation/security/Yama.txt | 65 | ||||
-rw-r--r-- | Documentation/security/keys.txt | 4 |
14 files changed, 621 insertions, 137 deletions
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic-msgr.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic-msgr.txt new file mode 100644 index 000000000000..bc8ded641ab6 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic-msgr.txt | |||
@@ -0,0 +1,63 @@ | |||
1 | * FSL MPIC Message Registers | ||
2 | |||
3 | This binding specifies what properties must be available in the device tree | ||
4 | representation of the message register blocks found in some FSL MPIC | ||
5 | implementations. | ||
6 | |||
7 | Required properties: | ||
8 | |||
9 | - compatible: Specifies the compatibility list for the message register | ||
10 | block. The type shall be <string-list> and the value shall be of the form | ||
11 | "fsl,mpic-v<version>-msgr", where <version> is the version number of | ||
12 | the MPIC containing the message registers. | ||
13 | |||
14 | - reg: Specifies the base physical address(s) and size(s) of the | ||
15 | message register block's addressable register space. The type shall be | ||
16 | <prop-encoded-array>. | ||
17 | |||
18 | - interrupts: Specifies a list of interrupt-specifiers which are available | ||
19 | for receiving interrupts. Interrupt-specifier consists of two cells: first | ||
20 | cell is interrupt-number and second cell is level-sense. The type shall be | ||
21 | <prop-encoded-array>. | ||
22 | |||
23 | Optional properties: | ||
24 | |||
25 | - mpic-msgr-receive-mask: Specifies what registers in the containing block | ||
26 | are allowed to receive interrupts. The value is a bit mask where a set | ||
27 | bit at bit 'n' indicates that message register 'n' can receive interrupts. | ||
28 | Note that "bit 'n'" is numbered from LSB for PPC hardware. The type shall | ||
29 | be <u32>. If not present, then all of the message registers in the block | ||
30 | are available. | ||
31 | |||
32 | Aliases: | ||
33 | |||
34 | An alias should be created for every message register block. They are not | ||
35 | required, though. However, a particular implementation of this binding | ||
36 | may require aliases to be present. Aliases are of the form | ||
37 | 'mpic-msgr-block<n>', where <n> is an integer specifying the block's number. | ||
38 | Numbers shall start at 0. | ||
39 | |||
40 | Example: | ||
41 | |||
42 | aliases { | ||
43 | mpic-msgr-block0 = &mpic_msgr_block0; | ||
44 | mpic-msgr-block1 = &mpic_msgr_block1; | ||
45 | }; | ||
46 | |||
47 | mpic_msgr_block0: mpic-msgr-block@41400 { | ||
48 | compatible = "fsl,mpic-v3.1-msgr"; | ||
49 | reg = <0x41400 0x200>; | ||
50 | // Message registers 0 and 2 in this block can receive interrupts on | ||
51 | // sources 0xb0 and 0xb2, respectively. | ||
52 | interrupts = <0xb0 2 0xb2 2>; | ||
53 | mpic-msgr-receive-mask = <0x5>; | ||
54 | }; | ||
55 | |||
56 | mpic_msgr_block1: mpic-msgr-block@42400 { | ||
57 | compatible = "fsl,mpic-v3.1-msgr"; | ||
58 | reg = <0x42400 0x200>; | ||
59 | // Message registers 0 and 2 in this block can receive interrupts on | ||
60 | // sources 0xb4 and 0xb6, respectively. | ||
61 | interrupts = <0xb4 2 0xb6 2>; | ||
62 | mpic-msgr-receive-mask = <0x5>; | ||
63 | }; | ||
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt index 2cf38bd841fd..dc5744636a57 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt | |||
@@ -56,7 +56,27 @@ PROPERTIES | |||
56 | to the client. The presence of this property also mandates | 56 | to the client. The presence of this property also mandates |
57 | that any initialization related to interrupt sources shall | 57 | that any initialization related to interrupt sources shall |
58 | be limited to sources explicitly referenced in the device tree. | 58 | be limited to sources explicitly referenced in the device tree. |
59 | 59 | ||
60 | - big-endian | ||
61 | Usage: optional | ||
62 | Value type: <empty> | ||
63 | If present the MPIC will be assumed to be big-endian. Some | ||
64 | device-trees omit this property on MPIC nodes even when the MPIC is | ||
65 | in fact big-endian, so certain boards override this property. | ||
66 | |||
67 | - single-cpu-affinity | ||
68 | Usage: optional | ||
69 | Value type: <empty> | ||
70 | If present the MPIC will be assumed to only be able to route | ||
71 | non-IPI interrupts to a single CPU at a time (EG: Freescale MPIC). | ||
72 | |||
73 | - last-interrupt-source | ||
74 | Usage: optional | ||
75 | Value type: <u32> | ||
76 | Some MPICs do not correctly report the number of hardware sources | ||
77 | in the global feature registers. If specified, this field will | ||
78 | override the value read from MPIC_GREG_FEATURE_LAST_SRC. | ||
79 | |||
60 | INTERRUPT SPECIFIER DEFINITION | 80 | INTERRUPT SPECIFIER DEFINITION |
61 | 81 | ||
62 | Interrupt specifiers consists of 4 cells encoded as | 82 | Interrupt specifiers consists of 4 cells encoded as |
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt b/Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt index 5d586e1ccaf5..5693877ab377 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt | |||
@@ -6,8 +6,10 @@ Required properties: | |||
6 | etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" depending on | 6 | etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" depending on |
7 | the parent type. | 7 | the parent type. |
8 | 8 | ||
9 | - reg : should contain the address and the length of the shared message | 9 | - reg : It may contain one or two regions. The first region should contain |
10 | interrupt register set. | 10 | the address and the length of the shared message interrupt register set. |
11 | The second region should contain the address of aliased MSIIR register for | ||
12 | platforms that have such an alias. | ||
11 | 13 | ||
12 | - msi-available-ranges: use <start count> style section to define which | 14 | - msi-available-ranges: use <start count> style section to define which |
13 | msi interrupt can be used in the 256 msi interrupts. This property is | 15 | msi interrupt can be used in the 256 msi interrupts. This property is |
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt index 4e2575873187..7a34f827989c 100644 --- a/Documentation/filesystems/debugfs.txt +++ b/Documentation/filesystems/debugfs.txt | |||
@@ -136,7 +136,7 @@ file. | |||
136 | void __iomem *base; | 136 | void __iomem *base; |
137 | }; | 137 | }; |
138 | 138 | ||
139 | struct dentry *debugfs_create_regset32(const char *name, mode_t mode, | 139 | struct dentry *debugfs_create_regset32(const char *name, umode_t mode, |
140 | struct dentry *parent, | 140 | struct dentry *parent, |
141 | struct debugfs_regset32 *regset); | 141 | struct debugfs_regset32 *regset); |
142 | 142 | ||
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index b4a3d765ff9a..74acd9618819 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting | |||
@@ -429,3 +429,9 @@ filemap_write_and_wait_range() so that all dirty pages are synced out properly. | |||
429 | You must also keep in mind that ->fsync() is not called with i_mutex held | 429 | You must also keep in mind that ->fsync() is not called with i_mutex held |
430 | anymore, so if you require i_mutex locking you must make sure to take it and | 430 | anymore, so if you require i_mutex locking you must make sure to take it and |
431 | release it yourself. | 431 | release it yourself. |
432 | |||
433 | -- | ||
434 | [mandatory] | ||
435 | d_alloc_root() is gone, along with a lot of bugs caused by code | ||
436 | misusing it. Replacement: d_make_root(inode). The difference is, | ||
437 | d_make_root() drops the reference to inode if dentry allocation fails. | ||
diff --git a/Documentation/filesystems/qnx6.txt b/Documentation/filesystems/qnx6.txt new file mode 100644 index 000000000000..050223ea03c7 --- /dev/null +++ b/Documentation/filesystems/qnx6.txt | |||
@@ -0,0 +1,174 @@ | |||
1 | The QNX6 Filesystem | ||
2 | =================== | ||
3 | |||
4 | The qnx6fs is used by newer QNX operating system versions. (e.g. Neutrino) | ||
5 | It was introduced in QNX 6.4.0 and has been the default since 6.4.1. | ||
6 | |||
7 | Option | ||
8 | ====== | ||
9 | |||
10 | mmi_fs Mount filesystem as used for example by Audi MMI 3G system | ||
11 | |||
12 | Specification | ||
13 | ============= | ||
14 | |||
15 | qnx6fs shares many properties with traditional Unix filesystems. It has the | ||
16 | concepts of blocks, inodes and directories. | ||
17 | On QNX it is possible to create little endian and big endian qnx6 filesystems. | ||
18 | This feature makes it possible to create and use a different endianness fs | ||
19 | for the target (QNX is used on quite a range of embedded systems) platform | ||
20 | running on a different endianness. | ||
21 | The Linux driver handles endianness transparently. (LE and BE) | ||
22 | |||
23 | Blocks | ||
24 | ------ | ||
25 | |||
26 | The space in the device or file is split up into blocks. These are a fixed | ||
27 | size of 512, 1024, 2048 or 4096, which is decided when the filesystem is | ||
28 | created. | ||
29 | Blockpointers are 32bit, so the maximum space that can be addressed is | ||
30 | 2^32 * 4096 bytes or 16TB | ||
31 | |||
32 | The superblocks | ||
33 | --------------- | ||
34 | |||
35 | The superblock contains all global information about the filesystem. | ||
36 | Each qnx6fs got two superblocks, each one having a 64bit serial number. | ||
37 | That serial number is used to identify the "active" superblock. | ||
38 | In write mode with each new snapshot (after each synchronous write), the | ||
39 | serial of the new master superblock is increased (old superblock serial + 1) | ||
40 | |||
41 | So basically the snapshot functionality is realized by an atomic final | ||
42 | update of the serial number. Before updating that serial, all modifications | ||
43 | are done by copying all modified blocks during that specific write request | ||
44 | (or period) and building up a new (stable) filesystem structure under the | ||
45 | inactive superblock. | ||
46 | |||
47 | Each superblock holds a set of root inodes for the different filesystem | ||
48 | parts. (Inode, Bitmap and Longfilenames) | ||
49 | Each of these root nodes holds information like total size of the stored | ||
50 | data and the addressing levels in that specific tree. | ||
51 | If the level value is 0, up to 16 direct blocks can be addressed by each | ||
52 | node. | ||
53 | Level 1 adds an additional indirect addressing level where each indirect | ||
54 | addressing block holds up to blocksize / 4 bytes pointers to data blocks. | ||
55 | Level 2 adds an additional indirect addressing block level (so, already up | ||
56 | to 16 * 256 * 256 = 1048576 blocks that can be addressed by such a tree). | ||
57 | |||
58 | Unused block pointers are always set to ~0 - regardless of root node, | ||
59 | indirect addressing blocks or inodes. | ||
60 | Data leaves are always on the lowest level. So no data is stored on upper | ||
61 | tree levels. | ||
62 | |||
63 | The first Superblock is located at 0x2000. (0x2000 is the bootblock size) | ||
64 | The Audi MMI 3G first superblock directly starts at byte 0. | ||
65 | Second superblock position can either be calculated from the superblock | ||
66 | information (total number of filesystem blocks) or by taking the highest | ||
67 | device address, zeroing the last 3 bytes and then subtracting 0x1000 from | ||
68 | that address. | ||
69 | |||
70 | 0x1000 is the size reserved for each superblock - regardless of the | ||
71 | blocksize of the filesystem. | ||
72 | |||
73 | Inodes | ||
74 | ------ | ||
75 | |||
76 | Each object in the filesystem is represented by an inode. (index node) | ||
77 | The inode structure contains pointers to the filesystem blocks which contain | ||
78 | the data held in the object and all of the metadata about an object except | ||
79 | its longname. (filenames longer than 27 characters) | ||
80 | The metadata about an object includes the permissions, owner, group, flags, | ||
81 | size, number of blocks used, access time, change time and modification time. | ||
82 | |||
83 | Object mode field is POSIX format. (which makes things easier) | ||
84 | |||
85 | There are also pointers to the first 16 blocks, if the object data can be | ||
86 | addressed with 16 direct blocks. | ||
87 | For more than 16 blocks an indirect addressing in form of another tree is | ||
88 | used. (scheme is the same as the one used for the superblock root nodes) | ||
89 | |||
90 | The filesize is stored 64bit. Inode counting starts with 1. (whilst long | ||
91 | filename inodes start with 0) | ||
92 | |||
93 | Directories | ||
94 | ----------- | ||
95 | |||
96 | A directory is a filesystem object and has an inode just like a file. | ||
97 | It is a specially formatted file containing records which associate each | ||
98 | name with an inode number. | ||
99 | '.' inode number points to the directory inode | ||
100 | '..' inode number points to the parent directory inode | ||
101 | Each filename record additionally got a filename length field. | ||
102 | |||
103 | One special case are long filenames or subdirectory names. | ||
104 | These got set a filename length field of 0xff in the corresponding directory | ||
105 | record plus the longfile inode number also stored in that record. | ||
106 | With that longfilename inode number, the longfilename tree can be walked | ||
107 | starting with the superblock longfilename root node pointers. | ||
108 | |||
109 | Special files | ||
110 | ------------- | ||
111 | |||
112 | Symbolic links are also filesystem objects with inodes. They got a specific | ||
113 | bit in the inode mode field identifying them as symbolic link. | ||
114 | The directory entry file inode pointer points to the target file inode. | ||
115 | |||
116 | Hard links got an inode, a directory entry, but a specific mode bit set, | ||
117 | no block pointers and the directory file record pointing to the target file | ||
118 | inode. | ||
119 | |||
120 | Character and block special devices do not exist in QNX as those files | ||
121 | are handled by the QNX kernel/drivers and created in /dev independent of the | ||
122 | underlying filesystem. | ||
123 | |||
124 | Long filenames | ||
125 | -------------- | ||
126 | |||
127 | Long filenames are stored in a separate addressing tree. The starting point | ||
128 | is the longfilename root node in the active superblock. | ||
129 | Each data block (tree leaves) holds one long filename. That filename is | ||
130 | limited to 510 bytes. The first two starting bytes are used as length field | ||
131 | for the actual filename. | ||
132 | If that structure shall fit for all allowed blocksizes, it is clear why there | ||
133 | is a limit of 510 bytes for the actual filename stored. | ||
134 | |||
135 | Bitmap | ||
136 | ------ | ||
137 | |||
138 | The qnx6fs filesystem allocation bitmap is stored in a tree under bitmap | ||
139 | root node in the superblock and each bit in the bitmap represents one | ||
140 | filesystem block. | ||
141 | The first block is block 0, which starts 0x1000 after superblock start. | ||
142 | So for a normal qnx6fs 0x3000 (bootblock + superblock) is the physical | ||
143 | address at which block 0 is located. | ||
144 | |||
145 | Bits at the end of the last bitmap block are set to 1, if the device is | ||
146 | smaller than addressing space in the bitmap. | ||
147 | |||
148 | Bitmap system area | ||
149 | ------------------ | ||
150 | |||
151 | The bitmap itself is divided into three parts. | ||
152 | First the system area, that is split into two halves. | ||
153 | Then userspace. | ||
154 | |||
155 | The requirement for a static, fixed preallocated system area comes from how | ||
156 | qnx6fs deals with writes. | ||
157 | Each superblock got its own half of the system area. So superblock #1 | ||
158 | always uses blocks from the lower half whilst superblock #2 just writes to | ||
159 | blocks represented by the upper half bitmap system area bits. | ||
160 | |||
161 | Bitmap blocks, Inode blocks and indirect addressing blocks for those two | ||
162 | tree structures are treated as system blocks. | ||
163 | |||
164 | The rationale behind that is that a write request can work on a new snapshot | ||
165 | (system area of the inactive - resp. lower serial numbered superblock) while | ||
166 | at the same time there is still a complete stable filesystem structure in the | ||
167 | other half of the system area. | ||
168 | |||
169 | When finished with writing (a sync write is completed, the maximum sync leap | ||
170 | time or a filesystem sync is requested), serial of the previously inactive | ||
171 | superblock atomically is increased and the fs switches over to that - then | ||
172 | stable declared - superblock. | ||
173 | |||
174 | For all data outside the system area, blocks are just copied while writing. | ||
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index 68fbfb6529eb..3b7488fc3373 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt | |||
@@ -218,6 +218,7 @@ Code Seq#(hex) Include File Comments | |||
218 | 'h' 00-7F conflict! Charon filesystem | 218 | 'h' 00-7F conflict! Charon filesystem |
219 | <mailto:zapman@interlan.net> | 219 | <mailto:zapman@interlan.net> |
220 | 'h' 00-1F linux/hpet.h conflict! | 220 | 'h' 00-1F linux/hpet.h conflict! |
221 | 'h' 80-8F fs/hfsplus/ioctl.c | ||
221 | 'i' 00-3F linux/i2o-dev.h conflict! | 222 | 'i' 00-3F linux/i2o-dev.h conflict! |
222 | 'i' 0B-1F linux/ipmi.h conflict! | 223 | 'i' 0B-1F linux/ipmi.h conflict! |
223 | 'i' 80-8F linux/i8k.h | 224 | 'i' 80-8F linux/i8k.h |
diff --git a/Documentation/networking/dns_resolver.txt b/Documentation/networking/dns_resolver.txt index 7f531ad83285..d86adcdae420 100644 --- a/Documentation/networking/dns_resolver.txt +++ b/Documentation/networking/dns_resolver.txt | |||
@@ -102,6 +102,10 @@ implemented in the module can be called after doing: | |||
102 | If _expiry is non-NULL, the expiry time (TTL) of the result will be | 102 | If _expiry is non-NULL, the expiry time (TTL) of the result will be |
103 | returned also. | 103 | returned also. |
104 | 104 | ||
105 | The kernel maintains an internal keyring in which it caches looked up keys. | ||
106 | This can be cleared by any process that has the CAP_SYS_ADMIN capability by | ||
107 | the use of KEYCTL_KEYRING_CLEAR on the keyring ID. | ||
108 | |||
105 | 109 | ||
106 | =============================== | 110 | =============================== |
107 | READING DNS KEYS FROM USERSPACE | 111 | READING DNS KEYS FROM USERSPACE |
diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt new file mode 100644 index 000000000000..3007bc98af28 --- /dev/null +++ b/Documentation/powerpc/firmware-assisted-dump.txt | |||
@@ -0,0 +1,270 @@ | |||
1 | |||
2 | Firmware-Assisted Dump | ||
3 | ------------------------ | ||
4 | July 2011 | ||
5 | |||
6 | The goal of firmware-assisted dump is to enable the dump of | ||
7 | a crashed system, and to do so from a fully-reset system, and | ||
8 | to minimize the total elapsed time until the system is back | ||
9 | in production use. | ||
10 | |||
11 | - Firmware assisted dump (fadump) infrastructure is intended to replace | ||
12 | the existing phyp assisted dump. | ||
13 | - Fadump uses the same firmware interfaces and memory reservation model | ||
14 | as phyp assisted dump. | ||
15 | - Unlike phyp dump, fadump exports the memory dump through /proc/vmcore | ||
16 | in the ELF format in the same way as kdump. This helps us reuse the | ||
17 | kdump infrastructure for dump capture and filtering. | ||
18 | - Unlike phyp dump, userspace tool does not need to refer to any sysfs | ||
19 | interface while reading /proc/vmcore. | ||
20 | - Unlike phyp dump, fadump allows user to release all the memory reserved | ||
21 | for dump, with a single operation of echo 1 > /sys/kernel/fadump_release_mem. | ||
22 | - Once enabled through kernel boot parameter, fadump can be | ||
23 | started/stopped through /sys/kernel/fadump_registered interface (see | ||
24 | sysfs files section below) and can be easily integrated with kdump | ||
25 | service start/stop init scripts. | ||
26 | |||
27 | Comparing with kdump or other strategies, firmware-assisted | ||
28 | dump offers several strong, practical advantages: | ||
29 | |||
30 | -- Unlike kdump, the system has been reset, and loaded | ||
31 | with a fresh copy of the kernel. In particular, | ||
32 | PCI and I/O devices have been reinitialized and are | ||
33 | in a clean, consistent state. | ||
34 | -- Once the dump is copied out, the memory that held the dump | ||
35 | is immediately available to the running kernel. And therefore, | ||
36 | unlike kdump, fadump doesn't need a 2nd reboot to get back | ||
37 | the system to the production configuration. | ||
38 | |||
39 | The above can only be accomplished by coordination with, | ||
40 | and assistance from the Power firmware. The procedure is | ||
41 | as follows: | ||
42 | |||
43 | -- The first kernel registers the sections of memory with the | ||
44 | Power firmware for dump preservation during OS initialization. | ||
45 | These registered sections of memory are reserved by the first | ||
46 | kernel during early boot. | ||
47 | |||
48 | -- When a system crashes, the Power firmware will save | ||
49 | the low memory (boot memory of size larger of 5% of system RAM | ||
50 | or 256MB) of RAM to the previous registered region. It will | ||
51 | also save system registers, and hardware PTE's. | ||
52 | |||
53 | NOTE: The term 'boot memory' means size of the low memory chunk | ||
54 | that is required for a kernel to boot successfully when | ||
55 | booted with restricted memory. By default, the boot memory | ||
56 | size will be the larger of 5% of system RAM or 256MB. | ||
57 | Alternatively, user can also specify boot memory size | ||
58 | through boot parameter 'fadump_reserve_mem=' which will | ||
59 | override the default calculated size. Use this option | ||
60 | if default boot memory size is not sufficient for second | ||
61 | kernel to boot successfully. | ||
62 | |||
63 | -- After the low memory (boot memory) area has been saved, the | ||
64 | firmware will reset PCI and other hardware state. It will | ||
65 | *not* clear the RAM. It will then launch the bootloader, as | ||
66 | normal. | ||
67 | |||
68 | -- The freshly booted kernel will notice that there is a new | ||
69 | node (ibm,dump-kernel) in the device tree, indicating that | ||
70 | there is crash data available from a previous boot. During | ||
71 | the early boot OS will reserve rest of the memory above | ||
72 | boot memory size effectively booting with restricted memory | ||
73 | size. This will make sure that the second kernel will not | ||
74 | touch any of the dump memory area. | ||
75 | |||
76 | -- User-space tools will read /proc/vmcore to obtain the contents | ||
77 | of memory, which holds the previous crashed kernel dump in ELF | ||
78 | format. The userspace tools may copy this info to disk, or | ||
79 | network, nas, san, iscsi, etc. as desired. | ||
80 | |||
81 | -- Once the userspace tool is done saving dump, it will echo | ||
82 | '1' to /sys/kernel/fadump_release_mem to release the reserved | ||
83 | memory back to general use, except the memory required for | ||
84 | next firmware-assisted dump registration. | ||
85 | |||
86 | e.g. | ||
87 | # echo 1 > /sys/kernel/fadump_release_mem | ||
88 | |||
89 | Please note that the firmware-assisted dump feature | ||
90 | is only available on Power6 and above systems with recent | ||
91 | firmware versions. | ||
92 | |||
93 | Implementation details: | ||
94 | ---------------------- | ||
95 | |||
96 | During boot, a check is made to see if firmware supports | ||
97 | this feature on that particular machine. If it does, then | ||
98 | we check to see if an active dump is waiting for us. If yes | ||
99 | then everything but boot memory size of RAM is reserved during | ||
100 | early boot (See Fig. 2). This area is released once we finish | ||
101 | collecting the dump from user land scripts (e.g. kdump scripts) | ||
102 | that are run. If there is dump data, then the | ||
103 | /sys/kernel/fadump_release_mem file is created, and the reserved | ||
104 | memory is held. | ||
105 | |||
106 | If there is no waiting dump data, then only the memory required | ||
107 | to hold CPU state, HPTE region, boot memory dump and elfcore | ||
108 | header, is reserved at the top of memory (see Fig. 1). This area | ||
109 | is *not* released: this region will be kept permanently reserved, | ||
110 | so that it can act as a receptacle for a copy of the boot memory | ||
111 | content in addition to CPU state and HPTE region, in the case a | ||
112 | crash does occur. | ||
113 | |||
114 | o Memory Reservation during first kernel | ||
115 | |||
116 | Low memory Top of memory | ||
117 | 0 boot memory size | | ||
118 | | | |<--Reserved dump area -->| | ||
119 | V V | Permanent Reservation V | ||
120 | +-----------+----------/ /----------+---+----+-----------+----+ | ||
121 | | | |CPU|HPTE| DUMP |ELF | | ||
122 | +-----------+----------/ /----------+---+----+-----------+----+ | ||
123 | | ^ | ||
124 | | | | ||
125 | \ / | ||
126 | ------------------------------------------- | ||
127 | Boot memory content gets transferred to | ||
128 | reserved area by firmware at the time of | ||
129 | crash | ||
130 | Fig. 1 | ||
131 | |||
132 | o Memory Reservation during second kernel after crash | ||
133 | |||
134 | Low memory Top of memory | ||
135 | 0 boot memory size | | ||
136 | | |<------------- Reserved dump area ----------- -->| | ||
137 | V V V | ||
138 | +-----------+----------/ /----------+---+----+-----------+----+ | ||
139 | | | |CPU|HPTE| DUMP |ELF | | ||
140 | +-----------+----------/ /----------+---+----+-----------+----+ | ||
141 | | | | ||
142 | V V | ||
143 | Used by second /proc/vmcore | ||
144 | kernel to boot | ||
145 | Fig. 2 | ||
146 | |||
147 | Currently the dump will be copied from /proc/vmcore to a | ||
148 | new file upon user intervention. The dump data available through | ||
149 | /proc/vmcore will be in ELF format. Hence the existing kdump | ||
150 | infrastructure (kdump scripts) to save the dump works fine with | ||
151 | minor modifications. | ||
152 | |||
153 | The tools to examine the dump will be same as the ones | ||
154 | used for kdump. | ||
155 | |||
156 | How to enable firmware-assisted dump (fadump): | ||
157 | ------------------------------------- | ||
158 | |||
159 | 1. Set config option CONFIG_FA_DUMP=y and build kernel. | ||
160 | 2. Boot into linux kernel with 'fadump=on' kernel cmdline option. | ||
161 | 3. Optionally, user can also set 'fadump_reserve_mem=' kernel cmdline | ||
162 | to specify size of the memory to reserve for boot memory dump | ||
163 | preservation. | ||
164 | |||
165 | NOTE: If firmware-assisted dump fails to reserve memory then it will | ||
166 | fallback to existing kdump mechanism if 'crashkernel=' option | ||
167 | is set at kernel cmdline. | ||
168 | |||
169 | Sysfs/debugfs files: | ||
170 | ------------ | ||
171 | |||
172 | Firmware-assisted dump feature uses sysfs file system to hold | ||
173 | the control files and debugfs file to display memory reserved region. | ||
174 | |||
175 | Here is the list of files under kernel sysfs: | ||
176 | |||
177 | /sys/kernel/fadump_enabled | ||
178 | |||
179 | This is used to display the fadump status. | ||
180 | 0 = fadump is disabled | ||
181 | 1 = fadump is enabled | ||
182 | |||
183 | This interface can be used by kdump init scripts to identify if | ||
184 | fadump is enabled in the kernel and act accordingly. | ||
185 | |||
186 | /sys/kernel/fadump_registered | ||
187 | |||
188 | This is used to display the fadump registration status as well | ||
189 | as to control (start/stop) the fadump registration. | ||
190 | 0 = fadump is not registered. | ||
191 | 1 = fadump is registered and ready to handle system crash. | ||
192 | |||
193 | To register fadump echo 1 > /sys/kernel/fadump_registered and | ||
194 | echo 0 > /sys/kernel/fadump_registered for un-register and stop the | ||
195 | fadump. Once the fadump is un-registered, the system crash will not | ||
196 | be handled and vmcore will not be captured. This interface can be | ||
197 | easily integrated with kdump service start/stop. | ||
198 | |||
199 | /sys/kernel/fadump_release_mem | ||
200 | |||
201 | This file is available only when fadump is active during | ||
202 | second kernel. This is used to release the reserved memory | ||
203 | regions that are held for saving crash dump. To release the | ||
204 | reserved memory echo 1 to it: | ||
205 | |||
206 | echo 1 > /sys/kernel/fadump_release_mem | ||
207 | |||
208 | After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region | ||
209 | file will change to reflect the new memory reservations. | ||
210 | |||
211 | The existing userspace tools (kdump infrastructure) can be easily | ||
212 | enhanced to use this interface to release the memory reserved for | ||
213 | dump and continue without 2nd reboot. | ||
214 | |||
215 | Here is the list of files under powerpc debugfs: | ||
216 | (Assuming debugfs is mounted on /sys/kernel/debug directory.) | ||
217 | |||
218 | /sys/kernel/debug/powerpc/fadump_region | ||
219 | |||
220 | This file shows the reserved memory regions if fadump is | ||
221 | enabled otherwise this file is empty. The output format | ||
222 | is: | ||
223 | <region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size> | ||
224 | |||
225 | e.g. | ||
226 | Contents when fadump is registered during first kernel | ||
227 | |||
228 | # cat /sys/kernel/debug/powerpc/fadump_region | ||
229 | CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0 | ||
230 | HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0 | ||
231 | DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0 | ||
232 | |||
233 | Contents when fadump is active during second kernel | ||
234 | |||
235 | # cat /sys/kernel/debug/powerpc/fadump_region | ||
236 | CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020 | ||
237 | HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000 | ||
238 | DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000 | ||
239 | : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000 | ||
240 | |||
241 | NOTE: Please refer to Documentation/filesystems/debugfs.txt on | ||
242 | how to mount the debugfs filesystem. | ||
243 | |||
244 | |||
245 | TODO: | ||
246 | ----- | ||
247 | o Need to come up with the better approach to find out more | ||
248 | accurate boot memory size that is required for a kernel to | ||
249 | boot successfully when booted with restricted memory. | ||
250 | o The fadump implementation introduces a fadump crash info structure | ||
251 | in the scratch area before the ELF core header. The idea of introducing | ||
252 | this structure is to pass some important crash info data to the second | ||
253 | kernel which will help second kernel to populate ELF core header with | ||
254 | correct data before it gets exported through /proc/vmcore. The current | ||
255 | design implementation does not address a possibility of introducing | ||
256 | additional fields (in future) to this structure without affecting | ||
257 | compatibility. Need to come up with the better approach to address this. | ||
258 | The possible approaches are: | ||
259 | 1. Introduce version field for version tracking, bump up the version | ||
260 | whenever a new field is added to the structure in future. The version | ||
261 | field can be used to find out what fields are valid for the current | ||
262 | version of the structure. | ||
263 | 2. Reserve the area of predefined size (say PAGE_SIZE) for this | ||
264 | structure and have unused area as reserved (initialized to zero) | ||
265 | for future field additions. | ||
266 | The advantage of approach 1 over 2 is we don't need to reserve extra space. | ||
267 | --- | ||
268 | Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | ||
269 | This document is based on the original documentation written for phyp | ||
270 | assisted dump by Linas Vepstas and Manish Ahuja. | ||
diff --git a/Documentation/powerpc/mpc52xx.txt b/Documentation/powerpc/mpc52xx.txt index 10dd4ab93b85..0d540a31ea1a 100644 --- a/Documentation/powerpc/mpc52xx.txt +++ b/Documentation/powerpc/mpc52xx.txt | |||
@@ -2,7 +2,7 @@ Linux 2.6.x on MPC52xx family | |||
2 | ----------------------------- | 2 | ----------------------------- |
3 | 3 | ||
4 | For the latest info, go to http://www.246tNt.com/mpc52xx/ | 4 | For the latest info, go to http://www.246tNt.com/mpc52xx/ |
5 | 5 | ||
6 | To compile/use : | 6 | To compile/use : |
7 | 7 | ||
8 | - U-Boot: | 8 | - U-Boot: |
@@ -10,23 +10,23 @@ To compile/use : | |||
10 | if you wish to ). | 10 | if you wish to ). |
11 | # make lite5200_defconfig | 11 | # make lite5200_defconfig |
12 | # make uImage | 12 | # make uImage |
13 | 13 | ||
14 | then, on U-boot: | 14 | then, on U-boot: |
15 | => tftpboot 200000 uImage | 15 | => tftpboot 200000 uImage |
16 | => tftpboot 400000 pRamdisk | 16 | => tftpboot 400000 pRamdisk |
17 | => bootm 200000 400000 | 17 | => bootm 200000 400000 |
18 | 18 | ||
19 | - DBug: | 19 | - DBug: |
20 | # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION | 20 | # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION |
21 | if you wish to ). | 21 | if you wish to ). |
22 | # make lite5200_defconfig | 22 | # make lite5200_defconfig |
23 | # cp your_initrd.gz arch/ppc/boot/images/ramdisk.image.gz | 23 | # cp your_initrd.gz arch/ppc/boot/images/ramdisk.image.gz |
24 | # make zImage.initrd | 24 | # make zImage.initrd |
25 | # make | 25 | # make |
26 | 26 | ||
27 | then in DBug: | 27 | then in DBug: |
28 | DBug> dn -i zImage.initrd.lite5200 | 28 | DBug> dn -i zImage.initrd.lite5200 |
29 | 29 | ||
30 | 30 | ||
31 | Some remarks : | 31 | Some remarks : |
32 | - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100 | 32 | - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100 |
diff --git a/Documentation/powerpc/phyp-assisted-dump.txt b/Documentation/powerpc/phyp-assisted-dump.txt deleted file mode 100644 index ad340205d96a..000000000000 --- a/Documentation/powerpc/phyp-assisted-dump.txt +++ /dev/null | |||
@@ -1,127 +0,0 @@ | |||
1 | |||
2 | Hypervisor-Assisted Dump | ||
3 | ------------------------ | ||
4 | November 2007 | ||
5 | |||
6 | The goal of hypervisor-assisted dump is to enable the dump of | ||
7 | a crashed system, and to do so from a fully-reset system, and | ||
8 | to minimize the total elapsed time until the system is back | ||
9 | in production use. | ||
10 | |||
11 | As compared to kdump or other strategies, hypervisor-assisted | ||
12 | dump offers several strong, practical advantages: | ||
13 | |||
14 | -- Unlike kdump, the system has been reset, and loaded | ||
15 | with a fresh copy of the kernel. In particular, | ||
16 | PCI and I/O devices have been reinitialized and are | ||
17 | in a clean, consistent state. | ||
18 | -- As the dump is performed, the dumped memory becomes | ||
19 | immediately available to the system for normal use. | ||
20 | -- After the dump is completed, no further reboots are | ||
21 | required; the system will be fully usable, and running | ||
22 | in its normal, production mode on its normal kernel. | ||
23 | |||
24 | The above can only be accomplished by coordination with, | ||
25 | and assistance from the hypervisor. The procedure is | ||
26 | as follows: | ||
27 | |||
28 | -- When a system crashes, the hypervisor will save | ||
29 | the low 256MB of RAM to a previously registered | ||
30 | save region. It will also save system state, system | ||
31 | registers, and hardware PTE's. | ||
32 | |||
33 | -- After the low 256MB area has been saved, the | ||
34 | hypervisor will reset PCI and other hardware state. | ||
35 | It will *not* clear RAM. It will then launch the | ||
36 | bootloader, as normal. | ||
37 | |||
38 | -- The freshly booted kernel will notice that there | ||
39 | is a new node (ibm,dump-kernel) in the device tree, | ||
40 | indicating that there is crash data available from | ||
41 | a previous boot. It will boot into only 256MB of RAM, | ||
42 | reserving the rest of system memory. | ||
43 | |||
44 | -- Userspace tools will parse /sys/kernel/release_region | ||
45 | and read /proc/vmcore to obtain the contents of memory, | ||
46 | which holds the previous crashed kernel. The userspace | ||
47 | tools may copy this info to disk, or network, nas, san, | ||
48 | iscsi, etc. as desired. | ||
49 | |||
50 | For Example: the values in /sys/kernel/release_region | ||
51 | would look something like this (address-range pairs). | ||
52 | CPU:0x177fee000-0x10000: HPTE:0x177ffe020-0x1000: / | ||
53 | DUMP:0x177fff020-0x10000000, 0x10000000-0x16F1D370A | ||
54 | |||
55 | -- As the userspace tools complete saving a portion of | ||
56 | dump, they echo an offset and size to | ||
57 | /sys/kernel/release_region to release the reserved | ||
58 | memory back to general use. | ||
59 | |||
60 | An example of this is: | ||
61 | "echo 0x40000000 0x10000000 > /sys/kernel/release_region" | ||
62 | which will release 256MB at the 1GB boundary. | ||
63 | |||
64 | Please note that the hypervisor-assisted dump feature | ||
65 | is only available on Power6-based systems with recent | ||
66 | firmware versions. | ||
67 | |||
68 | Implementation details: | ||
69 | ---------------------- | ||
70 | |||
71 | During boot, a check is made to see if firmware supports | ||
72 | this feature on this particular machine. If it does, then | ||
73 | we check to see if an active dump is waiting for us. If yes | ||
74 | then everything but 256 MB of RAM is reserved during early | ||
75 | boot. This area is released once we collect a dump from user | ||
76 | land scripts that are run. If there is dump data, then | ||
77 | the /sys/kernel/release_region file is created, and | ||
78 | the reserved memory is held. | ||
79 | |||
80 | If there is no waiting dump data, then only the highest | ||
81 | 256MB of the ram is reserved as a scratch area. This area | ||
82 | is *not* released: this region will be kept permanently | ||
83 | reserved, so that it can act as a receptacle for a copy | ||
84 | of the low 256MB in the case a crash does occur. See, | ||
85 | however, "open issues" below, as to whether | ||
86 | such a reserved region is really needed. | ||
87 | |||
88 | Currently the dump will be copied from /proc/vmcore to a | ||
89 | new file upon user intervention. The starting address | ||
90 | to be read and the range for each data point is provided | ||
91 | in /sys/kernel/release_region. | ||
92 | |||
93 | The tools to examine the dump will be same as the ones | ||
94 | used for kdump. | ||
95 | |||
96 | General notes: | ||
97 | -------------- | ||
98 | Security: please note that there are potential security issues | ||
99 | with any sort of dump mechanism. In particular, plaintext | ||
100 | (unencrypted) data, and possibly passwords, may be present in | ||
101 | the dump data. Userspace tools must take adequate precautions to | ||
102 | preserve security. | ||
103 | |||
104 | Open issues/ToDo: | ||
105 | ------------ | ||
106 | o The various code paths that tell the hypervisor that a crash | ||
107 | occurred, vs. it simply being a normal reboot, should be | ||
108 | reviewed, and possibly clarified/fixed. | ||
109 | |||
110 | o Instead of using /sys/kernel, should there be a /sys/dump | ||
111 | instead? There is a dump_subsys being created by the s390 code, | ||
112 | perhaps the pseries code should use a similar layout as well. | ||
113 | |||
114 | o Is reserving a 256MB region really required? The goal of | ||
115 | reserving a 256MB scratch area is to make sure that no | ||
116 | important crash data is clobbered when the hypervisor | ||
117 | save low mem to the scratch area. But, if one could assure | ||
118 | that nothing important is located in some 256MB area, then | ||
119 | it would not need to be reserved. Something that can be | ||
120 | improved in subsequent versions. | ||
121 | |||
122 | o Still working with the kdump team to integrate this with kdump, | ||
123 | some work remains but this would not affect the current | ||
124 | patches. | ||
125 | |||
126 | o Still need to write a shell script, to copy the dump away. | ||
127 | Currently I am parsing it manually. | ||
diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX index 99b85d39751c..eeed1de546d4 100644 --- a/Documentation/security/00-INDEX +++ b/Documentation/security/00-INDEX | |||
@@ -6,6 +6,8 @@ SELinux.txt | |||
6 | - how to get started with the SELinux security enhancement. | 6 | - how to get started with the SELinux security enhancement. |
7 | Smack.txt | 7 | Smack.txt |
8 | - documentation on the Smack Linux Security Module. | 8 | - documentation on the Smack Linux Security Module. |
9 | Yama.txt | ||
10 | - documentation on the Yama Linux Security Module. | ||
9 | apparmor.txt | 11 | apparmor.txt |
10 | - documentation on the AppArmor security extension. | 12 | - documentation on the AppArmor security extension. |
11 | credentials.txt | 13 | credentials.txt |
diff --git a/Documentation/security/Yama.txt b/Documentation/security/Yama.txt new file mode 100644 index 000000000000..a9511f179069 --- /dev/null +++ b/Documentation/security/Yama.txt | |||
@@ -0,0 +1,65 @@ | |||
1 | Yama is a Linux Security Module that collects a number of system-wide DAC | ||
2 | security protections that are not handled by the core kernel itself. To | ||
3 | select it at boot time, specify "security=yama" (though this will disable | ||
4 | any other LSM). | ||
5 | |||
6 | Yama is controlled through sysctl in /proc/sys/kernel/yama: | ||
7 | |||
8 | - ptrace_scope | ||
9 | |||
10 | ============================================================== | ||
11 | |||
12 | ptrace_scope: | ||
13 | |||
14 | As Linux grows in popularity, it will become a larger target for | ||
15 | malware. One particularly troubling weakness of the Linux process | ||
16 | interfaces is that a single user is able to examine the memory and | ||
17 | running state of any of their processes. For example, if one application | ||
18 | (e.g. Pidgin) was compromised, it would be possible for an attacker to | ||
19 | attach to other running processes (e.g. Firefox, SSH sessions, GPG agent, | ||
20 | etc) to extract additional credentials and continue to expand the scope | ||
21 | of their attack without resorting to user-assisted phishing. | ||
22 | |||
23 | This is not a theoretical problem. SSH session hijacking | ||
24 | (http://www.storm.net.nz/projects/7) and arbitrary code injection | ||
25 | (http://c-skills.blogspot.com/2007/05/injectso.html) attacks already | ||
26 | exist and remain possible if ptrace is allowed to operate as before. | ||
27 | Since ptrace is not commonly used by non-developers and non-admins, system | ||
28 | builders should be allowed the option to disable this debugging system. | ||
29 | |||
30 | For a solution, some applications use prctl(PR_SET_DUMPABLE, ...) to | ||
31 | specifically disallow such ptrace attachment (e.g. ssh-agent), but many | ||
32 | do not. A more general solution is to only allow ptrace directly from a | ||
33 | parent to a child process (i.e. direct "gdb EXE" and "strace EXE" still | ||
34 | work), or with CAP_SYS_PTRACE (i.e. "gdb --pid=PID", and "strace -p PID" | ||
35 | still work as root). | ||
36 | |||
37 | For software that has defined application-specific relationships | ||
38 | between a debugging process and its inferior (crash handlers, etc), | ||
39 | prctl(PR_SET_PTRACER, pid, ...) can be used. An inferior can declare which | ||
40 | other process (and its descendants) are allowed to call PTRACE_ATTACH | ||
41 | against it. Only one such declared debugging process can exist for | ||
42 | each inferior at a time. For example, this is used by KDE, Chromium, and | ||
43 | Firefox's crash handlers, and by Wine for allowing only Wine processes | ||
44 | to ptrace each other. If a process wishes to entirely disable these ptrace | ||
45 | restrictions, it can call prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, ...) | ||
46 | so that any otherwise allowed process (even those in external pid namespaces) | ||
47 | may attach. | ||
48 | |||
49 | The sysctl settings are: | ||
50 | |||
51 | 0 - classic ptrace permissions: a process can PTRACE_ATTACH to any other | ||
52 | process running under the same uid, as long as it is dumpable (i.e. | ||
53 | did not transition uids, start privileged, or have called | ||
54 | prctl(PR_SET_DUMPABLE...) already). | ||
55 | |||
56 | 1 - restricted ptrace: a process must have a predefined relationship | ||
57 | with the inferior it wants to call PTRACE_ATTACH on. By default, | ||
58 | this relationship is that of only its descendants when the above | ||
59 | classic criteria are also met. To change the relationship, an | ||
60 | inferior can call prctl(PR_SET_PTRACER, debugger, ...) to declare | ||
61 | an allowed debugger PID to call PTRACE_ATTACH on the inferior. | ||
62 | |||
63 | The original children-only logic was based on the restrictions in grsecurity. | ||
64 | |||
65 | ============================================================== | ||
diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index fcbe7a703405..787717091421 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt | |||
@@ -554,6 +554,10 @@ The keyctl syscall functions are: | |||
554 | process must have write permission on the keyring, and it must be a | 554 | process must have write permission on the keyring, and it must be a |
555 | keyring (or else error ENOTDIR will result). | 555 | keyring (or else error ENOTDIR will result). |
556 | 556 | ||
557 | This function can also be used to clear special kernel keyrings if they | ||
558 | are appropriately marked and the user has the CAP_SYS_ADMIN capability. The | ||
559 | DNS resolver cache keyring is an example of this. | ||
560 | |||
557 | 561 | ||
558 | (*) Link a key into a keyring: | 562 | (*) Link a key into a keyring: |
559 | 563 | ||