diff options
author | Chris Metcalf <cmetcalf@tilera.com> | 2011-08-02 16:14:02 -0400 |
---|---|---|
committer | Chris Metcalf <cmetcalf@tilera.com> | 2011-08-02 16:14:02 -0400 |
commit | 3d071cd313643cf82b1ce1ce4fdf08d63ad53964 (patch) | |
tree | 874c1683f32f07614aa123f6ca5cf6c2bd443704 /Documentation | |
parent | cf8e98d15361f8c594da00a3f7a500787fc1a426 (diff) | |
parent | 02f8c6aee8df3cdc935e9bdd4f2d020306035dbe (diff) |
Merge tag 'v3.0' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus
Diffstat (limited to 'Documentation')
31 files changed, 453 insertions, 267 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 new file mode 100644 index 000000000000..aa11dbdd794b --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 | |||
@@ -0,0 +1,56 @@ | |||
1 | What: /sys/class/backlight/<backlight>/<ambient light zone>_max | ||
2 | What: /sys/class/backlight/<backlight>/l1_daylight_max | ||
3 | What: /sys/class/backlight/<backlight>/l2_bright_max | ||
4 | What: /sys/class/backlight/<backlight>/l3_office_max | ||
5 | What: /sys/class/backlight/<backlight>/l4_indoor_max | ||
6 | What: /sys/class/backlight/<backlight>/l5_dark_max | ||
7 | Date: Mai 2011 | ||
8 | KernelVersion: 2.6.40 | ||
9 | Contact: device-drivers-devel@blackfin.uclinux.org | ||
10 | Description: | ||
11 | Control the maximum brightness for <ambient light zone> | ||
12 | on this <backlight>. Values are between 0 and 127. This file | ||
13 | will also show the brightness level stored for this | ||
14 | <ambient light zone>. | ||
15 | |||
16 | What: /sys/class/backlight/<backlight>/<ambient light zone>_dim | ||
17 | What: /sys/class/backlight/<backlight>/l2_bright_dim | ||
18 | What: /sys/class/backlight/<backlight>/l3_office_dim | ||
19 | What: /sys/class/backlight/<backlight>/l4_indoor_dim | ||
20 | What: /sys/class/backlight/<backlight>/l5_dark_dim | ||
21 | Date: Mai 2011 | ||
22 | KernelVersion: 2.6.40 | ||
23 | Contact: device-drivers-devel@blackfin.uclinux.org | ||
24 | Description: | ||
25 | Control the dim brightness for <ambient light zone> | ||
26 | on this <backlight>. Values are between 0 and 127, typically | ||
27 | set to 0. Full off when the backlight is disabled. | ||
28 | This file will also show the dim brightness level stored for | ||
29 | this <ambient light zone>. | ||
30 | |||
31 | What: /sys/class/backlight/<backlight>/ambient_light_level | ||
32 | Date: Mai 2011 | ||
33 | KernelVersion: 2.6.40 | ||
34 | Contact: device-drivers-devel@blackfin.uclinux.org | ||
35 | Description: | ||
36 | Get conversion value of the light sensor. | ||
37 | This value is updated every 80 ms (when the light sensor | ||
38 | is enabled). Returns integer between 0 (dark) and | ||
39 | 8000 (max ambient brightness) | ||
40 | |||
41 | What: /sys/class/backlight/<backlight>/ambient_light_zone | ||
42 | Date: Mai 2011 | ||
43 | KernelVersion: 2.6.40 | ||
44 | Contact: device-drivers-devel@blackfin.uclinux.org | ||
45 | Description: | ||
46 | Get/Set current ambient light zone. Reading returns | ||
47 | integer between 1..5 (1 = daylight, 2 = bright, ..., 5 = dark). | ||
48 | Writing a value between 1..5 forces the backlight controller | ||
49 | to enter the corresponding ambient light zone. | ||
50 | Writing 0 returns to normal/automatic ambient light level | ||
51 | operation. The ambient light sensing feature on these devices | ||
52 | is an extension to the API documented in | ||
53 | Documentation/ABI/stable/sysfs-class-backlight. | ||
54 | It can be enabled by writing the value stored in | ||
55 | /sys/class/backlight/<backlight>/max_brightness to | ||
56 | /sys/class/backlight/<backlight>/brightness. \ No newline at end of file | ||
diff --git a/Documentation/Changes b/Documentation/Changes index 5f4828a034e3..b17580885273 100644 --- a/Documentation/Changes +++ b/Documentation/Changes | |||
@@ -2,13 +2,7 @@ Intro | |||
2 | ===== | 2 | ===== |
3 | 3 | ||
4 | This document is designed to provide a list of the minimum levels of | 4 | This document is designed to provide a list of the minimum levels of |
5 | software necessary to run the 2.6 kernels, as well as provide brief | 5 | software necessary to run the 3.0 kernels. |
6 | instructions regarding any other "Gotchas" users may encounter when | ||
7 | trying life on the Bleeding Edge. If upgrading from a pre-2.4.x | ||
8 | kernel, please consult the Changes file included with 2.4.x kernels for | ||
9 | additional information; most of that information will not be repeated | ||
10 | here. Basically, this document assumes that your system is already | ||
11 | functional and running at least 2.4.x kernels. | ||
12 | 6 | ||
13 | This document is originally based on my "Changes" file for 2.0.x kernels | 7 | This document is originally based on my "Changes" file for 2.0.x kernels |
14 | and therefore owes credit to the same people as that file (Jared Mauch, | 8 | and therefore owes credit to the same people as that file (Jared Mauch, |
@@ -22,11 +16,10 @@ Upgrade to at *least* these software revisions before thinking you've | |||
22 | encountered a bug! If you're unsure what version you're currently | 16 | encountered a bug! If you're unsure what version you're currently |
23 | running, the suggested command should tell you. | 17 | running, the suggested command should tell you. |
24 | 18 | ||
25 | Again, keep in mind that this list assumes you are already | 19 | Again, keep in mind that this list assumes you are already functionally |
26 | functionally running a Linux 2.4 kernel. Also, not all tools are | 20 | running a Linux kernel. Also, not all tools are necessary on all |
27 | necessary on all systems; obviously, if you don't have any ISDN | 21 | systems; obviously, if you don't have any ISDN hardware, for example, |
28 | hardware, for example, you probably needn't concern yourself with | 22 | you probably needn't concern yourself with isdn4k-utils. |
29 | isdn4k-utils. | ||
30 | 23 | ||
31 | o Gnu C 3.2 # gcc --version | 24 | o Gnu C 3.2 # gcc --version |
32 | o Gnu make 3.80 # make --version | 25 | o Gnu make 3.80 # make --version |
@@ -114,12 +107,12 @@ Ksymoops | |||
114 | 107 | ||
115 | If the unthinkable happens and your kernel oopses, you may need the | 108 | If the unthinkable happens and your kernel oopses, you may need the |
116 | ksymoops tool to decode it, but in most cases you don't. | 109 | ksymoops tool to decode it, but in most cases you don't. |
117 | In the 2.6 kernel it is generally preferred to build the kernel with | 110 | It is generally preferred to build the kernel with CONFIG_KALLSYMS so |
118 | CONFIG_KALLSYMS so that it produces readable dumps that can be used as-is | 111 | that it produces readable dumps that can be used as-is (this also |
119 | (this also produces better output than ksymoops). | 112 | produces better output than ksymoops). If for some reason your kernel |
120 | If for some reason your kernel is not build with CONFIG_KALLSYMS and | 113 | is not build with CONFIG_KALLSYMS and you have no way to rebuild and |
121 | you have no way to rebuild and reproduce the Oops with that option, then | 114 | reproduce the Oops with that option, then you can still decode that Oops |
122 | you can still decode that Oops with ksymoops. | 115 | with ksymoops. |
123 | 116 | ||
124 | Module-Init-Tools | 117 | Module-Init-Tools |
125 | ----------------- | 118 | ----------------- |
@@ -261,8 +254,8 @@ needs to be recompiled or (preferably) upgraded. | |||
261 | NFS-utils | 254 | NFS-utils |
262 | --------- | 255 | --------- |
263 | 256 | ||
264 | In 2.4 and earlier kernels, the nfs server needed to know about any | 257 | In ancient (2.4 and earlier) kernels, the nfs server needed to know |
265 | client that expected to be able to access files via NFS. This | 258 | about any client that expected to be able to access files via NFS. This |
266 | information would be given to the kernel by "mountd" when the client | 259 | information would be given to the kernel by "mountd" when the client |
267 | mounted the filesystem, or by "exportfs" at system startup. exportfs | 260 | mounted the filesystem, or by "exportfs" at system startup. exportfs |
268 | would take information about active clients from /var/lib/nfs/rmtab. | 261 | would take information about active clients from /var/lib/nfs/rmtab. |
@@ -272,11 +265,11 @@ which is not always easy, particularly when trying to implement | |||
272 | fail-over. Even when the system is working well, rmtab suffers from | 265 | fail-over. Even when the system is working well, rmtab suffers from |
273 | getting lots of old entries that never get removed. | 266 | getting lots of old entries that never get removed. |
274 | 267 | ||
275 | With 2.6 we have the option of having the kernel tell mountd when it | 268 | With modern kernels we have the option of having the kernel tell mountd |
276 | gets a request from an unknown host, and mountd can give appropriate | 269 | when it gets a request from an unknown host, and mountd can give |
277 | export information to the kernel. This removes the dependency on | 270 | appropriate export information to the kernel. This removes the |
278 | rmtab and means that the kernel only needs to know about currently | 271 | dependency on rmtab and means that the kernel only needs to know about |
279 | active clients. | 272 | currently active clients. |
280 | 273 | ||
281 | To enable this new functionality, you need to: | 274 | To enable this new functionality, you need to: |
282 | 275 | ||
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle index 58b0bf917834..fa6e25b94a54 100644 --- a/Documentation/CodingStyle +++ b/Documentation/CodingStyle | |||
@@ -680,8 +680,8 @@ ones already enabled by DEBUG. | |||
680 | Chapter 14: Allocating memory | 680 | Chapter 14: Allocating memory |
681 | 681 | ||
682 | The kernel provides the following general purpose memory allocators: | 682 | The kernel provides the following general purpose memory allocators: |
683 | kmalloc(), kzalloc(), kcalloc(), and vmalloc(). Please refer to the API | 683 | kmalloc(), kzalloc(), kcalloc(), vmalloc(), and vzalloc(). Please refer to |
684 | documentation for further information about them. | 684 | the API documentation for further information about them. |
685 | 685 | ||
686 | The preferred form for passing a size of a struct is the following: | 686 | The preferred form for passing a size of a struct is the following: |
687 | 687 | ||
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt index eda40fd39cad..d16a9849e60e 100644 --- a/Documentation/accounting/cgroupstats.txt +++ b/Documentation/accounting/cgroupstats.txt | |||
@@ -21,7 +21,7 @@ information will not be available. | |||
21 | To extract cgroup statistics a utility very similar to getdelays.c | 21 | To extract cgroup statistics a utility very similar to getdelays.c |
22 | has been developed, the sample output of the utility is shown below | 22 | has been developed, the sample output of the utility is shown below |
23 | 23 | ||
24 | ~/balbir/cgroupstats # ./getdelays -C "/cgroup/a" | 24 | ~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a" |
25 | sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0 | 25 | sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0 |
26 | ~/balbir/cgroupstats # ./getdelays -C "/cgroup" | 26 | ~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup" |
27 | sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2 | 27 | sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2 |
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index 465351d4cf85..84f0a15fc210 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt | |||
@@ -28,16 +28,19 @@ cgroups. Here is what you can do. | |||
28 | - Enable group scheduling in CFQ | 28 | - Enable group scheduling in CFQ |
29 | CONFIG_CFQ_GROUP_IOSCHED=y | 29 | CONFIG_CFQ_GROUP_IOSCHED=y |
30 | 30 | ||
31 | - Compile and boot into kernel and mount IO controller (blkio). | 31 | - Compile and boot into kernel and mount IO controller (blkio); see |
32 | cgroups.txt, Why are cgroups needed?. | ||
32 | 33 | ||
33 | mount -t cgroup -o blkio none /cgroup | 34 | mount -t tmpfs cgroup_root /sys/fs/cgroup |
35 | mkdir /sys/fs/cgroup/blkio | ||
36 | mount -t cgroup -o blkio none /sys/fs/cgroup/blkio | ||
34 | 37 | ||
35 | - Create two cgroups | 38 | - Create two cgroups |
36 | mkdir -p /cgroup/test1/ /cgroup/test2 | 39 | mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2 |
37 | 40 | ||
38 | - Set weights of group test1 and test2 | 41 | - Set weights of group test1 and test2 |
39 | echo 1000 > /cgroup/test1/blkio.weight | 42 | echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight |
40 | echo 500 > /cgroup/test2/blkio.weight | 43 | echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight |
41 | 44 | ||
42 | - Create two same size files (say 512MB each) on same disk (file1, file2) and | 45 | - Create two same size files (say 512MB each) on same disk (file1, file2) and |
43 | launch two dd threads in different cgroup to read those files. | 46 | launch two dd threads in different cgroup to read those files. |
@@ -46,12 +49,12 @@ cgroups. Here is what you can do. | |||
46 | echo 3 > /proc/sys/vm/drop_caches | 49 | echo 3 > /proc/sys/vm/drop_caches |
47 | 50 | ||
48 | dd if=/mnt/sdb/zerofile1 of=/dev/null & | 51 | dd if=/mnt/sdb/zerofile1 of=/dev/null & |
49 | echo $! > /cgroup/test1/tasks | 52 | echo $! > /sys/fs/cgroup/blkio/test1/tasks |
50 | cat /cgroup/test1/tasks | 53 | cat /sys/fs/cgroup/blkio/test1/tasks |
51 | 54 | ||
52 | dd if=/mnt/sdb/zerofile2 of=/dev/null & | 55 | dd if=/mnt/sdb/zerofile2 of=/dev/null & |
53 | echo $! > /cgroup/test2/tasks | 56 | echo $! > /sys/fs/cgroup/blkio/test2/tasks |
54 | cat /cgroup/test2/tasks | 57 | cat /sys/fs/cgroup/blkio/test2/tasks |
55 | 58 | ||
56 | - At macro level, first dd should finish first. To get more precise data, keep | 59 | - At macro level, first dd should finish first. To get more precise data, keep |
57 | on looking at (with the help of script), at blkio.disk_time and | 60 | on looking at (with the help of script), at blkio.disk_time and |
@@ -68,13 +71,13 @@ Throttling/Upper Limit policy | |||
68 | - Enable throttling in block layer | 71 | - Enable throttling in block layer |
69 | CONFIG_BLK_DEV_THROTTLING=y | 72 | CONFIG_BLK_DEV_THROTTLING=y |
70 | 73 | ||
71 | - Mount blkio controller | 74 | - Mount blkio controller (see cgroups.txt, Why are cgroups needed?) |
72 | mount -t cgroup -o blkio none /cgroup/blkio | 75 | mount -t cgroup -o blkio none /sys/fs/cgroup/blkio |
73 | 76 | ||
74 | - Specify a bandwidth rate on particular device for root group. The format | 77 | - Specify a bandwidth rate on particular device for root group. The format |
75 | for policy is "<major>:<minor> <byes_per_second>". | 78 | for policy is "<major>:<minor> <byes_per_second>". |
76 | 79 | ||
77 | echo "8:16 1048576" > /cgroup/blkio/blkio.read_bps_device | 80 | echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device |
78 | 81 | ||
79 | Above will put a limit of 1MB/second on reads happening for root group | 82 | Above will put a limit of 1MB/second on reads happening for root group |
80 | on device having major/minor number 8:16. | 83 | on device having major/minor number 8:16. |
@@ -87,7 +90,7 @@ Throttling/Upper Limit policy | |||
87 | 1024+0 records out | 90 | 1024+0 records out |
88 | 4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s | 91 | 4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s |
89 | 92 | ||
90 | Limits for writes can be put using blkio.write_bps_device file. | 93 | Limits for writes can be put using blkio.throttle.write_bps_device file. |
91 | 94 | ||
92 | Hierarchical Cgroups | 95 | Hierarchical Cgroups |
93 | ==================== | 96 | ==================== |
@@ -108,7 +111,7 @@ Hierarchical Cgroups | |||
108 | CFQ and throttling will practically treat all groups at same level. | 111 | CFQ and throttling will practically treat all groups at same level. |
109 | 112 | ||
110 | pivot | 113 | pivot |
111 | / | \ \ | 114 | / / \ \ |
112 | root test1 test2 test3 | 115 | root test1 test2 test3 |
113 | 116 | ||
114 | Down the line we can implement hierarchical accounting/control support | 117 | Down the line we can implement hierarchical accounting/control support |
@@ -149,7 +152,7 @@ Proportional weight policy files | |||
149 | 152 | ||
150 | Following is the format. | 153 | Following is the format. |
151 | 154 | ||
152 | #echo dev_maj:dev_minor weight > /path/to/cgroup/blkio.weight_device | 155 | # echo dev_maj:dev_minor weight > blkio.weight_device |
153 | Configure weight=300 on /dev/sdb (8:16) in this cgroup | 156 | Configure weight=300 on /dev/sdb (8:16) in this cgroup |
154 | # echo 8:16 300 > blkio.weight_device | 157 | # echo 8:16 300 > blkio.weight_device |
155 | # cat blkio.weight_device | 158 | # cat blkio.weight_device |
@@ -283,28 +286,28 @@ Throttling/Upper limit policy files | |||
283 | specified in bytes per second. Rules are per deivce. Following is | 286 | specified in bytes per second. Rules are per deivce. Following is |
284 | the format. | 287 | the format. |
285 | 288 | ||
286 | echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.read_bps_device | 289 | echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device |
287 | 290 | ||
288 | - blkio.throttle.write_bps_device | 291 | - blkio.throttle.write_bps_device |
289 | - Specifies upper limit on WRITE rate to the device. IO rate is | 292 | - Specifies upper limit on WRITE rate to the device. IO rate is |
290 | specified in bytes per second. Rules are per deivce. Following is | 293 | specified in bytes per second. Rules are per deivce. Following is |
291 | the format. | 294 | the format. |
292 | 295 | ||
293 | echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.write_bps_device | 296 | echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device |
294 | 297 | ||
295 | - blkio.throttle.read_iops_device | 298 | - blkio.throttle.read_iops_device |
296 | - Specifies upper limit on READ rate from the device. IO rate is | 299 | - Specifies upper limit on READ rate from the device. IO rate is |
297 | specified in IO per second. Rules are per deivce. Following is | 300 | specified in IO per second. Rules are per deivce. Following is |
298 | the format. | 301 | the format. |
299 | 302 | ||
300 | echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.read_iops_device | 303 | echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device |
301 | 304 | ||
302 | - blkio.throttle.write_iops_device | 305 | - blkio.throttle.write_iops_device |
303 | - Specifies upper limit on WRITE rate to the device. IO rate is | 306 | - Specifies upper limit on WRITE rate to the device. IO rate is |
304 | specified in io per second. Rules are per deivce. Following is | 307 | specified in io per second. Rules are per deivce. Following is |
305 | the format. | 308 | the format. |
306 | 309 | ||
307 | echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.write_iops_device | 310 | echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device |
308 | 311 | ||
309 | Note: If both BW and IOPS rules are specified for a device, then IO is | 312 | Note: If both BW and IOPS rules are specified for a device, then IO is |
310 | subjectd to both the constraints. | 313 | subjectd to both the constraints. |
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 0ed99f08f1f3..cd67e90003c0 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -138,11 +138,11 @@ With the ability to classify tasks differently for different resources | |||
138 | the admin can easily set up a script which receives exec notifications | 138 | the admin can easily set up a script which receives exec notifications |
139 | and depending on who is launching the browser he can | 139 | and depending on who is launching the browser he can |
140 | 140 | ||
141 | # echo browser_pid > /mnt/<restype>/<userclass>/tasks | 141 | # echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks |
142 | 142 | ||
143 | With only a single hierarchy, he now would potentially have to create | 143 | With only a single hierarchy, he now would potentially have to create |
144 | a separate cgroup for every browser launched and associate it with | 144 | a separate cgroup for every browser launched and associate it with |
145 | approp network and other resource class. This may lead to | 145 | appropriate network and other resource class. This may lead to |
146 | proliferation of such cgroups. | 146 | proliferation of such cgroups. |
147 | 147 | ||
148 | Also lets say that the administrator would like to give enhanced network | 148 | Also lets say that the administrator would like to give enhanced network |
@@ -153,9 +153,9 @@ apps enhanced CPU power, | |||
153 | With ability to write pids directly to resource classes, it's just a | 153 | With ability to write pids directly to resource classes, it's just a |
154 | matter of : | 154 | matter of : |
155 | 155 | ||
156 | # echo pid > /mnt/network/<new_class>/tasks | 156 | # echo pid > /sys/fs/cgroup/network/<new_class>/tasks |
157 | (after some time) | 157 | (after some time) |
158 | # echo pid > /mnt/network/<orig_class>/tasks | 158 | # echo pid > /sys/fs/cgroup/network/<orig_class>/tasks |
159 | 159 | ||
160 | Without this ability, he would have to split the cgroup into | 160 | Without this ability, he would have to split the cgroup into |
161 | multiple separate ones and then associate the new cgroups with the | 161 | multiple separate ones and then associate the new cgroups with the |
@@ -310,21 +310,24 @@ subsystem, this is the case for the cpuset. | |||
310 | To start a new job that is to be contained within a cgroup, using | 310 | To start a new job that is to be contained within a cgroup, using |
311 | the "cpuset" cgroup subsystem, the steps are something like: | 311 | the "cpuset" cgroup subsystem, the steps are something like: |
312 | 312 | ||
313 | 1) mkdir /dev/cgroup | 313 | 1) mount -t tmpfs cgroup_root /sys/fs/cgroup |
314 | 2) mount -t cgroup -ocpuset cpuset /dev/cgroup | 314 | 2) mkdir /sys/fs/cgroup/cpuset |
315 | 3) Create the new cgroup by doing mkdir's and write's (or echo's) in | 315 | 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset |
316 | the /dev/cgroup virtual file system. | 316 | 4) Create the new cgroup by doing mkdir's and write's (or echo's) in |
317 | 4) Start a task that will be the "founding father" of the new job. | 317 | the /sys/fs/cgroup virtual file system. |
318 | 5) Attach that task to the new cgroup by writing its pid to the | 318 | 5) Start a task that will be the "founding father" of the new job. |
319 | /dev/cgroup tasks file for that cgroup. | 319 | 6) Attach that task to the new cgroup by writing its pid to the |
320 | 6) fork, exec or clone the job tasks from this founding father task. | 320 | /sys/fs/cgroup/cpuset/tasks file for that cgroup. |
321 | 7) fork, exec or clone the job tasks from this founding father task. | ||
321 | 322 | ||
322 | For example, the following sequence of commands will setup a cgroup | 323 | For example, the following sequence of commands will setup a cgroup |
323 | named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, | 324 | named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, |
324 | and then start a subshell 'sh' in that cgroup: | 325 | and then start a subshell 'sh' in that cgroup: |
325 | 326 | ||
326 | mount -t cgroup cpuset -ocpuset /dev/cgroup | 327 | mount -t tmpfs cgroup_root /sys/fs/cgroup |
327 | cd /dev/cgroup | 328 | mkdir /sys/fs/cgroup/cpuset |
329 | mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset | ||
330 | cd /sys/fs/cgroup/cpuset | ||
328 | mkdir Charlie | 331 | mkdir Charlie |
329 | cd Charlie | 332 | cd Charlie |
330 | /bin/echo 2-3 > cpuset.cpus | 333 | /bin/echo 2-3 > cpuset.cpus |
@@ -345,7 +348,7 @@ Creating, modifying, using the cgroups can be done through the cgroup | |||
345 | virtual filesystem. | 348 | virtual filesystem. |
346 | 349 | ||
347 | To mount a cgroup hierarchy with all available subsystems, type: | 350 | To mount a cgroup hierarchy with all available subsystems, type: |
348 | # mount -t cgroup xxx /dev/cgroup | 351 | # mount -t cgroup xxx /sys/fs/cgroup |
349 | 352 | ||
350 | The "xxx" is not interpreted by the cgroup code, but will appear in | 353 | The "xxx" is not interpreted by the cgroup code, but will appear in |
351 | /proc/mounts so may be any useful identifying string that you like. | 354 | /proc/mounts so may be any useful identifying string that you like. |
@@ -354,23 +357,32 @@ Note: Some subsystems do not work without some user input first. For instance, | |||
354 | if cpusets are enabled the user will have to populate the cpus and mems files | 357 | if cpusets are enabled the user will have to populate the cpus and mems files |
355 | for each new cgroup created before that group can be used. | 358 | for each new cgroup created before that group can be used. |
356 | 359 | ||
360 | As explained in section `1.2 Why are cgroups needed?' you should create | ||
361 | different hierarchies of cgroups for each single resource or group of | ||
362 | resources you want to control. Therefore, you should mount a tmpfs on | ||
363 | /sys/fs/cgroup and create directories for each cgroup resource or resource | ||
364 | group. | ||
365 | |||
366 | # mount -t tmpfs cgroup_root /sys/fs/cgroup | ||
367 | # mkdir /sys/fs/cgroup/rg1 | ||
368 | |||
357 | To mount a cgroup hierarchy with just the cpuset and memory | 369 | To mount a cgroup hierarchy with just the cpuset and memory |
358 | subsystems, type: | 370 | subsystems, type: |
359 | # mount -t cgroup -o cpuset,memory hier1 /dev/cgroup | 371 | # mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1 |
360 | 372 | ||
361 | To change the set of subsystems bound to a mounted hierarchy, just | 373 | To change the set of subsystems bound to a mounted hierarchy, just |
362 | remount with different options: | 374 | remount with different options: |
363 | # mount -o remount,cpuset,blkio hier1 /dev/cgroup | 375 | # mount -o remount,cpuset,blkio hier1 /sys/fs/cgroup/rg1 |
364 | 376 | ||
365 | Now memory is removed from the hierarchy and blkio is added. | 377 | Now memory is removed from the hierarchy and blkio is added. |
366 | 378 | ||
367 | Note this will add blkio to the hierarchy but won't remove memory or | 379 | Note this will add blkio to the hierarchy but won't remove memory or |
368 | cpuset, because the new options are appended to the old ones: | 380 | cpuset, because the new options are appended to the old ones: |
369 | # mount -o remount,blkio /dev/cgroup | 381 | # mount -o remount,blkio /sys/fs/cgroup/rg1 |
370 | 382 | ||
371 | To Specify a hierarchy's release_agent: | 383 | To Specify a hierarchy's release_agent: |
372 | # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \ | 384 | # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \ |
373 | xxx /dev/cgroup | 385 | xxx /sys/fs/cgroup/rg1 |
374 | 386 | ||
375 | Note that specifying 'release_agent' more than once will return failure. | 387 | Note that specifying 'release_agent' more than once will return failure. |
376 | 388 | ||
@@ -379,17 +391,17 @@ when the hierarchy consists of a single (root) cgroup. Supporting | |||
379 | the ability to arbitrarily bind/unbind subsystems from an existing | 391 | the ability to arbitrarily bind/unbind subsystems from an existing |
380 | cgroup hierarchy is intended to be implemented in the future. | 392 | cgroup hierarchy is intended to be implemented in the future. |
381 | 393 | ||
382 | Then under /dev/cgroup you can find a tree that corresponds to the | 394 | Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the |
383 | tree of the cgroups in the system. For instance, /dev/cgroup | 395 | tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1 |
384 | is the cgroup that holds the whole system. | 396 | is the cgroup that holds the whole system. |
385 | 397 | ||
386 | If you want to change the value of release_agent: | 398 | If you want to change the value of release_agent: |
387 | # echo "/sbin/new_release_agent" > /dev/cgroup/release_agent | 399 | # echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent |
388 | 400 | ||
389 | It can also be changed via remount. | 401 | It can also be changed via remount. |
390 | 402 | ||
391 | If you want to create a new cgroup under /dev/cgroup: | 403 | If you want to create a new cgroup under /sys/fs/cgroup/rg1: |
392 | # cd /dev/cgroup | 404 | # cd /sys/fs/cgroup/rg1 |
393 | # mkdir my_cgroup | 405 | # mkdir my_cgroup |
394 | 406 | ||
395 | Now you want to do something with this cgroup. | 407 | Now you want to do something with this cgroup. |
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt index 8b930946c52a..9ad85df4b983 100644 --- a/Documentation/cgroups/cpuacct.txt +++ b/Documentation/cgroups/cpuacct.txt | |||
@@ -10,26 +10,25 @@ directly present in its group. | |||
10 | 10 | ||
11 | Accounting groups can be created by first mounting the cgroup filesystem. | 11 | Accounting groups can be created by first mounting the cgroup filesystem. |
12 | 12 | ||
13 | # mkdir /cgroups | 13 | # mount -t cgroup -ocpuacct none /sys/fs/cgroup |
14 | # mount -t cgroup -ocpuacct none /cgroups | 14 | |
15 | 15 | With the above step, the initial or the parent accounting group becomes | |
16 | With the above step, the initial or the parent accounting group | 16 | visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in |
17 | becomes visible at /cgroups. At bootup, this group includes all the | 17 | the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup. |
18 | tasks in the system. /cgroups/tasks lists the tasks in this cgroup. | 18 | /sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained |
19 | /cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by | 19 | by this group which is essentially the CPU time obtained by all the tasks |
20 | this group which is essentially the CPU time obtained by all the tasks | ||
21 | in the system. | 20 | in the system. |
22 | 21 | ||
23 | New accounting groups can be created under the parent group /cgroups. | 22 | New accounting groups can be created under the parent group /sys/fs/cgroup. |
24 | 23 | ||
25 | # cd /cgroups | 24 | # cd /sys/fs/cgroup |
26 | # mkdir g1 | 25 | # mkdir g1 |
27 | # echo $$ > g1 | 26 | # echo $$ > g1 |
28 | 27 | ||
29 | The above steps create a new group g1 and move the current shell | 28 | The above steps create a new group g1 and move the current shell |
30 | process (bash) into it. CPU time consumed by this bash and its children | 29 | process (bash) into it. CPU time consumed by this bash and its children |
31 | can be obtained from g1/cpuacct.usage and the same is accumulated in | 30 | can be obtained from g1/cpuacct.usage and the same is accumulated in |
32 | /cgroups/cpuacct.usage also. | 31 | /sys/fs/cgroup/cpuacct.usage also. |
33 | 32 | ||
34 | cpuacct.stat file lists a few statistics which further divide the | 33 | cpuacct.stat file lists a few statistics which further divide the |
35 | CPU time obtained by the cgroup into user and system times. Currently | 34 | CPU time obtained by the cgroup into user and system times. Currently |
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt index 98a30829af7a..5b0d78e55ccc 100644 --- a/Documentation/cgroups/cpusets.txt +++ b/Documentation/cgroups/cpusets.txt | |||
@@ -661,21 +661,21 @@ than stress the kernel. | |||
661 | 661 | ||
662 | To start a new job that is to be contained within a cpuset, the steps are: | 662 | To start a new job that is to be contained within a cpuset, the steps are: |
663 | 663 | ||
664 | 1) mkdir /dev/cpuset | 664 | 1) mkdir /sys/fs/cgroup/cpuset |
665 | 2) mount -t cgroup -ocpuset cpuset /dev/cpuset | 665 | 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset |
666 | 3) Create the new cpuset by doing mkdir's and write's (or echo's) in | 666 | 3) Create the new cpuset by doing mkdir's and write's (or echo's) in |
667 | the /dev/cpuset virtual file system. | 667 | the /sys/fs/cgroup/cpuset virtual file system. |
668 | 4) Start a task that will be the "founding father" of the new job. | 668 | 4) Start a task that will be the "founding father" of the new job. |
669 | 5) Attach that task to the new cpuset by writing its pid to the | 669 | 5) Attach that task to the new cpuset by writing its pid to the |
670 | /dev/cpuset tasks file for that cpuset. | 670 | /sys/fs/cgroup/cpuset tasks file for that cpuset. |
671 | 6) fork, exec or clone the job tasks from this founding father task. | 671 | 6) fork, exec or clone the job tasks from this founding father task. |
672 | 672 | ||
673 | For example, the following sequence of commands will setup a cpuset | 673 | For example, the following sequence of commands will setup a cpuset |
674 | named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, | 674 | named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, |
675 | and then start a subshell 'sh' in that cpuset: | 675 | and then start a subshell 'sh' in that cpuset: |
676 | 676 | ||
677 | mount -t cgroup -ocpuset cpuset /dev/cpuset | 677 | mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset |
678 | cd /dev/cpuset | 678 | cd /sys/fs/cgroup/cpuset |
679 | mkdir Charlie | 679 | mkdir Charlie |
680 | cd Charlie | 680 | cd Charlie |
681 | /bin/echo 2-3 > cpuset.cpus | 681 | /bin/echo 2-3 > cpuset.cpus |
@@ -710,14 +710,14 @@ Creating, modifying, using the cpusets can be done through the cpuset | |||
710 | virtual filesystem. | 710 | virtual filesystem. |
711 | 711 | ||
712 | To mount it, type: | 712 | To mount it, type: |
713 | # mount -t cgroup -o cpuset cpuset /dev/cpuset | 713 | # mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset |
714 | 714 | ||
715 | Then under /dev/cpuset you can find a tree that corresponds to the | 715 | Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the |
716 | tree of the cpusets in the system. For instance, /dev/cpuset | 716 | tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset |
717 | is the cpuset that holds the whole system. | 717 | is the cpuset that holds the whole system. |
718 | 718 | ||
719 | If you want to create a new cpuset under /dev/cpuset: | 719 | If you want to create a new cpuset under /sys/fs/cgroup/cpuset: |
720 | # cd /dev/cpuset | 720 | # cd /sys/fs/cgroup/cpuset |
721 | # mkdir my_cpuset | 721 | # mkdir my_cpuset |
722 | 722 | ||
723 | Now you want to do something with this cpuset. | 723 | Now you want to do something with this cpuset. |
@@ -765,12 +765,12 @@ wrapper around the cgroup filesystem. | |||
765 | 765 | ||
766 | The command | 766 | The command |
767 | 767 | ||
768 | mount -t cpuset X /dev/cpuset | 768 | mount -t cpuset X /sys/fs/cgroup/cpuset |
769 | 769 | ||
770 | is equivalent to | 770 | is equivalent to |
771 | 771 | ||
772 | mount -t cgroup -ocpuset,noprefix X /dev/cpuset | 772 | mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset |
773 | echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent | 773 | echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent |
774 | 774 | ||
775 | 2.2 Adding/removing cpus | 775 | 2.2 Adding/removing cpus |
776 | ------------------------ | 776 | ------------------------ |
diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt index 57ca4c89fe5c..16624a7f8222 100644 --- a/Documentation/cgroups/devices.txt +++ b/Documentation/cgroups/devices.txt | |||
@@ -22,16 +22,16 @@ removed from the child(ren). | |||
22 | An entry is added using devices.allow, and removed using | 22 | An entry is added using devices.allow, and removed using |
23 | devices.deny. For instance | 23 | devices.deny. For instance |
24 | 24 | ||
25 | echo 'c 1:3 mr' > /cgroups/1/devices.allow | 25 | echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow |
26 | 26 | ||
27 | allows cgroup 1 to read and mknod the device usually known as | 27 | allows cgroup 1 to read and mknod the device usually known as |
28 | /dev/null. Doing | 28 | /dev/null. Doing |
29 | 29 | ||
30 | echo a > /cgroups/1/devices.deny | 30 | echo a > /sys/fs/cgroup/1/devices.deny |
31 | 31 | ||
32 | will remove the default 'a *:* rwm' entry. Doing | 32 | will remove the default 'a *:* rwm' entry. Doing |
33 | 33 | ||
34 | echo a > /cgroups/1/devices.allow | 34 | echo a > /sys/fs/cgroup/1/devices.allow |
35 | 35 | ||
36 | will add the 'a *:* rwm' entry to the whitelist. | 36 | will add the 'a *:* rwm' entry to the whitelist. |
37 | 37 | ||
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt index 41f37fea1276..c21d77742a07 100644 --- a/Documentation/cgroups/freezer-subsystem.txt +++ b/Documentation/cgroups/freezer-subsystem.txt | |||
@@ -59,28 +59,28 @@ is non-freezable. | |||
59 | 59 | ||
60 | * Examples of usage : | 60 | * Examples of usage : |
61 | 61 | ||
62 | # mkdir /containers | 62 | # mkdir /sys/fs/cgroup/freezer |
63 | # mount -t cgroup -ofreezer freezer /containers | 63 | # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer |
64 | # mkdir /containers/0 | 64 | # mkdir /sys/fs/cgroup/freezer/0 |
65 | # echo $some_pid > /containers/0/tasks | 65 | # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks |
66 | 66 | ||
67 | to get status of the freezer subsystem : | 67 | to get status of the freezer subsystem : |
68 | 68 | ||
69 | # cat /containers/0/freezer.state | 69 | # cat /sys/fs/cgroup/freezer/0/freezer.state |
70 | THAWED | 70 | THAWED |
71 | 71 | ||
72 | to freeze all tasks in the container : | 72 | to freeze all tasks in the container : |
73 | 73 | ||
74 | # echo FROZEN > /containers/0/freezer.state | 74 | # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state |
75 | # cat /containers/0/freezer.state | 75 | # cat /sys/fs/cgroup/freezer/0/freezer.state |
76 | FREEZING | 76 | FREEZING |
77 | # cat /containers/0/freezer.state | 77 | # cat /sys/fs/cgroup/freezer/0/freezer.state |
78 | FROZEN | 78 | FROZEN |
79 | 79 | ||
80 | to unfreeze all tasks in the container : | 80 | to unfreeze all tasks in the container : |
81 | 81 | ||
82 | # echo THAWED > /containers/0/freezer.state | 82 | # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state |
83 | # cat /containers/0/freezer.state | 83 | # cat /sys/fs/cgroup/freezer/0/freezer.state |
84 | THAWED | 84 | THAWED |
85 | 85 | ||
86 | This is the basic mechanism which should do the right thing for user space task | 86 | This is the basic mechanism which should do the right thing for user space task |
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 7c163477fcd8..06eb6d957c83 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
@@ -1,8 +1,8 @@ | |||
1 | Memory Resource Controller | 1 | Memory Resource Controller |
2 | 2 | ||
3 | NOTE: The Memory Resource Controller has been generically been referred | 3 | NOTE: The Memory Resource Controller has generically been referred to as the |
4 | to as the memory controller in this document. Do not confuse memory | 4 | memory controller in this document. Do not confuse memory controller |
5 | controller used here with the memory controller that is used in hardware. | 5 | used here with the memory controller that is used in hardware. |
6 | 6 | ||
7 | (For editors) | 7 | (For editors) |
8 | In this document: | 8 | In this document: |
@@ -70,6 +70,7 @@ Brief summary of control files. | |||
70 | (See sysctl's vm.swappiness) | 70 | (See sysctl's vm.swappiness) |
71 | memory.move_charge_at_immigrate # set/show controls of moving charges | 71 | memory.move_charge_at_immigrate # set/show controls of moving charges |
72 | memory.oom_control # set/show oom controls. | 72 | memory.oom_control # set/show oom controls. |
73 | memory.numa_stat # show the number of memory usage per numa node | ||
73 | 74 | ||
74 | 1. History | 75 | 1. History |
75 | 76 | ||
@@ -181,7 +182,7 @@ behind this approach is that a cgroup that aggressively uses a shared | |||
181 | page will eventually get charged for it (once it is uncharged from | 182 | page will eventually get charged for it (once it is uncharged from |
182 | the cgroup that brought it in -- this will happen on memory pressure). | 183 | the cgroup that brought it in -- this will happen on memory pressure). |
183 | 184 | ||
184 | Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.. | 185 | Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used. |
185 | When you do swapoff and make swapped-out pages of shmem(tmpfs) to | 186 | When you do swapoff and make swapped-out pages of shmem(tmpfs) to |
186 | be backed into memory in force, charges for pages are accounted against the | 187 | be backed into memory in force, charges for pages are accounted against the |
187 | caller of swapoff rather than the users of shmem. | 188 | caller of swapoff rather than the users of shmem. |
@@ -213,7 +214,7 @@ affecting global LRU, memory+swap limit is better than just limiting swap from | |||
213 | OS point of view. | 214 | OS point of view. |
214 | 215 | ||
215 | * What happens when a cgroup hits memory.memsw.limit_in_bytes | 216 | * What happens when a cgroup hits memory.memsw.limit_in_bytes |
216 | When a cgroup his memory.memsw.limit_in_bytes, it's useless to do swap-out | 217 | When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out |
217 | in this cgroup. Then, swap-out will not be done by cgroup routine and file | 218 | in this cgroup. Then, swap-out will not be done by cgroup routine and file |
218 | caches are dropped. But as mentioned above, global LRU can do swapout memory | 219 | caches are dropped. But as mentioned above, global LRU can do swapout memory |
219 | from it for sanity of the system's memory management state. You can't forbid | 220 | from it for sanity of the system's memory management state. You can't forbid |
@@ -263,16 +264,17 @@ b. Enable CONFIG_RESOURCE_COUNTERS | |||
263 | c. Enable CONFIG_CGROUP_MEM_RES_CTLR | 264 | c. Enable CONFIG_CGROUP_MEM_RES_CTLR |
264 | d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension) | 265 | d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension) |
265 | 266 | ||
266 | 1. Prepare the cgroups | 267 | 1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?) |
267 | # mkdir -p /cgroups | 268 | # mount -t tmpfs none /sys/fs/cgroup |
268 | # mount -t cgroup none /cgroups -o memory | 269 | # mkdir /sys/fs/cgroup/memory |
270 | # mount -t cgroup none /sys/fs/cgroup/memory -o memory | ||
269 | 271 | ||
270 | 2. Make the new group and move bash into it | 272 | 2. Make the new group and move bash into it |
271 | # mkdir /cgroups/0 | 273 | # mkdir /sys/fs/cgroup/memory/0 |
272 | # echo $$ > /cgroups/0/tasks | 274 | # echo $$ > /sys/fs/cgroup/memory/0/tasks |
273 | 275 | ||
274 | Since now we're in the 0 cgroup, we can alter the memory limit: | 276 | Since now we're in the 0 cgroup, we can alter the memory limit: |
275 | # echo 4M > /cgroups/0/memory.limit_in_bytes | 277 | # echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes |
276 | 278 | ||
277 | NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, | 279 | NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, |
278 | mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.) | 280 | mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.) |
@@ -280,11 +282,11 @@ mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.) | |||
280 | NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). | 282 | NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). |
281 | NOTE: We cannot set limits on the root cgroup any more. | 283 | NOTE: We cannot set limits on the root cgroup any more. |
282 | 284 | ||
283 | # cat /cgroups/0/memory.limit_in_bytes | 285 | # cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes |
284 | 4194304 | 286 | 4194304 |
285 | 287 | ||
286 | We can check the usage: | 288 | We can check the usage: |
287 | # cat /cgroups/0/memory.usage_in_bytes | 289 | # cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes |
288 | 1216512 | 290 | 1216512 |
289 | 291 | ||
290 | A successful write to this file does not guarantee a successful set of | 292 | A successful write to this file does not guarantee a successful set of |
@@ -464,6 +466,24 @@ value for efficient access. (Of course, when necessary, it's synchronized.) | |||
464 | If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP) | 466 | If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP) |
465 | value in memory.stat(see 5.2). | 467 | value in memory.stat(see 5.2). |
466 | 468 | ||
469 | 5.6 numa_stat | ||
470 | |||
471 | This is similar to numa_maps but operates on a per-memcg basis. This is | ||
472 | useful for providing visibility into the numa locality information within | ||
473 | an memcg since the pages are allowed to be allocated from any physical | ||
474 | node. One of the usecases is evaluating application performance by | ||
475 | combining this information with the application's cpu allocation. | ||
476 | |||
477 | We export "total", "file", "anon" and "unevictable" pages per-node for | ||
478 | each memcg. The ouput format of memory.numa_stat is: | ||
479 | |||
480 | total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ... | ||
481 | file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ... | ||
482 | anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ... | ||
483 | unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ... | ||
484 | |||
485 | And we have total = file + anon + unevictable. | ||
486 | |||
467 | 6. Hierarchy support | 487 | 6. Hierarchy support |
468 | 488 | ||
469 | The memory controller supports a deep hierarchy and hierarchical accounting. | 489 | The memory controller supports a deep hierarchy and hierarchical accounting. |
@@ -471,13 +491,13 @@ The hierarchy is created by creating the appropriate cgroups in the | |||
471 | cgroup filesystem. Consider for example, the following cgroup filesystem | 491 | cgroup filesystem. Consider for example, the following cgroup filesystem |
472 | hierarchy | 492 | hierarchy |
473 | 493 | ||
474 | root | 494 | root |
475 | / | \ | 495 | / | \ |
476 | / | \ | 496 | / | \ |
477 | a b c | 497 | a b c |
478 | | \ | 498 | | \ |
479 | | \ | 499 | | \ |
480 | d e | 500 | d e |
481 | 501 | ||
482 | In the diagram above, with hierarchical accounting enabled, all memory | 502 | In the diagram above, with hierarchical accounting enabled, all memory |
483 | usage of e, is accounted to its ancestors up until the root (i.e, c and root), | 503 | usage of e, is accounted to its ancestors up until the root (i.e, c and root), |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 1a9446b59153..b1c921c27519 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -481,23 +481,6 @@ Who: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> | |||
481 | 481 | ||
482 | ---------------------------- | 482 | ---------------------------- |
483 | 483 | ||
484 | What: namespace cgroup (ns_cgroup) | ||
485 | When: 2.6.38 | ||
486 | Why: The ns_cgroup leads to some problems: | ||
487 | * cgroup creation is out-of-control | ||
488 | * cgroup name can conflict when pids are looping | ||
489 | * it is not possible to have a single process handling | ||
490 | a lot of namespaces without falling in a exponential creation time | ||
491 | * we may want to create a namespace without creating a cgroup | ||
492 | |||
493 | The ns_cgroup is replaced by a compatibility flag 'clone_children', | ||
494 | where a newly created cgroup will copy the parent cgroup values. | ||
495 | The userspace has to manually create a cgroup and add a task to | ||
496 | the 'tasks' file. | ||
497 | Who: Daniel Lezcano <daniel.lezcano@free.fr> | ||
498 | |||
499 | ---------------------------- | ||
500 | |||
501 | What: iwlwifi disable_hw_scan module parameters | 484 | What: iwlwifi disable_hw_scan module parameters |
502 | When: 2.6.40 | 485 | When: 2.6.40 |
503 | Why: Hareware scan is the prefer method for iwlwifi devices for | 486 | Why: Hareware scan is the prefer method for iwlwifi devices for |
@@ -600,3 +583,25 @@ Why: Superseded by the UVCIOC_CTRL_QUERY ioctl. | |||
600 | Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> | 583 | Who: Laurent Pinchart <laurent.pinchart@ideasonboard.com> |
601 | 584 | ||
602 | ---------------------------- | 585 | ---------------------------- |
586 | |||
587 | What: For VIDIOC_S_FREQUENCY the type field must match the device node's type. | ||
588 | If not, return -EINVAL. | ||
589 | When: 3.2 | ||
590 | Why: It makes no sense to switch the tuner to radio mode by calling | ||
591 | VIDIOC_S_FREQUENCY on a video node, or to switch the tuner to tv mode by | ||
592 | calling VIDIOC_S_FREQUENCY on a radio node. This is the first step of a | ||
593 | move to more consistent handling of tv and radio tuners. | ||
594 | Who: Hans Verkuil <hans.verkuil@cisco.com> | ||
595 | |||
596 | ---------------------------- | ||
597 | |||
598 | What: Opening a radio device node will no longer automatically switch the | ||
599 | tuner mode from tv to radio. | ||
600 | When: 3.3 | ||
601 | Why: Just opening a V4L device should not change the state of the hardware | ||
602 | like that. It's very unexpected and against the V4L spec. Instead, you | ||
603 | switch to radio mode by calling VIDIOC_S_FREQUENCY. This is the second | ||
604 | and last step of the move to consistent handling of tv and radio tuners. | ||
605 | Who: Hans Verkuil <hans.verkuil@cisco.com> | ||
606 | |||
607 | ---------------------------- | ||
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index a167ab876c35..7cc6bf2871eb 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt | |||
@@ -673,6 +673,22 @@ storage request to complete, or it may attempt to cancel the storage request - | |||
673 | in which case the page will not be stored in the cache this time. | 673 | in which case the page will not be stored in the cache this time. |
674 | 674 | ||
675 | 675 | ||
676 | BULK INODE PAGE UNCACHE | ||
677 | ----------------------- | ||
678 | |||
679 | A convenience routine is provided to perform an uncache on all the pages | ||
680 | attached to an inode. This assumes that the pages on the inode correspond on a | ||
681 | 1:1 basis with the pages in the cache. | ||
682 | |||
683 | void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, | ||
684 | struct inode *inode); | ||
685 | |||
686 | This takes the netfs cookie that the pages were cached with and the inode that | ||
687 | the pages are attached to. This function will wait for pages to finish being | ||
688 | written to the cache and for the cache to finish with the page generally. No | ||
689 | error is returned. | ||
690 | |||
691 | |||
676 | ========================== | 692 | ========================== |
677 | INDEX AND DATA FILE UPDATE | 693 | INDEX AND DATA FILE UPDATE |
678 | ========================== | 694 | ========================== |
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index d5c0cef38a71..873a2ab2e9f8 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt | |||
@@ -40,7 +40,6 @@ Features which NILFS2 does not support yet: | |||
40 | - POSIX ACLs | 40 | - POSIX ACLs |
41 | - quotas | 41 | - quotas |
42 | - fsck | 42 | - fsck |
43 | - resize | ||
44 | - defragmentation | 43 | - defragmentation |
45 | 44 | ||
46 | Mount options | 45 | Mount options |
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index f48178024067..db3b1aba32a3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -843,6 +843,7 @@ Provides counts of softirq handlers serviced since boot time, for each cpu. | |||
843 | TASKLET: 0 0 0 290 | 843 | TASKLET: 0 0 0 290 |
844 | SCHED: 27035 26983 26971 26746 | 844 | SCHED: 27035 26983 26971 26746 |
845 | HRTIMER: 0 0 0 0 | 845 | HRTIMER: 0 0 0 0 |
846 | RCU: 1678 1769 2178 2250 | ||
846 | 847 | ||
847 | 848 | ||
848 | 1.3 IDE devices in /proc/ide | 849 | 1.3 IDE devices in /proc/ide |
diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg index 84d2623810f3..de91c0db5846 100644 --- a/Documentation/hwmon/f71882fg +++ b/Documentation/hwmon/f71882fg | |||
@@ -22,6 +22,10 @@ Supported chips: | |||
22 | Prefix: 'f71869' | 22 | Prefix: 'f71869' |
23 | Addresses scanned: none, address read from Super I/O config space | 23 | Addresses scanned: none, address read from Super I/O config space |
24 | Datasheet: Available from the Fintek website | 24 | Datasheet: Available from the Fintek website |
25 | * Fintek F71869A | ||
26 | Prefix: 'f71869a' | ||
27 | Addresses scanned: none, address read from Super I/O config space | ||
28 | Datasheet: Not public | ||
25 | * Fintek F71882FG and F71883FG | 29 | * Fintek F71882FG and F71883FG |
26 | Prefix: 'f71882fg' | 30 | Prefix: 'f71882fg' |
27 | Addresses scanned: none, address read from Super I/O config space | 31 | Addresses scanned: none, address read from Super I/O config space |
diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp index 0393c89277c0..a10f73624ad3 100644 --- a/Documentation/hwmon/k10temp +++ b/Documentation/hwmon/k10temp | |||
@@ -9,8 +9,8 @@ Supported chips: | |||
9 | Socket S1G3: Athlon II, Sempron, Turion II | 9 | Socket S1G3: Athlon II, Sempron, Turion II |
10 | * AMD Family 11h processors: | 10 | * AMD Family 11h processors: |
11 | Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra) | 11 | Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra) |
12 | * AMD Family 12h processors: "Llano" | 12 | * AMD Family 12h processors: "Llano" (E2/A4/A6/A8-Series) |
13 | * AMD Family 14h processors: "Brazos" (C/E/G-Series) | 13 | * AMD Family 14h processors: "Brazos" (C/E/G/Z-Series) |
14 | * AMD Family 15h processors: "Bulldozer" | 14 | * AMD Family 15h processors: "Bulldozer" |
15 | 15 | ||
16 | Prefix: 'k10temp' | 16 | Prefix: 'k10temp' |
@@ -20,12 +20,16 @@ Supported chips: | |||
20 | http://support.amd.com/us/Processor_TechDocs/31116.pdf | 20 | http://support.amd.com/us/Processor_TechDocs/31116.pdf |
21 | BIOS and Kernel Developer's Guide (BKDG) for AMD Family 11h Processors: | 21 | BIOS and Kernel Developer's Guide (BKDG) for AMD Family 11h Processors: |
22 | http://support.amd.com/us/Processor_TechDocs/41256.pdf | 22 | http://support.amd.com/us/Processor_TechDocs/41256.pdf |
23 | BIOS and Kernel Developer's Guide (BKDG) for AMD Family 12h Processors: | ||
24 | http://support.amd.com/us/Processor_TechDocs/41131.pdf | ||
23 | BIOS and Kernel Developer's Guide (BKDG) for AMD Family 14h Models 00h-0Fh Processors: | 25 | BIOS and Kernel Developer's Guide (BKDG) for AMD Family 14h Models 00h-0Fh Processors: |
24 | http://support.amd.com/us/Processor_TechDocs/43170.pdf | 26 | http://support.amd.com/us/Processor_TechDocs/43170.pdf |
25 | Revision Guide for AMD Family 10h Processors: | 27 | Revision Guide for AMD Family 10h Processors: |
26 | http://support.amd.com/us/Processor_TechDocs/41322.pdf | 28 | http://support.amd.com/us/Processor_TechDocs/41322.pdf |
27 | Revision Guide for AMD Family 11h Processors: | 29 | Revision Guide for AMD Family 11h Processors: |
28 | http://support.amd.com/us/Processor_TechDocs/41788.pdf | 30 | http://support.amd.com/us/Processor_TechDocs/41788.pdf |
31 | Revision Guide for AMD Family 12h Processors: | ||
32 | http://support.amd.com/us/Processor_TechDocs/44739.pdf | ||
29 | Revision Guide for AMD Family 14h Models 00h-0Fh Processors: | 33 | Revision Guide for AMD Family 14h Models 00h-0Fh Processors: |
30 | http://support.amd.com/us/Processor_TechDocs/47534.pdf | 34 | http://support.amd.com/us/Processor_TechDocs/47534.pdf |
31 | AMD Family 11h Processor Power and Thermal Data Sheet for Notebooks: | 35 | AMD Family 11h Processor Power and Thermal Data Sheet for Notebooks: |
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index d9a203b058f1..aa47be71df4c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -2015,6 +2015,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2015 | the default. | 2015 | the default. |
2016 | off: Turn ECRC off | 2016 | off: Turn ECRC off |
2017 | on: Turn ECRC on. | 2017 | on: Turn ECRC on. |
2018 | realloc reallocate PCI resources if allocations done by BIOS | ||
2019 | are erroneous. | ||
2018 | 2020 | ||
2019 | pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power | 2021 | pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power |
2020 | Management. | 2022 | Management. |
@@ -2598,6 +2600,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2598 | unlock ejectable media); | 2600 | unlock ejectable media); |
2599 | m = MAX_SECTORS_64 (don't transfer more | 2601 | m = MAX_SECTORS_64 (don't transfer more |
2600 | than 64 sectors = 32 KB at a time); | 2602 | than 64 sectors = 32 KB at a time); |
2603 | n = INITIAL_READ10 (force a retry of the | ||
2604 | initial READ(10) command); | ||
2601 | o = CAPACITY_OK (accept the capacity | 2605 | o = CAPACITY_OK (accept the capacity |
2602 | reported by the device); | 2606 | reported by the device); |
2603 | r = IGNORE_RESIDUE (the device reports | 2607 | r = IGNORE_RESIDUE (the device reports |
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt index 090e6ee04536..51063e681ca4 100644 --- a/Documentation/kmemleak.txt +++ b/Documentation/kmemleak.txt | |||
@@ -11,7 +11,9 @@ with the difference that the orphan objects are not freed but only | |||
11 | reported via /sys/kernel/debug/kmemleak. A similar method is used by the | 11 | reported via /sys/kernel/debug/kmemleak. A similar method is used by the |
12 | Valgrind tool (memcheck --leak-check) to detect the memory leaks in | 12 | Valgrind tool (memcheck --leak-check) to detect the memory leaks in |
13 | user-space applications. | 13 | user-space applications. |
14 | Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze and tile. | 14 | |
15 | Please check DEBUG_KMEMLEAK dependencies in lib/Kconfig.debug for supported | ||
16 | architectures. | ||
15 | 17 | ||
16 | Usage | 18 | Usage |
17 | ----- | 19 | ----- |
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index 1565eefd6fd5..61815483efa3 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt | |||
@@ -534,6 +534,8 @@ Events that are never propagated by the driver: | |||
534 | 0x2404 System is waking up from hibernation to undock | 534 | 0x2404 System is waking up from hibernation to undock |
535 | 0x2405 System is waking up from hibernation to eject bay | 535 | 0x2405 System is waking up from hibernation to eject bay |
536 | 0x5010 Brightness level changed/control event | 536 | 0x5010 Brightness level changed/control event |
537 | 0x6000 KEYBOARD: Numlock key pressed | ||
538 | 0x6005 KEYBOARD: Fn key pressed (TO BE VERIFIED) | ||
537 | 539 | ||
538 | Events that are propagated by the driver to userspace: | 540 | Events that are propagated by the driver to userspace: |
539 | 541 | ||
@@ -545,6 +547,8 @@ Events that are propagated by the driver to userspace: | |||
545 | 0x3006 Bay hotplug request (hint to power up SATA link when | 547 | 0x3006 Bay hotplug request (hint to power up SATA link when |
546 | the optical drive tray is ejected) | 548 | the optical drive tray is ejected) |
547 | 0x4003 Undocked (see 0x2x04), can sleep again | 549 | 0x4003 Undocked (see 0x2x04), can sleep again |
550 | 0x4010 Docked into hotplug port replicator (non-ACPI dock) | ||
551 | 0x4011 Undocked from hotplug port replicator (non-ACPI dock) | ||
548 | 0x500B Tablet pen inserted into its storage bay | 552 | 0x500B Tablet pen inserted into its storage bay |
549 | 0x500C Tablet pen removed from its storage bay | 553 | 0x500C Tablet pen removed from its storage bay |
550 | 0x6011 ALARM: battery is too hot | 554 | 0x6011 ALARM: battery is too hot |
@@ -552,6 +556,7 @@ Events that are propagated by the driver to userspace: | |||
552 | 0x6021 ALARM: a sensor is too hot | 556 | 0x6021 ALARM: a sensor is too hot |
553 | 0x6022 ALARM: a sensor is extremely hot | 557 | 0x6022 ALARM: a sensor is extremely hot |
554 | 0x6030 System thermal table changed | 558 | 0x6030 System thermal table changed |
559 | 0x6040 Nvidia Optimus/AC adapter related (TO BE VERIFIED) | ||
555 | 560 | ||
556 | Battery nearly empty alarms are a last resort attempt to get the | 561 | Battery nearly empty alarms are a last resort attempt to get the |
557 | operating system to hibernate or shutdown cleanly (0x2313), or shutdown | 562 | operating system to hibernate or shutdown cleanly (0x2313), or shutdown |
diff --git a/Documentation/md.txt b/Documentation/md.txt index 2366b1c8cf19..f0eee83ff78a 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -555,7 +555,7 @@ also have | |||
555 | sync_min | 555 | sync_min |
556 | sync_max | 556 | sync_max |
557 | The two values, given as numbers of sectors, indicate a range | 557 | The two values, given as numbers of sectors, indicate a range |
558 | withing the array where 'check'/'repair' will operate. Must be | 558 | within the array where 'check'/'repair' will operate. Must be |
559 | a multiple of chunk_size. When it reaches "sync_max" it will | 559 | a multiple of chunk_size. When it reaches "sync_max" it will |
560 | pause, rather than complete. | 560 | pause, rather than complete. |
561 | You can use 'select' or 'poll' on "sync_completed" to wait for | 561 | You can use 'select' or 'poll' on "sync_completed" to wait for |
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index d3d653a5f9b9..bfe924217f24 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -346,7 +346,7 @@ tcp_orphan_retries - INTEGER | |||
346 | when RTO retransmissions remain unacknowledged. | 346 | when RTO retransmissions remain unacknowledged. |
347 | See tcp_retries2 for more details. | 347 | See tcp_retries2 for more details. |
348 | 348 | ||
349 | The default value is 7. | 349 | The default value is 8. |
350 | If your machine is a loaded WEB server, | 350 | If your machine is a loaded WEB server, |
351 | you should think about lowering this value, such sockets | 351 | you should think about lowering this value, such sockets |
352 | may consume significant resources. Cf. tcp_max_orphans. | 352 | may consume significant resources. Cf. tcp_max_orphans. |
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 88880839ece4..64565aac6e40 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt | |||
@@ -520,59 +520,20 @@ Support for power domains is provided through the pwr_domain field of struct | |||
520 | device. This field is a pointer to an object of type struct dev_power_domain, | 520 | device. This field is a pointer to an object of type struct dev_power_domain, |
521 | defined in include/linux/pm.h, providing a set of power management callbacks | 521 | defined in include/linux/pm.h, providing a set of power management callbacks |
522 | analogous to the subsystem-level and device driver callbacks that are executed | 522 | analogous to the subsystem-level and device driver callbacks that are executed |
523 | for the given device during all power transitions, in addition to the respective | 523 | for the given device during all power transitions, instead of the respective |
524 | subsystem-level callbacks. Specifically, the power domain "suspend" callbacks | 524 | subsystem-level callbacks. Specifically, if a device's pm_domain pointer is |
525 | (i.e. ->runtime_suspend(), ->suspend(), ->freeze(), ->poweroff(), etc.) are | 525 | not NULL, the ->suspend() callback from the object pointed to by it will be |
526 | executed after the analogous subsystem-level callbacks, while the power domain | 526 | executed instead of its subsystem's (e.g. bus type's) ->suspend() callback and |
527 | "resume" callbacks (i.e. ->runtime_resume(), ->resume(), ->thaw(), ->restore, | 527 | anlogously for all of the remaining callbacks. In other words, power management |
528 | etc.) are executed before the analogous subsystem-level callbacks. Error codes | 528 | domain callbacks, if defined for the given device, always take precedence over |
529 | returned by the "suspend" and "resume" power domain callbacks are ignored. | 529 | the callbacks provided by the device's subsystem (e.g. bus type). |
530 | 530 | ||
531 | Power domain ->runtime_idle() callback is executed before the subsystem-level | 531 | The support for device power management domains is only relevant to platforms |
532 | ->runtime_idle() callback and the result returned by it is not ignored. Namely, | 532 | needing to use the same device driver power management callbacks in many |
533 | if it returns error code, the subsystem-level ->runtime_idle() callback will not | 533 | different power domain configurations and wanting to avoid incorporating the |
534 | be called and the helper function rpm_idle() executing it will return error | 534 | support for power domains into subsystem-level callbacks, for example by |
535 | code. This mechanism is intended to help platforms where saving device state | 535 | modifying the platform bus type. Other platforms need not implement it or take |
536 | is a time consuming operation and should only be carried out if all devices | 536 | it into account in any way. |
537 | in the power domain are idle, before turning off the shared power resource(s). | ||
538 | Namely, the power domain ->runtime_idle() callback may return error code until | ||
539 | the pm_runtime_idle() helper (or its asychronous version) has been called for | ||
540 | all devices in the power domain (it is recommended that the returned error code | ||
541 | be -EBUSY in those cases), preventing the subsystem-level ->runtime_idle() | ||
542 | callback from being run prematurely. | ||
543 | |||
544 | The support for device power domains is only relevant to platforms needing to | ||
545 | use the same subsystem-level (e.g. platform bus type) and device driver power | ||
546 | management callbacks in many different power domain configurations and wanting | ||
547 | to avoid incorporating the support for power domains into the subsystem-level | ||
548 | callbacks. The other platforms need not implement it or take it into account | ||
549 | in any way. | ||
550 | |||
551 | |||
552 | System Devices | ||
553 | -------------- | ||
554 | System devices (sysdevs) follow a slightly different API, which can be found in | ||
555 | |||
556 | include/linux/sysdev.h | ||
557 | drivers/base/sys.c | ||
558 | |||
559 | System devices will be suspended with interrupts disabled, and after all other | ||
560 | devices have been suspended. On resume, they will be resumed before any other | ||
561 | devices, and also with interrupts disabled. These things occur in special | ||
562 | "sysdev_driver" phases, which affect only system devices. | ||
563 | |||
564 | Thus, after the suspend_noirq (or freeze_noirq or poweroff_noirq) phase, when | ||
565 | the non-boot CPUs are all offline and IRQs are disabled on the remaining online | ||
566 | CPU, then a sysdev_driver.suspend phase is carried out, and the system enters a | ||
567 | sleep state (or a system image is created). During resume (or after the image | ||
568 | has been created or loaded) a sysdev_driver.resume phase is carried out, IRQs | ||
569 | are enabled on the only online CPU, the non-boot CPUs are enabled, and the | ||
570 | resume_noirq (or thaw_noirq or restore_noirq) phase begins. | ||
571 | |||
572 | Code to actually enter and exit the system-wide low power state sometimes | ||
573 | involves hardware details that are only known to the boot firmware, and | ||
574 | may leave a CPU running software (from SRAM or flash memory) that monitors | ||
575 | the system and manages its wakeup sequence. | ||
576 | 537 | ||
577 | 538 | ||
578 | Device Low Power (suspend) States | 539 | Device Low Power (suspend) States |
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 654097b130b4..b24875b1ced5 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt | |||
@@ -501,13 +501,29 @@ helper functions described in Section 4. In that case, pm_runtime_resume() | |||
501 | should be used. Of course, for this purpose the device's run-time PM has to be | 501 | should be used. Of course, for this purpose the device's run-time PM has to be |
502 | enabled earlier by calling pm_runtime_enable(). | 502 | enabled earlier by calling pm_runtime_enable(). |
503 | 503 | ||
504 | If the device bus type's or driver's ->probe() or ->remove() callback runs | 504 | If the device bus type's or driver's ->probe() callback runs |
505 | pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts, | 505 | pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts, |
506 | they will fail returning -EAGAIN, because the device's usage counter is | 506 | they will fail returning -EAGAIN, because the device's usage counter is |
507 | incremented by the core before executing ->probe() and ->remove(). Still, it | 507 | incremented by the driver core before executing ->probe(). Still, it may be |
508 | may be desirable to suspend the device as soon as ->probe() or ->remove() has | 508 | desirable to suspend the device as soon as ->probe() has finished, so the driver |
509 | finished, so the PM core uses pm_runtime_idle_sync() to invoke the | 509 | core uses pm_runtime_put_sync() to invoke the subsystem-level idle callback for |
510 | subsystem-level idle callback for the device at that time. | 510 | the device at that time. |
511 | |||
512 | Moreover, the driver core prevents runtime PM callbacks from racing with the bus | ||
513 | notifier callback in __device_release_driver(), which is necessary, because the | ||
514 | notifier is used by some subsystems to carry out operations affecting the | ||
515 | runtime PM functionality. It does so by calling pm_runtime_get_sync() before | ||
516 | driver_sysfs_remove() and the BUS_NOTIFY_UNBIND_DRIVER notifications. This | ||
517 | resumes the device if it's in the suspended state and prevents it from | ||
518 | being suspended again while those routines are being executed. | ||
519 | |||
520 | To allow bus types and drivers to put devices into the suspended state by | ||
521 | calling pm_runtime_suspend() from their ->remove() routines, the driver core | ||
522 | executes pm_runtime_put_sync() after running the BUS_NOTIFY_UNBIND_DRIVER | ||
523 | notifications in __device_release_driver(). This requires bus types and | ||
524 | drivers to make their ->remove() callbacks avoid races with runtime PM directly, | ||
525 | but also it allows of more flexibility in the handling of devices during the | ||
526 | removal of their drivers. | ||
511 | 527 | ||
512 | The user space can effectively disallow the driver of the device to power manage | 528 | The user space can effectively disallow the driver of the device to power manage |
513 | it at run time by changing the value of its /sys/devices/.../power/control | 529 | it at run time by changing the value of its /sys/devices/.../power/control |
@@ -566,11 +582,6 @@ to do this is: | |||
566 | pm_runtime_set_active(dev); | 582 | pm_runtime_set_active(dev); |
567 | pm_runtime_enable(dev); | 583 | pm_runtime_enable(dev); |
568 | 584 | ||
569 | The PM core always increments the run-time usage counter before calling the | ||
570 | ->prepare() callback and decrements it after calling the ->complete() callback. | ||
571 | Hence disabling run-time PM temporarily like this will not cause any run-time | ||
572 | suspend callbacks to be lost. | ||
573 | |||
574 | 7. Generic subsystem callbacks | 585 | 7. Generic subsystem callbacks |
575 | 586 | ||
576 | Subsystems may wish to conserve code space by using the set of generic power | 587 | Subsystems may wish to conserve code space by using the set of generic power |
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 1b5a5ddbc3ef..5df176ed59b8 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt | |||
@@ -9,7 +9,121 @@ If variable is of Type, use printk format specifier: | |||
9 | size_t %zu or %zx | 9 | size_t %zu or %zx |
10 | ssize_t %zd or %zx | 10 | ssize_t %zd or %zx |
11 | 11 | ||
12 | Raw pointer value SHOULD be printed with %p. | 12 | Raw pointer value SHOULD be printed with %p. The kernel supports |
13 | the following extended format specifiers for pointer types: | ||
14 | |||
15 | Symbols/Function Pointers: | ||
16 | |||
17 | %pF versatile_init+0x0/0x110 | ||
18 | %pf versatile_init | ||
19 | %pS versatile_init+0x0/0x110 | ||
20 | %ps versatile_init | ||
21 | %pB prev_fn_of_versatile_init+0x88/0x88 | ||
22 | |||
23 | For printing symbols and function pointers. The 'S' and 's' specifiers | ||
24 | result in the symbol name with ('S') or without ('s') offsets. Where | ||
25 | this is used on a kernel without KALLSYMS - the symbol address is | ||
26 | printed instead. | ||
27 | |||
28 | The 'B' specifier results in the symbol name with offsets and should be | ||
29 | used when printing stack backtraces. The specifier takes into | ||
30 | consideration the effect of compiler optimisations which may occur | ||
31 | when tail-call's are used and marked with the noreturn GCC attribute. | ||
32 | |||
33 | On ia64, ppc64 and parisc64 architectures function pointers are | ||
34 | actually function descriptors which must first be resolved. The 'F' and | ||
35 | 'f' specifiers perform this resolution and then provide the same | ||
36 | functionality as the 'S' and 's' specifiers. | ||
37 | |||
38 | Kernel Pointers: | ||
39 | |||
40 | %pK 0x01234567 or 0x0123456789abcdef | ||
41 | |||
42 | For printing kernel pointers which should be hidden from unprivileged | ||
43 | users. The behaviour of %pK depends on the kptr_restrict sysctl - see | ||
44 | Documentation/sysctl/kernel.txt for more details. | ||
45 | |||
46 | Struct Resources: | ||
47 | |||
48 | %pr [mem 0x60000000-0x6fffffff flags 0x2200] or | ||
49 | [mem 0x0000000060000000-0x000000006fffffff flags 0x2200] | ||
50 | %pR [mem 0x60000000-0x6fffffff pref] or | ||
51 | [mem 0x0000000060000000-0x000000006fffffff pref] | ||
52 | |||
53 | For printing struct resources. The 'R' and 'r' specifiers result in a | ||
54 | printed resource with ('R') or without ('r') a decoded flags member. | ||
55 | |||
56 | MAC/FDDI addresses: | ||
57 | |||
58 | %pM 00:01:02:03:04:05 | ||
59 | %pMF 00-01-02-03-04-05 | ||
60 | %pm 000102030405 | ||
61 | |||
62 | For printing 6-byte MAC/FDDI addresses in hex notation. The 'M' and 'm' | ||
63 | specifiers result in a printed address with ('M') or without ('m') byte | ||
64 | separators. The default byte separator is the colon (':'). | ||
65 | |||
66 | Where FDDI addresses are concerned the 'F' specifier can be used after | ||
67 | the 'M' specifier to use dash ('-') separators instead of the default | ||
68 | separator. | ||
69 | |||
70 | IPv4 addresses: | ||
71 | |||
72 | %pI4 1.2.3.4 | ||
73 | %pi4 001.002.003.004 | ||
74 | %p[Ii][hnbl] | ||
75 | |||
76 | For printing IPv4 dot-separated decimal addresses. The 'I4' and 'i4' | ||
77 | specifiers result in a printed address with ('i4') or without ('I4') | ||
78 | leading zeros. | ||
79 | |||
80 | The additional 'h', 'n', 'b', and 'l' specifiers are used to specify | ||
81 | host, network, big or little endian order addresses respectively. Where | ||
82 | no specifier is provided the default network/big endian order is used. | ||
83 | |||
84 | IPv6 addresses: | ||
85 | |||
86 | %pI6 0001:0002:0003:0004:0005:0006:0007:0008 | ||
87 | %pi6 00010002000300040005000600070008 | ||
88 | %pI6c 1:2:3:4:5:6:7:8 | ||
89 | |||
90 | For printing IPv6 network-order 16-bit hex addresses. The 'I6' and 'i6' | ||
91 | specifiers result in a printed address with ('I6') or without ('i6') | ||
92 | colon-separators. Leading zeros are always used. | ||
93 | |||
94 | The additional 'c' specifier can be used with the 'I' specifier to | ||
95 | print a compressed IPv6 address as described by | ||
96 | http://tools.ietf.org/html/rfc5952 | ||
97 | |||
98 | UUID/GUID addresses: | ||
99 | |||
100 | %pUb 00010203-0405-0607-0809-0a0b0c0d0e0f | ||
101 | %pUB 00010203-0405-0607-0809-0A0B0C0D0E0F | ||
102 | %pUl 03020100-0504-0706-0809-0a0b0c0e0e0f | ||
103 | %pUL 03020100-0504-0706-0809-0A0B0C0E0E0F | ||
104 | |||
105 | For printing 16-byte UUID/GUIDs addresses. The additional 'l', 'L', | ||
106 | 'b' and 'B' specifiers are used to specify a little endian order in | ||
107 | lower ('l') or upper case ('L') hex characters - and big endian order | ||
108 | in lower ('b') or upper case ('B') hex characters. | ||
109 | |||
110 | Where no additional specifiers are used the default little endian | ||
111 | order with lower case hex characters will be printed. | ||
112 | |||
113 | struct va_format: | ||
114 | |||
115 | %pV | ||
116 | |||
117 | For printing struct va_format structures. These contain a format string | ||
118 | and va_list as follows: | ||
119 | |||
120 | struct va_format { | ||
121 | const char *fmt; | ||
122 | va_list *va; | ||
123 | }; | ||
124 | |||
125 | Do not use this feature without some mechanism to verify the | ||
126 | correctness of the format string and va_list arguments. | ||
13 | 127 | ||
14 | u64 SHOULD be printed with %llu/%llx, (unsigned long long): | 128 | u64 SHOULD be printed with %llu/%llx, (unsigned long long): |
15 | 129 | ||
@@ -32,4 +146,5 @@ Reminder: sizeof() result is of type size_t. | |||
32 | Thank you for your cooperation and attention. | 146 | Thank you for your cooperation and attention. |
33 | 147 | ||
34 | 148 | ||
35 | By Randy Dunlap <rdunlap@xenotime.net> | 149 | By Randy Dunlap <rdunlap@xenotime.net> and |
150 | Andrew Murray <amurray@mpc-data.co.uk> | ||
diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index 99961993257a..91ecff07cede 100644 --- a/Documentation/scheduler/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt | |||
@@ -223,9 +223,10 @@ When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each | |||
223 | group created using the pseudo filesystem. See example steps below to create | 223 | group created using the pseudo filesystem. See example steps below to create |
224 | task groups and modify their CPU share using the "cgroups" pseudo filesystem. | 224 | task groups and modify their CPU share using the "cgroups" pseudo filesystem. |
225 | 225 | ||
226 | # mkdir /dev/cpuctl | 226 | # mount -t tmpfs cgroup_root /sys/fs/cgroup |
227 | # mount -t cgroup -ocpu none /dev/cpuctl | 227 | # mkdir /sys/fs/cgroup/cpu |
228 | # cd /dev/cpuctl | 228 | # mount -t cgroup -ocpu none /sys/fs/cgroup/cpu |
229 | # cd /sys/fs/cgroup/cpu | ||
229 | 230 | ||
230 | # mkdir multimedia # create "multimedia" group of tasks | 231 | # mkdir multimedia # create "multimedia" group of tasks |
231 | # mkdir browser # create "browser" group of tasks | 232 | # mkdir browser # create "browser" group of tasks |
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index 605b0d40329d..71b54d549987 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt | |||
@@ -129,9 +129,8 @@ priority! | |||
129 | Enabling CONFIG_RT_GROUP_SCHED lets you explicitly allocate real | 129 | Enabling CONFIG_RT_GROUP_SCHED lets you explicitly allocate real |
130 | CPU bandwidth to task groups. | 130 | CPU bandwidth to task groups. |
131 | 131 | ||
132 | This uses the /cgroup virtual file system and | 132 | This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us" |
133 | "/cgroup/<cgroup>/cpu.rt_runtime_us" to control the CPU time reserved for each | 133 | to control the CPU time reserved for each control group. |
134 | control group. | ||
135 | 134 | ||
136 | For more information on working with control groups, you should read | 135 | For more information on working with control groups, you should read |
137 | Documentation/cgroups/cgroups.txt as well. | 136 | Documentation/cgroups/cgroups.txt as well. |
@@ -150,7 +149,7 @@ For now, this can be simplified to just the following (but see Future plans): | |||
150 | =============== | 149 | =============== |
151 | 150 | ||
152 | There is work in progress to make the scheduling period for each group | 151 | There is work in progress to make the scheduling period for each group |
153 | ("/cgroup/<cgroup>/cpu.rt_period_us") configurable as well. | 152 | ("<cgroup>/cpu.rt_period_us") configurable as well. |
154 | 153 | ||
155 | The constraint on the period is that a subgroup must have a smaller or | 154 | The constraint on the period is that a subgroup must have a smaller or |
156 | equal period to its parent. But realistically its not very useful _yet_ | 155 | equal period to its parent. But realistically its not very useful _yet_ |
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt index 2e3c64b1a6a5..9dbe885ecd8d 100644 --- a/Documentation/spinlocks.txt +++ b/Documentation/spinlocks.txt | |||
@@ -13,18 +13,8 @@ static DEFINE_SPINLOCK(xxx_lock); | |||
13 | The above is always safe. It will disable interrupts _locally_, but the | 13 | The above is always safe. It will disable interrupts _locally_, but the |
14 | spinlock itself will guarantee the global lock, so it will guarantee that | 14 | spinlock itself will guarantee the global lock, so it will guarantee that |
15 | there is only one thread-of-control within the region(s) protected by that | 15 | there is only one thread-of-control within the region(s) protected by that |
16 | lock. This works well even under UP. The above sequence under UP | 16 | lock. This works well even under UP also, so the code does _not_ need to |
17 | essentially is just the same as doing | 17 | worry about UP vs SMP issues: the spinlocks work correctly under both. |
18 | |||
19 | unsigned long flags; | ||
20 | |||
21 | save_flags(flags); cli(); | ||
22 | ... critical section ... | ||
23 | restore_flags(flags); | ||
24 | |||
25 | so the code does _not_ need to worry about UP vs SMP issues: the spinlocks | ||
26 | work correctly under both (and spinlocks are actually more efficient on | ||
27 | architectures that allow doing the "save_flags + cli" in one operation). | ||
28 | 18 | ||
29 | NOTE! Implications of spin_locks for memory are further described in: | 19 | NOTE! Implications of spin_locks for memory are further described in: |
30 | 20 | ||
@@ -36,27 +26,7 @@ The above is usually pretty simple (you usually need and want only one | |||
36 | spinlock for most things - using more than one spinlock can make things a | 26 | spinlock for most things - using more than one spinlock can make things a |
37 | lot more complex and even slower and is usually worth it only for | 27 | lot more complex and even slower and is usually worth it only for |
38 | sequences that you _know_ need to be split up: avoid it at all cost if you | 28 | sequences that you _know_ need to be split up: avoid it at all cost if you |
39 | aren't sure). HOWEVER, it _does_ mean that if you have some code that does | 29 | aren't sure). |
40 | |||
41 | cli(); | ||
42 | .. critical section .. | ||
43 | sti(); | ||
44 | |||
45 | and another sequence that does | ||
46 | |||
47 | spin_lock_irqsave(flags); | ||
48 | .. critical section .. | ||
49 | spin_unlock_irqrestore(flags); | ||
50 | |||
51 | then they are NOT mutually exclusive, and the critical regions can happen | ||
52 | at the same time on two different CPU's. That's fine per se, but the | ||
53 | critical regions had better be critical for different things (ie they | ||
54 | can't stomp on each other). | ||
55 | |||
56 | The above is a problem mainly if you end up mixing code - for example the | ||
57 | routines in ll_rw_block() tend to use cli/sti to protect the atomicity of | ||
58 | their actions, and if a driver uses spinlocks instead then you should | ||
59 | think about issues like the above. | ||
60 | 30 | ||
61 | This is really the only really hard part about spinlocks: once you start | 31 | This is really the only really hard part about spinlocks: once you start |
62 | using spinlocks they tend to expand to areas you might not have noticed | 32 | using spinlocks they tend to expand to areas you might not have noticed |
@@ -120,11 +90,10 @@ Lesson 3: spinlocks revisited. | |||
120 | 90 | ||
121 | The single spin-lock primitives above are by no means the only ones. They | 91 | The single spin-lock primitives above are by no means the only ones. They |
122 | are the most safe ones, and the ones that work under all circumstances, | 92 | are the most safe ones, and the ones that work under all circumstances, |
123 | but partly _because_ they are safe they are also fairly slow. They are | 93 | but partly _because_ they are safe they are also fairly slow. They are slower |
124 | much faster than a generic global cli/sti pair, but slower than they'd | 94 | than they'd need to be, because they do have to disable interrupts |
125 | need to be, because they do have to disable interrupts (which is just a | 95 | (which is just a single instruction on a x86, but it's an expensive one - |
126 | single instruction on a x86, but it's an expensive one - and on other | 96 | and on other architectures it can be worse). |
127 | architectures it can be worse). | ||
128 | 97 | ||
129 | If you have a case where you have to protect a data structure across | 98 | If you have a case where you have to protect a data structure across |
130 | several CPU's and you want to use spinlocks you can potentially use | 99 | several CPU's and you want to use spinlocks you can potentially use |
diff --git a/Documentation/usb/error-codes.txt b/Documentation/usb/error-codes.txt index d83703ea74b2..b3f606b81a03 100644 --- a/Documentation/usb/error-codes.txt +++ b/Documentation/usb/error-codes.txt | |||
@@ -76,6 +76,13 @@ A transfer's actual_length may be positive even when an error has been | |||
76 | reported. That's because transfers often involve several packets, so that | 76 | reported. That's because transfers often involve several packets, so that |
77 | one or more packets could finish before an error stops further endpoint I/O. | 77 | one or more packets could finish before an error stops further endpoint I/O. |
78 | 78 | ||
79 | For isochronous URBs, the urb status value is non-zero only if the URB is | ||
80 | unlinked, the device is removed, the host controller is disabled, or the total | ||
81 | transferred length is less than the requested length and the URB_SHORT_NOT_OK | ||
82 | flag is set. Completion handlers for isochronous URBs should only see | ||
83 | urb->status set to zero, -ENOENT, -ECONNRESET, -ESHUTDOWN, or -EREMOTEIO. | ||
84 | Individual frame descriptor status fields may report more status codes. | ||
85 | |||
79 | 86 | ||
80 | 0 Transfer completed successfully | 87 | 0 Transfer completed successfully |
81 | 88 | ||
@@ -132,7 +139,7 @@ one or more packets could finish before an error stops further endpoint I/O. | |||
132 | device removal events immediately. | 139 | device removal events immediately. |
133 | 140 | ||
134 | -EXDEV ISO transfer only partially completed | 141 | -EXDEV ISO transfer only partially completed |
135 | look at individual frame status for details | 142 | (only set in iso_frame_desc[n].status, not urb->status) |
136 | 143 | ||
137 | -EINVAL ISO madness, if this happens: Log off and go home | 144 | -EINVAL ISO madness, if this happens: Log off and go home |
138 | 145 | ||
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt index 12f9ba20ccb7..550068466605 100644 --- a/Documentation/vm/hwpoison.txt +++ b/Documentation/vm/hwpoison.txt | |||
@@ -129,12 +129,12 @@ Limit injection to pages owned by memgroup. Specified by inode number | |||
129 | of the memcg. | 129 | of the memcg. |
130 | 130 | ||
131 | Example: | 131 | Example: |
132 | mkdir /cgroup/hwpoison | 132 | mkdir /sys/fs/cgroup/mem/hwpoison |
133 | 133 | ||
134 | usemem -m 100 -s 1000 & | 134 | usemem -m 100 -s 1000 & |
135 | echo `jobs -p` > /cgroup/hwpoison/tasks | 135 | echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks |
136 | 136 | ||
137 | memcg_ino=$(ls -id /cgroup/hwpoison | cut -f1 -d' ') | 137 | memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ') |
138 | echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg | 138 | echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg |
139 | 139 | ||
140 | page-types -p `pidof init` --hwpoison # shall do nothing | 140 | page-types -p `pidof init` --hwpoison # shall do nothing |
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 9b7221a86df2..7c3a8801b7ce 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt | |||
@@ -674,7 +674,7 @@ Protocol: 2.10+ | |||
674 | 674 | ||
675 | Field name: init_size | 675 | Field name: init_size |
676 | Type: read | 676 | Type: read |
677 | Offset/size: 0x25c/4 | 677 | Offset/size: 0x260/4 |
678 | 678 | ||
679 | This field indicates the amount of linear contiguous memory starting | 679 | This field indicates the amount of linear contiguous memory starting |
680 | at the kernel runtime start address that the kernel needs before it | 680 | at the kernel runtime start address that the kernel needs before it |