diff options
253 files changed, 14531 insertions, 4855 deletions
diff --git a/Documentation/auxdisplay/cfag12864b-example.c b/Documentation/auxdisplay/cfag12864b-example.c index 1d2c010bae12..e7823ffb1ca0 100644 --- a/Documentation/auxdisplay/cfag12864b-example.c +++ b/Documentation/auxdisplay/cfag12864b-example.c | |||
@@ -194,7 +194,6 @@ static void cfag12864b_blit(void) | |||
194 | */ | 194 | */ |
195 | 195 | ||
196 | #include <stdio.h> | 196 | #include <stdio.h> |
197 | #include <string.h> | ||
198 | 197 | ||
199 | #define EXAMPLES 6 | 198 | #define EXAMPLES 6 |
200 | 199 | ||
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 6eb1a97e88ce..455d4e6d346d 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -408,6 +408,26 @@ You can attach the current shell task by echoing 0: | |||
408 | 408 | ||
409 | # echo 0 > tasks | 409 | # echo 0 > tasks |
410 | 410 | ||
411 | 2.3 Mounting hierarchies by name | ||
412 | -------------------------------- | ||
413 | |||
414 | Passing the name=<x> option when mounting a cgroups hierarchy | ||
415 | associates the given name with the hierarchy. This can be used when | ||
416 | mounting a pre-existing hierarchy, in order to refer to it by name | ||
417 | rather than by its set of active subsystems. Each hierarchy is either | ||
418 | nameless, or has a unique name. | ||
419 | |||
420 | The name should match [\w.-]+ | ||
421 | |||
422 | When passing a name=<x> option for a new hierarchy, you need to | ||
423 | specify subsystems manually; the legacy behaviour of mounting all | ||
424 | subsystems when none are explicitly specified is not supported when | ||
425 | you give a hierarchy a name. | ||
426 | |||
427 | The name of the hierarchy appears as part of the hierarchy description | ||
428 | in /proc/mounts and /proc/<pid>/cgroups. | ||
429 | |||
430 | |||
411 | 3. Kernel API | 431 | 3. Kernel API |
412 | ============= | 432 | ============= |
413 | 433 | ||
@@ -501,7 +521,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be | |||
501 | called multiple times against a cgroup. | 521 | called multiple times against a cgroup. |
502 | 522 | ||
503 | int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 523 | int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
504 | struct task_struct *task) | 524 | struct task_struct *task, bool threadgroup) |
505 | (cgroup_mutex held by caller) | 525 | (cgroup_mutex held by caller) |
506 | 526 | ||
507 | Called prior to moving a task into a cgroup; if the subsystem | 527 | Called prior to moving a task into a cgroup; if the subsystem |
@@ -509,14 +529,20 @@ returns an error, this will abort the attach operation. If a NULL | |||
509 | task is passed, then a successful result indicates that *any* | 529 | task is passed, then a successful result indicates that *any* |
510 | unspecified task can be moved into the cgroup. Note that this isn't | 530 | unspecified task can be moved into the cgroup. Note that this isn't |
511 | called on a fork. If this method returns 0 (success) then this should | 531 | called on a fork. If this method returns 0 (success) then this should |
512 | remain valid while the caller holds cgroup_mutex. | 532 | remain valid while the caller holds cgroup_mutex. If threadgroup is |
533 | true, then a successful result indicates that all threads in the given | ||
534 | thread's threadgroup can be moved together. | ||
513 | 535 | ||
514 | void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 536 | void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
515 | struct cgroup *old_cgrp, struct task_struct *task) | 537 | struct cgroup *old_cgrp, struct task_struct *task, |
538 | bool threadgroup) | ||
516 | (cgroup_mutex held by caller) | 539 | (cgroup_mutex held by caller) |
517 | 540 | ||
518 | Called after the task has been attached to the cgroup, to allow any | 541 | Called after the task has been attached to the cgroup, to allow any |
519 | post-attachment activity that requires memory allocations or blocking. | 542 | post-attachment activity that requires memory allocations or blocking. |
543 | If threadgroup is true, the subsystem should take care of all threads | ||
544 | in the specified thread's threadgroup. Currently does not support any | ||
545 | subsystem that might need the old_cgrp for every thread in the group. | ||
520 | 546 | ||
521 | void fork(struct cgroup_subsys *ss, struct task_struct *task) | 547 | void fork(struct cgroup_subsys *ss, struct task_struct *task) |
522 | 548 | ||
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 23d1262c0775..b871f2552b45 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
@@ -179,6 +179,9 @@ The reclaim algorithm has not been modified for cgroups, except that | |||
179 | pages that are selected for reclaiming come from the per cgroup LRU | 179 | pages that are selected for reclaiming come from the per cgroup LRU |
180 | list. | 180 | list. |
181 | 181 | ||
182 | NOTE: Reclaim does not work for the root cgroup, since we cannot set any | ||
183 | limits on the root cgroup. | ||
184 | |||
182 | 2. Locking | 185 | 2. Locking |
183 | 186 | ||
184 | The memory controller uses the following hierarchy | 187 | The memory controller uses the following hierarchy |
@@ -210,6 +213,7 @@ We can alter the memory limit: | |||
210 | NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, | 213 | NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, |
211 | mega or gigabytes. | 214 | mega or gigabytes. |
212 | NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). | 215 | NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). |
216 | NOTE: We cannot set limits on the root cgroup any more. | ||
213 | 217 | ||
214 | # cat /cgroups/0/memory.limit_in_bytes | 218 | # cat /cgroups/0/memory.limit_in_bytes |
215 | 4194304 | 219 | 4194304 |
@@ -375,7 +379,42 @@ cgroups created below it. | |||
375 | 379 | ||
376 | NOTE2: This feature can be enabled/disabled per subtree. | 380 | NOTE2: This feature can be enabled/disabled per subtree. |
377 | 381 | ||
378 | 7. TODO | 382 | 7. Soft limits |
383 | |||
384 | Soft limits allow for greater sharing of memory. The idea behind soft limits | ||
385 | is to allow control groups to use as much of the memory as needed, provided | ||
386 | |||
387 | a. There is no memory contention | ||
388 | b. They do not exceed their hard limit | ||
389 | |||
390 | When the system detects memory contention or low memory, control groups | ||
391 | are pushed back to their soft limits. If the soft limit of each control | ||
392 | group is very high, they are pushed back as much as possible to make | ||
393 | sure that one control group does not starve the others of memory. | ||
394 | |||
395 | Please note that soft limits is a best effort feature, it comes with | ||
396 | no guarantees, but it does its best to make sure that when memory is | ||
397 | heavily contended for, memory is allocated based on the soft limit | ||
398 | hints/setup. Currently soft limit based reclaim is set up such that | ||
399 | it gets invoked from balance_pgdat (kswapd). | ||
400 | |||
401 | 7.1 Interface | ||
402 | |||
403 | Soft limits can be set up by using the following commands (in this example we | ||
404 | assume a soft limit of 256 megabytes) | ||
405 | |||
406 | # echo 256M > memory.soft_limit_in_bytes | ||
407 | |||
408 | If we want to change this to 1G, we can at any time use | ||
409 | |||
410 | # echo 1G > memory.soft_limit_in_bytes | ||
411 | |||
412 | NOTE1: Soft limits take effect over a long period of time, since they involve | ||
413 | reclaiming memory for balancing between memory cgroups | ||
414 | NOTE2: It is recommended to set the soft limit always below the hard limit, | ||
415 | otherwise the hard limit will take precedence. | ||
416 | |||
417 | 8. TODO | ||
379 | 418 | ||
380 | 1. Add support for accounting huge pages (as a separate controller) | 419 | 1. Add support for accounting huge pages (as a separate controller) |
381 | 2. Make per-cgroup scanner reclaim not-shared pages first | 420 | 2. Make per-cgroup scanner reclaim not-shared pages first |
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 9f59fcbf5d82..ba046b8fa92f 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt | |||
@@ -54,20 +54,23 @@ features surfaced as a result: | |||
54 | 54 | ||
55 | 3.1 General format of the API: | 55 | 3.1 General format of the API: |
56 | struct dma_async_tx_descriptor * | 56 | struct dma_async_tx_descriptor * |
57 | async_<operation>(<op specific parameters>, | 57 | async_<operation>(<op specific parameters>, struct async_submit_ctl *submit) |
58 | enum async_tx_flags flags, | ||
59 | struct dma_async_tx_descriptor *dependency, | ||
60 | dma_async_tx_callback callback_routine, | ||
61 | void *callback_parameter); | ||
62 | 58 | ||
63 | 3.2 Supported operations: | 59 | 3.2 Supported operations: |
64 | memcpy - memory copy between a source and a destination buffer | 60 | memcpy - memory copy between a source and a destination buffer |
65 | memset - fill a destination buffer with a byte value | 61 | memset - fill a destination buffer with a byte value |
66 | xor - xor a series of source buffers and write the result to a | 62 | xor - xor a series of source buffers and write the result to a |
67 | destination buffer | 63 | destination buffer |
68 | xor_zero_sum - xor a series of source buffers and set a flag if the | 64 | xor_val - xor a series of source buffers and set a flag if the |
69 | result is zero. The implementation attempts to prevent | 65 | result is zero. The implementation attempts to prevent |
70 | writes to memory | 66 | writes to memory |
67 | pq - generate the p+q (raid6 syndrome) from a series of source buffers | ||
68 | pq_val - validate that a p and/or q buffer are in sync with a given series of | ||
69 | sources | ||
70 | datap - (raid6_datap_recov) recover a raid6 data block and the p block | ||
71 | from the given sources | ||
72 | 2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given | ||
73 | sources | ||
71 | 74 | ||
72 | 3.3 Descriptor management: | 75 | 3.3 Descriptor management: |
73 | The return value is non-NULL and points to a 'descriptor' when the operation | 76 | The return value is non-NULL and points to a 'descriptor' when the operation |
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to | |||
80 | recycle (or free) the descriptor. A descriptor can be acked by one of the | 83 | recycle (or free) the descriptor. A descriptor can be acked by one of the |
81 | following methods: | 84 | following methods: |
82 | 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted | 85 | 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted |
83 | 2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent | 86 | 2/ submitting an unacknowledged descriptor as a dependency to another |
84 | descriptor of a new operation. | 87 | async_tx call will implicitly set the acknowledged state. |
85 | 3/ calling async_tx_ack() on the descriptor. | 88 | 3/ calling async_tx_ack() on the descriptor. |
86 | 89 | ||
87 | 3.4 When does the operation execute? | 90 | 3.4 When does the operation execute? |
@@ -119,30 +122,42 @@ of an operation. | |||
119 | Perform a xor->copy->xor operation where each operation depends on the | 122 | Perform a xor->copy->xor operation where each operation depends on the |
120 | result from the previous operation: | 123 | result from the previous operation: |
121 | 124 | ||
122 | void complete_xor_copy_xor(void *param) | 125 | void callback(void *param) |
123 | { | 126 | { |
124 | printk("complete\n"); | 127 | struct completion *cmp = param; |
128 | |||
129 | complete(cmp); | ||
125 | } | 130 | } |
126 | 131 | ||
127 | int run_xor_copy_xor(struct page **xor_srcs, | 132 | void run_xor_copy_xor(struct page **xor_srcs, |
128 | int xor_src_cnt, | 133 | int xor_src_cnt, |
129 | struct page *xor_dest, | 134 | struct page *xor_dest, |
130 | size_t xor_len, | 135 | size_t xor_len, |
131 | struct page *copy_src, | 136 | struct page *copy_src, |
132 | struct page *copy_dest, | 137 | struct page *copy_dest, |
133 | size_t copy_len) | 138 | size_t copy_len) |
134 | { | 139 | { |
135 | struct dma_async_tx_descriptor *tx; | 140 | struct dma_async_tx_descriptor *tx; |
141 | addr_conv_t addr_conv[xor_src_cnt]; | ||
142 | struct async_submit_ctl submit; | ||
143 | addr_conv_t addr_conv[NDISKS]; | ||
144 | struct completion cmp; | ||
145 | |||
146 | init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL, | ||
147 | addr_conv); | ||
148 | tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit); | ||
136 | 149 | ||
137 | tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, | 150 | submit.depend_tx = tx; |
138 | ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); | 151 | tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit); |
139 | tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, | 152 | |
140 | ASYNC_TX_DEP_ACK, tx, NULL, NULL); | 153 | init_completion(&cmp); |
141 | tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, | 154 | init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, |
142 | ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, | 155 | callback, &cmp, addr_conv); |
143 | tx, complete_xor_copy_xor, NULL); | 156 | tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit); |
144 | 157 | ||
145 | async_tx_issue_pending_all(); | 158 | async_tx_issue_pending_all(); |
159 | |||
160 | wait_for_completion(&cmp); | ||
146 | } | 161 | } |
147 | 162 | ||
148 | See include/linux/async_tx.h for more information on the flags. See the | 163 | See include/linux/async_tx.h for more information on the flags. See the |
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index 736540045dc7..23a181074f94 100644 --- a/Documentation/filesystems/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt | |||
@@ -4,7 +4,7 @@ Shared Subtrees | |||
4 | Contents: | 4 | Contents: |
5 | 1) Overview | 5 | 1) Overview |
6 | 2) Features | 6 | 2) Features |
7 | 3) smount command | 7 | 3) Setting mount states |
8 | 4) Use-case | 8 | 4) Use-case |
9 | 5) Detailed semantics | 9 | 5) Detailed semantics |
10 | 6) Quiz | 10 | 6) Quiz |
@@ -41,14 +41,14 @@ replicas continue to be exactly same. | |||
41 | 41 | ||
42 | Here is an example: | 42 | Here is an example: |
43 | 43 | ||
44 | Lets say /mnt has a mount that is shared. | 44 | Let's say /mnt has a mount that is shared. |
45 | mount --make-shared /mnt | 45 | mount --make-shared /mnt |
46 | 46 | ||
47 | note: mount command does not yet support the --make-shared flag. | 47 | Note: mount(8) command now supports the --make-shared flag, |
48 | I have included a small C program which does the same by executing | 48 | so the sample 'smount' program is no longer needed and has been |
49 | 'smount /mnt shared' | 49 | removed. |
50 | 50 | ||
51 | #mount --bind /mnt /tmp | 51 | # mount --bind /mnt /tmp |
52 | The above command replicates the mount at /mnt to the mountpoint /tmp | 52 | The above command replicates the mount at /mnt to the mountpoint /tmp |
53 | and the contents of both the mounts remain identical. | 53 | and the contents of both the mounts remain identical. |
54 | 54 | ||
@@ -58,8 +58,8 @@ replicas continue to be exactly same. | |||
58 | #ls /tmp | 58 | #ls /tmp |
59 | a b c | 59 | a b c |
60 | 60 | ||
61 | Now lets say we mount a device at /tmp/a | 61 | Now let's say we mount a device at /tmp/a |
62 | #mount /dev/sd0 /tmp/a | 62 | # mount /dev/sd0 /tmp/a |
63 | 63 | ||
64 | #ls /tmp/a | 64 | #ls /tmp/a |
65 | t1 t2 t2 | 65 | t1 t2 t2 |
@@ -80,21 +80,20 @@ replicas continue to be exactly same. | |||
80 | 80 | ||
81 | Here is an example: | 81 | Here is an example: |
82 | 82 | ||
83 | Lets say /mnt has a mount which is shared. | 83 | Let's say /mnt has a mount which is shared. |
84 | #mount --make-shared /mnt | 84 | # mount --make-shared /mnt |
85 | 85 | ||
86 | Lets bind mount /mnt to /tmp | 86 | Let's bind mount /mnt to /tmp |
87 | #mount --bind /mnt /tmp | 87 | # mount --bind /mnt /tmp |
88 | 88 | ||
89 | the new mount at /tmp becomes a shared mount and it is a replica of | 89 | the new mount at /tmp becomes a shared mount and it is a replica of |
90 | the mount at /mnt. | 90 | the mount at /mnt. |
91 | 91 | ||
92 | Now lets make the mount at /tmp; a slave of /mnt | 92 | Now let's make the mount at /tmp a slave of /mnt |
93 | #mount --make-slave /tmp | 93 | # mount --make-slave /tmp |
94 | [or smount /tmp slave] | ||
95 | 94 | ||
96 | lets mount /dev/sd0 on /mnt/a | 95 | let's mount /dev/sd0 on /mnt/a |
97 | #mount /dev/sd0 /mnt/a | 96 | # mount /dev/sd0 /mnt/a |
98 | 97 | ||
99 | #ls /mnt/a | 98 | #ls /mnt/a |
100 | t1 t2 t3 | 99 | t1 t2 t3 |
@@ -104,9 +103,9 @@ replicas continue to be exactly same. | |||
104 | 103 | ||
105 | Note the mount event has propagated to the mount at /tmp | 104 | Note the mount event has propagated to the mount at /tmp |
106 | 105 | ||
107 | However lets see what happens if we mount something on the mount at /tmp | 106 | However let's see what happens if we mount something on the mount at /tmp |
108 | 107 | ||
109 | #mount /dev/sd1 /tmp/b | 108 | # mount /dev/sd1 /tmp/b |
110 | 109 | ||
111 | #ls /tmp/b | 110 | #ls /tmp/b |
112 | s1 s2 s3 | 111 | s1 s2 s3 |
@@ -124,12 +123,11 @@ replicas continue to be exactly same. | |||
124 | 123 | ||
125 | 2d) An unbindable mount is an unbindable private mount | 124 | 2d) An unbindable mount is an unbindable private mount |
126 | 125 | ||
127 | lets say we have a mount at /mnt and we make is unbindable | 126 | let's say we have a mount at /mnt and we make it unbindable |
128 | 127 | ||
129 | #mount --make-unbindable /mnt | 128 | # mount --make-unbindable /mnt |
130 | [ smount /mnt unbindable ] | ||
131 | 129 | ||
132 | Lets try to bind mount this mount somewhere else. | 130 | Let's try to bind mount this mount somewhere else. |
133 | # mount --bind /mnt /tmp | 131 | # mount --bind /mnt /tmp |
134 | mount: wrong fs type, bad option, bad superblock on /mnt, | 132 | mount: wrong fs type, bad option, bad superblock on /mnt, |
135 | or too many mounted file systems | 133 | or too many mounted file systems |
@@ -137,149 +135,15 @@ replicas continue to be exactly same. | |||
137 | Binding an unbindable mount is an invalid operation. | 135 | Binding an unbindable mount is an invalid operation. |
138 | 136 | ||
139 | 137 | ||
140 | 3) smount command | 138 | 3) Setting mount states |
141 | 139 | ||
142 | Currently the mount command is not aware of shared subtree features. | 140 | The mount command (util-linux package) can be used to set mount |
143 | Work is in progress to add the support in mount ( util-linux package ). | 141 | states: |
144 | Till then use the following program. | ||
145 | 142 | ||
146 | ------------------------------------------------------------------------ | 143 | mount --make-shared mountpoint |
147 | // | 144 | mount --make-slave mountpoint |
148 | //this code was developed my Miklos Szeredi <miklos@szeredi.hu> | 145 | mount --make-private mountpoint |
149 | //and modified by Ram Pai <linuxram@us.ibm.com> | 146 | mount --make-unbindable mountpoint |
150 | // sample usage: | ||
151 | // smount /tmp shared | ||
152 | // | ||
153 | #include <stdio.h> | ||
154 | #include <stdlib.h> | ||
155 | #include <unistd.h> | ||
156 | #include <string.h> | ||
157 | #include <sys/mount.h> | ||
158 | #include <sys/fsuid.h> | ||
159 | |||
160 | #ifndef MS_REC | ||
161 | #define MS_REC 0x4000 /* 16384: Recursive loopback */ | ||
162 | #endif | ||
163 | |||
164 | #ifndef MS_SHARED | ||
165 | #define MS_SHARED 1<<20 /* Shared */ | ||
166 | #endif | ||
167 | |||
168 | #ifndef MS_PRIVATE | ||
169 | #define MS_PRIVATE 1<<18 /* Private */ | ||
170 | #endif | ||
171 | |||
172 | #ifndef MS_SLAVE | ||
173 | #define MS_SLAVE 1<<19 /* Slave */ | ||
174 | #endif | ||
175 | |||
176 | #ifndef MS_UNBINDABLE | ||
177 | #define MS_UNBINDABLE 1<<17 /* Unbindable */ | ||
178 | #endif | ||
179 | |||
180 | int main(int argc, char *argv[]) | ||
181 | { | ||
182 | int type; | ||
183 | if(argc != 3) { | ||
184 | fprintf(stderr, "usage: %s dir " | ||
185 | "<rshared|rslave|rprivate|runbindable|shared|slave" | ||
186 | "|private|unbindable>\n" , argv[0]); | ||
187 | return 1; | ||
188 | } | ||
189 | |||
190 | fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]); | ||
191 | |||
192 | if (strcmp(argv[2],"rshared")==0) | ||
193 | type=(MS_SHARED|MS_REC); | ||
194 | else if (strcmp(argv[2],"rslave")==0) | ||
195 | type=(MS_SLAVE|MS_REC); | ||
196 | else if (strcmp(argv[2],"rprivate")==0) | ||
197 | type=(MS_PRIVATE|MS_REC); | ||
198 | else if (strcmp(argv[2],"runbindable")==0) | ||
199 | type=(MS_UNBINDABLE|MS_REC); | ||
200 | else if (strcmp(argv[2],"shared")==0) | ||
201 | type=MS_SHARED; | ||
202 | else if (strcmp(argv[2],"slave")==0) | ||
203 | type=MS_SLAVE; | ||
204 | else if (strcmp(argv[2],"private")==0) | ||
205 | type=MS_PRIVATE; | ||
206 | else if (strcmp(argv[2],"unbindable")==0) | ||
207 | type=MS_UNBINDABLE; | ||
208 | else { | ||
209 | fprintf(stderr, "invalid operation: %s\n", argv[2]); | ||
210 | return 1; | ||
211 | } | ||
212 | setfsuid(getuid()); | ||
213 | |||
214 | if(mount("", argv[1], "dontcare", type, "") == -1) { | ||
215 | perror("mount"); | ||
216 | return 1; | ||
217 | } | ||
218 | return 0; | ||
219 | } | ||
220 | ----------------------------------------------------------------------- | ||
221 | |||
222 | Copy the above code snippet into smount.c | ||
223 | gcc -o smount smount.c | ||
224 | |||
225 | |||
226 | (i) To mark all the mounts under /mnt as shared execute the following | ||
227 | command: | ||
228 | |||
229 | smount /mnt rshared | ||
230 | the corresponding syntax planned for mount command is | ||
231 | mount --make-rshared /mnt | ||
232 | |||
233 | just to mark a mount /mnt as shared, execute the following | ||
234 | command: | ||
235 | smount /mnt shared | ||
236 | the corresponding syntax planned for mount command is | ||
237 | mount --make-shared /mnt | ||
238 | |||
239 | (ii) To mark all the shared mounts under /mnt as slave execute the | ||
240 | following | ||
241 | |||
242 | command: | ||
243 | smount /mnt rslave | ||
244 | the corresponding syntax planned for mount command is | ||
245 | mount --make-rslave /mnt | ||
246 | |||
247 | just to mark a mount /mnt as slave, execute the following | ||
248 | command: | ||
249 | smount /mnt slave | ||
250 | the corresponding syntax planned for mount command is | ||
251 | mount --make-slave /mnt | ||
252 | |||
253 | (iii) To mark all the mounts under /mnt as private execute the | ||
254 | following command: | ||
255 | |||
256 | smount /mnt rprivate | ||
257 | the corresponding syntax planned for mount command is | ||
258 | mount --make-rprivate /mnt | ||
259 | |||
260 | just to mark a mount /mnt as private, execute the following | ||
261 | command: | ||
262 | smount /mnt private | ||
263 | the corresponding syntax planned for mount command is | ||
264 | mount --make-private /mnt | ||
265 | |||
266 | NOTE: by default all the mounts are created as private. But if | ||
267 | you want to change some shared/slave/unbindable mount as | ||
268 | private at a later point in time, this command can help. | ||
269 | |||
270 | (iv) To mark all the mounts under /mnt as unbindable execute the | ||
271 | following | ||
272 | |||
273 | command: | ||
274 | smount /mnt runbindable | ||
275 | the corresponding syntax planned for mount command is | ||
276 | mount --make-runbindable /mnt | ||
277 | |||
278 | just to mark a mount /mnt as unbindable, execute the following | ||
279 | command: | ||
280 | smount /mnt unbindable | ||
281 | the corresponding syntax planned for mount command is | ||
282 | mount --make-unbindable /mnt | ||
283 | 147 | ||
284 | 148 | ||
285 | 4) Use cases | 149 | 4) Use cases |
@@ -350,7 +214,7 @@ replicas continue to be exactly same. | |||
350 | mount --rbind / /view/v3 | 214 | mount --rbind / /view/v3 |
351 | mount --rbind / /view/v4 | 215 | mount --rbind / /view/v4 |
352 | 216 | ||
353 | and if /usr has a versioning filesystem mounted, than that | 217 | and if /usr has a versioning filesystem mounted, then that |
354 | mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and | 218 | mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and |
355 | /view/v4/usr too | 219 | /view/v4/usr too |
356 | 220 | ||
@@ -390,7 +254,7 @@ replicas continue to be exactly same. | |||
390 | 254 | ||
391 | For example: | 255 | For example: |
392 | mount --make-shared /mnt | 256 | mount --make-shared /mnt |
393 | mount --bin /mnt /tmp | 257 | mount --bind /mnt /tmp |
394 | 258 | ||
395 | The mount at /mnt and that at /tmp are both shared and belong | 259 | The mount at /mnt and that at /tmp are both shared and belong |
396 | to the same peer group. Anything mounted or unmounted under | 260 | to the same peer group. Anything mounted or unmounted under |
@@ -558,7 +422,7 @@ replicas continue to be exactly same. | |||
558 | then the subtree under the unbindable mount is pruned in the new | 422 | then the subtree under the unbindable mount is pruned in the new |
559 | location. | 423 | location. |
560 | 424 | ||
561 | eg: lets say we have the following mount tree. | 425 | eg: let's say we have the following mount tree. |
562 | 426 | ||
563 | A | 427 | A |
564 | / \ | 428 | / \ |
@@ -566,7 +430,7 @@ replicas continue to be exactly same. | |||
566 | / \ / \ | 430 | / \ / \ |
567 | D E F G | 431 | D E F G |
568 | 432 | ||
569 | Lets say all the mount except the mount C in the tree are | 433 | Let's say all the mounts except the mount C in the tree are |
570 | of a type other than unbindable. | 434 | of a type other than unbindable. |
571 | 435 | ||
572 | If this tree is rbound to say Z | 436 | If this tree is rbound to say Z |
@@ -683,13 +547,13 @@ replicas continue to be exactly same. | |||
683 | 'b' on mounts that receive propagation from mount 'B' and does not have | 547 | 'b' on mounts that receive propagation from mount 'B' and does not have |
684 | sub-mounts within them are unmounted. | 548 | sub-mounts within them are unmounted. |
685 | 549 | ||
686 | Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to | 550 | Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to |
687 | each other. | 551 | each other. |
688 | 552 | ||
689 | lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount | 553 | let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount |
690 | 'B1', 'B2' and 'B3' respectively. | 554 | 'B1', 'B2' and 'B3' respectively. |
691 | 555 | ||
692 | lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on | 556 | let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on |
693 | mount 'B1', 'B2' and 'B3' respectively. | 557 | mount 'B1', 'B2' and 'B3' respectively. |
694 | 558 | ||
695 | if 'C1' is unmounted, all the mounts that are most-recently-mounted on | 559 | if 'C1' is unmounted, all the mounts that are most-recently-mounted on |
@@ -710,7 +574,7 @@ replicas continue to be exactly same. | |||
710 | A cloned namespace contains all the mounts as that of the parent | 574 | A cloned namespace contains all the mounts as that of the parent |
711 | namespace. | 575 | namespace. |
712 | 576 | ||
713 | Lets say 'A' and 'B' are the corresponding mounts in the parent and the | 577 | Let's say 'A' and 'B' are the corresponding mounts in the parent and the |
714 | child namespace. | 578 | child namespace. |
715 | 579 | ||
716 | If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to | 580 | If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to |
@@ -759,11 +623,11 @@ replicas continue to be exactly same. | |||
759 | mount --make-slave /mnt | 623 | mount --make-slave /mnt |
760 | 624 | ||
761 | At this point we have the first mount at /tmp and | 625 | At this point we have the first mount at /tmp and |
762 | its root dentry is 1. Lets call this mount 'A' | 626 | its root dentry is 1. Let's call this mount 'A' |
763 | And then we have a second mount at /tmp1 with root | 627 | And then we have a second mount at /tmp1 with root |
764 | dentry 2. Lets call this mount 'B' | 628 | dentry 2. Let's call this mount 'B' |
765 | Next we have a third mount at /mnt with root dentry | 629 | Next we have a third mount at /mnt with root dentry |
766 | mnt. Lets call this mount 'C' | 630 | mnt. Let's call this mount 'C' |
767 | 631 | ||
768 | 'B' is the slave of 'A' and 'C' is a slave of 'B' | 632 | 'B' is the slave of 'A' and 'C' is a slave of 'B' |
769 | A -> B -> C | 633 | A -> B -> C |
@@ -794,7 +658,7 @@ replicas continue to be exactly same. | |||
794 | 658 | ||
795 | Q3 Why is unbindable mount needed? | 659 | Q3 Why is unbindable mount needed? |
796 | 660 | ||
797 | Lets say we want to replicate the mount tree at multiple | 661 | Let's say we want to replicate the mount tree at multiple |
798 | locations within the same subtree. | 662 | locations within the same subtree. |
799 | 663 | ||
800 | if one rbind mounts a tree within the same subtree 'n' times | 664 | if one rbind mounts a tree within the same subtree 'n' times |
@@ -803,7 +667,7 @@ replicas continue to be exactly same. | |||
803 | mounts. Here is an example. | 667 | mounts. Here is an example. |
804 | 668 | ||
805 | step 1: | 669 | step 1: |
806 | lets say the root tree has just two directories with | 670 | let's say the root tree has just two directories with |
807 | one vfsmount. | 671 | one vfsmount. |
808 | root | 672 | root |
809 | / \ | 673 | / \ |
@@ -875,7 +739,7 @@ replicas continue to be exactly same. | |||
875 | Unclonable mounts come in handy here. | 739 | Unclonable mounts come in handy here. |
876 | 740 | ||
877 | step 1: | 741 | step 1: |
878 | lets say the root tree has just two directories with | 742 | let's say the root tree has just two directories with |
879 | one vfsmount. | 743 | one vfsmount. |
880 | root | 744 | root |
881 | / \ | 745 | / \ |
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index f49eecf2e573..623f094c9d8d 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt | |||
@@ -536,6 +536,7 @@ struct address_space_operations { | |||
536 | /* migrate the contents of a page to the specified target */ | 536 | /* migrate the contents of a page to the specified target */ |
537 | int (*migratepage) (struct page *, struct page *); | 537 | int (*migratepage) (struct page *, struct page *); |
538 | int (*launder_page) (struct page *); | 538 | int (*launder_page) (struct page *); |
539 | int (*error_remove_page) (struct address_space *mapping, struct page *page); | ||
539 | }; | 540 | }; |
540 | 541 | ||
541 | writepage: called by the VM to write a dirty page to backing store. | 542 | writepage: called by the VM to write a dirty page to backing store. |
@@ -694,6 +695,12 @@ struct address_space_operations { | |||
694 | prevent redirtying the page, it is kept locked during the whole | 695 | prevent redirtying the page, it is kept locked during the whole |
695 | operation. | 696 | operation. |
696 | 697 | ||
698 | error_remove_page: normally set to generic_error_remove_page if truncation | ||
699 | is ok for this address space. Used for memory failure handling. | ||
700 | Setting this implies you deal with pages going away under you, | ||
701 | unless you have them locked or reference counts increased. | ||
702 | |||
703 | |||
697 | The File Object | 704 | The File Object |
698 | =============== | 705 | =============== |
699 | 706 | ||
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index aafca0a8f66a..947374977ca5 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt | |||
@@ -135,6 +135,7 @@ Code Seq# Include File Comments | |||
135 | <http://mikonos.dia.unisa.it/tcfs> | 135 | <http://mikonos.dia.unisa.it/tcfs> |
136 | 'l' 40-7F linux/udf_fs_i.h in development: | 136 | 'l' 40-7F linux/udf_fs_i.h in development: |
137 | <http://sourceforge.net/projects/linux-udf/> | 137 | <http://sourceforge.net/projects/linux-udf/> |
138 | 'm' 00-09 linux/mmtimer.h | ||
138 | 'm' all linux/mtio.h conflict! | 139 | 'm' all linux/mtio.h conflict! |
139 | 'm' all linux/soundcard.h conflict! | 140 | 'm' all linux/soundcard.h conflict! |
140 | 'm' all linux/synclink.h conflict! | 141 | 'm' all linux/synclink.h conflict! |
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 1458448436cc..62682500878a 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt | |||
@@ -96,13 +96,16 @@ handles that the Linux kernel will allocate. When you get lots | |||
96 | of error messages about running out of file handles, you might | 96 | of error messages about running out of file handles, you might |
97 | want to increase this limit. | 97 | want to increase this limit. |
98 | 98 | ||
99 | The three values in file-nr denote the number of allocated | 99 | Historically, the three values in file-nr denoted the number of |
100 | file handles, the number of unused file handles and the maximum | 100 | allocated file handles, the number of allocated but unused file |
101 | number of file handles. When the allocated file handles come | 101 | handles, and the maximum number of file handles. Linux 2.6 always |
102 | close to the maximum, but the number of unused file handles is | 102 | reports 0 as the number of free file handles -- this is not an |
103 | significantly greater than 0, you've encountered a peak in your | 103 | error, it just means that the number of allocated file handles |
104 | usage of file handles and you don't need to increase the maximum. | 104 | exactly matches the number of used file handles. |
105 | 105 | ||
106 | Attempts to allocate more file descriptors than file-max are | ||
107 | reported with printk, look for "VFS: file-max limit <number> | ||
108 | reached". | ||
106 | ============================================================== | 109 | ============================================================== |
107 | 110 | ||
108 | nr_open: | 111 | nr_open: |
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index b3d8b4922740..a028b92001ed 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt | |||
@@ -22,6 +22,7 @@ show up in /proc/sys/kernel: | |||
22 | - callhome [ S390 only ] | 22 | - callhome [ S390 only ] |
23 | - auto_msgmni | 23 | - auto_msgmni |
24 | - core_pattern | 24 | - core_pattern |
25 | - core_pipe_limit | ||
25 | - core_uses_pid | 26 | - core_uses_pid |
26 | - ctrl-alt-del | 27 | - ctrl-alt-del |
27 | - dentry-state | 28 | - dentry-state |
@@ -135,6 +136,27 @@ core_pattern is used to specify a core dumpfile pattern name. | |||
135 | 136 | ||
136 | ============================================================== | 137 | ============================================================== |
137 | 138 | ||
139 | core_pipe_limit: | ||
140 | |||
141 | This sysctl is only applicable when core_pattern is configured to pipe core | ||
142 | files to a user space helper (when the first character of core_pattern is a '|', | ||
143 | see above). When collecting cores via a pipe to an application, it is | ||
144 | occasionally useful for the collecting application to gather data about the | ||
145 | crashing process from its /proc/pid directory. In order to do this safely, the | ||
146 | kernel must wait for the collecting process to exit, so as not to remove the | ||
147 | crashing process's proc files prematurely. This in turn creates the possibility | ||
148 | that a misbehaving userspace collecting process can block the reaping of a | ||
149 | crashed process simply by never exiting. This sysctl defends against that. It | ||
150 | defines how many concurrent crashing processes may be piped to user space | ||
151 | applications in parallel. If this value is exceeded, then those crashing | ||
152 | processes above that value are noted via the kernel log and their cores are | ||
153 | skipped. 0 is a special value, indicating that unlimited processes may be | ||
154 | captured in parallel, but that no waiting will take place (i.e. the collecting | ||
155 | process is not guaranteed access to /proc/<crashing pid>/). This value defaults | ||
156 | to 0. | ||
157 | |||
158 | ============================================================== | ||
159 | |||
138 | core_uses_pid: | 160 | core_uses_pid: |
139 | 161 | ||
140 | The default coredump filename is "core". By setting | 162 | The default coredump filename is "core". By setting |
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index e6fb1ec2744b..a6e360d2055c 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/vm: | |||
32 | - legacy_va_layout | 32 | - legacy_va_layout |
33 | - lowmem_reserve_ratio | 33 | - lowmem_reserve_ratio |
34 | - max_map_count | 34 | - max_map_count |
35 | - memory_failure_early_kill | ||
36 | - memory_failure_recovery | ||
35 | - min_free_kbytes | 37 | - min_free_kbytes |
36 | - min_slab_ratio | 38 | - min_slab_ratio |
37 | - min_unmapped_ratio | 39 | - min_unmapped_ratio |
@@ -53,7 +55,6 @@ Currently, these files are in /proc/sys/vm: | |||
53 | - vfs_cache_pressure | 55 | - vfs_cache_pressure |
54 | - zone_reclaim_mode | 56 | - zone_reclaim_mode |
55 | 57 | ||
56 | |||
57 | ============================================================== | 58 | ============================================================== |
58 | 59 | ||
59 | block_dump | 60 | block_dump |
@@ -275,6 +276,44 @@ e.g., up to one or two maps per allocation. | |||
275 | 276 | ||
276 | The default value is 65536. | 277 | The default value is 65536. |
277 | 278 | ||
279 | ============================================================= | ||
280 | |||
281 | memory_failure_early_kill: | ||
282 | |||
283 | Control how to kill processes when an uncorrected memory error (typically | ||
284 | a 2bit error in a memory module) is detected in the background by hardware | ||
285 | that cannot be handled by the kernel. In some cases (like the page | ||
286 | still having a valid copy on disk) the kernel will handle the failure | ||
287 | transparently without affecting any applications. But if there is | ||
288 | no other up-to-date copy of the data it will kill to prevent any data | ||
289 | corruptions from propagating. | ||
290 | |||
291 | 1: Kill all processes that have the corrupted and not reloadable page mapped | ||
292 | as soon as the corruption is detected. Note this is not supported | ||
293 | for a few types of pages, like kernel internally allocated data or | ||
294 | the swap cache, but works for the majority of user pages. | ||
295 | |||
296 | 0: Only unmap the corrupted page from all processes and only kill a process | ||
297 | who tries to access it. | ||
298 | |||
299 | The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can | ||
300 | handle this if they want to. | ||
301 | |||
302 | This is only active on architectures/platforms with advanced machine | ||
303 | check handling and depends on the hardware capabilities. | ||
304 | |||
305 | Applications can override this setting individually with the PR_MCE_KILL prctl | ||
306 | |||
307 | ============================================================== | ||
308 | |||
309 | memory_failure_recovery: | ||
310 | |||
311 | Enable memory failure recovery (when supported by the platform) | ||
312 | |||
313 | 1: Attempt recovery. | ||
314 | |||
315 | 0: Always panic on a memory failure. | ||
316 | |||
278 | ============================================================== | 317 | ============================================================== |
279 | 318 | ||
280 | min_free_kbytes: | 319 | min_free_kbytes: |
diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore index 33e8a023df02..09b164a5700f 100644 --- a/Documentation/vm/.gitignore +++ b/Documentation/vm/.gitignore | |||
@@ -1 +1,2 @@ | |||
1 | page-types | ||
1 | slabinfo | 2 | slabinfo |
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index 3eda8ea00852..fa1a30d9e9d5 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> | 5 | * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com> |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define _LARGEFILE64_SOURCE | ||
8 | #include <stdio.h> | 9 | #include <stdio.h> |
9 | #include <stdlib.h> | 10 | #include <stdlib.h> |
10 | #include <unistd.h> | 11 | #include <unistd.h> |
@@ -13,12 +14,33 @@ | |||
13 | #include <string.h> | 14 | #include <string.h> |
14 | #include <getopt.h> | 15 | #include <getopt.h> |
15 | #include <limits.h> | 16 | #include <limits.h> |
17 | #include <assert.h> | ||
16 | #include <sys/types.h> | 18 | #include <sys/types.h> |
17 | #include <sys/errno.h> | 19 | #include <sys/errno.h> |
18 | #include <sys/fcntl.h> | 20 | #include <sys/fcntl.h> |
19 | 21 | ||
20 | 22 | ||
21 | /* | 23 | /* |
24 | * pagemap kernel ABI bits | ||
25 | */ | ||
26 | |||
27 | #define PM_ENTRY_BYTES sizeof(uint64_t) | ||
28 | #define PM_STATUS_BITS 3 | ||
29 | #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) | ||
30 | #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) | ||
31 | #define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) | ||
32 | #define PM_PSHIFT_BITS 6 | ||
33 | #define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) | ||
34 | #define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) | ||
35 | #define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) | ||
36 | #define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) | ||
37 | #define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) | ||
38 | |||
39 | #define PM_PRESENT PM_STATUS(4LL) | ||
40 | #define PM_SWAP PM_STATUS(2LL) | ||
41 | |||
42 | |||
43 | /* | ||
22 | * kernel page flags | 44 | * kernel page flags |
23 | */ | 45 | */ |
24 | 46 | ||
@@ -126,6 +148,14 @@ static int nr_addr_ranges; | |||
126 | static unsigned long opt_offset[MAX_ADDR_RANGES]; | 148 | static unsigned long opt_offset[MAX_ADDR_RANGES]; |
127 | static unsigned long opt_size[MAX_ADDR_RANGES]; | 149 | static unsigned long opt_size[MAX_ADDR_RANGES]; |
128 | 150 | ||
151 | #define MAX_VMAS 10240 | ||
152 | static int nr_vmas; | ||
153 | static unsigned long pg_start[MAX_VMAS]; | ||
154 | static unsigned long pg_end[MAX_VMAS]; | ||
155 | static unsigned long voffset; | ||
156 | |||
157 | static int pagemap_fd; | ||
158 | |||
129 | #define MAX_BIT_FILTERS 64 | 159 | #define MAX_BIT_FILTERS 64 |
130 | static int nr_bit_filters; | 160 | static int nr_bit_filters; |
131 | static uint64_t opt_mask[MAX_BIT_FILTERS]; | 161 | static uint64_t opt_mask[MAX_BIT_FILTERS]; |
@@ -135,7 +165,6 @@ static int page_size; | |||
135 | 165 | ||
136 | #define PAGES_BATCH (64 << 10) /* 64k pages */ | 166 | #define PAGES_BATCH (64 << 10) /* 64k pages */ |
137 | static int kpageflags_fd; | 167 | static int kpageflags_fd; |
138 | static uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; | ||
139 | 168 | ||
140 | #define HASH_SHIFT 13 | 169 | #define HASH_SHIFT 13 |
141 | #define HASH_SIZE (1 << HASH_SHIFT) | 170 | #define HASH_SIZE (1 << HASH_SHIFT) |
@@ -158,6 +187,11 @@ static uint64_t page_flags[HASH_SIZE]; | |||
158 | type __min2 = (y); \ | 187 | type __min2 = (y); \ |
159 | __min1 < __min2 ? __min1 : __min2; }) | 188 | __min1 < __min2 ? __min1 : __min2; }) |
160 | 189 | ||
190 | #define max_t(type, x, y) ({ \ | ||
191 | type __max1 = (x); \ | ||
192 | type __max2 = (y); \ | ||
193 | __max1 > __max2 ? __max1 : __max2; }) | ||
194 | |||
161 | static unsigned long pages2mb(unsigned long pages) | 195 | static unsigned long pages2mb(unsigned long pages) |
162 | { | 196 | { |
163 | return (pages * page_size) >> 20; | 197 | return (pages * page_size) >> 20; |
@@ -224,26 +258,34 @@ static char *page_flag_longname(uint64_t flags) | |||
224 | static void show_page_range(unsigned long offset, uint64_t flags) | 258 | static void show_page_range(unsigned long offset, uint64_t flags) |
225 | { | 259 | { |
226 | static uint64_t flags0; | 260 | static uint64_t flags0; |
261 | static unsigned long voff; | ||
227 | static unsigned long index; | 262 | static unsigned long index; |
228 | static unsigned long count; | 263 | static unsigned long count; |
229 | 264 | ||
230 | if (flags == flags0 && offset == index + count) { | 265 | if (flags == flags0 && offset == index + count && |
266 | (!opt_pid || voffset == voff + count)) { | ||
231 | count++; | 267 | count++; |
232 | return; | 268 | return; |
233 | } | 269 | } |
234 | 270 | ||
235 | if (count) | 271 | if (count) { |
236 | printf("%lu\t%lu\t%s\n", | 272 | if (opt_pid) |
273 | printf("%lx\t", voff); | ||
274 | printf("%lx\t%lx\t%s\n", | ||
237 | index, count, page_flag_name(flags0)); | 275 | index, count, page_flag_name(flags0)); |
276 | } | ||
238 | 277 | ||
239 | flags0 = flags; | 278 | flags0 = flags; |
240 | index = offset; | 279 | index = offset; |
280 | voff = voffset; | ||
241 | count = 1; | 281 | count = 1; |
242 | } | 282 | } |
243 | 283 | ||
244 | static void show_page(unsigned long offset, uint64_t flags) | 284 | static void show_page(unsigned long offset, uint64_t flags) |
245 | { | 285 | { |
246 | printf("%lu\t%s\n", offset, page_flag_name(flags)); | 286 | if (opt_pid) |
287 | printf("%lx\t", voffset); | ||
288 | printf("%lx\t%s\n", offset, page_flag_name(flags)); | ||
247 | } | 289 | } |
248 | 290 | ||
249 | static void show_summary(void) | 291 | static void show_summary(void) |
@@ -383,6 +425,8 @@ static void walk_pfn(unsigned long index, unsigned long count) | |||
383 | lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); | 425 | lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET); |
384 | 426 | ||
385 | while (count) { | 427 | while (count) { |
428 | uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH]; | ||
429 | |||
386 | batch = min_t(unsigned long, count, PAGES_BATCH); | 430 | batch = min_t(unsigned long, count, PAGES_BATCH); |
387 | n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); | 431 | n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES); |
388 | if (n == 0) | 432 | if (n == 0) |
@@ -404,6 +448,81 @@ static void walk_pfn(unsigned long index, unsigned long count) | |||
404 | } | 448 | } |
405 | } | 449 | } |
406 | 450 | ||
451 | |||
452 | #define PAGEMAP_BATCH 4096 | ||
453 | static unsigned long task_pfn(unsigned long pgoff) | ||
454 | { | ||
455 | static uint64_t buf[PAGEMAP_BATCH]; | ||
456 | static unsigned long start; | ||
457 | static long count; | ||
458 | uint64_t pfn; | ||
459 | |||
460 | if (pgoff < start || pgoff >= start + count) { | ||
461 | if (lseek64(pagemap_fd, | ||
462 | (uint64_t)pgoff * PM_ENTRY_BYTES, | ||
463 | SEEK_SET) < 0) { | ||
464 | perror("pagemap seek"); | ||
465 | exit(EXIT_FAILURE); | ||
466 | } | ||
467 | count = read(pagemap_fd, buf, sizeof(buf)); | ||
468 | if (count == 0) | ||
469 | return 0; | ||
470 | if (count < 0) { | ||
471 | perror("pagemap read"); | ||
472 | exit(EXIT_FAILURE); | ||
473 | } | ||
474 | if (count % PM_ENTRY_BYTES) { | ||
475 | fatal("pagemap read not aligned.\n"); | ||
476 | exit(EXIT_FAILURE); | ||
477 | } | ||
478 | count /= PM_ENTRY_BYTES; | ||
479 | start = pgoff; | ||
480 | } | ||
481 | |||
482 | pfn = buf[pgoff - start]; | ||
483 | if (pfn & PM_PRESENT) | ||
484 | pfn = PM_PFRAME(pfn); | ||
485 | else | ||
486 | pfn = 0; | ||
487 | |||
488 | return pfn; | ||
489 | } | ||
490 | |||
491 | static void walk_task(unsigned long index, unsigned long count) | ||
492 | { | ||
493 | int i = 0; | ||
494 | const unsigned long end = index + count; | ||
495 | |||
496 | while (index < end) { | ||
497 | |||
498 | while (pg_end[i] <= index) | ||
499 | if (++i >= nr_vmas) | ||
500 | return; | ||
501 | if (pg_start[i] >= end) | ||
502 | return; | ||
503 | |||
504 | voffset = max_t(unsigned long, pg_start[i], index); | ||
505 | index = min_t(unsigned long, pg_end[i], end); | ||
506 | |||
507 | assert(voffset < index); | ||
508 | for (; voffset < index; voffset++) { | ||
509 | unsigned long pfn = task_pfn(voffset); | ||
510 | if (pfn) | ||
511 | walk_pfn(pfn, 1); | ||
512 | } | ||
513 | } | ||
514 | } | ||
515 | |||
516 | static void add_addr_range(unsigned long offset, unsigned long size) | ||
517 | { | ||
518 | if (nr_addr_ranges >= MAX_ADDR_RANGES) | ||
519 | fatal("too many addr ranges\n"); | ||
520 | |||
521 | opt_offset[nr_addr_ranges] = offset; | ||
522 | opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); | ||
523 | nr_addr_ranges++; | ||
524 | } | ||
525 | |||
407 | static void walk_addr_ranges(void) | 526 | static void walk_addr_ranges(void) |
408 | { | 527 | { |
409 | int i; | 528 | int i; |
@@ -415,10 +534,13 @@ static void walk_addr_ranges(void) | |||
415 | } | 534 | } |
416 | 535 | ||
417 | if (!nr_addr_ranges) | 536 | if (!nr_addr_ranges) |
418 | walk_pfn(0, ULONG_MAX); | 537 | add_addr_range(0, ULONG_MAX); |
419 | 538 | ||
420 | for (i = 0; i < nr_addr_ranges; i++) | 539 | for (i = 0; i < nr_addr_ranges; i++) |
421 | walk_pfn(opt_offset[i], opt_size[i]); | 540 | if (!opt_pid) |
541 | walk_pfn(opt_offset[i], opt_size[i]); | ||
542 | else | ||
543 | walk_task(opt_offset[i], opt_size[i]); | ||
422 | 544 | ||
423 | close(kpageflags_fd); | 545 | close(kpageflags_fd); |
424 | } | 546 | } |
@@ -446,8 +568,8 @@ static void usage(void) | |||
446 | " -r|--raw Raw mode, for kernel developers\n" | 568 | " -r|--raw Raw mode, for kernel developers\n" |
447 | " -a|--addr addr-spec Walk a range of pages\n" | 569 | " -a|--addr addr-spec Walk a range of pages\n" |
448 | " -b|--bits bits-spec Walk pages with specified bits\n" | 570 | " -b|--bits bits-spec Walk pages with specified bits\n" |
449 | #if 0 /* planned features */ | ||
450 | " -p|--pid pid Walk process address space\n" | 571 | " -p|--pid pid Walk process address space\n" |
572 | #if 0 /* planned features */ | ||
451 | " -f|--file filename Walk file address space\n" | 573 | " -f|--file filename Walk file address space\n" |
452 | #endif | 574 | #endif |
453 | " -l|--list Show page details in ranges\n" | 575 | " -l|--list Show page details in ranges\n" |
@@ -459,7 +581,7 @@ static void usage(void) | |||
459 | " N+M pages range from N to N+M-1\n" | 581 | " N+M pages range from N to N+M-1\n" |
460 | " N,M pages range from N to M-1\n" | 582 | " N,M pages range from N to M-1\n" |
461 | " N, pages range from N to end\n" | 583 | " N, pages range from N to end\n" |
462 | " ,M pages range from 0 to M\n" | 584 | " ,M pages range from 0 to M-1\n" |
463 | "bits-spec:\n" | 585 | "bits-spec:\n" |
464 | " bit1,bit2 (flags & (bit1|bit2)) != 0\n" | 586 | " bit1,bit2 (flags & (bit1|bit2)) != 0\n" |
465 | " bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" | 587 | " bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" |
@@ -496,21 +618,57 @@ static unsigned long long parse_number(const char *str) | |||
496 | 618 | ||
497 | static void parse_pid(const char *str) | 619 | static void parse_pid(const char *str) |
498 | { | 620 | { |
621 | FILE *file; | ||
622 | char buf[5000]; | ||
623 | |||
499 | opt_pid = parse_number(str); | 624 | opt_pid = parse_number(str); |
500 | } | ||
501 | 625 | ||
502 | static void parse_file(const char *name) | 626 | sprintf(buf, "/proc/%d/pagemap", opt_pid); |
503 | { | 627 | pagemap_fd = open(buf, O_RDONLY); |
628 | if (pagemap_fd < 0) { | ||
629 | perror(buf); | ||
630 | exit(EXIT_FAILURE); | ||
631 | } | ||
632 | |||
633 | sprintf(buf, "/proc/%d/maps", opt_pid); | ||
634 | file = fopen(buf, "r"); | ||
635 | if (!file) { | ||
636 | perror(buf); | ||
637 | exit(EXIT_FAILURE); | ||
638 | } | ||
639 | |||
640 | while (fgets(buf, sizeof(buf), file) != NULL) { | ||
641 | unsigned long vm_start; | ||
642 | unsigned long vm_end; | ||
643 | unsigned long long pgoff; | ||
644 | int major, minor; | ||
645 | char r, w, x, s; | ||
646 | unsigned long ino; | ||
647 | int n; | ||
648 | |||
649 | n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", | ||
650 | &vm_start, | ||
651 | &vm_end, | ||
652 | &r, &w, &x, &s, | ||
653 | &pgoff, | ||
654 | &major, &minor, | ||
655 | &ino); | ||
656 | if (n < 10) { | ||
657 | fprintf(stderr, "unexpected line: %s\n", buf); | ||
658 | continue; | ||
659 | } | ||
660 | pg_start[nr_vmas] = vm_start / page_size; | ||
661 | pg_end[nr_vmas] = vm_end / page_size; | ||
662 | if (++nr_vmas >= MAX_VMAS) { | ||
663 | fprintf(stderr, "too many VMAs\n"); | ||
664 | break; | ||
665 | } | ||
666 | } | ||
667 | fclose(file); | ||
504 | } | 668 | } |
505 | 669 | ||
506 | static void add_addr_range(unsigned long offset, unsigned long size) | 670 | static void parse_file(const char *name) |
507 | { | 671 | { |
508 | if (nr_addr_ranges >= MAX_ADDR_RANGES) | ||
509 | fatal("too much addr ranges\n"); | ||
510 | |||
511 | opt_offset[nr_addr_ranges] = offset; | ||
512 | opt_size[nr_addr_ranges] = size; | ||
513 | nr_addr_ranges++; | ||
514 | } | 672 | } |
515 | 673 | ||
516 | static void parse_addr_range(const char *optarg) | 674 | static void parse_addr_range(const char *optarg) |
@@ -676,8 +834,10 @@ int main(int argc, char *argv[]) | |||
676 | } | 834 | } |
677 | } | 835 | } |
678 | 836 | ||
837 | if (opt_list && opt_pid) | ||
838 | printf("voffset\t"); | ||
679 | if (opt_list == 1) | 839 | if (opt_list == 1) |
680 | printf("offset\tcount\tflags\n"); | 840 | printf("offset\tlen\tflags\n"); |
681 | if (opt_list == 2) | 841 | if (opt_list == 2) |
682 | printf("offset\tflags\n"); | 842 | printf("offset\tflags\n"); |
683 | 843 | ||
diff --git a/MAINTAINERS b/MAINTAINERS index 7c1c0b05b298..0c138ba86526 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -2331,7 +2331,9 @@ S: Orphan | |||
2331 | F: drivers/hwmon/ | 2331 | F: drivers/hwmon/ |
2332 | 2332 | ||
2333 | HARDWARE RANDOM NUMBER GENERATOR CORE | 2333 | HARDWARE RANDOM NUMBER GENERATOR CORE |
2334 | S: Orphan | 2334 | M: Matt Mackall <mpm@selenic.com> |
2335 | M: Herbert Xu <herbert@gondor.apana.org.au> | ||
2336 | S: Odd fixes | ||
2335 | F: Documentation/hw_random.txt | 2337 | F: Documentation/hw_random.txt |
2336 | F: drivers/char/hw_random/ | 2338 | F: drivers/char/hw_random/ |
2337 | F: include/linux/hw_random.h | 2339 | F: include/linux/hw_random.h |
diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h index 25da0017ec87..e42823e954aa 100644 --- a/arch/alpha/include/asm/fcntl.h +++ b/arch/alpha/include/asm/fcntl.h | |||
@@ -26,6 +26,8 @@ | |||
26 | #define F_GETOWN 6 /* for sockets. */ | 26 | #define F_GETOWN 6 /* for sockets. */ |
27 | #define F_SETSIG 10 /* for sockets. */ | 27 | #define F_SETSIG 10 /* for sockets. */ |
28 | #define F_GETSIG 11 /* for sockets. */ | 28 | #define F_GETSIG 11 /* for sockets. */ |
29 | #define F_SETOWN_EX 12 | ||
30 | #define F_GETOWN_EX 13 | ||
29 | 31 | ||
30 | /* for posix fcntl() and lockf() */ | 32 | /* for posix fcntl() and lockf() */ |
31 | #define F_RDLCK 1 | 33 | #define F_RDLCK 1 |
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index e302daecbe56..8e059e58b0ac 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c | |||
@@ -1016,7 +1016,7 @@ marvel_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *m | |||
1016 | { | 1016 | { |
1017 | struct marvel_agp_aperture *aper = agp->aperture.sysdata; | 1017 | struct marvel_agp_aperture *aper = agp->aperture.sysdata; |
1018 | return iommu_bind(aper->arena, aper->pg_start + pg_start, | 1018 | return iommu_bind(aper->arena, aper->pg_start + pg_start, |
1019 | mem->page_count, mem->memory); | 1019 | mem->page_count, mem->pages); |
1020 | } | 1020 | } |
1021 | 1021 | ||
1022 | static int | 1022 | static int |
diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 319fcb74611e..76686497b1e2 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c | |||
@@ -680,7 +680,7 @@ titan_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *me | |||
680 | { | 680 | { |
681 | struct titan_agp_aperture *aper = agp->aperture.sysdata; | 681 | struct titan_agp_aperture *aper = agp->aperture.sysdata; |
682 | return iommu_bind(aper->arena, aper->pg_start + pg_start, | 682 | return iommu_bind(aper->arena, aper->pg_start + pg_start, |
683 | mem->page_count, mem->memory); | 683 | mem->page_count, mem->pages); |
684 | } | 684 | } |
685 | 685 | ||
686 | static int | 686 | static int |
diff --git a/arch/alpha/kernel/pci_impl.h b/arch/alpha/kernel/pci_impl.h index 00edd04b585e..85457b2d4516 100644 --- a/arch/alpha/kernel/pci_impl.h +++ b/arch/alpha/kernel/pci_impl.h | |||
@@ -198,7 +198,7 @@ extern unsigned long size_for_memory(unsigned long max); | |||
198 | 198 | ||
199 | extern int iommu_reserve(struct pci_iommu_arena *, long, long); | 199 | extern int iommu_reserve(struct pci_iommu_arena *, long, long); |
200 | extern int iommu_release(struct pci_iommu_arena *, long, long); | 200 | extern int iommu_release(struct pci_iommu_arena *, long, long); |
201 | extern int iommu_bind(struct pci_iommu_arena *, long, long, unsigned long *); | 201 | extern int iommu_bind(struct pci_iommu_arena *, long, long, struct page **); |
202 | extern int iommu_unbind(struct pci_iommu_arena *, long, long); | 202 | extern int iommu_unbind(struct pci_iommu_arena *, long, long); |
203 | 203 | ||
204 | 204 | ||
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index d15aedfe6066..8449504f5e0b 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c | |||
@@ -876,7 +876,7 @@ iommu_release(struct pci_iommu_arena *arena, long pg_start, long pg_count) | |||
876 | 876 | ||
877 | int | 877 | int |
878 | iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, | 878 | iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, |
879 | unsigned long *physaddrs) | 879 | struct page **pages) |
880 | { | 880 | { |
881 | unsigned long flags; | 881 | unsigned long flags; |
882 | unsigned long *ptes; | 882 | unsigned long *ptes; |
@@ -896,7 +896,7 @@ iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, | |||
896 | } | 896 | } |
897 | 897 | ||
898 | for(i = 0, j = pg_start; i < pg_count; i++, j++) | 898 | for(i = 0, j = pg_start; i < pg_count; i++, j++) |
899 | ptes[j] = mk_iommu_pte(physaddrs[i]); | 899 | ptes[j] = mk_iommu_pte(page_to_phys(pages[i])); |
900 | 900 | ||
901 | spin_unlock_irqrestore(&arena->lock, flags); | 901 | spin_unlock_irqrestore(&arena->lock, flags); |
902 | 902 | ||
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h index 83e6ba338e2c..1a8c7279a28b 100644 --- a/arch/arm/include/asm/hardware/iop3xx-adma.h +++ b/arch/arm/include/asm/hardware/iop3xx-adma.h | |||
@@ -187,11 +187,74 @@ union iop3xx_desc { | |||
187 | void *ptr; | 187 | void *ptr; |
188 | }; | 188 | }; |
189 | 189 | ||
190 | /* No support for p+q operations */ | ||
191 | static inline int | ||
192 | iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op) | ||
193 | { | ||
194 | BUG(); | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | static inline void | ||
199 | iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt, | ||
200 | unsigned long flags) | ||
201 | { | ||
202 | BUG(); | ||
203 | } | ||
204 | |||
205 | static inline void | ||
206 | iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr) | ||
207 | { | ||
208 | BUG(); | ||
209 | } | ||
210 | |||
211 | static inline void | ||
212 | iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx, | ||
213 | dma_addr_t addr, unsigned char coef) | ||
214 | { | ||
215 | BUG(); | ||
216 | } | ||
217 | |||
218 | static inline int | ||
219 | iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op) | ||
220 | { | ||
221 | BUG(); | ||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | static inline void | ||
226 | iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, | ||
227 | unsigned long flags) | ||
228 | { | ||
229 | BUG(); | ||
230 | } | ||
231 | |||
232 | static inline void | ||
233 | iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) | ||
234 | { | ||
235 | BUG(); | ||
236 | } | ||
237 | |||
238 | #define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr | ||
239 | |||
240 | static inline void | ||
241 | iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx, | ||
242 | dma_addr_t *src) | ||
243 | { | ||
244 | BUG(); | ||
245 | } | ||
246 | |||
190 | static inline int iop_adma_get_max_xor(void) | 247 | static inline int iop_adma_get_max_xor(void) |
191 | { | 248 | { |
192 | return 32; | 249 | return 32; |
193 | } | 250 | } |
194 | 251 | ||
252 | static inline int iop_adma_get_max_pq(void) | ||
253 | { | ||
254 | BUG(); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
195 | static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) | 258 | static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) |
196 | { | 259 | { |
197 | int id = chan->device->id; | 260 | int id = chan->device->id; |
@@ -332,6 +395,11 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt, | |||
332 | return slot_cnt; | 395 | return slot_cnt; |
333 | } | 396 | } |
334 | 397 | ||
398 | static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc) | ||
399 | { | ||
400 | return 0; | ||
401 | } | ||
402 | |||
335 | static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, | 403 | static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, |
336 | struct iop_adma_chan *chan) | 404 | struct iop_adma_chan *chan) |
337 | { | 405 | { |
@@ -349,6 +417,14 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, | |||
349 | return 0; | 417 | return 0; |
350 | } | 418 | } |
351 | 419 | ||
420 | |||
421 | static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc, | ||
422 | struct iop_adma_chan *chan) | ||
423 | { | ||
424 | BUG(); | ||
425 | return 0; | ||
426 | } | ||
427 | |||
352 | static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, | 428 | static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, |
353 | struct iop_adma_chan *chan) | 429 | struct iop_adma_chan *chan) |
354 | { | 430 | { |
@@ -756,13 +832,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, | |||
756 | hw_desc->src[0] = val; | 832 | hw_desc->src[0] = val; |
757 | } | 833 | } |
758 | 834 | ||
759 | static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) | 835 | static inline enum sum_check_flags |
836 | iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) | ||
760 | { | 837 | { |
761 | struct iop3xx_desc_aau *hw_desc = desc->hw_desc; | 838 | struct iop3xx_desc_aau *hw_desc = desc->hw_desc; |
762 | struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; | 839 | struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; |
763 | 840 | ||
764 | iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); | 841 | iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); |
765 | return desc_ctrl.zero_result_err; | 842 | return desc_ctrl.zero_result_err << SUM_CHECK_P; |
766 | } | 843 | } |
767 | 844 | ||
768 | static inline void iop_chan_append(struct iop_adma_chan *chan) | 845 | static inline void iop_chan_append(struct iop_adma_chan *chan) |
diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h index 385c6e8cbbd2..59b8c3892f76 100644 --- a/arch/arm/include/asm/hardware/iop_adma.h +++ b/arch/arm/include/asm/hardware/iop_adma.h | |||
@@ -86,6 +86,7 @@ struct iop_adma_chan { | |||
86 | * @idx: pool index | 86 | * @idx: pool index |
87 | * @unmap_src_cnt: number of xor sources | 87 | * @unmap_src_cnt: number of xor sources |
88 | * @unmap_len: transaction bytecount | 88 | * @unmap_len: transaction bytecount |
89 | * @tx_list: list of descriptors that are associated with one operation | ||
89 | * @async_tx: support for the async_tx api | 90 | * @async_tx: support for the async_tx api |
90 | * @group_list: list of slots that make up a multi-descriptor transaction | 91 | * @group_list: list of slots that make up a multi-descriptor transaction |
91 | * for example transfer lengths larger than the supported hw max | 92 | * for example transfer lengths larger than the supported hw max |
@@ -102,10 +103,12 @@ struct iop_adma_desc_slot { | |||
102 | u16 idx; | 103 | u16 idx; |
103 | u16 unmap_src_cnt; | 104 | u16 unmap_src_cnt; |
104 | size_t unmap_len; | 105 | size_t unmap_len; |
106 | struct list_head tx_list; | ||
105 | struct dma_async_tx_descriptor async_tx; | 107 | struct dma_async_tx_descriptor async_tx; |
106 | union { | 108 | union { |
107 | u32 *xor_check_result; | 109 | u32 *xor_check_result; |
108 | u32 *crc32_result; | 110 | u32 *crc32_result; |
111 | u32 *pq_check_result; | ||
109 | }; | 112 | }; |
110 | }; | 113 | }; |
111 | 114 | ||
diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h index 5722e86f2174..6d3782d85a9f 100644 --- a/arch/arm/mach-iop13xx/include/mach/adma.h +++ b/arch/arm/mach-iop13xx/include/mach/adma.h | |||
@@ -150,6 +150,8 @@ static inline int iop_adma_get_max_xor(void) | |||
150 | return 16; | 150 | return 16; |
151 | } | 151 | } |
152 | 152 | ||
153 | #define iop_adma_get_max_pq iop_adma_get_max_xor | ||
154 | |||
153 | static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) | 155 | static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) |
154 | { | 156 | { |
155 | return __raw_readl(ADMA_ADAR(chan)); | 157 | return __raw_readl(ADMA_ADAR(chan)); |
@@ -211,7 +213,10 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op) | |||
211 | #define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT | 213 | #define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT |
212 | #define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT | 214 | #define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT |
213 | #define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT | 215 | #define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT |
216 | #define IOP_ADMA_PQ_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT | ||
214 | #define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o) | 217 | #define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o) |
218 | #define iop_chan_pq_slot_count iop_chan_xor_slot_count | ||
219 | #define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count | ||
215 | 220 | ||
216 | static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, | 221 | static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, |
217 | struct iop_adma_chan *chan) | 222 | struct iop_adma_chan *chan) |
@@ -220,6 +225,13 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, | |||
220 | return hw_desc->dest_addr; | 225 | return hw_desc->dest_addr; |
221 | } | 226 | } |
222 | 227 | ||
228 | static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc, | ||
229 | struct iop_adma_chan *chan) | ||
230 | { | ||
231 | struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; | ||
232 | return hw_desc->q_dest_addr; | ||
233 | } | ||
234 | |||
223 | static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, | 235 | static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, |
224 | struct iop_adma_chan *chan) | 236 | struct iop_adma_chan *chan) |
225 | { | 237 | { |
@@ -319,6 +331,58 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, | |||
319 | return 1; | 331 | return 1; |
320 | } | 332 | } |
321 | 333 | ||
334 | static inline void | ||
335 | iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt, | ||
336 | unsigned long flags) | ||
337 | { | ||
338 | struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; | ||
339 | union { | ||
340 | u32 value; | ||
341 | struct iop13xx_adma_desc_ctrl field; | ||
342 | } u_desc_ctrl; | ||
343 | |||
344 | u_desc_ctrl.value = 0; | ||
345 | u_desc_ctrl.field.src_select = src_cnt - 1; | ||
346 | u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ | ||
347 | u_desc_ctrl.field.pq_xfer_en = 1; | ||
348 | u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P); | ||
349 | u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; | ||
350 | hw_desc->desc_ctrl = u_desc_ctrl.value; | ||
351 | } | ||
352 | |||
353 | static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc) | ||
354 | { | ||
355 | struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; | ||
356 | union { | ||
357 | u32 value; | ||
358 | struct iop13xx_adma_desc_ctrl field; | ||
359 | } u_desc_ctrl; | ||
360 | |||
361 | u_desc_ctrl.value = hw_desc->desc_ctrl; | ||
362 | return u_desc_ctrl.field.pq_xfer_en; | ||
363 | } | ||
364 | |||
365 | static inline void | ||
366 | iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, | ||
367 | unsigned long flags) | ||
368 | { | ||
369 | struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; | ||
370 | union { | ||
371 | u32 value; | ||
372 | struct iop13xx_adma_desc_ctrl field; | ||
373 | } u_desc_ctrl; | ||
374 | |||
375 | u_desc_ctrl.value = 0; | ||
376 | u_desc_ctrl.field.src_select = src_cnt - 1; | ||
377 | u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ | ||
378 | u_desc_ctrl.field.zero_result = 1; | ||
379 | u_desc_ctrl.field.status_write_back_en = 1; | ||
380 | u_desc_ctrl.field.pq_xfer_en = 1; | ||
381 | u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P); | ||
382 | u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT; | ||
383 | hw_desc->desc_ctrl = u_desc_ctrl.value; | ||
384 | } | ||
385 | |||
322 | static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, | 386 | static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, |
323 | struct iop_adma_chan *chan, | 387 | struct iop_adma_chan *chan, |
324 | u32 byte_count) | 388 | u32 byte_count) |
@@ -351,6 +415,7 @@ iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) | |||
351 | } | 415 | } |
352 | } | 416 | } |
353 | 417 | ||
418 | #define iop_desc_set_pq_zero_sum_byte_count iop_desc_set_zero_sum_byte_count | ||
354 | 419 | ||
355 | static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, | 420 | static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, |
356 | struct iop_adma_chan *chan, | 421 | struct iop_adma_chan *chan, |
@@ -361,6 +426,16 @@ static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, | |||
361 | hw_desc->upper_dest_addr = 0; | 426 | hw_desc->upper_dest_addr = 0; |
362 | } | 427 | } |
363 | 428 | ||
429 | static inline void | ||
430 | iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr) | ||
431 | { | ||
432 | struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; | ||
433 | |||
434 | hw_desc->dest_addr = addr[0]; | ||
435 | hw_desc->q_dest_addr = addr[1]; | ||
436 | hw_desc->upper_dest_addr = 0; | ||
437 | } | ||
438 | |||
364 | static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, | 439 | static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, |
365 | dma_addr_t addr) | 440 | dma_addr_t addr) |
366 | { | 441 | { |
@@ -389,6 +464,29 @@ static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, | |||
389 | } | 464 | } |
390 | 465 | ||
391 | static inline void | 466 | static inline void |
467 | iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx, | ||
468 | dma_addr_t addr, unsigned char coef) | ||
469 | { | ||
470 | int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; | ||
471 | struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter; | ||
472 | struct iop13xx_adma_src *src; | ||
473 | int i = 0; | ||
474 | |||
475 | do { | ||
476 | iter = iop_hw_desc_slot_idx(hw_desc, i); | ||
477 | src = &iter->src[src_idx]; | ||
478 | src->src_addr = addr; | ||
479 | src->pq_upper_src_addr = 0; | ||
480 | src->pq_dmlt = coef; | ||
481 | slot_cnt -= slots_per_op; | ||
482 | if (slot_cnt) { | ||
483 | i += slots_per_op; | ||
484 | addr += IOP_ADMA_PQ_MAX_BYTE_COUNT; | ||
485 | } | ||
486 | } while (slot_cnt); | ||
487 | } | ||
488 | |||
489 | static inline void | ||
392 | iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, | 490 | iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, |
393 | struct iop_adma_chan *chan) | 491 | struct iop_adma_chan *chan) |
394 | { | 492 | { |
@@ -399,6 +497,15 @@ iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, | |||
399 | } | 497 | } |
400 | 498 | ||
401 | #define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr | 499 | #define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr |
500 | #define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr | ||
501 | |||
502 | static inline void | ||
503 | iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx, | ||
504 | dma_addr_t *src) | ||
505 | { | ||
506 | iop_desc_set_xor_src_addr(desc, pq_idx, src[pq_idx]); | ||
507 | iop_desc_set_xor_src_addr(desc, pq_idx+1, src[pq_idx+1]); | ||
508 | } | ||
402 | 509 | ||
403 | static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, | 510 | static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, |
404 | u32 next_desc_addr) | 511 | u32 next_desc_addr) |
@@ -428,18 +535,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, | |||
428 | hw_desc->block_fill_data = val; | 535 | hw_desc->block_fill_data = val; |
429 | } | 536 | } |
430 | 537 | ||
431 | static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) | 538 | static inline enum sum_check_flags |
539 | iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) | ||
432 | { | 540 | { |
433 | struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; | 541 | struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; |
434 | struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; | 542 | struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; |
435 | struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; | 543 | struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; |
544 | enum sum_check_flags flags; | ||
436 | 545 | ||
437 | BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); | 546 | BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); |
438 | 547 | ||
439 | if (desc_ctrl.pq_xfer_en) | 548 | flags = byte_count.zero_result_err_q << SUM_CHECK_Q; |
440 | return byte_count.zero_result_err_q; | 549 | flags |= byte_count.zero_result_err << SUM_CHECK_P; |
441 | else | 550 | |
442 | return byte_count.zero_result_err; | 551 | return flags; |
443 | } | 552 | } |
444 | 553 | ||
445 | static inline void iop_chan_append(struct iop_adma_chan *chan) | 554 | static inline void iop_chan_append(struct iop_adma_chan *chan) |
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index bee42c609df6..5c147fb66a01 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c | |||
@@ -477,10 +477,8 @@ void __init iop13xx_platform_init(void) | |||
477 | plat_data = &iop13xx_adma_0_data; | 477 | plat_data = &iop13xx_adma_0_data; |
478 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); | 478 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); |
479 | dma_cap_set(DMA_XOR, plat_data->cap_mask); | 479 | dma_cap_set(DMA_XOR, plat_data->cap_mask); |
480 | dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); | 480 | dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); |
481 | dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); | ||
482 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); | 481 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); |
483 | dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); | ||
484 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); | 482 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); |
485 | break; | 483 | break; |
486 | case IOP13XX_INIT_ADMA_1: | 484 | case IOP13XX_INIT_ADMA_1: |
@@ -489,10 +487,8 @@ void __init iop13xx_platform_init(void) | |||
489 | plat_data = &iop13xx_adma_1_data; | 487 | plat_data = &iop13xx_adma_1_data; |
490 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); | 488 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); |
491 | dma_cap_set(DMA_XOR, plat_data->cap_mask); | 489 | dma_cap_set(DMA_XOR, plat_data->cap_mask); |
492 | dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); | 490 | dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); |
493 | dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); | ||
494 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); | 491 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); |
495 | dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); | ||
496 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); | 492 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); |
497 | break; | 493 | break; |
498 | case IOP13XX_INIT_ADMA_2: | 494 | case IOP13XX_INIT_ADMA_2: |
@@ -501,14 +497,11 @@ void __init iop13xx_platform_init(void) | |||
501 | plat_data = &iop13xx_adma_2_data; | 497 | plat_data = &iop13xx_adma_2_data; |
502 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); | 498 | dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); |
503 | dma_cap_set(DMA_XOR, plat_data->cap_mask); | 499 | dma_cap_set(DMA_XOR, plat_data->cap_mask); |
504 | dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); | 500 | dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask); |
505 | dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); | ||
506 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); | 501 | dma_cap_set(DMA_MEMSET, plat_data->cap_mask); |
507 | dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); | ||
508 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); | 502 | dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); |
509 | dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); | 503 | dma_cap_set(DMA_PQ, plat_data->cap_mask); |
510 | dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); | 504 | dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); |
511 | dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask); | ||
512 | break; | 505 | break; |
513 | } | 506 | } |
514 | } | 507 | } |
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c index 3c127aabe214..1ff6a37e893c 100644 --- a/arch/arm/plat-iop/adma.c +++ b/arch/arm/plat-iop/adma.c | |||
@@ -179,7 +179,6 @@ static int __init iop3xx_adma_cap_init(void) | |||
179 | dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); | 179 | dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); |
180 | #else | 180 | #else |
181 | dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask); | 181 | dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask); |
182 | dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask); | ||
183 | dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); | 182 | dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); |
184 | #endif | 183 | #endif |
185 | 184 | ||
@@ -188,7 +187,6 @@ static int __init iop3xx_adma_cap_init(void) | |||
188 | dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); | 187 | dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); |
189 | #else | 188 | #else |
190 | dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask); | 189 | dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask); |
191 | dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask); | ||
192 | dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); | 190 | dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); |
193 | #endif | 191 | #endif |
194 | 192 | ||
@@ -198,7 +196,7 @@ static int __init iop3xx_adma_cap_init(void) | |||
198 | dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); | 196 | dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); |
199 | #else | 197 | #else |
200 | dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); | 198 | dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); |
201 | dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask); | 199 | dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask); |
202 | dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); | 200 | dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); |
203 | dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); | 201 | dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); |
204 | #endif | 202 | #endif |
diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c index be722fc1acff..0d4d3e3a4cfc 100644 --- a/arch/frv/kernel/pm.c +++ b/arch/frv/kernel/pm.c | |||
@@ -150,7 +150,7 @@ static int user_atoi(char __user *ubuf, size_t len) | |||
150 | /* | 150 | /* |
151 | * Send us to sleep. | 151 | * Send us to sleep. |
152 | */ | 152 | */ |
153 | static int sysctl_pm_do_suspend(ctl_table *ctl, int write, struct file *filp, | 153 | static int sysctl_pm_do_suspend(ctl_table *ctl, int write, |
154 | void __user *buffer, size_t *lenp, loff_t *fpos) | 154 | void __user *buffer, size_t *lenp, loff_t *fpos) |
155 | { | 155 | { |
156 | int retval, mode; | 156 | int retval, mode; |
@@ -198,13 +198,13 @@ static int try_set_cmode(int new_cmode) | |||
198 | } | 198 | } |
199 | 199 | ||
200 | 200 | ||
201 | static int cmode_procctl(ctl_table *ctl, int write, struct file *filp, | 201 | static int cmode_procctl(ctl_table *ctl, int write, |
202 | void __user *buffer, size_t *lenp, loff_t *fpos) | 202 | void __user *buffer, size_t *lenp, loff_t *fpos) |
203 | { | 203 | { |
204 | int new_cmode; | 204 | int new_cmode; |
205 | 205 | ||
206 | if (!write) | 206 | if (!write) |
207 | return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); | 207 | return proc_dointvec(ctl, write, buffer, lenp, fpos); |
208 | 208 | ||
209 | new_cmode = user_atoi(buffer, *lenp); | 209 | new_cmode = user_atoi(buffer, *lenp); |
210 | 210 | ||
@@ -301,13 +301,13 @@ static int try_set_cm(int new_cm) | |||
301 | return 0; | 301 | return 0; |
302 | } | 302 | } |
303 | 303 | ||
304 | static int p0_procctl(ctl_table *ctl, int write, struct file *filp, | 304 | static int p0_procctl(ctl_table *ctl, int write, |
305 | void __user *buffer, size_t *lenp, loff_t *fpos) | 305 | void __user *buffer, size_t *lenp, loff_t *fpos) |
306 | { | 306 | { |
307 | int new_p0; | 307 | int new_p0; |
308 | 308 | ||
309 | if (!write) | 309 | if (!write) |
310 | return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); | 310 | return proc_dointvec(ctl, write, buffer, lenp, fpos); |
311 | 311 | ||
312 | new_p0 = user_atoi(buffer, *lenp); | 312 | new_p0 = user_atoi(buffer, *lenp); |
313 | 313 | ||
@@ -345,13 +345,13 @@ static int p0_sysctl(ctl_table *table, | |||
345 | return 1; | 345 | return 1; |
346 | } | 346 | } |
347 | 347 | ||
348 | static int cm_procctl(ctl_table *ctl, int write, struct file *filp, | 348 | static int cm_procctl(ctl_table *ctl, int write, |
349 | void __user *buffer, size_t *lenp, loff_t *fpos) | 349 | void __user *buffer, size_t *lenp, loff_t *fpos) |
350 | { | 350 | { |
351 | int new_cm; | 351 | int new_cm; |
352 | 352 | ||
353 | if (!write) | 353 | if (!write) |
354 | return proc_dointvec(ctl, write, filp, buffer, lenp, fpos); | 354 | return proc_dointvec(ctl, write, buffer, lenp, fpos); |
355 | 355 | ||
356 | new_cm = user_atoi(buffer, *lenp); | 356 | new_cm = user_atoi(buffer, *lenp); |
357 | 357 | ||
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 3f04d4c406b7..b3deed8db619 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c | |||
@@ -56,12 +56,12 @@ int sysctl_lasatstring(ctl_table *table, | |||
56 | 56 | ||
57 | 57 | ||
58 | /* And the same for proc */ | 58 | /* And the same for proc */ |
59 | int proc_dolasatstring(ctl_table *table, int write, struct file *filp, | 59 | int proc_dolasatstring(ctl_table *table, int write, |
60 | void *buffer, size_t *lenp, loff_t *ppos) | 60 | void *buffer, size_t *lenp, loff_t *ppos) |
61 | { | 61 | { |
62 | int r; | 62 | int r; |
63 | 63 | ||
64 | r = proc_dostring(table, write, filp, buffer, lenp, ppos); | 64 | r = proc_dostring(table, write, buffer, lenp, ppos); |
65 | if ((!write) || r) | 65 | if ((!write) || r) |
66 | return r; | 66 | return r; |
67 | 67 | ||
@@ -71,12 +71,12 @@ int proc_dolasatstring(ctl_table *table, int write, struct file *filp, | |||
71 | } | 71 | } |
72 | 72 | ||
73 | /* proc function to write EEPROM after changing int entry */ | 73 | /* proc function to write EEPROM after changing int entry */ |
74 | int proc_dolasatint(ctl_table *table, int write, struct file *filp, | 74 | int proc_dolasatint(ctl_table *table, int write, |
75 | void *buffer, size_t *lenp, loff_t *ppos) | 75 | void *buffer, size_t *lenp, loff_t *ppos) |
76 | { | 76 | { |
77 | int r; | 77 | int r; |
78 | 78 | ||
79 | r = proc_dointvec(table, write, filp, buffer, lenp, ppos); | 79 | r = proc_dointvec(table, write, buffer, lenp, ppos); |
80 | if ((!write) || r) | 80 | if ((!write) || r) |
81 | return r; | 81 | return r; |
82 | 82 | ||
@@ -89,7 +89,7 @@ int proc_dolasatint(ctl_table *table, int write, struct file *filp, | |||
89 | static int rtctmp; | 89 | static int rtctmp; |
90 | 90 | ||
91 | /* proc function to read/write RealTime Clock */ | 91 | /* proc function to read/write RealTime Clock */ |
92 | int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, | 92 | int proc_dolasatrtc(ctl_table *table, int write, |
93 | void *buffer, size_t *lenp, loff_t *ppos) | 93 | void *buffer, size_t *lenp, loff_t *ppos) |
94 | { | 94 | { |
95 | struct timespec ts; | 95 | struct timespec ts; |
@@ -102,7 +102,7 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, | |||
102 | if (rtctmp < 0) | 102 | if (rtctmp < 0) |
103 | rtctmp = 0; | 103 | rtctmp = 0; |
104 | } | 104 | } |
105 | r = proc_dointvec(table, write, filp, buffer, lenp, ppos); | 105 | r = proc_dointvec(table, write, buffer, lenp, ppos); |
106 | if (r) | 106 | if (r) |
107 | return r; | 107 | return r; |
108 | 108 | ||
@@ -154,7 +154,7 @@ int sysctl_lasat_rtc(ctl_table *table, | |||
154 | #endif | 154 | #endif |
155 | 155 | ||
156 | #ifdef CONFIG_INET | 156 | #ifdef CONFIG_INET |
157 | int proc_lasat_ip(ctl_table *table, int write, struct file *filp, | 157 | int proc_lasat_ip(ctl_table *table, int write, |
158 | void *buffer, size_t *lenp, loff_t *ppos) | 158 | void *buffer, size_t *lenp, loff_t *ppos) |
159 | { | 159 | { |
160 | unsigned int ip; | 160 | unsigned int ip; |
@@ -231,12 +231,12 @@ static int sysctl_lasat_prid(ctl_table *table, | |||
231 | return 0; | 231 | return 0; |
232 | } | 232 | } |
233 | 233 | ||
234 | int proc_lasat_prid(ctl_table *table, int write, struct file *filp, | 234 | int proc_lasat_prid(ctl_table *table, int write, |
235 | void *buffer, size_t *lenp, loff_t *ppos) | 235 | void *buffer, size_t *lenp, loff_t *ppos) |
236 | { | 236 | { |
237 | int r; | 237 | int r; |
238 | 238 | ||
239 | r = proc_dointvec(table, write, filp, buffer, lenp, ppos); | 239 | r = proc_dointvec(table, write, buffer, lenp, ppos); |
240 | if (r < 0) | 240 | if (r < 0) |
241 | return r; | 241 | return r; |
242 | if (write) { | 242 | if (write) { |
diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h index 1e1c824764ee..5f39d5597ced 100644 --- a/arch/parisc/include/asm/fcntl.h +++ b/arch/parisc/include/asm/fcntl.h | |||
@@ -28,6 +28,8 @@ | |||
28 | #define F_SETOWN 12 /* for sockets. */ | 28 | #define F_SETOWN 12 /* for sockets. */ |
29 | #define F_SETSIG 13 /* for sockets. */ | 29 | #define F_SETSIG 13 /* for sockets. */ |
30 | #define F_GETSIG 14 /* for sockets. */ | 30 | #define F_GETSIG 14 /* for sockets. */ |
31 | #define F_GETOWN_EX 15 | ||
32 | #define F_SETOWN_EX 16 | ||
31 | 33 | ||
32 | /* for posix fcntl() and lockf() */ | 34 | /* for posix fcntl() and lockf() */ |
33 | #define F_RDLCK 01 | 35 | #define F_RDLCK 01 |
diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h new file mode 100644 index 000000000000..a67aeed17d40 --- /dev/null +++ b/arch/powerpc/include/asm/fsldma.h | |||
@@ -0,0 +1,136 @@ | |||
1 | /* | ||
2 | * Freescale MPC83XX / MPC85XX DMA Controller | ||
3 | * | ||
4 | * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu> | ||
5 | * | ||
6 | * This file is licensed under the terms of the GNU General Public License | ||
7 | * version 2. This program is licensed "as is" without any warranty of any | ||
8 | * kind, whether express or implied. | ||
9 | */ | ||
10 | |||
11 | #ifndef __ARCH_POWERPC_ASM_FSLDMA_H__ | ||
12 | #define __ARCH_POWERPC_ASM_FSLDMA_H__ | ||
13 | |||
14 | #include <linux/dmaengine.h> | ||
15 | |||
16 | /* | ||
17 | * Definitions for the Freescale DMA controller's DMA_SLAVE implementation | ||
18 | * | ||
19 | * The Freescale DMA_SLAVE implementation was designed to handle many-to-many | ||
20 | * transfers. An example usage would be an accelerated copy between two | ||
21 | * scatterlists. Another example use would be an accelerated copy from | ||
22 | * multiple non-contiguous device buffers into a single scatterlist. | ||
23 | * | ||
24 | * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This | ||
25 | * structure contains a list of hardware addresses that should be copied | ||
26 | * to/from the scatterlist passed into device_prep_slave_sg(). The structure | ||
27 | * also has some fields to enable hardware-specific features. | ||
28 | */ | ||
29 | |||
30 | /** | ||
31 | * struct fsl_dma_hw_addr | ||
32 | * @entry: linked list entry | ||
33 | * @address: the hardware address | ||
34 | * @length: length to transfer | ||
35 | * | ||
36 | * Holds a single physical hardware address / length pair for use | ||
37 | * with the DMAEngine DMA_SLAVE API. | ||
38 | */ | ||
39 | struct fsl_dma_hw_addr { | ||
40 | struct list_head entry; | ||
41 | |||
42 | dma_addr_t address; | ||
43 | size_t length; | ||
44 | }; | ||
45 | |||
46 | /** | ||
47 | * struct fsl_dma_slave | ||
48 | * @addresses: a linked list of struct fsl_dma_hw_addr structures | ||
49 | * @request_count: value for DMA request count | ||
50 | * @src_loop_size: setup and enable constant source-address DMA transfers | ||
51 | * @dst_loop_size: setup and enable constant destination address DMA transfers | ||
52 | * @external_start: enable externally started DMA transfers | ||
53 | * @external_pause: enable externally paused DMA transfers | ||
54 | * | ||
55 | * Holds a list of address / length pairs for use with the DMAEngine | ||
56 | * DMA_SLAVE API implementation for the Freescale DMA controller. | ||
57 | */ | ||
58 | struct fsl_dma_slave { | ||
59 | |||
60 | /* List of hardware address/length pairs */ | ||
61 | struct list_head addresses; | ||
62 | |||
63 | /* Support for extra controller features */ | ||
64 | unsigned int request_count; | ||
65 | unsigned int src_loop_size; | ||
66 | unsigned int dst_loop_size; | ||
67 | bool external_start; | ||
68 | bool external_pause; | ||
69 | }; | ||
70 | |||
71 | /** | ||
72 | * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave | ||
73 | * @slave: the &struct fsl_dma_slave to add to | ||
74 | * @address: the hardware address to add | ||
75 | * @length: the length of bytes to transfer from @address | ||
76 | * | ||
77 | * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on | ||
78 | * success, -ERRNO otherwise. | ||
79 | */ | ||
80 | static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave, | ||
81 | dma_addr_t address, size_t length) | ||
82 | { | ||
83 | struct fsl_dma_hw_addr *addr; | ||
84 | |||
85 | addr = kzalloc(sizeof(*addr), GFP_ATOMIC); | ||
86 | if (!addr) | ||
87 | return -ENOMEM; | ||
88 | |||
89 | INIT_LIST_HEAD(&addr->entry); | ||
90 | addr->address = address; | ||
91 | addr->length = length; | ||
92 | |||
93 | list_add_tail(&addr->entry, &slave->addresses); | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | /** | ||
98 | * fsl_dma_slave_free - free a struct fsl_dma_slave | ||
99 | * @slave: the struct fsl_dma_slave to free | ||
100 | * | ||
101 | * Free a struct fsl_dma_slave and all associated address/length pairs | ||
102 | */ | ||
103 | static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave) | ||
104 | { | ||
105 | struct fsl_dma_hw_addr *addr, *tmp; | ||
106 | |||
107 | if (slave) { | ||
108 | list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) { | ||
109 | list_del(&addr->entry); | ||
110 | kfree(addr); | ||
111 | } | ||
112 | |||
113 | kfree(slave); | ||
114 | } | ||
115 | } | ||
116 | |||
117 | /** | ||
118 | * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave | ||
119 | * @gfp: the flags to pass to kmalloc when allocating this structure | ||
120 | * | ||
121 | * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new | ||
122 | * struct fsl_dma_slave on success, or NULL on failure. | ||
123 | */ | ||
124 | static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp) | ||
125 | { | ||
126 | struct fsl_dma_slave *slave; | ||
127 | |||
128 | slave = kzalloc(sizeof(*slave), gfp); | ||
129 | if (!slave) | ||
130 | return NULL; | ||
131 | |||
132 | INIT_LIST_HEAD(&slave->addresses); | ||
133 | return slave; | ||
134 | } | ||
135 | |||
136 | #endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */ | ||
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 264528e4f58d..b55fd7ed1c31 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c | |||
@@ -50,10 +50,9 @@ static struct platform_device *appldata_pdev; | |||
50 | * /proc entries (sysctl) | 50 | * /proc entries (sysctl) |
51 | */ | 51 | */ |
52 | static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; | 52 | static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; |
53 | static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, | 53 | static int appldata_timer_handler(ctl_table *ctl, int write, |
54 | void __user *buffer, size_t *lenp, loff_t *ppos); | 54 | void __user *buffer, size_t *lenp, loff_t *ppos); |
55 | static int appldata_interval_handler(ctl_table *ctl, int write, | 55 | static int appldata_interval_handler(ctl_table *ctl, int write, |
56 | struct file *filp, | ||
57 | void __user *buffer, | 56 | void __user *buffer, |
58 | size_t *lenp, loff_t *ppos); | 57 | size_t *lenp, loff_t *ppos); |
59 | 58 | ||
@@ -247,7 +246,7 @@ __appldata_vtimer_setup(int cmd) | |||
247 | * Start/Stop timer, show status of timer (0 = not active, 1 = active) | 246 | * Start/Stop timer, show status of timer (0 = not active, 1 = active) |
248 | */ | 247 | */ |
249 | static int | 248 | static int |
250 | appldata_timer_handler(ctl_table *ctl, int write, struct file *filp, | 249 | appldata_timer_handler(ctl_table *ctl, int write, |
251 | void __user *buffer, size_t *lenp, loff_t *ppos) | 250 | void __user *buffer, size_t *lenp, loff_t *ppos) |
252 | { | 251 | { |
253 | int len; | 252 | int len; |
@@ -289,7 +288,7 @@ out: | |||
289 | * current timer interval. | 288 | * current timer interval. |
290 | */ | 289 | */ |
291 | static int | 290 | static int |
292 | appldata_interval_handler(ctl_table *ctl, int write, struct file *filp, | 291 | appldata_interval_handler(ctl_table *ctl, int write, |
293 | void __user *buffer, size_t *lenp, loff_t *ppos) | 292 | void __user *buffer, size_t *lenp, loff_t *ppos) |
294 | { | 293 | { |
295 | int len, interval; | 294 | int len, interval; |
@@ -335,7 +334,7 @@ out: | |||
335 | * monitoring (0 = not in process, 1 = in process) | 334 | * monitoring (0 = not in process, 1 = in process) |
336 | */ | 335 | */ |
337 | static int | 336 | static int |
338 | appldata_generic_handler(ctl_table *ctl, int write, struct file *filp, | 337 | appldata_generic_handler(ctl_table *ctl, int write, |
339 | void __user *buffer, size_t *lenp, loff_t *ppos) | 338 | void __user *buffer, size_t *lenp, loff_t *ppos) |
340 | { | 339 | { |
341 | struct appldata_ops *ops = NULL, *tmp_ops; | 340 | struct appldata_ops *ops = NULL, *tmp_ops; |
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 4c512561687d..20f282c911c2 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c | |||
@@ -881,11 +881,11 @@ static int debug_active=1; | |||
881 | * if debug_active is already off | 881 | * if debug_active is already off |
882 | */ | 882 | */ |
883 | static int | 883 | static int |
884 | s390dbf_procactive(ctl_table *table, int write, struct file *filp, | 884 | s390dbf_procactive(ctl_table *table, int write, |
885 | void __user *buffer, size_t *lenp, loff_t *ppos) | 885 | void __user *buffer, size_t *lenp, loff_t *ppos) |
886 | { | 886 | { |
887 | if (!write || debug_stoppable || !debug_active) | 887 | if (!write || debug_stoppable || !debug_active) |
888 | return proc_dointvec(table, write, filp, buffer, lenp, ppos); | 888 | return proc_dointvec(table, write, buffer, lenp, ppos); |
889 | else | 889 | else |
890 | return 0; | 890 | return 0; |
891 | } | 891 | } |
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 413c240cbca7..b201135cc18c 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c | |||
@@ -262,7 +262,7 @@ cmm_skip_blanks(char *cp, char **endp) | |||
262 | static struct ctl_table cmm_table[]; | 262 | static struct ctl_table cmm_table[]; |
263 | 263 | ||
264 | static int | 264 | static int |
265 | cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, | 265 | cmm_pages_handler(ctl_table *ctl, int write, |
266 | void __user *buffer, size_t *lenp, loff_t *ppos) | 266 | void __user *buffer, size_t *lenp, loff_t *ppos) |
267 | { | 267 | { |
268 | char buf[16], *p; | 268 | char buf[16], *p; |
@@ -303,7 +303,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, | |||
303 | } | 303 | } |
304 | 304 | ||
305 | static int | 305 | static int |
306 | cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, | 306 | cmm_timeout_handler(ctl_table *ctl, int write, |
307 | void __user *buffer, size_t *lenp, loff_t *ppos) | 307 | void __user *buffer, size_t *lenp, loff_t *ppos) |
308 | { | 308 | { |
309 | char buf[64], *p; | 309 | char buf[64], *p; |
diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig index b91fa8dbf047..4d58eb0973d4 100644 --- a/arch/sh/drivers/dma/Kconfig +++ b/arch/sh/drivers/dma/Kconfig | |||
@@ -1,12 +1,9 @@ | |||
1 | menu "DMA support" | 1 | menu "DMA support" |
2 | 2 | ||
3 | config SH_DMA_API | ||
4 | bool | ||
5 | 3 | ||
6 | config SH_DMA | 4 | config SH_DMA |
7 | bool "SuperH on-chip DMA controller (DMAC) support" | 5 | bool "SuperH on-chip DMA controller (DMAC) support" |
8 | depends on CPU_SH3 || CPU_SH4 | 6 | depends on CPU_SH3 || CPU_SH4 |
9 | select SH_DMA_API | ||
10 | default n | 7 | default n |
11 | 8 | ||
12 | config SH_DMA_IRQ_MULTI | 9 | config SH_DMA_IRQ_MULTI |
@@ -19,6 +16,15 @@ config SH_DMA_IRQ_MULTI | |||
19 | CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \ | 16 | CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 || \ |
20 | CPU_SUBTYPE_SH7760 | 17 | CPU_SUBTYPE_SH7760 |
21 | 18 | ||
19 | config SH_DMA_API | ||
20 | depends on SH_DMA | ||
21 | bool "SuperH DMA API support" | ||
22 | default n | ||
23 | help | ||
24 | SH_DMA_API always enabled DMA API of used SuperH. | ||
25 | If you want to use DMA ENGINE, you must not enable this. | ||
26 | Please enable DMA_ENGINE and SH_DMAE. | ||
27 | |||
22 | config NR_ONCHIP_DMA_CHANNELS | 28 | config NR_ONCHIP_DMA_CHANNELS |
23 | int | 29 | int |
24 | depends on SH_DMA | 30 | depends on SH_DMA |
diff --git a/arch/sh/drivers/dma/Makefile b/arch/sh/drivers/dma/Makefile index c6068137b46f..d88c9484762c 100644 --- a/arch/sh/drivers/dma/Makefile +++ b/arch/sh/drivers/dma/Makefile | |||
@@ -2,8 +2,7 @@ | |||
2 | # Makefile for the SuperH DMA specific kernel interface routines under Linux. | 2 | # Makefile for the SuperH DMA specific kernel interface routines under Linux. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_SH_DMA_API) += dma-api.o dma-sysfs.o | 5 | obj-$(CONFIG_SH_DMA_API) += dma-sh.o dma-api.o dma-sysfs.o |
6 | obj-$(CONFIG_SH_DMA) += dma-sh.o | ||
7 | obj-$(CONFIG_PVR2_DMA) += dma-pvr2.o | 6 | obj-$(CONFIG_PVR2_DMA) += dma-pvr2.o |
8 | obj-$(CONFIG_G2_DMA) += dma-g2.o | 7 | obj-$(CONFIG_G2_DMA) += dma-g2.o |
9 | obj-$(CONFIG_SH_DMABRG) += dmabrg.o | 8 | obj-$(CONFIG_SH_DMABRG) += dmabrg.o |
diff --git a/arch/sh/include/asm/dma-sh.h b/arch/sh/include/asm/dma-sh.h index 68a5f4cb0343..78eed3e0bdf5 100644 --- a/arch/sh/include/asm/dma-sh.h +++ b/arch/sh/include/asm/dma-sh.h | |||
@@ -116,4 +116,17 @@ static u32 dma_base_addr[] __maybe_unused = { | |||
116 | #define CHCR 0x0C | 116 | #define CHCR 0x0C |
117 | #define DMAOR 0x40 | 117 | #define DMAOR 0x40 |
118 | 118 | ||
119 | /* | ||
120 | * for dma engine | ||
121 | * | ||
122 | * SuperH DMA mode | ||
123 | */ | ||
124 | #define SHDMA_MIX_IRQ (1 << 1) | ||
125 | #define SHDMA_DMAOR1 (1 << 2) | ||
126 | #define SHDMA_DMAE1 (1 << 3) | ||
127 | |||
128 | struct sh_dmae_pdata { | ||
129 | unsigned int mode; | ||
130 | }; | ||
131 | |||
119 | #endif /* __DMA_SH_H */ | 132 | #endif /* __DMA_SH_H */ |
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index e63cf7d441e1..139d4c1a33a7 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog; | |||
40 | #define NMI_INVALID 3 | 40 | #define NMI_INVALID 3 |
41 | 41 | ||
42 | struct ctl_table; | 42 | struct ctl_table; |
43 | struct file; | 43 | extern int proc_nmi_enabled(struct ctl_table *, int , |
44 | extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, | ||
45 | void __user *, size_t *, loff_t *); | 44 | void __user *, size_t *, loff_t *); |
46 | extern int unknown_nmi_panic; | 45 | extern int unknown_nmi_panic; |
47 | 46 | ||
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index cb66a22d98ad..7ff61d6a188a 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | |||
508 | /* | 508 | /* |
509 | * proc handler for /proc/sys/kernel/nmi | 509 | * proc handler for /proc/sys/kernel/nmi |
510 | */ | 510 | */ |
511 | int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | 511 | int proc_nmi_enabled(struct ctl_table *table, int write, |
512 | void __user *buffer, size_t *length, loff_t *ppos) | 512 | void __user *buffer, size_t *length, loff_t *ppos) |
513 | { | 513 | { |
514 | int old_state; | 514 | int old_state; |
515 | 515 | ||
516 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; | 516 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; |
517 | old_state = nmi_watchdog_enabled; | 517 | old_state = nmi_watchdog_enabled; |
518 | proc_dointvec(table, write, file, buffer, length, ppos); | 518 | proc_dointvec(table, write, buffer, length, ppos); |
519 | if (!!old_state == !!nmi_watchdog_enabled) | 519 | if (!!old_state == !!nmi_watchdog_enabled) |
520 | return 0; | 520 | return 0; |
521 | 521 | ||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index cf53a78e2dcf..8cb4974ff599 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void) | |||
228 | } | 228 | } |
229 | 229 | ||
230 | #ifdef CONFIG_SYSCTL | 230 | #ifdef CONFIG_SYSCTL |
231 | |||
232 | static int | ||
233 | vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | ||
234 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
235 | { | ||
236 | return proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | ||
237 | } | ||
238 | |||
239 | static ctl_table kernel_table2[] = { | 231 | static ctl_table kernel_table2[] = { |
240 | { .procname = "vsyscall64", | 232 | { .procname = "vsyscall64", |
241 | .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), | 233 | .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), |
242 | .mode = 0644, | 234 | .mode = 0644, |
243 | .proc_handler = vsyscall_sysctl_change }, | 235 | .proc_handler = proc_dointvec }, |
244 | {} | 236 | {} |
245 | }; | 237 | }; |
246 | 238 | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 82728f2c6d55..f4cee9028cf0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, | |||
167 | info.si_errno = 0; | 167 | info.si_errno = 0; |
168 | info.si_code = si_code; | 168 | info.si_code = si_code; |
169 | info.si_addr = (void __user *)address; | 169 | info.si_addr = (void __user *)address; |
170 | info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0; | ||
170 | 171 | ||
171 | force_sig_info(si_signo, &info, tsk); | 172 | force_sig_info(si_signo, &info, tsk); |
172 | } | 173 | } |
@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code, | |||
790 | } | 791 | } |
791 | 792 | ||
792 | static void | 793 | static void |
793 | do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) | 794 | do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, |
795 | unsigned int fault) | ||
794 | { | 796 | { |
795 | struct task_struct *tsk = current; | 797 | struct task_struct *tsk = current; |
796 | struct mm_struct *mm = tsk->mm; | 798 | struct mm_struct *mm = tsk->mm; |
799 | int code = BUS_ADRERR; | ||
797 | 800 | ||
798 | up_read(&mm->mmap_sem); | 801 | up_read(&mm->mmap_sem); |
799 | 802 | ||
@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) | |||
809 | tsk->thread.error_code = error_code; | 812 | tsk->thread.error_code = error_code; |
810 | tsk->thread.trap_no = 14; | 813 | tsk->thread.trap_no = 14; |
811 | 814 | ||
812 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); | 815 | #ifdef CONFIG_MEMORY_FAILURE |
816 | if (fault & VM_FAULT_HWPOISON) { | ||
817 | printk(KERN_ERR | ||
818 | "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", | ||
819 | tsk->comm, tsk->pid, address); | ||
820 | code = BUS_MCEERR_AR; | ||
821 | } | ||
822 | #endif | ||
823 | force_sig_info_fault(SIGBUS, code, address, tsk); | ||
813 | } | 824 | } |
814 | 825 | ||
815 | static noinline void | 826 | static noinline void |
@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, | |||
819 | if (fault & VM_FAULT_OOM) { | 830 | if (fault & VM_FAULT_OOM) { |
820 | out_of_memory(regs, error_code, address); | 831 | out_of_memory(regs, error_code, address); |
821 | } else { | 832 | } else { |
822 | if (fault & VM_FAULT_SIGBUS) | 833 | if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON)) |
823 | do_sigbus(regs, error_code, address); | 834 | do_sigbus(regs, error_code, address, fault); |
824 | else | 835 | else |
825 | BUG(); | 836 | BUG(); |
826 | } | 837 | } |
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index d8fb39145986..e5aeb2b79e6f 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig | |||
@@ -14,3 +14,12 @@ config ASYNC_MEMSET | |||
14 | tristate | 14 | tristate |
15 | select ASYNC_CORE | 15 | select ASYNC_CORE |
16 | 16 | ||
17 | config ASYNC_PQ | ||
18 | tristate | ||
19 | select ASYNC_CORE | ||
20 | |||
21 | config ASYNC_RAID6_RECOV | ||
22 | tristate | ||
23 | select ASYNC_CORE | ||
24 | select ASYNC_PQ | ||
25 | |||
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 27baa7d52fbc..d1e0e6f72bc1 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile | |||
@@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o | |||
2 | obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o | 2 | obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o |
3 | obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o | 3 | obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o |
4 | obj-$(CONFIG_ASYNC_XOR) += async_xor.o | 4 | obj-$(CONFIG_ASYNC_XOR) += async_xor.o |
5 | obj-$(CONFIG_ASYNC_PQ) += async_pq.o | ||
6 | obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o | ||
7 | obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o | ||
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index ddccfb01c416..0ec1fb69d4ea 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c | |||
@@ -33,28 +33,31 @@ | |||
33 | * async_memcpy - attempt to copy memory with a dma engine. | 33 | * async_memcpy - attempt to copy memory with a dma engine. |
34 | * @dest: destination page | 34 | * @dest: destination page |
35 | * @src: src page | 35 | * @src: src page |
36 | * @offset: offset in pages to start transaction | 36 | * @dest_offset: offset into 'dest' to start transaction |
37 | * @src_offset: offset into 'src' to start transaction | ||
37 | * @len: length in bytes | 38 | * @len: length in bytes |
38 | * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, | 39 | * @submit: submission / completion modifiers |
39 | * @depend_tx: memcpy depends on the result of this transaction | 40 | * |
40 | * @cb_fn: function to call when the memcpy completes | 41 | * honored flags: ASYNC_TX_ACK |
41 | * @cb_param: parameter to pass to the callback routine | ||
42 | */ | 42 | */ |
43 | struct dma_async_tx_descriptor * | 43 | struct dma_async_tx_descriptor * |
44 | async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, | 44 | async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, |
45 | unsigned int src_offset, size_t len, enum async_tx_flags flags, | 45 | unsigned int src_offset, size_t len, |
46 | struct dma_async_tx_descriptor *depend_tx, | 46 | struct async_submit_ctl *submit) |
47 | dma_async_tx_callback cb_fn, void *cb_param) | ||
48 | { | 47 | { |
49 | struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, | 48 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY, |
50 | &dest, 1, &src, 1, len); | 49 | &dest, 1, &src, 1, len); |
51 | struct dma_device *device = chan ? chan->device : NULL; | 50 | struct dma_device *device = chan ? chan->device : NULL; |
52 | struct dma_async_tx_descriptor *tx = NULL; | 51 | struct dma_async_tx_descriptor *tx = NULL; |
53 | 52 | ||
54 | if (device) { | 53 | if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { |
55 | dma_addr_t dma_dest, dma_src; | 54 | dma_addr_t dma_dest, dma_src; |
56 | unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; | 55 | unsigned long dma_prep_flags = 0; |
57 | 56 | ||
57 | if (submit->cb_fn) | ||
58 | dma_prep_flags |= DMA_PREP_INTERRUPT; | ||
59 | if (submit->flags & ASYNC_TX_FENCE) | ||
60 | dma_prep_flags |= DMA_PREP_FENCE; | ||
58 | dma_dest = dma_map_page(device->dev, dest, dest_offset, len, | 61 | dma_dest = dma_map_page(device->dev, dest, dest_offset, len, |
59 | DMA_FROM_DEVICE); | 62 | DMA_FROM_DEVICE); |
60 | 63 | ||
@@ -67,13 +70,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, | |||
67 | 70 | ||
68 | if (tx) { | 71 | if (tx) { |
69 | pr_debug("%s: (async) len: %zu\n", __func__, len); | 72 | pr_debug("%s: (async) len: %zu\n", __func__, len); |
70 | async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); | 73 | async_tx_submit(chan, tx, submit); |
71 | } else { | 74 | } else { |
72 | void *dest_buf, *src_buf; | 75 | void *dest_buf, *src_buf; |
73 | pr_debug("%s: (sync) len: %zu\n", __func__, len); | 76 | pr_debug("%s: (sync) len: %zu\n", __func__, len); |
74 | 77 | ||
75 | /* wait for any prerequisite operations */ | 78 | /* wait for any prerequisite operations */ |
76 | async_tx_quiesce(&depend_tx); | 79 | async_tx_quiesce(&submit->depend_tx); |
77 | 80 | ||
78 | dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; | 81 | dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; |
79 | src_buf = kmap_atomic(src, KM_USER1) + src_offset; | 82 | src_buf = kmap_atomic(src, KM_USER1) + src_offset; |
@@ -83,26 +86,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, | |||
83 | kunmap_atomic(dest_buf, KM_USER0); | 86 | kunmap_atomic(dest_buf, KM_USER0); |
84 | kunmap_atomic(src_buf, KM_USER1); | 87 | kunmap_atomic(src_buf, KM_USER1); |
85 | 88 | ||
86 | async_tx_sync_epilog(cb_fn, cb_param); | 89 | async_tx_sync_epilog(submit); |
87 | } | 90 | } |
88 | 91 | ||
89 | return tx; | 92 | return tx; |
90 | } | 93 | } |
91 | EXPORT_SYMBOL_GPL(async_memcpy); | 94 | EXPORT_SYMBOL_GPL(async_memcpy); |
92 | 95 | ||
93 | static int __init async_memcpy_init(void) | ||
94 | { | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | static void __exit async_memcpy_exit(void) | ||
99 | { | ||
100 | do { } while (0); | ||
101 | } | ||
102 | |||
103 | module_init(async_memcpy_init); | ||
104 | module_exit(async_memcpy_exit); | ||
105 | |||
106 | MODULE_AUTHOR("Intel Corporation"); | 96 | MODULE_AUTHOR("Intel Corporation"); |
107 | MODULE_DESCRIPTION("asynchronous memcpy api"); | 97 | MODULE_DESCRIPTION("asynchronous memcpy api"); |
108 | MODULE_LICENSE("GPL"); | 98 | MODULE_LICENSE("GPL"); |
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index 5b5eb99bb244..58e4a8752aee 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c | |||
@@ -35,26 +35,26 @@ | |||
35 | * @val: fill value | 35 | * @val: fill value |
36 | * @offset: offset in pages to start transaction | 36 | * @offset: offset in pages to start transaction |
37 | * @len: length in bytes | 37 | * @len: length in bytes |
38 | * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK | 38 | * |
39 | * @depend_tx: memset depends on the result of this transaction | 39 | * honored flags: ASYNC_TX_ACK |
40 | * @cb_fn: function to call when the memcpy completes | ||
41 | * @cb_param: parameter to pass to the callback routine | ||
42 | */ | 40 | */ |
43 | struct dma_async_tx_descriptor * | 41 | struct dma_async_tx_descriptor * |
44 | async_memset(struct page *dest, int val, unsigned int offset, | 42 | async_memset(struct page *dest, int val, unsigned int offset, size_t len, |
45 | size_t len, enum async_tx_flags flags, | 43 | struct async_submit_ctl *submit) |
46 | struct dma_async_tx_descriptor *depend_tx, | ||
47 | dma_async_tx_callback cb_fn, void *cb_param) | ||
48 | { | 44 | { |
49 | struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, | 45 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, |
50 | &dest, 1, NULL, 0, len); | 46 | &dest, 1, NULL, 0, len); |
51 | struct dma_device *device = chan ? chan->device : NULL; | 47 | struct dma_device *device = chan ? chan->device : NULL; |
52 | struct dma_async_tx_descriptor *tx = NULL; | 48 | struct dma_async_tx_descriptor *tx = NULL; |
53 | 49 | ||
54 | if (device) { | 50 | if (device && is_dma_fill_aligned(device, offset, 0, len)) { |
55 | dma_addr_t dma_dest; | 51 | dma_addr_t dma_dest; |
56 | unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; | 52 | unsigned long dma_prep_flags = 0; |
57 | 53 | ||
54 | if (submit->cb_fn) | ||
55 | dma_prep_flags |= DMA_PREP_INTERRUPT; | ||
56 | if (submit->flags & ASYNC_TX_FENCE) | ||
57 | dma_prep_flags |= DMA_PREP_FENCE; | ||
58 | dma_dest = dma_map_page(device->dev, dest, offset, len, | 58 | dma_dest = dma_map_page(device->dev, dest, offset, len, |
59 | DMA_FROM_DEVICE); | 59 | DMA_FROM_DEVICE); |
60 | 60 | ||
@@ -64,38 +64,25 @@ async_memset(struct page *dest, int val, unsigned int offset, | |||
64 | 64 | ||
65 | if (tx) { | 65 | if (tx) { |
66 | pr_debug("%s: (async) len: %zu\n", __func__, len); | 66 | pr_debug("%s: (async) len: %zu\n", __func__, len); |
67 | async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); | 67 | async_tx_submit(chan, tx, submit); |
68 | } else { /* run the memset synchronously */ | 68 | } else { /* run the memset synchronously */ |
69 | void *dest_buf; | 69 | void *dest_buf; |
70 | pr_debug("%s: (sync) len: %zu\n", __func__, len); | 70 | pr_debug("%s: (sync) len: %zu\n", __func__, len); |
71 | 71 | ||
72 | dest_buf = (void *) (((char *) page_address(dest)) + offset); | 72 | dest_buf = page_address(dest) + offset; |
73 | 73 | ||
74 | /* wait for any prerequisite operations */ | 74 | /* wait for any prerequisite operations */ |
75 | async_tx_quiesce(&depend_tx); | 75 | async_tx_quiesce(&submit->depend_tx); |
76 | 76 | ||
77 | memset(dest_buf, val, len); | 77 | memset(dest_buf, val, len); |
78 | 78 | ||
79 | async_tx_sync_epilog(cb_fn, cb_param); | 79 | async_tx_sync_epilog(submit); |
80 | } | 80 | } |
81 | 81 | ||
82 | return tx; | 82 | return tx; |
83 | } | 83 | } |
84 | EXPORT_SYMBOL_GPL(async_memset); | 84 | EXPORT_SYMBOL_GPL(async_memset); |
85 | 85 | ||
86 | static int __init async_memset_init(void) | ||
87 | { | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static void __exit async_memset_exit(void) | ||
92 | { | ||
93 | do { } while (0); | ||
94 | } | ||
95 | |||
96 | module_init(async_memset_init); | ||
97 | module_exit(async_memset_exit); | ||
98 | |||
99 | MODULE_AUTHOR("Intel Corporation"); | 86 | MODULE_AUTHOR("Intel Corporation"); |
100 | MODULE_DESCRIPTION("asynchronous memset api"); | 87 | MODULE_DESCRIPTION("asynchronous memset api"); |
101 | MODULE_LICENSE("GPL"); | 88 | MODULE_LICENSE("GPL"); |
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c new file mode 100644 index 000000000000..b88db6d1dc65 --- /dev/null +++ b/crypto/async_tx/async_pq.c | |||
@@ -0,0 +1,395 @@ | |||
1 | /* | ||
2 | * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com> | ||
3 | * Copyright(c) 2009 Intel Corporation | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU General Public License as published by the Free | ||
7 | * Software Foundation; either version 2 of the License, or (at your option) | ||
8 | * any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License along with | ||
16 | * this program; if not, write to the Free Software Foundation, Inc., 59 | ||
17 | * Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
18 | * | ||
19 | * The full GNU General Public License is included in this distribution in the | ||
20 | * file called COPYING. | ||
21 | */ | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/dma-mapping.h> | ||
25 | #include <linux/raid/pq.h> | ||
26 | #include <linux/async_tx.h> | ||
27 | |||
28 | /** | ||
29 | * scribble - space to hold a throwaway P or Q buffer for synchronous gen_syndrome | ||
30 | */ | ||
31 | static struct page *scribble; | ||
32 | |||
33 | static bool is_raid6_zero_block(struct page *p) | ||
34 | { | ||
35 | return p == (void *) raid6_empty_zero_page; | ||
36 | } | ||
37 | |||
38 | /* the struct page *blocks[] parameter passed to async_gen_syndrome() | ||
39 | * and async_syndrome_val() contains the 'P' destination address at | ||
40 | * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] | ||
41 | * | ||
42 | * note: these are macros as they are used as lvalues | ||
43 | */ | ||
44 | #define P(b, d) (b[d-2]) | ||
45 | #define Q(b, d) (b[d-1]) | ||
46 | |||
47 | /** | ||
48 | * do_async_gen_syndrome - asynchronously calculate P and/or Q | ||
49 | */ | ||
50 | static __async_inline struct dma_async_tx_descriptor * | ||
51 | do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, | ||
52 | const unsigned char *scfs, unsigned int offset, int disks, | ||
53 | size_t len, dma_addr_t *dma_src, | ||
54 | struct async_submit_ctl *submit) | ||
55 | { | ||
56 | struct dma_async_tx_descriptor *tx = NULL; | ||
57 | struct dma_device *dma = chan->device; | ||
58 | enum dma_ctrl_flags dma_flags = 0; | ||
59 | enum async_tx_flags flags_orig = submit->flags; | ||
60 | dma_async_tx_callback cb_fn_orig = submit->cb_fn; | ||
61 | dma_async_tx_callback cb_param_orig = submit->cb_param; | ||
62 | int src_cnt = disks - 2; | ||
63 | unsigned char coefs[src_cnt]; | ||
64 | unsigned short pq_src_cnt; | ||
65 | dma_addr_t dma_dest[2]; | ||
66 | int src_off = 0; | ||
67 | int idx; | ||
68 | int i; | ||
69 | |||
70 | /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ | ||
71 | if (P(blocks, disks)) | ||
72 | dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, | ||
73 | len, DMA_BIDIRECTIONAL); | ||
74 | else | ||
75 | dma_flags |= DMA_PREP_PQ_DISABLE_P; | ||
76 | if (Q(blocks, disks)) | ||
77 | dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, | ||
78 | len, DMA_BIDIRECTIONAL); | ||
79 | else | ||
80 | dma_flags |= DMA_PREP_PQ_DISABLE_Q; | ||
81 | |||
82 | /* convert source addresses being careful to collapse 'empty' | ||
83 | * sources and update the coefficients accordingly | ||
84 | */ | ||
85 | for (i = 0, idx = 0; i < src_cnt; i++) { | ||
86 | if (is_raid6_zero_block(blocks[i])) | ||
87 | continue; | ||
88 | dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, | ||
89 | DMA_TO_DEVICE); | ||
90 | coefs[idx] = scfs[i]; | ||
91 | idx++; | ||
92 | } | ||
93 | src_cnt = idx; | ||
94 | |||
95 | while (src_cnt > 0) { | ||
96 | submit->flags = flags_orig; | ||
97 | pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); | ||
98 | /* if we are submitting additional pqs, leave the chain open, | ||
99 | * clear the callback parameters, and leave the destination | ||
100 | * buffers mapped | ||
101 | */ | ||
102 | if (src_cnt > pq_src_cnt) { | ||
103 | submit->flags &= ~ASYNC_TX_ACK; | ||
104 | submit->flags |= ASYNC_TX_FENCE; | ||
105 | dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; | ||
106 | submit->cb_fn = NULL; | ||
107 | submit->cb_param = NULL; | ||
108 | } else { | ||
109 | dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; | ||
110 | submit->cb_fn = cb_fn_orig; | ||
111 | submit->cb_param = cb_param_orig; | ||
112 | if (cb_fn_orig) | ||
113 | dma_flags |= DMA_PREP_INTERRUPT; | ||
114 | } | ||
115 | if (submit->flags & ASYNC_TX_FENCE) | ||
116 | dma_flags |= DMA_PREP_FENCE; | ||
117 | |||
118 | /* Since we have clobbered the src_list we are committed | ||
119 | * to doing this asynchronously. Drivers force forward | ||
120 | * progress in case they can not provide a descriptor | ||
121 | */ | ||
122 | for (;;) { | ||
123 | tx = dma->device_prep_dma_pq(chan, dma_dest, | ||
124 | &dma_src[src_off], | ||
125 | pq_src_cnt, | ||
126 | &coefs[src_off], len, | ||
127 | dma_flags); | ||
128 | if (likely(tx)) | ||
129 | break; | ||
130 | async_tx_quiesce(&submit->depend_tx); | ||
131 | dma_async_issue_pending(chan); | ||
132 | } | ||
133 | |||
134 | async_tx_submit(chan, tx, submit); | ||
135 | submit->depend_tx = tx; | ||
136 | |||
137 | /* drop completed sources */ | ||
138 | src_cnt -= pq_src_cnt; | ||
139 | src_off += pq_src_cnt; | ||
140 | |||
141 | dma_flags |= DMA_PREP_CONTINUE; | ||
142 | } | ||
143 | |||
144 | return tx; | ||
145 | } | ||
146 | |||
147 | /** | ||
148 | * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome | ||
149 | */ | ||
150 | static void | ||
151 | do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, | ||
152 | size_t len, struct async_submit_ctl *submit) | ||
153 | { | ||
154 | void **srcs; | ||
155 | int i; | ||
156 | |||
157 | if (submit->scribble) | ||
158 | srcs = submit->scribble; | ||
159 | else | ||
160 | srcs = (void **) blocks; | ||
161 | |||
162 | for (i = 0; i < disks; i++) { | ||
163 | if (is_raid6_zero_block(blocks[i])) { | ||
164 | BUG_ON(i > disks - 3); /* P or Q can't be zero */ | ||
165 | srcs[i] = blocks[i]; | ||
166 | } else | ||
167 | srcs[i] = page_address(blocks[i]) + offset; | ||
168 | } | ||
169 | raid6_call.gen_syndrome(disks, len, srcs); | ||
170 | async_tx_sync_epilog(submit); | ||
171 | } | ||
172 | |||
173 | /** | ||
174 | * async_gen_syndrome - asynchronously calculate a raid6 syndrome | ||
175 | * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 | ||
176 | * @offset: common offset into each block (src and dest) to start transaction | ||
177 | * @disks: number of blocks (including missing P or Q, see below) | ||
178 | * @len: length of operation in bytes | ||
179 | * @submit: submission/completion modifiers | ||
180 | * | ||
181 | * General note: This routine assumes a field of GF(2^8) with a | ||
182 | * primitive polynomial of 0x11d and a generator of {02}. | ||
183 | * | ||
184 | * 'disks' note: callers can optionally omit either P or Q (but not | ||
185 | * both) from the calculation by setting blocks[disks-2] or | ||
186 | * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= | ||
187 | * PAGE_SIZE as a temporary buffer of this size is used in the | ||
188 | * synchronous path. 'disks' always accounts for both destination | ||
189 | * buffers. | ||
190 | * | ||
191 | * 'blocks' note: if submit->scribble is NULL then the contents of | ||
192 | * 'blocks' may be overwritten | ||
193 | */ | ||
194 | struct dma_async_tx_descriptor * | ||
195 | async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, | ||
196 | size_t len, struct async_submit_ctl *submit) | ||
197 | { | ||
198 | int src_cnt = disks - 2; | ||
199 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, | ||
200 | &P(blocks, disks), 2, | ||
201 | blocks, src_cnt, len); | ||
202 | struct dma_device *device = chan ? chan->device : NULL; | ||
203 | dma_addr_t *dma_src = NULL; | ||
204 | |||
205 | BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); | ||
206 | |||
207 | if (submit->scribble) | ||
208 | dma_src = submit->scribble; | ||
209 | else if (sizeof(dma_addr_t) <= sizeof(struct page *)) | ||
210 | dma_src = (dma_addr_t *) blocks; | ||
211 | |||
212 | if (dma_src && device && | ||
213 | (src_cnt <= dma_maxpq(device, 0) || | ||
214 | dma_maxpq(device, DMA_PREP_CONTINUE) > 0) && | ||
215 | is_dma_pq_aligned(device, offset, 0, len)) { | ||
216 | /* run the p+q asynchronously */ | ||
217 | pr_debug("%s: (async) disks: %d len: %zu\n", | ||
218 | __func__, disks, len); | ||
219 | return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, | ||
220 | disks, len, dma_src, submit); | ||
221 | } | ||
222 | |||
223 | /* run the pq synchronously */ | ||
224 | pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); | ||
225 | |||
226 | /* wait for any prerequisite operations */ | ||
227 | async_tx_quiesce(&submit->depend_tx); | ||
228 | |||
229 | if (!P(blocks, disks)) { | ||
230 | P(blocks, disks) = scribble; | ||
231 | BUG_ON(len + offset > PAGE_SIZE); | ||
232 | } | ||
233 | if (!Q(blocks, disks)) { | ||
234 | Q(blocks, disks) = scribble; | ||
235 | BUG_ON(len + offset > PAGE_SIZE); | ||
236 | } | ||
237 | do_sync_gen_syndrome(blocks, offset, disks, len, submit); | ||
238 | |||
239 | return NULL; | ||
240 | } | ||
241 | EXPORT_SYMBOL_GPL(async_gen_syndrome); | ||
242 | |||
243 | /** | ||
244 | * async_syndrome_val - asynchronously validate a raid6 syndrome | ||
245 | * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 | ||
246 | * @offset: common offset into each block (src and dest) to start transaction | ||
247 | * @disks: number of blocks (including missing P or Q, see below) | ||
248 | * @len: length of operation in bytes | ||
249 | * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set | ||
250 | * @spare: temporary result buffer for the synchronous case | ||
251 | * @submit: submission / completion modifiers | ||
252 | * | ||
253 | * The same notes from async_gen_syndrome apply to the 'blocks', | ||
254 | * and 'disks' parameters of this routine. The synchronous path | ||
255 | * requires a temporary result buffer and submit->scribble to be | ||
256 | * specified. | ||
257 | */ | ||
258 | struct dma_async_tx_descriptor * | ||
259 | async_syndrome_val(struct page **blocks, unsigned int offset, int disks, | ||
260 | size_t len, enum sum_check_flags *pqres, struct page *spare, | ||
261 | struct async_submit_ctl *submit) | ||
262 | { | ||
263 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, | ||
264 | NULL, 0, blocks, disks, | ||
265 | len); | ||
266 | struct dma_device *device = chan ? chan->device : NULL; | ||
267 | struct dma_async_tx_descriptor *tx; | ||
268 | enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; | ||
269 | dma_addr_t *dma_src = NULL; | ||
270 | |||
271 | BUG_ON(disks < 4); | ||
272 | |||
273 | if (submit->scribble) | ||
274 | dma_src = submit->scribble; | ||
275 | else if (sizeof(dma_addr_t) <= sizeof(struct page *)) | ||
276 | dma_src = (dma_addr_t *) blocks; | ||
277 | |||
278 | if (dma_src && device && disks <= dma_maxpq(device, 0) && | ||
279 | is_dma_pq_aligned(device, offset, 0, len)) { | ||
280 | struct device *dev = device->dev; | ||
281 | dma_addr_t *pq = &dma_src[disks-2]; | ||
282 | int i; | ||
283 | |||
284 | pr_debug("%s: (async) disks: %d len: %zu\n", | ||
285 | __func__, disks, len); | ||
286 | if (!P(blocks, disks)) | ||
287 | dma_flags |= DMA_PREP_PQ_DISABLE_P; | ||
288 | if (!Q(blocks, disks)) | ||
289 | dma_flags |= DMA_PREP_PQ_DISABLE_Q; | ||
290 | if (submit->flags & ASYNC_TX_FENCE) | ||
291 | dma_flags |= DMA_PREP_FENCE; | ||
292 | for (i = 0; i < disks; i++) | ||
293 | if (likely(blocks[i])) { | ||
294 | BUG_ON(is_raid6_zero_block(blocks[i])); | ||
295 | dma_src[i] = dma_map_page(dev, blocks[i], | ||
296 | offset, len, | ||
297 | DMA_TO_DEVICE); | ||
298 | } | ||
299 | |||
300 | for (;;) { | ||
301 | tx = device->device_prep_dma_pq_val(chan, pq, dma_src, | ||
302 | disks - 2, | ||
303 | raid6_gfexp, | ||
304 | len, pqres, | ||
305 | dma_flags); | ||
306 | if (likely(tx)) | ||
307 | break; | ||
308 | async_tx_quiesce(&submit->depend_tx); | ||
309 | dma_async_issue_pending(chan); | ||
310 | } | ||
311 | async_tx_submit(chan, tx, submit); | ||
312 | |||
313 | return tx; | ||
314 | } else { | ||
315 | struct page *p_src = P(blocks, disks); | ||
316 | struct page *q_src = Q(blocks, disks); | ||
317 | enum async_tx_flags flags_orig = submit->flags; | ||
318 | dma_async_tx_callback cb_fn_orig = submit->cb_fn; | ||
319 | void *scribble = submit->scribble; | ||
320 | void *cb_param_orig = submit->cb_param; | ||
321 | void *p, *q, *s; | ||
322 | |||
323 | pr_debug("%s: (sync) disks: %d len: %zu\n", | ||
324 | __func__, disks, len); | ||
325 | |||
326 | /* caller must provide a temporary result buffer and | ||
327 | * allow the input parameters to be preserved | ||
328 | */ | ||
329 | BUG_ON(!spare || !scribble); | ||
330 | |||
331 | /* wait for any prerequisite operations */ | ||
332 | async_tx_quiesce(&submit->depend_tx); | ||
333 | |||
334 | /* recompute p and/or q into the temporary buffer and then | ||
335 | * check to see the result matches the current value | ||
336 | */ | ||
337 | tx = NULL; | ||
338 | *pqres = 0; | ||
339 | if (p_src) { | ||
340 | init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, | ||
341 | NULL, NULL, scribble); | ||
342 | tx = async_xor(spare, blocks, offset, disks-2, len, submit); | ||
343 | async_tx_quiesce(&tx); | ||
344 | p = page_address(p_src) + offset; | ||
345 | s = page_address(spare) + offset; | ||
346 | *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; | ||
347 | } | ||
348 | |||
349 | if (q_src) { | ||
350 | P(blocks, disks) = NULL; | ||
351 | Q(blocks, disks) = spare; | ||
352 | init_async_submit(submit, 0, NULL, NULL, NULL, scribble); | ||
353 | tx = async_gen_syndrome(blocks, offset, disks, len, submit); | ||
354 | async_tx_quiesce(&tx); | ||
355 | q = page_address(q_src) + offset; | ||
356 | s = page_address(spare) + offset; | ||
357 | *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; | ||
358 | } | ||
359 | |||
360 | /* restore P, Q and submit */ | ||
361 | P(blocks, disks) = p_src; | ||
362 | Q(blocks, disks) = q_src; | ||
363 | |||
364 | submit->cb_fn = cb_fn_orig; | ||
365 | submit->cb_param = cb_param_orig; | ||
366 | submit->flags = flags_orig; | ||
367 | async_tx_sync_epilog(submit); | ||
368 | |||
369 | return NULL; | ||
370 | } | ||
371 | } | ||
372 | EXPORT_SYMBOL_GPL(async_syndrome_val); | ||
373 | |||
374 | static int __init async_pq_init(void) | ||
375 | { | ||
376 | scribble = alloc_page(GFP_KERNEL); | ||
377 | |||
378 | if (scribble) | ||
379 | return 0; | ||
380 | |||
381 | pr_err("%s: failed to allocate required spare page\n", __func__); | ||
382 | |||
383 | return -ENOMEM; | ||
384 | } | ||
385 | |||
386 | static void __exit async_pq_exit(void) | ||
387 | { | ||
388 | put_page(scribble); | ||
389 | } | ||
390 | |||
391 | module_init(async_pq_init); | ||
392 | module_exit(async_pq_exit); | ||
393 | |||
394 | MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation"); | ||
395 | MODULE_LICENSE("GPL"); | ||
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c new file mode 100644 index 000000000000..6d73dde4786d --- /dev/null +++ b/crypto/async_tx/async_raid6_recov.c | |||
@@ -0,0 +1,468 @@ | |||
1 | /* | ||
2 | * Asynchronous RAID-6 recovery calculations ASYNC_TX API. | ||
3 | * Copyright(c) 2009 Intel Corporation | ||
4 | * | ||
5 | * based on raid6recov.c: | ||
6 | * Copyright 2002 H. Peter Anvin | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms of the GNU General Public License as published by the Free | ||
10 | * Software Foundation; either version 2 of the License, or (at your option) | ||
11 | * any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
16 | * more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License along with | ||
19 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
20 | * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | */ | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/dma-mapping.h> | ||
26 | #include <linux/raid/pq.h> | ||
27 | #include <linux/async_tx.h> | ||
28 | |||
29 | static struct dma_async_tx_descriptor * | ||
30 | async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, | ||
31 | size_t len, struct async_submit_ctl *submit) | ||
32 | { | ||
33 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, | ||
34 | &dest, 1, srcs, 2, len); | ||
35 | struct dma_device *dma = chan ? chan->device : NULL; | ||
36 | const u8 *amul, *bmul; | ||
37 | u8 ax, bx; | ||
38 | u8 *a, *b, *c; | ||
39 | |||
40 | if (dma) { | ||
41 | dma_addr_t dma_dest[2]; | ||
42 | dma_addr_t dma_src[2]; | ||
43 | struct device *dev = dma->dev; | ||
44 | struct dma_async_tx_descriptor *tx; | ||
45 | enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; | ||
46 | |||
47 | if (submit->flags & ASYNC_TX_FENCE) | ||
48 | dma_flags |= DMA_PREP_FENCE; | ||
49 | dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); | ||
50 | dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); | ||
51 | dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); | ||
52 | tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, | ||
53 | len, dma_flags); | ||
54 | if (tx) { | ||
55 | async_tx_submit(chan, tx, submit); | ||
56 | return tx; | ||
57 | } | ||
58 | |||
59 | /* could not get a descriptor, unmap and fall through to | ||
60 | * the synchronous path | ||
61 | */ | ||
62 | dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); | ||
63 | dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); | ||
64 | dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE); | ||
65 | } | ||
66 | |||
67 | /* run the operation synchronously */ | ||
68 | async_tx_quiesce(&submit->depend_tx); | ||
69 | amul = raid6_gfmul[coef[0]]; | ||
70 | bmul = raid6_gfmul[coef[1]]; | ||
71 | a = page_address(srcs[0]); | ||
72 | b = page_address(srcs[1]); | ||
73 | c = page_address(dest); | ||
74 | |||
75 | while (len--) { | ||
76 | ax = amul[*a++]; | ||
77 | bx = bmul[*b++]; | ||
78 | *c++ = ax ^ bx; | ||
79 | } | ||
80 | |||
81 | return NULL; | ||
82 | } | ||
83 | |||
84 | static struct dma_async_tx_descriptor * | ||
85 | async_mult(struct page *dest, struct page *src, u8 coef, size_t len, | ||
86 | struct async_submit_ctl *submit) | ||
87 | { | ||
88 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, | ||
89 | &dest, 1, &src, 1, len); | ||
90 | struct dma_device *dma = chan ? chan->device : NULL; | ||
91 | const u8 *qmul; /* Q multiplier table */ | ||
92 | u8 *d, *s; | ||
93 | |||
94 | if (dma) { | ||
95 | dma_addr_t dma_dest[2]; | ||
96 | dma_addr_t dma_src[1]; | ||
97 | struct device *dev = dma->dev; | ||
98 | struct dma_async_tx_descriptor *tx; | ||
99 | enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; | ||
100 | |||
101 | if (submit->flags & ASYNC_TX_FENCE) | ||
102 | dma_flags |= DMA_PREP_FENCE; | ||
103 | dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); | ||
104 | dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); | ||
105 | tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, | ||
106 | len, dma_flags); | ||
107 | if (tx) { | ||
108 | async_tx_submit(chan, tx, submit); | ||
109 | return tx; | ||
110 | } | ||
111 | |||
112 | /* could not get a descriptor, unmap and fall through to | ||
113 | * the synchronous path | ||
114 | */ | ||
115 | dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); | ||
116 | dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); | ||
117 | } | ||
118 | |||
119 | /* no channel available, or failed to allocate a descriptor, so | ||
120 | * perform the operation synchronously | ||
121 | */ | ||
122 | async_tx_quiesce(&submit->depend_tx); | ||
123 | qmul = raid6_gfmul[coef]; | ||
124 | d = page_address(dest); | ||
125 | s = page_address(src); | ||
126 | |||
127 | while (len--) | ||
128 | *d++ = qmul[*s++]; | ||
129 | |||
130 | return NULL; | ||
131 | } | ||
132 | |||
133 | static struct dma_async_tx_descriptor * | ||
134 | __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, | ||
135 | struct async_submit_ctl *submit) | ||
136 | { | ||
137 | struct dma_async_tx_descriptor *tx = NULL; | ||
138 | struct page *p, *q, *a, *b; | ||
139 | struct page *srcs[2]; | ||
140 | unsigned char coef[2]; | ||
141 | enum async_tx_flags flags = submit->flags; | ||
142 | dma_async_tx_callback cb_fn = submit->cb_fn; | ||
143 | void *cb_param = submit->cb_param; | ||
144 | void *scribble = submit->scribble; | ||
145 | |||
146 | p = blocks[4-2]; | ||
147 | q = blocks[4-1]; | ||
148 | |||
149 | a = blocks[faila]; | ||
150 | b = blocks[failb]; | ||
151 | |||
152 | /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ | ||
153 | /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ | ||
154 | srcs[0] = p; | ||
155 | srcs[1] = q; | ||
156 | coef[0] = raid6_gfexi[failb-faila]; | ||
157 | coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; | ||
158 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); | ||
159 | tx = async_sum_product(b, srcs, coef, bytes, submit); | ||
160 | |||
161 | /* Dy = P+Pxy+Dx */ | ||
162 | srcs[0] = p; | ||
163 | srcs[1] = b; | ||
164 | init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, | ||
165 | cb_param, scribble); | ||
166 | tx = async_xor(a, srcs, 0, 2, bytes, submit); | ||
167 | |||
168 | return tx; | ||
169 | |||
170 | } | ||
171 | |||
172 | static struct dma_async_tx_descriptor * | ||
173 | __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, | ||
174 | struct async_submit_ctl *submit) | ||
175 | { | ||
176 | struct dma_async_tx_descriptor *tx = NULL; | ||
177 | struct page *p, *q, *g, *dp, *dq; | ||
178 | struct page *srcs[2]; | ||
179 | unsigned char coef[2]; | ||
180 | enum async_tx_flags flags = submit->flags; | ||
181 | dma_async_tx_callback cb_fn = submit->cb_fn; | ||
182 | void *cb_param = submit->cb_param; | ||
183 | void *scribble = submit->scribble; | ||
184 | int uninitialized_var(good); | ||
185 | int i; | ||
186 | |||
187 | for (i = 0; i < 3; i++) { | ||
188 | if (i == faila || i == failb) | ||
189 | continue; | ||
190 | else { | ||
191 | good = i; | ||
192 | break; | ||
193 | } | ||
194 | } | ||
195 | BUG_ON(i >= 3); | ||
196 | |||
197 | p = blocks[5-2]; | ||
198 | q = blocks[5-1]; | ||
199 | g = blocks[good]; | ||
200 | |||
201 | /* Compute syndrome with zero for the missing data pages | ||
202 | * Use the dead data pages as temporary storage for delta p and | ||
203 | * delta q | ||
204 | */ | ||
205 | dp = blocks[faila]; | ||
206 | dq = blocks[failb]; | ||
207 | |||
208 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); | ||
209 | tx = async_memcpy(dp, g, 0, 0, bytes, submit); | ||
210 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); | ||
211 | tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); | ||
212 | |||
213 | /* compute P + Pxy */ | ||
214 | srcs[0] = dp; | ||
215 | srcs[1] = p; | ||
216 | init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, | ||
217 | NULL, NULL, scribble); | ||
218 | tx = async_xor(dp, srcs, 0, 2, bytes, submit); | ||
219 | |||
220 | /* compute Q + Qxy */ | ||
221 | srcs[0] = dq; | ||
222 | srcs[1] = q; | ||
223 | init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, | ||
224 | NULL, NULL, scribble); | ||
225 | tx = async_xor(dq, srcs, 0, 2, bytes, submit); | ||
226 | |||
227 | /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ | ||
228 | srcs[0] = dp; | ||
229 | srcs[1] = dq; | ||
230 | coef[0] = raid6_gfexi[failb-faila]; | ||
231 | coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; | ||
232 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); | ||
233 | tx = async_sum_product(dq, srcs, coef, bytes, submit); | ||
234 | |||
235 | /* Dy = P+Pxy+Dx */ | ||
236 | srcs[0] = dp; | ||
237 | srcs[1] = dq; | ||
238 | init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, | ||
239 | cb_param, scribble); | ||
240 | tx = async_xor(dp, srcs, 0, 2, bytes, submit); | ||
241 | |||
242 | return tx; | ||
243 | } | ||
244 | |||
245 | static struct dma_async_tx_descriptor * | ||
246 | __2data_recov_n(int disks, size_t bytes, int faila, int failb, | ||
247 | struct page **blocks, struct async_submit_ctl *submit) | ||
248 | { | ||
249 | struct dma_async_tx_descriptor *tx = NULL; | ||
250 | struct page *p, *q, *dp, *dq; | ||
251 | struct page *srcs[2]; | ||
252 | unsigned char coef[2]; | ||
253 | enum async_tx_flags flags = submit->flags; | ||
254 | dma_async_tx_callback cb_fn = submit->cb_fn; | ||
255 | void *cb_param = submit->cb_param; | ||
256 | void *scribble = submit->scribble; | ||
257 | |||
258 | p = blocks[disks-2]; | ||
259 | q = blocks[disks-1]; | ||
260 | |||
261 | /* Compute syndrome with zero for the missing data pages | ||
262 | * Use the dead data pages as temporary storage for | ||
263 | * delta p and delta q | ||
264 | */ | ||
265 | dp = blocks[faila]; | ||
266 | blocks[faila] = (void *)raid6_empty_zero_page; | ||
267 | blocks[disks-2] = dp; | ||
268 | dq = blocks[failb]; | ||
269 | blocks[failb] = (void *)raid6_empty_zero_page; | ||
270 | blocks[disks-1] = dq; | ||
271 | |||
272 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); | ||
273 | tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); | ||
274 | |||
275 | /* Restore pointer table */ | ||
276 | blocks[faila] = dp; | ||
277 | blocks[failb] = dq; | ||
278 | blocks[disks-2] = p; | ||
279 | blocks[disks-1] = q; | ||
280 | |||
281 | /* compute P + Pxy */ | ||
282 | srcs[0] = dp; | ||
283 | srcs[1] = p; | ||
284 | init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, | ||
285 | NULL, NULL, scribble); | ||
286 | tx = async_xor(dp, srcs, 0, 2, bytes, submit); | ||
287 | |||
288 | /* compute Q + Qxy */ | ||
289 | srcs[0] = dq; | ||
290 | srcs[1] = q; | ||
291 | init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, | ||
292 | NULL, NULL, scribble); | ||
293 | tx = async_xor(dq, srcs, 0, 2, bytes, submit); | ||
294 | |||
295 | /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ | ||
296 | srcs[0] = dp; | ||
297 | srcs[1] = dq; | ||
298 | coef[0] = raid6_gfexi[failb-faila]; | ||
299 | coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; | ||
300 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); | ||
301 | tx = async_sum_product(dq, srcs, coef, bytes, submit); | ||
302 | |||
303 | /* Dy = P+Pxy+Dx */ | ||
304 | srcs[0] = dp; | ||
305 | srcs[1] = dq; | ||
306 | init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, | ||
307 | cb_param, scribble); | ||
308 | tx = async_xor(dp, srcs, 0, 2, bytes, submit); | ||
309 | |||
310 | return tx; | ||
311 | } | ||
312 | |||
313 | /** | ||
314 | * async_raid6_2data_recov - asynchronously calculate two missing data blocks | ||
315 | * @disks: number of disks in the RAID-6 array | ||
316 | * @bytes: block size | ||
317 | * @faila: first failed drive index | ||
318 | * @failb: second failed drive index | ||
319 | * @blocks: array of source pointers where the last two entries are p and q | ||
320 | * @submit: submission/completion modifiers | ||
321 | */ | ||
322 | struct dma_async_tx_descriptor * | ||
323 | async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | ||
324 | struct page **blocks, struct async_submit_ctl *submit) | ||
325 | { | ||
326 | BUG_ON(faila == failb); | ||
327 | if (failb < faila) | ||
328 | swap(faila, failb); | ||
329 | |||
330 | pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); | ||
331 | |||
332 | /* we need to preserve the contents of 'blocks' for the async | ||
333 | * case, so punt to synchronous if a scribble buffer is not available | ||
334 | */ | ||
335 | if (!submit->scribble) { | ||
336 | void **ptrs = (void **) blocks; | ||
337 | int i; | ||
338 | |||
339 | async_tx_quiesce(&submit->depend_tx); | ||
340 | for (i = 0; i < disks; i++) | ||
341 | ptrs[i] = page_address(blocks[i]); | ||
342 | |||
343 | raid6_2data_recov(disks, bytes, faila, failb, ptrs); | ||
344 | |||
345 | async_tx_sync_epilog(submit); | ||
346 | |||
347 | return NULL; | ||
348 | } | ||
349 | |||
350 | switch (disks) { | ||
351 | case 4: | ||
352 | /* dma devices do not uniformly understand a zero source pq | ||
353 | * operation (in contrast to the synchronous case), so | ||
354 | * explicitly handle the 4 disk special case | ||
355 | */ | ||
356 | return __2data_recov_4(bytes, faila, failb, blocks, submit); | ||
357 | case 5: | ||
358 | /* dma devices do not uniformly understand a single | ||
359 | * source pq operation (in contrast to the synchronous | ||
360 | * case), so explicitly handle the 5 disk special case | ||
361 | */ | ||
362 | return __2data_recov_5(bytes, faila, failb, blocks, submit); | ||
363 | default: | ||
364 | return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); | ||
365 | } | ||
366 | } | ||
367 | EXPORT_SYMBOL_GPL(async_raid6_2data_recov); | ||
368 | |||
369 | /** | ||
370 | * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block | ||
371 | * @disks: number of disks in the RAID-6 array | ||
372 | * @bytes: block size | ||
373 | * @faila: failed drive index | ||
374 | * @blocks: array of source pointers where the last two entries are p and q | ||
375 | * @submit: submission/completion modifiers | ||
376 | */ | ||
377 | struct dma_async_tx_descriptor * | ||
378 | async_raid6_datap_recov(int disks, size_t bytes, int faila, | ||
379 | struct page **blocks, struct async_submit_ctl *submit) | ||
380 | { | ||
381 | struct dma_async_tx_descriptor *tx = NULL; | ||
382 | struct page *p, *q, *dq; | ||
383 | u8 coef; | ||
384 | enum async_tx_flags flags = submit->flags; | ||
385 | dma_async_tx_callback cb_fn = submit->cb_fn; | ||
386 | void *cb_param = submit->cb_param; | ||
387 | void *scribble = submit->scribble; | ||
388 | struct page *srcs[2]; | ||
389 | |||
390 | pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); | ||
391 | |||
392 | /* we need to preserve the contents of 'blocks' for the async | ||
393 | * case, so punt to synchronous if a scribble buffer is not available | ||
394 | */ | ||
395 | if (!scribble) { | ||
396 | void **ptrs = (void **) blocks; | ||
397 | int i; | ||
398 | |||
399 | async_tx_quiesce(&submit->depend_tx); | ||
400 | for (i = 0; i < disks; i++) | ||
401 | ptrs[i] = page_address(blocks[i]); | ||
402 | |||
403 | raid6_datap_recov(disks, bytes, faila, ptrs); | ||
404 | |||
405 | async_tx_sync_epilog(submit); | ||
406 | |||
407 | return NULL; | ||
408 | } | ||
409 | |||
410 | p = blocks[disks-2]; | ||
411 | q = blocks[disks-1]; | ||
412 | |||
413 | /* Compute syndrome with zero for the missing data page | ||
414 | * Use the dead data page as temporary storage for delta q | ||
415 | */ | ||
416 | dq = blocks[faila]; | ||
417 | blocks[faila] = (void *)raid6_empty_zero_page; | ||
418 | blocks[disks-1] = dq; | ||
419 | |||
420 | /* in the 4 disk case we only need to perform a single source | ||
421 | * multiplication | ||
422 | */ | ||
423 | if (disks == 4) { | ||
424 | int good = faila == 0 ? 1 : 0; | ||
425 | struct page *g = blocks[good]; | ||
426 | |||
427 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, | ||
428 | scribble); | ||
429 | tx = async_memcpy(p, g, 0, 0, bytes, submit); | ||
430 | |||
431 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, | ||
432 | scribble); | ||
433 | tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); | ||
434 | } else { | ||
435 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, | ||
436 | scribble); | ||
437 | tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); | ||
438 | } | ||
439 | |||
440 | /* Restore pointer table */ | ||
441 | blocks[faila] = dq; | ||
442 | blocks[disks-1] = q; | ||
443 | |||
444 | /* calculate g^{-faila} */ | ||
445 | coef = raid6_gfinv[raid6_gfexp[faila]]; | ||
446 | |||
447 | srcs[0] = dq; | ||
448 | srcs[1] = q; | ||
449 | init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, | ||
450 | NULL, NULL, scribble); | ||
451 | tx = async_xor(dq, srcs, 0, 2, bytes, submit); | ||
452 | |||
453 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); | ||
454 | tx = async_mult(dq, dq, coef, bytes, submit); | ||
455 | |||
456 | srcs[0] = p; | ||
457 | srcs[1] = dq; | ||
458 | init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, | ||
459 | cb_param, scribble); | ||
460 | tx = async_xor(p, srcs, 0, 2, bytes, submit); | ||
461 | |||
462 | return tx; | ||
463 | } | ||
464 | EXPORT_SYMBOL_GPL(async_raid6_datap_recov); | ||
465 | |||
466 | MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>"); | ||
467 | MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); | ||
468 | MODULE_LICENSE("GPL"); | ||
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 06eb6cc09fef..f9cdf04fe7c0 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c | |||
@@ -42,16 +42,21 @@ static void __exit async_tx_exit(void) | |||
42 | async_dmaengine_put(); | 42 | async_dmaengine_put(); |
43 | } | 43 | } |
44 | 44 | ||
45 | module_init(async_tx_init); | ||
46 | module_exit(async_tx_exit); | ||
47 | |||
45 | /** | 48 | /** |
46 | * __async_tx_find_channel - find a channel to carry out the operation or let | 49 | * __async_tx_find_channel - find a channel to carry out the operation or let |
47 | * the transaction execute synchronously | 50 | * the transaction execute synchronously |
48 | * @depend_tx: transaction dependency | 51 | * @submit: transaction dependency and submission modifiers |
49 | * @tx_type: transaction type | 52 | * @tx_type: transaction type |
50 | */ | 53 | */ |
51 | struct dma_chan * | 54 | struct dma_chan * |
52 | __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | 55 | __async_tx_find_channel(struct async_submit_ctl *submit, |
53 | enum dma_transaction_type tx_type) | 56 | enum dma_transaction_type tx_type) |
54 | { | 57 | { |
58 | struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; | ||
59 | |||
55 | /* see if we can keep the chain on one channel */ | 60 | /* see if we can keep the chain on one channel */ |
56 | if (depend_tx && | 61 | if (depend_tx && |
57 | dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) | 62 | dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) |
@@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | |||
59 | return async_dma_find_channel(tx_type); | 64 | return async_dma_find_channel(tx_type); |
60 | } | 65 | } |
61 | EXPORT_SYMBOL_GPL(__async_tx_find_channel); | 66 | EXPORT_SYMBOL_GPL(__async_tx_find_channel); |
62 | #else | ||
63 | static int __init async_tx_init(void) | ||
64 | { | ||
65 | printk(KERN_INFO "async_tx: api initialized (sync-only)\n"); | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | static void __exit async_tx_exit(void) | ||
70 | { | ||
71 | do { } while (0); | ||
72 | } | ||
73 | #endif | 67 | #endif |
74 | 68 | ||
75 | 69 | ||
@@ -83,10 +77,14 @@ static void | |||
83 | async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, | 77 | async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, |
84 | struct dma_async_tx_descriptor *tx) | 78 | struct dma_async_tx_descriptor *tx) |
85 | { | 79 | { |
86 | struct dma_chan *chan; | 80 | struct dma_chan *chan = depend_tx->chan; |
87 | struct dma_device *device; | 81 | struct dma_device *device = chan->device; |
88 | struct dma_async_tx_descriptor *intr_tx = (void *) ~0; | 82 | struct dma_async_tx_descriptor *intr_tx = (void *) ~0; |
89 | 83 | ||
84 | #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH | ||
85 | BUG(); | ||
86 | #endif | ||
87 | |||
90 | /* first check to see if we can still append to depend_tx */ | 88 | /* first check to see if we can still append to depend_tx */ |
91 | spin_lock_bh(&depend_tx->lock); | 89 | spin_lock_bh(&depend_tx->lock); |
92 | if (depend_tx->parent && depend_tx->chan == tx->chan) { | 90 | if (depend_tx->parent && depend_tx->chan == tx->chan) { |
@@ -96,11 +94,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, | |||
96 | } | 94 | } |
97 | spin_unlock_bh(&depend_tx->lock); | 95 | spin_unlock_bh(&depend_tx->lock); |
98 | 96 | ||
99 | if (!intr_tx) | 97 | /* attached dependency, flush the parent channel */ |
98 | if (!intr_tx) { | ||
99 | device->device_issue_pending(chan); | ||
100 | return; | 100 | return; |
101 | 101 | } | |
102 | chan = depend_tx->chan; | ||
103 | device = chan->device; | ||
104 | 102 | ||
105 | /* see if we can schedule an interrupt | 103 | /* see if we can schedule an interrupt |
106 | * otherwise poll for completion | 104 | * otherwise poll for completion |
@@ -134,6 +132,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, | |||
134 | intr_tx->tx_submit(intr_tx); | 132 | intr_tx->tx_submit(intr_tx); |
135 | async_tx_ack(intr_tx); | 133 | async_tx_ack(intr_tx); |
136 | } | 134 | } |
135 | device->device_issue_pending(chan); | ||
137 | } else { | 136 | } else { |
138 | if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) | 137 | if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) |
139 | panic("%s: DMA_ERROR waiting for depend_tx\n", | 138 | panic("%s: DMA_ERROR waiting for depend_tx\n", |
@@ -144,13 +143,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, | |||
144 | 143 | ||
145 | 144 | ||
146 | /** | 145 | /** |
147 | * submit_disposition - while holding depend_tx->lock we must avoid submitting | 146 | * submit_disposition - flags for routing an incoming operation |
148 | * new operations to prevent a circular locking dependency with | ||
149 | * drivers that already hold a channel lock when calling | ||
150 | * async_tx_run_dependencies. | ||
151 | * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock | 147 | * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock |
152 | * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch | 148 | * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch |
153 | * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly | 149 | * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly |
150 | * | ||
151 | * while holding depend_tx->lock we must avoid submitting new operations | ||
152 | * to prevent a circular locking dependency with drivers that already | ||
153 | * hold a channel lock when calling async_tx_run_dependencies. | ||
154 | */ | 154 | */ |
155 | enum submit_disposition { | 155 | enum submit_disposition { |
156 | ASYNC_TX_SUBMITTED, | 156 | ASYNC_TX_SUBMITTED, |
@@ -160,11 +160,12 @@ enum submit_disposition { | |||
160 | 160 | ||
161 | void | 161 | void |
162 | async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, | 162 | async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, |
163 | enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, | 163 | struct async_submit_ctl *submit) |
164 | dma_async_tx_callback cb_fn, void *cb_param) | ||
165 | { | 164 | { |
166 | tx->callback = cb_fn; | 165 | struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; |
167 | tx->callback_param = cb_param; | 166 | |
167 | tx->callback = submit->cb_fn; | ||
168 | tx->callback_param = submit->cb_param; | ||
168 | 169 | ||
169 | if (depend_tx) { | 170 | if (depend_tx) { |
170 | enum submit_disposition s; | 171 | enum submit_disposition s; |
@@ -220,30 +221,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, | |||
220 | tx->tx_submit(tx); | 221 | tx->tx_submit(tx); |
221 | } | 222 | } |
222 | 223 | ||
223 | if (flags & ASYNC_TX_ACK) | 224 | if (submit->flags & ASYNC_TX_ACK) |
224 | async_tx_ack(tx); | 225 | async_tx_ack(tx); |
225 | 226 | ||
226 | if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) | 227 | if (depend_tx) |
227 | async_tx_ack(depend_tx); | 228 | async_tx_ack(depend_tx); |
228 | } | 229 | } |
229 | EXPORT_SYMBOL_GPL(async_tx_submit); | 230 | EXPORT_SYMBOL_GPL(async_tx_submit); |
230 | 231 | ||
231 | /** | 232 | /** |
232 | * async_trigger_callback - schedules the callback function to be run after | 233 | * async_trigger_callback - schedules the callback function to be run |
233 | * any dependent operations have been completed. | 234 | * @submit: submission and completion parameters |
234 | * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK | 235 | * |
235 | * @depend_tx: 'callback' requires the completion of this transaction | 236 | * honored flags: ASYNC_TX_ACK |
236 | * @cb_fn: function to call after depend_tx completes | 237 | * |
237 | * @cb_param: parameter to pass to the callback routine | 238 | * The callback is run after any dependent operations have completed. |
238 | */ | 239 | */ |
239 | struct dma_async_tx_descriptor * | 240 | struct dma_async_tx_descriptor * |
240 | async_trigger_callback(enum async_tx_flags flags, | 241 | async_trigger_callback(struct async_submit_ctl *submit) |
241 | struct dma_async_tx_descriptor *depend_tx, | ||
242 | dma_async_tx_callback cb_fn, void *cb_param) | ||
243 | { | 242 | { |
244 | struct dma_chan *chan; | 243 | struct dma_chan *chan; |
245 | struct dma_device *device; | 244 | struct dma_device *device; |
246 | struct dma_async_tx_descriptor *tx; | 245 | struct dma_async_tx_descriptor *tx; |
246 | struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; | ||
247 | 247 | ||
248 | if (depend_tx) { | 248 | if (depend_tx) { |
249 | chan = depend_tx->chan; | 249 | chan = depend_tx->chan; |
@@ -262,14 +262,14 @@ async_trigger_callback(enum async_tx_flags flags, | |||
262 | if (tx) { | 262 | if (tx) { |
263 | pr_debug("%s: (async)\n", __func__); | 263 | pr_debug("%s: (async)\n", __func__); |
264 | 264 | ||
265 | async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); | 265 | async_tx_submit(chan, tx, submit); |
266 | } else { | 266 | } else { |
267 | pr_debug("%s: (sync)\n", __func__); | 267 | pr_debug("%s: (sync)\n", __func__); |
268 | 268 | ||
269 | /* wait for any prerequisite operations */ | 269 | /* wait for any prerequisite operations */ |
270 | async_tx_quiesce(&depend_tx); | 270 | async_tx_quiesce(&submit->depend_tx); |
271 | 271 | ||
272 | async_tx_sync_epilog(cb_fn, cb_param); | 272 | async_tx_sync_epilog(submit); |
273 | } | 273 | } |
274 | 274 | ||
275 | return tx; | 275 | return tx; |
@@ -295,9 +295,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx) | |||
295 | } | 295 | } |
296 | EXPORT_SYMBOL_GPL(async_tx_quiesce); | 296 | EXPORT_SYMBOL_GPL(async_tx_quiesce); |
297 | 297 | ||
298 | module_init(async_tx_init); | ||
299 | module_exit(async_tx_exit); | ||
300 | |||
301 | MODULE_AUTHOR("Intel Corporation"); | 298 | MODULE_AUTHOR("Intel Corporation"); |
302 | MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); | 299 | MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); |
303 | MODULE_LICENSE("GPL"); | 300 | MODULE_LICENSE("GPL"); |
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 90dd3f8bd283..b459a9034aac 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c | |||
@@ -33,19 +33,16 @@ | |||
33 | /* do_async_xor - dma map the pages and perform the xor with an engine */ | 33 | /* do_async_xor - dma map the pages and perform the xor with an engine */ |
34 | static __async_inline struct dma_async_tx_descriptor * | 34 | static __async_inline struct dma_async_tx_descriptor * |
35 | do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, | 35 | do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, |
36 | unsigned int offset, int src_cnt, size_t len, | 36 | unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src, |
37 | enum async_tx_flags flags, | 37 | struct async_submit_ctl *submit) |
38 | struct dma_async_tx_descriptor *depend_tx, | ||
39 | dma_async_tx_callback cb_fn, void *cb_param) | ||
40 | { | 38 | { |
41 | struct dma_device *dma = chan->device; | 39 | struct dma_device *dma = chan->device; |
42 | dma_addr_t *dma_src = (dma_addr_t *) src_list; | ||
43 | struct dma_async_tx_descriptor *tx = NULL; | 40 | struct dma_async_tx_descriptor *tx = NULL; |
44 | int src_off = 0; | 41 | int src_off = 0; |
45 | int i; | 42 | int i; |
46 | dma_async_tx_callback _cb_fn; | 43 | dma_async_tx_callback cb_fn_orig = submit->cb_fn; |
47 | void *_cb_param; | 44 | void *cb_param_orig = submit->cb_param; |
48 | enum async_tx_flags async_flags; | 45 | enum async_tx_flags flags_orig = submit->flags; |
49 | enum dma_ctrl_flags dma_flags; | 46 | enum dma_ctrl_flags dma_flags; |
50 | int xor_src_cnt; | 47 | int xor_src_cnt; |
51 | dma_addr_t dma_dest; | 48 | dma_addr_t dma_dest; |
@@ -63,25 +60,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, | |||
63 | } | 60 | } |
64 | 61 | ||
65 | while (src_cnt) { | 62 | while (src_cnt) { |
66 | async_flags = flags; | 63 | submit->flags = flags_orig; |
67 | dma_flags = 0; | 64 | dma_flags = 0; |
68 | xor_src_cnt = min(src_cnt, dma->max_xor); | 65 | xor_src_cnt = min(src_cnt, (int)dma->max_xor); |
69 | /* if we are submitting additional xors, leave the chain open, | 66 | /* if we are submitting additional xors, leave the chain open, |
70 | * clear the callback parameters, and leave the destination | 67 | * clear the callback parameters, and leave the destination |
71 | * buffer mapped | 68 | * buffer mapped |
72 | */ | 69 | */ |
73 | if (src_cnt > xor_src_cnt) { | 70 | if (src_cnt > xor_src_cnt) { |
74 | async_flags &= ~ASYNC_TX_ACK; | 71 | submit->flags &= ~ASYNC_TX_ACK; |
72 | submit->flags |= ASYNC_TX_FENCE; | ||
75 | dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; | 73 | dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; |
76 | _cb_fn = NULL; | 74 | submit->cb_fn = NULL; |
77 | _cb_param = NULL; | 75 | submit->cb_param = NULL; |
78 | } else { | 76 | } else { |
79 | _cb_fn = cb_fn; | 77 | submit->cb_fn = cb_fn_orig; |
80 | _cb_param = cb_param; | 78 | submit->cb_param = cb_param_orig; |
81 | } | 79 | } |
82 | if (_cb_fn) | 80 | if (submit->cb_fn) |
83 | dma_flags |= DMA_PREP_INTERRUPT; | 81 | dma_flags |= DMA_PREP_INTERRUPT; |
84 | 82 | if (submit->flags & ASYNC_TX_FENCE) | |
83 | dma_flags |= DMA_PREP_FENCE; | ||
85 | /* Since we have clobbered the src_list we are committed | 84 | /* Since we have clobbered the src_list we are committed |
86 | * to doing this asynchronously. Drivers force forward progress | 85 | * to doing this asynchronously. Drivers force forward progress |
87 | * in case they can not provide a descriptor | 86 | * in case they can not provide a descriptor |
@@ -90,7 +89,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, | |||
90 | xor_src_cnt, len, dma_flags); | 89 | xor_src_cnt, len, dma_flags); |
91 | 90 | ||
92 | if (unlikely(!tx)) | 91 | if (unlikely(!tx)) |
93 | async_tx_quiesce(&depend_tx); | 92 | async_tx_quiesce(&submit->depend_tx); |
94 | 93 | ||
95 | /* spin wait for the preceeding transactions to complete */ | 94 | /* spin wait for the preceeding transactions to complete */ |
96 | while (unlikely(!tx)) { | 95 | while (unlikely(!tx)) { |
@@ -101,11 +100,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, | |||
101 | dma_flags); | 100 | dma_flags); |
102 | } | 101 | } |
103 | 102 | ||
104 | async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, | 103 | async_tx_submit(chan, tx, submit); |
105 | _cb_param); | 104 | submit->depend_tx = tx; |
106 | |||
107 | depend_tx = tx; | ||
108 | flags |= ASYNC_TX_DEP_ACK; | ||
109 | 105 | ||
110 | if (src_cnt > xor_src_cnt) { | 106 | if (src_cnt > xor_src_cnt) { |
111 | /* drop completed sources */ | 107 | /* drop completed sources */ |
@@ -124,23 +120,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, | |||
124 | 120 | ||
125 | static void | 121 | static void |
126 | do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, | 122 | do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, |
127 | int src_cnt, size_t len, enum async_tx_flags flags, | 123 | int src_cnt, size_t len, struct async_submit_ctl *submit) |
128 | dma_async_tx_callback cb_fn, void *cb_param) | ||
129 | { | 124 | { |
130 | int i; | 125 | int i; |
131 | int xor_src_cnt; | 126 | int xor_src_cnt; |
132 | int src_off = 0; | 127 | int src_off = 0; |
133 | void *dest_buf; | 128 | void *dest_buf; |
134 | void **srcs = (void **) src_list; | 129 | void **srcs; |
130 | |||
131 | if (submit->scribble) | ||
132 | srcs = submit->scribble; | ||
133 | else | ||
134 | srcs = (void **) src_list; | ||
135 | 135 | ||
136 | /* reuse the 'src_list' array to convert to buffer pointers */ | 136 | /* convert to buffer pointers */ |
137 | for (i = 0; i < src_cnt; i++) | 137 | for (i = 0; i < src_cnt; i++) |
138 | srcs[i] = page_address(src_list[i]) + offset; | 138 | srcs[i] = page_address(src_list[i]) + offset; |
139 | 139 | ||
140 | /* set destination address */ | 140 | /* set destination address */ |
141 | dest_buf = page_address(dest) + offset; | 141 | dest_buf = page_address(dest) + offset; |
142 | 142 | ||
143 | if (flags & ASYNC_TX_XOR_ZERO_DST) | 143 | if (submit->flags & ASYNC_TX_XOR_ZERO_DST) |
144 | memset(dest_buf, 0, len); | 144 | memset(dest_buf, 0, len); |
145 | 145 | ||
146 | while (src_cnt > 0) { | 146 | while (src_cnt > 0) { |
@@ -153,61 +153,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, | |||
153 | src_off += xor_src_cnt; | 153 | src_off += xor_src_cnt; |
154 | } | 154 | } |
155 | 155 | ||
156 | async_tx_sync_epilog(cb_fn, cb_param); | 156 | async_tx_sync_epilog(submit); |
157 | } | 157 | } |
158 | 158 | ||
159 | /** | 159 | /** |
160 | * async_xor - attempt to xor a set of blocks with a dma engine. | 160 | * async_xor - attempt to xor a set of blocks with a dma engine. |
161 | * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST | ||
162 | * flag must be set to not include dest data in the calculation. The | ||
163 | * assumption with dma eninges is that they only use the destination | ||
164 | * buffer as a source when it is explicity specified in the source list. | ||
165 | * @dest: destination page | 161 | * @dest: destination page |
166 | * @src_list: array of source pages (if the dest is also a source it must be | 162 | * @src_list: array of source pages |
167 | * at index zero). The contents of this array may be overwritten. | 163 | * @offset: common src/dst offset to start transaction |
168 | * @offset: offset in pages to start transaction | ||
169 | * @src_cnt: number of source pages | 164 | * @src_cnt: number of source pages |
170 | * @len: length in bytes | 165 | * @len: length in bytes |
171 | * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, | 166 | * @submit: submission / completion modifiers |
172 | * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK | 167 | * |
173 | * @depend_tx: xor depends on the result of this transaction. | 168 | * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST |
174 | * @cb_fn: function to call when the xor completes | 169 | * |
175 | * @cb_param: parameter to pass to the callback routine | 170 | * xor_blocks always uses the dest as a source so the |
171 | * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in | ||
172 | * the calculation. The assumption with dma eninges is that they only | ||
173 | * use the destination buffer as a source when it is explicity specified | ||
174 | * in the source list. | ||
175 | * | ||
176 | * src_list note: if the dest is also a source it must be at index zero. | ||
177 | * The contents of this array will be overwritten if a scribble region | ||
178 | * is not specified. | ||
176 | */ | 179 | */ |
177 | struct dma_async_tx_descriptor * | 180 | struct dma_async_tx_descriptor * |
178 | async_xor(struct page *dest, struct page **src_list, unsigned int offset, | 181 | async_xor(struct page *dest, struct page **src_list, unsigned int offset, |
179 | int src_cnt, size_t len, enum async_tx_flags flags, | 182 | int src_cnt, size_t len, struct async_submit_ctl *submit) |
180 | struct dma_async_tx_descriptor *depend_tx, | ||
181 | dma_async_tx_callback cb_fn, void *cb_param) | ||
182 | { | 183 | { |
183 | struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, | 184 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, |
184 | &dest, 1, src_list, | 185 | &dest, 1, src_list, |
185 | src_cnt, len); | 186 | src_cnt, len); |
187 | dma_addr_t *dma_src = NULL; | ||
188 | |||
186 | BUG_ON(src_cnt <= 1); | 189 | BUG_ON(src_cnt <= 1); |
187 | 190 | ||
188 | if (chan) { | 191 | if (submit->scribble) |
192 | dma_src = submit->scribble; | ||
193 | else if (sizeof(dma_addr_t) <= sizeof(struct page *)) | ||
194 | dma_src = (dma_addr_t *) src_list; | ||
195 | |||
196 | if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) { | ||
189 | /* run the xor asynchronously */ | 197 | /* run the xor asynchronously */ |
190 | pr_debug("%s (async): len: %zu\n", __func__, len); | 198 | pr_debug("%s (async): len: %zu\n", __func__, len); |
191 | 199 | ||
192 | return do_async_xor(chan, dest, src_list, offset, src_cnt, len, | 200 | return do_async_xor(chan, dest, src_list, offset, src_cnt, len, |
193 | flags, depend_tx, cb_fn, cb_param); | 201 | dma_src, submit); |
194 | } else { | 202 | } else { |
195 | /* run the xor synchronously */ | 203 | /* run the xor synchronously */ |
196 | pr_debug("%s (sync): len: %zu\n", __func__, len); | 204 | pr_debug("%s (sync): len: %zu\n", __func__, len); |
205 | WARN_ONCE(chan, "%s: no space for dma address conversion\n", | ||
206 | __func__); | ||
197 | 207 | ||
198 | /* in the sync case the dest is an implied source | 208 | /* in the sync case the dest is an implied source |
199 | * (assumes the dest is the first source) | 209 | * (assumes the dest is the first source) |
200 | */ | 210 | */ |
201 | if (flags & ASYNC_TX_XOR_DROP_DST) { | 211 | if (submit->flags & ASYNC_TX_XOR_DROP_DST) { |
202 | src_cnt--; | 212 | src_cnt--; |
203 | src_list++; | 213 | src_list++; |
204 | } | 214 | } |
205 | 215 | ||
206 | /* wait for any prerequisite operations */ | 216 | /* wait for any prerequisite operations */ |
207 | async_tx_quiesce(&depend_tx); | 217 | async_tx_quiesce(&submit->depend_tx); |
208 | 218 | ||
209 | do_sync_xor(dest, src_list, offset, src_cnt, len, | 219 | do_sync_xor(dest, src_list, offset, src_cnt, len, submit); |
210 | flags, cb_fn, cb_param); | ||
211 | 220 | ||
212 | return NULL; | 221 | return NULL; |
213 | } | 222 | } |
@@ -222,104 +231,94 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) | |||
222 | } | 231 | } |
223 | 232 | ||
224 | /** | 233 | /** |
225 | * async_xor_zero_sum - attempt a xor parity check with a dma engine. | 234 | * async_xor_val - attempt a xor parity check with a dma engine. |
226 | * @dest: destination page used if the xor is performed synchronously | 235 | * @dest: destination page used if the xor is performed synchronously |
227 | * @src_list: array of source pages. The dest page must be listed as a source | 236 | * @src_list: array of source pages |
228 | * at index zero. The contents of this array may be overwritten. | ||
229 | * @offset: offset in pages to start transaction | 237 | * @offset: offset in pages to start transaction |
230 | * @src_cnt: number of source pages | 238 | * @src_cnt: number of source pages |
231 | * @len: length in bytes | 239 | * @len: length in bytes |
232 | * @result: 0 if sum == 0 else non-zero | 240 | * @result: 0 if sum == 0 else non-zero |
233 | * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK | 241 | * @submit: submission / completion modifiers |
234 | * @depend_tx: xor depends on the result of this transaction. | 242 | * |
235 | * @cb_fn: function to call when the xor completes | 243 | * honored flags: ASYNC_TX_ACK |
236 | * @cb_param: parameter to pass to the callback routine | 244 | * |
245 | * src_list note: if the dest is also a source it must be at index zero. | ||
246 | * The contents of this array will be overwritten if a scribble region | ||
247 | * is not specified. | ||
237 | */ | 248 | */ |
238 | struct dma_async_tx_descriptor * | 249 | struct dma_async_tx_descriptor * |
239 | async_xor_zero_sum(struct page *dest, struct page **src_list, | 250 | async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, |
240 | unsigned int offset, int src_cnt, size_t len, | 251 | int src_cnt, size_t len, enum sum_check_flags *result, |
241 | u32 *result, enum async_tx_flags flags, | 252 | struct async_submit_ctl *submit) |
242 | struct dma_async_tx_descriptor *depend_tx, | ||
243 | dma_async_tx_callback cb_fn, void *cb_param) | ||
244 | { | 253 | { |
245 | struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM, | 254 | struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, |
246 | &dest, 1, src_list, | 255 | &dest, 1, src_list, |
247 | src_cnt, len); | 256 | src_cnt, len); |
248 | struct dma_device *device = chan ? chan->device : NULL; | 257 | struct dma_device *device = chan ? chan->device : NULL; |
249 | struct dma_async_tx_descriptor *tx = NULL; | 258 | struct dma_async_tx_descriptor *tx = NULL; |
259 | dma_addr_t *dma_src = NULL; | ||
250 | 260 | ||
251 | BUG_ON(src_cnt <= 1); | 261 | BUG_ON(src_cnt <= 1); |
252 | 262 | ||
253 | if (device && src_cnt <= device->max_xor) { | 263 | if (submit->scribble) |
254 | dma_addr_t *dma_src = (dma_addr_t *) src_list; | 264 | dma_src = submit->scribble; |
255 | unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; | 265 | else if (sizeof(dma_addr_t) <= sizeof(struct page *)) |
266 | dma_src = (dma_addr_t *) src_list; | ||
267 | |||
268 | if (dma_src && device && src_cnt <= device->max_xor && | ||
269 | is_dma_xor_aligned(device, offset, 0, len)) { | ||
270 | unsigned long dma_prep_flags = 0; | ||
256 | int i; | 271 | int i; |
257 | 272 | ||
258 | pr_debug("%s: (async) len: %zu\n", __func__, len); | 273 | pr_debug("%s: (async) len: %zu\n", __func__, len); |
259 | 274 | ||
275 | if (submit->cb_fn) | ||
276 | dma_prep_flags |= DMA_PREP_INTERRUPT; | ||
277 | if (submit->flags & ASYNC_TX_FENCE) | ||
278 | dma_prep_flags |= DMA_PREP_FENCE; | ||
260 | for (i = 0; i < src_cnt; i++) | 279 | for (i = 0; i < src_cnt; i++) |
261 | dma_src[i] = dma_map_page(device->dev, src_list[i], | 280 | dma_src[i] = dma_map_page(device->dev, src_list[i], |
262 | offset, len, DMA_TO_DEVICE); | 281 | offset, len, DMA_TO_DEVICE); |
263 | 282 | ||
264 | tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, | 283 | tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, |
265 | len, result, | 284 | len, result, |
266 | dma_prep_flags); | 285 | dma_prep_flags); |
267 | if (unlikely(!tx)) { | 286 | if (unlikely(!tx)) { |
268 | async_tx_quiesce(&depend_tx); | 287 | async_tx_quiesce(&submit->depend_tx); |
269 | 288 | ||
270 | while (!tx) { | 289 | while (!tx) { |
271 | dma_async_issue_pending(chan); | 290 | dma_async_issue_pending(chan); |
272 | tx = device->device_prep_dma_zero_sum(chan, | 291 | tx = device->device_prep_dma_xor_val(chan, |
273 | dma_src, src_cnt, len, result, | 292 | dma_src, src_cnt, len, result, |
274 | dma_prep_flags); | 293 | dma_prep_flags); |
275 | } | 294 | } |
276 | } | 295 | } |
277 | 296 | ||
278 | async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); | 297 | async_tx_submit(chan, tx, submit); |
279 | } else { | 298 | } else { |
280 | unsigned long xor_flags = flags; | 299 | enum async_tx_flags flags_orig = submit->flags; |
281 | 300 | ||
282 | pr_debug("%s: (sync) len: %zu\n", __func__, len); | 301 | pr_debug("%s: (sync) len: %zu\n", __func__, len); |
302 | WARN_ONCE(device && src_cnt <= device->max_xor, | ||
303 | "%s: no space for dma address conversion\n", | ||
304 | __func__); | ||
283 | 305 | ||
284 | xor_flags |= ASYNC_TX_XOR_DROP_DST; | 306 | submit->flags |= ASYNC_TX_XOR_DROP_DST; |
285 | xor_flags &= ~ASYNC_TX_ACK; | 307 | submit->flags &= ~ASYNC_TX_ACK; |
286 | 308 | ||
287 | tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, | 309 | tx = async_xor(dest, src_list, offset, src_cnt, len, submit); |
288 | depend_tx, NULL, NULL); | ||
289 | 310 | ||
290 | async_tx_quiesce(&tx); | 311 | async_tx_quiesce(&tx); |
291 | 312 | ||
292 | *result = page_is_zero(dest, offset, len) ? 0 : 1; | 313 | *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P; |
293 | 314 | ||
294 | async_tx_sync_epilog(cb_fn, cb_param); | 315 | async_tx_sync_epilog(submit); |
316 | submit->flags = flags_orig; | ||
295 | } | 317 | } |
296 | 318 | ||
297 | return tx; | 319 | return tx; |
298 | } | 320 | } |
299 | EXPORT_SYMBOL_GPL(async_xor_zero_sum); | 321 | EXPORT_SYMBOL_GPL(async_xor_val); |
300 | |||
301 | static int __init async_xor_init(void) | ||
302 | { | ||
303 | #ifdef CONFIG_ASYNC_TX_DMA | ||
304 | /* To conserve stack space the input src_list (array of page pointers) | ||
305 | * is reused to hold the array of dma addresses passed to the driver. | ||
306 | * This conversion is only possible when dma_addr_t is less than the | ||
307 | * the size of a pointer. HIGHMEM64G is known to violate this | ||
308 | * assumption. | ||
309 | */ | ||
310 | BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *)); | ||
311 | #endif | ||
312 | |||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | static void __exit async_xor_exit(void) | ||
317 | { | ||
318 | do { } while (0); | ||
319 | } | ||
320 | |||
321 | module_init(async_xor_init); | ||
322 | module_exit(async_xor_exit); | ||
323 | 322 | ||
324 | MODULE_AUTHOR("Intel Corporation"); | 323 | MODULE_AUTHOR("Intel Corporation"); |
325 | MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); | 324 | MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); |
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c new file mode 100644 index 000000000000..3ec27c7e62ea --- /dev/null +++ b/crypto/async_tx/raid6test.c | |||
@@ -0,0 +1,240 @@ | |||
1 | /* | ||
2 | * asynchronous raid6 recovery self test | ||
3 | * Copyright (c) 2009, Intel Corporation. | ||
4 | * | ||
5 | * based on drivers/md/raid6test/test.c: | ||
6 | * Copyright 2002-2007 H. Peter Anvin | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along with | ||
18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | * | ||
21 | */ | ||
22 | #include <linux/async_tx.h> | ||
23 | #include <linux/random.h> | ||
24 | |||
25 | #undef pr | ||
26 | #define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) | ||
27 | |||
28 | #define NDISKS 16 /* Including P and Q */ | ||
29 | |||
30 | static struct page *dataptrs[NDISKS]; | ||
31 | static addr_conv_t addr_conv[NDISKS]; | ||
32 | static struct page *data[NDISKS+3]; | ||
33 | static struct page *spare; | ||
34 | static struct page *recovi; | ||
35 | static struct page *recovj; | ||
36 | |||
37 | static void callback(void *param) | ||
38 | { | ||
39 | struct completion *cmp = param; | ||
40 | |||
41 | complete(cmp); | ||
42 | } | ||
43 | |||
44 | static void makedata(int disks) | ||
45 | { | ||
46 | int i, j; | ||
47 | |||
48 | for (i = 0; i < disks; i++) { | ||
49 | for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) { | ||
50 | u32 *p = page_address(data[i]) + j; | ||
51 | |||
52 | *p = random32(); | ||
53 | } | ||
54 | |||
55 | dataptrs[i] = data[i]; | ||
56 | } | ||
57 | } | ||
58 | |||
59 | static char disk_type(int d, int disks) | ||
60 | { | ||
61 | if (d == disks - 2) | ||
62 | return 'P'; | ||
63 | else if (d == disks - 1) | ||
64 | return 'Q'; | ||
65 | else | ||
66 | return 'D'; | ||
67 | } | ||
68 | |||
69 | /* Recover two failed blocks. */ | ||
70 | static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) | ||
71 | { | ||
72 | struct async_submit_ctl submit; | ||
73 | struct completion cmp; | ||
74 | struct dma_async_tx_descriptor *tx = NULL; | ||
75 | enum sum_check_flags result = ~0; | ||
76 | |||
77 | if (faila > failb) | ||
78 | swap(faila, failb); | ||
79 | |||
80 | if (failb == disks-1) { | ||
81 | if (faila == disks-2) { | ||
82 | /* P+Q failure. Just rebuild the syndrome. */ | ||
83 | init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); | ||
84 | tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); | ||
85 | } else { | ||
86 | struct page *blocks[disks]; | ||
87 | struct page *dest; | ||
88 | int count = 0; | ||
89 | int i; | ||
90 | |||
91 | /* data+Q failure. Reconstruct data from P, | ||
92 | * then rebuild syndrome | ||
93 | */ | ||
94 | for (i = disks; i-- ; ) { | ||
95 | if (i == faila || i == failb) | ||
96 | continue; | ||
97 | blocks[count++] = ptrs[i]; | ||
98 | } | ||
99 | dest = ptrs[faila]; | ||
100 | init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, | ||
101 | NULL, NULL, addr_conv); | ||
102 | tx = async_xor(dest, blocks, 0, count, bytes, &submit); | ||
103 | |||
104 | init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv); | ||
105 | tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); | ||
106 | } | ||
107 | } else { | ||
108 | if (failb == disks-2) { | ||
109 | /* data+P failure. */ | ||
110 | init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); | ||
111 | tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit); | ||
112 | } else { | ||
113 | /* data+data failure. */ | ||
114 | init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); | ||
115 | tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit); | ||
116 | } | ||
117 | } | ||
118 | init_completion(&cmp); | ||
119 | init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv); | ||
120 | tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit); | ||
121 | async_tx_issue_pending(tx); | ||
122 | |||
123 | if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) | ||
124 | pr("%s: timeout! (faila: %d failb: %d disks: %d)\n", | ||
125 | __func__, faila, failb, disks); | ||
126 | |||
127 | if (result != 0) | ||
128 | pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n", | ||
129 | __func__, faila, failb, result); | ||
130 | } | ||
131 | |||
132 | static int test_disks(int i, int j, int disks) | ||
133 | { | ||
134 | int erra, errb; | ||
135 | |||
136 | memset(page_address(recovi), 0xf0, PAGE_SIZE); | ||
137 | memset(page_address(recovj), 0xba, PAGE_SIZE); | ||
138 | |||
139 | dataptrs[i] = recovi; | ||
140 | dataptrs[j] = recovj; | ||
141 | |||
142 | raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs); | ||
143 | |||
144 | erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE); | ||
145 | errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE); | ||
146 | |||
147 | pr("%s(%d, %d): faila=%3d(%c) failb=%3d(%c) %s\n", | ||
148 | __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks), | ||
149 | (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB"); | ||
150 | |||
151 | dataptrs[i] = data[i]; | ||
152 | dataptrs[j] = data[j]; | ||
153 | |||
154 | return erra || errb; | ||
155 | } | ||
156 | |||
157 | static int test(int disks, int *tests) | ||
158 | { | ||
159 | struct dma_async_tx_descriptor *tx; | ||
160 | struct async_submit_ctl submit; | ||
161 | struct completion cmp; | ||
162 | int err = 0; | ||
163 | int i, j; | ||
164 | |||
165 | recovi = data[disks]; | ||
166 | recovj = data[disks+1]; | ||
167 | spare = data[disks+2]; | ||
168 | |||
169 | makedata(disks); | ||
170 | |||
171 | /* Nuke syndromes */ | ||
172 | memset(page_address(data[disks-2]), 0xee, PAGE_SIZE); | ||
173 | memset(page_address(data[disks-1]), 0xee, PAGE_SIZE); | ||
174 | |||
175 | /* Generate assumed good syndrome */ | ||
176 | init_completion(&cmp); | ||
177 | init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv); | ||
178 | tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit); | ||
179 | async_tx_issue_pending(tx); | ||
180 | |||
181 | if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) { | ||
182 | pr("error: initial gen_syndrome(%d) timed out\n", disks); | ||
183 | return 1; | ||
184 | } | ||
185 | |||
186 | pr("testing the %d-disk case...\n", disks); | ||
187 | for (i = 0; i < disks-1; i++) | ||
188 | for (j = i+1; j < disks; j++) { | ||
189 | (*tests)++; | ||
190 | err += test_disks(i, j, disks); | ||
191 | } | ||
192 | |||
193 | return err; | ||
194 | } | ||
195 | |||
196 | |||
197 | static int raid6_test(void) | ||
198 | { | ||
199 | int err = 0; | ||
200 | int tests = 0; | ||
201 | int i; | ||
202 | |||
203 | for (i = 0; i < NDISKS+3; i++) { | ||
204 | data[i] = alloc_page(GFP_KERNEL); | ||
205 | if (!data[i]) { | ||
206 | while (i--) | ||
207 | put_page(data[i]); | ||
208 | return -ENOMEM; | ||
209 | } | ||
210 | } | ||
211 | |||
212 | /* the 4-disk and 5-disk cases are special for the recovery code */ | ||
213 | if (NDISKS > 4) | ||
214 | err += test(4, &tests); | ||
215 | if (NDISKS > 5) | ||
216 | err += test(5, &tests); | ||
217 | err += test(NDISKS, &tests); | ||
218 | |||
219 | pr("\n"); | ||
220 | pr("complete (%d tests, %d failure%s)\n", | ||
221 | tests, err, err == 1 ? "" : "s"); | ||
222 | |||
223 | for (i = 0; i < NDISKS+3; i++) | ||
224 | put_page(data[i]); | ||
225 | |||
226 | return 0; | ||
227 | } | ||
228 | |||
229 | static void raid6_test_exit(void) | ||
230 | { | ||
231 | } | ||
232 | |||
233 | /* when compiled-in wait for drivers to load first (assumes dma drivers | ||
234 | * are also compliled-in) | ||
235 | */ | ||
236 | late_initcall(raid6_test); | ||
237 | module_exit(raid6_test_exit); | ||
238 | MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>"); | ||
239 | MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests"); | ||
240 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 71d1b9bab70b..614da5b8613a 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c | |||
@@ -3412,7 +3412,7 @@ static int cdrom_print_info(const char *header, int val, char *info, | |||
3412 | return 0; | 3412 | return 0; |
3413 | } | 3413 | } |
3414 | 3414 | ||
3415 | static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp, | 3415 | static int cdrom_sysctl_info(ctl_table *ctl, int write, |
3416 | void __user *buffer, size_t *lenp, loff_t *ppos) | 3416 | void __user *buffer, size_t *lenp, loff_t *ppos) |
3417 | { | 3417 | { |
3418 | int pos; | 3418 | int pos; |
@@ -3489,7 +3489,7 @@ static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp, | |||
3489 | goto done; | 3489 | goto done; |
3490 | doit: | 3490 | doit: |
3491 | mutex_unlock(&cdrom_mutex); | 3491 | mutex_unlock(&cdrom_mutex); |
3492 | return proc_dostring(ctl, write, filp, buffer, lenp, ppos); | 3492 | return proc_dostring(ctl, write, buffer, lenp, ppos); |
3493 | done: | 3493 | done: |
3494 | printk(KERN_INFO "cdrom: info buffer too small\n"); | 3494 | printk(KERN_INFO "cdrom: info buffer too small\n"); |
3495 | goto doit; | 3495 | goto doit; |
@@ -3525,12 +3525,12 @@ static void cdrom_update_settings(void) | |||
3525 | mutex_unlock(&cdrom_mutex); | 3525 | mutex_unlock(&cdrom_mutex); |
3526 | } | 3526 | } |
3527 | 3527 | ||
3528 | static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp, | 3528 | static int cdrom_sysctl_handler(ctl_table *ctl, int write, |
3529 | void __user *buffer, size_t *lenp, loff_t *ppos) | 3529 | void __user *buffer, size_t *lenp, loff_t *ppos) |
3530 | { | 3530 | { |
3531 | int ret; | 3531 | int ret; |
3532 | 3532 | ||
3533 | ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 3533 | ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
3534 | 3534 | ||
3535 | if (write) { | 3535 | if (write) { |
3536 | 3536 | ||
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 6a06913b01d3..08a6f50ae791 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig | |||
@@ -1087,6 +1087,14 @@ config MMTIMER | |||
1087 | The mmtimer device allows direct userspace access to the | 1087 | The mmtimer device allows direct userspace access to the |
1088 | Altix system timer. | 1088 | Altix system timer. |
1089 | 1089 | ||
1090 | config UV_MMTIMER | ||
1091 | tristate "UV_MMTIMER Memory mapped RTC for SGI UV" | ||
1092 | depends on X86_UV | ||
1093 | default m | ||
1094 | help | ||
1095 | The uv_mmtimer device allows direct userspace access to the | ||
1096 | UV system timer. | ||
1097 | |||
1090 | source "drivers/char/tpm/Kconfig" | 1098 | source "drivers/char/tpm/Kconfig" |
1091 | 1099 | ||
1092 | config TELCLOCK | 1100 | config TELCLOCK |
diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 66f779ad4f4c..19a79dd79eee 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile | |||
@@ -58,6 +58,7 @@ obj-$(CONFIG_RAW_DRIVER) += raw.o | |||
58 | obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o | 58 | obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o |
59 | obj-$(CONFIG_MSPEC) += mspec.o | 59 | obj-$(CONFIG_MSPEC) += mspec.o |
60 | obj-$(CONFIG_MMTIMER) += mmtimer.o | 60 | obj-$(CONFIG_MMTIMER) += mmtimer.o |
61 | obj-$(CONFIG_UV_MMTIMER) += uv_mmtimer.o | ||
61 | obj-$(CONFIG_VIOTAPE) += viotape.o | 62 | obj-$(CONFIG_VIOTAPE) += viotape.o |
62 | obj-$(CONFIG_HVCS) += hvcs.o | 63 | obj-$(CONFIG_HVCS) += hvcs.o |
63 | obj-$(CONFIG_IBM_BSR) += bsr.o | 64 | obj-$(CONFIG_IBM_BSR) += bsr.o |
diff --git a/drivers/char/bfin-otp.c b/drivers/char/bfin-otp.c index 0a01329451e4..e3dd24bff514 100644 --- a/drivers/char/bfin-otp.c +++ b/drivers/char/bfin-otp.c | |||
@@ -1,8 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Blackfin On-Chip OTP Memory Interface | 2 | * Blackfin On-Chip OTP Memory Interface |
3 | * Supports BF52x/BF54x | ||
4 | * | 3 | * |
5 | * Copyright 2007-2008 Analog Devices Inc. | 4 | * Copyright 2007-2009 Analog Devices Inc. |
6 | * | 5 | * |
7 | * Enter bugs at http://blackfin.uclinux.org/ | 6 | * Enter bugs at http://blackfin.uclinux.org/ |
8 | * | 7 | * |
@@ -17,8 +16,10 @@ | |||
17 | #include <linux/module.h> | 16 | #include <linux/module.h> |
18 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
19 | #include <linux/types.h> | 18 | #include <linux/types.h> |
19 | #include <mtd/mtd-abi.h> | ||
20 | 20 | ||
21 | #include <asm/blackfin.h> | 21 | #include <asm/blackfin.h> |
22 | #include <asm/bfrom.h> | ||
22 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
23 | 24 | ||
24 | #define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args) | 25 | #define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args) |
@@ -30,39 +31,6 @@ | |||
30 | 31 | ||
31 | static DEFINE_MUTEX(bfin_otp_lock); | 32 | static DEFINE_MUTEX(bfin_otp_lock); |
32 | 33 | ||
33 | /* OTP Boot ROM functions */ | ||
34 | #define _BOOTROM_OTP_COMMAND 0xEF000018 | ||
35 | #define _BOOTROM_OTP_READ 0xEF00001A | ||
36 | #define _BOOTROM_OTP_WRITE 0xEF00001C | ||
37 | |||
38 | static u32 (* const otp_command)(u32 command, u32 value) = (void *)_BOOTROM_OTP_COMMAND; | ||
39 | static u32 (* const otp_read)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_READ; | ||
40 | static u32 (* const otp_write)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_WRITE; | ||
41 | |||
42 | /* otp_command(): defines for "command" */ | ||
43 | #define OTP_INIT 0x00000001 | ||
44 | #define OTP_CLOSE 0x00000002 | ||
45 | |||
46 | /* otp_{read,write}(): defines for "flags" */ | ||
47 | #define OTP_LOWER_HALF 0x00000000 /* select upper/lower 64-bit half (bit 0) */ | ||
48 | #define OTP_UPPER_HALF 0x00000001 | ||
49 | #define OTP_NO_ECC 0x00000010 /* do not use ECC */ | ||
50 | #define OTP_LOCK 0x00000020 /* sets page protection bit for page */ | ||
51 | #define OTP_ACCESS_READ 0x00001000 | ||
52 | #define OTP_ACCESS_READWRITE 0x00002000 | ||
53 | |||
54 | /* Return values for all functions */ | ||
55 | #define OTP_SUCCESS 0x00000000 | ||
56 | #define OTP_MASTER_ERROR 0x001 | ||
57 | #define OTP_WRITE_ERROR 0x003 | ||
58 | #define OTP_READ_ERROR 0x005 | ||
59 | #define OTP_ACC_VIO_ERROR 0x009 | ||
60 | #define OTP_DATA_MULT_ERROR 0x011 | ||
61 | #define OTP_ECC_MULT_ERROR 0x021 | ||
62 | #define OTP_PREV_WR_ERROR 0x041 | ||
63 | #define OTP_DATA_SB_WARN 0x100 | ||
64 | #define OTP_ECC_SB_WARN 0x200 | ||
65 | |||
66 | /** | 34 | /** |
67 | * bfin_otp_read - Read OTP pages | 35 | * bfin_otp_read - Read OTP pages |
68 | * | 36 | * |
@@ -86,9 +54,11 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count, | |||
86 | page = *pos / (sizeof(u64) * 2); | 54 | page = *pos / (sizeof(u64) * 2); |
87 | while (bytes_done < count) { | 55 | while (bytes_done < count) { |
88 | flags = (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF); | 56 | flags = (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF); |
89 | stamp("processing page %i (%s)", page, (flags == OTP_UPPER_HALF ? "upper" : "lower")); | 57 | stamp("processing page %i (0x%x:%s)", page, flags, |
90 | ret = otp_read(page, flags, &content); | 58 | (flags & OTP_UPPER_HALF ? "upper" : "lower")); |
59 | ret = bfrom_OtpRead(page, flags, &content); | ||
91 | if (ret & OTP_MASTER_ERROR) { | 60 | if (ret & OTP_MASTER_ERROR) { |
61 | stamp("error from otp: 0x%x", ret); | ||
92 | bytes_done = -EIO; | 62 | bytes_done = -EIO; |
93 | break; | 63 | break; |
94 | } | 64 | } |
@@ -96,7 +66,7 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count, | |||
96 | bytes_done = -EFAULT; | 66 | bytes_done = -EFAULT; |
97 | break; | 67 | break; |
98 | } | 68 | } |
99 | if (flags == OTP_UPPER_HALF) | 69 | if (flags & OTP_UPPER_HALF) |
100 | ++page; | 70 | ++page; |
101 | bytes_done += sizeof(content); | 71 | bytes_done += sizeof(content); |
102 | *pos += sizeof(content); | 72 | *pos += sizeof(content); |
@@ -108,14 +78,53 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count, | |||
108 | } | 78 | } |
109 | 79 | ||
110 | #ifdef CONFIG_BFIN_OTP_WRITE_ENABLE | 80 | #ifdef CONFIG_BFIN_OTP_WRITE_ENABLE |
81 | static bool allow_writes; | ||
82 | |||
83 | /** | ||
84 | * bfin_otp_init_timing - setup OTP timing parameters | ||
85 | * | ||
86 | * Required before doing any write operation. Algorithms from HRM. | ||
87 | */ | ||
88 | static u32 bfin_otp_init_timing(void) | ||
89 | { | ||
90 | u32 tp1, tp2, tp3, timing; | ||
91 | |||
92 | tp1 = get_sclk() / 1000000; | ||
93 | tp2 = (2 * get_sclk() / 10000000) << 8; | ||
94 | tp3 = (0x1401) << 15; | ||
95 | timing = tp1 | tp2 | tp3; | ||
96 | if (bfrom_OtpCommand(OTP_INIT, timing)) | ||
97 | return 0; | ||
98 | |||
99 | return timing; | ||
100 | } | ||
101 | |||
102 | /** | ||
103 | * bfin_otp_deinit_timing - set timings to only allow reads | ||
104 | * | ||
105 | * Should be called after all writes are done. | ||
106 | */ | ||
107 | static void bfin_otp_deinit_timing(u32 timing) | ||
108 | { | ||
109 | /* mask bits [31:15] so that any attempts to write fail */ | ||
110 | bfrom_OtpCommand(OTP_CLOSE, 0); | ||
111 | bfrom_OtpCommand(OTP_INIT, timing & ~(-1 << 15)); | ||
112 | bfrom_OtpCommand(OTP_CLOSE, 0); | ||
113 | } | ||
114 | |||
111 | /** | 115 | /** |
112 | * bfin_otp_write - Write OTP pages | 116 | * bfin_otp_write - write OTP pages |
113 | * | 117 | * |
114 | * All writes must be in half page chunks (half page == 64 bits). | 118 | * All writes must be in half page chunks (half page == 64 bits). |
115 | */ | 119 | */ |
116 | static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t count, loff_t *pos) | 120 | static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t count, loff_t *pos) |
117 | { | 121 | { |
118 | stampit(); | 122 | ssize_t bytes_done; |
123 | u32 timing, page, base_flags, flags, ret; | ||
124 | u64 content; | ||
125 | |||
126 | if (!allow_writes) | ||
127 | return -EACCES; | ||
119 | 128 | ||
120 | if (count % sizeof(u64)) | 129 | if (count % sizeof(u64)) |
121 | return -EMSGSIZE; | 130 | return -EMSGSIZE; |
@@ -123,20 +132,96 @@ static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t | |||
123 | if (mutex_lock_interruptible(&bfin_otp_lock)) | 132 | if (mutex_lock_interruptible(&bfin_otp_lock)) |
124 | return -ERESTARTSYS; | 133 | return -ERESTARTSYS; |
125 | 134 | ||
126 | /* need otp_init() documentation before this can be implemented */ | 135 | stampit(); |
136 | |||
137 | timing = bfin_otp_init_timing(); | ||
138 | if (timing == 0) { | ||
139 | mutex_unlock(&bfin_otp_lock); | ||
140 | return -EIO; | ||
141 | } | ||
142 | |||
143 | base_flags = OTP_CHECK_FOR_PREV_WRITE; | ||
144 | |||
145 | bytes_done = 0; | ||
146 | page = *pos / (sizeof(u64) * 2); | ||
147 | while (bytes_done < count) { | ||
148 | flags = base_flags | (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF); | ||
149 | stamp("processing page %i (0x%x:%s) from %p", page, flags, | ||
150 | (flags & OTP_UPPER_HALF ? "upper" : "lower"), buff + bytes_done); | ||
151 | if (copy_from_user(&content, buff + bytes_done, sizeof(content))) { | ||
152 | bytes_done = -EFAULT; | ||
153 | break; | ||
154 | } | ||
155 | ret = bfrom_OtpWrite(page, flags, &content); | ||
156 | if (ret & OTP_MASTER_ERROR) { | ||
157 | stamp("error from otp: 0x%x", ret); | ||
158 | bytes_done = -EIO; | ||
159 | break; | ||
160 | } | ||
161 | if (flags & OTP_UPPER_HALF) | ||
162 | ++page; | ||
163 | bytes_done += sizeof(content); | ||
164 | *pos += sizeof(content); | ||
165 | } | ||
166 | |||
167 | bfin_otp_deinit_timing(timing); | ||
127 | 168 | ||
128 | mutex_unlock(&bfin_otp_lock); | 169 | mutex_unlock(&bfin_otp_lock); |
129 | 170 | ||
171 | return bytes_done; | ||
172 | } | ||
173 | |||
174 | static long bfin_otp_ioctl(struct file *filp, unsigned cmd, unsigned long arg) | ||
175 | { | ||
176 | stampit(); | ||
177 | |||
178 | switch (cmd) { | ||
179 | case OTPLOCK: { | ||
180 | u32 timing; | ||
181 | int ret = -EIO; | ||
182 | |||
183 | if (!allow_writes) | ||
184 | return -EACCES; | ||
185 | |||
186 | if (mutex_lock_interruptible(&bfin_otp_lock)) | ||
187 | return -ERESTARTSYS; | ||
188 | |||
189 | timing = bfin_otp_init_timing(); | ||
190 | if (timing) { | ||
191 | u32 otp_result = bfrom_OtpWrite(arg, OTP_LOCK, NULL); | ||
192 | stamp("locking page %lu resulted in 0x%x", arg, otp_result); | ||
193 | if (!(otp_result & OTP_MASTER_ERROR)) | ||
194 | ret = 0; | ||
195 | |||
196 | bfin_otp_deinit_timing(timing); | ||
197 | } | ||
198 | |||
199 | mutex_unlock(&bfin_otp_lock); | ||
200 | |||
201 | return ret; | ||
202 | } | ||
203 | |||
204 | case MEMLOCK: | ||
205 | allow_writes = false; | ||
206 | return 0; | ||
207 | |||
208 | case MEMUNLOCK: | ||
209 | allow_writes = true; | ||
210 | return 0; | ||
211 | } | ||
212 | |||
130 | return -EINVAL; | 213 | return -EINVAL; |
131 | } | 214 | } |
132 | #else | 215 | #else |
133 | # define bfin_otp_write NULL | 216 | # define bfin_otp_write NULL |
217 | # define bfin_otp_ioctl NULL | ||
134 | #endif | 218 | #endif |
135 | 219 | ||
136 | static struct file_operations bfin_otp_fops = { | 220 | static struct file_operations bfin_otp_fops = { |
137 | .owner = THIS_MODULE, | 221 | .owner = THIS_MODULE, |
138 | .read = bfin_otp_read, | 222 | .unlocked_ioctl = bfin_otp_ioctl, |
139 | .write = bfin_otp_write, | 223 | .read = bfin_otp_read, |
224 | .write = bfin_otp_write, | ||
140 | }; | 225 | }; |
141 | 226 | ||
142 | static struct miscdevice bfin_otp_misc_device = { | 227 | static struct miscdevice bfin_otp_misc_device = { |
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 4a9f3492b921..70a770ac0138 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c | |||
@@ -166,9 +166,8 @@ static irqreturn_t hpet_interrupt(int irq, void *data) | |||
166 | unsigned long m, t; | 166 | unsigned long m, t; |
167 | 167 | ||
168 | t = devp->hd_ireqfreq; | 168 | t = devp->hd_ireqfreq; |
169 | m = read_counter(&devp->hd_hpet->hpet_mc); | 169 | m = read_counter(&devp->hd_timer->hpet_compare); |
170 | write_counter(t + m + devp->hd_hpets->hp_delta, | 170 | write_counter(t + m, &devp->hd_timer->hpet_compare); |
171 | &devp->hd_timer->hpet_compare); | ||
172 | } | 171 | } |
173 | 172 | ||
174 | if (devp->hd_flags & HPET_SHARED_IRQ) | 173 | if (devp->hd_flags & HPET_SHARED_IRQ) |
@@ -504,21 +503,25 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp) | |||
504 | g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK; | 503 | g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK; |
505 | 504 | ||
506 | if (devp->hd_flags & HPET_PERIODIC) { | 505 | if (devp->hd_flags & HPET_PERIODIC) { |
507 | write_counter(t, &timer->hpet_compare); | ||
508 | g |= Tn_TYPE_CNF_MASK; | 506 | g |= Tn_TYPE_CNF_MASK; |
509 | v |= Tn_TYPE_CNF_MASK; | 507 | v |= Tn_TYPE_CNF_MASK | Tn_VAL_SET_CNF_MASK; |
510 | writeq(v, &timer->hpet_config); | ||
511 | v |= Tn_VAL_SET_CNF_MASK; | ||
512 | writeq(v, &timer->hpet_config); | 508 | writeq(v, &timer->hpet_config); |
513 | local_irq_save(flags); | 509 | local_irq_save(flags); |
514 | 510 | ||
515 | /* NOTE: what we modify here is a hidden accumulator | 511 | /* |
512 | * NOTE: First we modify the hidden accumulator | ||
516 | * register supported by periodic-capable comparators. | 513 | * register supported by periodic-capable comparators. |
517 | * We never want to modify the (single) counter; that | 514 | * We never want to modify the (single) counter; that |
518 | * would affect all the comparators. | 515 | * would affect all the comparators. The value written |
516 | * is the counter value when the first interrupt is due. | ||
519 | */ | 517 | */ |
520 | m = read_counter(&hpet->hpet_mc); | 518 | m = read_counter(&hpet->hpet_mc); |
521 | write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare); | 519 | write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare); |
520 | /* | ||
521 | * Then we modify the comparator, indicating the period | ||
522 | * for subsequent interrupt. | ||
523 | */ | ||
524 | write_counter(t, &timer->hpet_compare); | ||
522 | } else { | 525 | } else { |
523 | local_irq_save(flags); | 526 | local_irq_save(flags); |
524 | m = read_counter(&hpet->hpet_mc); | 527 | m = read_counter(&hpet->hpet_mc); |
diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 0aede1d6a9ea..6c8b65d069e5 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c | |||
@@ -690,7 +690,7 @@ static ssize_t read_zero(struct file * file, char __user * buf, | |||
690 | 690 | ||
691 | if (chunk > PAGE_SIZE) | 691 | if (chunk > PAGE_SIZE) |
692 | chunk = PAGE_SIZE; /* Just for latency reasons */ | 692 | chunk = PAGE_SIZE; /* Just for latency reasons */ |
693 | unwritten = clear_user(buf, chunk); | 693 | unwritten = __clear_user(buf, chunk); |
694 | written += chunk - unwritten; | 694 | written += chunk - unwritten; |
695 | if (unwritten) | 695 | if (unwritten) |
696 | break; | 696 | break; |
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c index 94ad2c3bfc4a..a4ec50c95072 100644 --- a/drivers/char/mwave/mwavedd.c +++ b/drivers/char/mwave/mwavedd.c | |||
@@ -281,12 +281,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, | |||
281 | case IOCTL_MW_REGISTER_IPC: { | 281 | case IOCTL_MW_REGISTER_IPC: { |
282 | unsigned int ipcnum = (unsigned int) ioarg; | 282 | unsigned int ipcnum = (unsigned int) ioarg; |
283 | 283 | ||
284 | PRINTK_3(TRACE_MWAVE, | ||
285 | "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" | ||
286 | " ipcnum %x entry usIntCount %x\n", | ||
287 | ipcnum, | ||
288 | pDrvData->IPCs[ipcnum].usIntCount); | ||
289 | |||
290 | if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) { | 284 | if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) { |
291 | PRINTK_ERROR(KERN_ERR_MWAVE | 285 | PRINTK_ERROR(KERN_ERR_MWAVE |
292 | "mwavedd::mwave_ioctl:" | 286 | "mwavedd::mwave_ioctl:" |
@@ -295,6 +289,12 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, | |||
295 | ipcnum); | 289 | ipcnum); |
296 | return -EINVAL; | 290 | return -EINVAL; |
297 | } | 291 | } |
292 | PRINTK_3(TRACE_MWAVE, | ||
293 | "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC" | ||
294 | " ipcnum %x entry usIntCount %x\n", | ||
295 | ipcnum, | ||
296 | pDrvData->IPCs[ipcnum].usIntCount); | ||
297 | |||
298 | lock_kernel(); | 298 | lock_kernel(); |
299 | pDrvData->IPCs[ipcnum].bIsHere = FALSE; | 299 | pDrvData->IPCs[ipcnum].bIsHere = FALSE; |
300 | pDrvData->IPCs[ipcnum].bIsEnabled = TRUE; | 300 | pDrvData->IPCs[ipcnum].bIsEnabled = TRUE; |
@@ -310,11 +310,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, | |||
310 | case IOCTL_MW_GET_IPC: { | 310 | case IOCTL_MW_GET_IPC: { |
311 | unsigned int ipcnum = (unsigned int) ioarg; | 311 | unsigned int ipcnum = (unsigned int) ioarg; |
312 | 312 | ||
313 | PRINTK_3(TRACE_MWAVE, | ||
314 | "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" | ||
315 | " ipcnum %x, usIntCount %x\n", | ||
316 | ipcnum, | ||
317 | pDrvData->IPCs[ipcnum].usIntCount); | ||
318 | if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) { | 313 | if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) { |
319 | PRINTK_ERROR(KERN_ERR_MWAVE | 314 | PRINTK_ERROR(KERN_ERR_MWAVE |
320 | "mwavedd::mwave_ioctl:" | 315 | "mwavedd::mwave_ioctl:" |
@@ -322,6 +317,11 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd, | |||
322 | " Invalid ipcnum %x\n", ipcnum); | 317 | " Invalid ipcnum %x\n", ipcnum); |
323 | return -EINVAL; | 318 | return -EINVAL; |
324 | } | 319 | } |
320 | PRINTK_3(TRACE_MWAVE, | ||
321 | "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC" | ||
322 | " ipcnum %x, usIntCount %x\n", | ||
323 | ipcnum, | ||
324 | pDrvData->IPCs[ipcnum].usIntCount); | ||
325 | 325 | ||
326 | lock_kernel(); | 326 | lock_kernel(); |
327 | if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) { | 327 | if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) { |
diff --git a/drivers/char/random.c b/drivers/char/random.c index d8a9255e1a3f..04b505e5a5e2 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c | |||
@@ -1231,7 +1231,7 @@ static char sysctl_bootid[16]; | |||
1231 | * as an ASCII string in the standard UUID format. If accesses via the | 1231 | * as an ASCII string in the standard UUID format. If accesses via the |
1232 | * sysctl system call, it is returned as 16 bytes of binary data. | 1232 | * sysctl system call, it is returned as 16 bytes of binary data. |
1233 | */ | 1233 | */ |
1234 | static int proc_do_uuid(ctl_table *table, int write, struct file *filp, | 1234 | static int proc_do_uuid(ctl_table *table, int write, |
1235 | void __user *buffer, size_t *lenp, loff_t *ppos) | 1235 | void __user *buffer, size_t *lenp, loff_t *ppos) |
1236 | { | 1236 | { |
1237 | ctl_table fake_table; | 1237 | ctl_table fake_table; |
@@ -1254,7 +1254,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp, | |||
1254 | fake_table.data = buf; | 1254 | fake_table.data = buf; |
1255 | fake_table.maxlen = sizeof(buf); | 1255 | fake_table.maxlen = sizeof(buf); |
1256 | 1256 | ||
1257 | return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos); | 1257 | return proc_dostring(&fake_table, write, buffer, lenp, ppos); |
1258 | } | 1258 | } |
1259 | 1259 | ||
1260 | static int uuid_strategy(ctl_table *table, | 1260 | static int uuid_strategy(ctl_table *table, |
diff --git a/drivers/char/rio/rioctrl.c b/drivers/char/rio/rioctrl.c index eecee0f576d2..74339559f0b9 100644 --- a/drivers/char/rio/rioctrl.c +++ b/drivers/char/rio/rioctrl.c | |||
@@ -873,7 +873,7 @@ int riocontrol(struct rio_info *p, dev_t dev, int cmd, unsigned long arg, int su | |||
873 | /* | 873 | /* |
874 | ** It is important that the product code is an unsigned object! | 874 | ** It is important that the product code is an unsigned object! |
875 | */ | 875 | */ |
876 | if (DownLoad.ProductCode > MAX_PRODUCT) { | 876 | if (DownLoad.ProductCode >= MAX_PRODUCT) { |
877 | rio_dprintk(RIO_DEBUG_CTRL, "RIO_DOWNLOAD: Bad product code %d passed\n", DownLoad.ProductCode); | 877 | rio_dprintk(RIO_DEBUG_CTRL, "RIO_DOWNLOAD: Bad product code %d passed\n", DownLoad.ProductCode); |
878 | p->RIOError.Error = NO_SUCH_PRODUCT; | 878 | p->RIOError.Error = NO_SUCH_PRODUCT; |
879 | return -ENXIO; | 879 | return -ENXIO; |
diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c new file mode 100644 index 000000000000..867b67be9f0a --- /dev/null +++ b/drivers/char/uv_mmtimer.c | |||
@@ -0,0 +1,216 @@ | |||
1 | /* | ||
2 | * Timer device implementation for SGI UV platform. | ||
3 | * | ||
4 | * This file is subject to the terms and conditions of the GNU General Public | ||
5 | * License. See the file "COPYING" in the main directory of this archive | ||
6 | * for more details. | ||
7 | * | ||
8 | * Copyright (c) 2009 Silicon Graphics, Inc. All rights reserved. | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #include <linux/types.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/ioctl.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/mm.h> | ||
19 | #include <linux/fs.h> | ||
20 | #include <linux/mmtimer.h> | ||
21 | #include <linux/miscdevice.h> | ||
22 | #include <linux/posix-timers.h> | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/time.h> | ||
25 | #include <linux/math64.h> | ||
26 | #include <linux/smp_lock.h> | ||
27 | |||
28 | #include <asm/genapic.h> | ||
29 | #include <asm/uv/uv_hub.h> | ||
30 | #include <asm/uv/bios.h> | ||
31 | #include <asm/uv/uv.h> | ||
32 | |||
33 | MODULE_AUTHOR("Dimitri Sivanich <sivanich@sgi.com>"); | ||
34 | MODULE_DESCRIPTION("SGI UV Memory Mapped RTC Timer"); | ||
35 | MODULE_LICENSE("GPL"); | ||
36 | |||
37 | /* name of the device, usually in /dev */ | ||
38 | #define UV_MMTIMER_NAME "mmtimer" | ||
39 | #define UV_MMTIMER_DESC "SGI UV Memory Mapped RTC Timer" | ||
40 | #define UV_MMTIMER_VERSION "1.0" | ||
41 | |||
42 | static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, | ||
43 | unsigned long arg); | ||
44 | static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma); | ||
45 | |||
46 | /* | ||
47 | * Period in femtoseconds (10^-15 s) | ||
48 | */ | ||
49 | static unsigned long uv_mmtimer_femtoperiod; | ||
50 | |||
51 | static const struct file_operations uv_mmtimer_fops = { | ||
52 | .owner = THIS_MODULE, | ||
53 | .mmap = uv_mmtimer_mmap, | ||
54 | .unlocked_ioctl = uv_mmtimer_ioctl, | ||
55 | }; | ||
56 | |||
57 | /** | ||
58 | * uv_mmtimer_ioctl - ioctl interface for /dev/uv_mmtimer | ||
59 | * @file: file structure for the device | ||
60 | * @cmd: command to execute | ||
61 | * @arg: optional argument to command | ||
62 | * | ||
63 | * Executes the command specified by @cmd. Returns 0 for success, < 0 for | ||
64 | * failure. | ||
65 | * | ||
66 | * Valid commands: | ||
67 | * | ||
68 | * %MMTIMER_GETOFFSET - Should return the offset (relative to the start | ||
69 | * of the page where the registers are mapped) for the counter in question. | ||
70 | * | ||
71 | * %MMTIMER_GETRES - Returns the resolution of the clock in femto (10^-15) | ||
72 | * seconds | ||
73 | * | ||
74 | * %MMTIMER_GETFREQ - Copies the frequency of the clock in Hz to the address | ||
75 | * specified by @arg | ||
76 | * | ||
77 | * %MMTIMER_GETBITS - Returns the number of bits in the clock's counter | ||
78 | * | ||
79 | * %MMTIMER_MMAPAVAIL - Returns 1 if registers can be mmap'd into userspace | ||
80 | * | ||
81 | * %MMTIMER_GETCOUNTER - Gets the current value in the counter and places it | ||
82 | * in the address specified by @arg. | ||
83 | */ | ||
84 | static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd, | ||
85 | unsigned long arg) | ||
86 | { | ||
87 | int ret = 0; | ||
88 | |||
89 | switch (cmd) { | ||
90 | case MMTIMER_GETOFFSET: /* offset of the counter */ | ||
91 | /* | ||
92 | * UV RTC register is on its own page | ||
93 | */ | ||
94 | if (PAGE_SIZE <= (1 << 16)) | ||
95 | ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1)) | ||
96 | / 8; | ||
97 | else | ||
98 | ret = -ENOSYS; | ||
99 | break; | ||
100 | |||
101 | case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */ | ||
102 | if (copy_to_user((unsigned long __user *)arg, | ||
103 | &uv_mmtimer_femtoperiod, sizeof(unsigned long))) | ||
104 | ret = -EFAULT; | ||
105 | break; | ||
106 | |||
107 | case MMTIMER_GETFREQ: /* frequency in Hz */ | ||
108 | if (copy_to_user((unsigned long __user *)arg, | ||
109 | &sn_rtc_cycles_per_second, | ||
110 | sizeof(unsigned long))) | ||
111 | ret = -EFAULT; | ||
112 | break; | ||
113 | |||
114 | case MMTIMER_GETBITS: /* number of bits in the clock */ | ||
115 | ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK); | ||
116 | break; | ||
117 | |||
118 | case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */ | ||
119 | ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0; | ||
120 | break; | ||
121 | |||
122 | case MMTIMER_GETCOUNTER: | ||
123 | if (copy_to_user((unsigned long __user *)arg, | ||
124 | (unsigned long *)uv_local_mmr_address(UVH_RTC), | ||
125 | sizeof(unsigned long))) | ||
126 | ret = -EFAULT; | ||
127 | break; | ||
128 | default: | ||
129 | ret = -ENOTTY; | ||
130 | break; | ||
131 | } | ||
132 | return ret; | ||
133 | } | ||
134 | |||
135 | /** | ||
136 | * uv_mmtimer_mmap - maps the clock's registers into userspace | ||
137 | * @file: file structure for the device | ||
138 | * @vma: VMA to map the registers into | ||
139 | * | ||
140 | * Calls remap_pfn_range() to map the clock's registers into | ||
141 | * the calling process' address space. | ||
142 | */ | ||
143 | static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma) | ||
144 | { | ||
145 | unsigned long uv_mmtimer_addr; | ||
146 | |||
147 | if (vma->vm_end - vma->vm_start != PAGE_SIZE) | ||
148 | return -EINVAL; | ||
149 | |||
150 | if (vma->vm_flags & VM_WRITE) | ||
151 | return -EPERM; | ||
152 | |||
153 | if (PAGE_SIZE > (1 << 16)) | ||
154 | return -ENOSYS; | ||
155 | |||
156 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | ||
157 | |||
158 | uv_mmtimer_addr = UV_LOCAL_MMR_BASE | UVH_RTC; | ||
159 | uv_mmtimer_addr &= ~(PAGE_SIZE - 1); | ||
160 | uv_mmtimer_addr &= 0xfffffffffffffffUL; | ||
161 | |||
162 | if (remap_pfn_range(vma, vma->vm_start, uv_mmtimer_addr >> PAGE_SHIFT, | ||
163 | PAGE_SIZE, vma->vm_page_prot)) { | ||
164 | printk(KERN_ERR "remap_pfn_range failed in uv_mmtimer_mmap\n"); | ||
165 | return -EAGAIN; | ||
166 | } | ||
167 | |||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | static struct miscdevice uv_mmtimer_miscdev = { | ||
172 | MISC_DYNAMIC_MINOR, | ||
173 | UV_MMTIMER_NAME, | ||
174 | &uv_mmtimer_fops | ||
175 | }; | ||
176 | |||
177 | |||
178 | /** | ||
179 | * uv_mmtimer_init - device initialization routine | ||
180 | * | ||
181 | * Does initial setup for the uv_mmtimer device. | ||
182 | */ | ||
183 | static int __init uv_mmtimer_init(void) | ||
184 | { | ||
185 | if (!is_uv_system()) { | ||
186 | printk(KERN_ERR "%s: Hardware unsupported\n", UV_MMTIMER_NAME); | ||
187 | return -1; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * Sanity check the cycles/sec variable | ||
192 | */ | ||
193 | if (sn_rtc_cycles_per_second < 100000) { | ||
194 | printk(KERN_ERR "%s: unable to determine clock frequency\n", | ||
195 | UV_MMTIMER_NAME); | ||
196 | return -1; | ||
197 | } | ||
198 | |||
199 | uv_mmtimer_femtoperiod = ((unsigned long)1E15 + | ||
200 | sn_rtc_cycles_per_second / 2) / | ||
201 | sn_rtc_cycles_per_second; | ||
202 | |||
203 | if (misc_register(&uv_mmtimer_miscdev)) { | ||
204 | printk(KERN_ERR "%s: failed to register device\n", | ||
205 | UV_MMTIMER_NAME); | ||
206 | return -1; | ||
207 | } | ||
208 | |||
209 | printk(KERN_INFO "%s: v%s, %ld MHz\n", UV_MMTIMER_DESC, | ||
210 | UV_MMTIMER_VERSION, | ||
211 | sn_rtc_cycles_per_second/(unsigned long)1E6); | ||
212 | |||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | module_init(uv_mmtimer_init); | ||
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c index 25b743abfb59..52e6bb70a490 100644 --- a/drivers/dca/dca-core.c +++ b/drivers/dca/dca-core.c | |||
@@ -28,7 +28,7 @@ | |||
28 | #include <linux/device.h> | 28 | #include <linux/device.h> |
29 | #include <linux/dca.h> | 29 | #include <linux/dca.h> |
30 | 30 | ||
31 | #define DCA_VERSION "1.8" | 31 | #define DCA_VERSION "1.12.1" |
32 | 32 | ||
33 | MODULE_VERSION(DCA_VERSION); | 33 | MODULE_VERSION(DCA_VERSION); |
34 | MODULE_LICENSE("GPL"); | 34 | MODULE_LICENSE("GPL"); |
@@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation"); | |||
36 | 36 | ||
37 | static DEFINE_SPINLOCK(dca_lock); | 37 | static DEFINE_SPINLOCK(dca_lock); |
38 | 38 | ||
39 | static LIST_HEAD(dca_providers); | 39 | static LIST_HEAD(dca_domains); |
40 | 40 | ||
41 | static struct dca_provider *dca_find_provider_by_dev(struct device *dev) | 41 | static struct pci_bus *dca_pci_rc_from_dev(struct device *dev) |
42 | { | 42 | { |
43 | struct dca_provider *dca, *ret = NULL; | 43 | struct pci_dev *pdev = to_pci_dev(dev); |
44 | struct pci_bus *bus = pdev->bus; | ||
44 | 45 | ||
45 | list_for_each_entry(dca, &dca_providers, node) { | 46 | while (bus->parent) |
46 | if ((!dev) || (dca->ops->dev_managed(dca, dev))) { | 47 | bus = bus->parent; |
47 | ret = dca; | 48 | |
48 | break; | 49 | return bus; |
49 | } | 50 | } |
51 | |||
52 | static struct dca_domain *dca_allocate_domain(struct pci_bus *rc) | ||
53 | { | ||
54 | struct dca_domain *domain; | ||
55 | |||
56 | domain = kzalloc(sizeof(*domain), GFP_NOWAIT); | ||
57 | if (!domain) | ||
58 | return NULL; | ||
59 | |||
60 | INIT_LIST_HEAD(&domain->dca_providers); | ||
61 | domain->pci_rc = rc; | ||
62 | |||
63 | return domain; | ||
64 | } | ||
65 | |||
66 | static void dca_free_domain(struct dca_domain *domain) | ||
67 | { | ||
68 | list_del(&domain->node); | ||
69 | kfree(domain); | ||
70 | } | ||
71 | |||
72 | static struct dca_domain *dca_find_domain(struct pci_bus *rc) | ||
73 | { | ||
74 | struct dca_domain *domain; | ||
75 | |||
76 | list_for_each_entry(domain, &dca_domains, node) | ||
77 | if (domain->pci_rc == rc) | ||
78 | return domain; | ||
79 | |||
80 | return NULL; | ||
81 | } | ||
82 | |||
83 | static struct dca_domain *dca_get_domain(struct device *dev) | ||
84 | { | ||
85 | struct pci_bus *rc; | ||
86 | struct dca_domain *domain; | ||
87 | |||
88 | rc = dca_pci_rc_from_dev(dev); | ||
89 | domain = dca_find_domain(rc); | ||
90 | |||
91 | if (!domain) { | ||
92 | domain = dca_allocate_domain(rc); | ||
93 | if (domain) | ||
94 | list_add(&domain->node, &dca_domains); | ||
95 | } | ||
96 | |||
97 | return domain; | ||
98 | } | ||
99 | |||
100 | static struct dca_provider *dca_find_provider_by_dev(struct device *dev) | ||
101 | { | ||
102 | struct dca_provider *dca; | ||
103 | struct pci_bus *rc; | ||
104 | struct dca_domain *domain; | ||
105 | |||
106 | if (dev) { | ||
107 | rc = dca_pci_rc_from_dev(dev); | ||
108 | domain = dca_find_domain(rc); | ||
109 | if (!domain) | ||
110 | return NULL; | ||
111 | } else { | ||
112 | if (!list_empty(&dca_domains)) | ||
113 | domain = list_first_entry(&dca_domains, | ||
114 | struct dca_domain, | ||
115 | node); | ||
116 | else | ||
117 | return NULL; | ||
50 | } | 118 | } |
51 | 119 | ||
52 | return ret; | 120 | list_for_each_entry(dca, &domain->dca_providers, node) |
121 | if ((!dev) || (dca->ops->dev_managed(dca, dev))) | ||
122 | return dca; | ||
123 | |||
124 | return NULL; | ||
53 | } | 125 | } |
54 | 126 | ||
55 | /** | 127 | /** |
@@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev) | |||
61 | struct dca_provider *dca; | 133 | struct dca_provider *dca; |
62 | int err, slot = -ENODEV; | 134 | int err, slot = -ENODEV; |
63 | unsigned long flags; | 135 | unsigned long flags; |
136 | struct pci_bus *pci_rc; | ||
137 | struct dca_domain *domain; | ||
64 | 138 | ||
65 | if (!dev) | 139 | if (!dev) |
66 | return -EFAULT; | 140 | return -EFAULT; |
@@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev) | |||
74 | return -EEXIST; | 148 | return -EEXIST; |
75 | } | 149 | } |
76 | 150 | ||
77 | list_for_each_entry(dca, &dca_providers, node) { | 151 | pci_rc = dca_pci_rc_from_dev(dev); |
152 | domain = dca_find_domain(pci_rc); | ||
153 | if (!domain) { | ||
154 | spin_unlock_irqrestore(&dca_lock, flags); | ||
155 | return -ENODEV; | ||
156 | } | ||
157 | |||
158 | list_for_each_entry(dca, &domain->dca_providers, node) { | ||
78 | slot = dca->ops->add_requester(dca, dev); | 159 | slot = dca->ops->add_requester(dca, dev); |
79 | if (slot >= 0) | 160 | if (slot >= 0) |
80 | break; | 161 | break; |
@@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev) | |||
222 | { | 303 | { |
223 | int err; | 304 | int err; |
224 | unsigned long flags; | 305 | unsigned long flags; |
306 | struct dca_domain *domain; | ||
225 | 307 | ||
226 | err = dca_sysfs_add_provider(dca, dev); | 308 | err = dca_sysfs_add_provider(dca, dev); |
227 | if (err) | 309 | if (err) |
228 | return err; | 310 | return err; |
229 | 311 | ||
230 | spin_lock_irqsave(&dca_lock, flags); | 312 | spin_lock_irqsave(&dca_lock, flags); |
231 | list_add(&dca->node, &dca_providers); | 313 | domain = dca_get_domain(dev); |
314 | if (!domain) { | ||
315 | spin_unlock_irqrestore(&dca_lock, flags); | ||
316 | return -ENODEV; | ||
317 | } | ||
318 | list_add(&dca->node, &domain->dca_providers); | ||
232 | spin_unlock_irqrestore(&dca_lock, flags); | 319 | spin_unlock_irqrestore(&dca_lock, flags); |
233 | 320 | ||
234 | blocking_notifier_call_chain(&dca_provider_chain, | 321 | blocking_notifier_call_chain(&dca_provider_chain, |
@@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider); | |||
241 | * unregister_dca_provider - remove a dca provider | 328 | * unregister_dca_provider - remove a dca provider |
242 | * @dca - struct created by alloc_dca_provider() | 329 | * @dca - struct created by alloc_dca_provider() |
243 | */ | 330 | */ |
244 | void unregister_dca_provider(struct dca_provider *dca) | 331 | void unregister_dca_provider(struct dca_provider *dca, struct device *dev) |
245 | { | 332 | { |
246 | unsigned long flags; | 333 | unsigned long flags; |
334 | struct pci_bus *pci_rc; | ||
335 | struct dca_domain *domain; | ||
247 | 336 | ||
248 | blocking_notifier_call_chain(&dca_provider_chain, | 337 | blocking_notifier_call_chain(&dca_provider_chain, |
249 | DCA_PROVIDER_REMOVE, NULL); | 338 | DCA_PROVIDER_REMOVE, NULL); |
250 | 339 | ||
251 | spin_lock_irqsave(&dca_lock, flags); | 340 | spin_lock_irqsave(&dca_lock, flags); |
341 | |||
252 | list_del(&dca->node); | 342 | list_del(&dca->node); |
343 | |||
344 | pci_rc = dca_pci_rc_from_dev(dev); | ||
345 | domain = dca_find_domain(pci_rc); | ||
346 | if (list_empty(&domain->dca_providers)) | ||
347 | dca_free_domain(domain); | ||
348 | |||
253 | spin_unlock_irqrestore(&dca_lock, flags); | 349 | spin_unlock_irqrestore(&dca_lock, flags); |
254 | 350 | ||
255 | dca_sysfs_remove_provider(dca); | 351 | dca_sysfs_remove_provider(dca); |
@@ -276,7 +372,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify); | |||
276 | 372 | ||
277 | static int __init dca_init(void) | 373 | static int __init dca_init(void) |
278 | { | 374 | { |
279 | printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION); | 375 | pr_info("dca service started, version %s\n", DCA_VERSION); |
280 | return dca_sysfs_init(); | 376 | return dca_sysfs_init(); |
281 | } | 377 | } |
282 | 378 | ||
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 81e1020fb514..5903a88351bf 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig | |||
@@ -17,11 +17,15 @@ if DMADEVICES | |||
17 | 17 | ||
18 | comment "DMA Devices" | 18 | comment "DMA Devices" |
19 | 19 | ||
20 | config ASYNC_TX_DISABLE_CHANNEL_SWITCH | ||
21 | bool | ||
22 | |||
20 | config INTEL_IOATDMA | 23 | config INTEL_IOATDMA |
21 | tristate "Intel I/OAT DMA support" | 24 | tristate "Intel I/OAT DMA support" |
22 | depends on PCI && X86 | 25 | depends on PCI && X86 |
23 | select DMA_ENGINE | 26 | select DMA_ENGINE |
24 | select DCA | 27 | select DCA |
28 | select ASYNC_TX_DISABLE_CHANNEL_SWITCH | ||
25 | help | 29 | help |
26 | Enable support for the Intel(R) I/OAT DMA engine present | 30 | Enable support for the Intel(R) I/OAT DMA engine present |
27 | in recent Intel Xeon chipsets. | 31 | in recent Intel Xeon chipsets. |
@@ -97,6 +101,14 @@ config TXX9_DMAC | |||
97 | Support the TXx9 SoC internal DMA controller. This can be | 101 | Support the TXx9 SoC internal DMA controller. This can be |
98 | integrated in chips such as the Toshiba TX4927/38/39. | 102 | integrated in chips such as the Toshiba TX4927/38/39. |
99 | 103 | ||
104 | config SH_DMAE | ||
105 | tristate "Renesas SuperH DMAC support" | ||
106 | depends on SUPERH && SH_DMA | ||
107 | depends on !SH_DMA_API | ||
108 | select DMA_ENGINE | ||
109 | help | ||
110 | Enable support for the Renesas SuperH DMA controllers. | ||
111 | |||
100 | config DMA_ENGINE | 112 | config DMA_ENGINE |
101 | bool | 113 | bool |
102 | 114 | ||
@@ -116,7 +128,7 @@ config NET_DMA | |||
116 | 128 | ||
117 | config ASYNC_TX_DMA | 129 | config ASYNC_TX_DMA |
118 | bool "Async_tx: Offload support for the async_tx api" | 130 | bool "Async_tx: Offload support for the async_tx api" |
119 | depends on DMA_ENGINE && !HIGHMEM64G | 131 | depends on DMA_ENGINE |
120 | help | 132 | help |
121 | This allows the async_tx api to take advantage of offload engines for | 133 | This allows the async_tx api to take advantage of offload engines for |
122 | memcpy, memset, xor, and raid6 p+q operations. If your platform has | 134 | memcpy, memset, xor, and raid6 p+q operations. If your platform has |
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 40e1e0083571..eca71ba78ae9 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile | |||
@@ -1,8 +1,7 @@ | |||
1 | obj-$(CONFIG_DMA_ENGINE) += dmaengine.o | 1 | obj-$(CONFIG_DMA_ENGINE) += dmaengine.o |
2 | obj-$(CONFIG_NET_DMA) += iovlock.o | 2 | obj-$(CONFIG_NET_DMA) += iovlock.o |
3 | obj-$(CONFIG_DMATEST) += dmatest.o | 3 | obj-$(CONFIG_DMATEST) += dmatest.o |
4 | obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o | 4 | obj-$(CONFIG_INTEL_IOATDMA) += ioat/ |
5 | ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o | ||
6 | obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o | 5 | obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o |
7 | obj-$(CONFIG_FSL_DMA) += fsldma.o | 6 | obj-$(CONFIG_FSL_DMA) += fsldma.o |
8 | obj-$(CONFIG_MV_XOR) += mv_xor.o | 7 | obj-$(CONFIG_MV_XOR) += mv_xor.o |
@@ -10,3 +9,4 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o | |||
10 | obj-$(CONFIG_AT_HDMAC) += at_hdmac.o | 9 | obj-$(CONFIG_AT_HDMAC) += at_hdmac.o |
11 | obj-$(CONFIG_MX3_IPU) += ipu/ | 10 | obj-$(CONFIG_MX3_IPU) += ipu/ |
12 | obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o | 11 | obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o |
12 | obj-$(CONFIG_SH_DMAE) += shdma.o | ||
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index c8522e6f1ad2..7585c4164bd5 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c | |||
@@ -87,6 +87,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan, | |||
87 | desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys); | 87 | desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys); |
88 | if (desc) { | 88 | if (desc) { |
89 | memset(desc, 0, sizeof(struct at_desc)); | 89 | memset(desc, 0, sizeof(struct at_desc)); |
90 | INIT_LIST_HEAD(&desc->tx_list); | ||
90 | dma_async_tx_descriptor_init(&desc->txd, chan); | 91 | dma_async_tx_descriptor_init(&desc->txd, chan); |
91 | /* txd.flags will be overwritten in prep functions */ | 92 | /* txd.flags will be overwritten in prep functions */ |
92 | desc->txd.flags = DMA_CTRL_ACK; | 93 | desc->txd.flags = DMA_CTRL_ACK; |
@@ -150,11 +151,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) | |||
150 | struct at_desc *child; | 151 | struct at_desc *child; |
151 | 152 | ||
152 | spin_lock_bh(&atchan->lock); | 153 | spin_lock_bh(&atchan->lock); |
153 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | 154 | list_for_each_entry(child, &desc->tx_list, desc_node) |
154 | dev_vdbg(chan2dev(&atchan->chan_common), | 155 | dev_vdbg(chan2dev(&atchan->chan_common), |
155 | "moving child desc %p to freelist\n", | 156 | "moving child desc %p to freelist\n", |
156 | child); | 157 | child); |
157 | list_splice_init(&desc->txd.tx_list, &atchan->free_list); | 158 | list_splice_init(&desc->tx_list, &atchan->free_list); |
158 | dev_vdbg(chan2dev(&atchan->chan_common), | 159 | dev_vdbg(chan2dev(&atchan->chan_common), |
159 | "moving desc %p to freelist\n", desc); | 160 | "moving desc %p to freelist\n", desc); |
160 | list_add(&desc->desc_node, &atchan->free_list); | 161 | list_add(&desc->desc_node, &atchan->free_list); |
@@ -247,30 +248,33 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) | |||
247 | param = txd->callback_param; | 248 | param = txd->callback_param; |
248 | 249 | ||
249 | /* move children to free_list */ | 250 | /* move children to free_list */ |
250 | list_splice_init(&txd->tx_list, &atchan->free_list); | 251 | list_splice_init(&desc->tx_list, &atchan->free_list); |
251 | /* move myself to free_list */ | 252 | /* move myself to free_list */ |
252 | list_move(&desc->desc_node, &atchan->free_list); | 253 | list_move(&desc->desc_node, &atchan->free_list); |
253 | 254 | ||
254 | /* unmap dma addresses */ | 255 | /* unmap dma addresses */ |
255 | if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { | 256 | if (!atchan->chan_common.private) { |
256 | if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) | 257 | struct device *parent = chan2parent(&atchan->chan_common); |
257 | dma_unmap_single(chan2parent(&atchan->chan_common), | 258 | if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { |
258 | desc->lli.daddr, | 259 | if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) |
259 | desc->len, DMA_FROM_DEVICE); | 260 | dma_unmap_single(parent, |
260 | else | 261 | desc->lli.daddr, |
261 | dma_unmap_page(chan2parent(&atchan->chan_common), | 262 | desc->len, DMA_FROM_DEVICE); |
262 | desc->lli.daddr, | 263 | else |
263 | desc->len, DMA_FROM_DEVICE); | 264 | dma_unmap_page(parent, |
264 | } | 265 | desc->lli.daddr, |
265 | if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | 266 | desc->len, DMA_FROM_DEVICE); |
266 | if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) | 267 | } |
267 | dma_unmap_single(chan2parent(&atchan->chan_common), | 268 | if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { |
268 | desc->lli.saddr, | 269 | if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) |
269 | desc->len, DMA_TO_DEVICE); | 270 | dma_unmap_single(parent, |
270 | else | 271 | desc->lli.saddr, |
271 | dma_unmap_page(chan2parent(&atchan->chan_common), | 272 | desc->len, DMA_TO_DEVICE); |
272 | desc->lli.saddr, | 273 | else |
273 | desc->len, DMA_TO_DEVICE); | 274 | dma_unmap_page(parent, |
275 | desc->lli.saddr, | ||
276 | desc->len, DMA_TO_DEVICE); | ||
277 | } | ||
274 | } | 278 | } |
275 | 279 | ||
276 | /* | 280 | /* |
@@ -334,7 +338,7 @@ static void atc_cleanup_descriptors(struct at_dma_chan *atchan) | |||
334 | /* This one is currently in progress */ | 338 | /* This one is currently in progress */ |
335 | return; | 339 | return; |
336 | 340 | ||
337 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | 341 | list_for_each_entry(child, &desc->tx_list, desc_node) |
338 | if (!(child->lli.ctrla & ATC_DONE)) | 342 | if (!(child->lli.ctrla & ATC_DONE)) |
339 | /* Currently in progress */ | 343 | /* Currently in progress */ |
340 | return; | 344 | return; |
@@ -407,7 +411,7 @@ static void atc_handle_error(struct at_dma_chan *atchan) | |||
407 | dev_crit(chan2dev(&atchan->chan_common), | 411 | dev_crit(chan2dev(&atchan->chan_common), |
408 | " cookie: %d\n", bad_desc->txd.cookie); | 412 | " cookie: %d\n", bad_desc->txd.cookie); |
409 | atc_dump_lli(atchan, &bad_desc->lli); | 413 | atc_dump_lli(atchan, &bad_desc->lli); |
410 | list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) | 414 | list_for_each_entry(child, &bad_desc->tx_list, desc_node) |
411 | atc_dump_lli(atchan, &child->lli); | 415 | atc_dump_lli(atchan, &child->lli); |
412 | 416 | ||
413 | /* Pretend the descriptor completed successfully */ | 417 | /* Pretend the descriptor completed successfully */ |
@@ -587,7 +591,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, | |||
587 | prev->lli.dscr = desc->txd.phys; | 591 | prev->lli.dscr = desc->txd.phys; |
588 | /* insert the link descriptor to the LD ring */ | 592 | /* insert the link descriptor to the LD ring */ |
589 | list_add_tail(&desc->desc_node, | 593 | list_add_tail(&desc->desc_node, |
590 | &first->txd.tx_list); | 594 | &first->tx_list); |
591 | } | 595 | } |
592 | prev = desc; | 596 | prev = desc; |
593 | } | 597 | } |
@@ -646,8 +650,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | |||
646 | 650 | ||
647 | reg_width = atslave->reg_width; | 651 | reg_width = atslave->reg_width; |
648 | 652 | ||
649 | sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); | ||
650 | |||
651 | ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; | 653 | ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; |
652 | ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; | 654 | ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; |
653 | 655 | ||
@@ -687,7 +689,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | |||
687 | prev->lli.dscr = desc->txd.phys; | 689 | prev->lli.dscr = desc->txd.phys; |
688 | /* insert the link descriptor to the LD ring */ | 690 | /* insert the link descriptor to the LD ring */ |
689 | list_add_tail(&desc->desc_node, | 691 | list_add_tail(&desc->desc_node, |
690 | &first->txd.tx_list); | 692 | &first->tx_list); |
691 | } | 693 | } |
692 | prev = desc; | 694 | prev = desc; |
693 | total_len += len; | 695 | total_len += len; |
@@ -729,7 +731,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | |||
729 | prev->lli.dscr = desc->txd.phys; | 731 | prev->lli.dscr = desc->txd.phys; |
730 | /* insert the link descriptor to the LD ring */ | 732 | /* insert the link descriptor to the LD ring */ |
731 | list_add_tail(&desc->desc_node, | 733 | list_add_tail(&desc->desc_node, |
732 | &first->txd.tx_list); | 734 | &first->tx_list); |
733 | } | 735 | } |
734 | prev = desc; | 736 | prev = desc; |
735 | total_len += len; | 737 | total_len += len; |
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 4c972afc49ec..495457e3dc4b 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h | |||
@@ -165,6 +165,7 @@ struct at_desc { | |||
165 | struct at_lli lli; | 165 | struct at_lli lli; |
166 | 166 | ||
167 | /* THEN values for driver housekeeping */ | 167 | /* THEN values for driver housekeeping */ |
168 | struct list_head tx_list; | ||
168 | struct dma_async_tx_descriptor txd; | 169 | struct dma_async_tx_descriptor txd; |
169 | struct list_head desc_node; | 170 | struct list_head desc_node; |
170 | size_t len; | 171 | size_t len; |
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 5a87384ea4ff..bd0b248de2cf 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c | |||
@@ -608,6 +608,40 @@ void dmaengine_put(void) | |||
608 | } | 608 | } |
609 | EXPORT_SYMBOL(dmaengine_put); | 609 | EXPORT_SYMBOL(dmaengine_put); |
610 | 610 | ||
611 | static bool device_has_all_tx_types(struct dma_device *device) | ||
612 | { | ||
613 | /* A device that satisfies this test has channels that will never cause | ||
614 | * an async_tx channel switch event as all possible operation types can | ||
615 | * be handled. | ||
616 | */ | ||
617 | #ifdef CONFIG_ASYNC_TX_DMA | ||
618 | if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask)) | ||
619 | return false; | ||
620 | #endif | ||
621 | |||
622 | #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE) | ||
623 | if (!dma_has_cap(DMA_MEMCPY, device->cap_mask)) | ||
624 | return false; | ||
625 | #endif | ||
626 | |||
627 | #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE) | ||
628 | if (!dma_has_cap(DMA_MEMSET, device->cap_mask)) | ||
629 | return false; | ||
630 | #endif | ||
631 | |||
632 | #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE) | ||
633 | if (!dma_has_cap(DMA_XOR, device->cap_mask)) | ||
634 | return false; | ||
635 | #endif | ||
636 | |||
637 | #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE) | ||
638 | if (!dma_has_cap(DMA_PQ, device->cap_mask)) | ||
639 | return false; | ||
640 | #endif | ||
641 | |||
642 | return true; | ||
643 | } | ||
644 | |||
611 | static int get_dma_id(struct dma_device *device) | 645 | static int get_dma_id(struct dma_device *device) |
612 | { | 646 | { |
613 | int rc; | 647 | int rc; |
@@ -644,8 +678,12 @@ int dma_async_device_register(struct dma_device *device) | |||
644 | !device->device_prep_dma_memcpy); | 678 | !device->device_prep_dma_memcpy); |
645 | BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && | 679 | BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && |
646 | !device->device_prep_dma_xor); | 680 | !device->device_prep_dma_xor); |
647 | BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && | 681 | BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && |
648 | !device->device_prep_dma_zero_sum); | 682 | !device->device_prep_dma_xor_val); |
683 | BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && | ||
684 | !device->device_prep_dma_pq); | ||
685 | BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && | ||
686 | !device->device_prep_dma_pq_val); | ||
649 | BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && | 687 | BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && |
650 | !device->device_prep_dma_memset); | 688 | !device->device_prep_dma_memset); |
651 | BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && | 689 | BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && |
@@ -661,6 +699,12 @@ int dma_async_device_register(struct dma_device *device) | |||
661 | BUG_ON(!device->device_issue_pending); | 699 | BUG_ON(!device->device_issue_pending); |
662 | BUG_ON(!device->dev); | 700 | BUG_ON(!device->dev); |
663 | 701 | ||
702 | /* note: this only matters in the | ||
703 | * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case | ||
704 | */ | ||
705 | if (device_has_all_tx_types(device)) | ||
706 | dma_cap_set(DMA_ASYNC_TX, device->cap_mask); | ||
707 | |||
664 | idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); | 708 | idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL); |
665 | if (!idr_ref) | 709 | if (!idr_ref) |
666 | return -ENOMEM; | 710 | return -ENOMEM; |
@@ -933,55 +977,29 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, | |||
933 | { | 977 | { |
934 | tx->chan = chan; | 978 | tx->chan = chan; |
935 | spin_lock_init(&tx->lock); | 979 | spin_lock_init(&tx->lock); |
936 | INIT_LIST_HEAD(&tx->tx_list); | ||
937 | } | 980 | } |
938 | EXPORT_SYMBOL(dma_async_tx_descriptor_init); | 981 | EXPORT_SYMBOL(dma_async_tx_descriptor_init); |
939 | 982 | ||
940 | /* dma_wait_for_async_tx - spin wait for a transaction to complete | 983 | /* dma_wait_for_async_tx - spin wait for a transaction to complete |
941 | * @tx: in-flight transaction to wait on | 984 | * @tx: in-flight transaction to wait on |
942 | * | ||
943 | * This routine assumes that tx was obtained from a call to async_memcpy, | ||
944 | * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped | ||
945 | * and submitted). Walking the parent chain is only meant to cover for DMA | ||
946 | * drivers that do not implement the DMA_INTERRUPT capability and may race with | ||
947 | * the driver's descriptor cleanup routine. | ||
948 | */ | 985 | */ |
949 | enum dma_status | 986 | enum dma_status |
950 | dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) | 987 | dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) |
951 | { | 988 | { |
952 | enum dma_status status; | 989 | unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); |
953 | struct dma_async_tx_descriptor *iter; | ||
954 | struct dma_async_tx_descriptor *parent; | ||
955 | 990 | ||
956 | if (!tx) | 991 | if (!tx) |
957 | return DMA_SUCCESS; | 992 | return DMA_SUCCESS; |
958 | 993 | ||
959 | WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" | 994 | while (tx->cookie == -EBUSY) { |
960 | " %s\n", __func__, dma_chan_name(tx->chan)); | 995 | if (time_after_eq(jiffies, dma_sync_wait_timeout)) { |
961 | 996 | pr_err("%s timeout waiting for descriptor submission\n", | |
962 | /* poll through the dependency chain, return when tx is complete */ | 997 | __func__); |
963 | do { | 998 | return DMA_ERROR; |
964 | iter = tx; | 999 | } |
965 | 1000 | cpu_relax(); | |
966 | /* find the root of the unsubmitted dependency chain */ | 1001 | } |
967 | do { | 1002 | return dma_sync_wait(tx->chan, tx->cookie); |
968 | parent = iter->parent; | ||
969 | if (!parent) | ||
970 | break; | ||
971 | else | ||
972 | iter = parent; | ||
973 | } while (parent); | ||
974 | |||
975 | /* there is a small window for ->parent == NULL and | ||
976 | * ->cookie == -EBUSY | ||
977 | */ | ||
978 | while (iter->cookie == -EBUSY) | ||
979 | cpu_relax(); | ||
980 | |||
981 | status = dma_sync_wait(iter->chan, iter->cookie); | ||
982 | } while (status == DMA_IN_PROGRESS || (iter != tx)); | ||
983 | |||
984 | return status; | ||
985 | } | 1003 | } |
986 | EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); | 1004 | EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); |
987 | 1005 | ||
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index d93017fc7872..a32a4cf7b1e0 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c | |||
@@ -48,6 +48,11 @@ module_param(xor_sources, uint, S_IRUGO); | |||
48 | MODULE_PARM_DESC(xor_sources, | 48 | MODULE_PARM_DESC(xor_sources, |
49 | "Number of xor source buffers (default: 3)"); | 49 | "Number of xor source buffers (default: 3)"); |
50 | 50 | ||
51 | static unsigned int pq_sources = 3; | ||
52 | module_param(pq_sources, uint, S_IRUGO); | ||
53 | MODULE_PARM_DESC(pq_sources, | ||
54 | "Number of p+q source buffers (default: 3)"); | ||
55 | |||
51 | /* | 56 | /* |
52 | * Initialization patterns. All bytes in the source buffer has bit 7 | 57 | * Initialization patterns. All bytes in the source buffer has bit 7 |
53 | * set, all bytes in the destination buffer has bit 7 cleared. | 58 | * set, all bytes in the destination buffer has bit 7 cleared. |
@@ -232,6 +237,7 @@ static int dmatest_func(void *data) | |||
232 | dma_cookie_t cookie; | 237 | dma_cookie_t cookie; |
233 | enum dma_status status; | 238 | enum dma_status status; |
234 | enum dma_ctrl_flags flags; | 239 | enum dma_ctrl_flags flags; |
240 | u8 pq_coefs[pq_sources]; | ||
235 | int ret; | 241 | int ret; |
236 | int src_cnt; | 242 | int src_cnt; |
237 | int dst_cnt; | 243 | int dst_cnt; |
@@ -248,6 +254,11 @@ static int dmatest_func(void *data) | |||
248 | else if (thread->type == DMA_XOR) { | 254 | else if (thread->type == DMA_XOR) { |
249 | src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ | 255 | src_cnt = xor_sources | 1; /* force odd to ensure dst = src */ |
250 | dst_cnt = 1; | 256 | dst_cnt = 1; |
257 | } else if (thread->type == DMA_PQ) { | ||
258 | src_cnt = pq_sources | 1; /* force odd to ensure dst = src */ | ||
259 | dst_cnt = 2; | ||
260 | for (i = 0; i < pq_sources; i++) | ||
261 | pq_coefs[i] = 1; | ||
251 | } else | 262 | } else |
252 | goto err_srcs; | 263 | goto err_srcs; |
253 | 264 | ||
@@ -283,6 +294,7 @@ static int dmatest_func(void *data) | |||
283 | dma_addr_t dma_dsts[dst_cnt]; | 294 | dma_addr_t dma_dsts[dst_cnt]; |
284 | struct completion cmp; | 295 | struct completion cmp; |
285 | unsigned long tmo = msecs_to_jiffies(3000); | 296 | unsigned long tmo = msecs_to_jiffies(3000); |
297 | u8 align = 0; | ||
286 | 298 | ||
287 | total_tests++; | 299 | total_tests++; |
288 | 300 | ||
@@ -290,6 +302,18 @@ static int dmatest_func(void *data) | |||
290 | src_off = dmatest_random() % (test_buf_size - len + 1); | 302 | src_off = dmatest_random() % (test_buf_size - len + 1); |
291 | dst_off = dmatest_random() % (test_buf_size - len + 1); | 303 | dst_off = dmatest_random() % (test_buf_size - len + 1); |
292 | 304 | ||
305 | /* honor alignment restrictions */ | ||
306 | if (thread->type == DMA_MEMCPY) | ||
307 | align = dev->copy_align; | ||
308 | else if (thread->type == DMA_XOR) | ||
309 | align = dev->xor_align; | ||
310 | else if (thread->type == DMA_PQ) | ||
311 | align = dev->pq_align; | ||
312 | |||
313 | len = (len >> align) << align; | ||
314 | src_off = (src_off >> align) << align; | ||
315 | dst_off = (dst_off >> align) << align; | ||
316 | |||
293 | dmatest_init_srcs(thread->srcs, src_off, len); | 317 | dmatest_init_srcs(thread->srcs, src_off, len); |
294 | dmatest_init_dsts(thread->dsts, dst_off, len); | 318 | dmatest_init_dsts(thread->dsts, dst_off, len); |
295 | 319 | ||
@@ -306,6 +330,7 @@ static int dmatest_func(void *data) | |||
306 | DMA_BIDIRECTIONAL); | 330 | DMA_BIDIRECTIONAL); |
307 | } | 331 | } |
308 | 332 | ||
333 | |||
309 | if (thread->type == DMA_MEMCPY) | 334 | if (thread->type == DMA_MEMCPY) |
310 | tx = dev->device_prep_dma_memcpy(chan, | 335 | tx = dev->device_prep_dma_memcpy(chan, |
311 | dma_dsts[0] + dst_off, | 336 | dma_dsts[0] + dst_off, |
@@ -316,6 +341,15 @@ static int dmatest_func(void *data) | |||
316 | dma_dsts[0] + dst_off, | 341 | dma_dsts[0] + dst_off, |
317 | dma_srcs, xor_sources, | 342 | dma_srcs, xor_sources, |
318 | len, flags); | 343 | len, flags); |
344 | else if (thread->type == DMA_PQ) { | ||
345 | dma_addr_t dma_pq[dst_cnt]; | ||
346 | |||
347 | for (i = 0; i < dst_cnt; i++) | ||
348 | dma_pq[i] = dma_dsts[i] + dst_off; | ||
349 | tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs, | ||
350 | pq_sources, pq_coefs, | ||
351 | len, flags); | ||
352 | } | ||
319 | 353 | ||
320 | if (!tx) { | 354 | if (!tx) { |
321 | for (i = 0; i < src_cnt; i++) | 355 | for (i = 0; i < src_cnt; i++) |
@@ -459,6 +493,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty | |||
459 | op = "copy"; | 493 | op = "copy"; |
460 | else if (type == DMA_XOR) | 494 | else if (type == DMA_XOR) |
461 | op = "xor"; | 495 | op = "xor"; |
496 | else if (type == DMA_PQ) | ||
497 | op = "pq"; | ||
462 | else | 498 | else |
463 | return -EINVAL; | 499 | return -EINVAL; |
464 | 500 | ||
@@ -514,6 +550,10 @@ static int dmatest_add_channel(struct dma_chan *chan) | |||
514 | cnt = dmatest_add_threads(dtc, DMA_XOR); | 550 | cnt = dmatest_add_threads(dtc, DMA_XOR); |
515 | thread_count += cnt > 0 ? cnt : 0; | 551 | thread_count += cnt > 0 ? cnt : 0; |
516 | } | 552 | } |
553 | if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { | ||
554 | cnt = dmatest_add_threads(dtc, DMA_PQ); | ||
555 | thread_count += cnt > 0 ?: 0; | ||
556 | } | ||
517 | 557 | ||
518 | pr_info("dmatest: Started %u threads using %s\n", | 558 | pr_info("dmatest: Started %u threads using %s\n", |
519 | thread_count, dma_chan_name(chan)); | 559 | thread_count, dma_chan_name(chan)); |
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 933c143b6a74..2eea823516a7 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c | |||
@@ -116,7 +116,7 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc) | |||
116 | { | 116 | { |
117 | struct dw_desc *child; | 117 | struct dw_desc *child; |
118 | 118 | ||
119 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | 119 | list_for_each_entry(child, &desc->tx_list, desc_node) |
120 | dma_sync_single_for_cpu(chan2parent(&dwc->chan), | 120 | dma_sync_single_for_cpu(chan2parent(&dwc->chan), |
121 | child->txd.phys, sizeof(child->lli), | 121 | child->txd.phys, sizeof(child->lli), |
122 | DMA_TO_DEVICE); | 122 | DMA_TO_DEVICE); |
@@ -137,11 +137,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) | |||
137 | dwc_sync_desc_for_cpu(dwc, desc); | 137 | dwc_sync_desc_for_cpu(dwc, desc); |
138 | 138 | ||
139 | spin_lock_bh(&dwc->lock); | 139 | spin_lock_bh(&dwc->lock); |
140 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | 140 | list_for_each_entry(child, &desc->tx_list, desc_node) |
141 | dev_vdbg(chan2dev(&dwc->chan), | 141 | dev_vdbg(chan2dev(&dwc->chan), |
142 | "moving child desc %p to freelist\n", | 142 | "moving child desc %p to freelist\n", |
143 | child); | 143 | child); |
144 | list_splice_init(&desc->txd.tx_list, &dwc->free_list); | 144 | list_splice_init(&desc->tx_list, &dwc->free_list); |
145 | dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); | 145 | dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); |
146 | list_add(&desc->desc_node, &dwc->free_list); | 146 | list_add(&desc->desc_node, &dwc->free_list); |
147 | spin_unlock_bh(&dwc->lock); | 147 | spin_unlock_bh(&dwc->lock); |
@@ -209,19 +209,28 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) | |||
209 | param = txd->callback_param; | 209 | param = txd->callback_param; |
210 | 210 | ||
211 | dwc_sync_desc_for_cpu(dwc, desc); | 211 | dwc_sync_desc_for_cpu(dwc, desc); |
212 | list_splice_init(&txd->tx_list, &dwc->free_list); | 212 | list_splice_init(&desc->tx_list, &dwc->free_list); |
213 | list_move(&desc->desc_node, &dwc->free_list); | 213 | list_move(&desc->desc_node, &dwc->free_list); |
214 | 214 | ||
215 | /* | 215 | if (!dwc->chan.private) { |
216 | * We use dma_unmap_page() regardless of how the buffers were | 216 | struct device *parent = chan2parent(&dwc->chan); |
217 | * mapped before they were submitted... | 217 | if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { |
218 | */ | 218 | if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) |
219 | if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) | 219 | dma_unmap_single(parent, desc->lli.dar, |
220 | dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar, | 220 | desc->len, DMA_FROM_DEVICE); |
221 | desc->len, DMA_FROM_DEVICE); | 221 | else |
222 | if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) | 222 | dma_unmap_page(parent, desc->lli.dar, |
223 | dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar, | 223 | desc->len, DMA_FROM_DEVICE); |
224 | desc->len, DMA_TO_DEVICE); | 224 | } |
225 | if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
226 | if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) | ||
227 | dma_unmap_single(parent, desc->lli.sar, | ||
228 | desc->len, DMA_TO_DEVICE); | ||
229 | else | ||
230 | dma_unmap_page(parent, desc->lli.sar, | ||
231 | desc->len, DMA_TO_DEVICE); | ||
232 | } | ||
233 | } | ||
225 | 234 | ||
226 | /* | 235 | /* |
227 | * The API requires that no submissions are done from a | 236 | * The API requires that no submissions are done from a |
@@ -289,7 +298,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) | |||
289 | /* This one is currently in progress */ | 298 | /* This one is currently in progress */ |
290 | return; | 299 | return; |
291 | 300 | ||
292 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | 301 | list_for_each_entry(child, &desc->tx_list, desc_node) |
293 | if (child->lli.llp == llp) | 302 | if (child->lli.llp == llp) |
294 | /* Currently in progress */ | 303 | /* Currently in progress */ |
295 | return; | 304 | return; |
@@ -356,7 +365,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) | |||
356 | dev_printk(KERN_CRIT, chan2dev(&dwc->chan), | 365 | dev_printk(KERN_CRIT, chan2dev(&dwc->chan), |
357 | " cookie: %d\n", bad_desc->txd.cookie); | 366 | " cookie: %d\n", bad_desc->txd.cookie); |
358 | dwc_dump_lli(dwc, &bad_desc->lli); | 367 | dwc_dump_lli(dwc, &bad_desc->lli); |
359 | list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) | 368 | list_for_each_entry(child, &bad_desc->tx_list, desc_node) |
360 | dwc_dump_lli(dwc, &child->lli); | 369 | dwc_dump_lli(dwc, &child->lli); |
361 | 370 | ||
362 | /* Pretend the descriptor completed successfully */ | 371 | /* Pretend the descriptor completed successfully */ |
@@ -608,7 +617,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, | |||
608 | prev->txd.phys, sizeof(prev->lli), | 617 | prev->txd.phys, sizeof(prev->lli), |
609 | DMA_TO_DEVICE); | 618 | DMA_TO_DEVICE); |
610 | list_add_tail(&desc->desc_node, | 619 | list_add_tail(&desc->desc_node, |
611 | &first->txd.tx_list); | 620 | &first->tx_list); |
612 | } | 621 | } |
613 | prev = desc; | 622 | prev = desc; |
614 | } | 623 | } |
@@ -658,8 +667,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | |||
658 | reg_width = dws->reg_width; | 667 | reg_width = dws->reg_width; |
659 | prev = first = NULL; | 668 | prev = first = NULL; |
660 | 669 | ||
661 | sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction); | ||
662 | |||
663 | switch (direction) { | 670 | switch (direction) { |
664 | case DMA_TO_DEVICE: | 671 | case DMA_TO_DEVICE: |
665 | ctllo = (DWC_DEFAULT_CTLLO | 672 | ctllo = (DWC_DEFAULT_CTLLO |
@@ -700,7 +707,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | |||
700 | sizeof(prev->lli), | 707 | sizeof(prev->lli), |
701 | DMA_TO_DEVICE); | 708 | DMA_TO_DEVICE); |
702 | list_add_tail(&desc->desc_node, | 709 | list_add_tail(&desc->desc_node, |
703 | &first->txd.tx_list); | 710 | &first->tx_list); |
704 | } | 711 | } |
705 | prev = desc; | 712 | prev = desc; |
706 | total_len += len; | 713 | total_len += len; |
@@ -746,7 +753,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | |||
746 | sizeof(prev->lli), | 753 | sizeof(prev->lli), |
747 | DMA_TO_DEVICE); | 754 | DMA_TO_DEVICE); |
748 | list_add_tail(&desc->desc_node, | 755 | list_add_tail(&desc->desc_node, |
749 | &first->txd.tx_list); | 756 | &first->tx_list); |
750 | } | 757 | } |
751 | prev = desc; | 758 | prev = desc; |
752 | total_len += len; | 759 | total_len += len; |
@@ -902,6 +909,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) | |||
902 | break; | 909 | break; |
903 | } | 910 | } |
904 | 911 | ||
912 | INIT_LIST_HEAD(&desc->tx_list); | ||
905 | dma_async_tx_descriptor_init(&desc->txd, chan); | 913 | dma_async_tx_descriptor_init(&desc->txd, chan); |
906 | desc->txd.tx_submit = dwc_tx_submit; | 914 | desc->txd.tx_submit = dwc_tx_submit; |
907 | desc->txd.flags = DMA_CTRL_ACK; | 915 | desc->txd.flags = DMA_CTRL_ACK; |
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 13a580767031..d9a939f67f46 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h | |||
@@ -217,6 +217,7 @@ struct dw_desc { | |||
217 | 217 | ||
218 | /* THEN values for driver housekeeping */ | 218 | /* THEN values for driver housekeeping */ |
219 | struct list_head desc_node; | 219 | struct list_head desc_node; |
220 | struct list_head tx_list; | ||
220 | struct dma_async_tx_descriptor txd; | 221 | struct dma_async_tx_descriptor txd; |
221 | size_t len; | 222 | size_t len; |
222 | }; | 223 | }; |
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index ef87a8984145..296f9e747fac 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/dmapool.h> | 34 | #include <linux/dmapool.h> |
35 | #include <linux/of_platform.h> | 35 | #include <linux/of_platform.h> |
36 | 36 | ||
37 | #include <asm/fsldma.h> | ||
37 | #include "fsldma.h" | 38 | #include "fsldma.h" |
38 | 39 | ||
39 | static void dma_init(struct fsl_dma_chan *fsl_chan) | 40 | static void dma_init(struct fsl_dma_chan *fsl_chan) |
@@ -280,28 +281,40 @@ static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size) | |||
280 | } | 281 | } |
281 | 282 | ||
282 | /** | 283 | /** |
283 | * fsl_chan_toggle_ext_pause - Toggle channel external pause status | 284 | * fsl_chan_set_request_count - Set DMA Request Count for external control |
284 | * @fsl_chan : Freescale DMA channel | 285 | * @fsl_chan : Freescale DMA channel |
285 | * @size : Pause control size, 0 for disable external pause control. | 286 | * @size : Number of bytes to transfer in a single request |
286 | * The maximum is 1024. | 287 | * |
288 | * The Freescale DMA channel can be controlled by the external signal DREQ#. | ||
289 | * The DMA request count is how many bytes are allowed to transfer before | ||
290 | * pausing the channel, after which a new assertion of DREQ# resumes channel | ||
291 | * operation. | ||
287 | * | 292 | * |
288 | * The Freescale DMA channel can be controlled by the external | 293 | * A size of 0 disables external pause control. The maximum size is 1024. |
289 | * signal DREQ#. The pause control size is how many bytes are allowed | ||
290 | * to transfer before pausing the channel, after which a new assertion | ||
291 | * of DREQ# resumes channel operation. | ||
292 | */ | 294 | */ |
293 | static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size) | 295 | static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size) |
294 | { | 296 | { |
295 | if (size > 1024) | 297 | BUG_ON(size > 1024); |
296 | return; | 298 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, |
299 | DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | ||
300 | | ((__ilog2(size) << 24) & 0x0f000000), | ||
301 | 32); | ||
302 | } | ||
297 | 303 | ||
298 | if (size) { | 304 | /** |
299 | DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, | 305 | * fsl_chan_toggle_ext_pause - Toggle channel external pause status |
300 | DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | 306 | * @fsl_chan : Freescale DMA channel |
301 | | ((__ilog2(size) << 24) & 0x0f000000), | 307 | * @enable : 0 is disabled, 1 is enabled. |
302 | 32); | 308 | * |
309 | * The Freescale DMA channel can be controlled by the external signal DREQ#. | ||
310 | * The DMA Request Count feature should be used in addition to this feature | ||
311 | * to set the number of bytes to transfer before pausing the channel. | ||
312 | */ | ||
313 | static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable) | ||
314 | { | ||
315 | if (enable) | ||
303 | fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT; | 316 | fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT; |
304 | } else | 317 | else |
305 | fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT; | 318 | fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT; |
306 | } | 319 | } |
307 | 320 | ||
@@ -326,7 +339,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable) | |||
326 | static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) | 339 | static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) |
327 | { | 340 | { |
328 | struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan); | 341 | struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan); |
329 | struct fsl_desc_sw *desc; | 342 | struct fsl_desc_sw *desc = tx_to_fsl_desc(tx); |
343 | struct fsl_desc_sw *child; | ||
330 | unsigned long flags; | 344 | unsigned long flags; |
331 | dma_cookie_t cookie; | 345 | dma_cookie_t cookie; |
332 | 346 | ||
@@ -334,7 +348,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) | |||
334 | spin_lock_irqsave(&fsl_chan->desc_lock, flags); | 348 | spin_lock_irqsave(&fsl_chan->desc_lock, flags); |
335 | 349 | ||
336 | cookie = fsl_chan->common.cookie; | 350 | cookie = fsl_chan->common.cookie; |
337 | list_for_each_entry(desc, &tx->tx_list, node) { | 351 | list_for_each_entry(child, &desc->tx_list, node) { |
338 | cookie++; | 352 | cookie++; |
339 | if (cookie < 0) | 353 | if (cookie < 0) |
340 | cookie = 1; | 354 | cookie = 1; |
@@ -343,8 +357,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) | |||
343 | } | 357 | } |
344 | 358 | ||
345 | fsl_chan->common.cookie = cookie; | 359 | fsl_chan->common.cookie = cookie; |
346 | append_ld_queue(fsl_chan, tx_to_fsl_desc(tx)); | 360 | append_ld_queue(fsl_chan, desc); |
347 | list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev); | 361 | list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev); |
348 | 362 | ||
349 | spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); | 363 | spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); |
350 | 364 | ||
@@ -366,6 +380,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor( | |||
366 | desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc); | 380 | desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc); |
367 | if (desc_sw) { | 381 | if (desc_sw) { |
368 | memset(desc_sw, 0, sizeof(struct fsl_desc_sw)); | 382 | memset(desc_sw, 0, sizeof(struct fsl_desc_sw)); |
383 | INIT_LIST_HEAD(&desc_sw->tx_list); | ||
369 | dma_async_tx_descriptor_init(&desc_sw->async_tx, | 384 | dma_async_tx_descriptor_init(&desc_sw->async_tx, |
370 | &fsl_chan->common); | 385 | &fsl_chan->common); |
371 | desc_sw->async_tx.tx_submit = fsl_dma_tx_submit; | 386 | desc_sw->async_tx.tx_submit = fsl_dma_tx_submit; |
@@ -455,7 +470,7 @@ fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags) | |||
455 | new->async_tx.flags = flags; | 470 | new->async_tx.flags = flags; |
456 | 471 | ||
457 | /* Insert the link descriptor to the LD ring */ | 472 | /* Insert the link descriptor to the LD ring */ |
458 | list_add_tail(&new->node, &new->async_tx.tx_list); | 473 | list_add_tail(&new->node, &new->tx_list); |
459 | 474 | ||
460 | /* Set End-of-link to the last link descriptor of new list*/ | 475 | /* Set End-of-link to the last link descriptor of new list*/ |
461 | set_ld_eol(fsl_chan, new); | 476 | set_ld_eol(fsl_chan, new); |
@@ -513,7 +528,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( | |||
513 | dma_dest += copy; | 528 | dma_dest += copy; |
514 | 529 | ||
515 | /* Insert the link descriptor to the LD ring */ | 530 | /* Insert the link descriptor to the LD ring */ |
516 | list_add_tail(&new->node, &first->async_tx.tx_list); | 531 | list_add_tail(&new->node, &first->tx_list); |
517 | } while (len); | 532 | } while (len); |
518 | 533 | ||
519 | new->async_tx.flags = flags; /* client is in control of this ack */ | 534 | new->async_tx.flags = flags; /* client is in control of this ack */ |
@@ -528,7 +543,7 @@ fail: | |||
528 | if (!first) | 543 | if (!first) |
529 | return NULL; | 544 | return NULL; |
530 | 545 | ||
531 | list = &first->async_tx.tx_list; | 546 | list = &first->tx_list; |
532 | list_for_each_entry_safe_reverse(new, prev, list, node) { | 547 | list_for_each_entry_safe_reverse(new, prev, list, node) { |
533 | list_del(&new->node); | 548 | list_del(&new->node); |
534 | dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); | 549 | dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); |
@@ -538,6 +553,229 @@ fail: | |||
538 | } | 553 | } |
539 | 554 | ||
540 | /** | 555 | /** |
556 | * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction | ||
557 | * @chan: DMA channel | ||
558 | * @sgl: scatterlist to transfer to/from | ||
559 | * @sg_len: number of entries in @sgl | ||
560 | * @direction: DMA direction | ||
561 | * @flags: DMAEngine flags | ||
562 | * | ||
563 | * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the | ||
564 | * DMA_SLAVE API, this gets the device-specific information from the | ||
565 | * chan->private variable. | ||
566 | */ | ||
567 | static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg( | ||
568 | struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, | ||
569 | enum dma_data_direction direction, unsigned long flags) | ||
570 | { | ||
571 | struct fsl_dma_chan *fsl_chan; | ||
572 | struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL; | ||
573 | struct fsl_dma_slave *slave; | ||
574 | struct list_head *tx_list; | ||
575 | size_t copy; | ||
576 | |||
577 | int i; | ||
578 | struct scatterlist *sg; | ||
579 | size_t sg_used; | ||
580 | size_t hw_used; | ||
581 | struct fsl_dma_hw_addr *hw; | ||
582 | dma_addr_t dma_dst, dma_src; | ||
583 | |||
584 | if (!chan) | ||
585 | return NULL; | ||
586 | |||
587 | if (!chan->private) | ||
588 | return NULL; | ||
589 | |||
590 | fsl_chan = to_fsl_chan(chan); | ||
591 | slave = chan->private; | ||
592 | |||
593 | if (list_empty(&slave->addresses)) | ||
594 | return NULL; | ||
595 | |||
596 | hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry); | ||
597 | hw_used = 0; | ||
598 | |||
599 | /* | ||
600 | * Build the hardware transaction to copy from the scatterlist to | ||
601 | * the hardware, or from the hardware to the scatterlist | ||
602 | * | ||
603 | * If you are copying from the hardware to the scatterlist and it | ||
604 | * takes two hardware entries to fill an entire page, then both | ||
605 | * hardware entries will be coalesced into the same page | ||
606 | * | ||
607 | * If you are copying from the scatterlist to the hardware and a | ||
608 | * single page can fill two hardware entries, then the data will | ||
609 | * be read out of the page into the first hardware entry, and so on | ||
610 | */ | ||
611 | for_each_sg(sgl, sg, sg_len, i) { | ||
612 | sg_used = 0; | ||
613 | |||
614 | /* Loop until the entire scatterlist entry is used */ | ||
615 | while (sg_used < sg_dma_len(sg)) { | ||
616 | |||
617 | /* | ||
618 | * If we've used up the current hardware address/length | ||
619 | * pair, we need to load a new one | ||
620 | * | ||
621 | * This is done in a while loop so that descriptors with | ||
622 | * length == 0 will be skipped | ||
623 | */ | ||
624 | while (hw_used >= hw->length) { | ||
625 | |||
626 | /* | ||
627 | * If the current hardware entry is the last | ||
628 | * entry in the list, we're finished | ||
629 | */ | ||
630 | if (list_is_last(&hw->entry, &slave->addresses)) | ||
631 | goto finished; | ||
632 | |||
633 | /* Get the next hardware address/length pair */ | ||
634 | hw = list_entry(hw->entry.next, | ||
635 | struct fsl_dma_hw_addr, entry); | ||
636 | hw_used = 0; | ||
637 | } | ||
638 | |||
639 | /* Allocate the link descriptor from DMA pool */ | ||
640 | new = fsl_dma_alloc_descriptor(fsl_chan); | ||
641 | if (!new) { | ||
642 | dev_err(fsl_chan->dev, "No free memory for " | ||
643 | "link descriptor\n"); | ||
644 | goto fail; | ||
645 | } | ||
646 | #ifdef FSL_DMA_LD_DEBUG | ||
647 | dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new); | ||
648 | #endif | ||
649 | |||
650 | /* | ||
651 | * Calculate the maximum number of bytes to transfer, | ||
652 | * making sure it is less than the DMA controller limit | ||
653 | */ | ||
654 | copy = min_t(size_t, sg_dma_len(sg) - sg_used, | ||
655 | hw->length - hw_used); | ||
656 | copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT); | ||
657 | |||
658 | /* | ||
659 | * DMA_FROM_DEVICE | ||
660 | * from the hardware to the scatterlist | ||
661 | * | ||
662 | * DMA_TO_DEVICE | ||
663 | * from the scatterlist to the hardware | ||
664 | */ | ||
665 | if (direction == DMA_FROM_DEVICE) { | ||
666 | dma_src = hw->address + hw_used; | ||
667 | dma_dst = sg_dma_address(sg) + sg_used; | ||
668 | } else { | ||
669 | dma_src = sg_dma_address(sg) + sg_used; | ||
670 | dma_dst = hw->address + hw_used; | ||
671 | } | ||
672 | |||
673 | /* Fill in the descriptor */ | ||
674 | set_desc_cnt(fsl_chan, &new->hw, copy); | ||
675 | set_desc_src(fsl_chan, &new->hw, dma_src); | ||
676 | set_desc_dest(fsl_chan, &new->hw, dma_dst); | ||
677 | |||
678 | /* | ||
679 | * If this is not the first descriptor, chain the | ||
680 | * current descriptor after the previous descriptor | ||
681 | */ | ||
682 | if (!first) { | ||
683 | first = new; | ||
684 | } else { | ||
685 | set_desc_next(fsl_chan, &prev->hw, | ||
686 | new->async_tx.phys); | ||
687 | } | ||
688 | |||
689 | new->async_tx.cookie = 0; | ||
690 | async_tx_ack(&new->async_tx); | ||
691 | |||
692 | prev = new; | ||
693 | sg_used += copy; | ||
694 | hw_used += copy; | ||
695 | |||
696 | /* Insert the link descriptor into the LD ring */ | ||
697 | list_add_tail(&new->node, &first->tx_list); | ||
698 | } | ||
699 | } | ||
700 | |||
701 | finished: | ||
702 | |||
703 | /* No sg data, or all hardware address/length pairs had length == 0 */ | ||
704 | if (!first || !new) | ||
705 | return NULL; | ||
706 | |||
707 | new->async_tx.flags = flags; | ||
708 | new->async_tx.cookie = -EBUSY; | ||
709 | |||
710 | /* Set End-of-link to the last link descriptor of new list */ | ||
711 | set_ld_eol(fsl_chan, new); | ||
712 | |||
713 | /* Enable extra controller features */ | ||
714 | if (fsl_chan->set_src_loop_size) | ||
715 | fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size); | ||
716 | |||
717 | if (fsl_chan->set_dest_loop_size) | ||
718 | fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size); | ||
719 | |||
720 | if (fsl_chan->toggle_ext_start) | ||
721 | fsl_chan->toggle_ext_start(fsl_chan, slave->external_start); | ||
722 | |||
723 | if (fsl_chan->toggle_ext_pause) | ||
724 | fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause); | ||
725 | |||
726 | if (fsl_chan->set_request_count) | ||
727 | fsl_chan->set_request_count(fsl_chan, slave->request_count); | ||
728 | |||
729 | return &first->async_tx; | ||
730 | |||
731 | fail: | ||
732 | /* If first was not set, then we failed to allocate the very first | ||
733 | * descriptor, and we're done */ | ||
734 | if (!first) | ||
735 | return NULL; | ||
736 | |||
737 | /* | ||
738 | * First is set, so all of the descriptors we allocated have been added | ||
739 | * to first->tx_list, INCLUDING "first" itself. Therefore we | ||
740 | * must traverse the list backwards freeing each descriptor in turn | ||
741 | * | ||
742 | * The "new" and "prev" variables are re-used as the loop cursors | ||
743 | */ | ||
744 | tx_list = &first->tx_list; | ||
745 | list_for_each_entry_safe_reverse(new, prev, tx_list, node) { | ||
746 | list_del_init(&new->node); | ||
747 | dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys); | ||
748 | } | ||
749 | |||
750 | return NULL; | ||
751 | } | ||
752 | |||
753 | static void fsl_dma_device_terminate_all(struct dma_chan *chan) | ||
754 | { | ||
755 | struct fsl_dma_chan *fsl_chan; | ||
756 | struct fsl_desc_sw *desc, *tmp; | ||
757 | unsigned long flags; | ||
758 | |||
759 | if (!chan) | ||
760 | return; | ||
761 | |||
762 | fsl_chan = to_fsl_chan(chan); | ||
763 | |||
764 | /* Halt the DMA engine */ | ||
765 | dma_halt(fsl_chan); | ||
766 | |||
767 | spin_lock_irqsave(&fsl_chan->desc_lock, flags); | ||
768 | |||
769 | /* Remove and free all of the descriptors in the LD queue */ | ||
770 | list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) { | ||
771 | list_del(&desc->node); | ||
772 | dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys); | ||
773 | } | ||
774 | |||
775 | spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); | ||
776 | } | ||
777 | |||
778 | /** | ||
541 | * fsl_dma_update_completed_cookie - Update the completed cookie. | 779 | * fsl_dma_update_completed_cookie - Update the completed cookie. |
542 | * @fsl_chan : Freescale DMA channel | 780 | * @fsl_chan : Freescale DMA channel |
543 | */ | 781 | */ |
@@ -883,6 +1121,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev, | |||
883 | new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; | 1121 | new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; |
884 | new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; | 1122 | new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; |
885 | new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; | 1123 | new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; |
1124 | new_fsl_chan->set_request_count = fsl_chan_set_request_count; | ||
886 | } | 1125 | } |
887 | 1126 | ||
888 | spin_lock_init(&new_fsl_chan->desc_lock); | 1127 | spin_lock_init(&new_fsl_chan->desc_lock); |
@@ -962,12 +1201,15 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, | |||
962 | 1201 | ||
963 | dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); | 1202 | dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); |
964 | dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask); | 1203 | dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask); |
1204 | dma_cap_set(DMA_SLAVE, fdev->common.cap_mask); | ||
965 | fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; | 1205 | fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; |
966 | fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; | 1206 | fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; |
967 | fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; | 1207 | fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; |
968 | fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; | 1208 | fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; |
969 | fdev->common.device_is_tx_complete = fsl_dma_is_complete; | 1209 | fdev->common.device_is_tx_complete = fsl_dma_is_complete; |
970 | fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; | 1210 | fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; |
1211 | fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg; | ||
1212 | fdev->common.device_terminate_all = fsl_dma_device_terminate_all; | ||
971 | fdev->common.dev = &dev->dev; | 1213 | fdev->common.dev = &dev->dev; |
972 | 1214 | ||
973 | fdev->irq = irq_of_parse_and_map(dev->node, 0); | 1215 | fdev->irq = irq_of_parse_and_map(dev->node, 0); |
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index dc7f26865797..0df14cbb8ca3 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h | |||
@@ -90,6 +90,7 @@ struct fsl_dma_ld_hw { | |||
90 | struct fsl_desc_sw { | 90 | struct fsl_desc_sw { |
91 | struct fsl_dma_ld_hw hw; | 91 | struct fsl_dma_ld_hw hw; |
92 | struct list_head node; | 92 | struct list_head node; |
93 | struct list_head tx_list; | ||
93 | struct dma_async_tx_descriptor async_tx; | 94 | struct dma_async_tx_descriptor async_tx; |
94 | struct list_head *ld; | 95 | struct list_head *ld; |
95 | void *priv; | 96 | void *priv; |
@@ -143,10 +144,11 @@ struct fsl_dma_chan { | |||
143 | struct tasklet_struct tasklet; | 144 | struct tasklet_struct tasklet; |
144 | u32 feature; | 145 | u32 feature; |
145 | 146 | ||
146 | void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size); | 147 | void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable); |
147 | void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable); | 148 | void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable); |
148 | void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size); | 149 | void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size); |
149 | void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size); | 150 | void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size); |
151 | void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size); | ||
150 | }; | 152 | }; |
151 | 153 | ||
152 | #define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common) | 154 | #define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common) |
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c deleted file mode 100644 index 2225bb6ba3d1..000000000000 --- a/drivers/dma/ioat.c +++ /dev/null | |||
@@ -1,202 +0,0 @@ | |||
1 | /* | ||
2 | * Intel I/OAT DMA Linux driver | ||
3 | * Copyright(c) 2007 - 2009 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., | ||
16 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in | ||
19 | * the file called "COPYING". | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * This driver supports an Intel I/OAT DMA engine, which does asynchronous | ||
25 | * copy operations. | ||
26 | */ | ||
27 | |||
28 | #include <linux/init.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/pci.h> | ||
31 | #include <linux/interrupt.h> | ||
32 | #include <linux/dca.h> | ||
33 | #include "ioatdma.h" | ||
34 | #include "ioatdma_registers.h" | ||
35 | #include "ioatdma_hw.h" | ||
36 | |||
37 | MODULE_VERSION(IOAT_DMA_VERSION); | ||
38 | MODULE_LICENSE("GPL"); | ||
39 | MODULE_AUTHOR("Intel Corporation"); | ||
40 | |||
41 | static struct pci_device_id ioat_pci_tbl[] = { | ||
42 | /* I/OAT v1 platforms */ | ||
43 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, | ||
44 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, | ||
45 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, | ||
46 | { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, | ||
47 | |||
48 | /* I/OAT v2 platforms */ | ||
49 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, | ||
50 | |||
51 | /* I/OAT v3 platforms */ | ||
52 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, | ||
53 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, | ||
54 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, | ||
55 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, | ||
56 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, | ||
57 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, | ||
58 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, | ||
59 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, | ||
60 | { 0, } | ||
61 | }; | ||
62 | |||
63 | struct ioat_device { | ||
64 | struct pci_dev *pdev; | ||
65 | void __iomem *iobase; | ||
66 | struct ioatdma_device *dma; | ||
67 | struct dca_provider *dca; | ||
68 | }; | ||
69 | |||
70 | static int __devinit ioat_probe(struct pci_dev *pdev, | ||
71 | const struct pci_device_id *id); | ||
72 | static void __devexit ioat_remove(struct pci_dev *pdev); | ||
73 | |||
74 | static int ioat_dca_enabled = 1; | ||
75 | module_param(ioat_dca_enabled, int, 0644); | ||
76 | MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); | ||
77 | |||
78 | static struct pci_driver ioat_pci_driver = { | ||
79 | .name = "ioatdma", | ||
80 | .id_table = ioat_pci_tbl, | ||
81 | .probe = ioat_probe, | ||
82 | .remove = __devexit_p(ioat_remove), | ||
83 | }; | ||
84 | |||
85 | static int __devinit ioat_probe(struct pci_dev *pdev, | ||
86 | const struct pci_device_id *id) | ||
87 | { | ||
88 | void __iomem *iobase; | ||
89 | struct ioat_device *device; | ||
90 | unsigned long mmio_start, mmio_len; | ||
91 | int err; | ||
92 | |||
93 | err = pci_enable_device(pdev); | ||
94 | if (err) | ||
95 | goto err_enable_device; | ||
96 | |||
97 | err = pci_request_regions(pdev, ioat_pci_driver.name); | ||
98 | if (err) | ||
99 | goto err_request_regions; | ||
100 | |||
101 | err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); | ||
102 | if (err) | ||
103 | err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); | ||
104 | if (err) | ||
105 | goto err_set_dma_mask; | ||
106 | |||
107 | err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); | ||
108 | if (err) | ||
109 | err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); | ||
110 | if (err) | ||
111 | goto err_set_dma_mask; | ||
112 | |||
113 | mmio_start = pci_resource_start(pdev, 0); | ||
114 | mmio_len = pci_resource_len(pdev, 0); | ||
115 | iobase = ioremap(mmio_start, mmio_len); | ||
116 | if (!iobase) { | ||
117 | err = -ENOMEM; | ||
118 | goto err_ioremap; | ||
119 | } | ||
120 | |||
121 | device = kzalloc(sizeof(*device), GFP_KERNEL); | ||
122 | if (!device) { | ||
123 | err = -ENOMEM; | ||
124 | goto err_kzalloc; | ||
125 | } | ||
126 | device->pdev = pdev; | ||
127 | pci_set_drvdata(pdev, device); | ||
128 | device->iobase = iobase; | ||
129 | |||
130 | pci_set_master(pdev); | ||
131 | |||
132 | switch (readb(iobase + IOAT_VER_OFFSET)) { | ||
133 | case IOAT_VER_1_2: | ||
134 | device->dma = ioat_dma_probe(pdev, iobase); | ||
135 | if (device->dma && ioat_dca_enabled) | ||
136 | device->dca = ioat_dca_init(pdev, iobase); | ||
137 | break; | ||
138 | case IOAT_VER_2_0: | ||
139 | device->dma = ioat_dma_probe(pdev, iobase); | ||
140 | if (device->dma && ioat_dca_enabled) | ||
141 | device->dca = ioat2_dca_init(pdev, iobase); | ||
142 | break; | ||
143 | case IOAT_VER_3_0: | ||
144 | device->dma = ioat_dma_probe(pdev, iobase); | ||
145 | if (device->dma && ioat_dca_enabled) | ||
146 | device->dca = ioat3_dca_init(pdev, iobase); | ||
147 | break; | ||
148 | default: | ||
149 | err = -ENODEV; | ||
150 | break; | ||
151 | } | ||
152 | if (!device->dma) | ||
153 | err = -ENODEV; | ||
154 | |||
155 | if (err) | ||
156 | goto err_version; | ||
157 | |||
158 | return 0; | ||
159 | |||
160 | err_version: | ||
161 | kfree(device); | ||
162 | err_kzalloc: | ||
163 | iounmap(iobase); | ||
164 | err_ioremap: | ||
165 | err_set_dma_mask: | ||
166 | pci_release_regions(pdev); | ||
167 | pci_disable_device(pdev); | ||
168 | err_request_regions: | ||
169 | err_enable_device: | ||
170 | return err; | ||
171 | } | ||
172 | |||
173 | static void __devexit ioat_remove(struct pci_dev *pdev) | ||
174 | { | ||
175 | struct ioat_device *device = pci_get_drvdata(pdev); | ||
176 | |||
177 | dev_err(&pdev->dev, "Removing dma and dca services\n"); | ||
178 | if (device->dca) { | ||
179 | unregister_dca_provider(device->dca); | ||
180 | free_dca_provider(device->dca); | ||
181 | device->dca = NULL; | ||
182 | } | ||
183 | |||
184 | if (device->dma) { | ||
185 | ioat_dma_remove(device->dma); | ||
186 | device->dma = NULL; | ||
187 | } | ||
188 | |||
189 | kfree(device); | ||
190 | } | ||
191 | |||
192 | static int __init ioat_init_module(void) | ||
193 | { | ||
194 | return pci_register_driver(&ioat_pci_driver); | ||
195 | } | ||
196 | module_init(ioat_init_module); | ||
197 | |||
198 | static void __exit ioat_exit_module(void) | ||
199 | { | ||
200 | pci_unregister_driver(&ioat_pci_driver); | ||
201 | } | ||
202 | module_exit(ioat_exit_module); | ||
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile new file mode 100644 index 000000000000..8997d3fb9051 --- /dev/null +++ b/drivers/dma/ioat/Makefile | |||
@@ -0,0 +1,2 @@ | |||
1 | obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o | ||
2 | ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o | ||
diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat/dca.c index c012a1e15043..69d02615c4d6 100644 --- a/drivers/dma/ioat_dca.c +++ b/drivers/dma/ioat/dca.c | |||
@@ -33,8 +33,8 @@ | |||
33 | #define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) | 33 | #define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) |
34 | #endif | 34 | #endif |
35 | 35 | ||
36 | #include "ioatdma.h" | 36 | #include "dma.h" |
37 | #include "ioatdma_registers.h" | 37 | #include "registers.h" |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 | 40 | * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 |
@@ -242,7 +242,8 @@ static struct dca_ops ioat_dca_ops = { | |||
242 | }; | 242 | }; |
243 | 243 | ||
244 | 244 | ||
245 | struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) | 245 | struct dca_provider * __devinit |
246 | ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) | ||
246 | { | 247 | { |
247 | struct dca_provider *dca; | 248 | struct dca_provider *dca; |
248 | struct ioat_dca_priv *ioatdca; | 249 | struct ioat_dca_priv *ioatdca; |
@@ -407,7 +408,8 @@ static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) | |||
407 | return slots; | 408 | return slots; |
408 | } | 409 | } |
409 | 410 | ||
410 | struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) | 411 | struct dca_provider * __devinit |
412 | ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) | ||
411 | { | 413 | { |
412 | struct dca_provider *dca; | 414 | struct dca_provider *dca; |
413 | struct ioat_dca_priv *ioatdca; | 415 | struct ioat_dca_priv *ioatdca; |
@@ -602,7 +604,8 @@ static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) | |||
602 | return slots; | 604 | return slots; |
603 | } | 605 | } |
604 | 606 | ||
605 | struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) | 607 | struct dca_provider * __devinit |
608 | ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) | ||
606 | { | 609 | { |
607 | struct dca_provider *dca; | 610 | struct dca_provider *dca; |
608 | struct ioat_dca_priv *ioatdca; | 611 | struct ioat_dca_priv *ioatdca; |
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c new file mode 100644 index 000000000000..c524d36d3c2e --- /dev/null +++ b/drivers/dma/ioat/dma.c | |||
@@ -0,0 +1,1238 @@ | |||
1 | /* | ||
2 | * Intel I/OAT DMA Linux driver | ||
3 | * Copyright(c) 2004 - 2009 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., | ||
16 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in | ||
19 | * the file called "COPYING". | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * This driver supports an Intel I/OAT DMA engine, which does asynchronous | ||
25 | * copy operations. | ||
26 | */ | ||
27 | |||
28 | #include <linux/init.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/pci.h> | ||
31 | #include <linux/interrupt.h> | ||
32 | #include <linux/dmaengine.h> | ||
33 | #include <linux/delay.h> | ||
34 | #include <linux/dma-mapping.h> | ||
35 | #include <linux/workqueue.h> | ||
36 | #include <linux/i7300_idle.h> | ||
37 | #include "dma.h" | ||
38 | #include "registers.h" | ||
39 | #include "hw.h" | ||
40 | |||
41 | int ioat_pending_level = 4; | ||
42 | module_param(ioat_pending_level, int, 0644); | ||
43 | MODULE_PARM_DESC(ioat_pending_level, | ||
44 | "high-water mark for pushing ioat descriptors (default: 4)"); | ||
45 | |||
46 | /* internal functions */ | ||
47 | static void ioat1_cleanup(struct ioat_dma_chan *ioat); | ||
48 | static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat); | ||
49 | |||
50 | /** | ||
51 | * ioat_dma_do_interrupt - handler used for single vector interrupt mode | ||
52 | * @irq: interrupt id | ||
53 | * @data: interrupt data | ||
54 | */ | ||
55 | static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) | ||
56 | { | ||
57 | struct ioatdma_device *instance = data; | ||
58 | struct ioat_chan_common *chan; | ||
59 | unsigned long attnstatus; | ||
60 | int bit; | ||
61 | u8 intrctrl; | ||
62 | |||
63 | intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); | ||
64 | |||
65 | if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) | ||
66 | return IRQ_NONE; | ||
67 | |||
68 | if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { | ||
69 | writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); | ||
70 | return IRQ_NONE; | ||
71 | } | ||
72 | |||
73 | attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); | ||
74 | for_each_bit(bit, &attnstatus, BITS_PER_LONG) { | ||
75 | chan = ioat_chan_by_index(instance, bit); | ||
76 | tasklet_schedule(&chan->cleanup_task); | ||
77 | } | ||
78 | |||
79 | writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); | ||
80 | return IRQ_HANDLED; | ||
81 | } | ||
82 | |||
83 | /** | ||
84 | * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode | ||
85 | * @irq: interrupt id | ||
86 | * @data: interrupt data | ||
87 | */ | ||
88 | static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) | ||
89 | { | ||
90 | struct ioat_chan_common *chan = data; | ||
91 | |||
92 | tasklet_schedule(&chan->cleanup_task); | ||
93 | |||
94 | return IRQ_HANDLED; | ||
95 | } | ||
96 | |||
97 | static void ioat1_cleanup_tasklet(unsigned long data); | ||
98 | |||
99 | /* common channel initialization */ | ||
100 | void ioat_init_channel(struct ioatdma_device *device, | ||
101 | struct ioat_chan_common *chan, int idx, | ||
102 | void (*timer_fn)(unsigned long), | ||
103 | void (*tasklet)(unsigned long), | ||
104 | unsigned long ioat) | ||
105 | { | ||
106 | struct dma_device *dma = &device->common; | ||
107 | |||
108 | chan->device = device; | ||
109 | chan->reg_base = device->reg_base + (0x80 * (idx + 1)); | ||
110 | spin_lock_init(&chan->cleanup_lock); | ||
111 | chan->common.device = dma; | ||
112 | list_add_tail(&chan->common.device_node, &dma->channels); | ||
113 | device->idx[idx] = chan; | ||
114 | init_timer(&chan->timer); | ||
115 | chan->timer.function = timer_fn; | ||
116 | chan->timer.data = ioat; | ||
117 | tasklet_init(&chan->cleanup_task, tasklet, ioat); | ||
118 | tasklet_disable(&chan->cleanup_task); | ||
119 | } | ||
120 | |||
121 | static void ioat1_timer_event(unsigned long data); | ||
122 | |||
123 | /** | ||
124 | * ioat1_enumerate_channels - find and initialize the device's channels | |
125 | * @device: the device to be enumerated | ||
126 | */ | ||
127 | static int ioat1_enumerate_channels(struct ioatdma_device *device) | ||
128 | { | ||
129 | u8 xfercap_scale; | ||
130 | u32 xfercap; | ||
131 | int i; | ||
132 | struct ioat_dma_chan *ioat; | ||
133 | struct device *dev = &device->pdev->dev; | ||
134 | struct dma_device *dma = &device->common; | ||
135 | |||
136 | INIT_LIST_HEAD(&dma->channels); | ||
137 | dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); | ||
138 | dma->chancnt &= 0x1f; /* bits [4:0] valid */ | ||
139 | if (dma->chancnt > ARRAY_SIZE(device->idx)) { | ||
140 | dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", | ||
141 | dma->chancnt, ARRAY_SIZE(device->idx)); | ||
142 | dma->chancnt = ARRAY_SIZE(device->idx); | ||
143 | } | ||
144 | xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); | ||
145 | xfercap_scale &= 0x1f; /* bits [4:0] valid */ | ||
146 | xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); | ||
147 | dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap); | ||
148 | |||
149 | #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL | ||
150 | if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) | ||
151 | dma->chancnt--; | ||
152 | #endif | ||
153 | for (i = 0; i < dma->chancnt; i++) { | ||
154 | ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); | ||
155 | if (!ioat) | ||
156 | break; | ||
157 | |||
158 | ioat_init_channel(device, &ioat->base, i, | ||
159 | ioat1_timer_event, | ||
160 | ioat1_cleanup_tasklet, | ||
161 | (unsigned long) ioat); | ||
162 | ioat->xfercap = xfercap; | ||
163 | spin_lock_init(&ioat->desc_lock); | ||
164 | INIT_LIST_HEAD(&ioat->free_desc); | ||
165 | INIT_LIST_HEAD(&ioat->used_desc); | ||
166 | } | ||
167 | dma->chancnt = i; | ||
168 | return i; | ||
169 | } | ||
170 | |||
171 | /** | ||
172 | * __ioat1_dma_memcpy_issue_pending - push potentially unrecognized appended | |
173 | * descriptors to hw | ||
174 | * @ioat: IOAT DMA channel handle | |
175 | */ | ||
176 | static inline void | ||
177 | __ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) | ||
178 | { | ||
179 | void __iomem *reg_base = ioat->base.reg_base; | ||
180 | |||
181 | dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n", | ||
182 | __func__, ioat->pending); | ||
183 | ioat->pending = 0; | ||
184 | writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET); | ||
185 | } | ||
186 | |||
187 | static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) | ||
188 | { | ||
189 | struct ioat_dma_chan *ioat = to_ioat_chan(chan); | ||
190 | |||
191 | if (ioat->pending > 0) { | ||
192 | spin_lock_bh(&ioat->desc_lock); | ||
193 | __ioat1_dma_memcpy_issue_pending(ioat); | ||
194 | spin_unlock_bh(&ioat->desc_lock); | ||
195 | } | ||
196 | } | ||
197 | |||
198 | /** | ||
199 | * ioat1_reset_channel - restart a channel | ||
200 | * @ioat: IOAT DMA channel handle | ||
201 | */ | ||
202 | static void ioat1_reset_channel(struct ioat_dma_chan *ioat) | ||
203 | { | ||
204 | struct ioat_chan_common *chan = &ioat->base; | ||
205 | void __iomem *reg_base = chan->reg_base; | ||
206 | u32 chansts, chanerr; | ||
207 | |||
208 | dev_warn(to_dev(chan), "reset\n"); | ||
209 | chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); | ||
210 | chansts = *chan->completion & IOAT_CHANSTS_STATUS; | ||
211 | if (chanerr) { | ||
212 | dev_err(to_dev(chan), | ||
213 | "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", | ||
214 | chan_num(chan), chansts, chanerr); | ||
215 | writel(chanerr, reg_base + IOAT_CHANERR_OFFSET); | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * whack it upside the head with a reset | ||
220 | * and wait for things to settle out. | ||
221 | * force the pending count to a really big negative | ||
222 | * to make sure no one forces an issue_pending | ||
223 | * while we're waiting. | ||
224 | */ | ||
225 | |||
226 | ioat->pending = INT_MIN; | ||
227 | writeb(IOAT_CHANCMD_RESET, | ||
228 | reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); | ||
229 | set_bit(IOAT_RESET_PENDING, &chan->state); | ||
230 | mod_timer(&chan->timer, jiffies + RESET_DELAY); | ||
231 | } | ||
232 | |||
233 | static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) | ||
234 | { | ||
235 | struct dma_chan *c = tx->chan; | ||
236 | struct ioat_dma_chan *ioat = to_ioat_chan(c); | ||
237 | struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); | ||
238 | struct ioat_chan_common *chan = &ioat->base; | ||
239 | struct ioat_desc_sw *first; | ||
240 | struct ioat_desc_sw *chain_tail; | ||
241 | dma_cookie_t cookie; | ||
242 | |||
243 | spin_lock_bh(&ioat->desc_lock); | ||
244 | /* cookie incr and addition to used_list must be atomic */ | ||
245 | cookie = c->cookie; | ||
246 | cookie++; | ||
247 | if (cookie < 0) | ||
248 | cookie = 1; | ||
249 | c->cookie = cookie; | ||
250 | tx->cookie = cookie; | ||
251 | dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); | ||
252 | |||
253 | /* write address into NextDescriptor field of last desc in chain */ | ||
254 | first = to_ioat_desc(desc->tx_list.next); | ||
255 | chain_tail = to_ioat_desc(ioat->used_desc.prev); | ||
256 | /* make descriptor updates globally visible before chaining */ | ||
257 | wmb(); | ||
258 | chain_tail->hw->next = first->txd.phys; | ||
259 | list_splice_tail_init(&desc->tx_list, &ioat->used_desc); | ||
260 | dump_desc_dbg(ioat, chain_tail); | ||
261 | dump_desc_dbg(ioat, first); | ||
262 | |||
263 | if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) | ||
264 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | ||
265 | |||
266 | ioat->active += desc->hw->tx_cnt; | ||
267 | ioat->pending += desc->hw->tx_cnt; | ||
268 | if (ioat->pending >= ioat_pending_level) | ||
269 | __ioat1_dma_memcpy_issue_pending(ioat); | ||
270 | spin_unlock_bh(&ioat->desc_lock); | ||
271 | |||
272 | return cookie; | ||
273 | } | ||
274 | |||
275 | /** | ||
276 | * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair | ||
277 | * @ioat: the channel supplying the memory pool for the descriptors | ||
278 | * @flags: allocation flags | ||
279 | */ | ||
280 | static struct ioat_desc_sw * | ||
281 | ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) | ||
282 | { | ||
283 | struct ioat_dma_descriptor *desc; | ||
284 | struct ioat_desc_sw *desc_sw; | ||
285 | struct ioatdma_device *ioatdma_device; | ||
286 | dma_addr_t phys; | ||
287 | |||
288 | ioatdma_device = ioat->base.device; | ||
289 | desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); | ||
290 | if (unlikely(!desc)) | ||
291 | return NULL; | ||
292 | |||
293 | desc_sw = kzalloc(sizeof(*desc_sw), flags); | ||
294 | if (unlikely(!desc_sw)) { | ||
295 | pci_pool_free(ioatdma_device->dma_pool, desc, phys); | ||
296 | return NULL; | ||
297 | } | ||
298 | |||
299 | memset(desc, 0, sizeof(*desc)); | ||
300 | |||
301 | INIT_LIST_HEAD(&desc_sw->tx_list); | ||
302 | dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); | ||
303 | desc_sw->txd.tx_submit = ioat1_tx_submit; | ||
304 | desc_sw->hw = desc; | ||
305 | desc_sw->txd.phys = phys; | ||
306 | set_desc_id(desc_sw, -1); | ||
307 | |||
308 | return desc_sw; | ||
309 | } | ||
310 | |||
311 | static int ioat_initial_desc_count = 256; | ||
312 | module_param(ioat_initial_desc_count, int, 0644); | ||
313 | MODULE_PARM_DESC(ioat_initial_desc_count, | ||
314 | "ioat1: initial descriptors per channel (default: 256)"); | ||
315 | /** | ||
316 | * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors | ||
317 | * @c: the channel to be filled out | |
318 | */ | ||
319 | static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) | ||
320 | { | ||
321 | struct ioat_dma_chan *ioat = to_ioat_chan(c); | ||
322 | struct ioat_chan_common *chan = &ioat->base; | ||
323 | struct ioat_desc_sw *desc; | ||
324 | u32 chanerr; | ||
325 | int i; | ||
326 | LIST_HEAD(tmp_list); | ||
327 | |||
328 | /* have we already been set up? */ | ||
329 | if (!list_empty(&ioat->free_desc)) | ||
330 | return ioat->desccount; | ||
331 | |||
332 | /* Setup register to interrupt and write completion status on error */ | ||
333 | writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); | ||
334 | |||
335 | chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); | ||
336 | if (chanerr) { | ||
337 | dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); | ||
338 | writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); | ||
339 | } | ||
340 | |||
341 | /* Allocate descriptors */ | ||
342 | for (i = 0; i < ioat_initial_desc_count; i++) { | ||
343 | desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL); | ||
344 | if (!desc) { | ||
345 | dev_err(to_dev(chan), "Only %d initial descriptors\n", i); | ||
346 | break; | ||
347 | } | ||
348 | set_desc_id(desc, i); | ||
349 | list_add_tail(&desc->node, &tmp_list); | ||
350 | } | ||
351 | spin_lock_bh(&ioat->desc_lock); | ||
352 | ioat->desccount = i; | ||
353 | list_splice(&tmp_list, &ioat->free_desc); | ||
354 | spin_unlock_bh(&ioat->desc_lock); | ||
355 | |||
356 | /* allocate a completion writeback area */ | ||
357 | /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ | ||
358 | chan->completion = pci_pool_alloc(chan->device->completion_pool, | ||
359 | GFP_KERNEL, &chan->completion_dma); | ||
360 | memset(chan->completion, 0, sizeof(*chan->completion)); | ||
361 | writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, | ||
362 | chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); | ||
363 | writel(((u64) chan->completion_dma) >> 32, | ||
364 | chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); | ||
365 | |||
366 | tasklet_enable(&chan->cleanup_task); | ||
367 | ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ | ||
368 | dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", | ||
369 | __func__, ioat->desccount); | ||
370 | return ioat->desccount; | ||
371 | } | ||
372 | |||
373 | /** | ||
374 | * ioat1_dma_free_chan_resources - release all the descriptors | ||
375 | * @c: the channel to be cleaned | |
376 | */ | ||
377 | static void ioat1_dma_free_chan_resources(struct dma_chan *c) | ||
378 | { | ||
379 | struct ioat_dma_chan *ioat = to_ioat_chan(c); | ||
380 | struct ioat_chan_common *chan = &ioat->base; | ||
381 | struct ioatdma_device *ioatdma_device = chan->device; | ||
382 | struct ioat_desc_sw *desc, *_desc; | ||
383 | int in_use_descs = 0; | ||
384 | |||
385 | /* Before freeing channel resources first check | ||
386 | * if they have been previously allocated for this channel. | ||
387 | */ | ||
388 | if (ioat->desccount == 0) | ||
389 | return; | ||
390 | |||
391 | tasklet_disable(&chan->cleanup_task); | ||
392 | del_timer_sync(&chan->timer); | ||
393 | ioat1_cleanup(ioat); | ||
394 | |||
395 | /* Delay 100ms after reset to allow internal DMA logic to quiesce | ||
396 | * before removing DMA descriptor resources. | ||
397 | */ | ||
398 | writeb(IOAT_CHANCMD_RESET, | ||
399 | chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); | ||
400 | mdelay(100); | ||
401 | |||
402 | spin_lock_bh(&ioat->desc_lock); | ||
403 | list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { | ||
404 | dev_dbg(to_dev(chan), "%s: freeing %d from used list\n", | ||
405 | __func__, desc_id(desc)); | ||
406 | dump_desc_dbg(ioat, desc); | ||
407 | in_use_descs++; | ||
408 | list_del(&desc->node); | ||
409 | pci_pool_free(ioatdma_device->dma_pool, desc->hw, | ||
410 | desc->txd.phys); | ||
411 | kfree(desc); | ||
412 | } | ||
413 | list_for_each_entry_safe(desc, _desc, | ||
414 | &ioat->free_desc, node) { | ||
415 | list_del(&desc->node); | ||
416 | pci_pool_free(ioatdma_device->dma_pool, desc->hw, | ||
417 | desc->txd.phys); | ||
418 | kfree(desc); | ||
419 | } | ||
420 | spin_unlock_bh(&ioat->desc_lock); | ||
421 | |||
422 | pci_pool_free(ioatdma_device->completion_pool, | ||
423 | chan->completion, | ||
424 | chan->completion_dma); | ||
425 | |||
426 | /* one is ok since we left it on there on purpose */ | ||
427 | if (in_use_descs > 1) | ||
428 | dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", | ||
429 | in_use_descs - 1); | ||
430 | |||
431 | chan->last_completion = 0; | ||
432 | chan->completion_dma = 0; | ||
433 | ioat->pending = 0; | ||
434 | ioat->desccount = 0; | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * ioat1_dma_get_next_descriptor - return the next available descriptor | ||
439 | * @ioat: IOAT DMA channel handle | ||
440 | * | ||
441 | * Gets the next descriptor from the chain, and must be called with the | ||
442 | * channel's desc_lock held. Allocates more descriptors if the channel | ||
443 | * has run out. | ||
444 | */ | ||
445 | static struct ioat_desc_sw * | ||
446 | ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) | ||
447 | { | ||
448 | struct ioat_desc_sw *new; | ||
449 | |||
450 | if (!list_empty(&ioat->free_desc)) { | ||
451 | new = to_ioat_desc(ioat->free_desc.next); | ||
452 | list_del(&new->node); | ||
453 | } else { | ||
454 | /* try to get another desc */ | ||
455 | new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); | ||
456 | if (!new) { | ||
457 | dev_err(to_dev(&ioat->base), "alloc failed\n"); | ||
458 | return NULL; | ||
459 | } | ||
460 | } | ||
461 | dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n", | ||
462 | __func__, desc_id(new)); | ||
463 | prefetch(new->hw); | ||
464 | return new; | ||
465 | } | ||
466 | |||
467 | static struct dma_async_tx_descriptor * | ||
468 | ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, | ||
469 | dma_addr_t dma_src, size_t len, unsigned long flags) | ||
470 | { | ||
471 | struct ioat_dma_chan *ioat = to_ioat_chan(c); | ||
472 | struct ioat_desc_sw *desc; | ||
473 | size_t copy; | ||
474 | LIST_HEAD(chain); | ||
475 | dma_addr_t src = dma_src; | ||
476 | dma_addr_t dest = dma_dest; | ||
477 | size_t total_len = len; | ||
478 | struct ioat_dma_descriptor *hw = NULL; | ||
479 | int tx_cnt = 0; | ||
480 | |||
481 | spin_lock_bh(&ioat->desc_lock); | ||
482 | desc = ioat1_dma_get_next_descriptor(ioat); | ||
483 | do { | ||
484 | if (!desc) | ||
485 | break; | ||
486 | |||
487 | tx_cnt++; | ||
488 | copy = min_t(size_t, len, ioat->xfercap); | ||
489 | |||
490 | hw = desc->hw; | ||
491 | hw->size = copy; | ||
492 | hw->ctl = 0; | ||
493 | hw->src_addr = src; | ||
494 | hw->dst_addr = dest; | ||
495 | |||
496 | list_add_tail(&desc->node, &chain); | ||
497 | |||
498 | len -= copy; | ||
499 | dest += copy; | ||
500 | src += copy; | ||
501 | if (len) { | ||
502 | struct ioat_desc_sw *next; | ||
503 | |||
504 | async_tx_ack(&desc->txd); | ||
505 | next = ioat1_dma_get_next_descriptor(ioat); | ||
506 | hw->next = next ? next->txd.phys : 0; | ||
507 | dump_desc_dbg(ioat, desc); | ||
508 | desc = next; | ||
509 | } else | ||
510 | hw->next = 0; | ||
511 | } while (len); | ||
512 | |||
513 | if (!desc) { | ||
514 | struct ioat_chan_common *chan = &ioat->base; | ||
515 | |||
516 | dev_err(to_dev(chan), | ||
517 | "chan%d - get_next_desc failed\n", chan_num(chan)); | ||
518 | list_splice(&chain, &ioat->free_desc); | ||
519 | spin_unlock_bh(&ioat->desc_lock); | ||
520 | return NULL; | ||
521 | } | ||
522 | spin_unlock_bh(&ioat->desc_lock); | ||
523 | |||
524 | desc->txd.flags = flags; | ||
525 | desc->len = total_len; | ||
526 | list_splice(&chain, &desc->tx_list); | ||
527 | hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); | ||
528 | hw->ctl_f.compl_write = 1; | ||
529 | hw->tx_cnt = tx_cnt; | ||
530 | dump_desc_dbg(ioat, desc); | ||
531 | |||
532 | return &desc->txd; | ||
533 | } | ||
534 | |||
535 | static void ioat1_cleanup_tasklet(unsigned long data) | ||
536 | { | ||
537 | struct ioat_dma_chan *chan = (void *)data; | ||
538 | |||
539 | ioat1_cleanup(chan); | ||
540 | writew(IOAT_CHANCTRL_RUN, chan->base.reg_base + IOAT_CHANCTRL_OFFSET); | ||
541 | } | ||
542 | |||
543 | void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, | ||
544 | size_t len, struct ioat_dma_descriptor *hw) | ||
545 | { | ||
546 | struct pci_dev *pdev = chan->device->pdev; | ||
547 | size_t offset = len - hw->size; | ||
548 | |||
549 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) | ||
550 | ioat_unmap(pdev, hw->dst_addr - offset, len, | ||
551 | PCI_DMA_FROMDEVICE, flags, 1); | ||
552 | |||
553 | if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) | ||
554 | ioat_unmap(pdev, hw->src_addr - offset, len, | ||
555 | PCI_DMA_TODEVICE, flags, 0); | ||
556 | } | ||
557 | |||
558 | unsigned long ioat_get_current_completion(struct ioat_chan_common *chan) | ||
559 | { | ||
560 | unsigned long phys_complete; | ||
561 | u64 completion; | ||
562 | |||
563 | completion = *chan->completion; | ||
564 | phys_complete = ioat_chansts_to_addr(completion); | ||
565 | |||
566 | dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, | ||
567 | (unsigned long long) phys_complete); | ||
568 | |||
569 | if (is_ioat_halted(completion)) { | ||
570 | u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); | ||
571 | dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", | ||
572 | chanerr); | ||
573 | |||
574 | /* TODO do something to salvage the situation */ | ||
575 | } | ||
576 | |||
577 | return phys_complete; | ||
578 | } | ||
579 | |||
580 | bool ioat_cleanup_preamble(struct ioat_chan_common *chan, | ||
581 | unsigned long *phys_complete) | ||
582 | { | ||
583 | *phys_complete = ioat_get_current_completion(chan); | ||
584 | if (*phys_complete == chan->last_completion) | ||
585 | return false; | ||
586 | clear_bit(IOAT_COMPLETION_ACK, &chan->state); | ||
587 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | ||
588 | |||
589 | return true; | ||
590 | } | ||
591 | |||
592 | static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete) | ||
593 | { | ||
594 | struct ioat_chan_common *chan = &ioat->base; | ||
595 | struct list_head *_desc, *n; | ||
596 | struct dma_async_tx_descriptor *tx; | ||
597 | |||
598 | dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n", | ||
599 | __func__, phys_complete); | ||
600 | list_for_each_safe(_desc, n, &ioat->used_desc) { | ||
601 | struct ioat_desc_sw *desc; | ||
602 | |||
603 | prefetch(n); | ||
604 | desc = list_entry(_desc, typeof(*desc), node); | ||
605 | tx = &desc->txd; | ||
606 | /* | ||
607 | * Incoming DMA requests may use multiple descriptors, | ||
608 | * due to exceeding xfercap, perhaps. If so, only the | ||
609 | * last one will have a cookie, and require unmapping. | ||
610 | */ | ||
611 | dump_desc_dbg(ioat, desc); | ||
612 | if (tx->cookie) { | ||
613 | chan->completed_cookie = tx->cookie; | ||
614 | tx->cookie = 0; | ||
615 | ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); | ||
616 | ioat->active -= desc->hw->tx_cnt; | ||
617 | if (tx->callback) { | ||
618 | tx->callback(tx->callback_param); | ||
619 | tx->callback = NULL; | ||
620 | } | ||
621 | } | ||
622 | |||
623 | if (tx->phys != phys_complete) { | ||
624 | /* | ||
625 | * a completed entry, but not the last, so clean | ||
626 | * up if the client is done with the descriptor | ||
627 | */ | ||
628 | if (async_tx_test_ack(tx)) | ||
629 | list_move_tail(&desc->node, &ioat->free_desc); | ||
630 | } else { | ||
631 | /* | ||
632 | * last used desc. Do not remove, so we can | ||
633 | * append from it. | ||
634 | */ | ||
635 | |||
636 | /* if nothing else is pending, cancel the | ||
637 | * completion timeout | ||
638 | */ | ||
639 | if (n == &ioat->used_desc) { | ||
640 | dev_dbg(to_dev(chan), | ||
641 | "%s cancel completion timeout\n", | ||
642 | __func__); | ||
643 | clear_bit(IOAT_COMPLETION_PENDING, &chan->state); | ||
644 | } | ||
645 | |||
646 | /* TODO check status bits? */ | ||
647 | break; | ||
648 | } | ||
649 | } | ||
650 | |||
651 | chan->last_completion = phys_complete; | ||
652 | } | ||
653 | |||
654 | /** | ||
655 | * ioat1_cleanup - clean up finished descriptors | |
656 | * @ioat: ioat channel to be cleaned up | |
657 | * | ||
658 | * To prevent lock contention we defer cleanup when the locks are | |
659 | * contended; a terminal timeout then forces cleanup and catches | |
660 | * completion notification errors. | |
661 | */ | ||
662 | static void ioat1_cleanup(struct ioat_dma_chan *ioat) | ||
663 | { | ||
664 | struct ioat_chan_common *chan = &ioat->base; | ||
665 | unsigned long phys_complete; | ||
666 | |||
667 | prefetch(chan->completion); | ||
668 | |||
669 | if (!spin_trylock_bh(&chan->cleanup_lock)) | ||
670 | return; | ||
671 | |||
672 | if (!ioat_cleanup_preamble(chan, &phys_complete)) { | ||
673 | spin_unlock_bh(&chan->cleanup_lock); | ||
674 | return; | ||
675 | } | ||
676 | |||
677 | if (!spin_trylock_bh(&ioat->desc_lock)) { | ||
678 | spin_unlock_bh(&chan->cleanup_lock); | ||
679 | return; | ||
680 | } | ||
681 | |||
682 | __cleanup(ioat, phys_complete); | ||
683 | |||
684 | spin_unlock_bh(&ioat->desc_lock); | ||
685 | spin_unlock_bh(&chan->cleanup_lock); | ||
686 | } | ||
687 | |||
688 | static void ioat1_timer_event(unsigned long data) | ||
689 | { | ||
690 | struct ioat_dma_chan *ioat = (void *) data; | ||
691 | struct ioat_chan_common *chan = &ioat->base; | ||
692 | |||
693 | dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state); | ||
694 | |||
695 | spin_lock_bh(&chan->cleanup_lock); | ||
696 | if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) { | ||
697 | struct ioat_desc_sw *desc; | ||
698 | |||
699 | spin_lock_bh(&ioat->desc_lock); | ||
700 | |||
701 | /* restart active descriptors */ | ||
702 | desc = to_ioat_desc(ioat->used_desc.prev); | ||
703 | ioat_set_chainaddr(ioat, desc->txd.phys); | ||
704 | ioat_start(chan); | ||
705 | |||
706 | ioat->pending = 0; | ||
707 | set_bit(IOAT_COMPLETION_PENDING, &chan->state); | ||
708 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | ||
709 | spin_unlock_bh(&ioat->desc_lock); | ||
710 | } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { | ||
711 | unsigned long phys_complete; | ||
712 | |||
713 | spin_lock_bh(&ioat->desc_lock); | ||
714 | /* if we haven't made progress and we have already | ||
715 | * acknowledged a pending completion once, then be more | ||
716 | * forceful with a restart | ||
717 | */ | ||
718 | if (ioat_cleanup_preamble(chan, &phys_complete)) | ||
719 | __cleanup(ioat, phys_complete); | ||
720 | else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) | ||
721 | ioat1_reset_channel(ioat); | ||
722 | else { | ||
723 | u64 status = ioat_chansts(chan); | ||
724 | |||
725 | /* manually update the last completion address */ | ||
726 | if (ioat_chansts_to_addr(status) != 0) | ||
727 | *chan->completion = status; | ||
728 | |||
729 | set_bit(IOAT_COMPLETION_ACK, &chan->state); | ||
730 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | ||
731 | } | ||
732 | spin_unlock_bh(&ioat->desc_lock); | ||
733 | } | ||
734 | spin_unlock_bh(&chan->cleanup_lock); | ||
735 | } | ||
736 | |||
737 | static enum dma_status | ||
738 | ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie, | ||
739 | dma_cookie_t *done, dma_cookie_t *used) | ||
740 | { | ||
741 | struct ioat_dma_chan *ioat = to_ioat_chan(c); | ||
742 | |||
743 | if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) | ||
744 | return DMA_SUCCESS; | ||
745 | |||
746 | ioat1_cleanup(ioat); | ||
747 | |||
748 | return ioat_is_complete(c, cookie, done, used); | ||
749 | } | ||
750 | |||
751 | static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) | ||
752 | { | ||
753 | struct ioat_chan_common *chan = &ioat->base; | ||
754 | struct ioat_desc_sw *desc; | ||
755 | struct ioat_dma_descriptor *hw; | ||
756 | |||
757 | spin_lock_bh(&ioat->desc_lock); | ||
758 | |||
759 | desc = ioat1_dma_get_next_descriptor(ioat); | ||
760 | |||
761 | if (!desc) { | ||
762 | dev_err(to_dev(chan), | ||
763 | "Unable to start null desc - get next desc failed\n"); | ||
764 | spin_unlock_bh(&ioat->desc_lock); | ||
765 | return; | ||
766 | } | ||
767 | |||
768 | hw = desc->hw; | ||
769 | hw->ctl = 0; | ||
770 | hw->ctl_f.null = 1; | ||
771 | hw->ctl_f.int_en = 1; | ||
772 | hw->ctl_f.compl_write = 1; | ||
773 | /* set size to non-zero value (channel returns error when size is 0) */ | ||
774 | hw->size = NULL_DESC_BUFFER_SIZE; | ||
775 | hw->src_addr = 0; | ||
776 | hw->dst_addr = 0; | ||
777 | async_tx_ack(&desc->txd); | ||
778 | hw->next = 0; | ||
779 | list_add_tail(&desc->node, &ioat->used_desc); | ||
780 | dump_desc_dbg(ioat, desc); | ||
781 | |||
782 | ioat_set_chainaddr(ioat, desc->txd.phys); | ||
783 | ioat_start(chan); | ||
784 | spin_unlock_bh(&ioat->desc_lock); | ||
785 | } | ||
786 | |||
787 | /* | ||
788 | * Perform an IOAT transaction to verify the HW works. | |
789 | */ | ||
790 | #define IOAT_TEST_SIZE 2000 | ||
791 | |||
792 | static void __devinit ioat_dma_test_callback(void *dma_async_param) | ||
793 | { | ||
794 | struct completion *cmp = dma_async_param; | ||
795 | |||
796 | complete(cmp); | ||
797 | } | ||
798 | |||
799 | /** | ||
800 | * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works. | |
801 | * @device: device to be tested | ||
802 | */ | ||
803 | int __devinit ioat_dma_self_test(struct ioatdma_device *device) | ||
804 | { | ||
805 | int i; | ||
806 | u8 *src; | ||
807 | u8 *dest; | ||
808 | struct dma_device *dma = &device->common; | ||
809 | struct device *dev = &device->pdev->dev; | ||
810 | struct dma_chan *dma_chan; | ||
811 | struct dma_async_tx_descriptor *tx; | ||
812 | dma_addr_t dma_dest, dma_src; | ||
813 | dma_cookie_t cookie; | ||
814 | int err = 0; | ||
815 | struct completion cmp; | ||
816 | unsigned long tmo; | ||
817 | unsigned long flags; | ||
818 | |||
819 | src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); | ||
820 | if (!src) | ||
821 | return -ENOMEM; | ||
822 | dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); | ||
823 | if (!dest) { | ||
824 | kfree(src); | ||
825 | return -ENOMEM; | ||
826 | } | ||
827 | |||
828 | /* Fill in src buffer */ | ||
829 | for (i = 0; i < IOAT_TEST_SIZE; i++) | ||
830 | src[i] = (u8)i; | ||
831 | |||
832 | /* Start copy, using first DMA channel */ | ||
833 | dma_chan = container_of(dma->channels.next, struct dma_chan, | ||
834 | device_node); | ||
835 | if (dma->device_alloc_chan_resources(dma_chan) < 1) { | ||
836 | dev_err(dev, "selftest cannot allocate chan resource\n"); | ||
837 | err = -ENODEV; | ||
838 | goto out; | ||
839 | } | ||
840 | |||
841 | dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); | ||
842 | dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); | ||
843 | flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE | | ||
844 | DMA_PREP_INTERRUPT; | ||
845 | tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, | ||
846 | IOAT_TEST_SIZE, flags); | ||
847 | if (!tx) { | ||
848 | dev_err(dev, "Self-test prep failed, disabling\n"); | ||
849 | err = -ENODEV; | ||
850 | goto free_resources; | ||
851 | } | ||
852 | |||
853 | async_tx_ack(tx); | ||
854 | init_completion(&cmp); | ||
855 | tx->callback = ioat_dma_test_callback; | ||
856 | tx->callback_param = &cmp; | ||
857 | cookie = tx->tx_submit(tx); | ||
858 | if (cookie < 0) { | ||
859 | dev_err(dev, "Self-test setup failed, disabling\n"); | ||
860 | err = -ENODEV; | ||
861 | goto free_resources; | ||
862 | } | ||
863 | dma->device_issue_pending(dma_chan); | ||
864 | |||
865 | tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); | ||
866 | |||
867 | if (tmo == 0 || | ||
868 | dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) | ||
869 | != DMA_SUCCESS) { | ||
870 | dev_err(dev, "Self-test copy timed out, disabling\n"); | ||
871 | err = -ENODEV; | ||
872 | goto free_resources; | ||
873 | } | ||
874 | if (memcmp(src, dest, IOAT_TEST_SIZE)) { | ||
875 | dev_err(dev, "Self-test copy failed compare, disabling\n"); | ||
876 | err = -ENODEV; | ||
877 | goto free_resources; | ||
878 | } | ||
879 | |||
880 | free_resources: | ||
881 | dma->device_free_chan_resources(dma_chan); | ||
882 | out: | ||
883 | kfree(src); | ||
884 | kfree(dest); | ||
885 | return err; | ||
886 | } | ||
887 | |||
888 | static char ioat_interrupt_style[32] = "msix"; | ||
889 | module_param_string(ioat_interrupt_style, ioat_interrupt_style, | ||
890 | sizeof(ioat_interrupt_style), 0644); | ||
891 | MODULE_PARM_DESC(ioat_interrupt_style, | ||
892 | "set ioat interrupt style: msix (default), " | ||
893 | "msix-single-vector, msi, intx)"); | ||
894 | |||
895 | /** | ||
896 | * ioat_dma_setup_interrupts - setup interrupt handler | ||
897 | * @device: ioat device | ||
898 | */ | ||
899 | static int ioat_dma_setup_interrupts(struct ioatdma_device *device) | ||
900 | { | ||
901 | struct ioat_chan_common *chan; | ||
902 | struct pci_dev *pdev = device->pdev; | ||
903 | struct device *dev = &pdev->dev; | ||
904 | struct msix_entry *msix; | ||
905 | int i, j, msixcnt; | ||
906 | int err = -EINVAL; | ||
907 | u8 intrctrl = 0; | ||
908 | |||
909 | if (!strcmp(ioat_interrupt_style, "msix")) | ||
910 | goto msix; | ||
911 | if (!strcmp(ioat_interrupt_style, "msix-single-vector")) | ||
912 | goto msix_single_vector; | ||
913 | if (!strcmp(ioat_interrupt_style, "msi")) | ||
914 | goto msi; | ||
915 | if (!strcmp(ioat_interrupt_style, "intx")) | ||
916 | goto intx; | ||
917 | dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); | ||
918 | goto err_no_irq; | ||
919 | |||
920 | msix: | ||
921 | /* The number of MSI-X vectors should equal the number of channels */ | ||
922 | msixcnt = device->common.chancnt; | ||
923 | for (i = 0; i < msixcnt; i++) | ||
924 | device->msix_entries[i].entry = i; | ||
925 | |||
926 | err = pci_enable_msix(pdev, device->msix_entries, msixcnt); | ||
927 | if (err < 0) | ||
928 | goto msi; | ||
929 | if (err > 0) | ||
930 | goto msix_single_vector; | ||
931 | |||
932 | for (i = 0; i < msixcnt; i++) { | ||
933 | msix = &device->msix_entries[i]; | ||
934 | chan = ioat_chan_by_index(device, i); | ||
935 | err = devm_request_irq(dev, msix->vector, | ||
936 | ioat_dma_do_interrupt_msix, 0, | ||
937 | "ioat-msix", chan); | ||
938 | if (err) { | ||
939 | for (j = 0; j < i; j++) { | ||
940 | msix = &device->msix_entries[j]; | ||
941 | chan = ioat_chan_by_index(device, j); | ||
942 | devm_free_irq(dev, msix->vector, chan); | ||
943 | } | ||
944 | goto msix_single_vector; | ||
945 | } | ||
946 | } | ||
947 | intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; | ||
948 | goto done; | ||
949 | |||
950 | msix_single_vector: | ||
951 | msix = &device->msix_entries[0]; | ||
952 | msix->entry = 0; | ||
953 | err = pci_enable_msix(pdev, device->msix_entries, 1); | ||
954 | if (err) | ||
955 | goto msi; | ||
956 | |||
957 | err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0, | ||
958 | "ioat-msix", device); | ||
959 | if (err) { | ||
960 | pci_disable_msix(pdev); | ||
961 | goto msi; | ||
962 | } | ||
963 | goto done; | ||
964 | |||
965 | msi: | ||
966 | err = pci_enable_msi(pdev); | ||
967 | if (err) | ||
968 | goto intx; | ||
969 | |||
970 | err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, | ||
971 | "ioat-msi", device); | ||
972 | if (err) { | ||
973 | pci_disable_msi(pdev); | ||
974 | goto intx; | ||
975 | } | ||
976 | goto done; | ||
977 | |||
978 | intx: | ||
979 | err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, | ||
980 | IRQF_SHARED, "ioat-intx", device); | ||
981 | if (err) | ||
982 | goto err_no_irq; | ||
983 | |||
984 | done: | ||
985 | if (device->intr_quirk) | ||
986 | device->intr_quirk(device); | ||
987 | intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; | ||
988 | writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); | ||
989 | return 0; | ||
990 | |||
991 | err_no_irq: | ||
992 | /* Disable all interrupt generation */ | ||
993 | writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); | ||
994 | dev_err(dev, "no usable interrupts\n"); | ||
995 | return err; | ||
996 | } | ||
997 | |||
998 | static void ioat_disable_interrupts(struct ioatdma_device *device) | ||
999 | { | ||
1000 | /* Disable all interrupt generation */ | ||
1001 | writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); | ||
1002 | } | ||
1003 | |||
1004 | int __devinit ioat_probe(struct ioatdma_device *device) | ||
1005 | { | ||
1006 | int err = -ENODEV; | ||
1007 | struct dma_device *dma = &device->common; | ||
1008 | struct pci_dev *pdev = device->pdev; | ||
1009 | struct device *dev = &pdev->dev; | ||
1010 | |||
1011 | /* DMA coherent memory pool for DMA descriptor allocations */ | ||
1012 | device->dma_pool = pci_pool_create("dma_desc_pool", pdev, | ||
1013 | sizeof(struct ioat_dma_descriptor), | ||
1014 | 64, 0); | ||
1015 | if (!device->dma_pool) { | ||
1016 | err = -ENOMEM; | ||
1017 | goto err_dma_pool; | ||
1018 | } | ||
1019 | |||
1020 | device->completion_pool = pci_pool_create("completion_pool", pdev, | ||
1021 | sizeof(u64), SMP_CACHE_BYTES, | ||
1022 | SMP_CACHE_BYTES); | ||
1023 | |||
1024 | if (!device->completion_pool) { | ||
1025 | err = -ENOMEM; | ||
1026 | goto err_completion_pool; | ||
1027 | } | ||
1028 | |||
1029 | device->enumerate_channels(device); | ||
1030 | |||
1031 | dma_cap_set(DMA_MEMCPY, dma->cap_mask); | ||
1032 | dma->dev = &pdev->dev; | ||
1033 | |||
1034 | if (!dma->chancnt) { | ||
1035 | dev_err(dev, "zero channels detected\n"); | ||
1036 | goto err_setup_interrupts; | ||
1037 | } | ||
1038 | |||
1039 | err = ioat_dma_setup_interrupts(device); | ||
1040 | if (err) | ||
1041 | goto err_setup_interrupts; | ||
1042 | |||
1043 | err = device->self_test(device); | ||
1044 | if (err) | ||
1045 | goto err_self_test; | ||
1046 | |||
1047 | return 0; | ||
1048 | |||
1049 | err_self_test: | ||
1050 | ioat_disable_interrupts(device); | ||
1051 | err_setup_interrupts: | ||
1052 | pci_pool_destroy(device->completion_pool); | ||
1053 | err_completion_pool: | ||
1054 | pci_pool_destroy(device->dma_pool); | ||
1055 | err_dma_pool: | ||
1056 | return err; | ||
1057 | } | ||
1058 | |||
1059 | int __devinit ioat_register(struct ioatdma_device *device) | ||
1060 | { | ||
1061 | int err = dma_async_device_register(&device->common); | ||
1062 | |||
1063 | if (err) { | ||
1064 | ioat_disable_interrupts(device); | ||
1065 | pci_pool_destroy(device->completion_pool); | ||
1066 | pci_pool_destroy(device->dma_pool); | ||
1067 | } | ||
1068 | |||
1069 | return err; | ||
1070 | } | ||
1071 | |||
1072 | /* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */ | ||
1073 | static void ioat1_intr_quirk(struct ioatdma_device *device) | ||
1074 | { | ||
1075 | struct pci_dev *pdev = device->pdev; | ||
1076 | u32 dmactrl; | ||
1077 | |||
1078 | pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); | ||
1079 | if (pdev->msi_enabled) | ||
1080 | dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; | ||
1081 | else | ||
1082 | dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN; | ||
1083 | pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); | ||
1084 | } | ||
1085 | |||
1086 | static ssize_t ring_size_show(struct dma_chan *c, char *page) | ||
1087 | { | ||
1088 | struct ioat_dma_chan *ioat = to_ioat_chan(c); | ||
1089 | |||
1090 | return sprintf(page, "%d\n", ioat->desccount); | ||
1091 | } | ||
1092 | static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); | ||
1093 | |||
1094 | static ssize_t ring_active_show(struct dma_chan *c, char *page) | ||
1095 | { | ||
1096 | struct ioat_dma_chan *ioat = to_ioat_chan(c); | ||
1097 | |||
1098 | return sprintf(page, "%d\n", ioat->active); | ||
1099 | } | ||
1100 | static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); | ||
1101 | |||
1102 | static ssize_t cap_show(struct dma_chan *c, char *page) | ||
1103 | { | ||
1104 | struct dma_device *dma = c->device; | ||
1105 | |||
1106 | return sprintf(page, "copy%s%s%s%s%s%s\n", | ||
1107 | dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", | ||
1108 | dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", | ||
1109 | dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", | ||
1110 | dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", | ||
1111 | dma_has_cap(DMA_MEMSET, dma->cap_mask) ? " fill" : "", | ||
1112 | dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : ""); | ||
1113 | |||
1114 | } | ||
1115 | struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); | ||
1116 | |||
1117 | static ssize_t version_show(struct dma_chan *c, char *page) | ||
1118 | { | ||
1119 | struct dma_device *dma = c->device; | ||
1120 | struct ioatdma_device *device = to_ioatdma_device(dma); | ||
1121 | |||
1122 | return sprintf(page, "%d.%d\n", | ||
1123 | device->version >> 4, device->version & 0xf); | ||
1124 | } | ||
1125 | struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); | ||
1126 | |||
1127 | static struct attribute *ioat1_attrs[] = { | ||
1128 | &ring_size_attr.attr, | ||
1129 | &ring_active_attr.attr, | ||
1130 | &ioat_cap_attr.attr, | ||
1131 | &ioat_version_attr.attr, | ||
1132 | NULL, | ||
1133 | }; | ||
1134 | |||
1135 | static ssize_t | ||
1136 | ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) | ||
1137 | { | ||
1138 | struct ioat_sysfs_entry *entry; | ||
1139 | struct ioat_chan_common *chan; | ||
1140 | |||
1141 | entry = container_of(attr, struct ioat_sysfs_entry, attr); | ||
1142 | chan = container_of(kobj, struct ioat_chan_common, kobj); | ||
1143 | |||
1144 | if (!entry->show) | ||
1145 | return -EIO; | ||
1146 | return entry->show(&chan->common, page); | ||
1147 | } | ||
1148 | |||
1149 | struct sysfs_ops ioat_sysfs_ops = { | ||
1150 | .show = ioat_attr_show, | ||
1151 | }; | ||
1152 | |||
1153 | static struct kobj_type ioat1_ktype = { | ||
1154 | .sysfs_ops = &ioat_sysfs_ops, | ||
1155 | .default_attrs = ioat1_attrs, | ||
1156 | }; | ||
1157 | |||
1158 | void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type) | ||
1159 | { | ||
1160 | struct dma_device *dma = &device->common; | ||
1161 | struct dma_chan *c; | ||
1162 | |||
1163 | list_for_each_entry(c, &dma->channels, device_node) { | ||
1164 | struct ioat_chan_common *chan = to_chan_common(c); | ||
1165 | struct kobject *parent = &c->dev->device.kobj; | ||
1166 | int err; | ||
1167 | |||
1168 | err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata"); | ||
1169 | if (err) { | ||
1170 | dev_warn(to_dev(chan), | ||
1171 | "sysfs init error (%d), continuing...\n", err); | ||
1172 | kobject_put(&chan->kobj); | ||
1173 | set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state); | ||
1174 | } | ||
1175 | } | ||
1176 | } | ||
1177 | |||
1178 | void ioat_kobject_del(struct ioatdma_device *device) | ||
1179 | { | ||
1180 | struct dma_device *dma = &device->common; | ||
1181 | struct dma_chan *c; | ||
1182 | |||
1183 | list_for_each_entry(c, &dma->channels, device_node) { | ||
1184 | struct ioat_chan_common *chan = to_chan_common(c); | ||
1185 | |||
1186 | if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) { | ||
1187 | kobject_del(&chan->kobj); | ||
1188 | kobject_put(&chan->kobj); | ||
1189 | } | ||
1190 | } | ||
1191 | } | ||
1192 | |||
1193 | int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca) | ||
1194 | { | ||
1195 | struct pci_dev *pdev = device->pdev; | ||
1196 | struct dma_device *dma; | ||
1197 | int err; | ||
1198 | |||
1199 | device->intr_quirk = ioat1_intr_quirk; | ||
1200 | device->enumerate_channels = ioat1_enumerate_channels; | ||
1201 | device->self_test = ioat_dma_self_test; | ||
1202 | dma = &device->common; | ||
1203 | dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; | ||
1204 | dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; | ||
1205 | dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; | ||
1206 | dma->device_free_chan_resources = ioat1_dma_free_chan_resources; | ||
1207 | dma->device_is_tx_complete = ioat1_dma_is_complete; | ||
1208 | |||
1209 | err = ioat_probe(device); | ||
1210 | if (err) | ||
1211 | return err; | ||
1212 | ioat_set_tcp_copy_break(4096); | ||
1213 | err = ioat_register(device); | ||
1214 | if (err) | ||
1215 | return err; | ||
1216 | ioat_kobject_add(device, &ioat1_ktype); | ||
1217 | |||
1218 | if (dca) | ||
1219 | device->dca = ioat_dca_init(pdev, device->reg_base); | ||
1220 | |||
1221 | return err; | ||
1222 | } | ||
1223 | |||
1224 | void __devexit ioat_dma_remove(struct ioatdma_device *device) | ||
1225 | { | ||
1226 | struct dma_device *dma = &device->common; | ||
1227 | |||
1228 | ioat_disable_interrupts(device); | ||
1229 | |||
1230 | ioat_kobject_del(device); | ||
1231 | |||
1232 | dma_async_device_unregister(dma); | ||
1233 | |||
1234 | pci_pool_destroy(device->dma_pool); | ||
1235 | pci_pool_destroy(device->completion_pool); | ||
1236 | |||
1237 | INIT_LIST_HEAD(&dma->channels); | ||
1238 | } | ||
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h new file mode 100644 index 000000000000..c14fdfeb7f33 --- /dev/null +++ b/drivers/dma/ioat/dma.h | |||
@@ -0,0 +1,337 @@ | |||
1 | /* | ||
2 | * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License as published by the Free | ||
6 | * Software Foundation; either version 2 of the License, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 | ||
16 | * Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in the | ||
19 | * file called COPYING. | ||
20 | */ | ||
21 | #ifndef IOATDMA_H | ||
22 | #define IOATDMA_H | ||
23 | |||
24 | #include <linux/dmaengine.h> | ||
25 | #include "hw.h" | ||
26 | #include "registers.h" | ||
27 | #include <linux/init.h> | ||
28 | #include <linux/dmapool.h> | ||
29 | #include <linux/cache.h> | ||
30 | #include <linux/pci_ids.h> | ||
31 | #include <net/tcp.h> | ||
32 | |||
33 | #define IOAT_DMA_VERSION "4.00" | ||
34 | |||
35 | #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 | ||
/* all bits set; parenthesized so it expands as one operand in any expression */
#define IOAT_DMA_DCA_ANY_CPU		(~0)
37 | |||
38 | #define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) | ||
39 | #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) | ||
40 | #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) | ||
41 | #define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) | ||
42 | |||
43 | #define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) | ||
44 | |||
45 | /* | ||
46 | * workaround for IOAT ver.3.0 null descriptor issue | ||
47 | * (channel returns error when size is 0) | ||
48 | */ | ||
49 | #define NULL_DESC_BUFFER_SIZE 1 | ||
50 | |||
/**
 * struct ioatdma_device - internal representation of an IOAT device
 * @pdev: PCI-Express device
 * @reg_base: MMIO register space base address
 * @dma_pool: for allocating DMA descriptors
 * @completion_pool: pci pool of u64-sized entries, created together with
 *	@dma_pool at probe time
 * @common: embedded struct dma_device
 * @version: version of ioatdma device
 * @msix_entries: irq handlers
 * @idx: per channel data
 * @dca: direct cache access context
 * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
 * @enumerate_channels: hw version specific channel enumeration
 * @cleanup_tasklet: select between the v2 and v3 cleanup routines
 * @timer_fn: select between the v2 and v3 timer watchdog routines
 * @self_test: hardware version specific self test for each supported op type
 *
 * Note: the v3 cleanup routine supports raid operations
 */
69 | struct ioatdma_device { | ||
70 | struct pci_dev *pdev; | ||
71 | void __iomem *reg_base; | ||
72 | struct pci_pool *dma_pool; | ||
73 | struct pci_pool *completion_pool; | ||
74 | struct dma_device common; | ||
75 | u8 version; | ||
76 | struct msix_entry msix_entries[4]; | ||
77 | struct ioat_chan_common *idx[4]; | ||
78 | struct dca_provider *dca; | ||
79 | void (*intr_quirk)(struct ioatdma_device *device); | ||
80 | int (*enumerate_channels)(struct ioatdma_device *device); | ||
81 | void (*cleanup_tasklet)(unsigned long data); | ||
82 | void (*timer_fn)(unsigned long data); | ||
83 | int (*self_test)(struct ioatdma_device *device); | ||
84 | }; | ||
85 | |||
86 | struct ioat_chan_common { | ||
87 | struct dma_chan common; | ||
88 | void __iomem *reg_base; | ||
89 | unsigned long last_completion; | ||
90 | spinlock_t cleanup_lock; | ||
91 | dma_cookie_t completed_cookie; | ||
92 | unsigned long state; | ||
93 | #define IOAT_COMPLETION_PENDING 0 | ||
94 | #define IOAT_COMPLETION_ACK 1 | ||
95 | #define IOAT_RESET_PENDING 2 | ||
96 | #define IOAT_KOBJ_INIT_FAIL 3 | ||
97 | struct timer_list timer; | ||
98 | #define COMPLETION_TIMEOUT msecs_to_jiffies(100) | ||
99 | #define IDLE_TIMEOUT msecs_to_jiffies(2000) | ||
100 | #define RESET_DELAY msecs_to_jiffies(100) | ||
101 | struct ioatdma_device *device; | ||
102 | dma_addr_t completion_dma; | ||
103 | u64 *completion; | ||
104 | struct tasklet_struct cleanup_task; | ||
105 | struct kobject kobj; | ||
106 | }; | ||
107 | |||
108 | struct ioat_sysfs_entry { | ||
109 | struct attribute attr; | ||
110 | ssize_t (*show)(struct dma_chan *, char *); | ||
111 | }; | ||
112 | |||
113 | /** | ||
114 | * struct ioat_dma_chan - internal representation of a DMA channel | ||
115 | */ | ||
116 | struct ioat_dma_chan { | ||
117 | struct ioat_chan_common base; | ||
118 | |||
119 | size_t xfercap; /* XFERCAP register value expanded out */ | ||
120 | |||
121 | spinlock_t desc_lock; | ||
122 | struct list_head free_desc; | ||
123 | struct list_head used_desc; | ||
124 | |||
125 | int pending; | ||
126 | u16 desccount; | ||
127 | u16 active; | ||
128 | }; | ||
129 | |||
130 | static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) | ||
131 | { | ||
132 | return container_of(c, struct ioat_chan_common, common); | ||
133 | } | ||
134 | |||
135 | static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) | ||
136 | { | ||
137 | struct ioat_chan_common *chan = to_chan_common(c); | ||
138 | |||
139 | return container_of(chan, struct ioat_dma_chan, base); | ||
140 | } | ||
141 | |||
142 | /** | ||
143 | * ioat_is_complete - poll the status of an ioat transaction | ||
144 | * @c: channel handle | ||
145 | * @cookie: transaction identifier | ||
146 | * @done: if set, updated with last completed transaction | ||
147 | * @used: if set, updated with last used transaction | ||
148 | */ | ||
149 | static inline enum dma_status | ||
150 | ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie, | ||
151 | dma_cookie_t *done, dma_cookie_t *used) | ||
152 | { | ||
153 | struct ioat_chan_common *chan = to_chan_common(c); | ||
154 | dma_cookie_t last_used; | ||
155 | dma_cookie_t last_complete; | ||
156 | |||
157 | last_used = c->cookie; | ||
158 | last_complete = chan->completed_cookie; | ||
159 | |||
160 | if (done) | ||
161 | *done = last_complete; | ||
162 | if (used) | ||
163 | *used = last_used; | ||
164 | |||
165 | return dma_async_is_complete(cookie, last_complete, last_used); | ||
166 | } | ||
167 | |||
168 | /* wrapper around hardware descriptor format + additional software fields */ | ||
169 | |||
170 | /** | ||
171 | * struct ioat_desc_sw - wrapper around hardware descriptor | ||
172 | * @hw: hardware DMA descriptor (for memcpy) | ||
173 | * @node: this descriptor will either be on the free list, | ||
174 | * or attached to a transaction list (tx_list) | ||
175 | * @txd: the generic software descriptor for all engines | ||
176 | * @id: identifier for debug | ||
177 | */ | ||
178 | struct ioat_desc_sw { | ||
179 | struct ioat_dma_descriptor *hw; | ||
180 | struct list_head node; | ||
181 | size_t len; | ||
182 | struct list_head tx_list; | ||
183 | struct dma_async_tx_descriptor txd; | ||
184 | #ifdef DEBUG | ||
185 | int id; | ||
186 | #endif | ||
187 | }; | ||
188 | |||
189 | #ifdef DEBUG | ||
190 | #define set_desc_id(desc, i) ((desc)->id = (i)) | ||
191 | #define desc_id(desc) ((desc)->id) | ||
192 | #else | ||
193 | #define set_desc_id(desc, i) | ||
194 | #define desc_id(desc) (0) | ||
195 | #endif | ||
196 | |||
197 | static inline void | ||
198 | __dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, | ||
199 | struct dma_async_tx_descriptor *tx, int id) | ||
200 | { | ||
201 | struct device *dev = to_dev(chan); | ||
202 | |||
203 | dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" | ||
204 | " ctl: %#x (op: %d int_en: %d compl: %d)\n", id, | ||
205 | (unsigned long long) tx->phys, | ||
206 | (unsigned long long) hw->next, tx->cookie, tx->flags, | ||
207 | hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write); | ||
208 | } | ||
209 | |||
/*
 * dump_desc_dbg - debug-print descriptor @d of channel @c
 *
 * Does nothing when @d is NULL and always evaluates to 0.  Both arguments
 * are parenthesized so that non-trivial expressions expand correctly; note
 * that @d is still evaluated more than once.
 */
#define dump_desc_dbg(c, d) \
	({ if (d) __dump_desc_dbg(&(c)->base, (d)->hw, &(d)->txd, desc_id(d)); 0; })
212 | |||
213 | static inline void ioat_set_tcp_copy_break(unsigned long copybreak) | ||
214 | { | ||
215 | #ifdef CONFIG_NET_DMA | ||
216 | sysctl_tcp_dma_copybreak = copybreak; | ||
217 | #endif | ||
218 | } | ||
219 | |||
220 | static inline struct ioat_chan_common * | ||
221 | ioat_chan_by_index(struct ioatdma_device *device, int index) | ||
222 | { | ||
223 | return device->idx[index]; | ||
224 | } | ||
225 | |||
226 | static inline u64 ioat_chansts(struct ioat_chan_common *chan) | ||
227 | { | ||
228 | u8 ver = chan->device->version; | ||
229 | u64 status; | ||
230 | u32 status_lo; | ||
231 | |||
232 | /* We need to read the low address first as this causes the | ||
233 | * chipset to latch the upper bits for the subsequent read | ||
234 | */ | ||
235 | status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); | ||
236 | status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); | ||
237 | status <<= 32; | ||
238 | status |= status_lo; | ||
239 | |||
240 | return status; | ||
241 | } | ||
242 | |||
243 | static inline void ioat_start(struct ioat_chan_common *chan) | ||
244 | { | ||
245 | u8 ver = chan->device->version; | ||
246 | |||
247 | writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); | ||
248 | } | ||
249 | |||
250 | static inline u64 ioat_chansts_to_addr(u64 status) | ||
251 | { | ||
252 | return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; | ||
253 | } | ||
254 | |||
255 | static inline u32 ioat_chanerr(struct ioat_chan_common *chan) | ||
256 | { | ||
257 | return readl(chan->reg_base + IOAT_CHANERR_OFFSET); | ||
258 | } | ||
259 | |||
260 | static inline void ioat_suspend(struct ioat_chan_common *chan) | ||
261 | { | ||
262 | u8 ver = chan->device->version; | ||
263 | |||
264 | writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); | ||
265 | } | ||
266 | |||
267 | static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) | ||
268 | { | ||
269 | struct ioat_chan_common *chan = &ioat->base; | ||
270 | |||
271 | writel(addr & 0x00000000FFFFFFFF, | ||
272 | chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); | ||
273 | writel(addr >> 32, | ||
274 | chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); | ||
275 | } | ||
276 | |||
277 | static inline bool is_ioat_active(unsigned long status) | ||
278 | { | ||
279 | return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); | ||
280 | } | ||
281 | |||
282 | static inline bool is_ioat_idle(unsigned long status) | ||
283 | { | ||
284 | return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE); | ||
285 | } | ||
286 | |||
287 | static inline bool is_ioat_halted(unsigned long status) | ||
288 | { | ||
289 | return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED); | ||
290 | } | ||
291 | |||
292 | static inline bool is_ioat_suspended(unsigned long status) | ||
293 | { | ||
294 | return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED); | ||
295 | } | ||
296 | |||
297 | /* channel was fatally programmed */ | ||
298 | static inline bool is_ioat_bug(unsigned long err) | ||
299 | { | ||
300 | return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR| | ||
301 | IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR| | ||
302 | IOAT_CHANERR_LENGTH_ERR)); | ||
303 | } | ||
304 | |||
305 | static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, | ||
306 | int direction, enum dma_ctrl_flags flags, bool dst) | ||
307 | { | ||
308 | if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || | ||
309 | (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) | ||
310 | pci_unmap_single(pdev, addr, len, direction); | ||
311 | else | ||
312 | pci_unmap_page(pdev, addr, len, direction); | ||
313 | } | ||
314 | |||
315 | int __devinit ioat_probe(struct ioatdma_device *device); | ||
316 | int __devinit ioat_register(struct ioatdma_device *device); | ||
317 | int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca); | ||
318 | int __devinit ioat_dma_self_test(struct ioatdma_device *device); | ||
319 | void __devexit ioat_dma_remove(struct ioatdma_device *device); | ||
320 | struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev, | ||
321 | void __iomem *iobase); | ||
322 | unsigned long ioat_get_current_completion(struct ioat_chan_common *chan); | ||
323 | void ioat_init_channel(struct ioatdma_device *device, | ||
324 | struct ioat_chan_common *chan, int idx, | ||
325 | void (*timer_fn)(unsigned long), | ||
326 | void (*tasklet)(unsigned long), | ||
327 | unsigned long ioat); | ||
328 | void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, | ||
329 | size_t len, struct ioat_dma_descriptor *hw); | ||
330 | bool ioat_cleanup_preamble(struct ioat_chan_common *chan, | ||
331 | unsigned long *phys_complete); | ||
332 | void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); | ||
333 | void ioat_kobject_del(struct ioatdma_device *device); | ||
334 | extern struct sysfs_ops ioat_sysfs_ops; | ||
335 | extern struct ioat_sysfs_entry ioat_version_attr; | ||
336 | extern struct ioat_sysfs_entry ioat_cap_attr; | ||
337 | #endif /* IOATDMA_H */ | ||
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c new file mode 100644 index 000000000000..96ffab7d37a7 --- /dev/null +++ b/drivers/dma/ioat/dma_v2.c | |||
@@ -0,0 +1,871 @@ | |||
1 | /* | ||
2 | * Intel I/OAT DMA Linux driver | ||
3 | * Copyright(c) 2004 - 2009 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., | ||
16 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in | ||
19 | * the file called "COPYING". | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * This driver supports an Intel I/OAT DMA engine (versions >= 2), which | ||
25 | * does asynchronous data movement and checksumming operations. | ||
26 | */ | ||
27 | |||
28 | #include <linux/init.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/pci.h> | ||
31 | #include <linux/interrupt.h> | ||
32 | #include <linux/dmaengine.h> | ||
33 | #include <linux/delay.h> | ||
34 | #include <linux/dma-mapping.h> | ||
35 | #include <linux/workqueue.h> | ||
36 | #include <linux/i7300_idle.h> | ||
37 | #include "dma.h" | ||
38 | #include "dma_v2.h" | ||
39 | #include "registers.h" | ||
40 | #include "hw.h" | ||
41 | |||
42 | int ioat_ring_alloc_order = 8; | ||
43 | module_param(ioat_ring_alloc_order, int, 0644); | ||
44 | MODULE_PARM_DESC(ioat_ring_alloc_order, | ||
45 | "ioat2+: allocate 2^n descriptors per channel" | ||
46 | " (default: 8 max: 16)"); | ||
47 | static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; | ||
48 | module_param(ioat_ring_max_alloc_order, int, 0644); | ||
49 | MODULE_PARM_DESC(ioat_ring_max_alloc_order, | ||
50 | "ioat2+: upper limit for ring size (default: 16)"); | ||
51 | |||
52 | void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) | ||
53 | { | ||
54 | void * __iomem reg_base = ioat->base.reg_base; | ||
55 | |||
56 | ioat->pending = 0; | ||
57 | ioat->dmacount += ioat2_ring_pending(ioat); | ||
58 | ioat->issued = ioat->head; | ||
59 | /* make descriptor updates globally visible before notifying channel */ | ||
60 | wmb(); | ||
61 | writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET); | ||
62 | dev_dbg(to_dev(&ioat->base), | ||
63 | "%s: head: %#x tail: %#x issued: %#x count: %#x\n", | ||
64 | __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); | ||
65 | } | ||
66 | |||
67 | void ioat2_issue_pending(struct dma_chan *chan) | ||
68 | { | ||
69 | struct ioat2_dma_chan *ioat = to_ioat2_chan(chan); | ||
70 | |||
71 | spin_lock_bh(&ioat->ring_lock); | ||
72 | if (ioat->pending == 1) | ||
73 | __ioat2_issue_pending(ioat); | ||
74 | spin_unlock_bh(&ioat->ring_lock); | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * ioat2_update_pending - log pending descriptors | ||
79 | * @ioat: ioat2+ channel | ||
80 | * | ||
81 | * set pending to '1' unless pending is already set to '2', pending == 2 | ||
82 | * indicates that submission is temporarily blocked due to an in-flight | ||
83 | * reset. If we are already above the ioat_pending_level threshold then | ||
84 | * just issue pending. | ||
85 | * | ||
86 | * called with ring_lock held | ||
87 | */ | ||
88 | static void ioat2_update_pending(struct ioat2_dma_chan *ioat) | ||
89 | { | ||
90 | if (unlikely(ioat->pending == 2)) | ||
91 | return; | ||
92 | else if (ioat2_ring_pending(ioat) > ioat_pending_level) | ||
93 | __ioat2_issue_pending(ioat); | ||
94 | else | ||
95 | ioat->pending = 1; | ||
96 | } | ||
97 | |||
98 | static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) | ||
99 | { | ||
100 | struct ioat_ring_ent *desc; | ||
101 | struct ioat_dma_descriptor *hw; | ||
102 | int idx; | ||
103 | |||
104 | if (ioat2_ring_space(ioat) < 1) { | ||
105 | dev_err(to_dev(&ioat->base), | ||
106 | "Unable to start null desc - ring full\n"); | ||
107 | return; | ||
108 | } | ||
109 | |||
110 | dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n", | ||
111 | __func__, ioat->head, ioat->tail, ioat->issued); | ||
112 | idx = ioat2_desc_alloc(ioat, 1); | ||
113 | desc = ioat2_get_ring_ent(ioat, idx); | ||
114 | |||
115 | hw = desc->hw; | ||
116 | hw->ctl = 0; | ||
117 | hw->ctl_f.null = 1; | ||
118 | hw->ctl_f.int_en = 1; | ||
119 | hw->ctl_f.compl_write = 1; | ||
120 | /* set size to non-zero value (channel returns error when size is 0) */ | ||
121 | hw->size = NULL_DESC_BUFFER_SIZE; | ||
122 | hw->src_addr = 0; | ||
123 | hw->dst_addr = 0; | ||
124 | async_tx_ack(&desc->txd); | ||
125 | ioat2_set_chainaddr(ioat, desc->txd.phys); | ||
126 | dump_desc_dbg(ioat, desc); | ||
127 | __ioat2_issue_pending(ioat); | ||
128 | } | ||
129 | |||
130 | static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) | ||
131 | { | ||
132 | spin_lock_bh(&ioat->ring_lock); | ||
133 | __ioat2_start_null_desc(ioat); | ||
134 | spin_unlock_bh(&ioat->ring_lock); | ||
135 | } | ||
136 | |||
137 | static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) | ||
138 | { | ||
139 | struct ioat_chan_common *chan = &ioat->base; | ||
140 | struct dma_async_tx_descriptor *tx; | ||
141 | struct ioat_ring_ent *desc; | ||
142 | bool seen_current = false; | ||
143 | u16 active; | ||
144 | int i; | ||
145 | |||
146 | dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", | ||
147 | __func__, ioat->head, ioat->tail, ioat->issued); | ||
148 | |||
149 | active = ioat2_ring_active(ioat); | ||
150 | for (i = 0; i < active && !seen_current; i++) { | ||
151 | prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); | ||
152 | desc = ioat2_get_ring_ent(ioat, ioat->tail + i); | ||
153 | tx = &desc->txd; | ||
154 | dump_desc_dbg(ioat, desc); | ||
155 | if (tx->cookie) { | ||
156 | ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); | ||
157 | chan->completed_cookie = tx->cookie; | ||
158 | tx->cookie = 0; | ||
159 | if (tx->callback) { | ||
160 | tx->callback(tx->callback_param); | ||
161 | tx->callback = NULL; | ||
162 | } | ||
163 | } | ||
164 | |||
165 | if (tx->phys == phys_complete) | ||
166 | seen_current = true; | ||
167 | } | ||
168 | ioat->tail += i; | ||
169 | BUG_ON(!seen_current); /* no active descs have written a completion? */ | ||
170 | |||
171 | chan->last_completion = phys_complete; | ||
172 | if (ioat->head == ioat->tail) { | ||
173 | dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", | ||
174 | __func__); | ||
175 | clear_bit(IOAT_COMPLETION_PENDING, &chan->state); | ||
176 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); | ||
177 | } | ||
178 | } | ||
179 | |||
180 | /** | ||
181 | * ioat2_cleanup - clean finished descriptors (advance tail pointer) | ||
182 | * @chan: ioat channel to be cleaned up | ||
183 | */ | ||
184 | static void ioat2_cleanup(struct ioat2_dma_chan *ioat) | ||
185 | { | ||
186 | struct ioat_chan_common *chan = &ioat->base; | ||
187 | unsigned long phys_complete; | ||
188 | |||
189 | prefetch(chan->completion); | ||
190 | |||
191 | if (!spin_trylock_bh(&chan->cleanup_lock)) | ||
192 | return; | ||
193 | |||
194 | if (!ioat_cleanup_preamble(chan, &phys_complete)) { | ||
195 | spin_unlock_bh(&chan->cleanup_lock); | ||
196 | return; | ||
197 | } | ||
198 | |||
199 | if (!spin_trylock_bh(&ioat->ring_lock)) { | ||
200 | spin_unlock_bh(&chan->cleanup_lock); | ||
201 | return; | ||
202 | } | ||
203 | |||
204 | __cleanup(ioat, phys_complete); | ||
205 | |||
206 | spin_unlock_bh(&ioat->ring_lock); | ||
207 | spin_unlock_bh(&chan->cleanup_lock); | ||
208 | } | ||
209 | |||
210 | void ioat2_cleanup_tasklet(unsigned long data) | ||
211 | { | ||
212 | struct ioat2_dma_chan *ioat = (void *) data; | ||
213 | |||
214 | ioat2_cleanup(ioat); | ||
215 | writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); | ||
216 | } | ||
217 | |||
218 | void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) | ||
219 | { | ||
220 | struct ioat_chan_common *chan = &ioat->base; | ||
221 | |||
222 | /* set the tail to be re-issued */ | ||
223 | ioat->issued = ioat->tail; | ||
224 | ioat->dmacount = 0; | ||
225 | set_bit(IOAT_COMPLETION_PENDING, &chan->state); | ||
226 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | ||
227 | |||
228 | dev_dbg(to_dev(chan), | ||
229 | "%s: head: %#x tail: %#x issued: %#x count: %#x\n", | ||
230 | __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); | ||
231 | |||
232 | if (ioat2_ring_pending(ioat)) { | ||
233 | struct ioat_ring_ent *desc; | ||
234 | |||
235 | desc = ioat2_get_ring_ent(ioat, ioat->tail); | ||
236 | ioat2_set_chainaddr(ioat, desc->txd.phys); | ||
237 | __ioat2_issue_pending(ioat); | ||
238 | } else | ||
239 | __ioat2_start_null_desc(ioat); | ||
240 | } | ||
241 | |||
242 | static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) | ||
243 | { | ||
244 | struct ioat_chan_common *chan = &ioat->base; | ||
245 | unsigned long phys_complete; | ||
246 | u32 status; | ||
247 | |||
248 | status = ioat_chansts(chan); | ||
249 | if (is_ioat_active(status) || is_ioat_idle(status)) | ||
250 | ioat_suspend(chan); | ||
251 | while (is_ioat_active(status) || is_ioat_idle(status)) { | ||
252 | status = ioat_chansts(chan); | ||
253 | cpu_relax(); | ||
254 | } | ||
255 | |||
256 | if (ioat_cleanup_preamble(chan, &phys_complete)) | ||
257 | __cleanup(ioat, phys_complete); | ||
258 | |||
259 | __ioat2_restart_chan(ioat); | ||
260 | } | ||
261 | |||
262 | void ioat2_timer_event(unsigned long data) | ||
263 | { | ||
264 | struct ioat2_dma_chan *ioat = (void *) data; | ||
265 | struct ioat_chan_common *chan = &ioat->base; | ||
266 | |||
267 | spin_lock_bh(&chan->cleanup_lock); | ||
268 | if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { | ||
269 | unsigned long phys_complete; | ||
270 | u64 status; | ||
271 | |||
272 | spin_lock_bh(&ioat->ring_lock); | ||
273 | status = ioat_chansts(chan); | ||
274 | |||
275 | /* when halted due to errors check for channel | ||
276 | * programming errors before advancing the completion state | ||
277 | */ | ||
278 | if (is_ioat_halted(status)) { | ||
279 | u32 chanerr; | ||
280 | |||
281 | chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); | ||
282 | BUG_ON(is_ioat_bug(chanerr)); | ||
283 | } | ||
284 | |||
285 | /* if we haven't made progress and we have already | ||
286 | * acknowledged a pending completion once, then be more | ||
287 | * forceful with a restart | ||
288 | */ | ||
289 | if (ioat_cleanup_preamble(chan, &phys_complete)) | ||
290 | __cleanup(ioat, phys_complete); | ||
291 | else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) | ||
292 | ioat2_restart_channel(ioat); | ||
293 | else { | ||
294 | set_bit(IOAT_COMPLETION_ACK, &chan->state); | ||
295 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | ||
296 | } | ||
297 | spin_unlock_bh(&ioat->ring_lock); | ||
298 | } else { | ||
299 | u16 active; | ||
300 | |||
301 | /* if the ring is idle, empty, and oversized try to step | ||
302 | * down the size | ||
303 | */ | ||
304 | spin_lock_bh(&ioat->ring_lock); | ||
305 | active = ioat2_ring_active(ioat); | ||
306 | if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) | ||
307 | reshape_ring(ioat, ioat->alloc_order-1); | ||
308 | spin_unlock_bh(&ioat->ring_lock); | ||
309 | |||
310 | /* keep shrinking until we get back to our minimum | ||
311 | * default size | ||
312 | */ | ||
313 | if (ioat->alloc_order > ioat_get_alloc_order()) | ||
314 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); | ||
315 | } | ||
316 | spin_unlock_bh(&chan->cleanup_lock); | ||
317 | } | ||
318 | |||
319 | /** | ||
320 | * ioat2_enumerate_channels - find and initialize the device's channels | ||
321 | * @device: the device to be enumerated | ||
322 | */ | ||
323 | int ioat2_enumerate_channels(struct ioatdma_device *device) | ||
324 | { | ||
325 | struct ioat2_dma_chan *ioat; | ||
326 | struct device *dev = &device->pdev->dev; | ||
327 | struct dma_device *dma = &device->common; | ||
328 | u8 xfercap_log; | ||
329 | int i; | ||
330 | |||
331 | INIT_LIST_HEAD(&dma->channels); | ||
332 | dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); | ||
333 | dma->chancnt &= 0x1f; /* bits [4:0] valid */ | ||
334 | if (dma->chancnt > ARRAY_SIZE(device->idx)) { | ||
335 | dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", | ||
336 | dma->chancnt, ARRAY_SIZE(device->idx)); | ||
337 | dma->chancnt = ARRAY_SIZE(device->idx); | ||
338 | } | ||
339 | xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); | ||
340 | xfercap_log &= 0x1f; /* bits [4:0] valid */ | ||
341 | if (xfercap_log == 0) | ||
342 | return 0; | ||
343 | dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); | ||
344 | |||
345 | /* FIXME which i/oat version is i7300? */ | ||
346 | #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL | ||
347 | if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) | ||
348 | dma->chancnt--; | ||
349 | #endif | ||
350 | for (i = 0; i < dma->chancnt; i++) { | ||
351 | ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); | ||
352 | if (!ioat) | ||
353 | break; | ||
354 | |||
355 | ioat_init_channel(device, &ioat->base, i, | ||
356 | device->timer_fn, | ||
357 | device->cleanup_tasklet, | ||
358 | (unsigned long) ioat); | ||
359 | ioat->xfercap_log = xfercap_log; | ||
360 | spin_lock_init(&ioat->ring_lock); | ||
361 | } | ||
362 | dma->chancnt = i; | ||
363 | return i; | ||
364 | } | ||
365 | |||
366 | static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) | ||
367 | { | ||
368 | struct dma_chan *c = tx->chan; | ||
369 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
370 | struct ioat_chan_common *chan = &ioat->base; | ||
371 | dma_cookie_t cookie = c->cookie; | ||
372 | |||
373 | cookie++; | ||
374 | if (cookie < 0) | ||
375 | cookie = 1; | ||
376 | tx->cookie = cookie; | ||
377 | c->cookie = cookie; | ||
378 | dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); | ||
379 | |||
380 | if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) | ||
381 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | ||
382 | ioat2_update_pending(ioat); | ||
383 | spin_unlock_bh(&ioat->ring_lock); | ||
384 | |||
385 | return cookie; | ||
386 | } | ||
387 | |||
388 | static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) | ||
389 | { | ||
390 | struct ioat_dma_descriptor *hw; | ||
391 | struct ioat_ring_ent *desc; | ||
392 | struct ioatdma_device *dma; | ||
393 | dma_addr_t phys; | ||
394 | |||
395 | dma = to_ioatdma_device(chan->device); | ||
396 | hw = pci_pool_alloc(dma->dma_pool, flags, &phys); | ||
397 | if (!hw) | ||
398 | return NULL; | ||
399 | memset(hw, 0, sizeof(*hw)); | ||
400 | |||
401 | desc = kmem_cache_alloc(ioat2_cache, flags); | ||
402 | if (!desc) { | ||
403 | pci_pool_free(dma->dma_pool, hw, phys); | ||
404 | return NULL; | ||
405 | } | ||
406 | memset(desc, 0, sizeof(*desc)); | ||
407 | |||
408 | dma_async_tx_descriptor_init(&desc->txd, chan); | ||
409 | desc->txd.tx_submit = ioat2_tx_submit_unlock; | ||
410 | desc->hw = hw; | ||
411 | desc->txd.phys = phys; | ||
412 | return desc; | ||
413 | } | ||
414 | |||
415 | static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) | ||
416 | { | ||
417 | struct ioatdma_device *dma; | ||
418 | |||
419 | dma = to_ioatdma_device(chan->device); | ||
420 | pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); | ||
421 | kmem_cache_free(ioat2_cache, desc); | ||
422 | } | ||
423 | |||
424 | static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) | ||
425 | { | ||
426 | struct ioat_ring_ent **ring; | ||
427 | int descs = 1 << order; | ||
428 | int i; | ||
429 | |||
430 | if (order > ioat_get_max_alloc_order()) | ||
431 | return NULL; | ||
432 | |||
433 | /* allocate the array to hold the software ring */ | ||
434 | ring = kcalloc(descs, sizeof(*ring), flags); | ||
435 | if (!ring) | ||
436 | return NULL; | ||
437 | for (i = 0; i < descs; i++) { | ||
438 | ring[i] = ioat2_alloc_ring_ent(c, flags); | ||
439 | if (!ring[i]) { | ||
440 | while (i--) | ||
441 | ioat2_free_ring_ent(ring[i], c); | ||
442 | kfree(ring); | ||
443 | return NULL; | ||
444 | } | ||
445 | set_desc_id(ring[i], i); | ||
446 | } | ||
447 | |||
448 | /* link descs */ | ||
449 | for (i = 0; i < descs-1; i++) { | ||
450 | struct ioat_ring_ent *next = ring[i+1]; | ||
451 | struct ioat_dma_descriptor *hw = ring[i]->hw; | ||
452 | |||
453 | hw->next = next->txd.phys; | ||
454 | } | ||
455 | ring[i]->hw->next = ring[0]->txd.phys; | ||
456 | |||
457 | return ring; | ||
458 | } | ||
459 | |||
460 | /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring | ||
461 | * @chan: channel to be initialized | ||
462 | */ | ||
463 | int ioat2_alloc_chan_resources(struct dma_chan *c) | ||
464 | { | ||
465 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
466 | struct ioat_chan_common *chan = &ioat->base; | ||
467 | struct ioat_ring_ent **ring; | ||
468 | u32 chanerr; | ||
469 | int order; | ||
470 | |||
471 | /* have we already been set up? */ | ||
472 | if (ioat->ring) | ||
473 | return 1 << ioat->alloc_order; | ||
474 | |||
475 | /* Setup register to interrupt and write completion status on error */ | ||
476 | writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); | ||
477 | |||
478 | chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); | ||
479 | if (chanerr) { | ||
480 | dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); | ||
481 | writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); | ||
482 | } | ||
483 | |||
484 | /* allocate a completion writeback area */ | ||
485 | /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ | ||
486 | chan->completion = pci_pool_alloc(chan->device->completion_pool, | ||
487 | GFP_KERNEL, &chan->completion_dma); | ||
488 | if (!chan->completion) | ||
489 | return -ENOMEM; | ||
490 | |||
491 | memset(chan->completion, 0, sizeof(*chan->completion)); | ||
492 | writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, | ||
493 | chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); | ||
494 | writel(((u64) chan->completion_dma) >> 32, | ||
495 | chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); | ||
496 | |||
497 | order = ioat_get_alloc_order(); | ||
498 | ring = ioat2_alloc_ring(c, order, GFP_KERNEL); | ||
499 | if (!ring) | ||
500 | return -ENOMEM; | ||
501 | |||
502 | spin_lock_bh(&ioat->ring_lock); | ||
503 | ioat->ring = ring; | ||
504 | ioat->head = 0; | ||
505 | ioat->issued = 0; | ||
506 | ioat->tail = 0; | ||
507 | ioat->pending = 0; | ||
508 | ioat->alloc_order = order; | ||
509 | spin_unlock_bh(&ioat->ring_lock); | ||
510 | |||
511 | tasklet_enable(&chan->cleanup_task); | ||
512 | ioat2_start_null_desc(ioat); | ||
513 | |||
514 | return 1 << ioat->alloc_order; | ||
515 | } | ||
516 | |||
517 | bool reshape_ring(struct ioat2_dma_chan *ioat, int order) | ||
518 | { | ||
519 | /* reshape differs from normal ring allocation in that we want | ||
520 | * to allocate a new software ring while only | ||
521 | * extending/truncating the hardware ring | ||
522 | */ | ||
523 | struct ioat_chan_common *chan = &ioat->base; | ||
524 | struct dma_chan *c = &chan->common; | ||
525 | const u16 curr_size = ioat2_ring_mask(ioat) + 1; | ||
526 | const u16 active = ioat2_ring_active(ioat); | ||
527 | const u16 new_size = 1 << order; | ||
528 | struct ioat_ring_ent **ring; | ||
529 | u16 i; | ||
530 | |||
531 | if (order > ioat_get_max_alloc_order()) | ||
532 | return false; | ||
533 | |||
534 | /* double check that we have at least 1 free descriptor */ | ||
535 | if (active == curr_size) | ||
536 | return false; | ||
537 | |||
538 | /* when shrinking, verify that we can hold the current active | ||
539 | * set in the new ring | ||
540 | */ | ||
541 | if (active >= new_size) | ||
542 | return false; | ||
543 | |||
544 | /* allocate the array to hold the software ring */ | ||
545 | ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); | ||
546 | if (!ring) | ||
547 | return false; | ||
548 | |||
549 | /* allocate/trim descriptors as needed */ | ||
550 | if (new_size > curr_size) { | ||
551 | /* copy current descriptors to the new ring */ | ||
552 | for (i = 0; i < curr_size; i++) { | ||
553 | u16 curr_idx = (ioat->tail+i) & (curr_size-1); | ||
554 | u16 new_idx = (ioat->tail+i) & (new_size-1); | ||
555 | |||
556 | ring[new_idx] = ioat->ring[curr_idx]; | ||
557 | set_desc_id(ring[new_idx], new_idx); | ||
558 | } | ||
559 | |||
560 | /* add new descriptors to the ring */ | ||
561 | for (i = curr_size; i < new_size; i++) { | ||
562 | u16 new_idx = (ioat->tail+i) & (new_size-1); | ||
563 | |||
564 | ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); | ||
565 | if (!ring[new_idx]) { | ||
566 | while (i--) { | ||
567 | u16 new_idx = (ioat->tail+i) & (new_size-1); | ||
568 | |||
569 | ioat2_free_ring_ent(ring[new_idx], c); | ||
570 | } | ||
571 | kfree(ring); | ||
572 | return false; | ||
573 | } | ||
574 | set_desc_id(ring[new_idx], new_idx); | ||
575 | } | ||
576 | |||
577 | /* hw link new descriptors */ | ||
578 | for (i = curr_size-1; i < new_size; i++) { | ||
579 | u16 new_idx = (ioat->tail+i) & (new_size-1); | ||
580 | struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; | ||
581 | struct ioat_dma_descriptor *hw = ring[new_idx]->hw; | ||
582 | |||
583 | hw->next = next->txd.phys; | ||
584 | } | ||
585 | } else { | ||
586 | struct ioat_dma_descriptor *hw; | ||
587 | struct ioat_ring_ent *next; | ||
588 | |||
589 | /* copy current descriptors to the new ring, dropping the | ||
590 | * removed descriptors | ||
591 | */ | ||
592 | for (i = 0; i < new_size; i++) { | ||
593 | u16 curr_idx = (ioat->tail+i) & (curr_size-1); | ||
594 | u16 new_idx = (ioat->tail+i) & (new_size-1); | ||
595 | |||
596 | ring[new_idx] = ioat->ring[curr_idx]; | ||
597 | set_desc_id(ring[new_idx], new_idx); | ||
598 | } | ||
599 | |||
600 | /* free deleted descriptors */ | ||
601 | for (i = new_size; i < curr_size; i++) { | ||
602 | struct ioat_ring_ent *ent; | ||
603 | |||
604 | ent = ioat2_get_ring_ent(ioat, ioat->tail+i); | ||
605 | ioat2_free_ring_ent(ent, c); | ||
606 | } | ||
607 | |||
608 | /* fix up hardware ring */ | ||
609 | hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; | ||
610 | next = ring[(ioat->tail+new_size) & (new_size-1)]; | ||
611 | hw->next = next->txd.phys; | ||
612 | } | ||
613 | |||
614 | dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", | ||
615 | __func__, new_size); | ||
616 | |||
617 | kfree(ioat->ring); | ||
618 | ioat->ring = ring; | ||
619 | ioat->alloc_order = order; | ||
620 | |||
621 | return true; | ||
622 | } | ||
623 | |||
624 | /** | ||
625 | * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops | ||
626 | * @idx: gets starting descriptor index on successful allocation | ||
627 | * @ioat: ioat2,3 channel (ring) to operate on | ||
628 | * @num_descs: allocation length | ||
629 | */ | ||
630 | int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs) | ||
631 | { | ||
632 | struct ioat_chan_common *chan = &ioat->base; | ||
633 | |||
634 | spin_lock_bh(&ioat->ring_lock); | ||
635 | /* never allow the last descriptor to be consumed, we need at | ||
636 | * least one free at all times to allow for on-the-fly ring | ||
637 | * resizing. | ||
638 | */ | ||
639 | while (unlikely(ioat2_ring_space(ioat) <= num_descs)) { | ||
640 | if (reshape_ring(ioat, ioat->alloc_order + 1) && | ||
641 | ioat2_ring_space(ioat) > num_descs) | ||
642 | break; | ||
643 | |||
644 | if (printk_ratelimit()) | ||
645 | dev_dbg(to_dev(chan), | ||
646 | "%s: ring full! num_descs: %d (%x:%x:%x)\n", | ||
647 | __func__, num_descs, ioat->head, ioat->tail, | ||
648 | ioat->issued); | ||
649 | spin_unlock_bh(&ioat->ring_lock); | ||
650 | |||
651 | /* progress reclaim in the allocation failure case we | ||
652 | * may be called under bh_disabled so we need to trigger | ||
653 | * the timer event directly | ||
654 | */ | ||
655 | spin_lock_bh(&chan->cleanup_lock); | ||
656 | if (jiffies > chan->timer.expires && | ||
657 | timer_pending(&chan->timer)) { | ||
658 | struct ioatdma_device *device = chan->device; | ||
659 | |||
660 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | ||
661 | spin_unlock_bh(&chan->cleanup_lock); | ||
662 | device->timer_fn((unsigned long) ioat); | ||
663 | } else | ||
664 | spin_unlock_bh(&chan->cleanup_lock); | ||
665 | return -ENOMEM; | ||
666 | } | ||
667 | |||
668 | dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", | ||
669 | __func__, num_descs, ioat->head, ioat->tail, ioat->issued); | ||
670 | |||
671 | *idx = ioat2_desc_alloc(ioat, num_descs); | ||
672 | return 0; /* with ioat->ring_lock held */ | ||
673 | } | ||
674 | |||
675 | struct dma_async_tx_descriptor * | ||
676 | ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, | ||
677 | dma_addr_t dma_src, size_t len, unsigned long flags) | ||
678 | { | ||
679 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
680 | struct ioat_dma_descriptor *hw; | ||
681 | struct ioat_ring_ent *desc; | ||
682 | dma_addr_t dst = dma_dest; | ||
683 | dma_addr_t src = dma_src; | ||
684 | size_t total_len = len; | ||
685 | int num_descs; | ||
686 | u16 idx; | ||
687 | int i; | ||
688 | |||
689 | num_descs = ioat2_xferlen_to_descs(ioat, len); | ||
690 | if (likely(num_descs) && | ||
691 | ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) | ||
692 | /* pass */; | ||
693 | else | ||
694 | return NULL; | ||
695 | i = 0; | ||
696 | do { | ||
697 | size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); | ||
698 | |||
699 | desc = ioat2_get_ring_ent(ioat, idx + i); | ||
700 | hw = desc->hw; | ||
701 | |||
702 | hw->size = copy; | ||
703 | hw->ctl = 0; | ||
704 | hw->src_addr = src; | ||
705 | hw->dst_addr = dst; | ||
706 | |||
707 | len -= copy; | ||
708 | dst += copy; | ||
709 | src += copy; | ||
710 | dump_desc_dbg(ioat, desc); | ||
711 | } while (++i < num_descs); | ||
712 | |||
713 | desc->txd.flags = flags; | ||
714 | desc->len = total_len; | ||
715 | hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); | ||
716 | hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); | ||
717 | hw->ctl_f.compl_write = 1; | ||
718 | dump_desc_dbg(ioat, desc); | ||
719 | /* we leave the channel locked to ensure in order submission */ | ||
720 | |||
721 | return &desc->txd; | ||
722 | } | ||
723 | |||
724 | /** | ||
725 | * ioat2_free_chan_resources - release all the descriptors | ||
726 | * @chan: the channel to be cleaned | ||
727 | */ | ||
728 | void ioat2_free_chan_resources(struct dma_chan *c) | ||
729 | { | ||
730 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
731 | struct ioat_chan_common *chan = &ioat->base; | ||
732 | struct ioatdma_device *device = chan->device; | ||
733 | struct ioat_ring_ent *desc; | ||
734 | const u16 total_descs = 1 << ioat->alloc_order; | ||
735 | int descs; | ||
736 | int i; | ||
737 | |||
738 | /* Before freeing channel resources first check | ||
739 | * if they have been previously allocated for this channel. | ||
740 | */ | ||
741 | if (!ioat->ring) | ||
742 | return; | ||
743 | |||
744 | tasklet_disable(&chan->cleanup_task); | ||
745 | del_timer_sync(&chan->timer); | ||
746 | device->cleanup_tasklet((unsigned long) ioat); | ||
747 | |||
748 | /* Delay 100ms after reset to allow internal DMA logic to quiesce | ||
749 | * before removing DMA descriptor resources. | ||
750 | */ | ||
751 | writeb(IOAT_CHANCMD_RESET, | ||
752 | chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); | ||
753 | mdelay(100); | ||
754 | |||
755 | spin_lock_bh(&ioat->ring_lock); | ||
756 | descs = ioat2_ring_space(ioat); | ||
757 | dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs); | ||
758 | for (i = 0; i < descs; i++) { | ||
759 | desc = ioat2_get_ring_ent(ioat, ioat->head + i); | ||
760 | ioat2_free_ring_ent(desc, c); | ||
761 | } | ||
762 | |||
763 | if (descs < total_descs) | ||
764 | dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", | ||
765 | total_descs - descs); | ||
766 | |||
767 | for (i = 0; i < total_descs - descs; i++) { | ||
768 | desc = ioat2_get_ring_ent(ioat, ioat->tail + i); | ||
769 | dump_desc_dbg(ioat, desc); | ||
770 | ioat2_free_ring_ent(desc, c); | ||
771 | } | ||
772 | |||
773 | kfree(ioat->ring); | ||
774 | ioat->ring = NULL; | ||
775 | ioat->alloc_order = 0; | ||
776 | pci_pool_free(device->completion_pool, chan->completion, | ||
777 | chan->completion_dma); | ||
778 | spin_unlock_bh(&ioat->ring_lock); | ||
779 | |||
780 | chan->last_completion = 0; | ||
781 | chan->completion_dma = 0; | ||
782 | ioat->pending = 0; | ||
783 | ioat->dmacount = 0; | ||
784 | } | ||
785 | |||
786 | enum dma_status | ||
787 | ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie, | ||
788 | dma_cookie_t *done, dma_cookie_t *used) | ||
789 | { | ||
790 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
791 | struct ioatdma_device *device = ioat->base.device; | ||
792 | |||
793 | if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) | ||
794 | return DMA_SUCCESS; | ||
795 | |||
796 | device->cleanup_tasklet((unsigned long) ioat); | ||
797 | |||
798 | return ioat_is_complete(c, cookie, done, used); | ||
799 | } | ||
800 | |||
801 | static ssize_t ring_size_show(struct dma_chan *c, char *page) | ||
802 | { | ||
803 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
804 | |||
805 | return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); | ||
806 | } | ||
807 | static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); | ||
808 | |||
809 | static ssize_t ring_active_show(struct dma_chan *c, char *page) | ||
810 | { | ||
811 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
812 | |||
813 | /* ...taken outside the lock, no need to be precise */ | ||
814 | return sprintf(page, "%d\n", ioat2_ring_active(ioat)); | ||
815 | } | ||
816 | static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); | ||
817 | |||
818 | static struct attribute *ioat2_attrs[] = { | ||
819 | &ring_size_attr.attr, | ||
820 | &ring_active_attr.attr, | ||
821 | &ioat_cap_attr.attr, | ||
822 | &ioat_version_attr.attr, | ||
823 | NULL, | ||
824 | }; | ||
825 | |||
826 | struct kobj_type ioat2_ktype = { | ||
827 | .sysfs_ops = &ioat_sysfs_ops, | ||
828 | .default_attrs = ioat2_attrs, | ||
829 | }; | ||
830 | |||
831 | int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) | ||
832 | { | ||
833 | struct pci_dev *pdev = device->pdev; | ||
834 | struct dma_device *dma; | ||
835 | struct dma_chan *c; | ||
836 | struct ioat_chan_common *chan; | ||
837 | int err; | ||
838 | |||
839 | device->enumerate_channels = ioat2_enumerate_channels; | ||
840 | device->cleanup_tasklet = ioat2_cleanup_tasklet; | ||
841 | device->timer_fn = ioat2_timer_event; | ||
842 | device->self_test = ioat_dma_self_test; | ||
843 | dma = &device->common; | ||
844 | dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; | ||
845 | dma->device_issue_pending = ioat2_issue_pending; | ||
846 | dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; | ||
847 | dma->device_free_chan_resources = ioat2_free_chan_resources; | ||
848 | dma->device_is_tx_complete = ioat2_is_complete; | ||
849 | |||
850 | err = ioat_probe(device); | ||
851 | if (err) | ||
852 | return err; | ||
853 | ioat_set_tcp_copy_break(2048); | ||
854 | |||
855 | list_for_each_entry(c, &dma->channels, device_node) { | ||
856 | chan = to_chan_common(c); | ||
857 | writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, | ||
858 | chan->reg_base + IOAT_DCACTRL_OFFSET); | ||
859 | } | ||
860 | |||
861 | err = ioat_register(device); | ||
862 | if (err) | ||
863 | return err; | ||
864 | |||
865 | ioat_kobject_add(device, &ioat2_ktype); | ||
866 | |||
867 | if (dca) | ||
868 | device->dca = ioat2_dca_init(pdev, device->reg_base); | ||
869 | |||
870 | return err; | ||
871 | } | ||
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h new file mode 100644 index 000000000000..1d849ef74d5f --- /dev/null +++ b/drivers/dma/ioat/dma_v2.h | |||
@@ -0,0 +1,190 @@ | |||
1 | /* | ||
2 | * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License as published by the Free | ||
6 | * Software Foundation; either version 2 of the License, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 | ||
16 | * Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in the | ||
19 | * file called COPYING. | ||
20 | */ | ||
21 | #ifndef IOATDMA_V2_H | ||
22 | #define IOATDMA_V2_H | ||
23 | |||
24 | #include <linux/dmaengine.h> | ||
25 | #include "dma.h" | ||
26 | #include "hw.h" | ||
27 | |||
28 | |||
/* tunables defined outside this header */
extern int ioat_pending_level;
extern int ioat_ring_alloc_order;

/*
 * workaround for IOAT ver.3.0 null descriptor issue
 * (channel returns error when size is 0)
 */
#define NULL_DESC_BUFFER_SIZE 1

/* ceiling applied to both ring-order values returned by the macros below */
#define IOAT_MAX_ORDER 16
/* ioat_ring_alloc_order clamped to IOAT_MAX_ORDER */
#define ioat_get_alloc_order() \
	(min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
/* ioat_ring_max_alloc_order clamped to IOAT_MAX_ORDER
 * NOTE(review): ioat_ring_max_alloc_order is not declared in this header,
 * so this macro presumably only works where that variable is visible -
 * confirm against the users
 */
#define ioat_get_max_alloc_order() \
	(min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
43 | |||
/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes
 * @base: common ioat channel parameters
 * @xfercap_log: log2 of channel max transfer length (for fast division)
 * @head: allocated index
 * @issued: hardware notification point
 * @tail: cleanup index
 * @dmacount: identical to 'head' except for occasionally resetting to zero
 * @alloc_order: log2 of the number of allocated descriptors
 * @pending: lock free indicator for issued != head
 * @ring: software ring buffer implementation of hardware ring
 * @ring_lock: protects ring attributes
 *
 * head, issued and tail are free-running 16-bit counters; the helpers
 * in this header reduce them with ioat2_ring_mask() before use.
 */
struct ioat2_dma_chan {
	struct ioat_chan_common base;
	size_t xfercap_log;
	u16 head;
	u16 issued;
	u16 tail;
	u16 dmacount;
	u16 alloc_order;
	int pending;
	struct ioat_ring_ent **ring;
	spinlock_t ring_lock;
};
68 | |||
69 | static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) | ||
70 | { | ||
71 | struct ioat_chan_common *chan = to_chan_common(c); | ||
72 | |||
73 | return container_of(chan, struct ioat2_dma_chan, base); | ||
74 | } | ||
75 | |||
76 | static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat) | ||
77 | { | ||
78 | return (1 << ioat->alloc_order) - 1; | ||
79 | } | ||
80 | |||
81 | /* count of descriptors in flight with the engine */ | ||
82 | static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) | ||
83 | { | ||
84 | return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat); | ||
85 | } | ||
86 | |||
87 | /* count of descriptors pending submission to hardware */ | ||
88 | static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) | ||
89 | { | ||
90 | return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat); | ||
91 | } | ||
92 | |||
93 | static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat) | ||
94 | { | ||
95 | u16 num_descs = ioat2_ring_mask(ioat) + 1; | ||
96 | u16 active = ioat2_ring_active(ioat); | ||
97 | |||
98 | BUG_ON(active > num_descs); | ||
99 | |||
100 | return num_descs - active; | ||
101 | } | ||
102 | |||
103 | /* assumes caller already checked space */ | ||
104 | static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len) | ||
105 | { | ||
106 | ioat->head += len; | ||
107 | return ioat->head - len; | ||
108 | } | ||
109 | |||
110 | static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) | ||
111 | { | ||
112 | u16 num_descs = len >> ioat->xfercap_log; | ||
113 | |||
114 | num_descs += !!(len & ((1 << ioat->xfercap_log) - 1)); | ||
115 | return num_descs; | ||
116 | } | ||
117 | |||
/**
 * struct ioat_ring_ent - wrapper around hardware descriptor
 * @hw: hardware DMA descriptor (for memcpy)
 * @fill: hardware fill descriptor
 * @xor: hardware xor descriptor
 * @xor_ex: hardware xor extension descriptor
 * @pq: hardware pq descriptor
 * @pq_ex: hardware pq extension descriptor
 * @pqu: hardware pq update descriptor
 * @raw: hardware raw (un-typed) descriptor
 * @txd: the generic software descriptor for all engines
 * @len: total transaction length for unmap
 * @result: asynchronous result of validate operations
 * @id: identifier for debug
 *
 * The hardware descriptor pointers share one anonymous union, so they
 * are alternative typed views of the same pointer value.
 */

struct ioat_ring_ent {
	union {
		struct ioat_dma_descriptor *hw;
		struct ioat_fill_descriptor *fill;
		struct ioat_xor_descriptor *xor;
		struct ioat_xor_ext_descriptor *xor_ex;
		struct ioat_pq_descriptor *pq;
		struct ioat_pq_ext_descriptor *pq_ex;
		struct ioat_pq_update_descriptor *pqu;
		struct ioat_raw_descriptor *raw;
	};
	size_t len;
	struct dma_async_tx_descriptor txd;
	enum sum_check_flags *result;
	/* only present in DEBUG builds */
	#ifdef DEBUG
	int id;
	#endif
};
152 | |||
153 | static inline struct ioat_ring_ent * | ||
154 | ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) | ||
155 | { | ||
156 | return ioat->ring[idx & ioat2_ring_mask(ioat)]; | ||
157 | } | ||
158 | |||
159 | static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) | ||
160 | { | ||
161 | struct ioat_chan_common *chan = &ioat->base; | ||
162 | |||
163 | writel(addr & 0x00000000FFFFFFFF, | ||
164 | chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); | ||
165 | writel(addr >> 32, | ||
166 | chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); | ||
167 | } | ||
168 | |||
/* per-version probe and dca setup entry points */
int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
/* reserves num_descs ring entries and reports the first index through
 * idx; the prep routines treat a zero return as success
 */
int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
/* operations that ioat2_dma_probe() installs into the device structures */
int ioat2_enumerate_channels(struct ioatdma_device *device);
struct dma_async_tx_descriptor *
ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
			   dma_addr_t dma_src, size_t len, unsigned long flags);
void ioat2_issue_pending(struct dma_chan *chan);
int ioat2_alloc_chan_resources(struct dma_chan *c);
void ioat2_free_chan_resources(struct dma_chan *c);
enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
				  dma_cookie_t *done, dma_cookie_t *used);
/* ring management helpers shared with the v3 code */
void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
void ioat2_cleanup_tasklet(unsigned long data);
void ioat2_timer_event(unsigned long data);
extern struct kobj_type ioat2_ktype;
extern struct kmem_cache *ioat2_cache;
190 | #endif /* IOATDMA_V2_H */ | ||
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c new file mode 100644 index 000000000000..35d1e33afd5b --- /dev/null +++ b/drivers/dma/ioat/dma_v3.c | |||
@@ -0,0 +1,1223 @@ | |||
1 | /* | ||
2 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
3 | * redistributing this file, you may do so under either license. | ||
4 | * | ||
5 | * GPL LICENSE SUMMARY | ||
6 | * | ||
7 | * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms and conditions of the GNU General Public License, | ||
11 | * version 2, as published by the Free Software Foundation. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
16 | * more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License along with | ||
19 | * this program; if not, write to the Free Software Foundation, Inc., | ||
20 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | * The full GNU General Public License is included in this distribution in | ||
23 | * the file called "COPYING". | ||
24 | * | ||
25 | * BSD LICENSE | ||
26 | * | ||
27 | * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. | ||
28 | * | ||
29 | * Redistribution and use in source and binary forms, with or without | ||
30 | * modification, are permitted provided that the following conditions are met: | ||
31 | * | ||
32 | * * Redistributions of source code must retain the above copyright | ||
33 | * notice, this list of conditions and the following disclaimer. | ||
34 | * * Redistributions in binary form must reproduce the above copyright | ||
35 | * notice, this list of conditions and the following disclaimer in | ||
36 | * the documentation and/or other materials provided with the | ||
37 | * distribution. | ||
38 | * * Neither the name of Intel Corporation nor the names of its | ||
39 | * contributors may be used to endorse or promote products derived | ||
40 | * from this software without specific prior written permission. | ||
41 | * | ||
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
43 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
44 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
45 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
46 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
47 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
48 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
49 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
50 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
51 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
52 | * POSSIBILITY OF SUCH DAMAGE. | ||
53 | */ | ||
54 | |||
55 | /* | ||
56 | * Support routines for v3+ hardware | ||
57 | */ | ||
58 | |||
59 | #include <linux/pci.h> | ||
60 | #include <linux/dmaengine.h> | ||
61 | #include <linux/dma-mapping.h> | ||
62 | #include "registers.h" | ||
63 | #include "hw.h" | ||
64 | #include "dma.h" | ||
65 | #include "dma_v2.h" | ||
66 | |||
67 | /* ioat hardware assumes at least two sources for raid operations */ | ||
68 | #define src_cnt_to_sw(x) ((x) + 2) | ||
69 | #define src_cnt_to_hw(x) ((x) - 2) | ||
70 | |||
71 | /* provide a lookup table for setting the source address in the base or | ||
72 | * extended descriptor of an xor or pq descriptor | ||
73 | */ | ||
74 | static const u8 xor_idx_to_desc __read_mostly = 0xd0; | ||
75 | static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; | ||
76 | static const u8 pq_idx_to_desc __read_mostly = 0xf8; | ||
77 | static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 }; | ||
78 | |||
79 | static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) | ||
80 | { | ||
81 | struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; | ||
82 | |||
83 | return raw->field[xor_idx_to_field[idx]]; | ||
84 | } | ||
85 | |||
86 | static void xor_set_src(struct ioat_raw_descriptor *descs[2], | ||
87 | dma_addr_t addr, u32 offset, int idx) | ||
88 | { | ||
89 | struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; | ||
90 | |||
91 | raw->field[xor_idx_to_field[idx]] = addr + offset; | ||
92 | } | ||
93 | |||
94 | static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) | ||
95 | { | ||
96 | struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; | ||
97 | |||
98 | return raw->field[pq_idx_to_field[idx]]; | ||
99 | } | ||
100 | |||
101 | static void pq_set_src(struct ioat_raw_descriptor *descs[2], | ||
102 | dma_addr_t addr, u32 offset, u8 coef, int idx) | ||
103 | { | ||
104 | struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; | ||
105 | struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; | ||
106 | |||
107 | raw->field[pq_idx_to_field[idx]] = addr + offset; | ||
108 | pq->coef[idx] = coef; | ||
109 | } | ||
110 | |||
111 | static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, | ||
112 | struct ioat_ring_ent *desc, int idx) | ||
113 | { | ||
114 | struct ioat_chan_common *chan = &ioat->base; | ||
115 | struct pci_dev *pdev = chan->device->pdev; | ||
116 | size_t len = desc->len; | ||
117 | size_t offset = len - desc->hw->size; | ||
118 | struct dma_async_tx_descriptor *tx = &desc->txd; | ||
119 | enum dma_ctrl_flags flags = tx->flags; | ||
120 | |||
121 | switch (desc->hw->ctl_f.op) { | ||
122 | case IOAT_OP_COPY: | ||
123 | if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */ | ||
124 | ioat_dma_unmap(chan, flags, len, desc->hw); | ||
125 | break; | ||
126 | case IOAT_OP_FILL: { | ||
127 | struct ioat_fill_descriptor *hw = desc->fill; | ||
128 | |||
129 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) | ||
130 | ioat_unmap(pdev, hw->dst_addr - offset, len, | ||
131 | PCI_DMA_FROMDEVICE, flags, 1); | ||
132 | break; | ||
133 | } | ||
134 | case IOAT_OP_XOR_VAL: | ||
135 | case IOAT_OP_XOR: { | ||
136 | struct ioat_xor_descriptor *xor = desc->xor; | ||
137 | struct ioat_ring_ent *ext; | ||
138 | struct ioat_xor_ext_descriptor *xor_ex = NULL; | ||
139 | int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt); | ||
140 | struct ioat_raw_descriptor *descs[2]; | ||
141 | int i; | ||
142 | |||
143 | if (src_cnt > 5) { | ||
144 | ext = ioat2_get_ring_ent(ioat, idx + 1); | ||
145 | xor_ex = ext->xor_ex; | ||
146 | } | ||
147 | |||
148 | if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
149 | descs[0] = (struct ioat_raw_descriptor *) xor; | ||
150 | descs[1] = (struct ioat_raw_descriptor *) xor_ex; | ||
151 | for (i = 0; i < src_cnt; i++) { | ||
152 | dma_addr_t src = xor_get_src(descs, i); | ||
153 | |||
154 | ioat_unmap(pdev, src - offset, len, | ||
155 | PCI_DMA_TODEVICE, flags, 0); | ||
156 | } | ||
157 | |||
158 | /* dest is a source in xor validate operations */ | ||
159 | if (xor->ctl_f.op == IOAT_OP_XOR_VAL) { | ||
160 | ioat_unmap(pdev, xor->dst_addr - offset, len, | ||
161 | PCI_DMA_TODEVICE, flags, 1); | ||
162 | break; | ||
163 | } | ||
164 | } | ||
165 | |||
166 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) | ||
167 | ioat_unmap(pdev, xor->dst_addr - offset, len, | ||
168 | PCI_DMA_FROMDEVICE, flags, 1); | ||
169 | break; | ||
170 | } | ||
171 | case IOAT_OP_PQ_VAL: | ||
172 | case IOAT_OP_PQ: { | ||
173 | struct ioat_pq_descriptor *pq = desc->pq; | ||
174 | struct ioat_ring_ent *ext; | ||
175 | struct ioat_pq_ext_descriptor *pq_ex = NULL; | ||
176 | int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); | ||
177 | struct ioat_raw_descriptor *descs[2]; | ||
178 | int i; | ||
179 | |||
180 | if (src_cnt > 3) { | ||
181 | ext = ioat2_get_ring_ent(ioat, idx + 1); | ||
182 | pq_ex = ext->pq_ex; | ||
183 | } | ||
184 | |||
185 | /* in the 'continue' case don't unmap the dests as sources */ | ||
186 | if (dmaf_p_disabled_continue(flags)) | ||
187 | src_cnt--; | ||
188 | else if (dmaf_continue(flags)) | ||
189 | src_cnt -= 3; | ||
190 | |||
191 | if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
192 | descs[0] = (struct ioat_raw_descriptor *) pq; | ||
193 | descs[1] = (struct ioat_raw_descriptor *) pq_ex; | ||
194 | for (i = 0; i < src_cnt; i++) { | ||
195 | dma_addr_t src = pq_get_src(descs, i); | ||
196 | |||
197 | ioat_unmap(pdev, src - offset, len, | ||
198 | PCI_DMA_TODEVICE, flags, 0); | ||
199 | } | ||
200 | |||
201 | /* the dests are sources in pq validate operations */ | ||
202 | if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { | ||
203 | if (!(flags & DMA_PREP_PQ_DISABLE_P)) | ||
204 | ioat_unmap(pdev, pq->p_addr - offset, | ||
205 | len, PCI_DMA_TODEVICE, flags, 0); | ||
206 | if (!(flags & DMA_PREP_PQ_DISABLE_Q)) | ||
207 | ioat_unmap(pdev, pq->q_addr - offset, | ||
208 | len, PCI_DMA_TODEVICE, flags, 0); | ||
209 | break; | ||
210 | } | ||
211 | } | ||
212 | |||
213 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { | ||
214 | if (!(flags & DMA_PREP_PQ_DISABLE_P)) | ||
215 | ioat_unmap(pdev, pq->p_addr - offset, len, | ||
216 | PCI_DMA_BIDIRECTIONAL, flags, 1); | ||
217 | if (!(flags & DMA_PREP_PQ_DISABLE_Q)) | ||
218 | ioat_unmap(pdev, pq->q_addr - offset, len, | ||
219 | PCI_DMA_BIDIRECTIONAL, flags, 1); | ||
220 | } | ||
221 | break; | ||
222 | } | ||
223 | default: | ||
224 | dev_err(&pdev->dev, "%s: unknown op type: %#x\n", | ||
225 | __func__, desc->hw->ctl_f.op); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | static bool desc_has_ext(struct ioat_ring_ent *desc) | ||
230 | { | ||
231 | struct ioat_dma_descriptor *hw = desc->hw; | ||
232 | |||
233 | if (hw->ctl_f.op == IOAT_OP_XOR || | ||
234 | hw->ctl_f.op == IOAT_OP_XOR_VAL) { | ||
235 | struct ioat_xor_descriptor *xor = desc->xor; | ||
236 | |||
237 | if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) | ||
238 | return true; | ||
239 | } else if (hw->ctl_f.op == IOAT_OP_PQ || | ||
240 | hw->ctl_f.op == IOAT_OP_PQ_VAL) { | ||
241 | struct ioat_pq_descriptor *pq = desc->pq; | ||
242 | |||
243 | if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) | ||
244 | return true; | ||
245 | } | ||
246 | |||
247 | return false; | ||
248 | } | ||
249 | |||
/**
 * __cleanup - reclaim used descriptors
 * @ioat: channel (ring) to clean
 * @phys_complete: bus address of the last descriptor the hardware has
 *		   reported as completed
 *
 * The difference from the dma_v2.c __cleanup() is that this routine
 * handles extended descriptors and dma-unmapping raid operations.
 *
 * Walks the ring from the tail until the descriptor whose address
 * matches @phys_complete has been processed, unmapping buffers and
 * running completion callbacks on the way.  The callers in this file
 * invoke it with the ring_lock held.
 */
static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
{
	struct ioat_chan_common *chan = &ioat->base;
	struct ioat_ring_ent *desc;
	bool seen_current = false;
	u16 active;
	int i;

	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
		__func__, ioat->head, ioat->tail, ioat->issued);

	active = ioat2_ring_active(ioat);
	for (i = 0; i < active && !seen_current; i++) {
		struct dma_async_tx_descriptor *tx;

		prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
		desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
		dump_desc_dbg(ioat, desc);
		tx = &desc->txd;
		/* only entries with a non-zero cookie are completed and
		 * unmapped; the cookie is cleared afterwards
		 */
		if (tx->cookie) {
			chan->completed_cookie = tx->cookie;
			ioat3_dma_unmap(ioat, desc, ioat->tail + i);
			tx->cookie = 0;
			if (tx->callback) {
				tx->callback(tx->callback_param);
				tx->callback = NULL;
			}
		}

		/* the loop condition stops the walk after this entry */
		if (tx->phys == phys_complete)
			seen_current = true;

		/* skip extended descriptors */
		if (desc_has_ext(desc)) {
			BUG_ON(i + 1 >= active);
			i++;
		}
	}
	/* everything walked above is now free for reuse */
	ioat->tail += i;
	BUG_ON(!seen_current); /* no active descs have written a completion? */
	chan->last_completion = phys_complete;
	/* ring drained: drop the pending state and re-arm the timer with
	 * the idle timeout
	 */
	if (ioat->head == ioat->tail) {
		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
			__func__);
		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
}
305 | |||
306 | static void ioat3_cleanup(struct ioat2_dma_chan *ioat) | ||
307 | { | ||
308 | struct ioat_chan_common *chan = &ioat->base; | ||
309 | unsigned long phys_complete; | ||
310 | |||
311 | prefetch(chan->completion); | ||
312 | |||
313 | if (!spin_trylock_bh(&chan->cleanup_lock)) | ||
314 | return; | ||
315 | |||
316 | if (!ioat_cleanup_preamble(chan, &phys_complete)) { | ||
317 | spin_unlock_bh(&chan->cleanup_lock); | ||
318 | return; | ||
319 | } | ||
320 | |||
321 | if (!spin_trylock_bh(&ioat->ring_lock)) { | ||
322 | spin_unlock_bh(&chan->cleanup_lock); | ||
323 | return; | ||
324 | } | ||
325 | |||
326 | __cleanup(ioat, phys_complete); | ||
327 | |||
328 | spin_unlock_bh(&ioat->ring_lock); | ||
329 | spin_unlock_bh(&chan->cleanup_lock); | ||
330 | } | ||
331 | |||
332 | static void ioat3_cleanup_tasklet(unsigned long data) | ||
333 | { | ||
334 | struct ioat2_dma_chan *ioat = (void *) data; | ||
335 | |||
336 | ioat3_cleanup(ioat); | ||
337 | writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN, | ||
338 | ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); | ||
339 | } | ||
340 | |||
341 | static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) | ||
342 | { | ||
343 | struct ioat_chan_common *chan = &ioat->base; | ||
344 | unsigned long phys_complete; | ||
345 | u32 status; | ||
346 | |||
347 | status = ioat_chansts(chan); | ||
348 | if (is_ioat_active(status) || is_ioat_idle(status)) | ||
349 | ioat_suspend(chan); | ||
350 | while (is_ioat_active(status) || is_ioat_idle(status)) { | ||
351 | status = ioat_chansts(chan); | ||
352 | cpu_relax(); | ||
353 | } | ||
354 | |||
355 | if (ioat_cleanup_preamble(chan, &phys_complete)) | ||
356 | __cleanup(ioat, phys_complete); | ||
357 | |||
358 | __ioat2_restart_chan(ioat); | ||
359 | } | ||
360 | |||
/* channel timer callback, @data is the channel pointer
 *
 * With IOAT_COMPLETION_PENDING set it acts as a completion watchdog:
 * reclaim completed descriptors when there is progress, otherwise
 * escalate from setting IOAT_COMPLETION_ACK to a channel restart.
 * Without it the channel is idle and the timer is used to shrink an
 * oversized ring one order at a time.
 *
 * Lock order: cleanup_lock, then ring_lock.
 */
static void ioat3_timer_event(unsigned long data)
{
	struct ioat2_dma_chan *ioat = (void *) data;
	struct ioat_chan_common *chan = &ioat->base;

	spin_lock_bh(&chan->cleanup_lock);
	if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
		unsigned long phys_complete;
		u64 status;

		spin_lock_bh(&ioat->ring_lock);
		status = ioat_chansts(chan);

		/* when halted due to errors check for channel
		 * programming errors before advancing the completion state
		 */
		if (is_ioat_halted(status)) {
			u32 chanerr;

			chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
			BUG_ON(is_ioat_bug(chanerr));
		}

		/* if we haven't made progress and we have already
		 * acknowledged a pending completion once, then be more
		 * forceful with a restart
		 */
		if (ioat_cleanup_preamble(chan, &phys_complete))
			__cleanup(ioat, phys_complete);
		else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
			ioat3_restart_channel(ioat);
		else {
			/* first timeout without progress: note it and
			 * give the hardware one more period
			 */
			set_bit(IOAT_COMPLETION_ACK, &chan->state);
			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
		}
		spin_unlock_bh(&ioat->ring_lock);
	} else {
		u16 active;

		/* if the ring is idle, empty, and oversized try to step
		 * down the size
		 */
		spin_lock_bh(&ioat->ring_lock);
		active = ioat2_ring_active(ioat);
		if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
			reshape_ring(ioat, ioat->alloc_order-1);
		spin_unlock_bh(&ioat->ring_lock);

		/* keep shrinking until we get back to our minimum
		 * default size
		 */
		if (ioat->alloc_order > ioat_get_alloc_order())
			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
	spin_unlock_bh(&chan->cleanup_lock);
}
417 | |||
418 | static enum dma_status | ||
419 | ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie, | ||
420 | dma_cookie_t *done, dma_cookie_t *used) | ||
421 | { | ||
422 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
423 | |||
424 | if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) | ||
425 | return DMA_SUCCESS; | ||
426 | |||
427 | ioat3_cleanup(ioat); | ||
428 | |||
429 | return ioat_is_complete(c, cookie, done, used); | ||
430 | } | ||
431 | |||
432 | static struct dma_async_tx_descriptor * | ||
433 | ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, | ||
434 | size_t len, unsigned long flags) | ||
435 | { | ||
436 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
437 | struct ioat_ring_ent *desc; | ||
438 | size_t total_len = len; | ||
439 | struct ioat_fill_descriptor *fill; | ||
440 | int num_descs; | ||
441 | u64 src_data = (0x0101010101010101ULL) * (value & 0xff); | ||
442 | u16 idx; | ||
443 | int i; | ||
444 | |||
445 | num_descs = ioat2_xferlen_to_descs(ioat, len); | ||
446 | if (likely(num_descs) && | ||
447 | ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) | ||
448 | /* pass */; | ||
449 | else | ||
450 | return NULL; | ||
451 | i = 0; | ||
452 | do { | ||
453 | size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); | ||
454 | |||
455 | desc = ioat2_get_ring_ent(ioat, idx + i); | ||
456 | fill = desc->fill; | ||
457 | |||
458 | fill->size = xfer_size; | ||
459 | fill->src_data = src_data; | ||
460 | fill->dst_addr = dest; | ||
461 | fill->ctl = 0; | ||
462 | fill->ctl_f.op = IOAT_OP_FILL; | ||
463 | |||
464 | len -= xfer_size; | ||
465 | dest += xfer_size; | ||
466 | dump_desc_dbg(ioat, desc); | ||
467 | } while (++i < num_descs); | ||
468 | |||
469 | desc->txd.flags = flags; | ||
470 | desc->len = total_len; | ||
471 | fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); | ||
472 | fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE); | ||
473 | fill->ctl_f.compl_write = 1; | ||
474 | dump_desc_dbg(ioat, desc); | ||
475 | |||
476 | /* we leave the channel locked to ensure in order submission */ | ||
477 | return &desc->txd; | ||
478 | } | ||
479 | |||
/* build the descriptor chain for an xor (result == NULL) or an xor
 * validate (result != NULL) operation
 *
 * Each transfer-cap sized chunk takes one descriptor, or two when more
 * than 5 sources require an extended descriptor.  One extra null
 * descriptor is appended to carry the completion write.
 *
 * Returns the software descriptor of the last xor descriptor, or NULL
 * when len is zero or ring space could not be reserved.  On success the
 * channel is left locked.
 */
static struct dma_async_tx_descriptor *
__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
		      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
		      size_t len, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_xor_descriptor *xor;
	struct ioat_xor_ext_descriptor *xor_ex = NULL;
	struct ioat_dma_descriptor *hw;
	u32 offset = 0;
	int num_descs;
	int with_ext;
	int i;
	u16 idx;
	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;

	BUG_ON(src_cnt < 2);

	num_descs = ioat2_xferlen_to_descs(ioat, len);
	/* we need 2x the number of descriptors to cover greater than 5
	 * sources
	 */
	if (src_cnt > 5) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
		/* pass */;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
		int s;

		desc = ioat2_get_ring_ent(ioat, idx + i);
		xor = desc->xor;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor; xor_set_src() knows to not write
		 * to it in the single descriptor case
		 */
		ext = ioat2_get_ring_ent(ioat, idx + i + 1);
		xor_ex = ext->xor_ex;

		descs[0] = (struct ioat_raw_descriptor *) xor;
		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
		for (s = 0; s < src_cnt; s++)
			xor_set_src(descs, src[s], offset, s);
		xor->size = xfer_size;
		xor->dst_addr = dest + offset;
		xor->ctl = 0;
		xor->ctl_f.op = op;
		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);

		len -= xfer_size;
		offset += xfer_size;
		dump_desc_dbg(ioat, desc);
		/* step over the extended descriptor when one is in use */
	} while ((i += 1 + with_ext) < num_descs);

	/* last xor descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* completion descriptor carries interrupt bit; i equals
	 * num_descs here, i.e. the extra slot reserved above
	 */
	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat, compl_desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}
574 | |||
575 | static struct dma_async_tx_descriptor * | ||
576 | ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, | ||
577 | unsigned int src_cnt, size_t len, unsigned long flags) | ||
578 | { | ||
579 | return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); | ||
580 | } | ||
581 | |||
582 | struct dma_async_tx_descriptor * | ||
583 | ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, | ||
584 | unsigned int src_cnt, size_t len, | ||
585 | enum sum_check_flags *result, unsigned long flags) | ||
586 | { | ||
587 | /* the cleanup routine only sets bits on validate failure, it | ||
588 | * does not clear bits on validate success... so clear it here | ||
589 | */ | ||
590 | *result = 0; | ||
591 | |||
592 | return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], | ||
593 | src_cnt - 1, len, flags); | ||
594 | } | ||
595 | |||
596 | static void | ||
597 | dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) | ||
598 | { | ||
599 | struct device *dev = to_dev(&ioat->base); | ||
600 | struct ioat_pq_descriptor *pq = desc->pq; | ||
601 | struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; | ||
602 | struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; | ||
603 | int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); | ||
604 | int i; | ||
605 | |||
606 | dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" | ||
607 | " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", | ||
608 | desc_id(desc), (unsigned long long) desc->txd.phys, | ||
609 | (unsigned long long) (pq_ex ? pq_ex->next : pq->next), | ||
610 | desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, | ||
611 | pq->ctl_f.compl_write, | ||
612 | pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", | ||
613 | pq->ctl_f.src_cnt); | ||
614 | for (i = 0; i < src_cnt; i++) | ||
615 | dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, | ||
616 | (unsigned long long) pq_get_src(descs, i), pq->coef[i]); | ||
617 | dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); | ||
618 | dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); | ||
619 | } | ||
620 | |||
/*
 * __ioat3_prep_pq_lock - build the descriptors for a pq or pq-validate
 * operation.
 * @c:       dmaengine channel
 * @result:  when non-NULL the opcode is IOAT_OP_PQ_VAL and the pointer is
 *           stored in the last pq descriptor; when NULL the opcode is
 *           IOAT_OP_PQ
 * @dst:     dst[0] is programmed as the P address, dst[1] as the Q address
 * @src:     source addresses, @src_cnt entries
 * @src_cnt: number of entries in @src and @scf
 * @scf:     one coefficient per source
 * @len:     total transfer length; split into chunks of at most
 *           1 << ioat->xfercap_log bytes
 * @flags:   DMA_PREP_* flags
 *
 * Returns the txd of the last pq descriptor, or NULL when no descriptors
 * are needed or ioat2_alloc_and_lock() fails.  On success the channel is
 * left locked (see the comment before the return).
 */
static struct dma_async_tx_descriptor *
__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
		     const dma_addr_t *dst, const dma_addr_t *src,
		     unsigned int src_cnt, const unsigned char *scf,
		     size_t len, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_chan_common *chan = &ioat->base;
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	struct ioat_pq_ext_descriptor *pq_ex = NULL;
	struct ioat_dma_descriptor *hw;
	u32 offset = 0;
	int num_descs;
	int with_ext;
	int i, s;
	u16 idx;
	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;

	dev_dbg(to_dev(chan), "%s\n", __func__);
	/* the engine requires at least two sources (we provide
	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
	 */
	BUG_ON(src_cnt + dmaf_continue(flags) < 2);

	num_descs = ioat2_xferlen_to_descs(ioat, len);
	/* we need 2x the number of descriptors to cover greater than 3
	 * sources
	 */
	if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
		/* pass */;
	else
		return NULL;
	/* i walks the allocated ring slots; each iteration consumes one
	 * slot, or two when an extended descriptor is in use
	 */
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);

		desc = ioat2_get_ring_ent(ioat, idx + i);
		pq = desc->pq;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor pq_set_src() knows to not write
		 * to it in the single descriptor case
		 */
		/* with_ext is 0 in that case, so ext is the same entry as desc */
		ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
		pq_ex = ext->pq_ex;

		descs[0] = (struct ioat_raw_descriptor *) pq;
		descs[1] = (struct ioat_raw_descriptor *) pq_ex;

		for (s = 0; s < src_cnt; s++)
			pq_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		/* continuation: the previous P/Q results are appended as
		 * extra sources; s keeps counting so the final source count
		 * written below includes them
		 */
		if (dmaf_p_disabled_continue(flags))
			pq_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq_set_src(descs, dst[0], offset, 0, s++);
			pq_set_src(descs, dst[1], offset, 1, s++);
			pq_set_src(descs, dst[1], offset, 0, s++);
		}
		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		/* clear the whole control word before setting individual fields */
		pq->ctl = 0;
		pq->ctl_f.op = op;
		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while ((i += 1 + with_ext) < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	dump_pq_desc_dbg(ioat, desc, ext);

	/* completion descriptor carries interrupt bit */
	/* i now indexes the extra (num_descs+1'th) slot allocated above */
	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat, compl_desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}
733 | |||
734 | static struct dma_async_tx_descriptor * | ||
735 | ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, | ||
736 | unsigned int src_cnt, const unsigned char *scf, size_t len, | ||
737 | unsigned long flags) | ||
738 | { | ||
739 | /* handle the single source multiply case from the raid6 | ||
740 | * recovery path | ||
741 | */ | ||
742 | if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) { | ||
743 | dma_addr_t single_source[2]; | ||
744 | unsigned char single_source_coef[2]; | ||
745 | |||
746 | BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); | ||
747 | single_source[0] = src[0]; | ||
748 | single_source[1] = src[0]; | ||
749 | single_source_coef[0] = scf[0]; | ||
750 | single_source_coef[1] = 0; | ||
751 | |||
752 | return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, | ||
753 | single_source_coef, len, flags); | ||
754 | } else | ||
755 | return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, | ||
756 | len, flags); | ||
757 | } | ||
758 | |||
759 | struct dma_async_tx_descriptor * | ||
760 | ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, | ||
761 | unsigned int src_cnt, const unsigned char *scf, size_t len, | ||
762 | enum sum_check_flags *pqres, unsigned long flags) | ||
763 | { | ||
764 | /* the cleanup routine only sets bits on validate failure, it | ||
765 | * does not clear bits on validate success... so clear it here | ||
766 | */ | ||
767 | *pqres = 0; | ||
768 | |||
769 | return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, | ||
770 | flags); | ||
771 | } | ||
772 | |||
773 | static struct dma_async_tx_descriptor * | ||
774 | ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, | ||
775 | unsigned int src_cnt, size_t len, unsigned long flags) | ||
776 | { | ||
777 | unsigned char scf[src_cnt]; | ||
778 | dma_addr_t pq[2]; | ||
779 | |||
780 | memset(scf, 0, src_cnt); | ||
781 | flags |= DMA_PREP_PQ_DISABLE_Q; | ||
782 | pq[0] = dst; | ||
783 | pq[1] = ~0; | ||
784 | |||
785 | return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, | ||
786 | flags); | ||
787 | } | ||
788 | |||
789 | struct dma_async_tx_descriptor * | ||
790 | ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, | ||
791 | unsigned int src_cnt, size_t len, | ||
792 | enum sum_check_flags *result, unsigned long flags) | ||
793 | { | ||
794 | unsigned char scf[src_cnt]; | ||
795 | dma_addr_t pq[2]; | ||
796 | |||
797 | /* the cleanup routine only sets bits on validate failure, it | ||
798 | * does not clear bits on validate success... so clear it here | ||
799 | */ | ||
800 | *result = 0; | ||
801 | |||
802 | memset(scf, 0, src_cnt); | ||
803 | flags |= DMA_PREP_PQ_DISABLE_Q; | ||
804 | pq[0] = src[0]; | ||
805 | pq[1] = ~0; | ||
806 | |||
807 | return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, | ||
808 | len, flags); | ||
809 | } | ||
810 | |||
811 | static struct dma_async_tx_descriptor * | ||
812 | ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) | ||
813 | { | ||
814 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
815 | struct ioat_ring_ent *desc; | ||
816 | struct ioat_dma_descriptor *hw; | ||
817 | u16 idx; | ||
818 | |||
819 | if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0) | ||
820 | desc = ioat2_get_ring_ent(ioat, idx); | ||
821 | else | ||
822 | return NULL; | ||
823 | |||
824 | hw = desc->hw; | ||
825 | hw->ctl = 0; | ||
826 | hw->ctl_f.null = 1; | ||
827 | hw->ctl_f.int_en = 1; | ||
828 | hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); | ||
829 | hw->ctl_f.compl_write = 1; | ||
830 | hw->size = NULL_DESC_BUFFER_SIZE; | ||
831 | hw->src_addr = 0; | ||
832 | hw->dst_addr = 0; | ||
833 | |||
834 | desc->txd.flags = flags; | ||
835 | desc->len = 1; | ||
836 | |||
837 | dump_desc_dbg(ioat, desc); | ||
838 | |||
839 | /* we leave the channel locked to ensure in order submission */ | ||
840 | return &desc->txd; | ||
841 | } | ||
842 | |||
843 | static void __devinit ioat3_dma_test_callback(void *dma_async_param) | ||
844 | { | ||
845 | struct completion *cmp = dma_async_param; | ||
846 | |||
847 | complete(cmp); | ||
848 | } | ||
849 | |||
850 | #define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ | ||
851 | static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device) | ||
852 | { | ||
853 | int i, src_idx; | ||
854 | struct page *dest; | ||
855 | struct page *xor_srcs[IOAT_NUM_SRC_TEST]; | ||
856 | struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; | ||
857 | dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; | ||
858 | dma_addr_t dma_addr, dest_dma; | ||
859 | struct dma_async_tx_descriptor *tx; | ||
860 | struct dma_chan *dma_chan; | ||
861 | dma_cookie_t cookie; | ||
862 | u8 cmp_byte = 0; | ||
863 | u32 cmp_word; | ||
864 | u32 xor_val_result; | ||
865 | int err = 0; | ||
866 | struct completion cmp; | ||
867 | unsigned long tmo; | ||
868 | struct device *dev = &device->pdev->dev; | ||
869 | struct dma_device *dma = &device->common; | ||
870 | |||
871 | dev_dbg(dev, "%s\n", __func__); | ||
872 | |||
873 | if (!dma_has_cap(DMA_XOR, dma->cap_mask)) | ||
874 | return 0; | ||
875 | |||
876 | for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { | ||
877 | xor_srcs[src_idx] = alloc_page(GFP_KERNEL); | ||
878 | if (!xor_srcs[src_idx]) { | ||
879 | while (src_idx--) | ||
880 | __free_page(xor_srcs[src_idx]); | ||
881 | return -ENOMEM; | ||
882 | } | ||
883 | } | ||
884 | |||
885 | dest = alloc_page(GFP_KERNEL); | ||
886 | if (!dest) { | ||
887 | while (src_idx--) | ||
888 | __free_page(xor_srcs[src_idx]); | ||
889 | return -ENOMEM; | ||
890 | } | ||
891 | |||
892 | /* Fill in src buffers */ | ||
893 | for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { | ||
894 | u8 *ptr = page_address(xor_srcs[src_idx]); | ||
895 | for (i = 0; i < PAGE_SIZE; i++) | ||
896 | ptr[i] = (1 << src_idx); | ||
897 | } | ||
898 | |||
899 | for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) | ||
900 | cmp_byte ^= (u8) (1 << src_idx); | ||
901 | |||
902 | cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | | ||
903 | (cmp_byte << 8) | cmp_byte; | ||
904 | |||
905 | memset(page_address(dest), 0, PAGE_SIZE); | ||
906 | |||
907 | dma_chan = container_of(dma->channels.next, struct dma_chan, | ||
908 | device_node); | ||
909 | if (dma->device_alloc_chan_resources(dma_chan) < 1) { | ||
910 | err = -ENODEV; | ||
911 | goto out; | ||
912 | } | ||
913 | |||
914 | /* test xor */ | ||
915 | dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); | ||
916 | for (i = 0; i < IOAT_NUM_SRC_TEST; i++) | ||
917 | dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, | ||
918 | DMA_TO_DEVICE); | ||
919 | tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, | ||
920 | IOAT_NUM_SRC_TEST, PAGE_SIZE, | ||
921 | DMA_PREP_INTERRUPT); | ||
922 | |||
923 | if (!tx) { | ||
924 | dev_err(dev, "Self-test xor prep failed\n"); | ||
925 | err = -ENODEV; | ||
926 | goto free_resources; | ||
927 | } | ||
928 | |||
929 | async_tx_ack(tx); | ||
930 | init_completion(&cmp); | ||
931 | tx->callback = ioat3_dma_test_callback; | ||
932 | tx->callback_param = &cmp; | ||
933 | cookie = tx->tx_submit(tx); | ||
934 | if (cookie < 0) { | ||
935 | dev_err(dev, "Self-test xor setup failed\n"); | ||
936 | err = -ENODEV; | ||
937 | goto free_resources; | ||
938 | } | ||
939 | dma->device_issue_pending(dma_chan); | ||
940 | |||
941 | tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); | ||
942 | |||
943 | if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { | ||
944 | dev_err(dev, "Self-test xor timed out\n"); | ||
945 | err = -ENODEV; | ||
946 | goto free_resources; | ||
947 | } | ||
948 | |||
949 | dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); | ||
950 | for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { | ||
951 | u32 *ptr = page_address(dest); | ||
952 | if (ptr[i] != cmp_word) { | ||
953 | dev_err(dev, "Self-test xor failed compare\n"); | ||
954 | err = -ENODEV; | ||
955 | goto free_resources; | ||
956 | } | ||
957 | } | ||
958 | dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE); | ||
959 | |||
960 | /* skip validate if the capability is not present */ | ||
961 | if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) | ||
962 | goto free_resources; | ||
963 | |||
964 | /* validate the sources with the destintation page */ | ||
965 | for (i = 0; i < IOAT_NUM_SRC_TEST; i++) | ||
966 | xor_val_srcs[i] = xor_srcs[i]; | ||
967 | xor_val_srcs[i] = dest; | ||
968 | |||
969 | xor_val_result = 1; | ||
970 | |||
971 | for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) | ||
972 | dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, | ||
973 | DMA_TO_DEVICE); | ||
974 | tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, | ||
975 | IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, | ||
976 | &xor_val_result, DMA_PREP_INTERRUPT); | ||
977 | if (!tx) { | ||
978 | dev_err(dev, "Self-test zero prep failed\n"); | ||
979 | err = -ENODEV; | ||
980 | goto free_resources; | ||
981 | } | ||
982 | |||
983 | async_tx_ack(tx); | ||
984 | init_completion(&cmp); | ||
985 | tx->callback = ioat3_dma_test_callback; | ||
986 | tx->callback_param = &cmp; | ||
987 | cookie = tx->tx_submit(tx); | ||
988 | if (cookie < 0) { | ||
989 | dev_err(dev, "Self-test zero setup failed\n"); | ||
990 | err = -ENODEV; | ||
991 | goto free_resources; | ||
992 | } | ||
993 | dma->device_issue_pending(dma_chan); | ||
994 | |||
995 | tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); | ||
996 | |||
997 | if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { | ||
998 | dev_err(dev, "Self-test validate timed out\n"); | ||
999 | err = -ENODEV; | ||
1000 | goto free_resources; | ||
1001 | } | ||
1002 | |||
1003 | if (xor_val_result != 0) { | ||
1004 | dev_err(dev, "Self-test validate failed compare\n"); | ||
1005 | err = -ENODEV; | ||
1006 | goto free_resources; | ||
1007 | } | ||
1008 | |||
1009 | /* skip memset if the capability is not present */ | ||
1010 | if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask)) | ||
1011 | goto free_resources; | ||
1012 | |||
1013 | /* test memset */ | ||
1014 | dma_addr = dma_map_page(dev, dest, 0, | ||
1015 | PAGE_SIZE, DMA_FROM_DEVICE); | ||
1016 | tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, | ||
1017 | DMA_PREP_INTERRUPT); | ||
1018 | if (!tx) { | ||
1019 | dev_err(dev, "Self-test memset prep failed\n"); | ||
1020 | err = -ENODEV; | ||
1021 | goto free_resources; | ||
1022 | } | ||
1023 | |||
1024 | async_tx_ack(tx); | ||
1025 | init_completion(&cmp); | ||
1026 | tx->callback = ioat3_dma_test_callback; | ||
1027 | tx->callback_param = &cmp; | ||
1028 | cookie = tx->tx_submit(tx); | ||
1029 | if (cookie < 0) { | ||
1030 | dev_err(dev, "Self-test memset setup failed\n"); | ||
1031 | err = -ENODEV; | ||
1032 | goto free_resources; | ||
1033 | } | ||
1034 | dma->device_issue_pending(dma_chan); | ||
1035 | |||
1036 | tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); | ||
1037 | |||
1038 | if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { | ||
1039 | dev_err(dev, "Self-test memset timed out\n"); | ||
1040 | err = -ENODEV; | ||
1041 | goto free_resources; | ||
1042 | } | ||
1043 | |||
1044 | for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { | ||
1045 | u32 *ptr = page_address(dest); | ||
1046 | if (ptr[i]) { | ||
1047 | dev_err(dev, "Self-test memset failed compare\n"); | ||
1048 | err = -ENODEV; | ||
1049 | goto free_resources; | ||
1050 | } | ||
1051 | } | ||
1052 | |||
1053 | /* test for non-zero parity sum */ | ||
1054 | xor_val_result = 0; | ||
1055 | for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) | ||
1056 | dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, | ||
1057 | DMA_TO_DEVICE); | ||
1058 | tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, | ||
1059 | IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, | ||
1060 | &xor_val_result, DMA_PREP_INTERRUPT); | ||
1061 | if (!tx) { | ||
1062 | dev_err(dev, "Self-test 2nd zero prep failed\n"); | ||
1063 | err = -ENODEV; | ||
1064 | goto free_resources; | ||
1065 | } | ||
1066 | |||
1067 | async_tx_ack(tx); | ||
1068 | init_completion(&cmp); | ||
1069 | tx->callback = ioat3_dma_test_callback; | ||
1070 | tx->callback_param = &cmp; | ||
1071 | cookie = tx->tx_submit(tx); | ||
1072 | if (cookie < 0) { | ||
1073 | dev_err(dev, "Self-test 2nd zero setup failed\n"); | ||
1074 | err = -ENODEV; | ||
1075 | goto free_resources; | ||
1076 | } | ||
1077 | dma->device_issue_pending(dma_chan); | ||
1078 | |||
1079 | tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); | ||
1080 | |||
1081 | if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { | ||
1082 | dev_err(dev, "Self-test 2nd validate timed out\n"); | ||
1083 | err = -ENODEV; | ||
1084 | goto free_resources; | ||
1085 | } | ||
1086 | |||
1087 | if (xor_val_result != SUM_CHECK_P_RESULT) { | ||
1088 | dev_err(dev, "Self-test validate failed compare\n"); | ||
1089 | err = -ENODEV; | ||
1090 | goto free_resources; | ||
1091 | } | ||
1092 | |||
1093 | free_resources: | ||
1094 | dma->device_free_chan_resources(dma_chan); | ||
1095 | out: | ||
1096 | src_idx = IOAT_NUM_SRC_TEST; | ||
1097 | while (src_idx--) | ||
1098 | __free_page(xor_srcs[src_idx]); | ||
1099 | __free_page(dest); | ||
1100 | return err; | ||
1101 | } | ||
1102 | |||
1103 | static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) | ||
1104 | { | ||
1105 | int rc = ioat_dma_self_test(device); | ||
1106 | |||
1107 | if (rc) | ||
1108 | return rc; | ||
1109 | |||
1110 | rc = ioat_xor_val_self_test(device); | ||
1111 | if (rc) | ||
1112 | return rc; | ||
1113 | |||
1114 | return 0; | ||
1115 | } | ||
1116 | |||
1117 | int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) | ||
1118 | { | ||
1119 | struct pci_dev *pdev = device->pdev; | ||
1120 | struct dma_device *dma; | ||
1121 | struct dma_chan *c; | ||
1122 | struct ioat_chan_common *chan; | ||
1123 | bool is_raid_device = false; | ||
1124 | int err; | ||
1125 | u16 dev_id; | ||
1126 | u32 cap; | ||
1127 | |||
1128 | device->enumerate_channels = ioat2_enumerate_channels; | ||
1129 | device->self_test = ioat3_dma_self_test; | ||
1130 | dma = &device->common; | ||
1131 | dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; | ||
1132 | dma->device_issue_pending = ioat2_issue_pending; | ||
1133 | dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; | ||
1134 | dma->device_free_chan_resources = ioat2_free_chan_resources; | ||
1135 | |||
1136 | dma_cap_set(DMA_INTERRUPT, dma->cap_mask); | ||
1137 | dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; | ||
1138 | |||
1139 | cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); | ||
1140 | if (cap & IOAT_CAP_XOR) { | ||
1141 | is_raid_device = true; | ||
1142 | dma->max_xor = 8; | ||
1143 | dma->xor_align = 2; | ||
1144 | |||
1145 | dma_cap_set(DMA_XOR, dma->cap_mask); | ||
1146 | dma->device_prep_dma_xor = ioat3_prep_xor; | ||
1147 | |||
1148 | dma_cap_set(DMA_XOR_VAL, dma->cap_mask); | ||
1149 | dma->device_prep_dma_xor_val = ioat3_prep_xor_val; | ||
1150 | } | ||
1151 | if (cap & IOAT_CAP_PQ) { | ||
1152 | is_raid_device = true; | ||
1153 | dma_set_maxpq(dma, 8, 0); | ||
1154 | dma->pq_align = 2; | ||
1155 | |||
1156 | dma_cap_set(DMA_PQ, dma->cap_mask); | ||
1157 | dma->device_prep_dma_pq = ioat3_prep_pq; | ||
1158 | |||
1159 | dma_cap_set(DMA_PQ_VAL, dma->cap_mask); | ||
1160 | dma->device_prep_dma_pq_val = ioat3_prep_pq_val; | ||
1161 | |||
1162 | if (!(cap & IOAT_CAP_XOR)) { | ||
1163 | dma->max_xor = 8; | ||
1164 | dma->xor_align = 2; | ||
1165 | |||
1166 | dma_cap_set(DMA_XOR, dma->cap_mask); | ||
1167 | dma->device_prep_dma_xor = ioat3_prep_pqxor; | ||
1168 | |||
1169 | dma_cap_set(DMA_XOR_VAL, dma->cap_mask); | ||
1170 | dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; | ||
1171 | } | ||
1172 | } | ||
1173 | if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) { | ||
1174 | dma_cap_set(DMA_MEMSET, dma->cap_mask); | ||
1175 | dma->device_prep_dma_memset = ioat3_prep_memset_lock; | ||
1176 | } | ||
1177 | |||
1178 | |||
1179 | if (is_raid_device) { | ||
1180 | dma->device_is_tx_complete = ioat3_is_complete; | ||
1181 | device->cleanup_tasklet = ioat3_cleanup_tasklet; | ||
1182 | device->timer_fn = ioat3_timer_event; | ||
1183 | } else { | ||
1184 | dma->device_is_tx_complete = ioat2_is_complete; | ||
1185 | device->cleanup_tasklet = ioat2_cleanup_tasklet; | ||
1186 | device->timer_fn = ioat2_timer_event; | ||
1187 | } | ||
1188 | |||
1189 | /* -= IOAT ver.3 workarounds =- */ | ||
1190 | /* Write CHANERRMSK_INT with 3E07h to mask out the errors | ||
1191 | * that can cause stability issues for IOAT ver.3 | ||
1192 | */ | ||
1193 | pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); | ||
1194 | |||
1195 | /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit | ||
1196 | * (workaround for spurious config parity error after restart) | ||
1197 | */ | ||
1198 | pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); | ||
1199 | if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) | ||
1200 | pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); | ||
1201 | |||
1202 | err = ioat_probe(device); | ||
1203 | if (err) | ||
1204 | return err; | ||
1205 | ioat_set_tcp_copy_break(262144); | ||
1206 | |||
1207 | list_for_each_entry(c, &dma->channels, device_node) { | ||
1208 | chan = to_chan_common(c); | ||
1209 | writel(IOAT_DMA_DCA_ANY_CPU, | ||
1210 | chan->reg_base + IOAT_DCACTRL_OFFSET); | ||
1211 | } | ||
1212 | |||
1213 | err = ioat_register(device); | ||
1214 | if (err) | ||
1215 | return err; | ||
1216 | |||
1217 | ioat_kobject_add(device, &ioat2_ktype); | ||
1218 | |||
1219 | if (dca) | ||
1220 | device->dca = ioat3_dca_init(pdev, device->reg_base); | ||
1221 | |||
1222 | return 0; | ||
1223 | } | ||
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h new file mode 100644 index 000000000000..99afb12bd409 --- /dev/null +++ b/drivers/dma/ioat/hw.h | |||
@@ -0,0 +1,215 @@ | |||
1 | /* | ||
2 | * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License as published by the Free | ||
6 | * Software Foundation; either version 2 of the License, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 | ||
16 | * Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in the | ||
19 | * file called COPYING. | ||
20 | */ | ||
21 | #ifndef _IOAT_HW_H_ | ||
22 | #define _IOAT_HW_H_ | ||
23 | |||
/* PCI Configuration Space Values */
#define IOAT_PCI_VID 0x8086
#define IOAT_MMIO_BAR 0

/* CB device ID's */
#define IOAT_PCI_DID_5000 0x1A38
#define IOAT_PCI_DID_CNB 0x360B
#define IOAT_PCI_DID_SCNB 0x65FF
#define IOAT_PCI_DID_SNB 0x402F

#define IOAT_PCI_RID 0x00
#define IOAT_PCI_SVID 0x8086
#define IOAT_PCI_SID 0x8086
/* version codes: major number in the high nibble, minor in the low nibble */
#define IOAT_VER_1_2 0x12 /* Version 1.2 */
#define IOAT_VER_2_0 0x20 /* Version 2.0 */
#define IOAT_VER_3_0 0x30 /* Version 3.0 */
#define IOAT_VER_3_2 0x32 /* Version 3.2 */
41 | |||
/*
 * Descriptor layout for copy operations (IOAT_OP_COPY).  The driver also
 * uses it, with ctl_f.null set, for null/completion descriptors.
 *
 * ctl and ctl_f overlay the same 32 bits: write ctl to reset every control
 * bit at once, use ctl_f to access individual fields.  Field order and
 * widths presumably mirror the hardware format and must not be changed.
 */
struct ioat_dma_descriptor {
	uint32_t size;
	union {
		uint32_t ctl;
		struct {
			unsigned int int_en:1;
			unsigned int src_snoop_dis:1;
			unsigned int dest_snoop_dis:1;
			unsigned int compl_write:1;
			unsigned int fence:1;
			unsigned int null:1;
			unsigned int src_brk:1;
			unsigned int dest_brk:1;
			unsigned int bundle:1;
			unsigned int dest_dca:1;
			unsigned int hint:1;
			unsigned int rsvd2:13;
			#define IOAT_OP_COPY 0x00
			unsigned int op:8;
		} ctl_f;
	};
	uint64_t src_addr;
	uint64_t dst_addr;
	uint64_t next;
	uint64_t rsv1;
	uint64_t rsv2;
	/* store some driver data in an unused portion of the descriptor */
	union {
		uint64_t user1;
		uint64_t tx_cnt;
	};
	uint64_t user2;
};
75 | |||
/*
 * Descriptor layout for block fill operations (IOAT_OP_FILL).  Instead of
 * a source address it carries the 64-bit fill pattern in src_data.
 *
 * ctl and ctl_f overlay the same 32 bits.  Field order and widths
 * presumably mirror the hardware format and must not be changed.
 */
struct ioat_fill_descriptor {
	uint32_t size;
	union {
		uint32_t ctl;
		struct {
			unsigned int int_en:1;
			unsigned int rsvd:1;
			unsigned int dest_snoop_dis:1;
			unsigned int compl_write:1;
			unsigned int fence:1;
			unsigned int rsvd2:2;
			unsigned int dest_brk:1;
			unsigned int bundle:1;
			unsigned int rsvd4:15;
			#define IOAT_OP_FILL 0x01
			unsigned int op:8;
		} ctl_f;
	};
	uint64_t src_data;
	uint64_t dst_addr;
	uint64_t next;
	uint64_t rsv1;
	uint64_t next_dst_addr;
	uint64_t user1;
	uint64_t user2;
};
102 | |||
/*
 * Descriptor layout for xor (IOAT_OP_XOR) and xor-validate
 * (IOAT_OP_XOR_VAL) operations.  Holds the destination and the first five
 * source addresses; sources six to eight live in
 * struct ioat_xor_ext_descriptor.
 *
 * ctl and ctl_f overlay the same 32 bits.  src_cnt is a 3-bit field.
 * Field order and widths presumably mirror the hardware format and must
 * not be changed.
 */
struct ioat_xor_descriptor {
	uint32_t size;
	union {
		uint32_t ctl;
		struct {
			unsigned int int_en:1;
			unsigned int src_snoop_dis:1;
			unsigned int dest_snoop_dis:1;
			unsigned int compl_write:1;
			unsigned int fence:1;
			unsigned int src_cnt:3;
			unsigned int bundle:1;
			unsigned int dest_dca:1;
			unsigned int hint:1;
			unsigned int rsvd:13;
			#define IOAT_OP_XOR 0x87
			#define IOAT_OP_XOR_VAL 0x88
			unsigned int op:8;
		} ctl_f;
	};
	uint64_t src_addr;
	uint64_t dst_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint64_t src_addr4;
	uint64_t src_addr5;
};
131 | |||
/*
 * Extension of struct ioat_xor_descriptor carrying source addresses six to
 * eight.  Note that next sits at the same byte offset (24) as in the base
 * descriptor.
 */
struct ioat_xor_ext_descriptor {
	uint64_t src_addr6;
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t next;
	uint64_t rsvd[4];
};
139 | |||
/*
 * Descriptor layout for pq (IOAT_OP_PQ) and pq-validate (IOAT_OP_PQ_VAL)
 * operations.  Holds the P and Q addresses, the first three source
 * addresses and eight one-byte coefficients; sources four to eight live in
 * struct ioat_pq_ext_descriptor.
 *
 * ctl and ctl_f overlay the same 32 bits.  p_disable and q_disable are
 * driven from DMA_PREP_PQ_DISABLE_P / DMA_PREP_PQ_DISABLE_Q by the prep
 * routine.  Field order and widths presumably mirror the hardware format
 * and must not be changed.
 */
struct ioat_pq_descriptor {
	uint32_t size;
	union {
		uint32_t ctl;
		struct {
			unsigned int int_en:1;
			unsigned int src_snoop_dis:1;
			unsigned int dest_snoop_dis:1;
			unsigned int compl_write:1;
			unsigned int fence:1;
			unsigned int src_cnt:3;
			unsigned int bundle:1;
			unsigned int dest_dca:1;
			unsigned int hint:1;
			unsigned int p_disable:1;
			unsigned int q_disable:1;
			unsigned int rsvd:11;
			#define IOAT_OP_PQ 0x89
			#define IOAT_OP_PQ_VAL 0x8a
			unsigned int op:8;
		} ctl_f;
	};
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint8_t coef[8];
	uint64_t q_addr;
};
170 | |||
/*
 * Extension of struct ioat_pq_descriptor carrying source addresses four to
 * eight.  Note that next sits at the same byte offset (24) as in the base
 * descriptor.
 */
struct ioat_pq_ext_descriptor {
	uint64_t src_addr4;
	uint64_t src_addr5;
	uint64_t src_addr6;
	uint64_t next;
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t rsvd[2];
};
180 | |||
/*
 * Descriptor layout for the pq-update operation (IOAT_OP_PQ_UP).  Unlike
 * struct ioat_pq_descriptor it carries a single 8-bit coefficient inside
 * the control word and separate p_src/q_src input addresses.
 *
 * ctl and ctl_f overlay the same 32 bits.  Field order and widths
 * presumably mirror the hardware format and must not be changed.
 */
struct ioat_pq_update_descriptor {
	uint32_t size;
	union {
		uint32_t ctl;
		struct {
			unsigned int int_en:1;
			unsigned int src_snoop_dis:1;
			unsigned int dest_snoop_dis:1;
			unsigned int compl_write:1;
			unsigned int fence:1;
			unsigned int src_cnt:3;
			unsigned int bundle:1;
			unsigned int dest_dca:1;
			unsigned int hint:1;
			unsigned int p_disable:1;
			unsigned int q_disable:1;
			unsigned int rsvd:3;
			unsigned int coef:8;
			#define IOAT_OP_PQ_UP 0x8b
			unsigned int op:8;
		} ctl_f;
	};
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t p_src;
	uint64_t q_src;
	uint64_t q_addr;
};
211 | |||
/*
 * Untyped view of a descriptor as eight 64-bit words; the prep and dump
 * code casts typed descriptors to this so fields can be reached by index.
 */
struct ioat_raw_descriptor {
	uint64_t field[8];
};
215 | #endif | ||
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c new file mode 100644 index 000000000000..d545fae30f37 --- /dev/null +++ b/drivers/dma/ioat/pci.c | |||
@@ -0,0 +1,210 @@ | |||
1 | /* | ||
2 | * Intel I/OAT DMA Linux driver | ||
3 | * Copyright(c) 2007 - 2009 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., | ||
16 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in | ||
19 | * the file called "COPYING". | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * This driver supports an Intel I/OAT DMA engine, which does asynchronous | ||
25 | * copy operations. | ||
26 | */ | ||
27 | |||
28 | #include <linux/init.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/pci.h> | ||
31 | #include <linux/interrupt.h> | ||
32 | #include <linux/dca.h> | ||
33 | #include "dma.h" | ||
34 | #include "dma_v2.h" | ||
35 | #include "registers.h" | ||
36 | #include "hw.h" | ||
37 | |||
/* Module metadata; the version string comes from IOAT_DMA_VERSION. */
38 | MODULE_VERSION(IOAT_DMA_VERSION); | ||
/*
 * NOTE(review): the header comment of this file states GPL version 2 only,
 * while the license tag below declares "Dual BSD/GPL" -- confirm which one
 * is intended before changing either.
 */
39 | MODULE_LICENSE("Dual BSD/GPL"); | ||
40 | MODULE_AUTHOR("Intel Corporation"); | ||
41 | |||
/*
 * PCI IDs handled by this driver, grouped by I/OAT hardware generation.
 * The list is terminated by an all-zero entry and published through
 * MODULE_DEVICE_TABLE(); it is also the id_table of ioat_pci_driver.
 * The actual hardware version is read from the device in ioat_pci_probe(),
 * not derived from the group an ID is listed under.
 */
42 | static struct pci_device_id ioat_pci_tbl[] = { | ||
43 | /* I/OAT v1 platforms */ | ||
44 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, | ||
45 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, | ||
46 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, | ||
47 | { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, | ||
48 | |||
49 | /* I/OAT v2 platforms */ | ||
50 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, | ||
51 | |||
52 | /* I/OAT v3 platforms */ | ||
53 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, | ||
54 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, | ||
55 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, | ||
56 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, | ||
57 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, | ||
58 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, | ||
59 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, | ||
60 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, | ||
61 | |||
62 | /* I/OAT v3.2 platforms */ | ||
63 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, | ||
64 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, | ||
65 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, | ||
66 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, | ||
67 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, | ||
68 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, | ||
69 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, | ||
70 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, | ||
71 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, | ||
72 | { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, | ||
73 | |||
74 | { 0, } | ||
75 | }; | ||
76 | MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); | ||
77 | |||
/*
 * Forward declarations: ioat_pci_driver below refers to both callbacks
 * before their definitions appear in this file.
 */
78 | static int __devinit ioat_pci_probe(struct pci_dev *pdev, | ||
79 | const struct pci_device_id *id); | ||
80 | static void __devexit ioat_remove(struct pci_dev *pdev); | ||
81 | |||
/*
 * Module parameter (mode 0644), default 1.  Its value is handed unchanged
 * to the version-specific ioatN_dma_probe() call in ioat_pci_probe().
 */
82 | static int ioat_dca_enabled = 1; | ||
83 | module_param(ioat_dca_enabled, int, 0644); | ||
84 | MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); | ||
85 | |||
/*
 * Slab cache of struct ioat_ring_ent objects; created in
 * ioat_init_module() and destroyed in ioat_exit_module().  Not static, so
 * it is visible to other translation units of the driver.
 */
86 | struct kmem_cache *ioat2_cache; | ||
87 | |||
/* Name used for the PCI driver, the MMIO region request and the banner. */
88 | #define DRV_NAME "ioatdma" | ||
89 | |||
/*
 * PCI driver glue: binds the IDs in ioat_pci_tbl to ioat_pci_probe() and
 * ioat_remove().  Registered in ioat_init_module() and unregistered in
 * ioat_exit_module().
 */
90 | static struct pci_driver ioat_pci_driver = { | ||
91 | .name = DRV_NAME, | ||
92 | .id_table = ioat_pci_tbl, | ||
93 | .probe = ioat_pci_probe, | ||
94 | .remove = __devexit_p(ioat_remove), | ||
95 | }; | ||
96 | |||
97 | static struct ioatdma_device * | ||
98 | alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) | ||
99 | { | ||
100 | struct device *dev = &pdev->dev; | ||
101 | struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); | ||
102 | |||
103 | if (!d) | ||
104 | return NULL; | ||
105 | d->pdev = pdev; | ||
106 | d->reg_base = iobase; | ||
107 | return d; | ||
108 | } | ||
109 | |||
110 | static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) | ||
111 | { | ||
112 | void __iomem * const *iomap; | ||
113 | struct device *dev = &pdev->dev; | ||
114 | struct ioatdma_device *device; | ||
115 | int err; | ||
116 | |||
117 | err = pcim_enable_device(pdev); | ||
118 | if (err) | ||
119 | return err; | ||
120 | |||
121 | err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); | ||
122 | if (err) | ||
123 | return err; | ||
124 | iomap = pcim_iomap_table(pdev); | ||
125 | if (!iomap) | ||
126 | return -ENOMEM; | ||
127 | |||
128 | err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); | ||
129 | if (err) | ||
130 | err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); | ||
131 | if (err) | ||
132 | return err; | ||
133 | |||
134 | err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); | ||
135 | if (err) | ||
136 | err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); | ||
137 | if (err) | ||
138 | return err; | ||
139 | |||
140 | device = devm_kzalloc(dev, sizeof(*device), GFP_KERNEL); | ||
141 | if (!device) | ||
142 | return -ENOMEM; | ||
143 | |||
144 | pci_set_master(pdev); | ||
145 | |||
146 | device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); | ||
147 | if (!device) | ||
148 | return -ENOMEM; | ||
149 | pci_set_drvdata(pdev, device); | ||
150 | |||
151 | device->version = readb(device->reg_base + IOAT_VER_OFFSET); | ||
152 | if (device->version == IOAT_VER_1_2) | ||
153 | err = ioat1_dma_probe(device, ioat_dca_enabled); | ||
154 | else if (device->version == IOAT_VER_2_0) | ||
155 | err = ioat2_dma_probe(device, ioat_dca_enabled); | ||
156 | else if (device->version >= IOAT_VER_3_0) | ||
157 | err = ioat3_dma_probe(device, ioat_dca_enabled); | ||
158 | else | ||
159 | return -ENODEV; | ||
160 | |||
161 | if (err) { | ||
162 | dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); | ||
163 | return -ENODEV; | ||
164 | } | ||
165 | |||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | static void __devexit ioat_remove(struct pci_dev *pdev) | ||
170 | { | ||
171 | struct ioatdma_device *device = pci_get_drvdata(pdev); | ||
172 | |||
173 | if (!device) | ||
174 | return; | ||
175 | |||
176 | dev_err(&pdev->dev, "Removing dma and dca services\n"); | ||
177 | if (device->dca) { | ||
178 | unregister_dca_provider(device->dca, &pdev->dev); | ||
179 | free_dca_provider(device->dca); | ||
180 | device->dca = NULL; | ||
181 | } | ||
182 | ioat_dma_remove(device); | ||
183 | } | ||
184 | |||
185 | static int __init ioat_init_module(void) | ||
186 | { | ||
187 | int err; | ||
188 | |||
189 | pr_info("%s: Intel(R) QuickData Technology Driver %s\n", | ||
190 | DRV_NAME, IOAT_DMA_VERSION); | ||
191 | |||
192 | ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), | ||
193 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
194 | if (!ioat2_cache) | ||
195 | return -ENOMEM; | ||
196 | |||
197 | err = pci_register_driver(&ioat_pci_driver); | ||
198 | if (err) | ||
199 | kmem_cache_destroy(ioat2_cache); | ||
200 | |||
201 | return err; | ||
202 | } | ||
203 | module_init(ioat_init_module); | ||
204 | |||
205 | static void __exit ioat_exit_module(void) | ||
206 | { | ||
207 | pci_unregister_driver(&ioat_pci_driver); | ||
208 | kmem_cache_destroy(ioat2_cache); | ||
209 | } | ||
210 | module_exit(ioat_exit_module); | ||
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioat/registers.h index 49bc277424f8..63038e18ab03 100644 --- a/drivers/dma/ioatdma_registers.h +++ b/drivers/dma/ioat/registers.h | |||
@@ -64,18 +64,37 @@ | |||
64 | 64 | ||
65 | #define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ | 65 | #define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ |
66 | #define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 | 66 | #define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 |
67 | #define IOAT_DEVICE_MMIO_RESTRICTED 0x0002 | ||
68 | #define IOAT_DEVICE_MEMORY_BYPASS 0x0004 | ||
69 | #define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008 | ||
70 | |||
71 | #define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */ | ||
72 | #define IOAT_CAP_PAGE_BREAK 0x00000001 | ||
73 | #define IOAT_CAP_CRC 0x00000002 | ||
74 | #define IOAT_CAP_SKIP_MARKER 0x00000004 | ||
75 | #define IOAT_CAP_DCA 0x00000010 | ||
76 | #define IOAT_CAP_CRC_MOVE 0x00000020 | ||
77 | #define IOAT_CAP_FILL_BLOCK 0x00000040 | ||
78 | #define IOAT_CAP_APIC 0x00000080 | ||
79 | #define IOAT_CAP_XOR 0x00000100 | ||
80 | #define IOAT_CAP_PQ 0x00000200 | ||
67 | 81 | ||
68 | #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ | 82 | #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ |
69 | 83 | ||
70 | /* DMA Channel Registers */ | 84 | /* DMA Channel Registers */ |
71 | #define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ | 85 | #define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ |
72 | #define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 | 86 | #define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 |
87 | #define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200 | ||
73 | #define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 | 88 | #define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 |
74 | #define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 | 89 | #define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 |
75 | #define IOAT_CHANCTRL_ERR_INT_EN 0x0010 | 90 | #define IOAT_CHANCTRL_ERR_INT_EN 0x0010 |
76 | #define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 | 91 | #define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 |
77 | #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 | 92 | #define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 |
78 | #define IOAT_CHANCTRL_INT_DISABLE 0x0001 | 93 | #define IOAT_CHANCTRL_INT_REARM 0x0001 |
94 | #define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\ | ||
95 | IOAT_CHANCTRL_ERR_COMPLETION_EN |\ | ||
96 | IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\ | ||
97 | IOAT_CHANCTRL_ERR_INT_EN) | ||
79 | 98 | ||
80 | #define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ | 99 | #define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ |
81 | #define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ | 100 | #define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ |
@@ -94,14 +113,14 @@ | |||
94 | #define IOAT2_CHANSTS_OFFSET_HIGH 0x0C | 113 | #define IOAT2_CHANSTS_OFFSET_HIGH 0x0C |
95 | #define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ | 114 | #define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ |
96 | ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) | 115 | ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) |
97 | #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F | 116 | #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL) |
98 | #define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 | 117 | #define IOAT_CHANSTS_SOFT_ERR 0x10ULL |
99 | #define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 | 118 | #define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL |
100 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 | 119 | #define IOAT_CHANSTS_STATUS 0x7ULL |
101 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 | 120 | #define IOAT_CHANSTS_ACTIVE 0x0 |
102 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 | 121 | #define IOAT_CHANSTS_DONE 0x1 |
103 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 | 122 | #define IOAT_CHANSTS_SUSPENDED 0x2 |
104 | #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3 | 123 | #define IOAT_CHANSTS_HALTED 0x3 |
105 | 124 | ||
106 | 125 | ||
107 | 126 | ||
@@ -204,22 +223,27 @@ | |||
204 | #define IOAT_CDAR_OFFSET_HIGH 0x24 | 223 | #define IOAT_CDAR_OFFSET_HIGH 0x24 |
205 | 224 | ||
206 | #define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ | 225 | #define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ |
207 | #define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001 | 226 | #define IOAT_CHANERR_SRC_ADDR_ERR 0x0001 |
208 | #define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002 | 227 | #define IOAT_CHANERR_DEST_ADDR_ERR 0x0002 |
209 | #define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004 | 228 | #define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004 |
210 | #define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008 | 229 | #define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008 |
211 | #define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 | 230 | #define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 |
212 | #define IOAT_CHANERR_CHANCMD_ERR 0x0020 | 231 | #define IOAT_CHANERR_CHANCMD_ERR 0x0020 |
213 | #define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 | 232 | #define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 |
214 | #define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 | 233 | #define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 |
215 | #define IOAT_CHANERR_READ_DATA_ERR 0x0100 | 234 | #define IOAT_CHANERR_READ_DATA_ERR 0x0100 |
216 | #define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 | 235 | #define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 |
217 | #define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400 | 236 | #define IOAT_CHANERR_CONTROL_ERR 0x0400 |
218 | #define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800 | 237 | #define IOAT_CHANERR_LENGTH_ERR 0x0800 |
219 | #define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 | 238 | #define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 |
220 | #define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 | 239 | #define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 |
221 | #define IOAT_CHANERR_SOFT_ERR 0x4000 | 240 | #define IOAT_CHANERR_SOFT_ERR 0x4000 |
222 | #define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 | 241 | #define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 |
242 | #define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000 | ||
243 | #define IOAT_CHANERR_XOR_Q_ERR 0x20000 | ||
244 | #define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000 | ||
245 | |||
246 | #define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR) | ||
223 | 247 | ||
224 | #define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ | 248 | #define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ |
225 | 249 | ||
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c deleted file mode 100644 index a600fc0f7962..000000000000 --- a/drivers/dma/ioat_dma.c +++ /dev/null | |||
@@ -1,1741 +0,0 @@ | |||
1 | /* | ||
2 | * Intel I/OAT DMA Linux driver | ||
3 | * Copyright(c) 2004 - 2009 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., | ||
16 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in | ||
19 | * the file called "COPYING". | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * This driver supports an Intel I/OAT DMA engine, which does asynchronous | ||
25 | * copy operations. | ||
26 | */ | ||
27 | |||
28 | #include <linux/init.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/pci.h> | ||
31 | #include <linux/interrupt.h> | ||
32 | #include <linux/dmaengine.h> | ||
33 | #include <linux/delay.h> | ||
34 | #include <linux/dma-mapping.h> | ||
35 | #include <linux/workqueue.h> | ||
36 | #include <linux/i7300_idle.h> | ||
37 | #include "ioatdma.h" | ||
38 | #include "ioatdma_registers.h" | ||
39 | #include "ioatdma_hw.h" | ||
40 | |||
/*
 * container_of() helpers: recover the driver structure from a pointer to
 * the member named in each macro's last argument.
 */
41 | #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) | ||
42 | #define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) | ||
43 | #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) | ||
44 | #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) | ||
45 | |||
/*
 * Channel number derived from the distance between the channel's register
 * base and the device's register base, in units of 0x80 bytes.  Channels
 * are placed at 0x80 * (i + 1) in ioat_dma_enumerate_channels(), so the
 * result is i + 1 for the channel stored in idx[i].
 */
46 | #define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) | ||
/* Module parameter (mode 0644); compared against 'pending' by the watchdog. */
47 | static int ioat_pending_level = 4; | ||
48 | module_param(ioat_pending_level, int, 0644); | ||
49 | MODULE_PARM_DESC(ioat_pending_level, | ||
50 | "high-water mark for pushing ioat descriptors (default: 4)"); | ||
51 | |||
/*
 * RESET_DELAY: delay before the second half of a channel reset runs.
 * WATCHDOG_DELAY: period at which the channel watchdog re-arms itself.
 */
52 | #define RESET_DELAY msecs_to_jiffies(100) | ||
53 | #define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000)) | ||
54 | static void ioat_dma_chan_reset_part2(struct work_struct *work); | ||
55 | static void ioat_dma_chan_watchdog(struct work_struct *work); | ||
56 | |||
57 | /* | ||
58 | * workaround for IOAT ver.3.0 null descriptor issue | ||
59 | * (channel returns error when size is 0) | ||
60 | */ | ||
61 | #define NULL_DESC_BUFFER_SIZE 1 | ||
62 | |||
63 | /* internal functions */ | ||
64 | static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); | ||
65 | static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); | ||
66 | |||
67 | static struct ioat_desc_sw * | ||
68 | ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); | ||
69 | static struct ioat_desc_sw * | ||
70 | ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); | ||
71 | |||
72 | static inline struct ioat_dma_chan *ioat_lookup_chan_by_index( | ||
73 | struct ioatdma_device *device, | ||
74 | int index) | ||
75 | { | ||
76 | return device->idx[index]; | ||
77 | } | ||
78 | |||
79 | /** | ||
80 | * ioat_dma_do_interrupt - handler used for single vector interrupt mode | ||
81 | * @irq: interrupt id | ||
82 | * @data: interrupt data | ||
83 | */ | ||
84 | static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) | ||
85 | { | ||
86 | struct ioatdma_device *instance = data; | ||
87 | struct ioat_dma_chan *ioat_chan; | ||
88 | unsigned long attnstatus; | ||
89 | int bit; | ||
90 | u8 intrctrl; | ||
91 | |||
92 | intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); | ||
93 | |||
94 | if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) | ||
95 | return IRQ_NONE; | ||
96 | |||
97 | if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { | ||
98 | writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); | ||
99 | return IRQ_NONE; | ||
100 | } | ||
101 | |||
102 | attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); | ||
103 | for_each_bit(bit, &attnstatus, BITS_PER_LONG) { | ||
104 | ioat_chan = ioat_lookup_chan_by_index(instance, bit); | ||
105 | tasklet_schedule(&ioat_chan->cleanup_task); | ||
106 | } | ||
107 | |||
108 | writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); | ||
109 | return IRQ_HANDLED; | ||
110 | } | ||
111 | |||
112 | /** | ||
113 | * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode | ||
114 | * @irq: interrupt id | ||
115 | * @data: interrupt data | ||
116 | */ | ||
117 | static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) | ||
118 | { | ||
119 | struct ioat_dma_chan *ioat_chan = data; | ||
120 | |||
121 | tasklet_schedule(&ioat_chan->cleanup_task); | ||
122 | |||
123 | return IRQ_HANDLED; | ||
124 | } | ||
125 | |||
126 | static void ioat_dma_cleanup_tasklet(unsigned long data); | ||
127 | |||
128 | /** | ||
129 | * ioat_dma_enumerate_channels - find and initialize the device's channels | ||
130 | * @device: the device to be enumerated | ||
131 | */ | ||
/*
 * Returns the number of channels actually set up (device->common.chancnt),
 * which is lower than the hardware count if an allocation fails part-way
 * or if one channel is withheld by the i7300 idle probe below.
 */
132 | static int ioat_dma_enumerate_channels(struct ioatdma_device *device) | ||
133 | { | ||
134 | u8 xfercap_scale; | ||
135 | u32 xfercap; | ||
136 | int i; | ||
137 | struct ioat_dma_chan *ioat_chan; | ||
138 | |||
139 | /* | ||
140 | * IOAT ver.3 workarounds | ||
141 | */ | ||
142 | if (device->version == IOAT_VER_3_0) { | ||
143 | u32 chan_err_mask; | ||
144 | u16 dev_id; | ||
145 | u32 dmauncerrsts; | ||
146 | |||
147 | /* | ||
148 | * Write CHANERRMSK_INT with 3E07h to mask out the errors | ||
149 | * that can cause stability issues for IOAT ver.3 | ||
150 | */ | ||
151 | chan_err_mask = 0x3E07; | ||
152 | pci_write_config_dword(device->pdev, | ||
153 | IOAT_PCI_CHANERRMASK_INT_OFFSET, | ||
154 | chan_err_mask); | ||
155 | |||
156 | /* | ||
157 | * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit | ||
158 | * (workaround for spurious config parity error after restart) | ||
159 | */ | ||
160 | pci_read_config_word(device->pdev, | ||
161 | IOAT_PCI_DEVICE_ID_OFFSET, | ||
162 | &dev_id); | ||
/* this second workaround is applied to the TBG0 device ID only */
163 | if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { | ||
164 | dmauncerrsts = 0x10; | ||
165 | pci_write_config_dword(device->pdev, | ||
166 | IOAT_PCI_DMAUNCERRSTS_OFFSET, | ||
167 | dmauncerrsts); | ||
168 | } | ||
169 | } | ||
170 | |||
171 | device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); | ||
172 | xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); | ||
/* a scale of 0 yields -1 (all ones in the u32), otherwise 1 << scale */
173 | xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); | ||
174 | |||
/* when the i7300 idle probe returns 0, one channel is not enumerated here */
175 | #ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL | ||
176 | if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) { | ||
177 | device->common.chancnt--; | ||
178 | } | ||
179 | #endif | ||
180 | for (i = 0; i < device->common.chancnt; i++) { | ||
181 | ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL); | ||
/* on allocation failure keep the channels built so far and stop */
182 | if (!ioat_chan) { | ||
183 | device->common.chancnt = i; | ||
184 | break; | ||
185 | } | ||
186 | |||
187 | ioat_chan->device = device; | ||
/* channel register blocks are 0x80 bytes apart, starting at offset 0x80 */
188 | ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); | ||
189 | ioat_chan->xfercap = xfercap; | ||
190 | ioat_chan->desccount = 0; | ||
191 | INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2); | ||
/* DCACTRL gets a version-specific value; other versions are left untouched */
192 | if (ioat_chan->device->version == IOAT_VER_2_0) | ||
193 | writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | | ||
194 | IOAT_DMA_DCA_ANY_CPU, | ||
195 | ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); | ||
196 | else if (ioat_chan->device->version == IOAT_VER_3_0) | ||
197 | writel(IOAT_DMA_DCA_ANY_CPU, | ||
198 | ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); | ||
199 | spin_lock_init(&ioat_chan->cleanup_lock); | ||
200 | spin_lock_init(&ioat_chan->desc_lock); | ||
201 | INIT_LIST_HEAD(&ioat_chan->free_desc); | ||
202 | INIT_LIST_HEAD(&ioat_chan->used_desc); | ||
203 | /* This should be made common somewhere in dmaengine.c */ | ||
204 | ioat_chan->common.device = &device->common; | ||
205 | list_add_tail(&ioat_chan->common.device_node, | ||
206 | &device->common.channels); | ||
207 | device->idx[i] = ioat_chan; | ||
208 | tasklet_init(&ioat_chan->cleanup_task, | ||
209 | ioat_dma_cleanup_tasklet, | ||
210 | (unsigned long) ioat_chan); | ||
/*
 * The tasklet is left disabled here.
 * NOTE(review): presumably enabled once channel resources are allocated --
 * confirm against the alloc_chan_resources path.
 */
211 | tasklet_disable(&ioat_chan->cleanup_task); | ||
212 | } | ||
213 | return device->common.chancnt; | ||
214 | } | ||
215 | |||
216 | /** | ||
217 | * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended | ||
218 | * descriptors to hw | ||
219 | * @chan: DMA channel handle | ||
220 | */ | ||
221 | static inline void __ioat1_dma_memcpy_issue_pending( | ||
222 | struct ioat_dma_chan *ioat_chan) | ||
223 | { | ||
224 | ioat_chan->pending = 0; | ||
225 | writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET); | ||
226 | } | ||
227 | |||
228 | static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) | ||
229 | { | ||
230 | struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); | ||
231 | |||
232 | if (ioat_chan->pending > 0) { | ||
233 | spin_lock_bh(&ioat_chan->desc_lock); | ||
234 | __ioat1_dma_memcpy_issue_pending(ioat_chan); | ||
235 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
236 | } | ||
237 | } | ||
238 | |||
239 | static inline void __ioat2_dma_memcpy_issue_pending( | ||
240 | struct ioat_dma_chan *ioat_chan) | ||
241 | { | ||
242 | ioat_chan->pending = 0; | ||
243 | writew(ioat_chan->dmacount, | ||
244 | ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); | ||
245 | } | ||
246 | |||
247 | static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) | ||
248 | { | ||
249 | struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); | ||
250 | |||
251 | if (ioat_chan->pending > 0) { | ||
252 | spin_lock_bh(&ioat_chan->desc_lock); | ||
253 | __ioat2_dma_memcpy_issue_pending(ioat_chan); | ||
254 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
255 | } | ||
256 | } | ||
257 | |||
258 | |||
259 | /** | ||
260 | * ioat_dma_chan_reset_part2 - reinit the channel after a reset | ||
261 | */ | ||
/*
 * Delayed-work handler installed by INIT_DELAYED_WORK() in
 * ioat_dma_enumerate_channels() and scheduled by ioat_dma_reset_channel()
 * RESET_DELAY after the reset command was written.
 * @work: the work member embedded in the channel's delayed work item
 */
262 | static void ioat_dma_chan_reset_part2(struct work_struct *work) | ||
263 | { | ||
264 | struct ioat_dma_chan *ioat_chan = | ||
265 | container_of(work, struct ioat_dma_chan, work.work); | ||
266 | struct ioat_desc_sw *desc; | ||
267 | |||
/* lock order: cleanup_lock first, then desc_lock; released in reverse */
268 | spin_lock_bh(&ioat_chan->cleanup_lock); | ||
269 | spin_lock_bh(&ioat_chan->desc_lock); | ||
270 | |||
/* clear the completion writeback area and the pending counter */
271 | ioat_chan->completion_virt->low = 0; | ||
272 | ioat_chan->completion_virt->high = 0; | ||
273 | ioat_chan->pending = 0; | ||
274 | |||
275 | /* | ||
276 | * count the descriptors waiting, and be sure to do it | ||
277 | * right for both the CB1 line and the CB2 ring | ||
278 | */ | ||
279 | ioat_chan->dmacount = 0; | ||
280 | if (ioat_chan->used_desc.prev) { | ||
281 | desc = to_ioat_desc(ioat_chan->used_desc.prev); | ||
/* walk forward from used_desc.prev until used_desc.next is reached */
282 | do { | ||
283 | ioat_chan->dmacount++; | ||
284 | desc = to_ioat_desc(desc->node.next); | ||
285 | } while (&desc->node != ioat_chan->used_desc.next); | ||
286 | } | ||
287 | |||
288 | /* | ||
289 | * write the new starting descriptor address | ||
290 | * this puts channel engine into ARMED state | ||
291 | */ | ||
/*
 * NOTE(review): used_desc.prev is used here without the NULL check applied
 * above; the only scheduler visible in this file, ioat_dma_reset_channel(),
 * returns early when used_desc.prev is NULL -- confirm no other path
 * queues this work.
 */
292 | desc = to_ioat_desc(ioat_chan->used_desc.prev); | ||
/* only versions 1.2 and 2.0 are handled; other versions write nothing */
293 | switch (ioat_chan->device->version) { | ||
294 | case IOAT_VER_1_2: | ||
295 | writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, | ||
296 | ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); | ||
297 | writel(((u64) desc->async_tx.phys) >> 32, | ||
298 | ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); | ||
299 | |||
300 | writeb(IOAT_CHANCMD_START, ioat_chan->reg_base | ||
301 | + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); | ||
302 | break; | ||
303 | case IOAT_VER_2_0: | ||
304 | writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, | ||
305 | ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); | ||
306 | writel(((u64) desc->async_tx.phys) >> 32, | ||
307 | ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); | ||
308 | |||
309 | /* tell the engine to go with what's left to be done */ | ||
310 | writew(ioat_chan->dmacount, | ||
311 | ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); | ||
312 | |||
313 | break; | ||
314 | } | ||
/* reported at error level on every reset, whether or not the restart succeeded */
315 | dev_err(&ioat_chan->device->pdev->dev, | ||
316 | "chan%d reset - %d descs waiting, %d total desc\n", | ||
317 | chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); | ||
318 | |||
319 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
320 | spin_unlock_bh(&ioat_chan->cleanup_lock); | ||
321 | } | ||
322 | |||
323 | /** | ||
324 | * ioat_dma_reset_channel - restart a channel | ||
325 | * @ioat_chan: IOAT DMA channel handle | ||
326 | */ | ||
327 | static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan) | ||
328 | { | ||
329 | u32 chansts, chanerr; | ||
330 | |||
331 | if (!ioat_chan->used_desc.prev) | ||
332 | return; | ||
333 | |||
334 | chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); | ||
335 | chansts = (ioat_chan->completion_virt->low | ||
336 | & IOAT_CHANSTS_DMA_TRANSFER_STATUS); | ||
337 | if (chanerr) { | ||
338 | dev_err(&ioat_chan->device->pdev->dev, | ||
339 | "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", | ||
340 | chan_num(ioat_chan), chansts, chanerr); | ||
341 | writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); | ||
342 | } | ||
343 | |||
344 | /* | ||
345 | * whack it upside the head with a reset | ||
346 | * and wait for things to settle out. | ||
347 | * force the pending count to a really big negative | ||
348 | * to make sure no one forces an issue_pending | ||
349 | * while we're waiting. | ||
350 | */ | ||
351 | |||
352 | spin_lock_bh(&ioat_chan->desc_lock); | ||
353 | ioat_chan->pending = INT_MIN; | ||
354 | writeb(IOAT_CHANCMD_RESET, | ||
355 | ioat_chan->reg_base | ||
356 | + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); | ||
357 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
358 | |||
359 | /* schedule the 2nd half instead of sleeping a long time */ | ||
360 | schedule_delayed_work(&ioat_chan->work, RESET_DELAY); | ||
361 | } | ||
362 | |||
363 | /** | ||
364 | * ioat_dma_chan_watchdog - watch for stuck channels | ||
365 | */ | ||
/*
 * Delayed-work handler that inspects every channel of the device and
 * re-arms itself every WATCHDOG_DELAY.  Per channel it takes one of three
 * paths: the version 1.2 stuck check, the version 2.0 stuck check, or the
 * default path that records the current completion as the new baseline.
 * @work: the work member embedded in the device's delayed work item
 */
366 | static void ioat_dma_chan_watchdog(struct work_struct *work) | ||
367 | { | ||
368 | struct ioatdma_device *device = | ||
369 | container_of(work, struct ioatdma_device, work.work); | ||
370 | struct ioat_dma_chan *ioat_chan; | ||
371 | int i; | ||
372 | |||
/*
 * 64-bit CHANSTS value assembled from two 32-bit register reads.
 * NOTE(review): mapping low/high onto 'full' this way assumes a
 * little-endian layout -- confirm for other architectures.
 */
373 | union { | ||
374 | u64 full; | ||
375 | struct { | ||
376 | u32 low; | ||
377 | u32 high; | ||
378 | }; | ||
379 | } completion_hw; | ||
380 | unsigned long compl_desc_addr_hw; | ||
381 | |||
382 | for (i = 0; i < device->common.chancnt; i++) { | ||
383 | ioat_chan = ioat_lookup_chan_by_index(device, i); | ||
384 | |||
385 | if (ioat_chan->device->version == IOAT_VER_1_2 | ||
386 | /* have we started processing anything yet */ | ||
387 | && ioat_chan->last_completion | ||
388 | /* have we completed any since last watchdog cycle? */ | ||
389 | && (ioat_chan->last_completion == | ||
390 | ioat_chan->watchdog_completion) | ||
391 | /* has TCP stuck on one cookie since last watchdog? */ | ||
392 | && (ioat_chan->watchdog_tcp_cookie == | ||
393 | ioat_chan->watchdog_last_tcp_cookie) | ||
394 | && (ioat_chan->watchdog_tcp_cookie != | ||
395 | ioat_chan->completed_cookie) | ||
396 | /* is there something in the chain to be processed? */ | ||
397 | /* CB1 chain always has at least the last one processed */ | ||
398 | && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next) | ||
399 | && ioat_chan->pending == 0) { | ||
400 | |||
401 | /* | ||
402 | * check CHANSTS register for completed | ||
403 | * descriptor address. | ||
404 | * if it is different than completion writeback, | ||
405 | * it is not zero | ||
406 | * and it has changed since the last watchdog | ||
407 | * we can assume that channel | ||
408 | * is still working correctly | ||
409 | * and the problem is in completion writeback. | ||
410 | * update completion writeback | ||
411 | * with actual CHANSTS value | ||
412 | * else | ||
413 | * try resetting the channel | ||
414 | */ | ||
415 | |||
416 | completion_hw.low = readl(ioat_chan->reg_base + | ||
417 | IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version)); | ||
418 | completion_hw.high = readl(ioat_chan->reg_base + | ||
419 | IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version)); | ||
/* on 32-bit builds only the low word fits in the unsigned long address */
420 | #if (BITS_PER_LONG == 64) | ||
421 | compl_desc_addr_hw = | ||
422 | completion_hw.full | ||
423 | & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; | ||
424 | #else | ||
425 | compl_desc_addr_hw = | ||
426 | completion_hw.low & IOAT_LOW_COMPLETION_MASK; | ||
427 | #endif | ||
428 | |||
429 | if ((compl_desc_addr_hw != 0) | ||
430 | && (compl_desc_addr_hw != ioat_chan->watchdog_completion) | ||
431 | && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) { | ||
/* hardware made progress: copy CHANSTS into the writeback area */
432 | ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw; | ||
433 | ioat_chan->completion_virt->low = completion_hw.low; | ||
434 | ioat_chan->completion_virt->high = completion_hw.high; | ||
435 | } else { | ||
436 | ioat_dma_reset_channel(ioat_chan); | ||
437 | ioat_chan->watchdog_completion = 0; | ||
438 | ioat_chan->last_compl_desc_addr_hw = 0; | ||
439 | } | ||
440 | |||
441 | /* | ||
442 | * for version 2.0 if there are descriptors yet to be processed | ||
443 | * and the last completed hasn't changed since the last watchdog | ||
444 | * if they haven't hit the pending level | ||
445 | * issue the pending to push them through | ||
446 | * else | ||
447 | * try resetting the channel | ||
448 | */ | ||
449 | } else if (ioat_chan->device->version == IOAT_VER_2_0 | ||
450 | && ioat_chan->used_desc.prev | ||
451 | && ioat_chan->last_completion | ||
452 | && ioat_chan->last_completion == ioat_chan->watchdog_completion) { | ||
453 | |||
454 | if (ioat_chan->pending < ioat_pending_level) | ||
455 | ioat2_dma_memcpy_issue_pending(&ioat_chan->common); | ||
456 | else { | ||
457 | ioat_dma_reset_channel(ioat_chan); | ||
458 | ioat_chan->watchdog_completion = 0; | ||
459 | } | ||
460 | } else { | ||
/* no stall detected: remember the current completion for the next cycle */
461 | ioat_chan->last_compl_desc_addr_hw = 0; | ||
462 | ioat_chan->watchdog_completion | ||
463 | = ioat_chan->last_completion; | ||
464 | } | ||
465 | |||
/* updated on every path so the next cycle can compare TCP cookies */
466 | ioat_chan->watchdog_last_tcp_cookie = | ||
467 | ioat_chan->watchdog_tcp_cookie; | ||
468 | } | ||
469 | |||
/* re-arm: the watchdog reschedules itself unconditionally */
470 | schedule_delayed_work(&device->work, WATCHDOG_DELAY); | ||
471 | } | ||
472 | |||
473 | static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) | ||
474 | { | ||
475 | struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); | ||
476 | struct ioat_desc_sw *first = tx_to_ioat_desc(tx); | ||
477 | struct ioat_desc_sw *prev, *new; | ||
478 | struct ioat_dma_descriptor *hw; | ||
479 | dma_cookie_t cookie; | ||
480 | LIST_HEAD(new_chain); | ||
481 | u32 copy; | ||
482 | size_t len; | ||
483 | dma_addr_t src, dst; | ||
484 | unsigned long orig_flags; | ||
485 | unsigned int desc_count = 0; | ||
486 | |||
487 | /* src and dest and len are stored in the initial descriptor */ | ||
488 | len = first->len; | ||
489 | src = first->src; | ||
490 | dst = first->dst; | ||
491 | orig_flags = first->async_tx.flags; | ||
492 | new = first; | ||
493 | |||
494 | spin_lock_bh(&ioat_chan->desc_lock); | ||
495 | prev = to_ioat_desc(ioat_chan->used_desc.prev); | ||
496 | prefetch(prev->hw); | ||
497 | do { | ||
498 | copy = min_t(size_t, len, ioat_chan->xfercap); | ||
499 | |||
500 | async_tx_ack(&new->async_tx); | ||
501 | |||
502 | hw = new->hw; | ||
503 | hw->size = copy; | ||
504 | hw->ctl = 0; | ||
505 | hw->src_addr = src; | ||
506 | hw->dst_addr = dst; | ||
507 | hw->next = 0; | ||
508 | |||
509 | /* chain together the physical address list for the HW */ | ||
510 | wmb(); | ||
511 | prev->hw->next = (u64) new->async_tx.phys; | ||
512 | |||
513 | len -= copy; | ||
514 | dst += copy; | ||
515 | src += copy; | ||
516 | |||
517 | list_add_tail(&new->node, &new_chain); | ||
518 | desc_count++; | ||
519 | prev = new; | ||
520 | } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); | ||
521 | |||
522 | if (!new) { | ||
523 | dev_err(&ioat_chan->device->pdev->dev, | ||
524 | "tx submit failed\n"); | ||
525 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
526 | return -ENOMEM; | ||
527 | } | ||
528 | |||
529 | hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; | ||
530 | if (first->async_tx.callback) { | ||
531 | hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; | ||
532 | if (first != new) { | ||
533 | /* move callback into to last desc */ | ||
534 | new->async_tx.callback = first->async_tx.callback; | ||
535 | new->async_tx.callback_param | ||
536 | = first->async_tx.callback_param; | ||
537 | first->async_tx.callback = NULL; | ||
538 | first->async_tx.callback_param = NULL; | ||
539 | } | ||
540 | } | ||
541 | |||
542 | new->tx_cnt = desc_count; | ||
543 | new->async_tx.flags = orig_flags; /* client is in control of this ack */ | ||
544 | |||
545 | /* store the original values for use in later cleanup */ | ||
546 | if (new != first) { | ||
547 | new->src = first->src; | ||
548 | new->dst = first->dst; | ||
549 | new->len = first->len; | ||
550 | } | ||
551 | |||
552 | /* cookie incr and addition to used_list must be atomic */ | ||
553 | cookie = ioat_chan->common.cookie; | ||
554 | cookie++; | ||
555 | if (cookie < 0) | ||
556 | cookie = 1; | ||
557 | ioat_chan->common.cookie = new->async_tx.cookie = cookie; | ||
558 | |||
559 | /* write address into NextDescriptor field of last desc in chain */ | ||
560 | to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = | ||
561 | first->async_tx.phys; | ||
562 | list_splice_tail(&new_chain, &ioat_chan->used_desc); | ||
563 | |||
564 | ioat_chan->dmacount += desc_count; | ||
565 | ioat_chan->pending += desc_count; | ||
566 | if (ioat_chan->pending >= ioat_pending_level) | ||
567 | __ioat1_dma_memcpy_issue_pending(ioat_chan); | ||
568 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
569 | |||
570 | return cookie; | ||
571 | } | ||
572 | |||
573 | static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) | ||
574 | { | ||
575 | struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); | ||
576 | struct ioat_desc_sw *first = tx_to_ioat_desc(tx); | ||
577 | struct ioat_desc_sw *new; | ||
578 | struct ioat_dma_descriptor *hw; | ||
579 | dma_cookie_t cookie; | ||
580 | u32 copy; | ||
581 | size_t len; | ||
582 | dma_addr_t src, dst; | ||
583 | unsigned long orig_flags; | ||
584 | unsigned int desc_count = 0; | ||
585 | |||
586 | /* src and dest and len are stored in the initial descriptor */ | ||
587 | len = first->len; | ||
588 | src = first->src; | ||
589 | dst = first->dst; | ||
590 | orig_flags = first->async_tx.flags; | ||
591 | new = first; | ||
592 | |||
593 | /* | ||
594 | * ioat_chan->desc_lock is still in force in version 2 path | ||
595 | * it gets unlocked at end of this function | ||
596 | */ | ||
597 | do { | ||
598 | copy = min_t(size_t, len, ioat_chan->xfercap); | ||
599 | |||
600 | async_tx_ack(&new->async_tx); | ||
601 | |||
602 | hw = new->hw; | ||
603 | hw->size = copy; | ||
604 | hw->ctl = 0; | ||
605 | hw->src_addr = src; | ||
606 | hw->dst_addr = dst; | ||
607 | |||
608 | len -= copy; | ||
609 | dst += copy; | ||
610 | src += copy; | ||
611 | desc_count++; | ||
612 | } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); | ||
613 | |||
614 | if (!new) { | ||
615 | dev_err(&ioat_chan->device->pdev->dev, | ||
616 | "tx submit failed\n"); | ||
617 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
618 | return -ENOMEM; | ||
619 | } | ||
620 | |||
621 | hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; | ||
622 | if (first->async_tx.callback) { | ||
623 | hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; | ||
624 | if (first != new) { | ||
625 | /* move callback into to last desc */ | ||
626 | new->async_tx.callback = first->async_tx.callback; | ||
627 | new->async_tx.callback_param | ||
628 | = first->async_tx.callback_param; | ||
629 | first->async_tx.callback = NULL; | ||
630 | first->async_tx.callback_param = NULL; | ||
631 | } | ||
632 | } | ||
633 | |||
634 | new->tx_cnt = desc_count; | ||
635 | new->async_tx.flags = orig_flags; /* client is in control of this ack */ | ||
636 | |||
637 | /* store the original values for use in later cleanup */ | ||
638 | if (new != first) { | ||
639 | new->src = first->src; | ||
640 | new->dst = first->dst; | ||
641 | new->len = first->len; | ||
642 | } | ||
643 | |||
644 | /* cookie incr and addition to used_list must be atomic */ | ||
645 | cookie = ioat_chan->common.cookie; | ||
646 | cookie++; | ||
647 | if (cookie < 0) | ||
648 | cookie = 1; | ||
649 | ioat_chan->common.cookie = new->async_tx.cookie = cookie; | ||
650 | |||
651 | ioat_chan->dmacount += desc_count; | ||
652 | ioat_chan->pending += desc_count; | ||
653 | if (ioat_chan->pending >= ioat_pending_level) | ||
654 | __ioat2_dma_memcpy_issue_pending(ioat_chan); | ||
655 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
656 | |||
657 | return cookie; | ||
658 | } | ||
659 | |||
660 | /** | ||
661 | * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair | ||
662 | * @ioat_chan: the channel supplying the memory pool for the descriptors | ||
663 | * @flags: allocation flags | ||
664 | */ | ||
665 | static struct ioat_desc_sw *ioat_dma_alloc_descriptor( | ||
666 | struct ioat_dma_chan *ioat_chan, | ||
667 | gfp_t flags) | ||
668 | { | ||
669 | struct ioat_dma_descriptor *desc; | ||
670 | struct ioat_desc_sw *desc_sw; | ||
671 | struct ioatdma_device *ioatdma_device; | ||
672 | dma_addr_t phys; | ||
673 | |||
674 | ioatdma_device = to_ioatdma_device(ioat_chan->common.device); | ||
675 | desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); | ||
676 | if (unlikely(!desc)) | ||
677 | return NULL; | ||
678 | |||
679 | desc_sw = kzalloc(sizeof(*desc_sw), flags); | ||
680 | if (unlikely(!desc_sw)) { | ||
681 | pci_pool_free(ioatdma_device->dma_pool, desc, phys); | ||
682 | return NULL; | ||
683 | } | ||
684 | |||
685 | memset(desc, 0, sizeof(*desc)); | ||
686 | dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); | ||
687 | switch (ioat_chan->device->version) { | ||
688 | case IOAT_VER_1_2: | ||
689 | desc_sw->async_tx.tx_submit = ioat1_tx_submit; | ||
690 | break; | ||
691 | case IOAT_VER_2_0: | ||
692 | case IOAT_VER_3_0: | ||
693 | desc_sw->async_tx.tx_submit = ioat2_tx_submit; | ||
694 | break; | ||
695 | } | ||
696 | |||
697 | desc_sw->hw = desc; | ||
698 | desc_sw->async_tx.phys = phys; | ||
699 | |||
700 | return desc_sw; | ||
701 | } | ||
702 | |||
703 | static int ioat_initial_desc_count = 256; | ||
704 | module_param(ioat_initial_desc_count, int, 0644); | ||
705 | MODULE_PARM_DESC(ioat_initial_desc_count, | ||
706 | "initial descriptors per channel (default: 256)"); | ||
707 | |||
708 | /** | ||
709 | * ioat2_dma_massage_chan_desc - link the descriptors into a circle | ||
710 | * @ioat_chan: the channel to be massaged | ||
711 | */ | ||
712 | static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) | ||
713 | { | ||
714 | struct ioat_desc_sw *desc, *_desc; | ||
715 | |||
716 | /* setup used_desc */ | ||
717 | ioat_chan->used_desc.next = ioat_chan->free_desc.next; | ||
718 | ioat_chan->used_desc.prev = NULL; | ||
719 | |||
720 | /* pull free_desc out of the circle so that every node is a hw | ||
721 | * descriptor, but leave it pointing to the list | ||
722 | */ | ||
723 | ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next; | ||
724 | ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev; | ||
725 | |||
726 | /* circle link the hw descriptors */ | ||
727 | desc = to_ioat_desc(ioat_chan->free_desc.next); | ||
728 | desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; | ||
729 | list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { | ||
730 | desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; | ||
731 | } | ||
732 | } | ||
733 | |||
734 | /** | ||
735 | * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors | ||
736 | * @chan: the channel to be filled out | ||
737 | */ | ||
738 | static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) | ||
739 | { | ||
740 | struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); | ||
741 | struct ioat_desc_sw *desc; | ||
742 | u16 chanctrl; | ||
743 | u32 chanerr; | ||
744 | int i; | ||
745 | LIST_HEAD(tmp_list); | ||
746 | |||
747 | /* have we already been set up? */ | ||
748 | if (!list_empty(&ioat_chan->free_desc)) | ||
749 | return ioat_chan->desccount; | ||
750 | |||
751 | /* Setup register to interrupt and write completion status on error */ | ||
752 | chanctrl = IOAT_CHANCTRL_ERR_INT_EN | | ||
753 | IOAT_CHANCTRL_ANY_ERR_ABORT_EN | | ||
754 | IOAT_CHANCTRL_ERR_COMPLETION_EN; | ||
755 | writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); | ||
756 | |||
757 | chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); | ||
758 | if (chanerr) { | ||
759 | dev_err(&ioat_chan->device->pdev->dev, | ||
760 | "CHANERR = %x, clearing\n", chanerr); | ||
761 | writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); | ||
762 | } | ||
763 | |||
764 | /* Allocate descriptors */ | ||
765 | for (i = 0; i < ioat_initial_desc_count; i++) { | ||
766 | desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL); | ||
767 | if (!desc) { | ||
768 | dev_err(&ioat_chan->device->pdev->dev, | ||
769 | "Only %d initial descriptors\n", i); | ||
770 | break; | ||
771 | } | ||
772 | list_add_tail(&desc->node, &tmp_list); | ||
773 | } | ||
774 | spin_lock_bh(&ioat_chan->desc_lock); | ||
775 | ioat_chan->desccount = i; | ||
776 | list_splice(&tmp_list, &ioat_chan->free_desc); | ||
777 | if (ioat_chan->device->version != IOAT_VER_1_2) | ||
778 | ioat2_dma_massage_chan_desc(ioat_chan); | ||
779 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
780 | |||
781 | /* allocate a completion writeback area */ | ||
782 | /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ | ||
783 | ioat_chan->completion_virt = | ||
784 | pci_pool_alloc(ioat_chan->device->completion_pool, | ||
785 | GFP_KERNEL, | ||
786 | &ioat_chan->completion_addr); | ||
787 | memset(ioat_chan->completion_virt, 0, | ||
788 | sizeof(*ioat_chan->completion_virt)); | ||
789 | writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF, | ||
790 | ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); | ||
791 | writel(((u64) ioat_chan->completion_addr) >> 32, | ||
792 | ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); | ||
793 | |||
794 | tasklet_enable(&ioat_chan->cleanup_task); | ||
795 | ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */ | ||
796 | return ioat_chan->desccount; | ||
797 | } | ||
798 | |||
799 | /** | ||
800 | * ioat_dma_free_chan_resources - release all the descriptors | ||
801 | * @chan: the channel to be cleaned | ||
802 | */ | ||
803 | static void ioat_dma_free_chan_resources(struct dma_chan *chan) | ||
804 | { | ||
805 | struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); | ||
806 | struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device); | ||
807 | struct ioat_desc_sw *desc, *_desc; | ||
808 | int in_use_descs = 0; | ||
809 | |||
810 | /* Before freeing channel resources first check | ||
811 | * if they have been previously allocated for this channel. | ||
812 | */ | ||
813 | if (ioat_chan->desccount == 0) | ||
814 | return; | ||
815 | |||
816 | tasklet_disable(&ioat_chan->cleanup_task); | ||
817 | ioat_dma_memcpy_cleanup(ioat_chan); | ||
818 | |||
819 | /* Delay 100ms after reset to allow internal DMA logic to quiesce | ||
820 | * before removing DMA descriptor resources. | ||
821 | */ | ||
822 | writeb(IOAT_CHANCMD_RESET, | ||
823 | ioat_chan->reg_base | ||
824 | + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); | ||
825 | mdelay(100); | ||
826 | |||
827 | spin_lock_bh(&ioat_chan->desc_lock); | ||
828 | switch (ioat_chan->device->version) { | ||
829 | case IOAT_VER_1_2: | ||
830 | list_for_each_entry_safe(desc, _desc, | ||
831 | &ioat_chan->used_desc, node) { | ||
832 | in_use_descs++; | ||
833 | list_del(&desc->node); | ||
834 | pci_pool_free(ioatdma_device->dma_pool, desc->hw, | ||
835 | desc->async_tx.phys); | ||
836 | kfree(desc); | ||
837 | } | ||
838 | list_for_each_entry_safe(desc, _desc, | ||
839 | &ioat_chan->free_desc, node) { | ||
840 | list_del(&desc->node); | ||
841 | pci_pool_free(ioatdma_device->dma_pool, desc->hw, | ||
842 | desc->async_tx.phys); | ||
843 | kfree(desc); | ||
844 | } | ||
845 | break; | ||
846 | case IOAT_VER_2_0: | ||
847 | case IOAT_VER_3_0: | ||
848 | list_for_each_entry_safe(desc, _desc, | ||
849 | ioat_chan->free_desc.next, node) { | ||
850 | list_del(&desc->node); | ||
851 | pci_pool_free(ioatdma_device->dma_pool, desc->hw, | ||
852 | desc->async_tx.phys); | ||
853 | kfree(desc); | ||
854 | } | ||
855 | desc = to_ioat_desc(ioat_chan->free_desc.next); | ||
856 | pci_pool_free(ioatdma_device->dma_pool, desc->hw, | ||
857 | desc->async_tx.phys); | ||
858 | kfree(desc); | ||
859 | INIT_LIST_HEAD(&ioat_chan->free_desc); | ||
860 | INIT_LIST_HEAD(&ioat_chan->used_desc); | ||
861 | break; | ||
862 | } | ||
863 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
864 | |||
865 | pci_pool_free(ioatdma_device->completion_pool, | ||
866 | ioat_chan->completion_virt, | ||
867 | ioat_chan->completion_addr); | ||
868 | |||
869 | /* one is ok since we left it on there on purpose */ | ||
870 | if (in_use_descs > 1) | ||
871 | dev_err(&ioat_chan->device->pdev->dev, | ||
872 | "Freeing %d in use descriptors!\n", | ||
873 | in_use_descs - 1); | ||
874 | |||
875 | ioat_chan->last_completion = ioat_chan->completion_addr = 0; | ||
876 | ioat_chan->pending = 0; | ||
877 | ioat_chan->dmacount = 0; | ||
878 | ioat_chan->desccount = 0; | ||
879 | ioat_chan->watchdog_completion = 0; | ||
880 | ioat_chan->last_compl_desc_addr_hw = 0; | ||
881 | ioat_chan->watchdog_tcp_cookie = | ||
882 | ioat_chan->watchdog_last_tcp_cookie = 0; | ||
883 | } | ||
884 | |||
885 | /** | ||
886 | * ioat_dma_get_next_descriptor - return the next available descriptor | ||
887 | * @ioat_chan: IOAT DMA channel handle | ||
888 | * | ||
889 | * Gets the next descriptor from the chain, and must be called with the | ||
890 | * channel's desc_lock held. Allocates more descriptors if the channel | ||
891 | * has run out. | ||
892 | */ | ||
893 | static struct ioat_desc_sw * | ||
894 | ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) | ||
895 | { | ||
896 | struct ioat_desc_sw *new; | ||
897 | |||
898 | if (!list_empty(&ioat_chan->free_desc)) { | ||
899 | new = to_ioat_desc(ioat_chan->free_desc.next); | ||
900 | list_del(&new->node); | ||
901 | } else { | ||
902 | /* try to get another desc */ | ||
903 | new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); | ||
904 | if (!new) { | ||
905 | dev_err(&ioat_chan->device->pdev->dev, | ||
906 | "alloc failed\n"); | ||
907 | return NULL; | ||
908 | } | ||
909 | } | ||
910 | |||
911 | prefetch(new->hw); | ||
912 | return new; | ||
913 | } | ||
914 | |||
915 | static struct ioat_desc_sw * | ||
916 | ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) | ||
917 | { | ||
918 | struct ioat_desc_sw *new; | ||
919 | |||
920 | /* | ||
921 | * used.prev points to where to start processing | ||
922 | * used.next points to next free descriptor | ||
923 | * if used.prev == NULL, there are none waiting to be processed | ||
924 | * if used.next == used.prev.prev, there is only one free descriptor, | ||
925 | * and we need to use it to as a noop descriptor before | ||
926 | * linking in a new set of descriptors, since the device | ||
927 | * has probably already read the pointer to it | ||
928 | */ | ||
929 | if (ioat_chan->used_desc.prev && | ||
930 | ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) { | ||
931 | |||
932 | struct ioat_desc_sw *desc; | ||
933 | struct ioat_desc_sw *noop_desc; | ||
934 | int i; | ||
935 | |||
936 | /* set up the noop descriptor */ | ||
937 | noop_desc = to_ioat_desc(ioat_chan->used_desc.next); | ||
938 | /* set size to non-zero value (channel returns error when size is 0) */ | ||
939 | noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; | ||
940 | noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; | ||
941 | noop_desc->hw->src_addr = 0; | ||
942 | noop_desc->hw->dst_addr = 0; | ||
943 | |||
944 | ioat_chan->used_desc.next = ioat_chan->used_desc.next->next; | ||
945 | ioat_chan->pending++; | ||
946 | ioat_chan->dmacount++; | ||
947 | |||
948 | /* try to get a few more descriptors */ | ||
949 | for (i = 16; i; i--) { | ||
950 | desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); | ||
951 | if (!desc) { | ||
952 | dev_err(&ioat_chan->device->pdev->dev, | ||
953 | "alloc failed\n"); | ||
954 | break; | ||
955 | } | ||
956 | list_add_tail(&desc->node, ioat_chan->used_desc.next); | ||
957 | |||
958 | desc->hw->next | ||
959 | = to_ioat_desc(desc->node.next)->async_tx.phys; | ||
960 | to_ioat_desc(desc->node.prev)->hw->next | ||
961 | = desc->async_tx.phys; | ||
962 | ioat_chan->desccount++; | ||
963 | } | ||
964 | |||
965 | ioat_chan->used_desc.next = noop_desc->node.next; | ||
966 | } | ||
967 | new = to_ioat_desc(ioat_chan->used_desc.next); | ||
968 | prefetch(new); | ||
969 | ioat_chan->used_desc.next = new->node.next; | ||
970 | |||
971 | if (ioat_chan->used_desc.prev == NULL) | ||
972 | ioat_chan->used_desc.prev = &new->node; | ||
973 | |||
974 | prefetch(new->hw); | ||
975 | return new; | ||
976 | } | ||
977 | |||
978 | static struct ioat_desc_sw *ioat_dma_get_next_descriptor( | ||
979 | struct ioat_dma_chan *ioat_chan) | ||
980 | { | ||
981 | if (!ioat_chan) | ||
982 | return NULL; | ||
983 | |||
984 | switch (ioat_chan->device->version) { | ||
985 | case IOAT_VER_1_2: | ||
986 | return ioat1_dma_get_next_descriptor(ioat_chan); | ||
987 | case IOAT_VER_2_0: | ||
988 | case IOAT_VER_3_0: | ||
989 | return ioat2_dma_get_next_descriptor(ioat_chan); | ||
990 | } | ||
991 | return NULL; | ||
992 | } | ||
993 | |||
994 | static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( | ||
995 | struct dma_chan *chan, | ||
996 | dma_addr_t dma_dest, | ||
997 | dma_addr_t dma_src, | ||
998 | size_t len, | ||
999 | unsigned long flags) | ||
1000 | { | ||
1001 | struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); | ||
1002 | struct ioat_desc_sw *new; | ||
1003 | |||
1004 | spin_lock_bh(&ioat_chan->desc_lock); | ||
1005 | new = ioat_dma_get_next_descriptor(ioat_chan); | ||
1006 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
1007 | |||
1008 | if (new) { | ||
1009 | new->len = len; | ||
1010 | new->dst = dma_dest; | ||
1011 | new->src = dma_src; | ||
1012 | new->async_tx.flags = flags; | ||
1013 | return &new->async_tx; | ||
1014 | } else { | ||
1015 | dev_err(&ioat_chan->device->pdev->dev, | ||
1016 | "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", | ||
1017 | chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); | ||
1018 | return NULL; | ||
1019 | } | ||
1020 | } | ||
1021 | |||
1022 | static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( | ||
1023 | struct dma_chan *chan, | ||
1024 | dma_addr_t dma_dest, | ||
1025 | dma_addr_t dma_src, | ||
1026 | size_t len, | ||
1027 | unsigned long flags) | ||
1028 | { | ||
1029 | struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); | ||
1030 | struct ioat_desc_sw *new; | ||
1031 | |||
1032 | spin_lock_bh(&ioat_chan->desc_lock); | ||
1033 | new = ioat2_dma_get_next_descriptor(ioat_chan); | ||
1034 | |||
1035 | /* | ||
1036 | * leave ioat_chan->desc_lock set in ioat 2 path | ||
1037 | * it will get unlocked at end of tx_submit | ||
1038 | */ | ||
1039 | |||
1040 | if (new) { | ||
1041 | new->len = len; | ||
1042 | new->dst = dma_dest; | ||
1043 | new->src = dma_src; | ||
1044 | new->async_tx.flags = flags; | ||
1045 | return &new->async_tx; | ||
1046 | } else { | ||
1047 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
1048 | dev_err(&ioat_chan->device->pdev->dev, | ||
1049 | "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", | ||
1050 | chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); | ||
1051 | return NULL; | ||
1052 | } | ||
1053 | } | ||
1054 | |||
1055 | static void ioat_dma_cleanup_tasklet(unsigned long data) | ||
1056 | { | ||
1057 | struct ioat_dma_chan *chan = (void *)data; | ||
1058 | ioat_dma_memcpy_cleanup(chan); | ||
1059 | writew(IOAT_CHANCTRL_INT_DISABLE, | ||
1060 | chan->reg_base + IOAT_CHANCTRL_OFFSET); | ||
1061 | } | ||
1062 | |||
1063 | static void | ||
1064 | ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) | ||
1065 | { | ||
1066 | if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { | ||
1067 | if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) | ||
1068 | pci_unmap_single(ioat_chan->device->pdev, | ||
1069 | pci_unmap_addr(desc, dst), | ||
1070 | pci_unmap_len(desc, len), | ||
1071 | PCI_DMA_FROMDEVICE); | ||
1072 | else | ||
1073 | pci_unmap_page(ioat_chan->device->pdev, | ||
1074 | pci_unmap_addr(desc, dst), | ||
1075 | pci_unmap_len(desc, len), | ||
1076 | PCI_DMA_FROMDEVICE); | ||
1077 | } | ||
1078 | |||
1079 | if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
1080 | if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) | ||
1081 | pci_unmap_single(ioat_chan->device->pdev, | ||
1082 | pci_unmap_addr(desc, src), | ||
1083 | pci_unmap_len(desc, len), | ||
1084 | PCI_DMA_TODEVICE); | ||
1085 | else | ||
1086 | pci_unmap_page(ioat_chan->device->pdev, | ||
1087 | pci_unmap_addr(desc, src), | ||
1088 | pci_unmap_len(desc, len), | ||
1089 | PCI_DMA_TODEVICE); | ||
1090 | } | ||
1091 | } | ||
1092 | |||
1093 | /** | ||
1094 | * ioat_dma_memcpy_cleanup - cleanup up finished descriptors | ||
1095 | * @chan: ioat channel to be cleaned up | ||
1096 | */ | ||
1097 | static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) | ||
1098 | { | ||
1099 | unsigned long phys_complete; | ||
1100 | struct ioat_desc_sw *desc, *_desc; | ||
1101 | dma_cookie_t cookie = 0; | ||
1102 | unsigned long desc_phys; | ||
1103 | struct ioat_desc_sw *latest_desc; | ||
1104 | |||
1105 | prefetch(ioat_chan->completion_virt); | ||
1106 | |||
1107 | if (!spin_trylock_bh(&ioat_chan->cleanup_lock)) | ||
1108 | return; | ||
1109 | |||
1110 | /* The completion writeback can happen at any time, | ||
1111 | so reads by the driver need to be atomic operations | ||
1112 | The descriptor physical addresses are limited to 32-bits | ||
1113 | when the CPU can only do a 32-bit mov */ | ||
1114 | |||
1115 | #if (BITS_PER_LONG == 64) | ||
1116 | phys_complete = | ||
1117 | ioat_chan->completion_virt->full | ||
1118 | & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; | ||
1119 | #else | ||
1120 | phys_complete = | ||
1121 | ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; | ||
1122 | #endif | ||
1123 | |||
1124 | if ((ioat_chan->completion_virt->full | ||
1125 | & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == | ||
1126 | IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { | ||
1127 | dev_err(&ioat_chan->device->pdev->dev, | ||
1128 | "Channel halted, chanerr = %x\n", | ||
1129 | readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET)); | ||
1130 | |||
1131 | /* TODO do something to salvage the situation */ | ||
1132 | } | ||
1133 | |||
1134 | if (phys_complete == ioat_chan->last_completion) { | ||
1135 | spin_unlock_bh(&ioat_chan->cleanup_lock); | ||
1136 | /* | ||
1137 | * perhaps we're stuck so hard that the watchdog can't go off? | ||
1138 | * try to catch it after 2 seconds | ||
1139 | */ | ||
1140 | if (ioat_chan->device->version != IOAT_VER_3_0) { | ||
1141 | if (time_after(jiffies, | ||
1142 | ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { | ||
1143 | ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); | ||
1144 | ioat_chan->last_completion_time = jiffies; | ||
1145 | } | ||
1146 | } | ||
1147 | return; | ||
1148 | } | ||
1149 | ioat_chan->last_completion_time = jiffies; | ||
1150 | |||
1151 | cookie = 0; | ||
1152 | if (!spin_trylock_bh(&ioat_chan->desc_lock)) { | ||
1153 | spin_unlock_bh(&ioat_chan->cleanup_lock); | ||
1154 | return; | ||
1155 | } | ||
1156 | |||
1157 | switch (ioat_chan->device->version) { | ||
1158 | case IOAT_VER_1_2: | ||
1159 | list_for_each_entry_safe(desc, _desc, | ||
1160 | &ioat_chan->used_desc, node) { | ||
1161 | |||
1162 | /* | ||
1163 | * Incoming DMA requests may use multiple descriptors, | ||
1164 | * due to exceeding xfercap, perhaps. If so, only the | ||
1165 | * last one will have a cookie, and require unmapping. | ||
1166 | */ | ||
1167 | if (desc->async_tx.cookie) { | ||
1168 | cookie = desc->async_tx.cookie; | ||
1169 | ioat_dma_unmap(ioat_chan, desc); | ||
1170 | if (desc->async_tx.callback) { | ||
1171 | desc->async_tx.callback(desc->async_tx.callback_param); | ||
1172 | desc->async_tx.callback = NULL; | ||
1173 | } | ||
1174 | } | ||
1175 | |||
1176 | if (desc->async_tx.phys != phys_complete) { | ||
1177 | /* | ||
1178 | * a completed entry, but not the last, so clean | ||
1179 | * up if the client is done with the descriptor | ||
1180 | */ | ||
1181 | if (async_tx_test_ack(&desc->async_tx)) { | ||
1182 | list_move_tail(&desc->node, | ||
1183 | &ioat_chan->free_desc); | ||
1184 | } else | ||
1185 | desc->async_tx.cookie = 0; | ||
1186 | } else { | ||
1187 | /* | ||
1188 | * last used desc. Do not remove, so we can | ||
1189 | * append from it, but don't look at it next | ||
1190 | * time, either | ||
1191 | */ | ||
1192 | desc->async_tx.cookie = 0; | ||
1193 | |||
1194 | /* TODO check status bits? */ | ||
1195 | break; | ||
1196 | } | ||
1197 | } | ||
1198 | break; | ||
1199 | case IOAT_VER_2_0: | ||
1200 | case IOAT_VER_3_0: | ||
1201 | /* has some other thread has already cleaned up? */ | ||
1202 | if (ioat_chan->used_desc.prev == NULL) | ||
1203 | break; | ||
1204 | |||
1205 | /* work backwards to find latest finished desc */ | ||
1206 | desc = to_ioat_desc(ioat_chan->used_desc.next); | ||
1207 | latest_desc = NULL; | ||
1208 | do { | ||
1209 | desc = to_ioat_desc(desc->node.prev); | ||
1210 | desc_phys = (unsigned long)desc->async_tx.phys | ||
1211 | & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; | ||
1212 | if (desc_phys == phys_complete) { | ||
1213 | latest_desc = desc; | ||
1214 | break; | ||
1215 | } | ||
1216 | } while (&desc->node != ioat_chan->used_desc.prev); | ||
1217 | |||
1218 | if (latest_desc != NULL) { | ||
1219 | |||
1220 | /* work forwards to clear finished descriptors */ | ||
1221 | for (desc = to_ioat_desc(ioat_chan->used_desc.prev); | ||
1222 | &desc->node != latest_desc->node.next && | ||
1223 | &desc->node != ioat_chan->used_desc.next; | ||
1224 | desc = to_ioat_desc(desc->node.next)) { | ||
1225 | if (desc->async_tx.cookie) { | ||
1226 | cookie = desc->async_tx.cookie; | ||
1227 | desc->async_tx.cookie = 0; | ||
1228 | ioat_dma_unmap(ioat_chan, desc); | ||
1229 | if (desc->async_tx.callback) { | ||
1230 | desc->async_tx.callback(desc->async_tx.callback_param); | ||
1231 | desc->async_tx.callback = NULL; | ||
1232 | } | ||
1233 | } | ||
1234 | } | ||
1235 | |||
1236 | /* move used.prev up beyond those that are finished */ | ||
1237 | if (&desc->node == ioat_chan->used_desc.next) | ||
1238 | ioat_chan->used_desc.prev = NULL; | ||
1239 | else | ||
1240 | ioat_chan->used_desc.prev = &desc->node; | ||
1241 | } | ||
1242 | break; | ||
1243 | } | ||
1244 | |||
1245 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
1246 | |||
1247 | ioat_chan->last_completion = phys_complete; | ||
1248 | if (cookie != 0) | ||
1249 | ioat_chan->completed_cookie = cookie; | ||
1250 | |||
1251 | spin_unlock_bh(&ioat_chan->cleanup_lock); | ||
1252 | } | ||
1253 | |||
1254 | /** | ||
1255 | * ioat_dma_is_complete - poll the status of a IOAT DMA transaction | ||
1256 | * @chan: IOAT DMA channel handle | ||
1257 | * @cookie: DMA transaction identifier | ||
1258 | * @done: if not %NULL, updated with last completed transaction | ||
1259 | * @used: if not %NULL, updated with last used transaction | ||
1260 | */ | ||
1261 | static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, | ||
1262 | dma_cookie_t cookie, | ||
1263 | dma_cookie_t *done, | ||
1264 | dma_cookie_t *used) | ||
1265 | { | ||
1266 | struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); | ||
1267 | dma_cookie_t last_used; | ||
1268 | dma_cookie_t last_complete; | ||
1269 | enum dma_status ret; | ||
1270 | |||
1271 | last_used = chan->cookie; | ||
1272 | last_complete = ioat_chan->completed_cookie; | ||
1273 | ioat_chan->watchdog_tcp_cookie = cookie; | ||
1274 | |||
1275 | if (done) | ||
1276 | *done = last_complete; | ||
1277 | if (used) | ||
1278 | *used = last_used; | ||
1279 | |||
1280 | ret = dma_async_is_complete(cookie, last_complete, last_used); | ||
1281 | if (ret == DMA_SUCCESS) | ||
1282 | return ret; | ||
1283 | |||
1284 | ioat_dma_memcpy_cleanup(ioat_chan); | ||
1285 | |||
1286 | last_used = chan->cookie; | ||
1287 | last_complete = ioat_chan->completed_cookie; | ||
1288 | |||
1289 | if (done) | ||
1290 | *done = last_complete; | ||
1291 | if (used) | ||
1292 | *used = last_used; | ||
1293 | |||
1294 | return dma_async_is_complete(cookie, last_complete, last_used); | ||
1295 | } | ||
1296 | |||
1297 | static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) | ||
1298 | { | ||
1299 | struct ioat_desc_sw *desc; | ||
1300 | |||
1301 | spin_lock_bh(&ioat_chan->desc_lock); | ||
1302 | |||
1303 | desc = ioat_dma_get_next_descriptor(ioat_chan); | ||
1304 | |||
1305 | if (!desc) { | ||
1306 | dev_err(&ioat_chan->device->pdev->dev, | ||
1307 | "Unable to start null desc - get next desc failed\n"); | ||
1308 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
1309 | return; | ||
1310 | } | ||
1311 | |||
1312 | desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL | ||
1313 | | IOAT_DMA_DESCRIPTOR_CTL_INT_GN | ||
1314 | | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; | ||
1315 | /* set size to non-zero value (channel returns error when size is 0) */ | ||
1316 | desc->hw->size = NULL_DESC_BUFFER_SIZE; | ||
1317 | desc->hw->src_addr = 0; | ||
1318 | desc->hw->dst_addr = 0; | ||
1319 | async_tx_ack(&desc->async_tx); | ||
1320 | switch (ioat_chan->device->version) { | ||
1321 | case IOAT_VER_1_2: | ||
1322 | desc->hw->next = 0; | ||
1323 | list_add_tail(&desc->node, &ioat_chan->used_desc); | ||
1324 | |||
1325 | writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, | ||
1326 | ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); | ||
1327 | writel(((u64) desc->async_tx.phys) >> 32, | ||
1328 | ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); | ||
1329 | |||
1330 | writeb(IOAT_CHANCMD_START, ioat_chan->reg_base | ||
1331 | + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); | ||
1332 | break; | ||
1333 | case IOAT_VER_2_0: | ||
1334 | case IOAT_VER_3_0: | ||
1335 | writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, | ||
1336 | ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); | ||
1337 | writel(((u64) desc->async_tx.phys) >> 32, | ||
1338 | ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); | ||
1339 | |||
1340 | ioat_chan->dmacount++; | ||
1341 | __ioat2_dma_memcpy_issue_pending(ioat_chan); | ||
1342 | break; | ||
1343 | } | ||
1344 | spin_unlock_bh(&ioat_chan->desc_lock); | ||
1345 | } | ||
1346 | |||
/*
 * Number of bytes copied by the self-test transaction.
 */
#define IOAT_TEST_SIZE 2000

/* Self-test completion hook: the parameter is the completion to signal. */
static void ioat_dma_test_callback(void *dma_async_param)
{
	complete(dma_async_param);
}
1358 | |||
1359 | /** | ||
1360 | * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. | ||
1361 | * @device: device to be tested | ||
1362 | */ | ||
1363 | static int ioat_dma_self_test(struct ioatdma_device *device) | ||
1364 | { | ||
1365 | int i; | ||
1366 | u8 *src; | ||
1367 | u8 *dest; | ||
1368 | struct dma_chan *dma_chan; | ||
1369 | struct dma_async_tx_descriptor *tx; | ||
1370 | dma_addr_t dma_dest, dma_src; | ||
1371 | dma_cookie_t cookie; | ||
1372 | int err = 0; | ||
1373 | struct completion cmp; | ||
1374 | unsigned long tmo; | ||
1375 | unsigned long flags; | ||
1376 | |||
1377 | src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); | ||
1378 | if (!src) | ||
1379 | return -ENOMEM; | ||
1380 | dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); | ||
1381 | if (!dest) { | ||
1382 | kfree(src); | ||
1383 | return -ENOMEM; | ||
1384 | } | ||
1385 | |||
1386 | /* Fill in src buffer */ | ||
1387 | for (i = 0; i < IOAT_TEST_SIZE; i++) | ||
1388 | src[i] = (u8)i; | ||
1389 | |||
1390 | /* Start copy, using first DMA channel */ | ||
1391 | dma_chan = container_of(device->common.channels.next, | ||
1392 | struct dma_chan, | ||
1393 | device_node); | ||
1394 | if (device->common.device_alloc_chan_resources(dma_chan) < 1) { | ||
1395 | dev_err(&device->pdev->dev, | ||
1396 | "selftest cannot allocate chan resource\n"); | ||
1397 | err = -ENODEV; | ||
1398 | goto out; | ||
1399 | } | ||
1400 | |||
1401 | dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, | ||
1402 | DMA_TO_DEVICE); | ||
1403 | dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, | ||
1404 | DMA_FROM_DEVICE); | ||
1405 | flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE; | ||
1406 | tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, | ||
1407 | IOAT_TEST_SIZE, flags); | ||
1408 | if (!tx) { | ||
1409 | dev_err(&device->pdev->dev, | ||
1410 | "Self-test prep failed, disabling\n"); | ||
1411 | err = -ENODEV; | ||
1412 | goto free_resources; | ||
1413 | } | ||
1414 | |||
1415 | async_tx_ack(tx); | ||
1416 | init_completion(&cmp); | ||
1417 | tx->callback = ioat_dma_test_callback; | ||
1418 | tx->callback_param = &cmp; | ||
1419 | cookie = tx->tx_submit(tx); | ||
1420 | if (cookie < 0) { | ||
1421 | dev_err(&device->pdev->dev, | ||
1422 | "Self-test setup failed, disabling\n"); | ||
1423 | err = -ENODEV; | ||
1424 | goto free_resources; | ||
1425 | } | ||
1426 | device->common.device_issue_pending(dma_chan); | ||
1427 | |||
1428 | tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); | ||
1429 | |||
1430 | if (tmo == 0 || | ||
1431 | device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL) | ||
1432 | != DMA_SUCCESS) { | ||
1433 | dev_err(&device->pdev->dev, | ||
1434 | "Self-test copy timed out, disabling\n"); | ||
1435 | err = -ENODEV; | ||
1436 | goto free_resources; | ||
1437 | } | ||
1438 | if (memcmp(src, dest, IOAT_TEST_SIZE)) { | ||
1439 | dev_err(&device->pdev->dev, | ||
1440 | "Self-test copy failed compare, disabling\n"); | ||
1441 | err = -ENODEV; | ||
1442 | goto free_resources; | ||
1443 | } | ||
1444 | |||
1445 | free_resources: | ||
1446 | device->common.device_free_chan_resources(dma_chan); | ||
1447 | out: | ||
1448 | kfree(src); | ||
1449 | kfree(dest); | ||
1450 | return err; | ||
1451 | } | ||
1452 | |||
1453 | static char ioat_interrupt_style[32] = "msix"; | ||
1454 | module_param_string(ioat_interrupt_style, ioat_interrupt_style, | ||
1455 | sizeof(ioat_interrupt_style), 0644); | ||
1456 | MODULE_PARM_DESC(ioat_interrupt_style, | ||
1457 | "set ioat interrupt style: msix (default), " | ||
1458 | "msix-single-vector, msi, intx)"); | ||
1459 | |||
1460 | /** | ||
1461 | * ioat_dma_setup_interrupts - setup interrupt handler | ||
1462 | * @device: ioat device | ||
1463 | */ | ||
1464 | static int ioat_dma_setup_interrupts(struct ioatdma_device *device) | ||
1465 | { | ||
1466 | struct ioat_dma_chan *ioat_chan; | ||
1467 | int err, i, j, msixcnt; | ||
1468 | u8 intrctrl = 0; | ||
1469 | |||
1470 | if (!strcmp(ioat_interrupt_style, "msix")) | ||
1471 | goto msix; | ||
1472 | if (!strcmp(ioat_interrupt_style, "msix-single-vector")) | ||
1473 | goto msix_single_vector; | ||
1474 | if (!strcmp(ioat_interrupt_style, "msi")) | ||
1475 | goto msi; | ||
1476 | if (!strcmp(ioat_interrupt_style, "intx")) | ||
1477 | goto intx; | ||
1478 | dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n", | ||
1479 | ioat_interrupt_style); | ||
1480 | goto err_no_irq; | ||
1481 | |||
1482 | msix: | ||
1483 | /* The number of MSI-X vectors should equal the number of channels */ | ||
1484 | msixcnt = device->common.chancnt; | ||
1485 | for (i = 0; i < msixcnt; i++) | ||
1486 | device->msix_entries[i].entry = i; | ||
1487 | |||
1488 | err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt); | ||
1489 | if (err < 0) | ||
1490 | goto msi; | ||
1491 | if (err > 0) | ||
1492 | goto msix_single_vector; | ||
1493 | |||
1494 | for (i = 0; i < msixcnt; i++) { | ||
1495 | ioat_chan = ioat_lookup_chan_by_index(device, i); | ||
1496 | err = request_irq(device->msix_entries[i].vector, | ||
1497 | ioat_dma_do_interrupt_msix, | ||
1498 | 0, "ioat-msix", ioat_chan); | ||
1499 | if (err) { | ||
1500 | for (j = 0; j < i; j++) { | ||
1501 | ioat_chan = | ||
1502 | ioat_lookup_chan_by_index(device, j); | ||
1503 | free_irq(device->msix_entries[j].vector, | ||
1504 | ioat_chan); | ||
1505 | } | ||
1506 | goto msix_single_vector; | ||
1507 | } | ||
1508 | } | ||
1509 | intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; | ||
1510 | device->irq_mode = msix_multi_vector; | ||
1511 | goto done; | ||
1512 | |||
1513 | msix_single_vector: | ||
1514 | device->msix_entries[0].entry = 0; | ||
1515 | err = pci_enable_msix(device->pdev, device->msix_entries, 1); | ||
1516 | if (err) | ||
1517 | goto msi; | ||
1518 | |||
1519 | err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt, | ||
1520 | 0, "ioat-msix", device); | ||
1521 | if (err) { | ||
1522 | pci_disable_msix(device->pdev); | ||
1523 | goto msi; | ||
1524 | } | ||
1525 | device->irq_mode = msix_single_vector; | ||
1526 | goto done; | ||
1527 | |||
1528 | msi: | ||
1529 | err = pci_enable_msi(device->pdev); | ||
1530 | if (err) | ||
1531 | goto intx; | ||
1532 | |||
1533 | err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, | ||
1534 | 0, "ioat-msi", device); | ||
1535 | if (err) { | ||
1536 | pci_disable_msi(device->pdev); | ||
1537 | goto intx; | ||
1538 | } | ||
1539 | /* | ||
1540 | * CB 1.2 devices need a bit set in configuration space to enable MSI | ||
1541 | */ | ||
1542 | if (device->version == IOAT_VER_1_2) { | ||
1543 | u32 dmactrl; | ||
1544 | pci_read_config_dword(device->pdev, | ||
1545 | IOAT_PCI_DMACTRL_OFFSET, &dmactrl); | ||
1546 | dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; | ||
1547 | pci_write_config_dword(device->pdev, | ||
1548 | IOAT_PCI_DMACTRL_OFFSET, dmactrl); | ||
1549 | } | ||
1550 | device->irq_mode = msi; | ||
1551 | goto done; | ||
1552 | |||
1553 | intx: | ||
1554 | err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, | ||
1555 | IRQF_SHARED, "ioat-intx", device); | ||
1556 | if (err) | ||
1557 | goto err_no_irq; | ||
1558 | device->irq_mode = intx; | ||
1559 | |||
1560 | done: | ||
1561 | intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; | ||
1562 | writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); | ||
1563 | return 0; | ||
1564 | |||
1565 | err_no_irq: | ||
1566 | /* Disable all interrupt generation */ | ||
1567 | writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); | ||
1568 | dev_err(&device->pdev->dev, "no usable interrupts\n"); | ||
1569 | device->irq_mode = none; | ||
1570 | return -1; | ||
1571 | } | ||
1572 | |||
1573 | /** | ||
1574 | * ioat_dma_remove_interrupts - remove whatever interrupts were set | ||
1575 | * @device: ioat device | ||
1576 | */ | ||
1577 | static void ioat_dma_remove_interrupts(struct ioatdma_device *device) | ||
1578 | { | ||
1579 | struct ioat_dma_chan *ioat_chan; | ||
1580 | int i; | ||
1581 | |||
1582 | /* Disable all interrupt generation */ | ||
1583 | writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); | ||
1584 | |||
1585 | switch (device->irq_mode) { | ||
1586 | case msix_multi_vector: | ||
1587 | for (i = 0; i < device->common.chancnt; i++) { | ||
1588 | ioat_chan = ioat_lookup_chan_by_index(device, i); | ||
1589 | free_irq(device->msix_entries[i].vector, ioat_chan); | ||
1590 | } | ||
1591 | pci_disable_msix(device->pdev); | ||
1592 | break; | ||
1593 | case msix_single_vector: | ||
1594 | free_irq(device->msix_entries[0].vector, device); | ||
1595 | pci_disable_msix(device->pdev); | ||
1596 | break; | ||
1597 | case msi: | ||
1598 | free_irq(device->pdev->irq, device); | ||
1599 | pci_disable_msi(device->pdev); | ||
1600 | break; | ||
1601 | case intx: | ||
1602 | free_irq(device->pdev->irq, device); | ||
1603 | break; | ||
1604 | case none: | ||
1605 | dev_warn(&device->pdev->dev, | ||
1606 | "call to %s without interrupts setup\n", __func__); | ||
1607 | } | ||
1608 | device->irq_mode = none; | ||
1609 | } | ||
1610 | |||
1611 | struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, | ||
1612 | void __iomem *iobase) | ||
1613 | { | ||
1614 | int err; | ||
1615 | struct ioatdma_device *device; | ||
1616 | |||
1617 | device = kzalloc(sizeof(*device), GFP_KERNEL); | ||
1618 | if (!device) { | ||
1619 | err = -ENOMEM; | ||
1620 | goto err_kzalloc; | ||
1621 | } | ||
1622 | device->pdev = pdev; | ||
1623 | device->reg_base = iobase; | ||
1624 | device->version = readb(device->reg_base + IOAT_VER_OFFSET); | ||
1625 | |||
1626 | /* DMA coherent memory pool for DMA descriptor allocations */ | ||
1627 | device->dma_pool = pci_pool_create("dma_desc_pool", pdev, | ||
1628 | sizeof(struct ioat_dma_descriptor), | ||
1629 | 64, 0); | ||
1630 | if (!device->dma_pool) { | ||
1631 | err = -ENOMEM; | ||
1632 | goto err_dma_pool; | ||
1633 | } | ||
1634 | |||
1635 | device->completion_pool = pci_pool_create("completion_pool", pdev, | ||
1636 | sizeof(u64), SMP_CACHE_BYTES, | ||
1637 | SMP_CACHE_BYTES); | ||
1638 | if (!device->completion_pool) { | ||
1639 | err = -ENOMEM; | ||
1640 | goto err_completion_pool; | ||
1641 | } | ||
1642 | |||
1643 | INIT_LIST_HEAD(&device->common.channels); | ||
1644 | ioat_dma_enumerate_channels(device); | ||
1645 | |||
1646 | device->common.device_alloc_chan_resources = | ||
1647 | ioat_dma_alloc_chan_resources; | ||
1648 | device->common.device_free_chan_resources = | ||
1649 | ioat_dma_free_chan_resources; | ||
1650 | device->common.dev = &pdev->dev; | ||
1651 | |||
1652 | dma_cap_set(DMA_MEMCPY, device->common.cap_mask); | ||
1653 | device->common.device_is_tx_complete = ioat_dma_is_complete; | ||
1654 | switch (device->version) { | ||
1655 | case IOAT_VER_1_2: | ||
1656 | device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy; | ||
1657 | device->common.device_issue_pending = | ||
1658 | ioat1_dma_memcpy_issue_pending; | ||
1659 | break; | ||
1660 | case IOAT_VER_2_0: | ||
1661 | case IOAT_VER_3_0: | ||
1662 | device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; | ||
1663 | device->common.device_issue_pending = | ||
1664 | ioat2_dma_memcpy_issue_pending; | ||
1665 | break; | ||
1666 | } | ||
1667 | |||
1668 | dev_err(&device->pdev->dev, | ||
1669 | "Intel(R) I/OAT DMA Engine found," | ||
1670 | " %d channels, device version 0x%02x, driver version %s\n", | ||
1671 | device->common.chancnt, device->version, IOAT_DMA_VERSION); | ||
1672 | |||
1673 | if (!device->common.chancnt) { | ||
1674 | dev_err(&device->pdev->dev, | ||
1675 | "Intel(R) I/OAT DMA Engine problem found: " | ||
1676 | "zero channels detected\n"); | ||
1677 | goto err_setup_interrupts; | ||
1678 | } | ||
1679 | |||
1680 | err = ioat_dma_setup_interrupts(device); | ||
1681 | if (err) | ||
1682 | goto err_setup_interrupts; | ||
1683 | |||
1684 | err = ioat_dma_self_test(device); | ||
1685 | if (err) | ||
1686 | goto err_self_test; | ||
1687 | |||
1688 | ioat_set_tcp_copy_break(device); | ||
1689 | |||
1690 | dma_async_device_register(&device->common); | ||
1691 | |||
1692 | if (device->version != IOAT_VER_3_0) { | ||
1693 | INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); | ||
1694 | schedule_delayed_work(&device->work, | ||
1695 | WATCHDOG_DELAY); | ||
1696 | } | ||
1697 | |||
1698 | return device; | ||
1699 | |||
1700 | err_self_test: | ||
1701 | ioat_dma_remove_interrupts(device); | ||
1702 | err_setup_interrupts: | ||
1703 | pci_pool_destroy(device->completion_pool); | ||
1704 | err_completion_pool: | ||
1705 | pci_pool_destroy(device->dma_pool); | ||
1706 | err_dma_pool: | ||
1707 | kfree(device); | ||
1708 | err_kzalloc: | ||
1709 | dev_err(&pdev->dev, | ||
1710 | "Intel(R) I/OAT DMA Engine initialization failed\n"); | ||
1711 | return NULL; | ||
1712 | } | ||
1713 | |||
1714 | void ioat_dma_remove(struct ioatdma_device *device) | ||
1715 | { | ||
1716 | struct dma_chan *chan, *_chan; | ||
1717 | struct ioat_dma_chan *ioat_chan; | ||
1718 | |||
1719 | if (device->version != IOAT_VER_3_0) | ||
1720 | cancel_delayed_work(&device->work); | ||
1721 | |||
1722 | ioat_dma_remove_interrupts(device); | ||
1723 | |||
1724 | dma_async_device_unregister(&device->common); | ||
1725 | |||
1726 | pci_pool_destroy(device->dma_pool); | ||
1727 | pci_pool_destroy(device->completion_pool); | ||
1728 | |||
1729 | iounmap(device->reg_base); | ||
1730 | pci_release_regions(device->pdev); | ||
1731 | pci_disable_device(device->pdev); | ||
1732 | |||
1733 | list_for_each_entry_safe(chan, _chan, | ||
1734 | &device->common.channels, device_node) { | ||
1735 | ioat_chan = to_ioat_chan(chan); | ||
1736 | list_del(&chan->device_node); | ||
1737 | kfree(ioat_chan); | ||
1738 | } | ||
1739 | kfree(device); | ||
1740 | } | ||
1741 | |||
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h deleted file mode 100644 index a52ff4bd4601..000000000000 --- a/drivers/dma/ioatdma.h +++ /dev/null | |||
@@ -1,165 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License as published by the Free | ||
6 | * Software Foundation; either version 2 of the License, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 | ||
16 | * Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in the | ||
19 | * file called COPYING. | ||
20 | */ | ||
21 | #ifndef IOATDMA_H | ||
22 | #define IOATDMA_H | ||
23 | |||
24 | #include <linux/dmaengine.h> | ||
25 | #include "ioatdma_hw.h" | ||
26 | #include <linux/init.h> | ||
27 | #include <linux/dmapool.h> | ||
28 | #include <linux/cache.h> | ||
29 | #include <linux/pci_ids.h> | ||
30 | #include <net/tcp.h> | ||
31 | |||
32 | #define IOAT_DMA_VERSION "3.64" | ||
33 | |||
/*
 * Interrupt delivery style currently installed for a device; stored in
 * ioatdma_device.irq_mode.
 */
enum ioat_interrupt {
	none = 0,		/* no handler installed */
	msix_multi_vector = 1,	/* MSI-X, one vector requested per channel */
	msix_single_vector = 2,	/* MSI-X, a single vector for the device */
	msi = 3,		/* MSI */
	intx = 4,		/* shared legacy interrupt line */
};
41 | |||
#define IOAT_LOW_COMPLETION_MASK	0xffffffc0
/* expansion is parenthesized so it stays one operand wherever it is used */
#define IOAT_DMA_DCA_ANY_CPU		(~0)
#define IOAT_WATCHDOG_PERIOD		(2 * HZ)
45 | |||
46 | |||
/**
 * struct ioatdma_device - internal representation of an IOAT device
 * @pdev: PCI-Express device
 * @reg_base: MMIO register space base address
 * @dma_pool: for allocating DMA descriptors
 * @completion_pool: pool of u64-sized elements
 *	(presumably the per-channel completion writeback areas - confirm
 *	against the channel resource allocation code)
 * @common: embedded struct dma_device
 * @version: version of ioatdma device
 * @irq_mode: which style irq to use
 * @work: delayed work used for the channel watchdog on non-3.0 devices
 * @msix_entries: MSI-X entry table handed to pci_enable_msix()
 * @idx: per channel data
 */
struct ioatdma_device {
	struct pci_dev *pdev;
	void __iomem *reg_base;
	struct pci_pool *dma_pool;
	struct pci_pool *completion_pool;
	struct dma_device common;
	u8 version;
	enum ioat_interrupt irq_mode;
	struct delayed_work work;
	struct msix_entry msix_entries[4];
	struct ioat_dma_chan *idx[4];
};
71 | |||
/**
 * struct ioat_dma_chan - internal representation of a DMA channel
 */
struct ioat_dma_chan {

	void __iomem *reg_base;	/* base for this channel's register accesses */

	dma_cookie_t completed_cookie;	/* last cookie seen completed by cleanup */
	unsigned long last_completion;	/* completion address recorded by cleanup */
	unsigned long last_completion_time; /* NOTE(review): presumably jiffies - confirm */

	size_t xfercap;	/* XFERCAP register value expanded out */

	spinlock_t cleanup_lock;	/* held across descriptor cleanup */
	spinlock_t desc_lock;		/* held while taking or queuing descriptors */
	struct list_head free_desc;
	struct list_head used_desc;	/* descriptors queued to the hardware */
	unsigned long watchdog_completion;
	int watchdog_tcp_cookie;	/* cookie most recently polled via is_complete */
	u32 watchdog_last_tcp_cookie;
	struct delayed_work work;

	int pending;
	int dmacount;
	int desccount;

	struct ioatdma_device *device;	/* owning device */
	struct dma_chan common;		/* embedded generic channel */

	dma_addr_t completion_addr;
	union {
		u64 full; /* HW completion writeback */
		struct {
			u32 low;
			u32 high;
		};
	} *completion_virt;
	unsigned long last_compl_desc_addr_hw;
	struct tasklet_struct cleanup_task;
};
112 | |||
/* wrapper around hardware descriptor format + additional software fields */

/**
 * struct ioat_desc_sw - wrapper around hardware descriptor
 * @hw: hardware DMA descriptor
 * @node: this descriptor will either be on the free list,
 *     or attached to a transaction list (async_tx.tx_list)
 * @tx_cnt: number of descriptors required to complete the transaction
 * @len: NOTE(review): presumably the transfer length in bytes - confirm
 * @src: NOTE(review): presumably the DMA source address - confirm
 * @dst: NOTE(review): presumably the DMA destination address - confirm
 * @async_tx: the generic software descriptor for all engines
 */
struct ioat_desc_sw {
	struct ioat_dma_descriptor *hw;
	struct list_head node;
	int tx_cnt;
	size_t len;
	dma_addr_t src;
	dma_addr_t dst;
	struct dma_async_tx_descriptor async_tx;
};
132 | |||
/*
 * Set sysctl_tcp_dma_copybreak according to the hardware version of @dev.
 * Compiles to nothing without CONFIG_NET_DMA; unknown versions leave the
 * sysctl untouched.
 */
static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
{
	#ifdef CONFIG_NET_DMA
	if (dev->version == IOAT_VER_1_2)
		sysctl_tcp_dma_copybreak = 4096;
	else if (dev->version == IOAT_VER_2_0)
		sysctl_tcp_dma_copybreak = 2048;
	else if (dev->version == IOAT_VER_3_0)
		sysctl_tcp_dma_copybreak = 262144;
	#endif
}
149 | |||
/*
 * Driver entry points. When the driver is neither built in nor built as a
 * module they collapse to stubs: the probe/init macros evaluate to NULL
 * and ioat_dma_remove() does nothing.
 */
#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
				      void __iomem *iobase);
void ioat_dma_remove(struct ioatdma_device *device);
struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
#else
#define ioat_dma_probe(pdev, iobase)	NULL
#define ioat_dma_remove(device)		do { } while (0)
#define ioat_dca_init(pdev, iobase)	NULL
#define ioat2_dca_init(pdev, iobase)	NULL
#define ioat3_dca_init(pdev, iobase)	NULL
#endif
164 | |||
165 | #endif /* IOATDMA_H */ | ||
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h deleted file mode 100644 index afa57eef86c9..000000000000 --- a/drivers/dma/ioatdma_hw.h +++ /dev/null | |||
@@ -1,70 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms of the GNU General Public License as published by the Free | ||
6 | * Software Foundation; either version 2 of the License, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., 59 | ||
16 | * Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * The full GNU General Public License is included in this distribution in the | ||
19 | * file called COPYING. | ||
20 | */ | ||
21 | #ifndef _IOAT_HW_H_ | ||
22 | #define _IOAT_HW_H_ | ||
23 | |||
/* PCI Configuration Space Values */
#define IOAT_PCI_VID            0x8086

/* CB device ID's */
#define IOAT_PCI_DID_5000       0x1A38
#define IOAT_PCI_DID_CNB        0x360B
#define IOAT_PCI_DID_SCNB       0x65FF
#define IOAT_PCI_DID_SNB        0x402F

#define IOAT_PCI_RID            0x00
#define IOAT_PCI_SVID           0x8086
#define IOAT_PCI_SID            0x8086
/* Hardware version values, as compared against ioatdma_device.version */
#define IOAT_VER_1_2            0x12    /* Version 1.2 */
#define IOAT_VER_2_0            0x20    /* Version 2.0 */
#define IOAT_VER_3_0            0x30    /* Version 3.0 */
39 | |||
/*
 * Hardware DMA descriptor; the members add up to 64 bytes.
 * NOTE(review): field meanings below are taken from how the driver fills
 * them in, not from the hardware specification - confirm before relying
 * on them.
 */
struct ioat_dma_descriptor {
	uint32_t	size;		/* must be non-zero, even for a NULL descriptor */
	uint32_t	ctl;		/* IOAT_DMA_DESCRIPTOR_* control bits */
	uint64_t	src_addr;
	uint64_t	dst_addr;
	uint64_t	next;		/* presumably the address of the next descriptor */
	uint64_t	rsv1;
	uint64_t	rsv2;
	uint64_t	user1;
	uint64_t	user2;
};
51 | |||
/* Bits for the ctl member of struct ioat_dma_descriptor */
#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN	0x00000001
#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN	0x00000002
#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN	0x00000004
#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS	0x00000008
#define IOAT_DMA_DESCRIPTOR_CTL_FRAME	0x00000010
#define IOAT_DMA_DESCRIPTOR_NUL		0x00000020
#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK	0x00000040
#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK	0x00000080
#define IOAT_DMA_DESCRIPTOR_CTL_BNDL	0x00000100
#define IOAT_DMA_DESCRIPTOR_CTL_DCA	0x00000200
#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT	0x00000400

/* NOTE(review): CONTEXT and MASK share the value 0xFF000000 - confirm intent */
#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT	0xFF000000
#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA	0x00000000

#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA	0x00000001
#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK	0xFF000000
69 | |||
70 | #endif | ||
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 2f052265122f..645ca8d54ec4 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/platform_device.h> | 31 | #include <linux/platform_device.h> |
32 | #include <linux/memory.h> | 32 | #include <linux/memory.h> |
33 | #include <linux/ioport.h> | 33 | #include <linux/ioport.h> |
34 | #include <linux/raid/pq.h> | ||
34 | 35 | ||
35 | #include <mach/adma.h> | 36 | #include <mach/adma.h> |
36 | 37 | ||
@@ -57,65 +58,110 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) | |||
57 | } | 58 | } |
58 | } | 59 | } |
59 | 60 | ||
61 | static void | ||
62 | iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) | ||
63 | { | ||
64 | struct dma_async_tx_descriptor *tx = &desc->async_tx; | ||
65 | struct iop_adma_desc_slot *unmap = desc->group_head; | ||
66 | struct device *dev = &iop_chan->device->pdev->dev; | ||
67 | u32 len = unmap->unmap_len; | ||
68 | enum dma_ctrl_flags flags = tx->flags; | ||
69 | u32 src_cnt; | ||
70 | dma_addr_t addr; | ||
71 | dma_addr_t dest; | ||
72 | |||
73 | src_cnt = unmap->unmap_src_cnt; | ||
74 | dest = iop_desc_get_dest_addr(unmap, iop_chan); | ||
75 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { | ||
76 | enum dma_data_direction dir; | ||
77 | |||
78 | if (src_cnt > 1) /* is xor? */ | ||
79 | dir = DMA_BIDIRECTIONAL; | ||
80 | else | ||
81 | dir = DMA_FROM_DEVICE; | ||
82 | |||
83 | dma_unmap_page(dev, dest, len, dir); | ||
84 | } | ||
85 | |||
86 | if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
87 | while (src_cnt--) { | ||
88 | addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt); | ||
89 | if (addr == dest) | ||
90 | continue; | ||
91 | dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); | ||
92 | } | ||
93 | } | ||
94 | desc->group_head = NULL; | ||
95 | } | ||
96 | |||
97 | static void | ||
98 | iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) | ||
99 | { | ||
100 | struct dma_async_tx_descriptor *tx = &desc->async_tx; | ||
101 | struct iop_adma_desc_slot *unmap = desc->group_head; | ||
102 | struct device *dev = &iop_chan->device->pdev->dev; | ||
103 | u32 len = unmap->unmap_len; | ||
104 | enum dma_ctrl_flags flags = tx->flags; | ||
105 | u32 src_cnt = unmap->unmap_src_cnt; | ||
106 | dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan); | ||
107 | dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan); | ||
108 | int i; | ||
109 | |||
110 | if (tx->flags & DMA_PREP_CONTINUE) | ||
111 | src_cnt -= 3; | ||
112 | |||
113 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) { | ||
114 | dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL); | ||
115 | dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL); | ||
116 | } | ||
117 | |||
118 | if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
119 | dma_addr_t addr; | ||
120 | |||
121 | for (i = 0; i < src_cnt; i++) { | ||
122 | addr = iop_desc_get_src_addr(unmap, iop_chan, i); | ||
123 | dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); | ||
124 | } | ||
125 | if (desc->pq_check_result) { | ||
126 | dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE); | ||
127 | dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE); | ||
128 | } | ||
129 | } | ||
130 | |||
131 | desc->group_head = NULL; | ||
132 | } | ||
133 | |||
134 | |||
60 | static dma_cookie_t | 135 | static dma_cookie_t |
61 | iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, | 136 | iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, |
62 | struct iop_adma_chan *iop_chan, dma_cookie_t cookie) | 137 | struct iop_adma_chan *iop_chan, dma_cookie_t cookie) |
63 | { | 138 | { |
64 | BUG_ON(desc->async_tx.cookie < 0); | 139 | struct dma_async_tx_descriptor *tx = &desc->async_tx; |
65 | if (desc->async_tx.cookie > 0) { | 140 | |
66 | cookie = desc->async_tx.cookie; | 141 | BUG_ON(tx->cookie < 0); |
67 | desc->async_tx.cookie = 0; | 142 | if (tx->cookie > 0) { |
143 | cookie = tx->cookie; | ||
144 | tx->cookie = 0; | ||
68 | 145 | ||
69 | /* call the callback (must not sleep or submit new | 146 | /* call the callback (must not sleep or submit new |
70 | * operations to this channel) | 147 | * operations to this channel) |
71 | */ | 148 | */ |
72 | if (desc->async_tx.callback) | 149 | if (tx->callback) |
73 | desc->async_tx.callback( | 150 | tx->callback(tx->callback_param); |
74 | desc->async_tx.callback_param); | ||
75 | 151 | ||
76 | /* unmap dma addresses | 152 | /* unmap dma addresses |
77 | * (unmap_single vs unmap_page?) | 153 | * (unmap_single vs unmap_page?) |
78 | */ | 154 | */ |
79 | if (desc->group_head && desc->unmap_len) { | 155 | if (desc->group_head && desc->unmap_len) { |
80 | struct iop_adma_desc_slot *unmap = desc->group_head; | 156 | if (iop_desc_is_pq(desc)) |
81 | struct device *dev = | 157 | iop_desc_unmap_pq(iop_chan, desc); |
82 | &iop_chan->device->pdev->dev; | 158 | else |
83 | u32 len = unmap->unmap_len; | 159 | iop_desc_unmap(iop_chan, desc); |
84 | enum dma_ctrl_flags flags = desc->async_tx.flags; | ||
85 | u32 src_cnt; | ||
86 | dma_addr_t addr; | ||
87 | dma_addr_t dest; | ||
88 | |||
89 | src_cnt = unmap->unmap_src_cnt; | ||
90 | dest = iop_desc_get_dest_addr(unmap, iop_chan); | ||
91 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { | ||
92 | enum dma_data_direction dir; | ||
93 | |||
94 | if (src_cnt > 1) /* is xor? */ | ||
95 | dir = DMA_BIDIRECTIONAL; | ||
96 | else | ||
97 | dir = DMA_FROM_DEVICE; | ||
98 | |||
99 | dma_unmap_page(dev, dest, len, dir); | ||
100 | } | ||
101 | |||
102 | if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
103 | while (src_cnt--) { | ||
104 | addr = iop_desc_get_src_addr(unmap, | ||
105 | iop_chan, | ||
106 | src_cnt); | ||
107 | if (addr == dest) | ||
108 | continue; | ||
109 | dma_unmap_page(dev, addr, len, | ||
110 | DMA_TO_DEVICE); | ||
111 | } | ||
112 | } | ||
113 | desc->group_head = NULL; | ||
114 | } | 160 | } |
115 | } | 161 | } |
116 | 162 | ||
117 | /* run dependent operations */ | 163 | /* run dependent operations */ |
118 | dma_run_dependencies(&desc->async_tx); | 164 | dma_run_dependencies(tx); |
119 | 165 | ||
120 | return cookie; | 166 | return cookie; |
121 | } | 167 | } |
@@ -287,7 +333,12 @@ static void iop_adma_tasklet(unsigned long data) | |||
287 | { | 333 | { |
288 | struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; | 334 | struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; |
289 | 335 | ||
290 | spin_lock(&iop_chan->lock); | 336 | /* lockdep will flag dependency submissions as potentially |
337 | * recursive locking, this is not the case as a dependency | ||
338 | * submission will never recurse a channel's submit routine. | ||
339 | * There are checks in async_tx.c to prevent this. | ||
340 | */ | ||
341 | spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING); | ||
291 | __iop_adma_slot_cleanup(iop_chan); | 342 | __iop_adma_slot_cleanup(iop_chan); |
292 | spin_unlock(&iop_chan->lock); | 343 | spin_unlock(&iop_chan->lock); |
293 | } | 344 | } |
@@ -370,7 +421,7 @@ retry: | |||
370 | } | 421 | } |
371 | alloc_tail->group_head = alloc_start; | 422 | alloc_tail->group_head = alloc_start; |
372 | alloc_tail->async_tx.cookie = -EBUSY; | 423 | alloc_tail->async_tx.cookie = -EBUSY; |
373 | list_splice(&chain, &alloc_tail->async_tx.tx_list); | 424 | list_splice(&chain, &alloc_tail->tx_list); |
374 | iop_chan->last_used = last_used; | 425 | iop_chan->last_used = last_used; |
375 | iop_desc_clear_next_desc(alloc_start); | 426 | iop_desc_clear_next_desc(alloc_start); |
376 | iop_desc_clear_next_desc(alloc_tail); | 427 | iop_desc_clear_next_desc(alloc_tail); |
@@ -429,7 +480,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) | |||
429 | 480 | ||
430 | old_chain_tail = list_entry(iop_chan->chain.prev, | 481 | old_chain_tail = list_entry(iop_chan->chain.prev, |
431 | struct iop_adma_desc_slot, chain_node); | 482 | struct iop_adma_desc_slot, chain_node); |
432 | list_splice_init(&sw_desc->async_tx.tx_list, | 483 | list_splice_init(&sw_desc->tx_list, |
433 | &old_chain_tail->chain_node); | 484 | &old_chain_tail->chain_node); |
434 | 485 | ||
435 | /* fix up the hardware chain */ | 486 | /* fix up the hardware chain */ |
@@ -496,6 +547,7 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan) | |||
496 | 547 | ||
497 | dma_async_tx_descriptor_init(&slot->async_tx, chan); | 548 | dma_async_tx_descriptor_init(&slot->async_tx, chan); |
498 | slot->async_tx.tx_submit = iop_adma_tx_submit; | 549 | slot->async_tx.tx_submit = iop_adma_tx_submit; |
550 | INIT_LIST_HEAD(&slot->tx_list); | ||
499 | INIT_LIST_HEAD(&slot->chain_node); | 551 | INIT_LIST_HEAD(&slot->chain_node); |
500 | INIT_LIST_HEAD(&slot->slot_node); | 552 | INIT_LIST_HEAD(&slot->slot_node); |
501 | hw_desc = (char *) iop_chan->device->dma_desc_pool; | 553 | hw_desc = (char *) iop_chan->device->dma_desc_pool; |
@@ -660,9 +712,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, | |||
660 | } | 712 | } |
661 | 713 | ||
662 | static struct dma_async_tx_descriptor * | 714 | static struct dma_async_tx_descriptor * |
663 | iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, | 715 | iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src, |
664 | unsigned int src_cnt, size_t len, u32 *result, | 716 | unsigned int src_cnt, size_t len, u32 *result, |
665 | unsigned long flags) | 717 | unsigned long flags) |
666 | { | 718 | { |
667 | struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); | 719 | struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); |
668 | struct iop_adma_desc_slot *sw_desc, *grp_start; | 720 | struct iop_adma_desc_slot *sw_desc, *grp_start; |
@@ -696,6 +748,118 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, | |||
696 | return sw_desc ? &sw_desc->async_tx : NULL; | 748 | return sw_desc ? &sw_desc->async_tx : NULL; |
697 | } | 749 | } |
698 | 750 | ||
751 | static struct dma_async_tx_descriptor * | ||
752 | iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, | ||
753 | unsigned int src_cnt, const unsigned char *scf, size_t len, | ||
754 | unsigned long flags) | ||
755 | { | ||
756 | struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); | ||
757 | struct iop_adma_desc_slot *sw_desc, *g; | ||
758 | int slot_cnt, slots_per_op; | ||
759 | int continue_srcs; | ||
760 | |||
761 | if (unlikely(!len)) | ||
762 | return NULL; | ||
763 | BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); | ||
764 | |||
765 | dev_dbg(iop_chan->device->common.dev, | ||
766 | "%s src_cnt: %d len: %u flags: %lx\n", | ||
767 | __func__, src_cnt, len, flags); | ||
768 | |||
769 | if (dmaf_p_disabled_continue(flags)) | ||
770 | continue_srcs = 1+src_cnt; | ||
771 | else if (dmaf_continue(flags)) | ||
772 | continue_srcs = 3+src_cnt; | ||
773 | else | ||
774 | continue_srcs = 0+src_cnt; | ||
775 | |||
776 | spin_lock_bh(&iop_chan->lock); | ||
777 | slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op); | ||
778 | sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); | ||
779 | if (sw_desc) { | ||
780 | int i; | ||
781 | |||
782 | g = sw_desc->group_head; | ||
783 | iop_desc_set_byte_count(g, iop_chan, len); | ||
784 | |||
785 | /* even if P is disabled its destination address (bits | ||
786 | * [3:0]) must match Q. It is ok if P points to an | ||
787 | * invalid address, it won't be written. | ||
788 | */ | ||
789 | if (flags & DMA_PREP_PQ_DISABLE_P) | ||
790 | dst[0] = dst[1] & 0x7; | ||
791 | |||
792 | iop_desc_set_pq_addr(g, dst); | ||
793 | sw_desc->unmap_src_cnt = src_cnt; | ||
794 | sw_desc->unmap_len = len; | ||
795 | sw_desc->async_tx.flags = flags; | ||
796 | for (i = 0; i < src_cnt; i++) | ||
797 | iop_desc_set_pq_src_addr(g, i, src[i], scf[i]); | ||
798 | |||
799 | /* if we are continuing a previous operation factor in | ||
800 | * the old p and q values, see the comment for dma_maxpq | ||
801 | * in include/linux/dmaengine.h | ||
802 | */ | ||
803 | if (dmaf_p_disabled_continue(flags)) | ||
804 | iop_desc_set_pq_src_addr(g, i++, dst[1], 1); | ||
805 | else if (dmaf_continue(flags)) { | ||
806 | iop_desc_set_pq_src_addr(g, i++, dst[0], 0); | ||
807 | iop_desc_set_pq_src_addr(g, i++, dst[1], 1); | ||
808 | iop_desc_set_pq_src_addr(g, i++, dst[1], 0); | ||
809 | } | ||
810 | iop_desc_init_pq(g, i, flags); | ||
811 | } | ||
812 | spin_unlock_bh(&iop_chan->lock); | ||
813 | |||
814 | return sw_desc ? &sw_desc->async_tx : NULL; | ||
815 | } | ||
816 | |||
817 | static struct dma_async_tx_descriptor * | ||
818 | iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, | ||
819 | unsigned int src_cnt, const unsigned char *scf, | ||
820 | size_t len, enum sum_check_flags *pqres, | ||
821 | unsigned long flags) | ||
822 | { | ||
823 | struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); | ||
824 | struct iop_adma_desc_slot *sw_desc, *g; | ||
825 | int slot_cnt, slots_per_op; | ||
826 | |||
827 | if (unlikely(!len)) | ||
828 | return NULL; | ||
829 | BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); | ||
830 | |||
831 | dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n", | ||
832 | __func__, src_cnt, len); | ||
833 | |||
834 | spin_lock_bh(&iop_chan->lock); | ||
835 | slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op); | ||
836 | sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); | ||
837 | if (sw_desc) { | ||
838 | /* for validate operations p and q are tagged onto the | ||
839 | * end of the source list | ||
840 | */ | ||
841 | int pq_idx = src_cnt; | ||
842 | |||
843 | g = sw_desc->group_head; | ||
844 | iop_desc_init_pq_zero_sum(g, src_cnt+2, flags); | ||
845 | iop_desc_set_pq_zero_sum_byte_count(g, len); | ||
846 | g->pq_check_result = pqres; | ||
847 | pr_debug("\t%s: g->pq_check_result: %p\n", | ||
848 | __func__, g->pq_check_result); | ||
849 | sw_desc->unmap_src_cnt = src_cnt+2; | ||
850 | sw_desc->unmap_len = len; | ||
851 | sw_desc->async_tx.flags = flags; | ||
852 | while (src_cnt--) | ||
853 | iop_desc_set_pq_zero_sum_src_addr(g, src_cnt, | ||
854 | src[src_cnt], | ||
855 | scf[src_cnt]); | ||
856 | iop_desc_set_pq_zero_sum_addr(g, pq_idx, src); | ||
857 | } | ||
858 | spin_unlock_bh(&iop_chan->lock); | ||
859 | |||
860 | return sw_desc ? &sw_desc->async_tx : NULL; | ||
861 | } | ||
862 | |||
699 | static void iop_adma_free_chan_resources(struct dma_chan *chan) | 863 | static void iop_adma_free_chan_resources(struct dma_chan *chan) |
700 | { | 864 | { |
701 | struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); | 865 | struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); |
@@ -906,7 +1070,7 @@ out: | |||
906 | 1070 | ||
907 | #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ | 1071 | #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ |
908 | static int __devinit | 1072 | static int __devinit |
909 | iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) | 1073 | iop_adma_xor_val_self_test(struct iop_adma_device *device) |
910 | { | 1074 | { |
911 | int i, src_idx; | 1075 | int i, src_idx; |
912 | struct page *dest; | 1076 | struct page *dest; |
@@ -1002,7 +1166,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) | |||
1002 | PAGE_SIZE, DMA_TO_DEVICE); | 1166 | PAGE_SIZE, DMA_TO_DEVICE); |
1003 | 1167 | ||
1004 | /* skip zero sum if the capability is not present */ | 1168 | /* skip zero sum if the capability is not present */ |
1005 | if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) | 1169 | if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) |
1006 | goto free_resources; | 1170 | goto free_resources; |
1007 | 1171 | ||
1008 | /* zero sum the sources with the destination page */ | 1172 | /* zero sum the sources with the destination page */ |
@@ -1016,10 +1180,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) | |||
1016 | dma_srcs[i] = dma_map_page(dma_chan->device->dev, | 1180 | dma_srcs[i] = dma_map_page(dma_chan->device->dev, |
1017 | zero_sum_srcs[i], 0, PAGE_SIZE, | 1181 | zero_sum_srcs[i], 0, PAGE_SIZE, |
1018 | DMA_TO_DEVICE); | 1182 | DMA_TO_DEVICE); |
1019 | tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, | 1183 | tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, |
1020 | IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, | 1184 | IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, |
1021 | &zero_sum_result, | 1185 | &zero_sum_result, |
1022 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | 1186 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); |
1023 | 1187 | ||
1024 | cookie = iop_adma_tx_submit(tx); | 1188 | cookie = iop_adma_tx_submit(tx); |
1025 | iop_adma_issue_pending(dma_chan); | 1189 | iop_adma_issue_pending(dma_chan); |
@@ -1072,10 +1236,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) | |||
1072 | dma_srcs[i] = dma_map_page(dma_chan->device->dev, | 1236 | dma_srcs[i] = dma_map_page(dma_chan->device->dev, |
1073 | zero_sum_srcs[i], 0, PAGE_SIZE, | 1237 | zero_sum_srcs[i], 0, PAGE_SIZE, |
1074 | DMA_TO_DEVICE); | 1238 | DMA_TO_DEVICE); |
1075 | tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, | 1239 | tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs, |
1076 | IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, | 1240 | IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, |
1077 | &zero_sum_result, | 1241 | &zero_sum_result, |
1078 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | 1242 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); |
1079 | 1243 | ||
1080 | cookie = iop_adma_tx_submit(tx); | 1244 | cookie = iop_adma_tx_submit(tx); |
1081 | iop_adma_issue_pending(dma_chan); | 1245 | iop_adma_issue_pending(dma_chan); |
@@ -1105,6 +1269,170 @@ out: | |||
1105 | return err; | 1269 | return err; |
1106 | } | 1270 | } |
1107 | 1271 | ||
1272 | #ifdef CONFIG_MD_RAID6_PQ | ||
1273 | static int __devinit | ||
1274 | iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device) | ||
1275 | { | ||
1276 | /* combined sources, software pq results, and extra hw pq results */ | ||
1277 | struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2]; | ||
1278 | /* ptr to the extra hw pq buffers defined above */ | ||
1279 | struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2]; | ||
1280 | /* address conversion buffers (dma_map / page_address) */ | ||
1281 | void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2]; | ||
1282 | dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST]; | ||
1283 | dma_addr_t pq_dest[2]; | ||
1284 | |||
1285 | int i; | ||
1286 | struct dma_async_tx_descriptor *tx; | ||
1287 | struct dma_chan *dma_chan; | ||
1288 | dma_cookie_t cookie; | ||
1289 | u32 zero_sum_result; | ||
1290 | int err = 0; | ||
1291 | struct device *dev; | ||
1292 | |||
1293 | dev_dbg(device->common.dev, "%s\n", __func__); | ||
1294 | |||
1295 | for (i = 0; i < ARRAY_SIZE(pq); i++) { | ||
1296 | pq[i] = alloc_page(GFP_KERNEL); | ||
1297 | if (!pq[i]) { | ||
1298 | while (i--) | ||
1299 | __free_page(pq[i]); | ||
1300 | return -ENOMEM; | ||
1301 | } | ||
1302 | } | ||
1303 | |||
1304 | /* Fill in src buffers */ | ||
1305 | for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) { | ||
1306 | pq_sw[i] = page_address(pq[i]); | ||
1307 | memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE); | ||
1308 | } | ||
1309 | pq_sw[i] = page_address(pq[i]); | ||
1310 | pq_sw[i+1] = page_address(pq[i+1]); | ||
1311 | |||
1312 | dma_chan = container_of(device->common.channels.next, | ||
1313 | struct dma_chan, | ||
1314 | device_node); | ||
1315 | if (iop_adma_alloc_chan_resources(dma_chan) < 1) { | ||
1316 | err = -ENODEV; | ||
1317 | goto out; | ||
1318 | } | ||
1319 | |||
1320 | dev = dma_chan->device->dev; | ||
1321 | |||
1322 | /* initialize the dests */ | ||
1323 | memset(page_address(pq_hw[0]), 0 , PAGE_SIZE); | ||
1324 | memset(page_address(pq_hw[1]), 0 , PAGE_SIZE); | ||
1325 | |||
1326 | /* test pq */ | ||
1327 | pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE); | ||
1328 | pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE); | ||
1329 | for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) | ||
1330 | pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, | ||
1331 | DMA_TO_DEVICE); | ||
1332 | |||
1333 | tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src, | ||
1334 | IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp, | ||
1335 | PAGE_SIZE, | ||
1336 | DMA_PREP_INTERRUPT | | ||
1337 | DMA_CTRL_ACK); | ||
1338 | |||
1339 | cookie = iop_adma_tx_submit(tx); | ||
1340 | iop_adma_issue_pending(dma_chan); | ||
1341 | msleep(8); | ||
1342 | |||
1343 | if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != | ||
1344 | DMA_SUCCESS) { | ||
1345 | dev_err(dev, "Self-test pq timed out, disabling\n"); | ||
1346 | err = -ENODEV; | ||
1347 | goto free_resources; | ||
1348 | } | ||
1349 | |||
1350 | raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw); | ||
1351 | |||
1352 | if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST], | ||
1353 | page_address(pq_hw[0]), PAGE_SIZE) != 0) { | ||
1354 | dev_err(dev, "Self-test p failed compare, disabling\n"); | ||
1355 | err = -ENODEV; | ||
1356 | goto free_resources; | ||
1357 | } | ||
1358 | if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1], | ||
1359 | page_address(pq_hw[1]), PAGE_SIZE) != 0) { | ||
1360 | dev_err(dev, "Self-test q failed compare, disabling\n"); | ||
1361 | err = -ENODEV; | ||
1362 | goto free_resources; | ||
1363 | } | ||
1364 | |||
1365 | /* test correct zero sum using the software generated pq values */ | ||
1366 | for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) | ||
1367 | pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, | ||
1368 | DMA_TO_DEVICE); | ||
1369 | |||
1370 | zero_sum_result = ~0; | ||
1371 | tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], | ||
1372 | pq_src, IOP_ADMA_NUM_SRC_TEST, | ||
1373 | raid6_gfexp, PAGE_SIZE, &zero_sum_result, | ||
1374 | DMA_PREP_INTERRUPT|DMA_CTRL_ACK); | ||
1375 | |||
1376 | cookie = iop_adma_tx_submit(tx); | ||
1377 | iop_adma_issue_pending(dma_chan); | ||
1378 | msleep(8); | ||
1379 | |||
1380 | if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != | ||
1381 | DMA_SUCCESS) { | ||
1382 | dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n"); | ||
1383 | err = -ENODEV; | ||
1384 | goto free_resources; | ||
1385 | } | ||
1386 | |||
1387 | if (zero_sum_result != 0) { | ||
1388 | dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n", | ||
1389 | zero_sum_result); | ||
1390 | err = -ENODEV; | ||
1391 | goto free_resources; | ||
1392 | } | ||
1393 | |||
1394 | /* test incorrect zero sum */ | ||
1395 | i = IOP_ADMA_NUM_SRC_TEST; | ||
1396 | memset(pq_sw[i] + 100, 0, 100); | ||
1397 | memset(pq_sw[i+1] + 200, 0, 200); | ||
1398 | for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++) | ||
1399 | pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE, | ||
1400 | DMA_TO_DEVICE); | ||
1401 | |||
1402 | zero_sum_result = 0; | ||
1403 | tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST], | ||
1404 | pq_src, IOP_ADMA_NUM_SRC_TEST, | ||
1405 | raid6_gfexp, PAGE_SIZE, &zero_sum_result, | ||
1406 | DMA_PREP_INTERRUPT|DMA_CTRL_ACK); | ||
1407 | |||
1408 | cookie = iop_adma_tx_submit(tx); | ||
1409 | iop_adma_issue_pending(dma_chan); | ||
1410 | msleep(8); | ||
1411 | |||
1412 | if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != | ||
1413 | DMA_SUCCESS) { | ||
1414 | dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n"); | ||
1415 | err = -ENODEV; | ||
1416 | goto free_resources; | ||
1417 | } | ||
1418 | |||
1419 | if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) { | ||
1420 | dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n", | ||
1421 | zero_sum_result); | ||
1422 | err = -ENODEV; | ||
1423 | goto free_resources; | ||
1424 | } | ||
1425 | |||
1426 | free_resources: | ||
1427 | iop_adma_free_chan_resources(dma_chan); | ||
1428 | out: | ||
1429 | i = ARRAY_SIZE(pq); | ||
1430 | while (i--) | ||
1431 | __free_page(pq[i]); | ||
1432 | return err; | ||
1433 | } | ||
1434 | #endif | ||
1435 | |||
1108 | static int __devexit iop_adma_remove(struct platform_device *dev) | 1436 | static int __devexit iop_adma_remove(struct platform_device *dev) |
1109 | { | 1437 | { |
1110 | struct iop_adma_device *device = platform_get_drvdata(dev); | 1438 | struct iop_adma_device *device = platform_get_drvdata(dev); |
@@ -1192,9 +1520,16 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) | |||
1192 | dma_dev->max_xor = iop_adma_get_max_xor(); | 1520 | dma_dev->max_xor = iop_adma_get_max_xor(); |
1193 | dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; | 1521 | dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; |
1194 | } | 1522 | } |
1195 | if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) | 1523 | if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask)) |
1196 | dma_dev->device_prep_dma_zero_sum = | 1524 | dma_dev->device_prep_dma_xor_val = |
1197 | iop_adma_prep_dma_zero_sum; | 1525 | iop_adma_prep_dma_xor_val; |
1526 | if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { | ||
1527 | dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0); | ||
1528 | dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq; | ||
1529 | } | ||
1530 | if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) | ||
1531 | dma_dev->device_prep_dma_pq_val = | ||
1532 | iop_adma_prep_dma_pq_val; | ||
1198 | if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) | 1533 | if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) |
1199 | dma_dev->device_prep_dma_interrupt = | 1534 | dma_dev->device_prep_dma_interrupt = |
1200 | iop_adma_prep_dma_interrupt; | 1535 | iop_adma_prep_dma_interrupt; |
@@ -1248,23 +1583,35 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) | |||
1248 | } | 1583 | } |
1249 | 1584 | ||
1250 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || | 1585 | if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || |
1251 | dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { | 1586 | dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { |
1252 | ret = iop_adma_xor_zero_sum_self_test(adev); | 1587 | ret = iop_adma_xor_val_self_test(adev); |
1253 | dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); | 1588 | dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); |
1254 | if (ret) | 1589 | if (ret) |
1255 | goto err_free_iop_chan; | 1590 | goto err_free_iop_chan; |
1256 | } | 1591 | } |
1257 | 1592 | ||
1593 | if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) && | ||
1594 | dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) { | ||
1595 | #ifdef CONFIG_MD_RAID6_PQ | ||
1596 | ret = iop_adma_pq_zero_sum_self_test(adev); | ||
1597 | dev_dbg(&pdev->dev, "pq self test returned %d\n", ret); | ||
1598 | #else | ||
1599 | /* can not test raid6, so do not publish capability */ | ||
1600 | dma_cap_clear(DMA_PQ, dma_dev->cap_mask); | ||
1601 | dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask); | ||
1602 | ret = 0; | ||
1603 | #endif | ||
1604 | if (ret) | ||
1605 | goto err_free_iop_chan; | ||
1606 | } | ||
1607 | |||
1258 | dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " | 1608 | dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " |
1259 | "( %s%s%s%s%s%s%s%s%s%s)\n", | 1609 | "( %s%s%s%s%s%s%s)\n", |
1260 | dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", | 1610 | dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", |
1261 | dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", | 1611 | dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", |
1262 | dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", | ||
1263 | dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", | 1612 | dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", |
1264 | dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", | 1613 | dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "", |
1265 | dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "", | ||
1266 | dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", | 1614 | dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", |
1267 | dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", | ||
1268 | dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", | 1615 | dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", |
1269 | dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); | 1616 | dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); |
1270 | 1617 | ||
@@ -1296,7 +1643,7 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) | |||
1296 | if (sw_desc) { | 1643 | if (sw_desc) { |
1297 | grp_start = sw_desc->group_head; | 1644 | grp_start = sw_desc->group_head; |
1298 | 1645 | ||
1299 | list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); | 1646 | list_splice_init(&sw_desc->tx_list, &iop_chan->chain); |
1300 | async_tx_ack(&sw_desc->async_tx); | 1647 | async_tx_ack(&sw_desc->async_tx); |
1301 | iop_desc_init_memcpy(grp_start, 0); | 1648 | iop_desc_init_memcpy(grp_start, 0); |
1302 | iop_desc_set_byte_count(grp_start, iop_chan, 0); | 1649 | iop_desc_set_byte_count(grp_start, iop_chan, 0); |
@@ -1352,7 +1699,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) | |||
1352 | sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); | 1699 | sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); |
1353 | if (sw_desc) { | 1700 | if (sw_desc) { |
1354 | grp_start = sw_desc->group_head; | 1701 | grp_start = sw_desc->group_head; |
1355 | list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); | 1702 | list_splice_init(&sw_desc->tx_list, &iop_chan->chain); |
1356 | async_tx_ack(&sw_desc->async_tx); | 1703 | async_tx_ack(&sw_desc->async_tx); |
1357 | iop_desc_init_null_xor(grp_start, 2, 0); | 1704 | iop_desc_init_null_xor(grp_start, 2, 0); |
1358 | iop_desc_set_byte_count(grp_start, iop_chan, 0); | 1705 | iop_desc_set_byte_count(grp_start, iop_chan, 0); |
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c index 9f6fe46a9b87..c0a272c73682 100644 --- a/drivers/dma/iovlock.c +++ b/drivers/dma/iovlock.c | |||
@@ -183,6 +183,11 @@ dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov, | |||
183 | iov_byte_offset, | 183 | iov_byte_offset, |
184 | kdata, | 184 | kdata, |
185 | copy); | 185 | copy); |
186 | /* poll for a descriptor slot */ | ||
187 | if (unlikely(dma_cookie < 0)) { | ||
188 | dma_async_issue_pending(chan); | ||
189 | continue; | ||
190 | } | ||
186 | 191 | ||
187 | len -= copy; | 192 | len -= copy; |
188 | iov[iovec_idx].iov_len -= copy; | 193 | iov[iovec_idx].iov_len -= copy; |
@@ -248,6 +253,11 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, | |||
248 | page, | 253 | page, |
249 | offset, | 254 | offset, |
250 | copy); | 255 | copy); |
256 | /* poll for a descriptor slot */ | ||
257 | if (unlikely(dma_cookie < 0)) { | ||
258 | dma_async_issue_pending(chan); | ||
259 | continue; | ||
260 | } | ||
251 | 261 | ||
252 | len -= copy; | 262 | len -= copy; |
253 | iov[iovec_idx].iov_len -= copy; | 263 | iov[iovec_idx].iov_len -= copy; |
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 3f23eabe09f2..466ab10c1ff1 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c | |||
@@ -517,7 +517,7 @@ retry: | |||
517 | } | 517 | } |
518 | alloc_tail->group_head = alloc_start; | 518 | alloc_tail->group_head = alloc_start; |
519 | alloc_tail->async_tx.cookie = -EBUSY; | 519 | alloc_tail->async_tx.cookie = -EBUSY; |
520 | list_splice(&chain, &alloc_tail->async_tx.tx_list); | 520 | list_splice(&chain, &alloc_tail->tx_list); |
521 | mv_chan->last_used = last_used; | 521 | mv_chan->last_used = last_used; |
522 | mv_desc_clear_next_desc(alloc_start); | 522 | mv_desc_clear_next_desc(alloc_start); |
523 | mv_desc_clear_next_desc(alloc_tail); | 523 | mv_desc_clear_next_desc(alloc_tail); |
@@ -565,14 +565,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) | |||
565 | cookie = mv_desc_assign_cookie(mv_chan, sw_desc); | 565 | cookie = mv_desc_assign_cookie(mv_chan, sw_desc); |
566 | 566 | ||
567 | if (list_empty(&mv_chan->chain)) | 567 | if (list_empty(&mv_chan->chain)) |
568 | list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain); | 568 | list_splice_init(&sw_desc->tx_list, &mv_chan->chain); |
569 | else { | 569 | else { |
570 | new_hw_chain = 0; | 570 | new_hw_chain = 0; |
571 | 571 | ||
572 | old_chain_tail = list_entry(mv_chan->chain.prev, | 572 | old_chain_tail = list_entry(mv_chan->chain.prev, |
573 | struct mv_xor_desc_slot, | 573 | struct mv_xor_desc_slot, |
574 | chain_node); | 574 | chain_node); |
575 | list_splice_init(&grp_start->async_tx.tx_list, | 575 | list_splice_init(&grp_start->tx_list, |
576 | &old_chain_tail->chain_node); | 576 | &old_chain_tail->chain_node); |
577 | 577 | ||
578 | if (!mv_can_chain(grp_start)) | 578 | if (!mv_can_chain(grp_start)) |
@@ -632,6 +632,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan) | |||
632 | slot->async_tx.tx_submit = mv_xor_tx_submit; | 632 | slot->async_tx.tx_submit = mv_xor_tx_submit; |
633 | INIT_LIST_HEAD(&slot->chain_node); | 633 | INIT_LIST_HEAD(&slot->chain_node); |
634 | INIT_LIST_HEAD(&slot->slot_node); | 634 | INIT_LIST_HEAD(&slot->slot_node); |
635 | INIT_LIST_HEAD(&slot->tx_list); | ||
635 | hw_desc = (char *) mv_chan->device->dma_desc_pool; | 636 | hw_desc = (char *) mv_chan->device->dma_desc_pool; |
636 | slot->async_tx.phys = | 637 | slot->async_tx.phys = |
637 | (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE]; | 638 | (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE]; |
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 06cafe1ef521..977b592e976b 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h | |||
@@ -126,9 +126,8 @@ struct mv_xor_chan { | |||
126 | * @idx: pool index | 126 | * @idx: pool index |
127 | * @unmap_src_cnt: number of xor sources | 127 | * @unmap_src_cnt: number of xor sources |
128 | * @unmap_len: transaction bytecount | 128 | * @unmap_len: transaction bytecount |
129 | * @tx_list: list of slots that make up a multi-descriptor transaction | ||
129 | * @async_tx: support for the async_tx api | 130 | * @async_tx: support for the async_tx api |
130 | * @group_list: list of slots that make up a multi-descriptor transaction | ||
131 | * for example transfer lengths larger than the supported hw max | ||
132 | * @xor_check_result: result of zero sum | 131 | * @xor_check_result: result of zero sum |
133 | * @crc32_result: result crc calculation | 132 | * @crc32_result: result crc calculation |
134 | */ | 133 | */ |
@@ -145,6 +144,7 @@ struct mv_xor_desc_slot { | |||
145 | u16 unmap_src_cnt; | 144 | u16 unmap_src_cnt; |
146 | u32 value; | 145 | u32 value; |
147 | size_t unmap_len; | 146 | size_t unmap_len; |
147 | struct list_head tx_list; | ||
148 | struct dma_async_tx_descriptor async_tx; | 148 | struct dma_async_tx_descriptor async_tx; |
149 | union { | 149 | union { |
150 | u32 *xor_check_result; | 150 | u32 *xor_check_result; |
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c new file mode 100644 index 000000000000..b3b065c4e5c1 --- /dev/null +++ b/drivers/dma/shdma.c | |||
@@ -0,0 +1,786 @@ | |||
1 | /* | ||
2 | * Renesas SuperH DMA Engine support | ||
3 | * | ||
4 | * based on drivers/dma/fsldma.c | ||
5 | * | ||
6 | * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com> | ||
7 | * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. | ||
8 | * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. | ||
9 | * | ||
10 | * This is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * - DMA of SuperH does not have Hardware DMA chain mode. | ||
16 | * - MAX DMA size is 16MB. | ||
17 | * | ||
18 | */ | ||
19 | |||
20 | #include <linux/init.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/interrupt.h> | ||
23 | #include <linux/dmaengine.h> | ||
24 | #include <linux/delay.h> | ||
25 | #include <linux/dma-mapping.h> | ||
26 | #include <linux/dmapool.h> | ||
27 | #include <linux/platform_device.h> | ||
28 | #include <cpu/dma.h> | ||
29 | #include <asm/dma-sh.h> | ||
30 | #include "shdma.h" | ||
31 | |||
32 | /* DMA descriptor control */ | ||
33 | #define DESC_LAST (-1) | ||
34 | #define DESC_COMP (1) | ||
35 | #define DESC_NCOMP (0) | ||
36 | |||
37 | #define NR_DESCS_PER_CHANNEL 32 | ||
38 | /* | ||
39 | * Define the default configuration for dual address memory-memory transfer. | ||
40 | * The 0x400 value represents auto-request, external->external. | ||
41 | * | ||
42 | * This driver uses 4-byte burst mode. | ||
43 | * To change the mode, change the value of RS_DEFAULT | ||
44 | * (e.g. 1-byte burst mode -> (RS_DUAL & ~TS_32)). | ||
45 | */ | ||
46 | #define RS_DEFAULT (RS_DUAL) | ||
47 | |||
48 | #define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id]) | ||
49 | static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg) | ||
50 | { | ||
51 | ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg)); | ||
52 | } | ||
53 | |||
54 | static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg) | ||
55 | { | ||
56 | return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg)); | ||
57 | } | ||
58 | |||
59 | static void dmae_init(struct sh_dmae_chan *sh_chan) | ||
60 | { | ||
61 | u32 chcr = RS_DEFAULT; /* default is DUAL mode */ | ||
62 | sh_dmae_writel(sh_chan, chcr, CHCR); | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Reset DMA controller | ||
67 | * | ||
68 | * SH7780 has two DMAOR registers | ||
69 | */ | ||
70 | static void sh_dmae_ctl_stop(int id) | ||
71 | { | ||
72 | unsigned short dmaor = dmaor_read_reg(id); | ||
73 | |||
74 | dmaor &= ~(DMAOR_NMIF | DMAOR_AE); | ||
75 | dmaor_write_reg(id, dmaor); | ||
76 | } | ||
77 | |||
78 | static int sh_dmae_rst(int id) | ||
79 | { | ||
80 | unsigned short dmaor; | ||
81 | |||
82 | sh_dmae_ctl_stop(id); | ||
83 | dmaor = (dmaor_read_reg(id)|DMAOR_INIT); | ||
84 | |||
85 | dmaor_write_reg(id, dmaor); | ||
86 | if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) { | ||
87 | pr_warning(KERN_ERR "dma-sh: Can't initialize DMAOR.\n"); | ||
88 | return -EINVAL; | ||
89 | } | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | static int dmae_is_idle(struct sh_dmae_chan *sh_chan) | ||
94 | { | ||
95 | u32 chcr = sh_dmae_readl(sh_chan, CHCR); | ||
96 | if (chcr & CHCR_DE) { | ||
97 | if (!(chcr & CHCR_TE)) | ||
98 | return -EBUSY; /* working */ | ||
99 | } | ||
100 | return 0; /* waiting */ | ||
101 | } | ||
102 | |||
103 | static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan) | ||
104 | { | ||
105 | u32 chcr = sh_dmae_readl(sh_chan, CHCR); | ||
106 | return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT]; | ||
107 | } | ||
108 | |||
109 | static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw) | ||
110 | { | ||
111 | sh_dmae_writel(sh_chan, hw.sar, SAR); | ||
112 | sh_dmae_writel(sh_chan, hw.dar, DAR); | ||
113 | sh_dmae_writel(sh_chan, | ||
114 | (hw.tcr >> calc_xmit_shift(sh_chan)), TCR); | ||
115 | } | ||
116 | |||
117 | static void dmae_start(struct sh_dmae_chan *sh_chan) | ||
118 | { | ||
119 | u32 chcr = sh_dmae_readl(sh_chan, CHCR); | ||
120 | |||
121 | chcr |= (CHCR_DE|CHCR_IE); | ||
122 | sh_dmae_writel(sh_chan, chcr, CHCR); | ||
123 | } | ||
124 | |||
125 | static void dmae_halt(struct sh_dmae_chan *sh_chan) | ||
126 | { | ||
127 | u32 chcr = sh_dmae_readl(sh_chan, CHCR); | ||
128 | |||
129 | chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE); | ||
130 | sh_dmae_writel(sh_chan, chcr, CHCR); | ||
131 | } | ||
132 | |||
133 | static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val) | ||
134 | { | ||
135 | int ret = dmae_is_idle(sh_chan); | ||
136 | /* When DMA was working, can not set data to CHCR */ | ||
137 | if (ret) | ||
138 | return ret; | ||
139 | |||
140 | sh_dmae_writel(sh_chan, val, CHCR); | ||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | #define DMARS1_ADDR 0x04 | ||
145 | #define DMARS2_ADDR 0x08 | ||
146 | #define DMARS_SHIFT 8 | ||
147 | #define DMARS_CHAN_MSK 0x01 | ||
148 | static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) | ||
149 | { | ||
150 | u32 addr; | ||
151 | int shift = 0; | ||
152 | int ret = dmae_is_idle(sh_chan); | ||
153 | if (ret) | ||
154 | return ret; | ||
155 | |||
156 | if (sh_chan->id & DMARS_CHAN_MSK) | ||
157 | shift = DMARS_SHIFT; | ||
158 | |||
159 | switch (sh_chan->id) { | ||
160 | /* DMARS0 */ | ||
161 | case 0: | ||
162 | case 1: | ||
163 | addr = SH_DMARS_BASE; | ||
164 | break; | ||
165 | /* DMARS1 */ | ||
166 | case 2: | ||
167 | case 3: | ||
168 | addr = (SH_DMARS_BASE + DMARS1_ADDR); | ||
169 | break; | ||
170 | /* DMARS2 */ | ||
171 | case 4: | ||
172 | case 5: | ||
173 | addr = (SH_DMARS_BASE + DMARS2_ADDR); | ||
174 | break; | ||
175 | default: | ||
176 | return -EINVAL; | ||
177 | } | ||
178 | |||
179 | ctrl_outw((val << shift) | | ||
180 | (ctrl_inw(addr) & (shift ? 0xFF00 : 0x00FF)), | ||
181 | addr); | ||
182 | |||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) | ||
187 | { | ||
188 | struct sh_desc *desc = tx_to_sh_desc(tx); | ||
189 | struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); | ||
190 | dma_cookie_t cookie; | ||
191 | |||
192 | spin_lock_bh(&sh_chan->desc_lock); | ||
193 | |||
194 | cookie = sh_chan->common.cookie; | ||
195 | cookie++; | ||
196 | if (cookie < 0) | ||
197 | cookie = 1; | ||
198 | |||
199 | /* If desc only in the case of 1 */ | ||
200 | if (desc->async_tx.cookie != -EBUSY) | ||
201 | desc->async_tx.cookie = cookie; | ||
202 | sh_chan->common.cookie = desc->async_tx.cookie; | ||
203 | |||
204 | list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev); | ||
205 | |||
206 | spin_unlock_bh(&sh_chan->desc_lock); | ||
207 | |||
208 | return cookie; | ||
209 | } | ||
210 | |||
211 | static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan) | ||
212 | { | ||
213 | struct sh_desc *desc, *_desc, *ret = NULL; | ||
214 | |||
215 | spin_lock_bh(&sh_chan->desc_lock); | ||
216 | list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) { | ||
217 | if (async_tx_test_ack(&desc->async_tx)) { | ||
218 | list_del(&desc->node); | ||
219 | ret = desc; | ||
220 | break; | ||
221 | } | ||
222 | } | ||
223 | spin_unlock_bh(&sh_chan->desc_lock); | ||
224 | |||
225 | return ret; | ||
226 | } | ||
227 | |||
228 | static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc) | ||
229 | { | ||
230 | if (desc) { | ||
231 | spin_lock_bh(&sh_chan->desc_lock); | ||
232 | |||
233 | list_splice_init(&desc->tx_list, &sh_chan->ld_free); | ||
234 | list_add(&desc->node, &sh_chan->ld_free); | ||
235 | |||
236 | spin_unlock_bh(&sh_chan->desc_lock); | ||
237 | } | ||
238 | } | ||
239 | |||
240 | static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) | ||
241 | { | ||
242 | struct sh_dmae_chan *sh_chan = to_sh_chan(chan); | ||
243 | struct sh_desc *desc; | ||
244 | |||
245 | spin_lock_bh(&sh_chan->desc_lock); | ||
246 | while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) { | ||
247 | spin_unlock_bh(&sh_chan->desc_lock); | ||
248 | desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL); | ||
249 | if (!desc) { | ||
250 | spin_lock_bh(&sh_chan->desc_lock); | ||
251 | break; | ||
252 | } | ||
253 | dma_async_tx_descriptor_init(&desc->async_tx, | ||
254 | &sh_chan->common); | ||
255 | desc->async_tx.tx_submit = sh_dmae_tx_submit; | ||
256 | desc->async_tx.flags = DMA_CTRL_ACK; | ||
257 | INIT_LIST_HEAD(&desc->tx_list); | ||
258 | sh_dmae_put_desc(sh_chan, desc); | ||
259 | |||
260 | spin_lock_bh(&sh_chan->desc_lock); | ||
261 | sh_chan->descs_allocated++; | ||
262 | } | ||
263 | spin_unlock_bh(&sh_chan->desc_lock); | ||
264 | |||
265 | return sh_chan->descs_allocated; | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * sh_dma_free_chan_resources - Free all resources of the channel. | ||
270 | */ | ||
271 | static void sh_dmae_free_chan_resources(struct dma_chan *chan) | ||
272 | { | ||
273 | struct sh_dmae_chan *sh_chan = to_sh_chan(chan); | ||
274 | struct sh_desc *desc, *_desc; | ||
275 | LIST_HEAD(list); | ||
276 | |||
277 | BUG_ON(!list_empty(&sh_chan->ld_queue)); | ||
278 | spin_lock_bh(&sh_chan->desc_lock); | ||
279 | |||
280 | list_splice_init(&sh_chan->ld_free, &list); | ||
281 | sh_chan->descs_allocated = 0; | ||
282 | |||
283 | spin_unlock_bh(&sh_chan->desc_lock); | ||
284 | |||
285 | list_for_each_entry_safe(desc, _desc, &list, node) | ||
286 | kfree(desc); | ||
287 | } | ||
288 | |||
289 | static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( | ||
290 | struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src, | ||
291 | size_t len, unsigned long flags) | ||
292 | { | ||
293 | struct sh_dmae_chan *sh_chan; | ||
294 | struct sh_desc *first = NULL, *prev = NULL, *new; | ||
295 | size_t copy_size; | ||
296 | |||
297 | if (!chan) | ||
298 | return NULL; | ||
299 | |||
300 | if (!len) | ||
301 | return NULL; | ||
302 | |||
303 | sh_chan = to_sh_chan(chan); | ||
304 | |||
305 | do { | ||
306 | /* Allocate the link descriptor from DMA pool */ | ||
307 | new = sh_dmae_get_desc(sh_chan); | ||
308 | if (!new) { | ||
309 | dev_err(sh_chan->dev, | ||
310 | "No free memory for link descriptor\n"); | ||
311 | goto err_get_desc; | ||
312 | } | ||
313 | |||
314 | copy_size = min(len, (size_t)SH_DMA_TCR_MAX); | ||
315 | |||
316 | new->hw.sar = dma_src; | ||
317 | new->hw.dar = dma_dest; | ||
318 | new->hw.tcr = copy_size; | ||
319 | if (!first) | ||
320 | first = new; | ||
321 | |||
322 | new->mark = DESC_NCOMP; | ||
323 | async_tx_ack(&new->async_tx); | ||
324 | |||
325 | prev = new; | ||
326 | len -= copy_size; | ||
327 | dma_src += copy_size; | ||
328 | dma_dest += copy_size; | ||
329 | /* Insert the link descriptor to the LD ring */ | ||
330 | list_add_tail(&new->node, &first->tx_list); | ||
331 | } while (len); | ||
332 | |||
333 | new->async_tx.flags = flags; /* client is in control of this ack */ | ||
334 | new->async_tx.cookie = -EBUSY; /* Last desc */ | ||
335 | |||
336 | return &first->async_tx; | ||
337 | |||
338 | err_get_desc: | ||
339 | sh_dmae_put_desc(sh_chan, first); | ||
340 | return NULL; | ||
341 | |||
342 | } | ||
343 | |||
344 | /* | ||
345 | * sh_chan_ld_cleanup - Clean up link descriptors | ||
346 | * | ||
347 | * This function clean up the ld_queue of DMA channel. | ||
348 | */ | ||
349 | static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan) | ||
350 | { | ||
351 | struct sh_desc *desc, *_desc; | ||
352 | |||
353 | spin_lock_bh(&sh_chan->desc_lock); | ||
354 | list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) { | ||
355 | dma_async_tx_callback callback; | ||
356 | void *callback_param; | ||
357 | |||
358 | /* non send data */ | ||
359 | if (desc->mark == DESC_NCOMP) | ||
360 | break; | ||
361 | |||
362 | /* send data sesc */ | ||
363 | callback = desc->async_tx.callback; | ||
364 | callback_param = desc->async_tx.callback_param; | ||
365 | |||
366 | /* Remove from ld_queue list */ | ||
367 | list_splice_init(&desc->tx_list, &sh_chan->ld_free); | ||
368 | |||
369 | dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n", | ||
370 | desc); | ||
371 | |||
372 | list_move(&desc->node, &sh_chan->ld_free); | ||
373 | /* Run the link descriptor callback function */ | ||
374 | if (callback) { | ||
375 | spin_unlock_bh(&sh_chan->desc_lock); | ||
376 | dev_dbg(sh_chan->dev, "link descriptor %p callback\n", | ||
377 | desc); | ||
378 | callback(callback_param); | ||
379 | spin_lock_bh(&sh_chan->desc_lock); | ||
380 | } | ||
381 | } | ||
382 | spin_unlock_bh(&sh_chan->desc_lock); | ||
383 | } | ||
384 | |||
385 | static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) | ||
386 | { | ||
387 | struct list_head *ld_node; | ||
388 | struct sh_dmae_regs hw; | ||
389 | |||
390 | /* DMA work check */ | ||
391 | if (dmae_is_idle(sh_chan)) | ||
392 | return; | ||
393 | |||
394 | /* Find the first un-transfer desciptor */ | ||
395 | for (ld_node = sh_chan->ld_queue.next; | ||
396 | (ld_node != &sh_chan->ld_queue) | ||
397 | && (to_sh_desc(ld_node)->mark == DESC_COMP); | ||
398 | ld_node = ld_node->next) | ||
399 | cpu_relax(); | ||
400 | |||
401 | if (ld_node != &sh_chan->ld_queue) { | ||
402 | /* Get the ld start address from ld_queue */ | ||
403 | hw = to_sh_desc(ld_node)->hw; | ||
404 | dmae_set_reg(sh_chan, hw); | ||
405 | dmae_start(sh_chan); | ||
406 | } | ||
407 | } | ||
408 | |||
/* issue_pending hook: kick the channel's pending descriptor queue. */
static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
{
	sh_chan_xfer_ld_queue(to_sh_chan(chan));
}
414 | |||
415 | static enum dma_status sh_dmae_is_complete(struct dma_chan *chan, | ||
416 | dma_cookie_t cookie, | ||
417 | dma_cookie_t *done, | ||
418 | dma_cookie_t *used) | ||
419 | { | ||
420 | struct sh_dmae_chan *sh_chan = to_sh_chan(chan); | ||
421 | dma_cookie_t last_used; | ||
422 | dma_cookie_t last_complete; | ||
423 | |||
424 | sh_dmae_chan_ld_cleanup(sh_chan); | ||
425 | |||
426 | last_used = chan->cookie; | ||
427 | last_complete = sh_chan->completed_cookie; | ||
428 | if (last_complete == -EBUSY) | ||
429 | last_complete = last_used; | ||
430 | |||
431 | if (done) | ||
432 | *done = last_complete; | ||
433 | |||
434 | if (used) | ||
435 | *used = last_used; | ||
436 | |||
437 | return dma_async_is_complete(cookie, last_complete, last_used); | ||
438 | } | ||
439 | |||
440 | static irqreturn_t sh_dmae_interrupt(int irq, void *data) | ||
441 | { | ||
442 | irqreturn_t ret = IRQ_NONE; | ||
443 | struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; | ||
444 | u32 chcr = sh_dmae_readl(sh_chan, CHCR); | ||
445 | |||
446 | if (chcr & CHCR_TE) { | ||
447 | /* DMA stop */ | ||
448 | dmae_halt(sh_chan); | ||
449 | |||
450 | ret = IRQ_HANDLED; | ||
451 | tasklet_schedule(&sh_chan->tasklet); | ||
452 | } | ||
453 | |||
454 | return ret; | ||
455 | } | ||
456 | |||
#if defined(CONFIG_CPU_SH4)
/*
 * DMAC address-error interrupt handler.
 *
 * Mixed-IRQ mode: the handler shares its line with the transfer-end
 * interrupts, so it only claims the interrupt when the matching DMAOR
 * reports NMIF or AE; the line is then disabled.
 *
 * Dedicated error IRQ: the controller(s) are reset and the error line is
 * disabled.
 *
 * Always returns a valid irqreturn_t (IRQ_HANDLED or IRQ_NONE).
 */
static irqreturn_t sh_dmae_err(int irq, void *data)
{
	struct sh_dmae_device *shdev = (struct sh_dmae_device *)data;

	/* IRQ Multi */
	if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
		int cnt = 0;

		switch (irq) {
#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
		case DMTE6_IRQ:
			cnt++;
			/* fall through */
#endif
		case DMTE0_IRQ:
			if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) {
				/*
				 * We are inside this IRQ's handler, so the
				 * synchronous disable_irq() must not be used.
				 */
				disable_irq_nosync(irq);
				return IRQ_HANDLED;
			}
			/* fall through */
		default:
			return IRQ_NONE;
		}
	}

	/*
	 * Reset the dma controller(s).  A failed reset is already logged by
	 * sh_dmae_rst(); an errno must not be returned from here because the
	 * return type is irqreturn_t.  DMAOR1 is only reset when DMAOR0
	 * could be reset, as before.
	 */
	if (!sh_dmae_rst(0) && (shdev->pdata.mode & SHDMA_DMAOR1))
		sh_dmae_rst(1);

	/*
	 * Mask the error line in every case so that a condition that could
	 * not be cleared does not keep re-raising the interrupt.
	 */
	disable_irq_nosync(irq);
	return IRQ_HANDLED;
}
#endif
494 | |||
495 | static void dmae_do_tasklet(unsigned long data) | ||
496 | { | ||
497 | struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; | ||
498 | struct sh_desc *desc, *_desc, *cur_desc = NULL; | ||
499 | u32 sar_buf = sh_dmae_readl(sh_chan, SAR); | ||
500 | list_for_each_entry_safe(desc, _desc, | ||
501 | &sh_chan->ld_queue, node) { | ||
502 | if ((desc->hw.sar + desc->hw.tcr) == sar_buf) { | ||
503 | cur_desc = desc; | ||
504 | break; | ||
505 | } | ||
506 | } | ||
507 | |||
508 | if (cur_desc) { | ||
509 | switch (cur_desc->async_tx.cookie) { | ||
510 | case 0: /* other desc data */ | ||
511 | break; | ||
512 | case -EBUSY: /* last desc */ | ||
513 | sh_chan->completed_cookie = | ||
514 | cur_desc->async_tx.cookie; | ||
515 | break; | ||
516 | default: /* first desc ( 0 < )*/ | ||
517 | sh_chan->completed_cookie = | ||
518 | cur_desc->async_tx.cookie - 1; | ||
519 | break; | ||
520 | } | ||
521 | cur_desc->mark = DESC_COMP; | ||
522 | } | ||
523 | /* Next desc */ | ||
524 | sh_chan_xfer_ld_queue(sh_chan); | ||
525 | sh_dmae_chan_ld_cleanup(sh_chan); | ||
526 | } | ||
527 | |||
528 | static unsigned int get_dmae_irq(unsigned int id) | ||
529 | { | ||
530 | unsigned int irq = 0; | ||
531 | if (id < ARRAY_SIZE(dmte_irq_map)) | ||
532 | irq = dmte_irq_map[id]; | ||
533 | return irq; | ||
534 | } | ||
535 | |||
536 | static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id) | ||
537 | { | ||
538 | int err; | ||
539 | unsigned int irq = get_dmae_irq(id); | ||
540 | unsigned long irqflags = IRQF_DISABLED; | ||
541 | struct sh_dmae_chan *new_sh_chan; | ||
542 | |||
543 | /* alloc channel */ | ||
544 | new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL); | ||
545 | if (!new_sh_chan) { | ||
546 | dev_err(shdev->common.dev, "No free memory for allocating " | ||
547 | "dma channels!\n"); | ||
548 | return -ENOMEM; | ||
549 | } | ||
550 | |||
551 | new_sh_chan->dev = shdev->common.dev; | ||
552 | new_sh_chan->id = id; | ||
553 | |||
554 | /* Init DMA tasklet */ | ||
555 | tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet, | ||
556 | (unsigned long)new_sh_chan); | ||
557 | |||
558 | /* Init the channel */ | ||
559 | dmae_init(new_sh_chan); | ||
560 | |||
561 | spin_lock_init(&new_sh_chan->desc_lock); | ||
562 | |||
563 | /* Init descripter manage list */ | ||
564 | INIT_LIST_HEAD(&new_sh_chan->ld_queue); | ||
565 | INIT_LIST_HEAD(&new_sh_chan->ld_free); | ||
566 | |||
567 | /* copy struct dma_device */ | ||
568 | new_sh_chan->common.device = &shdev->common; | ||
569 | |||
570 | /* Add the channel to DMA device channel list */ | ||
571 | list_add_tail(&new_sh_chan->common.device_node, | ||
572 | &shdev->common.channels); | ||
573 | shdev->common.chancnt++; | ||
574 | |||
575 | if (shdev->pdata.mode & SHDMA_MIX_IRQ) { | ||
576 | irqflags = IRQF_SHARED; | ||
577 | #if defined(DMTE6_IRQ) | ||
578 | if (irq >= DMTE6_IRQ) | ||
579 | irq = DMTE6_IRQ; | ||
580 | else | ||
581 | #endif | ||
582 | irq = DMTE0_IRQ; | ||
583 | } | ||
584 | |||
585 | snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id), | ||
586 | "sh-dmae%d", new_sh_chan->id); | ||
587 | |||
588 | /* set up channel irq */ | ||
589 | err = request_irq(irq, &sh_dmae_interrupt, | ||
590 | irqflags, new_sh_chan->dev_id, new_sh_chan); | ||
591 | if (err) { | ||
592 | dev_err(shdev->common.dev, "DMA channel %d request_irq error " | ||
593 | "with return %d\n", id, err); | ||
594 | goto err_no_irq; | ||
595 | } | ||
596 | |||
597 | /* CHCR register control function */ | ||
598 | new_sh_chan->set_chcr = dmae_set_chcr; | ||
599 | /* DMARS register control function */ | ||
600 | new_sh_chan->set_dmars = dmae_set_dmars; | ||
601 | |||
602 | shdev->chan[id] = new_sh_chan; | ||
603 | return 0; | ||
604 | |||
605 | err_no_irq: | ||
606 | /* remove from dmaengine device node */ | ||
607 | list_del(&new_sh_chan->common.device_node); | ||
608 | kfree(new_sh_chan); | ||
609 | return err; | ||
610 | } | ||
611 | |||
612 | static void sh_dmae_chan_remove(struct sh_dmae_device *shdev) | ||
613 | { | ||
614 | int i; | ||
615 | |||
616 | for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) { | ||
617 | if (shdev->chan[i]) { | ||
618 | struct sh_dmae_chan *shchan = shdev->chan[i]; | ||
619 | if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) | ||
620 | free_irq(dmte_irq_map[i], shchan); | ||
621 | |||
622 | list_del(&shchan->common.device_node); | ||
623 | kfree(shchan); | ||
624 | shdev->chan[i] = NULL; | ||
625 | } | ||
626 | } | ||
627 | shdev->common.chancnt = 0; | ||
628 | } | ||
629 | |||
630 | static int __init sh_dmae_probe(struct platform_device *pdev) | ||
631 | { | ||
632 | int err = 0, cnt, ecnt; | ||
633 | unsigned long irqflags = IRQF_DISABLED; | ||
634 | #if defined(CONFIG_CPU_SH4) | ||
635 | int eirq[] = { DMAE0_IRQ, | ||
636 | #if defined(DMAE1_IRQ) | ||
637 | DMAE1_IRQ | ||
638 | #endif | ||
639 | }; | ||
640 | #endif | ||
641 | struct sh_dmae_device *shdev; | ||
642 | |||
643 | shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL); | ||
644 | if (!shdev) { | ||
645 | dev_err(&pdev->dev, "No enough memory\n"); | ||
646 | err = -ENOMEM; | ||
647 | goto shdev_err; | ||
648 | } | ||
649 | |||
650 | /* get platform data */ | ||
651 | if (!pdev->dev.platform_data) | ||
652 | goto shdev_err; | ||
653 | |||
654 | /* platform data */ | ||
655 | memcpy(&shdev->pdata, pdev->dev.platform_data, | ||
656 | sizeof(struct sh_dmae_pdata)); | ||
657 | |||
658 | /* reset dma controller */ | ||
659 | err = sh_dmae_rst(0); | ||
660 | if (err) | ||
661 | goto rst_err; | ||
662 | |||
663 | /* SH7780/85/23 has DMAOR1 */ | ||
664 | if (shdev->pdata.mode & SHDMA_DMAOR1) { | ||
665 | err = sh_dmae_rst(1); | ||
666 | if (err) | ||
667 | goto rst_err; | ||
668 | } | ||
669 | |||
670 | INIT_LIST_HEAD(&shdev->common.channels); | ||
671 | |||
672 | dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask); | ||
673 | shdev->common.device_alloc_chan_resources | ||
674 | = sh_dmae_alloc_chan_resources; | ||
675 | shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources; | ||
676 | shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy; | ||
677 | shdev->common.device_is_tx_complete = sh_dmae_is_complete; | ||
678 | shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending; | ||
679 | shdev->common.dev = &pdev->dev; | ||
680 | |||
681 | #if defined(CONFIG_CPU_SH4) | ||
682 | /* Non Mix IRQ mode SH7722/SH7730 etc... */ | ||
683 | if (shdev->pdata.mode & SHDMA_MIX_IRQ) { | ||
684 | irqflags = IRQF_SHARED; | ||
685 | eirq[0] = DMTE0_IRQ; | ||
686 | #if defined(DMTE6_IRQ) && defined(DMAE1_IRQ) | ||
687 | eirq[1] = DMTE6_IRQ; | ||
688 | #endif | ||
689 | } | ||
690 | |||
691 | for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) { | ||
692 | err = request_irq(eirq[ecnt], sh_dmae_err, | ||
693 | irqflags, "DMAC Address Error", shdev); | ||
694 | if (err) { | ||
695 | dev_err(&pdev->dev, "DMA device request_irq" | ||
696 | "error (irq %d) with return %d\n", | ||
697 | eirq[ecnt], err); | ||
698 | goto eirq_err; | ||
699 | } | ||
700 | } | ||
701 | #endif /* CONFIG_CPU_SH4 */ | ||
702 | |||
703 | /* Create DMA Channel */ | ||
704 | for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) { | ||
705 | err = sh_dmae_chan_probe(shdev, cnt); | ||
706 | if (err) | ||
707 | goto chan_probe_err; | ||
708 | } | ||
709 | |||
710 | platform_set_drvdata(pdev, shdev); | ||
711 | dma_async_device_register(&shdev->common); | ||
712 | |||
713 | return err; | ||
714 | |||
715 | chan_probe_err: | ||
716 | sh_dmae_chan_remove(shdev); | ||
717 | |||
718 | eirq_err: | ||
719 | for (ecnt-- ; ecnt >= 0; ecnt--) | ||
720 | free_irq(eirq[ecnt], shdev); | ||
721 | |||
722 | rst_err: | ||
723 | kfree(shdev); | ||
724 | |||
725 | shdev_err: | ||
726 | return err; | ||
727 | } | ||
728 | |||
729 | static int __exit sh_dmae_remove(struct platform_device *pdev) | ||
730 | { | ||
731 | struct sh_dmae_device *shdev = platform_get_drvdata(pdev); | ||
732 | |||
733 | dma_async_device_unregister(&shdev->common); | ||
734 | |||
735 | if (shdev->pdata.mode & SHDMA_MIX_IRQ) { | ||
736 | free_irq(DMTE0_IRQ, shdev); | ||
737 | #if defined(DMTE6_IRQ) | ||
738 | free_irq(DMTE6_IRQ, shdev); | ||
739 | #endif | ||
740 | } | ||
741 | |||
742 | /* channel data remove */ | ||
743 | sh_dmae_chan_remove(shdev); | ||
744 | |||
745 | if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) { | ||
746 | free_irq(DMAE0_IRQ, shdev); | ||
747 | #if defined(DMAE1_IRQ) | ||
748 | free_irq(DMAE1_IRQ, shdev); | ||
749 | #endif | ||
750 | } | ||
751 | kfree(shdev); | ||
752 | |||
753 | return 0; | ||
754 | } | ||
755 | |||
756 | static void sh_dmae_shutdown(struct platform_device *pdev) | ||
757 | { | ||
758 | struct sh_dmae_device *shdev = platform_get_drvdata(pdev); | ||
759 | sh_dmae_ctl_stop(0); | ||
760 | if (shdev->pdata.mode & SHDMA_DMAOR1) | ||
761 | sh_dmae_ctl_stop(1); | ||
762 | } | ||
763 | |||
764 | static struct platform_driver sh_dmae_driver = { | ||
765 | .remove = __exit_p(sh_dmae_remove), | ||
766 | .shutdown = sh_dmae_shutdown, | ||
767 | .driver = { | ||
768 | .name = "sh-dma-engine", | ||
769 | }, | ||
770 | }; | ||
771 | |||
772 | static int __init sh_dmae_init(void) | ||
773 | { | ||
774 | return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe); | ||
775 | } | ||
776 | module_init(sh_dmae_init); | ||
777 | |||
778 | static void __exit sh_dmae_exit(void) | ||
779 | { | ||
780 | platform_driver_unregister(&sh_dmae_driver); | ||
781 | } | ||
782 | module_exit(sh_dmae_exit); | ||
783 | |||
784 | MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>"); | ||
785 | MODULE_DESCRIPTION("Renesas SH DMA Engine driver"); | ||
786 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h new file mode 100644 index 000000000000..2b4bc15a2c0a --- /dev/null +++ b/drivers/dma/shdma.h | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * Renesas SuperH DMA Engine support | ||
3 | * | ||
4 | * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com> | ||
5 | * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved. | ||
6 | * | ||
7 | * This is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | */ | ||
13 | #ifndef __DMA_SHDMA_H | ||
14 | #define __DMA_SHDMA_H | ||
15 | |||
16 | #include <linux/device.h> | ||
17 | #include <linux/dmapool.h> | ||
18 | #include <linux/dmaengine.h> | ||
19 | |||
20 | #define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */ | ||
21 | |||
22 | struct sh_dmae_regs { | ||
23 | u32 sar; /* SAR / source address */ | ||
24 | u32 dar; /* DAR / destination address */ | ||
25 | u32 tcr; /* TCR / transfer count */ | ||
26 | }; | ||
27 | |||
28 | struct sh_desc { | ||
29 | struct list_head tx_list; | ||
30 | struct sh_dmae_regs hw; | ||
31 | struct list_head node; | ||
32 | struct dma_async_tx_descriptor async_tx; | ||
33 | int mark; | ||
34 | }; | ||
35 | |||
36 | struct sh_dmae_chan { | ||
37 | dma_cookie_t completed_cookie; /* The maximum cookie completed */ | ||
38 | spinlock_t desc_lock; /* Descriptor operation lock */ | ||
39 | struct list_head ld_queue; /* Link descriptors queue */ | ||
40 | struct list_head ld_free; /* Link descriptors free */ | ||
41 | struct dma_chan common; /* DMA common channel */ | ||
42 | struct device *dev; /* Channel device */ | ||
43 | struct tasklet_struct tasklet; /* Tasklet */ | ||
44 | int descs_allocated; /* desc count */ | ||
45 | int id; /* Raw id of this channel */ | ||
46 | char dev_id[16]; /* unique name per DMAC of channel */ | ||
47 | |||
48 | /* Set chcr */ | ||
49 | int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs); | ||
50 | /* Set DMA resource */ | ||
51 | int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res); | ||
52 | }; | ||
53 | |||
54 | struct sh_dmae_device { | ||
55 | struct dma_device common; | ||
56 | struct sh_dmae_chan *chan[MAX_DMA_CHANNELS]; | ||
57 | struct sh_dmae_pdata pdata; | ||
58 | }; | ||
59 | |||
60 | #define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common) | ||
61 | #define to_sh_desc(lh) container_of(lh, struct sh_desc, node) | ||
62 | #define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx) | ||
63 | |||
64 | #endif /* __DMA_SHDMA_H */ | ||
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c index 7837930146a4..fb6bb64e8861 100644 --- a/drivers/dma/txx9dmac.c +++ b/drivers/dma/txx9dmac.c | |||
@@ -180,9 +180,8 @@ static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc) | |||
180 | 180 | ||
181 | static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc) | 181 | static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc) |
182 | { | 182 | { |
183 | if (!list_empty(&desc->txd.tx_list)) | 183 | if (!list_empty(&desc->tx_list)) |
184 | desc = list_entry(desc->txd.tx_list.prev, | 184 | desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node); |
185 | struct txx9dmac_desc, desc_node); | ||
186 | return desc; | 185 | return desc; |
187 | } | 186 | } |
188 | 187 | ||
@@ -197,6 +196,7 @@ static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc, | |||
197 | desc = kzalloc(sizeof(*desc), flags); | 196 | desc = kzalloc(sizeof(*desc), flags); |
198 | if (!desc) | 197 | if (!desc) |
199 | return NULL; | 198 | return NULL; |
199 | INIT_LIST_HEAD(&desc->tx_list); | ||
200 | dma_async_tx_descriptor_init(&desc->txd, &dc->chan); | 200 | dma_async_tx_descriptor_init(&desc->txd, &dc->chan); |
201 | desc->txd.tx_submit = txx9dmac_tx_submit; | 201 | desc->txd.tx_submit = txx9dmac_tx_submit; |
202 | /* txd.flags will be overwritten in prep funcs */ | 202 | /* txd.flags will be overwritten in prep funcs */ |
@@ -245,7 +245,7 @@ static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc, | |||
245 | struct txx9dmac_dev *ddev = dc->ddev; | 245 | struct txx9dmac_dev *ddev = dc->ddev; |
246 | struct txx9dmac_desc *child; | 246 | struct txx9dmac_desc *child; |
247 | 247 | ||
248 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | 248 | list_for_each_entry(child, &desc->tx_list, desc_node) |
249 | dma_sync_single_for_cpu(chan2parent(&dc->chan), | 249 | dma_sync_single_for_cpu(chan2parent(&dc->chan), |
250 | child->txd.phys, ddev->descsize, | 250 | child->txd.phys, ddev->descsize, |
251 | DMA_TO_DEVICE); | 251 | DMA_TO_DEVICE); |
@@ -267,11 +267,11 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc, | |||
267 | txx9dmac_sync_desc_for_cpu(dc, desc); | 267 | txx9dmac_sync_desc_for_cpu(dc, desc); |
268 | 268 | ||
269 | spin_lock_bh(&dc->lock); | 269 | spin_lock_bh(&dc->lock); |
270 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | 270 | list_for_each_entry(child, &desc->tx_list, desc_node) |
271 | dev_vdbg(chan2dev(&dc->chan), | 271 | dev_vdbg(chan2dev(&dc->chan), |
272 | "moving child desc %p to freelist\n", | 272 | "moving child desc %p to freelist\n", |
273 | child); | 273 | child); |
274 | list_splice_init(&desc->txd.tx_list, &dc->free_list); | 274 | list_splice_init(&desc->tx_list, &dc->free_list); |
275 | dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n", | 275 | dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n", |
276 | desc); | 276 | desc); |
277 | list_add(&desc->desc_node, &dc->free_list); | 277 | list_add(&desc->desc_node, &dc->free_list); |
@@ -429,7 +429,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc, | |||
429 | param = txd->callback_param; | 429 | param = txd->callback_param; |
430 | 430 | ||
431 | txx9dmac_sync_desc_for_cpu(dc, desc); | 431 | txx9dmac_sync_desc_for_cpu(dc, desc); |
432 | list_splice_init(&txd->tx_list, &dc->free_list); | 432 | list_splice_init(&desc->tx_list, &dc->free_list); |
433 | list_move(&desc->desc_node, &dc->free_list); | 433 | list_move(&desc->desc_node, &dc->free_list); |
434 | 434 | ||
435 | if (!ds) { | 435 | if (!ds) { |
@@ -571,7 +571,7 @@ static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr) | |||
571 | "Bad descriptor submitted for DMA! (cookie: %d)\n", | 571 | "Bad descriptor submitted for DMA! (cookie: %d)\n", |
572 | bad_desc->txd.cookie); | 572 | bad_desc->txd.cookie); |
573 | txx9dmac_dump_desc(dc, &bad_desc->hwdesc); | 573 | txx9dmac_dump_desc(dc, &bad_desc->hwdesc); |
574 | list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) | 574 | list_for_each_entry(child, &bad_desc->tx_list, desc_node) |
575 | txx9dmac_dump_desc(dc, &child->hwdesc); | 575 | txx9dmac_dump_desc(dc, &child->hwdesc); |
576 | /* Pretend the descriptor completed successfully */ | 576 | /* Pretend the descriptor completed successfully */ |
577 | txx9dmac_descriptor_complete(dc, bad_desc); | 577 | txx9dmac_descriptor_complete(dc, bad_desc); |
@@ -613,7 +613,7 @@ static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc) | |||
613 | return; | 613 | return; |
614 | } | 614 | } |
615 | 615 | ||
616 | list_for_each_entry(child, &desc->txd.tx_list, desc_node) | 616 | list_for_each_entry(child, &desc->tx_list, desc_node) |
617 | if (desc_read_CHAR(dc, child) == chain) { | 617 | if (desc_read_CHAR(dc, child) == chain) { |
618 | /* Currently in progress */ | 618 | /* Currently in progress */ |
619 | if (csr & TXX9_DMA_CSR_ABCHC) | 619 | if (csr & TXX9_DMA_CSR_ABCHC) |
@@ -823,8 +823,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, | |||
823 | dma_sync_single_for_device(chan2parent(&dc->chan), | 823 | dma_sync_single_for_device(chan2parent(&dc->chan), |
824 | prev->txd.phys, ddev->descsize, | 824 | prev->txd.phys, ddev->descsize, |
825 | DMA_TO_DEVICE); | 825 | DMA_TO_DEVICE); |
826 | list_add_tail(&desc->desc_node, | 826 | list_add_tail(&desc->desc_node, &first->tx_list); |
827 | &first->txd.tx_list); | ||
828 | } | 827 | } |
829 | prev = desc; | 828 | prev = desc; |
830 | } | 829 | } |
@@ -919,8 +918,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | |||
919 | prev->txd.phys, | 918 | prev->txd.phys, |
920 | ddev->descsize, | 919 | ddev->descsize, |
921 | DMA_TO_DEVICE); | 920 | DMA_TO_DEVICE); |
922 | list_add_tail(&desc->desc_node, | 921 | list_add_tail(&desc->desc_node, &first->tx_list); |
923 | &first->txd.tx_list); | ||
924 | } | 922 | } |
925 | prev = desc; | 923 | prev = desc; |
926 | } | 924 | } |
diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h index c907ff01d276..365d42366b9f 100644 --- a/drivers/dma/txx9dmac.h +++ b/drivers/dma/txx9dmac.h | |||
@@ -231,6 +231,7 @@ struct txx9dmac_desc { | |||
231 | 231 | ||
232 | /* THEN values for driver housekeeping */ | 232 | /* THEN values for driver housekeeping */ |
233 | struct list_head desc_node ____cacheline_aligned; | 233 | struct list_head desc_node ____cacheline_aligned; |
234 | struct list_head tx_list; | ||
234 | struct dma_async_tx_descriptor txd; | 235 | struct dma_async_tx_descriptor txd; |
235 | size_t len; | 236 | size_t len; |
236 | }; | 237 | }; |
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index a3ca18e2d7cf..02127e59fe8e 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig | |||
@@ -133,6 +133,13 @@ config EDAC_I3000 | |||
133 | Support for error detection and correction on the Intel | 133 | Support for error detection and correction on the Intel |
134 | 3000 and 3010 server chipsets. | 134 | 3000 and 3010 server chipsets. |
135 | 135 | ||
136 | config EDAC_I3200 | ||
137 | tristate "Intel 3200" | ||
138 | depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL | ||
139 | help | ||
140 | Support for error detection and correction on the Intel | ||
141 | 3200 and 3210 server chipsets. | ||
142 | |||
136 | config EDAC_X38 | 143 | config EDAC_X38 |
137 | tristate "Intel X38" | 144 | tristate "Intel X38" |
138 | depends on EDAC_MM_EDAC && PCI && X86 | 145 | depends on EDAC_MM_EDAC && PCI && X86 |
@@ -176,11 +183,11 @@ config EDAC_I5100 | |||
176 | San Clemente MCH. | 183 | San Clemente MCH. |
177 | 184 | ||
178 | config EDAC_MPC85XX | 185 | config EDAC_MPC85XX |
179 | tristate "Freescale MPC85xx" | 186 | tristate "Freescale MPC83xx / MPC85xx" |
180 | depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx | 187 | depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || MPC85xx) |
181 | help | 188 | help |
182 | Support for error detection and correction on the Freescale | 189 | Support for error detection and correction on the Freescale |
183 | MPC8560, MPC8540, MPC8548 | 190 | MPC8349, MPC8560, MPC8540, MPC8548 |
184 | 191 | ||
185 | config EDAC_MV64X60 | 192 | config EDAC_MV64X60 |
186 | tristate "Marvell MV64x60" | 193 | tristate "Marvell MV64x60" |
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index cfa033ce53a7..7a473bbe8abd 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile | |||
@@ -32,6 +32,7 @@ obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o | |||
32 | obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o | 32 | obj-$(CONFIG_EDAC_I82875P) += i82875p_edac.o |
33 | obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o | 33 | obj-$(CONFIG_EDAC_I82975X) += i82975x_edac.o |
34 | obj-$(CONFIG_EDAC_I3000) += i3000_edac.o | 34 | obj-$(CONFIG_EDAC_I3000) += i3000_edac.o |
35 | obj-$(CONFIG_EDAC_I3200) += i3200_edac.o | ||
35 | obj-$(CONFIG_EDAC_X38) += x38_edac.o | 36 | obj-$(CONFIG_EDAC_X38) += x38_edac.o |
36 | obj-$(CONFIG_EDAC_I82860) += i82860_edac.o | 37 | obj-$(CONFIG_EDAC_I82860) += i82860_edac.o |
37 | obj-$(CONFIG_EDAC_R82600) += r82600_edac.o | 38 | obj-$(CONFIG_EDAC_R82600) += r82600_edac.o |
@@ -49,3 +50,4 @@ obj-$(CONFIG_EDAC_CELL) += cell_edac.o | |||
49 | obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o | 50 | obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o |
50 | obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o | 51 | obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o |
51 | obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o | 52 | obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o |
53 | |||
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index 8c54196b5aba..3d50274f1348 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c | |||
@@ -885,14 +885,14 @@ static int __devinit cpc925_probe(struct platform_device *pdev) | |||
885 | 885 | ||
886 | if (!devm_request_mem_region(&pdev->dev, | 886 | if (!devm_request_mem_region(&pdev->dev, |
887 | r->start, | 887 | r->start, |
888 | r->end - r->start + 1, | 888 | resource_size(r), |
889 | pdev->name)) { | 889 | pdev->name)) { |
890 | cpc925_printk(KERN_ERR, "Unable to request mem region\n"); | 890 | cpc925_printk(KERN_ERR, "Unable to request mem region\n"); |
891 | res = -EBUSY; | 891 | res = -EBUSY; |
892 | goto err1; | 892 | goto err1; |
893 | } | 893 | } |
894 | 894 | ||
895 | vbase = devm_ioremap(&pdev->dev, r->start, r->end - r->start + 1); | 895 | vbase = devm_ioremap(&pdev->dev, r->start, resource_size(r)); |
896 | if (!vbase) { | 896 | if (!vbase) { |
897 | cpc925_printk(KERN_ERR, "Unable to ioremap device\n"); | 897 | cpc925_printk(KERN_ERR, "Unable to ioremap device\n"); |
898 | res = -ENOMEM; | 898 | res = -ENOMEM; |
@@ -953,7 +953,7 @@ err3: | |||
953 | cpc925_mc_exit(mci); | 953 | cpc925_mc_exit(mci); |
954 | edac_mc_free(mci); | 954 | edac_mc_free(mci); |
955 | err2: | 955 | err2: |
956 | devm_release_mem_region(&pdev->dev, r->start, r->end-r->start+1); | 956 | devm_release_mem_region(&pdev->dev, r->start, resource_size(r)); |
957 | err1: | 957 | err1: |
958 | devres_release_group(&pdev->dev, cpc925_probe); | 958 | devres_release_group(&pdev->dev, cpc925_probe); |
959 | out: | 959 | out: |
diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index b02a6a69a8f0..d5e13c94714f 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c | |||
@@ -356,7 +356,6 @@ static void complete_edac_device_list_del(struct rcu_head *head) | |||
356 | 356 | ||
357 | edac_dev = container_of(head, struct edac_device_ctl_info, rcu); | 357 | edac_dev = container_of(head, struct edac_device_ctl_info, rcu); |
358 | INIT_LIST_HEAD(&edac_dev->link); | 358 | INIT_LIST_HEAD(&edac_dev->link); |
359 | complete(&edac_dev->removal_complete); | ||
360 | } | 359 | } |
361 | 360 | ||
362 | /* | 361 | /* |
@@ -369,10 +368,8 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info | |||
369 | *edac_device) | 368 | *edac_device) |
370 | { | 369 | { |
371 | list_del_rcu(&edac_device->link); | 370 | list_del_rcu(&edac_device->link); |
372 | |||
373 | init_completion(&edac_device->removal_complete); | ||
374 | call_rcu(&edac_device->rcu, complete_edac_device_list_del); | 371 | call_rcu(&edac_device->rcu, complete_edac_device_list_del); |
375 | wait_for_completion(&edac_device->removal_complete); | 372 | rcu_barrier(); |
376 | } | 373 | } |
377 | 374 | ||
378 | /* | 375 | /* |
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 335b7ebdb11c..b629c41756f0 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c | |||
@@ -418,16 +418,14 @@ static void complete_mc_list_del(struct rcu_head *head) | |||
418 | 418 | ||
419 | mci = container_of(head, struct mem_ctl_info, rcu); | 419 | mci = container_of(head, struct mem_ctl_info, rcu); |
420 | INIT_LIST_HEAD(&mci->link); | 420 | INIT_LIST_HEAD(&mci->link); |
421 | complete(&mci->complete); | ||
422 | } | 421 | } |
423 | 422 | ||
424 | static void del_mc_from_global_list(struct mem_ctl_info *mci) | 423 | static void del_mc_from_global_list(struct mem_ctl_info *mci) |
425 | { | 424 | { |
426 | atomic_dec(&edac_handlers); | 425 | atomic_dec(&edac_handlers); |
427 | list_del_rcu(&mci->link); | 426 | list_del_rcu(&mci->link); |
428 | init_completion(&mci->complete); | ||
429 | call_rcu(&mci->rcu, complete_mc_list_del); | 427 | call_rcu(&mci->rcu, complete_mc_list_del); |
430 | wait_for_completion(&mci->complete); | 428 | rcu_barrier(); |
431 | } | 429 | } |
432 | 430 | ||
433 | /** | 431 | /** |
diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 30b585b1d60b..efb5d5650783 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c | |||
@@ -174,7 +174,6 @@ static void complete_edac_pci_list_del(struct rcu_head *head) | |||
174 | 174 | ||
175 | pci = container_of(head, struct edac_pci_ctl_info, rcu); | 175 | pci = container_of(head, struct edac_pci_ctl_info, rcu); |
176 | INIT_LIST_HEAD(&pci->link); | 176 | INIT_LIST_HEAD(&pci->link); |
177 | complete(&pci->complete); | ||
178 | } | 177 | } |
179 | 178 | ||
180 | /* | 179 | /* |
@@ -185,9 +184,8 @@ static void complete_edac_pci_list_del(struct rcu_head *head) | |||
185 | static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci) | 184 | static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci) |
186 | { | 185 | { |
187 | list_del_rcu(&pci->link); | 186 | list_del_rcu(&pci->link); |
188 | init_completion(&pci->complete); | ||
189 | call_rcu(&pci->rcu, complete_edac_pci_list_del); | 187 | call_rcu(&pci->rcu, complete_edac_pci_list_del); |
190 | wait_for_completion(&pci->complete); | 188 | rcu_barrier(); |
191 | } | 189 | } |
192 | 190 | ||
193 | #if 0 | 191 | #if 0 |
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c new file mode 100644 index 000000000000..fde4db91c4d2 --- /dev/null +++ b/drivers/edac/i3200_edac.c | |||
@@ -0,0 +1,527 @@ | |||
1 | /* | ||
2 | * Intel 3200/3210 Memory Controller kernel module | ||
3 | * Copyright (C) 2008-2009 Akamai Technologies, Inc. | ||
4 | * Portions by Hitoshi Mitake <h.mitake@gmail.com>. | ||
5 | * | ||
6 | * This file may be distributed under the terms of the | ||
7 | * GNU General Public License. | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/pci.h> | ||
13 | #include <linux/pci_ids.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/edac.h> | ||
16 | #include <linux/io.h> | ||
17 | #include "edac_core.h" | ||
18 | |||
19 | #define I3200_REVISION "1.1" | ||
20 | |||
21 | #define EDAC_MOD_STR "i3200_edac" | ||
22 | |||
23 | #define PCI_DEVICE_ID_INTEL_3200_HB 0x29f0 | ||
24 | |||
25 | #define I3200_RANKS 8 | ||
26 | #define I3200_RANKS_PER_CHANNEL 4 | ||
27 | #define I3200_CHANNELS 2 | ||
28 | |||
29 | /* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */ | ||
30 | |||
31 | #define I3200_MCHBAR_LOW 0x48 /* MCH Memory Mapped Register BAR */ | ||
32 | #define I3200_MCHBAR_HIGH 0x4c | ||
33 | #define I3200_MCHBAR_MASK 0xfffffc000ULL /* bits 35:14 */ | ||
34 | #define I3200_MMR_WINDOW_SIZE 16384 | ||
35 | |||
36 | #define I3200_TOM 0xa0 /* Top of Memory (16b) | ||
37 | * | ||
38 | * 15:10 reserved | ||
39 | * 9:0 total populated physical memory | ||
40 | */ | ||
41 | #define I3200_TOM_MASK 0x3ff /* bits 9:0 */ | ||
42 | #define I3200_TOM_SHIFT 26 /* 64MiB grain */ | ||
43 | |||
44 | #define I3200_ERRSTS 0xc8 /* Error Status Register (16b) | ||
45 | * | ||
46 | * 15 reserved | ||
47 | * 14 Isochronous TBWRR Run Behind FIFO Full | ||
48 | * (ITCV) | ||
49 | * 13 Isochronous TBWRR Run Behind FIFO Put | ||
50 | * (ITSTV) | ||
51 | * 12 reserved | ||
52 | * 11 MCH Thermal Sensor Event | ||
53 | * for SMI/SCI/SERR (GTSE) | ||
54 | * 10 reserved | ||
55 | * 9 LOCK to non-DRAM Memory Flag (LCKF) | ||
56 | * 8 reserved | ||
57 | * 7 DRAM Throttle Flag (DTF) | ||
58 | * 6:2 reserved | ||
59 | * 1 Multi-bit DRAM ECC Error Flag (DMERR) | ||
60 | * 0 Single-bit DRAM ECC Error Flag (DSERR) | ||
61 | */ | ||
62 | #define I3200_ERRSTS_UE 0x0002 | ||
63 | #define I3200_ERRSTS_CE 0x0001 | ||
64 | #define I3200_ERRSTS_BITS (I3200_ERRSTS_UE | I3200_ERRSTS_CE) | ||
65 | |||
66 | |||
67 | /* Intel MMIO register space - device 0 function 0 - MMR space */ | ||
68 | |||
69 | #define I3200_C0DRB 0x200 /* Channel 0 DRAM Rank Boundary (16b x 4) | ||
70 | * | ||
71 | * 15:10 reserved | ||
72 | * 9:0 Channel 0 DRAM Rank Boundary Address | ||
73 | */ | ||
74 | #define I3200_C1DRB 0x600 /* Channel 1 DRAM Rank Boundary (16b x 4) */ | ||
75 | #define I3200_DRB_MASK 0x3ff /* bits 9:0 */ | ||
76 | #define I3200_DRB_SHIFT 26 /* 64MiB grain */ | ||
77 | |||
78 | #define I3200_C0ECCERRLOG 0x280 /* Channel 0 ECC Error Log (64b) | ||
79 | * | ||
80 | * 63:48 Error Column Address (ERRCOL) | ||
81 | * 47:32 Error Row Address (ERRROW) | ||
82 | * 31:29 Error Bank Address (ERRBANK) | ||
83 | * 28:27 Error Rank Address (ERRRANK) | ||
84 | * 26:24 reserved | ||
85 | * 23:16 Error Syndrome (ERRSYND) | ||
86 | * 15: 2 reserved | ||
87 | * 1 Multiple Bit Error Status (MERRSTS) | ||
88 | * 0 Correctable Error Status (CERRSTS) | ||
89 | */ | ||
90 | #define I3200_C1ECCERRLOG 0x680 /* Chan 1 ECC Error Log (64b) */ | ||
91 | #define I3200_ECCERRLOG_CE 0x1 | ||
92 | #define I3200_ECCERRLOG_UE 0x2 | ||
93 | #define I3200_ECCERRLOG_RANK_BITS 0x18000000 | ||
94 | #define I3200_ECCERRLOG_RANK_SHIFT 27 | ||
95 | #define I3200_ECCERRLOG_SYNDROME_BITS 0xff0000 | ||
96 | #define I3200_ECCERRLOG_SYNDROME_SHIFT 16 | ||
97 | #define I3200_CAPID0 0xe0 /* P.95 of spec for details */ | ||
98 | |||
99 | struct i3200_priv { | ||
100 | void __iomem *window; | ||
101 | }; | ||
102 | |||
103 | static int nr_channels; | ||
104 | |||
105 | static int how_many_channels(struct pci_dev *pdev) | ||
106 | { | ||
107 | unsigned char capid0_8b; /* 8th byte of CAPID0 */ | ||
108 | |||
109 | pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); | ||
110 | if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ | ||
111 | debugf0("In single channel mode.\n"); | ||
112 | return 1; | ||
113 | } else { | ||
114 | debugf0("In dual channel mode.\n"); | ||
115 | return 2; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | static unsigned long eccerrlog_syndrome(u64 log) | ||
120 | { | ||
121 | return (log & I3200_ECCERRLOG_SYNDROME_BITS) >> | ||
122 | I3200_ECCERRLOG_SYNDROME_SHIFT; | ||
123 | } | ||
124 | |||
125 | static int eccerrlog_row(int channel, u64 log) | ||
126 | { | ||
127 | u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >> | ||
128 | I3200_ECCERRLOG_RANK_SHIFT); | ||
129 | return rank | (channel * I3200_RANKS_PER_CHANNEL); | ||
130 | } | ||
131 | |||
132 | enum i3200_chips { | ||
133 | I3200 = 0, | ||
134 | }; | ||
135 | |||
136 | struct i3200_dev_info { | ||
137 | const char *ctl_name; | ||
138 | }; | ||
139 | |||
140 | struct i3200_error_info { | ||
141 | u16 errsts; | ||
142 | u16 errsts2; | ||
143 | u64 eccerrlog[I3200_CHANNELS]; | ||
144 | }; | ||
145 | |||
146 | static const struct i3200_dev_info i3200_devs[] = { | ||
147 | [I3200] = { | ||
148 | .ctl_name = "i3200" | ||
149 | }, | ||
150 | }; | ||
151 | |||
152 | static struct pci_dev *mci_pdev; | ||
153 | static int i3200_registered = 1; | ||
154 | |||
155 | |||
156 | static void i3200_clear_error_info(struct mem_ctl_info *mci) | ||
157 | { | ||
158 | struct pci_dev *pdev; | ||
159 | |||
160 | pdev = to_pci_dev(mci->dev); | ||
161 | |||
162 | /* | ||
163 | * Clear any error bits. | ||
164 | * (Yes, we really clear bits by writing 1 to them.) | ||
165 | */ | ||
166 | pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS, | ||
167 | I3200_ERRSTS_BITS); | ||
168 | } | ||
169 | |||
170 | static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci, | ||
171 | struct i3200_error_info *info) | ||
172 | { | ||
173 | struct pci_dev *pdev; | ||
174 | struct i3200_priv *priv = mci->pvt_info; | ||
175 | void __iomem *window = priv->window; | ||
176 | |||
177 | pdev = to_pci_dev(mci->dev); | ||
178 | |||
179 | /* | ||
180 | * This is a mess because there is no atomic way to read all the | ||
181 | * registers at once and the registers can transition from CE being | ||
182 | * overwritten by UE. | ||
183 | */ | ||
184 | pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts); | ||
185 | if (!(info->errsts & I3200_ERRSTS_BITS)) | ||
186 | return; | ||
187 | |||
188 | info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG); | ||
189 | if (nr_channels == 2) | ||
190 | info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG); | ||
191 | |||
192 | pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2); | ||
193 | |||
194 | /* | ||
195 | * If the error is the same for both reads then the first set | ||
196 | * of reads is valid. If there is a change then there is a CE | ||
197 | * with no info and the second set of reads is valid and | ||
198 | * should be UE info. | ||
199 | */ | ||
200 | if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) { | ||
201 | info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG); | ||
202 | if (nr_channels == 2) | ||
203 | info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG); | ||
204 | } | ||
205 | |||
206 | i3200_clear_error_info(mci); | ||
207 | } | ||
208 | |||
209 | static void i3200_process_error_info(struct mem_ctl_info *mci, | ||
210 | struct i3200_error_info *info) | ||
211 | { | ||
212 | int channel; | ||
213 | u64 log; | ||
214 | |||
215 | if (!(info->errsts & I3200_ERRSTS_BITS)) | ||
216 | return; | ||
217 | |||
218 | if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) { | ||
219 | edac_mc_handle_ce_no_info(mci, "UE overwrote CE"); | ||
220 | info->errsts = info->errsts2; | ||
221 | } | ||
222 | |||
223 | for (channel = 0; channel < nr_channels; channel++) { | ||
224 | log = info->eccerrlog[channel]; | ||
225 | if (log & I3200_ECCERRLOG_UE) { | ||
226 | edac_mc_handle_ue(mci, 0, 0, | ||
227 | eccerrlog_row(channel, log), | ||
228 | "i3200 UE"); | ||
229 | } else if (log & I3200_ECCERRLOG_CE) { | ||
230 | edac_mc_handle_ce(mci, 0, 0, | ||
231 | eccerrlog_syndrome(log), | ||
232 | eccerrlog_row(channel, log), 0, | ||
233 | "i3200 CE"); | ||
234 | } | ||
235 | } | ||
236 | } | ||
237 | |||
238 | static void i3200_check(struct mem_ctl_info *mci) | ||
239 | { | ||
240 | struct i3200_error_info info; | ||
241 | |||
242 | debugf1("MC%d: %s()\n", mci->mc_idx, __func__); | ||
243 | i3200_get_and_clear_error_info(mci, &info); | ||
244 | i3200_process_error_info(mci, &info); | ||
245 | } | ||
246 | |||
247 | |||
248 | void __iomem *i3200_map_mchbar(struct pci_dev *pdev) | ||
249 | { | ||
250 | union { | ||
251 | u64 mchbar; | ||
252 | struct { | ||
253 | u32 mchbar_low; | ||
254 | u32 mchbar_high; | ||
255 | }; | ||
256 | } u; | ||
257 | void __iomem *window; | ||
258 | |||
259 | pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low); | ||
260 | pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high); | ||
261 | u.mchbar &= I3200_MCHBAR_MASK; | ||
262 | |||
263 | if (u.mchbar != (resource_size_t)u.mchbar) { | ||
264 | printk(KERN_ERR | ||
265 | "i3200: mmio space beyond accessible range (0x%llx)\n", | ||
266 | (unsigned long long)u.mchbar); | ||
267 | return NULL; | ||
268 | } | ||
269 | |||
270 | window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE); | ||
271 | if (!window) | ||
272 | printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n", | ||
273 | (unsigned long long)u.mchbar); | ||
274 | |||
275 | return window; | ||
276 | } | ||
277 | |||
278 | |||
279 | static void i3200_get_drbs(void __iomem *window, | ||
280 | u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]) | ||
281 | { | ||
282 | int i; | ||
283 | |||
284 | for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { | ||
285 | drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; | ||
286 | drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; | ||
287 | } | ||
288 | } | ||
289 | |||
290 | static bool i3200_is_stacked(struct pci_dev *pdev, | ||
291 | u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]) | ||
292 | { | ||
293 | u16 tom; | ||
294 | |||
295 | pci_read_config_word(pdev, I3200_TOM, &tom); | ||
296 | tom &= I3200_TOM_MASK; | ||
297 | |||
298 | return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom; | ||
299 | } | ||
300 | |||
301 | static unsigned long drb_to_nr_pages( | ||
302 | u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked, | ||
303 | int channel, int rank) | ||
304 | { | ||
305 | int n; | ||
306 | |||
307 | n = drbs[channel][rank]; | ||
308 | if (rank > 0) | ||
309 | n -= drbs[channel][rank - 1]; | ||
310 | if (stacked && (channel == 1) && | ||
311 | drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1]) | ||
312 | n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1]; | ||
313 | |||
314 | n <<= (I3200_DRB_SHIFT - PAGE_SHIFT); | ||
315 | return n; | ||
316 | } | ||
317 | |||
318 | static int i3200_probe1(struct pci_dev *pdev, int dev_idx) | ||
319 | { | ||
320 | int rc; | ||
321 | int i; | ||
322 | struct mem_ctl_info *mci = NULL; | ||
323 | unsigned long last_page; | ||
324 | u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL]; | ||
325 | bool stacked; | ||
326 | void __iomem *window; | ||
327 | struct i3200_priv *priv; | ||
328 | |||
329 | debugf0("MC: %s()\n", __func__); | ||
330 | |||
331 | window = i3200_map_mchbar(pdev); | ||
332 | if (!window) | ||
333 | return -ENODEV; | ||
334 | |||
335 | i3200_get_drbs(window, drbs); | ||
336 | nr_channels = how_many_channels(pdev); | ||
337 | |||
338 | mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS, | ||
339 | nr_channels, 0); | ||
340 | if (!mci) | ||
341 | return -ENOMEM; | ||
342 | |||
343 | debugf3("MC: %s(): init mci\n", __func__); | ||
344 | |||
345 | mci->dev = &pdev->dev; | ||
346 | mci->mtype_cap = MEM_FLAG_DDR2; | ||
347 | |||
348 | mci->edac_ctl_cap = EDAC_FLAG_SECDED; | ||
349 | mci->edac_cap = EDAC_FLAG_SECDED; | ||
350 | |||
351 | mci->mod_name = EDAC_MOD_STR; | ||
352 | mci->mod_ver = I3200_REVISION; | ||
353 | mci->ctl_name = i3200_devs[dev_idx].ctl_name; | ||
354 | mci->dev_name = pci_name(pdev); | ||
355 | mci->edac_check = i3200_check; | ||
356 | mci->ctl_page_to_phys = NULL; | ||
357 | priv = mci->pvt_info; | ||
358 | priv->window = window; | ||
359 | |||
360 | stacked = i3200_is_stacked(pdev, drbs); | ||
361 | |||
362 | /* | ||
363 | * The dram rank boundary (DRB) reg values are boundary addresses | ||
364 | * for each DRAM rank with a granularity of 64MB. DRB regs are | ||
365 | * cumulative; the last one will contain the total memory | ||
366 | * contained in all ranks. | ||
367 | */ | ||
368 | last_page = -1UL; | ||
369 | for (i = 0; i < mci->nr_csrows; i++) { | ||
370 | unsigned long nr_pages; | ||
371 | struct csrow_info *csrow = &mci->csrows[i]; | ||
372 | |||
373 | nr_pages = drb_to_nr_pages(drbs, stacked, | ||
374 | i / I3200_RANKS_PER_CHANNEL, | ||
375 | i % I3200_RANKS_PER_CHANNEL); | ||
376 | |||
377 | if (nr_pages == 0) { | ||
378 | csrow->mtype = MEM_EMPTY; | ||
379 | continue; | ||
380 | } | ||
381 | |||
382 | csrow->first_page = last_page + 1; | ||
383 | last_page += nr_pages; | ||
384 | csrow->last_page = last_page; | ||
385 | csrow->nr_pages = nr_pages; | ||
386 | |||
387 | csrow->grain = nr_pages << PAGE_SHIFT; | ||
388 | csrow->mtype = MEM_DDR2; | ||
389 | csrow->dtype = DEV_UNKNOWN; | ||
390 | csrow->edac_mode = EDAC_UNKNOWN; | ||
391 | } | ||
392 | |||
393 | i3200_clear_error_info(mci); | ||
394 | |||
395 | rc = -ENODEV; | ||
396 | if (edac_mc_add_mc(mci)) { | ||
397 | debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__); | ||
398 | goto fail; | ||
399 | } | ||
400 | |||
401 | /* get this far and it's successful */ | ||
402 | debugf3("MC: %s(): success\n", __func__); | ||
403 | return 0; | ||
404 | |||
405 | fail: | ||
406 | iounmap(window); | ||
407 | if (mci) | ||
408 | edac_mc_free(mci); | ||
409 | |||
410 | return rc; | ||
411 | } | ||
412 | |||
413 | static int __devinit i3200_init_one(struct pci_dev *pdev, | ||
414 | const struct pci_device_id *ent) | ||
415 | { | ||
416 | int rc; | ||
417 | |||
418 | debugf0("MC: %s()\n", __func__); | ||
419 | |||
420 | if (pci_enable_device(pdev) < 0) | ||
421 | return -EIO; | ||
422 | |||
423 | rc = i3200_probe1(pdev, ent->driver_data); | ||
424 | if (!mci_pdev) | ||
425 | mci_pdev = pci_dev_get(pdev); | ||
426 | |||
427 | return rc; | ||
428 | } | ||
429 | |||
430 | static void __devexit i3200_remove_one(struct pci_dev *pdev) | ||
431 | { | ||
432 | struct mem_ctl_info *mci; | ||
433 | struct i3200_priv *priv; | ||
434 | |||
435 | debugf0("%s()\n", __func__); | ||
436 | |||
437 | mci = edac_mc_del_mc(&pdev->dev); | ||
438 | if (!mci) | ||
439 | return; | ||
440 | |||
441 | priv = mci->pvt_info; | ||
442 | iounmap(priv->window); | ||
443 | |||
444 | edac_mc_free(mci); | ||
445 | } | ||
446 | |||
447 | static const struct pci_device_id i3200_pci_tbl[] __devinitdata = { | ||
448 | { | ||
449 | PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, | ||
450 | I3200}, | ||
451 | { | ||
452 | 0, | ||
453 | } /* 0 terminated list. */ | ||
454 | }; | ||
455 | |||
456 | MODULE_DEVICE_TABLE(pci, i3200_pci_tbl); | ||
457 | |||
458 | static struct pci_driver i3200_driver = { | ||
459 | .name = EDAC_MOD_STR, | ||
460 | .probe = i3200_init_one, | ||
461 | .remove = __devexit_p(i3200_remove_one), | ||
462 | .id_table = i3200_pci_tbl, | ||
463 | }; | ||
464 | |||
465 | static int __init i3200_init(void) | ||
466 | { | ||
467 | int pci_rc; | ||
468 | |||
469 | debugf3("MC: %s()\n", __func__); | ||
470 | |||
471 | /* Ensure that the OPSTATE is set correctly for POLL or NMI */ | ||
472 | opstate_init(); | ||
473 | |||
474 | pci_rc = pci_register_driver(&i3200_driver); | ||
475 | if (pci_rc < 0) | ||
476 | goto fail0; | ||
477 | |||
478 | if (!mci_pdev) { | ||
479 | i3200_registered = 0; | ||
480 | mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL, | ||
481 | PCI_DEVICE_ID_INTEL_3200_HB, NULL); | ||
482 | if (!mci_pdev) { | ||
483 | debugf0("i3200 pci_get_device fail\n"); | ||
484 | pci_rc = -ENODEV; | ||
485 | goto fail1; | ||
486 | } | ||
487 | |||
488 | pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl); | ||
489 | if (pci_rc < 0) { | ||
490 | debugf0("i3200 init fail\n"); | ||
491 | pci_rc = -ENODEV; | ||
492 | goto fail1; | ||
493 | } | ||
494 | } | ||
495 | |||
496 | return 0; | ||
497 | |||
498 | fail1: | ||
499 | pci_unregister_driver(&i3200_driver); | ||
500 | |||
501 | fail0: | ||
502 | if (mci_pdev) | ||
503 | pci_dev_put(mci_pdev); | ||
504 | |||
505 | return pci_rc; | ||
506 | } | ||
507 | |||
508 | static void __exit i3200_exit(void) | ||
509 | { | ||
510 | debugf3("MC: %s()\n", __func__); | ||
511 | |||
512 | pci_unregister_driver(&i3200_driver); | ||
513 | if (!i3200_registered) { | ||
514 | i3200_remove_one(mci_pdev); | ||
515 | pci_dev_put(mci_pdev); | ||
516 | } | ||
517 | } | ||
518 | |||
519 | module_init(i3200_init); | ||
520 | module_exit(i3200_exit); | ||
521 | |||
522 | MODULE_LICENSE("GPL"); | ||
523 | MODULE_AUTHOR("Akamai Technologies, Inc."); | ||
524 | MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers"); | ||
525 | |||
526 | module_param(edac_op_state, int, 0444); | ||
527 | MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); | ||
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index 3f2ccfc6407c..157f6504f25e 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c | |||
@@ -41,7 +41,9 @@ static u32 orig_pci_err_en; | |||
41 | #endif | 41 | #endif |
42 | 42 | ||
43 | static u32 orig_l2_err_disable; | 43 | static u32 orig_l2_err_disable; |
44 | #ifdef CONFIG_MPC85xx | ||
44 | static u32 orig_hid1[2]; | 45 | static u32 orig_hid1[2]; |
46 | #endif | ||
45 | 47 | ||
46 | /************************ MC SYSFS parts ***********************************/ | 48 | /************************ MC SYSFS parts ***********************************/ |
47 | 49 | ||
@@ -646,6 +648,7 @@ static struct of_device_id mpc85xx_l2_err_of_match[] = { | |||
646 | { .compatible = "fsl,mpc8560-l2-cache-controller", }, | 648 | { .compatible = "fsl,mpc8560-l2-cache-controller", }, |
647 | { .compatible = "fsl,mpc8568-l2-cache-controller", }, | 649 | { .compatible = "fsl,mpc8568-l2-cache-controller", }, |
648 | { .compatible = "fsl,mpc8572-l2-cache-controller", }, | 650 | { .compatible = "fsl,mpc8572-l2-cache-controller", }, |
651 | { .compatible = "fsl,p2020-l2-cache-controller", }, | ||
649 | {}, | 652 | {}, |
650 | }; | 653 | }; |
651 | 654 | ||
@@ -788,19 +791,20 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci) | |||
788 | csrow = &mci->csrows[index]; | 791 | csrow = &mci->csrows[index]; |
789 | cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 + | 792 | cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 + |
790 | (index * MPC85XX_MC_CS_BNDS_OFS)); | 793 | (index * MPC85XX_MC_CS_BNDS_OFS)); |
791 | start = (cs_bnds & 0xfff0000) << 4; | 794 | |
792 | end = ((cs_bnds & 0xfff) << 20); | 795 | start = (cs_bnds & 0xffff0000) >> 16; |
793 | if (start) | 796 | end = (cs_bnds & 0x0000ffff); |
794 | start |= 0xfffff; | ||
795 | if (end) | ||
796 | end |= 0xfffff; | ||
797 | 797 | ||
798 | if (start == end) | 798 | if (start == end) |
799 | continue; /* not populated */ | 799 | continue; /* not populated */ |
800 | 800 | ||
801 | start <<= (24 - PAGE_SHIFT); | ||
802 | end <<= (24 - PAGE_SHIFT); | ||
803 | end |= (1 << (24 - PAGE_SHIFT)) - 1; | ||
804 | |||
801 | csrow->first_page = start >> PAGE_SHIFT; | 805 | csrow->first_page = start >> PAGE_SHIFT; |
802 | csrow->last_page = end >> PAGE_SHIFT; | 806 | csrow->last_page = end >> PAGE_SHIFT; |
803 | csrow->nr_pages = csrow->last_page + 1 - csrow->first_page; | 807 | csrow->nr_pages = end + 1 - start; |
804 | csrow->grain = 8; | 808 | csrow->grain = 8; |
805 | csrow->mtype = mtype; | 809 | csrow->mtype = mtype; |
806 | csrow->dtype = DEV_UNKNOWN; | 810 | csrow->dtype = DEV_UNKNOWN; |
@@ -984,6 +988,8 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = { | |||
984 | { .compatible = "fsl,mpc8560-memory-controller", }, | 988 | { .compatible = "fsl,mpc8560-memory-controller", }, |
985 | { .compatible = "fsl,mpc8568-memory-controller", }, | 989 | { .compatible = "fsl,mpc8568-memory-controller", }, |
986 | { .compatible = "fsl,mpc8572-memory-controller", }, | 990 | { .compatible = "fsl,mpc8572-memory-controller", }, |
991 | { .compatible = "fsl,mpc8349-memory-controller", }, | ||
992 | { .compatible = "fsl,p2020-memory-controller", }, | ||
987 | {}, | 993 | {}, |
988 | }; | 994 | }; |
989 | 995 | ||
@@ -999,13 +1005,13 @@ static struct of_platform_driver mpc85xx_mc_err_driver = { | |||
999 | }, | 1005 | }, |
1000 | }; | 1006 | }; |
1001 | 1007 | ||
1002 | 1008 | #ifdef CONFIG_MPC85xx | |
1003 | static void __init mpc85xx_mc_clear_rfxe(void *data) | 1009 | static void __init mpc85xx_mc_clear_rfxe(void *data) |
1004 | { | 1010 | { |
1005 | orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1); | 1011 | orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1); |
1006 | mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~0x20000)); | 1012 | mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~0x20000)); |
1007 | } | 1013 | } |
1008 | 1014 | #endif | |
1009 | 1015 | ||
1010 | static int __init mpc85xx_mc_init(void) | 1016 | static int __init mpc85xx_mc_init(void) |
1011 | { | 1017 | { |
@@ -1038,26 +1044,32 @@ static int __init mpc85xx_mc_init(void) | |||
1038 | printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n"); | 1044 | printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n"); |
1039 | #endif | 1045 | #endif |
1040 | 1046 | ||
1047 | #ifdef CONFIG_MPC85xx | ||
1041 | /* | 1048 | /* |
1042 | * need to clear HID1[RFXE] to disable machine check int | 1049 | * need to clear HID1[RFXE] to disable machine check int |
1043 | * so we can catch it | 1050 | * so we can catch it |
1044 | */ | 1051 | */ |
1045 | if (edac_op_state == EDAC_OPSTATE_INT) | 1052 | if (edac_op_state == EDAC_OPSTATE_INT) |
1046 | on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0); | 1053 | on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0); |
1054 | #endif | ||
1047 | 1055 | ||
1048 | return 0; | 1056 | return 0; |
1049 | } | 1057 | } |
1050 | 1058 | ||
1051 | module_init(mpc85xx_mc_init); | 1059 | module_init(mpc85xx_mc_init); |
1052 | 1060 | ||
1061 | #ifdef CONFIG_MPC85xx | ||
1053 | static void __exit mpc85xx_mc_restore_hid1(void *data) | 1062 | static void __exit mpc85xx_mc_restore_hid1(void *data) |
1054 | { | 1063 | { |
1055 | mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]); | 1064 | mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]); |
1056 | } | 1065 | } |
1066 | #endif | ||
1057 | 1067 | ||
1058 | static void __exit mpc85xx_mc_exit(void) | 1068 | static void __exit mpc85xx_mc_exit(void) |
1059 | { | 1069 | { |
1070 | #ifdef CONFIG_MPC85xx | ||
1060 | on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0); | 1071 | on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0); |
1072 | #endif | ||
1061 | #ifdef CONFIG_PCI | 1073 | #ifdef CONFIG_PCI |
1062 | of_unregister_platform_driver(&mpc85xx_pci_err_driver); | 1074 | of_unregister_platform_driver(&mpc85xx_pci_err_driver); |
1063 | #endif | 1075 | #endif |
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 5131aaae8e03..a6b9fec13a74 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c | |||
@@ -90,7 +90,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev) | |||
90 | return -ENOENT; | 90 | return -ENOENT; |
91 | } | 91 | } |
92 | 92 | ||
93 | pci_serr = ioremap(r->start, r->end - r->start + 1); | 93 | pci_serr = ioremap(r->start, resource_size(r)); |
94 | if (!pci_serr) | 94 | if (!pci_serr) |
95 | return -ENOMEM; | 95 | return -ENOMEM; |
96 | 96 | ||
@@ -140,7 +140,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) | |||
140 | 140 | ||
141 | if (!devm_request_mem_region(&pdev->dev, | 141 | if (!devm_request_mem_region(&pdev->dev, |
142 | r->start, | 142 | r->start, |
143 | r->end - r->start + 1, | 143 | resource_size(r), |
144 | pdata->name)) { | 144 | pdata->name)) { |
145 | printk(KERN_ERR "%s: Error while requesting mem region\n", | 145 | printk(KERN_ERR "%s: Error while requesting mem region\n", |
146 | __func__); | 146 | __func__); |
@@ -150,7 +150,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev) | |||
150 | 150 | ||
151 | pdata->pci_vbase = devm_ioremap(&pdev->dev, | 151 | pdata->pci_vbase = devm_ioremap(&pdev->dev, |
152 | r->start, | 152 | r->start, |
153 | r->end - r->start + 1); | 153 | resource_size(r)); |
154 | if (!pdata->pci_vbase) { | 154 | if (!pdata->pci_vbase) { |
155 | printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__); | 155 | printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__); |
156 | res = -ENOMEM; | 156 | res = -ENOMEM; |
@@ -306,7 +306,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev) | |||
306 | 306 | ||
307 | if (!devm_request_mem_region(&pdev->dev, | 307 | if (!devm_request_mem_region(&pdev->dev, |
308 | r->start, | 308 | r->start, |
309 | r->end - r->start + 1, | 309 | resource_size(r), |
310 | pdata->name)) { | 310 | pdata->name)) { |
311 | printk(KERN_ERR "%s: Error while request mem region\n", | 311 | printk(KERN_ERR "%s: Error while request mem region\n", |
312 | __func__); | 312 | __func__); |
@@ -316,7 +316,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev) | |||
316 | 316 | ||
317 | pdata->sram_vbase = devm_ioremap(&pdev->dev, | 317 | pdata->sram_vbase = devm_ioremap(&pdev->dev, |
318 | r->start, | 318 | r->start, |
319 | r->end - r->start + 1); | 319 | resource_size(r)); |
320 | if (!pdata->sram_vbase) { | 320 | if (!pdata->sram_vbase) { |
321 | printk(KERN_ERR "%s: Unable to setup SRAM err regs\n", | 321 | printk(KERN_ERR "%s: Unable to setup SRAM err regs\n", |
322 | __func__); | 322 | __func__); |
@@ -474,7 +474,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) | |||
474 | 474 | ||
475 | if (!devm_request_mem_region(&pdev->dev, | 475 | if (!devm_request_mem_region(&pdev->dev, |
476 | r->start, | 476 | r->start, |
477 | r->end - r->start + 1, | 477 | resource_size(r), |
478 | pdata->name)) { | 478 | pdata->name)) { |
479 | printk(KERN_ERR "%s: Error while requesting mem region\n", | 479 | printk(KERN_ERR "%s: Error while requesting mem region\n", |
480 | __func__); | 480 | __func__); |
@@ -484,7 +484,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) | |||
484 | 484 | ||
485 | pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev, | 485 | pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev, |
486 | r->start, | 486 | r->start, |
487 | r->end - r->start + 1); | 487 | resource_size(r)); |
488 | if (!pdata->cpu_vbase[0]) { | 488 | if (!pdata->cpu_vbase[0]) { |
489 | printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__); | 489 | printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__); |
490 | res = -ENOMEM; | 490 | res = -ENOMEM; |
@@ -501,7 +501,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) | |||
501 | 501 | ||
502 | if (!devm_request_mem_region(&pdev->dev, | 502 | if (!devm_request_mem_region(&pdev->dev, |
503 | r->start, | 503 | r->start, |
504 | r->end - r->start + 1, | 504 | resource_size(r), |
505 | pdata->name)) { | 505 | pdata->name)) { |
506 | printk(KERN_ERR "%s: Error while requesting mem region\n", | 506 | printk(KERN_ERR "%s: Error while requesting mem region\n", |
507 | __func__); | 507 | __func__); |
@@ -511,7 +511,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev) | |||
511 | 511 | ||
512 | pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev, | 512 | pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev, |
513 | r->start, | 513 | r->start, |
514 | r->end - r->start + 1); | 514 | resource_size(r)); |
515 | if (!pdata->cpu_vbase[1]) { | 515 | if (!pdata->cpu_vbase[1]) { |
516 | printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__); | 516 | printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__); |
517 | res = -ENOMEM; | 517 | res = -ENOMEM; |
@@ -726,7 +726,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) | |||
726 | 726 | ||
727 | if (!devm_request_mem_region(&pdev->dev, | 727 | if (!devm_request_mem_region(&pdev->dev, |
728 | r->start, | 728 | r->start, |
729 | r->end - r->start + 1, | 729 | resource_size(r), |
730 | pdata->name)) { | 730 | pdata->name)) { |
731 | printk(KERN_ERR "%s: Error while requesting mem region\n", | 731 | printk(KERN_ERR "%s: Error while requesting mem region\n", |
732 | __func__); | 732 | __func__); |
@@ -736,7 +736,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev) | |||
736 | 736 | ||
737 | pdata->mc_vbase = devm_ioremap(&pdev->dev, | 737 | pdata->mc_vbase = devm_ioremap(&pdev->dev, |
738 | r->start, | 738 | r->start, |
739 | r->end - r->start + 1); | 739 | resource_size(r)); |
740 | if (!pdata->mc_vbase) { | 740 | if (!pdata->mc_vbase) { |
741 | printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__); | 741 | printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__); |
742 | res = -ENOMEM; | 742 | res = -ENOMEM; |
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c index 949c97ff57e3..1f20a042a4f5 100644 --- a/drivers/idle/i7300_idle.c +++ b/drivers/idle/i7300_idle.c | |||
@@ -29,8 +29,8 @@ | |||
29 | 29 | ||
30 | #include <asm/idle.h> | 30 | #include <asm/idle.h> |
31 | 31 | ||
32 | #include "../dma/ioatdma_hw.h" | 32 | #include "../dma/ioat/hw.h" |
33 | #include "../dma/ioatdma_registers.h" | 33 | #include "../dma/ioat/registers.h" |
34 | 34 | ||
35 | #define I7300_IDLE_DRIVER_VERSION "1.55" | 35 | #define I7300_IDLE_DRIVER_VERSION "1.55" |
36 | #define I7300_PRINT "i7300_idle:" | 36 | #define I7300_PRINT "i7300_idle:" |
@@ -126,9 +126,9 @@ static void i7300_idle_ioat_stop(void) | |||
126 | udelay(10); | 126 | udelay(10); |
127 | 127 | ||
128 | sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & | 128 | sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & |
129 | IOAT_CHANSTS_DMA_TRANSFER_STATUS; | 129 | IOAT_CHANSTS_STATUS; |
130 | 130 | ||
131 | if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) | 131 | if (sts != IOAT_CHANSTS_ACTIVE) |
132 | break; | 132 | break; |
133 | 133 | ||
134 | } | 134 | } |
@@ -160,9 +160,9 @@ static int __init i7300_idle_ioat_selftest(u8 *ctl, | |||
160 | udelay(1000); | 160 | udelay(1000); |
161 | 161 | ||
162 | chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & | 162 | chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & |
163 | IOAT_CHANSTS_DMA_TRANSFER_STATUS; | 163 | IOAT_CHANSTS_STATUS; |
164 | 164 | ||
165 | if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) { | 165 | if (chan_sts != IOAT_CHANSTS_DONE) { |
166 | /* Not complete, reset the channel */ | 166 | /* Not complete, reset the channel */ |
167 | writeb(IOAT_CHANCMD_RESET, | 167 | writeb(IOAT_CHANCMD_RESET, |
168 | ioat_chanbase + IOAT1_CHANCMD_OFFSET); | 168 | ioat_chanbase + IOAT1_CHANCMD_OFFSET); |
@@ -288,9 +288,9 @@ static void __exit i7300_idle_ioat_exit(void) | |||
288 | ioat_chanbase + IOAT1_CHANCMD_OFFSET); | 288 | ioat_chanbase + IOAT1_CHANCMD_OFFSET); |
289 | 289 | ||
290 | chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & | 290 | chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & |
291 | IOAT_CHANSTS_DMA_TRANSFER_STATUS; | 291 | IOAT_CHANSTS_STATUS; |
292 | 292 | ||
293 | if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { | 293 | if (chan_sts != IOAT_CHANSTS_ACTIVE) { |
294 | writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET); | 294 | writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET); |
295 | break; | 295 | break; |
296 | } | 296 | } |
@@ -298,14 +298,14 @@ static void __exit i7300_idle_ioat_exit(void) | |||
298 | } | 298 | } |
299 | 299 | ||
300 | chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & | 300 | chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) & |
301 | IOAT_CHANSTS_DMA_TRANSFER_STATUS; | 301 | IOAT_CHANSTS_STATUS; |
302 | 302 | ||
303 | /* | 303 | /* |
304 | * We tried to reset multiple times. If IO A/T channel is still active | 304 | * We tried to reset multiple times. If IO A/T channel is still active |
305 | * flag an error and return without cleanup. Memory leak is better | 305 | * flag an error and return without cleanup. Memory leak is better |
306 | * than random corruption in that extreme error situation. | 306 | * than random corruption in that extreme error situation. |
307 | */ | 307 | */ |
308 | if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) { | 308 | if (chan_sts == IOAT_CHANSTS_ACTIVE) { |
309 | printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels." | 309 | printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels." |
310 | " Not freeing resources\n"); | 310 | " Not freeing resources\n"); |
311 | return; | 311 | return; |
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 76d6751f89a7..02f4f8f1db6f 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig | |||
@@ -225,6 +225,7 @@ config INPUT_SGI_BTNS | |||
225 | config INPUT_WINBOND_CIR | 225 | config INPUT_WINBOND_CIR |
226 | tristate "Winbond IR remote control" | 226 | tristate "Winbond IR remote control" |
227 | depends on X86 && PNP | 227 | depends on X86 && PNP |
228 | select NEW_LEDS | ||
228 | select LEDS_CLASS | 229 | select LEDS_CLASS |
229 | select BITREVERSE | 230 | select BITREVERSE |
230 | help | 231 | help |
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 020f9573fd82..2158377a1359 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig | |||
@@ -124,6 +124,8 @@ config MD_RAID456 | |||
124 | select MD_RAID6_PQ | 124 | select MD_RAID6_PQ |
125 | select ASYNC_MEMCPY | 125 | select ASYNC_MEMCPY |
126 | select ASYNC_XOR | 126 | select ASYNC_XOR |
127 | select ASYNC_PQ | ||
128 | select ASYNC_RAID6_RECOV | ||
127 | ---help--- | 129 | ---help--- |
128 | A RAID-5 set of N drives with a capacity of C MB per drive provides | 130 | A RAID-5 set of N drives with a capacity of C MB per drive provides |
129 | the capacity of C * (N - 1) MB, and protects against a failure | 131 | the capacity of C * (N - 1) MB, and protects against a failure |
@@ -152,9 +154,33 @@ config MD_RAID456 | |||
152 | 154 | ||
153 | If unsure, say Y. | 155 | If unsure, say Y. |
154 | 156 | ||
157 | config MULTICORE_RAID456 | ||
158 | bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)" | ||
159 | depends on MD_RAID456 | ||
160 | depends on SMP | ||
161 | depends on EXPERIMENTAL | ||
162 | ---help--- | ||
163 | Enable the raid456 module to dispatch per-stripe raid operations to a | ||
164 | thread pool. | ||
165 | |||
166 | If unsure, say N. | ||
167 | |||
155 | config MD_RAID6_PQ | 168 | config MD_RAID6_PQ |
156 | tristate | 169 | tristate |
157 | 170 | ||
171 | config ASYNC_RAID6_TEST | ||
172 | tristate "Self test for hardware accelerated raid6 recovery" | ||
173 | depends on MD_RAID6_PQ | ||
174 | select ASYNC_RAID6_RECOV | ||
175 | ---help--- | ||
176 | This is a one-shot self test that permutes through the | ||
177 | recovery of all the possible two disk failure scenarios for a | ||
178 | N-disk array. Recovery is performed with the asynchronous | ||
179 | raid6 recovery routines, and will optionally use an offload | ||
180 | engine if one is available. | ||
181 | |||
182 | If unsure, say N. | ||
183 | |||
158 | config MD_MULTIPATH | 184 | config MD_MULTIPATH |
159 | tristate "Multipath I/O support" | 185 | tristate "Multipath I/O support" |
160 | depends on BLK_DEV_MD | 186 | depends on BLK_DEV_MD |
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 3319c2fec28e..6986b0059d23 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -108,6 +108,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) | |||
108 | * allocated while we're using it | 108 | * allocated while we're using it |
109 | */ | 109 | */ |
110 | static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) | 110 | static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) |
111 | __releases(bitmap->lock) | ||
112 | __acquires(bitmap->lock) | ||
111 | { | 113 | { |
112 | unsigned char *mappage; | 114 | unsigned char *mappage; |
113 | 115 | ||
@@ -325,7 +327,6 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) | |||
325 | return 0; | 327 | return 0; |
326 | 328 | ||
327 | bad_alignment: | 329 | bad_alignment: |
328 | rcu_read_unlock(); | ||
329 | return -EINVAL; | 330 | return -EINVAL; |
330 | } | 331 | } |
331 | 332 | ||
@@ -1207,6 +1208,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) | |||
1207 | static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, | 1208 | static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, |
1208 | sector_t offset, int *blocks, | 1209 | sector_t offset, int *blocks, |
1209 | int create) | 1210 | int create) |
1211 | __releases(bitmap->lock) | ||
1212 | __acquires(bitmap->lock) | ||
1210 | { | 1213 | { |
1211 | /* If 'create', we might release the lock and reclaim it. | 1214 | /* If 'create', we might release the lock and reclaim it. |
1212 | * The lock must have been taken with interrupts enabled. | 1215 | * The lock must have been taken with interrupts enabled. |
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index ea4842905444..1ceceb334d5e 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -108,6 +108,9 @@ static int linear_congested(void *data, int bits) | |||
108 | linear_conf_t *conf; | 108 | linear_conf_t *conf; |
109 | int i, ret = 0; | 109 | int i, ret = 0; |
110 | 110 | ||
111 | if (mddev_congested(mddev, bits)) | ||
112 | return 1; | ||
113 | |||
111 | rcu_read_lock(); | 114 | rcu_read_lock(); |
112 | conf = rcu_dereference(mddev->private); | 115 | conf = rcu_dereference(mddev->private); |
113 | 116 | ||
diff --git a/drivers/md/md.c b/drivers/md/md.c index 6aa497e4baf8..26ba42a79129 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -262,6 +262,12 @@ static void mddev_resume(mddev_t *mddev) | |||
262 | mddev->pers->quiesce(mddev, 0); | 262 | mddev->pers->quiesce(mddev, 0); |
263 | } | 263 | } |
264 | 264 | ||
265 | int mddev_congested(mddev_t *mddev, int bits) | ||
266 | { | ||
267 | return mddev->suspended; | ||
268 | } | ||
269 | EXPORT_SYMBOL(mddev_congested); | ||
270 | |||
265 | 271 | ||
266 | static inline mddev_t *mddev_get(mddev_t *mddev) | 272 | static inline mddev_t *mddev_get(mddev_t *mddev) |
267 | { | 273 | { |
@@ -4218,7 +4224,7 @@ static int do_md_run(mddev_t * mddev) | |||
4218 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 4224 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
4219 | mddev->sync_thread = md_register_thread(md_do_sync, | 4225 | mddev->sync_thread = md_register_thread(md_do_sync, |
4220 | mddev, | 4226 | mddev, |
4221 | "%s_resync"); | 4227 | "resync"); |
4222 | if (!mddev->sync_thread) { | 4228 | if (!mddev->sync_thread) { |
4223 | printk(KERN_ERR "%s: could not start resync" | 4229 | printk(KERN_ERR "%s: could not start resync" |
4224 | " thread...\n", | 4230 | " thread...\n", |
@@ -4575,10 +4581,10 @@ static int get_version(void __user * arg) | |||
4575 | static int get_array_info(mddev_t * mddev, void __user * arg) | 4581 | static int get_array_info(mddev_t * mddev, void __user * arg) |
4576 | { | 4582 | { |
4577 | mdu_array_info_t info; | 4583 | mdu_array_info_t info; |
4578 | int nr,working,active,failed,spare; | 4584 | int nr,working,insync,failed,spare; |
4579 | mdk_rdev_t *rdev; | 4585 | mdk_rdev_t *rdev; |
4580 | 4586 | ||
4581 | nr=working=active=failed=spare=0; | 4587 | nr=working=insync=failed=spare=0; |
4582 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 4588 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
4583 | nr++; | 4589 | nr++; |
4584 | if (test_bit(Faulty, &rdev->flags)) | 4590 | if (test_bit(Faulty, &rdev->flags)) |
@@ -4586,7 +4592,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
4586 | else { | 4592 | else { |
4587 | working++; | 4593 | working++; |
4588 | if (test_bit(In_sync, &rdev->flags)) | 4594 | if (test_bit(In_sync, &rdev->flags)) |
4589 | active++; | 4595 | insync++; |
4590 | else | 4596 | else |
4591 | spare++; | 4597 | spare++; |
4592 | } | 4598 | } |
@@ -4611,7 +4617,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
4611 | info.state = (1<<MD_SB_CLEAN); | 4617 | info.state = (1<<MD_SB_CLEAN); |
4612 | if (mddev->bitmap && mddev->bitmap_offset) | 4618 | if (mddev->bitmap && mddev->bitmap_offset) |
4613 | info.state = (1<<MD_SB_BITMAP_PRESENT); | 4619 | info.state = (1<<MD_SB_BITMAP_PRESENT); |
4614 | info.active_disks = active; | 4620 | info.active_disks = insync; |
4615 | info.working_disks = working; | 4621 | info.working_disks = working; |
4616 | info.failed_disks = failed; | 4622 | info.failed_disks = failed; |
4617 | info.spare_disks = spare; | 4623 | info.spare_disks = spare; |
@@ -4721,7 +4727,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
4721 | if (!list_empty(&mddev->disks)) { | 4727 | if (!list_empty(&mddev->disks)) { |
4722 | mdk_rdev_t *rdev0 = list_entry(mddev->disks.next, | 4728 | mdk_rdev_t *rdev0 = list_entry(mddev->disks.next, |
4723 | mdk_rdev_t, same_set); | 4729 | mdk_rdev_t, same_set); |
4724 | int err = super_types[mddev->major_version] | 4730 | err = super_types[mddev->major_version] |
4725 | .load_super(rdev, rdev0, mddev->minor_version); | 4731 | .load_super(rdev, rdev0, mddev->minor_version); |
4726 | if (err < 0) { | 4732 | if (err < 0) { |
4727 | printk(KERN_WARNING | 4733 | printk(KERN_WARNING |
@@ -5631,7 +5637,10 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
5631 | thread->run = run; | 5637 | thread->run = run; |
5632 | thread->mddev = mddev; | 5638 | thread->mddev = mddev; |
5633 | thread->timeout = MAX_SCHEDULE_TIMEOUT; | 5639 | thread->timeout = MAX_SCHEDULE_TIMEOUT; |
5634 | thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev)); | 5640 | thread->tsk = kthread_run(md_thread, thread, |
5641 | "%s_%s", | ||
5642 | mdname(thread->mddev), | ||
5643 | name ?: mddev->pers->name); | ||
5635 | if (IS_ERR(thread->tsk)) { | 5644 | if (IS_ERR(thread->tsk)) { |
5636 | kfree(thread); | 5645 | kfree(thread); |
5637 | return NULL; | 5646 | return NULL; |
@@ -6745,7 +6754,7 @@ void md_check_recovery(mddev_t *mddev) | |||
6745 | } | 6754 | } |
6746 | mddev->sync_thread = md_register_thread(md_do_sync, | 6755 | mddev->sync_thread = md_register_thread(md_do_sync, |
6747 | mddev, | 6756 | mddev, |
6748 | "%s_resync"); | 6757 | "resync"); |
6749 | if (!mddev->sync_thread) { | 6758 | if (!mddev->sync_thread) { |
6750 | printk(KERN_ERR "%s: could not start resync" | 6759 | printk(KERN_ERR "%s: could not start resync" |
6751 | " thread...\n", | 6760 | " thread...\n", |
diff --git a/drivers/md/md.h b/drivers/md/md.h index f55d2ff95133..f184b69ef337 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -430,6 +430,7 @@ extern void md_write_end(mddev_t *mddev); | |||
430 | extern void md_done_sync(mddev_t *mddev, int blocks, int ok); | 430 | extern void md_done_sync(mddev_t *mddev, int blocks, int ok); |
431 | extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); | 431 | extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); |
432 | 432 | ||
433 | extern int mddev_congested(mddev_t *mddev, int bits); | ||
433 | extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, | 434 | extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, |
434 | sector_t sector, int size, struct page *page); | 435 | sector_t sector, int size, struct page *page); |
435 | extern void md_super_wait(mddev_t *mddev); | 436 | extern void md_super_wait(mddev_t *mddev); |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index d2d3fd54cc68..ee7646f974a0 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -150,7 +150,6 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) | |||
150 | } | 150 | } |
151 | 151 | ||
152 | mp_bh = mempool_alloc(conf->pool, GFP_NOIO); | 152 | mp_bh = mempool_alloc(conf->pool, GFP_NOIO); |
153 | memset(mp_bh, 0, sizeof(*mp_bh)); | ||
154 | 153 | ||
155 | mp_bh->master_bio = bio; | 154 | mp_bh->master_bio = bio; |
156 | mp_bh->mddev = mddev; | 155 | mp_bh->mddev = mddev; |
@@ -199,6 +198,9 @@ static int multipath_congested(void *data, int bits) | |||
199 | multipath_conf_t *conf = mddev->private; | 198 | multipath_conf_t *conf = mddev->private; |
200 | int i, ret = 0; | 199 | int i, ret = 0; |
201 | 200 | ||
201 | if (mddev_congested(mddev, bits)) | ||
202 | return 1; | ||
203 | |||
202 | rcu_read_lock(); | 204 | rcu_read_lock(); |
203 | for (i = 0; i < mddev->raid_disks ; i++) { | 205 | for (i = 0; i < mddev->raid_disks ; i++) { |
204 | mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); | 206 | mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); |
@@ -504,7 +506,7 @@ static int multipath_run (mddev_t *mddev) | |||
504 | } | 506 | } |
505 | 507 | ||
506 | { | 508 | { |
507 | mddev->thread = md_register_thread(multipathd, mddev, "%s_multipath"); | 509 | mddev->thread = md_register_thread(multipathd, mddev, NULL); |
508 | if (!mddev->thread) { | 510 | if (!mddev->thread) { |
509 | printk(KERN_ERR "multipath: couldn't allocate thread" | 511 | printk(KERN_ERR "multipath: couldn't allocate thread" |
510 | " for %s\n", mdname(mddev)); | 512 | " for %s\n", mdname(mddev)); |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f845ed98fec9..d3a4ce06015a 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -44,6 +44,9 @@ static int raid0_congested(void *data, int bits) | |||
44 | mdk_rdev_t **devlist = conf->devlist; | 44 | mdk_rdev_t **devlist = conf->devlist; |
45 | int i, ret = 0; | 45 | int i, ret = 0; |
46 | 46 | ||
47 | if (mddev_congested(mddev, bits)) | ||
48 | return 1; | ||
49 | |||
47 | for (i = 0; i < mddev->raid_disks && !ret ; i++) { | 50 | for (i = 0; i < mddev->raid_disks && !ret ; i++) { |
48 | struct request_queue *q = bdev_get_queue(devlist[i]->bdev); | 51 | struct request_queue *q = bdev_get_queue(devlist[i]->bdev); |
49 | 52 | ||
@@ -86,7 +89,7 @@ static void dump_zones(mddev_t *mddev) | |||
86 | 89 | ||
87 | static int create_strip_zones(mddev_t *mddev) | 90 | static int create_strip_zones(mddev_t *mddev) |
88 | { | 91 | { |
89 | int i, c, j, err; | 92 | int i, c, err; |
90 | sector_t curr_zone_end, sectors; | 93 | sector_t curr_zone_end, sectors; |
91 | mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; | 94 | mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev; |
92 | struct strip_zone *zone; | 95 | struct strip_zone *zone; |
@@ -198,6 +201,8 @@ static int create_strip_zones(mddev_t *mddev) | |||
198 | /* now do the other zones */ | 201 | /* now do the other zones */ |
199 | for (i = 1; i < conf->nr_strip_zones; i++) | 202 | for (i = 1; i < conf->nr_strip_zones; i++) |
200 | { | 203 | { |
204 | int j; | ||
205 | |||
201 | zone = conf->strip_zone + i; | 206 | zone = conf->strip_zone + i; |
202 | dev = conf->devlist + i * mddev->raid_disks; | 207 | dev = conf->devlist + i * mddev->raid_disks; |
203 | 208 | ||
@@ -207,7 +212,6 @@ static int create_strip_zones(mddev_t *mddev) | |||
207 | c = 0; | 212 | c = 0; |
208 | 213 | ||
209 | for (j=0; j<cnt; j++) { | 214 | for (j=0; j<cnt; j++) { |
210 | char b[BDEVNAME_SIZE]; | ||
211 | rdev = conf->devlist[j]; | 215 | rdev = conf->devlist[j]; |
212 | printk(KERN_INFO "raid0: checking %s ...", | 216 | printk(KERN_INFO "raid0: checking %s ...", |
213 | bdevname(rdev->bdev, b)); | 217 | bdevname(rdev->bdev, b)); |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ff7ed3335995..d1b9bd5fd4f6 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -576,6 +576,9 @@ static int raid1_congested(void *data, int bits) | |||
576 | conf_t *conf = mddev->private; | 576 | conf_t *conf = mddev->private; |
577 | int i, ret = 0; | 577 | int i, ret = 0; |
578 | 578 | ||
579 | if (mddev_congested(mddev, bits)) | ||
580 | return 1; | ||
581 | |||
579 | rcu_read_lock(); | 582 | rcu_read_lock(); |
580 | for (i = 0; i < mddev->raid_disks; i++) { | 583 | for (i = 0; i < mddev->raid_disks; i++) { |
581 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); | 584 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); |
@@ -851,7 +854,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
851 | read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset; | 854 | read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset; |
852 | read_bio->bi_bdev = mirror->rdev->bdev; | 855 | read_bio->bi_bdev = mirror->rdev->bdev; |
853 | read_bio->bi_end_io = raid1_end_read_request; | 856 | read_bio->bi_end_io = raid1_end_read_request; |
854 | read_bio->bi_rw = READ | do_sync; | 857 | read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); |
855 | read_bio->bi_private = r1_bio; | 858 | read_bio->bi_private = r1_bio; |
856 | 859 | ||
857 | generic_make_request(read_bio); | 860 | generic_make_request(read_bio); |
@@ -943,7 +946,8 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
943 | mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; | 946 | mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; |
944 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; | 947 | mbio->bi_bdev = conf->mirrors[i].rdev->bdev; |
945 | mbio->bi_end_io = raid1_end_write_request; | 948 | mbio->bi_end_io = raid1_end_write_request; |
946 | mbio->bi_rw = WRITE | do_barriers | do_sync; | 949 | mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) | |
950 | (do_sync << BIO_RW_SYNCIO); | ||
947 | mbio->bi_private = r1_bio; | 951 | mbio->bi_private = r1_bio; |
948 | 952 | ||
949 | if (behind_pages) { | 953 | if (behind_pages) { |
@@ -1623,7 +1627,8 @@ static void raid1d(mddev_t *mddev) | |||
1623 | conf->mirrors[i].rdev->data_offset; | 1627 | conf->mirrors[i].rdev->data_offset; |
1624 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; | 1628 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; |
1625 | bio->bi_end_io = raid1_end_write_request; | 1629 | bio->bi_end_io = raid1_end_write_request; |
1626 | bio->bi_rw = WRITE | do_sync; | 1630 | bio->bi_rw = WRITE | |
1631 | (do_sync << BIO_RW_SYNCIO); | ||
1627 | bio->bi_private = r1_bio; | 1632 | bio->bi_private = r1_bio; |
1628 | r1_bio->bios[i] = bio; | 1633 | r1_bio->bios[i] = bio; |
1629 | generic_make_request(bio); | 1634 | generic_make_request(bio); |
@@ -1672,7 +1677,7 @@ static void raid1d(mddev_t *mddev) | |||
1672 | bio->bi_sector = r1_bio->sector + rdev->data_offset; | 1677 | bio->bi_sector = r1_bio->sector + rdev->data_offset; |
1673 | bio->bi_bdev = rdev->bdev; | 1678 | bio->bi_bdev = rdev->bdev; |
1674 | bio->bi_end_io = raid1_end_read_request; | 1679 | bio->bi_end_io = raid1_end_read_request; |
1675 | bio->bi_rw = READ | do_sync; | 1680 | bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); |
1676 | bio->bi_private = r1_bio; | 1681 | bio->bi_private = r1_bio; |
1677 | unplug = 1; | 1682 | unplug = 1; |
1678 | generic_make_request(bio); | 1683 | generic_make_request(bio); |
@@ -2047,7 +2052,7 @@ static int run(mddev_t *mddev) | |||
2047 | conf->last_used = j; | 2052 | conf->last_used = j; |
2048 | 2053 | ||
2049 | 2054 | ||
2050 | mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1"); | 2055 | mddev->thread = md_register_thread(raid1d, mddev, NULL); |
2051 | if (!mddev->thread) { | 2056 | if (!mddev->thread) { |
2052 | printk(KERN_ERR | 2057 | printk(KERN_ERR |
2053 | "raid1: couldn't allocate thread for %s\n", | 2058 | "raid1: couldn't allocate thread for %s\n", |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d0a2152e064f..51c4c5c4d87a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -631,6 +631,8 @@ static int raid10_congested(void *data, int bits) | |||
631 | conf_t *conf = mddev->private; | 631 | conf_t *conf = mddev->private; |
632 | int i, ret = 0; | 632 | int i, ret = 0; |
633 | 633 | ||
634 | if (mddev_congested(mddev, bits)) | ||
635 | return 1; | ||
634 | rcu_read_lock(); | 636 | rcu_read_lock(); |
635 | for (i = 0; i < mddev->raid_disks && ret == 0; i++) { | 637 | for (i = 0; i < mddev->raid_disks && ret == 0; i++) { |
636 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); | 638 | mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); |
@@ -882,7 +884,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
882 | mirror->rdev->data_offset; | 884 | mirror->rdev->data_offset; |
883 | read_bio->bi_bdev = mirror->rdev->bdev; | 885 | read_bio->bi_bdev = mirror->rdev->bdev; |
884 | read_bio->bi_end_io = raid10_end_read_request; | 886 | read_bio->bi_end_io = raid10_end_read_request; |
885 | read_bio->bi_rw = READ | do_sync; | 887 | read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); |
886 | read_bio->bi_private = r10_bio; | 888 | read_bio->bi_private = r10_bio; |
887 | 889 | ||
888 | generic_make_request(read_bio); | 890 | generic_make_request(read_bio); |
@@ -950,7 +952,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
950 | conf->mirrors[d].rdev->data_offset; | 952 | conf->mirrors[d].rdev->data_offset; |
951 | mbio->bi_bdev = conf->mirrors[d].rdev->bdev; | 953 | mbio->bi_bdev = conf->mirrors[d].rdev->bdev; |
952 | mbio->bi_end_io = raid10_end_write_request; | 954 | mbio->bi_end_io = raid10_end_write_request; |
953 | mbio->bi_rw = WRITE | do_sync; | 955 | mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO); |
954 | mbio->bi_private = r10_bio; | 956 | mbio->bi_private = r10_bio; |
955 | 957 | ||
956 | atomic_inc(&r10_bio->remaining); | 958 | atomic_inc(&r10_bio->remaining); |
@@ -1623,7 +1625,7 @@ static void raid10d(mddev_t *mddev) | |||
1623 | bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr | 1625 | bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr |
1624 | + rdev->data_offset; | 1626 | + rdev->data_offset; |
1625 | bio->bi_bdev = rdev->bdev; | 1627 | bio->bi_bdev = rdev->bdev; |
1626 | bio->bi_rw = READ | do_sync; | 1628 | bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); |
1627 | bio->bi_private = r10_bio; | 1629 | bio->bi_private = r10_bio; |
1628 | bio->bi_end_io = raid10_end_read_request; | 1630 | bio->bi_end_io = raid10_end_read_request; |
1629 | unplug = 1; | 1631 | unplug = 1; |
@@ -1773,7 +1775,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1773 | max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); | 1775 | max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); |
1774 | if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 1776 | if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
1775 | /* recovery... the complicated one */ | 1777 | /* recovery... the complicated one */ |
1776 | int i, j, k; | 1778 | int j, k; |
1777 | r10_bio = NULL; | 1779 | r10_bio = NULL; |
1778 | 1780 | ||
1779 | for (i=0 ; i<conf->raid_disks; i++) | 1781 | for (i=0 ; i<conf->raid_disks; i++) |
@@ -2188,7 +2190,7 @@ static int run(mddev_t *mddev) | |||
2188 | } | 2190 | } |
2189 | 2191 | ||
2190 | 2192 | ||
2191 | mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10"); | 2193 | mddev->thread = md_register_thread(raid10d, mddev, NULL); |
2192 | if (!mddev->thread) { | 2194 | if (!mddev->thread) { |
2193 | printk(KERN_ERR | 2195 | printk(KERN_ERR |
2194 | "raid10: couldn't allocate thread for %s\n", | 2196 | "raid10: couldn't allocate thread for %s\n", |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 826eb3467357..94829804ab7f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -47,7 +47,9 @@ | |||
47 | #include <linux/kthread.h> | 47 | #include <linux/kthread.h> |
48 | #include <linux/raid/pq.h> | 48 | #include <linux/raid/pq.h> |
49 | #include <linux/async_tx.h> | 49 | #include <linux/async_tx.h> |
50 | #include <linux/async.h> | ||
50 | #include <linux/seq_file.h> | 51 | #include <linux/seq_file.h> |
52 | #include <linux/cpu.h> | ||
51 | #include "md.h" | 53 | #include "md.h" |
52 | #include "raid5.h" | 54 | #include "raid5.h" |
53 | #include "bitmap.h" | 55 | #include "bitmap.h" |
@@ -499,11 +501,18 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, | |||
499 | struct page *bio_page; | 501 | struct page *bio_page; |
500 | int i; | 502 | int i; |
501 | int page_offset; | 503 | int page_offset; |
504 | struct async_submit_ctl submit; | ||
505 | enum async_tx_flags flags = 0; | ||
502 | 506 | ||
503 | if (bio->bi_sector >= sector) | 507 | if (bio->bi_sector >= sector) |
504 | page_offset = (signed)(bio->bi_sector - sector) * 512; | 508 | page_offset = (signed)(bio->bi_sector - sector) * 512; |
505 | else | 509 | else |
506 | page_offset = (signed)(sector - bio->bi_sector) * -512; | 510 | page_offset = (signed)(sector - bio->bi_sector) * -512; |
511 | |||
512 | if (frombio) | ||
513 | flags |= ASYNC_TX_FENCE; | ||
514 | init_async_submit(&submit, flags, tx, NULL, NULL, NULL); | ||
515 | |||
507 | bio_for_each_segment(bvl, bio, i) { | 516 | bio_for_each_segment(bvl, bio, i) { |
508 | int len = bio_iovec_idx(bio, i)->bv_len; | 517 | int len = bio_iovec_idx(bio, i)->bv_len; |
509 | int clen; | 518 | int clen; |
@@ -525,15 +534,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, | |||
525 | bio_page = bio_iovec_idx(bio, i)->bv_page; | 534 | bio_page = bio_iovec_idx(bio, i)->bv_page; |
526 | if (frombio) | 535 | if (frombio) |
527 | tx = async_memcpy(page, bio_page, page_offset, | 536 | tx = async_memcpy(page, bio_page, page_offset, |
528 | b_offset, clen, | 537 | b_offset, clen, &submit); |
529 | ASYNC_TX_DEP_ACK, | ||
530 | tx, NULL, NULL); | ||
531 | else | 538 | else |
532 | tx = async_memcpy(bio_page, page, b_offset, | 539 | tx = async_memcpy(bio_page, page, b_offset, |
533 | page_offset, clen, | 540 | page_offset, clen, &submit); |
534 | ASYNC_TX_DEP_ACK, | ||
535 | tx, NULL, NULL); | ||
536 | } | 541 | } |
542 | /* chain the operations */ | ||
543 | submit.depend_tx = tx; | ||
544 | |||
537 | if (clen < len) /* hit end of page */ | 545 | if (clen < len) /* hit end of page */ |
538 | break; | 546 | break; |
539 | page_offset += len; | 547 | page_offset += len; |
@@ -592,6 +600,7 @@ static void ops_run_biofill(struct stripe_head *sh) | |||
592 | { | 600 | { |
593 | struct dma_async_tx_descriptor *tx = NULL; | 601 | struct dma_async_tx_descriptor *tx = NULL; |
594 | raid5_conf_t *conf = sh->raid_conf; | 602 | raid5_conf_t *conf = sh->raid_conf; |
603 | struct async_submit_ctl submit; | ||
595 | int i; | 604 | int i; |
596 | 605 | ||
597 | pr_debug("%s: stripe %llu\n", __func__, | 606 | pr_debug("%s: stripe %llu\n", __func__, |
@@ -615,22 +624,34 @@ static void ops_run_biofill(struct stripe_head *sh) | |||
615 | } | 624 | } |
616 | 625 | ||
617 | atomic_inc(&sh->count); | 626 | atomic_inc(&sh->count); |
618 | async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, | 627 | init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); |
619 | ops_complete_biofill, sh); | 628 | async_trigger_callback(&submit); |
620 | } | 629 | } |
621 | 630 | ||
622 | static void ops_complete_compute5(void *stripe_head_ref) | 631 | static void mark_target_uptodate(struct stripe_head *sh, int target) |
623 | { | 632 | { |
624 | struct stripe_head *sh = stripe_head_ref; | 633 | struct r5dev *tgt; |
625 | int target = sh->ops.target; | ||
626 | struct r5dev *tgt = &sh->dev[target]; | ||
627 | 634 | ||
628 | pr_debug("%s: stripe %llu\n", __func__, | 635 | if (target < 0) |
629 | (unsigned long long)sh->sector); | 636 | return; |
630 | 637 | ||
638 | tgt = &sh->dev[target]; | ||
631 | set_bit(R5_UPTODATE, &tgt->flags); | 639 | set_bit(R5_UPTODATE, &tgt->flags); |
632 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); | 640 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); |
633 | clear_bit(R5_Wantcompute, &tgt->flags); | 641 | clear_bit(R5_Wantcompute, &tgt->flags); |
642 | } | ||
643 | |||
644 | static void ops_complete_compute(void *stripe_head_ref) | ||
645 | { | ||
646 | struct stripe_head *sh = stripe_head_ref; | ||
647 | |||
648 | pr_debug("%s: stripe %llu\n", __func__, | ||
649 | (unsigned long long)sh->sector); | ||
650 | |||
651 | /* mark the computed target(s) as uptodate */ | ||
652 | mark_target_uptodate(sh, sh->ops.target); | ||
653 | mark_target_uptodate(sh, sh->ops.target2); | ||
654 | |||
634 | clear_bit(STRIPE_COMPUTE_RUN, &sh->state); | 655 | clear_bit(STRIPE_COMPUTE_RUN, &sh->state); |
635 | if (sh->check_state == check_state_compute_run) | 656 | if (sh->check_state == check_state_compute_run) |
636 | sh->check_state = check_state_compute_result; | 657 | sh->check_state = check_state_compute_result; |
@@ -638,16 +659,24 @@ static void ops_complete_compute5(void *stripe_head_ref) | |||
638 | release_stripe(sh); | 659 | release_stripe(sh); |
639 | } | 660 | } |
640 | 661 | ||
641 | static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) | 662 | /* return a pointer to the address conversion region of the scribble buffer */ |
663 | static addr_conv_t *to_addr_conv(struct stripe_head *sh, | ||
664 | struct raid5_percpu *percpu) | ||
665 | { | ||
666 | return percpu->scribble + sizeof(struct page *) * (sh->disks + 2); | ||
667 | } | ||
668 | |||
669 | static struct dma_async_tx_descriptor * | ||
670 | ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) | ||
642 | { | 671 | { |
643 | /* kernel stack size limits the total number of disks */ | ||
644 | int disks = sh->disks; | 672 | int disks = sh->disks; |
645 | struct page *xor_srcs[disks]; | 673 | struct page **xor_srcs = percpu->scribble; |
646 | int target = sh->ops.target; | 674 | int target = sh->ops.target; |
647 | struct r5dev *tgt = &sh->dev[target]; | 675 | struct r5dev *tgt = &sh->dev[target]; |
648 | struct page *xor_dest = tgt->page; | 676 | struct page *xor_dest = tgt->page; |
649 | int count = 0; | 677 | int count = 0; |
650 | struct dma_async_tx_descriptor *tx; | 678 | struct dma_async_tx_descriptor *tx; |
679 | struct async_submit_ctl submit; | ||
651 | int i; | 680 | int i; |
652 | 681 | ||
653 | pr_debug("%s: stripe %llu block: %d\n", | 682 | pr_debug("%s: stripe %llu block: %d\n", |
@@ -660,17 +689,215 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) | |||
660 | 689 | ||
661 | atomic_inc(&sh->count); | 690 | atomic_inc(&sh->count); |
662 | 691 | ||
692 | init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, | ||
693 | ops_complete_compute, sh, to_addr_conv(sh, percpu)); | ||
663 | if (unlikely(count == 1)) | 694 | if (unlikely(count == 1)) |
664 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, | 695 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); |
665 | 0, NULL, ops_complete_compute5, sh); | ||
666 | else | 696 | else |
667 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | 697 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); |
668 | ASYNC_TX_XOR_ZERO_DST, NULL, | ||
669 | ops_complete_compute5, sh); | ||
670 | 698 | ||
671 | return tx; | 699 | return tx; |
672 | } | 700 | } |
673 | 701 | ||
702 | /* set_syndrome_sources - populate source buffers for gen_syndrome | ||
703 | * @srcs - (struct page *) array with room for sh->disks + 2 entries | ||
704 | * @sh - stripe_head to parse | ||
705 | * | ||
706 | * Populates srcs in proper layout order for the stripe and returns the | ||
707 | * 'count' of sources to be used in a call to async_gen_syndrome. The P | ||
708 | * destination buffer is recorded in srcs[count] and the Q destination | ||
709 | * is recorded in srcs[count+1]. | ||
710 | */ | ||
711 | static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh) | ||
712 | { | ||
713 | int disks = sh->disks; | ||
714 | int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); | ||
715 | int d0_idx = raid6_d0(sh); | ||
716 | int count; | ||
717 | int i; | ||
718 | |||
719 | for (i = 0; i < disks; i++) | ||
720 | srcs[i] = (void *)raid6_empty_zero_page; | ||
721 | |||
722 | count = 0; | ||
723 | i = d0_idx; | ||
724 | do { | ||
725 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); | ||
726 | |||
727 | srcs[slot] = sh->dev[i].page; | ||
728 | i = raid6_next_disk(i, disks); | ||
729 | } while (i != d0_idx); | ||
730 | BUG_ON(count != syndrome_disks); | ||
731 | |||
732 | return count; | ||
733 | } | ||
734 | |||
735 | static struct dma_async_tx_descriptor * | ||
736 | ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) | ||
737 | { | ||
738 | int disks = sh->disks; | ||
739 | struct page **blocks = percpu->scribble; | ||
740 | int target; | ||
741 | int qd_idx = sh->qd_idx; | ||
742 | struct dma_async_tx_descriptor *tx; | ||
743 | struct async_submit_ctl submit; | ||
744 | struct r5dev *tgt; | ||
745 | struct page *dest; | ||
746 | int i; | ||
747 | int count; | ||
748 | |||
749 | if (sh->ops.target < 0) | ||
750 | target = sh->ops.target2; | ||
751 | else if (sh->ops.target2 < 0) | ||
752 | target = sh->ops.target; | ||
753 | else | ||
754 | /* we should only have one valid target */ | ||
755 | BUG(); | ||
756 | BUG_ON(target < 0); | ||
757 | pr_debug("%s: stripe %llu block: %d\n", | ||
758 | __func__, (unsigned long long)sh->sector, target); | ||
759 | |||
760 | tgt = &sh->dev[target]; | ||
761 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); | ||
762 | dest = tgt->page; | ||
763 | |||
764 | atomic_inc(&sh->count); | ||
765 | |||
766 | if (target == qd_idx) { | ||
767 | count = set_syndrome_sources(blocks, sh); | ||
768 | blocks[count] = NULL; /* regenerating p is not necessary */ | ||
769 | BUG_ON(blocks[count+1] != dest); /* q should already be set */ | ||
770 | init_async_submit(&submit, ASYNC_TX_FENCE, NULL, | ||
771 | ops_complete_compute, sh, | ||
772 | to_addr_conv(sh, percpu)); | ||
773 | tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); | ||
774 | } else { | ||
775 | /* Compute any data- or p-drive using XOR */ | ||
776 | count = 0; | ||
777 | for (i = disks; i-- ; ) { | ||
778 | if (i == target || i == qd_idx) | ||
779 | continue; | ||
780 | blocks[count++] = sh->dev[i].page; | ||
781 | } | ||
782 | |||
783 | init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, | ||
784 | NULL, ops_complete_compute, sh, | ||
785 | to_addr_conv(sh, percpu)); | ||
786 | tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit); | ||
787 | } | ||
788 | |||
789 | return tx; | ||
790 | } | ||
791 | |||
792 | static struct dma_async_tx_descriptor * | ||
793 | ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) | ||
794 | { | ||
795 | int i, count, disks = sh->disks; | ||
796 | int syndrome_disks = sh->ddf_layout ? disks : disks-2; | ||
797 | int d0_idx = raid6_d0(sh); | ||
798 | int faila = -1, failb = -1; | ||
799 | int target = sh->ops.target; | ||
800 | int target2 = sh->ops.target2; | ||
801 | struct r5dev *tgt = &sh->dev[target]; | ||
802 | struct r5dev *tgt2 = &sh->dev[target2]; | ||
803 | struct dma_async_tx_descriptor *tx; | ||
804 | struct page **blocks = percpu->scribble; | ||
805 | struct async_submit_ctl submit; | ||
806 | |||
807 | pr_debug("%s: stripe %llu block1: %d block2: %d\n", | ||
808 | __func__, (unsigned long long)sh->sector, target, target2); | ||
809 | BUG_ON(target < 0 || target2 < 0); | ||
810 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); | ||
811 | BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags)); | ||
812 | |||
813 | /* we need to open-code set_syndrome_sources to handle the | ||
814 | * slot number conversion for 'faila' and 'failb' | ||
815 | */ | ||
816 | for (i = 0; i < disks ; i++) | ||
817 | blocks[i] = (void *)raid6_empty_zero_page; | ||
818 | count = 0; | ||
819 | i = d0_idx; | ||
820 | do { | ||
821 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); | ||
822 | |||
823 | blocks[slot] = sh->dev[i].page; | ||
824 | |||
825 | if (i == target) | ||
826 | faila = slot; | ||
827 | if (i == target2) | ||
828 | failb = slot; | ||
829 | i = raid6_next_disk(i, disks); | ||
830 | } while (i != d0_idx); | ||
831 | BUG_ON(count != syndrome_disks); | ||
832 | |||
833 | BUG_ON(faila == failb); | ||
834 | if (failb < faila) | ||
835 | swap(faila, failb); | ||
836 | pr_debug("%s: stripe: %llu faila: %d failb: %d\n", | ||
837 | __func__, (unsigned long long)sh->sector, faila, failb); | ||
838 | |||
839 | atomic_inc(&sh->count); | ||
840 | |||
841 | if (failb == syndrome_disks+1) { | ||
842 | /* Q disk is one of the missing disks */ | ||
843 | if (faila == syndrome_disks) { | ||
844 | /* Missing P+Q, just recompute */ | ||
845 | init_async_submit(&submit, ASYNC_TX_FENCE, NULL, | ||
846 | ops_complete_compute, sh, | ||
847 | to_addr_conv(sh, percpu)); | ||
848 | return async_gen_syndrome(blocks, 0, count+2, | ||
849 | STRIPE_SIZE, &submit); | ||
850 | } else { | ||
851 | struct page *dest; | ||
852 | int data_target; | ||
853 | int qd_idx = sh->qd_idx; | ||
854 | |||
855 | /* Missing D+Q: recompute D from P, then recompute Q */ | ||
856 | if (target == qd_idx) | ||
857 | data_target = target2; | ||
858 | else | ||
859 | data_target = target; | ||
860 | |||
861 | count = 0; | ||
862 | for (i = disks; i-- ; ) { | ||
863 | if (i == data_target || i == qd_idx) | ||
864 | continue; | ||
865 | blocks[count++] = sh->dev[i].page; | ||
866 | } | ||
867 | dest = sh->dev[data_target].page; | ||
868 | init_async_submit(&submit, | ||
869 | ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, | ||
870 | NULL, NULL, NULL, | ||
871 | to_addr_conv(sh, percpu)); | ||
872 | tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, | ||
873 | &submit); | ||
874 | |||
875 | count = set_syndrome_sources(blocks, sh); | ||
876 | init_async_submit(&submit, ASYNC_TX_FENCE, tx, | ||
877 | ops_complete_compute, sh, | ||
878 | to_addr_conv(sh, percpu)); | ||
879 | return async_gen_syndrome(blocks, 0, count+2, | ||
880 | STRIPE_SIZE, &submit); | ||
881 | } | ||
882 | } else { | ||
883 | init_async_submit(&submit, ASYNC_TX_FENCE, NULL, | ||
884 | ops_complete_compute, sh, | ||
885 | to_addr_conv(sh, percpu)); | ||
886 | if (failb == syndrome_disks) { | ||
887 | /* We're missing D+P. */ | ||
888 | return async_raid6_datap_recov(syndrome_disks+2, | ||
889 | STRIPE_SIZE, faila, | ||
890 | blocks, &submit); | ||
891 | } else { | ||
892 | /* We're missing D+D. */ | ||
893 | return async_raid6_2data_recov(syndrome_disks+2, | ||
894 | STRIPE_SIZE, faila, failb, | ||
895 | blocks, &submit); | ||
896 | } | ||
897 | } | ||
898 | } | ||
899 | |||
900 | |||
674 | static void ops_complete_prexor(void *stripe_head_ref) | 901 | static void ops_complete_prexor(void *stripe_head_ref) |
675 | { | 902 | { |
676 | struct stripe_head *sh = stripe_head_ref; | 903 | struct stripe_head *sh = stripe_head_ref; |
@@ -680,12 +907,13 @@ static void ops_complete_prexor(void *stripe_head_ref) | |||
680 | } | 907 | } |
681 | 908 | ||
682 | static struct dma_async_tx_descriptor * | 909 | static struct dma_async_tx_descriptor * |
683 | ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | 910 | ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu, |
911 | struct dma_async_tx_descriptor *tx) | ||
684 | { | 912 | { |
685 | /* kernel stack size limits the total number of disks */ | ||
686 | int disks = sh->disks; | 913 | int disks = sh->disks; |
687 | struct page *xor_srcs[disks]; | 914 | struct page **xor_srcs = percpu->scribble; |
688 | int count = 0, pd_idx = sh->pd_idx, i; | 915 | int count = 0, pd_idx = sh->pd_idx, i; |
916 | struct async_submit_ctl submit; | ||
689 | 917 | ||
690 | /* existing parity data subtracted */ | 918 | /* existing parity data subtracted */ |
691 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; | 919 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; |
@@ -700,9 +928,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
700 | xor_srcs[count++] = dev->page; | 928 | xor_srcs[count++] = dev->page; |
701 | } | 929 | } |
702 | 930 | ||
703 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | 931 | init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, |
704 | ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, | 932 | ops_complete_prexor, sh, to_addr_conv(sh, percpu)); |
705 | ops_complete_prexor, sh); | 933 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); |
706 | 934 | ||
707 | return tx; | 935 | return tx; |
708 | } | 936 | } |
@@ -742,17 +970,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
742 | return tx; | 970 | return tx; |
743 | } | 971 | } |
744 | 972 | ||
745 | static void ops_complete_postxor(void *stripe_head_ref) | 973 | static void ops_complete_reconstruct(void *stripe_head_ref) |
746 | { | 974 | { |
747 | struct stripe_head *sh = stripe_head_ref; | 975 | struct stripe_head *sh = stripe_head_ref; |
748 | int disks = sh->disks, i, pd_idx = sh->pd_idx; | 976 | int disks = sh->disks; |
977 | int pd_idx = sh->pd_idx; | ||
978 | int qd_idx = sh->qd_idx; | ||
979 | int i; | ||
749 | 980 | ||
750 | pr_debug("%s: stripe %llu\n", __func__, | 981 | pr_debug("%s: stripe %llu\n", __func__, |
751 | (unsigned long long)sh->sector); | 982 | (unsigned long long)sh->sector); |
752 | 983 | ||
753 | for (i = disks; i--; ) { | 984 | for (i = disks; i--; ) { |
754 | struct r5dev *dev = &sh->dev[i]; | 985 | struct r5dev *dev = &sh->dev[i]; |
755 | if (dev->written || i == pd_idx) | 986 | |
987 | if (dev->written || i == pd_idx || i == qd_idx) | ||
756 | set_bit(R5_UPTODATE, &dev->flags); | 988 | set_bit(R5_UPTODATE, &dev->flags); |
757 | } | 989 | } |
758 | 990 | ||
@@ -770,12 +1002,12 @@ static void ops_complete_postxor(void *stripe_head_ref) | |||
770 | } | 1002 | } |
771 | 1003 | ||
772 | static void | 1004 | static void |
773 | ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | 1005 | ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, |
1006 | struct dma_async_tx_descriptor *tx) | ||
774 | { | 1007 | { |
775 | /* kernel stack size limits the total number of disks */ | ||
776 | int disks = sh->disks; | 1008 | int disks = sh->disks; |
777 | struct page *xor_srcs[disks]; | 1009 | struct page **xor_srcs = percpu->scribble; |
778 | 1010 | struct async_submit_ctl submit; | |
779 | int count = 0, pd_idx = sh->pd_idx, i; | 1011 | int count = 0, pd_idx = sh->pd_idx, i; |
780 | struct page *xor_dest; | 1012 | struct page *xor_dest; |
781 | int prexor = 0; | 1013 | int prexor = 0; |
@@ -809,18 +1041,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
809 | * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST | 1041 | * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST |
810 | * for the synchronous xor case | 1042 | * for the synchronous xor case |
811 | */ | 1043 | */ |
812 | flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | | 1044 | flags = ASYNC_TX_ACK | |
813 | (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); | 1045 | (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); |
814 | 1046 | ||
815 | atomic_inc(&sh->count); | 1047 | atomic_inc(&sh->count); |
816 | 1048 | ||
817 | if (unlikely(count == 1)) { | 1049 | init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh, |
818 | flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); | 1050 | to_addr_conv(sh, percpu)); |
819 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, | 1051 | if (unlikely(count == 1)) |
820 | flags, tx, ops_complete_postxor, sh); | 1052 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); |
821 | } else | 1053 | else |
822 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | 1054 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); |
823 | flags, tx, ops_complete_postxor, sh); | 1055 | } |
1056 | |||
1057 | static void | ||
1058 | ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, | ||
1059 | struct dma_async_tx_descriptor *tx) | ||
1060 | { | ||
1061 | struct async_submit_ctl submit; | ||
1062 | struct page **blocks = percpu->scribble; | ||
1063 | int count; | ||
1064 | |||
1065 | pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); | ||
1066 | |||
1067 | count = set_syndrome_sources(blocks, sh); | ||
1068 | |||
1069 | atomic_inc(&sh->count); | ||
1070 | |||
1071 | init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct, | ||
1072 | sh, to_addr_conv(sh, percpu)); | ||
1073 | async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); | ||
824 | } | 1074 | } |
825 | 1075 | ||
826 | static void ops_complete_check(void *stripe_head_ref) | 1076 | static void ops_complete_check(void *stripe_head_ref) |
@@ -835,63 +1085,115 @@ static void ops_complete_check(void *stripe_head_ref) | |||
835 | release_stripe(sh); | 1085 | release_stripe(sh); |
836 | } | 1086 | } |
837 | 1087 | ||
838 | static void ops_run_check(struct stripe_head *sh) | 1088 | static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) |
839 | { | 1089 | { |
840 | /* kernel stack size limits the total number of disks */ | ||
841 | int disks = sh->disks; | 1090 | int disks = sh->disks; |
842 | struct page *xor_srcs[disks]; | 1091 | int pd_idx = sh->pd_idx; |
1092 | int qd_idx = sh->qd_idx; | ||
1093 | struct page *xor_dest; | ||
1094 | struct page **xor_srcs = percpu->scribble; | ||
843 | struct dma_async_tx_descriptor *tx; | 1095 | struct dma_async_tx_descriptor *tx; |
844 | 1096 | struct async_submit_ctl submit; | |
845 | int count = 0, pd_idx = sh->pd_idx, i; | 1097 | int count; |
846 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; | 1098 | int i; |
847 | 1099 | ||
848 | pr_debug("%s: stripe %llu\n", __func__, | 1100 | pr_debug("%s: stripe %llu\n", __func__, |
849 | (unsigned long long)sh->sector); | 1101 | (unsigned long long)sh->sector); |
850 | 1102 | ||
1103 | count = 0; | ||
1104 | xor_dest = sh->dev[pd_idx].page; | ||
1105 | xor_srcs[count++] = xor_dest; | ||
851 | for (i = disks; i--; ) { | 1106 | for (i = disks; i--; ) { |
852 | struct r5dev *dev = &sh->dev[i]; | 1107 | if (i == pd_idx || i == qd_idx) |
853 | if (i != pd_idx) | 1108 | continue; |
854 | xor_srcs[count++] = dev->page; | 1109 | xor_srcs[count++] = sh->dev[i].page; |
855 | } | 1110 | } |
856 | 1111 | ||
857 | tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | 1112 | init_async_submit(&submit, 0, NULL, NULL, NULL, |
858 | &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); | 1113 | to_addr_conv(sh, percpu)); |
1114 | tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | ||
1115 | &sh->ops.zero_sum_result, &submit); | ||
1116 | |||
1117 | atomic_inc(&sh->count); | ||
1118 | init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); | ||
1119 | tx = async_trigger_callback(&submit); | ||
1120 | } | ||
1121 | |||
1122 | static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp) | ||
1123 | { | ||
1124 | struct page **srcs = percpu->scribble; | ||
1125 | struct async_submit_ctl submit; | ||
1126 | int count; | ||
1127 | |||
1128 | pr_debug("%s: stripe %llu checkp: %d\n", __func__, | ||
1129 | (unsigned long long)sh->sector, checkp); | ||
1130 | |||
1131 | count = set_syndrome_sources(srcs, sh); | ||
1132 | if (!checkp) | ||
1133 | srcs[count] = NULL; | ||
859 | 1134 | ||
860 | atomic_inc(&sh->count); | 1135 | atomic_inc(&sh->count); |
861 | tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, | 1136 | init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, |
862 | ops_complete_check, sh); | 1137 | sh, to_addr_conv(sh, percpu)); |
1138 | async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE, | ||
1139 | &sh->ops.zero_sum_result, percpu->spare_page, &submit); | ||
863 | } | 1140 | } |
864 | 1141 | ||
865 | static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) | 1142 | static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) |
866 | { | 1143 | { |
867 | int overlap_clear = 0, i, disks = sh->disks; | 1144 | int overlap_clear = 0, i, disks = sh->disks; |
868 | struct dma_async_tx_descriptor *tx = NULL; | 1145 | struct dma_async_tx_descriptor *tx = NULL; |
1146 | raid5_conf_t *conf = sh->raid_conf; | ||
1147 | int level = conf->level; | ||
1148 | struct raid5_percpu *percpu; | ||
1149 | unsigned long cpu; | ||
869 | 1150 | ||
1151 | cpu = get_cpu(); | ||
1152 | percpu = per_cpu_ptr(conf->percpu, cpu); | ||
870 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { | 1153 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { |
871 | ops_run_biofill(sh); | 1154 | ops_run_biofill(sh); |
872 | overlap_clear++; | 1155 | overlap_clear++; |
873 | } | 1156 | } |
874 | 1157 | ||
875 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { | 1158 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { |
876 | tx = ops_run_compute5(sh); | 1159 | if (level < 6) |
877 | /* terminate the chain if postxor is not set to be run */ | 1160 | tx = ops_run_compute5(sh, percpu); |
878 | if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request)) | 1161 | else { |
1162 | if (sh->ops.target2 < 0 || sh->ops.target < 0) | ||
1163 | tx = ops_run_compute6_1(sh, percpu); | ||
1164 | else | ||
1165 | tx = ops_run_compute6_2(sh, percpu); | ||
1166 | } | ||
1167 | /* terminate the chain if reconstruct is not set to be run */ | ||
1168 | if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) | ||
879 | async_tx_ack(tx); | 1169 | async_tx_ack(tx); |
880 | } | 1170 | } |
881 | 1171 | ||
882 | if (test_bit(STRIPE_OP_PREXOR, &ops_request)) | 1172 | if (test_bit(STRIPE_OP_PREXOR, &ops_request)) |
883 | tx = ops_run_prexor(sh, tx); | 1173 | tx = ops_run_prexor(sh, percpu, tx); |
884 | 1174 | ||
885 | if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { | 1175 | if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { |
886 | tx = ops_run_biodrain(sh, tx); | 1176 | tx = ops_run_biodrain(sh, tx); |
887 | overlap_clear++; | 1177 | overlap_clear++; |
888 | } | 1178 | } |
889 | 1179 | ||
890 | if (test_bit(STRIPE_OP_POSTXOR, &ops_request)) | 1180 | if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) { |
891 | ops_run_postxor(sh, tx); | 1181 | if (level < 6) |
1182 | ops_run_reconstruct5(sh, percpu, tx); | ||
1183 | else | ||
1184 | ops_run_reconstruct6(sh, percpu, tx); | ||
1185 | } | ||
892 | 1186 | ||
893 | if (test_bit(STRIPE_OP_CHECK, &ops_request)) | 1187 | if (test_bit(STRIPE_OP_CHECK, &ops_request)) { |
894 | ops_run_check(sh); | 1188 | if (sh->check_state == check_state_run) |
1189 | ops_run_check_p(sh, percpu); | ||
1190 | else if (sh->check_state == check_state_run_q) | ||
1191 | ops_run_check_pq(sh, percpu, 0); | ||
1192 | else if (sh->check_state == check_state_run_pq) | ||
1193 | ops_run_check_pq(sh, percpu, 1); | ||
1194 | else | ||
1195 | BUG(); | ||
1196 | } | ||
895 | 1197 | ||
896 | if (overlap_clear) | 1198 | if (overlap_clear) |
897 | for (i = disks; i--; ) { | 1199 | for (i = disks; i--; ) { |
@@ -899,6 +1201,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) | |||
899 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | 1201 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) |
900 | wake_up(&sh->raid_conf->wait_for_overlap); | 1202 | wake_up(&sh->raid_conf->wait_for_overlap); |
901 | } | 1203 | } |
1204 | put_cpu(); | ||
902 | } | 1205 | } |
903 | 1206 | ||
904 | static int grow_one_stripe(raid5_conf_t *conf) | 1207 | static int grow_one_stripe(raid5_conf_t *conf) |
@@ -948,6 +1251,28 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
948 | return 0; | 1251 | return 0; |
949 | } | 1252 | } |
950 | 1253 | ||
1254 | /** | ||
1255 | * scribble_len - return the required size of the scribble region | ||
1256 | * @num - total number of disks in the array | ||
1257 | * | ||
1258 | * The size must be enough to contain: | ||
1259 | * 1/ a struct page pointer for each device in the array +2 | ||
1260 | * 2/ room to convert each entry in (1) to its corresponding dma | ||
1261 | * (dma_map_page()) or page (page_address()) address. | ||
1262 | * | ||
1263 | * Note: the +2 is for the destination buffers of the ddf/raid6 case where we | ||
1264 | * calculate over all devices (not just the data blocks), using zeros in place | ||
1265 | * of the P and Q blocks. | ||
1266 | */ | ||
1267 | static size_t scribble_len(int num) | ||
1268 | { | ||
1269 | size_t len; | ||
1270 | |||
1271 | len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2); | ||
1272 | |||
1273 | return len; | ||
1274 | } | ||
1275 | |||
951 | static int resize_stripes(raid5_conf_t *conf, int newsize) | 1276 | static int resize_stripes(raid5_conf_t *conf, int newsize) |
952 | { | 1277 | { |
953 | /* Make all the stripes able to hold 'newsize' devices. | 1278 | /* Make all the stripes able to hold 'newsize' devices. |
@@ -976,6 +1301,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
976 | struct stripe_head *osh, *nsh; | 1301 | struct stripe_head *osh, *nsh; |
977 | LIST_HEAD(newstripes); | 1302 | LIST_HEAD(newstripes); |
978 | struct disk_info *ndisks; | 1303 | struct disk_info *ndisks; |
1304 | unsigned long cpu; | ||
979 | int err; | 1305 | int err; |
980 | struct kmem_cache *sc; | 1306 | struct kmem_cache *sc; |
981 | int i; | 1307 | int i; |
@@ -1041,7 +1367,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
1041 | /* Step 3. | 1367 | /* Step 3. |
1042 | * At this point, we are holding all the stripes so the array | 1368 | * At this point, we are holding all the stripes so the array |
1043 | * is completely stalled, so now is a good time to resize | 1369 | * is completely stalled, so now is a good time to resize |
1044 | * conf->disks. | 1370 | * conf->disks and the scribble region |
1045 | */ | 1371 | */ |
1046 | ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); | 1372 | ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); |
1047 | if (ndisks) { | 1373 | if (ndisks) { |
@@ -1052,10 +1378,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
1052 | } else | 1378 | } else |
1053 | err = -ENOMEM; | 1379 | err = -ENOMEM; |
1054 | 1380 | ||
1381 | get_online_cpus(); | ||
1382 | conf->scribble_len = scribble_len(newsize); | ||
1383 | for_each_present_cpu(cpu) { | ||
1384 | struct raid5_percpu *percpu; | ||
1385 | void *scribble; | ||
1386 | |||
1387 | percpu = per_cpu_ptr(conf->percpu, cpu); | ||
1388 | scribble = kmalloc(conf->scribble_len, GFP_NOIO); | ||
1389 | |||
1390 | if (scribble) { | ||
1391 | kfree(percpu->scribble); | ||
1392 | percpu->scribble = scribble; | ||
1393 | } else { | ||
1394 | err = -ENOMEM; | ||
1395 | break; | ||
1396 | } | ||
1397 | } | ||
1398 | put_online_cpus(); | ||
1399 | |||
1055 | /* Step 4, return new stripes to service */ | 1400 | /* Step 4, return new stripes to service */ |
1056 | while(!list_empty(&newstripes)) { | 1401 | while(!list_empty(&newstripes)) { |
1057 | nsh = list_entry(newstripes.next, struct stripe_head, lru); | 1402 | nsh = list_entry(newstripes.next, struct stripe_head, lru); |
1058 | list_del_init(&nsh->lru); | 1403 | list_del_init(&nsh->lru); |
1404 | |||
1059 | for (i=conf->raid_disks; i < newsize; i++) | 1405 | for (i=conf->raid_disks; i < newsize; i++) |
1060 | if (nsh->dev[i].page == NULL) { | 1406 | if (nsh->dev[i].page == NULL) { |
1061 | struct page *p = alloc_page(GFP_NOIO); | 1407 | struct page *p = alloc_page(GFP_NOIO); |
@@ -1594,258 +1940,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) | |||
1594 | } | 1940 | } |
1595 | 1941 | ||
1596 | 1942 | ||
1597 | |||
1598 | /* | ||
1599 | * Copy data between a page in the stripe cache, and one or more bion | ||
1600 | * The page could align with the middle of the bio, or there could be | ||
1601 | * several bion, each with several bio_vecs, which cover part of the page | ||
1602 | * Multiple bion are linked together on bi_next. There may be extras | ||
1603 | * at the end of this list. We ignore them. | ||
1604 | */ | ||
1605 | static void copy_data(int frombio, struct bio *bio, | ||
1606 | struct page *page, | ||
1607 | sector_t sector) | ||
1608 | { | ||
1609 | char *pa = page_address(page); | ||
1610 | struct bio_vec *bvl; | ||
1611 | int i; | ||
1612 | int page_offset; | ||
1613 | |||
1614 | if (bio->bi_sector >= sector) | ||
1615 | page_offset = (signed)(bio->bi_sector - sector) * 512; | ||
1616 | else | ||
1617 | page_offset = (signed)(sector - bio->bi_sector) * -512; | ||
1618 | bio_for_each_segment(bvl, bio, i) { | ||
1619 | int len = bio_iovec_idx(bio,i)->bv_len; | ||
1620 | int clen; | ||
1621 | int b_offset = 0; | ||
1622 | |||
1623 | if (page_offset < 0) { | ||
1624 | b_offset = -page_offset; | ||
1625 | page_offset += b_offset; | ||
1626 | len -= b_offset; | ||
1627 | } | ||
1628 | |||
1629 | if (len > 0 && page_offset + len > STRIPE_SIZE) | ||
1630 | clen = STRIPE_SIZE - page_offset; | ||
1631 | else clen = len; | ||
1632 | |||
1633 | if (clen > 0) { | ||
1634 | char *ba = __bio_kmap_atomic(bio, i, KM_USER0); | ||
1635 | if (frombio) | ||
1636 | memcpy(pa+page_offset, ba+b_offset, clen); | ||
1637 | else | ||
1638 | memcpy(ba+b_offset, pa+page_offset, clen); | ||
1639 | __bio_kunmap_atomic(ba, KM_USER0); | ||
1640 | } | ||
1641 | if (clen < len) /* hit end of page */ | ||
1642 | break; | ||
1643 | page_offset += len; | ||
1644 | } | ||
1645 | } | ||
1646 | |||
1647 | #define check_xor() do { \ | ||
1648 | if (count == MAX_XOR_BLOCKS) { \ | ||
1649 | xor_blocks(count, STRIPE_SIZE, dest, ptr);\ | ||
1650 | count = 0; \ | ||
1651 | } \ | ||
1652 | } while(0) | ||
1653 | |||
1654 | static void compute_parity6(struct stripe_head *sh, int method) | ||
1655 | { | ||
1656 | raid5_conf_t *conf = sh->raid_conf; | ||
1657 | int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count; | ||
1658 | int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); | ||
1659 | struct bio *chosen; | ||
1660 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ | ||
1661 | void *ptrs[syndrome_disks+2]; | ||
1662 | |||
1663 | pd_idx = sh->pd_idx; | ||
1664 | qd_idx = sh->qd_idx; | ||
1665 | d0_idx = raid6_d0(sh); | ||
1666 | |||
1667 | pr_debug("compute_parity, stripe %llu, method %d\n", | ||
1668 | (unsigned long long)sh->sector, method); | ||
1669 | |||
1670 | switch(method) { | ||
1671 | case READ_MODIFY_WRITE: | ||
1672 | BUG(); /* READ_MODIFY_WRITE N/A for RAID-6 */ | ||
1673 | case RECONSTRUCT_WRITE: | ||
1674 | for (i= disks; i-- ;) | ||
1675 | if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) { | ||
1676 | chosen = sh->dev[i].towrite; | ||
1677 | sh->dev[i].towrite = NULL; | ||
1678 | |||
1679 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) | ||
1680 | wake_up(&conf->wait_for_overlap); | ||
1681 | |||
1682 | BUG_ON(sh->dev[i].written); | ||
1683 | sh->dev[i].written = chosen; | ||
1684 | } | ||
1685 | break; | ||
1686 | case CHECK_PARITY: | ||
1687 | BUG(); /* Not implemented yet */ | ||
1688 | } | ||
1689 | |||
1690 | for (i = disks; i--;) | ||
1691 | if (sh->dev[i].written) { | ||
1692 | sector_t sector = sh->dev[i].sector; | ||
1693 | struct bio *wbi = sh->dev[i].written; | ||
1694 | while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) { | ||
1695 | copy_data(1, wbi, sh->dev[i].page, sector); | ||
1696 | wbi = r5_next_bio(wbi, sector); | ||
1697 | } | ||
1698 | |||
1699 | set_bit(R5_LOCKED, &sh->dev[i].flags); | ||
1700 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | ||
1701 | } | ||
1702 | |||
1703 | /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/ | ||
1704 | |||
1705 | for (i = 0; i < disks; i++) | ||
1706 | ptrs[i] = (void *)raid6_empty_zero_page; | ||
1707 | |||
1708 | count = 0; | ||
1709 | i = d0_idx; | ||
1710 | do { | ||
1711 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); | ||
1712 | |||
1713 | ptrs[slot] = page_address(sh->dev[i].page); | ||
1714 | if (slot < syndrome_disks && | ||
1715 | !test_bit(R5_UPTODATE, &sh->dev[i].flags)) { | ||
1716 | printk(KERN_ERR "block %d/%d not uptodate " | ||
1717 | "on parity calc\n", i, count); | ||
1718 | BUG(); | ||
1719 | } | ||
1720 | |||
1721 | i = raid6_next_disk(i, disks); | ||
1722 | } while (i != d0_idx); | ||
1723 | BUG_ON(count != syndrome_disks); | ||
1724 | |||
1725 | raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs); | ||
1726 | |||
1727 | switch(method) { | ||
1728 | case RECONSTRUCT_WRITE: | ||
1729 | set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | ||
1730 | set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags); | ||
1731 | set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); | ||
1732 | set_bit(R5_LOCKED, &sh->dev[qd_idx].flags); | ||
1733 | break; | ||
1734 | case UPDATE_PARITY: | ||
1735 | set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | ||
1736 | set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags); | ||
1737 | break; | ||
1738 | } | ||
1739 | } | ||
1740 | |||
1741 | |||
1742 | /* Compute one missing block */ | ||
1743 | static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) | ||
1744 | { | ||
1745 | int i, count, disks = sh->disks; | ||
1746 | void *ptr[MAX_XOR_BLOCKS], *dest, *p; | ||
1747 | int qd_idx = sh->qd_idx; | ||
1748 | |||
1749 | pr_debug("compute_block_1, stripe %llu, idx %d\n", | ||
1750 | (unsigned long long)sh->sector, dd_idx); | ||
1751 | |||
1752 | if ( dd_idx == qd_idx ) { | ||
1753 | /* We're actually computing the Q drive */ | ||
1754 | compute_parity6(sh, UPDATE_PARITY); | ||
1755 | } else { | ||
1756 | dest = page_address(sh->dev[dd_idx].page); | ||
1757 | if (!nozero) memset(dest, 0, STRIPE_SIZE); | ||
1758 | count = 0; | ||
1759 | for (i = disks ; i--; ) { | ||
1760 | if (i == dd_idx || i == qd_idx) | ||
1761 | continue; | ||
1762 | p = page_address(sh->dev[i].page); | ||
1763 | if (test_bit(R5_UPTODATE, &sh->dev[i].flags)) | ||
1764 | ptr[count++] = p; | ||
1765 | else | ||
1766 | printk("compute_block() %d, stripe %llu, %d" | ||
1767 | " not present\n", dd_idx, | ||
1768 | (unsigned long long)sh->sector, i); | ||
1769 | |||
1770 | check_xor(); | ||
1771 | } | ||
1772 | if (count) | ||
1773 | xor_blocks(count, STRIPE_SIZE, dest, ptr); | ||
1774 | if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); | ||
1775 | else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); | ||
1776 | } | ||
1777 | } | ||
1778 | |||
1779 | /* Compute two missing blocks */ | ||
1780 | static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) | ||
1781 | { | ||
1782 | int i, count, disks = sh->disks; | ||
1783 | int syndrome_disks = sh->ddf_layout ? disks : disks-2; | ||
1784 | int d0_idx = raid6_d0(sh); | ||
1785 | int faila = -1, failb = -1; | ||
1786 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ | ||
1787 | void *ptrs[syndrome_disks+2]; | ||
1788 | |||
1789 | for (i = 0; i < disks ; i++) | ||
1790 | ptrs[i] = (void *)raid6_empty_zero_page; | ||
1791 | count = 0; | ||
1792 | i = d0_idx; | ||
1793 | do { | ||
1794 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); | ||
1795 | |||
1796 | ptrs[slot] = page_address(sh->dev[i].page); | ||
1797 | |||
1798 | if (i == dd_idx1) | ||
1799 | faila = slot; | ||
1800 | if (i == dd_idx2) | ||
1801 | failb = slot; | ||
1802 | i = raid6_next_disk(i, disks); | ||
1803 | } while (i != d0_idx); | ||
1804 | BUG_ON(count != syndrome_disks); | ||
1805 | |||
1806 | BUG_ON(faila == failb); | ||
1807 | if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } | ||
1808 | |||
1809 | pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", | ||
1810 | (unsigned long long)sh->sector, dd_idx1, dd_idx2, | ||
1811 | faila, failb); | ||
1812 | |||
1813 | if (failb == syndrome_disks+1) { | ||
1814 | /* Q disk is one of the missing disks */ | ||
1815 | if (faila == syndrome_disks) { | ||
1816 | /* Missing P+Q, just recompute */ | ||
1817 | compute_parity6(sh, UPDATE_PARITY); | ||
1818 | return; | ||
1819 | } else { | ||
1820 | /* We're missing D+Q; recompute D from P */ | ||
1821 | compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ? | ||
1822 | dd_idx2 : dd_idx1), | ||
1823 | 0); | ||
1824 | compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */ | ||
1825 | return; | ||
1826 | } | ||
1827 | } | ||
1828 | |||
1829 | /* We're missing D+P or D+D; */ | ||
1830 | if (failb == syndrome_disks) { | ||
1831 | /* We're missing D+P. */ | ||
1832 | raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs); | ||
1833 | } else { | ||
1834 | /* We're missing D+D. */ | ||
1835 | raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb, | ||
1836 | ptrs); | ||
1837 | } | ||
1838 | |||
1839 | /* Both the above update both missing blocks */ | ||
1840 | set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags); | ||
1841 | set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags); | ||
1842 | } | ||
1843 | |||
1844 | static void | 1943 | static void |
1845 | schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, | 1944 | schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, |
1846 | int rcw, int expand) | 1945 | int rcw, int expand) |
1847 | { | 1946 | { |
1848 | int i, pd_idx = sh->pd_idx, disks = sh->disks; | 1947 | int i, pd_idx = sh->pd_idx, disks = sh->disks; |
1948 | raid5_conf_t *conf = sh->raid_conf; | ||
1949 | int level = conf->level; | ||
1849 | 1950 | ||
1850 | if (rcw) { | 1951 | if (rcw) { |
1851 | /* if we are not expanding this is a proper write request, and | 1952 | /* if we are not expanding this is a proper write request, and |
@@ -1858,7 +1959,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, | |||
1858 | } else | 1959 | } else |
1859 | sh->reconstruct_state = reconstruct_state_run; | 1960 | sh->reconstruct_state = reconstruct_state_run; |
1860 | 1961 | ||
1861 | set_bit(STRIPE_OP_POSTXOR, &s->ops_request); | 1962 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); |
1862 | 1963 | ||
1863 | for (i = disks; i--; ) { | 1964 | for (i = disks; i--; ) { |
1864 | struct r5dev *dev = &sh->dev[i]; | 1965 | struct r5dev *dev = &sh->dev[i]; |
@@ -1871,17 +1972,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, | |||
1871 | s->locked++; | 1972 | s->locked++; |
1872 | } | 1973 | } |
1873 | } | 1974 | } |
1874 | if (s->locked + 1 == disks) | 1975 | if (s->locked + conf->max_degraded == disks) |
1875 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) | 1976 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) |
1876 | atomic_inc(&sh->raid_conf->pending_full_writes); | 1977 | atomic_inc(&conf->pending_full_writes); |
1877 | } else { | 1978 | } else { |
1979 | BUG_ON(level == 6); | ||
1878 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || | 1980 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || |
1879 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); | 1981 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); |
1880 | 1982 | ||
1881 | sh->reconstruct_state = reconstruct_state_prexor_drain_run; | 1983 | sh->reconstruct_state = reconstruct_state_prexor_drain_run; |
1882 | set_bit(STRIPE_OP_PREXOR, &s->ops_request); | 1984 | set_bit(STRIPE_OP_PREXOR, &s->ops_request); |
1883 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); | 1985 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); |
1884 | set_bit(STRIPE_OP_POSTXOR, &s->ops_request); | 1986 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); |
1885 | 1987 | ||
1886 | for (i = disks; i--; ) { | 1988 | for (i = disks; i--; ) { |
1887 | struct r5dev *dev = &sh->dev[i]; | 1989 | struct r5dev *dev = &sh->dev[i]; |
@@ -1899,13 +2001,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, | |||
1899 | } | 2001 | } |
1900 | } | 2002 | } |
1901 | 2003 | ||
1902 | /* keep the parity disk locked while asynchronous operations | 2004 | /* keep the parity disk(s) locked while asynchronous operations |
1903 | * are in flight | 2005 | * are in flight |
1904 | */ | 2006 | */ |
1905 | set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); | 2007 | set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); |
1906 | clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | 2008 | clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); |
1907 | s->locked++; | 2009 | s->locked++; |
1908 | 2010 | ||
2011 | if (level == 6) { | ||
2012 | int qd_idx = sh->qd_idx; | ||
2013 | struct r5dev *dev = &sh->dev[qd_idx]; | ||
2014 | |||
2015 | set_bit(R5_LOCKED, &dev->flags); | ||
2016 | clear_bit(R5_UPTODATE, &dev->flags); | ||
2017 | s->locked++; | ||
2018 | } | ||
2019 | |||
1909 | pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", | 2020 | pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", |
1910 | __func__, (unsigned long long)sh->sector, | 2021 | __func__, (unsigned long long)sh->sector, |
1911 | s->locked, s->ops_request); | 2022 | s->locked, s->ops_request); |
@@ -1986,13 +2097,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
1986 | 2097 | ||
1987 | static void end_reshape(raid5_conf_t *conf); | 2098 | static void end_reshape(raid5_conf_t *conf); |
1988 | 2099 | ||
1989 | static int page_is_zero(struct page *p) | ||
1990 | { | ||
1991 | char *a = page_address(p); | ||
1992 | return ((*(u32*)a) == 0 && | ||
1993 | memcmp(a, a+4, STRIPE_SIZE-4)==0); | ||
1994 | } | ||
1995 | |||
1996 | static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, | 2100 | static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous, |
1997 | struct stripe_head *sh) | 2101 | struct stripe_head *sh) |
1998 | { | 2102 | { |
@@ -2132,9 +2236,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s, | |||
2132 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); | 2236 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); |
2133 | set_bit(R5_Wantcompute, &dev->flags); | 2237 | set_bit(R5_Wantcompute, &dev->flags); |
2134 | sh->ops.target = disk_idx; | 2238 | sh->ops.target = disk_idx; |
2239 | sh->ops.target2 = -1; | ||
2135 | s->req_compute = 1; | 2240 | s->req_compute = 1; |
2136 | /* Careful: from this point on 'uptodate' is in the eye | 2241 | /* Careful: from this point on 'uptodate' is in the eye |
2137 | * of raid5_run_ops which services 'compute' operations | 2242 | * of raid_run_ops which services 'compute' operations |
2138 | * before writes. R5_Wantcompute flags a block that will | 2243 | * before writes. R5_Wantcompute flags a block that will |
2139 | * be R5_UPTODATE by the time it is needed for a | 2244 | * be R5_UPTODATE by the time it is needed for a |
2140 | * subsequent operation. | 2245 | * subsequent operation. |
@@ -2173,61 +2278,104 @@ static void handle_stripe_fill5(struct stripe_head *sh, | |||
2173 | set_bit(STRIPE_HANDLE, &sh->state); | 2278 | set_bit(STRIPE_HANDLE, &sh->state); |
2174 | } | 2279 | } |
2175 | 2280 | ||
2176 | static void handle_stripe_fill6(struct stripe_head *sh, | 2281 | /* fetch_block6 - checks the given member device to see if its data needs |
2177 | struct stripe_head_state *s, struct r6_state *r6s, | 2282 | * to be read or computed to satisfy a request. |
2178 | int disks) | 2283 | * |
2284 | * Returns 1 when no more member devices need to be checked, otherwise returns | ||
2285 | * 0 to tell the loop in handle_stripe_fill6 to continue | ||
2286 | */ | ||
2287 | static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, | ||
2288 | struct r6_state *r6s, int disk_idx, int disks) | ||
2179 | { | 2289 | { |
2180 | int i; | 2290 | struct r5dev *dev = &sh->dev[disk_idx]; |
2181 | for (i = disks; i--; ) { | 2291 | struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]], |
2182 | struct r5dev *dev = &sh->dev[i]; | 2292 | &sh->dev[r6s->failed_num[1]] }; |
2183 | if (!test_bit(R5_LOCKED, &dev->flags) && | 2293 | |
2184 | !test_bit(R5_UPTODATE, &dev->flags) && | 2294 | if (!test_bit(R5_LOCKED, &dev->flags) && |
2185 | (dev->toread || (dev->towrite && | 2295 | !test_bit(R5_UPTODATE, &dev->flags) && |
2186 | !test_bit(R5_OVERWRITE, &dev->flags)) || | 2296 | (dev->toread || |
2187 | s->syncing || s->expanding || | 2297 | (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || |
2188 | (s->failed >= 1 && | 2298 | s->syncing || s->expanding || |
2189 | (sh->dev[r6s->failed_num[0]].toread || | 2299 | (s->failed >= 1 && |
2190 | s->to_write)) || | 2300 | (fdev[0]->toread || s->to_write)) || |
2191 | (s->failed >= 2 && | 2301 | (s->failed >= 2 && |
2192 | (sh->dev[r6s->failed_num[1]].toread || | 2302 | (fdev[1]->toread || s->to_write)))) { |
2193 | s->to_write)))) { | 2303 | /* we would like to get this block, possibly by computing it, |
2194 | /* we would like to get this block, possibly | 2304 | * otherwise read it if the backing disk is insync |
2195 | * by computing it, but we might not be able to | 2305 | */ |
2306 | BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); | ||
2307 | BUG_ON(test_bit(R5_Wantread, &dev->flags)); | ||
2308 | if ((s->uptodate == disks - 1) && | ||
2309 | (s->failed && (disk_idx == r6s->failed_num[0] || | ||
2310 | disk_idx == r6s->failed_num[1]))) { | ||
2311 | /* have disk failed, and we're requested to fetch it; | ||
2312 | * do compute it | ||
2196 | */ | 2313 | */ |
2197 | if ((s->uptodate == disks - 1) && | 2314 | pr_debug("Computing stripe %llu block %d\n", |
2198 | (s->failed && (i == r6s->failed_num[0] || | 2315 | (unsigned long long)sh->sector, disk_idx); |
2199 | i == r6s->failed_num[1]))) { | 2316 | set_bit(STRIPE_COMPUTE_RUN, &sh->state); |
2200 | pr_debug("Computing stripe %llu block %d\n", | 2317 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); |
2201 | (unsigned long long)sh->sector, i); | 2318 | set_bit(R5_Wantcompute, &dev->flags); |
2202 | compute_block_1(sh, i, 0); | 2319 | sh->ops.target = disk_idx; |
2203 | s->uptodate++; | 2320 | sh->ops.target2 = -1; /* no 2nd target */ |
2204 | } else if ( s->uptodate == disks-2 && s->failed >= 2 ) { | 2321 | s->req_compute = 1; |
2205 | /* Computing 2-failure is *very* expensive; only | 2322 | s->uptodate++; |
2206 | * do it if failed >= 2 | 2323 | return 1; |
2207 | */ | 2324 | } else if (s->uptodate == disks-2 && s->failed >= 2) { |
2208 | int other; | 2325 | /* Computing 2-failure is *very* expensive; only |
2209 | for (other = disks; other--; ) { | 2326 | * do it if failed >= 2 |
2210 | if (other == i) | 2327 | */ |
2211 | continue; | 2328 | int other; |
2212 | if (!test_bit(R5_UPTODATE, | 2329 | for (other = disks; other--; ) { |
2213 | &sh->dev[other].flags)) | 2330 | if (other == disk_idx) |
2214 | break; | 2331 | continue; |
2215 | } | 2332 | if (!test_bit(R5_UPTODATE, |
2216 | BUG_ON(other < 0); | 2333 | &sh->dev[other].flags)) |
2217 | pr_debug("Computing stripe %llu blocks %d,%d\n", | 2334 | break; |
2218 | (unsigned long long)sh->sector, | ||
2219 | i, other); | ||
2220 | compute_block_2(sh, i, other); | ||
2221 | s->uptodate += 2; | ||
2222 | } else if (test_bit(R5_Insync, &dev->flags)) { | ||
2223 | set_bit(R5_LOCKED, &dev->flags); | ||
2224 | set_bit(R5_Wantread, &dev->flags); | ||
2225 | s->locked++; | ||
2226 | pr_debug("Reading block %d (sync=%d)\n", | ||
2227 | i, s->syncing); | ||
2228 | } | 2335 | } |
2336 | BUG_ON(other < 0); | ||
2337 | pr_debug("Computing stripe %llu blocks %d,%d\n", | ||
2338 | (unsigned long long)sh->sector, | ||
2339 | disk_idx, other); | ||
2340 | set_bit(STRIPE_COMPUTE_RUN, &sh->state); | ||
2341 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); | ||
2342 | set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags); | ||
2343 | set_bit(R5_Wantcompute, &sh->dev[other].flags); | ||
2344 | sh->ops.target = disk_idx; | ||
2345 | sh->ops.target2 = other; | ||
2346 | s->uptodate += 2; | ||
2347 | s->req_compute = 1; | ||
2348 | return 1; | ||
2349 | } else if (test_bit(R5_Insync, &dev->flags)) { | ||
2350 | set_bit(R5_LOCKED, &dev->flags); | ||
2351 | set_bit(R5_Wantread, &dev->flags); | ||
2352 | s->locked++; | ||
2353 | pr_debug("Reading block %d (sync=%d)\n", | ||
2354 | disk_idx, s->syncing); | ||
2229 | } | 2355 | } |
2230 | } | 2356 | } |
2357 | |||
2358 | return 0; | ||
2359 | } | ||
2360 | |||
2361 | /** | ||
2362 | * handle_stripe_fill6 - read or compute data to satisfy pending requests. | ||
2363 | */ | ||
2364 | static void handle_stripe_fill6(struct stripe_head *sh, | ||
2365 | struct stripe_head_state *s, struct r6_state *r6s, | ||
2366 | int disks) | ||
2367 | { | ||
2368 | int i; | ||
2369 | |||
2370 | /* look for blocks to read/compute, skip this if a compute | ||
2371 | * is already in flight, or if the stripe contents are in the | ||
2372 | * midst of changing due to a write | ||
2373 | */ | ||
2374 | if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && | ||
2375 | !sh->reconstruct_state) | ||
2376 | for (i = disks; i--; ) | ||
2377 | if (fetch_block6(sh, s, r6s, i, disks)) | ||
2378 | break; | ||
2231 | set_bit(STRIPE_HANDLE, &sh->state); | 2379 | set_bit(STRIPE_HANDLE, &sh->state); |
2232 | } | 2380 | } |
2233 | 2381 | ||
@@ -2361,114 +2509,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, | |||
2361 | */ | 2509 | */ |
2362 | /* since handle_stripe can be called at any time we need to handle the | 2510 | /* since handle_stripe can be called at any time we need to handle the |
2363 | * case where a compute block operation has been submitted and then a | 2511 | * case where a compute block operation has been submitted and then a |
2364 | * subsequent call wants to start a write request. raid5_run_ops only | 2512 | * subsequent call wants to start a write request. raid_run_ops only |
2365 | * handles the case where compute block and postxor are requested | 2513 | * handles the case where compute block and reconstruct are requested |
2366 | * simultaneously. If this is not the case then new writes need to be | 2514 | * simultaneously. If this is not the case then new writes need to be |
2367 | * held off until the compute completes. | 2515 | * held off until the compute completes. |
2368 | */ | 2516 | */ |
2369 | if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && | 2517 | if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && |
2370 | (s->locked == 0 && (rcw == 0 || rmw == 0) && | 2518 | (s->locked == 0 && (rcw == 0 || rmw == 0) && |
2371 | !test_bit(STRIPE_BIT_DELAY, &sh->state))) | 2519 | !test_bit(STRIPE_BIT_DELAY, &sh->state))) |
2372 | schedule_reconstruction5(sh, s, rcw == 0, 0); | 2520 | schedule_reconstruction(sh, s, rcw == 0, 0); |
2373 | } | 2521 | } |
2374 | 2522 | ||
2375 | static void handle_stripe_dirtying6(raid5_conf_t *conf, | 2523 | static void handle_stripe_dirtying6(raid5_conf_t *conf, |
2376 | struct stripe_head *sh, struct stripe_head_state *s, | 2524 | struct stripe_head *sh, struct stripe_head_state *s, |
2377 | struct r6_state *r6s, int disks) | 2525 | struct r6_state *r6s, int disks) |
2378 | { | 2526 | { |
2379 | int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; | 2527 | int rcw = 0, pd_idx = sh->pd_idx, i; |
2380 | int qd_idx = sh->qd_idx; | 2528 | int qd_idx = sh->qd_idx; |
2529 | |||
2530 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2381 | for (i = disks; i--; ) { | 2531 | for (i = disks; i--; ) { |
2382 | struct r5dev *dev = &sh->dev[i]; | 2532 | struct r5dev *dev = &sh->dev[i]; |
2383 | /* Would I have to read this buffer for reconstruct_write */ | 2533 | /* check if we haven't enough data */ |
2384 | if (!test_bit(R5_OVERWRITE, &dev->flags) | 2534 | if (!test_bit(R5_OVERWRITE, &dev->flags) && |
2385 | && i != pd_idx && i != qd_idx | 2535 | i != pd_idx && i != qd_idx && |
2386 | && (!test_bit(R5_LOCKED, &dev->flags) | 2536 | !test_bit(R5_LOCKED, &dev->flags) && |
2387 | ) && | 2537 | !(test_bit(R5_UPTODATE, &dev->flags) || |
2388 | !test_bit(R5_UPTODATE, &dev->flags)) { | 2538 | test_bit(R5_Wantcompute, &dev->flags))) { |
2389 | if (test_bit(R5_Insync, &dev->flags)) rcw++; | 2539 | rcw++; |
2390 | else { | 2540 | if (!test_bit(R5_Insync, &dev->flags)) |
2391 | pr_debug("raid6: must_compute: " | 2541 | continue; /* it's a failed drive */ |
2392 | "disk %d flags=%#lx\n", i, dev->flags); | 2542 | |
2393 | must_compute++; | 2543 | if ( |
2544 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
2545 | pr_debug("Read_old stripe %llu " | ||
2546 | "block %d for Reconstruct\n", | ||
2547 | (unsigned long long)sh->sector, i); | ||
2548 | set_bit(R5_LOCKED, &dev->flags); | ||
2549 | set_bit(R5_Wantread, &dev->flags); | ||
2550 | s->locked++; | ||
2551 | } else { | ||
2552 | pr_debug("Request delayed stripe %llu " | ||
2553 | "block %d for Reconstruct\n", | ||
2554 | (unsigned long long)sh->sector, i); | ||
2555 | set_bit(STRIPE_DELAYED, &sh->state); | ||
2556 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2394 | } | 2557 | } |
2395 | } | 2558 | } |
2396 | } | 2559 | } |
2397 | pr_debug("for sector %llu, rcw=%d, must_compute=%d\n", | ||
2398 | (unsigned long long)sh->sector, rcw, must_compute); | ||
2399 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2400 | |||
2401 | if (rcw > 0) | ||
2402 | /* want reconstruct write, but need to get some data */ | ||
2403 | for (i = disks; i--; ) { | ||
2404 | struct r5dev *dev = &sh->dev[i]; | ||
2405 | if (!test_bit(R5_OVERWRITE, &dev->flags) | ||
2406 | && !(s->failed == 0 && (i == pd_idx || i == qd_idx)) | ||
2407 | && !test_bit(R5_LOCKED, &dev->flags) && | ||
2408 | !test_bit(R5_UPTODATE, &dev->flags) && | ||
2409 | test_bit(R5_Insync, &dev->flags)) { | ||
2410 | if ( | ||
2411 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
2412 | pr_debug("Read_old stripe %llu " | ||
2413 | "block %d for Reconstruct\n", | ||
2414 | (unsigned long long)sh->sector, i); | ||
2415 | set_bit(R5_LOCKED, &dev->flags); | ||
2416 | set_bit(R5_Wantread, &dev->flags); | ||
2417 | s->locked++; | ||
2418 | } else { | ||
2419 | pr_debug("Request delayed stripe %llu " | ||
2420 | "block %d for Reconstruct\n", | ||
2421 | (unsigned long long)sh->sector, i); | ||
2422 | set_bit(STRIPE_DELAYED, &sh->state); | ||
2423 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2424 | } | ||
2425 | } | ||
2426 | } | ||
2427 | /* now if nothing is locked, and if we have enough data, we can start a | 2560 | /* now if nothing is locked, and if we have enough data, we can start a |
2428 | * write request | 2561 | * write request |
2429 | */ | 2562 | */ |
2430 | if (s->locked == 0 && rcw == 0 && | 2563 | if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && |
2564 | s->locked == 0 && rcw == 0 && | ||
2431 | !test_bit(STRIPE_BIT_DELAY, &sh->state)) { | 2565 | !test_bit(STRIPE_BIT_DELAY, &sh->state)) { |
2432 | if (must_compute > 0) { | 2566 | schedule_reconstruction(sh, s, 1, 0); |
2433 | /* We have failed blocks and need to compute them */ | ||
2434 | switch (s->failed) { | ||
2435 | case 0: | ||
2436 | BUG(); | ||
2437 | case 1: | ||
2438 | compute_block_1(sh, r6s->failed_num[0], 0); | ||
2439 | break; | ||
2440 | case 2: | ||
2441 | compute_block_2(sh, r6s->failed_num[0], | ||
2442 | r6s->failed_num[1]); | ||
2443 | break; | ||
2444 | default: /* This request should have been failed? */ | ||
2445 | BUG(); | ||
2446 | } | ||
2447 | } | ||
2448 | |||
2449 | pr_debug("Computing parity for stripe %llu\n", | ||
2450 | (unsigned long long)sh->sector); | ||
2451 | compute_parity6(sh, RECONSTRUCT_WRITE); | ||
2452 | /* now every locked buffer is ready to be written */ | ||
2453 | for (i = disks; i--; ) | ||
2454 | if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { | ||
2455 | pr_debug("Writing stripe %llu block %d\n", | ||
2456 | (unsigned long long)sh->sector, i); | ||
2457 | s->locked++; | ||
2458 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | ||
2459 | } | ||
2460 | if (s->locked == disks) | ||
2461 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) | ||
2462 | atomic_inc(&conf->pending_full_writes); | ||
2463 | /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ | ||
2464 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2465 | |||
2466 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
2467 | atomic_dec(&conf->preread_active_stripes); | ||
2468 | if (atomic_read(&conf->preread_active_stripes) < | ||
2469 | IO_THRESHOLD) | ||
2470 | md_wakeup_thread(conf->mddev->thread); | ||
2471 | } | ||
2472 | } | 2567 | } |
2473 | } | 2568 | } |
2474 | 2569 | ||
@@ -2527,7 +2622,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2527 | * we are done. Otherwise update the mismatch count and repair | 2622 | * we are done. Otherwise update the mismatch count and repair |
2528 | * parity if !MD_RECOVERY_CHECK | 2623 | * parity if !MD_RECOVERY_CHECK |
2529 | */ | 2624 | */ |
2530 | if (sh->ops.zero_sum_result == 0) | 2625 | if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) |
2531 | /* parity is correct (on disc, | 2626 | /* parity is correct (on disc, |
2532 | * not in buffer any more) | 2627 | * not in buffer any more) |
2533 | */ | 2628 | */ |
@@ -2544,6 +2639,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2544 | set_bit(R5_Wantcompute, | 2639 | set_bit(R5_Wantcompute, |
2545 | &sh->dev[sh->pd_idx].flags); | 2640 | &sh->dev[sh->pd_idx].flags); |
2546 | sh->ops.target = sh->pd_idx; | 2641 | sh->ops.target = sh->pd_idx; |
2642 | sh->ops.target2 = -1; | ||
2547 | s->uptodate++; | 2643 | s->uptodate++; |
2548 | } | 2644 | } |
2549 | } | 2645 | } |
@@ -2560,67 +2656,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2560 | 2656 | ||
2561 | 2657 | ||
2562 | static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, | 2658 | static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, |
2563 | struct stripe_head_state *s, | 2659 | struct stripe_head_state *s, |
2564 | struct r6_state *r6s, struct page *tmp_page, | 2660 | struct r6_state *r6s, int disks) |
2565 | int disks) | ||
2566 | { | 2661 | { |
2567 | int update_p = 0, update_q = 0; | ||
2568 | struct r5dev *dev; | ||
2569 | int pd_idx = sh->pd_idx; | 2662 | int pd_idx = sh->pd_idx; |
2570 | int qd_idx = sh->qd_idx; | 2663 | int qd_idx = sh->qd_idx; |
2664 | struct r5dev *dev; | ||
2571 | 2665 | ||
2572 | set_bit(STRIPE_HANDLE, &sh->state); | 2666 | set_bit(STRIPE_HANDLE, &sh->state); |
2573 | 2667 | ||
2574 | BUG_ON(s->failed > 2); | 2668 | BUG_ON(s->failed > 2); |
2575 | BUG_ON(s->uptodate < disks); | 2669 | |
2576 | /* Want to check and possibly repair P and Q. | 2670 | /* Want to check and possibly repair P and Q. |
2577 | * However there could be one 'failed' device, in which | 2671 | * However there could be one 'failed' device, in which |
2578 | * case we can only check one of them, possibly using the | 2672 | * case we can only check one of them, possibly using the |
2579 | * other to generate missing data | 2673 | * other to generate missing data |
2580 | */ | 2674 | */ |
2581 | 2675 | ||
2582 | /* If !tmp_page, we cannot do the calculations, | 2676 | switch (sh->check_state) { |
2583 | * but as we have set STRIPE_HANDLE, we will soon be called | 2677 | case check_state_idle: |
2584 | * by stripe_handle with a tmp_page - just wait until then. | 2678 | /* start a new check operation if there are < 2 failures */ |
2585 | */ | ||
2586 | if (tmp_page) { | ||
2587 | if (s->failed == r6s->q_failed) { | 2679 | if (s->failed == r6s->q_failed) { |
2588 | /* The only possible failed device holds 'Q', so it | 2680 | /* The only possible failed device holds Q, so it |
2589 | * makes sense to check P (If anything else were failed, | 2681 | * makes sense to check P (If anything else were failed, |
2590 | * we would have used P to recreate it). | 2682 | * we would have used P to recreate it). |
2591 | */ | 2683 | */ |
2592 | compute_block_1(sh, pd_idx, 1); | 2684 | sh->check_state = check_state_run; |
2593 | if (!page_is_zero(sh->dev[pd_idx].page)) { | ||
2594 | compute_block_1(sh, pd_idx, 0); | ||
2595 | update_p = 1; | ||
2596 | } | ||
2597 | } | 2685 | } |
2598 | if (!r6s->q_failed && s->failed < 2) { | 2686 | if (!r6s->q_failed && s->failed < 2) { |
2599 | /* q is not failed, and we didn't use it to generate | 2687 | /* Q is not failed, and we didn't use it to generate |
2600 | * anything, so it makes sense to check it | 2688 | * anything, so it makes sense to check it |
2601 | */ | 2689 | */ |
2602 | memcpy(page_address(tmp_page), | 2690 | if (sh->check_state == check_state_run) |
2603 | page_address(sh->dev[qd_idx].page), | 2691 | sh->check_state = check_state_run_pq; |
2604 | STRIPE_SIZE); | 2692 | else |
2605 | compute_parity6(sh, UPDATE_PARITY); | 2693 | sh->check_state = check_state_run_q; |
2606 | if (memcmp(page_address(tmp_page), | ||
2607 | page_address(sh->dev[qd_idx].page), | ||
2608 | STRIPE_SIZE) != 0) { | ||
2609 | clear_bit(STRIPE_INSYNC, &sh->state); | ||
2610 | update_q = 1; | ||
2611 | } | ||
2612 | } | 2694 | } |
2613 | if (update_p || update_q) { | 2695 | |
2614 | conf->mddev->resync_mismatches += STRIPE_SECTORS; | 2696 | /* discard potentially stale zero_sum_result */ |
2615 | if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) | 2697 | sh->ops.zero_sum_result = 0; |
2616 | /* don't try to repair!! */ | 2698 | |
2617 | update_p = update_q = 0; | 2699 | if (sh->check_state == check_state_run) { |
2700 | /* async_xor_zero_sum destroys the contents of P */ | ||
2701 | clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | ||
2702 | s->uptodate--; | ||
2703 | } | ||
2704 | if (sh->check_state >= check_state_run && | ||
2705 | sh->check_state <= check_state_run_pq) { | ||
2706 | /* async_syndrome_zero_sum preserves P and Q, so | ||
2707 | * no need to mark them !uptodate here | ||
2708 | */ | ||
2709 | set_bit(STRIPE_OP_CHECK, &s->ops_request); | ||
2710 | break; | ||
2618 | } | 2711 | } |
2619 | 2712 | ||
2713 | /* we have 2-disk failure */ | ||
2714 | BUG_ON(s->failed != 2); | ||
2715 | /* fall through */ | ||
2716 | case check_state_compute_result: | ||
2717 | sh->check_state = check_state_idle; | ||
2718 | |||
2719 | /* check that a write has not made the stripe insync */ | ||
2720 | if (test_bit(STRIPE_INSYNC, &sh->state)) | ||
2721 | break; | ||
2722 | |||
2620 | /* now write out any block on a failed drive, | 2723 | /* now write out any block on a failed drive, |
2621 | * or P or Q if they need it | 2724 | * or P or Q if they were recomputed |
2622 | */ | 2725 | */ |
2623 | 2726 | BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ | |
2624 | if (s->failed == 2) { | 2727 | if (s->failed == 2) { |
2625 | dev = &sh->dev[r6s->failed_num[1]]; | 2728 | dev = &sh->dev[r6s->failed_num[1]]; |
2626 | s->locked++; | 2729 | s->locked++; |
@@ -2633,14 +2736,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, | |||
2633 | set_bit(R5_LOCKED, &dev->flags); | 2736 | set_bit(R5_LOCKED, &dev->flags); |
2634 | set_bit(R5_Wantwrite, &dev->flags); | 2737 | set_bit(R5_Wantwrite, &dev->flags); |
2635 | } | 2738 | } |
2636 | 2739 | if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { | |
2637 | if (update_p) { | ||
2638 | dev = &sh->dev[pd_idx]; | 2740 | dev = &sh->dev[pd_idx]; |
2639 | s->locked++; | 2741 | s->locked++; |
2640 | set_bit(R5_LOCKED, &dev->flags); | 2742 | set_bit(R5_LOCKED, &dev->flags); |
2641 | set_bit(R5_Wantwrite, &dev->flags); | 2743 | set_bit(R5_Wantwrite, &dev->flags); |
2642 | } | 2744 | } |
2643 | if (update_q) { | 2745 | if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { |
2644 | dev = &sh->dev[qd_idx]; | 2746 | dev = &sh->dev[qd_idx]; |
2645 | s->locked++; | 2747 | s->locked++; |
2646 | set_bit(R5_LOCKED, &dev->flags); | 2748 | set_bit(R5_LOCKED, &dev->flags); |
@@ -2649,6 +2751,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, | |||
2649 | clear_bit(STRIPE_DEGRADED, &sh->state); | 2751 | clear_bit(STRIPE_DEGRADED, &sh->state); |
2650 | 2752 | ||
2651 | set_bit(STRIPE_INSYNC, &sh->state); | 2753 | set_bit(STRIPE_INSYNC, &sh->state); |
2754 | break; | ||
2755 | case check_state_run: | ||
2756 | case check_state_run_q: | ||
2757 | case check_state_run_pq: | ||
2758 | break; /* we will be called again upon completion */ | ||
2759 | case check_state_check_result: | ||
2760 | sh->check_state = check_state_idle; | ||
2761 | |||
2762 | /* handle a successful check operation, if parity is correct | ||
2763 | * we are done. Otherwise update the mismatch count and repair | ||
2764 | * parity if !MD_RECOVERY_CHECK | ||
2765 | */ | ||
2766 | if (sh->ops.zero_sum_result == 0) { | ||
2767 | /* both parities are correct */ | ||
2768 | if (!s->failed) | ||
2769 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2770 | else { | ||
2771 | /* in contrast to the raid5 case we can validate | ||
2772 | * parity, but still have a failure to write | ||
2773 | * back | ||
2774 | */ | ||
2775 | sh->check_state = check_state_compute_result; | ||
2776 | /* Returning at this point means that we may go | ||
2777 | * off and bring p and/or q uptodate again so | ||
2778 | * we make sure to check zero_sum_result again | ||
2779 | * to verify if p or q need writeback | ||
2780 | */ | ||
2781 | } | ||
2782 | } else { | ||
2783 | conf->mddev->resync_mismatches += STRIPE_SECTORS; | ||
2784 | if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) | ||
2785 | /* don't try to repair!! */ | ||
2786 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2787 | else { | ||
2788 | int *target = &sh->ops.target; | ||
2789 | |||
2790 | sh->ops.target = -1; | ||
2791 | sh->ops.target2 = -1; | ||
2792 | sh->check_state = check_state_compute_run; | ||
2793 | set_bit(STRIPE_COMPUTE_RUN, &sh->state); | ||
2794 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); | ||
2795 | if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) { | ||
2796 | set_bit(R5_Wantcompute, | ||
2797 | &sh->dev[pd_idx].flags); | ||
2798 | *target = pd_idx; | ||
2799 | target = &sh->ops.target2; | ||
2800 | s->uptodate++; | ||
2801 | } | ||
2802 | if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) { | ||
2803 | set_bit(R5_Wantcompute, | ||
2804 | &sh->dev[qd_idx].flags); | ||
2805 | *target = qd_idx; | ||
2806 | s->uptodate++; | ||
2807 | } | ||
2808 | } | ||
2809 | } | ||
2810 | break; | ||
2811 | case check_state_compute_run: | ||
2812 | break; | ||
2813 | default: | ||
2814 | printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n", | ||
2815 | __func__, sh->check_state, | ||
2816 | (unsigned long long) sh->sector); | ||
2817 | BUG(); | ||
2652 | } | 2818 | } |
2653 | } | 2819 | } |
2654 | 2820 | ||
@@ -2666,6 +2832,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
2666 | if (i != sh->pd_idx && i != sh->qd_idx) { | 2832 | if (i != sh->pd_idx && i != sh->qd_idx) { |
2667 | int dd_idx, j; | 2833 | int dd_idx, j; |
2668 | struct stripe_head *sh2; | 2834 | struct stripe_head *sh2; |
2835 | struct async_submit_ctl submit; | ||
2669 | 2836 | ||
2670 | sector_t bn = compute_blocknr(sh, i, 1); | 2837 | sector_t bn = compute_blocknr(sh, i, 1); |
2671 | sector_t s = raid5_compute_sector(conf, bn, 0, | 2838 | sector_t s = raid5_compute_sector(conf, bn, 0, |
@@ -2685,9 +2852,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
2685 | } | 2852 | } |
2686 | 2853 | ||
2687 | /* place all the copies on one channel */ | 2854 | /* place all the copies on one channel */ |
2855 | init_async_submit(&submit, 0, tx, NULL, NULL, NULL); | ||
2688 | tx = async_memcpy(sh2->dev[dd_idx].page, | 2856 | tx = async_memcpy(sh2->dev[dd_idx].page, |
2689 | sh->dev[i].page, 0, 0, STRIPE_SIZE, | 2857 | sh->dev[i].page, 0, 0, STRIPE_SIZE, |
2690 | ASYNC_TX_DEP_ACK, tx, NULL, NULL); | 2858 | &submit); |
2691 | 2859 | ||
2692 | set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); | 2860 | set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); |
2693 | set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); | 2861 | set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); |
@@ -2756,7 +2924,8 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
2756 | rcu_read_lock(); | 2924 | rcu_read_lock(); |
2757 | for (i=disks; i--; ) { | 2925 | for (i=disks; i--; ) { |
2758 | mdk_rdev_t *rdev; | 2926 | mdk_rdev_t *rdev; |
2759 | struct r5dev *dev = &sh->dev[i]; | 2927 | |
2928 | dev = &sh->dev[i]; | ||
2760 | clear_bit(R5_Insync, &dev->flags); | 2929 | clear_bit(R5_Insync, &dev->flags); |
2761 | 2930 | ||
2762 | pr_debug("check %d: state 0x%lx toread %p read %p write %p " | 2931 | pr_debug("check %d: state 0x%lx toread %p read %p write %p " |
@@ -2973,7 +3142,7 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
2973 | /* Need to write out all blocks after computing parity */ | 3142 | /* Need to write out all blocks after computing parity */ |
2974 | sh->disks = conf->raid_disks; | 3143 | sh->disks = conf->raid_disks; |
2975 | stripe_set_idx(sh->sector, conf, 0, sh); | 3144 | stripe_set_idx(sh->sector, conf, 0, sh); |
2976 | schedule_reconstruction5(sh, &s, 1, 1); | 3145 | schedule_reconstruction(sh, &s, 1, 1); |
2977 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { | 3146 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { |
2978 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 3147 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
2979 | atomic_dec(&conf->reshape_stripes); | 3148 | atomic_dec(&conf->reshape_stripes); |
@@ -2993,7 +3162,7 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
2993 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); | 3162 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); |
2994 | 3163 | ||
2995 | if (s.ops_request) | 3164 | if (s.ops_request) |
2996 | raid5_run_ops(sh, s.ops_request); | 3165 | raid_run_ops(sh, s.ops_request); |
2997 | 3166 | ||
2998 | ops_run_io(sh, &s); | 3167 | ops_run_io(sh, &s); |
2999 | 3168 | ||
@@ -3002,7 +3171,7 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
3002 | return blocked_rdev == NULL; | 3171 | return blocked_rdev == NULL; |
3003 | } | 3172 | } |
3004 | 3173 | ||
3005 | static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | 3174 | static bool handle_stripe6(struct stripe_head *sh) |
3006 | { | 3175 | { |
3007 | raid5_conf_t *conf = sh->raid_conf; | 3176 | raid5_conf_t *conf = sh->raid_conf; |
3008 | int disks = sh->disks; | 3177 | int disks = sh->disks; |
@@ -3014,9 +3183,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3014 | mdk_rdev_t *blocked_rdev = NULL; | 3183 | mdk_rdev_t *blocked_rdev = NULL; |
3015 | 3184 | ||
3016 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " | 3185 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " |
3017 | "pd_idx=%d, qd_idx=%d\n", | 3186 | "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", |
3018 | (unsigned long long)sh->sector, sh->state, | 3187 | (unsigned long long)sh->sector, sh->state, |
3019 | atomic_read(&sh->count), pd_idx, qd_idx); | 3188 | atomic_read(&sh->count), pd_idx, qd_idx, |
3189 | sh->check_state, sh->reconstruct_state); | ||
3020 | memset(&s, 0, sizeof(s)); | 3190 | memset(&s, 0, sizeof(s)); |
3021 | 3191 | ||
3022 | spin_lock(&sh->lock); | 3192 | spin_lock(&sh->lock); |
@@ -3036,35 +3206,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3036 | 3206 | ||
3037 | pr_debug("check %d: state 0x%lx read %p write %p written %p\n", | 3207 | pr_debug("check %d: state 0x%lx read %p write %p written %p\n", |
3038 | i, dev->flags, dev->toread, dev->towrite, dev->written); | 3208 | i, dev->flags, dev->toread, dev->towrite, dev->written); |
3039 | /* maybe we can reply to a read */ | 3209 | /* maybe we can reply to a read |
3040 | if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { | 3210 | * |
3041 | struct bio *rbi, *rbi2; | 3211 | * new wantfill requests are only permitted while |
3042 | pr_debug("Return read for disc %d\n", i); | 3212 | * ops_complete_biofill is guaranteed to be inactive |
3043 | spin_lock_irq(&conf->device_lock); | 3213 | */ |
3044 | rbi = dev->toread; | 3214 | if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && |
3045 | dev->toread = NULL; | 3215 | !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) |
3046 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | 3216 | set_bit(R5_Wantfill, &dev->flags); |
3047 | wake_up(&conf->wait_for_overlap); | ||
3048 | spin_unlock_irq(&conf->device_lock); | ||
3049 | while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { | ||
3050 | copy_data(0, rbi, dev->page, dev->sector); | ||
3051 | rbi2 = r5_next_bio(rbi, dev->sector); | ||
3052 | spin_lock_irq(&conf->device_lock); | ||
3053 | if (!raid5_dec_bi_phys_segments(rbi)) { | ||
3054 | rbi->bi_next = return_bi; | ||
3055 | return_bi = rbi; | ||
3056 | } | ||
3057 | spin_unlock_irq(&conf->device_lock); | ||
3058 | rbi = rbi2; | ||
3059 | } | ||
3060 | } | ||
3061 | 3217 | ||
3062 | /* now count some things */ | 3218 | /* now count some things */ |
3063 | if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; | 3219 | if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; |
3064 | if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; | 3220 | if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; |
3221 | if (test_bit(R5_Wantcompute, &dev->flags)) { | ||
3222 | s.compute++; | ||
3223 | BUG_ON(s.compute > 2); | ||
3224 | } | ||
3065 | 3225 | ||
3066 | 3226 | if (test_bit(R5_Wantfill, &dev->flags)) { | |
3067 | if (dev->toread) | 3227 | s.to_fill++; |
3228 | } else if (dev->toread) | ||
3068 | s.to_read++; | 3229 | s.to_read++; |
3069 | if (dev->towrite) { | 3230 | if (dev->towrite) { |
3070 | s.to_write++; | 3231 | s.to_write++; |
@@ -3105,6 +3266,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3105 | blocked_rdev = NULL; | 3266 | blocked_rdev = NULL; |
3106 | } | 3267 | } |
3107 | 3268 | ||
3269 | if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { | ||
3270 | set_bit(STRIPE_OP_BIOFILL, &s.ops_request); | ||
3271 | set_bit(STRIPE_BIOFILL_RUN, &sh->state); | ||
3272 | } | ||
3273 | |||
3108 | pr_debug("locked=%d uptodate=%d to_read=%d" | 3274 | pr_debug("locked=%d uptodate=%d to_read=%d" |
3109 | " to_write=%d failed=%d failed_num=%d,%d\n", | 3275 | " to_write=%d failed=%d failed_num=%d,%d\n", |
3110 | s.locked, s.uptodate, s.to_read, s.to_write, s.failed, | 3276 | s.locked, s.uptodate, s.to_read, s.to_write, s.failed, |
@@ -3145,19 +3311,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3145 | * or to load a block that is being partially written. | 3311 | * or to load a block that is being partially written. |
3146 | */ | 3312 | */ |
3147 | if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || | 3313 | if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || |
3148 | (s.syncing && (s.uptodate < disks)) || s.expanding) | 3314 | (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) |
3149 | handle_stripe_fill6(sh, &s, &r6s, disks); | 3315 | handle_stripe_fill6(sh, &s, &r6s, disks); |
3150 | 3316 | ||
3151 | /* now to consider writing and what else, if anything should be read */ | 3317 | /* Now we check to see if any write operations have recently |
3152 | if (s.to_write) | 3318 | * completed |
3319 | */ | ||
3320 | if (sh->reconstruct_state == reconstruct_state_drain_result) { | ||
3321 | int qd_idx = sh->qd_idx; | ||
3322 | |||
3323 | sh->reconstruct_state = reconstruct_state_idle; | ||
3324 | /* All the 'written' buffers and the parity blocks are ready to | ||
3325 | * be written back to disk | ||
3326 | */ | ||
3327 | BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); | ||
3328 | BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags)); | ||
3329 | for (i = disks; i--; ) { | ||
3330 | dev = &sh->dev[i]; | ||
3331 | if (test_bit(R5_LOCKED, &dev->flags) && | ||
3332 | (i == sh->pd_idx || i == qd_idx || | ||
3333 | dev->written)) { | ||
3334 | pr_debug("Writing block %d\n", i); | ||
3335 | BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); | ||
3336 | set_bit(R5_Wantwrite, &dev->flags); | ||
3337 | if (!test_bit(R5_Insync, &dev->flags) || | ||
3338 | ((i == sh->pd_idx || i == qd_idx) && | ||
3339 | s.failed == 0)) | ||
3340 | set_bit(STRIPE_INSYNC, &sh->state); | ||
3341 | } | ||
3342 | } | ||
3343 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
3344 | atomic_dec(&conf->preread_active_stripes); | ||
3345 | if (atomic_read(&conf->preread_active_stripes) < | ||
3346 | IO_THRESHOLD) | ||
3347 | md_wakeup_thread(conf->mddev->thread); | ||
3348 | } | ||
3349 | } | ||
3350 | |||
3351 | /* Now to consider new write requests and what else, if anything | ||
3352 | * should be read. We do not handle new writes when: | ||
3353 | * 1/ A 'write' operation (copy+gen_syndrome) is already in flight. | ||
3354 | * 2/ A 'check' operation is in flight, as it may clobber the parity | ||
3355 | * block. | ||
3356 | */ | ||
3357 | if (s.to_write && !sh->reconstruct_state && !sh->check_state) | ||
3153 | handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); | 3358 | handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); |
3154 | 3359 | ||
3155 | /* maybe we need to check and possibly fix the parity for this stripe | 3360 | /* maybe we need to check and possibly fix the parity for this stripe |
3156 | * Any reads will already have been scheduled, so we just see if enough | 3361 | * Any reads will already have been scheduled, so we just see if enough |
3157 | * data is available | 3362 | * data is available. The parity check is held off while parity |
3363 | * dependent operations are in flight. | ||
3158 | */ | 3364 | */ |
3159 | if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) | 3365 | if (sh->check_state || |
3160 | handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); | 3366 | (s.syncing && s.locked == 0 && |
3367 | !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && | ||
3368 | !test_bit(STRIPE_INSYNC, &sh->state))) | ||
3369 | handle_parity_checks6(conf, sh, &s, &r6s, disks); | ||
3161 | 3370 | ||
3162 | if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { | 3371 | if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { |
3163 | md_done_sync(conf->mddev, STRIPE_SECTORS,1); | 3372 | md_done_sync(conf->mddev, STRIPE_SECTORS,1); |
@@ -3178,15 +3387,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3178 | set_bit(R5_Wantwrite, &dev->flags); | 3387 | set_bit(R5_Wantwrite, &dev->flags); |
3179 | set_bit(R5_ReWrite, &dev->flags); | 3388 | set_bit(R5_ReWrite, &dev->flags); |
3180 | set_bit(R5_LOCKED, &dev->flags); | 3389 | set_bit(R5_LOCKED, &dev->flags); |
3390 | s.locked++; | ||
3181 | } else { | 3391 | } else { |
3182 | /* let's read it back */ | 3392 | /* let's read it back */ |
3183 | set_bit(R5_Wantread, &dev->flags); | 3393 | set_bit(R5_Wantread, &dev->flags); |
3184 | set_bit(R5_LOCKED, &dev->flags); | 3394 | set_bit(R5_LOCKED, &dev->flags); |
3395 | s.locked++; | ||
3185 | } | 3396 | } |
3186 | } | 3397 | } |
3187 | } | 3398 | } |
3188 | 3399 | ||
3189 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { | 3400 | /* Finish reconstruct operations initiated by the expansion process */ |
3401 | if (sh->reconstruct_state == reconstruct_state_result) { | ||
3402 | sh->reconstruct_state = reconstruct_state_idle; | ||
3403 | clear_bit(STRIPE_EXPANDING, &sh->state); | ||
3404 | for (i = conf->raid_disks; i--; ) { | ||
3405 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | ||
3406 | set_bit(R5_LOCKED, &sh->dev[i].flags); | ||
3407 | s.locked++; | ||
3408 | } | ||
3409 | } | ||
3410 | |||
3411 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && | ||
3412 | !sh->reconstruct_state) { | ||
3190 | struct stripe_head *sh2 | 3413 | struct stripe_head *sh2 |
3191 | = get_active_stripe(conf, sh->sector, 1, 1, 1); | 3414 | = get_active_stripe(conf, sh->sector, 1, 1, 1); |
3192 | if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { | 3415 | if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { |
@@ -3207,14 +3430,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3207 | /* Need to write out all blocks after computing P&Q */ | 3430 | /* Need to write out all blocks after computing P&Q */ |
3208 | sh->disks = conf->raid_disks; | 3431 | sh->disks = conf->raid_disks; |
3209 | stripe_set_idx(sh->sector, conf, 0, sh); | 3432 | stripe_set_idx(sh->sector, conf, 0, sh); |
3210 | compute_parity6(sh, RECONSTRUCT_WRITE); | 3433 | schedule_reconstruction(sh, &s, 1, 1); |
3211 | for (i = conf->raid_disks ; i-- ; ) { | 3434 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { |
3212 | set_bit(R5_LOCKED, &sh->dev[i].flags); | ||
3213 | s.locked++; | ||
3214 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | ||
3215 | } | ||
3216 | clear_bit(STRIPE_EXPANDING, &sh->state); | ||
3217 | } else if (s.expanded) { | ||
3218 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 3435 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
3219 | atomic_dec(&conf->reshape_stripes); | 3436 | atomic_dec(&conf->reshape_stripes); |
3220 | wake_up(&conf->wait_for_overlap); | 3437 | wake_up(&conf->wait_for_overlap); |
@@ -3232,6 +3449,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3232 | if (unlikely(blocked_rdev)) | 3449 | if (unlikely(blocked_rdev)) |
3233 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); | 3450 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); |
3234 | 3451 | ||
3452 | if (s.ops_request) | ||
3453 | raid_run_ops(sh, s.ops_request); | ||
3454 | |||
3235 | ops_run_io(sh, &s); | 3455 | ops_run_io(sh, &s); |
3236 | 3456 | ||
3237 | return_io(return_bi); | 3457 | return_io(return_bi); |
@@ -3240,16 +3460,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3240 | } | 3460 | } |
3241 | 3461 | ||
3242 | /* returns true if the stripe was handled */ | 3462 | /* returns true if the stripe was handled */ |
3243 | static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page) | 3463 | static bool handle_stripe(struct stripe_head *sh) |
3244 | { | 3464 | { |
3245 | if (sh->raid_conf->level == 6) | 3465 | if (sh->raid_conf->level == 6) |
3246 | return handle_stripe6(sh, tmp_page); | 3466 | return handle_stripe6(sh); |
3247 | else | 3467 | else |
3248 | return handle_stripe5(sh); | 3468 | return handle_stripe5(sh); |
3249 | } | 3469 | } |
3250 | 3470 | ||
3251 | |||
3252 | |||
3253 | static void raid5_activate_delayed(raid5_conf_t *conf) | 3471 | static void raid5_activate_delayed(raid5_conf_t *conf) |
3254 | { | 3472 | { |
3255 | if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { | 3473 | if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) { |
@@ -3331,6 +3549,9 @@ static int raid5_congested(void *data, int bits) | |||
3331 | /* No difference between reads and writes. Just check | 3549 | /* No difference between reads and writes. Just check |
3332 | * how busy the stripe_cache is | 3550 | * how busy the stripe_cache is |
3333 | */ | 3551 | */ |
3552 | |||
3553 | if (mddev_congested(mddev, bits)) | ||
3554 | return 1; | ||
3334 | if (conf->inactive_blocked) | 3555 | if (conf->inactive_blocked) |
3335 | return 1; | 3556 | return 1; |
3336 | if (conf->quiesce) | 3557 | if (conf->quiesce) |
@@ -3880,7 +4101,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3880 | INIT_LIST_HEAD(&stripes); | 4101 | INIT_LIST_HEAD(&stripes); |
3881 | for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { | 4102 | for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { |
3882 | int j; | 4103 | int j; |
3883 | int skipped = 0; | 4104 | int skipped_disk = 0; |
3884 | sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1); | 4105 | sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1); |
3885 | set_bit(STRIPE_EXPANDING, &sh->state); | 4106 | set_bit(STRIPE_EXPANDING, &sh->state); |
3886 | atomic_inc(&conf->reshape_stripes); | 4107 | atomic_inc(&conf->reshape_stripes); |
@@ -3896,14 +4117,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3896 | continue; | 4117 | continue; |
3897 | s = compute_blocknr(sh, j, 0); | 4118 | s = compute_blocknr(sh, j, 0); |
3898 | if (s < raid5_size(mddev, 0, 0)) { | 4119 | if (s < raid5_size(mddev, 0, 0)) { |
3899 | skipped = 1; | 4120 | skipped_disk = 1; |
3900 | continue; | 4121 | continue; |
3901 | } | 4122 | } |
3902 | memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE); | 4123 | memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE); |
3903 | set_bit(R5_Expanded, &sh->dev[j].flags); | 4124 | set_bit(R5_Expanded, &sh->dev[j].flags); |
3904 | set_bit(R5_UPTODATE, &sh->dev[j].flags); | 4125 | set_bit(R5_UPTODATE, &sh->dev[j].flags); |
3905 | } | 4126 | } |
3906 | if (!skipped) { | 4127 | if (!skipped_disk) { |
3907 | set_bit(STRIPE_EXPAND_READY, &sh->state); | 4128 | set_bit(STRIPE_EXPAND_READY, &sh->state); |
3908 | set_bit(STRIPE_HANDLE, &sh->state); | 4129 | set_bit(STRIPE_HANDLE, &sh->state); |
3909 | } | 4130 | } |
@@ -4057,7 +4278,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
4057 | spin_unlock(&sh->lock); | 4278 | spin_unlock(&sh->lock); |
4058 | 4279 | ||
4059 | /* wait for any blocked device to be handled */ | 4280 | /* wait for any blocked device to be handled */ |
4060 | while(unlikely(!handle_stripe(sh, NULL))) | 4281 | while (unlikely(!handle_stripe(sh))) |
4061 | ; | 4282 | ; |
4062 | release_stripe(sh); | 4283 | release_stripe(sh); |
4063 | 4284 | ||
@@ -4114,7 +4335,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
4114 | return handled; | 4335 | return handled; |
4115 | } | 4336 | } |
4116 | 4337 | ||
4117 | handle_stripe(sh, NULL); | 4338 | handle_stripe(sh); |
4118 | release_stripe(sh); | 4339 | release_stripe(sh); |
4119 | handled++; | 4340 | handled++; |
4120 | } | 4341 | } |
@@ -4128,6 +4349,36 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
4128 | return handled; | 4349 | return handled; |
4129 | } | 4350 | } |
4130 | 4351 | ||
4352 | #ifdef CONFIG_MULTICORE_RAID456 | ||
4353 | static void __process_stripe(void *param, async_cookie_t cookie) | ||
4354 | { | ||
4355 | struct stripe_head *sh = param; | ||
4356 | |||
4357 | handle_stripe(sh); | ||
4358 | release_stripe(sh); | ||
4359 | } | ||
4360 | |||
4361 | static void process_stripe(struct stripe_head *sh, struct list_head *domain) | ||
4362 | { | ||
4363 | async_schedule_domain(__process_stripe, sh, domain); | ||
4364 | } | ||
4365 | |||
4366 | static void synchronize_stripe_processing(struct list_head *domain) | ||
4367 | { | ||
4368 | async_synchronize_full_domain(domain); | ||
4369 | } | ||
4370 | #else | ||
4371 | static void process_stripe(struct stripe_head *sh, struct list_head *domain) | ||
4372 | { | ||
4373 | handle_stripe(sh); | ||
4374 | release_stripe(sh); | ||
4375 | cond_resched(); | ||
4376 | } | ||
4377 | |||
4378 | static void synchronize_stripe_processing(struct list_head *domain) | ||
4379 | { | ||
4380 | } | ||
4381 | #endif | ||
4131 | 4382 | ||
4132 | 4383 | ||
4133 | /* | 4384 | /* |
@@ -4142,6 +4393,7 @@ static void raid5d(mddev_t *mddev) | |||
4142 | struct stripe_head *sh; | 4393 | struct stripe_head *sh; |
4143 | raid5_conf_t *conf = mddev->private; | 4394 | raid5_conf_t *conf = mddev->private; |
4144 | int handled; | 4395 | int handled; |
4396 | LIST_HEAD(raid_domain); | ||
4145 | 4397 | ||
4146 | pr_debug("+++ raid5d active\n"); | 4398 | pr_debug("+++ raid5d active\n"); |
4147 | 4399 | ||
@@ -4178,8 +4430,7 @@ static void raid5d(mddev_t *mddev) | |||
4178 | spin_unlock_irq(&conf->device_lock); | 4430 | spin_unlock_irq(&conf->device_lock); |
4179 | 4431 | ||
4180 | handled++; | 4432 | handled++; |
4181 | handle_stripe(sh, conf->spare_page); | 4433 | process_stripe(sh, &raid_domain); |
4182 | release_stripe(sh); | ||
4183 | 4434 | ||
4184 | spin_lock_irq(&conf->device_lock); | 4435 | spin_lock_irq(&conf->device_lock); |
4185 | } | 4436 | } |
@@ -4187,6 +4438,7 @@ static void raid5d(mddev_t *mddev) | |||
4187 | 4438 | ||
4188 | spin_unlock_irq(&conf->device_lock); | 4439 | spin_unlock_irq(&conf->device_lock); |
4189 | 4440 | ||
4441 | synchronize_stripe_processing(&raid_domain); | ||
4190 | async_tx_issue_pending_all(); | 4442 | async_tx_issue_pending_all(); |
4191 | unplug_slaves(mddev); | 4443 | unplug_slaves(mddev); |
4192 | 4444 | ||
@@ -4319,15 +4571,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
4319 | return sectors * (raid_disks - conf->max_degraded); | 4571 | return sectors * (raid_disks - conf->max_degraded); |
4320 | } | 4572 | } |
4321 | 4573 | ||
4574 | static void raid5_free_percpu(raid5_conf_t *conf) | ||
4575 | { | ||
4576 | struct raid5_percpu *percpu; | ||
4577 | unsigned long cpu; | ||
4578 | |||
4579 | if (!conf->percpu) | ||
4580 | return; | ||
4581 | |||
4582 | get_online_cpus(); | ||
4583 | for_each_possible_cpu(cpu) { | ||
4584 | percpu = per_cpu_ptr(conf->percpu, cpu); | ||
4585 | safe_put_page(percpu->spare_page); | ||
4586 | kfree(percpu->scribble); | ||
4587 | } | ||
4588 | #ifdef CONFIG_HOTPLUG_CPU | ||
4589 | unregister_cpu_notifier(&conf->cpu_notify); | ||
4590 | #endif | ||
4591 | put_online_cpus(); | ||
4592 | |||
4593 | free_percpu(conf->percpu); | ||
4594 | } | ||
4595 | |||
4322 | static void free_conf(raid5_conf_t *conf) | 4596 | static void free_conf(raid5_conf_t *conf) |
4323 | { | 4597 | { |
4324 | shrink_stripes(conf); | 4598 | shrink_stripes(conf); |
4325 | safe_put_page(conf->spare_page); | 4599 | raid5_free_percpu(conf); |
4326 | kfree(conf->disks); | 4600 | kfree(conf->disks); |
4327 | kfree(conf->stripe_hashtbl); | 4601 | kfree(conf->stripe_hashtbl); |
4328 | kfree(conf); | 4602 | kfree(conf); |
4329 | } | 4603 | } |
4330 | 4604 | ||
4605 | #ifdef CONFIG_HOTPLUG_CPU | ||
4606 | static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action, | ||
4607 | void *hcpu) | ||
4608 | { | ||
4609 | raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify); | ||
4610 | long cpu = (long)hcpu; | ||
4611 | struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); | ||
4612 | |||
4613 | switch (action) { | ||
4614 | case CPU_UP_PREPARE: | ||
4615 | case CPU_UP_PREPARE_FROZEN: | ||
4616 | if (conf->level == 6 && !percpu->spare_page) | ||
4617 | percpu->spare_page = alloc_page(GFP_KERNEL); | ||
4618 | if (!percpu->scribble) | ||
4619 | percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL); | ||
4620 | |||
4621 | if (!percpu->scribble || | ||
4622 | (conf->level == 6 && !percpu->spare_page)) { | ||
4623 | safe_put_page(percpu->spare_page); | ||
4624 | kfree(percpu->scribble); | ||
4625 | pr_err("%s: failed memory allocation for cpu%ld\n", | ||
4626 | __func__, cpu); | ||
4627 | return NOTIFY_BAD; | ||
4628 | } | ||
4629 | break; | ||
4630 | case CPU_DEAD: | ||
4631 | case CPU_DEAD_FROZEN: | ||
4632 | safe_put_page(percpu->spare_page); | ||
4633 | kfree(percpu->scribble); | ||
4634 | percpu->spare_page = NULL; | ||
4635 | percpu->scribble = NULL; | ||
4636 | break; | ||
4637 | default: | ||
4638 | break; | ||
4639 | } | ||
4640 | return NOTIFY_OK; | ||
4641 | } | ||
4642 | #endif | ||
4643 | |||
4644 | static int raid5_alloc_percpu(raid5_conf_t *conf) | ||
4645 | { | ||
4646 | unsigned long cpu; | ||
4647 | struct page *spare_page; | ||
4648 | struct raid5_percpu *allcpus; | ||
4649 | void *scribble; | ||
4650 | int err; | ||
4651 | |||
4652 | allcpus = alloc_percpu(struct raid5_percpu); | ||
4653 | if (!allcpus) | ||
4654 | return -ENOMEM; | ||
4655 | conf->percpu = allcpus; | ||
4656 | |||
4657 | get_online_cpus(); | ||
4658 | err = 0; | ||
4659 | for_each_present_cpu(cpu) { | ||
4660 | if (conf->level == 6) { | ||
4661 | spare_page = alloc_page(GFP_KERNEL); | ||
4662 | if (!spare_page) { | ||
4663 | err = -ENOMEM; | ||
4664 | break; | ||
4665 | } | ||
4666 | per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page; | ||
4667 | } | ||
4668 | scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL); | ||
4669 | if (!scribble) { | ||
4670 | err = -ENOMEM; | ||
4671 | break; | ||
4672 | } | ||
4673 | per_cpu_ptr(conf->percpu, cpu)->scribble = scribble; | ||
4674 | } | ||
4675 | #ifdef CONFIG_HOTPLUG_CPU | ||
4676 | conf->cpu_notify.notifier_call = raid456_cpu_notify; | ||
4677 | conf->cpu_notify.priority = 0; | ||
4678 | if (err == 0) | ||
4679 | err = register_cpu_notifier(&conf->cpu_notify); | ||
4680 | #endif | ||
4681 | put_online_cpus(); | ||
4682 | |||
4683 | return err; | ||
4684 | } | ||
4685 | |||
4331 | static raid5_conf_t *setup_conf(mddev_t *mddev) | 4686 | static raid5_conf_t *setup_conf(mddev_t *mddev) |
4332 | { | 4687 | { |
4333 | raid5_conf_t *conf; | 4688 | raid5_conf_t *conf; |
@@ -4369,6 +4724,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4369 | goto abort; | 4724 | goto abort; |
4370 | 4725 | ||
4371 | conf->raid_disks = mddev->raid_disks; | 4726 | conf->raid_disks = mddev->raid_disks; |
4727 | conf->scribble_len = scribble_len(conf->raid_disks); | ||
4372 | if (mddev->reshape_position == MaxSector) | 4728 | if (mddev->reshape_position == MaxSector) |
4373 | conf->previous_raid_disks = mddev->raid_disks; | 4729 | conf->previous_raid_disks = mddev->raid_disks; |
4374 | else | 4730 | else |
@@ -4384,11 +4740,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4384 | if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) | 4740 | if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) |
4385 | goto abort; | 4741 | goto abort; |
4386 | 4742 | ||
4387 | if (mddev->new_level == 6) { | 4743 | conf->level = mddev->new_level; |
4388 | conf->spare_page = alloc_page(GFP_KERNEL); | 4744 | if (raid5_alloc_percpu(conf) != 0) |
4389 | if (!conf->spare_page) | 4745 | goto abort; |
4390 | goto abort; | 4746 | |
4391 | } | ||
4392 | spin_lock_init(&conf->device_lock); | 4747 | spin_lock_init(&conf->device_lock); |
4393 | init_waitqueue_head(&conf->wait_for_stripe); | 4748 | init_waitqueue_head(&conf->wait_for_stripe); |
4394 | init_waitqueue_head(&conf->wait_for_overlap); | 4749 | init_waitqueue_head(&conf->wait_for_overlap); |
@@ -4447,7 +4802,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4447 | printk(KERN_INFO "raid5: allocated %dkB for %s\n", | 4802 | printk(KERN_INFO "raid5: allocated %dkB for %s\n", |
4448 | memory, mdname(mddev)); | 4803 | memory, mdname(mddev)); |
4449 | 4804 | ||
4450 | conf->thread = md_register_thread(raid5d, mddev, "%s_raid5"); | 4805 | conf->thread = md_register_thread(raid5d, mddev, NULL); |
4451 | if (!conf->thread) { | 4806 | if (!conf->thread) { |
4452 | printk(KERN_ERR | 4807 | printk(KERN_ERR |
4453 | "raid5: couldn't allocate thread for %s\n", | 4808 | "raid5: couldn't allocate thread for %s\n", |
@@ -4613,7 +4968,7 @@ static int run(mddev_t *mddev) | |||
4613 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | 4968 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); |
4614 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 4969 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
4615 | mddev->sync_thread = md_register_thread(md_do_sync, mddev, | 4970 | mddev->sync_thread = md_register_thread(md_do_sync, mddev, |
4616 | "%s_reshape"); | 4971 | "reshape"); |
4617 | } | 4972 | } |
4618 | 4973 | ||
4619 | /* read-ahead size must cover two whole stripes, which is | 4974 | /* read-ahead size must cover two whole stripes, which is |
@@ -5031,7 +5386,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
5031 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | 5386 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); |
5032 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 5387 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
5033 | mddev->sync_thread = md_register_thread(md_do_sync, mddev, | 5388 | mddev->sync_thread = md_register_thread(md_do_sync, mddev, |
5034 | "%s_reshape"); | 5389 | "reshape"); |
5035 | if (!mddev->sync_thread) { | 5390 | if (!mddev->sync_thread) { |
5036 | mddev->recovery = 0; | 5391 | mddev->recovery = 0; |
5037 | spin_lock_irq(&conf->device_lock); | 5392 | spin_lock_irq(&conf->device_lock); |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 9459689c4ea0..2390e0e83daf 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _RAID5_H | 2 | #define _RAID5_H |
3 | 3 | ||
4 | #include <linux/raid/xor.h> | 4 | #include <linux/raid/xor.h> |
5 | #include <linux/dmaengine.h> | ||
5 | 6 | ||
6 | /* | 7 | /* |
7 | * | 8 | * |
@@ -175,7 +176,9 @@ | |||
175 | */ | 176 | */ |
176 | enum check_states { | 177 | enum check_states { |
177 | check_state_idle = 0, | 178 | check_state_idle = 0, |
178 | check_state_run, /* parity check */ | 179 | check_state_run, /* xor parity check */ |
180 | check_state_run_q, /* q-parity check */ | ||
181 | check_state_run_pq, /* pq dual parity check */ | ||
179 | check_state_check_result, | 182 | check_state_check_result, |
180 | check_state_compute_run, /* parity repair */ | 183 | check_state_compute_run, /* parity repair */ |
181 | check_state_compute_result, | 184 | check_state_compute_result, |
@@ -215,8 +218,8 @@ struct stripe_head { | |||
215 | * @target - STRIPE_OP_COMPUTE_BLK target | 218 | * @target - STRIPE_OP_COMPUTE_BLK target |
216 | */ | 219 | */ |
217 | struct stripe_operations { | 220 | struct stripe_operations { |
218 | int target; | 221 | int target, target2; |
219 | u32 zero_sum_result; | 222 | enum sum_check_flags zero_sum_result; |
220 | } ops; | 223 | } ops; |
221 | struct r5dev { | 224 | struct r5dev { |
222 | struct bio req; | 225 | struct bio req; |
@@ -298,7 +301,7 @@ struct r6_state { | |||
298 | #define STRIPE_OP_COMPUTE_BLK 1 | 301 | #define STRIPE_OP_COMPUTE_BLK 1 |
299 | #define STRIPE_OP_PREXOR 2 | 302 | #define STRIPE_OP_PREXOR 2 |
300 | #define STRIPE_OP_BIODRAIN 3 | 303 | #define STRIPE_OP_BIODRAIN 3 |
301 | #define STRIPE_OP_POSTXOR 4 | 304 | #define STRIPE_OP_RECONSTRUCT 4 |
302 | #define STRIPE_OP_CHECK 5 | 305 | #define STRIPE_OP_CHECK 5 |
303 | 306 | ||
304 | /* | 307 | /* |
@@ -385,8 +388,21 @@ struct raid5_private_data { | |||
385 | * (fresh device added). | 388 | * (fresh device added). |
386 | * Cleared when a sync completes. | 389 | * Cleared when a sync completes. |
387 | */ | 390 | */ |
388 | 391 | /* per cpu variables */ | |
389 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | 392 | struct raid5_percpu { |
393 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | ||
394 | void *scribble; /* space for constructing buffer | ||
395 | * lists and performing address | ||
396 | * conversions | ||
397 | */ | ||
398 | } *percpu; | ||
399 | size_t scribble_len; /* size of scribble region must be | ||
400 | * associated with conf to handle | ||
401 | * cpu hotplug while reshaping | ||
402 | */ | ||
403 | #ifdef CONFIG_HOTPLUG_CPU | ||
404 | struct notifier_block cpu_notify; | ||
405 | #endif | ||
390 | 406 | ||
391 | /* | 407 | /* |
392 | * Free stripes pool | 408 | * Free stripes pool |
diff --git a/drivers/media/dvb/dvb-core/dvbdev.h b/drivers/media/dvb/dvb-core/dvbdev.h index 895e2efca8a9..01fc70484743 100644 --- a/drivers/media/dvb/dvb-core/dvbdev.h +++ b/drivers/media/dvb/dvb-core/dvbdev.h | |||
@@ -31,10 +31,9 @@ | |||
31 | #define DVB_MAJOR 212 | 31 | #define DVB_MAJOR 212 |
32 | 32 | ||
33 | #if defined(CONFIG_DVB_MAX_ADAPTERS) && CONFIG_DVB_MAX_ADAPTERS > 0 | 33 | #if defined(CONFIG_DVB_MAX_ADAPTERS) && CONFIG_DVB_MAX_ADAPTERS > 0 |
34 | #define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS | 34 | #define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS |
35 | #else | 35 | #else |
36 | #warning invalid CONFIG_DVB_MAX_ADAPTERS value | 36 | #define DVB_MAX_ADAPTERS 8 |
37 | #define DVB_MAX_ADAPTERS 8 | ||
38 | #endif | 37 | #endif |
39 | 38 | ||
40 | #define DVB_UNSET (-1) | 39 | #define DVB_UNSET (-1) |
diff --git a/drivers/media/dvb/dvb-usb/Kconfig b/drivers/media/dvb/dvb-usb/Kconfig index 0e4b97fba384..9744b0692417 100644 --- a/drivers/media/dvb/dvb-usb/Kconfig +++ b/drivers/media/dvb/dvb-usb/Kconfig | |||
@@ -75,7 +75,7 @@ config DVB_USB_DIB0700 | |||
75 | select DVB_DIB3000MC if !DVB_FE_CUSTOMISE | 75 | select DVB_DIB3000MC if !DVB_FE_CUSTOMISE |
76 | select DVB_S5H1411 if !DVB_FE_CUSTOMISE | 76 | select DVB_S5H1411 if !DVB_FE_CUSTOMISE |
77 | select DVB_LGDT3305 if !DVB_FE_CUSTOMISE | 77 | select DVB_LGDT3305 if !DVB_FE_CUSTOMISE |
78 | select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE | 78 | select DVB_TUNER_DIB0070 |
79 | select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMISE | 79 | select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMISE |
80 | select MEDIA_TUNER_MT2266 if !MEDIA_TUNER_CUSTOMISE | 80 | select MEDIA_TUNER_MT2266 if !MEDIA_TUNER_CUSTOMISE |
81 | select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE | 81 | select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE |
diff --git a/drivers/media/video/saa7164/saa7164-api.c b/drivers/media/video/saa7164/saa7164-api.c index bb6df1b276be..6f094a96ac81 100644 --- a/drivers/media/video/saa7164/saa7164-api.c +++ b/drivers/media/video/saa7164/saa7164-api.c | |||
@@ -415,7 +415,7 @@ int saa7164_api_enum_subdevs(struct saa7164_dev *dev) | |||
415 | goto out; | 415 | goto out; |
416 | } | 416 | } |
417 | 417 | ||
418 | if (debug & DBGLVL_API) | 418 | if (saa_debug & DBGLVL_API) |
419 | saa7164_dumphex16(dev, buf, (buflen/16)*16); | 419 | saa7164_dumphex16(dev, buf, (buflen/16)*16); |
420 | 420 | ||
421 | saa7164_api_dump_subdevs(dev, buf, buflen); | 421 | saa7164_api_dump_subdevs(dev, buf, buflen); |
@@ -480,7 +480,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg, | |||
480 | 480 | ||
481 | dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len); | 481 | dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len); |
482 | 482 | ||
483 | if (debug & DBGLVL_I2C) | 483 | if (saa_debug & DBGLVL_I2C) |
484 | saa7164_dumphex16(dev, buf, 2 * 16); | 484 | saa7164_dumphex16(dev, buf, 2 * 16); |
485 | 485 | ||
486 | ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR, | 486 | ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR, |
@@ -488,7 +488,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg, | |||
488 | if (ret != SAA_OK) | 488 | if (ret != SAA_OK) |
489 | printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret); | 489 | printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret); |
490 | else { | 490 | else { |
491 | if (debug & DBGLVL_I2C) | 491 | if (saa_debug & DBGLVL_I2C) |
492 | saa7164_dumphex16(dev, buf, sizeof(buf)); | 492 | saa7164_dumphex16(dev, buf, sizeof(buf)); |
493 | memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen); | 493 | memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen); |
494 | } | 494 | } |
@@ -548,7 +548,7 @@ int saa7164_api_i2c_write(struct saa7164_i2c *bus, u8 addr, u32 datalen, | |||
548 | *((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen; | 548 | *((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen; |
549 | memcpy((buf + 2 * sizeof(u32)), data, datalen); | 549 | memcpy((buf + 2 * sizeof(u32)), data, datalen); |
550 | 550 | ||
551 | if (debug & DBGLVL_I2C) | 551 | if (saa_debug & DBGLVL_I2C) |
552 | saa7164_dumphex16(dev, buf, sizeof(buf)); | 552 | saa7164_dumphex16(dev, buf, sizeof(buf)); |
553 | 553 | ||
554 | ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR, | 554 | ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR, |
diff --git a/drivers/media/video/saa7164/saa7164-cmd.c b/drivers/media/video/saa7164/saa7164-cmd.c index e097f1a0969a..c45966edc0cf 100644 --- a/drivers/media/video/saa7164/saa7164-cmd.c +++ b/drivers/media/video/saa7164/saa7164-cmd.c | |||
@@ -250,7 +250,7 @@ int saa7164_cmd_wait(struct saa7164_dev *dev, u8 seqno) | |||
250 | unsigned long stamp; | 250 | unsigned long stamp; |
251 | int r; | 251 | int r; |
252 | 252 | ||
253 | if (debug >= 4) | 253 | if (saa_debug >= 4) |
254 | saa7164_bus_dump(dev); | 254 | saa7164_bus_dump(dev); |
255 | 255 | ||
256 | dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno); | 256 | dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno); |
diff --git a/drivers/media/video/saa7164/saa7164-core.c b/drivers/media/video/saa7164/saa7164-core.c index f0dbead188c8..709affc31042 100644 --- a/drivers/media/video/saa7164/saa7164-core.c +++ b/drivers/media/video/saa7164/saa7164-core.c | |||
@@ -45,8 +45,8 @@ MODULE_LICENSE("GPL"); | |||
45 | 32 bus | 45 | 32 bus |
46 | */ | 46 | */ |
47 | 47 | ||
48 | unsigned int debug; | 48 | unsigned int saa_debug; |
49 | module_param(debug, int, 0644); | 49 | module_param_named(debug, saa_debug, int, 0644); |
50 | MODULE_PARM_DESC(debug, "enable debug messages"); | 50 | MODULE_PARM_DESC(debug, "enable debug messages"); |
51 | 51 | ||
52 | unsigned int waitsecs = 10; | 52 | unsigned int waitsecs = 10; |
@@ -653,7 +653,7 @@ static int __devinit saa7164_initdev(struct pci_dev *pci_dev, | |||
653 | printk(KERN_ERR "%s() Unsupported board detected, " | 653 | printk(KERN_ERR "%s() Unsupported board detected, " |
654 | "registering without firmware\n", __func__); | 654 | "registering without firmware\n", __func__); |
655 | 655 | ||
656 | dprintk(1, "%s() parameter debug = %d\n", __func__, debug); | 656 | dprintk(1, "%s() parameter debug = %d\n", __func__, saa_debug); |
657 | dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs); | 657 | dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs); |
658 | 658 | ||
659 | fail_fw: | 659 | fail_fw: |
diff --git a/drivers/media/video/saa7164/saa7164.h b/drivers/media/video/saa7164/saa7164.h index 6753008a9c9b..42660b546f0e 100644 --- a/drivers/media/video/saa7164/saa7164.h +++ b/drivers/media/video/saa7164/saa7164.h | |||
@@ -375,9 +375,9 @@ extern int saa7164_buffer_dealloc(struct saa7164_tsport *port, | |||
375 | 375 | ||
376 | /* ----------------------------------------------------------- */ | 376 | /* ----------------------------------------------------------- */ |
377 | 377 | ||
378 | extern unsigned int debug; | 378 | extern unsigned int saa_debug; |
379 | #define dprintk(level, fmt, arg...)\ | 379 | #define dprintk(level, fmt, arg...)\ |
380 | do { if (debug & level)\ | 380 | do { if (saa_debug & level)\ |
381 | printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\ | 381 | printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\ |
382 | } while (0) | 382 | } while (0) |
383 | 383 | ||
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index a5b448ea4eab..b3bf1c44d74d 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c | |||
@@ -339,9 +339,9 @@ static int h_memstick_read_dev_id(struct memstick_dev *card, | |||
339 | card->id.type = id_reg.type; | 339 | card->id.type = id_reg.type; |
340 | card->id.category = id_reg.category; | 340 | card->id.category = id_reg.category; |
341 | card->id.class = id_reg.class; | 341 | card->id.class = id_reg.class; |
342 | dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode); | ||
342 | } | 343 | } |
343 | complete(&card->mrq_complete); | 344 | complete(&card->mrq_complete); |
344 | dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode); | ||
345 | return -EAGAIN; | 345 | return -EAGAIN; |
346 | } | 346 | } |
347 | } | 347 | } |
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 79689b10f937..766e21e15574 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c | |||
@@ -937,6 +937,8 @@ static int quicktest1(unsigned long arg) | |||
937 | 937 | ||
938 | /* Need 1K cacheline aligned that does not cross page boundary */ | 938 | /* Need 1K cacheline aligned that does not cross page boundary */ |
939 | p = kmalloc(4096, 0); | 939 | p = kmalloc(4096, 0); |
940 | if (p == NULL) | ||
941 | return -ENOMEM; | ||
940 | mq = ALIGNUP(p, 1024); | 942 | mq = ALIGNUP(p, 1024); |
941 | memset(mes, 0xee, sizeof(mes)); | 943 | memset(mes, 0xee, sizeof(mes)); |
942 | dw = mq; | 944 | dw = mq; |
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 9cbf95bedce6..ccd4408a26c7 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c | |||
@@ -340,10 +340,9 @@ static struct proc_dir_entry *proc_gru __read_mostly; | |||
340 | 340 | ||
341 | static int create_proc_file(struct proc_entry *p) | 341 | static int create_proc_file(struct proc_entry *p) |
342 | { | 342 | { |
343 | p->entry = create_proc_entry(p->name, p->mode, proc_gru); | 343 | p->entry = proc_create(p->name, p->mode, proc_gru, p->fops); |
344 | if (!p->entry) | 344 | if (!p->entry) |
345 | return -1; | 345 | return -1; |
346 | p->entry->proc_fops = p->fops; | ||
347 | return 0; | 346 | return 0; |
348 | } | 347 | } |
349 | 348 | ||
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 065fa818be57..fc25586b7ee1 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c | |||
@@ -599,6 +599,7 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data) | |||
599 | struct scatterlist *sg; | 599 | struct scatterlist *sg; |
600 | unsigned int i; | 600 | unsigned int i; |
601 | enum dma_data_direction direction; | 601 | enum dma_data_direction direction; |
602 | unsigned int sglen; | ||
602 | 603 | ||
603 | /* | 604 | /* |
604 | * We don't do DMA on "complex" transfers, i.e. with | 605 | * We don't do DMA on "complex" transfers, i.e. with |
@@ -628,11 +629,14 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data) | |||
628 | else | 629 | else |
629 | direction = DMA_TO_DEVICE; | 630 | direction = DMA_TO_DEVICE; |
630 | 631 | ||
632 | sglen = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, direction); | ||
633 | if (sglen != data->sg_len) | ||
634 | goto unmap_exit; | ||
631 | desc = chan->device->device_prep_slave_sg(chan, | 635 | desc = chan->device->device_prep_slave_sg(chan, |
632 | data->sg, data->sg_len, direction, | 636 | data->sg, data->sg_len, direction, |
633 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); | 637 | DMA_PREP_INTERRUPT | DMA_CTRL_ACK); |
634 | if (!desc) | 638 | if (!desc) |
635 | return -ENOMEM; | 639 | goto unmap_exit; |
636 | 640 | ||
637 | host->dma.data_desc = desc; | 641 | host->dma.data_desc = desc; |
638 | desc->callback = atmci_dma_complete; | 642 | desc->callback = atmci_dma_complete; |
@@ -643,6 +647,9 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data) | |||
643 | chan->device->device_issue_pending(chan); | 647 | chan->device->device_issue_pending(chan); |
644 | 648 | ||
645 | return 0; | 649 | return 0; |
650 | unmap_exit: | ||
651 | dma_unmap_sg(&host->pdev->dev, data->sg, sglen, direction); | ||
652 | return -ENOMEM; | ||
646 | } | 653 | } |
647 | 654 | ||
648 | #else /* CONFIG_MMC_ATMELMCI_DMA */ | 655 | #else /* CONFIG_MMC_ATMELMCI_DMA */ |
diff --git a/drivers/net/wireless/arlan-proc.c b/drivers/net/wireless/arlan-proc.c index 2ab1d59870f4..a8b689635a3b 100644 --- a/drivers/net/wireless/arlan-proc.c +++ b/drivers/net/wireless/arlan-proc.c | |||
@@ -402,7 +402,7 @@ static int arlan_setup_card_by_book(struct net_device *dev) | |||
402 | 402 | ||
403 | static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0"; | 403 | static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0"; |
404 | 404 | ||
405 | static int arlan_sysctl_info(ctl_table * ctl, int write, struct file *filp, | 405 | static int arlan_sysctl_info(ctl_table * ctl, int write, |
406 | void __user *buffer, size_t * lenp, loff_t *ppos) | 406 | void __user *buffer, size_t * lenp, loff_t *ppos) |
407 | { | 407 | { |
408 | int i; | 408 | int i; |
@@ -629,7 +629,7 @@ final: | |||
629 | *lenp = pos; | 629 | *lenp = pos; |
630 | 630 | ||
631 | if (!write) | 631 | if (!write) |
632 | retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); | 632 | retv = proc_dostring(ctl, write, buffer, lenp, ppos); |
633 | else | 633 | else |
634 | { | 634 | { |
635 | *lenp = 0; | 635 | *lenp = 0; |
@@ -639,7 +639,7 @@ final: | |||
639 | } | 639 | } |
640 | 640 | ||
641 | 641 | ||
642 | static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp, | 642 | static int arlan_sysctl_info161719(ctl_table * ctl, int write, |
643 | void __user *buffer, size_t * lenp, loff_t *ppos) | 643 | void __user *buffer, size_t * lenp, loff_t *ppos) |
644 | { | 644 | { |
645 | int i; | 645 | int i; |
@@ -669,11 +669,11 @@ static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp | |||
669 | 669 | ||
670 | final: | 670 | final: |
671 | *lenp = pos; | 671 | *lenp = pos; |
672 | retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); | 672 | retv = proc_dostring(ctl, write, buffer, lenp, ppos); |
673 | return retv; | 673 | return retv; |
674 | } | 674 | } |
675 | 675 | ||
676 | static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp, | 676 | static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, |
677 | void __user *buffer, size_t * lenp, loff_t *ppos) | 677 | void __user *buffer, size_t * lenp, loff_t *ppos) |
678 | { | 678 | { |
679 | int i; | 679 | int i; |
@@ -698,11 +698,11 @@ static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp | |||
698 | SARLBNpln(u_char, txBuffer, 0x800); | 698 | SARLBNpln(u_char, txBuffer, 0x800); |
699 | final: | 699 | final: |
700 | *lenp = pos; | 700 | *lenp = pos; |
701 | retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); | 701 | retv = proc_dostring(ctl, write, buffer, lenp, ppos); |
702 | return retv; | 702 | return retv; |
703 | } | 703 | } |
704 | 704 | ||
705 | static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp, | 705 | static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, |
706 | void __user *buffer, size_t * lenp, loff_t *ppos) | 706 | void __user *buffer, size_t * lenp, loff_t *ppos) |
707 | { | 707 | { |
708 | int i; | 708 | int i; |
@@ -726,11 +726,11 @@ static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp | |||
726 | SARLBNpln(u_char, rxBuffer, 0x800); | 726 | SARLBNpln(u_char, rxBuffer, 0x800); |
727 | final: | 727 | final: |
728 | *lenp = pos; | 728 | *lenp = pos; |
729 | retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); | 729 | retv = proc_dostring(ctl, write, buffer, lenp, ppos); |
730 | return retv; | 730 | return retv; |
731 | } | 731 | } |
732 | 732 | ||
733 | static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp, | 733 | static int arlan_sysctl_info18(ctl_table * ctl, int write, |
734 | void __user *buffer, size_t * lenp, loff_t *ppos) | 734 | void __user *buffer, size_t * lenp, loff_t *ppos) |
735 | { | 735 | { |
736 | int i; | 736 | int i; |
@@ -756,7 +756,7 @@ static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp, | |||
756 | 756 | ||
757 | final: | 757 | final: |
758 | *lenp = pos; | 758 | *lenp = pos; |
759 | retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos); | 759 | retv = proc_dostring(ctl, write, buffer, lenp, ppos); |
760 | return retv; | 760 | return retv; |
761 | } | 761 | } |
762 | 762 | ||
@@ -766,7 +766,7 @@ final: | |||
766 | 766 | ||
767 | static char conf_reset_result[200]; | 767 | static char conf_reset_result[200]; |
768 | 768 | ||
769 | static int arlan_configure(ctl_table * ctl, int write, struct file *filp, | 769 | static int arlan_configure(ctl_table * ctl, int write, |
770 | void __user *buffer, size_t * lenp, loff_t *ppos) | 770 | void __user *buffer, size_t * lenp, loff_t *ppos) |
771 | { | 771 | { |
772 | int pos = 0; | 772 | int pos = 0; |
@@ -788,10 +788,10 @@ static int arlan_configure(ctl_table * ctl, int write, struct file *filp, | |||
788 | return -1; | 788 | return -1; |
789 | 789 | ||
790 | *lenp = pos; | 790 | *lenp = pos; |
791 | return proc_dostring(ctl, write, filp, buffer, lenp, ppos); | 791 | return proc_dostring(ctl, write, buffer, lenp, ppos); |
792 | } | 792 | } |
793 | 793 | ||
794 | static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp, | 794 | static int arlan_sysctl_reset(ctl_table * ctl, int write, |
795 | void __user *buffer, size_t * lenp, loff_t *ppos) | 795 | void __user *buffer, size_t * lenp, loff_t *ppos) |
796 | { | 796 | { |
797 | int pos = 0; | 797 | int pos = 0; |
@@ -811,7 +811,7 @@ static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp, | |||
811 | } else | 811 | } else |
812 | return -1; | 812 | return -1; |
813 | *lenp = pos + 3; | 813 | *lenp = pos + 3; |
814 | return proc_dostring(ctl, write, filp, buffer, lenp, ppos); | 814 | return proc_dostring(ctl, write, buffer, lenp, ppos); |
815 | } | 815 | } |
816 | 816 | ||
817 | 817 | ||
diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c index 554e11f9e1ce..8eefe56f1cbe 100644 --- a/drivers/parport/procfs.c +++ b/drivers/parport/procfs.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #define PARPORT_MIN_SPINTIME_VALUE 1 | 31 | #define PARPORT_MIN_SPINTIME_VALUE 1 |
32 | #define PARPORT_MAX_SPINTIME_VALUE 1000 | 32 | #define PARPORT_MAX_SPINTIME_VALUE 1000 |
33 | 33 | ||
34 | static int do_active_device(ctl_table *table, int write, struct file *filp, | 34 | static int do_active_device(ctl_table *table, int write, |
35 | void __user *result, size_t *lenp, loff_t *ppos) | 35 | void __user *result, size_t *lenp, loff_t *ppos) |
36 | { | 36 | { |
37 | struct parport *port = (struct parport *)table->extra1; | 37 | struct parport *port = (struct parport *)table->extra1; |
@@ -68,7 +68,7 @@ static int do_active_device(ctl_table *table, int write, struct file *filp, | |||
68 | } | 68 | } |
69 | 69 | ||
70 | #ifdef CONFIG_PARPORT_1284 | 70 | #ifdef CONFIG_PARPORT_1284 |
71 | static int do_autoprobe(ctl_table *table, int write, struct file *filp, | 71 | static int do_autoprobe(ctl_table *table, int write, |
72 | void __user *result, size_t *lenp, loff_t *ppos) | 72 | void __user *result, size_t *lenp, loff_t *ppos) |
73 | { | 73 | { |
74 | struct parport_device_info *info = table->extra2; | 74 | struct parport_device_info *info = table->extra2; |
@@ -111,7 +111,7 @@ static int do_autoprobe(ctl_table *table, int write, struct file *filp, | |||
111 | #endif /* IEEE1284.3 support. */ | 111 | #endif /* IEEE1284.3 support. */ |
112 | 112 | ||
113 | static int do_hardware_base_addr (ctl_table *table, int write, | 113 | static int do_hardware_base_addr (ctl_table *table, int write, |
114 | struct file *filp, void __user *result, | 114 | void __user *result, |
115 | size_t *lenp, loff_t *ppos) | 115 | size_t *lenp, loff_t *ppos) |
116 | { | 116 | { |
117 | struct parport *port = (struct parport *)table->extra1; | 117 | struct parport *port = (struct parport *)table->extra1; |
@@ -139,7 +139,7 @@ static int do_hardware_base_addr (ctl_table *table, int write, | |||
139 | } | 139 | } |
140 | 140 | ||
141 | static int do_hardware_irq (ctl_table *table, int write, | 141 | static int do_hardware_irq (ctl_table *table, int write, |
142 | struct file *filp, void __user *result, | 142 | void __user *result, |
143 | size_t *lenp, loff_t *ppos) | 143 | size_t *lenp, loff_t *ppos) |
144 | { | 144 | { |
145 | struct parport *port = (struct parport *)table->extra1; | 145 | struct parport *port = (struct parport *)table->extra1; |
@@ -167,7 +167,7 @@ static int do_hardware_irq (ctl_table *table, int write, | |||
167 | } | 167 | } |
168 | 168 | ||
169 | static int do_hardware_dma (ctl_table *table, int write, | 169 | static int do_hardware_dma (ctl_table *table, int write, |
170 | struct file *filp, void __user *result, | 170 | void __user *result, |
171 | size_t *lenp, loff_t *ppos) | 171 | size_t *lenp, loff_t *ppos) |
172 | { | 172 | { |
173 | struct parport *port = (struct parport *)table->extra1; | 173 | struct parport *port = (struct parport *)table->extra1; |
@@ -195,7 +195,7 @@ static int do_hardware_dma (ctl_table *table, int write, | |||
195 | } | 195 | } |
196 | 196 | ||
197 | static int do_hardware_modes (ctl_table *table, int write, | 197 | static int do_hardware_modes (ctl_table *table, int write, |
198 | struct file *filp, void __user *result, | 198 | void __user *result, |
199 | size_t *lenp, loff_t *ppos) | 199 | size_t *lenp, loff_t *ppos) |
200 | { | 200 | { |
201 | struct parport *port = (struct parport *)table->extra1; | 201 | struct parport *port = (struct parport *)table->extra1; |
diff --git a/drivers/staging/go7007/Makefile b/drivers/staging/go7007/Makefile index d14ea84a01f6..1301caa7495d 100644 --- a/drivers/staging/go7007/Makefile +++ b/drivers/staging/go7007/Makefile | |||
@@ -32,8 +32,3 @@ endif | |||
32 | 32 | ||
33 | EXTRA_CFLAGS += -Idrivers/media/dvb/frontends | 33 | EXTRA_CFLAGS += -Idrivers/media/dvb/frontends |
34 | EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core | 34 | EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core |
35 | |||
36 | # Ubuntu 8.04 has CONFIG_SND undefined, so include lum sound/config.h too | ||
37 | ifeq ($(CONFIG_SND),) | ||
38 | EXTRA_CFLAGS += -include sound/config.h | ||
39 | endif | ||
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 68fa0e43b781..8c075b2416bb 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c | |||
@@ -912,6 +912,7 @@ static void sierra_release(struct usb_serial *serial) | |||
912 | } | 912 | } |
913 | } | 913 | } |
914 | 914 | ||
915 | #ifdef CONFIG_PM | ||
915 | static void stop_read_write_urbs(struct usb_serial *serial) | 916 | static void stop_read_write_urbs(struct usb_serial *serial) |
916 | { | 917 | { |
917 | int i, j; | 918 | int i, j; |
@@ -988,6 +989,10 @@ static int sierra_resume(struct usb_serial *serial) | |||
988 | 989 | ||
989 | return ec ? -EIO : 0; | 990 | return ec ? -EIO : 0; |
990 | } | 991 | } |
992 | #else | ||
993 | #define sierra_suspend NULL | ||
994 | #define sierra_resume NULL | ||
995 | #endif | ||
991 | 996 | ||
992 | static struct usb_serial_driver sierra_device = { | 997 | static struct usb_serial_driver sierra_device = { |
993 | .driver = { | 998 | .driver = { |
diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c index ba3d71f5c7d0..9554ad5f9af7 100644 --- a/drivers/vlynq/vlynq.c +++ b/drivers/vlynq/vlynq.c | |||
@@ -702,7 +702,7 @@ static int vlynq_probe(struct platform_device *pdev) | |||
702 | dev->mem_start = mem_res->start; | 702 | dev->mem_start = mem_res->start; |
703 | dev->mem_end = mem_res->end; | 703 | dev->mem_end = mem_res->end; |
704 | 704 | ||
705 | len = regs_res->end - regs_res->start; | 705 | len = resource_size(regs_res); |
706 | if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) { | 706 | if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) { |
707 | printk(KERN_ERR "%s: Can't request vlynq registers\n", | 707 | printk(KERN_ERR "%s: Can't request vlynq registers\n", |
708 | dev_name(&dev->dev)); | 708 | dev_name(&dev->dev)); |
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 798cb071d132..3f57ce4bee5d 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c | |||
@@ -19,9 +19,6 @@ static int | |||
19 | adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, | 19 | adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, |
20 | int create) | 20 | int create) |
21 | { | 21 | { |
22 | if (block < 0) | ||
23 | goto abort_negative; | ||
24 | |||
25 | if (!create) { | 22 | if (!create) { |
26 | if (block >= inode->i_blocks) | 23 | if (block >= inode->i_blocks) |
27 | goto abort_toobig; | 24 | goto abort_toobig; |
@@ -34,10 +31,6 @@ adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh, | |||
34 | /* don't support allocation of blocks yet */ | 31 | /* don't support allocation of blocks yet */ |
35 | return -EIO; | 32 | return -EIO; |
36 | 33 | ||
37 | abort_negative: | ||
38 | adfs_error(inode->i_sb, "block %d < 0", block); | ||
39 | return -EIO; | ||
40 | |||
41 | abort_toobig: | 34 | abort_toobig: |
42 | return 0; | 35 | return 0; |
43 | } | 36 | } |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 442d94fe255c..b9b3bb51b1e4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -1711,42 +1711,52 @@ struct elf_note_info { | |||
1711 | int numnote; | 1711 | int numnote; |
1712 | }; | 1712 | }; |
1713 | 1713 | ||
1714 | static int fill_note_info(struct elfhdr *elf, int phdrs, | 1714 | static int elf_note_info_init(struct elf_note_info *info) |
1715 | struct elf_note_info *info, | ||
1716 | long signr, struct pt_regs *regs) | ||
1717 | { | 1715 | { |
1718 | #define NUM_NOTES 6 | 1716 | memset(info, 0, sizeof(*info)); |
1719 | struct list_head *t; | ||
1720 | |||
1721 | info->notes = NULL; | ||
1722 | info->prstatus = NULL; | ||
1723 | info->psinfo = NULL; | ||
1724 | info->fpu = NULL; | ||
1725 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1726 | info->xfpu = NULL; | ||
1727 | #endif | ||
1728 | INIT_LIST_HEAD(&info->thread_list); | 1717 | INIT_LIST_HEAD(&info->thread_list); |
1729 | 1718 | ||
1730 | info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), | 1719 | /* Allocate space for six ELF notes */ |
1731 | GFP_KERNEL); | 1720 | info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL); |
1732 | if (!info->notes) | 1721 | if (!info->notes) |
1733 | return 0; | 1722 | return 0; |
1734 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); | 1723 | info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); |
1735 | if (!info->psinfo) | 1724 | if (!info->psinfo) |
1736 | return 0; | 1725 | goto notes_free; |
1737 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); | 1726 | info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); |
1738 | if (!info->prstatus) | 1727 | if (!info->prstatus) |
1739 | return 0; | 1728 | goto psinfo_free; |
1740 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); | 1729 | info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); |
1741 | if (!info->fpu) | 1730 | if (!info->fpu) |
1742 | return 0; | 1731 | goto prstatus_free; |
1743 | #ifdef ELF_CORE_COPY_XFPREGS | 1732 | #ifdef ELF_CORE_COPY_XFPREGS |
1744 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); | 1733 | info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); |
1745 | if (!info->xfpu) | 1734 | if (!info->xfpu) |
1746 | return 0; | 1735 | goto fpu_free; |
1736 | #endif | ||
1737 | return 1; | ||
1738 | #ifdef ELF_CORE_COPY_XFPREGS | ||
1739 | fpu_free: | ||
1740 | kfree(info->fpu); | ||
1747 | #endif | 1741 | #endif |
1742 | prstatus_free: | ||
1743 | kfree(info->prstatus); | ||
1744 | psinfo_free: | ||
1745 | kfree(info->psinfo); | ||
1746 | notes_free: | ||
1747 | kfree(info->notes); | ||
1748 | return 0; | ||
1749 | } | ||
1750 | |||
1751 | static int fill_note_info(struct elfhdr *elf, int phdrs, | ||
1752 | struct elf_note_info *info, | ||
1753 | long signr, struct pt_regs *regs) | ||
1754 | { | ||
1755 | struct list_head *t; | ||
1756 | |||
1757 | if (!elf_note_info_init(info)) | ||
1758 | return 0; | ||
1748 | 1759 | ||
1749 | info->thread_status_size = 0; | ||
1750 | if (signr) { | 1760 | if (signr) { |
1751 | struct core_thread *ct; | 1761 | struct core_thread *ct; |
1752 | struct elf_thread_status *ets; | 1762 | struct elf_thread_status *ets; |
@@ -1806,8 +1816,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, | |||
1806 | #endif | 1816 | #endif |
1807 | 1817 | ||
1808 | return 1; | 1818 | return 1; |
1809 | |||
1810 | #undef NUM_NOTES | ||
1811 | } | 1819 | } |
1812 | 1820 | ||
1813 | static size_t get_note_info_size(struct elf_note_info *info) | 1821 | static size_t get_note_info_size(struct elf_note_info *info) |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 76285471073e..38502c67987c 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -283,20 +283,23 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, | |||
283 | } | 283 | } |
284 | 284 | ||
285 | stack_size = exec_params.stack_size; | 285 | stack_size = exec_params.stack_size; |
286 | if (stack_size < interp_params.stack_size) | ||
287 | stack_size = interp_params.stack_size; | ||
288 | |||
289 | if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) | 286 | if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) |
290 | executable_stack = EXSTACK_ENABLE_X; | 287 | executable_stack = EXSTACK_ENABLE_X; |
291 | else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) | 288 | else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) |
292 | executable_stack = EXSTACK_DISABLE_X; | 289 | executable_stack = EXSTACK_DISABLE_X; |
293 | else if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) | ||
294 | executable_stack = EXSTACK_ENABLE_X; | ||
295 | else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) | ||
296 | executable_stack = EXSTACK_DISABLE_X; | ||
297 | else | 290 | else |
298 | executable_stack = EXSTACK_DEFAULT; | 291 | executable_stack = EXSTACK_DEFAULT; |
299 | 292 | ||
293 | if (stack_size == 0) { | ||
294 | stack_size = interp_params.stack_size; | ||
295 | if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK) | ||
296 | executable_stack = EXSTACK_ENABLE_X; | ||
297 | else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK) | ||
298 | executable_stack = EXSTACK_DISABLE_X; | ||
299 | else | ||
300 | executable_stack = EXSTACK_DEFAULT; | ||
301 | } | ||
302 | |||
300 | retval = -ENOEXEC; | 303 | retval = -ENOEXEC; |
301 | if (stack_size == 0) | 304 | if (stack_size == 0) |
302 | goto error; | 305 | goto error; |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index e92f229e3c6e..a2796651e756 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -278,8 +278,6 @@ static int decompress_exec( | |||
278 | ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); | 278 | ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); |
279 | if (ret <= 0) | 279 | if (ret <= 0) |
280 | break; | 280 | break; |
281 | if (ret >= (unsigned long) -4096) | ||
282 | break; | ||
283 | len -= ret; | 281 | len -= ret; |
284 | 282 | ||
285 | strm.next_in = buf; | 283 | strm.next_in = buf; |
@@ -335,7 +333,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp) | |||
335 | "(%d != %d)", (unsigned) r, curid, id); | 333 | "(%d != %d)", (unsigned) r, curid, id); |
336 | goto failed; | 334 | goto failed; |
337 | } else if ( ! p->lib_list[id].loaded && | 335 | } else if ( ! p->lib_list[id].loaded && |
338 | load_flat_shared_library(id, p) > (unsigned long) -4096) { | 336 | IS_ERR_VALUE(load_flat_shared_library(id, p))) { |
339 | printk("BINFMT_FLAT: failed to load library %d", id); | 337 | printk("BINFMT_FLAT: failed to load library %d", id); |
340 | goto failed; | 338 | goto failed; |
341 | } | 339 | } |
@@ -545,7 +543,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
545 | textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, | 543 | textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, |
546 | MAP_PRIVATE|MAP_EXECUTABLE, 0); | 544 | MAP_PRIVATE|MAP_EXECUTABLE, 0); |
547 | up_write(&current->mm->mmap_sem); | 545 | up_write(&current->mm->mmap_sem); |
548 | if (!textpos || textpos >= (unsigned long) -4096) { | 546 | if (!textpos || IS_ERR_VALUE(textpos)) { |
549 | if (!textpos) | 547 | if (!textpos) |
550 | textpos = (unsigned long) -ENOMEM; | 548 | textpos = (unsigned long) -ENOMEM; |
551 | printk("Unable to mmap process text, errno %d\n", (int)-textpos); | 549 | printk("Unable to mmap process text, errno %d\n", (int)-textpos); |
@@ -560,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
560 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); | 558 | PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); |
561 | up_write(&current->mm->mmap_sem); | 559 | up_write(&current->mm->mmap_sem); |
562 | 560 | ||
563 | if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) { | 561 | if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) { |
564 | if (!realdatastart) | 562 | if (!realdatastart) |
565 | realdatastart = (unsigned long) -ENOMEM; | 563 | realdatastart = (unsigned long) -ENOMEM; |
566 | printk("Unable to allocate RAM for process data, errno %d\n", | 564 | printk("Unable to allocate RAM for process data, errno %d\n", |
@@ -587,7 +585,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
587 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, | 585 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, |
588 | data_len + (relocs * sizeof(unsigned long)), &fpos); | 586 | data_len + (relocs * sizeof(unsigned long)), &fpos); |
589 | } | 587 | } |
590 | if (result >= (unsigned long)-4096) { | 588 | if (IS_ERR_VALUE(result)) { |
591 | printk("Unable to read data+bss, errno %d\n", (int)-result); | 589 | printk("Unable to read data+bss, errno %d\n", (int)-result); |
592 | do_munmap(current->mm, textpos, text_len); | 590 | do_munmap(current->mm, textpos, text_len); |
593 | do_munmap(current->mm, realdatastart, data_len + extra); | 591 | do_munmap(current->mm, realdatastart, data_len + extra); |
@@ -607,7 +605,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
607 | PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); | 605 | PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); |
608 | up_write(¤t->mm->mmap_sem); | 606 | up_write(¤t->mm->mmap_sem); |
609 | 607 | ||
610 | if (!textpos || textpos >= (unsigned long) -4096) { | 608 | if (!textpos || IS_ERR_VALUE(textpos)) { |
611 | if (!textpos) | 609 | if (!textpos) |
612 | textpos = (unsigned long) -ENOMEM; | 610 | textpos = (unsigned long) -ENOMEM; |
613 | printk("Unable to allocate RAM for process text/data, errno %d\n", | 611 | printk("Unable to allocate RAM for process text/data, errno %d\n", |
@@ -641,7 +639,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
641 | fpos = 0; | 639 | fpos = 0; |
642 | result = bprm->file->f_op->read(bprm->file, | 640 | result = bprm->file->f_op->read(bprm->file, |
643 | (char *) textpos, text_len, &fpos); | 641 | (char *) textpos, text_len, &fpos); |
644 | if (result < (unsigned long) -4096) | 642 | if (!IS_ERR_VALUE(result)) |
645 | result = decompress_exec(bprm, text_len, (char *) datapos, | 643 | result = decompress_exec(bprm, text_len, (char *) datapos, |
646 | data_len + (relocs * sizeof(unsigned long)), 0); | 644 | data_len + (relocs * sizeof(unsigned long)), 0); |
647 | } | 645 | } |
@@ -651,13 +649,13 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
651 | fpos = 0; | 649 | fpos = 0; |
652 | result = bprm->file->f_op->read(bprm->file, | 650 | result = bprm->file->f_op->read(bprm->file, |
653 | (char *) textpos, text_len, &fpos); | 651 | (char *) textpos, text_len, &fpos); |
654 | if (result < (unsigned long) -4096) { | 652 | if (!IS_ERR_VALUE(result)) { |
655 | fpos = ntohl(hdr->data_start); | 653 | fpos = ntohl(hdr->data_start); |
656 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, | 654 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, |
657 | data_len + (relocs * sizeof(unsigned long)), &fpos); | 655 | data_len + (relocs * sizeof(unsigned long)), &fpos); |
658 | } | 656 | } |
659 | } | 657 | } |
660 | if (result >= (unsigned long)-4096) { | 658 | if (IS_ERR_VALUE(result)) { |
661 | printk("Unable to read code+data+bss, errno %d\n",(int)-result); | 659 | printk("Unable to read code+data+bss, errno %d\n",(int)-result); |
662 | do_munmap(current->mm, textpos, text_len + data_len + extra + | 660 | do_munmap(current->mm, textpos, text_len + data_len + extra + |
663 | MAX_SHARED_LIBS * sizeof(unsigned long)); | 661 | MAX_SHARED_LIBS * sizeof(unsigned long)); |
@@ -835,7 +833,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs) | |||
835 | 833 | ||
836 | res = prepare_binprm(&bprm); | 834 | res = prepare_binprm(&bprm); |
837 | 835 | ||
838 | if (res <= (unsigned long)-4096) | 836 | if (!IS_ERR_VALUE(res)) |
839 | res = load_flat_file(&bprm, libs, id, NULL); | 837 | res = load_flat_file(&bprm, libs, id, NULL); |
840 | 838 | ||
841 | abort_creds(bprm.cred); | 839 | abort_creds(bprm.cred); |
@@ -880,7 +878,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
880 | stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ | 878 | stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ |
881 | 879 | ||
882 | res = load_flat_file(bprm, &libinfo, 0, &stack_len); | 880 | res = load_flat_file(bprm, &libinfo, 0, &stack_len); |
883 | if (res > (unsigned long)-4096) | 881 | if (IS_ERR_VALUE(res)) |
884 | return res; | 882 | return res; |
885 | 883 | ||
886 | /* Update data segment pointers for all libraries */ | 884 | /* Update data segment pointers for all libraries */ |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9096fd0ca3ca..d154a3f365d5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -5269,6 +5269,7 @@ static const struct address_space_operations btrfs_aops = { | |||
5269 | .invalidatepage = btrfs_invalidatepage, | 5269 | .invalidatepage = btrfs_invalidatepage, |
5270 | .releasepage = btrfs_releasepage, | 5270 | .releasepage = btrfs_releasepage, |
5271 | .set_page_dirty = btrfs_set_page_dirty, | 5271 | .set_page_dirty = btrfs_set_page_dirty, |
5272 | .error_remove_page = generic_error_remove_page, | ||
5272 | }; | 5273 | }; |
5273 | 5274 | ||
5274 | static const struct address_space_operations btrfs_symlink_aops = { | 5275 | static const struct address_space_operations btrfs_symlink_aops = { |
diff --git a/fs/char_dev.c b/fs/char_dev.c index 3cbc57f932d2..d6db933df2b2 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -264,7 +264,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, | |||
264 | { | 264 | { |
265 | struct char_device_struct *cd; | 265 | struct char_device_struct *cd; |
266 | struct cdev *cdev; | 266 | struct cdev *cdev; |
267 | char *s; | ||
268 | int err = -ENOMEM; | 267 | int err = -ENOMEM; |
269 | 268 | ||
270 | cd = __register_chrdev_region(major, baseminor, count, name); | 269 | cd = __register_chrdev_region(major, baseminor, count, name); |
@@ -278,8 +277,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor, | |||
278 | cdev->owner = fops->owner; | 277 | cdev->owner = fops->owner; |
279 | cdev->ops = fops; | 278 | cdev->ops = fops; |
280 | kobject_set_name(&cdev->kobj, "%s", name); | 279 | kobject_set_name(&cdev->kobj, "%s", name); |
281 | for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) | ||
282 | *s = '!'; | ||
283 | 280 | ||
284 | err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); | 281 | err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); |
285 | if (err) | 282 | if (err) |
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index 8ccd5ed81d9c..d99860a33890 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _CODA_INT_ | 2 | #define _CODA_INT_ |
3 | 3 | ||
4 | struct dentry; | 4 | struct dentry; |
5 | struct file; | ||
5 | 6 | ||
6 | extern struct file_system_type coda_fs_type; | 7 | extern struct file_system_type coda_fs_type; |
7 | extern unsigned long coda_timeout; | 8 | extern unsigned long coda_timeout; |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index a2edb7913447..31f4b0e6d72c 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -63,9 +63,9 @@ static void drop_slab(void) | |||
63 | } | 63 | } |
64 | 64 | ||
65 | int drop_caches_sysctl_handler(ctl_table *table, int write, | 65 | int drop_caches_sysctl_handler(ctl_table *table, int write, |
66 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 66 | void __user *buffer, size_t *length, loff_t *ppos) |
67 | { | 67 | { |
68 | proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 68 | proc_dointvec_minmax(table, write, buffer, length, ppos); |
69 | if (write) { | 69 | if (write) { |
70 | if (sysctl_drop_caches & 1) | 70 | if (sysctl_drop_caches & 1) |
71 | drop_pagecache(); | 71 | drop_pagecache(); |
@@ -55,6 +55,7 @@ | |||
55 | #include <linux/kmod.h> | 55 | #include <linux/kmod.h> |
56 | #include <linux/fsnotify.h> | 56 | #include <linux/fsnotify.h> |
57 | #include <linux/fs_struct.h> | 57 | #include <linux/fs_struct.h> |
58 | #include <linux/pipe_fs_i.h> | ||
58 | 59 | ||
59 | #include <asm/uaccess.h> | 60 | #include <asm/uaccess.h> |
60 | #include <asm/mmu_context.h> | 61 | #include <asm/mmu_context.h> |
@@ -63,6 +64,7 @@ | |||
63 | 64 | ||
64 | int core_uses_pid; | 65 | int core_uses_pid; |
65 | char core_pattern[CORENAME_MAX_SIZE] = "core"; | 66 | char core_pattern[CORENAME_MAX_SIZE] = "core"; |
67 | unsigned int core_pipe_limit; | ||
66 | int suid_dumpable = 0; | 68 | int suid_dumpable = 0; |
67 | 69 | ||
68 | /* The maximal length of core_pattern is also specified in sysctl.c */ | 70 | /* The maximal length of core_pattern is also specified in sysctl.c */ |
@@ -1393,18 +1395,16 @@ out_ret: | |||
1393 | return retval; | 1395 | return retval; |
1394 | } | 1396 | } |
1395 | 1397 | ||
1396 | int set_binfmt(struct linux_binfmt *new) | 1398 | void set_binfmt(struct linux_binfmt *new) |
1397 | { | 1399 | { |
1398 | struct linux_binfmt *old = current->binfmt; | 1400 | struct mm_struct *mm = current->mm; |
1399 | 1401 | ||
1400 | if (new) { | 1402 | if (mm->binfmt) |
1401 | if (!try_module_get(new->module)) | 1403 | module_put(mm->binfmt->module); |
1402 | return -1; | 1404 | |
1403 | } | 1405 | mm->binfmt = new; |
1404 | current->binfmt = new; | 1406 | if (new) |
1405 | if (old) | 1407 | __module_get(new->module); |
1406 | module_put(old->module); | ||
1407 | return 0; | ||
1408 | } | 1408 | } |
1409 | 1409 | ||
1410 | EXPORT_SYMBOL(set_binfmt); | 1410 | EXPORT_SYMBOL(set_binfmt); |
@@ -1728,6 +1728,29 @@ int get_dumpable(struct mm_struct *mm) | |||
1728 | return (ret >= 2) ? 2 : ret; | 1728 | return (ret >= 2) ? 2 : ret; |
1729 | } | 1729 | } |
1730 | 1730 | ||
1731 | static void wait_for_dump_helpers(struct file *file) | ||
1732 | { | ||
1733 | struct pipe_inode_info *pipe; | ||
1734 | |||
1735 | pipe = file->f_path.dentry->d_inode->i_pipe; | ||
1736 | |||
1737 | pipe_lock(pipe); | ||
1738 | pipe->readers++; | ||
1739 | pipe->writers--; | ||
1740 | |||
1741 | while ((pipe->readers > 1) && (!signal_pending(current))) { | ||
1742 | wake_up_interruptible_sync(&pipe->wait); | ||
1743 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
1744 | pipe_wait(pipe); | ||
1745 | } | ||
1746 | |||
1747 | pipe->readers--; | ||
1748 | pipe->writers++; | ||
1749 | pipe_unlock(pipe); | ||
1750 | |||
1751 | } | ||
1752 | |||
1753 | |||
1731 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) | 1754 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) |
1732 | { | 1755 | { |
1733 | struct core_state core_state; | 1756 | struct core_state core_state; |
@@ -1744,11 +1767,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1744 | unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; | 1767 | unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; |
1745 | char **helper_argv = NULL; | 1768 | char **helper_argv = NULL; |
1746 | int helper_argc = 0; | 1769 | int helper_argc = 0; |
1747 | char *delimit; | 1770 | int dump_count = 0; |
1771 | static atomic_t core_dump_count = ATOMIC_INIT(0); | ||
1748 | 1772 | ||
1749 | audit_core_dumps(signr); | 1773 | audit_core_dumps(signr); |
1750 | 1774 | ||
1751 | binfmt = current->binfmt; | 1775 | binfmt = mm->binfmt; |
1752 | if (!binfmt || !binfmt->core_dump) | 1776 | if (!binfmt || !binfmt->core_dump) |
1753 | goto fail; | 1777 | goto fail; |
1754 | 1778 | ||
@@ -1799,54 +1823,63 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1799 | lock_kernel(); | 1823 | lock_kernel(); |
1800 | ispipe = format_corename(corename, signr); | 1824 | ispipe = format_corename(corename, signr); |
1801 | unlock_kernel(); | 1825 | unlock_kernel(); |
1802 | /* | 1826 | |
1803 | * Don't bother to check the RLIMIT_CORE value if core_pattern points | ||
1804 | * to a pipe. Since we're not writing directly to the filesystem | ||
1805 | * RLIMIT_CORE doesn't really apply, as no actual core file will be | ||
1806 | * created unless the pipe reader choses to write out the core file | ||
1807 | * at which point file size limits and permissions will be imposed | ||
1808 | * as it does with any other process | ||
1809 | */ | ||
1810 | if ((!ispipe) && (core_limit < binfmt->min_coredump)) | 1827 | if ((!ispipe) && (core_limit < binfmt->min_coredump)) |
1811 | goto fail_unlock; | 1828 | goto fail_unlock; |
1812 | 1829 | ||
1813 | if (ispipe) { | 1830 | if (ispipe) { |
1831 | if (core_limit == 0) { | ||
1832 | /* | ||
1833 | * Normally core limits are irrelevant to pipes, since | ||
1834 | * we're not writing to the file system, but we use | ||
1835 | * core_limit of 0 here as a speacial value. Any | ||
1836 | * non-zero limit gets set to RLIM_INFINITY below, but | ||
1837 | * a limit of 0 skips the dump. This is a consistent | ||
1838 | * way to catch recursive crashes. We can still crash | ||
1839 | * if the core_pattern binary sets RLIM_CORE = !0 | ||
1840 | * but it runs as root, and can do lots of stupid things | ||
1841 | * Note that we use task_tgid_vnr here to grab the pid | ||
1842 | * of the process group leader. That way we get the | ||
1843 | * right pid if a thread in a multi-threaded | ||
1844 | * core_pattern process dies. | ||
1845 | */ | ||
1846 | printk(KERN_WARNING | ||
1847 | "Process %d(%s) has RLIMIT_CORE set to 0\n", | ||
1848 | task_tgid_vnr(current), current->comm); | ||
1849 | printk(KERN_WARNING "Aborting core\n"); | ||
1850 | goto fail_unlock; | ||
1851 | } | ||
1852 | |||
1853 | dump_count = atomic_inc_return(&core_dump_count); | ||
1854 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { | ||
1855 | printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n", | ||
1856 | task_tgid_vnr(current), current->comm); | ||
1857 | printk(KERN_WARNING "Skipping core dump\n"); | ||
1858 | goto fail_dropcount; | ||
1859 | } | ||
1860 | |||
1814 | helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); | 1861 | helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); |
1815 | if (!helper_argv) { | 1862 | if (!helper_argv) { |
1816 | printk(KERN_WARNING "%s failed to allocate memory\n", | 1863 | printk(KERN_WARNING "%s failed to allocate memory\n", |
1817 | __func__); | 1864 | __func__); |
1818 | goto fail_unlock; | 1865 | goto fail_dropcount; |
1819 | } | ||
1820 | /* Terminate the string before the first option */ | ||
1821 | delimit = strchr(corename, ' '); | ||
1822 | if (delimit) | ||
1823 | *delimit = '\0'; | ||
1824 | delimit = strrchr(helper_argv[0], '/'); | ||
1825 | if (delimit) | ||
1826 | delimit++; | ||
1827 | else | ||
1828 | delimit = helper_argv[0]; | ||
1829 | if (!strcmp(delimit, current->comm)) { | ||
1830 | printk(KERN_NOTICE "Recursive core dump detected, " | ||
1831 | "aborting\n"); | ||
1832 | goto fail_unlock; | ||
1833 | } | 1866 | } |
1834 | 1867 | ||
1835 | core_limit = RLIM_INFINITY; | 1868 | core_limit = RLIM_INFINITY; |
1836 | 1869 | ||
1837 | /* SIGPIPE can happen, but it's just never processed */ | 1870 | /* SIGPIPE can happen, but it's just never processed */ |
1838 | if (call_usermodehelper_pipe(corename+1, helper_argv, NULL, | 1871 | if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, |
1839 | &file)) { | 1872 | &file)) { |
1840 | printk(KERN_INFO "Core dump to %s pipe failed\n", | 1873 | printk(KERN_INFO "Core dump to %s pipe failed\n", |
1841 | corename); | 1874 | corename); |
1842 | goto fail_unlock; | 1875 | goto fail_dropcount; |
1843 | } | 1876 | } |
1844 | } else | 1877 | } else |
1845 | file = filp_open(corename, | 1878 | file = filp_open(corename, |
1846 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | 1879 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, |
1847 | 0600); | 1880 | 0600); |
1848 | if (IS_ERR(file)) | 1881 | if (IS_ERR(file)) |
1849 | goto fail_unlock; | 1882 | goto fail_dropcount; |
1850 | inode = file->f_path.dentry->d_inode; | 1883 | inode = file->f_path.dentry->d_inode; |
1851 | if (inode->i_nlink > 1) | 1884 | if (inode->i_nlink > 1) |
1852 | goto close_fail; /* multiple links - don't dump */ | 1885 | goto close_fail; /* multiple links - don't dump */ |
@@ -1875,7 +1908,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1875 | if (retval) | 1908 | if (retval) |
1876 | current->signal->group_exit_code |= 0x80; | 1909 | current->signal->group_exit_code |= 0x80; |
1877 | close_fail: | 1910 | close_fail: |
1911 | if (ispipe && core_pipe_limit) | ||
1912 | wait_for_dump_helpers(file); | ||
1878 | filp_close(file, NULL); | 1913 | filp_close(file, NULL); |
1914 | fail_dropcount: | ||
1915 | if (dump_count) | ||
1916 | atomic_dec(&core_dump_count); | ||
1879 | fail_unlock: | 1917 | fail_unlock: |
1880 | if (helper_argv) | 1918 | if (helper_argv) |
1881 | argv_free(helper_argv); | 1919 | argv_free(helper_argv); |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 1c1638f873a4..ade634076d0a 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -819,6 +819,7 @@ const struct address_space_operations ext2_aops = { | |||
819 | .writepages = ext2_writepages, | 819 | .writepages = ext2_writepages, |
820 | .migratepage = buffer_migrate_page, | 820 | .migratepage = buffer_migrate_page, |
821 | .is_partially_uptodate = block_is_partially_uptodate, | 821 | .is_partially_uptodate = block_is_partially_uptodate, |
822 | .error_remove_page = generic_error_remove_page, | ||
822 | }; | 823 | }; |
823 | 824 | ||
824 | const struct address_space_operations ext2_aops_xip = { | 825 | const struct address_space_operations ext2_aops_xip = { |
@@ -837,6 +838,7 @@ const struct address_space_operations ext2_nobh_aops = { | |||
837 | .direct_IO = ext2_direct_IO, | 838 | .direct_IO = ext2_direct_IO, |
838 | .writepages = ext2_writepages, | 839 | .writepages = ext2_writepages, |
839 | .migratepage = buffer_migrate_page, | 840 | .migratepage = buffer_migrate_page, |
841 | .error_remove_page = generic_error_remove_page, | ||
840 | }; | 842 | }; |
841 | 843 | ||
842 | /* | 844 | /* |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index cd098a7b77fc..acf1b1423327 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1830,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = { | |||
1830 | .direct_IO = ext3_direct_IO, | 1830 | .direct_IO = ext3_direct_IO, |
1831 | .migratepage = buffer_migrate_page, | 1831 | .migratepage = buffer_migrate_page, |
1832 | .is_partially_uptodate = block_is_partially_uptodate, | 1832 | .is_partially_uptodate = block_is_partially_uptodate, |
1833 | .error_remove_page = generic_error_remove_page, | ||
1833 | }; | 1834 | }; |
1834 | 1835 | ||
1835 | static const struct address_space_operations ext3_writeback_aops = { | 1836 | static const struct address_space_operations ext3_writeback_aops = { |
@@ -1845,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = { | |||
1845 | .direct_IO = ext3_direct_IO, | 1846 | .direct_IO = ext3_direct_IO, |
1846 | .migratepage = buffer_migrate_page, | 1847 | .migratepage = buffer_migrate_page, |
1847 | .is_partially_uptodate = block_is_partially_uptodate, | 1848 | .is_partially_uptodate = block_is_partially_uptodate, |
1849 | .error_remove_page = generic_error_remove_page, | ||
1848 | }; | 1850 | }; |
1849 | 1851 | ||
1850 | static const struct address_space_operations ext3_journalled_aops = { | 1852 | static const struct address_space_operations ext3_journalled_aops = { |
@@ -1859,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = { | |||
1859 | .invalidatepage = ext3_invalidatepage, | 1861 | .invalidatepage = ext3_invalidatepage, |
1860 | .releasepage = ext3_releasepage, | 1862 | .releasepage = ext3_releasepage, |
1861 | .is_partially_uptodate = block_is_partially_uptodate, | 1863 | .is_partially_uptodate = block_is_partially_uptodate, |
1864 | .error_remove_page = generic_error_remove_page, | ||
1862 | }; | 1865 | }; |
1863 | 1866 | ||
1864 | void ext3_set_aops(struct inode *inode) | 1867 | void ext3_set_aops(struct inode *inode) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3a798737e305..064746fad581 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -3386,6 +3386,7 @@ static const struct address_space_operations ext4_ordered_aops = { | |||
3386 | .direct_IO = ext4_direct_IO, | 3386 | .direct_IO = ext4_direct_IO, |
3387 | .migratepage = buffer_migrate_page, | 3387 | .migratepage = buffer_migrate_page, |
3388 | .is_partially_uptodate = block_is_partially_uptodate, | 3388 | .is_partially_uptodate = block_is_partially_uptodate, |
3389 | .error_remove_page = generic_error_remove_page, | ||
3389 | }; | 3390 | }; |
3390 | 3391 | ||
3391 | static const struct address_space_operations ext4_writeback_aops = { | 3392 | static const struct address_space_operations ext4_writeback_aops = { |
@@ -3401,6 +3402,7 @@ static const struct address_space_operations ext4_writeback_aops = { | |||
3401 | .direct_IO = ext4_direct_IO, | 3402 | .direct_IO = ext4_direct_IO, |
3402 | .migratepage = buffer_migrate_page, | 3403 | .migratepage = buffer_migrate_page, |
3403 | .is_partially_uptodate = block_is_partially_uptodate, | 3404 | .is_partially_uptodate = block_is_partially_uptodate, |
3405 | .error_remove_page = generic_error_remove_page, | ||
3404 | }; | 3406 | }; |
3405 | 3407 | ||
3406 | static const struct address_space_operations ext4_journalled_aops = { | 3408 | static const struct address_space_operations ext4_journalled_aops = { |
@@ -3415,6 +3417,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
3415 | .invalidatepage = ext4_invalidatepage, | 3417 | .invalidatepage = ext4_invalidatepage, |
3416 | .releasepage = ext4_releasepage, | 3418 | .releasepage = ext4_releasepage, |
3417 | .is_partially_uptodate = block_is_partially_uptodate, | 3419 | .is_partially_uptodate = block_is_partially_uptodate, |
3420 | .error_remove_page = generic_error_remove_page, | ||
3418 | }; | 3421 | }; |
3419 | 3422 | ||
3420 | static const struct address_space_operations ext4_da_aops = { | 3423 | static const struct address_space_operations ext4_da_aops = { |
@@ -3431,6 +3434,7 @@ static const struct address_space_operations ext4_da_aops = { | |||
3431 | .direct_IO = ext4_direct_IO, | 3434 | .direct_IO = ext4_direct_IO, |
3432 | .migratepage = buffer_migrate_page, | 3435 | .migratepage = buffer_migrate_page, |
3433 | .is_partially_uptodate = block_is_partially_uptodate, | 3436 | .is_partially_uptodate = block_is_partially_uptodate, |
3437 | .error_remove_page = generic_error_remove_page, | ||
3434 | }; | 3438 | }; |
3435 | 3439 | ||
3436 | void ext4_set_aops(struct inode *inode) | 3440 | void ext4_set_aops(struct inode *inode) |
diff --git a/fs/fcntl.c b/fs/fcntl.c index ae413086db97..fc089f2f7f56 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp) | |||
263 | return pid; | 263 | return pid; |
264 | } | 264 | } |
265 | 265 | ||
266 | static int f_setown_ex(struct file *filp, unsigned long arg) | ||
267 | { | ||
268 | struct f_owner_ex * __user owner_p = (void * __user)arg; | ||
269 | struct f_owner_ex owner; | ||
270 | struct pid *pid; | ||
271 | int type; | ||
272 | int ret; | ||
273 | |||
274 | ret = copy_from_user(&owner, owner_p, sizeof(owner)); | ||
275 | if (ret) | ||
276 | return ret; | ||
277 | |||
278 | switch (owner.type) { | ||
279 | case F_OWNER_TID: | ||
280 | type = PIDTYPE_MAX; | ||
281 | break; | ||
282 | |||
283 | case F_OWNER_PID: | ||
284 | type = PIDTYPE_PID; | ||
285 | break; | ||
286 | |||
287 | case F_OWNER_GID: | ||
288 | type = PIDTYPE_PGID; | ||
289 | break; | ||
290 | |||
291 | default: | ||
292 | return -EINVAL; | ||
293 | } | ||
294 | |||
295 | rcu_read_lock(); | ||
296 | pid = find_vpid(owner.pid); | ||
297 | if (owner.pid && !pid) | ||
298 | ret = -ESRCH; | ||
299 | else | ||
300 | ret = __f_setown(filp, pid, type, 1); | ||
301 | rcu_read_unlock(); | ||
302 | |||
303 | return ret; | ||
304 | } | ||
305 | |||
306 | static int f_getown_ex(struct file *filp, unsigned long arg) | ||
307 | { | ||
308 | struct f_owner_ex * __user owner_p = (void * __user)arg; | ||
309 | struct f_owner_ex owner; | ||
310 | int ret = 0; | ||
311 | |||
312 | read_lock(&filp->f_owner.lock); | ||
313 | owner.pid = pid_vnr(filp->f_owner.pid); | ||
314 | switch (filp->f_owner.pid_type) { | ||
315 | case PIDTYPE_MAX: | ||
316 | owner.type = F_OWNER_TID; | ||
317 | break; | ||
318 | |||
319 | case PIDTYPE_PID: | ||
320 | owner.type = F_OWNER_PID; | ||
321 | break; | ||
322 | |||
323 | case PIDTYPE_PGID: | ||
324 | owner.type = F_OWNER_GID; | ||
325 | break; | ||
326 | |||
327 | default: | ||
328 | WARN_ON(1); | ||
329 | ret = -EINVAL; | ||
330 | break; | ||
331 | } | ||
332 | read_unlock(&filp->f_owner.lock); | ||
333 | |||
334 | if (!ret) | ||
335 | ret = copy_to_user(owner_p, &owner, sizeof(owner)); | ||
336 | return ret; | ||
337 | } | ||
338 | |||
266 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | 339 | static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, |
267 | struct file *filp) | 340 | struct file *filp) |
268 | { | 341 | { |
@@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
313 | case F_SETOWN: | 386 | case F_SETOWN: |
314 | err = f_setown(filp, arg, 1); | 387 | err = f_setown(filp, arg, 1); |
315 | break; | 388 | break; |
389 | case F_GETOWN_EX: | ||
390 | err = f_getown_ex(filp, arg); | ||
391 | break; | ||
392 | case F_SETOWN_EX: | ||
393 | err = f_setown_ex(filp, arg); | ||
394 | break; | ||
316 | case F_GETSIG: | 395 | case F_GETSIG: |
317 | err = filp->f_owner.signum; | 396 | err = filp->f_owner.signum; |
318 | break; | 397 | break; |
@@ -428,8 +507,7 @@ static inline int sigio_perm(struct task_struct *p, | |||
428 | 507 | ||
429 | static void send_sigio_to_task(struct task_struct *p, | 508 | static void send_sigio_to_task(struct task_struct *p, |
430 | struct fown_struct *fown, | 509 | struct fown_struct *fown, |
431 | int fd, | 510 | int fd, int reason, int group) |
432 | int reason) | ||
433 | { | 511 | { |
434 | /* | 512 | /* |
435 | * F_SETSIG can change ->signum lockless in parallel, make | 513 | * F_SETSIG can change ->signum lockless in parallel, make |
@@ -461,11 +539,11 @@ static void send_sigio_to_task(struct task_struct *p, | |||
461 | else | 539 | else |
462 | si.si_band = band_table[reason - POLL_IN]; | 540 | si.si_band = band_table[reason - POLL_IN]; |
463 | si.si_fd = fd; | 541 | si.si_fd = fd; |
464 | if (!group_send_sig_info(signum, &si, p)) | 542 | if (!do_send_sig_info(signum, &si, p, group)) |
465 | break; | 543 | break; |
466 | /* fall-through: fall back on the old plain SIGIO signal */ | 544 | /* fall-through: fall back on the old plain SIGIO signal */ |
467 | case 0: | 545 | case 0: |
468 | group_send_sig_info(SIGIO, SEND_SIG_PRIV, p); | 546 | do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group); |
469 | } | 547 | } |
470 | } | 548 | } |
471 | 549 | ||
@@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown, int fd, int band) | |||
474 | struct task_struct *p; | 552 | struct task_struct *p; |
475 | enum pid_type type; | 553 | enum pid_type type; |
476 | struct pid *pid; | 554 | struct pid *pid; |
555 | int group = 1; | ||
477 | 556 | ||
478 | read_lock(&fown->lock); | 557 | read_lock(&fown->lock); |
558 | |||
479 | type = fown->pid_type; | 559 | type = fown->pid_type; |
560 | if (type == PIDTYPE_MAX) { | ||
561 | group = 0; | ||
562 | type = PIDTYPE_PID; | ||
563 | } | ||
564 | |||
480 | pid = fown->pid; | 565 | pid = fown->pid; |
481 | if (!pid) | 566 | if (!pid) |
482 | goto out_unlock_fown; | 567 | goto out_unlock_fown; |
483 | 568 | ||
484 | read_lock(&tasklist_lock); | 569 | read_lock(&tasklist_lock); |
485 | do_each_pid_task(pid, type, p) { | 570 | do_each_pid_task(pid, type, p) { |
486 | send_sigio_to_task(p, fown, fd, band); | 571 | send_sigio_to_task(p, fown, fd, band, group); |
487 | } while_each_pid_task(pid, type, p); | 572 | } while_each_pid_task(pid, type, p); |
488 | read_unlock(&tasklist_lock); | 573 | read_unlock(&tasklist_lock); |
489 | out_unlock_fown: | 574 | out_unlock_fown: |
@@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown, int fd, int band) | |||
491 | } | 576 | } |
492 | 577 | ||
493 | static void send_sigurg_to_task(struct task_struct *p, | 578 | static void send_sigurg_to_task(struct task_struct *p, |
494 | struct fown_struct *fown) | 579 | struct fown_struct *fown, int group) |
495 | { | 580 | { |
496 | if (sigio_perm(p, fown, SIGURG)) | 581 | if (sigio_perm(p, fown, SIGURG)) |
497 | group_send_sig_info(SIGURG, SEND_SIG_PRIV, p); | 582 | do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group); |
498 | } | 583 | } |
499 | 584 | ||
500 | int send_sigurg(struct fown_struct *fown) | 585 | int send_sigurg(struct fown_struct *fown) |
@@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown) | |||
502 | struct task_struct *p; | 587 | struct task_struct *p; |
503 | enum pid_type type; | 588 | enum pid_type type; |
504 | struct pid *pid; | 589 | struct pid *pid; |
590 | int group = 1; | ||
505 | int ret = 0; | 591 | int ret = 0; |
506 | 592 | ||
507 | read_lock(&fown->lock); | 593 | read_lock(&fown->lock); |
594 | |||
508 | type = fown->pid_type; | 595 | type = fown->pid_type; |
596 | if (type == PIDTYPE_MAX) { | ||
597 | group = 0; | ||
598 | type = PIDTYPE_PID; | ||
599 | } | ||
600 | |||
509 | pid = fown->pid; | 601 | pid = fown->pid; |
510 | if (!pid) | 602 | if (!pid) |
511 | goto out_unlock_fown; | 603 | goto out_unlock_fown; |
@@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown) | |||
514 | 606 | ||
515 | read_lock(&tasklist_lock); | 607 | read_lock(&tasklist_lock); |
516 | do_each_pid_task(pid, type, p) { | 608 | do_each_pid_task(pid, type, p) { |
517 | send_sigurg_to_task(p, fown); | 609 | send_sigurg_to_task(p, fown, group); |
518 | } while_each_pid_task(pid, type, p); | 610 | } while_each_pid_task(pid, type, p); |
519 | read_unlock(&tasklist_lock); | 611 | read_unlock(&tasklist_lock); |
520 | out_unlock_fown: | 612 | out_unlock_fown: |
diff --git a/fs/file_table.c b/fs/file_table.c index 334ce39881f8..8eb44042e009 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(get_max_files); | |||
74 | * Handle nr_files sysctl | 74 | * Handle nr_files sysctl |
75 | */ | 75 | */ |
76 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) | 76 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) |
77 | int proc_nr_files(ctl_table *table, int write, struct file *filp, | 77 | int proc_nr_files(ctl_table *table, int write, |
78 | void __user *buffer, size_t *lenp, loff_t *ppos) | 78 | void __user *buffer, size_t *lenp, loff_t *ppos) |
79 | { | 79 | { |
80 | files_stat.nr_files = get_nr_files(); | 80 | files_stat.nr_files = get_nr_files(); |
81 | return proc_dointvec(table, write, filp, buffer, lenp, ppos); | 81 | return proc_dointvec(table, write, buffer, lenp, ppos); |
82 | } | 82 | } |
83 | #else | 83 | #else |
84 | int proc_nr_files(ctl_table *table, int write, struct file *filp, | 84 | int proc_nr_files(ctl_table *table, int write, |
85 | void __user *buffer, size_t *lenp, loff_t *ppos) | 85 | void __user *buffer, size_t *lenp, loff_t *ppos) |
86 | { | 86 | { |
87 | return -ENOSYS; | 87 | return -ENOSYS; |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 7ebae9a4ecc0..694b5d48f036 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -1135,6 +1135,7 @@ static const struct address_space_operations gfs2_writeback_aops = { | |||
1135 | .direct_IO = gfs2_direct_IO, | 1135 | .direct_IO = gfs2_direct_IO, |
1136 | .migratepage = buffer_migrate_page, | 1136 | .migratepage = buffer_migrate_page, |
1137 | .is_partially_uptodate = block_is_partially_uptodate, | 1137 | .is_partially_uptodate = block_is_partially_uptodate, |
1138 | .error_remove_page = generic_error_remove_page, | ||
1138 | }; | 1139 | }; |
1139 | 1140 | ||
1140 | static const struct address_space_operations gfs2_ordered_aops = { | 1141 | static const struct address_space_operations gfs2_ordered_aops = { |
@@ -1151,6 +1152,7 @@ static const struct address_space_operations gfs2_ordered_aops = { | |||
1151 | .direct_IO = gfs2_direct_IO, | 1152 | .direct_IO = gfs2_direct_IO, |
1152 | .migratepage = buffer_migrate_page, | 1153 | .migratepage = buffer_migrate_page, |
1153 | .is_partially_uptodate = block_is_partially_uptodate, | 1154 | .is_partially_uptodate = block_is_partially_uptodate, |
1155 | .error_remove_page = generic_error_remove_page, | ||
1154 | }; | 1156 | }; |
1155 | 1157 | ||
1156 | static const struct address_space_operations gfs2_jdata_aops = { | 1158 | static const struct address_space_operations gfs2_jdata_aops = { |
@@ -1166,6 +1168,7 @@ static const struct address_space_operations gfs2_jdata_aops = { | |||
1166 | .invalidatepage = gfs2_invalidatepage, | 1168 | .invalidatepage = gfs2_invalidatepage, |
1167 | .releasepage = gfs2_releasepage, | 1169 | .releasepage = gfs2_releasepage, |
1168 | .is_partially_uptodate = block_is_partially_uptodate, | 1170 | .is_partially_uptodate = block_is_partially_uptodate, |
1171 | .error_remove_page = generic_error_remove_page, | ||
1169 | }; | 1172 | }; |
1170 | 1173 | ||
1171 | void gfs2_set_aops(struct inode *inode) | 1174 | void gfs2_set_aops(struct inode *inode) |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index eba6d552d9c9..133335479c24 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -936,15 +936,9 @@ static struct file_system_type hugetlbfs_fs_type = { | |||
936 | 936 | ||
937 | static struct vfsmount *hugetlbfs_vfsmount; | 937 | static struct vfsmount *hugetlbfs_vfsmount; |
938 | 938 | ||
939 | static int can_do_hugetlb_shm(int creat_flags) | 939 | static int can_do_hugetlb_shm(void) |
940 | { | 940 | { |
941 | if (creat_flags != HUGETLB_SHMFS_INODE) | 941 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); |
942 | return 0; | ||
943 | if (capable(CAP_IPC_LOCK)) | ||
944 | return 1; | ||
945 | if (in_group_p(sysctl_hugetlb_shm_group)) | ||
946 | return 1; | ||
947 | return 0; | ||
948 | } | 942 | } |
949 | 943 | ||
950 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, | 944 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, |
@@ -960,7 +954,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, | |||
960 | if (!hugetlbfs_vfsmount) | 954 | if (!hugetlbfs_vfsmount) |
961 | return ERR_PTR(-ENOENT); | 955 | return ERR_PTR(-ENOENT); |
962 | 956 | ||
963 | if (!can_do_hugetlb_shm(creat_flags)) { | 957 | if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { |
964 | *user = current_user(); | 958 | *user = current_user(); |
965 | if (user_shm_lock(size, *user)) { | 959 | if (user_shm_lock(size, *user)) { |
966 | WARN_ONCE(1, | 960 | WARN_ONCE(1, |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5021b75d2d1e..86d6b4db1096 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -525,6 +525,7 @@ const struct address_space_operations nfs_file_aops = { | |||
525 | .direct_IO = nfs_direct_IO, | 525 | .direct_IO = nfs_direct_IO, |
526 | .migratepage = nfs_migrate_page, | 526 | .migratepage = nfs_migrate_page, |
527 | .launder_page = nfs_launder_page, | 527 | .launder_page = nfs_launder_page, |
528 | .error_remove_page = generic_error_remove_page, | ||
528 | }; | 529 | }; |
529 | 530 | ||
530 | /* | 531 | /* |
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index b38f944f0667..cfce53cb65d7 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c | |||
@@ -1550,6 +1550,7 @@ const struct address_space_operations ntfs_aops = { | |||
1550 | .migratepage = buffer_migrate_page, /* Move a page cache page from | 1550 | .migratepage = buffer_migrate_page, /* Move a page cache page from |
1551 | one physical page to an | 1551 | one physical page to an |
1552 | other. */ | 1552 | other. */ |
1553 | .error_remove_page = generic_error_remove_page, | ||
1553 | }; | 1554 | }; |
1554 | 1555 | ||
1555 | /** | 1556 | /** |
@@ -1569,6 +1570,7 @@ const struct address_space_operations ntfs_mst_aops = { | |||
1569 | .migratepage = buffer_migrate_page, /* Move a page cache page from | 1570 | .migratepage = buffer_migrate_page, /* Move a page cache page from |
1570 | one physical page to an | 1571 | one physical page to an |
1571 | other. */ | 1572 | other. */ |
1573 | .error_remove_page = generic_error_remove_page, | ||
1572 | }; | 1574 | }; |
1573 | 1575 | ||
1574 | #ifdef NTFS_RW | 1576 | #ifdef NTFS_RW |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 72e76062a900..deb2b132ae5e 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -2022,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = { | |||
2022 | .releasepage = ocfs2_releasepage, | 2022 | .releasepage = ocfs2_releasepage, |
2023 | .migratepage = buffer_migrate_page, | 2023 | .migratepage = buffer_migrate_page, |
2024 | .is_partially_uptodate = block_is_partially_uptodate, | 2024 | .is_partially_uptodate = block_is_partially_uptodate, |
2025 | .error_remove_page = generic_error_remove_page, | ||
2025 | }; | 2026 | }; |
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 171e052c07b3..c7bff4f603ff 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -97,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
97 | "Committed_AS: %8lu kB\n" | 97 | "Committed_AS: %8lu kB\n" |
98 | "VmallocTotal: %8lu kB\n" | 98 | "VmallocTotal: %8lu kB\n" |
99 | "VmallocUsed: %8lu kB\n" | 99 | "VmallocUsed: %8lu kB\n" |
100 | "VmallocChunk: %8lu kB\n", | 100 | "VmallocChunk: %8lu kB\n" |
101 | #ifdef CONFIG_MEMORY_FAILURE | ||
102 | "HardwareCorrupted: %8lu kB\n" | ||
103 | #endif | ||
104 | , | ||
101 | K(i.totalram), | 105 | K(i.totalram), |
102 | K(i.freeram), | 106 | K(i.freeram), |
103 | K(i.bufferram), | 107 | K(i.bufferram), |
@@ -144,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
144 | (unsigned long)VMALLOC_TOTAL >> 10, | 148 | (unsigned long)VMALLOC_TOTAL >> 10, |
145 | vmi.used >> 10, | 149 | vmi.used >> 10, |
146 | vmi.largest_chunk >> 10 | 150 | vmi.largest_chunk >> 10 |
151 | #ifdef CONFIG_MEMORY_FAILURE | ||
152 | ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10) | ||
153 | #endif | ||
147 | ); | 154 | ); |
148 | 155 | ||
149 | hugetlb_report_meminfo(m); | 156 | hugetlb_report_meminfo(m); |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 9b1e4e9a16bf..f667e8aeabdf 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, | |||
153 | 153 | ||
154 | /* careful: calling conventions are nasty here */ | 154 | /* careful: calling conventions are nasty here */ |
155 | res = count; | 155 | res = count; |
156 | error = table->proc_handler(table, write, filp, buf, &res, ppos); | 156 | error = table->proc_handler(table, write, buf, &res, ppos); |
157 | if (!error) | 157 | if (!error) |
158 | error = res; | 158 | error = res; |
159 | out: | 159 | out: |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 47f132df0c3f..c117fa80d1e9 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -528,7 +528,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) | |||
528 | pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; | 528 | pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK; |
529 | 529 | ||
530 | root = romfs_iget(sb, pos); | 530 | root = romfs_iget(sb, pos); |
531 | if (!root) | 531 | if (IS_ERR(root)) |
532 | goto error; | 532 | goto error; |
533 | 533 | ||
534 | sb->s_root = d_alloc_root(root); | 534 | sb->s_root = d_alloc_root(root); |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index d5e5559e31db..381854461b28 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1635,4 +1635,5 @@ const struct address_space_operations xfs_address_space_operations = { | |||
1635 | .direct_IO = xfs_vm_direct_IO, | 1635 | .direct_IO = xfs_vm_direct_IO, |
1636 | .migratepage = buffer_migrate_page, | 1636 | .migratepage = buffer_migrate_page, |
1637 | .is_partially_uptodate = block_is_partially_uptodate, | 1637 | .is_partially_uptodate = block_is_partially_uptodate, |
1638 | .error_remove_page = generic_error_remove_page, | ||
1638 | }; | 1639 | }; |
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 916c0ffb6083..c5bc67c4e3bb 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c | |||
@@ -26,7 +26,6 @@ STATIC int | |||
26 | xfs_stats_clear_proc_handler( | 26 | xfs_stats_clear_proc_handler( |
27 | ctl_table *ctl, | 27 | ctl_table *ctl, |
28 | int write, | 28 | int write, |
29 | struct file *filp, | ||
30 | void __user *buffer, | 29 | void __user *buffer, |
31 | size_t *lenp, | 30 | size_t *lenp, |
32 | loff_t *ppos) | 31 | loff_t *ppos) |
@@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler( | |||
34 | int c, ret, *valp = ctl->data; | 33 | int c, ret, *valp = ctl->data; |
35 | __uint32_t vn_active; | 34 | __uint32_t vn_active; |
36 | 35 | ||
37 | ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos); | 36 | ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); |
38 | 37 | ||
39 | if (!ret && write && *valp) { | 38 | if (!ret && write && *valp) { |
40 | printk("XFS Clearing xfsstats\n"); | 39 | printk("XFS Clearing xfsstats\n"); |
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h index 4d3e48373e74..0c3dd8603927 100644 --- a/include/asm-generic/fcntl.h +++ b/include/asm-generic/fcntl.h | |||
@@ -73,6 +73,19 @@ | |||
73 | #define F_SETSIG 10 /* for sockets. */ | 73 | #define F_SETSIG 10 /* for sockets. */ |
74 | #define F_GETSIG 11 /* for sockets. */ | 74 | #define F_GETSIG 11 /* for sockets. */ |
75 | #endif | 75 | #endif |
76 | #ifndef F_SETOWN_EX | ||
77 | #define F_SETOWN_EX 12 | ||
78 | #define F_GETOWN_EX 13 | ||
79 | #endif | ||
80 | |||
81 | #define F_OWNER_TID 0 | ||
82 | #define F_OWNER_PID 1 | ||
83 | #define F_OWNER_GID 2 | ||
84 | |||
85 | struct f_owner_ex { /* argument block for F_SETOWN_EX / F_GETOWN_EX */ | ||
86 | int type; /* owner kind: F_OWNER_TID, F_OWNER_PID or F_OWNER_GID */ | ||
87 | pid_t pid; /* id of the owner, interpreted according to type */ | ||
88 | }; | ||
76 | 89 | ||
77 | /* for F_[GET|SET]FL */ | 90 | /* for F_[GET|SET]FL */ |
78 | #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ | 91 | #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ |
diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index dd63bd38864b..5ee13b2fd223 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h | |||
@@ -34,6 +34,7 @@ | |||
34 | #define MADV_REMOVE 9 /* remove these pages & resources */ | 34 | #define MADV_REMOVE 9 /* remove these pages & resources */ |
35 | #define MADV_DONTFORK 10 /* don't inherit across fork */ | 35 | #define MADV_DONTFORK 10 /* don't inherit across fork */ |
36 | #define MADV_DOFORK 11 /* do inherit across fork */ | 36 | #define MADV_DOFORK 11 /* do inherit across fork */ |
37 | #define MADV_HWPOISON 100 /* poison a page for testing */ | ||
37 | 38 | ||
38 | #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ | 39 | #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ |
39 | #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ | 40 | #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ |
diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h index c840719a8c59..942d30b5aab1 100644 --- a/include/asm-generic/siginfo.h +++ b/include/asm-generic/siginfo.h | |||
@@ -82,6 +82,7 @@ typedef struct siginfo { | |||
82 | #ifdef __ARCH_SI_TRAPNO | 82 | #ifdef __ARCH_SI_TRAPNO |
83 | int _trapno; /* TRAP # which caused the signal */ | 83 | int _trapno; /* TRAP # which caused the signal */ |
84 | #endif | 84 | #endif |
85 | short _addr_lsb; /* LSB of the reported address */ | ||
85 | } _sigfault; | 86 | } _sigfault; |
86 | 87 | ||
87 | /* SIGPOLL */ | 88 | /* SIGPOLL */ |
@@ -112,6 +113,7 @@ typedef struct siginfo { | |||
112 | #ifdef __ARCH_SI_TRAPNO | 113 | #ifdef __ARCH_SI_TRAPNO |
113 | #define si_trapno _sifields._sigfault._trapno | 114 | #define si_trapno _sifields._sigfault._trapno |
114 | #endif | 115 | #endif |
116 | #define si_addr_lsb _sifields._sigfault._addr_lsb | ||
115 | #define si_band _sifields._sigpoll._band | 117 | #define si_band _sifields._sigpoll._band |
116 | #define si_fd _sifields._sigpoll._fd | 118 | #define si_fd _sifields._sigpoll._fd |
117 | 119 | ||
@@ -192,7 +194,11 @@ typedef struct siginfo { | |||
192 | #define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ | 194 | #define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ |
193 | #define BUS_ADRERR (__SI_FAULT|2) /* non-existent physical address */ | 195 | #define BUS_ADRERR (__SI_FAULT|2) /* non-existent physical address */ |
194 | #define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ | 196 | #define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ |
195 | #define NSIGBUS 3 | 197 | /* hardware memory error consumed on a machine check: action required */ |
198 | #define BUS_MCEERR_AR (__SI_FAULT|4) | ||
199 | /* hardware memory error detected in process but not consumed: action optional*/ | ||
200 | #define BUS_MCEERR_AO (__SI_FAULT|5) | ||
201 | #define NSIGBUS 5 | ||
196 | 202 | ||
197 | /* | 203 | /* |
198 | * SIGTRAP si_codes | 204 | * SIGTRAP si_codes |
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5fc2ef8d97fa..a1c486a88e88 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h | |||
@@ -58,25 +58,60 @@ struct dma_chan_ref { | |||
58 | * array. | 58 | * array. |
59 | * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a | 59 | * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a |
60 | * dependency chain | 60 | * dependency chain |
61 | * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. | 61 | * @ASYNC_TX_FENCE: specify that the next operation in the dependency |
62 | * chain uses this operation's result as an input | ||
62 | */ | 63 | */ |
63 | enum async_tx_flags { | 64 | enum async_tx_flags { |
64 | ASYNC_TX_XOR_ZERO_DST = (1 << 0), | 65 | ASYNC_TX_XOR_ZERO_DST = (1 << 0), |
65 | ASYNC_TX_XOR_DROP_DST = (1 << 1), | 66 | ASYNC_TX_XOR_DROP_DST = (1 << 1), |
66 | ASYNC_TX_ACK = (1 << 3), | 67 | ASYNC_TX_ACK = (1 << 2), |
67 | ASYNC_TX_DEP_ACK = (1 << 4), | 68 | ASYNC_TX_FENCE = (1 << 3), |
69 | }; | ||
70 | |||
71 | /** | ||
72 | * struct async_submit_ctl - async_tx submission/completion modifiers | ||
73 | * @flags: submission modifiers | ||
74 | * @depend_tx: parent dependency of the current operation being submitted | ||
75 | * @cb_fn: callback routine to run at operation completion | ||
76 | * @cb_param: parameter for the callback routine | ||
77 | * @scribble: caller provided space for dma/page address conversions | ||
78 | */ | ||
79 | struct async_submit_ctl { | ||
80 | enum async_tx_flags flags; | ||
81 | struct dma_async_tx_descriptor *depend_tx; | ||
82 | dma_async_tx_callback cb_fn; | ||
83 | void *cb_param; | ||
84 | void *scribble; | ||
68 | }; | 85 | }; |
69 | 86 | ||
70 | #ifdef CONFIG_DMA_ENGINE | 87 | #ifdef CONFIG_DMA_ENGINE |
71 | #define async_tx_issue_pending_all dma_issue_pending_all | 88 | #define async_tx_issue_pending_all dma_issue_pending_all |
89 | |||
90 | /** | ||
91 | * async_tx_issue_pending - send pending descriptor to the hardware channel | ||
92 | * @tx: descriptor handle to retrieve hardware context | ||
93 | * | ||
94 | * Note: any dependent operations will have already been issued by | ||
95 | * async_tx_channel_switch, or (in the case of no channel switch) will | ||
96 | * be already pending on this channel. | ||
97 | */ | ||
98 | static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) | ||
99 | { | ||
100 | if (likely(tx)) { | ||
101 | struct dma_chan *chan = tx->chan; | ||
102 | struct dma_device *dma = chan->device; | ||
103 | |||
104 | dma->device_issue_pending(chan); | ||
105 | } | ||
106 | } | ||
72 | #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL | 107 | #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL |
73 | #include <asm/async_tx.h> | 108 | #include <asm/async_tx.h> |
74 | #else | 109 | #else |
75 | #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ | 110 | #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ |
76 | __async_tx_find_channel(dep, type) | 111 | __async_tx_find_channel(dep, type) |
77 | struct dma_chan * | 112 | struct dma_chan * |
78 | __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | 113 | __async_tx_find_channel(struct async_submit_ctl *submit, |
79 | enum dma_transaction_type tx_type); | 114 | enum dma_transaction_type tx_type); |
80 | #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ | 115 | #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ |
81 | #else | 116 | #else |
82 | static inline void async_tx_issue_pending_all(void) | 117 | static inline void async_tx_issue_pending_all(void) |
@@ -84,10 +119,16 @@ static inline void async_tx_issue_pending_all(void) | |||
84 | do { } while (0); | 119 | do { } while (0); |
85 | } | 120 | } |
86 | 121 | ||
/* Stub used when CONFIG_DMA_ENGINE is not set: intentionally does nothing. */
static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
{
}
126 | |||
87 | static inline struct dma_chan * | 127 | static inline struct dma_chan * |
88 | async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | 128 | async_tx_find_channel(struct async_submit_ctl *submit, |
89 | enum dma_transaction_type tx_type, struct page **dst, int dst_count, | 129 | enum dma_transaction_type tx_type, struct page **dst, |
90 | struct page **src, int src_count, size_t len) | 130 | int dst_count, struct page **src, int src_count, |
131 | size_t len) | ||
91 | { | 132 | { |
92 | return NULL; | 133 | return NULL; |
93 | } | 134 | } |
@@ -99,46 +140,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, | |||
99 | * @cb_fn_param: parameter to pass to the callback routine | 140 | * @cb_fn_param: parameter to pass to the callback routine |
100 | */ | 141 | */ |
101 | static inline void | 142 | static inline void |
102 | async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) | 143 | async_tx_sync_epilog(struct async_submit_ctl *submit) |
103 | { | 144 | { |
104 | if (cb_fn) | 145 | if (submit->cb_fn) |
105 | cb_fn(cb_fn_param); | 146 | submit->cb_fn(submit->cb_param); |
106 | } | 147 | } |
107 | 148 | ||
108 | void | 149 | typedef union { |
109 | async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, | 150 | unsigned long addr; |
110 | enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, | 151 | struct page *page; |
111 | dma_async_tx_callback cb_fn, void *cb_fn_param); | 152 | dma_addr_t dma; |
153 | } addr_conv_t; | ||
154 | |||
155 | static inline void | ||
156 | init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, | ||
157 | struct dma_async_tx_descriptor *tx, | ||
158 | dma_async_tx_callback cb_fn, void *cb_param, | ||
159 | addr_conv_t *scribble) | ||
160 | { | ||
161 | args->flags = flags; | ||
162 | args->depend_tx = tx; | ||
163 | args->cb_fn = cb_fn; | ||
164 | args->cb_param = cb_param; | ||
165 | args->scribble = scribble; | ||
166 | } | ||
167 | |||
168 | void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, | ||
169 | struct async_submit_ctl *submit); | ||
112 | 170 | ||
113 | struct dma_async_tx_descriptor * | 171 | struct dma_async_tx_descriptor * |
114 | async_xor(struct page *dest, struct page **src_list, unsigned int offset, | 172 | async_xor(struct page *dest, struct page **src_list, unsigned int offset, |
115 | int src_cnt, size_t len, enum async_tx_flags flags, | 173 | int src_cnt, size_t len, struct async_submit_ctl *submit); |
116 | struct dma_async_tx_descriptor *depend_tx, | ||
117 | dma_async_tx_callback cb_fn, void *cb_fn_param); | ||
118 | 174 | ||
119 | struct dma_async_tx_descriptor * | 175 | struct dma_async_tx_descriptor * |
120 | async_xor_zero_sum(struct page *dest, struct page **src_list, | 176 | async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, |
121 | unsigned int offset, int src_cnt, size_t len, | 177 | int src_cnt, size_t len, enum sum_check_flags *result, |
122 | u32 *result, enum async_tx_flags flags, | 178 | struct async_submit_ctl *submit); |
123 | struct dma_async_tx_descriptor *depend_tx, | ||
124 | dma_async_tx_callback cb_fn, void *cb_fn_param); | ||
125 | 179 | ||
126 | struct dma_async_tx_descriptor * | 180 | struct dma_async_tx_descriptor * |
127 | async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, | 181 | async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, |
128 | unsigned int src_offset, size_t len, enum async_tx_flags flags, | 182 | unsigned int src_offset, size_t len, |
129 | struct dma_async_tx_descriptor *depend_tx, | 183 | struct async_submit_ctl *submit); |
130 | dma_async_tx_callback cb_fn, void *cb_fn_param); | ||
131 | 184 | ||
132 | struct dma_async_tx_descriptor * | 185 | struct dma_async_tx_descriptor * |
133 | async_memset(struct page *dest, int val, unsigned int offset, | 186 | async_memset(struct page *dest, int val, unsigned int offset, |
134 | size_t len, enum async_tx_flags flags, | 187 | size_t len, struct async_submit_ctl *submit); |
135 | struct dma_async_tx_descriptor *depend_tx, | 188 | |
136 | dma_async_tx_callback cb_fn, void *cb_fn_param); | 189 | struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); |
190 | |||
191 | struct dma_async_tx_descriptor * | ||
192 | async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, | ||
193 | size_t len, struct async_submit_ctl *submit); | ||
194 | |||
195 | struct dma_async_tx_descriptor * | ||
196 | async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, | ||
197 | size_t len, enum sum_check_flags *pqres, struct page *spare, | ||
198 | struct async_submit_ctl *submit); | ||
199 | |||
200 | struct dma_async_tx_descriptor * | ||
201 | async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, | ||
202 | struct page **ptrs, struct async_submit_ctl *submit); | ||
137 | 203 | ||
138 | struct dma_async_tx_descriptor * | 204 | struct dma_async_tx_descriptor * |
139 | async_trigger_callback(enum async_tx_flags flags, | 205 | async_raid6_datap_recov(int src_num, size_t bytes, int faila, |
140 | struct dma_async_tx_descriptor *depend_tx, | 206 | struct page **ptrs, struct async_submit_ctl *submit); |
141 | dma_async_tx_callback cb_fn, void *cb_fn_param); | ||
142 | 207 | ||
143 | void async_tx_quiesce(struct dma_async_tx_descriptor **tx); | 208 | void async_tx_quiesce(struct dma_async_tx_descriptor **tx); |
144 | #endif /* _ASYNC_TX_H_ */ | 209 | #endif /* _ASYNC_TX_H_ */ |
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 2046b5b8af48..aece486ac734 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h | |||
@@ -120,7 +120,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); | |||
120 | extern int prepare_bprm_creds(struct linux_binprm *bprm); | 120 | extern int prepare_bprm_creds(struct linux_binprm *bprm); |
121 | extern void install_exec_creds(struct linux_binprm *bprm); | 121 | extern void install_exec_creds(struct linux_binprm *bprm); |
122 | extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); | 122 | extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); |
123 | extern int set_binfmt(struct linux_binfmt *new); | 123 | extern void set_binfmt(struct linux_binfmt *new); |
124 | extern void free_bprm(struct linux_binprm *); | 124 | extern void free_bprm(struct linux_binprm *); |
125 | 125 | ||
126 | #endif /* __KERNEL__ */ | 126 | #endif /* __KERNEL__ */ |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 90bba9e62286..b62bb9294d0c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -141,6 +141,38 @@ enum { | |||
141 | CGRP_WAIT_ON_RMDIR, | 141 | CGRP_WAIT_ON_RMDIR, |
142 | }; | 142 | }; |
143 | 143 | ||
144 | /* which pidlist file are we talking about? */ | ||
145 | enum cgroup_filetype { | ||
146 | CGROUP_FILE_PROCS, | ||
147 | CGROUP_FILE_TASKS, | ||
148 | }; | ||
149 | |||
150 | /* | ||
151 | * A pidlist is a list of pids that virtually represents the contents of one | ||
152 | * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists, | ||
153 | * a pair (one each for procs, tasks) for each pid namespace that's relevant | ||
154 | * to the cgroup. | ||
155 | */ | ||
156 | struct cgroup_pidlist { | ||
157 | /* | ||
158 | * used to find which pidlist is wanted. doesn't change as long as | ||
159 | * this particular list stays in the list. | ||
160 | */ | ||
161 | struct { enum cgroup_filetype type; struct pid_namespace *ns; } key; | ||
162 | /* array of xids */ | ||
163 | pid_t *list; | ||
164 | /* how many elements the above list has */ | ||
165 | int length; | ||
166 | /* how many files are using the current array */ | ||
167 | int use_count; | ||
168 | /* each of these stored in a list by its cgroup */ | ||
169 | struct list_head links; | ||
170 | /* pointer to the cgroup we belong to, for list removal purposes */ | ||
171 | struct cgroup *owner; | ||
172 | /* protects the other fields */ | ||
173 | struct rw_semaphore mutex; | ||
174 | }; | ||
175 | |||
144 | struct cgroup { | 176 | struct cgroup { |
145 | unsigned long flags; /* "unsigned long" so bitops work */ | 177 | unsigned long flags; /* "unsigned long" so bitops work */ |
146 | 178 | ||
@@ -179,11 +211,12 @@ struct cgroup { | |||
179 | */ | 211 | */ |
180 | struct list_head release_list; | 212 | struct list_head release_list; |
181 | 213 | ||
182 | /* pids_mutex protects pids_list and cached pid arrays. */ | 214 | /* |
183 | struct rw_semaphore pids_mutex; | 215 | * list of pidlists, up to two for each namespace (one for procs, one |
184 | 216 | * for tasks); created on demand. | |
185 | /* Linked list of struct cgroup_pids */ | 217 | */ |
186 | struct list_head pids_list; | 218 | struct list_head pidlists; |
219 | struct mutex pidlist_mutex; | ||
187 | 220 | ||
188 | /* For RCU-protected deletion */ | 221 | /* For RCU-protected deletion */ |
189 | struct rcu_head rcu_head; | 222 | struct rcu_head rcu_head; |
@@ -227,6 +260,9 @@ struct css_set { | |||
227 | * during subsystem registration (at boot time). | 260 | * during subsystem registration (at boot time). |
228 | */ | 261 | */ |
229 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; | 262 | struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; |
263 | |||
264 | /* For RCU-protected deletion */ | ||
265 | struct rcu_head rcu_head; | ||
230 | }; | 266 | }; |
231 | 267 | ||
232 | /* | 268 | /* |
@@ -389,10 +425,11 @@ struct cgroup_subsys { | |||
389 | struct cgroup *cgrp); | 425 | struct cgroup *cgrp); |
390 | int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); | 426 | int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); |
391 | void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); | 427 | void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); |
392 | int (*can_attach)(struct cgroup_subsys *ss, | 428 | int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, |
393 | struct cgroup *cgrp, struct task_struct *tsk); | 429 | struct task_struct *tsk, bool threadgroup); |
394 | void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, | 430 | void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, |
395 | struct cgroup *old_cgrp, struct task_struct *tsk); | 431 | struct cgroup *old_cgrp, struct task_struct *tsk, |
432 | bool threadgroup); | ||
396 | void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); | 433 | void (*fork)(struct cgroup_subsys *ss, struct task_struct *task); |
397 | void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); | 434 | void (*exit)(struct cgroup_subsys *ss, struct task_struct *task); |
398 | int (*populate)(struct cgroup_subsys *ss, | 435 | int (*populate)(struct cgroup_subsys *ss, |
diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 7f627775c947..ddb7a97c78c2 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h | |||
@@ -27,8 +27,8 @@ | |||
27 | * | 27 | * |
28 | * configfs Copyright (C) 2005 Oracle. All rights reserved. | 28 | * configfs Copyright (C) 2005 Oracle. All rights reserved. |
29 | * | 29 | * |
30 | * Please read Documentation/filesystems/configfs.txt before using the | 30 | * Please read Documentation/filesystems/configfs/configfs.txt before using |
31 | * configfs interface, ESPECIALLY the parts about reference counts and | 31 | * the configfs interface, ESPECIALLY the parts about reference counts and |
32 | * item destructors. | 32 | * item destructors. |
33 | */ | 33 | */ |
34 | 34 | ||
diff --git a/include/linux/dca.h b/include/linux/dca.h index 9c20c7e87d0a..d27a7a05718d 100644 --- a/include/linux/dca.h +++ b/include/linux/dca.h | |||
@@ -20,6 +20,9 @@ | |||
20 | */ | 20 | */ |
21 | #ifndef DCA_H | 21 | #ifndef DCA_H |
22 | #define DCA_H | 22 | #define DCA_H |
23 | |||
24 | #include <linux/pci.h> | ||
25 | |||
23 | /* DCA Provider API */ | 26 | /* DCA Provider API */ |
24 | 27 | ||
25 | /* DCA Notifier Interface */ | 28 | /* DCA Notifier Interface */ |
@@ -36,6 +39,12 @@ struct dca_provider { | |||
36 | int id; | 39 | int id; |
37 | }; | 40 | }; |
38 | 41 | ||
42 | struct dca_domain { | ||
43 | struct list_head node; | ||
44 | struct list_head dca_providers; | ||
45 | struct pci_bus *pci_rc; | ||
46 | }; | ||
47 | |||
39 | struct dca_ops { | 48 | struct dca_ops { |
40 | int (*add_requester) (struct dca_provider *, struct device *); | 49 | int (*add_requester) (struct dca_provider *, struct device *); |
41 | int (*remove_requester) (struct dca_provider *, struct device *); | 50 | int (*remove_requester) (struct dca_provider *, struct device *); |
@@ -47,7 +56,7 @@ struct dca_ops { | |||
47 | struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); | 56 | struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); |
48 | void free_dca_provider(struct dca_provider *dca); | 57 | void free_dca_provider(struct dca_provider *dca); |
49 | int register_dca_provider(struct dca_provider *dca, struct device *dev); | 58 | int register_dca_provider(struct dca_provider *dca, struct device *dev); |
50 | void unregister_dca_provider(struct dca_provider *dca); | 59 | void unregister_dca_provider(struct dca_provider *dca, struct device *dev); |
51 | 60 | ||
52 | static inline void *dca_priv(struct dca_provider *dca) | 61 | static inline void *dca_priv(struct dca_provider *dca) |
53 | { | 62 | { |
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index eb5c2ba2f81a..fc1b930f246c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h | |||
@@ -9,7 +9,7 @@ | |||
9 | * 2 as published by the Free Software Foundation. | 9 | * 2 as published by the Free Software Foundation. |
10 | * | 10 | * |
11 | * debugfs is for people to use instead of /proc or /sys. | 11 | * debugfs is for people to use instead of /proc or /sys. |
12 | * See Documentation/DocBook/kernel-api for more details. | 12 | * See Documentation/DocBook/filesystems for more details. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #ifndef _DEBUGFS_H_ | 15 | #ifndef _DEBUGFS_H_ |
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index ffefba81c818..2b9f2ac7ed60 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h | |||
@@ -48,19 +48,20 @@ enum dma_status { | |||
48 | 48 | ||
49 | /** | 49 | /** |
50 | * enum dma_transaction_type - DMA transaction types/indexes | 50 | * enum dma_transaction_type - DMA transaction types/indexes |
51 | * | ||
52 | * Note: The DMA_ASYNC_TX capability is not to be set by drivers. It is | ||
53 | * automatically set as dma devices are registered. | ||
51 | */ | 54 | */ |
52 | enum dma_transaction_type { | 55 | enum dma_transaction_type { |
53 | DMA_MEMCPY, | 56 | DMA_MEMCPY, |
54 | DMA_XOR, | 57 | DMA_XOR, |
55 | DMA_PQ_XOR, | 58 | DMA_PQ, |
56 | DMA_DUAL_XOR, | 59 | DMA_XOR_VAL, |
57 | DMA_PQ_UPDATE, | 60 | DMA_PQ_VAL, |
58 | DMA_ZERO_SUM, | ||
59 | DMA_PQ_ZERO_SUM, | ||
60 | DMA_MEMSET, | 61 | DMA_MEMSET, |
61 | DMA_MEMCPY_CRC32C, | ||
62 | DMA_INTERRUPT, | 62 | DMA_INTERRUPT, |
63 | DMA_PRIVATE, | 63 | DMA_PRIVATE, |
64 | DMA_ASYNC_TX, | ||
64 | DMA_SLAVE, | 65 | DMA_SLAVE, |
65 | }; | 66 | }; |
66 | 67 | ||
@@ -70,18 +71,25 @@ enum dma_transaction_type { | |||
70 | 71 | ||
71 | /** | 72 | /** |
72 | * enum dma_ctrl_flags - DMA flags to augment operation preparation, | 73 | * enum dma_ctrl_flags - DMA flags to augment operation preparation, |
73 | * control completion, and communicate status. | 74 | * control completion, and communicate status. |
74 | * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of | 75 | * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of |
75 | * this transaction | 76 | * this transaction |
76 | * @DMA_CTRL_ACK - the descriptor cannot be reused until the client | 77 | * @DMA_CTRL_ACK - the descriptor cannot be reused until the client |
77 | * acknowledges receipt, i.e. has has a chance to establish any | 78 | * acknowledges receipt, i.e. has had a chance to establish any dependency |
78 | * dependency chains | 79 | * chains |
79 | * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) | 80 | * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) |
80 | * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) | 81 | * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) |
81 | * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single | 82 | * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single |
82 | * (if not set, do the source dma-unmapping as page) | 83 | * (if not set, do the source dma-unmapping as page) |
83 | * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single | 84 | * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single |
84 | * (if not set, do the destination dma-unmapping as page) | 85 | * (if not set, do the destination dma-unmapping as page) |
86 | * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q | ||
87 | * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P | ||
88 | * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as | ||
89 | * sources that were the result of a previous operation, in the case of a PQ | ||
90 | * operation it continues the calculation with new sources | ||
91 | * @DMA_PREP_FENCE - tell the driver that subsequent operations depend | ||
92 | * on the result of this operation | ||
85 | */ | 93 | */ |
86 | enum dma_ctrl_flags { | 94 | enum dma_ctrl_flags { |
87 | DMA_PREP_INTERRUPT = (1 << 0), | 95 | DMA_PREP_INTERRUPT = (1 << 0), |
@@ -90,9 +98,32 @@ enum dma_ctrl_flags { | |||
90 | DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), | 98 | DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), |
91 | DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), | 99 | DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), |
92 | DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), | 100 | DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), |
101 | DMA_PREP_PQ_DISABLE_P = (1 << 6), | ||
102 | DMA_PREP_PQ_DISABLE_Q = (1 << 7), | ||
103 | DMA_PREP_CONTINUE = (1 << 8), | ||
104 | DMA_PREP_FENCE = (1 << 9), | ||
93 | }; | 105 | }; |
94 | 106 | ||
95 | /** | 107 | /** |
108 | * enum sum_check_bits - bit position of sum_check_flags | ||
109 | */ | ||
110 | enum sum_check_bits { | ||
111 | SUM_CHECK_P = 0, | ||
112 | SUM_CHECK_Q = 1, | ||
113 | }; | ||
114 | |||
115 | /** | ||
116 | * enum sum_check_flags - result of async_{xor,pq}_zero_sum operations | ||
117 | * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise | ||
118 | * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise | ||
119 | */ | ||
120 | enum sum_check_flags { | ||
121 | SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), | ||
122 | SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), | ||
123 | }; | ||
124 | |||
125 | |||
126 | /** | ||
96 | * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. | 127 | * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. |
97 | * See linux/cpumask.h | 128 | * See linux/cpumask.h |
98 | */ | 129 | */ |
@@ -180,8 +211,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param); | |||
180 | * @flags: flags to augment operation preparation, control completion, and | 211 | * @flags: flags to augment operation preparation, control completion, and |
181 | * communicate status | 212 | * communicate status |
182 | * @phys: physical address of the descriptor | 213 | * @phys: physical address of the descriptor |
183 | * @tx_list: driver common field for operations that require multiple | ||
184 | * descriptors | ||
185 | * @chan: target channel for this operation | 214 | * @chan: target channel for this operation |
186 | * @tx_submit: set the prepared descriptor(s) to be executed by the engine | 215 | * @tx_submit: set the prepared descriptor(s) to be executed by the engine |
187 | * @callback: routine to call after this operation is complete | 216 | * @callback: routine to call after this operation is complete |
@@ -195,7 +224,6 @@ struct dma_async_tx_descriptor { | |||
195 | dma_cookie_t cookie; | 224 | dma_cookie_t cookie; |
196 | enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */ | 225 | enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */ |
197 | dma_addr_t phys; | 226 | dma_addr_t phys; |
198 | struct list_head tx_list; | ||
199 | struct dma_chan *chan; | 227 | struct dma_chan *chan; |
200 | dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); | 228 | dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); |
201 | dma_async_tx_callback callback; | 229 | dma_async_tx_callback callback; |
@@ -213,6 +241,11 @@ struct dma_async_tx_descriptor { | |||
213 | * @global_node: list_head for global dma_device_list | 241 | * @global_node: list_head for global dma_device_list |
214 | * @cap_mask: one or more dma_capability flags | 242 | * @cap_mask: one or more dma_capability flags |
215 | * @max_xor: maximum number of xor sources, 0 if no capability | 243 | * @max_xor: maximum number of xor sources, 0 if no capability |
244 | * @max_pq: maximum number of PQ sources and PQ-continue capability | ||
245 | * @copy_align: alignment shift for memcpy operations | ||
246 | * @xor_align: alignment shift for xor operations | ||
247 | * @pq_align: alignment shift for pq operations | ||
248 | * @fill_align: alignment shift for memset operations | ||
216 | * @dev_id: unique device ID | 249 | * @dev_id: unique device ID |
217 | * @dev: struct device reference for dma mapping api | 250 | * @dev: struct device reference for dma mapping api |
218 | * @device_alloc_chan_resources: allocate resources and return the | 251 | * @device_alloc_chan_resources: allocate resources and return the |
@@ -220,7 +253,9 @@ struct dma_async_tx_descriptor { | |||
220 | * @device_free_chan_resources: release DMA channel's resources | 253 | * @device_free_chan_resources: release DMA channel's resources |
221 | * @device_prep_dma_memcpy: prepares a memcpy operation | 254 | * @device_prep_dma_memcpy: prepares a memcpy operation |
222 | * @device_prep_dma_xor: prepares a xor operation | 255 | * @device_prep_dma_xor: prepares a xor operation |
223 | * @device_prep_dma_zero_sum: prepares a zero_sum operation | 256 | * @device_prep_dma_xor_val: prepares a xor validation operation |
257 | * @device_prep_dma_pq: prepares a pq operation | ||
258 | * @device_prep_dma_pq_val: prepares a pq validation operation | ||
224 | * @device_prep_dma_memset: prepares a memset operation | 259 | * @device_prep_dma_memset: prepares a memset operation |
225 | * @device_prep_dma_interrupt: prepares an end of chain interrupt operation | 260 | * @device_prep_dma_interrupt: prepares an end of chain interrupt operation |
226 | * @device_prep_slave_sg: prepares a slave dma operation | 261 | * @device_prep_slave_sg: prepares a slave dma operation |
@@ -235,7 +270,13 @@ struct dma_device { | |||
235 | struct list_head channels; | 270 | struct list_head channels; |
236 | struct list_head global_node; | 271 | struct list_head global_node; |
237 | dma_cap_mask_t cap_mask; | 272 | dma_cap_mask_t cap_mask; |
238 | int max_xor; | 273 | unsigned short max_xor; |
274 | unsigned short max_pq; | ||
275 | u8 copy_align; | ||
276 | u8 xor_align; | ||
277 | u8 pq_align; | ||
278 | u8 fill_align; | ||
279 | #define DMA_HAS_PQ_CONTINUE (1 << 15) | ||
239 | 280 | ||
240 | int dev_id; | 281 | int dev_id; |
241 | struct device *dev; | 282 | struct device *dev; |
@@ -249,9 +290,17 @@ struct dma_device { | |||
249 | struct dma_async_tx_descriptor *(*device_prep_dma_xor)( | 290 | struct dma_async_tx_descriptor *(*device_prep_dma_xor)( |
250 | struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, | 291 | struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, |
251 | unsigned int src_cnt, size_t len, unsigned long flags); | 292 | unsigned int src_cnt, size_t len, unsigned long flags); |
252 | struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( | 293 | struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( |
253 | struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, | 294 | struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, |
254 | size_t len, u32 *result, unsigned long flags); | 295 | size_t len, enum sum_check_flags *result, unsigned long flags); |
296 | struct dma_async_tx_descriptor *(*device_prep_dma_pq)( | ||
297 | struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, | ||
298 | unsigned int src_cnt, const unsigned char *scf, | ||
299 | size_t len, unsigned long flags); | ||
300 | struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( | ||
301 | struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, | ||
302 | unsigned int src_cnt, const unsigned char *scf, size_t len, | ||
303 | enum sum_check_flags *pqres, unsigned long flags); | ||
255 | struct dma_async_tx_descriptor *(*device_prep_dma_memset)( | 304 | struct dma_async_tx_descriptor *(*device_prep_dma_memset)( |
256 | struct dma_chan *chan, dma_addr_t dest, int value, size_t len, | 305 | struct dma_chan *chan, dma_addr_t dest, int value, size_t len, |
257 | unsigned long flags); | 306 | unsigned long flags); |
@@ -270,6 +319,96 @@ struct dma_device { | |||
270 | void (*device_issue_pending)(struct dma_chan *chan); | 319 | void (*device_issue_pending)(struct dma_chan *chan); |
271 | }; | 320 | }; |
272 | 321 | ||
322 | static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len) | ||
323 | { | ||
324 | size_t mask; | ||
325 | |||
326 | if (!align) | ||
327 | return true; | ||
328 | mask = (1 << align) - 1; | ||
329 | if (mask & (off1 | off2 | len)) | ||
330 | return false; | ||
331 | return true; | ||
332 | } | ||
333 | |||
334 | static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1, | ||
335 | size_t off2, size_t len) | ||
336 | { | ||
337 | return dmaengine_check_align(dev->copy_align, off1, off2, len); | ||
338 | } | ||
339 | |||
340 | static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1, | ||
341 | size_t off2, size_t len) | ||
342 | { | ||
343 | return dmaengine_check_align(dev->xor_align, off1, off2, len); | ||
344 | } | ||
345 | |||
346 | static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1, | ||
347 | size_t off2, size_t len) | ||
348 | { | ||
349 | return dmaengine_check_align(dev->pq_align, off1, off2, len); | ||
350 | } | ||
351 | |||
352 | static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1, | ||
353 | size_t off2, size_t len) | ||
354 | { | ||
355 | return dmaengine_check_align(dev->fill_align, off1, off2, len); | ||
356 | } | ||
357 | |||
358 | static inline void | ||
359 | dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) | ||
360 | { | ||
361 | dma->max_pq = maxpq; | ||
362 | if (has_pq_continue) | ||
363 | dma->max_pq |= DMA_HAS_PQ_CONTINUE; | ||
364 | } | ||
365 | |||
366 | static inline bool dmaf_continue(enum dma_ctrl_flags flags) | ||
367 | { | ||
368 | return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; | ||
369 | } | ||
370 | |||
371 | static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) | ||
372 | { | ||
373 | enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; | ||
374 | |||
375 | return (flags & mask) == mask; | ||
376 | } | ||
377 | |||
378 | static inline bool dma_dev_has_pq_continue(struct dma_device *dma) | ||
379 | { | ||
380 | return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; | ||
381 | } | ||
382 | |||
383 | static unsigned short dma_dev_to_maxpq(struct dma_device *dma) | ||
384 | { | ||
385 | return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; | ||
386 | } | ||
387 | |||
388 | /* dma_maxpq - reduce maxpq in the face of continued operations | ||
389 | * @dma - dma device with PQ capability | ||
390 | * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set | ||
391 | * | ||
392 | * When an engine does not support native continuation we need 3 extra | ||
393 | * source slots to reuse P and Q with the following coefficients: | ||
394 | * 1/ {00} * P : remove P from Q', but use it as a source for P' | ||
395 | * 2/ {01} * Q : use Q to continue Q' calculation | ||
396 | * 3/ {00} * Q : subtract Q from P' to cancel (2) | ||
397 | * | ||
398 | * In the case where P is disabled we only need 1 extra source: | ||
399 | * 1/ {01} * Q : use Q to continue Q' calculation | ||
400 | */ | ||
401 | static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) | ||
402 | { | ||
403 | if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) | ||
404 | return dma_dev_to_maxpq(dma); | ||
405 | else if (dmaf_p_disabled_continue(flags)) | ||
406 | return dma_dev_to_maxpq(dma) - 1; | ||
407 | else if (dmaf_continue(flags)) | ||
408 | return dma_dev_to_maxpq(dma) - 3; | ||
409 | BUG(); | ||
410 | } | ||
411 | |||
273 | /* --- public DMA engine API --- */ | 412 | /* --- public DMA engine API --- */ |
274 | 413 | ||
275 | #ifdef CONFIG_DMA_ENGINE | 414 | #ifdef CONFIG_DMA_ENGINE |
@@ -299,7 +438,11 @@ static inline void net_dmaengine_put(void) | |||
299 | #ifdef CONFIG_ASYNC_TX_DMA | 438 | #ifdef CONFIG_ASYNC_TX_DMA |
300 | #define async_dmaengine_get() dmaengine_get() | 439 | #define async_dmaengine_get() dmaengine_get() |
301 | #define async_dmaengine_put() dmaengine_put() | 440 | #define async_dmaengine_put() dmaengine_put() |
441 | #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH | ||
442 | #define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX) | ||
443 | #else | ||
302 | #define async_dma_find_channel(type) dma_find_channel(type) | 444 | #define async_dma_find_channel(type) dma_find_channel(type) |
445 | #endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */ | ||
303 | #else | 446 | #else |
304 | static inline void async_dmaengine_get(void) | 447 | static inline void async_dmaengine_get(void) |
305 | { | 448 | { |
@@ -312,7 +455,7 @@ async_dma_find_channel(enum dma_transaction_type type) | |||
312 | { | 455 | { |
313 | return NULL; | 456 | return NULL; |
314 | } | 457 | } |
315 | #endif | 458 | #endif /* CONFIG_ASYNC_TX_DMA */ |
316 | 459 | ||
317 | dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, | 460 | dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, |
318 | void *dest, void *src, size_t len); | 461 | void *dest, void *src, size_t len); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 51803528b095..78e95b8b66d4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -595,6 +595,7 @@ struct address_space_operations { | |||
595 | int (*launder_page) (struct page *); | 595 | int (*launder_page) (struct page *); |
596 | int (*is_partially_uptodate) (struct page *, read_descriptor_t *, | 596 | int (*is_partially_uptodate) (struct page *, read_descriptor_t *, |
597 | unsigned long); | 597 | unsigned long); |
598 | int (*error_remove_page)(struct address_space *, struct page *); | ||
598 | }; | 599 | }; |
599 | 600 | ||
600 | /* | 601 | /* |
@@ -2467,7 +2468,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, | |||
2467 | size_t len, loff_t *ppos); | 2468 | size_t len, loff_t *ppos); |
2468 | 2469 | ||
2469 | struct ctl_table; | 2470 | struct ctl_table; |
2470 | int proc_nr_files(struct ctl_table *table, int write, struct file *filp, | 2471 | int proc_nr_files(struct ctl_table *table, int write, |
2471 | void __user *buffer, size_t *lenp, loff_t *ppos); | 2472 | void __user *buffer, size_t *lenp, loff_t *ppos); |
2472 | 2473 | ||
2473 | int __init get_filesystem_list(char *buf); | 2474 | int __init get_filesystem_list(char *buf); |
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3c0924a18daf..cd3d2abaf30a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h | |||
@@ -19,7 +19,7 @@ | |||
19 | extern int ftrace_enabled; | 19 | extern int ftrace_enabled; |
20 | extern int | 20 | extern int |
21 | ftrace_enable_sysctl(struct ctl_table *table, int write, | 21 | ftrace_enable_sysctl(struct ctl_table *table, int write, |
22 | struct file *filp, void __user *buffer, size_t *lenp, | 22 | void __user *buffer, size_t *lenp, |
23 | loff_t *ppos); | 23 | loff_t *ppos); |
24 | 24 | ||
25 | typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); | 25 | typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); |
@@ -94,7 +94,7 @@ static inline void ftrace_start(void) { } | |||
94 | extern int stack_tracer_enabled; | 94 | extern int stack_tracer_enabled; |
95 | int | 95 | int |
96 | stack_trace_sysctl(struct ctl_table *table, int write, | 96 | stack_trace_sysctl(struct ctl_table *table, int write, |
97 | struct file *file, void __user *buffer, size_t *lenp, | 97 | void __user *buffer, size_t *lenp, |
98 | loff_t *ppos); | 98 | loff_t *ppos); |
99 | #endif | 99 | #endif |
100 | 100 | ||
diff --git a/include/linux/futex.h b/include/linux/futex.h index 34956c8fdebf..8ec17997d94f 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h | |||
@@ -4,11 +4,6 @@ | |||
4 | #include <linux/compiler.h> | 4 | #include <linux/compiler.h> |
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | 6 | ||
7 | struct inode; | ||
8 | struct mm_struct; | ||
9 | struct task_struct; | ||
10 | union ktime; | ||
11 | |||
12 | /* Second argument to futex syscall */ | 7 | /* Second argument to futex syscall */ |
13 | 8 | ||
14 | 9 | ||
@@ -129,6 +124,11 @@ struct robust_list_head { | |||
129 | #define FUTEX_BITSET_MATCH_ANY 0xffffffff | 124 | #define FUTEX_BITSET_MATCH_ANY 0xffffffff |
130 | 125 | ||
131 | #ifdef __KERNEL__ | 126 | #ifdef __KERNEL__ |
127 | struct inode; | ||
128 | struct mm_struct; | ||
129 | struct task_struct; | ||
130 | union ktime; | ||
131 | |||
132 | long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, | 132 | long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout, |
133 | u32 __user *uaddr2, u32 val2, u32 val3); | 133 | u32 __user *uaddr2, u32 val2, u32 val3); |
134 | 134 | ||
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 176e7ee73eff..11ab19ac6b3d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -20,9 +20,9 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) | |||
20 | } | 20 | } |
21 | 21 | ||
22 | void reset_vma_resv_huge_pages(struct vm_area_struct *vma); | 22 | void reset_vma_resv_huge_pages(struct vm_area_struct *vma); |
23 | int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); | 23 | int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); |
24 | int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); | 24 | int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); |
25 | int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); | 25 | int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); |
26 | int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); | 26 | int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); |
27 | int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, | 27 | int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, |
28 | struct page **, struct vm_area_struct **, | 28 | struct page **, struct vm_area_struct **, |
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e46a0734ab6e..bf9213b2db8f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void) | |||
118 | 118 | ||
119 | extern bool mem_cgroup_oom_called(struct task_struct *task); | 119 | extern bool mem_cgroup_oom_called(struct task_struct *task); |
120 | void mem_cgroup_update_mapped_file_stat(struct page *page, int val); | 120 | void mem_cgroup_update_mapped_file_stat(struct page *page, int val); |
121 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | ||
122 | gfp_t gfp_mask, int nid, | ||
123 | int zid); | ||
121 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ | 124 | #else /* CONFIG_CGROUP_MEM_RES_CTLR */ |
122 | struct mem_cgroup; | 125 | struct mem_cgroup; |
123 | 126 | ||
@@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page, | |||
276 | { | 279 | { |
277 | } | 280 | } |
278 | 281 | ||
282 | static inline | ||
283 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | ||
284 | gfp_t gfp_mask, int nid, int zid) | ||
285 | { | ||
286 | return 0; | ||
287 | } | ||
288 | |||
279 | #endif /* CONFIG_CGROUP_MEM_CONT */ | 289 | #endif /* CONFIG_CGROUP_MEM_CONT */ |
280 | 290 | ||
281 | #endif /* _LINUX_MEMCONTROL_H */ | 291 | #endif /* _LINUX_MEMCONTROL_H */ |
diff --git a/include/linux/mm.h b/include/linux/mm.h index b6eae5e3144b..6953a5a53e44 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -695,11 +695,12 @@ static inline int page_mapped(struct page *page) | |||
695 | #define VM_FAULT_SIGBUS 0x0002 | 695 | #define VM_FAULT_SIGBUS 0x0002 |
696 | #define VM_FAULT_MAJOR 0x0004 | 696 | #define VM_FAULT_MAJOR 0x0004 |
697 | #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ | 697 | #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ |
698 | #define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */ | ||
698 | 699 | ||
699 | #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ | 700 | #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ |
700 | #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ | 701 | #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ |
701 | 702 | ||
702 | #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) | 703 | #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON) |
703 | 704 | ||
704 | /* | 705 | /* |
705 | * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. | 706 | * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. |
@@ -794,6 +795,11 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, | |||
794 | extern int vmtruncate(struct inode * inode, loff_t offset); | 795 | extern int vmtruncate(struct inode * inode, loff_t offset); |
795 | extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); | 796 | extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); |
796 | 797 | ||
798 | int truncate_inode_page(struct address_space *mapping, struct page *page); | ||
799 | int generic_error_remove_page(struct address_space *mapping, struct page *page); | ||
800 | |||
801 | int invalidate_inode_page(struct page *page); | ||
802 | |||
797 | #ifdef CONFIG_MMU | 803 | #ifdef CONFIG_MMU |
798 | extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, | 804 | extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
799 | unsigned long address, unsigned int flags); | 805 | unsigned long address, unsigned int flags); |
@@ -1279,7 +1285,7 @@ int in_gate_area_no_task(unsigned long addr); | |||
1279 | #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) | 1285 | #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) |
1280 | #endif /* __HAVE_ARCH_GATE_AREA */ | 1286 | #endif /* __HAVE_ARCH_GATE_AREA */ |
1281 | 1287 | ||
1282 | int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, | 1288 | int drop_caches_sysctl_handler(struct ctl_table *, int, |
1283 | void __user *, size_t *, loff_t *); | 1289 | void __user *, size_t *, loff_t *); |
1284 | unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, | 1290 | unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, |
1285 | unsigned long lru_pages); | 1291 | unsigned long lru_pages); |
@@ -1308,5 +1314,12 @@ void vmemmap_populate_print_last(void); | |||
1308 | extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, | 1314 | extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, |
1309 | size_t size); | 1315 | size_t size); |
1310 | extern void refund_locked_memory(struct mm_struct *mm, size_t size); | 1316 | extern void refund_locked_memory(struct mm_struct *mm, size_t size); |
1317 | |||
1318 | extern void memory_failure(unsigned long pfn, int trapno); | ||
1319 | extern int __memory_failure(unsigned long pfn, int trapno, int ref); | ||
1320 | extern int sysctl_memory_failure_early_kill; | ||
1321 | extern int sysctl_memory_failure_recovery; | ||
1322 | extern atomic_long_t mce_bad_pages; | ||
1323 | |||
1311 | #endif /* __KERNEL__ */ | 1324 | #endif /* __KERNEL__ */ |
1312 | #endif /* _LINUX_MM_H */ | 1325 | #endif /* _LINUX_MM_H */ |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0042090a4d70..21d6aa45206a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -240,6 +240,8 @@ struct mm_struct { | |||
240 | 240 | ||
241 | unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ | 241 | unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ |
242 | 242 | ||
243 | struct linux_binfmt *binfmt; | ||
244 | |||
243 | cpumask_t cpu_vm_mask; | 245 | cpumask_t cpu_vm_mask; |
244 | 246 | ||
245 | /* Architecture-specific MM context */ | 247 | /* Architecture-specific MM context */ |
@@ -259,11 +261,10 @@ struct mm_struct { | |||
259 | unsigned long flags; /* Must use atomic bitops to access the bits */ | 261 | unsigned long flags; /* Must use atomic bitops to access the bits */ |
260 | 262 | ||
261 | struct core_state *core_state; /* coredumping support */ | 263 | struct core_state *core_state; /* coredumping support */ |
262 | 264 | #ifdef CONFIG_AIO | |
263 | /* aio bits */ | ||
264 | spinlock_t ioctx_lock; | 265 | spinlock_t ioctx_lock; |
265 | struct hlist_head ioctx_list; | 266 | struct hlist_head ioctx_list; |
266 | 267 | #endif | |
267 | #ifdef CONFIG_MM_OWNER | 268 | #ifdef CONFIG_MM_OWNER |
268 | /* | 269 | /* |
269 | * "owner" points to a task that is regarded as the canonical | 270 | * "owner" points to a task that is regarded as the canonical |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 652ef01be582..6f7561730d88 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -755,21 +755,20 @@ static inline int is_dma(struct zone *zone) | |||
755 | 755 | ||
756 | /* These two functions are used to setup the per zone pages min values */ | 756 | /* These two functions are used to setup the per zone pages min values */ |
757 | struct ctl_table; | 757 | struct ctl_table; |
758 | struct file; | 758 | int min_free_kbytes_sysctl_handler(struct ctl_table *, int, |
759 | int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, | ||
760 | void __user *, size_t *, loff_t *); | 759 | void __user *, size_t *, loff_t *); |
761 | extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; | 760 | extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; |
762 | int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, | 761 | int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, |
763 | void __user *, size_t *, loff_t *); | 762 | void __user *, size_t *, loff_t *); |
764 | int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, | 763 | int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, |
765 | void __user *, size_t *, loff_t *); | 764 | void __user *, size_t *, loff_t *); |
766 | int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, | 765 | int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, |
767 | struct file *, void __user *, size_t *, loff_t *); | 766 | void __user *, size_t *, loff_t *); |
768 | int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, | 767 | int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, |
769 | struct file *, void __user *, size_t *, loff_t *); | 768 | void __user *, size_t *, loff_t *); |
770 | 769 | ||
771 | extern int numa_zonelist_order_handler(struct ctl_table *, int, | 770 | extern int numa_zonelist_order_handler(struct ctl_table *, int, |
772 | struct file *, void __user *, size_t *, loff_t *); | 771 | void __user *, size_t *, loff_t *); |
773 | extern char numa_zonelist_order[]; | 772 | extern char numa_zonelist_order[]; |
774 | #define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ | 773 | #define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ |
775 | 774 | ||
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 13de789f0a5c..6b202b173955 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -51,6 +51,9 @@ | |||
51 | * PG_buddy is set to indicate that the page is free and in the buddy system | 51 | * PG_buddy is set to indicate that the page is free and in the buddy system |
52 | * (see mm/page_alloc.c). | 52 | * (see mm/page_alloc.c). |
53 | * | 53 | * |
54 | * PG_hwpoison indicates that a page got corrupted in hardware and contains | ||
55 | * data with incorrect ECC bits that triggered a machine check. Accessing is | ||
56 | * not safe since it may cause another machine check. Don't touch! | ||
54 | */ | 57 | */ |
55 | 58 | ||
56 | /* | 59 | /* |
@@ -102,6 +105,9 @@ enum pageflags { | |||
102 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED | 105 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED |
103 | PG_uncached, /* Page has been mapped as uncached */ | 106 | PG_uncached, /* Page has been mapped as uncached */ |
104 | #endif | 107 | #endif |
108 | #ifdef CONFIG_MEMORY_FAILURE | ||
109 | PG_hwpoison, /* hardware poisoned page. Don't touch */ | ||
110 | #endif | ||
105 | __NR_PAGEFLAGS, | 111 | __NR_PAGEFLAGS, |
106 | 112 | ||
107 | /* Filesystems */ | 113 | /* Filesystems */ |
@@ -269,6 +275,15 @@ PAGEFLAG(Uncached, uncached) | |||
269 | PAGEFLAG_FALSE(Uncached) | 275 | PAGEFLAG_FALSE(Uncached) |
270 | #endif | 276 | #endif |
271 | 277 | ||
278 | #ifdef CONFIG_MEMORY_FAILURE | ||
279 | PAGEFLAG(HWPoison, hwpoison) | ||
280 | TESTSETFLAG(HWPoison, hwpoison) | ||
281 | #define __PG_HWPOISON (1UL << PG_hwpoison) | ||
282 | #else | ||
283 | PAGEFLAG_FALSE(HWPoison) | ||
284 | #define __PG_HWPOISON 0 | ||
285 | #endif | ||
286 | |||
272 | static inline int PageUptodate(struct page *page) | 287 | static inline int PageUptodate(struct page *page) |
273 | { | 288 | { |
274 | int ret = test_bit(PG_uptodate, &(page)->flags); | 289 | int ret = test_bit(PG_uptodate, &(page)->flags); |
@@ -393,7 +408,7 @@ static inline void __ClearPageTail(struct page *page) | |||
393 | 1 << PG_private | 1 << PG_private_2 | \ | 408 | 1 << PG_private | 1 << PG_private_2 | \ |
394 | 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ | 409 | 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ |
395 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ | 410 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ |
396 | 1 << PG_unevictable | __PG_MLOCKED) | 411 | 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON) |
397 | 412 | ||
398 | /* | 413 | /* |
399 | * Flags checked when a page is prepped for return by the page allocator. | 414 | * Flags checked when a page is prepped for return by the page allocator. |
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index ada779f24178..4b938d4f3ac2 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h | |||
@@ -38,6 +38,7 @@ enum { | |||
38 | PCG_LOCK, /* page cgroup is locked */ | 38 | PCG_LOCK, /* page cgroup is locked */ |
39 | PCG_CACHE, /* charged as cache */ | 39 | PCG_CACHE, /* charged as cache */ |
40 | PCG_USED, /* this object is in use. */ | 40 | PCG_USED, /* this object is in use. */ |
41 | PCG_ACCT_LRU, /* page has been accounted for */ | ||
41 | }; | 42 | }; |
42 | 43 | ||
43 | #define TESTPCGFLAG(uname, lname) \ | 44 | #define TESTPCGFLAG(uname, lname) \ |
@@ -52,11 +53,23 @@ static inline void SetPageCgroup##uname(struct page_cgroup *pc)\ | |||
52 | static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ | 53 | static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ |
53 | { clear_bit(PCG_##lname, &pc->flags); } | 54 | { clear_bit(PCG_##lname, &pc->flags); } |
54 | 55 | ||
56 | #define TESTCLEARPCGFLAG(uname, lname) \ | ||
57 | static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ | ||
58 | { return test_and_clear_bit(PCG_##lname, &pc->flags); } | ||
59 | |||
55 | /* Cache flag is set only once (at allocation) */ | 60 | /* Cache flag is set only once (at allocation) */ |
56 | TESTPCGFLAG(Cache, CACHE) | 61 | TESTPCGFLAG(Cache, CACHE) |
62 | CLEARPCGFLAG(Cache, CACHE) | ||
63 | SETPCGFLAG(Cache, CACHE) | ||
57 | 64 | ||
58 | TESTPCGFLAG(Used, USED) | 65 | TESTPCGFLAG(Used, USED) |
59 | CLEARPCGFLAG(Used, USED) | 66 | CLEARPCGFLAG(Used, USED) |
67 | SETPCGFLAG(Used, USED) | ||
68 | |||
69 | SETPCGFLAG(AcctLRU, ACCT_LRU) | ||
70 | CLEARPCGFLAG(AcctLRU, ACCT_LRU) | ||
71 | TESTPCGFLAG(AcctLRU, ACCT_LRU) | ||
72 | TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU) | ||
60 | 73 | ||
61 | static inline int page_cgroup_nid(struct page_cgroup *pc) | 74 | static inline int page_cgroup_nid(struct page_cgroup *pc) |
62 | { | 75 | { |
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 7803565aa877..da1fda8623e0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h | |||
@@ -2527,6 +2527,16 @@ | |||
2527 | #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e | 2527 | #define PCI_DEVICE_ID_INTEL_E7525_MCH 0x359e |
2528 | #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b | 2528 | #define PCI_DEVICE_ID_INTEL_IOAT_CNB 0x360b |
2529 | #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c | 2529 | #define PCI_DEVICE_ID_INTEL_FBD_CNB 0x360c |
2530 | #define PCI_DEVICE_ID_INTEL_IOAT_JSF0 0x3710 | ||
2531 | #define PCI_DEVICE_ID_INTEL_IOAT_JSF1 0x3711 | ||
2532 | #define PCI_DEVICE_ID_INTEL_IOAT_JSF2 0x3712 | ||
2533 | #define PCI_DEVICE_ID_INTEL_IOAT_JSF3 0x3713 | ||
2534 | #define PCI_DEVICE_ID_INTEL_IOAT_JSF4 0x3714 | ||
2535 | #define PCI_DEVICE_ID_INTEL_IOAT_JSF5 0x3715 | ||
2536 | #define PCI_DEVICE_ID_INTEL_IOAT_JSF6 0x3716 | ||
2537 | #define PCI_DEVICE_ID_INTEL_IOAT_JSF7 0x3717 | ||
2538 | #define PCI_DEVICE_ID_INTEL_IOAT_JSF8 0x3718 | ||
2539 | #define PCI_DEVICE_ID_INTEL_IOAT_JSF9 0x3719 | ||
2530 | #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 | 2540 | #define PCI_DEVICE_ID_INTEL_ICH10_0 0x3a14 |
2531 | #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 | 2541 | #define PCI_DEVICE_ID_INTEL_ICH10_1 0x3a16 |
2532 | #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 | 2542 | #define PCI_DEVICE_ID_INTEL_ICH10_2 0x3a18 |
diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 07bff666e65b..931150566ade 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h | |||
@@ -88,4 +88,6 @@ | |||
88 | #define PR_TASK_PERF_EVENTS_DISABLE 31 | 88 | #define PR_TASK_PERF_EVENTS_DISABLE 31 |
89 | #define PR_TASK_PERF_EVENTS_ENABLE 32 | 89 | #define PR_TASK_PERF_EVENTS_ENABLE 32 |
90 | 90 | ||
91 | #define PR_MCE_KILL 33 | ||
92 | |||
91 | #endif /* _LINUX_PRCTL_H */ | 93 | #endif /* _LINUX_PRCTL_H */ |
diff --git a/include/linux/relay.h b/include/linux/relay.h index 953fc055e875..14a86bc7102b 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h | |||
@@ -140,7 +140,7 @@ struct rchan_callbacks | |||
140 | * cause relay_open() to create a single global buffer rather | 140 | * cause relay_open() to create a single global buffer rather |
141 | * than the default set of per-cpu buffers. | 141 | * than the default set of per-cpu buffers. |
142 | * | 142 | * |
143 | * See Documentation/filesystems/relayfs.txt for more info. | 143 | * See Documentation/filesystems/relay.txt for more info. |
144 | */ | 144 | */ |
145 | struct dentry *(*create_buf_file)(const char *filename, | 145 | struct dentry *(*create_buf_file)(const char *filename, |
146 | struct dentry *parent, | 146 | struct dentry *parent, |
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 511f42fc6816..731af71cddc9 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h | |||
@@ -35,6 +35,10 @@ struct res_counter { | |||
35 | */ | 35 | */ |
36 | unsigned long long limit; | 36 | unsigned long long limit; |
37 | /* | 37 | /* |
38 | * the limit that usage can exceed | |
39 | */ | ||
40 | unsigned long long soft_limit; | ||
41 | /* | ||
38 | * the number of unsuccessful attempts to consume the resource | 42 | * the number of unsuccessful attempts to consume the resource |
39 | */ | 43 | */ |
40 | unsigned long long failcnt; | 44 | unsigned long long failcnt; |
@@ -87,6 +91,7 @@ enum { | |||
87 | RES_MAX_USAGE, | 91 | RES_MAX_USAGE, |
88 | RES_LIMIT, | 92 | RES_LIMIT, |
89 | RES_FAILCNT, | 93 | RES_FAILCNT, |
94 | RES_SOFT_LIMIT, | ||
90 | }; | 95 | }; |
91 | 96 | ||
92 | /* | 97 | /* |
@@ -109,7 +114,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent); | |||
109 | int __must_check res_counter_charge_locked(struct res_counter *counter, | 114 | int __must_check res_counter_charge_locked(struct res_counter *counter, |
110 | unsigned long val); | 115 | unsigned long val); |
111 | int __must_check res_counter_charge(struct res_counter *counter, | 116 | int __must_check res_counter_charge(struct res_counter *counter, |
112 | unsigned long val, struct res_counter **limit_fail_at); | 117 | unsigned long val, struct res_counter **limit_fail_at, |
118 | struct res_counter **soft_limit_at); | ||
113 | 119 | ||
114 | /* | 120 | /* |
115 | * uncharge - tell that some portion of the resource is released | 121 | * uncharge - tell that some portion of the resource is released |
@@ -122,7 +128,8 @@ int __must_check res_counter_charge(struct res_counter *counter, | |||
122 | */ | 128 | */ |
123 | 129 | ||
124 | void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); | 130 | void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); |
125 | void res_counter_uncharge(struct res_counter *counter, unsigned long val); | 131 | void res_counter_uncharge(struct res_counter *counter, unsigned long val, |
132 | bool *was_soft_limit_excess); | ||
126 | 133 | ||
127 | static inline bool res_counter_limit_check_locked(struct res_counter *cnt) | 134 | static inline bool res_counter_limit_check_locked(struct res_counter *cnt) |
128 | { | 135 | { |
@@ -132,6 +139,36 @@ static inline bool res_counter_limit_check_locked(struct res_counter *cnt) | |||
132 | return false; | 139 | return false; |
133 | } | 140 | } |
134 | 141 | ||
142 | static inline bool res_counter_soft_limit_check_locked(struct res_counter *cnt) | ||
143 | { | ||
144 | if (cnt->usage < cnt->soft_limit) | ||
145 | return true; | ||
146 | |||
147 | return false; | ||
148 | } | ||
149 | |||
150 | /** | ||
151 | * Get the difference between the usage and the soft limit | ||
152 | * @cnt: The counter | ||
153 | * | ||
154 | * Returns 0 if usage is less than or equal to soft limit | ||
155 | * The difference between usage and soft limit, otherwise. | ||
156 | */ | ||
157 | static inline unsigned long long | ||
158 | res_counter_soft_limit_excess(struct res_counter *cnt) | ||
159 | { | ||
160 | unsigned long long excess; | ||
161 | unsigned long flags; | ||
162 | |||
163 | spin_lock_irqsave(&cnt->lock, flags); | ||
164 | if (cnt->usage <= cnt->soft_limit) | ||
165 | excess = 0; | ||
166 | else | ||
167 | excess = cnt->usage - cnt->soft_limit; | ||
168 | spin_unlock_irqrestore(&cnt->lock, flags); | ||
169 | return excess; | ||
170 | } | ||
171 | |||
135 | /* | 172 | /* |
136 | * Helper function to detect if the cgroup is within its limit or | 173 | * Helper function to detect if the cgroup is within its limit or |
137 | * not. It's currently called from cgroup_rss_prepare() | 174 | * not. It's currently called from cgroup_rss_prepare() |
@@ -147,6 +184,17 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt) | |||
147 | return ret; | 184 | return ret; |
148 | } | 185 | } |
149 | 186 | ||
187 | static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt) | ||
188 | { | ||
189 | bool ret; | ||
190 | unsigned long flags; | ||
191 | |||
192 | spin_lock_irqsave(&cnt->lock, flags); | ||
193 | ret = res_counter_soft_limit_check_locked(cnt); | ||
194 | spin_unlock_irqrestore(&cnt->lock, flags); | ||
195 | return ret; | ||
196 | } | ||
197 | |||
150 | static inline void res_counter_reset_max(struct res_counter *cnt) | 198 | static inline void res_counter_reset_max(struct res_counter *cnt) |
151 | { | 199 | { |
152 | unsigned long flags; | 200 | unsigned long flags; |
@@ -180,4 +228,16 @@ static inline int res_counter_set_limit(struct res_counter *cnt, | |||
180 | return ret; | 228 | return ret; |
181 | } | 229 | } |
182 | 230 | ||
231 | static inline int | ||
232 | res_counter_set_soft_limit(struct res_counter *cnt, | ||
233 | unsigned long long soft_limit) | ||
234 | { | ||
235 | unsigned long flags; | ||
236 | |||
237 | spin_lock_irqsave(&cnt->lock, flags); | ||
238 | cnt->soft_limit = soft_limit; | ||
239 | spin_unlock_irqrestore(&cnt->lock, flags); | ||
240 | return 0; | ||
241 | } | ||
242 | |||
183 | #endif | 243 | #endif |
diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 477841d29fce..cb0ba7032609 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h | |||
@@ -81,7 +81,19 @@ static inline void page_dup_rmap(struct page *page) | |||
81 | */ | 81 | */ |
82 | int page_referenced(struct page *, int is_locked, | 82 | int page_referenced(struct page *, int is_locked, |
83 | struct mem_cgroup *cnt, unsigned long *vm_flags); | 83 | struct mem_cgroup *cnt, unsigned long *vm_flags); |
84 | int try_to_unmap(struct page *, int ignore_refs); | 84 | enum ttu_flags { |
85 | TTU_UNMAP = 0, /* unmap mode */ | ||
86 | TTU_MIGRATION = 1, /* migration mode */ | ||
87 | TTU_MUNLOCK = 2, /* munlock mode */ | ||
88 | TTU_ACTION_MASK = 0xff, | ||
89 | |||
90 | TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ | ||
91 | TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ | ||
92 | TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */ | ||
93 | }; | ||
94 | #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) | ||
95 | |||
96 | int try_to_unmap(struct page *, enum ttu_flags flags); | ||
85 | 97 | ||
86 | /* | 98 | /* |
87 | * Called from mm/filemap_xip.c to unmap empty zero page | 99 | * Called from mm/filemap_xip.c to unmap empty zero page |
@@ -108,6 +120,13 @@ int page_mkclean(struct page *); | |||
108 | */ | 120 | */ |
109 | int try_to_munlock(struct page *); | 121 | int try_to_munlock(struct page *); |
110 | 122 | ||
123 | /* | ||
124 | * Called by memory-failure.c to kill processes. | ||
125 | */ | ||
126 | struct anon_vma *page_lock_anon_vma(struct page *page); | ||
127 | void page_unlock_anon_vma(struct anon_vma *anon_vma); | ||
128 | int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); | ||
129 | |||
111 | #else /* !CONFIG_MMU */ | 130 | #else /* !CONFIG_MMU */ |
112 | 131 | ||
113 | #define anon_vma_init() do {} while (0) | 132 | #define anon_vma_init() do {} while (0) |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 848d1f20086e..75e6e60bf583 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -309,7 +309,7 @@ extern void softlockup_tick(void); | |||
309 | extern void touch_softlockup_watchdog(void); | 309 | extern void touch_softlockup_watchdog(void); |
310 | extern void touch_all_softlockup_watchdogs(void); | 310 | extern void touch_all_softlockup_watchdogs(void); |
311 | extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, | 311 | extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, |
312 | struct file *filp, void __user *buffer, | 312 | void __user *buffer, |
313 | size_t *lenp, loff_t *ppos); | 313 | size_t *lenp, loff_t *ppos); |
314 | extern unsigned int softlockup_panic; | 314 | extern unsigned int softlockup_panic; |
315 | extern int softlockup_thresh; | 315 | extern int softlockup_thresh; |
@@ -331,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count; | |||
331 | extern unsigned long sysctl_hung_task_timeout_secs; | 331 | extern unsigned long sysctl_hung_task_timeout_secs; |
332 | extern unsigned long sysctl_hung_task_warnings; | 332 | extern unsigned long sysctl_hung_task_warnings; |
333 | extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, | 333 | extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, |
334 | struct file *filp, void __user *buffer, | 334 | void __user *buffer, |
335 | size_t *lenp, loff_t *ppos); | 335 | size_t *lenp, loff_t *ppos); |
336 | #endif | 336 | #endif |
337 | 337 | ||
@@ -1271,7 +1271,6 @@ struct task_struct { | |||
1271 | struct mm_struct *mm, *active_mm; | 1271 | struct mm_struct *mm, *active_mm; |
1272 | 1272 | ||
1273 | /* task state */ | 1273 | /* task state */ |
1274 | struct linux_binfmt *binfmt; | ||
1275 | int exit_state; | 1274 | int exit_state; |
1276 | int exit_code, exit_signal; | 1275 | int exit_code, exit_signal; |
1277 | int pdeath_signal; /* The signal sent when the parent dies */ | 1276 | int pdeath_signal; /* The signal sent when the parent dies */ |
@@ -1735,6 +1734,7 @@ extern cputime_t task_gtime(struct task_struct *p); | |||
1735 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ | 1734 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ |
1736 | #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ | 1735 | #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ |
1737 | #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ | 1736 | #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ |
1737 | #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ | ||
1738 | #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ | 1738 | #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ |
1739 | #define PF_DUMPCORE 0x00000200 /* dumped core */ | 1739 | #define PF_DUMPCORE 0x00000200 /* dumped core */ |
1740 | #define PF_SIGNALED 0x00000400 /* killed by a signal */ | 1740 | #define PF_SIGNALED 0x00000400 /* killed by a signal */ |
@@ -1754,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p); | |||
1754 | #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ | 1754 | #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ |
1755 | #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ | 1755 | #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ |
1756 | #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ | 1756 | #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ |
1757 | #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ | ||
1757 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ | 1758 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ |
1758 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ | 1759 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ |
1759 | #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ | 1760 | #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ |
@@ -1906,7 +1907,7 @@ extern unsigned int sysctl_sched_time_avg; | |||
1906 | extern unsigned int sysctl_timer_migration; | 1907 | extern unsigned int sysctl_timer_migration; |
1907 | 1908 | ||
1908 | int sched_nr_latency_handler(struct ctl_table *table, int write, | 1909 | int sched_nr_latency_handler(struct ctl_table *table, int write, |
1909 | struct file *file, void __user *buffer, size_t *length, | 1910 | void __user *buffer, size_t *length, |
1910 | loff_t *ppos); | 1911 | loff_t *ppos); |
1911 | #endif | 1912 | #endif |
1912 | #ifdef CONFIG_SCHED_DEBUG | 1913 | #ifdef CONFIG_SCHED_DEBUG |
@@ -1924,7 +1925,7 @@ extern unsigned int sysctl_sched_rt_period; | |||
1924 | extern int sysctl_sched_rt_runtime; | 1925 | extern int sysctl_sched_rt_runtime; |
1925 | 1926 | ||
1926 | int sched_rt_handler(struct ctl_table *table, int write, | 1927 | int sched_rt_handler(struct ctl_table *table, int write, |
1927 | struct file *filp, void __user *buffer, size_t *lenp, | 1928 | void __user *buffer, size_t *lenp, |
1928 | loff_t *ppos); | 1929 | loff_t *ppos); |
1929 | 1930 | ||
1930 | extern unsigned int sysctl_sched_compat_yield; | 1931 | extern unsigned int sysctl_sched_compat_yield; |
@@ -2059,6 +2060,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv); | |||
2059 | extern int kill_pid(struct pid *pid, int sig, int priv); | 2060 | extern int kill_pid(struct pid *pid, int sig, int priv); |
2060 | extern int kill_proc_info(int, struct siginfo *, pid_t); | 2061 | extern int kill_proc_info(int, struct siginfo *, pid_t); |
2061 | extern int do_notify_parent(struct task_struct *, int); | 2062 | extern int do_notify_parent(struct task_struct *, int); |
2063 | extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); | ||
2062 | extern void force_sig(int, struct task_struct *); | 2064 | extern void force_sig(int, struct task_struct *); |
2063 | extern void force_sig_specific(int, struct task_struct *); | 2065 | extern void force_sig_specific(int, struct task_struct *); |
2064 | extern int send_sig(int, struct task_struct *, int); | 2066 | extern int send_sig(int, struct task_struct *, int); |
@@ -2336,7 +2338,10 @@ static inline int signal_pending(struct task_struct *p) | |||
2336 | return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); | 2338 | return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); |
2337 | } | 2339 | } |
2338 | 2340 | ||
2339 | extern int __fatal_signal_pending(struct task_struct *p); | 2341 | static inline int __fatal_signal_pending(struct task_struct *p) |
2342 | { | ||
2343 | return unlikely(sigismember(&p->pending.signal, SIGKILL)); | ||
2344 | } | ||
2340 | 2345 | ||
2341 | static inline int fatal_signal_pending(struct task_struct *p) | 2346 | static inline int fatal_signal_pending(struct task_struct *p) |
2342 | { | 2347 | { |
diff --git a/include/linux/security.h b/include/linux/security.h index d050b66ab9ef..239e40d0450b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h | |||
@@ -133,7 +133,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) | |||
133 | return PAGE_ALIGN(mmap_min_addr); | 133 | return PAGE_ALIGN(mmap_min_addr); |
134 | return hint; | 134 | return hint; |
135 | } | 135 | } |
136 | extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, | 136 | extern int mmap_min_addr_handler(struct ctl_table *table, int write, |
137 | void __user *buffer, size_t *lenp, loff_t *ppos); | 137 | void __user *buffer, size_t *lenp, loff_t *ppos); |
138 | 138 | ||
139 | #ifdef CONFIG_SECURITY | 139 | #ifdef CONFIG_SECURITY |
diff --git a/include/linux/signal.h b/include/linux/signal.h index c7552836bd95..ab9272cc270c 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h | |||
@@ -233,6 +233,8 @@ static inline int valid_signal(unsigned long sig) | |||
233 | } | 233 | } |
234 | 234 | ||
235 | extern int next_signal(struct sigpending *pending, sigset_t *mask); | 235 | extern int next_signal(struct sigpending *pending, sigset_t *mask); |
236 | extern int do_send_sig_info(int sig, struct siginfo *info, | ||
237 | struct task_struct *p, bool group); | ||
236 | extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); | 238 | extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); |
237 | extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); | 239 | extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); |
238 | extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, | 240 | extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, |
diff --git a/include/linux/swap.h b/include/linux/swap.h index 6c990e658f4e..4ec90019c1a4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -34,16 +34,38 @@ static inline int current_is_kswapd(void) | |||
34 | * the type/offset into the pte as 5/27 as well. | 34 | * the type/offset into the pte as 5/27 as well. |
35 | */ | 35 | */ |
36 | #define MAX_SWAPFILES_SHIFT 5 | 36 | #define MAX_SWAPFILES_SHIFT 5 |
37 | #ifndef CONFIG_MIGRATION | 37 | |
38 | #define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT) | 38 | /* |
39 | * Use some of the swap files numbers for other purposes. This | ||
40 | * is a convenient way to hook into the VM to trigger special | ||
41 | * actions on faults. | ||
42 | */ | ||
43 | |||
44 | /* | ||
45 | * NUMA node memory migration support | ||
46 | */ | ||
47 | #ifdef CONFIG_MIGRATION | ||
48 | #define SWP_MIGRATION_NUM 2 | ||
49 | #define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM) | ||
50 | #define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1) | ||
39 | #else | 51 | #else |
40 | /* Use last two entries for page migration swap entries */ | 52 | #define SWP_MIGRATION_NUM 0 |
41 | #define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2) | ||
42 | #define SWP_MIGRATION_READ MAX_SWAPFILES | ||
43 | #define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1) | ||
44 | #endif | 53 | #endif |
45 | 54 | ||
46 | /* | 55 | /* |
56 | * Handling of hardware poisoned pages with memory corruption. | ||
57 | */ | ||
58 | #ifdef CONFIG_MEMORY_FAILURE | ||
59 | #define SWP_HWPOISON_NUM 1 | ||
60 | #define SWP_HWPOISON MAX_SWAPFILES | ||
61 | #else | ||
62 | #define SWP_HWPOISON_NUM 0 | ||
63 | #endif | ||
64 | |||
65 | #define MAX_SWAPFILES \ | ||
66 | ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) | ||
67 | |||
68 | /* | ||
47 | * Magic header for a swap area. The first part of the union is | 69 | * Magic header for a swap area. The first part of the union is |
48 | * what the swap magic looks like for the old (limited to 128MB) | 70 | * what the swap magic looks like for the old (limited to 128MB) |
49 | * swap area format, the second part of the union adds - in the | 71 | * swap area format, the second part of the union adds - in the |
@@ -217,6 +239,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
217 | extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, | 239 | extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, |
218 | gfp_t gfp_mask, bool noswap, | 240 | gfp_t gfp_mask, bool noswap, |
219 | unsigned int swappiness); | 241 | unsigned int swappiness); |
242 | extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | ||
243 | gfp_t gfp_mask, bool noswap, | ||
244 | unsigned int swappiness, | ||
245 | struct zone *zone, | ||
246 | int nid); | ||
220 | extern int __isolate_lru_page(struct page *page, int mode, int file); | 247 | extern int __isolate_lru_page(struct page *page, int mode, int file); |
221 | extern unsigned long shrink_all_memory(unsigned long nr_pages); | 248 | extern unsigned long shrink_all_memory(unsigned long nr_pages); |
222 | extern int vm_swappiness; | 249 | extern int vm_swappiness; |
@@ -240,7 +267,7 @@ extern int page_evictable(struct page *page, struct vm_area_struct *vma); | |||
240 | extern void scan_mapping_unevictable_pages(struct address_space *); | 267 | extern void scan_mapping_unevictable_pages(struct address_space *); |
241 | 268 | ||
242 | extern unsigned long scan_unevictable_pages; | 269 | extern unsigned long scan_unevictable_pages; |
243 | extern int scan_unevictable_handler(struct ctl_table *, int, struct file *, | 270 | extern int scan_unevictable_handler(struct ctl_table *, int, |
244 | void __user *, size_t *, loff_t *); | 271 | void __user *, size_t *, loff_t *); |
245 | extern int scan_unevictable_register_node(struct node *node); | 272 | extern int scan_unevictable_register_node(struct node *node); |
246 | extern void scan_unevictable_unregister_node(struct node *node); | 273 | extern void scan_unevictable_unregister_node(struct node *node); |
diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 6ec39ab27b4b..cd42e30b7c6e 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h | |||
@@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry) | |||
131 | 131 | ||
132 | #endif | 132 | #endif |
133 | 133 | ||
134 | #ifdef CONFIG_MEMORY_FAILURE | ||
135 | /* | ||
136 | * Support for hardware poisoned pages | ||
137 | */ | ||
138 | static inline swp_entry_t make_hwpoison_entry(struct page *page) | ||
139 | { | ||
140 | BUG_ON(!PageLocked(page)); | ||
141 | return swp_entry(SWP_HWPOISON, page_to_pfn(page)); | ||
142 | } | ||
143 | |||
144 | static inline int is_hwpoison_entry(swp_entry_t entry) | ||
145 | { | ||
146 | return swp_type(entry) == SWP_HWPOISON; | ||
147 | } | ||
148 | #else | ||
149 | |||
150 | static inline swp_entry_t make_hwpoison_entry(struct page *page) | ||
151 | { | ||
152 | return swp_entry(0, 0); | ||
153 | } | ||
154 | |||
155 | static inline int is_hwpoison_entry(swp_entry_t swp) | ||
156 | { | ||
157 | return 0; | ||
158 | } | ||
159 | #endif | ||
160 | |||
161 | #if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) | ||
162 | static inline int non_swap_entry(swp_entry_t entry) | ||
163 | { | ||
164 | return swp_type(entry) >= MAX_SWAPFILES; | ||
165 | } | ||
166 | #else | ||
167 | static inline int non_swap_entry(swp_entry_t entry) | ||
168 | { | ||
169 | return 0; | ||
170 | } | ||
171 | #endif | ||
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e76d3b22a466..1e4743ee6831 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h | |||
@@ -29,7 +29,6 @@ | |||
29 | #include <linux/types.h> | 29 | #include <linux/types.h> |
30 | #include <linux/compiler.h> | 30 | #include <linux/compiler.h> |
31 | 31 | ||
32 | struct file; | ||
33 | struct completion; | 32 | struct completion; |
34 | 33 | ||
35 | #define CTL_MAXNAME 10 /* how many path components do we allow in a | 34 | #define CTL_MAXNAME 10 /* how many path components do we allow in a |
@@ -977,25 +976,25 @@ typedef int ctl_handler (struct ctl_table *table, | |||
977 | void __user *oldval, size_t __user *oldlenp, | 976 | void __user *oldval, size_t __user *oldlenp, |
978 | void __user *newval, size_t newlen); | 977 | void __user *newval, size_t newlen); |
979 | 978 | ||
980 | typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp, | 979 | typedef int proc_handler (struct ctl_table *ctl, int write, |
981 | void __user *buffer, size_t *lenp, loff_t *ppos); | 980 | void __user *buffer, size_t *lenp, loff_t *ppos); |
982 | 981 | ||
983 | extern int proc_dostring(struct ctl_table *, int, struct file *, | 982 | extern int proc_dostring(struct ctl_table *, int, |
984 | void __user *, size_t *, loff_t *); | 983 | void __user *, size_t *, loff_t *); |
985 | extern int proc_dointvec(struct ctl_table *, int, struct file *, | 984 | extern int proc_dointvec(struct ctl_table *, int, |
986 | void __user *, size_t *, loff_t *); | 985 | void __user *, size_t *, loff_t *); |
987 | extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *, | 986 | extern int proc_dointvec_minmax(struct ctl_table *, int, |
988 | void __user *, size_t *, loff_t *); | 987 | void __user *, size_t *, loff_t *); |
989 | extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *, | 988 | extern int proc_dointvec_jiffies(struct ctl_table *, int, |
990 | void __user *, size_t *, loff_t *); | 989 | void __user *, size_t *, loff_t *); |
991 | extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *, | 990 | extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, |
992 | void __user *, size_t *, loff_t *); | 991 | void __user *, size_t *, loff_t *); |
993 | extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *, | 992 | extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, |
994 | void __user *, size_t *, loff_t *); | 993 | void __user *, size_t *, loff_t *); |
995 | extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *, | 994 | extern int proc_doulongvec_minmax(struct ctl_table *, int, |
996 | void __user *, size_t *, loff_t *); | 995 | void __user *, size_t *, loff_t *); |
997 | extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, | 996 | extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, |
998 | struct file *, void __user *, size_t *, loff_t *); | 997 | void __user *, size_t *, loff_t *); |
999 | 998 | ||
1000 | extern int do_sysctl (int __user *name, int nlen, | 999 | extern int do_sysctl (int __user *name, int nlen, |
1001 | void __user *oldval, size_t __user *oldlenp, | 1000 | void __user *oldval, size_t __user *oldlenp, |
diff --git a/include/linux/time.h b/include/linux/time.h index 56787c093345..fe04e5ef6a59 100644 --- a/include/linux/time.h +++ b/include/linux/time.h | |||
@@ -155,6 +155,34 @@ extern void timekeeping_leap_insert(int leapsecond); | |||
155 | struct tms; | 155 | struct tms; |
156 | extern void do_sys_times(struct tms *); | 156 | extern void do_sys_times(struct tms *); |
157 | 157 | ||
158 | /* | ||
159 | * Similar to the struct tm in userspace <time.h>, but it needs to be here so | ||
160 | * that the kernel source is self contained. | ||
161 | */ | ||
162 | struct tm { | ||
163 | /* | ||
164 | * the number of seconds after the minute, normally in the range | ||
165 | * 0 to 59, but can be up to 60 to allow for leap seconds | ||
166 | */ | ||
167 | int tm_sec; | ||
168 | /* the number of minutes after the hour, in the range 0 to 59*/ | ||
169 | int tm_min; | ||
170 | /* the number of hours past midnight, in the range 0 to 23 */ | ||
171 | int tm_hour; | ||
172 | /* the day of the month, in the range 1 to 31 */ | ||
173 | int tm_mday; | ||
174 | /* the number of months since January, in the range 0 to 11 */ | ||
175 | int tm_mon; | ||
176 | /* the number of years since 1900 */ | ||
177 | long tm_year; | ||
178 | /* the number of days since Sunday, in the range 0 to 6 */ | ||
179 | int tm_wday; | ||
180 | /* the number of days since January 1, in the range 0 to 365 */ | ||
181 | int tm_yday; | ||
182 | }; | ||
183 | |||
184 | void time_to_tm(time_t totalsecs, int offset, struct tm *result); | ||
185 | |||
158 | /** | 186 | /** |
159 | * timespec_to_ns - Convert timespec to nanoseconds | 187 | * timespec_to_ns - Convert timespec to nanoseconds |
160 | * @ts: pointer to the timespec variable to be converted | 188 | * @ts: pointer to the timespec variable to be converted |
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 17ba82efa483..1eb44a924e56 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Tracing hooks | 2 | * Tracing hooks |
3 | * | 3 | * |
4 | * Copyright (C) 2008 Red Hat, Inc. All rights reserved. | 4 | * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. |
5 | * | 5 | * |
6 | * This copyrighted material is made available to anyone wishing to use, | 6 | * This copyrighted material is made available to anyone wishing to use, |
7 | * modify, copy, or redistribute it subject to the terms and conditions | 7 | * modify, copy, or redistribute it subject to the terms and conditions |
@@ -463,22 +463,38 @@ static inline int tracehook_get_signal(struct task_struct *task, | |||
463 | 463 | ||
464 | /** | 464 | /** |
465 | * tracehook_notify_jctl - report about job control stop/continue | 465 | * tracehook_notify_jctl - report about job control stop/continue |
466 | * @notify: nonzero if this is the last thread in the group to stop | 466 | * @notify: zero, %CLD_STOPPED or %CLD_CONTINUED |
467 | * @why: %CLD_STOPPED or %CLD_CONTINUED | 467 | * @why: %CLD_STOPPED or %CLD_CONTINUED |
468 | * | 468 | * |
469 | * This is called when we might call do_notify_parent_cldstop(). | 469 | * This is called when we might call do_notify_parent_cldstop(). |
470 | * It's called when about to stop for job control; we are already in | ||
471 | * %TASK_STOPPED state, about to call schedule(). It's also called when | ||
472 | * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made. | ||
473 | * | 470 | * |
474 | * Return nonzero to generate a %SIGCHLD with @why, which is | 471 | * @notify is zero if we would not ordinarily send a %SIGCHLD, |
475 | * normal if @notify is nonzero. | 472 | * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD. |
476 | * | 473 | * |
477 | * Called with no locks held. | 474 | * @why is %CLD_STOPPED when about to stop for job control; |
475 | * we are already in %TASK_STOPPED state, about to call schedule(). | ||
476 | * It might also be that we have just exited (check %PF_EXITING), | ||
477 | * but need to report that a group-wide stop is complete. | ||
478 | * | ||
479 | * @why is %CLD_CONTINUED when waking up after job control stop and | ||
480 | * ready to make a delayed @notify report. | ||
481 | * | ||
482 | * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal. | ||
483 | * | ||
484 | * Called with the siglock held. | ||
478 | */ | 485 | */ |
479 | static inline int tracehook_notify_jctl(int notify, int why) | 486 | static inline int tracehook_notify_jctl(int notify, int why) |
480 | { | 487 | { |
481 | return notify || (current->ptrace & PT_PTRACED); | 488 | return notify ?: (current->ptrace & PT_PTRACED) ? why : 0; |
489 | } | ||
490 | |||
491 | /** | ||
492 | * tracehook_finish_jctl - report about return from job control stop | ||
493 | * | ||
494 | * This is called by do_signal_stop() after wakeup. | ||
495 | */ | ||
496 | static inline void tracehook_finish_jctl(void) | ||
497 | { | ||
482 | } | 498 | } |
483 | 499 | ||
484 | #define DEATH_REAP -1 | 500 | #define DEATH_REAP -1 |
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63a3f7a80580..660a9de96f81 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h | |||
@@ -4,7 +4,7 @@ | |||
4 | /* | 4 | /* |
5 | * Kernel Tracepoint API. | 5 | * Kernel Tracepoint API. |
6 | * | 6 | * |
7 | * See Documentation/tracepoint.txt. | 7 | * See Documentation/trace/tracepoints.txt. |
8 | * | 8 | * |
9 | * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> | 9 | * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> |
10 | * | 10 | * |
diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h index 46dd12c5709e..9356b24223ac 100644 --- a/include/linux/unaligned/be_byteshift.h +++ b/include/linux/unaligned/be_byteshift.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H | 1 | #ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H |
2 | #define _LINUX_UNALIGNED_BE_BYTESHIFT_H | 2 | #define _LINUX_UNALIGNED_BE_BYTESHIFT_H |
3 | 3 | ||
4 | #include <linux/kernel.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
6 | static inline u16 __get_unaligned_be16(const u8 *p) | 6 | static inline u16 __get_unaligned_be16(const u8 *p) |
7 | { | 7 | { |
diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h index 59777e951baf..be376fb79b64 100644 --- a/include/linux/unaligned/le_byteshift.h +++ b/include/linux/unaligned/le_byteshift.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H | 1 | #ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H |
2 | #define _LINUX_UNALIGNED_LE_BYTESHIFT_H | 2 | #define _LINUX_UNALIGNED_LE_BYTESHIFT_H |
3 | 3 | ||
4 | #include <linux/kernel.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
6 | static inline u16 __get_unaligned_le16(const u8 *p) | 6 | static inline u16 __get_unaligned_le16(const u8 *p) |
7 | { | 7 | { |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 75cf58666ff9..66ebddcff664 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -110,21 +110,20 @@ extern int laptop_mode; | |||
110 | extern unsigned long determine_dirtyable_memory(void); | 110 | extern unsigned long determine_dirtyable_memory(void); |
111 | 111 | ||
112 | extern int dirty_background_ratio_handler(struct ctl_table *table, int write, | 112 | extern int dirty_background_ratio_handler(struct ctl_table *table, int write, |
113 | struct file *filp, void __user *buffer, size_t *lenp, | 113 | void __user *buffer, size_t *lenp, |
114 | loff_t *ppos); | 114 | loff_t *ppos); |
115 | extern int dirty_background_bytes_handler(struct ctl_table *table, int write, | 115 | extern int dirty_background_bytes_handler(struct ctl_table *table, int write, |
116 | struct file *filp, void __user *buffer, size_t *lenp, | 116 | void __user *buffer, size_t *lenp, |
117 | loff_t *ppos); | 117 | loff_t *ppos); |
118 | extern int dirty_ratio_handler(struct ctl_table *table, int write, | 118 | extern int dirty_ratio_handler(struct ctl_table *table, int write, |
119 | struct file *filp, void __user *buffer, size_t *lenp, | 119 | void __user *buffer, size_t *lenp, |
120 | loff_t *ppos); | 120 | loff_t *ppos); |
121 | extern int dirty_bytes_handler(struct ctl_table *table, int write, | 121 | extern int dirty_bytes_handler(struct ctl_table *table, int write, |
122 | struct file *filp, void __user *buffer, size_t *lenp, | 122 | void __user *buffer, size_t *lenp, |
123 | loff_t *ppos); | 123 | loff_t *ppos); |
124 | 124 | ||
125 | struct ctl_table; | 125 | struct ctl_table; |
126 | struct file; | 126 | int dirty_writeback_centisecs_handler(struct ctl_table *, int, |
127 | int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, | ||
128 | void __user *, size_t *, loff_t *); | 127 | void __user *, size_t *, loff_t *); |
129 | 128 | ||
130 | void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, | 129 | void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty, |
diff --git a/include/net/ip.h b/include/net/ip.h index 72c36926c26d..5b26a0bd178e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h | |||
@@ -399,7 +399,7 @@ extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, | |||
399 | * fed into the routing cache should use these handlers. | 399 | * fed into the routing cache should use these handlers. |
400 | */ | 400 | */ |
401 | int ipv4_doint_and_flush(ctl_table *ctl, int write, | 401 | int ipv4_doint_and_flush(ctl_table *ctl, int write, |
402 | struct file* filp, void __user *buffer, | 402 | void __user *buffer, |
403 | size_t *lenp, loff_t *ppos); | 403 | size_t *lenp, loff_t *ppos); |
404 | int ipv4_doint_and_flush_strategy(ctl_table *table, | 404 | int ipv4_doint_and_flush_strategy(ctl_table *table, |
405 | void __user *oldval, size_t __user *oldlenp, | 405 | void __user *oldval, size_t __user *oldlenp, |
diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 1459ed3e2697..f76f22d05721 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h | |||
@@ -55,7 +55,6 @@ enum { | |||
55 | #include <net/neighbour.h> | 55 | #include <net/neighbour.h> |
56 | 56 | ||
57 | struct ctl_table; | 57 | struct ctl_table; |
58 | struct file; | ||
59 | struct inet6_dev; | 58 | struct inet6_dev; |
60 | struct net_device; | 59 | struct net_device; |
61 | struct net_proto_family; | 60 | struct net_proto_family; |
@@ -139,7 +138,6 @@ extern int igmp6_event_report(struct sk_buff *skb); | |||
139 | #ifdef CONFIG_SYSCTL | 138 | #ifdef CONFIG_SYSCTL |
140 | extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, | 139 | extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, |
141 | int write, | 140 | int write, |
142 | struct file * filp, | ||
143 | void __user *buffer, | 141 | void __user *buffer, |
144 | size_t *lenp, | 142 | size_t *lenp, |
145 | loff_t *ppos); | 143 | loff_t *ppos); |
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 40eab7314aeb..7d3704750efc 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c | |||
@@ -27,18 +27,18 @@ static void *get_ipc(ctl_table *table) | |||
27 | } | 27 | } |
28 | 28 | ||
29 | #ifdef CONFIG_PROC_SYSCTL | 29 | #ifdef CONFIG_PROC_SYSCTL |
30 | static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, | 30 | static int proc_ipc_dointvec(ctl_table *table, int write, |
31 | void __user *buffer, size_t *lenp, loff_t *ppos) | 31 | void __user *buffer, size_t *lenp, loff_t *ppos) |
32 | { | 32 | { |
33 | struct ctl_table ipc_table; | 33 | struct ctl_table ipc_table; |
34 | memcpy(&ipc_table, table, sizeof(ipc_table)); | 34 | memcpy(&ipc_table, table, sizeof(ipc_table)); |
35 | ipc_table.data = get_ipc(table); | 35 | ipc_table.data = get_ipc(table); |
36 | 36 | ||
37 | return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); | 37 | return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); |
38 | } | 38 | } |
39 | 39 | ||
40 | static int proc_ipc_callback_dointvec(ctl_table *table, int write, | 40 | static int proc_ipc_callback_dointvec(ctl_table *table, int write, |
41 | struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) | 41 | void __user *buffer, size_t *lenp, loff_t *ppos) |
42 | { | 42 | { |
43 | struct ctl_table ipc_table; | 43 | struct ctl_table ipc_table; |
44 | size_t lenp_bef = *lenp; | 44 | size_t lenp_bef = *lenp; |
@@ -47,7 +47,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, | |||
47 | memcpy(&ipc_table, table, sizeof(ipc_table)); | 47 | memcpy(&ipc_table, table, sizeof(ipc_table)); |
48 | ipc_table.data = get_ipc(table); | 48 | ipc_table.data = get_ipc(table); |
49 | 49 | ||
50 | rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); | 50 | rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos); |
51 | 51 | ||
52 | if (write && !rc && lenp_bef == *lenp) | 52 | if (write && !rc && lenp_bef == *lenp) |
53 | /* | 53 | /* |
@@ -61,13 +61,13 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, | |||
61 | } | 61 | } |
62 | 62 | ||
63 | static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, | 63 | static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, |
64 | struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) | 64 | void __user *buffer, size_t *lenp, loff_t *ppos) |
65 | { | 65 | { |
66 | struct ctl_table ipc_table; | 66 | struct ctl_table ipc_table; |
67 | memcpy(&ipc_table, table, sizeof(ipc_table)); | 67 | memcpy(&ipc_table, table, sizeof(ipc_table)); |
68 | ipc_table.data = get_ipc(table); | 68 | ipc_table.data = get_ipc(table); |
69 | 69 | ||
70 | return proc_doulongvec_minmax(&ipc_table, write, filp, buffer, | 70 | return proc_doulongvec_minmax(&ipc_table, write, buffer, |
71 | lenp, ppos); | 71 | lenp, ppos); |
72 | } | 72 | } |
73 | 73 | ||
@@ -95,7 +95,7 @@ static void ipc_auto_callback(int val) | |||
95 | } | 95 | } |
96 | 96 | ||
97 | static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, | 97 | static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, |
98 | struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) | 98 | void __user *buffer, size_t *lenp, loff_t *ppos) |
99 | { | 99 | { |
100 | struct ctl_table ipc_table; | 100 | struct ctl_table ipc_table; |
101 | size_t lenp_bef = *lenp; | 101 | size_t lenp_bef = *lenp; |
@@ -106,7 +106,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, | |||
106 | ipc_table.data = get_ipc(table); | 106 | ipc_table.data = get_ipc(table); |
107 | oldval = *((int *)(ipc_table.data)); | 107 | oldval = *((int *)(ipc_table.data)); |
108 | 108 | ||
109 | rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); | 109 | rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); |
110 | 110 | ||
111 | if (write && !rc && lenp_bef == *lenp) { | 111 | if (write && !rc && lenp_bef == *lenp) { |
112 | int newval = *((int *)(ipc_table.data)); | 112 | int newval = *((int *)(ipc_table.data)); |
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 24ae46dfe45d..8a058711fc10 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c | |||
@@ -31,24 +31,24 @@ static void *get_mq(ctl_table *table) | |||
31 | return which; | 31 | return which; |
32 | } | 32 | } |
33 | 33 | ||
34 | static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp, | 34 | static int proc_mq_dointvec(ctl_table *table, int write, |
35 | void __user *buffer, size_t *lenp, loff_t *ppos) | 35 | void __user *buffer, size_t *lenp, loff_t *ppos) |
36 | { | 36 | { |
37 | struct ctl_table mq_table; | 37 | struct ctl_table mq_table; |
38 | memcpy(&mq_table, table, sizeof(mq_table)); | 38 | memcpy(&mq_table, table, sizeof(mq_table)); |
39 | mq_table.data = get_mq(table); | 39 | mq_table.data = get_mq(table); |
40 | 40 | ||
41 | return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos); | 41 | return proc_dointvec(&mq_table, write, buffer, lenp, ppos); |
42 | } | 42 | } |
43 | 43 | ||
44 | static int proc_mq_dointvec_minmax(ctl_table *table, int write, | 44 | static int proc_mq_dointvec_minmax(ctl_table *table, int write, |
45 | struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) | 45 | void __user *buffer, size_t *lenp, loff_t *ppos) |
46 | { | 46 | { |
47 | struct ctl_table mq_table; | 47 | struct ctl_table mq_table; |
48 | memcpy(&mq_table, table, sizeof(mq_table)); | 48 | memcpy(&mq_table, table, sizeof(mq_table)); |
49 | mq_table.data = get_mq(table); | 49 | mq_table.data = get_mq(table); |
50 | 50 | ||
51 | return proc_dointvec_minmax(&mq_table, write, filp, buffer, | 51 | return proc_dointvec_minmax(&mq_table, write, buffer, |
52 | lenp, ppos); | 52 | lenp, ppos); |
53 | } | 53 | } |
54 | #else | 54 | #else |
diff --git a/kernel/Makefile b/kernel/Makefile index 187c89b4783d..b8d4cd8ac0b9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -58,7 +58,6 @@ obj-$(CONFIG_KEXEC) += kexec.o | |||
58 | obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o | 58 | obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o |
59 | obj-$(CONFIG_COMPAT) += compat.o | 59 | obj-$(CONFIG_COMPAT) += compat.o |
60 | obj-$(CONFIG_CGROUPS) += cgroup.o | 60 | obj-$(CONFIG_CGROUPS) += cgroup.o |
61 | obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o | ||
62 | obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o | 61 | obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o |
63 | obj-$(CONFIG_CPUSETS) += cpuset.o | 62 | obj-$(CONFIG_CPUSETS) += cpuset.o |
64 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o | 63 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index cd83d9933b6b..7ccba4bc5e3b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -23,6 +23,7 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/cgroup.h> | 25 | #include <linux/cgroup.h> |
26 | #include <linux/ctype.h> | ||
26 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
27 | #include <linux/fs.h> | 28 | #include <linux/fs.h> |
28 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
@@ -48,6 +49,8 @@ | |||
48 | #include <linux/namei.h> | 49 | #include <linux/namei.h> |
49 | #include <linux/smp_lock.h> | 50 | #include <linux/smp_lock.h> |
50 | #include <linux/pid_namespace.h> | 51 | #include <linux/pid_namespace.h> |
52 | #include <linux/idr.h> | ||
53 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | ||
51 | 54 | ||
52 | #include <asm/atomic.h> | 55 | #include <asm/atomic.h> |
53 | 56 | ||
@@ -60,6 +63,8 @@ static struct cgroup_subsys *subsys[] = { | |||
60 | #include <linux/cgroup_subsys.h> | 63 | #include <linux/cgroup_subsys.h> |
61 | }; | 64 | }; |
62 | 65 | ||
66 | #define MAX_CGROUP_ROOT_NAMELEN 64 | ||
67 | |||
63 | /* | 68 | /* |
64 | * A cgroupfs_root represents the root of a cgroup hierarchy, | 69 | * A cgroupfs_root represents the root of a cgroup hierarchy, |
65 | * and may be associated with a superblock to form an active | 70 | * and may be associated with a superblock to form an active |
@@ -74,6 +79,9 @@ struct cgroupfs_root { | |||
74 | */ | 79 | */ |
75 | unsigned long subsys_bits; | 80 | unsigned long subsys_bits; |
76 | 81 | ||
82 | /* Unique id for this hierarchy. */ | ||
83 | int hierarchy_id; | ||
84 | |||
77 | /* The bitmask of subsystems currently attached to this hierarchy */ | 85 | /* The bitmask of subsystems currently attached to this hierarchy */ |
78 | unsigned long actual_subsys_bits; | 86 | unsigned long actual_subsys_bits; |
79 | 87 | ||
@@ -94,6 +102,9 @@ struct cgroupfs_root { | |||
94 | 102 | ||
95 | /* The path to use for release notifications. */ | 103 | /* The path to use for release notifications. */ |
96 | char release_agent_path[PATH_MAX]; | 104 | char release_agent_path[PATH_MAX]; |
105 | |||
106 | /* The name for this hierarchy - may be empty */ | ||
107 | char name[MAX_CGROUP_ROOT_NAMELEN]; | ||
97 | }; | 108 | }; |
98 | 109 | ||
99 | /* | 110 | /* |
@@ -141,6 +152,10 @@ struct css_id { | |||
141 | static LIST_HEAD(roots); | 152 | static LIST_HEAD(roots); |
142 | static int root_count; | 153 | static int root_count; |
143 | 154 | ||
155 | static DEFINE_IDA(hierarchy_ida); | ||
156 | static int next_hierarchy_id; | ||
157 | static DEFINE_SPINLOCK(hierarchy_id_lock); | ||
158 | |||
144 | /* dummytop is a shorthand for the dummy hierarchy's top cgroup */ | 159 | /* dummytop is a shorthand for the dummy hierarchy's top cgroup */ |
145 | #define dummytop (&rootnode.top_cgroup) | 160 | #define dummytop (&rootnode.top_cgroup) |
146 | 161 | ||
@@ -201,6 +216,7 @@ struct cg_cgroup_link { | |||
201 | * cgroup, anchored on cgroup->css_sets | 216 | * cgroup, anchored on cgroup->css_sets |
202 | */ | 217 | */ |
203 | struct list_head cgrp_link_list; | 218 | struct list_head cgrp_link_list; |
219 | struct cgroup *cgrp; | ||
204 | /* | 220 | /* |
205 | * List running through cg_cgroup_links pointing at a | 221 | * List running through cg_cgroup_links pointing at a |
206 | * single css_set object, anchored on css_set->cg_links | 222 | * single css_set object, anchored on css_set->cg_links |
@@ -227,8 +243,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss); | |||
227 | static DEFINE_RWLOCK(css_set_lock); | 243 | static DEFINE_RWLOCK(css_set_lock); |
228 | static int css_set_count; | 244 | static int css_set_count; |
229 | 245 | ||
230 | /* hash table for cgroup groups. This improves the performance to | 246 | /* |
231 | * find an existing css_set */ | 247 | * hash table for cgroup groups. This improves the performance to find |
248 | * an existing css_set. This hash doesn't (currently) take into | ||
249 | * account cgroups in empty hierarchies. | ||
250 | */ | ||
232 | #define CSS_SET_HASH_BITS 7 | 251 | #define CSS_SET_HASH_BITS 7 |
233 | #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) | 252 | #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) |
234 | static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; | 253 | static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; |
@@ -248,48 +267,22 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) | |||
248 | return &css_set_table[index]; | 267 | return &css_set_table[index]; |
249 | } | 268 | } |
250 | 269 | ||
270 | static void free_css_set_rcu(struct rcu_head *obj) | ||
271 | { | ||
272 | struct css_set *cg = container_of(obj, struct css_set, rcu_head); | ||
273 | kfree(cg); | ||
274 | } | ||
275 | |||
251 | /* We don't maintain the lists running through each css_set to its | 276 | /* We don't maintain the lists running through each css_set to its |
252 | * task until after the first call to cgroup_iter_start(). This | 277 | * task until after the first call to cgroup_iter_start(). This |
253 | * reduces the fork()/exit() overhead for people who have cgroups | 278 | * reduces the fork()/exit() overhead for people who have cgroups |
254 | * compiled into their kernel but not actually in use */ | 279 | * compiled into their kernel but not actually in use */ |
255 | static int use_task_css_set_links __read_mostly; | 280 | static int use_task_css_set_links __read_mostly; |
256 | 281 | ||
257 | /* When we create or destroy a css_set, the operation simply | 282 | static void __put_css_set(struct css_set *cg, int taskexit) |
258 | * takes/releases a reference count on all the cgroups referenced | ||
259 | * by subsystems in this css_set. This can end up multiple-counting | ||
260 | * some cgroups, but that's OK - the ref-count is just a | ||
261 | * busy/not-busy indicator; ensuring that we only count each cgroup | ||
262 | * once would require taking a global lock to ensure that no | ||
263 | * subsystems moved between hierarchies while we were doing so. | ||
264 | * | ||
265 | * Possible TODO: decide at boot time based on the number of | ||
266 | * registered subsystems and the number of CPUs or NUMA nodes whether | ||
267 | * it's better for performance to ref-count every subsystem, or to | ||
268 | * take a global lock and only add one ref count to each hierarchy. | ||
269 | */ | ||
270 | |||
271 | /* | ||
272 | * unlink a css_set from the list and free it | ||
273 | */ | ||
274 | static void unlink_css_set(struct css_set *cg) | ||
275 | { | 283 | { |
276 | struct cg_cgroup_link *link; | 284 | struct cg_cgroup_link *link; |
277 | struct cg_cgroup_link *saved_link; | 285 | struct cg_cgroup_link *saved_link; |
278 | |||
279 | hlist_del(&cg->hlist); | ||
280 | css_set_count--; | ||
281 | |||
282 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, | ||
283 | cg_link_list) { | ||
284 | list_del(&link->cg_link_list); | ||
285 | list_del(&link->cgrp_link_list); | ||
286 | kfree(link); | ||
287 | } | ||
288 | } | ||
289 | |||
290 | static void __put_css_set(struct css_set *cg, int taskexit) | ||
291 | { | ||
292 | int i; | ||
293 | /* | 286 | /* |
294 | * Ensure that the refcount doesn't hit zero while any readers | 287 | * Ensure that the refcount doesn't hit zero while any readers |
295 | * can see it. Similar to atomic_dec_and_lock(), but for an | 288 | * can see it. Similar to atomic_dec_and_lock(), but for an |
@@ -302,21 +295,28 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
302 | write_unlock(&css_set_lock); | 295 | write_unlock(&css_set_lock); |
303 | return; | 296 | return; |
304 | } | 297 | } |
305 | unlink_css_set(cg); | ||
306 | write_unlock(&css_set_lock); | ||
307 | 298 | ||
308 | rcu_read_lock(); | 299 | /* This css_set is dead. Unlink it and release cgroup refcounts */ |
309 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 300 | hlist_del(&cg->hlist); |
310 | struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup); | 301 | css_set_count--; |
302 | |||
303 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, | ||
304 | cg_link_list) { | ||
305 | struct cgroup *cgrp = link->cgrp; | ||
306 | list_del(&link->cg_link_list); | ||
307 | list_del(&link->cgrp_link_list); | ||
311 | if (atomic_dec_and_test(&cgrp->count) && | 308 | if (atomic_dec_and_test(&cgrp->count) && |
312 | notify_on_release(cgrp)) { | 309 | notify_on_release(cgrp)) { |
313 | if (taskexit) | 310 | if (taskexit) |
314 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 311 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
315 | check_for_release(cgrp); | 312 | check_for_release(cgrp); |
316 | } | 313 | } |
314 | |||
315 | kfree(link); | ||
317 | } | 316 | } |
318 | rcu_read_unlock(); | 317 | |
319 | kfree(cg); | 318 | write_unlock(&css_set_lock); |
319 | call_rcu(&cg->rcu_head, free_css_set_rcu); | ||
320 | } | 320 | } |
321 | 321 | ||
322 | /* | 322 | /* |
@@ -338,6 +338,78 @@ static inline void put_css_set_taskexit(struct css_set *cg) | |||
338 | } | 338 | } |
339 | 339 | ||
340 | /* | 340 | /* |
341 | * compare_css_sets - helper function for find_existing_css_set(). | ||
342 | * @cg: candidate css_set being tested | ||
343 | * @old_cg: existing css_set for a task | ||
344 | * @new_cgrp: cgroup that's being entered by the task | ||
345 | * @template: desired set of css pointers in css_set (pre-calculated) | ||
346 | * | ||
347 | * Returns true if "cg" matches "old_cg" except for the hierarchy | ||
348 | * which "new_cgrp" belongs to, for which it should match "new_cgrp". | ||
349 | */ | ||
350 | static bool compare_css_sets(struct css_set *cg, | ||
351 | struct css_set *old_cg, | ||
352 | struct cgroup *new_cgrp, | ||
353 | struct cgroup_subsys_state *template[]) | ||
354 | { | ||
355 | struct list_head *l1, *l2; | ||
356 | |||
357 | if (memcmp(template, cg->subsys, sizeof(cg->subsys))) { | ||
358 | /* Not all subsystems matched */ | ||
359 | return false; | ||
360 | } | ||
361 | |||
362 | /* | ||
363 | * Compare cgroup pointers in order to distinguish between | ||
364 | * different cgroups in hierarchies with no subsystems. We | ||
365 | * could get by with just this check alone (and skip the | ||
366 | * memcmp above) but on most setups the memcmp check will | ||
367 | * avoid the need for this more expensive check on almost all | ||
368 | * candidates. | ||
369 | */ | ||
370 | |||
371 | l1 = &cg->cg_links; | ||
372 | l2 = &old_cg->cg_links; | ||
373 | while (1) { | ||
374 | struct cg_cgroup_link *cgl1, *cgl2; | ||
375 | struct cgroup *cg1, *cg2; | ||
376 | |||
377 | l1 = l1->next; | ||
378 | l2 = l2->next; | ||
379 | /* See if we reached the end - both lists are equal length. */ | ||
380 | if (l1 == &cg->cg_links) { | ||
381 | BUG_ON(l2 != &old_cg->cg_links); | ||
382 | break; | ||
383 | } else { | ||
384 | BUG_ON(l2 == &old_cg->cg_links); | ||
385 | } | ||
386 | /* Locate the cgroups associated with these links. */ | ||
387 | cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list); | ||
388 | cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list); | ||
389 | cg1 = cgl1->cgrp; | ||
390 | cg2 = cgl2->cgrp; | ||
391 | /* Hierarchies should be linked in the same order. */ | ||
392 | BUG_ON(cg1->root != cg2->root); | ||
393 | |||
394 | /* | ||
395 | * If this hierarchy is the hierarchy of the cgroup | ||
396 | * that's changing, then we need to check that this | ||
397 | * css_set points to the new cgroup; if it's any other | ||
398 | * hierarchy, then this css_set should point to the | ||
399 | * same cgroup as the old css_set. | ||
400 | */ | ||
401 | if (cg1->root == new_cgrp->root) { | ||
402 | if (cg1 != new_cgrp) | ||
403 | return false; | ||
404 | } else { | ||
405 | if (cg1 != cg2) | ||
406 | return false; | ||
407 | } | ||
408 | } | ||
409 | return true; | ||
410 | } | ||
411 | |||
412 | /* | ||
341 | * find_existing_css_set() is a helper for | 413 | * find_existing_css_set() is a helper for |
342 | * find_css_set(), and checks to see whether an existing | 414 | * find_css_set(), and checks to see whether an existing |
343 | * css_set is suitable. | 415 | * css_set is suitable. |
@@ -378,10 +450,11 @@ static struct css_set *find_existing_css_set( | |||
378 | 450 | ||
379 | hhead = css_set_hash(template); | 451 | hhead = css_set_hash(template); |
380 | hlist_for_each_entry(cg, node, hhead, hlist) { | 452 | hlist_for_each_entry(cg, node, hhead, hlist) { |
381 | if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { | 453 | if (!compare_css_sets(cg, oldcg, cgrp, template)) |
382 | /* All subsystems matched */ | 454 | continue; |
383 | return cg; | 455 | |
384 | } | 456 | /* This css_set matches what we need */ |
457 | return cg; | ||
385 | } | 458 | } |
386 | 459 | ||
387 | /* No existing cgroup group matched */ | 460 | /* No existing cgroup group matched */ |
@@ -435,8 +508,14 @@ static void link_css_set(struct list_head *tmp_cg_links, | |||
435 | link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, | 508 | link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, |
436 | cgrp_link_list); | 509 | cgrp_link_list); |
437 | link->cg = cg; | 510 | link->cg = cg; |
511 | link->cgrp = cgrp; | ||
512 | atomic_inc(&cgrp->count); | ||
438 | list_move(&link->cgrp_link_list, &cgrp->css_sets); | 513 | list_move(&link->cgrp_link_list, &cgrp->css_sets); |
439 | list_add(&link->cg_link_list, &cg->cg_links); | 514 | /* |
515 | * Always add links to the tail of the list so that the list | ||
516 | * is sorted by order of hierarchy creation | ||
517 | */ | ||
518 | list_add_tail(&link->cg_link_list, &cg->cg_links); | ||
440 | } | 519 | } |
441 | 520 | ||
442 | /* | 521 | /* |
@@ -451,11 +530,11 @@ static struct css_set *find_css_set( | |||
451 | { | 530 | { |
452 | struct css_set *res; | 531 | struct css_set *res; |
453 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; | 532 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; |
454 | int i; | ||
455 | 533 | ||
456 | struct list_head tmp_cg_links; | 534 | struct list_head tmp_cg_links; |
457 | 535 | ||
458 | struct hlist_head *hhead; | 536 | struct hlist_head *hhead; |
537 | struct cg_cgroup_link *link; | ||
459 | 538 | ||
460 | /* First see if we already have a cgroup group that matches | 539 | /* First see if we already have a cgroup group that matches |
461 | * the desired set */ | 540 | * the desired set */ |
@@ -489,20 +568,12 @@ static struct css_set *find_css_set( | |||
489 | 568 | ||
490 | write_lock(&css_set_lock); | 569 | write_lock(&css_set_lock); |
491 | /* Add reference counts and links from the new css_set. */ | 570 | /* Add reference counts and links from the new css_set. */ |
492 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 571 | list_for_each_entry(link, &oldcg->cg_links, cg_link_list) { |
493 | struct cgroup *cgrp = res->subsys[i]->cgroup; | 572 | struct cgroup *c = link->cgrp; |
494 | struct cgroup_subsys *ss = subsys[i]; | 573 | if (c->root == cgrp->root) |
495 | atomic_inc(&cgrp->count); | 574 | c = cgrp; |
496 | /* | 575 | link_css_set(&tmp_cg_links, res, c); |
497 | * We want to add a link once per cgroup, so we | ||
498 | * only do it for the first subsystem in each | ||
499 | * hierarchy | ||
500 | */ | ||
501 | if (ss->root->subsys_list.next == &ss->sibling) | ||
502 | link_css_set(&tmp_cg_links, res, cgrp); | ||
503 | } | 576 | } |
504 | if (list_empty(&rootnode.subsys_list)) | ||
505 | link_css_set(&tmp_cg_links, res, dummytop); | ||
506 | 577 | ||
507 | BUG_ON(!list_empty(&tmp_cg_links)); | 578 | BUG_ON(!list_empty(&tmp_cg_links)); |
508 | 579 | ||
@@ -518,6 +589,41 @@ static struct css_set *find_css_set( | |||
518 | } | 589 | } |
519 | 590 | ||
520 | /* | 591 | /* |
592 | * Return the cgroup for "task" from the given hierarchy. Must be | ||
593 | * called with cgroup_mutex held. | ||
594 | */ | ||
595 | static struct cgroup *task_cgroup_from_root(struct task_struct *task, | ||
596 | struct cgroupfs_root *root) | ||
597 | { | ||
598 | struct css_set *css; | ||
599 | struct cgroup *res = NULL; | ||
600 | |||
601 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | ||
602 | read_lock(&css_set_lock); | ||
603 | /* | ||
604 | * No need to lock the task - since we hold cgroup_mutex the | ||
605 | * task can't change groups, so the only thing that can happen | ||
606 | * is that it exits and its css is set back to init_css_set. | ||
607 | */ | ||
608 | css = task->cgroups; | ||
609 | if (css == &init_css_set) { | ||
610 | res = &root->top_cgroup; | ||
611 | } else { | ||
612 | struct cg_cgroup_link *link; | ||
613 | list_for_each_entry(link, &css->cg_links, cg_link_list) { | ||
614 | struct cgroup *c = link->cgrp; | ||
615 | if (c->root == root) { | ||
616 | res = c; | ||
617 | break; | ||
618 | } | ||
619 | } | ||
620 | } | ||
621 | read_unlock(&css_set_lock); | ||
622 | BUG_ON(!res); | ||
623 | return res; | ||
624 | } | ||
625 | |||
626 | /* | ||
521 | * There is one global cgroup mutex. We also require taking | 627 | * There is one global cgroup mutex. We also require taking |
522 | * task_lock() when dereferencing a task's cgroup subsys pointers. | 628 | * task_lock() when dereferencing a task's cgroup subsys pointers. |
523 | * See "The task_lock() exception", at the end of this comment. | 629 | * See "The task_lock() exception", at the end of this comment. |
@@ -677,6 +783,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
677 | */ | 783 | */ |
678 | deactivate_super(cgrp->root->sb); | 784 | deactivate_super(cgrp->root->sb); |
679 | 785 | ||
786 | /* | ||
787 | * if we're getting rid of the cgroup, refcount should ensure | ||
788 | * that there are no pidlists left. | ||
789 | */ | ||
790 | BUG_ON(!list_empty(&cgrp->pidlists)); | ||
791 | |||
680 | call_rcu(&cgrp->rcu_head, free_cgroup_rcu); | 792 | call_rcu(&cgrp->rcu_head, free_cgroup_rcu); |
681 | } | 793 | } |
682 | iput(inode); | 794 | iput(inode); |
@@ -841,6 +953,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
841 | seq_puts(seq, ",noprefix"); | 953 | seq_puts(seq, ",noprefix"); |
842 | if (strlen(root->release_agent_path)) | 954 | if (strlen(root->release_agent_path)) |
843 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); | 955 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); |
956 | if (strlen(root->name)) | ||
957 | seq_printf(seq, ",name=%s", root->name); | ||
844 | mutex_unlock(&cgroup_mutex); | 958 | mutex_unlock(&cgroup_mutex); |
845 | return 0; | 959 | return 0; |
846 | } | 960 | } |
@@ -849,6 +963,12 @@ struct cgroup_sb_opts { | |||
849 | unsigned long subsys_bits; | 963 | unsigned long subsys_bits; |
850 | unsigned long flags; | 964 | unsigned long flags; |
851 | char *release_agent; | 965 | char *release_agent; |
966 | char *name; | ||
967 | /* User explicitly requested empty subsystem */ | ||
968 | bool none; | ||
969 | |||
970 | struct cgroupfs_root *new_root; | ||
971 | |||
852 | }; | 972 | }; |
853 | 973 | ||
854 | /* Convert a hierarchy specifier into a bitmask of subsystems and | 974 | /* Convert a hierarchy specifier into a bitmask of subsystems and |
@@ -863,9 +983,7 @@ static int parse_cgroupfs_options(char *data, | |||
863 | mask = ~(1UL << cpuset_subsys_id); | 983 | mask = ~(1UL << cpuset_subsys_id); |
864 | #endif | 984 | #endif |
865 | 985 | ||
866 | opts->subsys_bits = 0; | 986 | memset(opts, 0, sizeof(*opts)); |
867 | opts->flags = 0; | ||
868 | opts->release_agent = NULL; | ||
869 | 987 | ||
870 | while ((token = strsep(&o, ",")) != NULL) { | 988 | while ((token = strsep(&o, ",")) != NULL) { |
871 | if (!*token) | 989 | if (!*token) |
@@ -879,17 +997,42 @@ static int parse_cgroupfs_options(char *data, | |||
879 | if (!ss->disabled) | 997 | if (!ss->disabled) |
880 | opts->subsys_bits |= 1ul << i; | 998 | opts->subsys_bits |= 1ul << i; |
881 | } | 999 | } |
1000 | } else if (!strcmp(token, "none")) { | ||
1001 | /* Explicitly have no subsystems */ | ||
1002 | opts->none = true; | ||
882 | } else if (!strcmp(token, "noprefix")) { | 1003 | } else if (!strcmp(token, "noprefix")) { |
883 | set_bit(ROOT_NOPREFIX, &opts->flags); | 1004 | set_bit(ROOT_NOPREFIX, &opts->flags); |
884 | } else if (!strncmp(token, "release_agent=", 14)) { | 1005 | } else if (!strncmp(token, "release_agent=", 14)) { |
885 | /* Specifying two release agents is forbidden */ | 1006 | /* Specifying two release agents is forbidden */ |
886 | if (opts->release_agent) | 1007 | if (opts->release_agent) |
887 | return -EINVAL; | 1008 | return -EINVAL; |
888 | opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL); | 1009 | opts->release_agent = |
1010 | kstrndup(token + 14, PATH_MAX, GFP_KERNEL); | ||
889 | if (!opts->release_agent) | 1011 | if (!opts->release_agent) |
890 | return -ENOMEM; | 1012 | return -ENOMEM; |
891 | strncpy(opts->release_agent, token + 14, PATH_MAX - 1); | 1013 | } else if (!strncmp(token, "name=", 5)) { |
892 | opts->release_agent[PATH_MAX - 1] = 0; | 1014 | int i; |
1015 | const char *name = token + 5; | ||
1016 | /* Can't specify an empty name */ | ||
1017 | if (!strlen(name)) | ||
1018 | return -EINVAL; | ||
1019 | /* Must match [\w.-]+ */ | ||
1020 | for (i = 0; i < strlen(name); i++) { | ||
1021 | char c = name[i]; | ||
1022 | if (isalnum(c)) | ||
1023 | continue; | ||
1024 | if ((c == '.') || (c == '-') || (c == '_')) | ||
1025 | continue; | ||
1026 | return -EINVAL; | ||
1027 | } | ||
1028 | /* Specifying two names is forbidden */ | ||
1029 | if (opts->name) | ||
1030 | return -EINVAL; | ||
1031 | opts->name = kstrndup(name, | ||
1032 | MAX_CGROUP_ROOT_NAMELEN, | ||
1033 | GFP_KERNEL); | ||
1034 | if (!opts->name) | ||
1035 | return -ENOMEM; | ||
893 | } else { | 1036 | } else { |
894 | struct cgroup_subsys *ss; | 1037 | struct cgroup_subsys *ss; |
895 | int i; | 1038 | int i; |
@@ -906,6 +1049,8 @@ static int parse_cgroupfs_options(char *data, | |||
906 | } | 1049 | } |
907 | } | 1050 | } |
908 | 1051 | ||
1052 | /* Consistency checks */ | ||
1053 | |||
909 | /* | 1054 | /* |
910 | * Option noprefix was introduced just for backward compatibility | 1055 | * Option noprefix was introduced just for backward compatibility |
911 | * with the old cpuset, so we allow noprefix only if mounting just | 1056 | * with the old cpuset, so we allow noprefix only if mounting just |
@@ -915,8 +1060,16 @@ static int parse_cgroupfs_options(char *data, | |||
915 | (opts->subsys_bits & mask)) | 1060 | (opts->subsys_bits & mask)) |
916 | return -EINVAL; | 1061 | return -EINVAL; |
917 | 1062 | ||
918 | /* We can't have an empty hierarchy */ | 1063 | |
919 | if (!opts->subsys_bits) | 1064 | /* Can't specify "none" and some subsystems */ |
1065 | if (opts->subsys_bits && opts->none) | ||
1066 | return -EINVAL; | ||
1067 | |||
1068 | /* | ||
1069 | * We either have to specify by name or by subsystems. (So all | ||
1070 | * empty hierarchies must have a name). | ||
1071 | */ | ||
1072 | if (!opts->subsys_bits && !opts->name) | ||
920 | return -EINVAL; | 1073 | return -EINVAL; |
921 | 1074 | ||
922 | return 0; | 1075 | return 0; |
@@ -944,6 +1097,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
944 | goto out_unlock; | 1097 | goto out_unlock; |
945 | } | 1098 | } |
946 | 1099 | ||
1100 | /* Don't allow name to change at remount */ | ||
1101 | if (opts.name && strcmp(opts.name, root->name)) { | ||
1102 | ret = -EINVAL; | ||
1103 | goto out_unlock; | ||
1104 | } | ||
1105 | |||
947 | ret = rebind_subsystems(root, opts.subsys_bits); | 1106 | ret = rebind_subsystems(root, opts.subsys_bits); |
948 | if (ret) | 1107 | if (ret) |
949 | goto out_unlock; | 1108 | goto out_unlock; |
@@ -955,6 +1114,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
955 | strcpy(root->release_agent_path, opts.release_agent); | 1114 | strcpy(root->release_agent_path, opts.release_agent); |
956 | out_unlock: | 1115 | out_unlock: |
957 | kfree(opts.release_agent); | 1116 | kfree(opts.release_agent); |
1117 | kfree(opts.name); | ||
958 | mutex_unlock(&cgroup_mutex); | 1118 | mutex_unlock(&cgroup_mutex); |
959 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 1119 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
960 | unlock_kernel(); | 1120 | unlock_kernel(); |
@@ -974,9 +1134,10 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
974 | INIT_LIST_HEAD(&cgrp->children); | 1134 | INIT_LIST_HEAD(&cgrp->children); |
975 | INIT_LIST_HEAD(&cgrp->css_sets); | 1135 | INIT_LIST_HEAD(&cgrp->css_sets); |
976 | INIT_LIST_HEAD(&cgrp->release_list); | 1136 | INIT_LIST_HEAD(&cgrp->release_list); |
977 | INIT_LIST_HEAD(&cgrp->pids_list); | 1137 | INIT_LIST_HEAD(&cgrp->pidlists); |
978 | init_rwsem(&cgrp->pids_mutex); | 1138 | mutex_init(&cgrp->pidlist_mutex); |
979 | } | 1139 | } |
1140 | |||
980 | static void init_cgroup_root(struct cgroupfs_root *root) | 1141 | static void init_cgroup_root(struct cgroupfs_root *root) |
981 | { | 1142 | { |
982 | struct cgroup *cgrp = &root->top_cgroup; | 1143 | struct cgroup *cgrp = &root->top_cgroup; |
@@ -988,33 +1149,106 @@ static void init_cgroup_root(struct cgroupfs_root *root) | |||
988 | init_cgroup_housekeeping(cgrp); | 1149 | init_cgroup_housekeeping(cgrp); |
989 | } | 1150 | } |
990 | 1151 | ||
1152 | static bool init_root_id(struct cgroupfs_root *root) | ||
1153 | { | ||
1154 | int ret = 0; | ||
1155 | |||
1156 | do { | ||
1157 | if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL)) | ||
1158 | return false; | ||
1159 | spin_lock(&hierarchy_id_lock); | ||
1160 | /* Try to allocate the next unused ID */ | ||
1161 | ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id, | ||
1162 | &root->hierarchy_id); | ||
1163 | if (ret == -ENOSPC) | ||
1164 | /* Try again starting from 0 */ | ||
1165 | ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id); | ||
1166 | if (!ret) { | ||
1167 | next_hierarchy_id = root->hierarchy_id + 1; | ||
1168 | } else if (ret != -EAGAIN) { | ||
1169 | /* Can only get here if the 31-bit IDR is full ... */ | ||
1170 | BUG_ON(ret); | ||
1171 | } | ||
1172 | spin_unlock(&hierarchy_id_lock); | ||
1173 | } while (ret); | ||
1174 | return true; | ||
1175 | } | ||
1176 | |||
991 | static int cgroup_test_super(struct super_block *sb, void *data) | 1177 | static int cgroup_test_super(struct super_block *sb, void *data) |
992 | { | 1178 | { |
993 | struct cgroupfs_root *new = data; | 1179 | struct cgroup_sb_opts *opts = data; |
994 | struct cgroupfs_root *root = sb->s_fs_info; | 1180 | struct cgroupfs_root *root = sb->s_fs_info; |
995 | 1181 | ||
996 | /* First check subsystems */ | 1182 | /* If we asked for a name then it must match */ |
997 | if (new->subsys_bits != root->subsys_bits) | 1183 | if (opts->name && strcmp(opts->name, root->name)) |
998 | return 0; | 1184 | return 0; |
999 | 1185 | ||
1000 | /* Next check flags */ | 1186 | /* |
1001 | if (new->flags != root->flags) | 1187 | * If we asked for subsystems (or explicitly for no |
1188 | * subsystems) then they must match | ||
1189 | */ | ||
1190 | if ((opts->subsys_bits || opts->none) | ||
1191 | && (opts->subsys_bits != root->subsys_bits)) | ||
1002 | return 0; | 1192 | return 0; |
1003 | 1193 | ||
1004 | return 1; | 1194 | return 1; |
1005 | } | 1195 | } |
1006 | 1196 | ||
1197 | static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | ||
1198 | { | ||
1199 | struct cgroupfs_root *root; | ||
1200 | |||
1201 | if (!opts->subsys_bits && !opts->none) | ||
1202 | return NULL; | ||
1203 | |||
1204 | root = kzalloc(sizeof(*root), GFP_KERNEL); | ||
1205 | if (!root) | ||
1206 | return ERR_PTR(-ENOMEM); | ||
1207 | |||
1208 | if (!init_root_id(root)) { | ||
1209 | kfree(root); | ||
1210 | return ERR_PTR(-ENOMEM); | ||
1211 | } | ||
1212 | init_cgroup_root(root); | ||
1213 | |||
1214 | root->subsys_bits = opts->subsys_bits; | ||
1215 | root->flags = opts->flags; | ||
1216 | if (opts->release_agent) | ||
1217 | strcpy(root->release_agent_path, opts->release_agent); | ||
1218 | if (opts->name) | ||
1219 | strcpy(root->name, opts->name); | ||
1220 | return root; | ||
1221 | } | ||
1222 | |||
1223 | static void cgroup_drop_root(struct cgroupfs_root *root) | ||
1224 | { | ||
1225 | if (!root) | ||
1226 | return; | ||
1227 | |||
1228 | BUG_ON(!root->hierarchy_id); | ||
1229 | spin_lock(&hierarchy_id_lock); | ||
1230 | ida_remove(&hierarchy_ida, root->hierarchy_id); | ||
1231 | spin_unlock(&hierarchy_id_lock); | ||
1232 | kfree(root); | ||
1233 | } | ||
1234 | |||
1007 | static int cgroup_set_super(struct super_block *sb, void *data) | 1235 | static int cgroup_set_super(struct super_block *sb, void *data) |
1008 | { | 1236 | { |
1009 | int ret; | 1237 | int ret; |
1010 | struct cgroupfs_root *root = data; | 1238 | struct cgroup_sb_opts *opts = data; |
1239 | |||
1240 | /* If we don't have a new root, we can't set up a new sb */ | ||
1241 | if (!opts->new_root) | ||
1242 | return -EINVAL; | ||
1243 | |||
1244 | BUG_ON(!opts->subsys_bits && !opts->none); | ||
1011 | 1245 | ||
1012 | ret = set_anon_super(sb, NULL); | 1246 | ret = set_anon_super(sb, NULL); |
1013 | if (ret) | 1247 | if (ret) |
1014 | return ret; | 1248 | return ret; |
1015 | 1249 | ||
1016 | sb->s_fs_info = root; | 1250 | sb->s_fs_info = opts->new_root; |
1017 | root->sb = sb; | 1251 | opts->new_root->sb = sb; |
1018 | 1252 | ||
1019 | sb->s_blocksize = PAGE_CACHE_SIZE; | 1253 | sb->s_blocksize = PAGE_CACHE_SIZE; |
1020 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | 1254 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; |
@@ -1051,48 +1285,43 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1051 | void *data, struct vfsmount *mnt) | 1285 | void *data, struct vfsmount *mnt) |
1052 | { | 1286 | { |
1053 | struct cgroup_sb_opts opts; | 1287 | struct cgroup_sb_opts opts; |
1288 | struct cgroupfs_root *root; | ||
1054 | int ret = 0; | 1289 | int ret = 0; |
1055 | struct super_block *sb; | 1290 | struct super_block *sb; |
1056 | struct cgroupfs_root *root; | 1291 | struct cgroupfs_root *new_root; |
1057 | struct list_head tmp_cg_links; | ||
1058 | 1292 | ||
1059 | /* First find the desired set of subsystems */ | 1293 | /* First find the desired set of subsystems */ |
1060 | ret = parse_cgroupfs_options(data, &opts); | 1294 | ret = parse_cgroupfs_options(data, &opts); |
1061 | if (ret) { | 1295 | if (ret) |
1062 | kfree(opts.release_agent); | 1296 | goto out_err; |
1063 | return ret; | ||
1064 | } | ||
1065 | |||
1066 | root = kzalloc(sizeof(*root), GFP_KERNEL); | ||
1067 | if (!root) { | ||
1068 | kfree(opts.release_agent); | ||
1069 | return -ENOMEM; | ||
1070 | } | ||
1071 | 1297 | ||
1072 | init_cgroup_root(root); | 1298 | /* |
1073 | root->subsys_bits = opts.subsys_bits; | 1299 | * Allocate a new cgroup root. We may not need it if we're |
1074 | root->flags = opts.flags; | 1300 | * reusing an existing hierarchy. |
1075 | if (opts.release_agent) { | 1301 | */ |
1076 | strcpy(root->release_agent_path, opts.release_agent); | 1302 | new_root = cgroup_root_from_opts(&opts); |
1077 | kfree(opts.release_agent); | 1303 | if (IS_ERR(new_root)) { |
1304 | ret = PTR_ERR(new_root); | ||
1305 | goto out_err; | ||
1078 | } | 1306 | } |
1307 | opts.new_root = new_root; | ||
1079 | 1308 | ||
1080 | sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root); | 1309 | /* Locate an existing or new sb for this hierarchy */ |
1081 | 1310 | sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts); | |
1082 | if (IS_ERR(sb)) { | 1311 | if (IS_ERR(sb)) { |
1083 | kfree(root); | 1312 | ret = PTR_ERR(sb); |
1084 | return PTR_ERR(sb); | 1313 | cgroup_drop_root(opts.new_root); |
1314 | goto out_err; | ||
1085 | } | 1315 | } |
1086 | 1316 | ||
1087 | if (sb->s_fs_info != root) { | 1317 | root = sb->s_fs_info; |
1088 | /* Reusing an existing superblock */ | 1318 | BUG_ON(!root); |
1089 | BUG_ON(sb->s_root == NULL); | 1319 | if (root == opts.new_root) { |
1090 | kfree(root); | 1320 | /* We used the new root structure, so this is a new hierarchy */ |
1091 | root = NULL; | 1321 | struct list_head tmp_cg_links; |
1092 | } else { | ||
1093 | /* New superblock */ | ||
1094 | struct cgroup *root_cgrp = &root->top_cgroup; | 1322 | struct cgroup *root_cgrp = &root->top_cgroup; |
1095 | struct inode *inode; | 1323 | struct inode *inode; |
1324 | struct cgroupfs_root *existing_root; | ||
1096 | int i; | 1325 | int i; |
1097 | 1326 | ||
1098 | BUG_ON(sb->s_root != NULL); | 1327 | BUG_ON(sb->s_root != NULL); |
@@ -1105,6 +1334,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1105 | mutex_lock(&inode->i_mutex); | 1334 | mutex_lock(&inode->i_mutex); |
1106 | mutex_lock(&cgroup_mutex); | 1335 | mutex_lock(&cgroup_mutex); |
1107 | 1336 | ||
1337 | if (strlen(root->name)) { | ||
1338 | /* Check for name clashes with existing mounts */ | ||
1339 | for_each_active_root(existing_root) { | ||
1340 | if (!strcmp(existing_root->name, root->name)) { | ||
1341 | ret = -EBUSY; | ||
1342 | mutex_unlock(&cgroup_mutex); | ||
1343 | mutex_unlock(&inode->i_mutex); | ||
1344 | goto drop_new_super; | ||
1345 | } | ||
1346 | } | ||
1347 | } | ||
1348 | |||
1108 | /* | 1349 | /* |
1109 | * We're accessing css_set_count without locking | 1350 | * We're accessing css_set_count without locking |
1110 | * css_set_lock here, but that's OK - it can only be | 1351 | * css_set_lock here, but that's OK - it can only be |
@@ -1123,7 +1364,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1123 | if (ret == -EBUSY) { | 1364 | if (ret == -EBUSY) { |
1124 | mutex_unlock(&cgroup_mutex); | 1365 | mutex_unlock(&cgroup_mutex); |
1125 | mutex_unlock(&inode->i_mutex); | 1366 | mutex_unlock(&inode->i_mutex); |
1126 | goto free_cg_links; | 1367 | free_cg_links(&tmp_cg_links); |
1368 | goto drop_new_super; | ||
1127 | } | 1369 | } |
1128 | 1370 | ||
1129 | /* EBUSY should be the only error here */ | 1371 | /* EBUSY should be the only error here */ |
@@ -1155,17 +1397,27 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1155 | BUG_ON(root->number_of_cgroups != 1); | 1397 | BUG_ON(root->number_of_cgroups != 1); |
1156 | 1398 | ||
1157 | cgroup_populate_dir(root_cgrp); | 1399 | cgroup_populate_dir(root_cgrp); |
1158 | mutex_unlock(&inode->i_mutex); | ||
1159 | mutex_unlock(&cgroup_mutex); | 1400 | mutex_unlock(&cgroup_mutex); |
1401 | mutex_unlock(&inode->i_mutex); | ||
1402 | } else { | ||
1403 | /* | ||
1404 | * We re-used an existing hierarchy - the new root (if | ||
1405 | * any) is not needed | ||
1406 | */ | ||
1407 | cgroup_drop_root(opts.new_root); | ||
1160 | } | 1408 | } |
1161 | 1409 | ||
1162 | simple_set_mnt(mnt, sb); | 1410 | simple_set_mnt(mnt, sb); |
1411 | kfree(opts.release_agent); | ||
1412 | kfree(opts.name); | ||
1163 | return 0; | 1413 | return 0; |
1164 | 1414 | ||
1165 | free_cg_links: | ||
1166 | free_cg_links(&tmp_cg_links); | ||
1167 | drop_new_super: | 1415 | drop_new_super: |
1168 | deactivate_locked_super(sb); | 1416 | deactivate_locked_super(sb); |
1417 | out_err: | ||
1418 | kfree(opts.release_agent); | ||
1419 | kfree(opts.name); | ||
1420 | |||
1169 | return ret; | 1421 | return ret; |
1170 | } | 1422 | } |
1171 | 1423 | ||
@@ -1211,7 +1463,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1211 | mutex_unlock(&cgroup_mutex); | 1463 | mutex_unlock(&cgroup_mutex); |
1212 | 1464 | ||
1213 | kill_litter_super(sb); | 1465 | kill_litter_super(sb); |
1214 | kfree(root); | 1466 | cgroup_drop_root(root); |
1215 | } | 1467 | } |
1216 | 1468 | ||
1217 | static struct file_system_type cgroup_fs_type = { | 1469 | static struct file_system_type cgroup_fs_type = { |
@@ -1276,27 +1528,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
1276 | return 0; | 1528 | return 0; |
1277 | } | 1529 | } |
1278 | 1530 | ||
1279 | /* | ||
1280 | * Return the first subsystem attached to a cgroup's hierarchy, and | ||
1281 | * its subsystem id. | ||
1282 | */ | ||
1283 | |||
1284 | static void get_first_subsys(const struct cgroup *cgrp, | ||
1285 | struct cgroup_subsys_state **css, int *subsys_id) | ||
1286 | { | ||
1287 | const struct cgroupfs_root *root = cgrp->root; | ||
1288 | const struct cgroup_subsys *test_ss; | ||
1289 | BUG_ON(list_empty(&root->subsys_list)); | ||
1290 | test_ss = list_entry(root->subsys_list.next, | ||
1291 | struct cgroup_subsys, sibling); | ||
1292 | if (css) { | ||
1293 | *css = cgrp->subsys[test_ss->subsys_id]; | ||
1294 | BUG_ON(!*css); | ||
1295 | } | ||
1296 | if (subsys_id) | ||
1297 | *subsys_id = test_ss->subsys_id; | ||
1298 | } | ||
1299 | |||
1300 | /** | 1531 | /** |
1301 | * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' | 1532 | * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' |
1302 | * @cgrp: the cgroup the task is attaching to | 1533 | * @cgrp: the cgroup the task is attaching to |
@@ -1313,18 +1544,15 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1313 | struct css_set *cg; | 1544 | struct css_set *cg; |
1314 | struct css_set *newcg; | 1545 | struct css_set *newcg; |
1315 | struct cgroupfs_root *root = cgrp->root; | 1546 | struct cgroupfs_root *root = cgrp->root; |
1316 | int subsys_id; | ||
1317 | |||
1318 | get_first_subsys(cgrp, NULL, &subsys_id); | ||
1319 | 1547 | ||
1320 | /* Nothing to do if the task is already in that cgroup */ | 1548 | /* Nothing to do if the task is already in that cgroup */ |
1321 | oldcgrp = task_cgroup(tsk, subsys_id); | 1549 | oldcgrp = task_cgroup_from_root(tsk, root); |
1322 | if (cgrp == oldcgrp) | 1550 | if (cgrp == oldcgrp) |
1323 | return 0; | 1551 | return 0; |
1324 | 1552 | ||
1325 | for_each_subsys(root, ss) { | 1553 | for_each_subsys(root, ss) { |
1326 | if (ss->can_attach) { | 1554 | if (ss->can_attach) { |
1327 | retval = ss->can_attach(ss, cgrp, tsk); | 1555 | retval = ss->can_attach(ss, cgrp, tsk, false); |
1328 | if (retval) | 1556 | if (retval) |
1329 | return retval; | 1557 | return retval; |
1330 | } | 1558 | } |
@@ -1362,7 +1590,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1362 | 1590 | ||
1363 | for_each_subsys(root, ss) { | 1591 | for_each_subsys(root, ss) { |
1364 | if (ss->attach) | 1592 | if (ss->attach) |
1365 | ss->attach(ss, cgrp, oldcgrp, tsk); | 1593 | ss->attach(ss, cgrp, oldcgrp, tsk, false); |
1366 | } | 1594 | } |
1367 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | 1595 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); |
1368 | synchronize_rcu(); | 1596 | synchronize_rcu(); |
@@ -1423,15 +1651,6 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) | |||
1423 | return ret; | 1651 | return ret; |
1424 | } | 1652 | } |
1425 | 1653 | ||
1426 | /* The various types of files and directories in a cgroup file system */ | ||
1427 | enum cgroup_filetype { | ||
1428 | FILE_ROOT, | ||
1429 | FILE_DIR, | ||
1430 | FILE_TASKLIST, | ||
1431 | FILE_NOTIFY_ON_RELEASE, | ||
1432 | FILE_RELEASE_AGENT, | ||
1433 | }; | ||
1434 | |||
1435 | /** | 1654 | /** |
1436 | * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. | 1655 | * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. |
1437 | * @cgrp: the cgroup to be checked for liveness | 1656 | * @cgrp: the cgroup to be checked for liveness |
@@ -1876,7 +2095,7 @@ int cgroup_task_count(const struct cgroup *cgrp) | |||
1876 | * the start of a css_set | 2095 | * the start of a css_set |
1877 | */ | 2096 | */ |
1878 | static void cgroup_advance_iter(struct cgroup *cgrp, | 2097 | static void cgroup_advance_iter(struct cgroup *cgrp, |
1879 | struct cgroup_iter *it) | 2098 | struct cgroup_iter *it) |
1880 | { | 2099 | { |
1881 | struct list_head *l = it->cg_link; | 2100 | struct list_head *l = it->cg_link; |
1882 | struct cg_cgroup_link *link; | 2101 | struct cg_cgroup_link *link; |
@@ -2129,7 +2348,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
2129 | } | 2348 | } |
2130 | 2349 | ||
2131 | /* | 2350 | /* |
2132 | * Stuff for reading the 'tasks' file. | 2351 | * Stuff for reading the 'tasks'/'procs' files. |
2133 | * | 2352 | * |
2134 | * Reading this file can return large amounts of data if a cgroup has | 2353 | * Reading this file can return large amounts of data if a cgroup has |
2135 | * *lots* of attached tasks. So it may need several calls to read(), | 2354 | * *lots* of attached tasks. So it may need several calls to read(), |
@@ -2139,27 +2358,196 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
2139 | */ | 2358 | */ |
2140 | 2359 | ||
2141 | /* | 2360 | /* |
2142 | * Load into 'pidarray' up to 'npids' of the tasks using cgroup | 2361 | * The following two functions "fix" the issue where there are more pids |
2143 | * 'cgrp'. Return actual number of pids loaded. No need to | 2362 | * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. |
2144 | * task_lock(p) when reading out p->cgroup, since we're in an RCU | 2363 | * TODO: replace with a kernel-wide solution to this problem |
2145 | * read section, so the css_set can't go away, and is | 2364 | */ |
2146 | * immutable after creation. | 2365 | #define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2)) |
2366 | static void *pidlist_allocate(int count) | ||
2367 | { | ||
2368 | if (PIDLIST_TOO_LARGE(count)) | ||
2369 | return vmalloc(count * sizeof(pid_t)); | ||
2370 | else | ||
2371 | return kmalloc(count * sizeof(pid_t), GFP_KERNEL); | ||
2372 | } | ||
2373 | static void pidlist_free(void *p) | ||
2374 | { | ||
2375 | if (is_vmalloc_addr(p)) | ||
2376 | vfree(p); | ||
2377 | else | ||
2378 | kfree(p); | ||
2379 | } | ||
2380 | static void *pidlist_resize(void *p, int newcount) | ||
2381 | { | ||
2382 | void *newlist; | ||
2383 | /* note: if new alloc fails, old p will still be valid either way */ | ||
2384 | if (is_vmalloc_addr(p)) { | ||
2385 | newlist = vmalloc(newcount * sizeof(pid_t)); | ||
2386 | if (!newlist) | ||
2387 | return NULL; | ||
2388 | memcpy(newlist, p, newcount * sizeof(pid_t)); | ||
2389 | vfree(p); | ||
2390 | } else { | ||
2391 | newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL); | ||
2392 | } | ||
2393 | return newlist; | ||
2394 | } | ||
2395 | |||
2396 | /* | ||
2397 | * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries | ||
2398 | * If the new stripped list is sufficiently smaller and there's enough memory | ||
2399 | * to allocate a new buffer, will let go of the unneeded memory. Returns the | ||
2400 | * number of unique elements. | ||
2401 | */ | ||
2402 | /* is the size difference enough that we should re-allocate the array? */ | ||
2403 | #define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new)) | ||
2404 | static int pidlist_uniq(pid_t **p, int length) | ||
2405 | { | ||
2406 | int src, dest = 1; | ||
2407 | pid_t *list = *p; | ||
2408 | pid_t *newlist; | ||
2409 | |||
2410 | /* | ||
2411 | * we presume the 0th element is unique, so i starts at 1. trivial | ||
2412 | * edge cases first; no work needs to be done for either | ||
2413 | */ | ||
2414 | if (length == 0 || length == 1) | ||
2415 | return length; | ||
2416 | /* src and dest walk down the list; dest counts unique elements */ | ||
2417 | for (src = 1; src < length; src++) { | ||
2418 | /* find next unique element */ | ||
2419 | while (list[src] == list[src-1]) { | ||
2420 | src++; | ||
2421 | if (src == length) | ||
2422 | goto after; | ||
2423 | } | ||
2424 | /* dest always points to where the next unique element goes */ | ||
2425 | list[dest] = list[src]; | ||
2426 | dest++; | ||
2427 | } | ||
2428 | after: | ||
2429 | /* | ||
2430 | * if the length difference is large enough, we want to allocate a | ||
2431 | * smaller buffer to save memory. if this fails due to out of memory, | ||
2432 | * we'll just stay with what we've got. | ||
2433 | */ | ||
2434 | if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) { | ||
2435 | newlist = pidlist_resize(list, dest); | ||
2436 | if (newlist) | ||
2437 | *p = newlist; | ||
2438 | } | ||
2439 | return dest; | ||
2440 | } | ||
2441 | |||
2442 | static int cmppid(const void *a, const void *b) | ||
2443 | { | ||
2444 | return *(pid_t *)a - *(pid_t *)b; | ||
2445 | } | ||
2446 | |||
2447 | /* | ||
2448 | * find the appropriate pidlist for our purpose (given procs vs tasks) | ||
2449 | * returns with the lock on that pidlist already held, and takes care | ||
2450 | * of the use count, or returns NULL with no locks held if we're out of | ||
2451 | * memory. | ||
2147 | */ | 2452 | */ |
2148 | static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) | 2453 | static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, |
2454 | enum cgroup_filetype type) | ||
2149 | { | 2455 | { |
2150 | int n = 0, pid; | 2456 | struct cgroup_pidlist *l; |
2457 | /* don't need task_nsproxy() if we're looking at ourself */ | ||
2458 | struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns); | ||
2459 | /* | ||
2460 | * We can't drop the pidlist_mutex before taking the l->mutex in case | ||
2461 | * the last ref-holder is trying to remove l from the list at the same | ||
2462 | * time. Holding the pidlist_mutex precludes somebody taking whichever | ||
2463 | * list we find out from under us - compare release_pid_array(). | ||
2464 | */ | ||
2465 | mutex_lock(&cgrp->pidlist_mutex); | ||
2466 | list_for_each_entry(l, &cgrp->pidlists, links) { | ||
2467 | if (l->key.type == type && l->key.ns == ns) { | ||
2468 | /* found a matching list - drop the extra refcount */ | ||
2469 | put_pid_ns(ns); | ||
2470 | /* make sure l doesn't vanish out from under us */ | ||
2471 | down_write(&l->mutex); | ||
2472 | mutex_unlock(&cgrp->pidlist_mutex); | ||
2473 | l->use_count++; | ||
2474 | return l; | ||
2475 | } | ||
2476 | } | ||
2477 | /* entry not found; create a new one */ | ||
2478 | l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); | ||
2479 | if (!l) { | ||
2480 | mutex_unlock(&cgrp->pidlist_mutex); | ||
2481 | put_pid_ns(ns); | ||
2482 | return l; | ||
2483 | } | ||
2484 | init_rwsem(&l->mutex); | ||
2485 | down_write(&l->mutex); | ||
2486 | l->key.type = type; | ||
2487 | l->key.ns = ns; | ||
2488 | l->use_count = 0; /* don't increment here */ | ||
2489 | l->list = NULL; | ||
2490 | l->owner = cgrp; | ||
2491 | list_add(&l->links, &cgrp->pidlists); | ||
2492 | mutex_unlock(&cgrp->pidlist_mutex); | ||
2493 | return l; | ||
2494 | } | ||
2495 | |||
2496 | /* | ||
2497 | * Load a cgroup's pidarray with either procs' tgids or tasks' pids | ||
2498 | */ | ||
2499 | static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | ||
2500 | struct cgroup_pidlist **lp) | ||
2501 | { | ||
2502 | pid_t *array; | ||
2503 | int length; | ||
2504 | int pid, n = 0; /* used for populating the array */ | ||
2151 | struct cgroup_iter it; | 2505 | struct cgroup_iter it; |
2152 | struct task_struct *tsk; | 2506 | struct task_struct *tsk; |
2507 | struct cgroup_pidlist *l; | ||
2508 | |||
2509 | /* | ||
2510 | * If cgroup gets more users after we read count, we won't have | ||
2511 | * enough space - tough. This race is indistinguishable to the | ||
2512 | * caller from the case that the additional cgroup users didn't | ||
2513 | * show up until sometime later on. | ||
2514 | */ | ||
2515 | length = cgroup_task_count(cgrp); | ||
2516 | array = pidlist_allocate(length); | ||
2517 | if (!array) | ||
2518 | return -ENOMEM; | ||
2519 | /* now, populate the array */ | ||
2153 | cgroup_iter_start(cgrp, &it); | 2520 | cgroup_iter_start(cgrp, &it); |
2154 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 2521 | while ((tsk = cgroup_iter_next(cgrp, &it))) { |
2155 | if (unlikely(n == npids)) | 2522 | if (unlikely(n == length)) |
2156 | break; | 2523 | break; |
2157 | pid = task_pid_vnr(tsk); | 2524 | /* get tgid or pid for procs or tasks file respectively */ |
2158 | if (pid > 0) | 2525 | if (type == CGROUP_FILE_PROCS) |
2159 | pidarray[n++] = pid; | 2526 | pid = task_tgid_vnr(tsk); |
2527 | else | ||
2528 | pid = task_pid_vnr(tsk); | ||
2529 | if (pid > 0) /* make sure to only use valid results */ | ||
2530 | array[n++] = pid; | ||
2160 | } | 2531 | } |
2161 | cgroup_iter_end(cgrp, &it); | 2532 | cgroup_iter_end(cgrp, &it); |
2162 | return n; | 2533 | length = n; |
2534 | /* now sort & (if procs) strip out duplicates */ | ||
2535 | sort(array, length, sizeof(pid_t), cmppid, NULL); | ||
2536 | if (type == CGROUP_FILE_PROCS) | ||
2537 | length = pidlist_uniq(&array, length); | ||
2538 | l = cgroup_pidlist_find(cgrp, type); | ||
2539 | if (!l) { | ||
2540 | pidlist_free(array); | ||
2541 | return -ENOMEM; | ||
2542 | } | ||
2543 | /* store array, freeing old if necessary - lock already held */ | ||
2544 | pidlist_free(l->list); | ||
2545 | l->list = array; | ||
2546 | l->length = length; | ||
2547 | l->use_count++; | ||
2548 | up_write(&l->mutex); | ||
2549 | *lp = l; | ||
2550 | return 0; | ||
2163 | } | 2551 | } |
2164 | 2552 | ||
2165 | /** | 2553 | /** |
@@ -2216,37 +2604,14 @@ err: | |||
2216 | return ret; | 2604 | return ret; |
2217 | } | 2605 | } |
2218 | 2606 | ||
2219 | /* | ||
2220 | * Cache pids for all threads in the same pid namespace that are | ||
2221 | * opening the same "tasks" file. | ||
2222 | */ | ||
2223 | struct cgroup_pids { | ||
2224 | /* The node in cgrp->pids_list */ | ||
2225 | struct list_head list; | ||
2226 | /* The cgroup those pids belong to */ | ||
2227 | struct cgroup *cgrp; | ||
2228 | /* The namepsace those pids belong to */ | ||
2229 | struct pid_namespace *ns; | ||
2230 | /* Array of process ids in the cgroup */ | ||
2231 | pid_t *tasks_pids; | ||
2232 | /* How many files are using the this tasks_pids array */ | ||
2233 | int use_count; | ||
2234 | /* Length of the current tasks_pids array */ | ||
2235 | int length; | ||
2236 | }; | ||
2237 | |||
2238 | static int cmppid(const void *a, const void *b) | ||
2239 | { | ||
2240 | return *(pid_t *)a - *(pid_t *)b; | ||
2241 | } | ||
2242 | 2607 | ||
2243 | /* | 2608 | /* |
2244 | * seq_file methods for the "tasks" file. The seq_file position is the | 2609 | * seq_file methods for the tasks/procs files. The seq_file position is the |
2245 | * next pid to display; the seq_file iterator is a pointer to the pid | 2610 | * next pid to display; the seq_file iterator is a pointer to the pid |
2246 | * in the cgroup->tasks_pids array. | 2611 | * in the cgroup->l->list array. |
2247 | */ | 2612 | */ |
2248 | 2613 | ||
2249 | static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) | 2614 | static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) |
2250 | { | 2615 | { |
2251 | /* | 2616 | /* |
2252 | * Initially we receive a position value that corresponds to | 2617 | * Initially we receive a position value that corresponds to |
@@ -2254,48 +2619,45 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) | |||
2254 | * after a seek to the start). Use a binary-search to find the | 2619 | * after a seek to the start). Use a binary-search to find the |
2255 | * next pid to display, if any | 2620 | * next pid to display, if any |
2256 | */ | 2621 | */ |
2257 | struct cgroup_pids *cp = s->private; | 2622 | struct cgroup_pidlist *l = s->private; |
2258 | struct cgroup *cgrp = cp->cgrp; | ||
2259 | int index = 0, pid = *pos; | 2623 | int index = 0, pid = *pos; |
2260 | int *iter; | 2624 | int *iter; |
2261 | 2625 | ||
2262 | down_read(&cgrp->pids_mutex); | 2626 | down_read(&l->mutex); |
2263 | if (pid) { | 2627 | if (pid) { |
2264 | int end = cp->length; | 2628 | int end = l->length; |
2265 | 2629 | ||
2266 | while (index < end) { | 2630 | while (index < end) { |
2267 | int mid = (index + end) / 2; | 2631 | int mid = (index + end) / 2; |
2268 | if (cp->tasks_pids[mid] == pid) { | 2632 | if (l->list[mid] == pid) { |
2269 | index = mid; | 2633 | index = mid; |
2270 | break; | 2634 | break; |
2271 | } else if (cp->tasks_pids[mid] <= pid) | 2635 | } else if (l->list[mid] <= pid) |
2272 | index = mid + 1; | 2636 | index = mid + 1; |
2273 | else | 2637 | else |
2274 | end = mid; | 2638 | end = mid; |
2275 | } | 2639 | } |
2276 | } | 2640 | } |
2277 | /* If we're off the end of the array, we're done */ | 2641 | /* If we're off the end of the array, we're done */ |
2278 | if (index >= cp->length) | 2642 | if (index >= l->length) |
2279 | return NULL; | 2643 | return NULL; |
2280 | /* Update the abstract position to be the actual pid that we found */ | 2644 | /* Update the abstract position to be the actual pid that we found */ |
2281 | iter = cp->tasks_pids + index; | 2645 | iter = l->list + index; |
2282 | *pos = *iter; | 2646 | *pos = *iter; |
2283 | return iter; | 2647 | return iter; |
2284 | } | 2648 | } |
2285 | 2649 | ||
2286 | static void cgroup_tasks_stop(struct seq_file *s, void *v) | 2650 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) |
2287 | { | 2651 | { |
2288 | struct cgroup_pids *cp = s->private; | 2652 | struct cgroup_pidlist *l = s->private; |
2289 | struct cgroup *cgrp = cp->cgrp; | 2653 | up_read(&l->mutex); |
2290 | up_read(&cgrp->pids_mutex); | ||
2291 | } | 2654 | } |
2292 | 2655 | ||
2293 | static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) | 2656 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) |
2294 | { | 2657 | { |
2295 | struct cgroup_pids *cp = s->private; | 2658 | struct cgroup_pidlist *l = s->private; |
2296 | int *p = v; | 2659 | pid_t *p = v; |
2297 | int *end = cp->tasks_pids + cp->length; | 2660 | pid_t *end = l->list + l->length; |
2298 | |||
2299 | /* | 2661 | /* |
2300 | * Advance to the next pid in the array. If this goes off the | 2662 | * Advance to the next pid in the array. If this goes off the |
2301 | * end, we're done | 2663 | * end, we're done |
@@ -2309,124 +2671,107 @@ static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) | |||
2309 | } | 2671 | } |
2310 | } | 2672 | } |
2311 | 2673 | ||
2312 | static int cgroup_tasks_show(struct seq_file *s, void *v) | 2674 | static int cgroup_pidlist_show(struct seq_file *s, void *v) |
2313 | { | 2675 | { |
2314 | return seq_printf(s, "%d\n", *(int *)v); | 2676 | return seq_printf(s, "%d\n", *(int *)v); |
2315 | } | 2677 | } |
2316 | 2678 | ||
2317 | static const struct seq_operations cgroup_tasks_seq_operations = { | 2679 | /* |
2318 | .start = cgroup_tasks_start, | 2680 | * seq_operations functions for iterating on pidlists through seq_file - |
2319 | .stop = cgroup_tasks_stop, | 2681 | * independent of whether it's tasks or procs |
2320 | .next = cgroup_tasks_next, | 2682 | */ |
2321 | .show = cgroup_tasks_show, | 2683 | static const struct seq_operations cgroup_pidlist_seq_operations = { |
2684 | .start = cgroup_pidlist_start, | ||
2685 | .stop = cgroup_pidlist_stop, | ||
2686 | .next = cgroup_pidlist_next, | ||
2687 | .show = cgroup_pidlist_show, | ||
2322 | }; | 2688 | }; |
2323 | 2689 | ||
2324 | static void release_cgroup_pid_array(struct cgroup_pids *cp) | 2690 | static void cgroup_release_pid_array(struct cgroup_pidlist *l) |
2325 | { | 2691 | { |
2326 | struct cgroup *cgrp = cp->cgrp; | 2692 | /* |
2327 | 2693 | * the case where we're the last user of this particular pidlist will | |
2328 | down_write(&cgrp->pids_mutex); | 2694 | * have us remove it from the cgroup's list, which entails taking the |
2329 | BUG_ON(!cp->use_count); | 2695 | * mutex. since in pidlist_find the pidlist->lock depends on cgroup-> |
2330 | if (!--cp->use_count) { | 2696 | * pidlist_mutex, we have to take pidlist_mutex first. |
2331 | list_del(&cp->list); | 2697 | */ |
2332 | put_pid_ns(cp->ns); | 2698 | mutex_lock(&l->owner->pidlist_mutex); |
2333 | kfree(cp->tasks_pids); | 2699 | down_write(&l->mutex); |
2334 | kfree(cp); | 2700 | BUG_ON(!l->use_count); |
2701 | if (!--l->use_count) { | ||
2702 | /* we're the last user if refcount is 0; remove and free */ | ||
2703 | list_del(&l->links); | ||
2704 | mutex_unlock(&l->owner->pidlist_mutex); | ||
2705 | pidlist_free(l->list); | ||
2706 | put_pid_ns(l->key.ns); | ||
2707 | up_write(&l->mutex); | ||
2708 | kfree(l); | ||
2709 | return; | ||
2335 | } | 2710 | } |
2336 | up_write(&cgrp->pids_mutex); | 2711 | mutex_unlock(&l->owner->pidlist_mutex); |
2712 | up_write(&l->mutex); | ||
2337 | } | 2713 | } |
2338 | 2714 | ||
2339 | static int cgroup_tasks_release(struct inode *inode, struct file *file) | 2715 | static int cgroup_pidlist_release(struct inode *inode, struct file *file) |
2340 | { | 2716 | { |
2341 | struct seq_file *seq; | 2717 | struct cgroup_pidlist *l; |
2342 | struct cgroup_pids *cp; | ||
2343 | |||
2344 | if (!(file->f_mode & FMODE_READ)) | 2718 | if (!(file->f_mode & FMODE_READ)) |
2345 | return 0; | 2719 | return 0; |
2346 | 2720 | /* | |
2347 | seq = file->private_data; | 2721 | * the seq_file will only be initialized if the file was opened for |
2348 | cp = seq->private; | 2722 | * reading; hence we check if it's not null only in that case. |
2349 | 2723 | */ | |
2350 | release_cgroup_pid_array(cp); | 2724 | l = ((struct seq_file *)file->private_data)->private; |
2725 | cgroup_release_pid_array(l); | ||
2351 | return seq_release(inode, file); | 2726 | return seq_release(inode, file); |
2352 | } | 2727 | } |
2353 | 2728 | ||
2354 | static struct file_operations cgroup_tasks_operations = { | 2729 | static const struct file_operations cgroup_pidlist_operations = { |
2355 | .read = seq_read, | 2730 | .read = seq_read, |
2356 | .llseek = seq_lseek, | 2731 | .llseek = seq_lseek, |
2357 | .write = cgroup_file_write, | 2732 | .write = cgroup_file_write, |
2358 | .release = cgroup_tasks_release, | 2733 | .release = cgroup_pidlist_release, |
2359 | }; | 2734 | }; |
2360 | 2735 | ||
2361 | /* | 2736 | /* |
2362 | * Handle an open on 'tasks' file. Prepare an array containing the | 2737 | * The following functions handle opens on a file that displays a pidlist |
2363 | * process id's of tasks currently attached to the cgroup being opened. | 2738 | * (tasks or procs). Prepare an array of the process/thread IDs of whoever's |
2739 | * in the cgroup. | ||
2364 | */ | 2740 | */ |
2365 | 2741 | /* helper function for the two below it */ | |
2366 | static int cgroup_tasks_open(struct inode *unused, struct file *file) | 2742 | static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type) |
2367 | { | 2743 | { |
2368 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2744 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
2369 | struct pid_namespace *ns = current->nsproxy->pid_ns; | 2745 | struct cgroup_pidlist *l; |
2370 | struct cgroup_pids *cp; | ||
2371 | pid_t *pidarray; | ||
2372 | int npids; | ||
2373 | int retval; | 2746 | int retval; |
2374 | 2747 | ||
2375 | /* Nothing to do for write-only files */ | 2748 | /* Nothing to do for write-only files */ |
2376 | if (!(file->f_mode & FMODE_READ)) | 2749 | if (!(file->f_mode & FMODE_READ)) |
2377 | return 0; | 2750 | return 0; |
2378 | 2751 | ||
2379 | /* | 2752 | /* have the array populated */ |
2380 | * If cgroup gets more users after we read count, we won't have | 2753 | retval = pidlist_array_load(cgrp, type, &l); |
2381 | * enough space - tough. This race is indistinguishable to the | 2754 | if (retval) |
2382 | * caller from the case that the additional cgroup users didn't | 2755 | return retval; |
2383 | * show up until sometime later on. | 2756 | /* configure file information */ |
2384 | */ | 2757 | file->f_op = &cgroup_pidlist_operations; |
2385 | npids = cgroup_task_count(cgrp); | ||
2386 | pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); | ||
2387 | if (!pidarray) | ||
2388 | return -ENOMEM; | ||
2389 | npids = pid_array_load(pidarray, npids, cgrp); | ||
2390 | sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); | ||
2391 | |||
2392 | /* | ||
2393 | * Store the array in the cgroup, freeing the old | ||
2394 | * array if necessary | ||
2395 | */ | ||
2396 | down_write(&cgrp->pids_mutex); | ||
2397 | |||
2398 | list_for_each_entry(cp, &cgrp->pids_list, list) { | ||
2399 | if (ns == cp->ns) | ||
2400 | goto found; | ||
2401 | } | ||
2402 | |||
2403 | cp = kzalloc(sizeof(*cp), GFP_KERNEL); | ||
2404 | if (!cp) { | ||
2405 | up_write(&cgrp->pids_mutex); | ||
2406 | kfree(pidarray); | ||
2407 | return -ENOMEM; | ||
2408 | } | ||
2409 | cp->cgrp = cgrp; | ||
2410 | cp->ns = ns; | ||
2411 | get_pid_ns(ns); | ||
2412 | list_add(&cp->list, &cgrp->pids_list); | ||
2413 | found: | ||
2414 | kfree(cp->tasks_pids); | ||
2415 | cp->tasks_pids = pidarray; | ||
2416 | cp->length = npids; | ||
2417 | cp->use_count++; | ||
2418 | up_write(&cgrp->pids_mutex); | ||
2419 | |||
2420 | file->f_op = &cgroup_tasks_operations; | ||
2421 | 2758 | ||
2422 | retval = seq_open(file, &cgroup_tasks_seq_operations); | 2759 | retval = seq_open(file, &cgroup_pidlist_seq_operations); |
2423 | if (retval) { | 2760 | if (retval) { |
2424 | release_cgroup_pid_array(cp); | 2761 | cgroup_release_pid_array(l); |
2425 | return retval; | 2762 | return retval; |
2426 | } | 2763 | } |
2427 | ((struct seq_file *)file->private_data)->private = cp; | 2764 | ((struct seq_file *)file->private_data)->private = l; |
2428 | return 0; | 2765 | return 0; |
2429 | } | 2766 | } |
2767 | static int cgroup_tasks_open(struct inode *unused, struct file *file) | ||
2768 | { | ||
2769 | return cgroup_pidlist_open(file, CGROUP_FILE_TASKS); | ||
2770 | } | ||
2771 | static int cgroup_procs_open(struct inode *unused, struct file *file) | ||
2772 | { | ||
2773 | return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); | ||
2774 | } | ||
2430 | 2775 | ||
2431 | static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, | 2776 | static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, |
2432 | struct cftype *cft) | 2777 | struct cftype *cft) |
@@ -2449,21 +2794,27 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp, | |||
2449 | /* | 2794 | /* |
2450 | * for the common functions, 'private' gives the type of file | 2795 | * for the common functions, 'private' gives the type of file |
2451 | */ | 2796 | */ |
2797 | /* for hysterical raisins, we can't put this on the older files */ | ||
2798 | #define CGROUP_FILE_GENERIC_PREFIX "cgroup." | ||
2452 | static struct cftype files[] = { | 2799 | static struct cftype files[] = { |
2453 | { | 2800 | { |
2454 | .name = "tasks", | 2801 | .name = "tasks", |
2455 | .open = cgroup_tasks_open, | 2802 | .open = cgroup_tasks_open, |
2456 | .write_u64 = cgroup_tasks_write, | 2803 | .write_u64 = cgroup_tasks_write, |
2457 | .release = cgroup_tasks_release, | 2804 | .release = cgroup_pidlist_release, |
2458 | .private = FILE_TASKLIST, | ||
2459 | .mode = S_IRUGO | S_IWUSR, | 2805 | .mode = S_IRUGO | S_IWUSR, |
2460 | }, | 2806 | }, |
2461 | 2807 | { | |
2808 | .name = CGROUP_FILE_GENERIC_PREFIX "procs", | ||
2809 | .open = cgroup_procs_open, | ||
2810 | /* .write_u64 = cgroup_procs_write, TODO */ | ||
2811 | .release = cgroup_pidlist_release, | ||
2812 | .mode = S_IRUGO, | ||
2813 | }, | ||
2462 | { | 2814 | { |
2463 | .name = "notify_on_release", | 2815 | .name = "notify_on_release", |
2464 | .read_u64 = cgroup_read_notify_on_release, | 2816 | .read_u64 = cgroup_read_notify_on_release, |
2465 | .write_u64 = cgroup_write_notify_on_release, | 2817 | .write_u64 = cgroup_write_notify_on_release, |
2466 | .private = FILE_NOTIFY_ON_RELEASE, | ||
2467 | }, | 2818 | }, |
2468 | }; | 2819 | }; |
2469 | 2820 | ||
@@ -2472,7 +2823,6 @@ static struct cftype cft_release_agent = { | |||
2472 | .read_seq_string = cgroup_release_agent_show, | 2823 | .read_seq_string = cgroup_release_agent_show, |
2473 | .write_string = cgroup_release_agent_write, | 2824 | .write_string = cgroup_release_agent_write, |
2474 | .max_write_len = PATH_MAX, | 2825 | .max_write_len = PATH_MAX, |
2475 | .private = FILE_RELEASE_AGENT, | ||
2476 | }; | 2826 | }; |
2477 | 2827 | ||
2478 | static int cgroup_populate_dir(struct cgroup *cgrp) | 2828 | static int cgroup_populate_dir(struct cgroup *cgrp) |
@@ -2879,6 +3229,7 @@ int __init cgroup_init_early(void) | |||
2879 | init_task.cgroups = &init_css_set; | 3229 | init_task.cgroups = &init_css_set; |
2880 | 3230 | ||
2881 | init_css_set_link.cg = &init_css_set; | 3231 | init_css_set_link.cg = &init_css_set; |
3232 | init_css_set_link.cgrp = dummytop; | ||
2882 | list_add(&init_css_set_link.cgrp_link_list, | 3233 | list_add(&init_css_set_link.cgrp_link_list, |
2883 | &rootnode.top_cgroup.css_sets); | 3234 | &rootnode.top_cgroup.css_sets); |
2884 | list_add(&init_css_set_link.cg_link_list, | 3235 | list_add(&init_css_set_link.cg_link_list, |
@@ -2933,7 +3284,7 @@ int __init cgroup_init(void) | |||
2933 | /* Add init_css_set to the hash table */ | 3284 | /* Add init_css_set to the hash table */ |
2934 | hhead = css_set_hash(init_css_set.subsys); | 3285 | hhead = css_set_hash(init_css_set.subsys); |
2935 | hlist_add_head(&init_css_set.hlist, hhead); | 3286 | hlist_add_head(&init_css_set.hlist, hhead); |
2936 | 3287 | BUG_ON(!init_root_id(&rootnode)); | |
2937 | err = register_filesystem(&cgroup_fs_type); | 3288 | err = register_filesystem(&cgroup_fs_type); |
2938 | if (err < 0) | 3289 | if (err < 0) |
2939 | goto out; | 3290 | goto out; |
@@ -2986,15 +3337,16 @@ static int proc_cgroup_show(struct seq_file *m, void *v) | |||
2986 | for_each_active_root(root) { | 3337 | for_each_active_root(root) { |
2987 | struct cgroup_subsys *ss; | 3338 | struct cgroup_subsys *ss; |
2988 | struct cgroup *cgrp; | 3339 | struct cgroup *cgrp; |
2989 | int subsys_id; | ||
2990 | int count = 0; | 3340 | int count = 0; |
2991 | 3341 | ||
2992 | seq_printf(m, "%lu:", root->subsys_bits); | 3342 | seq_printf(m, "%d:", root->hierarchy_id); |
2993 | for_each_subsys(root, ss) | 3343 | for_each_subsys(root, ss) |
2994 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | 3344 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); |
3345 | if (strlen(root->name)) | ||
3346 | seq_printf(m, "%sname=%s", count ? "," : "", | ||
3347 | root->name); | ||
2995 | seq_putc(m, ':'); | 3348 | seq_putc(m, ':'); |
2996 | get_first_subsys(&root->top_cgroup, NULL, &subsys_id); | 3349 | cgrp = task_cgroup_from_root(tsk, root); |
2997 | cgrp = task_cgroup(tsk, subsys_id); | ||
2998 | retval = cgroup_path(cgrp, buf, PAGE_SIZE); | 3350 | retval = cgroup_path(cgrp, buf, PAGE_SIZE); |
2999 | if (retval < 0) | 3351 | if (retval < 0) |
3000 | goto out_unlock; | 3352 | goto out_unlock; |
@@ -3033,8 +3385,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) | |||
3033 | mutex_lock(&cgroup_mutex); | 3385 | mutex_lock(&cgroup_mutex); |
3034 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 3386 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
3035 | struct cgroup_subsys *ss = subsys[i]; | 3387 | struct cgroup_subsys *ss = subsys[i]; |
3036 | seq_printf(m, "%s\t%lu\t%d\t%d\n", | 3388 | seq_printf(m, "%s\t%d\t%d\t%d\n", |
3037 | ss->name, ss->root->subsys_bits, | 3389 | ss->name, ss->root->hierarchy_id, |
3038 | ss->root->number_of_cgroups, !ss->disabled); | 3390 | ss->root->number_of_cgroups, !ss->disabled); |
3039 | } | 3391 | } |
3040 | mutex_unlock(&cgroup_mutex); | 3392 | mutex_unlock(&cgroup_mutex); |
@@ -3320,13 +3672,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task) | |||
3320 | { | 3672 | { |
3321 | int ret; | 3673 | int ret; |
3322 | struct cgroup *target; | 3674 | struct cgroup *target; |
3323 | int subsys_id; | ||
3324 | 3675 | ||
3325 | if (cgrp == dummytop) | 3676 | if (cgrp == dummytop) |
3326 | return 1; | 3677 | return 1; |
3327 | 3678 | ||
3328 | get_first_subsys(cgrp, NULL, &subsys_id); | 3679 | target = task_cgroup_from_root(task, cgrp->root); |
3329 | target = task_cgroup(task, subsys_id); | ||
3330 | while (cgrp != target && cgrp!= cgrp->top_cgroup) | 3680 | while (cgrp != target && cgrp!= cgrp->top_cgroup) |
3331 | cgrp = cgrp->parent; | 3681 | cgrp = cgrp->parent; |
3332 | ret = (cgrp == target); | 3682 | ret = (cgrp == target); |
@@ -3693,3 +4043,154 @@ css_get_next(struct cgroup_subsys *ss, int id, | |||
3693 | return ret; | 4043 | return ret; |
3694 | } | 4044 | } |
3695 | 4045 | ||
4046 | #ifdef CONFIG_CGROUP_DEBUG | ||
4047 | static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, | ||
4048 | struct cgroup *cont) | ||
4049 | { | ||
4050 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); | ||
4051 | |||
4052 | if (!css) | ||
4053 | return ERR_PTR(-ENOMEM); | ||
4054 | |||
4055 | return css; | ||
4056 | } | ||
4057 | |||
4058 | static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont) | ||
4059 | { | ||
4060 | kfree(cont->subsys[debug_subsys_id]); | ||
4061 | } | ||
4062 | |||
4063 | static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft) | ||
4064 | { | ||
4065 | return atomic_read(&cont->count); | ||
4066 | } | ||
4067 | |||
4068 | static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft) | ||
4069 | { | ||
4070 | return cgroup_task_count(cont); | ||
4071 | } | ||
4072 | |||
4073 | static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft) | ||
4074 | { | ||
4075 | return (u64)(unsigned long)current->cgroups; | ||
4076 | } | ||
4077 | |||
4078 | static u64 current_css_set_refcount_read(struct cgroup *cont, | ||
4079 | struct cftype *cft) | ||
4080 | { | ||
4081 | u64 count; | ||
4082 | |||
4083 | rcu_read_lock(); | ||
4084 | count = atomic_read(¤t->cgroups->refcount); | ||
4085 | rcu_read_unlock(); | ||
4086 | return count; | ||
4087 | } | ||
4088 | |||
4089 | static int current_css_set_cg_links_read(struct cgroup *cont, | ||
4090 | struct cftype *cft, | ||
4091 | struct seq_file *seq) | ||
4092 | { | ||
4093 | struct cg_cgroup_link *link; | ||
4094 | struct css_set *cg; | ||
4095 | |||
4096 | read_lock(&css_set_lock); | ||
4097 | rcu_read_lock(); | ||
4098 | cg = rcu_dereference(current->cgroups); | ||
4099 | list_for_each_entry(link, &cg->cg_links, cg_link_list) { | ||
4100 | struct cgroup *c = link->cgrp; | ||
4101 | const char *name; | ||
4102 | |||
4103 | if (c->dentry) | ||
4104 | name = c->dentry->d_name.name; | ||
4105 | else | ||
4106 | name = "?"; | ||
4107 | seq_printf(seq, "Root %d group %s\n", | ||
4108 | c->root->hierarchy_id, name); | ||
4109 | } | ||
4110 | rcu_read_unlock(); | ||
4111 | read_unlock(&css_set_lock); | ||
4112 | return 0; | ||
4113 | } | ||
4114 | |||
4115 | #define MAX_TASKS_SHOWN_PER_CSS 25 | ||
4116 | static int cgroup_css_links_read(struct cgroup *cont, | ||
4117 | struct cftype *cft, | ||
4118 | struct seq_file *seq) | ||
4119 | { | ||
4120 | struct cg_cgroup_link *link; | ||
4121 | |||
4122 | read_lock(&css_set_lock); | ||
4123 | list_for_each_entry(link, &cont->css_sets, cgrp_link_list) { | ||
4124 | struct css_set *cg = link->cg; | ||
4125 | struct task_struct *task; | ||
4126 | int count = 0; | ||
4127 | seq_printf(seq, "css_set %p\n", cg); | ||
4128 | list_for_each_entry(task, &cg->tasks, cg_list) { | ||
4129 | if (count++ > MAX_TASKS_SHOWN_PER_CSS) { | ||
4130 | seq_puts(seq, " ...\n"); | ||
4131 | break; | ||
4132 | } else { | ||
4133 | seq_printf(seq, " task %d\n", | ||
4134 | task_pid_vnr(task)); | ||
4135 | } | ||
4136 | } | ||
4137 | } | ||
4138 | read_unlock(&css_set_lock); | ||
4139 | return 0; | ||
4140 | } | ||
4141 | |||
4142 | static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) | ||
4143 | { | ||
4144 | return test_bit(CGRP_RELEASABLE, &cgrp->flags); | ||
4145 | } | ||
4146 | |||
4147 | static struct cftype debug_files[] = { | ||
4148 | { | ||
4149 | .name = "cgroup_refcount", | ||
4150 | .read_u64 = cgroup_refcount_read, | ||
4151 | }, | ||
4152 | { | ||
4153 | .name = "taskcount", | ||
4154 | .read_u64 = debug_taskcount_read, | ||
4155 | }, | ||
4156 | |||
4157 | { | ||
4158 | .name = "current_css_set", | ||
4159 | .read_u64 = current_css_set_read, | ||
4160 | }, | ||
4161 | |||
4162 | { | ||
4163 | .name = "current_css_set_refcount", | ||
4164 | .read_u64 = current_css_set_refcount_read, | ||
4165 | }, | ||
4166 | |||
4167 | { | ||
4168 | .name = "current_css_set_cg_links", | ||
4169 | .read_seq_string = current_css_set_cg_links_read, | ||
4170 | }, | ||
4171 | |||
4172 | { | ||
4173 | .name = "cgroup_css_links", | ||
4174 | .read_seq_string = cgroup_css_links_read, | ||
4175 | }, | ||
4176 | |||
4177 | { | ||
4178 | .name = "releasable", | ||
4179 | .read_u64 = releasable_read, | ||
4180 | }, | ||
4181 | }; | ||
4182 | |||
4183 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) | ||
4184 | { | ||
4185 | return cgroup_add_files(cont, ss, debug_files, | ||
4186 | ARRAY_SIZE(debug_files)); | ||
4187 | } | ||
4188 | |||
4189 | struct cgroup_subsys debug_subsys = { | ||
4190 | .name = "debug", | ||
4191 | .create = debug_create, | ||
4192 | .destroy = debug_destroy, | ||
4193 | .populate = debug_populate, | ||
4194 | .subsys_id = debug_subsys_id, | ||
4195 | }; | ||
4196 | #endif /* CONFIG_CGROUP_DEBUG */ | ||
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c deleted file mode 100644 index 0c92d797baa6..000000000000 --- a/kernel/cgroup_debug.c +++ /dev/null | |||
@@ -1,105 +0,0 @@ | |||
1 | /* | ||
2 | * kernel/cgroup_debug.c - Example cgroup subsystem that | ||
3 | * exposes debug info | ||
4 | * | ||
5 | * Copyright (C) Google Inc, 2007 | ||
6 | * | ||
7 | * Developed by Paul Menage (menage@google.com) | ||
8 | * | ||
9 | */ | ||
10 | |||
11 | #include <linux/cgroup.h> | ||
12 | #include <linux/fs.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/rcupdate.h> | ||
15 | |||
16 | #include <asm/atomic.h> | ||
17 | |||
18 | static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, | ||
19 | struct cgroup *cont) | ||
20 | { | ||
21 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); | ||
22 | |||
23 | if (!css) | ||
24 | return ERR_PTR(-ENOMEM); | ||
25 | |||
26 | return css; | ||
27 | } | ||
28 | |||
29 | static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont) | ||
30 | { | ||
31 | kfree(cont->subsys[debug_subsys_id]); | ||
32 | } | ||
33 | |||
34 | static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft) | ||
35 | { | ||
36 | return atomic_read(&cont->count); | ||
37 | } | ||
38 | |||
39 | static u64 taskcount_read(struct cgroup *cont, struct cftype *cft) | ||
40 | { | ||
41 | u64 count; | ||
42 | |||
43 | count = cgroup_task_count(cont); | ||
44 | return count; | ||
45 | } | ||
46 | |||
47 | static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft) | ||
48 | { | ||
49 | return (u64)(long)current->cgroups; | ||
50 | } | ||
51 | |||
52 | static u64 current_css_set_refcount_read(struct cgroup *cont, | ||
53 | struct cftype *cft) | ||
54 | { | ||
55 | u64 count; | ||
56 | |||
57 | rcu_read_lock(); | ||
58 | count = atomic_read(&current->cgroups->refcount); | ||
59 | rcu_read_unlock(); | ||
60 | return count; | ||
61 | } | ||
62 | |||
63 | static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) | ||
64 | { | ||
65 | return test_bit(CGRP_RELEASABLE, &cgrp->flags); | ||
66 | } | ||
67 | |||
68 | static struct cftype files[] = { | ||
69 | { | ||
70 | .name = "cgroup_refcount", | ||
71 | .read_u64 = cgroup_refcount_read, | ||
72 | }, | ||
73 | { | ||
74 | .name = "taskcount", | ||
75 | .read_u64 = taskcount_read, | ||
76 | }, | ||
77 | |||
78 | { | ||
79 | .name = "current_css_set", | ||
80 | .read_u64 = current_css_set_read, | ||
81 | }, | ||
82 | |||
83 | { | ||
84 | .name = "current_css_set_refcount", | ||
85 | .read_u64 = current_css_set_refcount_read, | ||
86 | }, | ||
87 | |||
88 | { | ||
89 | .name = "releasable", | ||
90 | .read_u64 = releasable_read, | ||
91 | }, | ||
92 | }; | ||
93 | |||
94 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) | ||
95 | { | ||
96 | return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); | ||
97 | } | ||
98 | |||
99 | struct cgroup_subsys debug_subsys = { | ||
100 | .name = "debug", | ||
101 | .create = debug_create, | ||
102 | .destroy = debug_destroy, | ||
103 | .populate = debug_populate, | ||
104 | .subsys_id = debug_subsys_id, | ||
105 | }; | ||
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index fb249e2bcada..59e9ef6aab40 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -159,7 +159,7 @@ static bool is_task_frozen_enough(struct task_struct *task) | |||
159 | */ | 159 | */ |
160 | static int freezer_can_attach(struct cgroup_subsys *ss, | 160 | static int freezer_can_attach(struct cgroup_subsys *ss, |
161 | struct cgroup *new_cgroup, | 161 | struct cgroup *new_cgroup, |
162 | struct task_struct *task) | 162 | struct task_struct *task, bool threadgroup) |
163 | { | 163 | { |
164 | struct freezer *freezer; | 164 | struct freezer *freezer; |
165 | 165 | ||
@@ -177,6 +177,19 @@ static int freezer_can_attach(struct cgroup_subsys *ss, | |||
177 | if (freezer->state == CGROUP_FROZEN) | 177 | if (freezer->state == CGROUP_FROZEN) |
178 | return -EBUSY; | 178 | return -EBUSY; |
179 | 179 | ||
180 | if (threadgroup) { | ||
181 | struct task_struct *c; | ||
182 | |||
183 | rcu_read_lock(); | ||
184 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | ||
185 | if (is_task_frozen_enough(c)) { | ||
186 | rcu_read_unlock(); | ||
187 | return -EBUSY; | ||
188 | } | ||
189 | } | ||
190 | rcu_read_unlock(); | ||
191 | } | ||
192 | |||
180 | return 0; | 193 | return 0; |
181 | } | 194 | } |
182 | 195 | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7e75a41bd508..b5cb469d2545 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1324,9 +1324,10 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1324 | static cpumask_var_t cpus_attach; | 1324 | static cpumask_var_t cpus_attach; |
1325 | 1325 | ||
1326 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | 1326 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ |
1327 | static int cpuset_can_attach(struct cgroup_subsys *ss, | 1327 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, |
1328 | struct cgroup *cont, struct task_struct *tsk) | 1328 | struct task_struct *tsk, bool threadgroup) |
1329 | { | 1329 | { |
1330 | int ret; | ||
1330 | struct cpuset *cs = cgroup_cs(cont); | 1331 | struct cpuset *cs = cgroup_cs(cont); |
1331 | 1332 | ||
1332 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 1333 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
@@ -1343,18 +1344,51 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, | |||
1343 | if (tsk->flags & PF_THREAD_BOUND) | 1344 | if (tsk->flags & PF_THREAD_BOUND) |
1344 | return -EINVAL; | 1345 | return -EINVAL; |
1345 | 1346 | ||
1346 | return security_task_setscheduler(tsk, 0, NULL); | 1347 | ret = security_task_setscheduler(tsk, 0, NULL); |
1348 | if (ret) | ||
1349 | return ret; | ||
1350 | if (threadgroup) { | ||
1351 | struct task_struct *c; | ||
1352 | |||
1353 | rcu_read_lock(); | ||
1354 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
1355 | ret = security_task_setscheduler(c, 0, NULL); | ||
1356 | if (ret) { | ||
1357 | rcu_read_unlock(); | ||
1358 | return ret; | ||
1359 | } | ||
1360 | } | ||
1361 | rcu_read_unlock(); | ||
1362 | } | ||
1363 | return 0; | ||
1364 | } | ||
1365 | |||
1366 | static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, | ||
1367 | struct cpuset *cs) | ||
1368 | { | ||
1369 | int err; | ||
1370 | /* | ||
1371 | * can_attach beforehand should guarantee that this doesn't fail. | ||
1372 | * TODO: have a better way to handle failure here | ||
1373 | */ | ||
1374 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | ||
1375 | WARN_ON_ONCE(err); | ||
1376 | |||
1377 | task_lock(tsk); | ||
1378 | cpuset_change_task_nodemask(tsk, to); | ||
1379 | task_unlock(tsk); | ||
1380 | cpuset_update_task_spread_flag(cs, tsk); | ||
1381 | |||
1347 | } | 1382 | } |
1348 | 1383 | ||
1349 | static void cpuset_attach(struct cgroup_subsys *ss, | 1384 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, |
1350 | struct cgroup *cont, struct cgroup *oldcont, | 1385 | struct cgroup *oldcont, struct task_struct *tsk, |
1351 | struct task_struct *tsk) | 1386 | bool threadgroup) |
1352 | { | 1387 | { |
1353 | nodemask_t from, to; | 1388 | nodemask_t from, to; |
1354 | struct mm_struct *mm; | 1389 | struct mm_struct *mm; |
1355 | struct cpuset *cs = cgroup_cs(cont); | 1390 | struct cpuset *cs = cgroup_cs(cont); |
1356 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1391 | struct cpuset *oldcs = cgroup_cs(oldcont); |
1357 | int err; | ||
1358 | 1392 | ||
1359 | if (cs == &top_cpuset) { | 1393 | if (cs == &top_cpuset) { |
1360 | cpumask_copy(cpus_attach, cpu_possible_mask); | 1394 | cpumask_copy(cpus_attach, cpu_possible_mask); |
@@ -1363,15 +1397,19 @@ static void cpuset_attach(struct cgroup_subsys *ss, | |||
1363 | guarantee_online_cpus(cs, cpus_attach); | 1397 | guarantee_online_cpus(cs, cpus_attach); |
1364 | guarantee_online_mems(cs, &to); | 1398 | guarantee_online_mems(cs, &to); |
1365 | } | 1399 | } |
1366 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | ||
1367 | if (err) | ||
1368 | return; | ||
1369 | 1400 | ||
1370 | task_lock(tsk); | 1401 | /* do per-task migration stuff possibly for each in the threadgroup */ |
1371 | cpuset_change_task_nodemask(tsk, &to); | 1402 | cpuset_attach_task(tsk, &to, cs); |
1372 | task_unlock(tsk); | 1403 | if (threadgroup) { |
1373 | cpuset_update_task_spread_flag(cs, tsk); | 1404 | struct task_struct *c; |
1405 | rcu_read_lock(); | ||
1406 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
1407 | cpuset_attach_task(c, &to, cs); | ||
1408 | } | ||
1409 | rcu_read_unlock(); | ||
1410 | } | ||
1374 | 1411 | ||
1412 | /* change mm; only needs to be done once even if threadgroup */ | ||
1375 | from = oldcs->mems_allowed; | 1413 | from = oldcs->mems_allowed; |
1376 | to = cs->mems_allowed; | 1414 | to = cs->mems_allowed; |
1377 | mm = get_task_mm(tsk); | 1415 | mm = get_task_mm(tsk); |
diff --git a/kernel/exit.c b/kernel/exit.c index 60d6fdcc9265..5859f598c951 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -976,8 +976,6 @@ NORET_TYPE void do_exit(long code) | |||
976 | disassociate_ctty(1); | 976 | disassociate_ctty(1); |
977 | 977 | ||
978 | module_put(task_thread_info(tsk)->exec_domain->module); | 978 | module_put(task_thread_info(tsk)->exec_domain->module); |
979 | if (tsk->binfmt) | ||
980 | module_put(tsk->binfmt->module); | ||
981 | 979 | ||
982 | proc_exit_connector(tsk); | 980 | proc_exit_connector(tsk); |
983 | 981 | ||
@@ -1097,28 +1095,28 @@ struct wait_opts { | |||
1097 | int __user *wo_stat; | 1095 | int __user *wo_stat; |
1098 | struct rusage __user *wo_rusage; | 1096 | struct rusage __user *wo_rusage; |
1099 | 1097 | ||
1098 | wait_queue_t child_wait; | ||
1100 | int notask_error; | 1099 | int notask_error; |
1101 | }; | 1100 | }; |
1102 | 1101 | ||
1103 | static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) | 1102 | static inline |
1103 | struct pid *task_pid_type(struct task_struct *task, enum pid_type type) | ||
1104 | { | 1104 | { |
1105 | struct pid *pid = NULL; | 1105 | if (type != PIDTYPE_PID) |
1106 | if (type == PIDTYPE_PID) | 1106 | task = task->group_leader; |
1107 | pid = task->pids[type].pid; | 1107 | return task->pids[type].pid; |
1108 | else if (type < PIDTYPE_MAX) | ||
1109 | pid = task->group_leader->pids[type].pid; | ||
1110 | return pid; | ||
1111 | } | 1108 | } |
1112 | 1109 | ||
1113 | static int eligible_child(struct wait_opts *wo, struct task_struct *p) | 1110 | static int eligible_pid(struct wait_opts *wo, struct task_struct *p) |
1114 | { | 1111 | { |
1115 | int err; | 1112 | return wo->wo_type == PIDTYPE_MAX || |
1116 | 1113 | task_pid_type(p, wo->wo_type) == wo->wo_pid; | |
1117 | if (wo->wo_type < PIDTYPE_MAX) { | 1114 | } |
1118 | if (task_pid_type(p, wo->wo_type) != wo->wo_pid) | ||
1119 | return 0; | ||
1120 | } | ||
1121 | 1115 | ||
1116 | static int eligible_child(struct wait_opts *wo, struct task_struct *p) | ||
1117 | { | ||
1118 | if (!eligible_pid(wo, p)) | ||
1119 | return 0; | ||
1122 | /* Wait for all children (clone and not) if __WALL is set; | 1120 | /* Wait for all children (clone and not) if __WALL is set; |
1123 | * otherwise, wait for clone children *only* if __WCLONE is | 1121 | * otherwise, wait for clone children *only* if __WCLONE is |
1124 | * set; otherwise, wait for non-clone children *only*. (Note: | 1122 | * set; otherwise, wait for non-clone children *only*. (Note: |
@@ -1128,10 +1126,6 @@ static int eligible_child(struct wait_opts *wo, struct task_struct *p) | |||
1128 | && !(wo->wo_flags & __WALL)) | 1126 | && !(wo->wo_flags & __WALL)) |
1129 | return 0; | 1127 | return 0; |
1130 | 1128 | ||
1131 | err = security_task_wait(p); | ||
1132 | if (err) | ||
1133 | return err; | ||
1134 | |||
1135 | return 1; | 1129 | return 1; |
1136 | } | 1130 | } |
1137 | 1131 | ||
@@ -1144,18 +1138,20 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, | |||
1144 | 1138 | ||
1145 | put_task_struct(p); | 1139 | put_task_struct(p); |
1146 | infop = wo->wo_info; | 1140 | infop = wo->wo_info; |
1147 | if (!retval) | 1141 | if (infop) { |
1148 | retval = put_user(SIGCHLD, &infop->si_signo); | 1142 | if (!retval) |
1149 | if (!retval) | 1143 | retval = put_user(SIGCHLD, &infop->si_signo); |
1150 | retval = put_user(0, &infop->si_errno); | 1144 | if (!retval) |
1151 | if (!retval) | 1145 | retval = put_user(0, &infop->si_errno); |
1152 | retval = put_user((short)why, &infop->si_code); | 1146 | if (!retval) |
1153 | if (!retval) | 1147 | retval = put_user((short)why, &infop->si_code); |
1154 | retval = put_user(pid, &infop->si_pid); | 1148 | if (!retval) |
1155 | if (!retval) | 1149 | retval = put_user(pid, &infop->si_pid); |
1156 | retval = put_user(uid, &infop->si_uid); | 1150 | if (!retval) |
1157 | if (!retval) | 1151 | retval = put_user(uid, &infop->si_uid); |
1158 | retval = put_user(status, &infop->si_status); | 1152 | if (!retval) |
1153 | retval = put_user(status, &infop->si_status); | ||
1154 | } | ||
1159 | if (!retval) | 1155 | if (!retval) |
1160 | retval = pid; | 1156 | retval = pid; |
1161 | return retval; | 1157 | return retval; |
@@ -1485,13 +1481,14 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) | |||
1485 | * then ->notask_error is 0 if @p is an eligible child, | 1481 | * then ->notask_error is 0 if @p is an eligible child, |
1486 | * or another error from security_task_wait(), or still -ECHILD. | 1482 | * or another error from security_task_wait(), or still -ECHILD. |
1487 | */ | 1483 | */ |
1488 | static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent, | 1484 | static int wait_consider_task(struct wait_opts *wo, int ptrace, |
1489 | int ptrace, struct task_struct *p) | 1485 | struct task_struct *p) |
1490 | { | 1486 | { |
1491 | int ret = eligible_child(wo, p); | 1487 | int ret = eligible_child(wo, p); |
1492 | if (!ret) | 1488 | if (!ret) |
1493 | return ret; | 1489 | return ret; |
1494 | 1490 | ||
1491 | ret = security_task_wait(p); | ||
1495 | if (unlikely(ret < 0)) { | 1492 | if (unlikely(ret < 0)) { |
1496 | /* | 1493 | /* |
1497 | * If we have not yet seen any eligible child, | 1494 | * If we have not yet seen any eligible child, |
@@ -1553,7 +1550,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) | |||
1553 | * Do not consider detached threads. | 1550 | * Do not consider detached threads. |
1554 | */ | 1551 | */ |
1555 | if (!task_detached(p)) { | 1552 | if (!task_detached(p)) { |
1556 | int ret = wait_consider_task(wo, tsk, 0, p); | 1553 | int ret = wait_consider_task(wo, 0, p); |
1557 | if (ret) | 1554 | if (ret) |
1558 | return ret; | 1555 | return ret; |
1559 | } | 1556 | } |
@@ -1567,7 +1564,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) | |||
1567 | struct task_struct *p; | 1564 | struct task_struct *p; |
1568 | 1565 | ||
1569 | list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { | 1566 | list_for_each_entry(p, &tsk->ptraced, ptrace_entry) { |
1570 | int ret = wait_consider_task(wo, tsk, 1, p); | 1567 | int ret = wait_consider_task(wo, 1, p); |
1571 | if (ret) | 1568 | if (ret) |
1572 | return ret; | 1569 | return ret; |
1573 | } | 1570 | } |
@@ -1575,15 +1572,38 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) | |||
1575 | return 0; | 1572 | return 0; |
1576 | } | 1573 | } |
1577 | 1574 | ||
1575 | static int child_wait_callback(wait_queue_t *wait, unsigned mode, | ||
1576 | int sync, void *key) | ||
1577 | { | ||
1578 | struct wait_opts *wo = container_of(wait, struct wait_opts, | ||
1579 | child_wait); | ||
1580 | struct task_struct *p = key; | ||
1581 | |||
1582 | if (!eligible_pid(wo, p)) | ||
1583 | return 0; | ||
1584 | |||
1585 | if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent) | ||
1586 | return 0; | ||
1587 | |||
1588 | return default_wake_function(wait, mode, sync, key); | ||
1589 | } | ||
1590 | |||
1591 | void __wake_up_parent(struct task_struct *p, struct task_struct *parent) | ||
1592 | { | ||
1593 | __wake_up_sync_key(&parent->signal->wait_chldexit, | ||
1594 | TASK_INTERRUPTIBLE, 1, p); | ||
1595 | } | ||
1596 | |||
1578 | static long do_wait(struct wait_opts *wo) | 1597 | static long do_wait(struct wait_opts *wo) |
1579 | { | 1598 | { |
1580 | DECLARE_WAITQUEUE(wait, current); | ||
1581 | struct task_struct *tsk; | 1599 | struct task_struct *tsk; |
1582 | int retval; | 1600 | int retval; |
1583 | 1601 | ||
1584 | trace_sched_process_wait(wo->wo_pid); | 1602 | trace_sched_process_wait(wo->wo_pid); |
1585 | 1603 | ||
1586 | add_wait_queue(&current->signal->wait_chldexit,&wait); | 1604 | init_waitqueue_func_entry(&wo->child_wait, child_wait_callback); |
1605 | wo->child_wait.private = current; | ||
1606 | add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait); | ||
1587 | repeat: | 1607 | repeat: |
1588 | /* | 1608 | /* |
1589 | * If there is nothing that can match our critiera just get out. | 1609 | * If there is nothing that can match our critiera just get out. |
@@ -1624,32 +1644,7 @@ notask: | |||
1624 | } | 1644 | } |
1625 | end: | 1645 | end: |
1626 | __set_current_state(TASK_RUNNING); | 1646 | __set_current_state(TASK_RUNNING); |
1627 | remove_wait_queue(&current->signal->wait_chldexit,&wait); | 1647 | remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait); |
1628 | if (wo->wo_info) { | ||
1629 | struct siginfo __user *infop = wo->wo_info; | ||
1630 | |||
1631 | if (retval > 0) | ||
1632 | retval = 0; | ||
1633 | else { | ||
1634 | /* | ||
1635 | * For a WNOHANG return, clear out all the fields | ||
1636 | * we would set so the user can easily tell the | ||
1637 | * difference. | ||
1638 | */ | ||
1639 | if (!retval) | ||
1640 | retval = put_user(0, &infop->si_signo); | ||
1641 | if (!retval) | ||
1642 | retval = put_user(0, &infop->si_errno); | ||
1643 | if (!retval) | ||
1644 | retval = put_user(0, &infop->si_code); | ||
1645 | if (!retval) | ||
1646 | retval = put_user(0, &infop->si_pid); | ||
1647 | if (!retval) | ||
1648 | retval = put_user(0, &infop->si_uid); | ||
1649 | if (!retval) | ||
1650 | retval = put_user(0, &infop->si_status); | ||
1651 | } | ||
1652 | } | ||
1653 | return retval; | 1648 | return retval; |
1654 | } | 1649 | } |
1655 | 1650 | ||
@@ -1694,6 +1689,29 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, | |||
1694 | wo.wo_stat = NULL; | 1689 | wo.wo_stat = NULL; |
1695 | wo.wo_rusage = ru; | 1690 | wo.wo_rusage = ru; |
1696 | ret = do_wait(&wo); | 1691 | ret = do_wait(&wo); |
1692 | |||
1693 | if (ret > 0) { | ||
1694 | ret = 0; | ||
1695 | } else if (infop) { | ||
1696 | /* | ||
1697 | * For a WNOHANG return, clear out all the fields | ||
1698 | * we would set so the user can easily tell the | ||
1699 | * difference. | ||
1700 | */ | ||
1701 | if (!ret) | ||
1702 | ret = put_user(0, &infop->si_signo); | ||
1703 | if (!ret) | ||
1704 | ret = put_user(0, &infop->si_errno); | ||
1705 | if (!ret) | ||
1706 | ret = put_user(0, &infop->si_code); | ||
1707 | if (!ret) | ||
1708 | ret = put_user(0, &infop->si_pid); | ||
1709 | if (!ret) | ||
1710 | ret = put_user(0, &infop->si_uid); | ||
1711 | if (!ret) | ||
1712 | ret = put_user(0, &infop->si_status); | ||
1713 | } | ||
1714 | |||
1697 | put_pid(pid); | 1715 | put_pid(pid); |
1698 | 1716 | ||
1699 | /* avoid REGPARM breakage on x86: */ | 1717 | /* avoid REGPARM breakage on x86: */ |
diff --git a/kernel/fork.c b/kernel/fork.c index 51ad0b0b7266..266c6af6ef1b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -434,6 +434,14 @@ __setup("coredump_filter=", coredump_filter_setup); | |||
434 | 434 | ||
435 | #include <linux/init_task.h> | 435 | #include <linux/init_task.h> |
436 | 436 | ||
437 | static void mm_init_aio(struct mm_struct *mm) | ||
438 | { | ||
439 | #ifdef CONFIG_AIO | ||
440 | spin_lock_init(&mm->ioctx_lock); | ||
441 | INIT_HLIST_HEAD(&mm->ioctx_list); | ||
442 | #endif | ||
443 | } | ||
444 | |||
437 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | 445 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) |
438 | { | 446 | { |
439 | atomic_set(&mm->mm_users, 1); | 447 | atomic_set(&mm->mm_users, 1); |
@@ -447,10 +455,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
447 | set_mm_counter(mm, file_rss, 0); | 455 | set_mm_counter(mm, file_rss, 0); |
448 | set_mm_counter(mm, anon_rss, 0); | 456 | set_mm_counter(mm, anon_rss, 0); |
449 | spin_lock_init(&mm->page_table_lock); | 457 | spin_lock_init(&mm->page_table_lock); |
450 | spin_lock_init(&mm->ioctx_lock); | ||
451 | INIT_HLIST_HEAD(&mm->ioctx_list); | ||
452 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 458 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
453 | mm->cached_hole_size = ~0UL; | 459 | mm->cached_hole_size = ~0UL; |
460 | mm_init_aio(mm); | ||
454 | mm_init_owner(mm, p); | 461 | mm_init_owner(mm, p); |
455 | 462 | ||
456 | if (likely(!mm_alloc_pgd(mm))) { | 463 | if (likely(!mm_alloc_pgd(mm))) { |
@@ -511,6 +518,8 @@ void mmput(struct mm_struct *mm) | |||
511 | spin_unlock(&mmlist_lock); | 518 | spin_unlock(&mmlist_lock); |
512 | } | 519 | } |
513 | put_swap_token(mm); | 520 | put_swap_token(mm); |
521 | if (mm->binfmt) | ||
522 | module_put(mm->binfmt->module); | ||
514 | mmdrop(mm); | 523 | mmdrop(mm); |
515 | } | 524 | } |
516 | } | 525 | } |
@@ -636,9 +645,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
636 | mm->hiwater_rss = get_mm_rss(mm); | 645 | mm->hiwater_rss = get_mm_rss(mm); |
637 | mm->hiwater_vm = mm->total_vm; | 646 | mm->hiwater_vm = mm->total_vm; |
638 | 647 | ||
648 | if (mm->binfmt && !try_module_get(mm->binfmt->module)) | ||
649 | goto free_pt; | ||
650 | |||
639 | return mm; | 651 | return mm; |
640 | 652 | ||
641 | free_pt: | 653 | free_pt: |
654 | /* don't put binfmt in mmput, we haven't got module yet */ | ||
655 | mm->binfmt = NULL; | ||
642 | mmput(mm); | 656 | mmput(mm); |
643 | 657 | ||
644 | fail_nomem: | 658 | fail_nomem: |
@@ -979,6 +993,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
979 | if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) | 993 | if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) |
980 | return ERR_PTR(-EINVAL); | 994 | return ERR_PTR(-EINVAL); |
981 | 995 | ||
996 | /* | ||
997 | * Siblings of global init remain as zombies on exit since they are | ||
998 | * not reaped by their parent (swapper). To solve this and to avoid | ||
999 | * multi-rooted process trees, prevent global and container-inits | ||
1000 | * from creating siblings. | ||
1001 | */ | ||
1002 | if ((clone_flags & CLONE_PARENT) && | ||
1003 | current->signal->flags & SIGNAL_UNKILLABLE) | ||
1004 | return ERR_PTR(-EINVAL); | ||
1005 | |||
982 | retval = security_task_create(clone_flags); | 1006 | retval = security_task_create(clone_flags); |
983 | if (retval) | 1007 | if (retval) |
984 | goto fork_out; | 1008 | goto fork_out; |
@@ -1020,9 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1020 | if (!try_module_get(task_thread_info(p)->exec_domain->module)) | 1044 | if (!try_module_get(task_thread_info(p)->exec_domain->module)) |
1021 | goto bad_fork_cleanup_count; | 1045 | goto bad_fork_cleanup_count; |
1022 | 1046 | ||
1023 | if (p->binfmt && !try_module_get(p->binfmt->module)) | ||
1024 | goto bad_fork_cleanup_put_domain; | ||
1025 | |||
1026 | p->did_exec = 0; | 1047 | p->did_exec = 0; |
1027 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ | 1048 | delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ |
1028 | copy_flags(clone_flags, p); | 1049 | copy_flags(clone_flags, p); |
@@ -1310,9 +1331,6 @@ bad_fork_cleanup_cgroup: | |||
1310 | #endif | 1331 | #endif |
1311 | cgroup_exit(p, cgroup_callbacks_done); | 1332 | cgroup_exit(p, cgroup_callbacks_done); |
1312 | delayacct_tsk_free(p); | 1333 | delayacct_tsk_free(p); |
1313 | if (p->binfmt) | ||
1314 | module_put(p->binfmt->module); | ||
1315 | bad_fork_cleanup_put_domain: | ||
1316 | module_put(task_thread_info(p)->exec_domain->module); | 1334 | module_put(task_thread_info(p)->exec_domain->module); |
1317 | bad_fork_cleanup_count: | 1335 | bad_fork_cleanup_count: |
1318 | atomic_dec(&p->cred->user->processes); | 1336 | atomic_dec(&p->cred->user->processes); |
diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 022a4927b785..d4e841747400 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c | |||
@@ -171,12 +171,12 @@ static unsigned long timeout_jiffies(unsigned long timeout) | |||
171 | * Process updating of timeout sysctl | 171 | * Process updating of timeout sysctl |
172 | */ | 172 | */ |
173 | int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, | 173 | int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, |
174 | struct file *filp, void __user *buffer, | 174 | void __user *buffer, |
175 | size_t *lenp, loff_t *ppos) | 175 | size_t *lenp, loff_t *ppos) |
176 | { | 176 | { |
177 | int ret; | 177 | int ret; |
178 | 178 | ||
179 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); | 179 | ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
180 | 180 | ||
181 | if (ret || !write) | 181 | if (ret || !write) |
182 | goto out; | 182 | goto out; |
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 5aa854f9e5ae..2a5dfec8efe0 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c | |||
@@ -42,8 +42,8 @@ int ns_cgroup_clone(struct task_struct *task, struct pid *pid) | |||
42 | * (hence either you are in the same cgroup as task, or in an | 42 | * (hence either you are in the same cgroup as task, or in an |
43 | * ancestor cgroup thereof) | 43 | * ancestor cgroup thereof) |
44 | */ | 44 | */ |
45 | static int ns_can_attach(struct cgroup_subsys *ss, | 45 | static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup, |
46 | struct cgroup *new_cgroup, struct task_struct *task) | 46 | struct task_struct *task, bool threadgroup) |
47 | { | 47 | { |
48 | if (current != task) { | 48 | if (current != task) { |
49 | if (!capable(CAP_SYS_ADMIN)) | 49 | if (!capable(CAP_SYS_ADMIN)) |
@@ -56,6 +56,18 @@ static int ns_can_attach(struct cgroup_subsys *ss, | |||
56 | if (!cgroup_is_descendant(new_cgroup, task)) | 56 | if (!cgroup_is_descendant(new_cgroup, task)) |
57 | return -EPERM; | 57 | return -EPERM; |
58 | 58 | ||
59 | if (threadgroup) { | ||
60 | struct task_struct *c; | ||
61 | rcu_read_lock(); | ||
62 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | ||
63 | if (!cgroup_is_descendant(new_cgroup, c)) { | ||
64 | rcu_read_unlock(); | ||
65 | return -EPERM; | ||
66 | } | ||
67 | } | ||
68 | rcu_read_unlock(); | ||
69 | } | ||
70 | |||
59 | return 0; | 71 | return 0; |
60 | } | 72 | } |
61 | 73 | ||
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 821722ae58a7..86b3796b0436 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -118,7 +118,7 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old | |||
118 | { | 118 | { |
119 | if (!(flags & CLONE_NEWPID)) | 119 | if (!(flags & CLONE_NEWPID)) |
120 | return get_pid_ns(old_ns); | 120 | return get_pid_ns(old_ns); |
121 | if (flags & CLONE_THREAD) | 121 | if (flags & (CLONE_THREAD|CLONE_PARENT)) |
122 | return ERR_PTR(-EINVAL); | 122 | return ERR_PTR(-EINVAL); |
123 | return create_pid_namespace(old_ns); | 123 | return create_pid_namespace(old_ns); |
124 | } | 124 | } |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 307c285af59e..23bd09cd042e 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -266,9 +266,10 @@ static int ignoring_children(struct sighand_struct *sigh) | |||
266 | * or self-reaping. Do notification now if it would have happened earlier. | 266 | * or self-reaping. Do notification now if it would have happened earlier. |
267 | * If it should reap itself, return true. | 267 | * If it should reap itself, return true. |
268 | * | 268 | * |
269 | * If it's our own child, there is no notification to do. | 269 | * If it's our own child, there is no notification to do. But if our normal |
270 | * But if our normal children self-reap, then this child | 270 | * children self-reap, then this child was prevented by ptrace and we must |
271 | * was prevented by ptrace and we must reap it now. | 271 | * reap it now, in that case we must also wake up sub-threads sleeping in |
272 | * do_wait(). | ||
272 | */ | 273 | */ |
273 | static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) | 274 | static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) |
274 | { | 275 | { |
@@ -278,8 +279,10 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) | |||
278 | if (!task_detached(p) && thread_group_empty(p)) { | 279 | if (!task_detached(p) && thread_group_empty(p)) { |
279 | if (!same_thread_group(p->real_parent, tracer)) | 280 | if (!same_thread_group(p->real_parent, tracer)) |
280 | do_notify_parent(p, p->exit_signal); | 281 | do_notify_parent(p, p->exit_signal); |
281 | else if (ignoring_children(tracer->sighand)) | 282 | else if (ignoring_children(tracer->sighand)) { |
283 | __wake_up_parent(p, tracer); | ||
282 | p->exit_signal = -1; | 284 | p->exit_signal = -1; |
285 | } | ||
283 | } | 286 | } |
284 | if (task_detached(p)) { | 287 | if (task_detached(p)) { |
285 | /* Mark it as in the process of being reaped. */ | 288 | /* Mark it as in the process of being reaped. */ |
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index e1338f074314..88faec23e833 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -19,6 +19,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent) | |||
19 | { | 19 | { |
20 | spin_lock_init(&counter->lock); | 20 | spin_lock_init(&counter->lock); |
21 | counter->limit = RESOURCE_MAX; | 21 | counter->limit = RESOURCE_MAX; |
22 | counter->soft_limit = RESOURCE_MAX; | ||
22 | counter->parent = parent; | 23 | counter->parent = parent; |
23 | } | 24 | } |
24 | 25 | ||
@@ -36,17 +37,27 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val) | |||
36 | } | 37 | } |
37 | 38 | ||
38 | int res_counter_charge(struct res_counter *counter, unsigned long val, | 39 | int res_counter_charge(struct res_counter *counter, unsigned long val, |
39 | struct res_counter **limit_fail_at) | 40 | struct res_counter **limit_fail_at, |
41 | struct res_counter **soft_limit_fail_at) | ||
40 | { | 42 | { |
41 | int ret; | 43 | int ret; |
42 | unsigned long flags; | 44 | unsigned long flags; |
43 | struct res_counter *c, *u; | 45 | struct res_counter *c, *u; |
44 | 46 | ||
45 | *limit_fail_at = NULL; | 47 | *limit_fail_at = NULL; |
48 | if (soft_limit_fail_at) | ||
49 | *soft_limit_fail_at = NULL; | ||
46 | local_irq_save(flags); | 50 | local_irq_save(flags); |
47 | for (c = counter; c != NULL; c = c->parent) { | 51 | for (c = counter; c != NULL; c = c->parent) { |
48 | spin_lock(&c->lock); | 52 | spin_lock(&c->lock); |
49 | ret = res_counter_charge_locked(c, val); | 53 | ret = res_counter_charge_locked(c, val); |
54 | /* | ||
55 | * With soft limits, we return the highest ancestor | ||
56 | * that exceeds its soft limit | ||
57 | */ | ||
58 | if (soft_limit_fail_at && | ||
59 | !res_counter_soft_limit_check_locked(c)) | ||
60 | *soft_limit_fail_at = c; | ||
50 | spin_unlock(&c->lock); | 61 | spin_unlock(&c->lock); |
51 | if (ret < 0) { | 62 | if (ret < 0) { |
52 | *limit_fail_at = c; | 63 | *limit_fail_at = c; |
@@ -74,7 +85,8 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) | |||
74 | counter->usage -= val; | 85 | counter->usage -= val; |
75 | } | 86 | } |
76 | 87 | ||
77 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) | 88 | void res_counter_uncharge(struct res_counter *counter, unsigned long val, |
89 | bool *was_soft_limit_excess) | ||
78 | { | 90 | { |
79 | unsigned long flags; | 91 | unsigned long flags; |
80 | struct res_counter *c; | 92 | struct res_counter *c; |
@@ -82,6 +94,9 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val) | |||
82 | local_irq_save(flags); | 94 | local_irq_save(flags); |
83 | for (c = counter; c != NULL; c = c->parent) { | 95 | for (c = counter; c != NULL; c = c->parent) { |
84 | spin_lock(&c->lock); | 96 | spin_lock(&c->lock); |
97 | if (was_soft_limit_excess) | ||
98 | *was_soft_limit_excess = | ||
99 | !res_counter_soft_limit_check_locked(c); | ||
85 | res_counter_uncharge_locked(c, val); | 100 | res_counter_uncharge_locked(c, val); |
86 | spin_unlock(&c->lock); | 101 | spin_unlock(&c->lock); |
87 | } | 102 | } |
@@ -101,6 +116,8 @@ res_counter_member(struct res_counter *counter, int member) | |||
101 | return &counter->limit; | 116 | return &counter->limit; |
102 | case RES_FAILCNT: | 117 | case RES_FAILCNT: |
103 | return &counter->failcnt; | 118 | return &counter->failcnt; |
119 | case RES_SOFT_LIMIT: | ||
120 | return &counter->soft_limit; | ||
104 | }; | 121 | }; |
105 | 122 | ||
106 | BUG(); | 123 | BUG(); |
diff --git a/kernel/sched.c b/kernel/sched.c index 2f76e06bea58..ee61f454a98b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -10312,7 +10312,7 @@ static int sched_rt_global_constraints(void) | |||
10312 | #endif /* CONFIG_RT_GROUP_SCHED */ | 10312 | #endif /* CONFIG_RT_GROUP_SCHED */ |
10313 | 10313 | ||
10314 | int sched_rt_handler(struct ctl_table *table, int write, | 10314 | int sched_rt_handler(struct ctl_table *table, int write, |
10315 | struct file *filp, void __user *buffer, size_t *lenp, | 10315 | void __user *buffer, size_t *lenp, |
10316 | loff_t *ppos) | 10316 | loff_t *ppos) |
10317 | { | 10317 | { |
10318 | int ret; | 10318 | int ret; |
@@ -10323,7 +10323,7 @@ int sched_rt_handler(struct ctl_table *table, int write, | |||
10323 | old_period = sysctl_sched_rt_period; | 10323 | old_period = sysctl_sched_rt_period; |
10324 | old_runtime = sysctl_sched_rt_runtime; | 10324 | old_runtime = sysctl_sched_rt_runtime; |
10325 | 10325 | ||
10326 | ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); | 10326 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
10327 | 10327 | ||
10328 | if (!ret && write) { | 10328 | if (!ret && write) { |
10329 | ret = sched_rt_global_constraints(); | 10329 | ret = sched_rt_global_constraints(); |
@@ -10377,8 +10377,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
10377 | } | 10377 | } |
10378 | 10378 | ||
10379 | static int | 10379 | static int |
10380 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 10380 | cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) |
10381 | struct task_struct *tsk) | ||
10382 | { | 10381 | { |
10383 | #ifdef CONFIG_RT_GROUP_SCHED | 10382 | #ifdef CONFIG_RT_GROUP_SCHED |
10384 | if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) | 10383 | if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) |
@@ -10388,15 +10387,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
10388 | if (tsk->sched_class != &fair_sched_class) | 10387 | if (tsk->sched_class != &fair_sched_class) |
10389 | return -EINVAL; | 10388 | return -EINVAL; |
10390 | #endif | 10389 | #endif |
10390 | return 0; | ||
10391 | } | ||
10391 | 10392 | ||
10393 | static int | ||
10394 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | ||
10395 | struct task_struct *tsk, bool threadgroup) | ||
10396 | { | ||
10397 | int retval = cpu_cgroup_can_attach_task(cgrp, tsk); | ||
10398 | if (retval) | ||
10399 | return retval; | ||
10400 | if (threadgroup) { | ||
10401 | struct task_struct *c; | ||
10402 | rcu_read_lock(); | ||
10403 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
10404 | retval = cpu_cgroup_can_attach_task(cgrp, c); | ||
10405 | if (retval) { | ||
10406 | rcu_read_unlock(); | ||
10407 | return retval; | ||
10408 | } | ||
10409 | } | ||
10410 | rcu_read_unlock(); | ||
10411 | } | ||
10392 | return 0; | 10412 | return 0; |
10393 | } | 10413 | } |
10394 | 10414 | ||
10395 | static void | 10415 | static void |
10396 | cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 10416 | cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
10397 | struct cgroup *old_cont, struct task_struct *tsk) | 10417 | struct cgroup *old_cont, struct task_struct *tsk, |
10418 | bool threadgroup) | ||
10398 | { | 10419 | { |
10399 | sched_move_task(tsk); | 10420 | sched_move_task(tsk); |
10421 | if (threadgroup) { | ||
10422 | struct task_struct *c; | ||
10423 | rcu_read_lock(); | ||
10424 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
10425 | sched_move_task(c); | ||
10426 | } | ||
10427 | rcu_read_unlock(); | ||
10428 | } | ||
10400 | } | 10429 | } |
10401 | 10430 | ||
10402 | #ifdef CONFIG_FAIR_GROUP_SCHED | 10431 | #ifdef CONFIG_FAIR_GROUP_SCHED |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ecc637a0d591..4e777b47eeda 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | |||
384 | 384 | ||
385 | #ifdef CONFIG_SCHED_DEBUG | 385 | #ifdef CONFIG_SCHED_DEBUG |
386 | int sched_nr_latency_handler(struct ctl_table *table, int write, | 386 | int sched_nr_latency_handler(struct ctl_table *table, int write, |
387 | struct file *filp, void __user *buffer, size_t *lenp, | 387 | void __user *buffer, size_t *lenp, |
388 | loff_t *ppos) | 388 | loff_t *ppos) |
389 | { | 389 | { |
390 | int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | 390 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
391 | 391 | ||
392 | if (ret || !write) | 392 | if (ret || !write) |
393 | return ret; | 393 | return ret; |
diff --git a/kernel/signal.c b/kernel/signal.c index 64c5deeaca5d..6705320784fd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -705,7 +705,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) | |||
705 | 705 | ||
706 | if (why) { | 706 | if (why) { |
707 | /* | 707 | /* |
708 | * The first thread which returns from finish_stop() | 708 | * The first thread which returns from do_signal_stop() |
709 | * will take ->siglock, notice SIGNAL_CLD_MASK, and | 709 | * will take ->siglock, notice SIGNAL_CLD_MASK, and |
710 | * notify its parent. See get_signal_to_deliver(). | 710 | * notify its parent. See get_signal_to_deliver(). |
711 | */ | 711 | */ |
@@ -971,6 +971,20 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |||
971 | return send_signal(sig, info, t, 0); | 971 | return send_signal(sig, info, t, 0); |
972 | } | 972 | } |
973 | 973 | ||
974 | int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, | ||
975 | bool group) | ||
976 | { | ||
977 | unsigned long flags; | ||
978 | int ret = -ESRCH; | ||
979 | |||
980 | if (lock_task_sighand(p, &flags)) { | ||
981 | ret = send_signal(sig, info, p, group); | ||
982 | unlock_task_sighand(p, &flags); | ||
983 | } | ||
984 | |||
985 | return ret; | ||
986 | } | ||
987 | |||
974 | /* | 988 | /* |
975 | * Force a signal that the process can't ignore: if necessary | 989 | * Force a signal that the process can't ignore: if necessary |
976 | * we unblock the signal and change any SIG_IGN to SIG_DFL. | 990 | * we unblock the signal and change any SIG_IGN to SIG_DFL. |
@@ -1036,12 +1050,6 @@ void zap_other_threads(struct task_struct *p) | |||
1036 | } | 1050 | } |
1037 | } | 1051 | } |
1038 | 1052 | ||
1039 | int __fatal_signal_pending(struct task_struct *tsk) | ||
1040 | { | ||
1041 | return sigismember(&tsk->pending.signal, SIGKILL); | ||
1042 | } | ||
1043 | EXPORT_SYMBOL(__fatal_signal_pending); | ||
1044 | |||
1045 | struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) | 1053 | struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) |
1046 | { | 1054 | { |
1047 | struct sighand_struct *sighand; | 1055 | struct sighand_struct *sighand; |
@@ -1068,18 +1076,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long | |||
1068 | */ | 1076 | */ |
1069 | int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | 1077 | int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) |
1070 | { | 1078 | { |
1071 | unsigned long flags; | 1079 | int ret = check_kill_permission(sig, info, p); |
1072 | int ret; | ||
1073 | 1080 | ||
1074 | ret = check_kill_permission(sig, info, p); | 1081 | if (!ret && sig) |
1075 | 1082 | ret = do_send_sig_info(sig, info, p, true); | |
1076 | if (!ret && sig) { | ||
1077 | ret = -ESRCH; | ||
1078 | if (lock_task_sighand(p, &flags)) { | ||
1079 | ret = __group_send_sig_info(sig, info, p); | ||
1080 | unlock_task_sighand(p, &flags); | ||
1081 | } | ||
1082 | } | ||
1083 | 1083 | ||
1084 | return ret; | 1084 | return ret; |
1085 | } | 1085 | } |
@@ -1224,15 +1224,9 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid) | |||
1224 | * These are for backward compatibility with the rest of the kernel source. | 1224 | * These are for backward compatibility with the rest of the kernel source. |
1225 | */ | 1225 | */ |
1226 | 1226 | ||
1227 | /* | ||
1228 | * The caller must ensure the task can't exit. | ||
1229 | */ | ||
1230 | int | 1227 | int |
1231 | send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | 1228 | send_sig_info(int sig, struct siginfo *info, struct task_struct *p) |
1232 | { | 1229 | { |
1233 | int ret; | ||
1234 | unsigned long flags; | ||
1235 | |||
1236 | /* | 1230 | /* |
1237 | * Make sure legacy kernel users don't send in bad values | 1231 | * Make sure legacy kernel users don't send in bad values |
1238 | * (normal paths check this in check_kill_permission). | 1232 | * (normal paths check this in check_kill_permission). |
@@ -1240,10 +1234,7 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | |||
1240 | if (!valid_signal(sig)) | 1234 | if (!valid_signal(sig)) |
1241 | return -EINVAL; | 1235 | return -EINVAL; |
1242 | 1236 | ||
1243 | spin_lock_irqsave(&p->sighand->siglock, flags); | 1237 | return do_send_sig_info(sig, info, p, false); |
1244 | ret = specific_send_sig_info(sig, info, p); | ||
1245 | spin_unlock_irqrestore(&p->sighand->siglock, flags); | ||
1246 | return ret; | ||
1247 | } | 1238 | } |
1248 | 1239 | ||
1249 | #define __si_special(priv) \ | 1240 | #define __si_special(priv) \ |
@@ -1383,15 +1374,6 @@ ret: | |||
1383 | } | 1374 | } |
1384 | 1375 | ||
1385 | /* | 1376 | /* |
1386 | * Wake up any threads in the parent blocked in wait* syscalls. | ||
1387 | */ | ||
1388 | static inline void __wake_up_parent(struct task_struct *p, | ||
1389 | struct task_struct *parent) | ||
1390 | { | ||
1391 | wake_up_interruptible_sync(&parent->signal->wait_chldexit); | ||
1392 | } | ||
1393 | |||
1394 | /* | ||
1395 | * Let a parent know about the death of a child. | 1377 | * Let a parent know about the death of a child. |
1396 | * For a stopped/continued status change, use do_notify_parent_cldstop instead. | 1378 | * For a stopped/continued status change, use do_notify_parent_cldstop instead. |
1397 | * | 1379 | * |
@@ -1673,29 +1655,6 @@ void ptrace_notify(int exit_code) | |||
1673 | spin_unlock_irq(&current->sighand->siglock); | 1655 | spin_unlock_irq(&current->sighand->siglock); |
1674 | } | 1656 | } |
1675 | 1657 | ||
1676 | static void | ||
1677 | finish_stop(int stop_count) | ||
1678 | { | ||
1679 | /* | ||
1680 | * If there are no other threads in the group, or if there is | ||
1681 | * a group stop in progress and we are the last to stop, | ||
1682 | * report to the parent. When ptraced, every thread reports itself. | ||
1683 | */ | ||
1684 | if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) { | ||
1685 | read_lock(&tasklist_lock); | ||
1686 | do_notify_parent_cldstop(current, CLD_STOPPED); | ||
1687 | read_unlock(&tasklist_lock); | ||
1688 | } | ||
1689 | |||
1690 | do { | ||
1691 | schedule(); | ||
1692 | } while (try_to_freeze()); | ||
1693 | /* | ||
1694 | * Now we don't run again until continued. | ||
1695 | */ | ||
1696 | current->exit_code = 0; | ||
1697 | } | ||
1698 | |||
1699 | /* | 1658 | /* |
1700 | * This performs the stopping for SIGSTOP and other stop signals. | 1659 | * This performs the stopping for SIGSTOP and other stop signals. |
1701 | * We have to stop all threads in the thread group. | 1660 | * We have to stop all threads in the thread group. |
@@ -1705,15 +1664,9 @@ finish_stop(int stop_count) | |||
1705 | static int do_signal_stop(int signr) | 1664 | static int do_signal_stop(int signr) |
1706 | { | 1665 | { |
1707 | struct signal_struct *sig = current->signal; | 1666 | struct signal_struct *sig = current->signal; |
1708 | int stop_count; | 1667 | int notify; |
1709 | 1668 | ||
1710 | if (sig->group_stop_count > 0) { | 1669 | if (!sig->group_stop_count) { |
1711 | /* | ||
1712 | * There is a group stop in progress. We don't need to | ||
1713 | * start another one. | ||
1714 | */ | ||
1715 | stop_count = --sig->group_stop_count; | ||
1716 | } else { | ||
1717 | struct task_struct *t; | 1670 | struct task_struct *t; |
1718 | 1671 | ||
1719 | if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || | 1672 | if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || |
@@ -1725,7 +1678,7 @@ static int do_signal_stop(int signr) | |||
1725 | */ | 1678 | */ |
1726 | sig->group_exit_code = signr; | 1679 | sig->group_exit_code = signr; |
1727 | 1680 | ||
1728 | stop_count = 0; | 1681 | sig->group_stop_count = 1; |
1729 | for (t = next_thread(current); t != current; t = next_thread(t)) | 1682 | for (t = next_thread(current); t != current; t = next_thread(t)) |
1730 | /* | 1683 | /* |
1731 | * Setting state to TASK_STOPPED for a group | 1684 | * Setting state to TASK_STOPPED for a group |
@@ -1734,19 +1687,44 @@ static int do_signal_stop(int signr) | |||
1734 | */ | 1687 | */ |
1735 | if (!(t->flags & PF_EXITING) && | 1688 | if (!(t->flags & PF_EXITING) && |
1736 | !task_is_stopped_or_traced(t)) { | 1689 | !task_is_stopped_or_traced(t)) { |
1737 | stop_count++; | 1690 | sig->group_stop_count++; |
1738 | signal_wake_up(t, 0); | 1691 | signal_wake_up(t, 0); |
1739 | } | 1692 | } |
1740 | sig->group_stop_count = stop_count; | ||
1741 | } | 1693 | } |
1694 | /* | ||
1695 | * If there are no other threads in the group, or if there is | ||
1696 | * a group stop in progress and we are the last to stop, report | ||
1697 | * to the parent. When ptraced, every thread reports itself. | ||
1698 | */ | ||
1699 | notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0; | ||
1700 | notify = tracehook_notify_jctl(notify, CLD_STOPPED); | ||
1701 | /* | ||
1702 | * tracehook_notify_jctl() can drop and reacquire siglock, so | ||
1703 | * we keep ->group_stop_count != 0 before the call. If SIGCONT | ||
1704 | * or SIGKILL comes in between, ->group_stop_count == 0. | ||
1705 | */ | ||
1706 | if (sig->group_stop_count) { | ||
1707 | if (!--sig->group_stop_count) | ||
1708 | sig->flags = SIGNAL_STOP_STOPPED; | ||
1709 | current->exit_code = sig->group_exit_code; | ||
1710 | __set_current_state(TASK_STOPPED); | ||
1711 | } | ||
1712 | spin_unlock_irq(&current->sighand->siglock); | ||
1742 | 1713 | ||
1743 | if (stop_count == 0) | 1714 | if (notify) { |
1744 | sig->flags = SIGNAL_STOP_STOPPED; | 1715 | read_lock(&tasklist_lock); |
1745 | current->exit_code = sig->group_exit_code; | 1716 | do_notify_parent_cldstop(current, notify); |
1746 | __set_current_state(TASK_STOPPED); | 1717 | read_unlock(&tasklist_lock); |
1718 | } | ||
1719 | |||
1720 | /* Now we don't run again until woken by SIGCONT or SIGKILL */ | ||
1721 | do { | ||
1722 | schedule(); | ||
1723 | } while (try_to_freeze()); | ||
1724 | |||
1725 | tracehook_finish_jctl(); | ||
1726 | current->exit_code = 0; | ||
1747 | 1727 | ||
1748 | spin_unlock_irq(&current->sighand->siglock); | ||
1749 | finish_stop(stop_count); | ||
1750 | return 1; | 1728 | return 1; |
1751 | } | 1729 | } |
1752 | 1730 | ||
@@ -1815,14 +1793,15 @@ relock: | |||
1815 | int why = (signal->flags & SIGNAL_STOP_CONTINUED) | 1793 | int why = (signal->flags & SIGNAL_STOP_CONTINUED) |
1816 | ? CLD_CONTINUED : CLD_STOPPED; | 1794 | ? CLD_CONTINUED : CLD_STOPPED; |
1817 | signal->flags &= ~SIGNAL_CLD_MASK; | 1795 | signal->flags &= ~SIGNAL_CLD_MASK; |
1818 | spin_unlock_irq(&sighand->siglock); | ||
1819 | 1796 | ||
1820 | if (unlikely(!tracehook_notify_jctl(1, why))) | 1797 | why = tracehook_notify_jctl(why, CLD_CONTINUED); |
1821 | goto relock; | 1798 | spin_unlock_irq(&sighand->siglock); |
1822 | 1799 | ||
1823 | read_lock(&tasklist_lock); | 1800 | if (why) { |
1824 | do_notify_parent_cldstop(current->group_leader, why); | 1801 | read_lock(&tasklist_lock); |
1825 | read_unlock(&tasklist_lock); | 1802 | do_notify_parent_cldstop(current->group_leader, why); |
1803 | read_unlock(&tasklist_lock); | ||
1804 | } | ||
1826 | goto relock; | 1805 | goto relock; |
1827 | } | 1806 | } |
1828 | 1807 | ||
@@ -1987,14 +1966,14 @@ void exit_signals(struct task_struct *tsk) | |||
1987 | if (unlikely(tsk->signal->group_stop_count) && | 1966 | if (unlikely(tsk->signal->group_stop_count) && |
1988 | !--tsk->signal->group_stop_count) { | 1967 | !--tsk->signal->group_stop_count) { |
1989 | tsk->signal->flags = SIGNAL_STOP_STOPPED; | 1968 | tsk->signal->flags = SIGNAL_STOP_STOPPED; |
1990 | group_stop = 1; | 1969 | group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED); |
1991 | } | 1970 | } |
1992 | out: | 1971 | out: |
1993 | spin_unlock_irq(&tsk->sighand->siglock); | 1972 | spin_unlock_irq(&tsk->sighand->siglock); |
1994 | 1973 | ||
1995 | if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) { | 1974 | if (unlikely(group_stop)) { |
1996 | read_lock(&tasklist_lock); | 1975 | read_lock(&tasklist_lock); |
1997 | do_notify_parent_cldstop(tsk, CLD_STOPPED); | 1976 | do_notify_parent_cldstop(tsk, group_stop); |
1998 | read_unlock(&tasklist_lock); | 1977 | read_unlock(&tasklist_lock); |
1999 | } | 1978 | } |
2000 | } | 1979 | } |
@@ -2290,7 +2269,6 @@ static int | |||
2290 | do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) | 2269 | do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) |
2291 | { | 2270 | { |
2292 | struct task_struct *p; | 2271 | struct task_struct *p; |
2293 | unsigned long flags; | ||
2294 | int error = -ESRCH; | 2272 | int error = -ESRCH; |
2295 | 2273 | ||
2296 | rcu_read_lock(); | 2274 | rcu_read_lock(); |
@@ -2300,14 +2278,16 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) | |||
2300 | /* | 2278 | /* |
2301 | * The null signal is a permissions and process existence | 2279 | * The null signal is a permissions and process existence |
2302 | * probe. No signal is actually delivered. | 2280 | * probe. No signal is actually delivered. |
2303 | * | ||
2304 | * If lock_task_sighand() fails we pretend the task dies | ||
2305 | * after receiving the signal. The window is tiny, and the | ||
2306 | * signal is private anyway. | ||
2307 | */ | 2281 | */ |
2308 | if (!error && sig && lock_task_sighand(p, &flags)) { | 2282 | if (!error && sig) { |
2309 | error = specific_send_sig_info(sig, info, p); | 2283 | error = do_send_sig_info(sig, info, p, false); |
2310 | unlock_task_sighand(p, &flags); | 2284 | /* |
2285 | * If lock_task_sighand() failed we pretend the task | ||
2286 | * dies after receiving the signal. The window is tiny, | ||
2287 | * and the signal is private anyway. | ||
2288 | */ | ||
2289 | if (unlikely(error == -ESRCH)) | ||
2290 | error = 0; | ||
2311 | } | 2291 | } |
2312 | } | 2292 | } |
2313 | rcu_read_unlock(); | 2293 | rcu_read_unlock(); |
diff --git a/kernel/slow-work.c b/kernel/slow-work.c index 09d7519557d3..0d31135efbf4 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c | |||
@@ -26,10 +26,10 @@ static void slow_work_cull_timeout(unsigned long); | |||
26 | static void slow_work_oom_timeout(unsigned long); | 26 | static void slow_work_oom_timeout(unsigned long); |
27 | 27 | ||
28 | #ifdef CONFIG_SYSCTL | 28 | #ifdef CONFIG_SYSCTL |
29 | static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *, | 29 | static int slow_work_min_threads_sysctl(struct ctl_table *, int, |
30 | void __user *, size_t *, loff_t *); | 30 | void __user *, size_t *, loff_t *); |
31 | 31 | ||
32 | static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *, | 32 | static int slow_work_max_threads_sysctl(struct ctl_table *, int , |
33 | void __user *, size_t *, loff_t *); | 33 | void __user *, size_t *, loff_t *); |
34 | #endif | 34 | #endif |
35 | 35 | ||
@@ -493,10 +493,10 @@ static void slow_work_oom_timeout(unsigned long data) | |||
493 | * Handle adjustment of the minimum number of threads | 493 | * Handle adjustment of the minimum number of threads |
494 | */ | 494 | */ |
495 | static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, | 495 | static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, |
496 | struct file *filp, void __user *buffer, | 496 | void __user *buffer, |
497 | size_t *lenp, loff_t *ppos) | 497 | size_t *lenp, loff_t *ppos) |
498 | { | 498 | { |
499 | int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | 499 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
500 | int n; | 500 | int n; |
501 | 501 | ||
502 | if (ret == 0) { | 502 | if (ret == 0) { |
@@ -521,10 +521,10 @@ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, | |||
521 | * Handle adjustment of the maximum number of threads | 521 | * Handle adjustment of the maximum number of threads |
522 | */ | 522 | */ |
523 | static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, | 523 | static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, |
524 | struct file *filp, void __user *buffer, | 524 | void __user *buffer, |
525 | size_t *lenp, loff_t *ppos) | 525 | size_t *lenp, loff_t *ppos) |
526 | { | 526 | { |
527 | int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | 527 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
528 | int n; | 528 | int n; |
529 | 529 | ||
530 | if (ret == 0) { | 530 | if (ret == 0) { |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 88796c330838..81324d12eb35 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -90,11 +90,11 @@ void touch_all_softlockup_watchdogs(void) | |||
90 | EXPORT_SYMBOL(touch_all_softlockup_watchdogs); | 90 | EXPORT_SYMBOL(touch_all_softlockup_watchdogs); |
91 | 91 | ||
92 | int proc_dosoftlockup_thresh(struct ctl_table *table, int write, | 92 | int proc_dosoftlockup_thresh(struct ctl_table *table, int write, |
93 | struct file *filp, void __user *buffer, | 93 | void __user *buffer, |
94 | size_t *lenp, loff_t *ppos) | 94 | size_t *lenp, loff_t *ppos) |
95 | { | 95 | { |
96 | touch_all_softlockup_watchdogs(); | 96 | touch_all_softlockup_watchdogs(); |
97 | return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | 97 | return proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
98 | } | 98 | } |
99 | 99 | ||
100 | /* | 100 | /* |
diff --git a/kernel/sys.c b/kernel/sys.c index ebcb15611728..255475d163e0 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1542,6 +1542,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
1542 | current->timer_slack_ns = arg2; | 1542 | current->timer_slack_ns = arg2; |
1543 | error = 0; | 1543 | error = 0; |
1544 | break; | 1544 | break; |
1545 | case PR_MCE_KILL: | ||
1546 | if (arg4 | arg5) | ||
1547 | return -EINVAL; | ||
1548 | switch (arg2) { | ||
1549 | case 0: | ||
1550 | if (arg3 != 0) | ||
1551 | return -EINVAL; | ||
1552 | current->flags &= ~PF_MCE_PROCESS; | ||
1553 | break; | ||
1554 | case 1: | ||
1555 | current->flags |= PF_MCE_PROCESS; | ||
1556 | if (arg3 != 0) | ||
1557 | current->flags |= PF_MCE_EARLY; | ||
1558 | else | ||
1559 | current->flags &= ~PF_MCE_EARLY; | ||
1560 | break; | ||
1561 | default: | ||
1562 | return -EINVAL; | ||
1563 | } | ||
1564 | error = 0; | ||
1565 | break; | ||
1566 | |||
1545 | default: | 1567 | default: |
1546 | error = -EINVAL; | 1568 | error = -EINVAL; |
1547 | break; | 1569 | break; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7f4f57bea4ce..0d949c517412 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -76,6 +76,7 @@ extern int max_threads; | |||
76 | extern int core_uses_pid; | 76 | extern int core_uses_pid; |
77 | extern int suid_dumpable; | 77 | extern int suid_dumpable; |
78 | extern char core_pattern[]; | 78 | extern char core_pattern[]; |
79 | extern unsigned int core_pipe_limit; | ||
79 | extern int pid_max; | 80 | extern int pid_max; |
80 | extern int min_free_kbytes; | 81 | extern int min_free_kbytes; |
81 | extern int pid_max_min, pid_max_max; | 82 | extern int pid_max_min, pid_max_max; |
@@ -162,9 +163,9 @@ extern int max_lock_depth; | |||
162 | #endif | 163 | #endif |
163 | 164 | ||
164 | #ifdef CONFIG_PROC_SYSCTL | 165 | #ifdef CONFIG_PROC_SYSCTL |
165 | static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, | 166 | static int proc_do_cad_pid(struct ctl_table *table, int write, |
166 | void __user *buffer, size_t *lenp, loff_t *ppos); | 167 | void __user *buffer, size_t *lenp, loff_t *ppos); |
167 | static int proc_taint(struct ctl_table *table, int write, struct file *filp, | 168 | static int proc_taint(struct ctl_table *table, int write, |
168 | void __user *buffer, size_t *lenp, loff_t *ppos); | 169 | void __user *buffer, size_t *lenp, loff_t *ppos); |
169 | #endif | 170 | #endif |
170 | 171 | ||
@@ -423,6 +424,14 @@ static struct ctl_table kern_table[] = { | |||
423 | .proc_handler = &proc_dostring, | 424 | .proc_handler = &proc_dostring, |
424 | .strategy = &sysctl_string, | 425 | .strategy = &sysctl_string, |
425 | }, | 426 | }, |
427 | { | ||
428 | .ctl_name = CTL_UNNUMBERED, | ||
429 | .procname = "core_pipe_limit", | ||
430 | .data = &core_pipe_limit, | ||
431 | .maxlen = sizeof(unsigned int), | ||
432 | .mode = 0644, | ||
433 | .proc_handler = &proc_dointvec, | ||
434 | }, | ||
426 | #ifdef CONFIG_PROC_SYSCTL | 435 | #ifdef CONFIG_PROC_SYSCTL |
427 | { | 436 | { |
428 | .procname = "tainted", | 437 | .procname = "tainted", |
@@ -1389,6 +1398,31 @@ static struct ctl_table vm_table[] = { | |||
1389 | .mode = 0644, | 1398 | .mode = 0644, |
1390 | .proc_handler = &scan_unevictable_handler, | 1399 | .proc_handler = &scan_unevictable_handler, |
1391 | }, | 1400 | }, |
1401 | #ifdef CONFIG_MEMORY_FAILURE | ||
1402 | { | ||
1403 | .ctl_name = CTL_UNNUMBERED, | ||
1404 | .procname = "memory_failure_early_kill", | ||
1405 | .data = &sysctl_memory_failure_early_kill, | ||
1406 | .maxlen = sizeof(sysctl_memory_failure_early_kill), | ||
1407 | .mode = 0644, | ||
1408 | .proc_handler = &proc_dointvec_minmax, | ||
1409 | .strategy = &sysctl_intvec, | ||
1410 | .extra1 = &zero, | ||
1411 | .extra2 = &one, | ||
1412 | }, | ||
1413 | { | ||
1414 | .ctl_name = CTL_UNNUMBERED, | ||
1415 | .procname = "memory_failure_recovery", | ||
1416 | .data = &sysctl_memory_failure_recovery, | ||
1417 | .maxlen = sizeof(sysctl_memory_failure_recovery), | ||
1418 | .mode = 0644, | ||
1419 | .proc_handler = &proc_dointvec_minmax, | ||
1420 | .strategy = &sysctl_intvec, | ||
1421 | .extra1 = &zero, | ||
1422 | .extra2 = &one, | ||
1423 | }, | ||
1424 | #endif | ||
1425 | |||
1392 | /* | 1426 | /* |
1393 | * NOTE: do not add new entries to this table unless you have read | 1427 | * NOTE: do not add new entries to this table unless you have read |
1394 | * Documentation/sysctl/ctl_unnumbered.txt | 1428 | * Documentation/sysctl/ctl_unnumbered.txt |
@@ -2217,7 +2251,7 @@ void sysctl_head_put(struct ctl_table_header *head) | |||
2217 | #ifdef CONFIG_PROC_SYSCTL | 2251 | #ifdef CONFIG_PROC_SYSCTL |
2218 | 2252 | ||
2219 | static int _proc_do_string(void* data, int maxlen, int write, | 2253 | static int _proc_do_string(void* data, int maxlen, int write, |
2220 | struct file *filp, void __user *buffer, | 2254 | void __user *buffer, |
2221 | size_t *lenp, loff_t *ppos) | 2255 | size_t *lenp, loff_t *ppos) |
2222 | { | 2256 | { |
2223 | size_t len; | 2257 | size_t len; |
@@ -2278,7 +2312,6 @@ static int _proc_do_string(void* data, int maxlen, int write, | |||
2278 | * proc_dostring - read a string sysctl | 2312 | * proc_dostring - read a string sysctl |
2279 | * @table: the sysctl table | 2313 | * @table: the sysctl table |
2280 | * @write: %TRUE if this is a write to the sysctl file | 2314 | * @write: %TRUE if this is a write to the sysctl file |
2281 | * @filp: the file structure | ||
2282 | * @buffer: the user buffer | 2315 | * @buffer: the user buffer |
2283 | * @lenp: the size of the user buffer | 2316 | * @lenp: the size of the user buffer |
2284 | * @ppos: file position | 2317 | * @ppos: file position |
@@ -2292,10 +2325,10 @@ static int _proc_do_string(void* data, int maxlen, int write, | |||
2292 | * | 2325 | * |
2293 | * Returns 0 on success. | 2326 | * Returns 0 on success. |
2294 | */ | 2327 | */ |
2295 | int proc_dostring(struct ctl_table *table, int write, struct file *filp, | 2328 | int proc_dostring(struct ctl_table *table, int write, |
2296 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2329 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2297 | { | 2330 | { |
2298 | return _proc_do_string(table->data, table->maxlen, write, filp, | 2331 | return _proc_do_string(table->data, table->maxlen, write, |
2299 | buffer, lenp, ppos); | 2332 | buffer, lenp, ppos); |
2300 | } | 2333 | } |
2301 | 2334 | ||
@@ -2320,7 +2353,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, | |||
2320 | } | 2353 | } |
2321 | 2354 | ||
2322 | static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, | 2355 | static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, |
2323 | int write, struct file *filp, void __user *buffer, | 2356 | int write, void __user *buffer, |
2324 | size_t *lenp, loff_t *ppos, | 2357 | size_t *lenp, loff_t *ppos, |
2325 | int (*conv)(int *negp, unsigned long *lvalp, int *valp, | 2358 | int (*conv)(int *negp, unsigned long *lvalp, int *valp, |
2326 | int write, void *data), | 2359 | int write, void *data), |
@@ -2427,13 +2460,13 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, | |||
2427 | #undef TMPBUFLEN | 2460 | #undef TMPBUFLEN |
2428 | } | 2461 | } |
2429 | 2462 | ||
2430 | static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp, | 2463 | static int do_proc_dointvec(struct ctl_table *table, int write, |
2431 | void __user *buffer, size_t *lenp, loff_t *ppos, | 2464 | void __user *buffer, size_t *lenp, loff_t *ppos, |
2432 | int (*conv)(int *negp, unsigned long *lvalp, int *valp, | 2465 | int (*conv)(int *negp, unsigned long *lvalp, int *valp, |
2433 | int write, void *data), | 2466 | int write, void *data), |
2434 | void *data) | 2467 | void *data) |
2435 | { | 2468 | { |
2436 | return __do_proc_dointvec(table->data, table, write, filp, | 2469 | return __do_proc_dointvec(table->data, table, write, |
2437 | buffer, lenp, ppos, conv, data); | 2470 | buffer, lenp, ppos, conv, data); |
2438 | } | 2471 | } |
2439 | 2472 | ||
@@ -2441,7 +2474,6 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil | |||
2441 | * proc_dointvec - read a vector of integers | 2474 | * proc_dointvec - read a vector of integers |
2442 | * @table: the sysctl table | 2475 | * @table: the sysctl table |
2443 | * @write: %TRUE if this is a write to the sysctl file | 2476 | * @write: %TRUE if this is a write to the sysctl file |
2444 | * @filp: the file structure | ||
2445 | * @buffer: the user buffer | 2477 | * @buffer: the user buffer |
2446 | * @lenp: the size of the user buffer | 2478 | * @lenp: the size of the user buffer |
2447 | * @ppos: file position | 2479 | * @ppos: file position |
@@ -2451,10 +2483,10 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil | |||
2451 | * | 2483 | * |
2452 | * Returns 0 on success. | 2484 | * Returns 0 on success. |
2453 | */ | 2485 | */ |
2454 | int proc_dointvec(struct ctl_table *table, int write, struct file *filp, | 2486 | int proc_dointvec(struct ctl_table *table, int write, |
2455 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2487 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2456 | { | 2488 | { |
2457 | return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, | 2489 | return do_proc_dointvec(table,write,buffer,lenp,ppos, |
2458 | NULL,NULL); | 2490 | NULL,NULL); |
2459 | } | 2491 | } |
2460 | 2492 | ||
@@ -2462,7 +2494,7 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp, | |||
2462 | * Taint values can only be increased | 2494 | * Taint values can only be increased |
2463 | * This means we can safely use a temporary. | 2495 | * This means we can safely use a temporary. |
2464 | */ | 2496 | */ |
2465 | static int proc_taint(struct ctl_table *table, int write, struct file *filp, | 2497 | static int proc_taint(struct ctl_table *table, int write, |
2466 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2498 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2467 | { | 2499 | { |
2468 | struct ctl_table t; | 2500 | struct ctl_table t; |
@@ -2474,7 +2506,7 @@ static int proc_taint(struct ctl_table *table, int write, struct file *filp, | |||
2474 | 2506 | ||
2475 | t = *table; | 2507 | t = *table; |
2476 | t.data = &tmptaint; | 2508 | t.data = &tmptaint; |
2477 | err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos); | 2509 | err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); |
2478 | if (err < 0) | 2510 | if (err < 0) |
2479 | return err; | 2511 | return err; |
2480 | 2512 | ||
@@ -2526,7 +2558,6 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, | |||
2526 | * proc_dointvec_minmax - read a vector of integers with min/max values | 2558 | * proc_dointvec_minmax - read a vector of integers with min/max values |
2527 | * @table: the sysctl table | 2559 | * @table: the sysctl table |
2528 | * @write: %TRUE if this is a write to the sysctl file | 2560 | * @write: %TRUE if this is a write to the sysctl file |
2529 | * @filp: the file structure | ||
2530 | * @buffer: the user buffer | 2561 | * @buffer: the user buffer |
2531 | * @lenp: the size of the user buffer | 2562 | * @lenp: the size of the user buffer |
2532 | * @ppos: file position | 2563 | * @ppos: file position |
@@ -2539,19 +2570,18 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, | |||
2539 | * | 2570 | * |
2540 | * Returns 0 on success. | 2571 | * Returns 0 on success. |
2541 | */ | 2572 | */ |
2542 | int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, | 2573 | int proc_dointvec_minmax(struct ctl_table *table, int write, |
2543 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2574 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2544 | { | 2575 | { |
2545 | struct do_proc_dointvec_minmax_conv_param param = { | 2576 | struct do_proc_dointvec_minmax_conv_param param = { |
2546 | .min = (int *) table->extra1, | 2577 | .min = (int *) table->extra1, |
2547 | .max = (int *) table->extra2, | 2578 | .max = (int *) table->extra2, |
2548 | }; | 2579 | }; |
2549 | return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, | 2580 | return do_proc_dointvec(table, write, buffer, lenp, ppos, |
2550 | do_proc_dointvec_minmax_conv, &param); | 2581 | do_proc_dointvec_minmax_conv, &param); |
2551 | } | 2582 | } |
2552 | 2583 | ||
2553 | static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, | 2584 | static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, |
2554 | struct file *filp, | ||
2555 | void __user *buffer, | 2585 | void __user *buffer, |
2556 | size_t *lenp, loff_t *ppos, | 2586 | size_t *lenp, loff_t *ppos, |
2557 | unsigned long convmul, | 2587 | unsigned long convmul, |
@@ -2656,21 +2686,19 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int | |||
2656 | } | 2686 | } |
2657 | 2687 | ||
2658 | static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, | 2688 | static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, |
2659 | struct file *filp, | ||
2660 | void __user *buffer, | 2689 | void __user *buffer, |
2661 | size_t *lenp, loff_t *ppos, | 2690 | size_t *lenp, loff_t *ppos, |
2662 | unsigned long convmul, | 2691 | unsigned long convmul, |
2663 | unsigned long convdiv) | 2692 | unsigned long convdiv) |
2664 | { | 2693 | { |
2665 | return __do_proc_doulongvec_minmax(table->data, table, write, | 2694 | return __do_proc_doulongvec_minmax(table->data, table, write, |
2666 | filp, buffer, lenp, ppos, convmul, convdiv); | 2695 | buffer, lenp, ppos, convmul, convdiv); |
2667 | } | 2696 | } |
2668 | 2697 | ||
2669 | /** | 2698 | /** |
2670 | * proc_doulongvec_minmax - read a vector of long integers with min/max values | 2699 | * proc_doulongvec_minmax - read a vector of long integers with min/max values |
2671 | * @table: the sysctl table | 2700 | * @table: the sysctl table |
2672 | * @write: %TRUE if this is a write to the sysctl file | 2701 | * @write: %TRUE if this is a write to the sysctl file |
2673 | * @filp: the file structure | ||
2674 | * @buffer: the user buffer | 2702 | * @buffer: the user buffer |
2675 | * @lenp: the size of the user buffer | 2703 | * @lenp: the size of the user buffer |
2676 | * @ppos: file position | 2704 | * @ppos: file position |
@@ -2683,17 +2711,16 @@ static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, | |||
2683 | * | 2711 | * |
2684 | * Returns 0 on success. | 2712 | * Returns 0 on success. |
2685 | */ | 2713 | */ |
2686 | int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, | 2714 | int proc_doulongvec_minmax(struct ctl_table *table, int write, |
2687 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2715 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2688 | { | 2716 | { |
2689 | return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l); | 2717 | return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l); |
2690 | } | 2718 | } |
2691 | 2719 | ||
2692 | /** | 2720 | /** |
2693 | * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values | 2721 | * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values |
2694 | * @table: the sysctl table | 2722 | * @table: the sysctl table |
2695 | * @write: %TRUE if this is a write to the sysctl file | 2723 | * @write: %TRUE if this is a write to the sysctl file |
2696 | * @filp: the file structure | ||
2697 | * @buffer: the user buffer | 2724 | * @buffer: the user buffer |
2698 | * @lenp: the size of the user buffer | 2725 | * @lenp: the size of the user buffer |
2699 | * @ppos: file position | 2726 | * @ppos: file position |
@@ -2708,11 +2735,10 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp | |||
2708 | * Returns 0 on success. | 2735 | * Returns 0 on success. |
2709 | */ | 2736 | */ |
2710 | int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, | 2737 | int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, |
2711 | struct file *filp, | ||
2712 | void __user *buffer, | 2738 | void __user *buffer, |
2713 | size_t *lenp, loff_t *ppos) | 2739 | size_t *lenp, loff_t *ppos) |
2714 | { | 2740 | { |
2715 | return do_proc_doulongvec_minmax(table, write, filp, buffer, | 2741 | return do_proc_doulongvec_minmax(table, write, buffer, |
2716 | lenp, ppos, HZ, 1000l); | 2742 | lenp, ppos, HZ, 1000l); |
2717 | } | 2743 | } |
2718 | 2744 | ||
@@ -2788,7 +2814,6 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, | |||
2788 | * proc_dointvec_jiffies - read a vector of integers as seconds | 2814 | * proc_dointvec_jiffies - read a vector of integers as seconds |
2789 | * @table: the sysctl table | 2815 | * @table: the sysctl table |
2790 | * @write: %TRUE if this is a write to the sysctl file | 2816 | * @write: %TRUE if this is a write to the sysctl file |
2791 | * @filp: the file structure | ||
2792 | * @buffer: the user buffer | 2817 | * @buffer: the user buffer |
2793 | * @lenp: the size of the user buffer | 2818 | * @lenp: the size of the user buffer |
2794 | * @ppos: file position | 2819 | * @ppos: file position |
@@ -2800,10 +2825,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, | |||
2800 | * | 2825 | * |
2801 | * Returns 0 on success. | 2826 | * Returns 0 on success. |
2802 | */ | 2827 | */ |
2803 | int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, | 2828 | int proc_dointvec_jiffies(struct ctl_table *table, int write, |
2804 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2829 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2805 | { | 2830 | { |
2806 | return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, | 2831 | return do_proc_dointvec(table,write,buffer,lenp,ppos, |
2807 | do_proc_dointvec_jiffies_conv,NULL); | 2832 | do_proc_dointvec_jiffies_conv,NULL); |
2808 | } | 2833 | } |
2809 | 2834 | ||
@@ -2811,7 +2836,6 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, | |||
2811 | * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds | 2836 | * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds |
2812 | * @table: the sysctl table | 2837 | * @table: the sysctl table |
2813 | * @write: %TRUE if this is a write to the sysctl file | 2838 | * @write: %TRUE if this is a write to the sysctl file |
2814 | * @filp: the file structure | ||
2815 | * @buffer: the user buffer | 2839 | * @buffer: the user buffer |
2816 | * @lenp: the size of the user buffer | 2840 | * @lenp: the size of the user buffer |
2817 | * @ppos: pointer to the file position | 2841 | * @ppos: pointer to the file position |
@@ -2823,10 +2847,10 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, | |||
2823 | * | 2847 | * |
2824 | * Returns 0 on success. | 2848 | * Returns 0 on success. |
2825 | */ | 2849 | */ |
2826 | int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, | 2850 | int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, |
2827 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2851 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2828 | { | 2852 | { |
2829 | return do_proc_dointvec(table,write,filp,buffer,lenp,ppos, | 2853 | return do_proc_dointvec(table,write,buffer,lenp,ppos, |
2830 | do_proc_dointvec_userhz_jiffies_conv,NULL); | 2854 | do_proc_dointvec_userhz_jiffies_conv,NULL); |
2831 | } | 2855 | } |
2832 | 2856 | ||
@@ -2834,7 +2858,6 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file | |||
2834 | * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds | 2858 | * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds |
2835 | * @table: the sysctl table | 2859 | * @table: the sysctl table |
2836 | * @write: %TRUE if this is a write to the sysctl file | 2860 | * @write: %TRUE if this is a write to the sysctl file |
2837 | * @filp: the file structure | ||
2838 | * @buffer: the user buffer | 2861 | * @buffer: the user buffer |
2839 | * @lenp: the size of the user buffer | 2862 | * @lenp: the size of the user buffer |
2840 | * @ppos: file position | 2863 | * @ppos: file position |
@@ -2847,14 +2870,14 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file | |||
2847 | * | 2870 | * |
2848 | * Returns 0 on success. | 2871 | * Returns 0 on success. |
2849 | */ | 2872 | */ |
2850 | int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, | 2873 | int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, |
2851 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2874 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2852 | { | 2875 | { |
2853 | return do_proc_dointvec(table, write, filp, buffer, lenp, ppos, | 2876 | return do_proc_dointvec(table, write, buffer, lenp, ppos, |
2854 | do_proc_dointvec_ms_jiffies_conv, NULL); | 2877 | do_proc_dointvec_ms_jiffies_conv, NULL); |
2855 | } | 2878 | } |
2856 | 2879 | ||
2857 | static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, | 2880 | static int proc_do_cad_pid(struct ctl_table *table, int write, |
2858 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2881 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2859 | { | 2882 | { |
2860 | struct pid *new_pid; | 2883 | struct pid *new_pid; |
@@ -2863,7 +2886,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp | |||
2863 | 2886 | ||
2864 | tmp = pid_vnr(cad_pid); | 2887 | tmp = pid_vnr(cad_pid); |
2865 | 2888 | ||
2866 | r = __do_proc_dointvec(&tmp, table, write, filp, buffer, | 2889 | r = __do_proc_dointvec(&tmp, table, write, buffer, |
2867 | lenp, ppos, NULL, NULL); | 2890 | lenp, ppos, NULL, NULL); |
2868 | if (r || !write) | 2891 | if (r || !write) |
2869 | return r; | 2892 | return r; |
@@ -2878,50 +2901,49 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp | |||
2878 | 2901 | ||
2879 | #else /* CONFIG_PROC_FS */ | 2902 | #else /* CONFIG_PROC_FS */ |
2880 | 2903 | ||
2881 | int proc_dostring(struct ctl_table *table, int write, struct file *filp, | 2904 | int proc_dostring(struct ctl_table *table, int write, |
2882 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2905 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2883 | { | 2906 | { |
2884 | return -ENOSYS; | 2907 | return -ENOSYS; |
2885 | } | 2908 | } |
2886 | 2909 | ||
2887 | int proc_dointvec(struct ctl_table *table, int write, struct file *filp, | 2910 | int proc_dointvec(struct ctl_table *table, int write, |
2888 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2911 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2889 | { | 2912 | { |
2890 | return -ENOSYS; | 2913 | return -ENOSYS; |
2891 | } | 2914 | } |
2892 | 2915 | ||
2893 | int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp, | 2916 | int proc_dointvec_minmax(struct ctl_table *table, int write, |
2894 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2917 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2895 | { | 2918 | { |
2896 | return -ENOSYS; | 2919 | return -ENOSYS; |
2897 | } | 2920 | } |
2898 | 2921 | ||
2899 | int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp, | 2922 | int proc_dointvec_jiffies(struct ctl_table *table, int write, |
2900 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2923 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2901 | { | 2924 | { |
2902 | return -ENOSYS; | 2925 | return -ENOSYS; |
2903 | } | 2926 | } |
2904 | 2927 | ||
2905 | int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp, | 2928 | int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, |
2906 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2929 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2907 | { | 2930 | { |
2908 | return -ENOSYS; | 2931 | return -ENOSYS; |
2909 | } | 2932 | } |
2910 | 2933 | ||
2911 | int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp, | 2934 | int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, |
2912 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2935 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2913 | { | 2936 | { |
2914 | return -ENOSYS; | 2937 | return -ENOSYS; |
2915 | } | 2938 | } |
2916 | 2939 | ||
2917 | int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp, | 2940 | int proc_doulongvec_minmax(struct ctl_table *table, int write, |
2918 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2941 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2919 | { | 2942 | { |
2920 | return -ENOSYS; | 2943 | return -ENOSYS; |
2921 | } | 2944 | } |
2922 | 2945 | ||
2923 | int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, | 2946 | int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, |
2924 | struct file *filp, | ||
2925 | void __user *buffer, | 2947 | void __user *buffer, |
2926 | size_t *lenp, loff_t *ppos) | 2948 | size_t *lenp, loff_t *ppos) |
2927 | { | 2949 | { |
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 0b0a6366c9d4..ee266620b06c 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o | 1 | obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o |
2 | 2 | ||
3 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o | 3 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o |
4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o | 4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o |
diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c new file mode 100644 index 000000000000..86628e755f38 --- /dev/null +++ b/kernel/time/timeconv.c | |||
@@ -0,0 +1,127 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. | ||
3 | * This file is part of the GNU C Library. | ||
4 | * Contributed by Paul Eggert (eggert@twinsun.com). | ||
5 | * | ||
6 | * The GNU C Library is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Library General Public License as | ||
8 | * published by the Free Software Foundation; either version 2 of the | ||
9 | * License, or (at your option) any later version. | ||
10 | * | ||
11 | * The GNU C Library is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Library General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Library General Public | ||
17 | * License along with the GNU C Library; see the file COPYING.LIB. If not, | ||
18 | * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
19 | * Boston, MA 02111-1307, USA. | ||
20 | */ | ||
21 | |||
22 | /* | ||
23 | * Converts the calendar time to broken-down time representation | ||
24 | * Based on code from glibc-2.6 | ||
25 | * | ||
26 | * 2009-7-14: | ||
27 | * Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com> | ||
28 | */ | ||
29 | |||
30 | #include <linux/time.h> | ||
31 | #include <linux/module.h> | ||
32 | |||
33 | /* | ||
34 | * Nonzero if YEAR is a leap year (every 4 years, | ||
35 | * except every 100th isn't, and every 400th is). | ||
36 | */ | ||
37 | static int __isleap(long year) | ||
38 | { | ||
39 | return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0); | ||
40 | } | ||
41 | |||
42 | /* Floor division for long type: rounds toward negative infinity when b is positive */ | ||
43 | static long math_div(long a, long b) | ||
44 | { | ||
45 | return a / b - (a % b < 0); | ||
46 | } | ||
47 | |||
48 | /* How many leap years between y1 and y2; y1 must be less than or equal to y2 */ | ||
49 | static long leaps_between(long y1, long y2) | ||
50 | { | ||
51 | long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100) | ||
52 | + math_div(y1 - 1, 400); | ||
53 | long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100) | ||
54 | + math_div(y2 - 1, 400); | ||
55 | return leaps2 - leaps1; | ||
56 | } | ||
57 | |||
58 | /* How many days come before each month (0-12). */ | ||
59 | static const unsigned short __mon_yday[2][13] = { | ||
60 | /* Normal years. */ | ||
61 | {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, | ||
62 | /* Leap years. */ | ||
63 | {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366} | ||
64 | }; | ||
65 | |||
66 | #define SECS_PER_HOUR (60 * 60) | ||
67 | #define SECS_PER_DAY (SECS_PER_HOUR * 24) | ||
68 | |||
69 | /** | ||
70 | * time_to_tm - converts the calendar time to local broken-down time | ||
71 | * | ||
72 | * @totalsecs: the number of seconds elapsed since 00:00:00 on January 1, 1970, | ||
73 | * Coordinated Universal Time (UTC). | ||
74 | * @offset: number of seconds to add to totalsecs. | ||
75 | * @result: pointer to struct tm variable to receive broken-down time | ||
76 | */ | ||
77 | void time_to_tm(time_t totalsecs, int offset, struct tm *result) | ||
78 | { | ||
79 | long days, rem, y; | ||
80 | const unsigned short *ip; | ||
81 | |||
82 | days = totalsecs / SECS_PER_DAY; | ||
83 | rem = totalsecs % SECS_PER_DAY; | ||
84 | rem += offset; | ||
85 | while (rem < 0) { | ||
86 | rem += SECS_PER_DAY; | ||
87 | --days; | ||
88 | } | ||
89 | while (rem >= SECS_PER_DAY) { | ||
90 | rem -= SECS_PER_DAY; | ||
91 | ++days; | ||
92 | } | ||
93 | |||
94 | result->tm_hour = rem / SECS_PER_HOUR; | ||
95 | rem %= SECS_PER_HOUR; | ||
96 | result->tm_min = rem / 60; | ||
97 | result->tm_sec = rem % 60; | ||
98 | |||
99 | /* January 1, 1970 was a Thursday. */ | ||
100 | result->tm_wday = (4 + days) % 7; | ||
101 | if (result->tm_wday < 0) | ||
102 | result->tm_wday += 7; | ||
103 | |||
104 | y = 1970; | ||
105 | |||
106 | while (days < 0 || days >= (__isleap(y) ? 366 : 365)) { | ||
107 | /* Guess a corrected year, assuming 365 days per year. */ | ||
108 | long yg = y + math_div(days, 365); | ||
109 | |||
110 | /* Adjust DAYS and Y to match the guessed year. */ | ||
111 | days -= (yg - y) * 365 + leaps_between(y, yg); | ||
112 | y = yg; | ||
113 | } | ||
114 | |||
115 | result->tm_year = y - 1900; | ||
116 | |||
117 | result->tm_yday = days; | ||
118 | |||
119 | ip = __mon_yday[__isleap(y)]; | ||
120 | for (y = 11; days < ip[y]; y--) | ||
121 | continue; | ||
122 | days -= ip[y]; | ||
123 | |||
124 | result->tm_mon = y; | ||
125 | result->tm_mday = days + 1; | ||
126 | } | ||
127 | EXPORT_SYMBOL(time_to_tm); | ||
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 23df7771c937..a142579765bf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -3015,7 +3015,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) | |||
3015 | 3015 | ||
3016 | int | 3016 | int |
3017 | ftrace_enable_sysctl(struct ctl_table *table, int write, | 3017 | ftrace_enable_sysctl(struct ctl_table *table, int write, |
3018 | struct file *file, void __user *buffer, size_t *lenp, | 3018 | void __user *buffer, size_t *lenp, |
3019 | loff_t *ppos) | 3019 | loff_t *ppos) |
3020 | { | 3020 | { |
3021 | int ret; | 3021 | int ret; |
@@ -3025,7 +3025,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, | |||
3025 | 3025 | ||
3026 | mutex_lock(&ftrace_lock); | 3026 | mutex_lock(&ftrace_lock); |
3027 | 3027 | ||
3028 | ret = proc_dointvec(table, write, file, buffer, lenp, ppos); | 3028 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
3029 | 3029 | ||
3030 | if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) | 3030 | if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) |
3031 | goto out; | 3031 | goto out; |
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 0f6facb050a1..8504ac71e4e8 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -296,14 +296,14 @@ static const struct file_operations stack_trace_fops = { | |||
296 | 296 | ||
297 | int | 297 | int |
298 | stack_trace_sysctl(struct ctl_table *table, int write, | 298 | stack_trace_sysctl(struct ctl_table *table, int write, |
299 | struct file *file, void __user *buffer, size_t *lenp, | 299 | void __user *buffer, size_t *lenp, |
300 | loff_t *ppos) | 300 | loff_t *ppos) |
301 | { | 301 | { |
302 | int ret; | 302 | int ret; |
303 | 303 | ||
304 | mutex_lock(&stack_sysctl_mutex); | 304 | mutex_lock(&stack_sysctl_mutex); |
305 | 305 | ||
306 | ret = proc_dointvec(table, write, file, buffer, lenp, ppos); | 306 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
307 | 307 | ||
308 | if (ret || !write || | 308 | if (ret || !write || |
309 | (last_stack_tracer_enabled == !!stack_tracer_enabled)) | 309 | (last_stack_tracer_enabled == !!stack_tracer_enabled)) |
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 92359cc747a7..69eae358a726 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c | |||
@@ -42,14 +42,14 @@ static void put_uts(ctl_table *table, int write, void *which) | |||
42 | * Special case of dostring for the UTS structure. This has locks | 42 | * Special case of dostring for the UTS structure. This has locks |
43 | * to observe. Should this be in kernel/sys.c ???? | 43 | * to observe. Should this be in kernel/sys.c ???? |
44 | */ | 44 | */ |
45 | static int proc_do_uts_string(ctl_table *table, int write, struct file *filp, | 45 | static int proc_do_uts_string(ctl_table *table, int write, |
46 | void __user *buffer, size_t *lenp, loff_t *ppos) | 46 | void __user *buffer, size_t *lenp, loff_t *ppos) |
47 | { | 47 | { |
48 | struct ctl_table uts_table; | 48 | struct ctl_table uts_table; |
49 | int r; | 49 | int r; |
50 | memcpy(&uts_table, table, sizeof(uts_table)); | 50 | memcpy(&uts_table, table, sizeof(uts_table)); |
51 | uts_table.data = get_uts(table, write); | 51 | uts_table.data = get_uts(table, write); |
52 | r = proc_dostring(&uts_table,write,filp,buffer,lenp, ppos); | 52 | r = proc_dostring(&uts_table,write,buffer,lenp, ppos); |
53 | put_uts(table, write, uts_table.data); | 53 | put_uts(table, write, uts_table.data); |
54 | return r; | 54 | return r; |
55 | } | 55 | } |
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 68dfce59c1b8..fc686c7a0a0d 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c | |||
@@ -27,6 +27,11 @@ | |||
27 | 27 | ||
28 | #define GZIP_IOBUF_SIZE (16*1024) | 28 | #define GZIP_IOBUF_SIZE (16*1024) |
29 | 29 | ||
30 | static int nofill(void *buffer, unsigned int len) | ||
31 | { | ||
32 | return -1; | ||
33 | } | ||
34 | |||
30 | /* Included from initramfs et al code */ | 35 | /* Included from initramfs et al code */ |
31 | STATIC int INIT gunzip(unsigned char *buf, int len, | 36 | STATIC int INIT gunzip(unsigned char *buf, int len, |
32 | int(*fill)(void*, unsigned int), | 37 | int(*fill)(void*, unsigned int), |
@@ -76,6 +81,9 @@ STATIC int INIT gunzip(unsigned char *buf, int len, | |||
76 | goto gunzip_nomem4; | 81 | goto gunzip_nomem4; |
77 | } | 82 | } |
78 | 83 | ||
84 | if (!fill) | ||
85 | fill = nofill; | ||
86 | |||
79 | if (len == 0) | 87 | if (len == 0) |
80 | len = fill(zbuf, GZIP_IOBUF_SIZE); | 88 | len = fill(zbuf, GZIP_IOBUF_SIZE); |
81 | 89 | ||
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 0b954e04bd30..ca82fde81c8f 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c | |||
@@ -82,6 +82,11 @@ struct rc { | |||
82 | #define RC_MODEL_TOTAL_BITS 11 | 82 | #define RC_MODEL_TOTAL_BITS 11 |
83 | 83 | ||
84 | 84 | ||
85 | static int nofill(void *buffer, unsigned int len) | ||
86 | { | ||
87 | return -1; | ||
88 | } | ||
89 | |||
85 | /* Called twice: once at startup and once in rc_normalize() */ | 90 | /* Called twice: once at startup and once in rc_normalize() */ |
86 | static void INIT rc_read(struct rc *rc) | 91 | static void INIT rc_read(struct rc *rc) |
87 | { | 92 | { |
@@ -97,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc, | |||
97 | int (*fill)(void*, unsigned int), | 102 | int (*fill)(void*, unsigned int), |
98 | char *buffer, int buffer_size) | 103 | char *buffer, int buffer_size) |
99 | { | 104 | { |
100 | rc->fill = fill; | 105 | if (fill) |
106 | rc->fill = fill; | ||
107 | else | ||
108 | rc->fill = nofill; | ||
101 | rc->buffer = (uint8_t *)buffer; | 109 | rc->buffer = (uint8_t *)buffer; |
102 | rc->buffer_size = buffer_size; | 110 | rc->buffer_size = buffer_size; |
103 | rc->buffer_end = rc->buffer + rc->buffer_size; | 111 | rc->buffer_end = rc->buffer + rc->buffer_size; |
diff --git a/mm/Kconfig b/mm/Kconfig index 71eb0b4cce8d..247760729593 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -245,6 +245,20 @@ config DEFAULT_MMAP_MIN_ADDR | |||
245 | /proc/sys/vm/mmap_min_addr tunable. | 245 | /proc/sys/vm/mmap_min_addr tunable. |
246 | 246 | ||
247 | 247 | ||
248 | config MEMORY_FAILURE | ||
249 | depends on MMU | ||
250 | depends on X86_MCE | ||
251 | bool "Enable recovery from hardware memory errors" | ||
252 | help | ||
253 | Enables code to recover from some memory failures on systems | ||
254 | with MCA recovery. This allows a system to continue running | ||
255 | even when some of its memory has uncorrected errors. This requires | ||
256 | special hardware support and typically ECC memory. | ||
257 | |||
258 | config HWPOISON_INJECT | ||
259 | tristate "Poison pages injector" | ||
260 | depends on MEMORY_FAILURE && DEBUG_KERNEL | ||
261 | |||
248 | config NOMMU_INITIAL_TRIM_EXCESS | 262 | config NOMMU_INITIAL_TRIM_EXCESS |
249 | int "Turn on mmap() excess space trimming before booting" | 263 | int "Turn on mmap() excess space trimming before booting" |
250 | depends on !MMU | 264 | depends on !MMU |
diff --git a/mm/Makefile b/mm/Makefile index 88193d73cd1a..515fd793c17f 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -41,5 +41,7 @@ obj-$(CONFIG_SMP) += allocpercpu.o | |||
41 | endif | 41 | endif |
42 | obj-$(CONFIG_QUICKLIST) += quicklist.o | 42 | obj-$(CONFIG_QUICKLIST) += quicklist.o |
43 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o | 43 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o |
44 | obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o | ||
45 | obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o | ||
44 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o | 46 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o |
45 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o | 47 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o |
diff --git a/mm/filemap.c b/mm/filemap.c index bcc7372aebbc..c1fc205a92c6 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -104,6 +104,10 @@ | |||
104 | * | 104 | * |
105 | * ->task->proc_lock | 105 | * ->task->proc_lock |
106 | * ->dcache_lock (proc_pid_lookup) | 106 | * ->dcache_lock (proc_pid_lookup) |
107 | * | ||
108 | * (code doesn't rely on that order, so you could switch it around) | ||
109 | * ->tasklist_lock (memory_failure, collect_procs_ao) | ||
110 | * ->i_mmap_lock | ||
107 | */ | 111 | */ |
108 | 112 | ||
109 | /* | 113 | /* |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 815dbd4a6dcb..6f048fcc749c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -1537,7 +1537,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array) | |||
1537 | 1537 | ||
1538 | #ifdef CONFIG_SYSCTL | 1538 | #ifdef CONFIG_SYSCTL |
1539 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, | 1539 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, |
1540 | struct file *file, void __user *buffer, | 1540 | void __user *buffer, |
1541 | size_t *length, loff_t *ppos) | 1541 | size_t *length, loff_t *ppos) |
1542 | { | 1542 | { |
1543 | struct hstate *h = &default_hstate; | 1543 | struct hstate *h = &default_hstate; |
@@ -1548,7 +1548,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, | |||
1548 | 1548 | ||
1549 | table->data = &tmp; | 1549 | table->data = &tmp; |
1550 | table->maxlen = sizeof(unsigned long); | 1550 | table->maxlen = sizeof(unsigned long); |
1551 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); | 1551 | proc_doulongvec_minmax(table, write, buffer, length, ppos); |
1552 | 1552 | ||
1553 | if (write) | 1553 | if (write) |
1554 | h->max_huge_pages = set_max_huge_pages(h, tmp); | 1554 | h->max_huge_pages = set_max_huge_pages(h, tmp); |
@@ -1557,10 +1557,10 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, | |||
1557 | } | 1557 | } |
1558 | 1558 | ||
1559 | int hugetlb_treat_movable_handler(struct ctl_table *table, int write, | 1559 | int hugetlb_treat_movable_handler(struct ctl_table *table, int write, |
1560 | struct file *file, void __user *buffer, | 1560 | void __user *buffer, |
1561 | size_t *length, loff_t *ppos) | 1561 | size_t *length, loff_t *ppos) |
1562 | { | 1562 | { |
1563 | proc_dointvec(table, write, file, buffer, length, ppos); | 1563 | proc_dointvec(table, write, buffer, length, ppos); |
1564 | if (hugepages_treat_as_movable) | 1564 | if (hugepages_treat_as_movable) |
1565 | htlb_alloc_mask = GFP_HIGHUSER_MOVABLE; | 1565 | htlb_alloc_mask = GFP_HIGHUSER_MOVABLE; |
1566 | else | 1566 | else |
@@ -1569,7 +1569,7 @@ int hugetlb_treat_movable_handler(struct ctl_table *table, int write, | |||
1569 | } | 1569 | } |
1570 | 1570 | ||
1571 | int hugetlb_overcommit_handler(struct ctl_table *table, int write, | 1571 | int hugetlb_overcommit_handler(struct ctl_table *table, int write, |
1572 | struct file *file, void __user *buffer, | 1572 | void __user *buffer, |
1573 | size_t *length, loff_t *ppos) | 1573 | size_t *length, loff_t *ppos) |
1574 | { | 1574 | { |
1575 | struct hstate *h = &default_hstate; | 1575 | struct hstate *h = &default_hstate; |
@@ -1580,7 +1580,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, | |||
1580 | 1580 | ||
1581 | table->data = &tmp; | 1581 | table->data = &tmp; |
1582 | table->maxlen = sizeof(unsigned long); | 1582 | table->maxlen = sizeof(unsigned long); |
1583 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); | 1583 | proc_doulongvec_minmax(table, write, buffer, length, ppos); |
1584 | 1584 | ||
1585 | if (write) { | 1585 | if (write) { |
1586 | spin_lock(&hugetlb_lock); | 1586 | spin_lock(&hugetlb_lock); |
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c new file mode 100644 index 000000000000..e1d85137f086 --- /dev/null +++ b/mm/hwpoison-inject.c | |||
@@ -0,0 +1,41 @@ | |||
1 | /* Inject a hwpoison memory failure on an arbitrary pfn */ | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/debugfs.h> | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/mm.h> | ||
6 | |||
7 | static struct dentry *hwpoison_dir, *corrupt_pfn; | ||
8 | |||
9 | static int hwpoison_inject(void *data, u64 val) | ||
10 | { | ||
11 | if (!capable(CAP_SYS_ADMIN)) | ||
12 | return -EPERM; | ||
13 | printk(KERN_INFO "Injecting memory failure at pfn %Lx\n", val); | ||
14 | return __memory_failure(val, 18, 0); | ||
15 | } | ||
16 | |||
17 | DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n"); | ||
18 | |||
19 | static void pfn_inject_exit(void) | ||
20 | { | ||
21 | if (hwpoison_dir) | ||
22 | debugfs_remove_recursive(hwpoison_dir); | ||
23 | } | ||
24 | |||
25 | static int pfn_inject_init(void) | ||
26 | { | ||
27 | hwpoison_dir = debugfs_create_dir("hwpoison", NULL); | ||
28 | if (hwpoison_dir == NULL) | ||
29 | return -ENOMEM; | ||
30 | corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir, | ||
31 | NULL, &hwpoison_fops); | ||
32 | if (corrupt_pfn == NULL) { | ||
33 | pfn_inject_exit(); | ||
34 | return -ENOMEM; | ||
35 | } | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | module_init(pfn_inject_init); | ||
40 | module_exit(pfn_inject_exit); | ||
41 | MODULE_LICENSE("GPL"); | ||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/rbtree.h> | 31 | #include <linux/rbtree.h> |
32 | #include <linux/mmu_notifier.h> | 32 | #include <linux/mmu_notifier.h> |
33 | #include <linux/swap.h> | ||
33 | #include <linux/ksm.h> | 34 | #include <linux/ksm.h> |
34 | 35 | ||
35 | #include <asm/tlbflush.h> | 36 | #include <asm/tlbflush.h> |
@@ -162,10 +163,10 @@ static unsigned long ksm_pages_unshared; | |||
162 | static unsigned long ksm_rmap_items; | 163 | static unsigned long ksm_rmap_items; |
163 | 164 | ||
164 | /* Limit on the number of unswappable pages used */ | 165 | /* Limit on the number of unswappable pages used */ |
165 | static unsigned long ksm_max_kernel_pages = 2000; | 166 | static unsigned long ksm_max_kernel_pages; |
166 | 167 | ||
167 | /* Number of pages ksmd should scan in one batch */ | 168 | /* Number of pages ksmd should scan in one batch */ |
168 | static unsigned int ksm_thread_pages_to_scan = 200; | 169 | static unsigned int ksm_thread_pages_to_scan = 100; |
169 | 170 | ||
170 | /* Milliseconds ksmd should sleep between batches */ | 171 | /* Milliseconds ksmd should sleep between batches */ |
171 | static unsigned int ksm_thread_sleep_millisecs = 20; | 172 | static unsigned int ksm_thread_sleep_millisecs = 20; |
@@ -173,7 +174,7 @@ static unsigned int ksm_thread_sleep_millisecs = 20; | |||
173 | #define KSM_RUN_STOP 0 | 174 | #define KSM_RUN_STOP 0 |
174 | #define KSM_RUN_MERGE 1 | 175 | #define KSM_RUN_MERGE 1 |
175 | #define KSM_RUN_UNMERGE 2 | 176 | #define KSM_RUN_UNMERGE 2 |
176 | static unsigned int ksm_run = KSM_RUN_MERGE; | 177 | static unsigned int ksm_run = KSM_RUN_STOP; |
177 | 178 | ||
178 | static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait); | 179 | static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait); |
179 | static DEFINE_MUTEX(ksm_thread_mutex); | 180 | static DEFINE_MUTEX(ksm_thread_mutex); |
@@ -183,6 +184,11 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock); | |||
183 | sizeof(struct __struct), __alignof__(struct __struct),\ | 184 | sizeof(struct __struct), __alignof__(struct __struct),\ |
184 | (__flags), NULL) | 185 | (__flags), NULL) |
185 | 186 | ||
187 | static void __init ksm_init_max_kernel_pages(void) | ||
188 | { | ||
189 | ksm_max_kernel_pages = nr_free_buffer_pages() / 4; | ||
190 | } | ||
191 | |||
186 | static int __init ksm_slab_init(void) | 192 | static int __init ksm_slab_init(void) |
187 | { | 193 | { |
188 | rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0); | 194 | rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0); |
@@ -1667,6 +1673,8 @@ static int __init ksm_init(void) | |||
1667 | struct task_struct *ksm_thread; | 1673 | struct task_struct *ksm_thread; |
1668 | int err; | 1674 | int err; |
1669 | 1675 | ||
1676 | ksm_init_max_kernel_pages(); | ||
1677 | |||
1670 | err = ksm_slab_init(); | 1678 | err = ksm_slab_init(); |
1671 | if (err) | 1679 | if (err) |
1672 | goto out; | 1680 | goto out; |
diff --git a/mm/madvise.c b/mm/madvise.c index d9ae2067952e..35b1479b7c9d 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -218,6 +218,32 @@ static long madvise_remove(struct vm_area_struct *vma, | |||
218 | return error; | 218 | return error; |
219 | } | 219 | } |
220 | 220 | ||
221 | #ifdef CONFIG_MEMORY_FAILURE | ||
222 | /* | ||
223 | * Error injection support for memory error handling. | ||
224 | */ | ||
225 | static int madvise_hwpoison(unsigned long start, unsigned long end) | ||
226 | { | ||
227 | int ret = 0; | ||
228 | |||
229 | if (!capable(CAP_SYS_ADMIN)) | ||
230 | return -EPERM; | ||
231 | for (; start < end; start += PAGE_SIZE) { | ||
232 | struct page *p; | ||
233 | int ret = get_user_pages(current, current->mm, start, 1, | ||
234 | 0, 0, &p, NULL); | ||
235 | if (ret != 1) | ||
236 | return ret; | ||
237 | printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n", | ||
238 | page_to_pfn(p), start); | ||
239 | /* Ignore return value for now */ | ||
240 | __memory_failure(page_to_pfn(p), 0, 1); | ||
241 | put_page(p); | ||
242 | } | ||
243 | return ret; | ||
244 | } | ||
245 | #endif | ||
246 | |||
221 | static long | 247 | static long |
222 | madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, | 248 | madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, |
223 | unsigned long start, unsigned long end, int behavior) | 249 | unsigned long start, unsigned long end, int behavior) |
@@ -308,6 +334,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) | |||
308 | int write; | 334 | int write; |
309 | size_t len; | 335 | size_t len; |
310 | 336 | ||
337 | #ifdef CONFIG_MEMORY_FAILURE | ||
338 | if (behavior == MADV_HWPOISON) | ||
339 | return madvise_hwpoison(start, start+len_in); | ||
340 | #endif | ||
311 | if (!madvise_behavior_valid(behavior)) | 341 | if (!madvise_behavior_valid(behavior)) |
312 | return error; | 342 | return error; |
313 | 343 | ||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9b10d8753784..e2b98a6875c0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/rcupdate.h> | 29 | #include <linux/rcupdate.h> |
30 | #include <linux/limits.h> | 30 | #include <linux/limits.h> |
31 | #include <linux/mutex.h> | 31 | #include <linux/mutex.h> |
32 | #include <linux/rbtree.h> | ||
32 | #include <linux/slab.h> | 33 | #include <linux/slab.h> |
33 | #include <linux/swap.h> | 34 | #include <linux/swap.h> |
34 | #include <linux/spinlock.h> | 35 | #include <linux/spinlock.h> |
@@ -43,6 +44,7 @@ | |||
43 | 44 | ||
44 | struct cgroup_subsys mem_cgroup_subsys __read_mostly; | 45 | struct cgroup_subsys mem_cgroup_subsys __read_mostly; |
45 | #define MEM_CGROUP_RECLAIM_RETRIES 5 | 46 | #define MEM_CGROUP_RECLAIM_RETRIES 5 |
47 | struct mem_cgroup *root_mem_cgroup __read_mostly; | ||
46 | 48 | ||
47 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 49 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
48 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ | 50 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ |
@@ -53,6 +55,7 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/ | |||
53 | #endif | 55 | #endif |
54 | 56 | ||
55 | static DEFINE_MUTEX(memcg_tasklist); /* can be held under cgroup_mutex */ | 57 | static DEFINE_MUTEX(memcg_tasklist); /* can be held under cgroup_mutex */ |
58 | #define SOFTLIMIT_EVENTS_THRESH (1000) | ||
56 | 59 | ||
57 | /* | 60 | /* |
58 | * Statistics for memory cgroup. | 61 | * Statistics for memory cgroup. |
@@ -66,6 +69,8 @@ enum mem_cgroup_stat_index { | |||
66 | MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */ | 69 | MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */ |
67 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ | 70 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ |
68 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ | 71 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ |
72 | MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ | ||
73 | MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ | ||
69 | 74 | ||
70 | MEM_CGROUP_STAT_NSTATS, | 75 | MEM_CGROUP_STAT_NSTATS, |
71 | }; | 76 | }; |
@@ -78,6 +83,20 @@ struct mem_cgroup_stat { | |||
78 | struct mem_cgroup_stat_cpu cpustat[0]; | 83 | struct mem_cgroup_stat_cpu cpustat[0]; |
79 | }; | 84 | }; |
80 | 85 | ||
86 | static inline void | ||
87 | __mem_cgroup_stat_reset_safe(struct mem_cgroup_stat_cpu *stat, | ||
88 | enum mem_cgroup_stat_index idx) | ||
89 | { | ||
90 | stat->count[idx] = 0; | ||
91 | } | ||
92 | |||
93 | static inline s64 | ||
94 | __mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat, | ||
95 | enum mem_cgroup_stat_index idx) | ||
96 | { | ||
97 | return stat->count[idx]; | ||
98 | } | ||
99 | |||
81 | /* | 100 | /* |
82 | * For accounting under irq disable, no need for increment preempt count. | 101 | * For accounting under irq disable, no need for increment preempt count. |
83 | */ | 102 | */ |
@@ -117,6 +136,12 @@ struct mem_cgroup_per_zone { | |||
117 | unsigned long count[NR_LRU_LISTS]; | 136 | unsigned long count[NR_LRU_LISTS]; |
118 | 137 | ||
119 | struct zone_reclaim_stat reclaim_stat; | 138 | struct zone_reclaim_stat reclaim_stat; |
139 | struct rb_node tree_node; /* RB tree node */ | ||
140 | unsigned long long usage_in_excess;/* Set to the value by which */ | ||
141 | /* the soft limit is exceeded*/ | ||
142 | bool on_tree; | ||
143 | struct mem_cgroup *mem; /* Back pointer, we cannot */ | ||
144 | /* use container_of */ | ||
120 | }; | 145 | }; |
121 | /* Macro for accessing counter */ | 146 | /* Macro for accessing counter */ |
122 | #define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)]) | 147 | #define MEM_CGROUP_ZSTAT(mz, idx) ((mz)->count[(idx)]) |
@@ -130,6 +155,26 @@ struct mem_cgroup_lru_info { | |||
130 | }; | 155 | }; |
131 | 156 | ||
132 | /* | 157 | /* |
158 | * Cgroups above their limits are maintained in a RB-Tree, independent of | ||
159 | * their hierarchy representation | ||
160 | */ | ||
161 | |||
162 | struct mem_cgroup_tree_per_zone { | ||
163 | struct rb_root rb_root; | ||
164 | spinlock_t lock; | ||
165 | }; | ||
166 | |||
167 | struct mem_cgroup_tree_per_node { | ||
168 | struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES]; | ||
169 | }; | ||
170 | |||
171 | struct mem_cgroup_tree { | ||
172 | struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES]; | ||
173 | }; | ||
174 | |||
175 | static struct mem_cgroup_tree soft_limit_tree __read_mostly; | ||
176 | |||
177 | /* | ||
133 | * The memory controller data structure. The memory controller controls both | 178 | * The memory controller data structure. The memory controller controls both |
134 | * page cache and RSS per cgroup. We would eventually like to provide | 179 | * page cache and RSS per cgroup. We would eventually like to provide |
135 | * statistics based on the statistics developed by Rik Van Riel for clock-pro, | 180 | * statistics based on the statistics developed by Rik Van Riel for clock-pro, |
@@ -186,6 +231,13 @@ struct mem_cgroup { | |||
186 | struct mem_cgroup_stat stat; | 231 | struct mem_cgroup_stat stat; |
187 | }; | 232 | }; |
188 | 233 | ||
234 | /* | ||
235 | * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft | ||
236 | * limit reclaim to prevent infinite loops, if they ever occur. | ||
237 | */ | ||
238 | #define MEM_CGROUP_MAX_RECLAIM_LOOPS (100) | ||
239 | #define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2) | ||
240 | |||
189 | enum charge_type { | 241 | enum charge_type { |
190 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, | 242 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, |
191 | MEM_CGROUP_CHARGE_TYPE_MAPPED, | 243 | MEM_CGROUP_CHARGE_TYPE_MAPPED, |
@@ -200,13 +252,8 @@ enum charge_type { | |||
200 | #define PCGF_CACHE (1UL << PCG_CACHE) | 252 | #define PCGF_CACHE (1UL << PCG_CACHE) |
201 | #define PCGF_USED (1UL << PCG_USED) | 253 | #define PCGF_USED (1UL << PCG_USED) |
202 | #define PCGF_LOCK (1UL << PCG_LOCK) | 254 | #define PCGF_LOCK (1UL << PCG_LOCK) |
203 | static const unsigned long | 255 | /* Not used, but added here for completeness */ |
204 | pcg_default_flags[NR_CHARGE_TYPE] = { | 256 | #define PCGF_ACCT (1UL << PCG_ACCT) |
205 | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */ | ||
206 | PCGF_USED | PCGF_LOCK, /* Anon */ | ||
207 | PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */ | ||
208 | 0, /* FORCE */ | ||
209 | }; | ||
210 | 257 | ||
211 | /* for encoding cft->private value on file */ | 258 | /* for encoding cft->private value on file */ |
212 | #define _MEM (0) | 259 | #define _MEM (0) |
@@ -215,15 +262,241 @@ pcg_default_flags[NR_CHARGE_TYPE] = { | |||
215 | #define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) | 262 | #define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) |
216 | #define MEMFILE_ATTR(val) ((val) & 0xffff) | 263 | #define MEMFILE_ATTR(val) ((val) & 0xffff) |
217 | 264 | ||
265 | /* | ||
266 | * Reclaim flags for mem_cgroup_hierarchical_reclaim | ||
267 | */ | ||
268 | #define MEM_CGROUP_RECLAIM_NOSWAP_BIT 0x0 | ||
269 | #define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT) | ||
270 | #define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1 | ||
271 | #define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT) | ||
272 | #define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2 | ||
273 | #define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT) | ||
274 | |||
218 | static void mem_cgroup_get(struct mem_cgroup *mem); | 275 | static void mem_cgroup_get(struct mem_cgroup *mem); |
219 | static void mem_cgroup_put(struct mem_cgroup *mem); | 276 | static void mem_cgroup_put(struct mem_cgroup *mem); |
220 | static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); | 277 | static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); |
221 | 278 | ||
279 | static struct mem_cgroup_per_zone * | ||
280 | mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) | ||
281 | { | ||
282 | return &mem->info.nodeinfo[nid]->zoneinfo[zid]; | ||
283 | } | ||
284 | |||
285 | static struct mem_cgroup_per_zone * | ||
286 | page_cgroup_zoneinfo(struct page_cgroup *pc) | ||
287 | { | ||
288 | struct mem_cgroup *mem = pc->mem_cgroup; | ||
289 | int nid = page_cgroup_nid(pc); | ||
290 | int zid = page_cgroup_zid(pc); | ||
291 | |||
292 | if (!mem) | ||
293 | return NULL; | ||
294 | |||
295 | return mem_cgroup_zoneinfo(mem, nid, zid); | ||
296 | } | ||
297 | |||
298 | static struct mem_cgroup_tree_per_zone * | ||
299 | soft_limit_tree_node_zone(int nid, int zid) | ||
300 | { | ||
301 | return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; | ||
302 | } | ||
303 | |||
304 | static struct mem_cgroup_tree_per_zone * | ||
305 | soft_limit_tree_from_page(struct page *page) | ||
306 | { | ||
307 | int nid = page_to_nid(page); | ||
308 | int zid = page_zonenum(page); | ||
309 | |||
310 | return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; | ||
311 | } | ||
312 | |||
313 | static void | ||
314 | __mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | ||
315 | struct mem_cgroup_per_zone *mz, | ||
316 | struct mem_cgroup_tree_per_zone *mctz) | ||
317 | { | ||
318 | struct rb_node **p = &mctz->rb_root.rb_node; | ||
319 | struct rb_node *parent = NULL; | ||
320 | struct mem_cgroup_per_zone *mz_node; | ||
321 | |||
322 | if (mz->on_tree) | ||
323 | return; | ||
324 | |||
325 | mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res); | ||
326 | while (*p) { | ||
327 | parent = *p; | ||
328 | mz_node = rb_entry(parent, struct mem_cgroup_per_zone, | ||
329 | tree_node); | ||
330 | if (mz->usage_in_excess < mz_node->usage_in_excess) | ||
331 | p = &(*p)->rb_left; | ||
332 | /* | ||
333 | * We can't avoid mem cgroups that are over their soft | ||
334 | * limit by the same amount | ||
335 | */ | ||
336 | else if (mz->usage_in_excess >= mz_node->usage_in_excess) | ||
337 | p = &(*p)->rb_right; | ||
338 | } | ||
339 | rb_link_node(&mz->tree_node, parent, p); | ||
340 | rb_insert_color(&mz->tree_node, &mctz->rb_root); | ||
341 | mz->on_tree = true; | ||
342 | } | ||
343 | |||
344 | static void | ||
345 | __mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | ||
346 | struct mem_cgroup_per_zone *mz, | ||
347 | struct mem_cgroup_tree_per_zone *mctz) | ||
348 | { | ||
349 | if (!mz->on_tree) | ||
350 | return; | ||
351 | rb_erase(&mz->tree_node, &mctz->rb_root); | ||
352 | mz->on_tree = false; | ||
353 | } | ||
354 | |||
355 | static void | ||
356 | mem_cgroup_insert_exceeded(struct mem_cgroup *mem, | ||
357 | struct mem_cgroup_per_zone *mz, | ||
358 | struct mem_cgroup_tree_per_zone *mctz) | ||
359 | { | ||
360 | spin_lock(&mctz->lock); | ||
361 | __mem_cgroup_insert_exceeded(mem, mz, mctz); | ||
362 | spin_unlock(&mctz->lock); | ||
363 | } | ||
364 | |||
365 | static void | ||
366 | mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | ||
367 | struct mem_cgroup_per_zone *mz, | ||
368 | struct mem_cgroup_tree_per_zone *mctz) | ||
369 | { | ||
370 | spin_lock(&mctz->lock); | ||
371 | __mem_cgroup_remove_exceeded(mem, mz, mctz); | ||
372 | spin_unlock(&mctz->lock); | ||
373 | } | ||
374 | |||
375 | static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem) | ||
376 | { | ||
377 | bool ret = false; | ||
378 | int cpu; | ||
379 | s64 val; | ||
380 | struct mem_cgroup_stat_cpu *cpustat; | ||
381 | |||
382 | cpu = get_cpu(); | ||
383 | cpustat = &mem->stat.cpustat[cpu]; | ||
384 | val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_EVENTS); | ||
385 | if (unlikely(val > SOFTLIMIT_EVENTS_THRESH)) { | ||
386 | __mem_cgroup_stat_reset_safe(cpustat, MEM_CGROUP_STAT_EVENTS); | ||
387 | ret = true; | ||
388 | } | ||
389 | put_cpu(); | ||
390 | return ret; | ||
391 | } | ||
392 | |||
393 | static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) | ||
394 | { | ||
395 | unsigned long long prev_usage_in_excess, new_usage_in_excess; | ||
396 | bool updated_tree = false; | ||
397 | struct mem_cgroup_per_zone *mz; | ||
398 | struct mem_cgroup_tree_per_zone *mctz; | ||
399 | |||
400 | mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page)); | ||
401 | mctz = soft_limit_tree_from_page(page); | ||
402 | |||
403 | /* | ||
404 | * We do updates in lazy mode, mem's are removed | ||
405 | * lazily from the per-zone, per-node rb tree | ||
406 | */ | ||
407 | prev_usage_in_excess = mz->usage_in_excess; | ||
408 | |||
409 | new_usage_in_excess = res_counter_soft_limit_excess(&mem->res); | ||
410 | if (prev_usage_in_excess) { | ||
411 | mem_cgroup_remove_exceeded(mem, mz, mctz); | ||
412 | updated_tree = true; | ||
413 | } | ||
414 | if (!new_usage_in_excess) | ||
415 | goto done; | ||
416 | mem_cgroup_insert_exceeded(mem, mz, mctz); | ||
417 | |||
418 | done: | ||
419 | if (updated_tree) { | ||
420 | spin_lock(&mctz->lock); | ||
421 | mz->usage_in_excess = new_usage_in_excess; | ||
422 | spin_unlock(&mctz->lock); | ||
423 | } | ||
424 | } | ||
425 | |||
426 | static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem) | ||
427 | { | ||
428 | int node, zone; | ||
429 | struct mem_cgroup_per_zone *mz; | ||
430 | struct mem_cgroup_tree_per_zone *mctz; | ||
431 | |||
432 | for_each_node_state(node, N_POSSIBLE) { | ||
433 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | ||
434 | mz = mem_cgroup_zoneinfo(mem, node, zone); | ||
435 | mctz = soft_limit_tree_node_zone(node, zone); | ||
436 | mem_cgroup_remove_exceeded(mem, mz, mctz); | ||
437 | } | ||
438 | } | ||
439 | } | ||
440 | |||
441 | static inline unsigned long mem_cgroup_get_excess(struct mem_cgroup *mem) | ||
442 | { | ||
443 | return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT; | ||
444 | } | ||
445 | |||
446 | static struct mem_cgroup_per_zone * | ||
447 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | ||
448 | { | ||
449 | struct rb_node *rightmost = NULL; | ||
450 | struct mem_cgroup_per_zone *mz = NULL; | ||
451 | |||
452 | retry: | ||
453 | rightmost = rb_last(&mctz->rb_root); | ||
454 | if (!rightmost) | ||
455 | goto done; /* Nothing to reclaim from */ | ||
456 | |||
457 | mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node); | ||
458 | /* | ||
459 | * Remove the node now but someone else can add it back, | ||
460 | * we will add it back at the end of reclaim to its correct | ||
461 | * position in the tree. | ||
462 | */ | ||
463 | __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); | ||
464 | if (!res_counter_soft_limit_excess(&mz->mem->res) || | ||
465 | !css_tryget(&mz->mem->css)) | ||
466 | goto retry; | ||
467 | done: | ||
468 | return mz; | ||
469 | } | ||
470 | |||
471 | static struct mem_cgroup_per_zone * | ||
472 | mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | ||
473 | { | ||
474 | struct mem_cgroup_per_zone *mz; | ||
475 | |||
476 | spin_lock(&mctz->lock); | ||
477 | mz = __mem_cgroup_largest_soft_limit_node(mctz); | ||
478 | spin_unlock(&mctz->lock); | ||
479 | return mz; | ||
480 | } | ||
481 | |||
482 | static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, | ||
483 | bool charge) | ||
484 | { | ||
485 | int val = (charge) ? 1 : -1; | ||
486 | struct mem_cgroup_stat *stat = &mem->stat; | ||
487 | struct mem_cgroup_stat_cpu *cpustat; | ||
488 | int cpu = get_cpu(); | ||
489 | |||
490 | cpustat = &stat->cpustat[cpu]; | ||
491 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val); | ||
492 | put_cpu(); | ||
493 | } | ||
494 | |||
222 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, | 495 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, |
223 | struct page_cgroup *pc, | 496 | struct page_cgroup *pc, |
224 | bool charge) | 497 | bool charge) |
225 | { | 498 | { |
226 | int val = (charge)? 1 : -1; | 499 | int val = (charge) ? 1 : -1; |
227 | struct mem_cgroup_stat *stat = &mem->stat; | 500 | struct mem_cgroup_stat *stat = &mem->stat; |
228 | struct mem_cgroup_stat_cpu *cpustat; | 501 | struct mem_cgroup_stat_cpu *cpustat; |
229 | int cpu = get_cpu(); | 502 | int cpu = get_cpu(); |
@@ -240,28 +513,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, | |||
240 | else | 513 | else |
241 | __mem_cgroup_stat_add_safe(cpustat, | 514 | __mem_cgroup_stat_add_safe(cpustat, |
242 | MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); | 515 | MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); |
516 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_EVENTS, 1); | ||
243 | put_cpu(); | 517 | put_cpu(); |
244 | } | 518 | } |
245 | 519 | ||
246 | static struct mem_cgroup_per_zone * | ||
247 | mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) | ||
248 | { | ||
249 | return &mem->info.nodeinfo[nid]->zoneinfo[zid]; | ||
250 | } | ||
251 | |||
252 | static struct mem_cgroup_per_zone * | ||
253 | page_cgroup_zoneinfo(struct page_cgroup *pc) | ||
254 | { | ||
255 | struct mem_cgroup *mem = pc->mem_cgroup; | ||
256 | int nid = page_cgroup_nid(pc); | ||
257 | int zid = page_cgroup_zid(pc); | ||
258 | |||
259 | if (!mem) | ||
260 | return NULL; | ||
261 | |||
262 | return mem_cgroup_zoneinfo(mem, nid, zid); | ||
263 | } | ||
264 | |||
265 | static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, | 520 | static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, |
266 | enum lru_list idx) | 521 | enum lru_list idx) |
267 | { | 522 | { |
@@ -354,6 +609,11 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data, | |||
354 | return ret; | 609 | return ret; |
355 | } | 610 | } |
356 | 611 | ||
612 | static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) | ||
613 | { | ||
614 | return (mem == root_mem_cgroup); | ||
615 | } | ||
616 | |||
357 | /* | 617 | /* |
358 | * Following LRU functions are allowed to be used without PCG_LOCK. | 618 | * Following LRU functions are allowed to be used without PCG_LOCK. |
359 | * Operations are called by routine of global LRU independently from memcg. | 619 | * Operations are called by routine of global LRU independently from memcg. |
@@ -371,22 +631,24 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data, | |||
371 | void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru) | 631 | void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru) |
372 | { | 632 | { |
373 | struct page_cgroup *pc; | 633 | struct page_cgroup *pc; |
374 | struct mem_cgroup *mem; | ||
375 | struct mem_cgroup_per_zone *mz; | 634 | struct mem_cgroup_per_zone *mz; |
376 | 635 | ||
377 | if (mem_cgroup_disabled()) | 636 | if (mem_cgroup_disabled()) |
378 | return; | 637 | return; |
379 | pc = lookup_page_cgroup(page); | 638 | pc = lookup_page_cgroup(page); |
380 | /* can happen while we handle swapcache. */ | 639 | /* can happen while we handle swapcache. */ |
381 | if (list_empty(&pc->lru) || !pc->mem_cgroup) | 640 | if (!TestClearPageCgroupAcctLRU(pc)) |
382 | return; | 641 | return; |
642 | VM_BUG_ON(!pc->mem_cgroup); | ||
383 | /* | 643 | /* |
384 | * We don't check PCG_USED bit. It's cleared when the "page" is finally | 644 | * We don't check PCG_USED bit. It's cleared when the "page" is finally |
385 | * removed from global LRU. | 645 | * removed from global LRU. |
386 | */ | 646 | */ |
387 | mz = page_cgroup_zoneinfo(pc); | 647 | mz = page_cgroup_zoneinfo(pc); |
388 | mem = pc->mem_cgroup; | ||
389 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; | 648 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; |
649 | if (mem_cgroup_is_root(pc->mem_cgroup)) | ||
650 | return; | ||
651 | VM_BUG_ON(list_empty(&pc->lru)); | ||
390 | list_del_init(&pc->lru); | 652 | list_del_init(&pc->lru); |
391 | return; | 653 | return; |
392 | } | 654 | } |
@@ -410,8 +672,8 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) | |||
410 | * For making pc->mem_cgroup visible, insert smp_rmb() here. | 672 | * For making pc->mem_cgroup visible, insert smp_rmb() here. |
411 | */ | 673 | */ |
412 | smp_rmb(); | 674 | smp_rmb(); |
413 | /* unused page is not rotated. */ | 675 | /* unused or root page is not rotated. */ |
414 | if (!PageCgroupUsed(pc)) | 676 | if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup)) |
415 | return; | 677 | return; |
416 | mz = page_cgroup_zoneinfo(pc); | 678 | mz = page_cgroup_zoneinfo(pc); |
417 | list_move(&pc->lru, &mz->lists[lru]); | 679 | list_move(&pc->lru, &mz->lists[lru]); |
@@ -425,6 +687,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) | |||
425 | if (mem_cgroup_disabled()) | 687 | if (mem_cgroup_disabled()) |
426 | return; | 688 | return; |
427 | pc = lookup_page_cgroup(page); | 689 | pc = lookup_page_cgroup(page); |
690 | VM_BUG_ON(PageCgroupAcctLRU(pc)); | ||
428 | /* | 691 | /* |
429 | * Used bit is set without atomic ops but after smp_wmb(). | 692 | * Used bit is set without atomic ops but after smp_wmb(). |
430 | * For making pc->mem_cgroup visible, insert smp_rmb() here. | 693 | * For making pc->mem_cgroup visible, insert smp_rmb() here. |
@@ -435,6 +698,9 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) | |||
435 | 698 | ||
436 | mz = page_cgroup_zoneinfo(pc); | 699 | mz = page_cgroup_zoneinfo(pc); |
437 | MEM_CGROUP_ZSTAT(mz, lru) += 1; | 700 | MEM_CGROUP_ZSTAT(mz, lru) += 1; |
701 | SetPageCgroupAcctLRU(pc); | ||
702 | if (mem_cgroup_is_root(pc->mem_cgroup)) | ||
703 | return; | ||
438 | list_add(&pc->lru, &mz->lists[lru]); | 704 | list_add(&pc->lru, &mz->lists[lru]); |
439 | } | 705 | } |
440 | 706 | ||
@@ -469,7 +735,7 @@ static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page) | |||
469 | 735 | ||
470 | spin_lock_irqsave(&zone->lru_lock, flags); | 736 | spin_lock_irqsave(&zone->lru_lock, flags); |
471 | /* link when the page is linked to LRU but page_cgroup isn't */ | 737 | /* link when the page is linked to LRU but page_cgroup isn't */ |
472 | if (PageLRU(page) && list_empty(&pc->lru)) | 738 | if (PageLRU(page) && !PageCgroupAcctLRU(pc)) |
473 | mem_cgroup_add_lru_list(page, page_lru(page)); | 739 | mem_cgroup_add_lru_list(page, page_lru(page)); |
474 | spin_unlock_irqrestore(&zone->lru_lock, flags); | 740 | spin_unlock_irqrestore(&zone->lru_lock, flags); |
475 | } | 741 | } |
@@ -855,28 +1121,62 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) | |||
855 | * If shrink==true, this returns immediately to avoid freeing too much. | 1121 | * If shrink==true, this returns immediately to avoid freeing too much. |
856 | */ | 1122 | */ |
857 | static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | 1123 | static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, |
858 | gfp_t gfp_mask, bool noswap, bool shrink) | 1124 | struct zone *zone, |
1125 | gfp_t gfp_mask, | ||
1126 | unsigned long reclaim_options) | ||
859 | { | 1127 | { |
860 | struct mem_cgroup *victim; | 1128 | struct mem_cgroup *victim; |
861 | int ret, total = 0; | 1129 | int ret, total = 0; |
862 | int loop = 0; | 1130 | int loop = 0; |
1131 | bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; | ||
1132 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; | ||
1133 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; | ||
1134 | unsigned long excess = mem_cgroup_get_excess(root_mem); | ||
863 | 1135 | ||
864 | /* If memsw_is_minimum==1, swap-out is of-no-use. */ | 1136 | /* If memsw_is_minimum==1, swap-out is of-no-use. */ |
865 | if (root_mem->memsw_is_minimum) | 1137 | if (root_mem->memsw_is_minimum) |
866 | noswap = true; | 1138 | noswap = true; |
867 | 1139 | ||
868 | while (loop < 2) { | 1140 | while (1) { |
869 | victim = mem_cgroup_select_victim(root_mem); | 1141 | victim = mem_cgroup_select_victim(root_mem); |
870 | if (victim == root_mem) | 1142 | if (victim == root_mem) { |
871 | loop++; | 1143 | loop++; |
1144 | if (loop >= 2) { | ||
1145 | /* | ||
1146 | * If we have not been able to reclaim | ||
1147 | * anything, it might be because there are | ||
1148 | * no reclaimable pages under this hierarchy | ||
1149 | */ | ||
1150 | if (!check_soft || !total) { | ||
1151 | css_put(&victim->css); | ||
1152 | break; | ||
1153 | } | ||
1154 | /* | ||
1155 | * We want to do more targeted reclaim. | ||
1156 | * excess >> 2 is neither so large that we | ||
1157 | * reclaim too much, nor so small that we keep | ||
1158 | * coming back to reclaim from this cgroup. | ||
1159 | */ | ||
1160 | if (total >= (excess >> 2) || | ||
1161 | (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) { | ||
1162 | css_put(&victim->css); | ||
1163 | break; | ||
1164 | } | ||
1165 | } | ||
1166 | } | ||
872 | if (!mem_cgroup_local_usage(&victim->stat)) { | 1167 | if (!mem_cgroup_local_usage(&victim->stat)) { |
873 | /* this cgroup's local usage == 0 */ | 1168 | /* this cgroup's local usage == 0 */ |
874 | css_put(&victim->css); | 1169 | css_put(&victim->css); |
875 | continue; | 1170 | continue; |
876 | } | 1171 | } |
877 | /* we use swappiness of local cgroup */ | 1172 | /* we use swappiness of local cgroup */ |
878 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap, | 1173 | if (check_soft) |
879 | get_swappiness(victim)); | 1174 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, |
1175 | noswap, get_swappiness(victim), zone, | ||
1176 | zone->zone_pgdat->node_id); | ||
1177 | else | ||
1178 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, | ||
1179 | noswap, get_swappiness(victim)); | ||
880 | css_put(&victim->css); | 1180 | css_put(&victim->css); |
881 | /* | 1181 | /* |
882 | * At shrinking usage, we can't check we should stop here or | 1182 | * At shrinking usage, we can't check we should stop here or |
@@ -886,7 +1186,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
886 | if (shrink) | 1186 | if (shrink) |
887 | return ret; | 1187 | return ret; |
888 | total += ret; | 1188 | total += ret; |
889 | if (mem_cgroup_check_under_limit(root_mem)) | 1189 | if (check_soft) { |
1190 | if (res_counter_check_under_soft_limit(&root_mem->res)) | ||
1191 | return total; | ||
1192 | } else if (mem_cgroup_check_under_limit(root_mem)) | ||
890 | return 1 + total; | 1193 | return 1 + total; |
891 | } | 1194 | } |
892 | return total; | 1195 | return total; |
@@ -965,11 +1268,11 @@ done: | |||
965 | */ | 1268 | */ |
966 | static int __mem_cgroup_try_charge(struct mm_struct *mm, | 1269 | static int __mem_cgroup_try_charge(struct mm_struct *mm, |
967 | gfp_t gfp_mask, struct mem_cgroup **memcg, | 1270 | gfp_t gfp_mask, struct mem_cgroup **memcg, |
968 | bool oom) | 1271 | bool oom, struct page *page) |
969 | { | 1272 | { |
970 | struct mem_cgroup *mem, *mem_over_limit; | 1273 | struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit; |
971 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 1274 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
972 | struct res_counter *fail_res; | 1275 | struct res_counter *fail_res, *soft_fail_res = NULL; |
973 | 1276 | ||
974 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { | 1277 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { |
975 | /* Don't account this! */ | 1278 | /* Don't account this! */ |
@@ -996,20 +1299,23 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
996 | VM_BUG_ON(css_is_removed(&mem->css)); | 1299 | VM_BUG_ON(css_is_removed(&mem->css)); |
997 | 1300 | ||
998 | while (1) { | 1301 | while (1) { |
999 | int ret; | 1302 | int ret = 0; |
1000 | bool noswap = false; | 1303 | unsigned long flags = 0; |
1001 | 1304 | ||
1002 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); | 1305 | if (mem_cgroup_is_root(mem)) |
1306 | goto done; | ||
1307 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res, | ||
1308 | &soft_fail_res); | ||
1003 | if (likely(!ret)) { | 1309 | if (likely(!ret)) { |
1004 | if (!do_swap_account) | 1310 | if (!do_swap_account) |
1005 | break; | 1311 | break; |
1006 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, | 1312 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, |
1007 | &fail_res); | 1313 | &fail_res, NULL); |
1008 | if (likely(!ret)) | 1314 | if (likely(!ret)) |
1009 | break; | 1315 | break; |
1010 | /* mem+swap counter fails */ | 1316 | /* mem+swap counter fails */ |
1011 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 1317 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); |
1012 | noswap = true; | 1318 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; |
1013 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, | 1319 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, |
1014 | memsw); | 1320 | memsw); |
1015 | } else | 1321 | } else |
@@ -1020,8 +1326,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1020 | if (!(gfp_mask & __GFP_WAIT)) | 1326 | if (!(gfp_mask & __GFP_WAIT)) |
1021 | goto nomem; | 1327 | goto nomem; |
1022 | 1328 | ||
1023 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, | 1329 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, |
1024 | noswap, false); | 1330 | gfp_mask, flags); |
1025 | if (ret) | 1331 | if (ret) |
1026 | continue; | 1332 | continue; |
1027 | 1333 | ||
@@ -1046,13 +1352,24 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1046 | goto nomem; | 1352 | goto nomem; |
1047 | } | 1353 | } |
1048 | } | 1354 | } |
1355 | /* | ||
1356 | * Insert just the ancestor, we should trickle down to the correct | ||
1357 | * cgroup for reclaim, since the other nodes will be below their | ||
1358 | * soft limit | ||
1359 | */ | ||
1360 | if (soft_fail_res) { | ||
1361 | mem_over_soft_limit = | ||
1362 | mem_cgroup_from_res_counter(soft_fail_res, res); | ||
1363 | if (mem_cgroup_soft_limit_check(mem_over_soft_limit)) | ||
1364 | mem_cgroup_update_tree(mem_over_soft_limit, page); | ||
1365 | } | ||
1366 | done: | ||
1049 | return 0; | 1367 | return 0; |
1050 | nomem: | 1368 | nomem: |
1051 | css_put(&mem->css); | 1369 | css_put(&mem->css); |
1052 | return -ENOMEM; | 1370 | return -ENOMEM; |
1053 | } | 1371 | } |
1054 | 1372 | ||
1055 | |||
1056 | /* | 1373 | /* |
1057 | * A helper function to get mem_cgroup from ID. must be called under | 1374 | * A helper function to get mem_cgroup from ID. must be called under |
1058 | * rcu_read_lock(). The caller must check css_is_removed() or some if | 1375 | * rcu_read_lock(). The caller must check css_is_removed() or some if |
@@ -1119,15 +1436,38 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
1119 | lock_page_cgroup(pc); | 1436 | lock_page_cgroup(pc); |
1120 | if (unlikely(PageCgroupUsed(pc))) { | 1437 | if (unlikely(PageCgroupUsed(pc))) { |
1121 | unlock_page_cgroup(pc); | 1438 | unlock_page_cgroup(pc); |
1122 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 1439 | if (!mem_cgroup_is_root(mem)) { |
1123 | if (do_swap_account) | 1440 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); |
1124 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | 1441 | if (do_swap_account) |
1442 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, | ||
1443 | NULL); | ||
1444 | } | ||
1125 | css_put(&mem->css); | 1445 | css_put(&mem->css); |
1126 | return; | 1446 | return; |
1127 | } | 1447 | } |
1448 | |||
1128 | pc->mem_cgroup = mem; | 1449 | pc->mem_cgroup = mem; |
1450 | /* | ||
1451 | * We access a page_cgroup asynchronously without lock_page_cgroup(). | ||
1452 | * Especially when a page_cgroup is taken from a page, pc->mem_cgroup | ||
1453 | * is accessed after testing USED bit. To make pc->mem_cgroup visible | ||
1454 | * before the USED bit, we need a memory barrier here. | ||
1455 | * See mem_cgroup_add_lru_list(), etc. | ||
1456 | */ | ||
1129 | smp_wmb(); | 1457 | smp_wmb(); |
1130 | pc->flags = pcg_default_flags[ctype]; | 1458 | switch (ctype) { |
1459 | case MEM_CGROUP_CHARGE_TYPE_CACHE: | ||
1460 | case MEM_CGROUP_CHARGE_TYPE_SHMEM: | ||
1461 | SetPageCgroupCache(pc); | ||
1462 | SetPageCgroupUsed(pc); | ||
1463 | break; | ||
1464 | case MEM_CGROUP_CHARGE_TYPE_MAPPED: | ||
1465 | ClearPageCgroupCache(pc); | ||
1466 | SetPageCgroupUsed(pc); | ||
1467 | break; | ||
1468 | default: | ||
1469 | break; | ||
1470 | } | ||
1131 | 1471 | ||
1132 | mem_cgroup_charge_statistics(mem, pc, true); | 1472 | mem_cgroup_charge_statistics(mem, pc, true); |
1133 | 1473 | ||
@@ -1178,7 +1518,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1178 | if (pc->mem_cgroup != from) | 1518 | if (pc->mem_cgroup != from) |
1179 | goto out; | 1519 | goto out; |
1180 | 1520 | ||
1181 | res_counter_uncharge(&from->res, PAGE_SIZE); | 1521 | if (!mem_cgroup_is_root(from)) |
1522 | res_counter_uncharge(&from->res, PAGE_SIZE, NULL); | ||
1182 | mem_cgroup_charge_statistics(from, pc, false); | 1523 | mem_cgroup_charge_statistics(from, pc, false); |
1183 | 1524 | ||
1184 | page = pc->page; | 1525 | page = pc->page; |
@@ -1197,8 +1538,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1197 | 1); | 1538 | 1); |
1198 | } | 1539 | } |
1199 | 1540 | ||
1200 | if (do_swap_account) | 1541 | if (do_swap_account && !mem_cgroup_is_root(from)) |
1201 | res_counter_uncharge(&from->memsw, PAGE_SIZE); | 1542 | res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL); |
1202 | css_put(&from->css); | 1543 | css_put(&from->css); |
1203 | 1544 | ||
1204 | css_get(&to->css); | 1545 | css_get(&to->css); |
@@ -1238,7 +1579,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
1238 | parent = mem_cgroup_from_cont(pcg); | 1579 | parent = mem_cgroup_from_cont(pcg); |
1239 | 1580 | ||
1240 | 1581 | ||
1241 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false); | 1582 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page); |
1242 | if (ret || !parent) | 1583 | if (ret || !parent) |
1243 | return ret; | 1584 | return ret; |
1244 | 1585 | ||
@@ -1268,9 +1609,11 @@ uncharge: | |||
1268 | /* drop extra refcnt by try_charge() */ | 1609 | /* drop extra refcnt by try_charge() */ |
1269 | css_put(&parent->css); | 1610 | css_put(&parent->css); |
1270 | /* uncharge if move fails */ | 1611 | /* uncharge if move fails */ |
1271 | res_counter_uncharge(&parent->res, PAGE_SIZE); | 1612 | if (!mem_cgroup_is_root(parent)) { |
1272 | if (do_swap_account) | 1613 | res_counter_uncharge(&parent->res, PAGE_SIZE, NULL); |
1273 | res_counter_uncharge(&parent->memsw, PAGE_SIZE); | 1614 | if (do_swap_account) |
1615 | res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL); | ||
1616 | } | ||
1274 | return ret; | 1617 | return ret; |
1275 | } | 1618 | } |
1276 | 1619 | ||
@@ -1295,7 +1638,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | |||
1295 | prefetchw(pc); | 1638 | prefetchw(pc); |
1296 | 1639 | ||
1297 | mem = memcg; | 1640 | mem = memcg; |
1298 | ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true); | 1641 | ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page); |
1299 | if (ret || !mem) | 1642 | if (ret || !mem) |
1300 | return ret; | 1643 | return ret; |
1301 | 1644 | ||
@@ -1414,14 +1757,14 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | |||
1414 | if (!mem) | 1757 | if (!mem) |
1415 | goto charge_cur_mm; | 1758 | goto charge_cur_mm; |
1416 | *ptr = mem; | 1759 | *ptr = mem; |
1417 | ret = __mem_cgroup_try_charge(NULL, mask, ptr, true); | 1760 | ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page); |
1418 | /* drop extra refcnt from tryget */ | 1761 | /* drop extra refcnt from tryget */ |
1419 | css_put(&mem->css); | 1762 | css_put(&mem->css); |
1420 | return ret; | 1763 | return ret; |
1421 | charge_cur_mm: | 1764 | charge_cur_mm: |
1422 | if (unlikely(!mm)) | 1765 | if (unlikely(!mm)) |
1423 | mm = &init_mm; | 1766 | mm = &init_mm; |
1424 | return __mem_cgroup_try_charge(mm, mask, ptr, true); | 1767 | return __mem_cgroup_try_charge(mm, mask, ptr, true, page); |
1425 | } | 1768 | } |
1426 | 1769 | ||
1427 | static void | 1770 | static void |
@@ -1459,7 +1802,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
1459 | * This recorded memcg can be obsolete one. So, avoid | 1802 | * This recorded memcg can be obsolete one. So, avoid |
1460 | * calling css_tryget | 1803 | * calling css_tryget |
1461 | */ | 1804 | */ |
1462 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); | 1805 | if (!mem_cgroup_is_root(memcg)) |
1806 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, | ||
1807 | NULL); | ||
1808 | mem_cgroup_swap_statistics(memcg, false); | ||
1463 | mem_cgroup_put(memcg); | 1809 | mem_cgroup_put(memcg); |
1464 | } | 1810 | } |
1465 | rcu_read_unlock(); | 1811 | rcu_read_unlock(); |
@@ -1484,9 +1830,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) | |||
1484 | return; | 1830 | return; |
1485 | if (!mem) | 1831 | if (!mem) |
1486 | return; | 1832 | return; |
1487 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 1833 | if (!mem_cgroup_is_root(mem)) { |
1488 | if (do_swap_account) | 1834 | res_counter_uncharge(&mem->res, PAGE_SIZE, NULL); |
1489 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | 1835 | if (do_swap_account) |
1836 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | ||
1837 | } | ||
1490 | css_put(&mem->css); | 1838 | css_put(&mem->css); |
1491 | } | 1839 | } |
1492 | 1840 | ||
@@ -1500,6 +1848,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1500 | struct page_cgroup *pc; | 1848 | struct page_cgroup *pc; |
1501 | struct mem_cgroup *mem = NULL; | 1849 | struct mem_cgroup *mem = NULL; |
1502 | struct mem_cgroup_per_zone *mz; | 1850 | struct mem_cgroup_per_zone *mz; |
1851 | bool soft_limit_excess = false; | ||
1503 | 1852 | ||
1504 | if (mem_cgroup_disabled()) | 1853 | if (mem_cgroup_disabled()) |
1505 | return NULL; | 1854 | return NULL; |
@@ -1538,9 +1887,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1538 | break; | 1887 | break; |
1539 | } | 1888 | } |
1540 | 1889 | ||
1541 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 1890 | if (!mem_cgroup_is_root(mem)) { |
1542 | if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) | 1891 | res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess); |
1543 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | 1892 | if (do_swap_account && |
1893 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) | ||
1894 | res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL); | ||
1895 | } | ||
1896 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | ||
1897 | mem_cgroup_swap_statistics(mem, true); | ||
1544 | mem_cgroup_charge_statistics(mem, pc, false); | 1898 | mem_cgroup_charge_statistics(mem, pc, false); |
1545 | 1899 | ||
1546 | ClearPageCgroupUsed(pc); | 1900 | ClearPageCgroupUsed(pc); |
@@ -1554,6 +1908,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1554 | mz = page_cgroup_zoneinfo(pc); | 1908 | mz = page_cgroup_zoneinfo(pc); |
1555 | unlock_page_cgroup(pc); | 1909 | unlock_page_cgroup(pc); |
1556 | 1910 | ||
1911 | if (soft_limit_excess && mem_cgroup_soft_limit_check(mem)) | ||
1912 | mem_cgroup_update_tree(mem, page); | ||
1557 | /* at swapout, this memcg will be accessed to record to swap */ | 1913 | /* at swapout, this memcg will be accessed to record to swap */ |
1558 | if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 1914 | if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
1559 | css_put(&mem->css); | 1915 | css_put(&mem->css); |
@@ -1629,7 +1985,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) | |||
1629 | * We uncharge this because swap is freed. | 1985 | * We uncharge this because swap is freed. |
1630 | * This memcg can be obsolete one. We avoid calling css_tryget | 1986 | * This memcg can be obsolete one. We avoid calling css_tryget |
1631 | */ | 1987 | */ |
1632 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); | 1988 | if (!mem_cgroup_is_root(memcg)) |
1989 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL); | ||
1990 | mem_cgroup_swap_statistics(memcg, false); | ||
1633 | mem_cgroup_put(memcg); | 1991 | mem_cgroup_put(memcg); |
1634 | } | 1992 | } |
1635 | rcu_read_unlock(); | 1993 | rcu_read_unlock(); |
@@ -1658,7 +2016,8 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) | |||
1658 | unlock_page_cgroup(pc); | 2016 | unlock_page_cgroup(pc); |
1659 | 2017 | ||
1660 | if (mem) { | 2018 | if (mem) { |
1661 | ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false); | 2019 | ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false, |
2020 | page); | ||
1662 | css_put(&mem->css); | 2021 | css_put(&mem->css); |
1663 | } | 2022 | } |
1664 | *ptr = mem; | 2023 | *ptr = mem; |
@@ -1798,8 +2157,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | |||
1798 | if (!ret) | 2157 | if (!ret) |
1799 | break; | 2158 | break; |
1800 | 2159 | ||
1801 | progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, | 2160 | progress = mem_cgroup_hierarchical_reclaim(memcg, NULL, |
1802 | false, true); | 2161 | GFP_KERNEL, |
2162 | MEM_CGROUP_RECLAIM_SHRINK); | ||
1803 | curusage = res_counter_read_u64(&memcg->res, RES_USAGE); | 2163 | curusage = res_counter_read_u64(&memcg->res, RES_USAGE); |
1804 | /* Usage is reduced ? */ | 2164 | /* Usage is reduced ? */ |
1805 | if (curusage >= oldusage) | 2165 | if (curusage >= oldusage) |
@@ -1851,7 +2211,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, | |||
1851 | if (!ret) | 2211 | if (!ret) |
1852 | break; | 2212 | break; |
1853 | 2213 | ||
1854 | mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true); | 2214 | mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, |
2215 | MEM_CGROUP_RECLAIM_NOSWAP | | ||
2216 | MEM_CGROUP_RECLAIM_SHRINK); | ||
1855 | curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | 2217 | curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
1856 | /* Usage is reduced ? */ | 2218 | /* Usage is reduced ? */ |
1857 | if (curusage >= oldusage) | 2219 | if (curusage >= oldusage) |
@@ -1862,6 +2224,97 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, | |||
1862 | return ret; | 2224 | return ret; |
1863 | } | 2225 | } |
1864 | 2226 | ||
2227 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | ||
2228 | gfp_t gfp_mask, int nid, | ||
2229 | int zid) | ||
2230 | { | ||
2231 | unsigned long nr_reclaimed = 0; | ||
2232 | struct mem_cgroup_per_zone *mz, *next_mz = NULL; | ||
2233 | unsigned long reclaimed; | ||
2234 | int loop = 0; | ||
2235 | struct mem_cgroup_tree_per_zone *mctz; | ||
2236 | |||
2237 | if (order > 0) | ||
2238 | return 0; | ||
2239 | |||
2240 | mctz = soft_limit_tree_node_zone(nid, zid); | ||
2241 | /* | ||
2242 | * This loop can run a while, especially if mem_cgroups continuously | ||
2243 | * keep exceeding their soft limit and putting the system under | ||
2244 | * pressure | ||
2245 | */ | ||
2246 | do { | ||
2247 | if (next_mz) | ||
2248 | mz = next_mz; | ||
2249 | else | ||
2250 | mz = mem_cgroup_largest_soft_limit_node(mctz); | ||
2251 | if (!mz) | ||
2252 | break; | ||
2253 | |||
2254 | reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone, | ||
2255 | gfp_mask, | ||
2256 | MEM_CGROUP_RECLAIM_SOFT); | ||
2257 | nr_reclaimed += reclaimed; | ||
2258 | spin_lock(&mctz->lock); | ||
2259 | |||
2260 | /* | ||
2261 | * If we failed to reclaim anything from this memory cgroup | ||
2262 | * it is time to move on to the next cgroup | ||
2263 | */ | ||
2264 | next_mz = NULL; | ||
2265 | if (!reclaimed) { | ||
2266 | do { | ||
2267 | /* | ||
2268 | * Loop until we find yet another one. | ||
2269 | * | ||
2270 | * By the time we get the soft_limit lock | ||
2271 | * again, someone might have added the | ||
2272 | * group back on the RB tree. Iterate to | ||
2273 | * make sure we get a different mem. | ||
2274 | * mem_cgroup_largest_soft_limit_node returns | ||
2275 | * NULL if no other cgroup is present on | ||
2276 | * the tree | ||
2277 | */ | ||
2278 | next_mz = | ||
2279 | __mem_cgroup_largest_soft_limit_node(mctz); | ||
2280 | if (next_mz == mz) { | ||
2281 | css_put(&next_mz->mem->css); | ||
2282 | next_mz = NULL; | ||
2283 | } else /* next_mz == NULL or other memcg */ | ||
2284 | break; | ||
2285 | } while (1); | ||
2286 | } | ||
2287 | mz->usage_in_excess = | ||
2288 | res_counter_soft_limit_excess(&mz->mem->res); | ||
2289 | __mem_cgroup_remove_exceeded(mz->mem, mz, mctz); | ||
2290 | /* | ||
2291 | * One school of thought says that we should not add | ||
2292 | * back the node to the tree if reclaim returns 0. | ||
2293 | * But our reclaim could return 0, simply because due | ||
2294 | * to priority we are exposing a smaller subset of | ||
2295 | * memory to reclaim from. Consider this as a longer | ||
2296 | * term TODO. | ||
2297 | */ | ||
2298 | if (mz->usage_in_excess) | ||
2299 | __mem_cgroup_insert_exceeded(mz->mem, mz, mctz); | ||
2300 | spin_unlock(&mctz->lock); | ||
2301 | css_put(&mz->mem->css); | ||
2302 | loop++; | ||
2303 | /* | ||
2304 | * Could not reclaim anything and there are no more | ||
2305 | * mem cgroups to try or we seem to be looping without | ||
2306 | * reclaiming anything. | ||
2307 | */ | ||
2308 | if (!nr_reclaimed && | ||
2309 | (next_mz == NULL || | ||
2310 | loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS)) | ||
2311 | break; | ||
2312 | } while (!nr_reclaimed); | ||
2313 | if (next_mz) | ||
2314 | css_put(&next_mz->mem->css); | ||
2315 | return nr_reclaimed; | ||
2316 | } | ||
2317 | |||
1865 | /* | 2318 | /* |
1865 | * This routine traverses the page_cgroups in the given list and drops them all. | 2318 | * This routine traverses the page_cgroups in the given list and drops them all. |
1867 | * *And* this routine doesn't reclaim page itself, just removes page_cgroup. | 2320 | * *And* this routine doesn't reclaim page itself, just removes page_cgroup. |
@@ -2046,20 +2499,64 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, | |||
2046 | return retval; | 2499 | return retval; |
2047 | } | 2500 | } |
2048 | 2501 | ||
2502 | struct mem_cgroup_idx_data { | ||
2503 | s64 val; | ||
2504 | enum mem_cgroup_stat_index idx; | ||
2505 | }; | ||
2506 | |||
2507 | static int | ||
2508 | mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data) | ||
2509 | { | ||
2510 | struct mem_cgroup_idx_data *d = data; | ||
2511 | d->val += mem_cgroup_read_stat(&mem->stat, d->idx); | ||
2512 | return 0; | ||
2513 | } | ||
2514 | |||
2515 | static void | ||
2516 | mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, | ||
2517 | enum mem_cgroup_stat_index idx, s64 *val) | ||
2518 | { | ||
2519 | struct mem_cgroup_idx_data d; | ||
2520 | d.idx = idx; | ||
2521 | d.val = 0; | ||
2522 | mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat); | ||
2523 | *val = d.val; | ||
2524 | } | ||
2525 | |||
2049 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) | 2526 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) |
2050 | { | 2527 | { |
2051 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 2528 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
2052 | u64 val = 0; | 2529 | u64 idx_val, val; |
2053 | int type, name; | 2530 | int type, name; |
2054 | 2531 | ||
2055 | type = MEMFILE_TYPE(cft->private); | 2532 | type = MEMFILE_TYPE(cft->private); |
2056 | name = MEMFILE_ATTR(cft->private); | 2533 | name = MEMFILE_ATTR(cft->private); |
2057 | switch (type) { | 2534 | switch (type) { |
2058 | case _MEM: | 2535 | case _MEM: |
2059 | val = res_counter_read_u64(&mem->res, name); | 2536 | if (name == RES_USAGE && mem_cgroup_is_root(mem)) { |
2537 | mem_cgroup_get_recursive_idx_stat(mem, | ||
2538 | MEM_CGROUP_STAT_CACHE, &idx_val); | ||
2539 | val = idx_val; | ||
2540 | mem_cgroup_get_recursive_idx_stat(mem, | ||
2541 | MEM_CGROUP_STAT_RSS, &idx_val); | ||
2542 | val += idx_val; | ||
2543 | val <<= PAGE_SHIFT; | ||
2544 | } else | ||
2545 | val = res_counter_read_u64(&mem->res, name); | ||
2060 | break; | 2546 | break; |
2061 | case _MEMSWAP: | 2547 | case _MEMSWAP: |
2062 | val = res_counter_read_u64(&mem->memsw, name); | 2548 | if (name == RES_USAGE && mem_cgroup_is_root(mem)) { |
2549 | mem_cgroup_get_recursive_idx_stat(mem, | ||
2550 | MEM_CGROUP_STAT_CACHE, &idx_val); | ||
2551 | val = idx_val; | ||
2552 | mem_cgroup_get_recursive_idx_stat(mem, | ||
2553 | MEM_CGROUP_STAT_RSS, &idx_val); | ||
2554 | val += idx_val; | ||
2555 | mem_cgroup_get_recursive_idx_stat(mem, | ||
2556 | MEM_CGROUP_STAT_SWAPOUT, &idx_val); | ||
2557 | val <<= PAGE_SHIFT; | ||
2558 | } else | ||
2559 | val = res_counter_read_u64(&mem->memsw, name); | ||
2063 | break; | 2560 | break; |
2064 | default: | 2561 | default: |
2065 | BUG(); | 2562 | BUG(); |
@@ -2083,6 +2580,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | |||
2083 | name = MEMFILE_ATTR(cft->private); | 2580 | name = MEMFILE_ATTR(cft->private); |
2084 | switch (name) { | 2581 | switch (name) { |
2085 | case RES_LIMIT: | 2582 | case RES_LIMIT: |
2583 | if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ | ||
2584 | ret = -EINVAL; | ||
2585 | break; | ||
2586 | } | ||
2086 | /* This function does all necessary parse...reuse it */ | 2587 | /* This function does all necessary parse...reuse it */ |
2087 | ret = res_counter_memparse_write_strategy(buffer, &val); | 2588 | ret = res_counter_memparse_write_strategy(buffer, &val); |
2088 | if (ret) | 2589 | if (ret) |
@@ -2092,6 +2593,20 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | |||
2092 | else | 2593 | else |
2093 | ret = mem_cgroup_resize_memsw_limit(memcg, val); | 2594 | ret = mem_cgroup_resize_memsw_limit(memcg, val); |
2094 | break; | 2595 | break; |
2596 | case RES_SOFT_LIMIT: | ||
2597 | ret = res_counter_memparse_write_strategy(buffer, &val); | ||
2598 | if (ret) | ||
2599 | break; | ||
2600 | /* | ||
2601 | * For memsw, soft limits are hard to implement in terms | ||
2602 | * of semantics; for now, we support soft limits only for | ||
2603 | * control without swap. | ||
2604 | */ | ||
2605 | if (type == _MEM) | ||
2606 | ret = res_counter_set_soft_limit(&memcg->res, val); | ||
2607 | else | ||
2608 | ret = -EINVAL; | ||
2609 | break; | ||
2095 | default: | 2610 | default: |
2096 | ret = -EINVAL; /* should be BUG() ? */ | 2611 | ret = -EINVAL; /* should be BUG() ? */ |
2097 | break; | 2612 | break; |
@@ -2149,6 +2664,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) | |||
2149 | res_counter_reset_failcnt(&mem->memsw); | 2664 | res_counter_reset_failcnt(&mem->memsw); |
2150 | break; | 2665 | break; |
2151 | } | 2666 | } |
2667 | |||
2152 | return 0; | 2668 | return 0; |
2153 | } | 2669 | } |
2154 | 2670 | ||
@@ -2160,6 +2676,7 @@ enum { | |||
2160 | MCS_MAPPED_FILE, | 2676 | MCS_MAPPED_FILE, |
2161 | MCS_PGPGIN, | 2677 | MCS_PGPGIN, |
2162 | MCS_PGPGOUT, | 2678 | MCS_PGPGOUT, |
2679 | MCS_SWAP, | ||
2163 | MCS_INACTIVE_ANON, | 2680 | MCS_INACTIVE_ANON, |
2164 | MCS_ACTIVE_ANON, | 2681 | MCS_ACTIVE_ANON, |
2165 | MCS_INACTIVE_FILE, | 2682 | MCS_INACTIVE_FILE, |
@@ -2181,6 +2698,7 @@ struct { | |||
2181 | {"mapped_file", "total_mapped_file"}, | 2698 | {"mapped_file", "total_mapped_file"}, |
2182 | {"pgpgin", "total_pgpgin"}, | 2699 | {"pgpgin", "total_pgpgin"}, |
2183 | {"pgpgout", "total_pgpgout"}, | 2700 | {"pgpgout", "total_pgpgout"}, |
2701 | {"swap", "total_swap"}, | ||
2184 | {"inactive_anon", "total_inactive_anon"}, | 2702 | {"inactive_anon", "total_inactive_anon"}, |
2185 | {"active_anon", "total_active_anon"}, | 2703 | {"active_anon", "total_active_anon"}, |
2186 | {"inactive_file", "total_inactive_file"}, | 2704 | {"inactive_file", "total_inactive_file"}, |
@@ -2205,6 +2723,10 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) | |||
2205 | s->stat[MCS_PGPGIN] += val; | 2723 | s->stat[MCS_PGPGIN] += val; |
2206 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); | 2724 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); |
2207 | s->stat[MCS_PGPGOUT] += val; | 2725 | s->stat[MCS_PGPGOUT] += val; |
2726 | if (do_swap_account) { | ||
2727 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT); | ||
2728 | s->stat[MCS_SWAP] += val * PAGE_SIZE; | ||
2729 | } | ||
2208 | 2730 | ||
2209 | /* per zone stat */ | 2731 | /* per zone stat */ |
2210 | val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON); | 2732 | val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON); |
@@ -2236,8 +2758,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, | |||
2236 | memset(&mystat, 0, sizeof(mystat)); | 2758 | memset(&mystat, 0, sizeof(mystat)); |
2237 | mem_cgroup_get_local_stat(mem_cont, &mystat); | 2759 | mem_cgroup_get_local_stat(mem_cont, &mystat); |
2238 | 2760 | ||
2239 | for (i = 0; i < NR_MCS_STAT; i++) | 2761 | for (i = 0; i < NR_MCS_STAT; i++) { |
2762 | if (i == MCS_SWAP && !do_swap_account) | ||
2763 | continue; | ||
2240 | cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]); | 2764 | cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]); |
2765 | } | ||
2241 | 2766 | ||
2242 | /* Hierarchical information */ | 2767 | /* Hierarchical information */ |
2243 | { | 2768 | { |
@@ -2250,9 +2775,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, | |||
2250 | 2775 | ||
2251 | memset(&mystat, 0, sizeof(mystat)); | 2776 | memset(&mystat, 0, sizeof(mystat)); |
2252 | mem_cgroup_get_total_stat(mem_cont, &mystat); | 2777 | mem_cgroup_get_total_stat(mem_cont, &mystat); |
2253 | for (i = 0; i < NR_MCS_STAT; i++) | 2778 | for (i = 0; i < NR_MCS_STAT; i++) { |
2779 | if (i == MCS_SWAP && !do_swap_account) | ||
2780 | continue; | ||
2254 | cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]); | 2781 | cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]); |
2255 | 2782 | } | |
2256 | 2783 | ||
2257 | #ifdef CONFIG_DEBUG_VM | 2784 | #ifdef CONFIG_DEBUG_VM |
2258 | cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL)); | 2785 | cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL)); |
@@ -2345,6 +2872,12 @@ static struct cftype mem_cgroup_files[] = { | |||
2345 | .read_u64 = mem_cgroup_read, | 2872 | .read_u64 = mem_cgroup_read, |
2346 | }, | 2873 | }, |
2347 | { | 2874 | { |
2875 | .name = "soft_limit_in_bytes", | ||
2876 | .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), | ||
2877 | .write_string = mem_cgroup_write, | ||
2878 | .read_u64 = mem_cgroup_read, | ||
2879 | }, | ||
2880 | { | ||
2348 | .name = "failcnt", | 2881 | .name = "failcnt", |
2349 | .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), | 2882 | .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), |
2350 | .trigger = mem_cgroup_reset, | 2883 | .trigger = mem_cgroup_reset, |
@@ -2438,6 +2971,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) | |||
2438 | mz = &pn->zoneinfo[zone]; | 2971 | mz = &pn->zoneinfo[zone]; |
2439 | for_each_lru(l) | 2972 | for_each_lru(l) |
2440 | INIT_LIST_HEAD(&mz->lists[l]); | 2973 | INIT_LIST_HEAD(&mz->lists[l]); |
2974 | mz->usage_in_excess = 0; | ||
2975 | mz->on_tree = false; | ||
2976 | mz->mem = mem; | ||
2441 | } | 2977 | } |
2442 | return 0; | 2978 | return 0; |
2443 | } | 2979 | } |
@@ -2483,6 +3019,7 @@ static void __mem_cgroup_free(struct mem_cgroup *mem) | |||
2483 | { | 3019 | { |
2484 | int node; | 3020 | int node; |
2485 | 3021 | ||
3022 | mem_cgroup_remove_from_trees(mem); | ||
2486 | free_css_id(&mem_cgroup_subsys, &mem->css); | 3023 | free_css_id(&mem_cgroup_subsys, &mem->css); |
2487 | 3024 | ||
2488 | for_each_node_state(node, N_POSSIBLE) | 3025 | for_each_node_state(node, N_POSSIBLE) |
@@ -2531,6 +3068,31 @@ static void __init enable_swap_cgroup(void) | |||
2531 | } | 3068 | } |
2532 | #endif | 3069 | #endif |
2533 | 3070 | ||
3071 | static int mem_cgroup_soft_limit_tree_init(void) | ||
3072 | { | ||
3073 | struct mem_cgroup_tree_per_node *rtpn; | ||
3074 | struct mem_cgroup_tree_per_zone *rtpz; | ||
3075 | int tmp, node, zone; | ||
3076 | |||
3077 | for_each_node_state(node, N_POSSIBLE) { | ||
3078 | tmp = node; | ||
3079 | if (!node_state(node, N_NORMAL_MEMORY)) | ||
3080 | tmp = -1; | ||
3081 | rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp); | ||
3082 | if (!rtpn) | ||
3083 | return 1; | ||
3084 | |||
3085 | soft_limit_tree.rb_tree_per_node[node] = rtpn; | ||
3086 | |||
3087 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | ||
3088 | rtpz = &rtpn->rb_tree_per_zone[zone]; | ||
3089 | rtpz->rb_root = RB_ROOT; | ||
3090 | spin_lock_init(&rtpz->lock); | ||
3091 | } | ||
3092 | } | ||
3093 | return 0; | ||
3094 | } | ||
3095 | |||
2534 | static struct cgroup_subsys_state * __ref | 3096 | static struct cgroup_subsys_state * __ref |
2535 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | 3097 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) |
2536 | { | 3098 | { |
@@ -2545,10 +3107,15 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
2545 | for_each_node_state(node, N_POSSIBLE) | 3107 | for_each_node_state(node, N_POSSIBLE) |
2546 | if (alloc_mem_cgroup_per_zone_info(mem, node)) | 3108 | if (alloc_mem_cgroup_per_zone_info(mem, node)) |
2547 | goto free_out; | 3109 | goto free_out; |
3110 | |||
2548 | /* root ? */ | 3111 | /* root ? */ |
2549 | if (cont->parent == NULL) { | 3112 | if (cont->parent == NULL) { |
2550 | enable_swap_cgroup(); | 3113 | enable_swap_cgroup(); |
2551 | parent = NULL; | 3114 | parent = NULL; |
3115 | root_mem_cgroup = mem; | ||
3116 | if (mem_cgroup_soft_limit_tree_init()) | ||
3117 | goto free_out; | ||
3118 | |||
2552 | } else { | 3119 | } else { |
2553 | parent = mem_cgroup_from_cont(cont->parent); | 3120 | parent = mem_cgroup_from_cont(cont->parent); |
2554 | mem->use_hierarchy = parent->use_hierarchy; | 3121 | mem->use_hierarchy = parent->use_hierarchy; |
@@ -2577,6 +3144,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
2577 | return &mem->css; | 3144 | return &mem->css; |
2578 | free_out: | 3145 | free_out: |
2579 | __mem_cgroup_free(mem); | 3146 | __mem_cgroup_free(mem); |
3147 | root_mem_cgroup = NULL; | ||
2580 | return ERR_PTR(error); | 3148 | return ERR_PTR(error); |
2581 | } | 3149 | } |
2582 | 3150 | ||
@@ -2612,7 +3180,8 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss, | |||
2612 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, | 3180 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, |
2613 | struct cgroup *cont, | 3181 | struct cgroup *cont, |
2614 | struct cgroup *old_cont, | 3182 | struct cgroup *old_cont, |
2615 | struct task_struct *p) | 3183 | struct task_struct *p, |
3184 | bool threadgroup) | ||
2616 | { | 3185 | { |
2617 | mutex_lock(&memcg_tasklist); | 3186 | mutex_lock(&memcg_tasklist); |
2618 | /* | 3187 | /* |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c new file mode 100644 index 000000000000..729d4b15b645 --- /dev/null +++ b/mm/memory-failure.c | |||
@@ -0,0 +1,832 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008, 2009 Intel Corporation | ||
3 | * Authors: Andi Kleen, Fengguang Wu | ||
4 | * | ||
5 | * This software may be redistributed and/or modified under the terms of | ||
6 | * the GNU General Public License ("GPL") version 2 only as published by the | ||
7 | * Free Software Foundation. | ||
8 | * | ||
9 | * High level machine check handler. Handles pages reported by the | ||
10 | * hardware as being corrupted usually due to a 2bit ECC memory or cache | ||
11 | * failure. | ||
12 | * | ||
13 | * Handles page cache pages in various states. The tricky part | ||
14 | * here is that we can access any page asynchronous to other VM | ||
15 | * users, because memory failures could happen anytime and anywhere, | ||
16 | * possibly violating some of their assumptions. This is why this code | ||
17 | * has to be extremely careful. Generally it tries to use normal locking | ||
18 | * rules, as in get the standard locks, even if that means the | ||
19 | * error handling takes potentially a long time. | ||
20 | * | ||
21 | * The operation to map back from RMAP chains to processes has to walk | ||
22 | * the complete process list and has non-linear complexity with the number | ||
23 | * of mappings. In short it can be quite slow. But since memory corruptions | ||
24 | * are rare we hope to get away with this. | ||
25 | */ | ||
26 | |||
27 | /* | ||
28 | * Notebook: | ||
29 | * - hugetlb needs more code | ||
30 | * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages | ||
31 | * - pass bad pages to kdump next kernel | ||
32 | */ | ||
33 | #define DEBUG 1 /* remove me in 2.6.34 */ | ||
34 | #include <linux/kernel.h> | ||
35 | #include <linux/mm.h> | ||
36 | #include <linux/page-flags.h> | ||
37 | #include <linux/sched.h> | ||
38 | #include <linux/rmap.h> | ||
39 | #include <linux/pagemap.h> | ||
40 | #include <linux/swap.h> | ||
41 | #include <linux/backing-dev.h> | ||
42 | #include "internal.h" | ||
43 | |||
44 | int sysctl_memory_failure_early_kill __read_mostly = 0; | ||
45 | |||
46 | int sysctl_memory_failure_recovery __read_mostly = 1; | ||
47 | |||
48 | atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0); | ||
49 | |||
50 | /* | ||
51 | * Send all the processes who have the page mapped an ``action optional'' | ||
52 | * signal. | ||
53 | */ | ||
54 | static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, | ||
55 | unsigned long pfn) | ||
56 | { | ||
57 | struct siginfo si; | ||
58 | int ret; | ||
59 | |||
60 | printk(KERN_ERR | ||
61 | "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n", | ||
62 | pfn, t->comm, t->pid); | ||
63 | si.si_signo = SIGBUS; | ||
64 | si.si_errno = 0; | ||
65 | si.si_code = BUS_MCEERR_AO; | ||
66 | si.si_addr = (void *)addr; | ||
67 | #ifdef __ARCH_SI_TRAPNO | ||
68 | si.si_trapno = trapno; | ||
69 | #endif | ||
70 | si.si_addr_lsb = PAGE_SHIFT; | ||
71 | /* | ||
72 | * Don't use force here, it's convenient if the signal | ||
73 | * can be temporarily blocked. | ||
74 | * This could cause a loop when the user sets SIGBUS | ||
75 | * to SIG_IGN, but hopefully no one will do that? | ||
76 | */ | ||
77 | ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ | ||
78 | if (ret < 0) | ||
79 | printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", | ||
80 | t->comm, t->pid, ret); | ||
81 | return ret; | ||
82 | } | ||
83 | |||
84 | /* | ||
85 | * Kill all processes that have a poisoned page mapped and then isolate | ||
86 | * the page. | ||
87 | * | ||
88 | * General strategy: | ||
89 | * Find all processes having the page mapped and kill them. | ||
90 | * But we keep a page reference around so that the page is not | ||
91 | * actually freed yet. | ||
92 | * Then stash the page away | ||
93 | * | ||
94 | * There's no convenient way to get back to mapped processes | ||
95 | * from the VMAs. So do a brute-force search over all | ||
96 | * running processes. | ||
97 | * | ||
98 | * Remember that machine checks are not common (or rather | ||
99 | * if they are common you have other problems), so this shouldn't | ||
100 | * be a performance issue. | ||
101 | * | ||
102 | * Also there are some races possible while we get from the | ||
103 | * error detection to actually handle it. | ||
104 | */ | ||
105 | |||
106 | struct to_kill { | ||
107 | struct list_head nd; | ||
108 | struct task_struct *tsk; | ||
109 | unsigned long addr; | ||
110 | unsigned addr_valid:1; | ||
111 | }; | ||
112 | |||
113 | /* | ||
114 | * Failure handling: if we can't find or can't kill a process there's | ||
115 | * not much we can do. We just print a message and ignore otherwise. | ||
116 | */ | ||
117 | |||
118 | /* | ||
119 | * Schedule a process for later kill. | ||
120 | * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM. | ||
121 | * TBD would GFP_NOIO be enough? | ||
122 | */ | ||
123 | static void add_to_kill(struct task_struct *tsk, struct page *p, | ||
124 | struct vm_area_struct *vma, | ||
125 | struct list_head *to_kill, | ||
126 | struct to_kill **tkc) | ||
127 | { | ||
128 | struct to_kill *tk; | ||
129 | |||
130 | if (*tkc) { | ||
131 | tk = *tkc; | ||
132 | *tkc = NULL; | ||
133 | } else { | ||
134 | tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC); | ||
135 | if (!tk) { | ||
136 | printk(KERN_ERR | ||
137 | "MCE: Out of memory while machine check handling\n"); | ||
138 | return; | ||
139 | } | ||
140 | } | ||
141 | tk->addr = page_address_in_vma(p, vma); | ||
142 | tk->addr_valid = 1; | ||
143 | |||
144 | /* | ||
145 | * In theory we don't have to kill when the page was | ||
146 | * munmapped. But it could also be a mremap. Since that's | ||
147 | * likely very rare kill anyways just out of paranoia, but use | ||
148 | * a SIGKILL because the error is not contained anymore. | ||
149 | */ | ||
150 | if (tk->addr == -EFAULT) { | ||
151 | pr_debug("MCE: Unable to find user space address %lx in %s\n", | ||
152 | page_to_pfn(p), tsk->comm); | ||
153 | tk->addr_valid = 0; | ||
154 | } | ||
155 | get_task_struct(tsk); | ||
156 | tk->tsk = tsk; | ||
157 | list_add_tail(&tk->nd, to_kill); | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * Kill the processes that have been collected earlier. | ||
162 | * | ||
163 | * Only do anything when DOIT is set, otherwise just free the list | ||
164 | * (this is used for clean pages which do not need killing) | ||
165 | * Also when FAIL is set do a force kill because something went | ||
166 | * wrong earlier. | ||
167 | */ | ||
168 | static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, | ||
169 | int fail, unsigned long pfn) | ||
170 | { | ||
171 | struct to_kill *tk, *next; | ||
172 | |||
173 | list_for_each_entry_safe (tk, next, to_kill, nd) { | ||
174 | if (doit) { | ||
175 | /* | ||
176 | * In case something went wrong with munmapping | ||
177 | * make sure the process doesn't catch the | ||
178 | * signal and then access the memory. Just kill it, | ||
179 | * bypassing the signal handlers. | ||
180 | */ | ||
181 | if (fail || tk->addr_valid == 0) { | ||
182 | printk(KERN_ERR | ||
183 | "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n", | ||
184 | pfn, tk->tsk->comm, tk->tsk->pid); | ||
185 | force_sig(SIGKILL, tk->tsk); | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * In theory the process could have mapped | ||
190 | * something else on the address in-between. We could | ||
191 | * check for that, but we need to tell the | ||
192 | * process anyways. | ||
193 | */ | ||
194 | else if (kill_proc_ao(tk->tsk, tk->addr, trapno, | ||
195 | pfn) < 0) | ||
196 | printk(KERN_ERR | ||
197 | "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", | ||
198 | pfn, tk->tsk->comm, tk->tsk->pid); | ||
199 | } | ||
200 | put_task_struct(tk->tsk); | ||
201 | kfree(tk); | ||
202 | } | ||
203 | } | ||
204 | |||
205 | static int task_early_kill(struct task_struct *tsk) | ||
206 | { | ||
207 | if (!tsk->mm) | ||
208 | return 0; | ||
209 | if (tsk->flags & PF_MCE_PROCESS) | ||
210 | return !!(tsk->flags & PF_MCE_EARLY); | ||
211 | return sysctl_memory_failure_early_kill; | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | * Collect processes when the error hit an anonymous page. | ||
216 | */ | ||
217 | static void collect_procs_anon(struct page *page, struct list_head *to_kill, | ||
218 | struct to_kill **tkc) | ||
219 | { | ||
220 | struct vm_area_struct *vma; | ||
221 | struct task_struct *tsk; | ||
222 | struct anon_vma *av; | ||
223 | |||
224 | read_lock(&tasklist_lock); | ||
225 | av = page_lock_anon_vma(page); | ||
226 | if (av == NULL) /* Not actually mapped anymore */ | ||
227 | goto out; | ||
228 | for_each_process (tsk) { | ||
229 | if (!task_early_kill(tsk)) | ||
230 | continue; | ||
231 | list_for_each_entry (vma, &av->head, anon_vma_node) { | ||
232 | if (!page_mapped_in_vma(page, vma)) | ||
233 | continue; | ||
234 | if (vma->vm_mm == tsk->mm) | ||
235 | add_to_kill(tsk, page, vma, to_kill, tkc); | ||
236 | } | ||
237 | } | ||
238 | page_unlock_anon_vma(av); | ||
239 | out: | ||
240 | read_unlock(&tasklist_lock); | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * Collect processes when the error hit a file mapped page. | ||
245 | */ | ||
246 | static void collect_procs_file(struct page *page, struct list_head *to_kill, | ||
247 | struct to_kill **tkc) | ||
248 | { | ||
249 | struct vm_area_struct *vma; | ||
250 | struct task_struct *tsk; | ||
251 | struct prio_tree_iter iter; | ||
252 | struct address_space *mapping = page->mapping; | ||
253 | |||
254 | /* | ||
255 | * A note on the locking order between the two locks. | ||
256 | * We don't rely on this particular order. | ||
257 | * If you have some other code that needs a different order | ||
258 | * feel free to switch them around. Or add a reverse link | ||
259 | * from mm_struct to task_struct, then this could be all | ||
260 | * done without taking tasklist_lock and looping over all tasks. | ||
261 | */ | ||
262 | |||
263 | read_lock(&tasklist_lock); | ||
264 | spin_lock(&mapping->i_mmap_lock); | ||
265 | for_each_process(tsk) { | ||
266 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
267 | |||
268 | if (!task_early_kill(tsk)) | ||
269 | continue; | ||
270 | |||
271 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, | ||
272 | pgoff) { | ||
273 | /* | ||
274 | * Send early kill signal to tasks where a vma covers | ||
275 | * the page but the corrupted page is not necessarily | ||
276 | * mapped in its pte. | ||
277 | * Assume applications who requested early kill want | ||
278 | * to be informed of all such data corruptions. | ||
279 | */ | ||
280 | if (vma->vm_mm == tsk->mm) | ||
281 | add_to_kill(tsk, page, vma, to_kill, tkc); | ||
282 | } | ||
283 | } | ||
284 | spin_unlock(&mapping->i_mmap_lock); | ||
285 | read_unlock(&tasklist_lock); | ||
286 | } | ||
287 | |||
288 | /* | ||
289 | * Collect the processes who have the corrupted page mapped to kill. | ||
290 | * This is done in two steps for locking reasons. | ||
291 | * First preallocate one tokill structure outside the spin locks, | ||
292 | * so that we can kill at least one process reasonably reliably. | ||
293 | */ | ||
294 | static void collect_procs(struct page *page, struct list_head *tokill) | ||
295 | { | ||
296 | struct to_kill *tk; | ||
297 | |||
298 | if (!page->mapping) | ||
299 | return; | ||
300 | |||
301 | tk = kmalloc(sizeof(struct to_kill), GFP_NOIO); | ||
302 | if (!tk) | ||
303 | return; | ||
304 | if (PageAnon(page)) | ||
305 | collect_procs_anon(page, tokill, &tk); | ||
306 | else | ||
307 | collect_procs_file(page, tokill, &tk); | ||
308 | kfree(tk); | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * Error handlers for various types of pages. | ||
313 | */ | ||
314 | |||
315 | enum outcome { | ||
316 | FAILED, /* Error handling failed */ | ||
317 | DELAYED, /* Will be handled later */ | ||
318 | IGNORED, /* Error safely ignored */ | ||
319 | RECOVERED, /* Successfully recovered */ | ||
320 | }; | ||
321 | |||
322 | static const char *action_name[] = { | ||
323 | [FAILED] = "Failed", | ||
324 | [DELAYED] = "Delayed", | ||
325 | [IGNORED] = "Ignored", | ||
326 | [RECOVERED] = "Recovered", | ||
327 | }; | ||
328 | |||
329 | /* | ||
330 | * Error hit kernel page. | ||
331 | * Do nothing, try to be lucky and not touch this instead. For a few cases we | ||
332 | * could be more sophisticated. | ||
333 | */ | ||
334 | static int me_kernel(struct page *p, unsigned long pfn) | ||
335 | { | ||
336 | return DELAYED; | ||
337 | } | ||
338 | |||
339 | /* | ||
340 | * Already poisoned page. | ||
341 | */ | ||
342 | static int me_ignore(struct page *p, unsigned long pfn) | ||
343 | { | ||
344 | return IGNORED; | ||
345 | } | ||
346 | |||
347 | /* | ||
348 | * Page in unknown state. Do nothing. | ||
349 | */ | ||
350 | static int me_unknown(struct page *p, unsigned long pfn) | ||
351 | { | ||
352 | printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn); | ||
353 | return FAILED; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * Free memory | ||
358 | */ | ||
359 | static int me_free(struct page *p, unsigned long pfn) | ||
360 | { | ||
361 | return DELAYED; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Clean (or cleaned) page cache page. | ||
366 | */ | ||
367 | static int me_pagecache_clean(struct page *p, unsigned long pfn) | ||
368 | { | ||
369 | int err; | ||
370 | int ret = FAILED; | ||
371 | struct address_space *mapping; | ||
372 | |||
373 | if (!isolate_lru_page(p)) | ||
374 | page_cache_release(p); | ||
375 | |||
376 | /* | ||
377 | * For anonymous pages we're done: the only reference left | ||
378 | * should be the one memory_failure() holds. | ||
379 | */ | ||
380 | if (PageAnon(p)) | ||
381 | return RECOVERED; | ||
382 | |||
383 | /* | ||
384 | * Now truncate the page in the page cache. This is really | ||
385 | * more like a "temporary hole punch" | ||
386 | * Don't do this for block devices when someone else | ||
387 | * has a reference, because it could be file system metadata | ||
388 | * and that's not safe to truncate. | ||
389 | */ | ||
390 | mapping = page_mapping(p); | ||
391 | if (!mapping) { | ||
392 | /* | ||
393 | * Page has been torn down in the meantime | ||
394 | */ | ||
395 | return FAILED; | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * Truncation is a bit tricky. Enable it per file system for now. | ||
400 | * | ||
401 | * Open: to take i_mutex or not for this? Right now we don't. | ||
402 | */ | ||
403 | if (mapping->a_ops->error_remove_page) { | ||
404 | err = mapping->a_ops->error_remove_page(mapping, p); | ||
405 | if (err != 0) { | ||
406 | printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n", | ||
407 | pfn, err); | ||
408 | } else if (page_has_private(p) && | ||
409 | !try_to_release_page(p, GFP_NOIO)) { | ||
410 | pr_debug("MCE %#lx: failed to release buffers\n", pfn); | ||
411 | } else { | ||
412 | ret = RECOVERED; | ||
413 | } | ||
414 | } else { | ||
415 | /* | ||
416 | * If the file system doesn't support it just invalidate | ||
417 | * This fails on dirty or anything with private pages | ||
418 | */ | ||
419 | if (invalidate_inode_page(p)) | ||
420 | ret = RECOVERED; | ||
421 | else | ||
422 | printk(KERN_INFO "MCE %#lx: Failed to invalidate\n", | ||
423 | pfn); | ||
424 | } | ||
425 | return ret; | ||
426 | } | ||
427 | |||
428 | /* | ||
429 | * Dirty page cache page | ||
430 | * Issues: when the error hit a hole page the error is not properly | ||
431 | * propagated. | ||
432 | */ | ||
433 | static int me_pagecache_dirty(struct page *p, unsigned long pfn) | ||
434 | { | ||
435 | struct address_space *mapping = page_mapping(p); | ||
436 | |||
437 | SetPageError(p); | ||
438 | /* TBD: print more information about the file. */ | ||
439 | if (mapping) { | ||
440 | /* | ||
441 | * IO error will be reported by write(), fsync(), etc. | ||
442 | * who check the mapping. | ||
443 | * This way the application knows that something went | ||
444 | * wrong with its dirty file data. | ||
445 | * | ||
446 | * There's one open issue: | ||
447 | * | ||
448 | * The EIO will be only reported on the next IO | ||
449 | * operation and then cleared through the IO map. | ||
450 | * Normally Linux has two mechanisms to pass IO error | ||
451 | * first through the AS_EIO flag in the address space | ||
452 | * and then through the PageError flag in the page. | ||
453 | * Since we drop pages on memory failure handling the | ||
454 | * only mechanism open to use is through AS_EIO. | ||
455 | * | ||
456 | * This has the disadvantage that it gets cleared on | ||
457 | * the first operation that returns an error, while | ||
458 | * the PageError bit is more sticky and only cleared | ||
459 | * when the page is reread or dropped. If an | ||
460 | * application assumes it will always get error on | ||
461 | * fsync, but does other operations on the fd before | ||
462 | * and the page is dropped inbetween then the error | ||
463 | * will not be properly reported. | ||
464 | * | ||
465 | * This can already happen even without hwpoisoned | ||
466 | * pages: first on metadata IO errors (which only | ||
467 | * report through AS_EIO) or when the page is dropped | ||
468 | * at the wrong time. | ||
469 | * | ||
470 | * So right now we assume that the application DTRT on | ||
471 | * the first EIO, but we're not worse than other parts | ||
472 | * of the kernel. | ||
473 | */ | ||
474 | mapping_set_error(mapping, EIO); | ||
475 | } | ||
476 | |||
477 | return me_pagecache_clean(p, pfn); | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * Clean and dirty swap cache. | ||
482 | * | ||
483 | * Dirty swap cache page is tricky to handle. The page could live both in page | ||
484 | * cache and swap cache (i.e. the page is freshly swapped in). So it could be | ||
485 | * referenced concurrently by 2 types of PTEs: | ||
486 | * normal PTEs and swap PTEs. We try to handle them consistently by calling | ||
487 | * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs, | ||
488 | * and then | ||
489 | * - clear dirty bit to prevent IO | ||
490 | * - remove from LRU | ||
491 | * - but keep in the swap cache, so that when we return to it on | ||
492 | * a later page fault, we know the application is accessing | ||
493 | * corrupted data and shall be killed (we installed simple | ||
494 | * interception code in do_swap_page to catch it). | ||
495 | * | ||
496 | * Clean swap cache pages can be directly isolated. A later page fault will | ||
497 | * bring in the known good data from disk. | ||
498 | */ | ||
499 | static int me_swapcache_dirty(struct page *p, unsigned long pfn) | ||
500 | { | ||
501 | int ret = FAILED; | ||
502 | |||
503 | ClearPageDirty(p); | ||
504 | /* Trigger EIO in shmem: */ | ||
505 | ClearPageUptodate(p); | ||
506 | |||
507 | if (!isolate_lru_page(p)) { | ||
508 | page_cache_release(p); | ||
509 | ret = DELAYED; | ||
510 | } | ||
511 | |||
512 | return ret; | ||
513 | } | ||
514 | |||
515 | static int me_swapcache_clean(struct page *p, unsigned long pfn) | ||
516 | { | ||
517 | int ret = FAILED; | ||
518 | |||
519 | if (!isolate_lru_page(p)) { | ||
520 | page_cache_release(p); | ||
521 | ret = RECOVERED; | ||
522 | } | ||
523 | delete_from_swap_cache(p); | ||
524 | return ret; | ||
525 | } | ||
526 | |||
527 | /* | ||
528 | * Huge pages. Needs work. | ||
529 | * Issues: | ||
530 | * No rmap support so we cannot find the original mapper. In theory could walk | ||
531 | * all MMs and look for the mappings, but that would be non atomic and racy. | ||
532 | * Need rmap for hugepages for this. Alternatively we could employ a heuristic, | ||
533 | * like just walking the current process and hoping it has it mapped (that | ||
534 | * should be usually true for the common "shared database cache" case) | ||
535 | * Should handle free huge pages and dequeue them too, but this needs to | ||
536 | * handle huge page accounting correctly. | ||
537 | */ | ||
538 | static int me_huge_page(struct page *p, unsigned long pfn) | ||
539 | { | ||
540 | return FAILED; | ||
541 | } | ||
542 | |||
543 | /* | ||
544 | * Various page states we can handle. | ||
545 | * | ||
546 | * A page state is defined by its current page->flags bits. | ||
547 | * The table matches them in order and calls the right handler. | ||
548 | * | ||
549 | * This is quite tricky because we can access a page at any time | ||
550 | * in its life cycle, so all accesses have to be extremely careful. | ||
551 | * | ||
552 | * This is not complete. More states could be added. | ||
553 | * For any missing state don't attempt recovery. | ||
554 | */ | ||
555 | |||
556 | #define dirty (1UL << PG_dirty) | ||
557 | #define sc (1UL << PG_swapcache) | ||
558 | #define unevict (1UL << PG_unevictable) | ||
559 | #define mlock (1UL << PG_mlocked) | ||
560 | #define writeback (1UL << PG_writeback) | ||
561 | #define lru (1UL << PG_lru) | ||
562 | #define swapbacked (1UL << PG_swapbacked) | ||
563 | #define head (1UL << PG_head) | ||
564 | #define tail (1UL << PG_tail) | ||
565 | #define compound (1UL << PG_compound) | ||
566 | #define slab (1UL << PG_slab) | ||
567 | #define buddy (1UL << PG_buddy) | ||
568 | #define reserved (1UL << PG_reserved) | ||
569 | |||
570 | static struct page_state { | ||
571 | unsigned long mask; | ||
572 | unsigned long res; | ||
573 | char *msg; | ||
574 | int (*action)(struct page *p, unsigned long pfn); | ||
575 | } error_states[] = { | ||
576 | { reserved, reserved, "reserved kernel", me_ignore }, | ||
577 | { buddy, buddy, "free kernel", me_free }, | ||
578 | |||
579 | /* | ||
580 | * Could in theory check if slab page is free or if we can drop | ||
581 | * currently unused objects without touching them. But just | ||
582 | * treat it as standard kernel for now. | ||
583 | */ | ||
584 | { slab, slab, "kernel slab", me_kernel }, | ||
585 | |||
586 | #ifdef CONFIG_PAGEFLAGS_EXTENDED | ||
587 | { head, head, "huge", me_huge_page }, | ||
588 | { tail, tail, "huge", me_huge_page }, | ||
589 | #else | ||
590 | { compound, compound, "huge", me_huge_page }, | ||
591 | #endif | ||
592 | |||
593 | { sc|dirty, sc|dirty, "swapcache", me_swapcache_dirty }, | ||
594 | { sc|dirty, sc, "swapcache", me_swapcache_clean }, | ||
595 | |||
596 | { unevict|dirty, unevict|dirty, "unevictable LRU", me_pagecache_dirty}, | ||
597 | { unevict, unevict, "unevictable LRU", me_pagecache_clean}, | ||
598 | |||
599 | #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT | ||
600 | { mlock|dirty, mlock|dirty, "mlocked LRU", me_pagecache_dirty }, | ||
601 | { mlock, mlock, "mlocked LRU", me_pagecache_clean }, | ||
602 | #endif | ||
603 | |||
604 | { lru|dirty, lru|dirty, "LRU", me_pagecache_dirty }, | ||
605 | { lru|dirty, lru, "clean LRU", me_pagecache_clean }, | ||
606 | { swapbacked, swapbacked, "anonymous", me_pagecache_clean }, | ||
607 | |||
608 | /* | ||
609 | * Catchall entry: must be at end. | ||
610 | */ | ||
611 | { 0, 0, "unknown page state", me_unknown }, | ||
612 | }; | ||
613 | |||
614 | #undef lru | ||
615 | |||
616 | static void action_result(unsigned long pfn, char *msg, int result) | ||
617 | { | ||
618 | struct page *page = NULL; | ||
619 | if (pfn_valid(pfn)) | ||
620 | page = pfn_to_page(pfn); | ||
621 | |||
622 | printk(KERN_ERR "MCE %#lx: %s%s page recovery: %s\n", | ||
623 | pfn, | ||
624 | page && PageDirty(page) ? "dirty " : "", | ||
625 | msg, action_name[result]); | ||
626 | } | ||
627 | |||
628 | static int page_action(struct page_state *ps, struct page *p, | ||
629 | unsigned long pfn, int ref) | ||
630 | { | ||
631 | int result; | ||
632 | |||
633 | result = ps->action(p, pfn); | ||
634 | action_result(pfn, ps->msg, result); | ||
635 | if (page_count(p) != 1 + ref) | ||
636 | printk(KERN_ERR | ||
637 | "MCE %#lx: %s page still referenced by %d users\n", | ||
638 | pfn, ps->msg, page_count(p) - 1); | ||
639 | |||
640 | /* Could do more checks here if page looks ok */ | ||
641 | /* | ||
642 | * Could adjust zone counters here to correct for the missing page. | ||
643 | */ | ||
644 | |||
645 | return result == RECOVERED ? 0 : -EBUSY; | ||
646 | } | ||
647 | |||
648 | #define N_UNMAP_TRIES 5 | ||
649 | |||
650 | /* | ||
651 | * Do all that is necessary to remove user space mappings. Unmap | ||
652 | * the pages and send SIGBUS to the processes if the data was dirty. | ||
653 | */ | ||
654 | static void hwpoison_user_mappings(struct page *p, unsigned long pfn, | ||
655 | int trapno) | ||
656 | { | ||
657 | enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; | ||
658 | struct address_space *mapping; | ||
659 | LIST_HEAD(tokill); | ||
660 | int ret; | ||
661 | int i; | ||
662 | int kill = 1; | ||
663 | |||
664 | if (PageReserved(p) || PageCompound(p) || PageSlab(p)) | ||
665 | return; | ||
666 | |||
667 | if (!PageLRU(p)) | ||
668 | lru_add_drain_all(); | ||
669 | |||
670 | /* | ||
671 | * This check implies we don't kill processes if their pages | ||
672 | * are in the swap cache early. Those are always late kills. | ||
673 | */ | ||
674 | if (!page_mapped(p)) | ||
675 | return; | ||
676 | |||
677 | if (PageSwapCache(p)) { | ||
678 | printk(KERN_ERR | ||
679 | "MCE %#lx: keeping poisoned page in swap cache\n", pfn); | ||
680 | ttu |= TTU_IGNORE_HWPOISON; | ||
681 | } | ||
682 | |||
683 | /* | ||
684 | * Propagate the dirty bit from PTEs to struct page first, because we | ||
685 | * need this to decide if we should kill or just drop the page. | ||
686 | */ | ||
687 | mapping = page_mapping(p); | ||
688 | if (!PageDirty(p) && mapping && mapping_cap_writeback_dirty(mapping)) { | ||
689 | if (page_mkclean(p)) { | ||
690 | SetPageDirty(p); | ||
691 | } else { | ||
692 | kill = 0; | ||
693 | ttu |= TTU_IGNORE_HWPOISON; | ||
694 | printk(KERN_INFO | ||
695 | "MCE %#lx: corrupted page was clean: dropped without side effects\n", | ||
696 | pfn); | ||
697 | } | ||
698 | } | ||
699 | |||
700 | /* | ||
701 | * First collect all the processes that have the page | ||
702 | * mapped in dirty form. This has to be done before try_to_unmap, | ||
703 | * because ttu takes the rmap data structures down. | ||
704 | * | ||
705 | * Error handling: We ignore errors here because | ||
706 | * there's nothing that can be done. | ||
707 | */ | ||
708 | if (kill) | ||
709 | collect_procs(p, &tokill); | ||
710 | |||
711 | /* | ||
712 | * try_to_unmap can fail temporarily due to races. | ||
713 | * Try a few times (RED-PEN better strategy?) | ||
714 | */ | ||
715 | for (i = 0; i < N_UNMAP_TRIES; i++) { | ||
716 | ret = try_to_unmap(p, ttu); | ||
717 | if (ret == SWAP_SUCCESS) | ||
718 | break; | ||
719 | pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn, ret); | ||
720 | } | ||
721 | |||
722 | if (ret != SWAP_SUCCESS) | ||
723 | printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", | ||
724 | pfn, page_mapcount(p)); | ||
725 | |||
726 | /* | ||
727 | * Now that the dirty bit has been propagated to the | ||
728 | * struct page and all unmaps done we can decide if | ||
729 | * killing is needed or not. Only kill when the page | ||
730 | * was dirty, otherwise the tokill list is merely | ||
731 | * freed. When there was a problem unmapping earlier | ||
732 | * use a more forceful uncatchable kill to prevent | ||
733 | * any accesses to the poisoned memory. | ||
734 | */ | ||
735 | kill_procs_ao(&tokill, !!PageDirty(p), trapno, | ||
736 | ret != SWAP_SUCCESS, pfn); | ||
737 | } | ||
738 | |||
739 | int __memory_failure(unsigned long pfn, int trapno, int ref) | ||
740 | { | ||
741 | struct page_state *ps; | ||
742 | struct page *p; | ||
743 | int res; | ||
744 | |||
745 | if (!sysctl_memory_failure_recovery) | ||
746 | panic("Memory failure from trap %d on page %lx", trapno, pfn); | ||
747 | |||
748 | if (!pfn_valid(pfn)) { | ||
749 | action_result(pfn, "memory outside kernel control", IGNORED); | ||
750 | return -EIO; | ||
751 | } | ||
752 | |||
753 | p = pfn_to_page(pfn); | ||
754 | if (TestSetPageHWPoison(p)) { | ||
755 | action_result(pfn, "already hardware poisoned", IGNORED); | ||
756 | return 0; | ||
757 | } | ||
758 | |||
759 | atomic_long_add(1, &mce_bad_pages); | ||
760 | |||
761 | /* | ||
762 | * We need/can do nothing about count=0 pages. | ||
763 | * 1) it's a free page, and therefore in safe hand: | ||
764 | * prep_new_page() will be the gate keeper. | ||
765 | * 2) it's part of a non-compound high order page. | ||
766 | * Implies some kernel user: cannot stop them from | ||
767 | * R/W the page; let's pray that the page has been | ||
768 | * used and will be freed some time later. | ||
769 | * In fact it's dangerous to directly bump up page count from 0, | ||
770 | * that may make page_freeze_refs()/page_unfreeze_refs() mismatch. | ||
771 | */ | ||
772 | if (!get_page_unless_zero(compound_head(p))) { | ||
773 | action_result(pfn, "free or high order kernel", IGNORED); | ||
774 | return PageBuddy(compound_head(p)) ? 0 : -EBUSY; | ||
775 | } | ||
776 | |||
777 | /* | ||
778 | * Lock the page and wait for writeback to finish. | ||
779 | * It's very difficult to mess with pages currently under IO | ||
780 | * and in many cases impossible, so we just avoid it here. | ||
781 | */ | ||
782 | lock_page_nosync(p); | ||
783 | wait_on_page_writeback(p); | ||
784 | |||
785 | /* | ||
786 | * Now take care of user space mappings. | ||
787 | */ | ||
788 | hwpoison_user_mappings(p, pfn, trapno); | ||
789 | |||
790 | /* | ||
791 | * Torn down by someone else? | ||
792 | */ | ||
793 | if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { | ||
794 | action_result(pfn, "already truncated LRU", IGNORED); | ||
795 | res = 0; | ||
796 | goto out; | ||
797 | } | ||
798 | |||
799 | res = -EBUSY; | ||
800 | for (ps = error_states;; ps++) { | ||
801 | if ((p->flags & ps->mask) == ps->res) { | ||
802 | res = page_action(ps, p, pfn, ref); | ||
803 | break; | ||
804 | } | ||
805 | } | ||
806 | out: | ||
807 | unlock_page(p); | ||
808 | return res; | ||
809 | } | ||
810 | EXPORT_SYMBOL_GPL(__memory_failure); | ||
811 | |||
812 | /** | ||
813 | * memory_failure - Handle memory failure of a page. | ||
814 | * @pfn: Page Number of the corrupted page | ||
815 | * @trapno: Trap number reported in the signal to user space. | ||
816 | * | ||
817 | * This function is called by the low level machine check code | ||
818 | * of an architecture when it detects hardware memory corruption | ||
819 | * of a page. It tries its best to recover, which includes | ||
820 | * dropping pages, killing processes etc. | ||
821 | * | ||
822 | * The function is primarily of use for corruptions that | ||
823 | * happen outside the current execution context (e.g. when | ||
824 | * detected by a background scrubber) | ||
825 | * | ||
826 | * Must run in process context (e.g. a work queue) with interrupts | ||
827 | * enabled and no spinlocks held. | ||
828 | */ | ||
829 | void memory_failure(unsigned long pfn, int trapno) | ||
830 | { | ||
831 | __memory_failure(pfn, trapno, 0); | ||
832 | } | ||
diff --git a/mm/memory.c b/mm/memory.c index b1443ac07c00..987389a809e7 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1325,7 +1325,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1325 | if (ret & VM_FAULT_ERROR) { | 1325 | if (ret & VM_FAULT_ERROR) { |
1326 | if (ret & VM_FAULT_OOM) | 1326 | if (ret & VM_FAULT_OOM) |
1327 | return i ? i : -ENOMEM; | 1327 | return i ? i : -ENOMEM; |
1328 | else if (ret & VM_FAULT_SIGBUS) | 1328 | if (ret & |
1329 | (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) | ||
1329 | return i ? i : -EFAULT; | 1330 | return i ? i : -EFAULT; |
1330 | BUG(); | 1331 | BUG(); |
1331 | } | 1332 | } |
@@ -2559,8 +2560,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2559 | goto out; | 2560 | goto out; |
2560 | 2561 | ||
2561 | entry = pte_to_swp_entry(orig_pte); | 2562 | entry = pte_to_swp_entry(orig_pte); |
2562 | if (is_migration_entry(entry)) { | 2563 | if (unlikely(non_swap_entry(entry))) { |
2563 | migration_entry_wait(mm, pmd, address); | 2564 | if (is_migration_entry(entry)) { |
2565 | migration_entry_wait(mm, pmd, address); | ||
2566 | } else if (is_hwpoison_entry(entry)) { | ||
2567 | ret = VM_FAULT_HWPOISON; | ||
2568 | } else { | ||
2569 | print_bad_pte(vma, address, orig_pte, NULL); | ||
2570 | ret = VM_FAULT_OOM; | ||
2571 | } | ||
2564 | goto out; | 2572 | goto out; |
2565 | } | 2573 | } |
2566 | delayacct_set_flag(DELAYACCT_PF_SWAPIN); | 2574 | delayacct_set_flag(DELAYACCT_PF_SWAPIN); |
@@ -2584,6 +2592,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2584 | /* Had to read the page from swap area: Major fault */ | 2592 | /* Had to read the page from swap area: Major fault */ |
2585 | ret = VM_FAULT_MAJOR; | 2593 | ret = VM_FAULT_MAJOR; |
2586 | count_vm_event(PGMAJFAULT); | 2594 | count_vm_event(PGMAJFAULT); |
2595 | } else if (PageHWPoison(page)) { | ||
2596 | ret = VM_FAULT_HWPOISON; | ||
2597 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | ||
2598 | goto out; | ||
2587 | } | 2599 | } |
2588 | 2600 | ||
2589 | lock_page(page); | 2601 | lock_page(page); |
@@ -2760,6 +2772,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2760 | if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) | 2772 | if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) |
2761 | return ret; | 2773 | return ret; |
2762 | 2774 | ||
2775 | if (unlikely(PageHWPoison(vmf.page))) { | ||
2776 | if (ret & VM_FAULT_LOCKED) | ||
2777 | unlock_page(vmf.page); | ||
2778 | return VM_FAULT_HWPOISON; | ||
2779 | } | ||
2780 | |||
2763 | /* | 2781 | /* |
2764 | * For consistency in subsequent calls, make the faulted page always | 2782 | * For consistency in subsequent calls, make the faulted page always |
2765 | * locked. | 2783 | * locked. |
diff --git a/mm/migrate.c b/mm/migrate.c index 16052e80aaac..1a4bf4813780 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -675,7 +675,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, | |||
675 | } | 675 | } |
676 | 676 | ||
677 | /* Establish migration ptes or remove ptes */ | 677 | /* Establish migration ptes or remove ptes */ |
678 | try_to_unmap(page, 1); | 678 | try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); |
679 | 679 | ||
680 | skip_unmap: | 680 | skip_unmap: |
681 | if (!page_mapped(page)) | 681 | if (!page_mapped(page)) |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5f378dd58802..d99664e8607e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -155,37 +155,37 @@ static void update_completion_period(void) | |||
155 | } | 155 | } |
156 | 156 | ||
157 | int dirty_background_ratio_handler(struct ctl_table *table, int write, | 157 | int dirty_background_ratio_handler(struct ctl_table *table, int write, |
158 | struct file *filp, void __user *buffer, size_t *lenp, | 158 | void __user *buffer, size_t *lenp, |
159 | loff_t *ppos) | 159 | loff_t *ppos) |
160 | { | 160 | { |
161 | int ret; | 161 | int ret; |
162 | 162 | ||
163 | ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | 163 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
164 | if (ret == 0 && write) | 164 | if (ret == 0 && write) |
165 | dirty_background_bytes = 0; | 165 | dirty_background_bytes = 0; |
166 | return ret; | 166 | return ret; |
167 | } | 167 | } |
168 | 168 | ||
169 | int dirty_background_bytes_handler(struct ctl_table *table, int write, | 169 | int dirty_background_bytes_handler(struct ctl_table *table, int write, |
170 | struct file *filp, void __user *buffer, size_t *lenp, | 170 | void __user *buffer, size_t *lenp, |
171 | loff_t *ppos) | 171 | loff_t *ppos) |
172 | { | 172 | { |
173 | int ret; | 173 | int ret; |
174 | 174 | ||
175 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); | 175 | ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
176 | if (ret == 0 && write) | 176 | if (ret == 0 && write) |
177 | dirty_background_ratio = 0; | 177 | dirty_background_ratio = 0; |
178 | return ret; | 178 | return ret; |
179 | } | 179 | } |
180 | 180 | ||
181 | int dirty_ratio_handler(struct ctl_table *table, int write, | 181 | int dirty_ratio_handler(struct ctl_table *table, int write, |
182 | struct file *filp, void __user *buffer, size_t *lenp, | 182 | void __user *buffer, size_t *lenp, |
183 | loff_t *ppos) | 183 | loff_t *ppos) |
184 | { | 184 | { |
185 | int old_ratio = vm_dirty_ratio; | 185 | int old_ratio = vm_dirty_ratio; |
186 | int ret; | 186 | int ret; |
187 | 187 | ||
188 | ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | 188 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
189 | if (ret == 0 && write && vm_dirty_ratio != old_ratio) { | 189 | if (ret == 0 && write && vm_dirty_ratio != old_ratio) { |
190 | update_completion_period(); | 190 | update_completion_period(); |
191 | vm_dirty_bytes = 0; | 191 | vm_dirty_bytes = 0; |
@@ -195,13 +195,13 @@ int dirty_ratio_handler(struct ctl_table *table, int write, | |||
195 | 195 | ||
196 | 196 | ||
197 | int dirty_bytes_handler(struct ctl_table *table, int write, | 197 | int dirty_bytes_handler(struct ctl_table *table, int write, |
198 | struct file *filp, void __user *buffer, size_t *lenp, | 198 | void __user *buffer, size_t *lenp, |
199 | loff_t *ppos) | 199 | loff_t *ppos) |
200 | { | 200 | { |
201 | unsigned long old_bytes = vm_dirty_bytes; | 201 | unsigned long old_bytes = vm_dirty_bytes; |
202 | int ret; | 202 | int ret; |
203 | 203 | ||
204 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); | 204 | ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
205 | if (ret == 0 && write && vm_dirty_bytes != old_bytes) { | 205 | if (ret == 0 && write && vm_dirty_bytes != old_bytes) { |
206 | update_completion_period(); | 206 | update_completion_period(); |
207 | vm_dirty_ratio = 0; | 207 | vm_dirty_ratio = 0; |
@@ -686,9 +686,9 @@ static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); | |||
686 | * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs | 686 | * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs |
687 | */ | 687 | */ |
688 | int dirty_writeback_centisecs_handler(ctl_table *table, int write, | 688 | int dirty_writeback_centisecs_handler(ctl_table *table, int write, |
689 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 689 | void __user *buffer, size_t *length, loff_t *ppos) |
690 | { | 690 | { |
691 | proc_dointvec(table, write, file, buffer, length, ppos); | 691 | proc_dointvec(table, write, buffer, length, ppos); |
692 | return 0; | 692 | return 0; |
693 | } | 693 | } |
694 | 694 | ||
@@ -1149,6 +1149,13 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) | |||
1149 | EXPORT_SYMBOL(redirty_page_for_writepage); | 1149 | EXPORT_SYMBOL(redirty_page_for_writepage); |
1150 | 1150 | ||
1151 | /* | 1151 | /* |
1152 | * Dirty a page. | ||
1153 | * | ||
1154 | * For pages with a mapping this should be done under the page lock | ||
1155 | * for the benefit of asynchronous memory error handling, which prefers a | ||
1156 | * consistent dirty state. This rule can be broken in some special cases, | ||
1157 | * but it is better not to. | ||
1158 | * | ||
1152 | * If the mapping doesn't provide a set_page_dirty a_op, then | 1159 | * If the mapping doesn't provide a set_page_dirty a_op, then |
1153 | * just fall through and assume that it wants buffer_heads. | 1160 | * just fall through and assume that it wants buffer_heads. |
1154 | */ | 1161 | */ |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5717f27a0704..bf720550b44d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -234,6 +234,12 @@ static void bad_page(struct page *page) | |||
234 | static unsigned long nr_shown; | 234 | static unsigned long nr_shown; |
235 | static unsigned long nr_unshown; | 235 | static unsigned long nr_unshown; |
236 | 236 | ||
237 | /* Don't complain about poisoned pages */ | ||
238 | if (PageHWPoison(page)) { | ||
239 | __ClearPageBuddy(page); | ||
240 | return; | ||
241 | } | ||
242 | |||
237 | /* | 243 | /* |
238 | * Allow a burst of 60 reports, then keep quiet for that minute; | 244 | * Allow a burst of 60 reports, then keep quiet for that minute; |
239 | * or allow a steady drip of one report per second. | 245 | * or allow a steady drip of one report per second. |
@@ -666,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page, | |||
666 | /* | 672 | /* |
667 | * This page is about to be returned from the page allocator | 673 | * This page is about to be returned from the page allocator |
668 | */ | 674 | */ |
669 | static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) | 675 | static inline int check_new_page(struct page *page) |
670 | { | 676 | { |
671 | if (unlikely(page_mapcount(page) | | 677 | if (unlikely(page_mapcount(page) | |
672 | (page->mapping != NULL) | | 678 | (page->mapping != NULL) | |
@@ -675,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) | |||
675 | bad_page(page); | 681 | bad_page(page); |
676 | return 1; | 682 | return 1; |
677 | } | 683 | } |
684 | return 0; | ||
685 | } | ||
686 | |||
687 | static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) | ||
688 | { | ||
689 | int i; | ||
690 | |||
691 | for (i = 0; i < (1 << order); i++) { | ||
692 | struct page *p = page + i; | ||
693 | if (unlikely(check_new_page(p))) | ||
694 | return 1; | ||
695 | } | ||
678 | 696 | ||
679 | set_page_private(page, 0); | 697 | set_page_private(page, 0); |
680 | set_page_refcounted(page); | 698 | set_page_refcounted(page); |
@@ -2373,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order); | |||
2373 | * sysctl handler for numa_zonelist_order | 2391 | * sysctl handler for numa_zonelist_order |
2374 | */ | 2392 | */ |
2375 | int numa_zonelist_order_handler(ctl_table *table, int write, | 2393 | int numa_zonelist_order_handler(ctl_table *table, int write, |
2376 | struct file *file, void __user *buffer, size_t *length, | 2394 | void __user *buffer, size_t *length, |
2377 | loff_t *ppos) | 2395 | loff_t *ppos) |
2378 | { | 2396 | { |
2379 | char saved_string[NUMA_ZONELIST_ORDER_LEN]; | 2397 | char saved_string[NUMA_ZONELIST_ORDER_LEN]; |
@@ -2382,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write, | |||
2382 | if (write) | 2400 | if (write) |
2383 | strncpy(saved_string, (char*)table->data, | 2401 | strncpy(saved_string, (char*)table->data, |
2384 | NUMA_ZONELIST_ORDER_LEN); | 2402 | NUMA_ZONELIST_ORDER_LEN); |
2385 | ret = proc_dostring(table, write, file, buffer, length, ppos); | 2403 | ret = proc_dostring(table, write, buffer, length, ppos); |
2386 | if (ret) | 2404 | if (ret) |
2387 | return ret; | 2405 | return ret; |
2388 | if (write) { | 2406 | if (write) { |
@@ -4706,9 +4724,9 @@ module_init(init_per_zone_wmark_min) | |||
4706 | * changes. | 4724 | * changes. |
4707 | */ | 4725 | */ |
4708 | int min_free_kbytes_sysctl_handler(ctl_table *table, int write, | 4726 | int min_free_kbytes_sysctl_handler(ctl_table *table, int write, |
4709 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 4727 | void __user *buffer, size_t *length, loff_t *ppos) |
4710 | { | 4728 | { |
4711 | proc_dointvec(table, write, file, buffer, length, ppos); | 4729 | proc_dointvec(table, write, buffer, length, ppos); |
4712 | if (write) | 4730 | if (write) |
4713 | setup_per_zone_wmarks(); | 4731 | setup_per_zone_wmarks(); |
4714 | return 0; | 4732 | return 0; |
@@ -4716,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, | |||
4716 | 4734 | ||
4717 | #ifdef CONFIG_NUMA | 4735 | #ifdef CONFIG_NUMA |
4718 | int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, | 4736 | int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, |
4719 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 4737 | void __user *buffer, size_t *length, loff_t *ppos) |
4720 | { | 4738 | { |
4721 | struct zone *zone; | 4739 | struct zone *zone; |
4722 | int rc; | 4740 | int rc; |
4723 | 4741 | ||
4724 | rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 4742 | rc = proc_dointvec_minmax(table, write, buffer, length, ppos); |
4725 | if (rc) | 4743 | if (rc) |
4726 | return rc; | 4744 | return rc; |
4727 | 4745 | ||
@@ -4732,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, | |||
4732 | } | 4750 | } |
4733 | 4751 | ||
4734 | int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, | 4752 | int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, |
4735 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 4753 | void __user *buffer, size_t *length, loff_t *ppos) |
4736 | { | 4754 | { |
4737 | struct zone *zone; | 4755 | struct zone *zone; |
4738 | int rc; | 4756 | int rc; |
4739 | 4757 | ||
4740 | rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 4758 | rc = proc_dointvec_minmax(table, write, buffer, length, ppos); |
4741 | if (rc) | 4759 | if (rc) |
4742 | return rc; | 4760 | return rc; |
4743 | 4761 | ||
@@ -4758,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, | |||
4758 | * if in function of the boot time zone sizes. | 4776 | * if in function of the boot time zone sizes. |
4759 | */ | 4777 | */ |
4760 | int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, | 4778 | int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, |
4761 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 4779 | void __user *buffer, size_t *length, loff_t *ppos) |
4762 | { | 4780 | { |
4763 | proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 4781 | proc_dointvec_minmax(table, write, buffer, length, ppos); |
4764 | setup_per_zone_lowmem_reserve(); | 4782 | setup_per_zone_lowmem_reserve(); |
4765 | return 0; | 4783 | return 0; |
4766 | } | 4784 | } |
@@ -4772,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, | |||
4772 | */ | 4790 | */ |
4773 | 4791 | ||
4774 | int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, | 4792 | int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, |
4775 | struct file *file, void __user *buffer, size_t *length, loff_t *ppos) | 4793 | void __user *buffer, size_t *length, loff_t *ppos) |
4776 | { | 4794 | { |
4777 | struct zone *zone; | 4795 | struct zone *zone; |
4778 | unsigned int cpu; | 4796 | unsigned int cpu; |
4779 | int ret; | 4797 | int ret; |
4780 | 4798 | ||
4781 | ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); | 4799 | ret = proc_dointvec_minmax(table, write, buffer, length, ppos); |
4782 | if (!write || (ret == -EINVAL)) | 4800 | if (!write || (ret == -EINVAL)) |
4783 | return ret; | 4801 | return ret; |
4784 | for_each_populated_zone(zone) { | 4802 | for_each_populated_zone(zone) { |
@@ -36,6 +36,11 @@ | |||
36 | * mapping->tree_lock (widely used, in set_page_dirty, | 36 | * mapping->tree_lock (widely used, in set_page_dirty, |
37 | * in arch-dependent flush_dcache_mmap_lock, | 37 | * in arch-dependent flush_dcache_mmap_lock, |
38 | * within inode_lock in __sync_single_inode) | 38 | * within inode_lock in __sync_single_inode) |
39 | * | ||
40 | * (code doesn't rely on that order so it could be switched around) | ||
41 | * ->tasklist_lock | ||
42 | * anon_vma->lock (memory_failure, collect_procs_anon) | ||
43 | * pte map lock | ||
39 | */ | 44 | */ |
40 | 45 | ||
41 | #include <linux/mm.h> | 46 | #include <linux/mm.h> |
@@ -191,7 +196,7 @@ void __init anon_vma_init(void) | |||
191 | * Getting a lock on a stable anon_vma from a page off the LRU is | 196 | * Getting a lock on a stable anon_vma from a page off the LRU is |
192 | * tricky: page_lock_anon_vma rely on RCU to guard against the races. | 197 | * tricky: page_lock_anon_vma rely on RCU to guard against the races. |
193 | */ | 198 | */ |
194 | static struct anon_vma *page_lock_anon_vma(struct page *page) | 199 | struct anon_vma *page_lock_anon_vma(struct page *page) |
195 | { | 200 | { |
196 | struct anon_vma *anon_vma; | 201 | struct anon_vma *anon_vma; |
197 | unsigned long anon_mapping; | 202 | unsigned long anon_mapping; |
@@ -211,7 +216,7 @@ out: | |||
211 | return NULL; | 216 | return NULL; |
212 | } | 217 | } |
213 | 218 | ||
214 | static void page_unlock_anon_vma(struct anon_vma *anon_vma) | 219 | void page_unlock_anon_vma(struct anon_vma *anon_vma) |
215 | { | 220 | { |
216 | spin_unlock(&anon_vma->lock); | 221 | spin_unlock(&anon_vma->lock); |
217 | rcu_read_unlock(); | 222 | rcu_read_unlock(); |
@@ -311,7 +316,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm, | |||
311 | * if the page is not mapped into the page tables of this VMA. Only | 316 | * if the page is not mapped into the page tables of this VMA. Only |
312 | * valid for normal file or anonymous VMAs. | 317 | * valid for normal file or anonymous VMAs. |
313 | */ | 318 | */ |
314 | static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) | 319 | int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) |
315 | { | 320 | { |
316 | unsigned long address; | 321 | unsigned long address; |
317 | pte_t *pte; | 322 | pte_t *pte; |
@@ -756,7 +761,7 @@ void page_remove_rmap(struct page *page) | |||
756 | * repeatedly from either try_to_unmap_anon or try_to_unmap_file. | 761 | * repeatedly from either try_to_unmap_anon or try_to_unmap_file. |
757 | */ | 762 | */ |
758 | static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | 763 | static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, |
759 | int migration) | 764 | enum ttu_flags flags) |
760 | { | 765 | { |
761 | struct mm_struct *mm = vma->vm_mm; | 766 | struct mm_struct *mm = vma->vm_mm; |
762 | unsigned long address; | 767 | unsigned long address; |
@@ -778,11 +783,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
778 | * If it's recently referenced (perhaps page_referenced | 783 | * If it's recently referenced (perhaps page_referenced |
779 | * skipped over this mm) then we should reactivate it. | 784 | * skipped over this mm) then we should reactivate it. |
780 | */ | 785 | */ |
781 | if (!migration) { | 786 | if (!(flags & TTU_IGNORE_MLOCK)) { |
782 | if (vma->vm_flags & VM_LOCKED) { | 787 | if (vma->vm_flags & VM_LOCKED) { |
783 | ret = SWAP_MLOCK; | 788 | ret = SWAP_MLOCK; |
784 | goto out_unmap; | 789 | goto out_unmap; |
785 | } | 790 | } |
791 | } | ||
792 | if (!(flags & TTU_IGNORE_ACCESS)) { | ||
786 | if (ptep_clear_flush_young_notify(vma, address, pte)) { | 793 | if (ptep_clear_flush_young_notify(vma, address, pte)) { |
787 | ret = SWAP_FAIL; | 794 | ret = SWAP_FAIL; |
788 | goto out_unmap; | 795 | goto out_unmap; |
@@ -800,7 +807,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
800 | /* Update high watermark before we lower rss */ | 807 | /* Update high watermark before we lower rss */ |
801 | update_hiwater_rss(mm); | 808 | update_hiwater_rss(mm); |
802 | 809 | ||
803 | if (PageAnon(page)) { | 810 | if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) { |
811 | if (PageAnon(page)) | ||
812 | dec_mm_counter(mm, anon_rss); | ||
813 | else | ||
814 | dec_mm_counter(mm, file_rss); | ||
815 | set_pte_at(mm, address, pte, | ||
816 | swp_entry_to_pte(make_hwpoison_entry(page))); | ||
817 | } else if (PageAnon(page)) { | ||
804 | swp_entry_t entry = { .val = page_private(page) }; | 818 | swp_entry_t entry = { .val = page_private(page) }; |
805 | 819 | ||
806 | if (PageSwapCache(page)) { | 820 | if (PageSwapCache(page)) { |
@@ -822,12 +836,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | |||
822 | * pte. do_swap_page() will wait until the migration | 836 | * pte. do_swap_page() will wait until the migration |
823 | * pte is removed and then restart fault handling. | 837 | * pte is removed and then restart fault handling. |
824 | */ | 838 | */ |
825 | BUG_ON(!migration); | 839 | BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION); |
826 | entry = make_migration_entry(page, pte_write(pteval)); | 840 | entry = make_migration_entry(page, pte_write(pteval)); |
827 | } | 841 | } |
828 | set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); | 842 | set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); |
829 | BUG_ON(pte_file(*pte)); | 843 | BUG_ON(pte_file(*pte)); |
830 | } else if (PAGE_MIGRATION && migration) { | 844 | } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) { |
831 | /* Establish migration entry for a file page */ | 845 | /* Establish migration entry for a file page */ |
832 | swp_entry_t entry; | 846 | swp_entry_t entry; |
833 | entry = make_migration_entry(page, pte_write(pteval)); | 847 | entry = make_migration_entry(page, pte_write(pteval)); |
@@ -996,12 +1010,13 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma) | |||
996 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | 1010 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be |
997 | * 'LOCKED. | 1011 | * 'LOCKED. |
998 | */ | 1012 | */ |
999 | static int try_to_unmap_anon(struct page *page, int unlock, int migration) | 1013 | static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) |
1000 | { | 1014 | { |
1001 | struct anon_vma *anon_vma; | 1015 | struct anon_vma *anon_vma; |
1002 | struct vm_area_struct *vma; | 1016 | struct vm_area_struct *vma; |
1003 | unsigned int mlocked = 0; | 1017 | unsigned int mlocked = 0; |
1004 | int ret = SWAP_AGAIN; | 1018 | int ret = SWAP_AGAIN; |
1019 | int unlock = TTU_ACTION(flags) == TTU_MUNLOCK; | ||
1005 | 1020 | ||
1006 | if (MLOCK_PAGES && unlikely(unlock)) | 1021 | if (MLOCK_PAGES && unlikely(unlock)) |
1007 | ret = SWAP_SUCCESS; /* default for try_to_munlock() */ | 1022 | ret = SWAP_SUCCESS; /* default for try_to_munlock() */ |
@@ -1017,7 +1032,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) | |||
1017 | continue; /* must visit all unlocked vmas */ | 1032 | continue; /* must visit all unlocked vmas */ |
1018 | ret = SWAP_MLOCK; /* saw at least one mlocked vma */ | 1033 | ret = SWAP_MLOCK; /* saw at least one mlocked vma */ |
1019 | } else { | 1034 | } else { |
1020 | ret = try_to_unmap_one(page, vma, migration); | 1035 | ret = try_to_unmap_one(page, vma, flags); |
1021 | if (ret == SWAP_FAIL || !page_mapped(page)) | 1036 | if (ret == SWAP_FAIL || !page_mapped(page)) |
1022 | break; | 1037 | break; |
1023 | } | 1038 | } |
@@ -1041,8 +1056,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) | |||
1041 | /** | 1056 | /** |
1042 | * try_to_unmap_file - unmap/unlock file page using the object-based rmap method | 1057 | * try_to_unmap_file - unmap/unlock file page using the object-based rmap method |
1043 | * @page: the page to unmap/unlock | 1058 | * @page: the page to unmap/unlock |
1044 | * @unlock: request for unlock rather than unmap [unlikely] | 1059 | * @flags: action and flags |
1045 | * @migration: unmapping for migration - ignored if @unlock | ||
1046 | * | 1060 | * |
1047 | * Find all the mappings of a page using the mapping pointer and the vma chains | 1061 | * Find all the mappings of a page using the mapping pointer and the vma chains |
1048 | * contained in the address_space struct it points to. | 1062 | * contained in the address_space struct it points to. |
@@ -1054,7 +1068,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) | |||
1054 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | 1068 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be |
1055 | * 'LOCKED. | 1069 | * 'LOCKED. |
1056 | */ | 1070 | */ |
1057 | static int try_to_unmap_file(struct page *page, int unlock, int migration) | 1071 | static int try_to_unmap_file(struct page *page, enum ttu_flags flags) |
1058 | { | 1072 | { |
1059 | struct address_space *mapping = page->mapping; | 1073 | struct address_space *mapping = page->mapping; |
1060 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 1074 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); |
@@ -1066,6 +1080,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) | |||
1066 | unsigned long max_nl_size = 0; | 1080 | unsigned long max_nl_size = 0; |
1067 | unsigned int mapcount; | 1081 | unsigned int mapcount; |
1068 | unsigned int mlocked = 0; | 1082 | unsigned int mlocked = 0; |
1083 | int unlock = TTU_ACTION(flags) == TTU_MUNLOCK; | ||
1069 | 1084 | ||
1070 | if (MLOCK_PAGES && unlikely(unlock)) | 1085 | if (MLOCK_PAGES && unlikely(unlock)) |
1071 | ret = SWAP_SUCCESS; /* default for try_to_munlock() */ | 1086 | ret = SWAP_SUCCESS; /* default for try_to_munlock() */ |
@@ -1078,7 +1093,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) | |||
1078 | continue; /* must visit all vmas */ | 1093 | continue; /* must visit all vmas */ |
1079 | ret = SWAP_MLOCK; | 1094 | ret = SWAP_MLOCK; |
1080 | } else { | 1095 | } else { |
1081 | ret = try_to_unmap_one(page, vma, migration); | 1096 | ret = try_to_unmap_one(page, vma, flags); |
1082 | if (ret == SWAP_FAIL || !page_mapped(page)) | 1097 | if (ret == SWAP_FAIL || !page_mapped(page)) |
1083 | goto out; | 1098 | goto out; |
1084 | } | 1099 | } |
@@ -1103,7 +1118,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) | |||
1103 | ret = SWAP_MLOCK; /* leave mlocked == 0 */ | 1118 | ret = SWAP_MLOCK; /* leave mlocked == 0 */ |
1104 | goto out; /* no need to look further */ | 1119 | goto out; /* no need to look further */ |
1105 | } | 1120 | } |
1106 | if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED)) | 1121 | if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) && |
1122 | (vma->vm_flags & VM_LOCKED)) | ||
1107 | continue; | 1123 | continue; |
1108 | cursor = (unsigned long) vma->vm_private_data; | 1124 | cursor = (unsigned long) vma->vm_private_data; |
1109 | if (cursor > max_nl_cursor) | 1125 | if (cursor > max_nl_cursor) |
@@ -1137,7 +1153,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) | |||
1137 | do { | 1153 | do { |
1138 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | 1154 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, |
1139 | shared.vm_set.list) { | 1155 | shared.vm_set.list) { |
1140 | if (!MLOCK_PAGES && !migration && | 1156 | if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) && |
1141 | (vma->vm_flags & VM_LOCKED)) | 1157 | (vma->vm_flags & VM_LOCKED)) |
1142 | continue; | 1158 | continue; |
1143 | cursor = (unsigned long) vma->vm_private_data; | 1159 | cursor = (unsigned long) vma->vm_private_data; |
@@ -1177,7 +1193,7 @@ out: | |||
1177 | /** | 1193 | /** |
1178 | * try_to_unmap - try to remove all page table mappings to a page | 1194 | * try_to_unmap - try to remove all page table mappings to a page |
1179 | * @page: the page to get unmapped | 1195 | * @page: the page to get unmapped |
1180 | * @migration: migration flag | 1196 | * @flags: action and flags |
1181 | * | 1197 | * |
1182 | * Tries to remove all the page table entries which are mapping this | 1198 | * Tries to remove all the page table entries which are mapping this |
1183 | * page, used in the pageout path. Caller must hold the page lock. | 1199 | * page, used in the pageout path. Caller must hold the page lock. |
@@ -1188,16 +1204,16 @@ out: | |||
1188 | * SWAP_FAIL - the page is unswappable | 1204 | * SWAP_FAIL - the page is unswappable |
1189 | * SWAP_MLOCK - page is mlocked. | 1205 | * SWAP_MLOCK - page is mlocked. |
1190 | */ | 1206 | */ |
1191 | int try_to_unmap(struct page *page, int migration) | 1207 | int try_to_unmap(struct page *page, enum ttu_flags flags) |
1192 | { | 1208 | { |
1193 | int ret; | 1209 | int ret; |
1194 | 1210 | ||
1195 | BUG_ON(!PageLocked(page)); | 1211 | BUG_ON(!PageLocked(page)); |
1196 | 1212 | ||
1197 | if (PageAnon(page)) | 1213 | if (PageAnon(page)) |
1198 | ret = try_to_unmap_anon(page, 0, migration); | 1214 | ret = try_to_unmap_anon(page, flags); |
1199 | else | 1215 | else |
1200 | ret = try_to_unmap_file(page, 0, migration); | 1216 | ret = try_to_unmap_file(page, flags); |
1201 | if (ret != SWAP_MLOCK && !page_mapped(page)) | 1217 | if (ret != SWAP_MLOCK && !page_mapped(page)) |
1202 | ret = SWAP_SUCCESS; | 1218 | ret = SWAP_SUCCESS; |
1203 | return ret; | 1219 | return ret; |
@@ -1222,8 +1238,8 @@ int try_to_munlock(struct page *page) | |||
1222 | VM_BUG_ON(!PageLocked(page) || PageLRU(page)); | 1238 | VM_BUG_ON(!PageLocked(page) || PageLRU(page)); |
1223 | 1239 | ||
1224 | if (PageAnon(page)) | 1240 | if (PageAnon(page)) |
1225 | return try_to_unmap_anon(page, 1, 0); | 1241 | return try_to_unmap_anon(page, TTU_MUNLOCK); |
1226 | else | 1242 | else |
1227 | return try_to_unmap_file(page, 1, 0); | 1243 | return try_to_unmap_file(page, TTU_MUNLOCK); |
1228 | } | 1244 | } |
1229 | 1245 | ||
diff --git a/mm/shmem.c b/mm/shmem.c index b206a7a32e2a..98631c26c200 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -1633,8 +1633,8 @@ shmem_write_end(struct file *file, struct address_space *mapping, | |||
1633 | if (pos + copied > inode->i_size) | 1633 | if (pos + copied > inode->i_size) |
1634 | i_size_write(inode, pos + copied); | 1634 | i_size_write(inode, pos + copied); |
1635 | 1635 | ||
1636 | unlock_page(page); | ||
1637 | set_page_dirty(page); | 1636 | set_page_dirty(page); |
1637 | unlock_page(page); | ||
1638 | page_cache_release(page); | 1638 | page_cache_release(page); |
1639 | 1639 | ||
1640 | return copied; | 1640 | return copied; |
@@ -1971,13 +1971,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
1971 | iput(inode); | 1971 | iput(inode); |
1972 | return error; | 1972 | return error; |
1973 | } | 1973 | } |
1974 | unlock_page(page); | ||
1975 | inode->i_mapping->a_ops = &shmem_aops; | 1974 | inode->i_mapping->a_ops = &shmem_aops; |
1976 | inode->i_op = &shmem_symlink_inode_operations; | 1975 | inode->i_op = &shmem_symlink_inode_operations; |
1977 | kaddr = kmap_atomic(page, KM_USER0); | 1976 | kaddr = kmap_atomic(page, KM_USER0); |
1978 | memcpy(kaddr, symname, len); | 1977 | memcpy(kaddr, symname, len); |
1979 | kunmap_atomic(kaddr, KM_USER0); | 1978 | kunmap_atomic(kaddr, KM_USER0); |
1980 | set_page_dirty(page); | 1979 | set_page_dirty(page); |
1980 | unlock_page(page); | ||
1981 | page_cache_release(page); | 1981 | page_cache_release(page); |
1982 | } | 1982 | } |
1983 | if (dir->i_mode & S_ISGID) | 1983 | if (dir->i_mode & S_ISGID) |
@@ -2420,6 +2420,7 @@ static const struct address_space_operations shmem_aops = { | |||
2420 | .write_end = shmem_write_end, | 2420 | .write_end = shmem_write_end, |
2421 | #endif | 2421 | #endif |
2422 | .migratepage = migrate_page, | 2422 | .migratepage = migrate_page, |
2423 | .error_remove_page = generic_error_remove_page, | ||
2423 | }; | 2424 | }; |
2424 | 2425 | ||
2425 | static const struct file_operations shmem_file_operations = { | 2426 | static const struct file_operations shmem_file_operations = { |
diff --git a/mm/swapfile.c b/mm/swapfile.c index f1bf19daadc6..4de7f02f820b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -699,7 +699,7 @@ int free_swap_and_cache(swp_entry_t entry) | |||
699 | struct swap_info_struct *p; | 699 | struct swap_info_struct *p; |
700 | struct page *page = NULL; | 700 | struct page *page = NULL; |
701 | 701 | ||
702 | if (is_migration_entry(entry)) | 702 | if (non_swap_entry(entry)) |
703 | return 1; | 703 | return 1; |
704 | 704 | ||
705 | p = swap_info_get(entry); | 705 | p = swap_info_get(entry); |
@@ -2085,7 +2085,7 @@ static int __swap_duplicate(swp_entry_t entry, bool cache) | |||
2085 | int count; | 2085 | int count; |
2086 | bool has_cache; | 2086 | bool has_cache; |
2087 | 2087 | ||
2088 | if (is_migration_entry(entry)) | 2088 | if (non_swap_entry(entry)) |
2089 | return -EINVAL; | 2089 | return -EINVAL; |
2090 | 2090 | ||
2091 | type = swp_type(entry); | 2091 | type = swp_type(entry); |
diff --git a/mm/truncate.c b/mm/truncate.c index ccc3ecf7cb98..a17b3977cfdf 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -93,11 +93,11 @@ EXPORT_SYMBOL(cancel_dirty_page); | |||
93 | * its lock, b) when a concurrent invalidate_mapping_pages got there first and | 93 | * its lock, b) when a concurrent invalidate_mapping_pages got there first and |
94 | * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. | 94 | * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. |
95 | */ | 95 | */ |
96 | static void | 96 | static int |
97 | truncate_complete_page(struct address_space *mapping, struct page *page) | 97 | truncate_complete_page(struct address_space *mapping, struct page *page) |
98 | { | 98 | { |
99 | if (page->mapping != mapping) | 99 | if (page->mapping != mapping) |
100 | return; | 100 | return -EIO; |
101 | 101 | ||
102 | if (page_has_private(page)) | 102 | if (page_has_private(page)) |
103 | do_invalidatepage(page, 0); | 103 | do_invalidatepage(page, 0); |
@@ -108,6 +108,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
108 | remove_from_page_cache(page); | 108 | remove_from_page_cache(page); |
109 | ClearPageMappedToDisk(page); | 109 | ClearPageMappedToDisk(page); |
110 | page_cache_release(page); /* pagecache ref */ | 110 | page_cache_release(page); /* pagecache ref */ |
111 | return 0; | ||
111 | } | 112 | } |
112 | 113 | ||
113 | /* | 114 | /* |
@@ -135,6 +136,51 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) | |||
135 | return ret; | 136 | return ret; |
136 | } | 137 | } |
137 | 138 | ||
139 | int truncate_inode_page(struct address_space *mapping, struct page *page) | ||
140 | { | ||
141 | if (page_mapped(page)) { | ||
142 | unmap_mapping_range(mapping, | ||
143 | (loff_t)page->index << PAGE_CACHE_SHIFT, | ||
144 | PAGE_CACHE_SIZE, 0); | ||
145 | } | ||
146 | return truncate_complete_page(mapping, page); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Used to get rid of pages on hardware memory corruption. | ||
151 | */ | ||
152 | int generic_error_remove_page(struct address_space *mapping, struct page *page) | ||
153 | { | ||
154 | if (!mapping) | ||
155 | return -EINVAL; | ||
156 | /* | ||
157 | * Only punch for normal data pages for now. | ||
158 | * Handling other types like directories would need more auditing. | ||
159 | */ | ||
160 | if (!S_ISREG(mapping->host->i_mode)) | ||
161 | return -EIO; | ||
162 | return truncate_inode_page(mapping, page); | ||
163 | } | ||
164 | EXPORT_SYMBOL(generic_error_remove_page); | ||
165 | |||
166 | /* | ||
167 | * Safely invalidate one page from its pagecache mapping. | ||
168 | * It only drops clean, unused pages. The page must be locked. | ||
169 | * | ||
170 | * Returns 1 if the page is successfully invalidated, otherwise 0. | ||
171 | */ | ||
172 | int invalidate_inode_page(struct page *page) | ||
173 | { | ||
174 | struct address_space *mapping = page_mapping(page); | ||
175 | if (!mapping) | ||
176 | return 0; | ||
177 | if (PageDirty(page) || PageWriteback(page)) | ||
178 | return 0; | ||
179 | if (page_mapped(page)) | ||
180 | return 0; | ||
181 | return invalidate_complete_page(mapping, page); | ||
182 | } | ||
183 | |||
138 | /** | 184 | /** |
139 | * truncate_inode_pages - truncate range of pages specified by start & end byte offsets | 185 | * truncate_inode_pages - truncate range of pages specified by start & end byte offsets |
140 | * @mapping: mapping to truncate | 186 | * @mapping: mapping to truncate |
@@ -196,12 +242,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
196 | unlock_page(page); | 242 | unlock_page(page); |
197 | continue; | 243 | continue; |
198 | } | 244 | } |
199 | if (page_mapped(page)) { | 245 | truncate_inode_page(mapping, page); |
200 | unmap_mapping_range(mapping, | ||
201 | (loff_t)page_index<<PAGE_CACHE_SHIFT, | ||
202 | PAGE_CACHE_SIZE, 0); | ||
203 | } | ||
204 | truncate_complete_page(mapping, page); | ||
205 | unlock_page(page); | 246 | unlock_page(page); |
206 | } | 247 | } |
207 | pagevec_release(&pvec); | 248 | pagevec_release(&pvec); |
@@ -238,15 +279,10 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
238 | break; | 279 | break; |
239 | lock_page(page); | 280 | lock_page(page); |
240 | wait_on_page_writeback(page); | 281 | wait_on_page_writeback(page); |
241 | if (page_mapped(page)) { | 282 | truncate_inode_page(mapping, page); |
242 | unmap_mapping_range(mapping, | ||
243 | (loff_t)page->index<<PAGE_CACHE_SHIFT, | ||
244 | PAGE_CACHE_SIZE, 0); | ||
245 | } | ||
246 | if (page->index > next) | 283 | if (page->index > next) |
247 | next = page->index; | 284 | next = page->index; |
248 | next++; | 285 | next++; |
249 | truncate_complete_page(mapping, page); | ||
250 | unlock_page(page); | 286 | unlock_page(page); |
251 | } | 287 | } |
252 | pagevec_release(&pvec); | 288 | pagevec_release(&pvec); |
@@ -311,12 +347,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, | |||
311 | if (lock_failed) | 347 | if (lock_failed) |
312 | continue; | 348 | continue; |
313 | 349 | ||
314 | if (PageDirty(page) || PageWriteback(page)) | 350 | ret += invalidate_inode_page(page); |
315 | goto unlock; | 351 | |
316 | if (page_mapped(page)) | ||
317 | goto unlock; | ||
318 | ret += invalidate_complete_page(mapping, page); | ||
319 | unlock: | ||
320 | unlock_page(page); | 352 | unlock_page(page); |
321 | if (next > end) | 353 | if (next > end) |
322 | break; | 354 | break; |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 613e89f471d9..1219ceb8a9b2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -663,7 +663,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
663 | * processes. Try to unmap it here. | 663 | * processes. Try to unmap it here. |
664 | */ | 664 | */ |
665 | if (page_mapped(page) && mapping) { | 665 | if (page_mapped(page) && mapping) { |
666 | switch (try_to_unmap(page, 0)) { | 666 | switch (try_to_unmap(page, TTU_UNMAP)) { |
667 | case SWAP_FAIL: | 667 | case SWAP_FAIL: |
668 | goto activate_locked; | 668 | goto activate_locked; |
669 | case SWAP_AGAIN: | 669 | case SWAP_AGAIN: |
@@ -1836,11 +1836,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
1836 | 1836 | ||
1837 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 1837 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
1838 | 1838 | ||
1839 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | ||
1840 | gfp_t gfp_mask, bool noswap, | ||
1841 | unsigned int swappiness, | ||
1842 | struct zone *zone, int nid) | ||
1843 | { | ||
1844 | struct scan_control sc = { | ||
1845 | .may_writepage = !laptop_mode, | ||
1846 | .may_unmap = 1, | ||
1847 | .may_swap = !noswap, | ||
1848 | .swap_cluster_max = SWAP_CLUSTER_MAX, | ||
1849 | .swappiness = swappiness, | ||
1850 | .order = 0, | ||
1851 | .mem_cgroup = mem, | ||
1852 | .isolate_pages = mem_cgroup_isolate_pages, | ||
1853 | }; | ||
1854 | nodemask_t nm = nodemask_of_node(nid); | ||
1855 | |||
1856 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | ||
1857 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); | ||
1858 | sc.nodemask = &nm; | ||
1859 | sc.nr_reclaimed = 0; | ||
1860 | sc.nr_scanned = 0; | ||
1861 | /* | ||
1862 | * NOTE: Although we can get the priority field, using it | ||
1863 | * here is not a good idea, since it limits the pages we can scan. | ||
1864 | * if we don't reclaim here, the shrink_zone from balance_pgdat | ||
1865 | * will pick up pages from other mem cgroup's as well. We hack | ||
1866 | * the priority and make it zero. | ||
1867 | */ | ||
1868 | shrink_zone(0, zone, &sc); | ||
1869 | return sc.nr_reclaimed; | ||
1870 | } | ||
1871 | |||
1839 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | 1872 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, |
1840 | gfp_t gfp_mask, | 1873 | gfp_t gfp_mask, |
1841 | bool noswap, | 1874 | bool noswap, |
1842 | unsigned int swappiness) | 1875 | unsigned int swappiness) |
1843 | { | 1876 | { |
1877 | struct zonelist *zonelist; | ||
1844 | struct scan_control sc = { | 1878 | struct scan_control sc = { |
1845 | .may_writepage = !laptop_mode, | 1879 | .may_writepage = !laptop_mode, |
1846 | .may_unmap = 1, | 1880 | .may_unmap = 1, |
@@ -1852,7 +1886,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
1852 | .isolate_pages = mem_cgroup_isolate_pages, | 1886 | .isolate_pages = mem_cgroup_isolate_pages, |
1853 | .nodemask = NULL, /* we don't care the placement */ | 1887 | .nodemask = NULL, /* we don't care the placement */ |
1854 | }; | 1888 | }; |
1855 | struct zonelist *zonelist; | ||
1856 | 1889 | ||
1857 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 1890 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
1858 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); | 1891 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); |
@@ -1974,6 +2007,7 @@ loop_again: | |||
1974 | for (i = 0; i <= end_zone; i++) { | 2007 | for (i = 0; i <= end_zone; i++) { |
1975 | struct zone *zone = pgdat->node_zones + i; | 2008 | struct zone *zone = pgdat->node_zones + i; |
1976 | int nr_slab; | 2009 | int nr_slab; |
2010 | int nid, zid; | ||
1977 | 2011 | ||
1978 | if (!populated_zone(zone)) | 2012 | if (!populated_zone(zone)) |
1979 | continue; | 2013 | continue; |
@@ -1988,6 +2022,15 @@ loop_again: | |||
1988 | temp_priority[i] = priority; | 2022 | temp_priority[i] = priority; |
1989 | sc.nr_scanned = 0; | 2023 | sc.nr_scanned = 0; |
1990 | note_zone_scanning_priority(zone, priority); | 2024 | note_zone_scanning_priority(zone, priority); |
2025 | |||
2026 | nid = pgdat->node_id; | ||
2027 | zid = zone_idx(zone); | ||
2028 | /* | ||
2029 | * Call soft limit reclaim before calling shrink_zone. | ||
2030 | * For now we ignore the return value | ||
2031 | */ | ||
2032 | mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask, | ||
2033 | nid, zid); | ||
1991 | /* | 2034 | /* |
1992 | * We put equal pressure on every zone, unless one | 2035 | * We put equal pressure on every zone, unless one |
1993 | * zone has way too many pages free already. | 2036 | * zone has way too many pages free already. |
@@ -2801,10 +2844,10 @@ static void scan_all_zones_unevictable_pages(void) | |||
2801 | unsigned long scan_unevictable_pages; | 2844 | unsigned long scan_unevictable_pages; |
2802 | 2845 | ||
2803 | int scan_unevictable_handler(struct ctl_table *table, int write, | 2846 | int scan_unevictable_handler(struct ctl_table *table, int write, |
2804 | struct file *file, void __user *buffer, | 2847 | void __user *buffer, |
2805 | size_t *length, loff_t *ppos) | 2848 | size_t *length, loff_t *ppos) |
2806 | { | 2849 | { |
2807 | proc_doulongvec_minmax(table, write, file, buffer, length, ppos); | 2850 | proc_doulongvec_minmax(table, write, buffer, length, ppos); |
2808 | 2851 | ||
2809 | if (write && *(unsigned long *)table->data) | 2852 | if (write && *(unsigned long *)table->data) |
2810 | scan_all_zones_unevictable_pages(); | 2853 | scan_all_zones_unevictable_pages(); |
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 907a82e9023d..a16a2342f6bf 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c | |||
@@ -965,12 +965,12 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { | |||
965 | 965 | ||
966 | #ifdef CONFIG_SYSCTL | 966 | #ifdef CONFIG_SYSCTL |
967 | static | 967 | static |
968 | int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp, | 968 | int brnf_sysctl_call_tables(ctl_table * ctl, int write, |
969 | void __user * buffer, size_t * lenp, loff_t * ppos) | 969 | void __user * buffer, size_t * lenp, loff_t * ppos) |
970 | { | 970 | { |
971 | int ret; | 971 | int ret; |
972 | 972 | ||
973 | ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 973 | ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
974 | 974 | ||
975 | if (write && *(int *)(ctl->data)) | 975 | if (write && *(int *)(ctl->data)) |
976 | *(int *)(ctl->data) = 1; | 976 | *(int *)(ctl->data) = 1; |
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 1c6a5bb6f0c8..6e1f085db06a 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c | |||
@@ -164,7 +164,7 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU | |||
164 | static int min_priority[1]; | 164 | static int min_priority[1]; |
165 | static int max_priority[] = { 127 }; /* From DECnet spec */ | 165 | static int max_priority[] = { 127 }; /* From DECnet spec */ |
166 | 166 | ||
167 | static int dn_forwarding_proc(ctl_table *, int, struct file *, | 167 | static int dn_forwarding_proc(ctl_table *, int, |
168 | void __user *, size_t *, loff_t *); | 168 | void __user *, size_t *, loff_t *); |
169 | static int dn_forwarding_sysctl(ctl_table *table, | 169 | static int dn_forwarding_sysctl(ctl_table *table, |
170 | void __user *oldval, size_t __user *oldlenp, | 170 | void __user *oldval, size_t __user *oldlenp, |
@@ -274,7 +274,6 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) | |||
274 | } | 274 | } |
275 | 275 | ||
276 | static int dn_forwarding_proc(ctl_table *table, int write, | 276 | static int dn_forwarding_proc(ctl_table *table, int write, |
277 | struct file *filep, | ||
278 | void __user *buffer, | 277 | void __user *buffer, |
279 | size_t *lenp, loff_t *ppos) | 278 | size_t *lenp, loff_t *ppos) |
280 | { | 279 | { |
@@ -290,7 +289,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, | |||
290 | dn_db = dev->dn_ptr; | 289 | dn_db = dev->dn_ptr; |
291 | old = dn_db->parms.forwarding; | 290 | old = dn_db->parms.forwarding; |
292 | 291 | ||
293 | err = proc_dointvec(table, write, filep, buffer, lenp, ppos); | 292 | err = proc_dointvec(table, write, buffer, lenp, ppos); |
294 | 293 | ||
295 | if ((err >= 0) && write) { | 294 | if ((err >= 0) && write) { |
296 | if (dn_db->parms.forwarding < 0) | 295 | if (dn_db->parms.forwarding < 0) |
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 5bcd592ae6dd..26b0ab1e9f56 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c | |||
@@ -165,7 +165,6 @@ static int dn_node_address_strategy(ctl_table *table, | |||
165 | } | 165 | } |
166 | 166 | ||
167 | static int dn_node_address_handler(ctl_table *table, int write, | 167 | static int dn_node_address_handler(ctl_table *table, int write, |
168 | struct file *filp, | ||
169 | void __user *buffer, | 168 | void __user *buffer, |
170 | size_t *lenp, loff_t *ppos) | 169 | size_t *lenp, loff_t *ppos) |
171 | { | 170 | { |
@@ -276,7 +275,6 @@ static int dn_def_dev_strategy(ctl_table *table, | |||
276 | 275 | ||
277 | 276 | ||
278 | static int dn_def_dev_handler(ctl_table *table, int write, | 277 | static int dn_def_dev_handler(ctl_table *table, int write, |
279 | struct file * filp, | ||
280 | void __user *buffer, | 278 | void __user *buffer, |
281 | size_t *lenp, loff_t *ppos) | 279 | size_t *lenp, loff_t *ppos) |
282 | { | 280 | { |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 07336c6201f0..e92f1fd28aa5 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -1270,10 +1270,10 @@ static void inet_forward_change(struct net *net) | |||
1270 | } | 1270 | } |
1271 | 1271 | ||
1272 | static int devinet_conf_proc(ctl_table *ctl, int write, | 1272 | static int devinet_conf_proc(ctl_table *ctl, int write, |
1273 | struct file *filp, void __user *buffer, | 1273 | void __user *buffer, |
1274 | size_t *lenp, loff_t *ppos) | 1274 | size_t *lenp, loff_t *ppos) |
1275 | { | 1275 | { |
1276 | int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 1276 | int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
1277 | 1277 | ||
1278 | if (write) { | 1278 | if (write) { |
1279 | struct ipv4_devconf *cnf = ctl->extra1; | 1279 | struct ipv4_devconf *cnf = ctl->extra1; |
@@ -1342,12 +1342,12 @@ static int devinet_conf_sysctl(ctl_table *table, | |||
1342 | } | 1342 | } |
1343 | 1343 | ||
1344 | static int devinet_sysctl_forward(ctl_table *ctl, int write, | 1344 | static int devinet_sysctl_forward(ctl_table *ctl, int write, |
1345 | struct file *filp, void __user *buffer, | 1345 | void __user *buffer, |
1346 | size_t *lenp, loff_t *ppos) | 1346 | size_t *lenp, loff_t *ppos) |
1347 | { | 1347 | { |
1348 | int *valp = ctl->data; | 1348 | int *valp = ctl->data; |
1349 | int val = *valp; | 1349 | int val = *valp; |
1350 | int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 1350 | int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
1351 | 1351 | ||
1352 | if (write && *valp != val) { | 1352 | if (write && *valp != val) { |
1353 | struct net *net = ctl->extra2; | 1353 | struct net *net = ctl->extra2; |
@@ -1372,12 +1372,12 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, | |||
1372 | } | 1372 | } |
1373 | 1373 | ||
1374 | int ipv4_doint_and_flush(ctl_table *ctl, int write, | 1374 | int ipv4_doint_and_flush(ctl_table *ctl, int write, |
1375 | struct file *filp, void __user *buffer, | 1375 | void __user *buffer, |
1376 | size_t *lenp, loff_t *ppos) | 1376 | size_t *lenp, loff_t *ppos) |
1377 | { | 1377 | { |
1378 | int *valp = ctl->data; | 1378 | int *valp = ctl->data; |
1379 | int val = *valp; | 1379 | int val = *valp; |
1380 | int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 1380 | int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
1381 | struct net *net = ctl->extra2; | 1381 | struct net *net = ctl->extra2; |
1382 | 1382 | ||
1383 | if (write && *valp != val) | 1383 | if (write && *valp != val) |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index df9347314538..bb4199252026 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -3036,7 +3036,7 @@ void ip_rt_multicast_event(struct in_device *in_dev) | |||
3036 | 3036 | ||
3037 | #ifdef CONFIG_SYSCTL | 3037 | #ifdef CONFIG_SYSCTL |
3038 | static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, | 3038 | static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, |
3039 | struct file *filp, void __user *buffer, | 3039 | void __user *buffer, |
3040 | size_t *lenp, loff_t *ppos) | 3040 | size_t *lenp, loff_t *ppos) |
3041 | { | 3041 | { |
3042 | if (write) { | 3042 | if (write) { |
@@ -3046,7 +3046,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, | |||
3046 | 3046 | ||
3047 | memcpy(&ctl, __ctl, sizeof(ctl)); | 3047 | memcpy(&ctl, __ctl, sizeof(ctl)); |
3048 | ctl.data = &flush_delay; | 3048 | ctl.data = &flush_delay; |
3049 | proc_dointvec(&ctl, write, filp, buffer, lenp, ppos); | 3049 | proc_dointvec(&ctl, write, buffer, lenp, ppos); |
3050 | 3050 | ||
3051 | net = (struct net *)__ctl->extra1; | 3051 | net = (struct net *)__ctl->extra1; |
3052 | rt_cache_flush(net, flush_delay); | 3052 | rt_cache_flush(net, flush_delay); |
@@ -3106,12 +3106,11 @@ static void rt_secret_reschedule(int old) | |||
3106 | } | 3106 | } |
3107 | 3107 | ||
3108 | static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, | 3108 | static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, |
3109 | struct file *filp, | ||
3110 | void __user *buffer, size_t *lenp, | 3109 | void __user *buffer, size_t *lenp, |
3111 | loff_t *ppos) | 3110 | loff_t *ppos) |
3112 | { | 3111 | { |
3113 | int old = ip_rt_secret_interval; | 3112 | int old = ip_rt_secret_interval; |
3114 | int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos); | 3113 | int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); |
3115 | 3114 | ||
3116 | rt_secret_reschedule(old); | 3115 | rt_secret_reschedule(old); |
3117 | 3116 | ||
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4710d219f06a..2dcf04d9b005 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -36,7 +36,7 @@ static void set_local_port_range(int range[2]) | |||
36 | } | 36 | } |
37 | 37 | ||
38 | /* Validate changes from /proc interface. */ | 38 | /* Validate changes from /proc interface. */ |
39 | static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, | 39 | static int ipv4_local_port_range(ctl_table *table, int write, |
40 | void __user *buffer, | 40 | void __user *buffer, |
41 | size_t *lenp, loff_t *ppos) | 41 | size_t *lenp, loff_t *ppos) |
42 | { | 42 | { |
@@ -51,7 +51,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, | |||
51 | }; | 51 | }; |
52 | 52 | ||
53 | inet_get_local_port_range(range, range + 1); | 53 | inet_get_local_port_range(range, range + 1); |
54 | ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); | 54 | ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); |
55 | 55 | ||
56 | if (write && ret == 0) { | 56 | if (write && ret == 0) { |
57 | if (range[1] < range[0]) | 57 | if (range[1] < range[0]) |
@@ -91,7 +91,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, | |||
91 | } | 91 | } |
92 | 92 | ||
93 | 93 | ||
94 | static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp, | 94 | static int proc_tcp_congestion_control(ctl_table *ctl, int write, |
95 | void __user *buffer, size_t *lenp, loff_t *ppos) | 95 | void __user *buffer, size_t *lenp, loff_t *ppos) |
96 | { | 96 | { |
97 | char val[TCP_CA_NAME_MAX]; | 97 | char val[TCP_CA_NAME_MAX]; |
@@ -103,7 +103,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * | |||
103 | 103 | ||
104 | tcp_get_default_congestion_control(val); | 104 | tcp_get_default_congestion_control(val); |
105 | 105 | ||
106 | ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); | 106 | ret = proc_dostring(&tbl, write, buffer, lenp, ppos); |
107 | if (write && ret == 0) | 107 | if (write && ret == 0) |
108 | ret = tcp_set_default_congestion_control(val); | 108 | ret = tcp_set_default_congestion_control(val); |
109 | return ret; | 109 | return ret; |
@@ -129,7 +129,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, | |||
129 | } | 129 | } |
130 | 130 | ||
131 | static int proc_tcp_available_congestion_control(ctl_table *ctl, | 131 | static int proc_tcp_available_congestion_control(ctl_table *ctl, |
132 | int write, struct file * filp, | 132 | int write, |
133 | void __user *buffer, size_t *lenp, | 133 | void __user *buffer, size_t *lenp, |
134 | loff_t *ppos) | 134 | loff_t *ppos) |
135 | { | 135 | { |
@@ -140,13 +140,13 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl, | |||
140 | if (!tbl.data) | 140 | if (!tbl.data) |
141 | return -ENOMEM; | 141 | return -ENOMEM; |
142 | tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX); | 142 | tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX); |
143 | ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); | 143 | ret = proc_dostring(&tbl, write, buffer, lenp, ppos); |
144 | kfree(tbl.data); | 144 | kfree(tbl.data); |
145 | return ret; | 145 | return ret; |
146 | } | 146 | } |
147 | 147 | ||
148 | static int proc_allowed_congestion_control(ctl_table *ctl, | 148 | static int proc_allowed_congestion_control(ctl_table *ctl, |
149 | int write, struct file * filp, | 149 | int write, |
150 | void __user *buffer, size_t *lenp, | 150 | void __user *buffer, size_t *lenp, |
151 | loff_t *ppos) | 151 | loff_t *ppos) |
152 | { | 152 | { |
@@ -158,7 +158,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl, | |||
158 | return -ENOMEM; | 158 | return -ENOMEM; |
159 | 159 | ||
160 | tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen); | 160 | tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen); |
161 | ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos); | 161 | ret = proc_dostring(&tbl, write, buffer, lenp, ppos); |
162 | if (write && ret == 0) | 162 | if (write && ret == 0) |
163 | ret = tcp_set_allowed_congestion_control(tbl.data); | 163 | ret = tcp_set_allowed_congestion_control(tbl.data); |
164 | kfree(tbl.data); | 164 | kfree(tbl.data); |
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 55f486d89c88..1fd0a3d775d2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -3986,14 +3986,14 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) | |||
3986 | #ifdef CONFIG_SYSCTL | 3986 | #ifdef CONFIG_SYSCTL |
3987 | 3987 | ||
3988 | static | 3988 | static |
3989 | int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, | 3989 | int addrconf_sysctl_forward(ctl_table *ctl, int write, |
3990 | void __user *buffer, size_t *lenp, loff_t *ppos) | 3990 | void __user *buffer, size_t *lenp, loff_t *ppos) |
3991 | { | 3991 | { |
3992 | int *valp = ctl->data; | 3992 | int *valp = ctl->data; |
3993 | int val = *valp; | 3993 | int val = *valp; |
3994 | int ret; | 3994 | int ret; |
3995 | 3995 | ||
3996 | ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 3996 | ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
3997 | 3997 | ||
3998 | if (write) | 3998 | if (write) |
3999 | ret = addrconf_fixup_forwarding(ctl, valp, val); | 3999 | ret = addrconf_fixup_forwarding(ctl, valp, val); |
@@ -4090,14 +4090,14 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) | |||
4090 | } | 4090 | } |
4091 | 4091 | ||
4092 | static | 4092 | static |
4093 | int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp, | 4093 | int addrconf_sysctl_disable(ctl_table *ctl, int write, |
4094 | void __user *buffer, size_t *lenp, loff_t *ppos) | 4094 | void __user *buffer, size_t *lenp, loff_t *ppos) |
4095 | { | 4095 | { |
4096 | int *valp = ctl->data; | 4096 | int *valp = ctl->data; |
4097 | int val = *valp; | 4097 | int val = *valp; |
4098 | int ret; | 4098 | int ret; |
4099 | 4099 | ||
4100 | ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 4100 | ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
4101 | 4101 | ||
4102 | if (write) | 4102 | if (write) |
4103 | ret = addrconf_disable_ipv6(ctl, valp, val); | 4103 | ret = addrconf_disable_ipv6(ctl, valp, val); |
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7015478797f6..498b9b0b0fad 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c | |||
@@ -1735,7 +1735,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl, | |||
1735 | } | 1735 | } |
1736 | } | 1736 | } |
1737 | 1737 | ||
1738 | int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos) | 1738 | int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) |
1739 | { | 1739 | { |
1740 | struct net_device *dev = ctl->extra1; | 1740 | struct net_device *dev = ctl->extra1; |
1741 | struct inet6_dev *idev; | 1741 | struct inet6_dev *idev; |
@@ -1746,16 +1746,16 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f | |||
1746 | ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); | 1746 | ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); |
1747 | 1747 | ||
1748 | if (strcmp(ctl->procname, "retrans_time") == 0) | 1748 | if (strcmp(ctl->procname, "retrans_time") == 0) |
1749 | ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 1749 | ret = proc_dointvec(ctl, write, buffer, lenp, ppos); |
1750 | 1750 | ||
1751 | else if (strcmp(ctl->procname, "base_reachable_time") == 0) | 1751 | else if (strcmp(ctl->procname, "base_reachable_time") == 0) |
1752 | ret = proc_dointvec_jiffies(ctl, write, | 1752 | ret = proc_dointvec_jiffies(ctl, write, |
1753 | filp, buffer, lenp, ppos); | 1753 | buffer, lenp, ppos); |
1754 | 1754 | ||
1755 | else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || | 1755 | else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || |
1756 | (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) | 1756 | (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) |
1757 | ret = proc_dointvec_ms_jiffies(ctl, write, | 1757 | ret = proc_dointvec_ms_jiffies(ctl, write, |
1758 | filp, buffer, lenp, ppos); | 1758 | buffer, lenp, ppos); |
1759 | else | 1759 | else |
1760 | ret = -1; | 1760 | ret = -1; |
1761 | 1761 | ||
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 77aecbe8ff6c..d6fe7646a8ff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -2524,13 +2524,13 @@ static const struct file_operations rt6_stats_seq_fops = { | |||
2524 | #ifdef CONFIG_SYSCTL | 2524 | #ifdef CONFIG_SYSCTL |
2525 | 2525 | ||
2526 | static | 2526 | static |
2527 | int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, | 2527 | int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, |
2528 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2528 | void __user *buffer, size_t *lenp, loff_t *ppos) |
2529 | { | 2529 | { |
2530 | struct net *net = current->nsproxy->net_ns; | 2530 | struct net *net = current->nsproxy->net_ns; |
2531 | int delay = net->ipv6.sysctl.flush_delay; | 2531 | int delay = net->ipv6.sysctl.flush_delay; |
2532 | if (write) { | 2532 | if (write) { |
2533 | proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 2533 | proc_dointvec(ctl, write, buffer, lenp, ppos); |
2534 | fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); | 2534 | fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); |
2535 | return 0; | 2535 | return 0; |
2536 | } else | 2536 | } else |
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 57f8817c3979..5c86567e5a78 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c | |||
@@ -73,12 +73,12 @@ static int min_lap_keepalive_time = 100; /* 100us */ | |||
73 | /* For other sysctl, I've no idea of the range. Maybe Dag could help | 73 | /* For other sysctl, I've no idea of the range. Maybe Dag could help |
74 | * us on that - Jean II */ | 74 | * us on that - Jean II */ |
75 | 75 | ||
76 | static int do_devname(ctl_table *table, int write, struct file *filp, | 76 | static int do_devname(ctl_table *table, int write, |
77 | void __user *buffer, size_t *lenp, loff_t *ppos) | 77 | void __user *buffer, size_t *lenp, loff_t *ppos) |
78 | { | 78 | { |
79 | int ret; | 79 | int ret; |
80 | 80 | ||
81 | ret = proc_dostring(table, write, filp, buffer, lenp, ppos); | 81 | ret = proc_dostring(table, write, buffer, lenp, ppos); |
82 | if (ret == 0 && write) { | 82 | if (ret == 0 && write) { |
83 | struct ias_value *val; | 83 | struct ias_value *val; |
84 | 84 | ||
@@ -90,12 +90,12 @@ static int do_devname(ctl_table *table, int write, struct file *filp, | |||
90 | } | 90 | } |
91 | 91 | ||
92 | 92 | ||
93 | static int do_discovery(ctl_table *table, int write, struct file *filp, | 93 | static int do_discovery(ctl_table *table, int write, |
94 | void __user *buffer, size_t *lenp, loff_t *ppos) | 94 | void __user *buffer, size_t *lenp, loff_t *ppos) |
95 | { | 95 | { |
96 | int ret; | 96 | int ret; |
97 | 97 | ||
98 | ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); | 98 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
99 | if (ret) | 99 | if (ret) |
100 | return ret; | 100 | return ret; |
101 | 101 | ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index fba2892b99e1..446e9bd4b4bc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -1496,14 +1496,14 @@ static int ip_vs_zero_all(void) | |||
1496 | 1496 | ||
1497 | 1497 | ||
1498 | static int | 1498 | static int |
1499 | proc_do_defense_mode(ctl_table *table, int write, struct file * filp, | 1499 | proc_do_defense_mode(ctl_table *table, int write, |
1500 | void __user *buffer, size_t *lenp, loff_t *ppos) | 1500 | void __user *buffer, size_t *lenp, loff_t *ppos) |
1501 | { | 1501 | { |
1502 | int *valp = table->data; | 1502 | int *valp = table->data; |
1503 | int val = *valp; | 1503 | int val = *valp; |
1504 | int rc; | 1504 | int rc; |
1505 | 1505 | ||
1506 | rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); | 1506 | rc = proc_dointvec(table, write, buffer, lenp, ppos); |
1507 | if (write && (*valp != val)) { | 1507 | if (write && (*valp != val)) { |
1508 | if ((*valp < 0) || (*valp > 3)) { | 1508 | if ((*valp < 0) || (*valp > 3)) { |
1509 | /* Restore the correct value */ | 1509 | /* Restore the correct value */ |
@@ -1517,7 +1517,7 @@ proc_do_defense_mode(ctl_table *table, int write, struct file * filp, | |||
1517 | 1517 | ||
1518 | 1518 | ||
1519 | static int | 1519 | static int |
1520 | proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, | 1520 | proc_do_sync_threshold(ctl_table *table, int write, |
1521 | void __user *buffer, size_t *lenp, loff_t *ppos) | 1521 | void __user *buffer, size_t *lenp, loff_t *ppos) |
1522 | { | 1522 | { |
1523 | int *valp = table->data; | 1523 | int *valp = table->data; |
@@ -1527,7 +1527,7 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, | |||
1527 | /* backup the value first */ | 1527 | /* backup the value first */ |
1528 | memcpy(val, valp, sizeof(val)); | 1528 | memcpy(val, valp, sizeof(val)); |
1529 | 1529 | ||
1530 | rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); | 1530 | rc = proc_dointvec(table, write, buffer, lenp, ppos); |
1531 | if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { | 1531 | if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { |
1532 | /* Restore the correct value */ | 1532 | /* Restore the correct value */ |
1533 | memcpy(valp, val, sizeof(val)); | 1533 | memcpy(valp, val, sizeof(val)); |
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 4e620305f28c..c93494fef8ef 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c | |||
@@ -226,7 +226,7 @@ static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; | |||
226 | static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; | 226 | static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; |
227 | static struct ctl_table_header *nf_log_dir_header; | 227 | static struct ctl_table_header *nf_log_dir_header; |
228 | 228 | ||
229 | static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, | 229 | static int nf_log_proc_dostring(ctl_table *table, int write, |
230 | void __user *buffer, size_t *lenp, loff_t *ppos) | 230 | void __user *buffer, size_t *lenp, loff_t *ppos) |
231 | { | 231 | { |
232 | const struct nf_logger *logger; | 232 | const struct nf_logger *logger; |
@@ -260,7 +260,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, | |||
260 | table->data = "NONE"; | 260 | table->data = "NONE"; |
261 | else | 261 | else |
262 | table->data = logger->name; | 262 | table->data = logger->name; |
263 | r = proc_dostring(table, write, filp, buffer, lenp, ppos); | 263 | r = proc_dostring(table, write, buffer, lenp, ppos); |
264 | mutex_unlock(&nf_log_mutex); | 264 | mutex_unlock(&nf_log_mutex); |
265 | } | 265 | } |
266 | 266 | ||
diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 7b5749ee2765..2220f3322326 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c | |||
@@ -56,7 +56,7 @@ void phonet_get_local_port_range(int *min, int *max) | |||
56 | } while (read_seqretry(&local_port_range_lock, seq)); | 56 | } while (read_seqretry(&local_port_range_lock, seq)); |
57 | } | 57 | } |
58 | 58 | ||
59 | static int proc_local_port_range(ctl_table *table, int write, struct file *filp, | 59 | static int proc_local_port_range(ctl_table *table, int write, |
60 | void __user *buffer, | 60 | void __user *buffer, |
61 | size_t *lenp, loff_t *ppos) | 61 | size_t *lenp, loff_t *ppos) |
62 | { | 62 | { |
@@ -70,7 +70,7 @@ static int proc_local_port_range(ctl_table *table, int write, struct file *filp, | |||
70 | .extra2 = &local_port_range_max, | 70 | .extra2 = &local_port_range_max, |
71 | }; | 71 | }; |
72 | 72 | ||
73 | ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); | 73 | ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); |
74 | 74 | ||
75 | if (write && ret == 0) { | 75 | if (write && ret == 0) { |
76 | if (range[1] < range[0]) | 76 | if (range[1] < range[0]) |
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 5231f7aaac0e..42f9748ae093 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c | |||
@@ -56,7 +56,7 @@ rpc_unregister_sysctl(void) | |||
56 | } | 56 | } |
57 | } | 57 | } |
58 | 58 | ||
59 | static int proc_do_xprt(ctl_table *table, int write, struct file *file, | 59 | static int proc_do_xprt(ctl_table *table, int write, |
60 | void __user *buffer, size_t *lenp, loff_t *ppos) | 60 | void __user *buffer, size_t *lenp, loff_t *ppos) |
61 | { | 61 | { |
62 | char tmpbuf[256]; | 62 | char tmpbuf[256]; |
@@ -71,7 +71,7 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file, | |||
71 | } | 71 | } |
72 | 72 | ||
73 | static int | 73 | static int |
74 | proc_dodebug(ctl_table *table, int write, struct file *file, | 74 | proc_dodebug(ctl_table *table, int write, |
75 | void __user *buffer, size_t *lenp, loff_t *ppos) | 75 | void __user *buffer, size_t *lenp, loff_t *ppos) |
76 | { | 76 | { |
77 | char tmpbuf[20], c, *s; | 77 | char tmpbuf[20], c, *s; |
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 87101177825b..35fb68b9c8ec 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c | |||
@@ -80,7 +80,7 @@ struct kmem_cache *svc_rdma_ctxt_cachep; | |||
80 | * current value. | 80 | * current value. |
81 | */ | 81 | */ |
82 | static int read_reset_stat(ctl_table *table, int write, | 82 | static int read_reset_stat(ctl_table *table, int write, |
83 | struct file *filp, void __user *buffer, size_t *lenp, | 83 | void __user *buffer, size_t *lenp, |
84 | loff_t *ppos) | 84 | loff_t *ppos) |
85 | { | 85 | { |
86 | atomic_t *stat = (atomic_t *)table->data; | 86 | atomic_t *stat = (atomic_t *)table->data; |
diff --git a/security/device_cgroup.c b/security/device_cgroup.c index b8186bac8b7e..6cf8fd2b79e8 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c | |||
@@ -61,7 +61,8 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) | |||
61 | struct cgroup_subsys devices_subsys; | 61 | struct cgroup_subsys devices_subsys; |
62 | 62 | ||
63 | static int devcgroup_can_attach(struct cgroup_subsys *ss, | 63 | static int devcgroup_can_attach(struct cgroup_subsys *ss, |
64 | struct cgroup *new_cgroup, struct task_struct *task) | 64 | struct cgroup *new_cgroup, struct task_struct *task, |
65 | bool threadgroup) | ||
65 | { | 66 | { |
66 | if (current != task && !capable(CAP_SYS_ADMIN)) | 67 | if (current != task && !capable(CAP_SYS_ADMIN)) |
67 | return -EPERM; | 68 | return -EPERM; |
diff --git a/security/min_addr.c b/security/min_addr.c index 14cc7b3b8d03..c844eed7915d 100644 --- a/security/min_addr.c +++ b/security/min_addr.c | |||
@@ -28,12 +28,12 @@ static void update_mmap_min_addr(void) | |||
28 | * sysctl handler which just sets dac_mmap_min_addr = the new value and then | 28 | * sysctl handler which just sets dac_mmap_min_addr = the new value and then |
29 | * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly | 29 | * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly |
30 | */ | 30 | */ |
31 | int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, | 31 | int mmap_min_addr_handler(struct ctl_table *table, int write, |
32 | void __user *buffer, size_t *lenp, loff_t *ppos) | 32 | void __user *buffer, size_t *lenp, loff_t *ppos) |
33 | { | 33 | { |
34 | int ret; | 34 | int ret; |
35 | 35 | ||
36 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); | 36 | ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
37 | 37 | ||
38 | update_mmap_min_addr(); | 38 | update_mmap_min_addr(); |
39 | 39 | ||
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 417f7c994522..bb230d5d7085 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c | |||
@@ -2411,7 +2411,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) | |||
2411 | /* Wake up the parent if it is waiting so that it can recheck | 2411 | /* Wake up the parent if it is waiting so that it can recheck |
2412 | * wait permission to the new task SID. */ | 2412 | * wait permission to the new task SID. */ |
2413 | read_lock(&tasklist_lock); | 2413 | read_lock(&tasklist_lock); |
2414 | wake_up_interruptible(&current->real_parent->signal->wait_chldexit); | 2414 | __wake_up_parent(current, current->real_parent); |
2415 | read_unlock(&tasklist_lock); | 2415 | read_unlock(&tasklist_lock); |
2416 | } | 2416 | } |
2417 | 2417 | ||