aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBob Moore <robert.moore@intel.com>2013-03-08 04:22:23 -0500
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2013-03-11 19:45:04 -0400
commitfd1af7126fb62688cfcf4b563c73b2909ac30f74 (patch)
treef3f1437c28c49da167389868ab047fa43b2eefa1
parentc39660b232c5e4aee3cded5a02d28e71ca447fb8 (diff)
ACPICA: Regression fix: reinstate safe exit macros
Removal caused a regression on at least FreeBSD. This fix reinstates the macros. Signed-off-by: Bob Moore <robert.moore@intel.com> Signed-off-by: Lv Zheng <lv.zheng@intel.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
-rw-r--r--drivers/acpi/acpica/dsutils.c10
-rw-r--r--drivers/acpi/acpica/evgpe.c6
-rw-r--r--drivers/acpi/acpica/evsci.c4
-rw-r--r--drivers/acpi/acpica/exprep.c4
-rw-r--r--drivers/acpi/acpica/exutils.c4
-rw-r--r--drivers/acpi/acpica/hwacpi.c10
-rw-r--r--drivers/acpi/acpica/nsutils.c8
-rw-r--r--drivers/acpi/acpica/psargs.c2
-rw-r--r--drivers/acpi/acpica/utaddress.c4
-rw-r--r--include/acpi/acoutput.h53
10 files changed, 67 insertions, 38 deletions
diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c
index 4d8c992a51d8..99778997c35a 100644
--- a/drivers/acpi/acpica/dsutils.c
+++ b/drivers/acpi/acpica/dsutils.c
@@ -178,7 +178,7 @@ acpi_ds_is_result_used(union acpi_parse_object * op,
178 178
179 if (!op) { 179 if (!op) {
180 ACPI_ERROR((AE_INFO, "Null Op")); 180 ACPI_ERROR((AE_INFO, "Null Op"));
181 return_VALUE(TRUE); 181 return_UINT8(TRUE);
182 } 182 }
183 183
184 /* 184 /*
@@ -210,7 +210,7 @@ acpi_ds_is_result_used(union acpi_parse_object * op,
210 "At Method level, result of [%s] not used\n", 210 "At Method level, result of [%s] not used\n",
211 acpi_ps_get_opcode_name(op->common. 211 acpi_ps_get_opcode_name(op->common.
212 aml_opcode))); 212 aml_opcode)));
213 return_VALUE(FALSE); 213 return_UINT8(FALSE);
214 } 214 }
215 215
216 /* Get info on the parent. The root_op is AML_SCOPE */ 216 /* Get info on the parent. The root_op is AML_SCOPE */
@@ -219,7 +219,7 @@ acpi_ds_is_result_used(union acpi_parse_object * op,
219 acpi_ps_get_opcode_info(op->common.parent->common.aml_opcode); 219 acpi_ps_get_opcode_info(op->common.parent->common.aml_opcode);
220 if (parent_info->class == AML_CLASS_UNKNOWN) { 220 if (parent_info->class == AML_CLASS_UNKNOWN) {
221 ACPI_ERROR((AE_INFO, "Unknown parent opcode Op=%p", op)); 221 ACPI_ERROR((AE_INFO, "Unknown parent opcode Op=%p", op));
222 return_VALUE(FALSE); 222 return_UINT8(FALSE);
223 } 223 }
224 224
225 /* 225 /*
@@ -307,7 +307,7 @@ acpi_ds_is_result_used(union acpi_parse_object * op,
307 acpi_ps_get_opcode_name(op->common.parent->common. 307 acpi_ps_get_opcode_name(op->common.parent->common.
308 aml_opcode), op)); 308 aml_opcode), op));
309 309
310 return_VALUE(TRUE); 310 return_UINT8(TRUE);
311 311
312 result_not_used: 312 result_not_used:
313 ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, 313 ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH,
@@ -316,7 +316,7 @@ acpi_ds_is_result_used(union acpi_parse_object * op,
316 acpi_ps_get_opcode_name(op->common.parent->common. 316 acpi_ps_get_opcode_name(op->common.parent->common.
317 aml_opcode), op)); 317 aml_opcode), op));
318 318
319 return_VALUE(FALSE); 319 return_UINT8(FALSE);
320} 320}
321 321
322/******************************************************************************* 322/*******************************************************************************
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index b9adb9a7ed85..a493b528f8f9 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -707,7 +707,7 @@ acpi_ev_gpe_dispatch(struct acpi_namespace_node *gpe_device,
707 if (ACPI_FAILURE(status)) { 707 if (ACPI_FAILURE(status)) {
708 ACPI_EXCEPTION((AE_INFO, status, 708 ACPI_EXCEPTION((AE_INFO, status,
709 "Unable to clear GPE%02X", gpe_number)); 709 "Unable to clear GPE%02X", gpe_number));
710 return_VALUE(ACPI_INTERRUPT_NOT_HANDLED); 710 return_UINT32(ACPI_INTERRUPT_NOT_HANDLED);
711 } 711 }
712 } 712 }
713 713
@@ -724,7 +724,7 @@ acpi_ev_gpe_dispatch(struct acpi_namespace_node *gpe_device,
724 if (ACPI_FAILURE(status)) { 724 if (ACPI_FAILURE(status)) {
725 ACPI_EXCEPTION((AE_INFO, status, 725 ACPI_EXCEPTION((AE_INFO, status,
726 "Unable to disable GPE%02X", gpe_number)); 726 "Unable to disable GPE%02X", gpe_number));
727 return_VALUE(ACPI_INTERRUPT_NOT_HANDLED); 727 return_UINT32(ACPI_INTERRUPT_NOT_HANDLED);
728 } 728 }
729 729
730 /* 730 /*
@@ -784,7 +784,7 @@ acpi_ev_gpe_dispatch(struct acpi_namespace_node *gpe_device,
784 break; 784 break;
785 } 785 }
786 786
787 return_VALUE(ACPI_INTERRUPT_HANDLED); 787 return_UINT32(ACPI_INTERRUPT_HANDLED);
788} 788}
789 789
790#endif /* !ACPI_REDUCED_HARDWARE */ 790#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/drivers/acpi/acpica/evsci.c b/drivers/acpi/acpica/evsci.c
index f4b43bede015..b905acf7aacd 100644
--- a/drivers/acpi/acpica/evsci.c
+++ b/drivers/acpi/acpica/evsci.c
@@ -89,7 +89,7 @@ static u32 ACPI_SYSTEM_XFACE acpi_ev_sci_xrupt_handler(void *context)
89 */ 89 */
90 interrupt_handled |= acpi_ev_gpe_detect(gpe_xrupt_list); 90 interrupt_handled |= acpi_ev_gpe_detect(gpe_xrupt_list);
91 91
92 return_VALUE(interrupt_handled); 92 return_UINT32(interrupt_handled);
93} 93}
94 94
95/******************************************************************************* 95/*******************************************************************************
@@ -120,7 +120,7 @@ u32 ACPI_SYSTEM_XFACE acpi_ev_gpe_xrupt_handler(void *context)
120 120
121 interrupt_handled |= acpi_ev_gpe_detect(gpe_xrupt_list); 121 interrupt_handled |= acpi_ev_gpe_detect(gpe_xrupt_list);
122 122
123 return_VALUE(interrupt_handled); 123 return_UINT32(interrupt_handled);
124} 124}
125 125
126/****************************************************************************** 126/******************************************************************************
diff --git a/drivers/acpi/acpica/exprep.c b/drivers/acpi/acpica/exprep.c
index d6eab81f54fb..6b728aef2dca 100644
--- a/drivers/acpi/acpica/exprep.c
+++ b/drivers/acpi/acpica/exprep.c
@@ -276,7 +276,7 @@ acpi_ex_decode_field_access(union acpi_operand_object *obj_desc,
276 /* Invalid field access type */ 276 /* Invalid field access type */
277 277
278 ACPI_ERROR((AE_INFO, "Unknown field access type 0x%X", access)); 278 ACPI_ERROR((AE_INFO, "Unknown field access type 0x%X", access));
279 return_VALUE(0); 279 return_UINT32(0);
280 } 280 }
281 281
282 if (obj_desc->common.type == ACPI_TYPE_BUFFER_FIELD) { 282 if (obj_desc->common.type == ACPI_TYPE_BUFFER_FIELD) {
@@ -289,7 +289,7 @@ acpi_ex_decode_field_access(union acpi_operand_object *obj_desc,
289 } 289 }
290 290
291 *return_byte_alignment = byte_alignment; 291 *return_byte_alignment = byte_alignment;
292 return_VALUE(bit_length); 292 return_UINT32(bit_length);
293} 293}
294 294
295/******************************************************************************* 295/*******************************************************************************
diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index b205cbb4b50c..99dc7b287d55 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -340,7 +340,7 @@ static u32 acpi_ex_digits_needed(u64 value, u32 base)
340 /* u64 is unsigned, so we don't worry about a '-' prefix */ 340 /* u64 is unsigned, so we don't worry about a '-' prefix */
341 341
342 if (value == 0) { 342 if (value == 0) {
343 return_VALUE(1); 343 return_UINT32(1);
344 } 344 }
345 345
346 current_value = value; 346 current_value = value;
@@ -354,7 +354,7 @@ static u32 acpi_ex_digits_needed(u64 value, u32 base)
354 num_digits++; 354 num_digits++;
355 } 355 }
356 356
357 return_VALUE(num_digits); 357 return_UINT32(num_digits);
358} 358}
359 359
360/******************************************************************************* 360/*******************************************************************************
diff --git a/drivers/acpi/acpica/hwacpi.c b/drivers/acpi/acpica/hwacpi.c
index 9b02a9f5b04a..579c3a53ac87 100644
--- a/drivers/acpi/acpica/hwacpi.c
+++ b/drivers/acpi/acpica/hwacpi.c
@@ -155,7 +155,7 @@ u32 acpi_hw_get_mode(void)
155 /* If the Hardware Reduced flag is set, machine is always in acpi mode */ 155 /* If the Hardware Reduced flag is set, machine is always in acpi mode */
156 156
157 if (acpi_gbl_reduced_hardware) { 157 if (acpi_gbl_reduced_hardware) {
158 return_VALUE(ACPI_SYS_MODE_ACPI); 158 return_UINT32(ACPI_SYS_MODE_ACPI);
159 } 159 }
160 160
161 /* 161 /*
@@ -163,18 +163,18 @@ u32 acpi_hw_get_mode(void)
163 * system does not support mode transition. 163 * system does not support mode transition.
164 */ 164 */
165 if (!acpi_gbl_FADT.smi_command) { 165 if (!acpi_gbl_FADT.smi_command) {
166 return_VALUE(ACPI_SYS_MODE_ACPI); 166 return_UINT32(ACPI_SYS_MODE_ACPI);
167 } 167 }
168 168
169 status = acpi_read_bit_register(ACPI_BITREG_SCI_ENABLE, &value); 169 status = acpi_read_bit_register(ACPI_BITREG_SCI_ENABLE, &value);
170 if (ACPI_FAILURE(status)) { 170 if (ACPI_FAILURE(status)) {
171 return_VALUE(ACPI_SYS_MODE_LEGACY); 171 return_UINT32(ACPI_SYS_MODE_LEGACY);
172 } 172 }
173 173
174 if (value) { 174 if (value) {
175 return_VALUE(ACPI_SYS_MODE_ACPI); 175 return_UINT32(ACPI_SYS_MODE_ACPI);
176 } else { 176 } else {
177 return_VALUE(ACPI_SYS_MODE_LEGACY); 177 return_UINT32(ACPI_SYS_MODE_LEGACY);
178 } 178 }
179} 179}
180 180
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index 686420df684f..2808586fad30 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -112,10 +112,10 @@ acpi_object_type acpi_ns_get_type(struct acpi_namespace_node * node)
112 112
113 if (!node) { 113 if (!node) {
114 ACPI_WARNING((AE_INFO, "Null Node parameter")); 114 ACPI_WARNING((AE_INFO, "Null Node parameter"));
115 return_VALUE(ACPI_TYPE_ANY); 115 return_UINT8(ACPI_TYPE_ANY);
116 } 116 }
117 117
118 return_VALUE(node->type); 118 return_UINT8(node->type);
119} 119}
120 120
121/******************************************************************************* 121/*******************************************************************************
@@ -140,10 +140,10 @@ u32 acpi_ns_local(acpi_object_type type)
140 /* Type code out of range */ 140 /* Type code out of range */
141 141
142 ACPI_WARNING((AE_INFO, "Invalid Object Type 0x%X", type)); 142 ACPI_WARNING((AE_INFO, "Invalid Object Type 0x%X", type));
143 return_VALUE(ACPI_NS_NORMAL); 143 return_UINT32(ACPI_NS_NORMAL);
144 } 144 }
145 145
146 return_VALUE(acpi_gbl_ns_properties[type] & ACPI_NS_LOCAL); 146 return_UINT32(acpi_gbl_ns_properties[type] & ACPI_NS_LOCAL);
147} 147}
148 148
149/******************************************************************************* 149/*******************************************************************************
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index f51308cdbc65..9f25a3d4e992 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -108,7 +108,7 @@ acpi_ps_get_next_package_length(struct acpi_parse_state *parser_state)
108 /* Byte 0 is a special case, either bits [0:3] or [0:5] are used */ 108 /* Byte 0 is a special case, either bits [0:3] or [0:5] are used */
109 109
110 package_length |= (aml[0] & byte_zero_mask); 110 package_length |= (aml[0] & byte_zero_mask);
111 return_VALUE(package_length); 111 return_UINT32(package_length);
112} 112}
113 113
114/******************************************************************************* 114/*******************************************************************************
diff --git a/drivers/acpi/acpica/utaddress.c b/drivers/acpi/acpica/utaddress.c
index 698b9d385516..e0a2e2779c2e 100644
--- a/drivers/acpi/acpica/utaddress.c
+++ b/drivers/acpi/acpica/utaddress.c
@@ -214,7 +214,7 @@ acpi_ut_check_address_range(acpi_adr_space_type space_id,
214 214
215 if ((space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) && 215 if ((space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) &&
216 (space_id != ACPI_ADR_SPACE_SYSTEM_IO)) { 216 (space_id != ACPI_ADR_SPACE_SYSTEM_IO)) {
217 return_VALUE(0); 217 return_UINT32(0);
218 } 218 }
219 219
220 range_info = acpi_gbl_address_range_list[space_id]; 220 range_info = acpi_gbl_address_range_list[space_id];
@@ -256,7 +256,7 @@ acpi_ut_check_address_range(acpi_adr_space_type space_id,
256 range_info = range_info->next; 256 range_info = range_info->next;
257 } 257 }
258 258
259 return_VALUE(overlap_count); 259 return_UINT32(overlap_count);
260} 260}
261 261
262/******************************************************************************* 262/*******************************************************************************
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 9885276178e0..4f52ea795c7a 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -324,9 +324,9 @@
324 324
325/* Helper macro */ 325/* Helper macro */
326 326
327#define ACPI_TRACE_ENTRY(name, function, cast, param) \ 327#define ACPI_TRACE_ENTRY(name, function, type, param) \
328 ACPI_FUNCTION_NAME (name) \ 328 ACPI_FUNCTION_NAME (name) \
329 function (ACPI_DEBUG_PARAMETERS, cast (param)) 329 function (ACPI_DEBUG_PARAMETERS, (type) (param))
330 330
331/* The actual entry trace macros */ 331/* The actual entry trace macros */
332 332
@@ -335,13 +335,13 @@
335 acpi_ut_trace (ACPI_DEBUG_PARAMETERS) 335 acpi_ut_trace (ACPI_DEBUG_PARAMETERS)
336 336
337#define ACPI_FUNCTION_TRACE_PTR(name, pointer) \ 337#define ACPI_FUNCTION_TRACE_PTR(name, pointer) \
338 ACPI_TRACE_ENTRY (name, acpi_ut_trace_ptr, (void *), pointer) 338 ACPI_TRACE_ENTRY (name, acpi_ut_trace_ptr, void *, pointer)
339 339
340#define ACPI_FUNCTION_TRACE_U32(name, value) \ 340#define ACPI_FUNCTION_TRACE_U32(name, value) \
341 ACPI_TRACE_ENTRY (name, acpi_ut_trace_u32, (u32), value) 341 ACPI_TRACE_ENTRY (name, acpi_ut_trace_u32, u32, value)
342 342
343#define ACPI_FUNCTION_TRACE_STR(name, string) \ 343#define ACPI_FUNCTION_TRACE_STR(name, string) \
344 ACPI_TRACE_ENTRY (name, acpi_ut_trace_str, (char *), string) 344 ACPI_TRACE_ENTRY (name, acpi_ut_trace_str, char *, string)
345 345
346#define ACPI_FUNCTION_ENTRY() \ 346#define ACPI_FUNCTION_ENTRY() \
347 acpi_ut_track_stack_ptr() 347 acpi_ut_track_stack_ptr()
@@ -355,16 +355,37 @@
355 * 355 *
356 * One of the FUNCTION_TRACE macros above must be used in conjunction 356 * One of the FUNCTION_TRACE macros above must be used in conjunction
357 * with these macros so that "_AcpiFunctionName" is defined. 357 * with these macros so that "_AcpiFunctionName" is defined.
358 *
359 * There are two versions of most of the return macros. The default version is
360 * safer, since it avoids side-effects by guaranteeing that the argument will
361 * not be evaluated twice.
362 *
363 * A less-safe version of the macros is provided for optional use if the
364 * compiler uses excessive CPU stack (for example, this may happen in the
365 * debug case if code optimzation is disabled.)
358 */ 366 */
359 367
360/* Exit trace helper macro */ 368/* Exit trace helper macro */
361 369
362#define ACPI_TRACE_EXIT(function, cast, param) \ 370#ifndef ACPI_SIMPLE_RETURN_MACROS
371
372#define ACPI_TRACE_EXIT(function, type, param) \
373 ACPI_DO_WHILE0 ({ \
374 register type _param = (type) (param); \
375 function (ACPI_DEBUG_PARAMETERS, _param); \
376 return (_param); \
377 })
378
379#else /* Use original less-safe macros */
380
381#define ACPI_TRACE_EXIT(function, type, param) \
363 ACPI_DO_WHILE0 ({ \ 382 ACPI_DO_WHILE0 ({ \
364 function (ACPI_DEBUG_PARAMETERS, cast (param)); \ 383 function (ACPI_DEBUG_PARAMETERS, (type) (param)); \
365 return ((param)); \ 384 return (param); \
366 }) 385 })
367 386
387#endif /* ACPI_SIMPLE_RETURN_MACROS */
388
368/* The actual exit macros */ 389/* The actual exit macros */
369 390
370#define return_VOID \ 391#define return_VOID \
@@ -374,13 +395,19 @@
374 }) 395 })
375 396
376#define return_ACPI_STATUS(status) \ 397#define return_ACPI_STATUS(status) \
377 ACPI_TRACE_EXIT (acpi_ut_status_exit, (acpi_status), status) 398 ACPI_TRACE_EXIT (acpi_ut_status_exit, acpi_status, status)
378 399
379#define return_PTR(pointer) \ 400#define return_PTR(pointer) \
380 ACPI_TRACE_EXIT (acpi_ut_ptr_exit, (u8 *), pointer) 401 ACPI_TRACE_EXIT (acpi_ut_ptr_exit, void *, pointer)
381 402
382#define return_VALUE(value) \ 403#define return_VALUE(value) \
383 ACPI_TRACE_EXIT (acpi_ut_value_exit, (u64), value) 404 ACPI_TRACE_EXIT (acpi_ut_value_exit, u64, value)
405
406#define return_UINT32(value) \
407 ACPI_TRACE_EXIT (acpi_ut_value_exit, u32, value)
408
409#define return_UINT8(value) \
410 ACPI_TRACE_EXIT (acpi_ut_value_exit, u8, value)
384 411
385/* Conditional execution */ 412/* Conditional execution */
386 413
@@ -428,8 +455,10 @@
428 455
429#define return_VOID return 456#define return_VOID return
430#define return_ACPI_STATUS(s) return(s) 457#define return_ACPI_STATUS(s) return(s)
431#define return_VALUE(s) return(s)
432#define return_PTR(s) return(s) 458#define return_PTR(s) return(s)
459#define return_VALUE(s) return(s)
460#define return_UINT8(s) return(s)
461#define return_UINT32(s) return(s)
433 462
434#endif /* ACPI_DEBUG_OUTPUT */ 463#endif /* ACPI_DEBUG_OUTPUT */
435 464
3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782
/*
 * kernel/workqueue.c - generic async execution with shared worker pool
 *
 * Copyright (C) 2002		Ingo Molnar
 *
 *   Derived from the taskqueue/keventd code by:
 *     David Woodhouse <dwmw2@infradead.org>
 *     Andrew Morton
 *     Kai Petzke <wpp@marie.physik.tu-berlin.de>
 *     Theodore Ts'o <tytso@mit.edu>
 *
 * Made to use alloc_percpu by Christoph Lameter.
 *
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This is the generic async execution mechanism.  Work items as are
 * executed in process context.  The worker pool is shared and
 * automatically managed.  There is one worker pool for each CPU and
 * one extra for works which are better served by workers which are
 * not bound to any specific CPU.
 *
 * Please read Documentation/workqueue.txt for details.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>

#include "workqueue_sched.h"

enum {
	/* global_cwq flags */
	GCWQ_MANAGE_WORKERS	= 1 << 0,	/* need to manage workers */
	GCWQ_MANAGING_WORKERS	= 1 << 1,	/* managing workers */
	GCWQ_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
	GCWQ_FREEZING		= 1 << 3,	/* freeze in progress */
	GCWQ_HIGHPRI_PENDING	= 1 << 4,	/* highpri works on queue */

	/* worker flags */
	WORKER_STARTED		= 1 << 0,	/* started */
	WORKER_DIE		= 1 << 1,	/* die die die */
	WORKER_IDLE		= 1 << 2,	/* is idle */
	WORKER_PREP		= 1 << 3,	/* preparing to run works */
	WORKER_ROGUE		= 1 << 4,	/* not bound to any cpu */
	WORKER_REBIND		= 1 << 5,	/* mom is home, come back */
	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */

	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_ROGUE | WORKER_REBIND |
				  WORKER_CPU_INTENSIVE | WORKER_UNBOUND,

	/* gcwq->trustee_state */
	TRUSTEE_START		= 0,		/* start */
	TRUSTEE_IN_CHARGE	= 1,		/* trustee in charge of gcwq */
	TRUSTEE_BUTCHER		= 2,		/* butcher workers */
	TRUSTEE_RELEASE		= 3,		/* release workers */
	TRUSTEE_DONE		= 4,		/* trustee is done */

	BUSY_WORKER_HASH_ORDER	= 6,		/* 64 pointers */
	BUSY_WORKER_HASH_SIZE	= 1 << BUSY_WORKER_HASH_ORDER,
	BUSY_WORKER_HASH_MASK	= BUSY_WORKER_HASH_SIZE - 1,

	MAX_IDLE_WORKERS_RATIO	= 4,		/* 1/4 of busy can be idle */
	IDLE_WORKER_TIMEOUT	= 300 * HZ,	/* keep idle ones for 5 mins */

	MAYDAY_INITIAL_TIMEOUT  = HZ / 100 >= 2 ? HZ / 100 : 2,
						/* call for help after 10ms
						   (min two ticks) */
	MAYDAY_INTERVAL		= HZ / 10,	/* and then every 100ms */
	CREATE_COOLDOWN		= HZ,		/* time to breath after fail */
	TRUSTEE_COOLDOWN	= HZ / 10,	/* for trustee draining */

	/*
	 * Rescue workers are used only on emergencies and shared by
	 * all cpus.  Give -20.
	 */
	RESCUER_NICE_LEVEL	= -20,
};

/*
 * Structure fields follow one of the following exclusion rules.
 *
 * I: Modifiable by initialization/destruction paths and read-only for
 *    everyone else.
 *
 * P: Preemption protected.  Disabling preemption is enough and should
 *    only be modified and accessed from the local cpu.
 *
 * L: gcwq->lock protected.  Access with gcwq->lock held.
 *
 * X: During normal operation, modification requires gcwq->lock and
 *    should be done only from local cpu.  Either disabling preemption
 *    on local cpu or grabbing gcwq->lock is enough for read access.
 *    If GCWQ_DISASSOCIATED is set, it's identical to L.
 *
 * F: wq->flush_mutex protected.
 *
 * W: workqueue_lock protected.
 */

struct global_cwq;

/*
 * The poor guys doing the actual heavy lifting.  All on-duty workers
 * are either serving the manager role, on idle list or on busy hash.
 */
struct worker {
	/* on idle list while idle, on busy hash table while busy */
	union {
		struct list_head	entry;	/* L: while idle */
		struct hlist_node	hentry;	/* L: while busy */
	};

	struct work_struct	*current_work;	/* L: work being processed */
	struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
	struct list_head	scheduled;	/* L: scheduled works */
	struct task_struct	*task;		/* I: worker task */
	struct global_cwq	*gcwq;		/* I: the associated gcwq */
	/* 64 bytes boundary on 64bit, 32 on 32bit */
	unsigned long		last_active;	/* L: last active timestamp */
	unsigned int		flags;		/* X: flags */
	int			id;		/* I: worker id */
	struct work_struct	rebind_work;	/* L: rebind worker to cpu */
};

/*
 * Global per-cpu workqueue.  There's one and only one for each cpu
 * and all works are queued and processed here regardless of their
 * target workqueues.
 */
struct global_cwq {
	spinlock_t		lock;		/* the gcwq lock */
	struct list_head	worklist;	/* L: list of pending works */
	unsigned int		cpu;		/* I: the associated cpu */
	unsigned int		flags;		/* L: GCWQ_* flags */

	int			nr_workers;	/* L: total number of workers */
	int			nr_idle;	/* L: currently idle ones */

	/* workers are chained either in the idle_list or busy_hash */
	struct list_head	idle_list;	/* X: list of idle workers */
	struct hlist_head	busy_hash[BUSY_WORKER_HASH_SIZE];
						/* L: hash of busy workers */

	struct timer_list	idle_timer;	/* L: worker idle timeout */
	struct timer_list	mayday_timer;	/* L: SOS timer for dworkers */

	struct ida		worker_ida;	/* L: for worker IDs */

	struct task_struct	*trustee;	/* L: for gcwq shutdown */
	unsigned int		trustee_state;	/* L: trustee state */
	wait_queue_head_t	trustee_wait;	/* trustee wait */
	struct worker		*first_idle;	/* L: first idle worker */
} ____cacheline_aligned_in_smp;

/*
 * The per-CPU workqueue.  The lower WORK_STRUCT_FLAG_BITS of
 * work_struct->data are used for flags and thus cwqs need to be
 * aligned at two's power of the number of flag bits.
 */
struct cpu_workqueue_struct {
	struct global_cwq	*gcwq;		/* I: the associated gcwq */
	struct workqueue_struct *wq;		/* I: the owning workqueue */
	int			work_color;	/* L: current color */
	int			flush_color;	/* L: flushing color */
	int			nr_in_flight[WORK_NR_COLORS];
						/* L: nr of in_flight works */
	int			nr_active;	/* L: nr of active works */
	int			max_active;	/* L: max active works */
	struct list_head	delayed_works;	/* L: delayed works */
};

/*
 * Structure used to wait for workqueue flush.
 */
struct wq_flusher {
	struct list_head	list;		/* F: list of flushers */
	int			flush_color;	/* F: flush color waiting for */
	struct completion	done;		/* flush completion */
};

/*
 * All cpumasks are assumed to be always set on UP and thus can't be
 * used to determine whether there's something to be done.
 */
#ifdef CONFIG_SMP
typedef cpumask_var_t mayday_mask_t;
#define mayday_test_and_set_cpu(cpu, mask)	\
	cpumask_test_and_set_cpu((cpu), (mask))
#define mayday_clear_cpu(cpu, mask)		cpumask_clear_cpu((cpu), (mask))
#define for_each_mayday_cpu(cpu, mask)		for_each_cpu((cpu), (mask))
#define alloc_mayday_mask(maskp, gfp)		zalloc_cpumask_var((maskp), (gfp))
#define free_mayday_mask(mask)			free_cpumask_var((mask))
#else
typedef unsigned long mayday_mask_t;
#define mayday_test_and_set_cpu(cpu, mask)	test_and_set_bit(0, &(mask))
#define mayday_clear_cpu(cpu, mask)		clear_bit(0, &(mask))
#define for_each_mayday_cpu(cpu, mask)		if ((cpu) = 0, (mask))
#define alloc_mayday_mask(maskp, gfp)		true
#define free_mayday_mask(mask)			do { } while (0)
#endif

/*
 * The externally visible workqueue abstraction is an array of
 * per-CPU workqueues:
 */
struct workqueue_struct {
	unsigned int		flags;		/* I: WQ_* flags */
	union {
		struct cpu_workqueue_struct __percpu	*pcpu;
		struct cpu_workqueue_struct		*single;
		unsigned long				v;
	} cpu_wq;				/* I: cwq's */
	struct list_head	list;		/* W: list of all workqueues */

	struct mutex		flush_mutex;	/* protects wq flushing */
	int			work_color;	/* F: current work color */
	int			flush_color;	/* F: current flush color */
	atomic_t		nr_cwqs_to_flush; /* flush in progress */
	struct wq_flusher	*first_flusher;	/* F: first flusher */
	struct list_head	flusher_queue;	/* F: flush waiters */
	struct list_head	flusher_overflow; /* F: flush overflow list */

	mayday_mask_t		mayday_mask;	/* cpus requesting rescue */
	struct worker		*rescuer;	/* I: rescue worker */

	int			saved_max_active; /* W: saved cwq max_active */
	const char		*name;		/* I: workqueue name */
#ifdef CONFIG_LOCKDEP
	struct lockdep_map	lockdep_map;
#endif
};

struct workqueue_struct *system_wq __read_mostly;
struct workqueue_struct *system_long_wq __read_mostly;
struct workqueue_struct *system_nrt_wq __read_mostly;
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_wq);
EXPORT_SYMBOL_GPL(system_long_wq);
EXPORT_SYMBOL_GPL(system_nrt_wq);
EXPORT_SYMBOL_GPL(system_unbound_wq);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

#define for_each_busy_worker(worker, i, pos, gcwq)			\
	for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)			\
		hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)

static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
				  unsigned int sw)
{
	if (cpu < nr_cpu_ids) {
		if (sw & 1) {
			cpu = cpumask_next(cpu, mask);
			if (cpu < nr_cpu_ids)
				return cpu;
		}
		if (sw & 2)
			return WORK_CPU_UNBOUND;
	}
	return WORK_CPU_NONE;
}

static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
				struct workqueue_struct *wq)
{
	return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
}

/*
 * CPU iterators
 *
 * An extra gcwq is defined for an invalid cpu number
 * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any
 * specific CPU.  The following iterators are similar to
 * for_each_*_cpu() iterators but also considers the unbound gcwq.
 *
 * for_each_gcwq_cpu()		: possible CPUs + WORK_CPU_UNBOUND
 * for_each_online_gcwq_cpu()	: online CPUs + WORK_CPU_UNBOUND
 * for_each_cwq_cpu()		: possible CPUs for bound workqueues,
 *				  WORK_CPU_UNBOUND for unbound workqueues
 */
#define for_each_gcwq_cpu(cpu)						\
	for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3);		\
	     (cpu) < WORK_CPU_NONE;					\
	     (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3))

#define for_each_online_gcwq_cpu(cpu)					\
	for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3);		\
	     (cpu) < WORK_CPU_NONE;					\
	     (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3))

#define for_each_cwq_cpu(cpu, wq)					\
	for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq));	\
	     (cpu) < WORK_CPU_NONE;					\
	     (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq)))

#ifdef CONFIG_DEBUG_OBJECTS_WORK

static struct debug_obj_descr work_debug_descr;

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static int work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 */
static int work_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {

	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup. The work struct was
		 * statically initialized. We just make sure that it
		 * is tracked in the object tracker.
		 */
		if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
			debug_object_init(work, &work_debug_descr);
			debug_object_activate(work, &work_debug_descr);
			return 0;
		}
		WARN_ON_ONCE(1);
		return 0;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return 0;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static int work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

static struct debug_obj_descr work_debug_descr = {
	.name		= "work_struct",
	.fixup_init	= work_fixup_init,
	.fixup_activate	= work_fixup_activate,
	.fixup_free	= work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif

/* Serializes the accesses to the list of workqueues. */
static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);
static bool workqueue_freezing;		/* W: have wqs started freezing? */

/*
 * The almighty global cpu workqueues.  nr_running is the only field
 * which is expected to be used frequently by other cpus via
 * try_to_wake_up().  Put it in a separate cacheline.
 */
static DEFINE_PER_CPU(struct global_cwq, global_cwq);
static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running);

/*
 * Global cpu workqueue and nr_running counter for unbound gcwq.  The
 * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its
 * workers have WORKER_UNBOUND set.
 */
static struct global_cwq unbound_global_cwq;
static atomic_t unbound_gcwq_nr_running = ATOMIC_INIT(0);	/* always 0 */

static int worker_thread(void *__worker);

static struct global_cwq *get_gcwq(unsigned int cpu)
{
	if (cpu != WORK_CPU_UNBOUND)
		return &per_cpu(global_cwq, cpu);
	else
		return &unbound_global_cwq;
}

static atomic_t *get_gcwq_nr_running(unsigned int cpu)
{
	if (cpu != WORK_CPU_UNBOUND)
		return &per_cpu(gcwq_nr_running, cpu);
	else
		return &unbound_gcwq_nr_running;
}

static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
					    struct workqueue_struct *wq)
{
	if (!(wq->flags & WQ_UNBOUND)) {
		if (likely(cpu < nr_cpu_ids)) {
#ifdef CONFIG_SMP
			return per_cpu_ptr(wq->cpu_wq.pcpu, cpu);
#else
			return wq->cpu_wq.single;
#endif
		}
	} else if (likely(cpu == WORK_CPU_UNBOUND))
		return wq->cpu_wq.single;
	return NULL;
}

static unsigned int work_color_to_flags(int color)
{
	return color << WORK_STRUCT_COLOR_SHIFT;
}

static int get_work_color(struct work_struct *work)
{
	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
		((1 << WORK_STRUCT_COLOR_BITS) - 1);
}

static int work_next_color(int color)
{
	return (color + 1) % WORK_NR_COLORS;
}

/*
 * A work's data points to the cwq with WORK_STRUCT_CWQ set while the
 * work is on queue.  Once execution starts, WORK_STRUCT_CWQ is
 * cleared and the work data contains the cpu number it was last on.
 *
 * set_work_{cwq|cpu}() and clear_work_data() can be used to set the
 * cwq, cpu or clear work->data.  These functions should only be
 * called while the work is owned - ie. while the PENDING bit is set.
 *
 * get_work_[g]cwq() can be used to obtain the gcwq or cwq
 * corresponding to a work.  gcwq is available once the work has been
 * queued anywhere after initialization.  cwq is available only from
 * queueing until execution starts.
 */
static inline void set_work_data(struct work_struct *work, unsigned long data,
				 unsigned long flags)
{
	BUG_ON(!work_pending(work));
	atomic_long_set(&work->data, data | flags | work_static(work));
}

static void set_work_cwq(struct work_struct *work,
			 struct cpu_workqueue_struct *cwq,
			 unsigned long extra_flags)
{
	set_work_data(work, (unsigned long)cwq,
		      WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
}

static void set_work_cpu(struct work_struct *work, unsigned int cpu)
{
	set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING);
}

static void clear_work_data(struct work_struct *work)
{
	set_work_data(work, WORK_STRUCT_NO_CPU, 0);
}

static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	if (data & WORK_STRUCT_CWQ)
		return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
	else
		return NULL;
}

static struct global_cwq *get_work_gcwq(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);
	unsigned int cpu;

	if (data & WORK_STRUCT_CWQ)
		return ((struct cpu_workqueue_struct *)
			(data & WORK_STRUCT_WQ_DATA_MASK))->gcwq;

	cpu = data >> WORK_STRUCT_FLAG_BITS;
	if (cpu == WORK_CPU_NONE)
		return NULL;

	BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND);
	return get_gcwq(cpu);
}

/*
 * Policy functions.  These define the policies on how the global
 * worker pool is managed.  Unless noted otherwise, these functions
 * assume that they're being called with gcwq->lock held.
 */

static bool __need_more_worker(struct global_cwq *gcwq)
{
	return !atomic_read(get_gcwq_nr_running(gcwq->cpu)) ||
		gcwq->flags & GCWQ_HIGHPRI_PENDING;
}

/*
 * Need to wake up a worker?  Called from anything but currently
 * running workers.
 */
static bool need_more_worker(struct global_cwq *gcwq)
{
	return !list_empty(&gcwq->worklist) && __need_more_worker(gcwq);
}

/* Can I start working?  Called from busy but !running workers. */
static bool may_start_working(struct global_cwq *gcwq)
{
	return gcwq->nr_idle;
}

/* Do I need to keep working?  Called from currently running workers. */
static bool keep_working(struct global_cwq *gcwq)
{
	atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);

	return !list_empty(&gcwq->worklist) &&
		(atomic_read(nr_running) <= 1 ||
		 gcwq->flags & GCWQ_HIGHPRI_PENDING);
}

/* Do we need a new worker?  Called from manager. */
static bool need_to_create_worker(struct global_cwq *gcwq)
{
	return need_more_worker(gcwq) && !may_start_working(gcwq);
}

/* Do I need to be the manager? */
static bool need_to_manage_workers(struct global_cwq *gcwq)
{
	return need_to_create_worker(gcwq) || gcwq->flags & GCWQ_MANAGE_WORKERS;
}

/* Do we have too many workers and should some go away? */
static bool too_many_workers(struct global_cwq *gcwq)
{
	bool managing = gcwq->flags & GCWQ_MANAGING_WORKERS;
	int nr_idle = gcwq->nr_idle + managing; /* manager is considered idle */
	int nr_busy = gcwq->nr_workers - nr_idle;

	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
}

/*
 * Wake up functions.
 */

/* Return the first worker.  Safe with preemption disabled */
static struct worker *first_worker(struct global_cwq *gcwq)
{
	if (unlikely(list_empty(&gcwq->idle_list)))
		return NULL;

	return list_first_entry(&gcwq->idle_list, struct worker, entry);
}

/**
 * wake_up_worker - wake up an idle worker
 * @gcwq: gcwq to wake worker for
 *
 * Wake up the first idle worker of @gcwq.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 */
static void wake_up_worker(struct global_cwq *gcwq)
{
	struct worker *worker = first_worker(gcwq);

	if (likely(worker))
		wake_up_process(worker->task);
}

/**
 * wq_worker_waking_up - a worker is waking up
 * @task: task waking up
 * @cpu: CPU @task is waking up to
 *
 * This function is called during try_to_wake_up() when a worker is
 * being awoken.
 *
 * CONTEXT:
 * spin_lock_irq(rq->lock)
 */
void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
{
	struct worker *worker = kthread_data(task);

	if (!(worker->flags & WORKER_NOT_RUNNING))
		atomic_inc(get_gcwq_nr_running(cpu));
}

/**
 * wq_worker_sleeping - a worker is going to sleep
 * @task: task going to sleep
 * @cpu: CPU in question, must be the current CPU number
 *
 * This function is called during schedule() when a busy worker is
 * going to sleep.  Worker on the same cpu can be woken up by
 * returning pointer to its task.
 *
 * CONTEXT:
 * spin_lock_irq(rq->lock)
 *
 * RETURNS:
 * Worker task on @cpu to wake up, %NULL if none.
 */
struct task_struct *wq_worker_sleeping(struct task_struct *task,
				       unsigned int cpu)
{
	struct worker *worker = kthread_data(task), *to_wakeup = NULL;
	struct global_cwq *gcwq = get_gcwq(cpu);
	atomic_t *nr_running = get_gcwq_nr_running(cpu);

	if (worker->flags & WORKER_NOT_RUNNING)
		return NULL;

	/* this can only happen on the local cpu */
	BUG_ON(cpu != raw_smp_processor_id());

	/*
	 * The counterpart of the following dec_and_test, implied mb,
	 * worklist not empty test sequence is in insert_work().
	 * Please read comment there.
	 *
	 * NOT_RUNNING is clear.  This means that trustee is not in
	 * charge and we're running on the local cpu w/ rq lock held
	 * and preemption disabled, which in turn means that none else
	 * could be manipulating idle_list, so dereferencing idle_list
	 * without gcwq lock is safe.
	 */
	if (atomic_dec_and_test(nr_running) && !list_empty(&gcwq->worklist))
		to_wakeup = first_worker(gcwq);
	return to_wakeup ? to_wakeup->task : NULL;
}

/**
 * worker_set_flags - set worker flags and adjust nr_running accordingly
 * @worker: self
 * @flags: flags to set
 * @wakeup: wakeup an idle worker if necessary
 *
 * Set @flags in @worker->flags and adjust nr_running accordingly.  If
 * nr_running becomes zero and @wakeup is %true, an idle worker is
 * woken up.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock)
 */
static inline void worker_set_flags(struct worker *worker, unsigned int flags,
				    bool wakeup)
{
	struct global_cwq *gcwq = worker->gcwq;

	WARN_ON_ONCE(worker->task != current);

	/*
	 * If transitioning into NOT_RUNNING, adjust nr_running and
	 * wake up an idle worker as necessary if requested by
	 * @wakeup.
	 */
	if ((flags & WORKER_NOT_RUNNING) &&
	    !(worker->flags & WORKER_NOT_RUNNING)) {
		atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);

		if (wakeup) {
			if (atomic_dec_and_test(nr_running) &&
			    !list_empty(&gcwq->worklist))
				wake_up_worker(gcwq);
		} else
			atomic_dec(nr_running);
	}

	worker->flags |= flags;
}

/**
 * worker_clr_flags - clear worker flags and adjust nr_running accordingly
 * @worker: self
 * @flags: flags to clear
 *
 * Clear @flags in @worker->flags and adjust nr_running accordingly.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock)
 */
static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
{
	struct global_cwq *gcwq = worker->gcwq;
	unsigned int oflags = worker->flags;

	WARN_ON_ONCE(worker->task != current);

	worker->flags &= ~flags;

	/*
	 * If transitioning out of NOT_RUNNING, increment nr_running.  Note
	 * that the nested NOT_RUNNING is not a noop.  NOT_RUNNING is mask
	 * of multiple flags, not a single flag.
	 */
	if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
		if (!(worker->flags & WORKER_NOT_RUNNING))
			atomic_inc(get_gcwq_nr_running(gcwq->cpu));
}

/**
 * busy_worker_head - return the busy hash head for a work
 * @gcwq: gcwq of interest
 * @work: work to be hashed
 *
 * Return hash head of @gcwq for @work.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 *
 * RETURNS:
 * Pointer to the hash head.
 */
static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
					   struct work_struct *work)
{
	const int base_shift = ilog2(sizeof(struct work_struct));
	unsigned long v = (unsigned long)work;

	/* simple shift and fold hash, do we need something better? */
	v >>= base_shift;
	v += v >> BUSY_WORKER_HASH_ORDER;
	v &= BUSY_WORKER_HASH_MASK;

	return &gcwq->busy_hash[v];
}

/**
 * __find_worker_executing_work - find worker which is executing a work
 * @gcwq: gcwq of interest
 * @bwh: hash head as returned by busy_worker_head()
 * @work: work to find worker for
 *
 * Find a worker which is executing @work on @gcwq.  @bwh should be
 * the hash head obtained by calling busy_worker_head() with the same
 * work.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 *
 * RETURNS:
 * Pointer to worker which is executing @work if found, NULL
 * otherwise.
 */
static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
						   struct hlist_head *bwh,
						   struct work_struct *work)
{
	struct worker *worker;
	struct hlist_node *tmp;

	hlist_for_each_entry(worker, tmp, bwh, hentry)
		if (worker->current_work == work)
			return worker;
	return NULL;
}

/**
 * find_worker_executing_work - find worker which is executing a work
 * @gcwq: gcwq of interest
 * @work: work to find worker for
 *
 * Find a worker which is executing @work on @gcwq.  This function is
 * identical to __find_worker_executing_work() except that this
 * function calculates @bwh itself.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 *
 * RETURNS:
 * Pointer to worker which is executing @work if found, NULL
 * otherwise.
 */
static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
						 struct work_struct *work)
{
	return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
					    work);
}

/**
 * gcwq_determine_ins_pos - find insertion position
 * @gcwq: gcwq of interest
 * @cwq: cwq a work is being queued for
 *
 * A work for @cwq is about to be queued on @gcwq, determine insertion
 * position for the work.  If @cwq is for HIGHPRI wq, the work is
 * queued at the head of the queue but in FIFO order with respect to
 * other HIGHPRI works; otherwise, at the end of the queue.  This
 * function also sets GCWQ_HIGHPRI_PENDING flag to hint @gcwq that
 * there are HIGHPRI works pending.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 *
 * RETURNS:
 * Pointer to inserstion position.
 */
static inline struct list_head *gcwq_determine_ins_pos(struct global_cwq *gcwq,
					       struct cpu_workqueue_struct *cwq)
{
	struct work_struct *twork;

	if (likely(!(cwq->wq->flags & WQ_HIGHPRI)))
		return &gcwq->worklist;

	list_for_each_entry(twork, &gcwq->worklist, entry) {
		struct cpu_workqueue_struct *tcwq = get_work_cwq(twork);

		if (!(tcwq->wq->flags & WQ_HIGHPRI))
			break;
	}

	gcwq->flags |= GCWQ_HIGHPRI_PENDING;
	return &twork->entry;
}

/**
 * insert_work - insert a work into gcwq
 * @cwq: cwq @work belongs to
 * @work: work to insert
 * @head: insertion point
 * @extra_flags: extra WORK_STRUCT_* flags to set
 *
 * Insert @work which belongs to @cwq into @gcwq after @head.
 * @extra_flags is or'd to work_struct flags.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 */
static void insert_work(struct cpu_workqueue_struct *cwq,
			struct work_struct *work, struct list_head *head,
			unsigned int extra_flags)
{
	struct global_cwq *gcwq = cwq->gcwq;

	/* we own @work, set data and link */
	set_work_cwq(work, cwq, extra_flags);

	/*
	 * Ensure that we get the right work->data if we see the
	 * result of list_add() below, see try_to_grab_pending().
	 */
	smp_wmb();

	list_add_tail(&work->entry, head);

	/*
	 * Ensure either worker_sched_deactivated() sees the above
	 * list_add_tail() or we see zero nr_running to avoid workers
	 * lying around lazily while there are works to be processed.
	 */
	smp_mb();

	if (__need_more_worker(gcwq))
		wake_up_worker(gcwq);
}

/*
 * Test whether @work is being queued from another work executing on the
 * same workqueue.  This is rather expensive and should only be used from
 * cold paths.
 */
static bool is_chained_work(struct workqueue_struct *wq)
{
	unsigned long flags;
	unsigned int cpu;

	for_each_gcwq_cpu(cpu) {
		struct global_cwq *gcwq = get_gcwq(cpu);
		struct worker *worker;
		struct hlist_node *pos;
		int i;

		spin_lock_irqsave(&gcwq->lock, flags);
		for_each_busy_worker(worker, i, pos, gcwq) {
			if (worker->task != current)
				continue;
			spin_unlock_irqrestore(&gcwq->lock, flags);
			/*
			 * I'm @worker, no locking necessary.  See if @work
			 * is headed to the same workqueue.
			 */
			return worker->current_cwq->wq == wq;
		}
		spin_unlock_irqrestore(&gcwq->lock, flags);
	}
	return false;
}

static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
			 struct work_struct *work)
{
	struct global_cwq *gcwq;
	struct cpu_workqueue_struct *cwq;
	struct list_head *worklist;
	unsigned int work_flags;
	unsigned long flags;

	debug_work_activate(work);

	/* if dying, only works from the same workqueue are allowed */
	if (unlikely(wq->flags & WQ_DYING) &&
	    WARN_ON_ONCE(!is_chained_work(wq)))
		return;

	/* determine gcwq to use */
	if (!(wq->flags & WQ_UNBOUND)) {
		struct global_cwq *last_gcwq;

		if (unlikely(cpu == WORK_CPU_UNBOUND))
			cpu = raw_smp_processor_id();

		/*
		 * It's multi cpu.  If @wq is non-reentrant and @work
		 * was previously on a different cpu, it might still
		 * be running there, in which case the work needs to
		 * be queued on that cpu to guarantee non-reentrance.
		 */
		gcwq = get_gcwq(cpu);
		if (wq->flags & WQ_NON_REENTRANT &&
		    (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
			struct worker *worker;

			spin_lock_irqsave(&last_gcwq->lock, flags);

			worker = find_worker_executing_work(last_gcwq, work);

			if (worker && worker->current_cwq->wq == wq)
				gcwq = last_gcwq;
			else {
				/* meh... not running there, queue here */
				spin_unlock_irqrestore(&last_gcwq->lock, flags);
				spin_lock_irqsave(&gcwq->lock, flags);
			}
		} else
			spin_lock_irqsave(&gcwq->lock, flags);
	} else {
		gcwq = get_gcwq(WORK_CPU_UNBOUND);
		spin_lock_irqsave(&gcwq->lock, flags);
	}

	/* gcwq determined, get cwq and queue */
	cwq = get_cwq(gcwq->cpu, wq);
	trace_workqueue_queue_work(cpu, cwq, work);

	BUG_ON(!list_empty(&work->entry));

	cwq->nr_in_flight[cwq->work_color]++;
	work_flags = work_color_to_flags(cwq->work_color);

	if (likely(cwq->nr_active < cwq->max_active)) {
		trace_workqueue_activate_work(work);
		cwq->nr_active++;
		worklist = gcwq_determine_ins_pos(gcwq, cwq);
	} else {
		work_flags |= WORK_STRUCT_DELAYED;
		worklist = &cwq->delayed_works;
	}

	insert_work(cwq, work, worklist, work_flags);

	spin_unlock_irqrestore(&gcwq->lock, flags);
}

/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to the CPU on which it was submitted, but if the CPU dies
 * it can be processed by another CPU.
 */
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	int ret;

	ret = queue_work_on(get_cpu(), wq, work);
	put_cpu();

	return ret;
}
EXPORT_SYMBOL_GPL(queue_work);

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.
 */
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	int ret = 0;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_work(cpu, wq, work);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_work_on);

static void delayed_work_timer_fn(unsigned long __data)
{
	struct delayed_work *dwork = (struct delayed_work *)__data;
	struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);

	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
}

/**
 * queue_delayed_work - queue work on a workqueue after delay
 * @wq: workqueue to use
 * @dwork: delayable work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work(struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	if (delay == 0)
		return queue_work(wq, &dwork->work);

	return queue_delayed_work_on(-1, wq, dwork, delay);
}
EXPORT_SYMBOL_GPL(queue_delayed_work);

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		unsigned int lcpu;

		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		timer_stats_timer_set_start_info(&dwork->timer);

		/*
		 * This stores cwq for the moment, for the timer_fn.
		 * Note that the work's gcwq is preserved to allow
		 * reentrance detection for delayed works.
		 */
		if (!(wq->flags & WQ_UNBOUND)) {
			struct global_cwq *gcwq = get_work_gcwq(work);

			if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
				lcpu = gcwq->cpu;
			else
				lcpu = raw_smp_processor_id();
		} else
			lcpu = WORK_CPU_UNBOUND;

		set_work_cwq(work, get_cwq(lcpu, wq), 0);

		timer->expires = jiffies + delay;
		timer->data = (unsigned long)dwork;
		timer->function = delayed_work_timer_fn;

		if (unlikely(cpu >= 0))
			add_timer_on(timer, cpu);
		else
			add_timer(timer);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);

/**
 * worker_enter_idle - enter idle state
 * @worker: worker which is entering idle state
 *
 * @worker is entering idle state.  Update stats and idle timer if
 * necessary.
 *
 * LOCKING:
 * spin_lock_irq(gcwq->lock).
 */
static void worker_enter_idle(struct worker *worker)
{
	struct global_cwq *gcwq = worker->gcwq;

	BUG_ON(worker->flags & WORKER_IDLE);
	BUG_ON(!list_empty(&worker->entry) &&
	       (worker->hentry.next || worker->hentry.pprev));

	/* can't use worker_set_flags(), also called from start_worker() */
	worker->flags |= WORKER_IDLE;
	gcwq->nr_idle++;
	worker->last_active = jiffies;

	/* idle_list is LIFO */
	list_add(&worker->entry, &gcwq->idle_list);

	if (likely(!(worker->flags & WORKER_ROGUE))) {
		if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer))
			mod_timer(&gcwq->idle_timer,
				  jiffies + IDLE_WORKER_TIMEOUT);
	} else
		wake_up_all(&gcwq->trustee_wait);

	/* sanity check nr_running */
	WARN_ON_ONCE(gcwq->nr_workers == gcwq->nr_idle &&
		     atomic_read(get_gcwq_nr_running(gcwq->cpu)));
}

/**
 * worker_leave_idle - leave idle state
 * @worker: worker which is leaving idle state
 *
 * @worker is leaving idle state.  Update stats.
 *
 * LOCKING:
 * spin_lock_irq(gcwq->lock).
 */
static void worker_leave_idle(struct worker *worker)
{
	struct global_cwq *gcwq = worker->gcwq;

	BUG_ON(!(worker->flags & WORKER_IDLE));
	worker_clr_flags(worker, WORKER_IDLE);
	gcwq->nr_idle--;
	list_del_init(&worker->entry);
}

/**
 * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq
 * @worker: self
 *
 * Works which are scheduled while the cpu is online must at least be
 * scheduled to a worker which is bound to the cpu so that if they are
 * flushed from cpu callbacks while cpu is going down, they are
 * guaranteed to execute on the cpu.
 *
 * This function is to be used by rogue workers and rescuers to bind
 * themselves to the target cpu and may race with cpu going down or
 * coming online.  kthread_bind() can't be used because it may put the
 * worker to already dead cpu and set_cpus_allowed_ptr() can't be used
 * verbatim as it's best effort and blocking and gcwq may be
 * [dis]associated in the meantime.
 *
 * This function tries set_cpus_allowed() and locks gcwq and verifies
 * the binding against GCWQ_DISASSOCIATED which is set during
 * CPU_DYING and cleared during CPU_ONLINE, so if the worker enters
 * idle state or fetches works without dropping lock, it can guarantee
 * the scheduling requirement described in the first paragraph.
 *
 * CONTEXT:
 * Might sleep.  Called without any lock but returns with gcwq->lock
 * held.
 *
 * RETURNS:
 * %true if the associated gcwq is online (@worker is successfully
 * bound), %false if offline.
 */
static bool worker_maybe_bind_and_lock(struct worker *worker)
__acquires(&gcwq->lock)
{
	struct global_cwq *gcwq = worker->gcwq;
	struct task_struct *task = worker->task;

	while (true) {
		/*
		 * The following call may fail, succeed or succeed
		 * without actually migrating the task to the cpu if
		 * it races with cpu hotunplug operation.  Verify
		 * against GCWQ_DISASSOCIATED.
		 */
		if (!(gcwq->flags & GCWQ_DISASSOCIATED))
			set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu));

		spin_lock_irq(&gcwq->lock);
		if (gcwq->flags & GCWQ_DISASSOCIATED)
			return false;
		if (task_cpu(task) == gcwq->cpu &&
		    cpumask_equal(&current->cpus_allowed,
				  get_cpu_mask(gcwq->cpu)))
			return true;
		spin_unlock_irq(&gcwq->lock);

		/* CPU has come up inbetween, retry migration */
		cpu_relax();
	}
}

/*
 * Function for worker->rebind_work used to rebind rogue busy workers
 * to the associated cpu which is coming back online.  This is
 * scheduled by cpu up but can race with other cpu hotplug operations
 * and may be executed twice without intervening cpu down.
 */
static void worker_rebind_fn(struct work_struct *work)
{
	struct worker *worker = container_of(work, struct worker, rebind_work);
	struct global_cwq *gcwq = worker->gcwq;

	if (worker_maybe_bind_and_lock(worker))
		worker_clr_flags(worker, WORKER_REBIND);

	spin_unlock_irq(&gcwq->lock);
}

static struct worker *alloc_worker(void)
{
	struct worker *worker;

	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
	if (worker) {
		INIT_LIST_HEAD(&worker->entry);
		INIT_LIST_HEAD(&worker->scheduled);
		INIT_WORK(&worker->rebind_work, worker_rebind_fn);
		/* on creation a worker is in !idle && prep state */
		worker->flags = WORKER_PREP;
	}
	return worker;
}

/**
 * create_worker - create a new workqueue worker
 * @gcwq: gcwq the new worker will belong to
 * @bind: whether to set affinity to @cpu or not
 *
 * Create a new worker which is bound to @gcwq.  The returned worker
 * can be started by calling start_worker() or destroyed using
 * destroy_worker().
 *
 * CONTEXT:
 * Might sleep.  Does GFP_KERNEL allocations.
 *
 * RETURNS:
 * Pointer to the newly created worker.
 */
static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
{
	bool on_unbound_cpu = gcwq->cpu == WORK_CPU_UNBOUND;
	struct worker *worker = NULL;
	int id = -1;

	spin_lock_irq(&gcwq->lock);
	while (ida_get_new(&gcwq->worker_ida, &id)) {
		spin_unlock_irq(&gcwq->lock);
		if (!ida_pre_get(&gcwq->worker_ida, GFP_KERNEL))
			goto fail;
		spin_lock_irq(&gcwq->lock);
	}
	spin_unlock_irq(&gcwq->lock);

	worker = alloc_worker();
	if (!worker)
		goto fail;

	worker->gcwq = gcwq;
	worker->id = id;

	if (!on_unbound_cpu)
		worker->task = kthread_create(worker_thread, worker,
					      "kworker/%u:%d", gcwq->cpu, id);
	else
		worker->task = kthread_create(worker_thread, worker,
					      "kworker/u:%d", id);
	if (IS_ERR(worker->task))
		goto fail;

	/*
	 * A rogue worker will become a regular one if CPU comes
	 * online later on.  Make sure every worker has
	 * PF_THREAD_BOUND set.
	 */
	if (bind && !on_unbound_cpu)
		kthread_bind(worker->task, gcwq->cpu);
	else {
		worker->task->flags |= PF_THREAD_BOUND;
		if (on_unbound_cpu)
			worker->flags |= WORKER_UNBOUND;
	}

	return worker;
fail:
	if (id >= 0) {
		spin_lock_irq(&gcwq->lock);
		ida_remove(&gcwq->worker_ida, id);
		spin_unlock_irq(&gcwq->lock);
	}
	kfree(worker);
	return NULL;
}

/**
 * start_worker - start a newly created worker
 * @worker: worker to start
 *
 * Make the gcwq aware of @worker and start it.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 */
static void start_worker(struct worker *worker)
{
	worker->flags |= WORKER_STARTED;
	worker->gcwq->nr_workers++;
	worker_enter_idle(worker);
	wake_up_process(worker->task);
}

/**
 * destroy_worker - destroy a workqueue worker
 * @worker: worker to be destroyed
 *
 * Destroy @worker and adjust @gcwq stats accordingly.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
 */
static void destroy_worker(struct worker *worker)
{
	struct global_cwq *gcwq = worker->gcwq;
	int id = worker->id;

	/* sanity check frenzy */
	BUG_ON(worker->current_work);
	BUG_ON(!list_empty(&worker->scheduled));

	if (worker->flags & WORKER_STARTED)
		gcwq->nr_workers--;
	if (worker->flags & WORKER_IDLE)
		gcwq->nr_idle--;

	list_del_init(&worker->entry);
	worker->flags |= WORKER_DIE;

	spin_unlock_irq(&gcwq->lock);

	kthread_stop(worker->task);
	kfree(worker);

	spin_lock_irq(&gcwq->lock);
	ida_remove(&gcwq->worker_ida, id);
}

static void idle_worker_timeout(unsigned long __gcwq)
{
	struct global_cwq *gcwq = (void *)__gcwq;

	spin_lock_irq(&gcwq->lock);

	if (too_many_workers(gcwq)) {
		struct worker *worker;
		unsigned long expires;

		/* idle_list is kept in LIFO order, check the last one */
		worker = list_entry(gcwq->idle_list.prev, struct worker, entry);
		expires = worker->last_active + IDLE_WORKER_TIMEOUT;

		if (time_before(jiffies, expires))
			mod_timer(&gcwq->idle_timer, expires);
		else {
			/* it's been idle for too long, wake up manager */
			gcwq->flags |= GCWQ_MANAGE_WORKERS;
			wake_up_worker(gcwq);
		}
	}

	spin_unlock_irq(&gcwq->lock);
}

static bool send_mayday(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq = get_work_cwq(work);
	struct workqueue_struct *wq = cwq->wq;
	unsigned int cpu;

	if (!(wq->flags & WQ_RESCUER))
		return false;

	/* mayday mayday mayday */
	cpu = cwq->gcwq->cpu;
	/* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
	if (cpu == WORK_CPU_UNBOUND)
		cpu = 0;
	if (!mayday_test_and_set_cpu(cpu, wq->mayday_mask))
		wake_up_process(wq->rescuer->task);
	return true;
}

static void gcwq_mayday_timeout(unsigned long __gcwq)
{
	struct global_cwq *gcwq = (void *)__gcwq;
	struct work_struct *work;

	spin_lock_irq(&gcwq->lock);

	if (need_to_create_worker(gcwq)) {
		/*
		 * We've been trying to create a new worker but
		 * haven't been successful.  We might be hitting an
		 * allocation deadlock.  Send distress signals to
		 * rescuers.
		 */
		list_for_each_entry(work, &gcwq->worklist, entry)
			send_mayday(work);
	}

	spin_unlock_irq(&gcwq->lock);

	mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INTERVAL);
}

/**
 * maybe_create_worker - create a new worker if necessary
 * @gcwq: gcwq to create a new worker for
 *
 * Create a new worker for @gcwq if necessary.  @gcwq is guaranteed to
 * have at least one idle worker on return from this function.  If
 * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
 * sent to all rescuers with works scheduled on @gcwq to resolve
 * possible allocation deadlock.
 *
 * On return, need_to_create_worker() is guaranteed to be false and
 * may_start_working() true.
 *
 * LOCKING:
 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
 * multiple times.  Does GFP_KERNEL allocations.  Called only from
 * manager.
 *
 * RETURNS:
 * false if no action was taken and gcwq->lock stayed locked, true
 * otherwise.
 */
static bool maybe_create_worker(struct global_cwq *gcwq)
__releases(&gcwq->lock)
__acquires(&gcwq->lock)
{
	if (!need_to_create_worker(gcwq))
		return false;
restart:
	spin_unlock_irq(&gcwq->lock);

	/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
	mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);

	while (true) {
		struct worker *worker;

		worker = create_worker(gcwq, true);
		if (worker) {
			del_timer_sync(&gcwq->mayday_timer);
			spin_lock_irq(&gcwq->lock);
			start_worker(worker);
			BUG_ON(need_to_create_worker(gcwq));
			return true;
		}

		if (!need_to_create_worker(gcwq))
			break;

		__set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(CREATE_COOLDOWN);

		if (!need_to_create_worker(gcwq))
			break;
	}

	del_timer_sync(&gcwq->mayday_timer);
	spin_lock_irq(&gcwq->lock);
	if (need_to_create_worker(gcwq))
		goto restart;
	return true;
}

/**
 * maybe_destroy_worker - destroy workers which have been idle for a while
 * @gcwq: gcwq to destroy workers for
 *
 * Destroy @gcwq workers which have been idle for longer than
 * IDLE_WORKER_TIMEOUT.
 *
 * LOCKING:
 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
 * multiple times.  Called only from manager.
 *
 * RETURNS:
 * false if no action was taken and gcwq->lock stayed locked, true
 * otherwise.
 */
static bool maybe_destroy_workers(struct global_cwq *gcwq)
{
	bool ret = false;

	while (too_many_workers(gcwq)) {
		struct worker *worker;
		unsigned long expires;

		worker = list_entry(gcwq->idle_list.prev, struct worker, entry);
		expires = worker->last_active + IDLE_WORKER_TIMEOUT;

		if (time_before(jiffies, expires)) {
			mod_timer(&gcwq->idle_timer, expires);
			break;
		}

		destroy_worker(worker);
		ret = true;
	}

	return ret;
}

/**
 * manage_workers - manage worker pool
 * @worker: self
 *
 * Assume the manager role and manage gcwq worker pool @worker belongs
 * to.  At any given time, there can be only zero or one manager per
 * gcwq.  The exclusion is handled automatically by this function.
 *
 * The caller can safely start processing works on false return.  On
 * true return, it's guaranteed that need_to_create_worker() is false
 * and may_start_working() is true.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
 * multiple times.  Does GFP_KERNEL allocations.
 *
 * RETURNS:
 * false if no action was taken and gcwq->lock stayed locked, true if
 * some action was taken.
 */
static bool manage_workers(struct worker *worker)
{
	struct global_cwq *gcwq = worker->gcwq;
	bool ret = false;

	if (gcwq->flags & GCWQ_MANAGING_WORKERS)
		return ret;

	gcwq->flags &= ~GCWQ_MANAGE_WORKERS;
	gcwq->flags |= GCWQ_MANAGING_WORKERS;

	/*
	 * Destroy and then create so that may_start_working() is true
	 * on return.
	 */
	ret |= maybe_destroy_workers(gcwq);
	ret |= maybe_create_worker(gcwq);

	gcwq->flags &= ~GCWQ_MANAGING_WORKERS;

	/*
	 * The trustee might be waiting to take over the manager
	 * position, tell it we're done.
	 */
	if (unlikely(gcwq->trustee))
		wake_up_all(&gcwq->trustee_wait);

	return ret;
}

/**
 * move_linked_works - move linked works to a list
 * @work: start of series of works to be scheduled
 * @head: target list to append @work to
 * @nextp: out paramter for nested worklist walking
 *
 * Schedule linked works starting from @work to @head.  Work series to