author		Linus Torvalds <torvalds@linux-foundation.org>	2016-10-03 19:13:28 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-10-03 19:13:28 -0400
commit		1a4a2bc460721bc8f91e4c1294d39b38e5af132f (patch)
tree		fe646d05f6e17f05601e0a32cc796bec718ab6e7 /drivers
parent		110a9e42b68719f584879c5c5c727bbae90d15f9 (diff)
parent		1ef55be16ed69538f89e0a6508be5e62fdc9851c (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull low-level x86 updates from Ingo Molnar:
"In this cycle this topic tree has become one of those 'super topics'
that accumulated a lot of changes:
- Add CONFIG_VMAP_STACK=y support to the core kernel and enable it on
x86 - preceded by an array of changes. v4.8 saw preparatory changes
in this area already - this is the rest of the work. Includes the
thread stack caching performance optimization. (Andy Lutomirski)
- switch_to() cleanups and all-around enhancements. (Brian Gerst)
- A large number of dumpstack infrastructure enhancements and an
unwinder abstraction. The secret long term plan is safe(r) live
patching plus maybe another attempt at debuginfo based unwinding -
but all these current bits are standalone enhancements in a frame
pointer based debug environment as well. (Josh Poimboeuf)
- More __ro_after_init and const annotations. (Kees Cook)
- Enable KASLR for the vmemmap memory region. (Thomas Garnier)"
[ The virtually mapped stack changes are pretty fundamental, and not
x86-specific per se, even if they are only used on x86 right now. ]
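[ A minimal sketch of the failure mode that motivates this caution; the
  device and helper names below are illustrative, not from this merge.
  Code that hands a stack address to hardware implicitly assumes the
  stack lives in the kernel's linear mapping, where virt_to_phys() is
  valid. With CONFIG_VMAP_STACK=y the stack is allocated from vmalloc
  space, so that assumption silently breaks:

	/* Illustrative only: DMA to an on-stack buffer under vmapped stacks. */
	static void broken_writeback_setup(struct fake_dev *dev)
	{
		u64 status = 0;		/* on the (possibly vmalloc'ed) stack */

		/*
		 * virt_to_phys() is only defined for linear-mapping
		 * addresses; applied to a vmalloc'ed stack slot it yields
		 * a bogus physical address, and the device then DMAs its
		 * completion status into an unrelated page.
		 */
		fake_dev_set_writeback(dev, virt_to_phys(&status));
	}

  The cure is to move such DMA targets into kmalloc'ed or structure-
  embedded memory, which is exactly what the iommu/amd commit in the
  diff below does. ]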
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
x86/asm: Get rid of __read_cr4_safe()
thread_info: Use unsigned long for flags
x86/alternatives: Add stack frame dependency to alternative_call_2()
x86/dumpstack: Fix show_stack() task pointer regression
x86/dumpstack: Remove dump_trace() and related callbacks
x86/dumpstack: Convert show_trace_log_lvl() to use the new unwinder
oprofile/x86: Convert x86_backtrace() to use the new unwinder
x86/stacktrace: Convert save_stack_trace_*() to use the new unwinder
perf/x86: Convert perf_callchain_kernel() to use the new unwinder
x86/unwind: Add new unwind interface and implementations
x86/dumpstack: Remove NULL task pointer convention
fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y
sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
lib/syscall: Pin the task stack in collect_syscall()
x86/process: Pin the target stack in get_wchan()
x86/dumpstack: Pin the target stack when dumping it
kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function
sched/core: Add try_get_task_stack() and put_task_stack()
x86/entry/64: Fix a minor comment rebase error
iommu/amd: Don't put completion-wait semaphore on stack
...
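[ A condensed sketch of the consumer side of the new unwind interface
  referenced by the conversion commits above, modeled on those callers;
  the printk() of each return address stands in for whatever a real
  consumer would record:

	#include <asm/unwind.h>

	static void dump_return_addresses(struct task_struct *task)
	{
		struct unwind_state state;
		unsigned long addr;

		for (unwind_start(&state, task, NULL, NULL);
		     !unwind_done(&state);
		     unwind_next_frame(&state)) {
			addr = unwind_get_return_address(&state);
			if (!addr)
				break;
			printk("%pS\n", (void *)addr);
		}
	}

  The abstraction hides how frames are found - frame pointers today,
  perhaps debuginfo later - so callers just iterate. ]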
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/iommu/amd_iommu.c	51 ++++++++++++++++++++++++-------------
-rw-r--r--	drivers/iommu/amd_iommu_types.h	2 ++
2 files changed, 37 insertions(+), 16 deletions(-)
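[ The driver change below in condensed form, with locking and error
  handling elided (the full diff follows): the completion-wait
  semaphore moves off the stack into struct amd_iommu, so the IOMMU's
  DMA write always lands in linearly mapped memory, vmapped stacks or
  not:

	/* Before: the hardware DMAs its completion store into a stack slot */
	volatile u64 sem = 0;
	build_completion_wait(&cmd, (u64)&sem);

	/* After: the semaphore is embedded in the per-IOMMU structure */
	iommu->cmd_sem = 0;
	build_completion_wait(&cmd, (u64)&iommu->cmd_sem);

  Because cmd_sem is now shared by all callers on a given IOMMU rather
  than private to one stack frame, queueing and waiting must happen
  under iommu->lock, which is why iommu_queue_command_sync() is split
  into a locked wrapper around a new __iommu_queue_command_sync(). ]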
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 96de97a46079..4025291ea0ae 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -940,15 +940,13 @@ static void build_inv_irt(struct iommu_cmd *cmd, u16 devid)
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command.
  */
-static int iommu_queue_command_sync(struct amd_iommu *iommu,
-				    struct iommu_cmd *cmd,
-				    bool sync)
+static int __iommu_queue_command_sync(struct amd_iommu *iommu,
+				      struct iommu_cmd *cmd,
+				      bool sync)
 {
 	u32 left, tail, head, next_tail;
-	unsigned long flags;
 
 again:
-	spin_lock_irqsave(&iommu->lock, flags);
 
 	head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
@@ -957,15 +955,14 @@ again:
 
 	if (left <= 2) {
 		struct iommu_cmd sync_cmd;
-		volatile u64 sem = 0;
 		int ret;
 
-		build_completion_wait(&sync_cmd, (u64)&sem);
-		copy_cmd_to_buffer(iommu, &sync_cmd, tail);
+		iommu->cmd_sem = 0;
 
-		spin_unlock_irqrestore(&iommu->lock, flags);
+		build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
+		copy_cmd_to_buffer(iommu, &sync_cmd, tail);
 
-		if ((ret = wait_on_sem(&sem)) != 0)
+		if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
 			return ret;
 
 		goto again;
@@ -976,9 +973,21 @@ again:
 	/* We need to sync now to make sure all commands are processed */
 	iommu->need_sync = sync;
 
+	return 0;
+}
+
+static int iommu_queue_command_sync(struct amd_iommu *iommu,
+				    struct iommu_cmd *cmd,
+				    bool sync)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+	ret = __iommu_queue_command_sync(iommu, cmd, sync);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	return 0;
+	return ret;
 }
 
 static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
@@ -993,19 +1002,29 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
 	struct iommu_cmd cmd;
-	volatile u64 sem = 0;
+	unsigned long flags;
 	int ret;
 
 	if (!iommu->need_sync)
 		return 0;
 
-	build_completion_wait(&cmd, (u64)&sem);
 
-	ret = iommu_queue_command_sync(iommu, &cmd, false);
+	build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	iommu->cmd_sem = 0;
+
+	ret = __iommu_queue_command_sync(iommu, &cmd, false);
 	if (ret)
-		return ret;
+		goto out_unlock;
+
+	ret = wait_on_sem(&iommu->cmd_sem);
 
-	return wait_on_sem(&sem);
+out_unlock:
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	return ret;
 }
 
 static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index caf5e3822715..9652848e3155 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -524,6 +524,8 @@ struct amd_iommu {
 	struct irq_domain *ir_domain;
 	struct irq_domain *msi_domain;
 #endif
+
+	volatile u64 __aligned(8) cmd_sem;
 };
 
 #define ACPIHID_UID_LEN 256
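[ For context on the new field's declaration: the IOMMU signals
  completion by DMA-writing a non-zero value to the semaphore, which
  the driver busy-polls, roughly like the driver's existing
  wait_on_sem() helper (condensed from this file; LOOP_TIMEOUT is its
  existing poll bound):

	static int wait_on_sem(volatile u64 *sem)
	{
		int i = 0;

		while (*sem == 0 && i < LOOP_TIMEOUT) {
			udelay(1);
			i += 1;
		}

		if (i == LOOP_TIMEOUT) {
			pr_alert("AMD-Vi: Completion-Wait loop timed out\n");
			return -EIO;
		}

		return 0;
	}

  The volatile qualifier forces the compiler to re-read memory the
  hardware writes behind its back, and __aligned(8) keeps the 64-bit
  DMA store from straddling an alignment boundary the hardware cannot
  write atomically. ]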