diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 65 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 39 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h | 24 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h | 57 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 102 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 15 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_bus_gm20b.h | 24 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_pram_gm20b.h | 57 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/pmu_gm20b.c | 38 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/pci.c | 24 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/vgpu.c | 4 |
19 files changed, 365 insertions, 114 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index f3e2c29d..6b3c9e16 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile | |||
@@ -8,6 +8,7 @@ ccflags-y += -I../nvgpu/include/uapi | |||
8 | ccflags-y += -Wno-multichar | 8 | ccflags-y += -Wno-multichar |
9 | ccflags-y += -Werror | 9 | ccflags-y += -Werror |
10 | ccflags-y += -Wno-error=cpp | 10 | ccflags-y += -Wno-error=cpp |
11 | ccflags-y += -Wno-duplicate-decl-specifier | ||
11 | 12 | ||
12 | ifeq ($(CONFIG_ARCH_TEGRA_18x_SOC),y) | 13 | ifeq ($(CONFIG_ARCH_TEGRA_18x_SOC),y) |
13 | ccflags-y += -I$(srctree)/../nvgpu-t18x/drivers/gpu/nvgpu | 14 | ccflags-y += -I$(srctree)/../nvgpu-t18x/drivers/gpu/nvgpu |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index f70c4bb7..a73a314c 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -2081,7 +2081,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2081 | } | 2081 | } |
2082 | 2082 | ||
2083 | if (err) { | 2083 | if (err) { |
2084 | gk20a_err(d, "timeout waiting for gpfifo space"); | ||
2085 | err = -ENOSPC; | 2084 | err = -ENOSPC; |
2086 | goto clean_up; | 2085 | goto clean_up; |
2087 | } | 2086 | } |
@@ -2279,7 +2278,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2279 | return err; | 2278 | return err; |
2280 | 2279 | ||
2281 | clean_up: | 2280 | clean_up: |
2282 | gk20a_err(d, "fail"); | 2281 | gk20a_dbg_fn("fail"); |
2283 | free_priv_cmdbuf(c, wait_cmd); | 2282 | free_priv_cmdbuf(c, wait_cmd); |
2284 | free_priv_cmdbuf(c, incr_cmd); | 2283 | free_priv_cmdbuf(c, incr_cmd); |
2285 | gk20a_fence_put(pre_fence); | 2284 | gk20a_fence_put(pre_fence); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 011c980e..10f1213b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -204,11 +204,8 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | |||
204 | incr_cmd_size += 2; | 204 | incr_cmd_size += 2; |
205 | 205 | ||
206 | err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd); | 206 | err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd); |
207 | if (err) { | 207 | if (err) |
208 | gk20a_err(dev_from_gk20a(c->g), | ||
209 | "not enough priv cmd buffer space"); | ||
210 | return err; | 208 | return err; |
211 | } | ||
212 | 209 | ||
213 | off = incr_cmd->off; | 210 | off = incr_cmd->off; |
214 | 211 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 2008289b..1d05c902 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c | |||
@@ -662,6 +662,10 @@ int gr_gk20a_css_attach(struct gk20a *g, | |||
662 | if (!cs_client) | 662 | if (!cs_client) |
663 | return -EINVAL; | 663 | return -EINVAL; |
664 | 664 | ||
665 | if (!perfmon_count || | ||
666 | perfmon_count > CSS_MAX_PERFMON_IDS - CSS_FIRST_PERFMON_ID) | ||
667 | return -EINVAL; | ||
668 | |||
665 | gr = &g->gr; | 669 | gr = &g->gr; |
666 | *cs_client = NULL; | 670 | *cs_client = NULL; |
667 | 671 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index 6744699f..77e93458 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | |||
@@ -70,11 +70,6 @@ static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr) | |||
70 | return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; | 70 | return (hdr->write_idx - hdr->read_idx) % hdr->num_ents; |
71 | } | 71 | } |
72 | 72 | ||
73 | static inline int ring_space(struct nvgpu_ctxsw_ring_header *hdr) | ||
74 | { | ||
75 | return (hdr->read_idx - hdr->write_idx - 1) % hdr->num_ents; | ||
76 | } | ||
77 | |||
78 | ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, | 73 | ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size, |
79 | loff_t *off) | 74 | loff_t *off) |
80 | { | 75 | { |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 3ab26fb9..2e40a675 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -705,6 +705,13 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) | |||
705 | if (!g->power_on) | 705 | if (!g->power_on) |
706 | goto done; | 706 | goto done; |
707 | 707 | ||
708 | /* | ||
709 | * After this point, gk20a interrupts should not get | ||
710 | * serviced. | ||
711 | */ | ||
712 | disable_irq(g->irq_stall); | ||
713 | disable_irq(g->irq_nonstall); | ||
714 | |||
708 | gk20a_scale_suspend(dev); | 715 | gk20a_scale_suspend(dev); |
709 | 716 | ||
710 | /* cancel any pending cde work */ | 717 | /* cancel any pending cde work */ |
@@ -716,12 +723,6 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) | |||
716 | 723 | ||
717 | /* disable elpg before gr or fifo suspend */ | 724 | /* disable elpg before gr or fifo suspend */ |
718 | ret |= gk20a_pmu_destroy(g); | 725 | ret |= gk20a_pmu_destroy(g); |
719 | /* | ||
720 | * After this point, gk20a interrupts should not get | ||
721 | * serviced. | ||
722 | */ | ||
723 | disable_irq(g->irq_stall); | ||
724 | disable_irq(g->irq_nonstall); | ||
725 | 726 | ||
726 | ret |= gk20a_gr_suspend(g); | 727 | ret |= gk20a_gr_suspend(g); |
727 | ret |= gk20a_mm_suspend(g); | 728 | ret |= gk20a_mm_suspend(g); |
@@ -966,7 +967,8 @@ static int gk20a_create_device( | |||
966 | struct device *dev, int devno, | 967 | struct device *dev, int devno, |
967 | const char *interface_name, const char *cdev_name, | 968 | const char *interface_name, const char *cdev_name, |
968 | struct cdev *cdev, struct device **out, | 969 | struct cdev *cdev, struct device **out, |
969 | const struct file_operations *ops) | 970 | const struct file_operations *ops, |
971 | struct class *class) | ||
970 | { | 972 | { |
971 | struct device *subdev; | 973 | struct device *subdev; |
972 | int err; | 974 | int err; |
@@ -982,7 +984,7 @@ static int gk20a_create_device( | |||
982 | return err; | 984 | return err; |
983 | } | 985 | } |
984 | 986 | ||
985 | subdev = device_create(&nvgpu_class, NULL, devno, NULL, | 987 | subdev = device_create(class, NULL, devno, NULL, |
986 | interface_name, cdev_name); | 988 | interface_name, cdev_name); |
987 | 989 | ||
988 | if (IS_ERR(subdev)) { | 990 | if (IS_ERR(subdev)) { |
@@ -997,42 +999,42 @@ static int gk20a_create_device( | |||
997 | return 0; | 999 | return 0; |
998 | } | 1000 | } |
999 | 1001 | ||
1000 | void gk20a_user_deinit(struct device *dev) | 1002 | void gk20a_user_deinit(struct device *dev, struct class *class) |
1001 | { | 1003 | { |
1002 | struct gk20a *g = gk20a_from_dev(dev); | 1004 | struct gk20a *g = gk20a_from_dev(dev); |
1003 | 1005 | ||
1004 | if (g->channel.node) { | 1006 | if (g->channel.node) { |
1005 | device_destroy(&nvgpu_class, g->channel.cdev.dev); | 1007 | device_destroy(class, g->channel.cdev.dev); |
1006 | cdev_del(&g->channel.cdev); | 1008 | cdev_del(&g->channel.cdev); |
1007 | } | 1009 | } |
1008 | 1010 | ||
1009 | if (g->as.node) { | 1011 | if (g->as.node) { |
1010 | device_destroy(&nvgpu_class, g->as.cdev.dev); | 1012 | device_destroy(class, g->as.cdev.dev); |
1011 | cdev_del(&g->as.cdev); | 1013 | cdev_del(&g->as.cdev); |
1012 | } | 1014 | } |
1013 | 1015 | ||
1014 | if (g->ctrl.node) { | 1016 | if (g->ctrl.node) { |
1015 | device_destroy(&nvgpu_class, g->ctrl.cdev.dev); | 1017 | device_destroy(class, g->ctrl.cdev.dev); |
1016 | cdev_del(&g->ctrl.cdev); | 1018 | cdev_del(&g->ctrl.cdev); |
1017 | } | 1019 | } |
1018 | 1020 | ||
1019 | if (g->dbg.node) { | 1021 | if (g->dbg.node) { |
1020 | device_destroy(&nvgpu_class, g->dbg.cdev.dev); | 1022 | device_destroy(class, g->dbg.cdev.dev); |
1021 | cdev_del(&g->dbg.cdev); | 1023 | cdev_del(&g->dbg.cdev); |
1022 | } | 1024 | } |
1023 | 1025 | ||
1024 | if (g->prof.node) { | 1026 | if (g->prof.node) { |
1025 | device_destroy(&nvgpu_class, g->prof.cdev.dev); | 1027 | device_destroy(class, g->prof.cdev.dev); |
1026 | cdev_del(&g->prof.cdev); | 1028 | cdev_del(&g->prof.cdev); |
1027 | } | 1029 | } |
1028 | 1030 | ||
1029 | if (g->tsg.node) { | 1031 | if (g->tsg.node) { |
1030 | device_destroy(&nvgpu_class, g->tsg.cdev.dev); | 1032 | device_destroy(class, g->tsg.cdev.dev); |
1031 | cdev_del(&g->tsg.cdev); | 1033 | cdev_del(&g->tsg.cdev); |
1032 | } | 1034 | } |
1033 | 1035 | ||
1034 | if (g->ctxsw.node) { | 1036 | if (g->ctxsw.node) { |
1035 | device_destroy(&nvgpu_class, g->ctxsw.cdev.dev); | 1037 | device_destroy(class, g->ctxsw.cdev.dev); |
1036 | cdev_del(&g->ctxsw.cdev); | 1038 | cdev_del(&g->ctxsw.cdev); |
1037 | } | 1039 | } |
1038 | 1040 | ||
@@ -1040,7 +1042,8 @@ void gk20a_user_deinit(struct device *dev) | |||
1040 | unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS); | 1042 | unregister_chrdev_region(g->cdev_region, GK20A_NUM_CDEVS); |
1041 | } | 1043 | } |
1042 | 1044 | ||
1043 | int gk20a_user_init(struct device *dev, const char *interface_name) | 1045 | int gk20a_user_init(struct device *dev, const char *interface_name, |
1046 | struct class *class) | ||
1044 | { | 1047 | { |
1045 | int err; | 1048 | int err; |
1046 | dev_t devno; | 1049 | dev_t devno; |
@@ -1055,44 +1058,51 @@ int gk20a_user_init(struct device *dev, const char *interface_name) | |||
1055 | 1058 | ||
1056 | err = gk20a_create_device(dev, devno++, interface_name, "", | 1059 | err = gk20a_create_device(dev, devno++, interface_name, "", |
1057 | &g->channel.cdev, &g->channel.node, | 1060 | &g->channel.cdev, &g->channel.node, |
1058 | &gk20a_channel_ops); | 1061 | &gk20a_channel_ops, |
1062 | class); | ||
1059 | if (err) | 1063 | if (err) |
1060 | goto fail; | 1064 | goto fail; |
1061 | 1065 | ||
1062 | err = gk20a_create_device(dev, devno++, interface_name, "-as", | 1066 | err = gk20a_create_device(dev, devno++, interface_name, "-as", |
1063 | &g->as.cdev, &g->as.node, | 1067 | &g->as.cdev, &g->as.node, |
1064 | &gk20a_as_ops); | 1068 | &gk20a_as_ops, |
1069 | class); | ||
1065 | if (err) | 1070 | if (err) |
1066 | goto fail; | 1071 | goto fail; |
1067 | 1072 | ||
1068 | err = gk20a_create_device(dev, devno++, interface_name, "-ctrl", | 1073 | err = gk20a_create_device(dev, devno++, interface_name, "-ctrl", |
1069 | &g->ctrl.cdev, &g->ctrl.node, | 1074 | &g->ctrl.cdev, &g->ctrl.node, |
1070 | &gk20a_ctrl_ops); | 1075 | &gk20a_ctrl_ops, |
1076 | class); | ||
1071 | if (err) | 1077 | if (err) |
1072 | goto fail; | 1078 | goto fail; |
1073 | 1079 | ||
1074 | err = gk20a_create_device(dev, devno++, interface_name, "-dbg", | 1080 | err = gk20a_create_device(dev, devno++, interface_name, "-dbg", |
1075 | &g->dbg.cdev, &g->dbg.node, | 1081 | &g->dbg.cdev, &g->dbg.node, |
1076 | &gk20a_dbg_ops); | 1082 | &gk20a_dbg_ops, |
1083 | class); | ||
1077 | if (err) | 1084 | if (err) |
1078 | goto fail; | 1085 | goto fail; |
1079 | 1086 | ||
1080 | err = gk20a_create_device(dev, devno++, interface_name, "-prof", | 1087 | err = gk20a_create_device(dev, devno++, interface_name, "-prof", |
1081 | &g->prof.cdev, &g->prof.node, | 1088 | &g->prof.cdev, &g->prof.node, |
1082 | &gk20a_prof_ops); | 1089 | &gk20a_prof_ops, |
1090 | class); | ||
1083 | if (err) | 1091 | if (err) |
1084 | goto fail; | 1092 | goto fail; |
1085 | 1093 | ||
1086 | err = gk20a_create_device(dev, devno++, interface_name, "-tsg", | 1094 | err = gk20a_create_device(dev, devno++, interface_name, "-tsg", |
1087 | &g->tsg.cdev, &g->tsg.node, | 1095 | &g->tsg.cdev, &g->tsg.node, |
1088 | &gk20a_tsg_ops); | 1096 | &gk20a_tsg_ops, |
1097 | class); | ||
1089 | if (err) | 1098 | if (err) |
1090 | goto fail; | 1099 | goto fail; |
1091 | 1100 | ||
1092 | #ifdef CONFIG_GK20A_CTXSW_TRACE | 1101 | #ifdef CONFIG_GK20A_CTXSW_TRACE |
1093 | err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw", | 1102 | err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw", |
1094 | &g->ctxsw.cdev, &g->ctxsw.node, | 1103 | &g->ctxsw.cdev, &g->ctxsw.node, |
1095 | &gk20a_ctxsw_ops); | 1104 | &gk20a_ctxsw_ops, |
1105 | class); | ||
1096 | if (err) | 1106 | if (err) |
1097 | goto fail; | 1107 | goto fail; |
1098 | #endif | 1108 | #endif |
@@ -1100,7 +1110,7 @@ int gk20a_user_init(struct device *dev, const char *interface_name) | |||
1100 | 1110 | ||
1101 | return 0; | 1111 | return 0; |
1102 | fail: | 1112 | fail: |
1103 | gk20a_user_deinit(dev); | 1113 | gk20a_user_deinit(dev, &nvgpu_class); |
1104 | return err; | 1114 | return err; |
1105 | } | 1115 | } |
1106 | 1116 | ||
@@ -1464,7 +1474,7 @@ static int gk20a_probe(struct platform_device *dev) | |||
1464 | if (gk20a->irq_stall != gk20a->irq_nonstall) | 1474 | if (gk20a->irq_stall != gk20a->irq_nonstall) |
1465 | disable_irq(gk20a->irq_nonstall); | 1475 | disable_irq(gk20a->irq_nonstall); |
1466 | 1476 | ||
1467 | err = gk20a_user_init(&dev->dev, INTERFACE_NAME); | 1477 | err = gk20a_user_init(&dev->dev, INTERFACE_NAME, &nvgpu_class); |
1468 | if (err) | 1478 | if (err) |
1469 | return err; | 1479 | return err; |
1470 | 1480 | ||
@@ -1644,7 +1654,7 @@ static int __exit gk20a_remove(struct platform_device *pdev) | |||
1644 | if (g->remove_support) | 1654 | if (g->remove_support) |
1645 | g->remove_support(dev); | 1655 | g->remove_support(dev); |
1646 | 1656 | ||
1647 | gk20a_user_deinit(dev); | 1657 | gk20a_user_deinit(dev, &nvgpu_class); |
1648 | 1658 | ||
1649 | debugfs_remove_recursive(platform->debugfs); | 1659 | debugfs_remove_recursive(platform->debugfs); |
1650 | debugfs_remove_recursive(platform->debugfs_alias); | 1660 | debugfs_remove_recursive(platform->debugfs_alias); |
@@ -1751,7 +1761,6 @@ static int gk20a_domain_init(struct of_device_id *matches) | |||
1751 | } | 1761 | } |
1752 | #endif | 1762 | #endif |
1753 | 1763 | ||
1754 | |||
1755 | struct class nvgpu_class = { | 1764 | struct class nvgpu_class = { |
1756 | .owner = THIS_MODULE, | 1765 | .owner = THIS_MODULE, |
1757 | .name = CLASS_NAME, | 1766 | .name = CLASS_NAME, |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index f5b4bb3f..d131862b 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -1067,8 +1067,9 @@ int gk20a_init_gpu_characteristics(struct gk20a *g); | |||
1067 | 1067 | ||
1068 | void gk20a_pbus_isr(struct gk20a *g); | 1068 | void gk20a_pbus_isr(struct gk20a *g); |
1069 | 1069 | ||
1070 | int gk20a_user_init(struct device *dev, const char *interface_name); | 1070 | int gk20a_user_init(struct device *dev, const char *interface_name, |
1071 | void gk20a_user_deinit(struct device *dev); | 1071 | struct class *class); |
1072 | void gk20a_user_deinit(struct device *dev, struct class *class); | ||
1072 | 1073 | ||
1073 | void gk20a_debug_dump_device(void *dev); | 1074 | void gk20a_debug_dump_device(void *dev); |
1074 | 1075 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 60247da8..e8d363e1 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -1203,15 +1203,18 @@ static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g, | |||
1203 | u32 reg_offset; | 1203 | u32 reg_offset; |
1204 | bool assign_alpha; | 1204 | bool assign_alpha; |
1205 | 1205 | ||
1206 | u32 map_alpha[gr_pd_alpha_ratio_table__size_1_v()]; | 1206 | u32 *map_alpha; |
1207 | u32 map_beta[gr_pd_alpha_ratio_table__size_1_v()]; | 1207 | u32 *map_beta; |
1208 | u32 map_reg_used[gr_pd_alpha_ratio_table__size_1_v()]; | 1208 | u32 *map_reg_used; |
1209 | 1209 | ||
1210 | gk20a_dbg_fn(""); | 1210 | gk20a_dbg_fn(""); |
1211 | 1211 | ||
1212 | memset(map_alpha, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32)); | 1212 | map_alpha = kzalloc(3 * gr_pd_alpha_ratio_table__size_1_v() * |
1213 | memset(map_beta, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32)); | 1213 | sizeof(u32), GFP_KERNEL); |
1214 | memset(map_reg_used, 0, gr_pd_alpha_ratio_table__size_1_v() * sizeof(u32)); | 1214 | if (!map_alpha) |
1215 | return -ENOMEM; | ||
1216 | map_beta = map_alpha + gr_pd_alpha_ratio_table__size_1_v(); | ||
1217 | map_reg_used = map_beta + gr_pd_alpha_ratio_table__size_1_v(); | ||
1215 | 1218 | ||
1216 | for (row = 0; row < rows; ++row) { | 1219 | for (row = 0; row < rows; ++row) { |
1217 | alpha_target = max_t(u32, gr->tpc_count * row / rows, 1); | 1220 | alpha_target = max_t(u32, gr->tpc_count * row / rows, 1); |
@@ -1277,6 +1280,7 @@ static int gr_gk20a_setup_alpha_beta_tables(struct gk20a *g, | |||
1277 | } | 1280 | } |
1278 | } | 1281 | } |
1279 | 1282 | ||
1283 | kfree(map_alpha); | ||
1280 | return 0; | 1284 | return 0; |
1281 | } | 1285 | } |
1282 | 1286 | ||
@@ -2032,8 +2036,7 @@ int gr_gk20a_init_ctxsw_ucode(struct gk20a *g) | |||
2032 | g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32), | 2036 | g->gr.ctx_vars.ucode.gpccs.inst.count * sizeof(u32), |
2033 | g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); | 2037 | g->gr.ctx_vars.ucode.gpccs.data.count * sizeof(u32)); |
2034 | 2038 | ||
2035 | err = gk20a_gmmu_alloc_attr(g, DMA_ATTR_READ_ONLY, ucode_size, | 2039 | err = gk20a_gmmu_alloc(g, ucode_size, &ucode_info->surface_desc); |
2036 | &ucode_info->surface_desc); | ||
2037 | if (err) | 2040 | if (err) |
2038 | goto clean_up; | 2041 | goto clean_up; |
2039 | 2042 | ||
@@ -3261,15 +3264,17 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr) | |||
3261 | g->ops.gr.get_gpc_tpc_mask(g, gpc_index); | 3264 | g->ops.gr.get_gpc_tpc_mask(g, gpc_index); |
3262 | 3265 | ||
3263 | for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) { | 3266 | for (pes_index = 0; pes_index < gr->pe_count_per_gpc; pes_index++) { |
3264 | gr->pes_tpc_count[pes_index] = | 3267 | if (!gr->pes_tpc_count[pes_index]) { |
3265 | kzalloc(gr->gpc_count * sizeof(u32), | 3268 | gr->pes_tpc_count[pes_index] = |
3266 | GFP_KERNEL); | 3269 | kzalloc(gr->gpc_count * sizeof(u32), |
3267 | gr->pes_tpc_mask[pes_index] = | 3270 | GFP_KERNEL); |
3268 | kzalloc(gr->gpc_count * sizeof(u32), | 3271 | gr->pes_tpc_mask[pes_index] = |
3269 | GFP_KERNEL); | 3272 | kzalloc(gr->gpc_count * sizeof(u32), |
3270 | if (!gr->pes_tpc_count[pes_index] || | 3273 | GFP_KERNEL); |
3271 | !gr->pes_tpc_mask[pes_index]) | 3274 | if (!gr->pes_tpc_count[pes_index] || |
3272 | goto clean_up; | 3275 | !gr->pes_tpc_mask[pes_index]) |
3276 | goto clean_up; | ||
3277 | } | ||
3273 | 3278 | ||
3274 | tmp = gk20a_readl(g, | 3279 | tmp = gk20a_readl(g, |
3275 | gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + | 3280 | gr_gpc0_gpm_pd_pes_tpc_id_mask_r(pes_index) + |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h index 8a69c573..2c902f52 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_bus_gk20a.h | |||
@@ -50,6 +50,30 @@ | |||
50 | #ifndef _hw_bus_gk20a_h_ | 50 | #ifndef _hw_bus_gk20a_h_ |
51 | #define _hw_bus_gk20a_h_ | 51 | #define _hw_bus_gk20a_h_ |
52 | 52 | ||
53 | static inline u32 bus_bar0_window_r(void) | ||
54 | { | ||
55 | return 0x00001700; | ||
56 | } | ||
57 | static inline u32 bus_bar0_window_base_f(u32 v) | ||
58 | { | ||
59 | return (v & 0xffffff) << 0; | ||
60 | } | ||
61 | static inline u32 bus_bar0_window_target_vid_mem_f(void) | ||
62 | { | ||
63 | return 0x0; | ||
64 | } | ||
65 | static inline u32 bus_bar0_window_target_sys_mem_coherent_f(void) | ||
66 | { | ||
67 | return 0x2000000; | ||
68 | } | ||
69 | static inline u32 bus_bar0_window_target_sys_mem_noncoherent_f(void) | ||
70 | { | ||
71 | return 0x3000000; | ||
72 | } | ||
73 | static inline u32 bus_bar0_window_target_bar0_window_base_shift_v(void) | ||
74 | { | ||
75 | return 0x00000010; | ||
76 | } | ||
53 | static inline u32 bus_bar1_block_r(void) | 77 | static inline u32 bus_bar1_block_r(void) |
54 | { | 78 | { |
55 | return 0x00001704; | 79 | return 0x00001704; |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h new file mode 100644 index 00000000..918dad9a --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/hw_pram_gk20a.h | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_pram_gk20a_h_ | ||
51 | #define _hw_pram_gk20a_h_ | ||
52 | |||
53 | static inline u32 pram_data032_r(u32 i) | ||
54 | { | ||
55 | return 0x00700000 + i*4; | ||
56 | } | ||
57 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index eb4f01e0..ec946fb6 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include "hw_fb_gk20a.h" | 37 | #include "hw_fb_gk20a.h" |
38 | #include "hw_bus_gk20a.h" | 38 | #include "hw_bus_gk20a.h" |
39 | #include "hw_ram_gk20a.h" | 39 | #include "hw_ram_gk20a.h" |
40 | #include "hw_pram_gk20a.h" | ||
40 | #include "hw_mc_gk20a.h" | 41 | #include "hw_mc_gk20a.h" |
41 | #include "hw_flush_gk20a.h" | 42 | #include "hw_flush_gk20a.h" |
42 | #include "hw_ltc_gk20a.h" | 43 | #include "hw_ltc_gk20a.h" |
@@ -44,10 +45,20 @@ | |||
44 | #include "kind_gk20a.h" | 45 | #include "kind_gk20a.h" |
45 | #include "semaphore_gk20a.h" | 46 | #include "semaphore_gk20a.h" |
46 | 47 | ||
48 | /* | ||
49 | * Flip this to force all gk20a_mem* accesses via PRAMIN from the start of the | ||
50 | * boot, even for buffers that would work via cpu_va. At runtime, the flag is | ||
51 | * in debugfs, called "force_pramin". | ||
52 | */ | ||
53 | #define GK20A_FORCE_PRAMIN_DEFAULT false | ||
54 | |||
47 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | 55 | int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) |
48 | { | 56 | { |
49 | void *cpu_va; | 57 | void *cpu_va; |
50 | 58 | ||
59 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | ||
60 | return 0; | ||
61 | |||
51 | if (WARN_ON(mem->cpu_va)) { | 62 | if (WARN_ON(mem->cpu_va)) { |
52 | gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); | 63 | gk20a_warn(dev_from_gk20a(g), "nested %s", __func__); |
53 | return -EBUSY; | 64 | return -EBUSY; |
@@ -66,20 +77,66 @@ int gk20a_mem_begin(struct gk20a *g, struct mem_desc *mem) | |||
66 | 77 | ||
67 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) | 78 | void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem) |
68 | { | 79 | { |
80 | if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) | ||
81 | return; | ||
82 | |||
69 | vunmap(mem->cpu_va); | 83 | vunmap(mem->cpu_va); |
70 | mem->cpu_va = NULL; | 84 | mem->cpu_va = NULL; |
71 | } | 85 | } |
72 | 86 | ||
87 | /* WARNING: returns with pramin_base_lock held; pair with gk20a_pramin_exit() */ | ||
88 | static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w) | ||
89 | { | ||
90 | u64 bufbase = g->ops.mm.get_iova_addr(g, mem->sgt->sgl, 0); | ||
91 | u64 addr = bufbase + w * sizeof(u32); | ||
92 | u32 hi = (u32)((addr & ~(u64)0xfffff) | ||
93 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | ||
94 | u32 lo = (addr & 0xfffff); | ||
95 | |||
96 | gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p", hi, lo, mem); | ||
97 | |||
98 | WARN_ON(!bufbase); | ||
99 | spin_lock(&g->mm.pramin_base_lock); | ||
100 | if (g->mm.pramin_base != hi) { | ||
101 | gk20a_writel(g, bus_bar0_window_r(), | ||
102 | (g->mm.vidmem_is_vidmem | ||
103 | && mem->aperture == APERTURE_SYSMEM ? | ||
104 | bus_bar0_window_target_sys_mem_noncoherent_f() : | ||
105 | bus_bar0_window_target_vid_mem_f()) | | ||
106 | bus_bar0_window_base_f(hi)); | ||
107 | gk20a_readl(g, bus_bar0_window_r()); | ||
108 | g->mm.pramin_base = hi; | ||
109 | } | ||
110 | |||
111 | return lo; | ||
112 | } | ||
113 | |||
114 | static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem) | ||
115 | { | ||
116 | gk20a_dbg(gpu_dbg_mem, "end for %p", mem); | ||
117 | spin_unlock(&g->mm.pramin_base_lock); | ||
118 | } | ||
119 | |||
73 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) | 120 | u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w) |
74 | { | 121 | { |
75 | u32 *ptr = mem->cpu_va; | 122 | u32 data = 0; |
76 | u32 data; | 123 | |
124 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { | ||
125 | u32 *ptr = mem->cpu_va; | ||
77 | 126 | ||
78 | WARN_ON(!ptr); | 127 | WARN_ON(!ptr); |
79 | data = ptr[w]; | 128 | data = ptr[w]; |
80 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | 129 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM |
81 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | 130 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); |
82 | #endif | 131 | #endif |
132 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
133 | u32 addr = gk20a_pramin_enter(g, mem, w); | ||
134 | data = gk20a_readl(g, pram_data032_r(addr / sizeof(u32))); | ||
135 | gk20a_pramin_exit(g, mem); | ||
136 | } else { | ||
137 | WARN_ON("Accessing unallocated mem_desc"); | ||
138 | } | ||
139 | |||
83 | return data; | 140 | return data; |
84 | } | 141 | } |
85 | 142 | ||
@@ -106,13 +163,23 @@ void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, | |||
106 | 163 | ||
107 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) | 164 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) |
108 | { | 165 | { |
109 | u32 *ptr = mem->cpu_va; | 166 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { |
167 | u32 *ptr = mem->cpu_va; | ||
110 | 168 | ||
111 | WARN_ON(!ptr); | 169 | WARN_ON(!ptr); |
112 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | 170 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM |
113 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); | 171 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data); |
114 | #endif | 172 | #endif |
115 | ptr[w] = data; | 173 | ptr[w] = data; |
174 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
175 | u32 addr = gk20a_pramin_enter(g, mem, w); | ||
176 | gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data); | ||
177 | /* read back to synchronize accesses */ | ||
178 | gk20a_readl(g, pram_data032_r(addr / sizeof(u32))); | ||
179 | gk20a_pramin_exit(g, mem); | ||
180 | } else { | ||
181 | WARN_ON("Accessing unallocated mem_desc"); | ||
182 | } | ||
116 | } | 183 | } |
117 | 184 | ||
118 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) | 185 | void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) |
@@ -535,6 +602,13 @@ static int gk20a_alloc_sysmem_flush(struct gk20a *g) | |||
535 | return gk20a_gmmu_alloc(g, SZ_4K, &g->mm.sysmem_flush); | 602 | return gk20a_gmmu_alloc(g, SZ_4K, &g->mm.sysmem_flush); |
536 | } | 603 | } |
537 | 604 | ||
605 | static void gk20a_init_pramin(struct mm_gk20a *mm) | ||
606 | { | ||
607 | mm->pramin_base = 0; | ||
608 | spin_lock_init(&mm->pramin_base_lock); | ||
609 | mm->force_pramin = GK20A_FORCE_PRAMIN_DEFAULT; | ||
610 | } | ||
611 | |||
538 | int gk20a_init_mm_setup_sw(struct gk20a *g) | 612 | int gk20a_init_mm_setup_sw(struct gk20a *g) |
539 | { | 613 | { |
540 | struct mm_gk20a *mm = &g->mm; | 614 | struct mm_gk20a *mm = &g->mm; |
@@ -558,6 +632,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
558 | (int)(mm->channel.user_size >> 20), | 632 | (int)(mm->channel.user_size >> 20), |
559 | (int)(mm->channel.kernel_size >> 20)); | 633 | (int)(mm->channel.kernel_size >> 20)); |
560 | 634 | ||
635 | gk20a_init_pramin(mm); | ||
636 | |||
561 | err = gk20a_alloc_sysmem_flush(g); | 637 | err = gk20a_alloc_sysmem_flush(g); |
562 | if (err) | 638 | if (err) |
563 | return err; | 639 | return err; |
@@ -586,6 +662,7 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
586 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ | 662 | /* set vm_alloc_share op here as gk20a_as_alloc_share needs it */ |
587 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; | 663 | g->ops.mm.vm_alloc_share = gk20a_vm_alloc_share; |
588 | mm->remove_support = gk20a_remove_mm_support; | 664 | mm->remove_support = gk20a_remove_mm_support; |
665 | |||
589 | mm->sw_ready = true; | 666 | mm->sw_ready = true; |
590 | 667 | ||
591 | gk20a_dbg_fn("done"); | 668 | gk20a_dbg_fn("done"); |
@@ -690,6 +767,7 @@ static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, | |||
690 | entry->mem.cpu_va = page_address(pages); | 767 | entry->mem.cpu_va = page_address(pages); |
691 | memset(entry->mem.cpu_va, 0, len); | 768 | memset(entry->mem.cpu_va, 0, len); |
692 | entry->mem.size = len; | 769 | entry->mem.size = len; |
770 | entry->mem.aperture = APERTURE_SYSMEM; | ||
693 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, sg_phys(entry->mem.sgt->sgl), len); | 771 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, sg_phys(entry->mem.sgt->sgl), len); |
694 | 772 | ||
695 | return 0; | 773 | return 0; |
@@ -716,6 +794,7 @@ static void free_gmmu_phys_pages(struct vm_gk20a *vm, | |||
716 | kfree(entry->mem.sgt); | 794 | kfree(entry->mem.sgt); |
717 | entry->mem.sgt = NULL; | 795 | entry->mem.sgt = NULL; |
718 | entry->mem.size = 0; | 796 | entry->mem.size = 0; |
797 | entry->mem.aperture = APERTURE_INVALID; | ||
719 | } | 798 | } |
720 | 799 | ||
721 | static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) | 800 | static int map_gmmu_phys_pages(struct gk20a_mm_entry *entry) |
@@ -2164,6 +2243,7 @@ int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size, stru | |||
2164 | goto fail_free; | 2243 | goto fail_free; |
2165 | 2244 | ||
2166 | mem->size = size; | 2245 | mem->size = size; |
2246 | mem->aperture = APERTURE_SYSMEM; | ||
2167 | 2247 | ||
2168 | gk20a_dbg_fn("done"); | 2248 | gk20a_dbg_fn("done"); |
2169 | 2249 | ||
@@ -2210,6 +2290,7 @@ void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr, | |||
2210 | gk20a_free_sgtable(&mem->sgt); | 2290 | gk20a_free_sgtable(&mem->sgt); |
2211 | 2291 | ||
2212 | mem->size = 0; | 2292 | mem->size = 0; |
2293 | mem->aperture = APERTURE_INVALID; | ||
2213 | } | 2294 | } |
2214 | 2295 | ||
2215 | void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) | 2296 | void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) |
@@ -4015,6 +4096,9 @@ void gk20a_mm_debugfs_init(struct device *dev) | |||
4015 | 4096 | ||
4016 | debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root, | 4097 | debugfs_create_x64("separate_fixed_allocs", 0664, gpu_root, |
4017 | &g->separate_fixed_allocs); | 4098 | &g->separate_fixed_allocs); |
4099 | |||
4100 | debugfs_create_bool("force_pramin", 0664, gpu_root, | ||
4101 | &g->mm.force_pramin); | ||
4018 | } | 4102 | } |
4019 | 4103 | ||
4020 | void gk20a_init_mm(struct gpu_ops *gops) | 4104 | void gk20a_init_mm(struct gpu_ops *gops) |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index d943b231..c58a4fec 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -40,10 +40,17 @@ | |||
40 | outer_flush_range(pa, pa + (size_t)(size)); \ | 40 | outer_flush_range(pa, pa + (size_t)(size)); \ |
41 | } while (0) | 41 | } while (0) |
42 | 42 | ||
43 | enum gk20a_aperture { | ||
44 | APERTURE_INVALID, /* e.g., unallocated */ | ||
45 | APERTURE_SYSMEM, | ||
46 | APERTURE_VIDMEM | ||
47 | }; | ||
48 | |||
43 | struct mem_desc { | 49 | struct mem_desc { |
44 | void *cpu_va; | 50 | void *cpu_va; |
45 | struct page **pages; | 51 | struct page **pages; |
46 | struct sg_table *sgt; | 52 | struct sg_table *sgt; |
53 | enum gk20a_aperture aperture; | ||
47 | size_t size; | 54 | size_t size; |
48 | u64 gpu_va; | 55 | u64 gpu_va; |
49 | }; | 56 | }; |
@@ -357,6 +364,14 @@ struct mm_gk20a { | |||
357 | bool vidmem_is_vidmem; | 364 | bool vidmem_is_vidmem; |
358 | 365 | ||
359 | struct mem_desc sysmem_flush; | 366 | struct mem_desc sysmem_flush; |
367 | |||
368 | u32 pramin_base; | ||
369 | spinlock_t pramin_base_lock; | ||
370 | #if LINUX_VERSION_CODE < KERNEL_VERSION(4,4,0) | ||
371 | u32 force_pramin; /* via debugfs */ | ||
372 | #else | ||
373 | bool force_pramin; /* via debugfs */ | ||
374 | #endif | ||
360 | }; | 375 | }; |
361 | 376 | ||
362 | int gk20a_mm_init(struct mm_gk20a *mm); | 377 | int gk20a_mm_init(struct mm_gk20a *mm); |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c index abe6771c..ca36d481 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c | |||
@@ -52,12 +52,12 @@ struct gk20a_emc_params { | |||
52 | }; | 52 | }; |
53 | 53 | ||
54 | static void __iomem *pmc = IO_ADDRESS(TEGRA_PMC_BASE); | 54 | static void __iomem *pmc = IO_ADDRESS(TEGRA_PMC_BASE); |
55 | static inline u32 pmc_read(unsigned long reg) | 55 | static inline u32 __maybe_unused pmc_read(unsigned long reg) |
56 | { | 56 | { |
57 | return readl(pmc + reg); | 57 | return readl(pmc + reg); |
58 | } | 58 | } |
59 | 59 | ||
60 | static inline void pmc_write(u32 val, unsigned long reg) | 60 | static inline void __maybe_unused pmc_write(u32 val, unsigned long reg) |
61 | { | 61 | { |
62 | writel_relaxed(val, pmc + reg); | 62 | writel_relaxed(val, pmc + reg); |
63 | } | 63 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 8bf382fd..08ef7738 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c | |||
@@ -2443,8 +2443,7 @@ static int gk20a_prepare_ucode(struct gk20a *g) | |||
2443 | pmu->ucode_image = (u32 *)((u8 *)pmu->desc + | 2443 | pmu->ucode_image = (u32 *)((u8 *)pmu->desc + |
2444 | pmu->desc->descriptor_size); | 2444 | pmu->desc->descriptor_size); |
2445 | 2445 | ||
2446 | err = gk20a_gmmu_alloc_map_attr(vm, DMA_ATTR_READ_ONLY, | 2446 | err = gk20a_gmmu_alloc_map(vm, GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode); |
2447 | GK20A_PMU_UCODE_SIZE_MAX, &pmu->ucode); | ||
2448 | if (err) | 2447 | if (err) |
2449 | goto err_release_fw; | 2448 | goto err_release_fw; |
2450 | 2449 | ||
diff --git a/drivers/gpu/nvgpu/gm20b/hw_bus_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_bus_gm20b.h index e69275e0..0b4eefe0 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_bus_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_bus_gm20b.h | |||
@@ -50,6 +50,30 @@ | |||
50 | #ifndef _hw_bus_gm20b_h_ | 50 | #ifndef _hw_bus_gm20b_h_ |
51 | #define _hw_bus_gm20b_h_ | 51 | #define _hw_bus_gm20b_h_ |
52 | 52 | ||
53 | static inline u32 bus_bar0_window_r(void) | ||
54 | { | ||
55 | return 0x00001700; | ||
56 | } | ||
57 | static inline u32 bus_bar0_window_base_f(u32 v) | ||
58 | { | ||
59 | return (v & 0xffffff) << 0; | ||
60 | } | ||
61 | static inline u32 bus_bar0_window_target_vid_mem_f(void) | ||
62 | { | ||
63 | return 0x0; | ||
64 | } | ||
65 | static inline u32 bus_bar0_window_target_sys_mem_coherent_f(void) | ||
66 | { | ||
67 | return 0x2000000; | ||
68 | } | ||
69 | static inline u32 bus_bar0_window_target_sys_mem_noncoherent_f(void) | ||
70 | { | ||
71 | return 0x3000000; | ||
72 | } | ||
73 | static inline u32 bus_bar0_window_target_bar0_window_base_shift_v(void) | ||
74 | { | ||
75 | return 0x00000010; | ||
76 | } | ||
53 | static inline u32 bus_bar1_block_r(void) | 77 | static inline u32 bus_bar1_block_r(void) |
54 | { | 78 | { |
55 | return 0x00001704; | 79 | return 0x00001704; |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_pram_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_pram_gm20b.h new file mode 100644 index 00000000..f9c6f3d4 --- /dev/null +++ b/drivers/gpu/nvgpu/gm20b/hw_pram_gm20b.h | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | /* | ||
17 | * Function naming determines intended use: | ||
18 | * | ||
19 | * <x>_r(void) : Returns the offset for register <x>. | ||
20 | * | ||
21 | * <x>_o(void) : Returns the offset for element <x>. | ||
22 | * | ||
23 | * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. | ||
24 | * | ||
25 | * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. | ||
26 | * | ||
27 | * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted | ||
28 | * and masked to place it at field <y> of register <x>. This value | ||
29 | * can be |'d with others to produce a full register value for | ||
30 | * register <x>. | ||
31 | * | ||
32 | * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This | ||
33 | * value can be ~'d and then &'d to clear the value of field <y> for | ||
34 | * register <x>. | ||
35 | * | ||
36 | * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted | ||
37 | * to place it at field <y> of register <x>. This value can be |'d | ||
38 | * with others to produce a full register value for <x>. | ||
39 | * | ||
40 | * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register | ||
41 | * <x> value 'r' after being shifted to place its LSB at bit 0. | ||
42 | * This value is suitable for direct comparison with other unshifted | ||
43 | * values appropriate for use in field <y> of register <x>. | ||
44 | * | ||
45 | * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for | ||
46 | * field <y> of register <x>. This value is suitable for direct | ||
47 | * comparison with unshifted values appropriate for use in field <y> | ||
48 | * of register <x>. | ||
49 | */ | ||
50 | #ifndef _hw_pram_gm20b_h_ | ||
51 | #define _hw_pram_gm20b_h_ | ||
52 | |||
53 | static inline u32 pram_data032_r(u32 i) | ||
54 | { | ||
55 | return 0x00700000 + i*4; | ||
56 | } | ||
57 | #endif | ||
diff --git a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c index 8eb600ef..8702658c 100644 --- a/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/pmu_gm20b.c | |||
@@ -69,44 +69,6 @@ static struct pg_init_sequence_list _pginitseq_gm20b[] = { | |||
69 | { 0x0010e06c, 0x00000099}, | 69 | { 0x0010e06c, 0x00000099}, |
70 | { 0x0010e06c, 0x0000009a}, | 70 | { 0x0010e06c, 0x0000009a}, |
71 | { 0x0010e06c, 0x0000009b}, | 71 | { 0x0010e06c, 0x0000009b}, |
72 | { 0x0010e06c, 0x00000000}, | ||
73 | { 0x0010e06c, 0x00000000}, | ||
74 | { 0x0010e06c, 0x00000000}, | ||
75 | { 0x0010e06c, 0x00000000}, | ||
76 | { 0x0010e06c, 0x00000000}, | ||
77 | { 0x0010e06c, 0x00000000}, | ||
78 | { 0x0010e06c, 0x00000000}, | ||
79 | { 0x0010e06c, 0x00000000}, | ||
80 | { 0x0010e06c, 0x00000000}, | ||
81 | { 0x0010e06c, 0x00000000}, | ||
82 | { 0x0010e06c, 0x00000000}, | ||
83 | { 0x0010e06c, 0x00000000}, | ||
84 | { 0x0010e06c, 0x00000000}, | ||
85 | { 0x0010e06c, 0x00000000}, | ||
86 | { 0x0010e06c, 0x00000000}, | ||
87 | { 0x0010e06c, 0x00000000}, | ||
88 | { 0x0010e06c, 0x00000000}, | ||
89 | { 0x0010e06c, 0x00000000}, | ||
90 | { 0x0010e06c, 0x00000000}, | ||
91 | { 0x0010e06c, 0x00000000}, | ||
92 | { 0x0010e06c, 0x00000000}, | ||
93 | { 0x0010e06c, 0x00000000}, | ||
94 | { 0x0010e06c, 0x00000000}, | ||
95 | { 0x0010e06c, 0x00000000}, | ||
96 | { 0x0010e06c, 0x00000000}, | ||
97 | { 0x0010e06c, 0x00000000}, | ||
98 | { 0x0010e06c, 0x00000000}, | ||
99 | { 0x0010e06c, 0x00000000}, | ||
100 | { 0x0010e06c, 0x00000000}, | ||
101 | { 0x0010e06c, 0x00000000}, | ||
102 | { 0x0010e06c, 0x00000000}, | ||
103 | { 0x0010e06c, 0x00000000}, | ||
104 | { 0x0010e06c, 0x00000000}, | ||
105 | { 0x0010e06c, 0x00000000}, | ||
106 | { 0x0010e06c, 0x00000000}, | ||
107 | { 0x0010e06c, 0x00000000}, | ||
108 | { 0x0010e06c, 0x00000000}, | ||
109 | { 0x0010e06c, 0x00000000}, | ||
110 | { 0x0010ab14, 0x00000000}, | 72 | { 0x0010ab14, 0x00000000}, |
111 | { 0x0010ab18, 0x00000000}, | 73 | { 0x0010ab18, 0x00000000}, |
112 | { 0x0010e024, 0x00000000}, | 74 | { 0x0010e024, 0x00000000}, |
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c index 9ae5e2c6..3057a625 100644 --- a/drivers/gpu/nvgpu/pci.c +++ b/drivers/gpu/nvgpu/pci.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include "gk20a/gk20a.h" | 21 | #include "gk20a/gk20a.h" |
22 | #include "gk20a/platform_gk20a.h" | 22 | #include "gk20a/platform_gk20a.h" |
23 | 23 | ||
24 | #define PCI_INTERFACE_NAME "nvgpu-pci-%s%%s" | 24 | #define PCI_INTERFACE_NAME "card-%s%%s" |
25 | 25 | ||
26 | static int nvgpu_pci_tegra_probe(struct device *dev) | 26 | static int nvgpu_pci_tegra_probe(struct device *dev) |
27 | { | 27 | { |
@@ -135,6 +135,17 @@ static int nvgpu_pci_init_support(struct pci_dev *pdev) | |||
135 | return err; | 135 | return err; |
136 | } | 136 | } |
137 | 137 | ||
138 | static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode) | ||
139 | { | ||
140 | return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev)); | ||
141 | } | ||
142 | |||
143 | struct class nvgpu_pci_class = { | ||
144 | .owner = THIS_MODULE, | ||
145 | .name = "nvidia-pci-gpu", | ||
146 | .devnode = nvgpu_pci_devnode, | ||
147 | }; | ||
148 | |||
138 | static int nvgpu_pci_probe(struct pci_dev *pdev, | 149 | static int nvgpu_pci_probe(struct pci_dev *pdev, |
139 | const struct pci_device_id *pent) | 150 | const struct pci_device_id *pent) |
140 | { | 151 | { |
@@ -187,7 +198,7 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, | |||
187 | if (!nodefmt) | 198 | if (!nodefmt) |
188 | return -ENOMEM; | 199 | return -ENOMEM; |
189 | 200 | ||
190 | err = gk20a_user_init(&pdev->dev, nodefmt); | 201 | err = gk20a_user_init(&pdev->dev, nodefmt, &nvgpu_pci_class); |
191 | kfree(nodefmt); | 202 | kfree(nodefmt); |
192 | nodefmt = NULL; | 203 | nodefmt = NULL; |
193 | if (err) | 204 | if (err) |
@@ -248,7 +259,7 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) | |||
248 | if (g->remove_support) | 259 | if (g->remove_support) |
249 | g->remove_support(g->dev); | 260 | g->remove_support(g->dev); |
250 | 261 | ||
251 | gk20a_user_deinit(g->dev); | 262 | gk20a_user_deinit(g->dev, &nvgpu_pci_class); |
252 | 263 | ||
253 | debugfs_remove_recursive(platform->debugfs); | 264 | debugfs_remove_recursive(platform->debugfs); |
254 | debugfs_remove_recursive(platform->debugfs_alias); | 265 | debugfs_remove_recursive(platform->debugfs_alias); |
@@ -270,10 +281,17 @@ static struct pci_driver nvgpu_pci_driver = { | |||
270 | 281 | ||
271 | int __init nvgpu_pci_init(void) | 282 | int __init nvgpu_pci_init(void) |
272 | { | 283 | { |
284 | int ret; | ||
285 | |||
286 | ret = class_register(&nvgpu_pci_class); | ||
287 | if (ret) | ||
288 | return ret; | ||
289 | |||
273 | return pci_register_driver(&nvgpu_pci_driver); | 290 | return pci_register_driver(&nvgpu_pci_driver); |
274 | } | 291 | } |
275 | 292 | ||
276 | void __exit nvgpu_pci_exit(void) | 293 | void __exit nvgpu_pci_exit(void) |
277 | { | 294 | { |
278 | pci_unregister_driver(&nvgpu_pci_driver); | 295 | pci_unregister_driver(&nvgpu_pci_driver); |
296 | class_unregister(&nvgpu_pci_class); | ||
279 | } | 297 | } |
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index 78492d3b..3c06652b 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c | |||
@@ -418,7 +418,7 @@ int vgpu_probe(struct platform_device *pdev) | |||
418 | platform->g = gk20a; | 418 | platform->g = gk20a; |
419 | gk20a->dev = dev; | 419 | gk20a->dev = dev; |
420 | 420 | ||
421 | err = gk20a_user_init(dev, INTERFACE_NAME); | 421 | err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class); |
422 | if (err) | 422 | if (err) |
423 | return err; | 423 | return err; |
424 | 424 | ||
@@ -498,7 +498,7 @@ int vgpu_remove(struct platform_device *pdev) | |||
498 | kfree(vgpu_gpd); | 498 | kfree(vgpu_gpd); |
499 | 499 | ||
500 | vgpu_comm_deinit(); | 500 | vgpu_comm_deinit(); |
501 | gk20a_user_deinit(dev); | 501 | gk20a_user_deinit(dev, &nvgpu_class); |
502 | gk20a_get_platform(dev)->g = NULL; | 502 | gk20a_get_platform(dev)->g = NULL; |
503 | kfree(g); | 503 | kfree(g); |
504 | return 0; | 504 | return 0; |