author    Linus Torvalds <torvalds@linux-foundation.org>  2015-11-05 01:15:15 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-11-05 01:15:15 -0500
commit    8e483ed1342a4ea45b70f0f33ac54eff7a33d918 (patch)
tree      66c9f9ad196581966bdb06802e11e9856b1c0779 /drivers/misc
parent    e880e87488d5bbf630dd716e6de8a53585614568 (diff)
parent    e2d8680741edec84f843f783a7f4a44418b818d7 (diff)
Merge tag 'char-misc-4.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char/misc driver updates from Greg KH:
 "Here is the big char/misc driver update for 4.4-rc1. Lots of
  different driver and subsystem updates, hwtracing being the largest
  with the addition of some new platforms that are now supported. Full
  details in the shortlog.

  All of these have been in linux-next for a long time with no reported
  issues"

* tag 'char-misc-4.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (181 commits)
  fpga: socfpga: Fix check of return value of devm_request_irq
  lkdtm: fix ACCESS_USERSPACE test
  mcb: Destroy IDA on module unload
  mcb: Do not return zero on error path in mcb_pci_probe()
  mei: bus: set the device name before running fixup
  mei: bus: use correct lock ordering
  mei: Fix debugfs filename in error output
  char: ipmi: ipmi_ssif: Replace timeval with timespec64
  fpga: zynq-fpga: Fix issue with drvdata being overwritten.
  fpga manager: remove unnecessary null pointer checks
  fpga manager: ensure lifetime with of_fpga_mgr_get
  fpga: zynq-fpga: Change fw format to handle bin instead of bit.
  fpga: zynq-fpga: Fix unbalanced clock handling
  misc: sram: partition base address belongs to __iomem space
  coresight: etm3x: adding documentation for sysFS's cpu interface
  vme: 8-bit status/id takes 256 values, not 255
  fpga manager: Adding FPGA Manager support for Xilinx Zynq 7000
  ARM: zynq: dt: Updated devicetree for Zynq 7000 platform.
  ARM: dt: fpga: Added binding docs for Xilinx Zynq FPGA manager.
  ver_linux: proc/modules, limit text processing to 'sed'
  ...
Diffstat (limited to 'drivers/misc')
-rw-r--r--  drivers/misc/Kconfig                              |    2
-rw-r--r--  drivers/misc/ad525x_dpot-i2c.c                    |    1
-rw-r--r--  drivers/misc/genwqe/card_base.h                   |    2
-rw-r--r--  drivers/misc/genwqe/card_ddcb.c                   |    2
-rw-r--r--  drivers/misc/genwqe/card_dev.c                    |    4
-rw-r--r--  drivers/misc/genwqe/card_utils.c                  |    5
-rw-r--r--  drivers/misc/hpilo.c                              |    6
-rw-r--r--  drivers/misc/kgdbts.c                             |   10
-rw-r--r--  drivers/misc/lkdtm.c                              |    8
-rw-r--r--  drivers/misc/mei/amthif.c                         |    2
-rw-r--r--  drivers/misc/mei/bus-fixup.c                      |    4
-rw-r--r--  drivers/misc/mei/bus.c                            |  218
-rw-r--r--  drivers/misc/mei/client.h                         |   12
-rw-r--r--  drivers/misc/mei/debugfs.c                        |    2
-rw-r--r--  drivers/misc/mei/hbm.c                            |    6
-rw-r--r--  drivers/misc/mei/hw-me.c                          |    4
-rw-r--r--  drivers/misc/mei/init.c                           |    4
-rw-r--r--  drivers/misc/mei/interrupt.c                      |    4
-rw-r--r--  drivers/misc/mei/mei_dev.h                        |   19
-rw-r--r--  drivers/misc/mic/Kconfig                          |   25
-rw-r--r--  drivers/misc/mic/Makefile                         |    2
-rw-r--r--  drivers/misc/mic/bus/Makefile                     |    1
-rw-r--r--  drivers/misc/mic/bus/cosm_bus.c                   |  141
-rw-r--r--  drivers/misc/mic/bus/cosm_bus.h                   |  134
-rw-r--r--  drivers/misc/mic/bus/mic_bus.c                    |   24
-rw-r--r--  drivers/misc/mic/bus/scif_bus.c                   |    9
-rw-r--r--  drivers/misc/mic/bus/scif_bus.h                   |    6
-rw-r--r--  drivers/misc/mic/card/mic_device.c                |   88
-rw-r--r--  drivers/misc/mic/card/mic_x100.c                  |    2
-rw-r--r--  drivers/misc/mic/common/mic_dev.h                 |   13
-rw-r--r--  drivers/misc/mic/cosm/Makefile                    |   10
-rw-r--r--  drivers/misc/mic/cosm/cosm_debugfs.c              |  156
-rw-r--r--  drivers/misc/mic/cosm/cosm_main.c                 |  388
-rw-r--r--  drivers/misc/mic/cosm/cosm_main.h                 |   70
-rw-r--r--  drivers/misc/mic/cosm/cosm_scif_server.c          |  405
-rw-r--r--  drivers/misc/mic/cosm/cosm_sysfs.c                |  461
-rw-r--r--  drivers/misc/mic/cosm_client/Makefile             |    7
-rw-r--r--  drivers/misc/mic/cosm_client/cosm_scif_client.c   |  275
-rw-r--r--  drivers/misc/mic/host/Makefile                    |    1
-rw-r--r--  drivers/misc/mic/host/mic_boot.c                  |  317
-rw-r--r--  drivers/misc/mic/host/mic_debugfs.c               |  114
-rw-r--r--  drivers/misc/mic/host/mic_device.h                |   88
-rw-r--r--  drivers/misc/mic/host/mic_fops.c                  |    4
-rw-r--r--  drivers/misc/mic/host/mic_intr.c                  |   46
-rw-r--r--  drivers/misc/mic/host/mic_main.c                  |  223
-rw-r--r--  drivers/misc/mic/host/mic_smpt.c                  |   30
-rw-r--r--  drivers/misc/mic/host/mic_sysfs.c                 |  459
-rw-r--r--  drivers/misc/mic/host/mic_virtio.c                |   17
-rw-r--r--  drivers/misc/mic/host/mic_virtio.h                |    2
-rw-r--r--  drivers/misc/mic/host/mic_x100.c                  |   46
-rw-r--r--  drivers/misc/mic/scif/Makefile                    |    5
-rw-r--r--  drivers/misc/mic/scif/scif_api.c                  |  234
-rw-r--r--  drivers/misc/mic/scif/scif_debugfs.c              |   85
-rw-r--r--  drivers/misc/mic/scif/scif_dma.c                  | 1979
-rw-r--r--  drivers/misc/mic/scif/scif_epd.c                  |   26
-rw-r--r--  drivers/misc/mic/scif/scif_epd.h                  |   50
-rw-r--r--  drivers/misc/mic/scif/scif_fd.c                   |  178
-rw-r--r--  drivers/misc/mic/scif/scif_fence.c                |  771
-rw-r--r--  drivers/misc/mic/scif/scif_main.c                 |  111
-rw-r--r--  drivers/misc/mic/scif/scif_main.h                 |   37
-rw-r--r--  drivers/misc/mic/scif/scif_map.h                  |   25
-rw-r--r--  drivers/misc/mic/scif/scif_mmap.c                 |  699
-rw-r--r--  drivers/misc/mic/scif/scif_nm.c                   |   20
-rw-r--r--  drivers/misc/mic/scif/scif_nodeqp.c               |  149
-rw-r--r--  drivers/misc/mic/scif/scif_nodeqp.h               |   42
-rw-r--r--  drivers/misc/mic/scif/scif_peer_bus.c             |  179
-rw-r--r--  drivers/misc/mic/scif/scif_peer_bus.h             |   42
-rw-r--r--  drivers/misc/mic/scif/scif_rma.c                  | 1775
-rw-r--r--  drivers/misc/mic/scif/scif_rma.h                  |  464
-rw-r--r--  drivers/misc/mic/scif/scif_rma_list.c             |  291
-rw-r--r--  drivers/misc/mic/scif/scif_rma_list.h             |   57
-rw-r--r--  drivers/misc/sgi-gru/gruhandles.c                 |    6
-rw-r--r--  drivers/misc/sgi-gru/gruhandles.h                 |    1
-rw-r--r--  drivers/misc/sgi-gru/grukdump.c                   |   16
-rw-r--r--  drivers/misc/sgi-gru/grukservices.c               |   15
-rw-r--r--  drivers/misc/sgi-gru/grumain.c                    |    4
-rw-r--r--  drivers/misc/sgi-gru/grutlbpurge.c                |   25
-rw-r--r--  drivers/misc/sram.c                               |  196
-rw-r--r--  drivers/misc/ti-st/st_core.c                      |   18
-rw-r--r--  drivers/misc/vmw_balloon.c                        |  843
-rw-r--r--  drivers/misc/vmw_vmci/vmci_datagram.c             |    3
81 files changed, 10375 insertions, 1850 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index ccccc2943f2f..22892c701c63 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -414,7 +414,7 @@ config TI_DAC7512
 
 config VMWARE_BALLOON
 	tristate "VMware Balloon Driver"
-	depends on X86 && HYPERVISOR_GUEST
+	depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST
 	help
 	  This is VMware physical memory management driver which acts
 	  like a "balloon" that can be inflated to reclaim physical pages
diff --git a/drivers/misc/ad525x_dpot-i2c.c b/drivers/misc/ad525x_dpot-i2c.c
index d11187d36ddd..4f832002d116 100644
--- a/drivers/misc/ad525x_dpot-i2c.c
+++ b/drivers/misc/ad525x_dpot-i2c.c
@@ -117,4 +117,3 @@ module_i2c_driver(ad_dpot_i2c_driver);
 MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
 MODULE_DESCRIPTION("digital potentiometer I2C bus driver");
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("i2c:ad_dpot");
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h
index e7353449874b..cb851c14ca4b 100644
--- a/drivers/misc/genwqe/card_base.h
+++ b/drivers/misc/genwqe/card_base.h
@@ -514,7 +514,7 @@ int __genwqe_execute_ddcb(struct genwqe_dev *cd,
 /**
  * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation
  *
- * This version will not do address translation or any modifcation of
+ * This version will not do address translation or any modification of
  * the DDCB data. It is used e.g. for the MoveFlash DDCB which is
  * entirely prepared by the driver itself. That means the appropriate
  * DMA addresses are already in the DDCB and do not need any
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
index 6d51e5f08664..353ee0cc733d 100644
--- a/drivers/misc/genwqe/card_ddcb.c
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -203,7 +203,7 @@ struct genwqe_ddcb_cmd *ddcb_requ_alloc(void)
 {
 	struct ddcb_requ *req;
 
-	req = kzalloc(sizeof(*req), GFP_ATOMIC);
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
 	if (!req)
 		return NULL;
 
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
index 70e62d6a3231..7f1b282d7d96 100644
--- a/drivers/misc/genwqe/card_dev.c
+++ b/drivers/misc/genwqe/card_dev.c
@@ -449,7 +449,7 @@ static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma)
 	if (get_order(vsize) > MAX_ORDER)
 		return -ENOMEM;
 
-	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
+	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL);
 	if (dma_map == NULL)
 		return -ENOMEM;
 
@@ -785,7 +785,7 @@ static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
 	map_addr = (m->addr & PAGE_MASK);
 	map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
 
-	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
+	dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL);
 	if (dma_map == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
index 1ca94e6fa8fb..222367cc8c81 100644
--- a/drivers/misc/genwqe/card_utils.c
+++ b/drivers/misc/genwqe/card_utils.c
@@ -220,7 +220,8 @@ void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
 	if (get_order(size) > MAX_ORDER)
 		return NULL;
 
-	return pci_alloc_consistent(cd->pci_dev, size, dma_handle);
+	return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
+				  GFP_KERNEL);
 }
 
 void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
@@ -229,7 +230,7 @@ void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
 	if (vaddr == NULL)
 		return;
 
-	pci_free_consistent(cd->pci_dev, size, vaddr, dma_handle);
+	dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle);
 }
 
 static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
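
The two card_utils.c hunks above move genwqe from the legacy pci_alloc_consistent()/pci_free_consistent() wrappers to the generic DMA API, which takes the underlying struct device and an explicit GFP mask. A minimal sketch of the same conversion pattern in isolation (the example_* helper names are hypothetical, not part of this series):

    #include <linux/pci.h>
    #include <linux/dma-mapping.h>

    /* Sketch of the pci_*_consistent -> dma_*_coherent conversion pattern. */
    static void *example_alloc_dma_buf(struct pci_dev *pdev, size_t size,
                                       dma_addr_t *dma_handle)
    {
            /* generic DMA API: pass &pdev->dev and a GFP mask explicitly */
            return dma_alloc_coherent(&pdev->dev, size, dma_handle, GFP_KERNEL);
    }

    static void example_free_dma_buf(struct pci_dev *pdev, size_t size,
                                     void *vaddr, dma_addr_t dma_handle)
    {
            dma_free_coherent(&pdev->dev, size, vaddr, dma_handle);
    }
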
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
index b83e3ca12a41..d6a901cd4222 100644
--- a/drivers/misc/hpilo.c
+++ b/drivers/misc/hpilo.c
@@ -2,7 +2,7 @@
  * Driver for the HP iLO management processor.
  *
  * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
- *	David Altobelli <david.altobelli@hp.com>
+ *	David Altobelli <david.altobelli@hpe.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -902,11 +902,11 @@ static void __exit ilo_exit(void)
 MODULE_VERSION("1.4.1");
 MODULE_ALIAS(ILO_NAME);
 MODULE_DESCRIPTION(ILO_NAME);
-MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>");
+MODULE_AUTHOR("David Altobelli <david.altobelli@hpe.com>");
 MODULE_LICENSE("GPL v2");
 
 module_param(max_ccb, uint, 0444);
-MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (16)");
+MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (8-24)(default=16)");
 
 module_init(ilo_init);
 module_exit(ilo_exit);
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index 9a60bd4d3c49..99635dd9dbac 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -1112,6 +1112,7 @@ static int __init init_kgdbts(void)
 
 	return configure_kgdbts();
 }
+device_initcall(init_kgdbts);
 
 static int kgdbts_get_char(void)
 {
@@ -1180,10 +1181,9 @@ static struct kgdb_io kgdbts_io_ops = {
 	.post_exception		= kgdbts_post_exp_handler,
 };
 
-module_init(init_kgdbts);
+/*
+ * not really modular, but the easiest way to keep compat with existing
+ * bootargs behaviour is to continue using module_param here.
+ */
 module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644);
 MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]");
-MODULE_DESCRIPTION("KGDB Test Suite");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Wind River Systems, Inc.");
-
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index b5abe34120b8..11fdadc68e53 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -472,7 +472,7 @@ static void lkdtm_do_action(enum ctype which)
 		break;
 	}
 	case CT_ACCESS_USERSPACE: {
-		unsigned long user_addr, tmp;
+		unsigned long user_addr, tmp = 0;
 		unsigned long *ptr;
 
 		user_addr = vm_mmap(NULL, 0, PAGE_SIZE,
@@ -483,6 +483,12 @@ static void lkdtm_do_action(enum ctype which)
 			return;
 		}
 
+		if (copy_to_user((void __user *)user_addr, &tmp, sizeof(tmp))) {
+			pr_warn("copy_to_user failed\n");
+			vm_munmap(user_addr, PAGE_SIZE);
+			return;
+		}
+
 		ptr = (unsigned long *)user_addr;
 
 		pr_info("attempting bad read at %p\n", ptr);
diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
index 1e42781592d8..cd0403f09267 100644
--- a/drivers/misc/mei/amthif.c
+++ b/drivers/misc/mei/amthif.c
@@ -458,7 +458,7 @@ void mei_amthif_complete(struct mei_device *dev, struct mei_cl_cb *cb)
 		return;
 	}
 
-	if (dev->iamthif_canceled != 1) {
+	if (!dev->iamthif_canceled) {
 		dev->iamthif_state = MEI_IAMTHIF_READ_COMPLETE;
 		dev->iamthif_stall_timer = 0;
 		list_add_tail(&cb->list, &dev->amthif_rd_complete_list.list);
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 3e536ca85f7d..020de5919c21 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -285,11 +285,11 @@ static struct mei_fixup {
 };
 
 /**
- * mei_cl_dev_fixup - run fixup handlers
+ * mei_cldev_fixup - run fixup handlers
  *
  * @cldev: me client device
  */
-void mei_cl_dev_fixup(struct mei_cl_device *cldev)
+void mei_cl_bus_dev_fixup(struct mei_cl_device *cldev)
 {
 	struct mei_fixup *f;
 	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index eef1c6b46ad8..0b05aa938799 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -91,7 +91,7 @@ out:
  * __mei_cl_recv - internal client receive (read)
  *
  * @cl: host client
- * @buf: buffer to send
+ * @buf: buffer to receive
  * @length: buffer length
  *
  * Return: read size in bytes of < 0 on error
@@ -165,7 +165,7 @@ out:
 }
 
 /**
- * mei_cl_send - me device send (write)
+ * mei_cldev_send - me device send (write)
  *
  * @cldev: me client device
  * @buf: buffer to send
@@ -173,7 +173,7 @@ out:
  *
  * Return: written size in bytes or < 0 on error
  */
-ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
+ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
 {
 	struct mei_cl *cl = cldev->cl;
 
@@ -182,18 +182,18 @@ ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length)
 
 	return __mei_cl_send(cl, buf, length, 1);
 }
-EXPORT_SYMBOL_GPL(mei_cl_send);
+EXPORT_SYMBOL_GPL(mei_cldev_send);
 
 /**
- * mei_cl_recv - client receive (read)
+ * mei_cldev_recv - client receive (read)
  *
  * @cldev: me client device
- * @buf: buffer to send
+ * @buf: buffer to receive
  * @length: buffer length
  *
  * Return: read size in bytes of < 0 on error
  */
-ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
+ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
 {
 	struct mei_cl *cl = cldev->cl;
 
@@ -202,15 +202,15 @@ ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length)
 
 	return __mei_cl_recv(cl, buf, length);
 }
-EXPORT_SYMBOL_GPL(mei_cl_recv);
+EXPORT_SYMBOL_GPL(mei_cldev_recv);
 
 /**
- * mei_bus_event_work - dispatch rx event for a bus device
+ * mei_cl_bus_event_work - dispatch rx event for a bus device
  *    and schedule new work
  *
  * @work: work
  */
-static void mei_bus_event_work(struct work_struct *work)
+static void mei_cl_bus_event_work(struct work_struct *work)
 {
 	struct mei_cl_device *cldev;
 
@@ -272,7 +272,7 @@ void mei_cl_bus_rx_event(struct mei_cl *cl)
 }
 
 /**
- * mei_cl_register_event_cb - register event callback
+ * mei_cldev_register_event_cb - register event callback
  *
  * @cldev: me client devices
 * @event_cb: callback function
@@ -283,9 +283,9 @@ void mei_cl_bus_rx_event(struct mei_cl *cl)
  * -EALREADY if an callback is already registered
  * <0 on other errors
  */
-int mei_cl_register_event_cb(struct mei_cl_device *cldev,
+int mei_cldev_register_event_cb(struct mei_cl_device *cldev,
 				unsigned long events_mask,
-				mei_cl_event_cb_t event_cb, void *context)
+				mei_cldev_event_cb_t event_cb, void *context)
 {
 	int ret;
 
@@ -296,7 +296,7 @@ int mei_cl_register_event_cb(struct mei_cl_device *cldev,
 	cldev->events_mask = events_mask;
 	cldev->event_cb = event_cb;
 	cldev->event_context = context;
-	INIT_WORK(&cldev->event_work, mei_bus_event_work);
+	INIT_WORK(&cldev->event_work, mei_cl_bus_event_work);
 
 	if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
 		ret = mei_cl_read_start(cldev->cl, 0, NULL);
@@ -314,42 +314,81 @@ int mei_cl_register_event_cb(struct mei_cl_device *cldev,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(mei_cl_register_event_cb);
+EXPORT_SYMBOL_GPL(mei_cldev_register_event_cb);
 
 /**
- * mei_cl_get_drvdata - driver data getter
+ * mei_cldev_get_drvdata - driver data getter
  *
  * @cldev: mei client device
  *
  * Return: driver private data
  */
-void *mei_cl_get_drvdata(const struct mei_cl_device *cldev)
+void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev)
 {
 	return dev_get_drvdata(&cldev->dev);
 }
-EXPORT_SYMBOL_GPL(mei_cl_get_drvdata);
+EXPORT_SYMBOL_GPL(mei_cldev_get_drvdata);
 
 /**
- * mei_cl_set_drvdata - driver data setter
+ * mei_cldev_set_drvdata - driver data setter
  *
  * @cldev: mei client device
  * @data: data to store
  */
-void mei_cl_set_drvdata(struct mei_cl_device *cldev, void *data)
+void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data)
 {
 	dev_set_drvdata(&cldev->dev, data);
 }
-EXPORT_SYMBOL_GPL(mei_cl_set_drvdata);
+EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata);
+
+/**
+ * mei_cldev_uuid - return uuid of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client uuid
+ */
+const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev)
+{
+	return mei_me_cl_uuid(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_uuid);
+
+/**
+ * mei_cldev_ver - return protocol version of the underlying me client
+ *
+ * @cldev: mei client device
+ *
+ * Return: me client protocol version
+ */
+u8 mei_cldev_ver(const struct mei_cl_device *cldev)
+{
+	return mei_me_cl_ver(cldev->me_cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_ver);
+
+/**
+ * mei_cldev_enabled - check whether the device is enabled
+ *
+ * @cldev: mei client device
+ *
+ * Return: true if me client is initialized and connected
+ */
+bool mei_cldev_enabled(struct mei_cl_device *cldev)
+{
+	return cldev->cl && mei_cl_is_connected(cldev->cl);
+}
+EXPORT_SYMBOL_GPL(mei_cldev_enabled);
 
 /**
- * mei_cl_enable_device - enable me client device
+ * mei_cldev_enable_device - enable me client device
  *     create connection with me client
  *
  * @cldev: me client device
 *
 * Return: 0 on success and < 0 on error
  */
-int mei_cl_enable_device(struct mei_cl_device *cldev)
+int mei_cldev_enable(struct mei_cl_device *cldev)
 {
 	struct mei_device *bus = cldev->bus;
 	struct mei_cl *cl;
@@ -389,17 +428,17 @@ out:
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(mei_cl_enable_device);
+EXPORT_SYMBOL_GPL(mei_cldev_enable);
 
 /**
- * mei_cl_disable_device - disable me client device
+ * mei_cldev_disable - disable me client device
  *     disconnect form the me client
  *
  * @cldev: me client device
  *
  * Return: 0 on success and < 0 on error
  */
-int mei_cl_disable_device(struct mei_cl_device *cldev)
+int mei_cldev_disable(struct mei_cl_device *cldev)
 {
 	struct mei_device *bus;
 	struct mei_cl *cl;
@@ -437,7 +476,7 @@ out:
 	mutex_unlock(&bus->device_lock);
 	return err;
 }
-EXPORT_SYMBOL_GPL(mei_cl_disable_device);
+EXPORT_SYMBOL_GPL(mei_cldev_disable);
 
 /**
  * mei_cl_device_find - find matching entry in the driver id table
@@ -453,17 +492,26 @@ struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev,
 {
 	const struct mei_cl_device_id *id;
 	const uuid_le *uuid;
+	u8 version;
+	bool match;
 
 	uuid = mei_me_cl_uuid(cldev->me_cl);
+	version = mei_me_cl_ver(cldev->me_cl);
 
 	id = cldrv->id_table;
 	while (uuid_le_cmp(NULL_UUID_LE, id->uuid)) {
 		if (!uuid_le_cmp(*uuid, id->uuid)) {
+			match = true;
 
-			if (!cldev->name[0])
-				return id;
+			if (cldev->name[0])
+				if (strncmp(cldev->name, id->name,
+					    sizeof(id->name)))
+					match = false;
 
-			if (!strncmp(cldev->name, id->name, sizeof(id->name)))
+			if (id->version != MEI_CL_VERSION_ANY)
+				if (id->version != version)
+					match = false;
+			if (match)
 				return id;
 		}
 
@@ -590,6 +638,19 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *a,
 }
 static DEVICE_ATTR_RO(uuid);
 
+static ssize_t version_show(struct device *dev, struct device_attribute *a,
+			    char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	u8 version = mei_me_cl_ver(cldev->me_cl);
+	size_t len;
+
+	len = snprintf(buf, PAGE_SIZE, "%02X", version);
+
+	return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
+}
+static DEVICE_ATTR_RO(version);
+
 static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
 			     char *buf)
 {
@@ -597,20 +658,19 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
 	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
 	size_t len;
 
-	len = snprintf(buf, PAGE_SIZE, "mei:%s:" MEI_CL_UUID_FMT ":",
-		cldev->name, MEI_CL_UUID_ARGS(uuid->b));
-
+	len = snprintf(buf, PAGE_SIZE, "mei:%s:%pUl:", cldev->name, uuid);
 	return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
 }
 static DEVICE_ATTR_RO(modalias);
 
-static struct attribute *mei_cl_dev_attrs[] = {
+static struct attribute *mei_cldev_attrs[] = {
 	&dev_attr_name.attr,
 	&dev_attr_uuid.attr,
+	&dev_attr_version.attr,
 	&dev_attr_modalias.attr,
 	NULL,
 };
-ATTRIBUTE_GROUPS(mei_cl_dev);
+ATTRIBUTE_GROUPS(mei_cldev);
 
 /**
  * mei_cl_device_uevent - me client bus uevent handler
@@ -624,6 +684,10 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	struct mei_cl_device *cldev = to_mei_cl_device(dev);
 	const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl);
+	u8 version = mei_me_cl_ver(cldev->me_cl);
+
+	if (add_uevent_var(env, "MEI_CL_VERSION=%d", version))
+		return -ENOMEM;
 
 	if (add_uevent_var(env, "MEI_CL_UUID=%pUl", uuid))
 		return -ENOMEM;
@@ -631,8 +695,8 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 	if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name))
 		return -ENOMEM;
 
-	if (add_uevent_var(env, "MODALIAS=mei:%s:" MEI_CL_UUID_FMT ":",
-		cldev->name, MEI_CL_UUID_ARGS(uuid->b)))
+	if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:%02X:",
+			   cldev->name, uuid, version))
 		return -ENOMEM;
 
 	return 0;
@@ -640,7 +704,7 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 
 static struct bus_type mei_cl_bus_type = {
 	.name		= "mei",
-	.dev_groups	= mei_cl_dev_groups,
+	.dev_groups	= mei_cldev_groups,
 	.match		= mei_cl_device_match,
 	.probe		= mei_cl_device_probe,
 	.remove		= mei_cl_device_remove,
@@ -661,7 +725,7 @@ static void mei_dev_bus_put(struct mei_device *bus)
 	put_device(bus->dev);
 }
 
-static void mei_cl_dev_release(struct device *dev)
+static void mei_cl_bus_dev_release(struct device *dev)
 {
 	struct mei_cl_device *cldev = to_mei_cl_device(dev);
 
@@ -674,19 +738,32 @@ static void mei_cl_dev_release(struct device *dev)
 }
 
 static struct device_type mei_cl_device_type = {
-	.release	= mei_cl_dev_release,
+	.release	= mei_cl_bus_dev_release,
 };
 
 /**
- * mei_cl_dev_alloc - initialize and allocate mei client device
+ * mei_cl_bus_set_name - set device name for me client device
+ *
+ * @cldev: me client device
+ */
+static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev)
+{
+	dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X",
+		     cldev->name,
+		     mei_me_cl_uuid(cldev->me_cl),
+		     mei_me_cl_ver(cldev->me_cl));
+}
+
+/**
+ * mei_cl_bus_dev_alloc - initialize and allocate mei client device
  *
  * @bus: mei device
  * @me_cl: me client
  *
  * Return: allocated device structur or NULL on allocation failure
  */
-static struct mei_cl_device *mei_cl_dev_alloc(struct mei_device *bus,
+static struct mei_cl_device *mei_cl_bus_dev_alloc(struct mei_device *bus,
 					      struct mei_me_client *me_cl)
 {
 	struct mei_cl_device *cldev;
 
@@ -700,6 +777,7 @@ static struct mei_cl_device *mei_cl_dev_alloc(struct mei_device *bus,
 	cldev->dev.type = &mei_cl_device_type;
 	cldev->bus = mei_dev_bus_get(bus);
 	cldev->me_cl = mei_me_cl_get(me_cl);
+	mei_cl_bus_set_name(cldev);
 	cldev->is_added = 0;
 	INIT_LIST_HEAD(&cldev->bus_list);
 
@@ -715,15 +793,15 @@ static struct mei_cl_device *mei_cl_dev_alloc(struct mei_device *bus,
  *
  * Return: true if the device is eligible for enumeration
  */
-static bool mei_cl_dev_setup(struct mei_device *bus,
+static bool mei_cl_bus_dev_setup(struct mei_device *bus,
 			     struct mei_cl_device *cldev)
 {
 	cldev->do_match = 1;
-	mei_cl_dev_fixup(cldev);
+	mei_cl_bus_dev_fixup(cldev);
 
+	/* the device name can change during fix up */
 	if (cldev->do_match)
-		dev_set_name(&cldev->dev, "mei:%s:%pUl",
-			     cldev->name, mei_me_cl_uuid(cldev->me_cl));
+		mei_cl_bus_set_name(cldev);
 
 	return cldev->do_match == 1;
 }
@@ -739,7 +817,9 @@ static int mei_cl_bus_dev_add(struct mei_cl_device *cldev)
 {
 	int ret;
 
-	dev_dbg(cldev->bus->dev, "adding %pUL\n", mei_me_cl_uuid(cldev->me_cl));
+	dev_dbg(cldev->bus->dev, "adding %pUL:%02X\n",
+		mei_me_cl_uuid(cldev->me_cl),
+		mei_me_cl_ver(cldev->me_cl));
 	ret = device_add(&cldev->dev);
 	if (!ret)
 		cldev->is_added = 1;
@@ -762,17 +842,20 @@ static void mei_cl_bus_dev_stop(struct mei_cl_device *cldev)
  * mei_cl_bus_dev_destroy - destroy me client devices object
  *
  * @cldev: me client device
+ *
+ * Locking: called under "dev->cl_bus_lock" lock
  */
 static void mei_cl_bus_dev_destroy(struct mei_cl_device *cldev)
 {
+
+	WARN_ON(!mutex_is_locked(&cldev->bus->cl_bus_lock));
+
 	if (!cldev->is_added)
 		return;
 
 	device_del(&cldev->dev);
 
-	mutex_lock(&cldev->bus->cl_bus_lock);
 	list_del_init(&cldev->bus_list);
-	mutex_unlock(&cldev->bus->cl_bus_lock);
 
 	cldev->is_added = 0;
 	put_device(&cldev->dev);
@@ -798,35 +881,40 @@ void mei_cl_bus_remove_devices(struct mei_device *bus)
 {
 	struct mei_cl_device *cldev, *next;
 
+	mutex_lock(&bus->cl_bus_lock);
 	list_for_each_entry_safe(cldev, next, &bus->device_list, bus_list)
 		mei_cl_bus_remove_device(cldev);
+	mutex_unlock(&bus->cl_bus_lock);
 }
 
 
 /**
- * mei_cl_dev_init - allocate and initializes an mei client devices
+ * mei_cl_bus_dev_init - allocate and initializes an mei client devices
  *     based on me client
  *
  * @bus: mei device
 * @me_cl: me client
+ *
+ * Locking: called under "dev->cl_bus_lock" lock
  */
-static void mei_cl_dev_init(struct mei_device *bus, struct mei_me_client *me_cl)
+static void mei_cl_bus_dev_init(struct mei_device *bus,
+				struct mei_me_client *me_cl)
 {
 	struct mei_cl_device *cldev;
 
+	WARN_ON(!mutex_is_locked(&bus->cl_bus_lock));
+
 	dev_dbg(bus->dev, "initializing %pUl", mei_me_cl_uuid(me_cl));
 
 	if (me_cl->bus_added)
 		return;
 
-	cldev = mei_cl_dev_alloc(bus, me_cl);
+	cldev = mei_cl_bus_dev_alloc(bus, me_cl);
 	if (!cldev)
 		return;
 
-	mutex_lock(&cldev->bus->cl_bus_lock);
 	me_cl->bus_added = true;
 	list_add_tail(&cldev->bus_list, &bus->device_list);
-	mutex_unlock(&cldev->bus->cl_bus_lock);
 
 }
 
@@ -841,12 +929,13 @@ void mei_cl_bus_rescan(struct mei_device *bus)
 	struct mei_cl_device *cldev, *n;
 	struct mei_me_client *me_cl;
 
+	mutex_lock(&bus->cl_bus_lock);
+
 	down_read(&bus->me_clients_rwsem);
 	list_for_each_entry(me_cl, &bus->me_clients, list)
-		mei_cl_dev_init(bus, me_cl);
+		mei_cl_bus_dev_init(bus, me_cl);
 	up_read(&bus->me_clients_rwsem);
 
-	mutex_lock(&bus->cl_bus_lock);
 	list_for_each_entry_safe(cldev, n, &bus->device_list, bus_list) {
 
 		if (!mei_me_cl_is_active(cldev->me_cl)) {
@@ -857,7 +946,7 @@ void mei_cl_bus_rescan(struct mei_device *bus)
 		if (cldev->is_added)
 			continue;
 
-		if (mei_cl_dev_setup(bus, cldev))
+		if (mei_cl_bus_dev_setup(bus, cldev))
 			mei_cl_bus_dev_add(cldev);
 		else {
 			list_del_init(&cldev->bus_list);
@@ -869,7 +958,8 @@ void mei_cl_bus_rescan(struct mei_device *bus)
 	dev_dbg(bus->dev, "rescan end");
 }
 
-int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner)
+int __mei_cldev_driver_register(struct mei_cl_driver *cldrv,
+				struct module *owner)
 {
 	int err;
 
@@ -885,15 +975,15 @@ int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(__mei_cl_driver_register);
+EXPORT_SYMBOL_GPL(__mei_cldev_driver_register);
 
-void mei_cl_driver_unregister(struct mei_cl_driver *cldrv)
+void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv)
 {
 	driver_unregister(&cldrv->driver);
 
 	pr_debug("mei: driver [%s] unregistered\n", cldrv->driver.name);
 }
-EXPORT_SYMBOL_GPL(mei_cl_driver_unregister);
+EXPORT_SYMBOL_GPL(mei_cldev_driver_unregister);
 
 
 int __init mei_cl_bus_init(void)
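
With the mei bus entry points renamed to the mei_cldev_* prefix above, a client sitting on the bus uses them in the obvious way. A minimal sketch (hypothetical function, assuming a cldev handed to a bus driver; not taken from this series):

    /* Send a command and read the reply over an me client connection. */
    static int example_cldev_ping(struct mei_cl_device *cldev, u8 *cmd,
                                  size_t cmd_len, u8 *resp, size_t resp_len)
    {
            ssize_t ret;

            ret = mei_cldev_enable(cldev);      /* connect to the me client */
            if (ret < 0)
                    return ret;

            ret = mei_cldev_send(cldev, cmd, cmd_len);
            if (ret < 0)
                    goto out;

            ret = mei_cldev_recv(cldev, resp, resp_len);
    out:
            mei_cldev_disable(cldev);
            return ret < 0 ? ret : 0;
    }
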
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
index 1c7cad07d731..04e1aa39243f 100644
--- a/drivers/misc/mei/client.h
+++ b/drivers/misc/mei/client.h
@@ -68,6 +68,18 @@ static inline const uuid_le *mei_me_cl_uuid(const struct mei_me_client *me_cl)
 	return &me_cl->props.protocol_name;
 }
 
+/**
+ * mei_me_cl_ver - return me client protocol version
+ *
+ * @me_cl: me client
+ *
+ * Return: me client protocol version
+ */
+static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl)
+{
+	return me_cl->props.protocol_version;
+}
+
 /*
  * MEI IO Functions
  */
diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c
index 8504dbeacd3b..a138d8a27ab5 100644
--- a/drivers/misc/mei/debugfs.c
+++ b/drivers/misc/mei/debugfs.c
@@ -215,7 +215,7 @@ int mei_dbgfs_register(struct mei_device *dev, const char *name)
 	f = debugfs_create_file("active", S_IRUSR, dir,
 				dev, &mei_dbgfs_fops_active);
 	if (!f) {
-		dev_err(dev->dev, "meclients: registration failed\n");
+		dev_err(dev->dev, "active: registration failed\n");
 		goto err;
 	}
 	f = debugfs_create_file("devstate", S_IRUSR, dir,
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index 6d7c188fb65c..e7b7aad0999b 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -281,7 +281,7 @@ int mei_hbm_start_req(struct mei_device *dev)
 	return 0;
 }
 
-/*
+/**
  * mei_hbm_enum_clients_req - sends enumeration client request message.
  *
  * @dev: the device structure
@@ -314,7 +314,7 @@ static int mei_hbm_enum_clients_req(struct mei_device *dev)
 	return 0;
 }
 
-/*
+/**
  * mei_hbm_me_cl_add - add new me client to the list
  *
  * @dev: the device structure
@@ -569,7 +569,7 @@ static int mei_hbm_prop_req(struct mei_device *dev)
 	return 0;
 }
 
-/*
+/**
  * mei_hbm_pg - sends pg command
 *
 * @dev: the device structure
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index 65511d39d89b..25b1997a62cb 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -150,7 +150,7 @@ static inline u32 mei_me_d0i3c_read(const struct mei_device *dev)
 	u32 reg;
 
 	reg = mei_me_reg_read(to_me_hw(dev), H_D0I3C);
-	trace_mei_reg_read(dev->dev, "H_D0I3C", H_CSR, reg);
+	trace_mei_reg_read(dev->dev, "H_D0I3C", H_D0I3C, reg);
 
 	return reg;
 }
@@ -163,7 +163,7 @@ static inline u32 mei_me_d0i3c_read(const struct mei_device *dev)
  */
 static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg)
 {
-	trace_mei_reg_write(dev->dev, "H_D0I3C", H_CSR, reg);
+	trace_mei_reg_write(dev->dev, "H_D0I3C", H_D0I3C, reg);
 	mei_me_reg_write(to_me_hw(dev), H_D0I3C, reg);
 }
 
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index e374661652cd..3edafc8d3ad4 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -329,10 +329,10 @@ void mei_stop(struct mei_device *dev)
 {
 	dev_dbg(dev->dev, "stopping the device.\n");
 
-	mei_cancel_work(dev);
-
 	mei_cl_bus_remove_devices(dev);
 
+	mei_cancel_work(dev);
+
 	mutex_lock(&dev->device_lock);
 
 	mei_wd_stop(dev);
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index c418d7888994..64b568a0268d 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -21,6 +21,7 @@
 #include <linux/fs.h>
 #include <linux/jiffies.h>
 #include <linux/slab.h>
+#include <linux/pm_runtime.h>
 
 #include <linux/mei.h>
 
@@ -147,6 +148,9 @@ int mei_cl_irq_read_msg(struct mei_cl *cl,
 		cb->read_time = jiffies;
 		cl_dbg(dev, cl, "completed read length = %lu\n", cb->buf_idx);
 		list_move_tail(&cb->list, &complete_list->list);
+	} else {
+		pm_runtime_mark_last_busy(dev->dev);
+		pm_request_autosuspend(dev->dev);
 	}
 
 out:
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index d74b6aa8ae27..4250555d5e72 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -275,32 +275,33 @@ struct mei_cl {
 	struct mei_cl_device *cldev;
 };
 
-/** struct mei_hw_ops
+/**
+ * struct mei_hw_ops - hw specific ops
  *
  * @host_is_ready : query for host readiness
-
+ *
  * @hw_is_ready : query if hw is ready
  * @hw_reset : reset hw
  * @hw_start : start hw after reset
  * @hw_config : configure hw
-
+ *
  * @fw_status : get fw status registers
  * @pg_state : power gating state of the device
  * @pg_in_transition : is device now in pg transition
 * @pg_is_enabled : is power gating enabled
-
+ *
  * @intr_clear : clear pending interrupts
  * @intr_enable : enable interrupts
  * @intr_disable : disable interrupts
-
+ *
  * @hbuf_free_slots : query for write buffer empty slots
  * @hbuf_is_ready : query if write buffer is empty
  * @hbuf_max_len : query for write buffer max len
-
+ *
  * @write : write a message to FW
-
+ *
  * @rdbuf_full_slots : query how many slots are filled
-
+ *
  * @read_hdr : get first 4 bytes (header)
  * @read : read a buffer from the FW
  */
@@ -340,7 +341,7 @@ struct mei_hw_ops {
 
 /* MEI bus API*/
 void mei_cl_bus_rescan(struct mei_device *bus);
-void mei_cl_dev_fixup(struct mei_cl_device *dev);
+void mei_cl_bus_dev_fixup(struct mei_cl_device *dev);
 ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length,
 			bool blocking);
 ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length);
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index e9f2f56c370d..40677df7f996 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -36,7 +36,7 @@ comment "Intel MIC Host Driver"
 
 config INTEL_MIC_HOST
 	tristate "Intel MIC Host Driver"
-	depends on 64BIT && PCI && X86 && INTEL_MIC_BUS && SCIF_BUS
+	depends on 64BIT && PCI && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM
 	select VHOST_RING
 	help
 	  This enables Host Driver support for the Intel Many Integrated
@@ -56,7 +56,7 @@ comment "Intel MIC Card Driver"
 
 config INTEL_MIC_CARD
 	tristate "Intel MIC Card Driver"
-	depends on 64BIT && X86 && INTEL_MIC_BUS && SCIF_BUS
+	depends on 64BIT && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM
 	select VIRTIO
 	help
 	  This enables card driver support for the Intel Many Integrated
@@ -74,7 +74,8 @@ comment "SCIF Driver"
 
 config SCIF
 	tristate "SCIF Driver"
-	depends on 64BIT && PCI && X86 && SCIF_BUS
+	depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT
+	select IOMMU_IOVA
 	help
 	  This enables SCIF Driver support for the Intel Many Integrated
 	  Core (MIC) family of PCIe form factor coprocessor devices that
@@ -88,3 +89,21 @@ config SCIF
 	  More information about the Intel MIC family as well as the Linux
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
+
+comment "Intel MIC Coprocessor State Management (COSM) Drivers"
+
+config MIC_COSM
+	tristate "Intel MIC Coprocessor State Management (COSM) Drivers"
+	depends on 64BIT && PCI && X86 && SCIF
+	help
+	  This enables COSM driver support for the Intel Many
+	  Integrated Core (MIC) family of PCIe form factor coprocessor
+	  devices. COSM drivers implement functions such as boot,
+	  shutdown, reset and reboot of MIC devices.
+
+	  If you are building a host kernel with an Intel MIC device then
+	  say M (recommended) or Y, else say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
diff --git a/drivers/misc/mic/Makefile b/drivers/misc/mic/Makefile
index a74042c58649..e288a1106738 100644
--- a/drivers/misc/mic/Makefile
+++ b/drivers/misc/mic/Makefile
@@ -6,3 +6,5 @@ obj-$(CONFIG_INTEL_MIC_HOST) += host/
 obj-$(CONFIG_INTEL_MIC_CARD) += card/
 obj-y += bus/
 obj-$(CONFIG_SCIF) += scif/
+obj-$(CONFIG_MIC_COSM) += cosm/
+obj-$(CONFIG_MIC_COSM) += cosm_client/
diff --git a/drivers/misc/mic/bus/Makefile b/drivers/misc/mic/bus/Makefile
index 1ed37e234c96..761842b0d0bb 100644
--- a/drivers/misc/mic/bus/Makefile
+++ b/drivers/misc/mic/bus/Makefile
@@ -4,3 +4,4 @@
 #
 obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o
 obj-$(CONFIG_SCIF_BUS) += scif_bus.o
+obj-$(CONFIG_MIC_COSM) += cosm_bus.o
diff --git a/drivers/misc/mic/bus/cosm_bus.c b/drivers/misc/mic/bus/cosm_bus.c
new file mode 100644
index 000000000000..d31d6c6e6cb1
--- /dev/null
+++ b/drivers/misc/mic/bus/cosm_bus.c
@@ -0,0 +1,141 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC COSM Bus Driver
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include "cosm_bus.h"
+
+/* Unique numbering for cosm devices. */
+static DEFINE_IDA(cosm_index_ida);
+
+static int cosm_dev_probe(struct device *d)
+{
+	struct cosm_device *dev = dev_to_cosm(d);
+	struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+	return drv->probe(dev);
+}
+
+static int cosm_dev_remove(struct device *d)
+{
+	struct cosm_device *dev = dev_to_cosm(d);
+	struct cosm_driver *drv = drv_to_cosm(dev->dev.driver);
+
+	drv->remove(dev);
+	return 0;
+}
+
+static struct bus_type cosm_bus = {
+	.name = "cosm_bus",
+	.probe = cosm_dev_probe,
+	.remove = cosm_dev_remove,
+};
+
+int cosm_register_driver(struct cosm_driver *driver)
+{
+	driver->driver.bus = &cosm_bus;
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_register_driver);
+
+void cosm_unregister_driver(struct cosm_driver *driver)
+{
+	driver_unregister(&driver->driver);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_driver);
+
+static inline void cosm_release_dev(struct device *d)
+{
+	struct cosm_device *cdev = dev_to_cosm(d);
+
+	kfree(cdev);
+}
+
+struct cosm_device *
+cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops)
+{
+	struct cosm_device *cdev;
+	int ret;
+
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		return ERR_PTR(-ENOMEM);
+
+	cdev->dev.parent = pdev;
+	cdev->dev.release = cosm_release_dev;
+	cdev->hw_ops = hw_ops;
+	dev_set_drvdata(&cdev->dev, cdev);
+	cdev->dev.bus = &cosm_bus;
+
+	/* Assign a unique device index and hence name */
+	ret = ida_simple_get(&cosm_index_ida, 0, 0, GFP_KERNEL);
+	if (ret < 0)
+		goto free_cdev;
+
+	cdev->index = ret;
+	cdev->dev.id = ret;
+	dev_set_name(&cdev->dev, "cosm-dev%u", cdev->index);
+
+	ret = device_register(&cdev->dev);
+	if (ret)
+		goto ida_remove;
+	return cdev;
+ida_remove:
+	ida_simple_remove(&cosm_index_ida, cdev->index);
+free_cdev:
+	put_device(&cdev->dev);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(cosm_register_device);
+
+void cosm_unregister_device(struct cosm_device *dev)
+{
+	int index = dev->index; /* save for after device release */
+
+	device_unregister(&dev->dev);
+	ida_simple_remove(&cosm_index_ida, index);
+}
+EXPORT_SYMBOL_GPL(cosm_unregister_device);
+
+struct cosm_device *cosm_find_cdev_by_id(int id)
+{
+	struct device *dev = subsys_find_device_by_id(&cosm_bus, id, NULL);
+
+	return dev ? container_of(dev, struct cosm_device, dev) : NULL;
+}
+EXPORT_SYMBOL_GPL(cosm_find_cdev_by_id);
+
+static int __init cosm_init(void)
+{
+	return bus_register(&cosm_bus);
+}
+
+static void __exit cosm_exit(void)
+{
+	bus_unregister(&cosm_bus);
+	ida_destroy(&cosm_index_ida);
+}
+
+core_initcall(cosm_init);
+module_exit(cosm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC card OS state management bus driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/bus/cosm_bus.h b/drivers/misc/mic/bus/cosm_bus.h
new file mode 100644
index 000000000000..f7c57f266916
--- /dev/null
+++ b/drivers/misc/mic/bus/cosm_bus.h
@@ -0,0 +1,134 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC COSM Bus Driver
19 */
20#ifndef _COSM_BUS_H_
21#define _COSM_BUS_H_
22
23#include <linux/scif.h>
24#include <linux/mic_common.h>
25#include "../common/mic_dev.h"
26
27/**
28 * cosm_device - representation of a cosm device
29 *
30 * @attr_group: Pointer to list of sysfs attribute groups.
31 * @sdev: Device for sysfs entries.
32 * @state: MIC state.
33 * @shutdown_status: MIC status reported by card for shutdown/crashes.
34 * @shutdown_status_int: Internal shutdown status maintained by the driver
35 * @cosm_mutex: Mutex for synchronizing access to data structures.
36 * @reset_trigger_work: Work for triggering reset requests.
37 * @scif_work: Work for handling per device SCIF connections
38 * @cmdline: Kernel command line.
39 * @firmware: Firmware file name.
40 * @ramdisk: Ramdisk file name.
41 * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates.
42 * @log_buf_addr: Log buffer address for MIC.
43 * @log_buf_len: Log buffer length address for MIC.
44 * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes.
45 * @hw_ops: the hardware bus ops for this device.
46 * @dev: underlying device.
47 * @index: unique position on the cosm bus
48 * @dbg_dir: debug fs directory
49 * @newepd: new endpoint from scif accept to be assigned to this cdev
50 * @epd: SCIF endpoint for this cdev
51 * @heartbeat_watchdog_enable: if heartbeat watchdog is enabled for this cdev
52 * @sysfs_heartbeat_enable: sysfs setting for disabling heartbeat notification
53 */
54struct cosm_device {
55 const struct attribute_group **attr_group;
56 struct device *sdev;
57 u8 state;
58 u8 shutdown_status;
59 u8 shutdown_status_int;
60 struct mutex cosm_mutex;
61 struct work_struct reset_trigger_work;
62 struct work_struct scif_work;
63 char *cmdline;
64 char *firmware;
65 char *ramdisk;
66 char *bootmode;
67 void *log_buf_addr;
68 int *log_buf_len;
69 struct kernfs_node *state_sysfs;
70 struct cosm_hw_ops *hw_ops;
71 struct device dev;
72 int index;
73 struct dentry *dbg_dir;
74 scif_epd_t newepd;
75 scif_epd_t epd;
76 bool heartbeat_watchdog_enable;
77 bool sysfs_heartbeat_enable;
78};
79
80/**
81 * cosm_driver - operations for a cosm driver
82 *
83 * @driver: underlying device driver (populate name and owner).
84 * @probe: the function to call when a device is found. Returns 0 or -errno.
85 * @remove: the function to call when a device is removed.
86 */
87struct cosm_driver {
88 struct device_driver driver;
89 int (*probe)(struct cosm_device *dev);
90 void (*remove)(struct cosm_device *dev);
91};
92
93/**
94 * cosm_hw_ops - cosm bus ops
95 *
96 * @reset: trigger MIC reset
97 * @force_reset: force MIC reset
98 * @post_reset: inform MIC reset is complete
99 * @ready: is MIC ready for OS download
100 * @start: boot MIC
101 * @stop: prepare MIC for reset
102 * @family: return MIC HW family string
103 * @stepping: return MIC HW stepping string
104 * @aper: return MIC PCIe aperture
105 */
106struct cosm_hw_ops {
107 void (*reset)(struct cosm_device *cdev);
108 void (*force_reset)(struct cosm_device *cdev);
109 void (*post_reset)(struct cosm_device *cdev, enum mic_states state);
110 bool (*ready)(struct cosm_device *cdev);
111 int (*start)(struct cosm_device *cdev, int id);
112 void (*stop)(struct cosm_device *cdev, bool force);
113 ssize_t (*family)(struct cosm_device *cdev, char *buf);
114 ssize_t (*stepping)(struct cosm_device *cdev, char *buf);
115 struct mic_mw *(*aper)(struct cosm_device *cdev);
116};
117
118struct cosm_device *
119cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops);
120void cosm_unregister_device(struct cosm_device *dev);
121int cosm_register_driver(struct cosm_driver *drv);
122void cosm_unregister_driver(struct cosm_driver *drv);
123struct cosm_device *cosm_find_cdev_by_id(int id);
124
125static inline struct cosm_device *dev_to_cosm(struct device *dev)
126{
127 return container_of(dev, struct cosm_device, dev);
128}
129
130static inline struct cosm_driver *drv_to_cosm(struct device_driver *drv)
131{
132 return container_of(drv, struct cosm_driver, driver);
133}
134#endif /* _COSM_BUS_H_ */
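The header above is the entire contract a COSM bus client needs: fill in a cosm_driver with probe/remove callbacks and hand it to cosm_register_driver(). A minimal, purely illustrative client is sketched below; the module name "cosm_example", the empty callbacks and the relative include path are assumptions and not part of this series (the host PCIe driver side would instead supply a cosm_hw_ops table to cosm_register_device()).

/* Hypothetical, minimal COSM bus client -- not part of this series. */
#include <linux/module.h>
#include <linux/device.h>
#include "../bus/cosm_bus.h"	/* include path assumed, as in cosm_main.h */

static int example_probe(struct cosm_device *cdev)
{
	dev_info(&cdev->dev, "example client bound to mic%d\n", cdev->index);
	return 0;
}

static void example_remove(struct cosm_device *cdev)
{
	dev_info(&cdev->dev, "example client unbound\n");
}

static struct cosm_driver example_driver = {
	.driver = {
		.name  = "cosm_example",
		.owner = THIS_MODULE,
	},
	.probe  = example_probe,
	.remove = example_remove,
};

static int __init example_init(void)
{
	return cosm_register_driver(&example_driver);
}

static void __exit example_exit(void)
{
	cosm_unregister_driver(&example_driver);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL v2");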
diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c
index 961ae90aae47..be37890abb93 100644
--- a/drivers/misc/mic/bus/mic_bus.c
+++ b/drivers/misc/mic/bus/mic_bus.c
@@ -25,9 +25,6 @@
25#include <linux/idr.h> 25#include <linux/idr.h>
26#include <linux/mic_bus.h> 26#include <linux/mic_bus.h>
27 27
28/* Unique numbering for mbus devices. */
29static DEFINE_IDA(mbus_index_ida);
30
31static ssize_t device_show(struct device *d, 28static ssize_t device_show(struct device *d,
32 struct device_attribute *attr, char *buf) 29 struct device_attribute *attr, char *buf)
33{ 30{
@@ -147,7 +144,8 @@ static void mbus_release_dev(struct device *d)
147 144
148struct mbus_device * 145struct mbus_device *
149mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, 146mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
150 struct mbus_hw_ops *hw_ops, void __iomem *mmio_va) 147 struct mbus_hw_ops *hw_ops, int index,
148 void __iomem *mmio_va)
151{ 149{
152 int ret; 150 int ret;
153 struct mbus_device *mbdev; 151 struct mbus_device *mbdev;
@@ -166,13 +164,7 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
166 mbdev->dev.release = mbus_release_dev; 164 mbdev->dev.release = mbus_release_dev;
167 mbdev->hw_ops = hw_ops; 165 mbdev->hw_ops = hw_ops;
168 mbdev->dev.bus = &mic_bus; 166 mbdev->dev.bus = &mic_bus;
169 167 mbdev->index = index;
170 /* Assign a unique device index and hence name. */
171 ret = ida_simple_get(&mbus_index_ida, 0, 0, GFP_KERNEL);
172 if (ret < 0)
173 goto free_mbdev;
174
175 mbdev->index = ret;
176 dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index); 168 dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index);
177 /* 169 /*
178 * device_register() causes the bus infrastructure to look for a 170 * device_register() causes the bus infrastructure to look for a
@@ -180,22 +172,17 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
180 */ 172 */
181 ret = device_register(&mbdev->dev); 173 ret = device_register(&mbdev->dev);
182 if (ret) 174 if (ret)
183 goto ida_remove; 175 goto free_mbdev;
184 return mbdev; 176 return mbdev;
185ida_remove:
186 ida_simple_remove(&mbus_index_ida, mbdev->index);
187free_mbdev: 177free_mbdev:
188 kfree(mbdev); 178 put_device(&mbdev->dev);
189 return ERR_PTR(ret); 179 return ERR_PTR(ret);
190} 180}
191EXPORT_SYMBOL_GPL(mbus_register_device); 181EXPORT_SYMBOL_GPL(mbus_register_device);
192 182
193void mbus_unregister_device(struct mbus_device *mbdev) 183void mbus_unregister_device(struct mbus_device *mbdev)
194{ 184{
195 int index = mbdev->index; /* save for after device release */
196
197 device_unregister(&mbdev->dev); 185 device_unregister(&mbdev->dev);
198 ida_simple_remove(&mbus_index_ida, index);
199} 186}
200EXPORT_SYMBOL_GPL(mbus_unregister_device); 187EXPORT_SYMBOL_GPL(mbus_unregister_device);
201 188
@@ -207,7 +194,6 @@ static int __init mbus_init(void)
207static void __exit mbus_exit(void) 194static void __exit mbus_exit(void)
208{ 195{
209 bus_unregister(&mic_bus); 196 bus_unregister(&mic_bus);
210 ida_destroy(&mbus_index_ida);
211} 197}
212 198
213core_initcall(mbus_init); 199core_initcall(mbus_init);
diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c
index 2da7ceed015d..ff6e01c25810 100644
--- a/drivers/misc/mic/bus/scif_bus.c
+++ b/drivers/misc/mic/bus/scif_bus.c
@@ -28,7 +28,6 @@ static ssize_t device_show(struct device *d,
28 28
29 return sprintf(buf, "0x%04x\n", dev->id.device); 29 return sprintf(buf, "0x%04x\n", dev->id.device);
30} 30}
31
32static DEVICE_ATTR_RO(device); 31static DEVICE_ATTR_RO(device);
33 32
34static ssize_t vendor_show(struct device *d, 33static ssize_t vendor_show(struct device *d,
@@ -38,7 +37,6 @@ static ssize_t vendor_show(struct device *d,
38 37
39 return sprintf(buf, "0x%04x\n", dev->id.vendor); 38 return sprintf(buf, "0x%04x\n", dev->id.vendor);
40} 39}
41
42static DEVICE_ATTR_RO(vendor); 40static DEVICE_ATTR_RO(vendor);
43 41
44static ssize_t modalias_show(struct device *d, 42static ssize_t modalias_show(struct device *d,
@@ -49,7 +47,6 @@ static ssize_t modalias_show(struct device *d,
49 return sprintf(buf, "scif:d%08Xv%08X\n", 47 return sprintf(buf, "scif:d%08Xv%08X\n",
50 dev->id.device, dev->id.vendor); 48 dev->id.device, dev->id.vendor);
51} 49}
52
53static DEVICE_ATTR_RO(modalias); 50static DEVICE_ATTR_RO(modalias);
54 51
55static struct attribute *scif_dev_attrs[] = { 52static struct attribute *scif_dev_attrs[] = {
@@ -144,7 +141,8 @@ struct scif_hw_dev *
144scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, 141scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
145 struct scif_hw_ops *hw_ops, u8 dnode, u8 snode, 142 struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
146 struct mic_mw *mmio, struct mic_mw *aper, void *dp, 143 struct mic_mw *mmio, struct mic_mw *aper, void *dp,
147 void __iomem *rdp, struct dma_chan **chan, int num_chan) 144 void __iomem *rdp, struct dma_chan **chan, int num_chan,
145 bool card_rel_da)
148{ 146{
149 int ret; 147 int ret;
150 struct scif_hw_dev *sdev; 148 struct scif_hw_dev *sdev;
@@ -171,6 +169,7 @@ scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
171 dma_set_mask(&sdev->dev, DMA_BIT_MASK(64)); 169 dma_set_mask(&sdev->dev, DMA_BIT_MASK(64));
172 sdev->dma_ch = chan; 170 sdev->dma_ch = chan;
173 sdev->num_dma_ch = num_chan; 171 sdev->num_dma_ch = num_chan;
172 sdev->card_rel_da = card_rel_da;
174 dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode); 173 dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode);
175 /* 174 /*
176 * device_register() causes the bus infrastructure to look for a 175 * device_register() causes the bus infrastructure to look for a
@@ -181,7 +180,7 @@ scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops,
181 goto free_sdev; 180 goto free_sdev;
182 return sdev; 181 return sdev;
183free_sdev: 182free_sdev:
184 kfree(sdev); 183 put_device(&sdev->dev);
185 return ERR_PTR(ret); 184 return ERR_PTR(ret);
186} 185}
187EXPORT_SYMBOL_GPL(scif_register_device); 186EXPORT_SYMBOL_GPL(scif_register_device);
diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h
index 335a228a8236..94f29ac608b6 100644
--- a/drivers/misc/mic/bus/scif_bus.h
+++ b/drivers/misc/mic/bus/scif_bus.h
@@ -46,6 +46,8 @@ struct scif_hw_dev_id {
46 * @rdp - Remote device page 46 * @rdp - Remote device page
47 * @dma_ch - Array of DMA channels 47 * @dma_ch - Array of DMA channels
48 * @num_dma_ch - Number of DMA channels available 48 * @num_dma_ch - Number of DMA channels available
49 * @card_rel_da - Set to true if DMA addresses programmed in the DMA engine
50 * are relative to the card point of view
49 */ 51 */
50struct scif_hw_dev { 52struct scif_hw_dev {
51 struct scif_hw_ops *hw_ops; 53 struct scif_hw_ops *hw_ops;
@@ -59,6 +61,7 @@ struct scif_hw_dev {
59 void __iomem *rdp; 61 void __iomem *rdp;
60 struct dma_chan **dma_ch; 62 struct dma_chan **dma_ch;
61 int num_dma_ch; 63 int num_dma_ch;
64 bool card_rel_da;
62}; 65};
63 66
64/** 67/**
@@ -114,7 +117,8 @@ scif_register_device(struct device *pdev, int id,
114 struct scif_hw_ops *hw_ops, u8 dnode, u8 snode, 117 struct scif_hw_ops *hw_ops, u8 dnode, u8 snode,
115 struct mic_mw *mmio, struct mic_mw *aper, 118 struct mic_mw *mmio, struct mic_mw *aper,
116 void *dp, void __iomem *rdp, 119 void *dp, void __iomem *rdp,
117 struct dma_chan **chan, int num_chan); 120 struct dma_chan **chan, int num_chan,
121 bool card_rel_da);
118void scif_unregister_device(struct scif_hw_dev *sdev); 122void scif_unregister_device(struct scif_hw_dev *sdev);
119 123
120static inline struct scif_hw_dev *dev_to_scif(struct device *dev) 124static inline struct scif_hw_dev *dev_to_scif(struct device *dev)
diff --git a/drivers/misc/mic/card/mic_device.c b/drivers/misc/mic/card/mic_device.c
index 6338908b2252..d0edaf7e0cd5 100644
--- a/drivers/misc/mic/card/mic_device.c
+++ b/drivers/misc/mic/card/mic_device.c
@@ -37,71 +37,6 @@
37#include "mic_virtio.h" 37#include "mic_virtio.h"
38 38
39static struct mic_driver *g_drv; 39static struct mic_driver *g_drv;
40static struct mic_irq *shutdown_cookie;
41
42static void mic_notify_host(u8 state)
43{
44 struct mic_driver *mdrv = g_drv;
45 struct mic_bootparam __iomem *bootparam = mdrv->dp;
46
47 iowrite8(state, &bootparam->shutdown_status);
48 dev_dbg(mdrv->dev, "%s %d system_state %d\n",
49 __func__, __LINE__, state);
50 mic_send_intr(&mdrv->mdev, ioread8(&bootparam->c2h_shutdown_db));
51}
52
53static int mic_panic_event(struct notifier_block *this, unsigned long event,
54 void *ptr)
55{
56 struct mic_driver *mdrv = g_drv;
57 struct mic_bootparam __iomem *bootparam = mdrv->dp;
58
59 iowrite8(-1, &bootparam->h2c_config_db);
60 iowrite8(-1, &bootparam->h2c_shutdown_db);
61 mic_notify_host(MIC_CRASHED);
62 return NOTIFY_DONE;
63}
64
65static struct notifier_block mic_panic = {
66 .notifier_call = mic_panic_event,
67};
68
69static irqreturn_t mic_shutdown_isr(int irq, void *data)
70{
71 struct mic_driver *mdrv = g_drv;
72 struct mic_bootparam __iomem *bootparam = mdrv->dp;
73
74 mic_ack_interrupt(&g_drv->mdev);
75 if (ioread8(&bootparam->shutdown_card))
76 orderly_poweroff(true);
77 return IRQ_HANDLED;
78}
79
80static int mic_shutdown_init(void)
81{
82 int rc = 0;
83 struct mic_driver *mdrv = g_drv;
84 struct mic_bootparam __iomem *bootparam = mdrv->dp;
85 int shutdown_db;
86
87 shutdown_db = mic_next_card_db();
88 shutdown_cookie = mic_request_card_irq(mic_shutdown_isr, NULL,
89 "Shutdown", mdrv, shutdown_db);
90 if (IS_ERR(shutdown_cookie))
91 rc = PTR_ERR(shutdown_cookie);
92 else
93 iowrite8(shutdown_db, &bootparam->h2c_shutdown_db);
94 return rc;
95}
96
97static void mic_shutdown_uninit(void)
98{
99 struct mic_driver *mdrv = g_drv;
100 struct mic_bootparam __iomem *bootparam = mdrv->dp;
101
102 iowrite8(-1, &bootparam->h2c_shutdown_db);
103 mic_free_card_irq(shutdown_cookie, mdrv);
104}
105 40
106static int __init mic_dp_init(void) 41static int __init mic_dp_init(void)
107{ 42{
@@ -359,11 +294,7 @@ int __init mic_driver_init(struct mic_driver *mdrv)
359 u8 node_id; 294 u8 node_id;
360 295
361 g_drv = mdrv; 296 g_drv = mdrv;
362 /* 297 /* Unloading the card module is not supported. */
363 * Unloading the card module is not supported. The MIC card module
364 * handles fundamental operations like host/card initiated shutdowns
365 * and informing the host about card crashes and cannot be unloaded.
366 */
367 if (!try_module_get(mdrv->dev->driver->owner)) { 298 if (!try_module_get(mdrv->dev->driver->owner)) {
368 rc = -ENODEV; 299 rc = -ENODEV;
369 goto done; 300 goto done;
@@ -374,12 +305,9 @@ int __init mic_driver_init(struct mic_driver *mdrv)
374 rc = mic_init_irq(); 305 rc = mic_init_irq();
375 if (rc) 306 if (rc)
376 goto dp_uninit; 307 goto dp_uninit;
377 rc = mic_shutdown_init();
378 if (rc)
379 goto irq_uninit;
380 if (!mic_request_dma_chans(mdrv)) { 308 if (!mic_request_dma_chans(mdrv)) {
381 rc = -ENODEV; 309 rc = -ENODEV;
382 goto shutdown_uninit; 310 goto irq_uninit;
383 } 311 }
384 rc = mic_devices_init(mdrv); 312 rc = mic_devices_init(mdrv);
385 if (rc) 313 if (rc)
@@ -390,21 +318,18 @@ int __init mic_driver_init(struct mic_driver *mdrv)
390 NULL, &scif_hw_ops, 318 NULL, &scif_hw_ops,
391 0, node_id, &mdrv->mdev.mmio, NULL, 319 0, node_id, &mdrv->mdev.mmio, NULL,
392 NULL, mdrv->dp, mdrv->dma_ch, 320 NULL, mdrv->dp, mdrv->dma_ch,
393 mdrv->num_dma_ch); 321 mdrv->num_dma_ch, true);
394 if (IS_ERR(mdrv->scdev)) { 322 if (IS_ERR(mdrv->scdev)) {
395 rc = PTR_ERR(mdrv->scdev); 323 rc = PTR_ERR(mdrv->scdev);
396 goto device_uninit; 324 goto device_uninit;
397 } 325 }
398 mic_create_card_debug_dir(mdrv); 326 mic_create_card_debug_dir(mdrv);
399 atomic_notifier_chain_register(&panic_notifier_list, &mic_panic);
400done: 327done:
401 return rc; 328 return rc;
402device_uninit: 329device_uninit:
403 mic_devices_uninit(mdrv); 330 mic_devices_uninit(mdrv);
404dma_free: 331dma_free:
405 mic_free_dma_chans(mdrv); 332 mic_free_dma_chans(mdrv);
406shutdown_uninit:
407 mic_shutdown_uninit();
408irq_uninit: 333irq_uninit:
409 mic_uninit_irq(); 334 mic_uninit_irq();
410dp_uninit: 335dp_uninit:
@@ -425,13 +350,6 @@ void mic_driver_uninit(struct mic_driver *mdrv)
425 scif_unregister_device(mdrv->scdev); 350 scif_unregister_device(mdrv->scdev);
426 mic_devices_uninit(mdrv); 351 mic_devices_uninit(mdrv);
427 mic_free_dma_chans(mdrv); 352 mic_free_dma_chans(mdrv);
428 /*
429 * Inform the host about the shutdown status i.e. poweroff/restart etc.
430 * The module cannot be unloaded so the only code path to call
431 * mic_devices_uninit(..) is the shutdown callback.
432 */
433 mic_notify_host(system_state);
434 mic_shutdown_uninit();
435 mic_uninit_irq(); 353 mic_uninit_irq();
436 mic_dp_uninit(); 354 mic_dp_uninit();
437 module_put(mdrv->dev->driver->owner); 355 module_put(mdrv->dev->driver->owner);
diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c
index 77fd41781c2e..b2958ce2368c 100644
--- a/drivers/misc/mic/card/mic_x100.c
+++ b/drivers/misc/mic/card/mic_x100.c
@@ -261,7 +261,7 @@ static int __init mic_probe(struct platform_device *pdev)
261 mic_hw_intr_init(mdrv); 261 mic_hw_intr_init(mdrv);
262 platform_set_drvdata(pdev, mdrv); 262 platform_set_drvdata(pdev, mdrv);
263 mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC, 263 mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC,
264 NULL, &mbus_hw_ops, 264 NULL, &mbus_hw_ops, 0,
265 mdrv->mdev.mmio.va); 265 mdrv->mdev.mmio.va);
266 if (IS_ERR(mdrv->dma_mbdev)) { 266 if (IS_ERR(mdrv->dma_mbdev)) {
267 rc = PTR_ERR(mdrv->dma_mbdev); 267 rc = PTR_ERR(mdrv->dma_mbdev);
diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h
index 0b58c46045dc..50776772ebdf 100644
--- a/drivers/misc/mic/common/mic_dev.h
+++ b/drivers/misc/mic/common/mic_dev.h
@@ -21,6 +21,19 @@
21#ifndef __MIC_DEV_H__ 21#ifndef __MIC_DEV_H__
22#define __MIC_DEV_H__ 22#define __MIC_DEV_H__
23 23
24/* The maximum number of MIC devices supported in a single host system. */
25#define MIC_MAX_NUM_DEVS 128
26
27/**
28 * enum mic_hw_family - The hardware family to which a device belongs.
29 */
30enum mic_hw_family {
31 MIC_FAMILY_X100 = 0,
32 MIC_FAMILY_X200,
33 MIC_FAMILY_UNKNOWN,
34 MIC_FAMILY_LAST
35};
36
24/** 37/**
25 * struct mic_mw - MIC memory window 38 * struct mic_mw - MIC memory window
26 * 39 *
diff --git a/drivers/misc/mic/cosm/Makefile b/drivers/misc/mic/cosm/Makefile
new file mode 100644
index 000000000000..b85d4d49df46
--- /dev/null
+++ b/drivers/misc/mic/cosm/Makefile
@@ -0,0 +1,10 @@
1#
2# Makefile - Intel MIC Coprocessor State Management (COSM) Driver
3# Copyright(c) 2015, Intel Corporation.
4#
5obj-$(CONFIG_MIC_COSM) += mic_cosm.o
6
7mic_cosm-objs := cosm_main.o
8mic_cosm-objs += cosm_debugfs.o
9mic_cosm-objs += cosm_sysfs.o
10mic_cosm-objs += cosm_scif_server.o
diff --git a/drivers/misc/mic/cosm/cosm_debugfs.c b/drivers/misc/mic/cosm/cosm_debugfs.c
new file mode 100644
index 000000000000..216cb3cd2fe3
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_debugfs.c
@@ -0,0 +1,156 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC Coprocessor State Management (COSM) Driver
19 *
20 */
21
22#include <linux/debugfs.h>
23#include <linux/slab.h>
24#include <linux/io.h>
25#include "cosm_main.h"
26
27/* Debugfs parent dir */
28static struct dentry *cosm_dbg;
29
30/**
31 * cosm_log_buf_show - Display MIC kernel log buffer
32 *
33 * The log_buf address and length are read from System.map by user space
34 * and written to the log_buf_addr/log_buf_len sysfs entries.
35 */
36static int cosm_log_buf_show(struct seq_file *s, void *unused)
37{
38 void __iomem *log_buf_va;
39 int __iomem *log_buf_len_va;
40 struct cosm_device *cdev = s->private;
41 void *kva;
42 int size;
43 u64 aper_offset;
44
45 if (!cdev || !cdev->log_buf_addr || !cdev->log_buf_len)
46 goto done;
47
48 mutex_lock(&cdev->cosm_mutex);
49 switch (cdev->state) {
50 case MIC_BOOTING:
51 case MIC_ONLINE:
52 case MIC_SHUTTING_DOWN:
53 break;
54 default:
55 goto unlock;
56 }
57
58 /*
59 * The card kernel is never relocated, so any kernel text/data address
60 * can be translated to a physical address by subtracting __START_KERNEL_map.
61 */
62 aper_offset = (u64)cdev->log_buf_len - __START_KERNEL_map;
63 log_buf_len_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
64 aper_offset = (u64)cdev->log_buf_addr - __START_KERNEL_map;
65 log_buf_va = cdev->hw_ops->aper(cdev)->va + aper_offset;
66
67 size = ioread32(log_buf_len_va);
68 kva = kmalloc(size, GFP_KERNEL);
69 if (!kva)
70 goto unlock;
71
72 memcpy_fromio(kva, log_buf_va, size);
73 seq_write(s, kva, size);
74 kfree(kva);
75unlock:
76 mutex_unlock(&cdev->cosm_mutex);
77done:
78 return 0;
79}
80
81static int cosm_log_buf_open(struct inode *inode, struct file *file)
82{
83 return single_open(file, cosm_log_buf_show, inode->i_private);
84}
85
86static const struct file_operations log_buf_ops = {
87 .owner = THIS_MODULE,
88 .open = cosm_log_buf_open,
89 .read = seq_read,
90 .llseek = seq_lseek,
91 .release = single_release
92};
93
94/**
95 * cosm_force_reset_show - Force MIC reset
96 *
97 * Invokes the force_reset COSM bus op instead of the standard reset
98 * op in case a force reset of the MIC device is required
99 */
100static int cosm_force_reset_show(struct seq_file *s, void *pos)
101{
102 struct cosm_device *cdev = s->private;
103
104 cosm_stop(cdev, true);
105 return 0;
106}
107
108static int cosm_force_reset_debug_open(struct inode *inode, struct file *file)
109{
110 return single_open(file, cosm_force_reset_show, inode->i_private);
111}
112
113static const struct file_operations force_reset_ops = {
114 .owner = THIS_MODULE,
115 .open = cosm_force_reset_debug_open,
116 .read = seq_read,
117 .llseek = seq_lseek,
118 .release = single_release
119};
120
121void cosm_create_debug_dir(struct cosm_device *cdev)
122{
123 char name[16];
124
125 if (!cosm_dbg)
126 return;
127
128 scnprintf(name, sizeof(name), "mic%d", cdev->index);
129 cdev->dbg_dir = debugfs_create_dir(name, cosm_dbg);
130 if (!cdev->dbg_dir)
131 return;
132
133 debugfs_create_file("log_buf", 0444, cdev->dbg_dir, cdev, &log_buf_ops);
134 debugfs_create_file("force_reset", 0444, cdev->dbg_dir, cdev,
135 &force_reset_ops);
136}
137
138void cosm_delete_debug_dir(struct cosm_device *cdev)
139{
140 if (!cdev->dbg_dir)
141 return;
142
143 debugfs_remove_recursive(cdev->dbg_dir);
144}
145
146void cosm_init_debugfs(void)
147{
148 cosm_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
149 if (!cosm_dbg)
150 pr_err("can't create debugfs dir\n");
151}
152
153void cosm_exit_debugfs(void)
154{
155 debugfs_remove(cosm_dbg);
156}
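cosm_log_buf_show() above relies on the card kernel never being relocated: a System.map virtual address minus __START_KERNEL_map is the offset of that symbol inside the PCIe aperture. A small user-space sketch of that arithmetic follows; the log_buf address in it is made up, only the x86_64 value of __START_KERNEL_map is real.

/*
 * Hypothetical illustration of the aperture-offset arithmetic used by
 * cosm_log_buf_show(); the log_buf address below is a sample value,
 * only the x86_64 __START_KERNEL_map constant is real.
 */
#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	uint64_t start_kernel_map = 0xffffffff80000000ULL;
	uint64_t log_buf_addr = 0xffffffff81f4a2c0ULL;	/* sample System.map value */
	uint64_t aper_offset = log_buf_addr - start_kernel_map;

	/* Prints 0x1f4a2c0: the log buffer sits at aper->va + this offset */
	printf("aperture offset = 0x%" PRIx64 "\n", aper_offset);
	return 0;
}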
diff --git a/drivers/misc/mic/cosm/cosm_main.c b/drivers/misc/mic/cosm/cosm_main.c
new file mode 100644
index 000000000000..4b4b356c797d
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_main.c
@@ -0,0 +1,388 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC Coprocessor State Management (COSM) Driver
19 *
20 */
21
22#include <linux/module.h>
23#include <linux/delay.h>
24#include <linux/idr.h>
25#include <linux/slab.h>
26#include <linux/cred.h>
27#include "cosm_main.h"
28
29static const char cosm_driver_name[] = "mic";
30
31/* COSM ID allocator */
32static struct ida g_cosm_ida;
33/* Class of MIC devices for sysfs accessibility. */
34static struct class *g_cosm_class;
35/* Number of MIC devices */
36static atomic_t g_num_dev;
37
38/**
39 * cosm_hw_reset - Issue a HW reset for the MIC device
40 * @cdev: pointer to cosm_device instance
41 */
42static void cosm_hw_reset(struct cosm_device *cdev, bool force)
43{
44 int i;
45
46#define MIC_RESET_TO (45)
47 if (force && cdev->hw_ops->force_reset)
48 cdev->hw_ops->force_reset(cdev);
49 else
50 cdev->hw_ops->reset(cdev);
51
52 for (i = 0; i < MIC_RESET_TO; i++) {
53 if (cdev->hw_ops->ready(cdev)) {
54 cosm_set_state(cdev, MIC_READY);
55 return;
56 }
57 /*
58 * Resets typically take tens of seconds to complete.
59 * Since an MMIO read is required to check if the
60 * firmware is ready or not, a 1 second delay works nicely.
61 */
62 msleep(1000);
63 }
64 cosm_set_state(cdev, MIC_RESET_FAILED);
65}
66
67/**
68 * cosm_start - Start the MIC
69 * @cdev: pointer to cosm_device instance
70 *
71 * This function prepares an MIC for boot and initiates boot.
72 * RETURNS: An appropriate -ERRNO error value on error, or 0 for success.
73 */
74int cosm_start(struct cosm_device *cdev)
75{
76 const struct cred *orig_cred;
77 struct cred *override_cred;
78 int rc;
79
80 mutex_lock(&cdev->cosm_mutex);
81 if (!cdev->bootmode) {
82 dev_err(&cdev->dev, "%s %d bootmode not set\n",
83 __func__, __LINE__);
84 rc = -EINVAL;
85 goto unlock_ret;
86 }
87retry:
88 if (cdev->state != MIC_READY) {
89 dev_err(&cdev->dev, "%s %d MIC state not READY\n",
90 __func__, __LINE__);
91 rc = -EINVAL;
92 goto unlock_ret;
93 }
94 if (!cdev->hw_ops->ready(cdev)) {
95 cosm_hw_reset(cdev, false);
96 /*
97 * The state will either be MIC_READY if the reset succeeded
98 * or MIC_RESET_FAILED if the firmware reset failed.
99 */
100 goto retry;
101 }
102
103 /*
104 * Set credentials to root to allow a non-root user to download the initramfs
105 * with 600 permissions.
106 */
107 override_cred = prepare_creds();
108 if (!override_cred) {
109 dev_err(&cdev->dev, "%s %d prepare_creds failed\n",
110 __func__, __LINE__);
111 rc = -ENOMEM;
112 goto unlock_ret;
113 }
114 override_cred->fsuid = GLOBAL_ROOT_UID;
115 orig_cred = override_creds(override_cred);
116
117 rc = cdev->hw_ops->start(cdev, cdev->index);
118
119 revert_creds(orig_cred);
120 put_cred(override_cred);
121 if (rc)
122 goto unlock_ret;
123
124 /*
125 * If linux is being booted, the card is treated as 'online' only
126 * when the scif interface in the card is up. If anything else
127 * is booted, we set the card to 'online' immediately.
128 */
129 if (!strcmp(cdev->bootmode, "linux"))
130 cosm_set_state(cdev, MIC_BOOTING);
131 else
132 cosm_set_state(cdev, MIC_ONLINE);
133unlock_ret:
134 mutex_unlock(&cdev->cosm_mutex);
135 if (rc)
136 dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc);
137 return rc;
138}
139
140/**
141 * cosm_stop - Prepare the MIC for reset and trigger reset
142 * @cdev: pointer to cosm_device instance
143 * @force: force a MIC to reset even if it is already reset and ready.
144 *
145 * RETURNS: None
146 */
147void cosm_stop(struct cosm_device *cdev, bool force)
148{
149 mutex_lock(&cdev->cosm_mutex);
150 if (cdev->state != MIC_READY || force) {
151 /*
152 * Don't call hw_ops if they have been called previously.
153 * stop(..) calls device_unregister and will crash the system if
154 * called multiple times.
155 */
156 bool call_hw_ops = cdev->state != MIC_RESET_FAILED &&
157 cdev->state != MIC_READY;
158
159 if (cdev->state != MIC_RESETTING)
160 cosm_set_state(cdev, MIC_RESETTING);
161 cdev->heartbeat_watchdog_enable = false;
162 if (call_hw_ops)
163 cdev->hw_ops->stop(cdev, force);
164 cosm_hw_reset(cdev, force);
165 cosm_set_shutdown_status(cdev, MIC_NOP);
166 if (call_hw_ops && cdev->hw_ops->post_reset)
167 cdev->hw_ops->post_reset(cdev, cdev->state);
168 }
169 mutex_unlock(&cdev->cosm_mutex);
170 flush_work(&cdev->scif_work);
171}
172
173/**
174 * cosm_reset_trigger_work - Trigger MIC reset
175 * @work: The work structure
176 *
177 * This work is scheduled whenever the host wants to reset the MIC.
178 */
179static void cosm_reset_trigger_work(struct work_struct *work)
180{
181 struct cosm_device *cdev = container_of(work, struct cosm_device,
182 reset_trigger_work);
183 cosm_stop(cdev, false);
184}
185
186/**
187 * cosm_reset - Schedule MIC reset
188 * @cdev: pointer to cosm_device instance
189 *
190 * RETURNS: An -EINVAL if the card is already READY or 0 for success.
191 */
192int cosm_reset(struct cosm_device *cdev)
193{
194 int rc = 0;
195
196 mutex_lock(&cdev->cosm_mutex);
197 if (cdev->state != MIC_READY) {
198 cosm_set_state(cdev, MIC_RESETTING);
199 schedule_work(&cdev->reset_trigger_work);
200 } else {
201 dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__);
202 rc = -EINVAL;
203 }
204 mutex_unlock(&cdev->cosm_mutex);
205 return rc;
206}
207
208/**
209 * cosm_shutdown - Initiate MIC shutdown.
210 * @cdev: pointer to cosm_device instance
211 *
212 * RETURNS: None
213 */
214int cosm_shutdown(struct cosm_device *cdev)
215{
216 struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN };
217 int rc = 0;
218
219 mutex_lock(&cdev->cosm_mutex);
220 if (cdev->state != MIC_ONLINE) {
221 rc = -EINVAL;
222 dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n",
223 __func__, __LINE__, cosm_state_string[cdev->state]);
224 goto err;
225 }
226
227 if (!cdev->epd) {
228 rc = -ENOTCONN;
229 dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n",
230 __func__, __LINE__, rc);
231 goto err;
232 }
233
234 rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
235 if (rc < 0) {
236 dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
237 __func__, __LINE__, rc);
238 goto err;
239 }
240 cdev->heartbeat_watchdog_enable = false;
241 cosm_set_state(cdev, MIC_SHUTTING_DOWN);
242 rc = 0;
243err:
244 mutex_unlock(&cdev->cosm_mutex);
245 return rc;
246}
247
248static int cosm_driver_probe(struct cosm_device *cdev)
249{
250 int rc;
251
252 /* Initialize SCIF server at first probe */
253 if (atomic_add_return(1, &g_num_dev) == 1) {
254 rc = cosm_scif_init();
255 if (rc)
256 goto scif_exit;
257 }
258 mutex_init(&cdev->cosm_mutex);
259 INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work);
260 INIT_WORK(&cdev->scif_work, cosm_scif_work);
261 cdev->sysfs_heartbeat_enable = true;
262 cosm_sysfs_init(cdev);
263 cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent,
264 MKDEV(0, cdev->index), cdev, cdev->attr_group,
265 "mic%d", cdev->index);
266 if (IS_ERR(cdev->sdev)) {
267 rc = PTR_ERR(cdev->sdev);
268 dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n",
269 rc);
270 goto scif_exit;
271 }
272
273 cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd,
274 "state");
275 if (!cdev->state_sysfs) {
276 rc = -ENODEV;
277 dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
278 goto destroy_device;
279 }
280 cosm_create_debug_dir(cdev);
281 return 0;
282destroy_device:
283 device_destroy(g_cosm_class, MKDEV(0, cdev->index));
284scif_exit:
285 if (atomic_dec_and_test(&g_num_dev))
286 cosm_scif_exit();
287 return rc;
288}
289
290static void cosm_driver_remove(struct cosm_device *cdev)
291{
292 cosm_delete_debug_dir(cdev);
293 sysfs_put(cdev->state_sysfs);
294 device_destroy(g_cosm_class, MKDEV(0, cdev->index));
295 flush_work(&cdev->reset_trigger_work);
296 cosm_stop(cdev, false);
297 if (atomic_dec_and_test(&g_num_dev))
298 cosm_scif_exit();
299
300 /* Free memory that these sysfs entries may have allocated */
301 kfree(cdev->cmdline);
302 kfree(cdev->firmware);
303 kfree(cdev->ramdisk);
304 kfree(cdev->bootmode);
305}
306
307static int cosm_suspend(struct device *dev)
308{
309 struct cosm_device *cdev = dev_to_cosm(dev);
310
311 mutex_lock(&cdev->cosm_mutex);
312 switch (cdev->state) {
313 /*
314 * Suspend/freeze hooks in userspace have already shut down the card.
315 * Card should be 'ready' in most cases. It is however possible that
316 * some userspace application initiated a boot. In those cases, we
317 * simply reset the card.
318 */
319 case MIC_ONLINE:
320 case MIC_BOOTING:
321 case MIC_SHUTTING_DOWN:
322 mutex_unlock(&cdev->cosm_mutex);
323 cosm_stop(cdev, false);
324 break;
325 default:
326 mutex_unlock(&cdev->cosm_mutex);
327 break;
328 }
329 return 0;
330}
331
332static const struct dev_pm_ops cosm_pm_ops = {
333 .suspend = cosm_suspend,
334 .freeze = cosm_suspend
335};
336
337static struct cosm_driver cosm_driver = {
338 .driver = {
339 .name = KBUILD_MODNAME,
340 .owner = THIS_MODULE,
341 .pm = &cosm_pm_ops,
342 },
343 .probe = cosm_driver_probe,
344 .remove = cosm_driver_remove
345};
346
347static int __init cosm_init(void)
348{
349 int ret;
350
351 cosm_init_debugfs();
352
353 g_cosm_class = class_create(THIS_MODULE, cosm_driver_name);
354 if (IS_ERR(g_cosm_class)) {
355 ret = PTR_ERR(g_cosm_class);
356 pr_err("class_create failed ret %d\n", ret);
357 goto cleanup_debugfs;
358 }
359
360 ida_init(&g_cosm_ida);
361 ret = cosm_register_driver(&cosm_driver);
362 if (ret) {
363 pr_err("cosm_register_driver failed ret %d\n", ret);
364 goto ida_destroy;
365 }
366 return 0;
367ida_destroy:
368 ida_destroy(&g_cosm_ida);
369 class_destroy(g_cosm_class);
370cleanup_debugfs:
371 cosm_exit_debugfs();
372 return ret;
373}
374
375static void __exit cosm_exit(void)
376{
377 cosm_unregister_driver(&cosm_driver);
378 ida_destroy(&g_cosm_ida);
379 class_destroy(g_cosm_class);
380 cosm_exit_debugfs();
381}
382
383module_init(cosm_init);
384module_exit(cosm_exit);
385
386MODULE_AUTHOR("Intel Corporation");
387MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver");
388MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/cosm/cosm_main.h b/drivers/misc/mic/cosm/cosm_main.h
new file mode 100644
index 000000000000..f01156fca881
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_main.h
@@ -0,0 +1,70 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC Coprocessor State Management (COSM) Driver
19 *
20 */
21#ifndef _COSM_COSM_H_
22#define _COSM_COSM_H_
23
24#include <linux/scif.h>
25#include "../bus/cosm_bus.h"
26
27#define COSM_HEARTBEAT_SEND_SEC 30
28#define SCIF_COSM_LISTEN_PORT 201
29
30/**
31 * enum COSM msg id's
32 * @COSM_MSG_SHUTDOWN: host->card trigger shutdown
33 * @COSM_MSG_SYNC_TIME: host->card send host time to card to sync time
34 * @COSM_MSG_HEARTBEAT: card->host heartbeat
35 * @COSM_MSG_SHUTDOWN_STATUS: card->host with shutdown status as payload
36 */
37enum cosm_msg_id {
38 COSM_MSG_SHUTDOWN,
39 COSM_MSG_SYNC_TIME,
40 COSM_MSG_HEARTBEAT,
41 COSM_MSG_SHUTDOWN_STATUS,
42};
43
44struct cosm_msg {
45 u64 id;
46 union {
47 u64 shutdown_status;
48 struct timespec64 timespec;
49 };
50};
51
52extern const char * const cosm_state_string[];
53extern const char * const cosm_shutdown_status_string[];
54
55void cosm_sysfs_init(struct cosm_device *cdev);
56int cosm_start(struct cosm_device *cdev);
57void cosm_stop(struct cosm_device *cdev, bool force);
58int cosm_reset(struct cosm_device *cdev);
59int cosm_shutdown(struct cosm_device *cdev);
60void cosm_set_state(struct cosm_device *cdev, u8 state);
61void cosm_set_shutdown_status(struct cosm_device *cdev, u8 status);
62void cosm_init_debugfs(void);
63void cosm_exit_debugfs(void);
64void cosm_create_debug_dir(struct cosm_device *cdev);
65void cosm_delete_debug_dir(struct cosm_device *cdev);
66int cosm_scif_init(void);
67void cosm_scif_exit(void);
68void cosm_scif_work(struct work_struct *work);
69
70#endif
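struct cosm_msg and the message ids above are the whole host/card wire protocol: a fixed 8-byte id followed by an optional payload, exchanged over SCIF. As a hedged sketch (the endpoint variable and function name are hypothetical, and a real card-side client would use its own copy of this header), a card could emit the periodic heartbeat with the same scif_send() call the host-side code in this series uses:

/*
 * Hypothetical card-side heartbeat sender mirroring the message layout
 * above; 'epd' is assumed to be a SCIF endpoint already connected to
 * SCIF_COSM_LISTEN_PORT on the host.
 */
static int example_send_heartbeat(scif_epd_t epd)
{
	struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT };
	int rc = scif_send(epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);

	return rc < 0 ? rc : 0;
}

Per the timeout macros in cosm_scif_server.c below, such a heartbeat is expected roughly every COSM_HEARTBEAT_SEND_SEC (30) seconds; the host declares the card crashed only after (30 + 10) * 1000 = 40000 ms of silence.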
diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c
new file mode 100644
index 000000000000..5696df4326b5
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_scif_server.c
@@ -0,0 +1,405 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC Coprocessor State Management (COSM) Driver
19 *
20 */
21#include <linux/kthread.h>
22#include "cosm_main.h"
23
24/*
25 * The COSM driver uses SCIF to communicate between the management node and the
26 * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b)
27 * receive a shutdown status back from the card upon completion of shutdown and
28 * (c) receive periodic heartbeat messages from the card used to deduce if the
29 * card has crashed.
30 *
31 * A COSM server consisting of a SCIF listening endpoint waits for incoming
32 * connections from the card. Upon acceptance of the connection, a separate
33 * work-item is scheduled to handle SCIF message processing for that card. The
34 * lifetime of this work-item is therefore the time from which the connection
35 * from a card is accepted to the time at which the connection is closed. A new
36 * work-item starts each time the card boots and is alive until the card (a)
37 * shuts down, (b) is reset, (c) crashes, or (d) the cosm_client driver on the
38 * card is unloaded.
39 *
40 * The COSM interactions with SCIF during card shutdown, reset and crash
41 * are as follows:
42 *
43 * Card shutdown
44 * -------------
45 * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN
46 * message from the host.
47 * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting
48 * in scif_remove(..) getting called on the card
49 * 3. scif_remove -> scif_stop -> scif_handle_remove_node ->
50 * scif_peer_unregister_device -> device_unregister for the host peer device
51 * 4. During device_unregister remove(..) method of cosm_client is invoked which
52 * closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT
53 * message being sent to host SCIF. SCIF_DISCNCT message processing on the
54 * host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes
55 * up the host COSM thread blocked in scif_poll(..) resulting in
56 * scif_poll(..) returning POLLHUP.
57 * 5. On the card, scif_peer_release_dev is next called which results in an
58 * SCIF_EXIT message being sent to the host and after receiving the
59 * SCIF_EXIT_ACK from the host the peer device teardown on the card is
60 * complete.
61 * 6. As part of the SCIF_EXIT message processing on the host, host sends a
62 * SCIF_REMOVE_NODE to itself corresponding to the card being removed. This
63 * starts a similar SCIF peer device teardown sequence on the host
64 * corresponding to the card being shut down.
65 *
66 * Card reset
67 * ----------
68 * The case of interest here is when the card has not been previously shut down
69 * since most of the steps below are skipped in that case:
70 *
71 * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver
72 * which unregisters the SCIF HW device resulting in scif_remove(..) being
73 * called on the host.
74 * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a
75 * SCIF_EXIT message being sent to the card.
76 * 3. The card executes scif_stop() as part of SCIF_EXIT message
77 * processing. This results in the COSM endpoint on the card being closed and
78 * the SCIF host peer device on the card getting unregistered similar to
79 * steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the
80 * host returns POLLHUP as a result.
81 * 4. On the host, card peer device unregister and SCIF HW remove(..) also
82 * subsequently complete.
83 *
84 * Card crash
85 * ----------
86 * If a reset is issued after the card has crashed, there is no SCIF_DISCNCT
87 * message from the card which would result in scif_poll(..) returning
88 * POLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE
89 * message to itself resulting in the card SCIF peer device being unregistered,
90 * this results in a scif_peer_release_dev -> scif_cleanup_scifdev->
91 * scif_invalidate_ep call sequence which sets the endpoint state to
92 * DISCONNECTED and results in scif_poll(..) returning POLLHUP.
93 */
94
95#define COSM_SCIF_BACKLOG 16
96#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10
97#define COSM_HEARTBEAT_TIMEOUT_SEC \
98 (COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC)
99#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC)
100
101static struct task_struct *server_thread;
102static scif_epd_t listen_epd;
103
104/* Publish MIC card's shutdown status to user space MIC daemon */
105static void cosm_update_mic_status(struct cosm_device *cdev)
106{
107 if (cdev->shutdown_status_int != MIC_NOP) {
108 cosm_set_shutdown_status(cdev, cdev->shutdown_status_int);
109 cdev->shutdown_status_int = MIC_NOP;
110 }
111}
112
113/* Store MIC card's shutdown status internally when it is received */
114static void cosm_shutdown_status_int(struct cosm_device *cdev,
115 enum mic_status shutdown_status)
116{
117 switch (shutdown_status) {
118 case MIC_HALTED:
119 case MIC_POWER_OFF:
120 case MIC_RESTART:
121 case MIC_CRASHED:
122 break;
123 default:
124 dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n",
125 __func__, __LINE__, shutdown_status);
126 return;
127 }
128 cdev->shutdown_status_int = shutdown_status;
129 cdev->heartbeat_watchdog_enable = false;
130
131 if (cdev->state != MIC_SHUTTING_DOWN)
132 cosm_set_state(cdev, MIC_SHUTTING_DOWN);
133}
134
135/* Non-blocking recv. Read and process all available messages */
136static void cosm_scif_recv(struct cosm_device *cdev)
137{
138 struct cosm_msg msg;
139 int rc;
140
141 while (1) {
142 rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0);
143 if (!rc) {
144 break;
145 } else if (rc < 0) {
146 dev_dbg(&cdev->dev, "%s: %d rc %d\n",
147 __func__, __LINE__, rc);
148 break;
149 }
150 dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n",
151 __func__, __LINE__, rc, msg.id);
152
153 switch (msg.id) {
154 case COSM_MSG_SHUTDOWN_STATUS:
155 cosm_shutdown_status_int(cdev, msg.shutdown_status);
156 break;
157 case COSM_MSG_HEARTBEAT:
158 /* Nothing to do, heartbeat only unblocks scif_poll */
159 break;
160 default:
161 dev_err(&cdev->dev, "%s: %d unknown msg.id %lld\n",
162 __func__, __LINE__, msg.id);
163 break;
164 }
165 }
166}
167
168/* Publish crashed status for this MIC card */
169static void cosm_set_crashed(struct cosm_device *cdev)
170{
171 dev_err(&cdev->dev, "node alive timeout\n");
172 cosm_shutdown_status_int(cdev, MIC_CRASHED);
173 cosm_update_mic_status(cdev);
174}
175
176/* Send host time to the MIC card to sync system time between host and MIC */
177static void cosm_send_time(struct cosm_device *cdev)
178{
179 struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME };
180 int rc;
181
182 getnstimeofday64(&msg.timespec);
183 rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
184 if (rc < 0)
185 dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n",
186 __func__, __LINE__, rc);
187}
188
189/*
190 * Close this cosm_device's endpoint after its peer endpoint on the card has
191 * been closed. In all cases except a MIC card crash, POLLHUP on the host is
192 * triggered by the client's endpoint being closed.
193 */
194static void cosm_scif_close(struct cosm_device *cdev)
195{
196 /*
197 * Because the SHUTDOWN_STATUS message is sent by the MIC cards in the
198 * reboot notifier when shutdown is still not complete, we notify mpssd
199 * to reset the card when the SCIF endpoint is closed.
200 */
201 cosm_update_mic_status(cdev);
202 scif_close(cdev->epd);
203 cdev->epd = NULL;
204 dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
205}
206
207/*
208 * Set card state to ONLINE when a new SCIF connection from a MIC card is
209 * received. Normally the state is BOOTING when the connection comes in, but can
210 * be ONLINE if cosm_client driver on the card was unloaded and then reloaded.
211 */
212static int cosm_set_online(struct cosm_device *cdev)
213{
214 int rc = 0;
215
216 if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) {
217 cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable;
218 cdev->epd = cdev->newepd;
219 if (cdev->state == MIC_BOOTING)
220 cosm_set_state(cdev, MIC_ONLINE);
221 cosm_send_time(cdev);
222 dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__);
223 } else {
224 dev_warn(&cdev->dev, "%s %d not going online in state: %s\n",
225 __func__, __LINE__, cosm_state_string[cdev->state]);
226 rc = -EINVAL;
227 }
228 /* Drop reference acquired by bus_find_device in the server thread */
229 put_device(&cdev->dev);
230 return rc;
231}
232
233/*
234 * Work function for handling work for a SCIF connection from a particular MIC
235 * card. It first sets the card state to ONLINE and then calls scif_poll to
236 * block on activity such as incoming messages on the SCIF endpoint. When the
237 * endpoint is closed, the work function exits, completing its life cycle, from
238 * MIC card boot to card shutdown/reset/crash.
239 */
240void cosm_scif_work(struct work_struct *work)
241{
242 struct cosm_device *cdev = container_of(work, struct cosm_device,
243 scif_work);
244 struct scif_pollepd pollepd;
245 int rc;
246
247 mutex_lock(&cdev->cosm_mutex);
248 if (cosm_set_online(cdev))
249 goto exit;
250
251 while (1) {
252 pollepd.epd = cdev->epd;
253 pollepd.events = POLLIN;
254
255 /* Drop the mutex before blocking in scif_poll(..) */
256 mutex_unlock(&cdev->cosm_mutex);
257 /* poll(..) with timeout on our endpoint */
258 rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC);
259 mutex_lock(&cdev->cosm_mutex);
260 if (rc < 0) {
261 dev_err(&cdev->dev, "%s %d scif_poll rc %d\n",
262 __func__, __LINE__, rc);
263 continue;
264 }
265
266 /* There is a message from the card */
267 if (pollepd.revents & POLLIN)
268 cosm_scif_recv(cdev);
269
270 /* The peer endpoint is closed or this endpoint disconnected */
271 if (pollepd.revents & POLLHUP) {
272 cosm_scif_close(cdev);
273 break;
274 }
275
276 /* Did we timeout from poll? */
277 if (!rc && cdev->heartbeat_watchdog_enable)
278 cosm_set_crashed(cdev);
279 }
280exit:
281 dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__);
282 mutex_unlock(&cdev->cosm_mutex);
283}
284
285/*
286 * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC
287 * cards, finds the correct cosm_device to associate that connection with and
288 * schedules individual work items for each MIC card.
289 */
290static int cosm_scif_server(void *unused)
291{
292 struct cosm_device *cdev;
293 scif_epd_t newepd;
294 struct scif_port_id port_id;
295 int rc;
296
297 allow_signal(SIGKILL);
298
299 while (!kthread_should_stop()) {
300 rc = scif_accept(listen_epd, &port_id, &newepd,
301 SCIF_ACCEPT_SYNC);
302 if (rc < 0) {
303 if (-ERESTARTSYS != rc)
304 pr_err("%s %d rc %d\n", __func__, __LINE__, rc);
305 continue;
306 }
307
308 /*
309 * Associate the incoming connection with a particular
310 * cosm_device, COSM device ID == SCIF node ID - 1
311 */
312 cdev = cosm_find_cdev_by_id(port_id.node - 1);
313 if (!cdev)
314 continue;
315 cdev->newepd = newepd;
316 schedule_work(&cdev->scif_work);
317 }
318
319 pr_debug("%s %d Server thread stopped\n", __func__, __LINE__);
320 return 0;
321}
322
323static int cosm_scif_listen(void)
324{
325 int rc;
326
327 listen_epd = scif_open();
328 if (!listen_epd) {
329 pr_err("%s %d scif_open failed\n", __func__, __LINE__);
330 return -ENOMEM;
331 }
332
333 rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT);
334 if (rc < 0) {
335 pr_err("%s %d scif_bind failed rc %d\n",
336 __func__, __LINE__, rc);
337 goto err;
338 }
339
340 rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG);
341 if (rc < 0) {
342 pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc);
343 goto err;
344 }
345 pr_debug("%s %d listen_epd set up\n", __func__, __LINE__);
346 return 0;
347err:
348 scif_close(listen_epd);
349 listen_epd = NULL;
350 return rc;
351}
352
353static void cosm_scif_listen_exit(void)
354{
355 pr_debug("%s %d closing listen_epd\n", __func__, __LINE__);
356 if (listen_epd) {
357 scif_close(listen_epd);
358 listen_epd = NULL;
359 }
360}
361
362/*
363 * Create a listening SCIF endpoint and a server kthread which accepts incoming
364 * SCIF connections from MIC cards
365 */
366int cosm_scif_init(void)
367{
368 int rc = cosm_scif_listen();
369
370 if (rc) {
371 pr_err("%s %d cosm_scif_listen rc %d\n",
372 __func__, __LINE__, rc);
373 goto err;
374 }
375
376 server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server");
377 if (IS_ERR(server_thread)) {
378 rc = PTR_ERR(server_thread);
379 pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc);
380 goto listen_exit;
381 }
382 return 0;
383listen_exit:
384 cosm_scif_listen_exit();
385err:
386 return rc;
387}
388
389/* Stop the running server thread and close the listening SCIF endpoint */
390void cosm_scif_exit(void)
391{
392 int rc;
393
394 if (!IS_ERR_OR_NULL(server_thread)) {
395 rc = send_sig(SIGKILL, server_thread, 0);
396 if (rc) {
397 pr_err("%s %d send_sig rc %d\n",
398 __func__, __LINE__, rc);
399 return;
400 }
401 kthread_stop(server_thread);
402 }
403
404 cosm_scif_listen_exit();
405}
diff --git a/drivers/misc/mic/cosm/cosm_sysfs.c b/drivers/misc/mic/cosm/cosm_sysfs.c
new file mode 100644
index 000000000000..29d6863b6e59
--- /dev/null
+++ b/drivers/misc/mic/cosm/cosm_sysfs.c
@@ -0,0 +1,461 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC Coprocessor State Management (COSM) Driver
19 *
20 */
21#include <linux/slab.h>
22#include "cosm_main.h"
23
24/*
25 * A state-to-string lookup table, for exposing a human-readable state
26 * via sysfs. Always keep in sync with enum mic_states.
27 */
28const char * const cosm_state_string[] = {
29 [MIC_READY] = "ready",
30 [MIC_BOOTING] = "booting",
31 [MIC_ONLINE] = "online",
32 [MIC_SHUTTING_DOWN] = "shutting_down",
33 [MIC_RESETTING] = "resetting",
34 [MIC_RESET_FAILED] = "reset_failed",
35};
36
37/*
38 * A shutdown-status-to-string lookup table, for exposing a human-readable
39 * status via sysfs. Always keep in sync with enum mic_status.
40 */
41const char * const cosm_shutdown_status_string[] = {
42 [MIC_NOP] = "nop",
43 [MIC_CRASHED] = "crashed",
44 [MIC_HALTED] = "halted",
45 [MIC_POWER_OFF] = "poweroff",
46 [MIC_RESTART] = "restart",
47};
48
49void cosm_set_shutdown_status(struct cosm_device *cdev, u8 shutdown_status)
50{
51 dev_dbg(&cdev->dev, "Shutdown Status %s -> %s\n",
52 cosm_shutdown_status_string[cdev->shutdown_status],
53 cosm_shutdown_status_string[shutdown_status]);
54 cdev->shutdown_status = shutdown_status;
55}
56
57void cosm_set_state(struct cosm_device *cdev, u8 state)
58{
59 dev_dbg(&cdev->dev, "State %s -> %s\n",
60 cosm_state_string[cdev->state],
61 cosm_state_string[state]);
62 cdev->state = state;
63 sysfs_notify_dirent(cdev->state_sysfs);
64}
65
66static ssize_t
67family_show(struct device *dev, struct device_attribute *attr, char *buf)
68{
69 struct cosm_device *cdev = dev_get_drvdata(dev);
70
71 if (!cdev)
72 return -EINVAL;
73
74 return cdev->hw_ops->family(cdev, buf);
75}
76static DEVICE_ATTR_RO(family);
77
78static ssize_t
79stepping_show(struct device *dev, struct device_attribute *attr, char *buf)
80{
81 struct cosm_device *cdev = dev_get_drvdata(dev);
82
83 if (!cdev)
84 return -EINVAL;
85
86 return cdev->hw_ops->stepping(cdev, buf);
87}
88static DEVICE_ATTR_RO(stepping);
89
90static ssize_t
91state_show(struct device *dev, struct device_attribute *attr, char *buf)
92{
93 struct cosm_device *cdev = dev_get_drvdata(dev);
94
95 if (!cdev || cdev->state >= MIC_LAST)
96 return -EINVAL;
97
98 return scnprintf(buf, PAGE_SIZE, "%s\n",
99 cosm_state_string[cdev->state]);
100}
101
102static ssize_t
103state_store(struct device *dev, struct device_attribute *attr,
104 const char *buf, size_t count)
105{
106 struct cosm_device *cdev = dev_get_drvdata(dev);
107 int rc;
108
109 if (!cdev)
110 return -EINVAL;
111
112 if (sysfs_streq(buf, "boot")) {
113 rc = cosm_start(cdev);
114 goto done;
115 }
116 if (sysfs_streq(buf, "reset")) {
117 rc = cosm_reset(cdev);
118 goto done;
119 }
120
121 if (sysfs_streq(buf, "shutdown")) {
122 rc = cosm_shutdown(cdev);
123 goto done;
124 }
125 rc = -EINVAL;
126done:
127 if (rc)
128 count = rc;
129 return count;
130}
131static DEVICE_ATTR_RW(state);
132
133static ssize_t shutdown_status_show(struct device *dev,
134 struct device_attribute *attr, char *buf)
135{
136 struct cosm_device *cdev = dev_get_drvdata(dev);
137
138 if (!cdev || cdev->shutdown_status >= MIC_STATUS_LAST)
139 return -EINVAL;
140
141 return scnprintf(buf, PAGE_SIZE, "%s\n",
142 cosm_shutdown_status_string[cdev->shutdown_status]);
143}
144static DEVICE_ATTR_RO(shutdown_status);
145
146static ssize_t
147heartbeat_enable_show(struct device *dev,
148 struct device_attribute *attr, char *buf)
149{
150 struct cosm_device *cdev = dev_get_drvdata(dev);
151
152 if (!cdev)
153 return -EINVAL;
154
155 return scnprintf(buf, PAGE_SIZE, "%d\n", cdev->sysfs_heartbeat_enable);
156}
157
158static ssize_t
159heartbeat_enable_store(struct device *dev,
160 struct device_attribute *attr,
161 const char *buf, size_t count)
162{
163 struct cosm_device *cdev = dev_get_drvdata(dev);
164 int enable;
165 int ret;
166
167 if (!cdev)
168 return -EINVAL;
169
170 mutex_lock(&cdev->cosm_mutex);
171 ret = kstrtoint(buf, 10, &enable);
172 if (ret)
173 goto unlock;
174
175 cdev->sysfs_heartbeat_enable = enable;
176 /* if state is not online, cdev->heartbeat_watchdog_enable is 0 */
177 if (cdev->state == MIC_ONLINE)
178 cdev->heartbeat_watchdog_enable = enable;
179 ret = count;
180unlock:
181 mutex_unlock(&cdev->cosm_mutex);
182 return ret;
183}
184static DEVICE_ATTR_RW(heartbeat_enable);
185
186static ssize_t
187cmdline_show(struct device *dev, struct device_attribute *attr, char *buf)
188{
189 struct cosm_device *cdev = dev_get_drvdata(dev);
190 char *cmdline;
191
192 if (!cdev)
193 return -EINVAL;
194
195 cmdline = cdev->cmdline;
196
197 if (cmdline)
198 return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline);
199 return 0;
200}
201
202static ssize_t
203cmdline_store(struct device *dev, struct device_attribute *attr,
204 const char *buf, size_t count)
205{
206 struct cosm_device *cdev = dev_get_drvdata(dev);
207
208 if (!cdev)
209 return -EINVAL;
210
211 mutex_lock(&cdev->cosm_mutex);
212 kfree(cdev->cmdline);
213
214 cdev->cmdline = kmalloc(count + 1, GFP_KERNEL);
215 if (!cdev->cmdline) {
216 count = -ENOMEM;
217 goto unlock;
218 }
219
220 strncpy(cdev->cmdline, buf, count);
221
222 if (cdev->cmdline[count - 1] == '\n')
223 cdev->cmdline[count - 1] = '\0';
224 else
225 cdev->cmdline[count] = '\0';
226unlock:
227 mutex_unlock(&cdev->cosm_mutex);
228 return count;
229}
230static DEVICE_ATTR_RW(cmdline);
231
232static ssize_t
233firmware_show(struct device *dev, struct device_attribute *attr, char *buf)
234{
235 struct cosm_device *cdev = dev_get_drvdata(dev);
236 char *firmware;
237
238 if (!cdev)
239 return -EINVAL;
240
241 firmware = cdev->firmware;
242
243 if (firmware)
244 return scnprintf(buf, PAGE_SIZE, "%s\n", firmware);
245 return 0;
246}
247
248static ssize_t
249firmware_store(struct device *dev, struct device_attribute *attr,
250 const char *buf, size_t count)
251{
252 struct cosm_device *cdev = dev_get_drvdata(dev);
253
254 if (!cdev)
255 return -EINVAL;
256
257 mutex_lock(&cdev->cosm_mutex);
258 kfree(cdev->firmware);
259
260 cdev->firmware = kmalloc(count + 1, GFP_KERNEL);
261 if (!cdev->firmware) {
262 count = -ENOMEM;
263 goto unlock;
264 }
265 strncpy(cdev->firmware, buf, count);
266
267 if (cdev->firmware[count - 1] == '\n')
268 cdev->firmware[count - 1] = '\0';
269 else
270 cdev->firmware[count] = '\0';
271unlock:
272 mutex_unlock(&cdev->cosm_mutex);
273 return count;
274}
275static DEVICE_ATTR_RW(firmware);
276
277static ssize_t
278ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf)
279{
280 struct cosm_device *cdev = dev_get_drvdata(dev);
281 char *ramdisk;
282
283 if (!cdev)
284 return -EINVAL;
285
286 ramdisk = cdev->ramdisk;
287
288 if (ramdisk)
289 return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk);
290 return 0;
291}
292
293static ssize_t
294ramdisk_store(struct device *dev, struct device_attribute *attr,
295 const char *buf, size_t count)
296{
297 struct cosm_device *cdev = dev_get_drvdata(dev);
298
299 if (!cdev)
300 return -EINVAL;
301
302 mutex_lock(&cdev->cosm_mutex);
303 kfree(cdev->ramdisk);
304
305 cdev->ramdisk = kmalloc(count + 1, GFP_KERNEL);
306 if (!cdev->ramdisk) {
307 count = -ENOMEM;
308 goto unlock;
309 }
310
311 strncpy(cdev->ramdisk, buf, count);
312
313 if (cdev->ramdisk[count - 1] == '\n')
314 cdev->ramdisk[count - 1] = '\0';
315 else
316 cdev->ramdisk[count] = '\0';
317unlock:
318 mutex_unlock(&cdev->cosm_mutex);
319 return count;
320}
321static DEVICE_ATTR_RW(ramdisk);
322
323static ssize_t
324bootmode_show(struct device *dev, struct device_attribute *attr, char *buf)
325{
326 struct cosm_device *cdev = dev_get_drvdata(dev);
327 char *bootmode;
328
329 if (!cdev)
330 return -EINVAL;
331
332 bootmode = cdev->bootmode;
333
334 if (bootmode)
335 return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode);
336 return 0;
337}
338
339static ssize_t
340bootmode_store(struct device *dev, struct device_attribute *attr,
341 const char *buf, size_t count)
342{
343 struct cosm_device *cdev = dev_get_drvdata(dev);
344
345 if (!cdev)
346 return -EINVAL;
347
348 if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "flash"))
349 return -EINVAL;
350
351 mutex_lock(&cdev->cosm_mutex);
352 kfree(cdev->bootmode);
353
354 cdev->bootmode = kmalloc(count + 1, GFP_KERNEL);
355 if (!cdev->bootmode) {
356 count = -ENOMEM;
357 goto unlock;
358 }
359
360 strncpy(cdev->bootmode, buf, count);
361
362 if (cdev->bootmode[count - 1] == '\n')
363 cdev->bootmode[count - 1] = '\0';
364 else
365 cdev->bootmode[count] = '\0';
366unlock:
367 mutex_unlock(&cdev->cosm_mutex);
368 return count;
369}
370static DEVICE_ATTR_RW(bootmode);
371
372static ssize_t
373log_buf_addr_show(struct device *dev, struct device_attribute *attr,
374 char *buf)
375{
376 struct cosm_device *cdev = dev_get_drvdata(dev);
377
378 if (!cdev)
379 return -EINVAL;
380
381 return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_addr);
382}
383
384static ssize_t
385log_buf_addr_store(struct device *dev, struct device_attribute *attr,
386 const char *buf, size_t count)
387{
388 struct cosm_device *cdev = dev_get_drvdata(dev);
389 int ret;
390 unsigned long addr;
391
392 if (!cdev)
393 return -EINVAL;
394
395 ret = kstrtoul(buf, 16, &addr);
396 if (ret)
397 goto exit;
398
399 cdev->log_buf_addr = (void *)addr;
400 ret = count;
401exit:
402 return ret;
403}
404static DEVICE_ATTR_RW(log_buf_addr);
405
406static ssize_t
407log_buf_len_show(struct device *dev, struct device_attribute *attr,
408 char *buf)
409{
410 struct cosm_device *cdev = dev_get_drvdata(dev);
411
412 if (!cdev)
413 return -EINVAL;
414
415 return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_len);
416}
417
418static ssize_t
419log_buf_len_store(struct device *dev, struct device_attribute *attr,
420 const char *buf, size_t count)
421{
422 struct cosm_device *cdev = dev_get_drvdata(dev);
423 int ret;
424 unsigned long addr;
425
426 if (!cdev)
427 return -EINVAL;
428
429 ret = kstrtoul(buf, 16, &addr);
430 if (ret)
431 goto exit;
432
433 cdev->log_buf_len = (int *)addr;
434 ret = count;
435exit:
436 return ret;
437}
438static DEVICE_ATTR_RW(log_buf_len);
439
440static struct attribute *cosm_default_attrs[] = {
441 &dev_attr_family.attr,
442 &dev_attr_stepping.attr,
443 &dev_attr_state.attr,
444 &dev_attr_shutdown_status.attr,
445 &dev_attr_heartbeat_enable.attr,
446 &dev_attr_cmdline.attr,
447 &dev_attr_firmware.attr,
448 &dev_attr_ramdisk.attr,
449 &dev_attr_bootmode.attr,
450 &dev_attr_log_buf_addr.attr,
451 &dev_attr_log_buf_len.attr,
452
453 NULL
454};
455
456ATTRIBUTE_GROUPS(cosm_default);
457
458void cosm_sysfs_init(struct cosm_device *cdev)
459{
460 cdev->attr_group = cosm_default_groups;
461}
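
The cmdline, firmware, ramdisk and bootmode attributes above all repeat the same store sequence: take cosm_mutex, free the previous buffer, allocate count + 1 bytes, copy the sysfs input and replace a trailing '\n' with a NUL terminator. A minimal sketch of that shared pattern, written as a hypothetical helper purely for illustration (it is not part of this patch; the caller is assumed to hold cdev->cosm_mutex exactly as the handlers above do):

static int cosm_store_string(char **field, const char *buf, size_t count)
{
	char *str = kmalloc(count + 1, GFP_KERNEL);

	if (!str)
		return -ENOMEM;

	strncpy(str, buf, count);
	/* sysfs writes typically end in '\n'; terminate the string either way */
	if (count && str[count - 1] == '\n')
		str[count - 1] = '\0';
	else
		str[count] = '\0';

	kfree(*field);		/* release the previously stored value */
	*field = str;
	return 0;
}
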
diff --git a/drivers/misc/mic/cosm_client/Makefile b/drivers/misc/mic/cosm_client/Makefile
new file mode 100644
index 000000000000..6f751a519a09
--- /dev/null
+++ b/drivers/misc/mic/cosm_client/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile - Intel MIC COSM Client Driver
3# Copyright(c) 2015, Intel Corporation.
4#
5obj-$(CONFIG_MIC_COSM) += cosm_client.o
6
7cosm_client-objs += cosm_scif_client.o
diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c
new file mode 100644
index 000000000000..03e98bf1ac15
--- /dev/null
+++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c
@@ -0,0 +1,275 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC COSM Client Driver
19 *
20 */
21#include <linux/module.h>
22#include <linux/delay.h>
23#include <linux/reboot.h>
24#include <linux/kthread.h>
25#include "../cosm/cosm_main.h"
26
27#define COSM_SCIF_MAX_RETRIES 10
28#define COSM_HEARTBEAT_SEND_MSEC (COSM_HEARTBEAT_SEND_SEC * MSEC_PER_SEC)
29
30static struct task_struct *client_thread;
31static scif_epd_t client_epd;
32static struct scif_peer_dev *client_spdev;
33
34/*
35 * Reboot notifier: receives shutdown status from the OS and communicates it
36 * back to the COSM process on the host
37 */
38static int cosm_reboot_event(struct notifier_block *this, unsigned long event,
39 void *ptr)
40{
41 struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN_STATUS };
42 int rc;
43
44 event = (event == SYS_RESTART) ? SYSTEM_RESTART : event;
45 dev_info(&client_spdev->dev, "%s %d received event %ld\n",
46 __func__, __LINE__, event);
47
48 msg.shutdown_status = event;
49 rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
50 if (rc < 0)
51 dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
52 __func__, __LINE__, rc);
53
54 return NOTIFY_DONE;
55}
56
57static struct notifier_block cosm_reboot = {
58 .notifier_call = cosm_reboot_event,
59};
60
61/* Set system time from timespec value received from the host */
62static void cosm_set_time(struct cosm_msg *msg)
63{
64 int rc = do_settimeofday64(&msg->timespec);
65
66 if (rc)
67 dev_err(&client_spdev->dev, "%s: %d settimeofday rc %d\n",
68 __func__, __LINE__, rc);
69}
70
71/* COSM client receive message processing */
72static void cosm_client_recv(void)
73{
74 struct cosm_msg msg;
75 int rc;
76
77 while (1) {
78 rc = scif_recv(client_epd, &msg, sizeof(msg), 0);
79 if (!rc) {
80 return;
81 } else if (rc < 0) {
82 dev_err(&client_spdev->dev, "%s: %d rc %d\n",
83 __func__, __LINE__, rc);
84 return;
85 }
86
87 dev_dbg(&client_spdev->dev, "%s: %d rc %d id 0x%llx\n",
88 __func__, __LINE__, rc, msg.id);
89
90 switch (msg.id) {
91 case COSM_MSG_SYNC_TIME:
92 cosm_set_time(&msg);
93 break;
94 case COSM_MSG_SHUTDOWN:
95 orderly_poweroff(true);
96 break;
97 default:
98 dev_err(&client_spdev->dev, "%s: %d unknown id %lld\n",
99 __func__, __LINE__, msg.id);
100 break;
101 }
102 }
103}
104
105/* Initiate connection to the COSM server on the host */
106static int cosm_scif_connect(void)
107{
108 struct scif_port_id port_id;
109 int i, rc;
110
111 client_epd = scif_open();
112 if (!client_epd) {
113 dev_err(&client_spdev->dev, "%s %d scif_open failed\n",
114 __func__, __LINE__);
115 return -ENOMEM;
116 }
117
118 port_id.node = 0;
119 port_id.port = SCIF_COSM_LISTEN_PORT;
120
121 for (i = 0; i < COSM_SCIF_MAX_RETRIES; i++) {
122 rc = scif_connect(client_epd, &port_id);
123 if (rc < 0)
124 msleep(1000);
125 else
126 break;
127 }
128
129 if (rc < 0) {
130 dev_err(&client_spdev->dev, "%s %d scif_connect rc %d\n",
131 __func__, __LINE__, rc);
132 scif_close(client_epd);
133 client_epd = NULL;
134 }
135 return rc < 0 ? rc : 0;
136}
137
138/* Close host SCIF connection */
139static void cosm_scif_connect_exit(void)
140{
141 if (client_epd) {
142 scif_close(client_epd);
143 client_epd = NULL;
144 }
145}
146
147/*
148 * COSM SCIF client thread function: waits for messages from the host and sends
149 * a heartbeat to the host
150 */
151static int cosm_scif_client(void *unused)
152{
153 struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT };
154 struct scif_pollepd pollepd;
155 int rc;
156
157 allow_signal(SIGKILL);
158
159 while (!kthread_should_stop()) {
160 pollepd.epd = client_epd;
161 pollepd.events = POLLIN;
162
163 rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC);
164 if (rc < 0) {
165 if (-EINTR != rc)
166 dev_err(&client_spdev->dev,
167 "%s %d scif_poll rc %d\n",
168 __func__, __LINE__, rc);
169 continue;
170 }
171
172 if (pollepd.revents & POLLIN)
173 cosm_client_recv();
174
175 msg.id = COSM_MSG_HEARTBEAT;
176 rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
177 if (rc < 0)
178 dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n",
179 __func__, __LINE__, rc);
180 }
181
182 dev_dbg(&client_spdev->dev, "%s %d Client thread stopped\n",
183 __func__, __LINE__);
184 return 0;
185}
186
187static void cosm_scif_probe(struct scif_peer_dev *spdev)
188{
189 int rc;
190
191 dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
192 __func__, __LINE__, spdev->dnode);
193
194 /* We are only interested in the host with spdev->dnode == 0 */
195 if (spdev->dnode)
196 return;
197
198 client_spdev = spdev;
199 rc = cosm_scif_connect();
200 if (rc)
201 goto exit;
202
203 rc = register_reboot_notifier(&cosm_reboot);
204 if (rc) {
205 dev_err(&spdev->dev,
206 "reboot notifier registration failed rc %d\n", rc);
207 goto connect_exit;
208 }
209
210 client_thread = kthread_run(cosm_scif_client, NULL, "cosm_client");
211 if (IS_ERR(client_thread)) {
212 rc = PTR_ERR(client_thread);
213 dev_err(&spdev->dev, "%s %d kthread_run rc %d\n",
214 __func__, __LINE__, rc);
215 goto unreg_reboot;
216 }
217 return;
218unreg_reboot:
219 unregister_reboot_notifier(&cosm_reboot);
220connect_exit:
221 cosm_scif_connect_exit();
222exit:
223 client_spdev = NULL;
224}
225
226static void cosm_scif_remove(struct scif_peer_dev *spdev)
227{
228 int rc;
229
230 dev_dbg(&spdev->dev, "%s %d: dnode %d\n",
231 __func__, __LINE__, spdev->dnode);
232
233 if (spdev->dnode)
234 return;
235
236 if (!IS_ERR_OR_NULL(client_thread)) {
237 rc = send_sig(SIGKILL, client_thread, 0);
238 if (rc) {
239 pr_err("%s %d send_sig rc %d\n",
240 __func__, __LINE__, rc);
241 return;
242 }
243 kthread_stop(client_thread);
244 }
245 unregister_reboot_notifier(&cosm_reboot);
246 cosm_scif_connect_exit();
247 client_spdev = NULL;
248}
249
250static struct scif_client scif_client_cosm = {
251 .name = KBUILD_MODNAME,
252 .probe = cosm_scif_probe,
253 .remove = cosm_scif_remove,
254};
255
256static int __init cosm_client_init(void)
257{
258 int rc = scif_client_register(&scif_client_cosm);
259
260 if (rc)
261 pr_err("scif_client_register failed rc %d\n", rc);
262 return rc;
263}
264
265static void __exit cosm_client_exit(void)
266{
267 scif_client_unregister(&scif_client_cosm);
268}
269
270module_init(cosm_client_init);
271module_exit(cosm_client_exit);
272
273MODULE_AUTHOR("Intel Corporation");
274MODULE_DESCRIPTION("Intel(R) MIC card OS state management client driver");
275MODULE_LICENSE("GPL v2");
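
The protocol driven by this client is deliberately small: the card sends COSM_MSG_HEARTBEAT at most once per COSM_HEARTBEAT_SEND_MSEC and COSM_MSG_SHUTDOWN_STATUS from the reboot notifier, while the host sends COSM_MSG_SYNC_TIME and COSM_MSG_SHUTDOWN. Judging only from the accesses above (msg.id printed with a 64-bit format, msg.shutdown_status assigned from an unsigned long, msg.timespec handed to do_settimeofday64()), the message layout defined in ../cosm/cosm_main.h is presumably along these lines; treat this as an inferred sketch, not a copy of the header:

struct cosm_msg {
	u64 id;				/* COSM_MSG_* discriminator */
	union {
		u64 shutdown_status;		/* card -> host, from the reboot notifier */
		struct timespec64 timespec;	/* host -> card, for COSM_MSG_SYNC_TIME */
	};
};
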
diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile
index c2197f999394..004d3db0f990 100644
--- a/drivers/misc/mic/host/Makefile
+++ b/drivers/misc/mic/host/Makefile
@@ -5,7 +5,6 @@
5obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o 5obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o
6mic_host-objs := mic_main.o 6mic_host-objs := mic_main.o
7mic_host-objs += mic_x100.o 7mic_host-objs += mic_x100.o
8mic_host-objs += mic_sysfs.o
9mic_host-objs += mic_smpt.o 8mic_host-objs += mic_smpt.o
10mic_host-objs += mic_intr.o 9mic_host-objs += mic_intr.o
11mic_host-objs += mic_boot.o 10mic_host-objs += mic_boot.o
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
index e5f6a5e7bca1..7845564dff64 100644
--- a/drivers/misc/mic/host/mic_boot.c
+++ b/drivers/misc/mic/host/mic_boot.c
@@ -22,9 +22,9 @@
22#include <linux/firmware.h> 22#include <linux/firmware.h>
23#include <linux/pci.h> 23#include <linux/pci.h>
24#include <linux/kmod.h> 24#include <linux/kmod.h>
25
26#include <linux/mic_common.h> 25#include <linux/mic_common.h>
27#include <linux/mic_bus.h> 26#include <linux/mic_bus.h>
27#include "../bus/scif_bus.h"
28#include "../common/mic_dev.h" 28#include "../common/mic_dev.h"
29#include "mic_device.h" 29#include "mic_device.h"
30#include "mic_smpt.h" 30#include "mic_smpt.h"
@@ -99,7 +99,7 @@ static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg,
99 int i, j, ret; 99 int i, j, ret;
100 dma_addr_t da; 100 dma_addr_t da;
101 101
102 ret = dma_map_sg(mdev->sdev->parent, sg, nents, dir); 102 ret = dma_map_sg(&mdev->pdev->dev, sg, nents, dir);
103 if (ret <= 0) 103 if (ret <= 0)
104 return 0; 104 return 0;
105 105
@@ -115,7 +115,7 @@ err:
115 mic_unmap(mdev, sg_dma_address(s), s->length); 115 mic_unmap(mdev, sg_dma_address(s), s->length);
116 sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s)); 116 sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s));
117 } 117 }
118 dma_unmap_sg(mdev->sdev->parent, sg, nents, dir); 118 dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
119 return 0; 119 return 0;
120} 120}
121 121
@@ -135,7 +135,7 @@ static void __mic_dma_unmap_sg(struct device *dev,
135 mic_unmap(mdev, sg_dma_address(s), s->length); 135 mic_unmap(mdev, sg_dma_address(s), s->length);
136 sg_dma_address(s) = da; 136 sg_dma_address(s) = da;
137 } 137 }
138 dma_unmap_sg(mdev->sdev->parent, sg, nents, dir); 138 dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir);
139} 139}
140 140
141static struct dma_map_ops __mic_dma_ops = { 141static struct dma_map_ops __mic_dma_ops = {
@@ -270,48 +270,13 @@ static struct mbus_hw_ops mbus_hw_ops = {
270 .ack_interrupt = _mic_ack_interrupt, 270 .ack_interrupt = _mic_ack_interrupt,
271}; 271};
272 272
273/**
274 * mic_reset - Reset the MIC device.
275 * @mdev: pointer to mic_device instance
276 */
277static void mic_reset(struct mic_device *mdev)
278{
279 int i;
280
281#define MIC_RESET_TO (45)
282
283 reinit_completion(&mdev->reset_wait);
284 mdev->ops->reset_fw_ready(mdev);
285 mdev->ops->reset(mdev);
286
287 for (i = 0; i < MIC_RESET_TO; i++) {
288 if (mdev->ops->is_fw_ready(mdev))
289 goto done;
290 /*
291 * Resets typically take 10s of seconds to complete.
292 * Since an MMIO read is required to check if the
293 * firmware is ready or not, a 1 second delay works nicely.
294 */
295 msleep(1000);
296 }
297 mic_set_state(mdev, MIC_RESET_FAILED);
298done:
299 complete_all(&mdev->reset_wait);
300}
301
302/* Initialize the MIC bootparams */ 273/* Initialize the MIC bootparams */
303void mic_bootparam_init(struct mic_device *mdev) 274void mic_bootparam_init(struct mic_device *mdev)
304{ 275{
305 struct mic_bootparam *bootparam = mdev->dp; 276 struct mic_bootparam *bootparam = mdev->dp;
306 277
307 bootparam->magic = cpu_to_le32(MIC_MAGIC); 278 bootparam->magic = cpu_to_le32(MIC_MAGIC);
308 bootparam->c2h_shutdown_db = mdev->shutdown_db;
309 bootparam->h2c_shutdown_db = -1;
310 bootparam->h2c_config_db = -1; 279 bootparam->h2c_config_db = -1;
311 bootparam->shutdown_status = 0;
312 bootparam->shutdown_card = 0;
313 /* Total nodes = number of MICs + 1 for self node */
314 bootparam->tot_nodes = atomic_read(&g_num_mics) + 1;
315 bootparam->node_id = mdev->id + 1; 280 bootparam->node_id = mdev->id + 1;
316 bootparam->scif_host_dma_addr = 0x0; 281 bootparam->scif_host_dma_addr = 0x0;
317 bootparam->scif_card_dma_addr = 0x0; 282 bootparam->scif_card_dma_addr = 0x0;
@@ -319,6 +284,26 @@ void mic_bootparam_init(struct mic_device *mdev)
319 bootparam->h2c_scif_db = -1; 284 bootparam->h2c_scif_db = -1;
320} 285}
321 286
287static inline struct mic_device *cosmdev_to_mdev(struct cosm_device *cdev)
288{
289 return dev_get_drvdata(cdev->dev.parent);
290}
291
292static void _mic_reset(struct cosm_device *cdev)
293{
294 struct mic_device *mdev = cosmdev_to_mdev(cdev);
295
296 mdev->ops->reset_fw_ready(mdev);
297 mdev->ops->reset(mdev);
298}
299
300static bool _mic_ready(struct cosm_device *cdev)
301{
302 struct mic_device *mdev = cosmdev_to_mdev(cdev);
303
304 return mdev->ops->is_fw_ready(mdev);
305}
306
322/** 307/**
323 * mic_request_dma_chans - Request DMA channels 308 * mic_request_dma_chans - Request DMA channels
324 * @mdev: pointer to mic_device instance 309 * @mdev: pointer to mic_device instance
@@ -336,14 +321,14 @@ static int mic_request_dma_chans(struct mic_device *mdev)
336 321
337 do { 322 do {
338 chan = dma_request_channel(mask, mdev->ops->dma_filter, 323 chan = dma_request_channel(mask, mdev->ops->dma_filter,
339 mdev->sdev->parent); 324 &mdev->pdev->dev);
340 if (chan) { 325 if (chan) {
341 mdev->dma_ch[mdev->num_dma_ch++] = chan; 326 mdev->dma_ch[mdev->num_dma_ch++] = chan;
342 if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN) 327 if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN)
343 break; 328 break;
344 } 329 }
345 } while (chan); 330 } while (chan);
346 dev_info(mdev->sdev->parent, "DMA channels # %d\n", mdev->num_dma_ch); 331 dev_info(&mdev->pdev->dev, "DMA channels # %d\n", mdev->num_dma_ch);
347 return mdev->num_dma_ch; 332 return mdev->num_dma_ch;
348} 333}
349 334
@@ -365,34 +350,24 @@ static void mic_free_dma_chans(struct mic_device *mdev)
365} 350}
366 351
367/** 352/**
368 * mic_start - Start the MIC. 353 * _mic_start - Start the MIC.
369 * @mdev: pointer to mic_device instance 354 * @cdev: pointer to cosm_device instance
370 * @buf: buffer containing boot string including firmware/ramdisk path. 355 * @id: MIC device id/index provided by COSM, used in other drivers like SCIF
371 * 356 *
372 * This function prepares an MIC for boot and initiates boot. 357 * This function prepares an MIC for boot and initiates boot.
373 * RETURNS: An appropriate -ERRNO error value on error, or zero for success. 358 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
359 *
360 * For all cosm_hw_ops the caller holds a mutex to ensure serialization.
374 */ 361 */
375int mic_start(struct mic_device *mdev, const char *buf) 362static int _mic_start(struct cosm_device *cdev, int id)
376{ 363{
364 struct mic_device *mdev = cosmdev_to_mdev(cdev);
377 int rc; 365 int rc;
378 mutex_lock(&mdev->mic_mutex); 366
379 mic_bootparam_init(mdev); 367 mic_bootparam_init(mdev);
380retry: 368 mdev->dma_mbdev = mbus_register_device(&mdev->pdev->dev,
381 if (MIC_OFFLINE != mdev->state) {
382 rc = -EINVAL;
383 goto unlock_ret;
384 }
385 if (!mdev->ops->is_fw_ready(mdev)) {
386 mic_reset(mdev);
387 /*
388 * The state will either be MIC_OFFLINE if the reset succeeded
389 * or MIC_RESET_FAILED if the firmware reset failed.
390 */
391 goto retry;
392 }
393 mdev->dma_mbdev = mbus_register_device(mdev->sdev->parent,
394 MBUS_DEV_DMA_HOST, &mic_dma_ops, 369 MBUS_DEV_DMA_HOST, &mic_dma_ops,
395 &mbus_hw_ops, mdev->mmio.va); 370 &mbus_hw_ops, id, mdev->mmio.va);
396 if (IS_ERR(mdev->dma_mbdev)) { 371 if (IS_ERR(mdev->dma_mbdev)) {
397 rc = PTR_ERR(mdev->dma_mbdev); 372 rc = PTR_ERR(mdev->dma_mbdev);
398 goto unlock_ret; 373 goto unlock_ret;
@@ -401,16 +376,18 @@ retry:
401 rc = -ENODEV; 376 rc = -ENODEV;
402 goto dma_remove; 377 goto dma_remove;
403 } 378 }
404 mdev->scdev = scif_register_device(mdev->sdev->parent, MIC_SCIF_DEV, 379 mdev->scdev = scif_register_device(&mdev->pdev->dev, MIC_SCIF_DEV,
405 &__mic_dma_ops, &scif_hw_ops, 380 &__mic_dma_ops, &scif_hw_ops,
406 mdev->id + 1, 0, &mdev->mmio, 381 id + 1, 0, &mdev->mmio,
407 &mdev->aper, mdev->dp, NULL, 382 &mdev->aper, mdev->dp, NULL,
408 mdev->dma_ch, mdev->num_dma_ch); 383 mdev->dma_ch, mdev->num_dma_ch,
384 true);
409 if (IS_ERR(mdev->scdev)) { 385 if (IS_ERR(mdev->scdev)) {
410 rc = PTR_ERR(mdev->scdev); 386 rc = PTR_ERR(mdev->scdev);
411 goto dma_free; 387 goto dma_free;
412 } 388 }
413 rc = mdev->ops->load_mic_fw(mdev, buf); 389
390 rc = mdev->ops->load_mic_fw(mdev, NULL);
414 if (rc) 391 if (rc)
415 goto scif_remove; 392 goto scif_remove;
416 mic_smpt_restore(mdev); 393 mic_smpt_restore(mdev);
@@ -419,7 +396,6 @@ retry:
419 mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr); 396 mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr);
420 mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32); 397 mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32);
421 mdev->ops->send_firmware_intr(mdev); 398 mdev->ops->send_firmware_intr(mdev);
422 mic_set_state(mdev, MIC_ONLINE);
423 goto unlock_ret; 399 goto unlock_ret;
424scif_remove: 400scif_remove:
425 scif_unregister_device(mdev->scdev); 401 scif_unregister_device(mdev->scdev);
@@ -428,198 +404,79 @@ dma_free:
428dma_remove: 404dma_remove:
429 mbus_unregister_device(mdev->dma_mbdev); 405 mbus_unregister_device(mdev->dma_mbdev);
430unlock_ret: 406unlock_ret:
431 mutex_unlock(&mdev->mic_mutex);
432 return rc; 407 return rc;
433} 408}
434 409
435/** 410/**
436 * mic_stop - Prepare the MIC for reset and trigger reset. 411 * _mic_stop - Prepare the MIC for reset and trigger reset.
437 * @mdev: pointer to mic_device instance 412 * @cdev: pointer to cosm_device instance
438 * @force: force a MIC to reset even if it is already offline. 413 * @force: force a MIC to reset even if it is already offline.
439 * 414 *
440 * RETURNS: None. 415 * RETURNS: None.
441 */ 416 */
442void mic_stop(struct mic_device *mdev, bool force) 417static void _mic_stop(struct cosm_device *cdev, bool force)
443{
444 mutex_lock(&mdev->mic_mutex);
445 if (MIC_OFFLINE != mdev->state || force) {
446 scif_unregister_device(mdev->scdev);
447 mic_virtio_reset_devices(mdev);
448 mic_free_dma_chans(mdev);
449 mbus_unregister_device(mdev->dma_mbdev);
450 mic_bootparam_init(mdev);
451 mic_reset(mdev);
452 if (MIC_RESET_FAILED == mdev->state)
453 goto unlock;
454 mic_set_shutdown_status(mdev, MIC_NOP);
455 if (MIC_SUSPENDED != mdev->state)
456 mic_set_state(mdev, MIC_OFFLINE);
457 }
458unlock:
459 mutex_unlock(&mdev->mic_mutex);
460}
461
462/**
463 * mic_shutdown - Initiate MIC shutdown.
464 * @mdev: pointer to mic_device instance
465 *
466 * RETURNS: None.
467 */
468void mic_shutdown(struct mic_device *mdev)
469{ 418{
470 struct mic_bootparam *bootparam = mdev->dp; 419 struct mic_device *mdev = cosmdev_to_mdev(cdev);
471 s8 db = bootparam->h2c_shutdown_db;
472
473 mutex_lock(&mdev->mic_mutex);
474 if (MIC_ONLINE == mdev->state && db != -1) {
475 bootparam->shutdown_card = 1;
476 mdev->ops->send_intr(mdev, db);
477 mic_set_state(mdev, MIC_SHUTTING_DOWN);
478 }
479 mutex_unlock(&mdev->mic_mutex);
480}
481
482/**
483 * mic_shutdown_work - Handle shutdown interrupt from MIC.
484 * @work: The work structure.
485 *
486 * This work is scheduled whenever the host has received a shutdown
487 * interrupt from the MIC.
488 */
489void mic_shutdown_work(struct work_struct *work)
490{
491 struct mic_device *mdev = container_of(work, struct mic_device,
492 shutdown_work);
493 struct mic_bootparam *bootparam = mdev->dp;
494
495 mutex_lock(&mdev->mic_mutex);
496 mic_set_shutdown_status(mdev, bootparam->shutdown_status);
497 bootparam->shutdown_status = 0;
498 420
499 /* 421 /*
500 * if state is MIC_SUSPENDED, OSPM suspend is in progress. We do not 422 * Since SCIF handles card shutdown and reset (using COSM), it will
501 * change the state here so as to prevent users from booting the card 423 * be the first to be registered and the last to be
502 * during and after the suspend operation. 424 * unregistered.
503 */ 425 */
504 if (MIC_SHUTTING_DOWN != mdev->state && 426 mic_virtio_reset_devices(mdev);
505 MIC_SUSPENDED != mdev->state) 427 scif_unregister_device(mdev->scdev);
506 mic_set_state(mdev, MIC_SHUTTING_DOWN); 428 mic_free_dma_chans(mdev);
507 mutex_unlock(&mdev->mic_mutex); 429 mbus_unregister_device(mdev->dma_mbdev);
430 mic_bootparam_init(mdev);
508} 431}
509 432
510/** 433static ssize_t _mic_family(struct cosm_device *cdev, char *buf)
511 * mic_reset_trigger_work - Trigger MIC reset.
512 * @work: The work structure.
513 *
514 * This work is scheduled whenever the host wants to reset the MIC.
515 */
516void mic_reset_trigger_work(struct work_struct *work)
517{ 434{
518 struct mic_device *mdev = container_of(work, struct mic_device, 435 struct mic_device *mdev = cosmdev_to_mdev(cdev);
519 reset_trigger_work); 436 static const char *family[MIC_FAMILY_LAST] = { "x100", "Unknown" };
520 437
521 mic_stop(mdev, false); 438 return scnprintf(buf, PAGE_SIZE, "%s\n", family[mdev->family]);
522} 439}
523 440
524/** 441static ssize_t _mic_stepping(struct cosm_device *cdev, char *buf)
525 * mic_complete_resume - Complete MIC Resume after an OSPM suspend/hibernate
526 * event.
527 * @mdev: pointer to mic_device instance
528 *
529 * RETURNS: None.
530 */
531void mic_complete_resume(struct mic_device *mdev)
532{ 442{
533 if (mdev->state != MIC_SUSPENDED) { 443 struct mic_device *mdev = cosmdev_to_mdev(cdev);
534 dev_warn(mdev->sdev->parent, "state %d should be %d\n", 444 const char *string = "??";
535 mdev->state, MIC_SUSPENDED);
536 return;
537 }
538
539 /* Make sure firmware is ready */
540 if (!mdev->ops->is_fw_ready(mdev))
541 mic_stop(mdev, true);
542 445
543 mutex_lock(&mdev->mic_mutex); 446 switch (mdev->stepping) {
544 mic_set_state(mdev, MIC_OFFLINE); 447 case MIC_A0_STEP:
545 mutex_unlock(&mdev->mic_mutex); 448 string = "A0";
546}
547
548/**
549 * mic_prepare_suspend - Handle suspend notification for the MIC device.
550 * @mdev: pointer to mic_device instance
551 *
552 * RETURNS: None.
553 */
554void mic_prepare_suspend(struct mic_device *mdev)
555{
556 unsigned long timeout;
557
558#define MIC_SUSPEND_TIMEOUT (60 * HZ)
559
560 mutex_lock(&mdev->mic_mutex);
561 switch (mdev->state) {
562 case MIC_OFFLINE:
563 /*
564 * Card is already offline. Set state to MIC_SUSPENDED
565 * to prevent users from booting the card.
566 */
567 mic_set_state(mdev, MIC_SUSPENDED);
568 mutex_unlock(&mdev->mic_mutex);
569 break; 449 break;
570 case MIC_ONLINE: 450 case MIC_B0_STEP:
571 /* 451 string = "B0";
572 * Card is online. Set state to MIC_SUSPENDING and notify 452 break;
573 * MIC user space daemon which will issue card 453 case MIC_B1_STEP:
574 * shutdown and reset. 454 string = "B1";
575 */
576 mic_set_state(mdev, MIC_SUSPENDING);
577 mutex_unlock(&mdev->mic_mutex);
578 timeout = wait_for_completion_timeout(&mdev->reset_wait,
579 MIC_SUSPEND_TIMEOUT);
580 /* Force reset the card if the shutdown completion timed out */
581 if (!timeout) {
582 mutex_lock(&mdev->mic_mutex);
583 mic_set_state(mdev, MIC_SUSPENDED);
584 mutex_unlock(&mdev->mic_mutex);
585 mic_stop(mdev, true);
586 }
587 break; 455 break;
588 case MIC_SHUTTING_DOWN: 456 case MIC_C0_STEP:
589 /* 457 string = "C0";
590 * Card is shutting down. Set state to MIC_SUSPENDED
591 * to prevent further boot of the card.
592 */
593 mic_set_state(mdev, MIC_SUSPENDED);
594 mutex_unlock(&mdev->mic_mutex);
595 timeout = wait_for_completion_timeout(&mdev->reset_wait,
596 MIC_SUSPEND_TIMEOUT);
597 /* Force reset the card if the shutdown completion timed out */
598 if (!timeout)
599 mic_stop(mdev, true);
600 break; 458 break;
601 default: 459 default:
602 mutex_unlock(&mdev->mic_mutex);
603 break; 460 break;
604 } 461 }
462 return scnprintf(buf, PAGE_SIZE, "%s\n", string);
605} 463}
606 464
607/** 465static struct mic_mw *_mic_aper(struct cosm_device *cdev)
608 * mic_suspend - Initiate MIC suspend. Suspend merely issues card shutdown.
609 * @mdev: pointer to mic_device instance
610 *
611 * RETURNS: None.
612 */
613void mic_suspend(struct mic_device *mdev)
614{ 466{
615 struct mic_bootparam *bootparam = mdev->dp; 467 struct mic_device *mdev = cosmdev_to_mdev(cdev);
616 s8 db = bootparam->h2c_shutdown_db;
617 468
618 mutex_lock(&mdev->mic_mutex); 469 return &mdev->aper;
619 if (MIC_SUSPENDING == mdev->state && db != -1) {
620 bootparam->shutdown_card = 1;
621 mdev->ops->send_intr(mdev, db);
622 mic_set_state(mdev, MIC_SUSPENDED);
623 }
624 mutex_unlock(&mdev->mic_mutex);
625} 470}
471
472struct cosm_hw_ops cosm_hw_ops = {
473 .reset = _mic_reset,
474 .force_reset = _mic_reset,
475 .post_reset = NULL,
476 .ready = _mic_ready,
477 .start = _mic_start,
478 .stop = _mic_stop,
479 .family = _mic_family,
480 .stepping = _mic_stepping,
481 .aper = _mic_aper,
482};
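
The cosm_hw_ops table above is the only interface the generic COSM driver uses to drive the x100 host hardware, and the _mic_start() kernel-doc notes that COSM serializes every call with a mutex. A hypothetical caller on the COSM side might therefore look roughly like the sketch below; cosm_mutex is taken from the sysfs code earlier in this series, while the hw_ops and index member names are assumptions made only for illustration:

/* Illustrative only: roughly how the COSM core is expected to use the ops */
static int cosm_boot_card(struct cosm_device *cdev)
{
	int rc;

	mutex_lock(&cdev->cosm_mutex);		/* serializes all cosm_hw_ops calls */
	if (!cdev->hw_ops->ready(cdev))		/* _mic_ready() for x100 */
		cdev->hw_ops->reset(cdev);	/* _mic_reset() for x100 */

	rc = cdev->hw_ops->start(cdev, cdev->index);	/* _mic_start() */
	mutex_unlock(&cdev->cosm_mutex);
	return rc;
}
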
diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c
index 3c9ea4896f3c..10581600777a 100644
--- a/drivers/misc/mic/host/mic_debugfs.c
+++ b/drivers/misc/mic/host/mic_debugfs.c
@@ -31,71 +31,6 @@
31/* Debugfs parent dir */ 31/* Debugfs parent dir */
32static struct dentry *mic_dbg; 32static struct dentry *mic_dbg;
33 33
34/**
35 * mic_log_buf_show - Display MIC kernel log buffer.
36 *
37 * log_buf addr/len is read from System.map by user space
38 * and populated in sysfs entries.
39 */
40static int mic_log_buf_show(struct seq_file *s, void *unused)
41{
42 void __iomem *log_buf_va;
43 int __iomem *log_buf_len_va;
44 struct mic_device *mdev = s->private;
45 void *kva;
46 int size;
47 unsigned long aper_offset;
48
49 if (!mdev || !mdev->log_buf_addr || !mdev->log_buf_len)
50 goto done;
51 /*
52 * Card kernel will never be relocated and any kernel text/data mapping
53 * can be translated to phys address by subtracting __START_KERNEL_map.
54 */
55 aper_offset = (unsigned long)mdev->log_buf_len - __START_KERNEL_map;
56 log_buf_len_va = mdev->aper.va + aper_offset;
57 aper_offset = (unsigned long)mdev->log_buf_addr - __START_KERNEL_map;
58 log_buf_va = mdev->aper.va + aper_offset;
59 size = ioread32(log_buf_len_va);
60
61 kva = kmalloc(size, GFP_KERNEL);
62 if (!kva)
63 goto done;
64 mutex_lock(&mdev->mic_mutex);
65 memcpy_fromio(kva, log_buf_va, size);
66 switch (mdev->state) {
67 case MIC_ONLINE:
68 /* Fall through */
69 case MIC_SHUTTING_DOWN:
70 seq_write(s, kva, size);
71 break;
72 default:
73 break;
74 }
75 mutex_unlock(&mdev->mic_mutex);
76 kfree(kva);
77done:
78 return 0;
79}
80
81static int mic_log_buf_open(struct inode *inode, struct file *file)
82{
83 return single_open(file, mic_log_buf_show, inode->i_private);
84}
85
86static int mic_log_buf_release(struct inode *inode, struct file *file)
87{
88 return single_release(inode, file);
89}
90
91static const struct file_operations log_buf_ops = {
92 .owner = THIS_MODULE,
93 .open = mic_log_buf_open,
94 .read = seq_read,
95 .llseek = seq_lseek,
96 .release = mic_log_buf_release
97};
98
99static int mic_smpt_show(struct seq_file *s, void *pos) 34static int mic_smpt_show(struct seq_file *s, void *pos)
100{ 35{
101 int i; 36 int i;
@@ -138,32 +73,6 @@ static const struct file_operations smpt_file_ops = {
138 .release = mic_smpt_debug_release 73 .release = mic_smpt_debug_release
139}; 74};
140 75
141static int mic_soft_reset_show(struct seq_file *s, void *pos)
142{
143 struct mic_device *mdev = s->private;
144
145 mic_stop(mdev, true);
146 return 0;
147}
148
149static int mic_soft_reset_debug_open(struct inode *inode, struct file *file)
150{
151 return single_open(file, mic_soft_reset_show, inode->i_private);
152}
153
154static int mic_soft_reset_debug_release(struct inode *inode, struct file *file)
155{
156 return single_release(inode, file);
157}
158
159static const struct file_operations soft_reset_ops = {
160 .owner = THIS_MODULE,
161 .open = mic_soft_reset_debug_open,
162 .read = seq_read,
163 .llseek = seq_lseek,
164 .release = mic_soft_reset_debug_release
165};
166
167static int mic_post_code_show(struct seq_file *s, void *pos) 76static int mic_post_code_show(struct seq_file *s, void *pos)
168{ 77{
169 struct mic_device *mdev = s->private; 78 struct mic_device *mdev = s->private;
@@ -204,18 +113,8 @@ static int mic_dp_show(struct seq_file *s, void *pos)
204 113
205 seq_printf(s, "Bootparam: magic 0x%x\n", 114 seq_printf(s, "Bootparam: magic 0x%x\n",
206 bootparam->magic); 115 bootparam->magic);
207 seq_printf(s, "Bootparam: h2c_shutdown_db %d\n",
208 bootparam->h2c_shutdown_db);
209 seq_printf(s, "Bootparam: h2c_config_db %d\n", 116 seq_printf(s, "Bootparam: h2c_config_db %d\n",
210 bootparam->h2c_config_db); 117 bootparam->h2c_config_db);
211 seq_printf(s, "Bootparam: c2h_shutdown_db %d\n",
212 bootparam->c2h_shutdown_db);
213 seq_printf(s, "Bootparam: shutdown_status %d\n",
214 bootparam->shutdown_status);
215 seq_printf(s, "Bootparam: shutdown_card %d\n",
216 bootparam->shutdown_card);
217 seq_printf(s, "Bootparam: tot_nodes %d\n",
218 bootparam->tot_nodes);
219 seq_printf(s, "Bootparam: node_id %d\n", 118 seq_printf(s, "Bootparam: node_id %d\n",
220 bootparam->node_id); 119 bootparam->node_id);
221 seq_printf(s, "Bootparam: c2h_scif_db %d\n", 120 seq_printf(s, "Bootparam: c2h_scif_db %d\n",
@@ -392,8 +291,7 @@ static int mic_msi_irq_info_show(struct seq_file *s, void *pos)
392 int i, j; 291 int i, j;
393 u16 entry; 292 u16 entry;
394 u16 vector; 293 u16 vector;
395 struct pci_dev *pdev = container_of(mdev->sdev->parent, 294 struct pci_dev *pdev = mdev->pdev;
396 struct pci_dev, dev);
397 295
398 if (pci_dev_msi_enabled(pdev)) { 296 if (pci_dev_msi_enabled(pdev)) {
399 for (i = 0; i < mdev->irq_info.num_vectors; i++) { 297 for (i = 0; i < mdev->irq_info.num_vectors; i++) {
@@ -454,20 +352,18 @@ static const struct file_operations msi_irq_info_ops = {
454 */ 352 */
455void mic_create_debug_dir(struct mic_device *mdev) 353void mic_create_debug_dir(struct mic_device *mdev)
456{ 354{
355 char name[16];
356
457 if (!mic_dbg) 357 if (!mic_dbg)
458 return; 358 return;
459 359
460 mdev->dbg_dir = debugfs_create_dir(dev_name(mdev->sdev), mic_dbg); 360 scnprintf(name, sizeof(name), "mic%d", mdev->id);
361 mdev->dbg_dir = debugfs_create_dir(name, mic_dbg);
461 if (!mdev->dbg_dir) 362 if (!mdev->dbg_dir)
462 return; 363 return;
463 364
464 debugfs_create_file("log_buf", 0444, mdev->dbg_dir, mdev, &log_buf_ops);
465
466 debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops); 365 debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops);
467 366
468 debugfs_create_file("soft_reset", 0444, mdev->dbg_dir, mdev,
469 &soft_reset_ops);
470
471 debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev, 367 debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev,
472 &post_code_ops); 368 &post_code_ops);
473 369
diff --git a/drivers/misc/mic/host/mic_device.h b/drivers/misc/mic/host/mic_device.h
index 01a7555aa648..461184a12fbb 100644
--- a/drivers/misc/mic/host/mic_device.h
+++ b/drivers/misc/mic/host/mic_device.h
@@ -26,21 +26,12 @@
26#include <linux/notifier.h> 26#include <linux/notifier.h>
27#include <linux/irqreturn.h> 27#include <linux/irqreturn.h>
28#include <linux/dmaengine.h> 28#include <linux/dmaengine.h>
29#include <linux/miscdevice.h>
29#include <linux/mic_bus.h> 30#include <linux/mic_bus.h>
30#include "../bus/scif_bus.h" 31#include "../bus/scif_bus.h"
32#include "../bus/cosm_bus.h"
31#include "mic_intr.h" 33#include "mic_intr.h"
32 34
33/* The maximum number of MIC devices supported in a single host system. */
34#define MIC_MAX_NUM_DEVS 256
35
36/**
37 * enum mic_hw_family - The hardware family to which a device belongs.
38 */
39enum mic_hw_family {
40 MIC_FAMILY_X100 = 0,
41 MIC_FAMILY_UNKNOWN
42};
43
44/** 35/**
45 * enum mic_stepping - MIC stepping ids. 36 * enum mic_stepping - MIC stepping ids.
46 */ 37 */
@@ -51,6 +42,8 @@ enum mic_stepping {
51 MIC_C0_STEP = 0x20, 42 MIC_C0_STEP = 0x20,
52}; 43};
53 44
45extern struct cosm_hw_ops cosm_hw_ops;
46
54/** 47/**
55 * struct mic_device - MIC device information for each card. 48 * struct mic_device - MIC device information for each card.
56 * 49 *
@@ -60,8 +53,7 @@ enum mic_stepping {
60 * @ops: MIC HW specific operations. 53 * @ops: MIC HW specific operations.
61 * @id: The unique device id for this MIC device. 54 * @id: The unique device id for this MIC device.
62 * @stepping: Stepping ID. 55 * @stepping: Stepping ID.
63 * @attr_group: Pointer to list of sysfs attribute groups. 56 * @pdev: Underlying PCI device.
64 * @sdev: Device for sysfs entries.
65 * @mic_mutex: Mutex for synchronizing access to mic_device. 57 * @mic_mutex: Mutex for synchronizing access to mic_device.
66 * @intr_ops: HW specific interrupt operations. 58 * @intr_ops: HW specific interrupt operations.
67 * @smpt_ops: Hardware specific SMPT operations. 59 * @smpt_ops: Hardware specific SMPT operations.
@@ -69,30 +61,17 @@ enum mic_stepping {
69 * @intr_info: H/W specific interrupt information. 61 * @intr_info: H/W specific interrupt information.
70 * @irq_info: The OS specific irq information 62 * @irq_info: The OS specific irq information
71 * @dbg_dir: debugfs directory of this MIC device. 63 * @dbg_dir: debugfs directory of this MIC device.
72 * @cmdline: Kernel command line.
73 * @firmware: Firmware file name.
74 * @ramdisk: Ramdisk file name.
75 * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates.
76 * @bootaddr: MIC boot address. 64 * @bootaddr: MIC boot address.
77 * @reset_trigger_work: Work for triggering reset requests.
78 * @shutdown_work: Work for handling shutdown interrupts.
79 * @state: MIC state.
80 * @shutdown_status: MIC status reported by card for shutdown/crashes.
81 * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes.
82 * @reset_wait: Waitqueue for sleeping while reset completes.
83 * @log_buf_addr: Log buffer address for MIC.
84 * @log_buf_len: Log buffer length address for MIC.
85 * @dp: virtio device page 65 * @dp: virtio device page
86 * @dp_dma_addr: virtio device page DMA address. 66 * @dp_dma_addr: virtio device page DMA address.
87 * @shutdown_db: shutdown doorbell. 67 * @name: name for the misc char device
88 * @shutdown_cookie: shutdown cookie. 68 * @miscdev: registered misc char device
89 * @cdev: Character device for MIC.
90 * @vdev_list: list of virtio devices. 69 * @vdev_list: list of virtio devices.
91 * @pm_notifier: Handles PM notifications from the OS.
92 * @dma_mbdev: MIC BUS DMA device. 70 * @dma_mbdev: MIC BUS DMA device.
93 * @dma_ch - Array of DMA channels 71 * @dma_ch - Array of DMA channels
94 * @num_dma_ch - Number of DMA channels available 72 * @num_dma_ch - Number of DMA channels available
95 * @scdev: SCIF device on the SCIF virtual bus. 73 * @scdev: SCIF device on the SCIF virtual bus.
74 * @cosm_dev: COSM device
96 */ 75 */
97struct mic_device { 76struct mic_device {
98 struct mic_mw mmio; 77 struct mic_mw mmio;
@@ -101,8 +80,7 @@ struct mic_device {
101 struct mic_hw_ops *ops; 80 struct mic_hw_ops *ops;
102 int id; 81 int id;
103 enum mic_stepping stepping; 82 enum mic_stepping stepping;
104 const struct attribute_group **attr_group; 83 struct pci_dev *pdev;
105 struct device *sdev;
106 struct mutex mic_mutex; 84 struct mutex mic_mutex;
107 struct mic_hw_intr_ops *intr_ops; 85 struct mic_hw_intr_ops *intr_ops;
108 struct mic_smpt_ops *smpt_ops; 86 struct mic_smpt_ops *smpt_ops;
@@ -110,30 +88,17 @@ struct mic_device {
110 struct mic_intr_info *intr_info; 88 struct mic_intr_info *intr_info;
111 struct mic_irq_info irq_info; 89 struct mic_irq_info irq_info;
112 struct dentry *dbg_dir; 90 struct dentry *dbg_dir;
113 char *cmdline;
114 char *firmware;
115 char *ramdisk;
116 char *bootmode;
117 u32 bootaddr; 91 u32 bootaddr;
118 struct work_struct reset_trigger_work;
119 struct work_struct shutdown_work;
120 u8 state;
121 u8 shutdown_status;
122 struct kernfs_node *state_sysfs;
123 struct completion reset_wait;
124 void *log_buf_addr;
125 int *log_buf_len;
126 void *dp; 92 void *dp;
127 dma_addr_t dp_dma_addr; 93 dma_addr_t dp_dma_addr;
128 int shutdown_db; 94 char name[16];
129 struct mic_irq *shutdown_cookie; 95 struct miscdevice miscdev;
130 struct cdev cdev;
131 struct list_head vdev_list; 96 struct list_head vdev_list;
132 struct notifier_block pm_notifier;
133 struct mbus_device *dma_mbdev; 97 struct mbus_device *dma_mbdev;
134 struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN]; 98 struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN];
135 int num_dma_ch; 99 int num_dma_ch;
136 struct scif_hw_dev *scdev; 100 struct scif_hw_dev *scdev;
101 struct cosm_device *cosm_dev;
137}; 102};
138 103
139/** 104/**
@@ -199,38 +164,9 @@ mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset)
199 iowrite32(val, mw->va + offset); 164 iowrite32(val, mw->va + offset);
200} 165}
201 166
202static inline struct dma_chan *mic_request_dma_chan(struct mic_device *mdev)
203{
204 dma_cap_mask_t mask;
205 struct dma_chan *chan;
206
207 dma_cap_zero(mask);
208 dma_cap_set(DMA_MEMCPY, mask);
209 chan = dma_request_channel(mask, mdev->ops->dma_filter,
210 mdev->sdev->parent);
211 if (chan)
212 return chan;
213 dev_err(mdev->sdev->parent, "%s %d unable to acquire channel\n",
214 __func__, __LINE__);
215 return NULL;
216}
217
218void mic_sysfs_init(struct mic_device *mdev);
219int mic_start(struct mic_device *mdev, const char *buf);
220void mic_stop(struct mic_device *mdev, bool force);
221void mic_shutdown(struct mic_device *mdev);
222void mic_reset_delayed_work(struct work_struct *work);
223void mic_reset_trigger_work(struct work_struct *work);
224void mic_shutdown_work(struct work_struct *work);
225void mic_bootparam_init(struct mic_device *mdev); 167void mic_bootparam_init(struct mic_device *mdev);
226void mic_set_state(struct mic_device *mdev, u8 state);
227void mic_set_shutdown_status(struct mic_device *mdev, u8 status);
228void mic_create_debug_dir(struct mic_device *dev); 168void mic_create_debug_dir(struct mic_device *dev);
229void mic_delete_debug_dir(struct mic_device *dev); 169void mic_delete_debug_dir(struct mic_device *dev);
230void __init mic_init_debugfs(void); 170void __init mic_init_debugfs(void);
231void mic_exit_debugfs(void); 171void mic_exit_debugfs(void);
232void mic_prepare_suspend(struct mic_device *mdev);
233void mic_complete_resume(struct mic_device *mdev);
234void mic_suspend(struct mic_device *mdev);
235extern atomic_t g_num_mics;
236#endif 172#endif
diff --git a/drivers/misc/mic/host/mic_fops.c b/drivers/misc/mic/host/mic_fops.c
index 85776d7327f3..8cc1d90cd949 100644
--- a/drivers/misc/mic/host/mic_fops.c
+++ b/drivers/misc/mic/host/mic_fops.c
@@ -30,8 +30,8 @@
30int mic_open(struct inode *inode, struct file *f) 30int mic_open(struct inode *inode, struct file *f)
31{ 31{
32 struct mic_vdev *mvdev; 32 struct mic_vdev *mvdev;
33 struct mic_device *mdev = container_of(inode->i_cdev, 33 struct mic_device *mdev = container_of(f->private_data,
34 struct mic_device, cdev); 34 struct mic_device, miscdev);
35 35
36 mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL); 36 mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
37 if (!mvdev) 37 if (!mvdev)
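
The hunk above works because the misc core stores a pointer to the registered struct miscdevice in file->private_data before calling the driver's open(), so the embedding mic_device can be recovered with container_of() instead of going through a cdev. The general shape of that pattern, with hypothetical foo_* names used only for illustration:

struct foo_device {
	/* ... driver state ... */
	struct miscdevice miscdev;	/* embedded and registered via misc_register() */
};

static int foo_open(struct inode *inode, struct file *f)
{
	/* misc_open() has already set f->private_data = &foo->miscdev */
	struct foo_device *foo = container_of(f->private_data,
					      struct foo_device, miscdev);

	/* drivers usually repoint private_data at their own state; mic_open()
	 * points it at a freshly allocated per-open mic_vdev instead */
	f->private_data = foo;
	return 0;
}
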
diff --git a/drivers/misc/mic/host/mic_intr.c b/drivers/misc/mic/host/mic_intr.c
index b4ca6c884d19..08ca3e372fa4 100644
--- a/drivers/misc/mic/host/mic_intr.c
+++ b/drivers/misc/mic/host/mic_intr.c
@@ -30,8 +30,7 @@ static irqreturn_t mic_thread_fn(int irq, void *dev)
30 struct mic_intr_info *intr_info = mdev->intr_info; 30 struct mic_intr_info *intr_info = mdev->intr_info;
31 struct mic_irq_info *irq_info = &mdev->irq_info; 31 struct mic_irq_info *irq_info = &mdev->irq_info;
32 struct mic_intr_cb *intr_cb; 32 struct mic_intr_cb *intr_cb;
33 struct pci_dev *pdev = container_of(mdev->sdev->parent, 33 struct pci_dev *pdev = mdev->pdev;
34 struct pci_dev, dev);
35 int i; 34 int i;
36 35
37 spin_lock(&irq_info->mic_thread_lock); 36 spin_lock(&irq_info->mic_thread_lock);
@@ -57,8 +56,7 @@ static irqreturn_t mic_interrupt(int irq, void *dev)
57 struct mic_intr_info *intr_info = mdev->intr_info; 56 struct mic_intr_info *intr_info = mdev->intr_info;
58 struct mic_irq_info *irq_info = &mdev->irq_info; 57 struct mic_irq_info *irq_info = &mdev->irq_info;
59 struct mic_intr_cb *intr_cb; 58 struct mic_intr_cb *intr_cb;
60 struct pci_dev *pdev = container_of(mdev->sdev->parent, 59 struct pci_dev *pdev = mdev->pdev;
61 struct pci_dev, dev);
62 u32 mask; 60 u32 mask;
63 int i; 61 int i;
64 62
@@ -83,7 +81,7 @@ static irqreturn_t mic_interrupt(int irq, void *dev)
83 81
84/* Return the interrupt offset from the index. Index is 0 based. */ 82/* Return the interrupt offset from the index. Index is 0 based. */
85static u16 mic_map_src_to_offset(struct mic_device *mdev, 83static u16 mic_map_src_to_offset(struct mic_device *mdev,
86 int intr_src, enum mic_intr_type type) 84 int intr_src, enum mic_intr_type type)
87{ 85{
88 if (type >= MIC_NUM_INTR_TYPES) 86 if (type >= MIC_NUM_INTR_TYPES)
89 return MIC_NUM_OFFSETS; 87 return MIC_NUM_OFFSETS;
@@ -214,7 +212,7 @@ static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev)
214 mdev->irq_info.msix_entries[i].entry = i; 212 mdev->irq_info.msix_entries[i].entry = i;
215 213
216 rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries, 214 rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries,
217 MIC_MIN_MSIX); 215 MIC_MIN_MSIX);
218 if (rc) { 216 if (rc) {
219 dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc); 217 dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc);
220 goto err_enable_msix; 218 goto err_enable_msix;
@@ -229,7 +227,7 @@ static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev)
229 goto err_nomem2; 227 goto err_nomem2;
230 } 228 }
231 229
232 dev_dbg(mdev->sdev->parent, 230 dev_dbg(&mdev->pdev->dev,
233 "%d MSIx irqs setup\n", mdev->irq_info.num_vectors); 231 "%d MSIx irqs setup\n", mdev->irq_info.num_vectors);
234 return 0; 232 return 0;
235err_nomem2: 233err_nomem2:
@@ -281,7 +279,6 @@ static void mic_release_callbacks(struct mic_device *mdev)
281 spin_lock(&mdev->irq_info.mic_thread_lock); 279 spin_lock(&mdev->irq_info.mic_thread_lock);
282 spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags); 280 spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags);
283 for (i = 0; i < MIC_NUM_OFFSETS; i++) { 281 for (i = 0; i < MIC_NUM_OFFSETS; i++) {
284
285 if (list_empty(&mdev->irq_info.cb_list[i])) 282 if (list_empty(&mdev->irq_info.cb_list[i]))
286 break; 283 break;
287 284
@@ -443,12 +440,11 @@ mic_request_threaded_irq(struct mic_device *mdev,
443 unsigned long cookie = 0; 440 unsigned long cookie = 0;
444 u16 entry; 441 u16 entry;
445 struct mic_intr_cb *intr_cb; 442 struct mic_intr_cb *intr_cb;
446 struct pci_dev *pdev = container_of(mdev->sdev->parent, 443 struct pci_dev *pdev = mdev->pdev;
447 struct pci_dev, dev);
448 444
449 offset = mic_map_src_to_offset(mdev, intr_src, type); 445 offset = mic_map_src_to_offset(mdev, intr_src, type);
450 if (offset >= MIC_NUM_OFFSETS) { 446 if (offset >= MIC_NUM_OFFSETS) {
451 dev_err(mdev->sdev->parent, 447 dev_err(&mdev->pdev->dev,
452 "Error mapping index %d to a valid source id.\n", 448 "Error mapping index %d to a valid source id.\n",
453 intr_src); 449 intr_src);
454 rc = -EINVAL; 450 rc = -EINVAL;
@@ -458,7 +454,7 @@ mic_request_threaded_irq(struct mic_device *mdev,
458 if (mdev->irq_info.num_vectors > 1) { 454 if (mdev->irq_info.num_vectors > 1) {
459 msix = mic_get_available_vector(mdev); 455 msix = mic_get_available_vector(mdev);
460 if (!msix) { 456 if (!msix) {
461 dev_err(mdev->sdev->parent, 457 dev_err(&mdev->pdev->dev,
462 "No MSIx vectors available for use.\n"); 458 "No MSIx vectors available for use.\n");
463 rc = -ENOSPC; 459 rc = -ENOSPC;
464 goto err; 460 goto err;
@@ -467,7 +463,7 @@ mic_request_threaded_irq(struct mic_device *mdev,
467 rc = request_threaded_irq(msix->vector, handler, thread_fn, 463 rc = request_threaded_irq(msix->vector, handler, thread_fn,
468 0, name, data); 464 0, name, data);
469 if (rc) { 465 if (rc) {
470 dev_dbg(mdev->sdev->parent, 466 dev_dbg(&mdev->pdev->dev,
471 "request irq failed rc = %d\n", rc); 467 "request irq failed rc = %d\n", rc);
472 goto err; 468 goto err;
473 } 469 }
@@ -476,13 +472,13 @@ mic_request_threaded_irq(struct mic_device *mdev,
476 mdev->intr_ops->program_msi_to_src_map(mdev, 472 mdev->intr_ops->program_msi_to_src_map(mdev,
477 entry, offset, true); 473 entry, offset, true);
478 cookie = MK_COOKIE(entry, offset); 474 cookie = MK_COOKIE(entry, offset);
479 dev_dbg(mdev->sdev->parent, "irq: %d assigned for src: %d\n", 475 dev_dbg(&mdev->pdev->dev, "irq: %d assigned for src: %d\n",
480 msix->vector, intr_src); 476 msix->vector, intr_src);
481 } else { 477 } else {
482 intr_cb = mic_register_intr_callback(mdev, offset, handler, 478 intr_cb = mic_register_intr_callback(mdev, offset, handler,
483 thread_fn, data); 479 thread_fn, data);
484 if (IS_ERR(intr_cb)) { 480 if (IS_ERR(intr_cb)) {
485 dev_err(mdev->sdev->parent, 481 dev_err(&mdev->pdev->dev,
486 "No available callback entries for use\n"); 482 "No available callback entries for use\n");
487 rc = PTR_ERR(intr_cb); 483 rc = PTR_ERR(intr_cb);
488 goto err; 484 goto err;
@@ -495,7 +491,7 @@ mic_request_threaded_irq(struct mic_device *mdev,
495 entry, offset, true); 491 entry, offset, true);
496 } 492 }
497 cookie = MK_COOKIE(entry, intr_cb->cb_id); 493 cookie = MK_COOKIE(entry, intr_cb->cb_id);
498 dev_dbg(mdev->sdev->parent, "callback %d registered for src: %d\n", 494 dev_dbg(&mdev->pdev->dev, "callback %d registered for src: %d\n",
499 intr_cb->cb_id, intr_src); 495 intr_cb->cb_id, intr_src);
500 } 496 }
501 return (struct mic_irq *)cookie; 497 return (struct mic_irq *)cookie;
@@ -515,20 +511,19 @@ err:
515 * returns: none. 511 * returns: none.
516 */ 512 */
517void mic_free_irq(struct mic_device *mdev, 513void mic_free_irq(struct mic_device *mdev,
518 struct mic_irq *cookie, void *data) 514 struct mic_irq *cookie, void *data)
519{ 515{
520 u32 offset; 516 u32 offset;
521 u32 entry; 517 u32 entry;
522 u8 src_id; 518 u8 src_id;
523 unsigned int irq; 519 unsigned int irq;
524 struct pci_dev *pdev = container_of(mdev->sdev->parent, 520 struct pci_dev *pdev = mdev->pdev;
525 struct pci_dev, dev);
526 521
527 entry = GET_ENTRY((unsigned long)cookie); 522 entry = GET_ENTRY((unsigned long)cookie);
528 offset = GET_OFFSET((unsigned long)cookie); 523 offset = GET_OFFSET((unsigned long)cookie);
529 if (mdev->irq_info.num_vectors > 1) { 524 if (mdev->irq_info.num_vectors > 1) {
530 if (entry >= mdev->irq_info.num_vectors) { 525 if (entry >= mdev->irq_info.num_vectors) {
531 dev_warn(mdev->sdev->parent, 526 dev_warn(&mdev->pdev->dev,
532 "entry %d should be < num_irq %d\n", 527 "entry %d should be < num_irq %d\n",
533 entry, mdev->irq_info.num_vectors); 528 entry, mdev->irq_info.num_vectors);
534 return; 529 return;
@@ -539,12 +534,12 @@ void mic_free_irq(struct mic_device *mdev,
539 mdev->intr_ops->program_msi_to_src_map(mdev, 534 mdev->intr_ops->program_msi_to_src_map(mdev,
540 entry, offset, false); 535 entry, offset, false);
541 536
542 dev_dbg(mdev->sdev->parent, "irq: %d freed\n", irq); 537 dev_dbg(&mdev->pdev->dev, "irq: %d freed\n", irq);
543 } else { 538 } else {
544 irq = pdev->irq; 539 irq = pdev->irq;
545 src_id = mic_unregister_intr_callback(mdev, offset); 540 src_id = mic_unregister_intr_callback(mdev, offset);
546 if (src_id >= MIC_NUM_OFFSETS) { 541 if (src_id >= MIC_NUM_OFFSETS) {
547 dev_warn(mdev->sdev->parent, "Error unregistering callback\n"); 542 dev_warn(&mdev->pdev->dev, "Error unregistering callback\n");
548 return; 543 return;
549 } 544 }
550 if (pci_dev_msi_enabled(pdev)) { 545 if (pci_dev_msi_enabled(pdev)) {
@@ -552,7 +547,7 @@ void mic_free_irq(struct mic_device *mdev,
552 mdev->intr_ops->program_msi_to_src_map(mdev, 547 mdev->intr_ops->program_msi_to_src_map(mdev,
553 entry, src_id, false); 548 entry, src_id, false);
554 } 549 }
555 dev_dbg(mdev->sdev->parent, "callback %d unregistered for src: %d\n", 550 dev_dbg(&mdev->pdev->dev, "callback %d unregistered for src: %d\n",
556 offset, src_id); 551 offset, src_id);
557 } 552 }
558} 553}
@@ -579,7 +574,7 @@ int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
579 574
580 rc = mic_setup_intx(mdev, pdev); 575 rc = mic_setup_intx(mdev, pdev);
581 if (rc) { 576 if (rc) {
582 dev_err(mdev->sdev->parent, "no usable interrupts\n"); 577 dev_err(&mdev->pdev->dev, "no usable interrupts\n");
583 return rc; 578 return rc;
584 } 579 }
585done: 580done:
@@ -635,8 +630,7 @@ void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev)
635void mic_intr_restore(struct mic_device *mdev) 630void mic_intr_restore(struct mic_device *mdev)
636{ 631{
637 int entry, offset; 632 int entry, offset;
638 struct pci_dev *pdev = container_of(mdev->sdev->parent, 633 struct pci_dev *pdev = mdev->pdev;
639 struct pci_dev, dev);
640 634
641 if (!pci_dev_msi_enabled(pdev)) 635 if (!pci_dev_msi_enabled(pdev))
642 return; 636 return;
diff --git a/drivers/misc/mic/host/mic_main.c b/drivers/misc/mic/host/mic_main.c
index 456462932151..153894e7ed5b 100644
--- a/drivers/misc/mic/host/mic_main.c
+++ b/drivers/misc/mic/host/mic_main.c
@@ -16,17 +16,11 @@
16 * the file called "COPYING". 16 * the file called "COPYING".
17 * 17 *
18 * Intel MIC Host driver. 18 * Intel MIC Host driver.
19 *
20 * Global TODO's across the driver to be added after initial base
21 * patches are accepted upstream:
22 * 1) Enable DMA support.
23 * 2) Enable per vring interrupt support.
24 */ 19 */
25#include <linux/fs.h> 20#include <linux/fs.h>
26#include <linux/module.h> 21#include <linux/module.h>
27#include <linux/pci.h> 22#include <linux/pci.h>
28#include <linux/poll.h> 23#include <linux/poll.h>
29#include <linux/suspend.h>
30 24
31#include <linux/mic_common.h> 25#include <linux/mic_common.h>
32#include "../common/mic_dev.h" 26#include "../common/mic_dev.h"
@@ -63,12 +57,8 @@ MODULE_DEVICE_TABLE(pci, mic_pci_tbl);
63 57
64/* ID allocator for MIC devices */ 58/* ID allocator for MIC devices */
65static struct ida g_mic_ida; 59static struct ida g_mic_ida;
66/* Class of MIC devices for sysfs accessibility. */
67static struct class *g_mic_class;
68/* Base device node number for MIC devices */ 60/* Base device node number for MIC devices */
69static dev_t g_mic_devno; 61static dev_t g_mic_devno;
70/* Track the total number of MIC devices */
71atomic_t g_num_mics;
72 62
73static const struct file_operations mic_fops = { 63static const struct file_operations mic_fops = {
74 .open = mic_open, 64 .open = mic_open,
@@ -83,17 +73,14 @@ static const struct file_operations mic_fops = {
83static int mic_dp_init(struct mic_device *mdev) 73static int mic_dp_init(struct mic_device *mdev)
84{ 74{
85 mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL); 75 mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL);
86 if (!mdev->dp) { 76 if (!mdev->dp)
87 dev_err(mdev->sdev->parent, "%s %d err %d\n",
88 __func__, __LINE__, -ENOMEM);
89 return -ENOMEM; 77 return -ENOMEM;
90 }
91 78
92 mdev->dp_dma_addr = mic_map_single(mdev, 79 mdev->dp_dma_addr = mic_map_single(mdev,
93 mdev->dp, MIC_DP_SIZE); 80 mdev->dp, MIC_DP_SIZE);
94 if (mic_map_error(mdev->dp_dma_addr)) { 81 if (mic_map_error(mdev->dp_dma_addr)) {
95 kfree(mdev->dp); 82 kfree(mdev->dp);
96 dev_err(mdev->sdev->parent, "%s %d err %d\n", 83 dev_err(&mdev->pdev->dev, "%s %d err %d\n",
97 __func__, __LINE__, -ENOMEM); 84 __func__, __LINE__, -ENOMEM);
98 return -ENOMEM; 85 return -ENOMEM;
99 } 86 }
@@ -110,30 +97,6 @@ static void mic_dp_uninit(struct mic_device *mdev)
110} 97}
111 98
112/** 99/**
113 * mic_shutdown_db - Shutdown doorbell interrupt handler.
114 */
115static irqreturn_t mic_shutdown_db(int irq, void *data)
116{
117 struct mic_device *mdev = data;
118 struct mic_bootparam *bootparam = mdev->dp;
119
120 mdev->ops->intr_workarounds(mdev);
121
122 switch (bootparam->shutdown_status) {
123 case MIC_HALTED:
124 case MIC_POWER_OFF:
125 case MIC_RESTART:
126 /* Fall through */
127 case MIC_CRASHED:
128 schedule_work(&mdev->shutdown_work);
129 break;
130 default:
131 break;
132 };
133 return IRQ_HANDLED;
134}
135
136/**
137 * mic_ops_init: Initialize HW specific operation tables. 100 * mic_ops_init: Initialize HW specific operation tables.
138 * 101 *
139 * @mdev: pointer to mic_device instance 102 * @mdev: pointer to mic_device instance
@@ -190,43 +153,6 @@ static enum mic_hw_family mic_get_family(struct pci_dev *pdev)
190} 153}
191 154
192/** 155/**
193* mic_pm_notifier: Notifier callback function that handles
194* PM notifications.
195*
196* @notifier_block: The notifier structure.
197* @pm_event: The event for which the driver was notified.
198* @unused: Meaningless. Always NULL.
199*
200* returns NOTIFY_DONE
201*/
202static int mic_pm_notifier(struct notifier_block *notifier,
203 unsigned long pm_event, void *unused)
204{
205 struct mic_device *mdev = container_of(notifier,
206 struct mic_device, pm_notifier);
207
208 switch (pm_event) {
209 case PM_HIBERNATION_PREPARE:
210 /* Fall through */
211 case PM_SUSPEND_PREPARE:
212 mic_prepare_suspend(mdev);
213 break;
214 case PM_POST_HIBERNATION:
215 /* Fall through */
216 case PM_POST_SUSPEND:
217 /* Fall through */
218 case PM_POST_RESTORE:
219 mic_complete_resume(mdev);
220 break;
221 case PM_RESTORE_PREPARE:
222 break;
223 default:
224 break;
225 }
226 return NOTIFY_DONE;
227}
228
229/**
230 * mic_device_init - Allocates and initializes the MIC device structure 156 * mic_device_init - Allocates and initializes the MIC device structure
231 * 157 *
232 * @mdev: pointer to mic_device instance 158 * @mdev: pointer to mic_device instance
@@ -234,52 +160,16 @@ static int mic_pm_notifier(struct notifier_block *notifier,
234 * 160 *
235 * returns none. 161 * returns none.
236 */ 162 */
237static int 163static void
238mic_device_init(struct mic_device *mdev, struct pci_dev *pdev) 164mic_device_init(struct mic_device *mdev, struct pci_dev *pdev)
239{ 165{
240 int rc; 166 mdev->pdev = pdev;
241
242 mdev->family = mic_get_family(pdev); 167 mdev->family = mic_get_family(pdev);
243 mdev->stepping = pdev->revision; 168 mdev->stepping = pdev->revision;
244 mic_ops_init(mdev); 169 mic_ops_init(mdev);
245 mic_sysfs_init(mdev);
246 mutex_init(&mdev->mic_mutex); 170 mutex_init(&mdev->mic_mutex);
247 mdev->irq_info.next_avail_src = 0; 171 mdev->irq_info.next_avail_src = 0;
248 INIT_WORK(&mdev->reset_trigger_work, mic_reset_trigger_work);
249 INIT_WORK(&mdev->shutdown_work, mic_shutdown_work);
250 init_completion(&mdev->reset_wait);
251 INIT_LIST_HEAD(&mdev->vdev_list); 172 INIT_LIST_HEAD(&mdev->vdev_list);
252 mdev->pm_notifier.notifier_call = mic_pm_notifier;
253 rc = register_pm_notifier(&mdev->pm_notifier);
254 if (rc) {
255 dev_err(&pdev->dev, "register_pm_notifier failed rc %d\n",
256 rc);
257 goto register_pm_notifier_fail;
258 }
259 return 0;
260register_pm_notifier_fail:
261 flush_work(&mdev->shutdown_work);
262 flush_work(&mdev->reset_trigger_work);
263 return rc;
264}
265
266/**
267 * mic_device_uninit - Frees resources allocated during mic_device_init(..)
268 *
269 * @mdev: pointer to mic_device instance
270 *
271 * returns none
272 */
273static void mic_device_uninit(struct mic_device *mdev)
274{
275 /* The cmdline sysfs entry might have allocated cmdline */
276 kfree(mdev->cmdline);
277 kfree(mdev->firmware);
278 kfree(mdev->ramdisk);
279 kfree(mdev->bootmode);
280 flush_work(&mdev->reset_trigger_work);
281 flush_work(&mdev->shutdown_work);
282 unregister_pm_notifier(&mdev->pm_notifier);
283} 173}
284 174
285/** 175/**
@@ -291,7 +181,7 @@ static void mic_device_uninit(struct mic_device *mdev)
291 * returns 0 on success, < 0 on failure. 181 * returns 0 on success, < 0 on failure.
292 */ 182 */
293static int mic_probe(struct pci_dev *pdev, 183static int mic_probe(struct pci_dev *pdev,
294 const struct pci_device_id *ent) 184 const struct pci_device_id *ent)
295{ 185{
296 int rc; 186 int rc;
297 struct mic_device *mdev; 187 struct mic_device *mdev;
@@ -309,16 +199,12 @@ static int mic_probe(struct pci_dev *pdev,
309 goto ida_fail; 199 goto ida_fail;
310 } 200 }
311 201
312 rc = mic_device_init(mdev, pdev); 202 mic_device_init(mdev, pdev);
313 if (rc) {
314 dev_err(&pdev->dev, "mic_device_init failed rc %d\n", rc);
315 goto device_init_fail;
316 }
317 203
318 rc = pci_enable_device(pdev); 204 rc = pci_enable_device(pdev);
319 if (rc) { 205 if (rc) {
320 dev_err(&pdev->dev, "failed to enable pci device.\n"); 206 dev_err(&pdev->dev, "failed to enable pci device.\n");
321 goto uninit_device; 207 goto ida_remove;
322 } 208 }
323 209
324 pci_set_master(pdev); 210 pci_set_master(pdev);
@@ -367,62 +253,39 @@ static int mic_probe(struct pci_dev *pdev,
367 253
368 pci_set_drvdata(pdev, mdev); 254 pci_set_drvdata(pdev, mdev);
369 255
370 mdev->sdev = device_create_with_groups(g_mic_class, &pdev->dev,
371 MKDEV(MAJOR(g_mic_devno), mdev->id), NULL,
372 mdev->attr_group, "mic%d", mdev->id);
373 if (IS_ERR(mdev->sdev)) {
374 rc = PTR_ERR(mdev->sdev);
375 dev_err(&pdev->dev,
376 "device_create_with_groups failed rc %d\n", rc);
377 goto smpt_uninit;
378 }
379 mdev->state_sysfs = sysfs_get_dirent(mdev->sdev->kobj.sd, "state");
380 if (!mdev->state_sysfs) {
381 rc = -ENODEV;
382 dev_err(&pdev->dev, "sysfs_get_dirent failed rc %d\n", rc);
383 goto destroy_device;
384 }
385
386 rc = mic_dp_init(mdev); 256 rc = mic_dp_init(mdev);
387 if (rc) { 257 if (rc) {
388 dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc); 258 dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc);
389 goto sysfs_put; 259 goto smpt_uninit;
390 }
391 mutex_lock(&mdev->mic_mutex);
392
393 mdev->shutdown_db = mic_next_db(mdev);
394 mdev->shutdown_cookie = mic_request_threaded_irq(mdev, mic_shutdown_db,
395 NULL, "shutdown-interrupt", mdev,
396 mdev->shutdown_db, MIC_INTR_DB);
397 if (IS_ERR(mdev->shutdown_cookie)) {
398 rc = PTR_ERR(mdev->shutdown_cookie);
399 mutex_unlock(&mdev->mic_mutex);
400 goto dp_uninit;
401 } 260 }
402 mutex_unlock(&mdev->mic_mutex);
403 mic_bootparam_init(mdev); 261 mic_bootparam_init(mdev);
404 262
405 mic_create_debug_dir(mdev); 263 mic_create_debug_dir(mdev);
406 cdev_init(&mdev->cdev, &mic_fops); 264
407 mdev->cdev.owner = THIS_MODULE; 265 mdev->miscdev.minor = MISC_DYNAMIC_MINOR;
408 rc = cdev_add(&mdev->cdev, MKDEV(MAJOR(g_mic_devno), mdev->id), 1); 266 snprintf(mdev->name, sizeof(mdev->name), "mic%d", mdev->id);
267 mdev->miscdev.name = mdev->name;
268 mdev->miscdev.fops = &mic_fops;
269 mdev->miscdev.parent = &mdev->pdev->dev;
270 rc = misc_register(&mdev->miscdev);
409 if (rc) { 271 if (rc) {
410 dev_err(&pdev->dev, "cdev_add err id %d rc %d\n", mdev->id, rc); 272 dev_err(&pdev->dev, "misc_register err id %d rc %d\n",
273 mdev->id, rc);
411 goto cleanup_debug_dir; 274 goto cleanup_debug_dir;
412 } 275 }
413 atomic_inc(&g_num_mics); 276
277 mdev->cosm_dev = cosm_register_device(&mdev->pdev->dev, &cosm_hw_ops);
278 if (IS_ERR(mdev->cosm_dev)) {
279 rc = PTR_ERR(mdev->cosm_dev);
280 dev_err(&pdev->dev, "cosm_add_device failed rc %d\n", rc);
281 goto misc_dereg;
282 }
414 return 0; 283 return 0;
284misc_dereg:
285 misc_deregister(&mdev->miscdev);
415cleanup_debug_dir: 286cleanup_debug_dir:
416 mic_delete_debug_dir(mdev); 287 mic_delete_debug_dir(mdev);
417 mutex_lock(&mdev->mic_mutex);
418 mic_free_irq(mdev, mdev->shutdown_cookie, mdev);
419 mutex_unlock(&mdev->mic_mutex);
420dp_uninit:
421 mic_dp_uninit(mdev); 288 mic_dp_uninit(mdev);
422sysfs_put:
423 sysfs_put(mdev->state_sysfs);
424destroy_device:
425 device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id));
426smpt_uninit: 289smpt_uninit:
427 mic_smpt_uninit(mdev); 290 mic_smpt_uninit(mdev);
428free_interrupts: 291free_interrupts:
@@ -435,9 +298,7 @@ release_regions:
435 pci_release_regions(pdev); 298 pci_release_regions(pdev);
436disable_device: 299disable_device:
437 pci_disable_device(pdev); 300 pci_disable_device(pdev);
438uninit_device: 301ida_remove:
439 mic_device_uninit(mdev);
440device_init_fail:
441 ida_simple_remove(&g_mic_ida, mdev->id); 302 ida_simple_remove(&g_mic_ida, mdev->id);
442ida_fail: 303ida_fail:
443 kfree(mdev); 304 kfree(mdev);
@@ -461,22 +322,14 @@ static void mic_remove(struct pci_dev *pdev)
461 if (!mdev) 322 if (!mdev)
462 return; 323 return;
463 324
464 mic_stop(mdev, false); 325 cosm_unregister_device(mdev->cosm_dev);
465 atomic_dec(&g_num_mics); 326 misc_deregister(&mdev->miscdev);
466 cdev_del(&mdev->cdev);
467 mic_delete_debug_dir(mdev); 327 mic_delete_debug_dir(mdev);
468 mutex_lock(&mdev->mic_mutex);
469 mic_free_irq(mdev, mdev->shutdown_cookie, mdev);
470 mutex_unlock(&mdev->mic_mutex);
471 flush_work(&mdev->shutdown_work);
472 mic_dp_uninit(mdev); 328 mic_dp_uninit(mdev);
473 sysfs_put(mdev->state_sysfs);
474 device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id));
475 mic_smpt_uninit(mdev); 329 mic_smpt_uninit(mdev);
476 mic_free_interrupts(mdev, pdev); 330 mic_free_interrupts(mdev, pdev);
477 iounmap(mdev->mmio.va);
478 iounmap(mdev->aper.va); 331 iounmap(mdev->aper.va);
479 mic_device_uninit(mdev); 332 iounmap(mdev->mmio.va);
480 pci_release_regions(pdev); 333 pci_release_regions(pdev);
481 pci_disable_device(pdev); 334 pci_disable_device(pdev);
482 ida_simple_remove(&g_mic_ida, mdev->id); 335 ida_simple_remove(&g_mic_ida, mdev->id);
@@ -495,32 +348,23 @@ static int __init mic_init(void)
495 int ret; 348 int ret;
496 349
497 ret = alloc_chrdev_region(&g_mic_devno, 0, 350 ret = alloc_chrdev_region(&g_mic_devno, 0,
498 MIC_MAX_NUM_DEVS, mic_driver_name); 351 MIC_MAX_NUM_DEVS, mic_driver_name);
499 if (ret) { 352 if (ret) {
500 pr_err("alloc_chrdev_region failed ret %d\n", ret); 353 pr_err("alloc_chrdev_region failed ret %d\n", ret);
501 goto error; 354 goto error;
502 } 355 }
503 356
504 g_mic_class = class_create(THIS_MODULE, mic_driver_name);
505 if (IS_ERR(g_mic_class)) {
506 ret = PTR_ERR(g_mic_class);
507 pr_err("class_create failed ret %d\n", ret);
508 goto cleanup_chrdev;
509 }
510
511 mic_init_debugfs(); 357 mic_init_debugfs();
512 ida_init(&g_mic_ida); 358 ida_init(&g_mic_ida);
513 ret = pci_register_driver(&mic_driver); 359 ret = pci_register_driver(&mic_driver);
514 if (ret) { 360 if (ret) {
515 pr_err("pci_register_driver failed ret %d\n", ret); 361 pr_err("pci_register_driver failed ret %d\n", ret);
516 goto cleanup_debugfs; 362 goto cleanup_chrdev;
517 } 363 }
518 return ret; 364 return ret;
519cleanup_debugfs: 365cleanup_chrdev:
520 ida_destroy(&g_mic_ida); 366 ida_destroy(&g_mic_ida);
521 mic_exit_debugfs(); 367 mic_exit_debugfs();
522 class_destroy(g_mic_class);
523cleanup_chrdev:
524 unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS); 368 unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS);
525error: 369error:
526 return ret; 370 return ret;
@@ -531,7 +375,6 @@ static void __exit mic_exit(void)
531 pci_unregister_driver(&mic_driver); 375 pci_unregister_driver(&mic_driver);
532 ida_destroy(&g_mic_ida); 376 ida_destroy(&g_mic_ida);
533 mic_exit_debugfs(); 377 mic_exit_debugfs();
534 class_destroy(g_mic_class);
535 unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS); 378 unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS);
536} 379}
537 380
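[Editor's aside] The probe/remove hunks above drop the per-board class device and cdev pair in favour of a single misc device. A minimal, self-contained sketch of that registration pattern follows; the names (my_dev, my_fops, "my_dev0") are illustrative and not taken from the driver.

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>

static const struct file_operations my_fops = {
	.owner = THIS_MODULE,
};

/* One misc device per instance; the minor number is assigned dynamically. */
static struct miscdevice my_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name  = "my_dev0",
	.fops  = &my_fops,
};

static int __init my_init(void)
{
	/* Creates /dev/my_dev0 and the matching sysfs entry in one call. */
	return misc_register(&my_dev);
}

static void __exit my_exit(void)
{
	misc_deregister(&my_dev);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");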
diff --git a/drivers/misc/mic/host/mic_smpt.c b/drivers/misc/mic/host/mic_smpt.c
index cec82034875f..c3f958580fb0 100644
--- a/drivers/misc/mic/host/mic_smpt.c
+++ b/drivers/misc/mic/host/mic_smpt.c
@@ -76,7 +76,7 @@ mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa)
76 76
77/* Populate an SMPT entry and update the reference counts. */ 77/* Populate an SMPT entry and update the reference counts. */
78static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr, 78static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr,
79 int entries, struct mic_device *mdev) 79 int entries, struct mic_device *mdev)
80{ 80{
81 struct mic_smpt_info *smpt_info = mdev->smpt; 81 struct mic_smpt_info *smpt_info = mdev->smpt;
82 int i; 82 int i;
@@ -97,7 +97,7 @@ static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr,
97 * for a given DMA address and size. 97 * for a given DMA address and size.
98 */ 98 */
99static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr, 99static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr,
100 int entries, s64 *ref, size_t size) 100 int entries, s64 *ref, size_t size)
101{ 101{
102 int spt; 102 int spt;
103 int ae = 0; 103 int ae = 0;
@@ -148,7 +148,7 @@ found:
148 * and the starting smpt address 148 * and the starting smpt address
149 */ 149 */
150static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr, 150static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr,
151 size_t size, s64 *ref, u64 *smpt_start) 151 size_t size, s64 *ref, u64 *smpt_start)
152{ 152{
153 u64 start = dma_addr; 153 u64 start = dma_addr;
154 u64 end = dma_addr + size; 154 u64 end = dma_addr + size;
@@ -181,7 +181,7 @@ dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr)
181 dma_addr_t dma_addr; 181 dma_addr_t dma_addr;
182 182
183 if (!mic_is_system_addr(mdev, mic_addr)) { 183 if (!mic_is_system_addr(mdev, mic_addr)) {
184 dev_err(mdev->sdev->parent, 184 dev_err(&mdev->pdev->dev,
185 "mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr); 185 "mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr);
186 return -EINVAL; 186 return -EINVAL;
187 } 187 }
@@ -218,7 +218,7 @@ dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size)
218 return mic_addr; 218 return mic_addr;
219 219
220 num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size, 220 num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size,
221 ref, &smpt_start); 221 ref, &smpt_start);
222 222
223 /* Set the smpt table appropriately and get 16G aligned mic address */ 223 /* Set the smpt table appropriately and get 16G aligned mic address */
224 mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size); 224 mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size);
@@ -231,7 +231,7 @@ dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size)
231 * else generate mic_addr by adding the 16G offset in dma_addr 231 * else generate mic_addr by adding the 16G offset in dma_addr
232 */ 232 */
233 if (!mic_addr && MIC_FAMILY_X100 == mdev->family) { 233 if (!mic_addr && MIC_FAMILY_X100 == mdev->family) {
234 dev_err(mdev->sdev->parent, 234 dev_err(&mdev->pdev->dev,
235 "mic_map failed dma_addr 0x%llx size 0x%lx\n", 235 "mic_map failed dma_addr 0x%llx size 0x%lx\n",
236 dma_addr, size); 236 dma_addr, size);
237 return mic_addr; 237 return mic_addr;
@@ -264,7 +264,7 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
264 return; 264 return;
265 265
266 if (!mic_is_system_addr(mdev, mic_addr)) { 266 if (!mic_is_system_addr(mdev, mic_addr)) {
267 dev_err(mdev->sdev->parent, 267 dev_err(&mdev->pdev->dev,
268 "invalid address: 0x%llx\n", mic_addr); 268 "invalid address: 0x%llx\n", mic_addr);
269 return; 269 return;
270 } 270 }
@@ -284,7 +284,7 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
284 for (i = spt; i < spt + num_smpt; i++) { 284 for (i = spt; i < spt + num_smpt; i++) {
285 smpt_info->entry[i].ref_count -= ref[i - spt]; 285 smpt_info->entry[i].ref_count -= ref[i - spt];
286 if (smpt_info->entry[i].ref_count < 0) 286 if (smpt_info->entry[i].ref_count < 0)
287 dev_warn(mdev->sdev->parent, 287 dev_warn(&mdev->pdev->dev,
288 "ref count for entry %d is negative\n", i); 288 "ref count for entry %d is negative\n", i);
289 } 289 }
290 spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); 290 spin_unlock_irqrestore(&smpt_info->smpt_lock, flags);
@@ -307,15 +307,14 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
307dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size) 307dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size)
308{ 308{
309 dma_addr_t mic_addr = 0; 309 dma_addr_t mic_addr = 0;
310 struct pci_dev *pdev = container_of(mdev->sdev->parent, 310 struct pci_dev *pdev = mdev->pdev;
311 struct pci_dev, dev);
312 dma_addr_t dma_addr = 311 dma_addr_t dma_addr =
313 pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL); 312 pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL);
314 313
315 if (!pci_dma_mapping_error(pdev, dma_addr)) { 314 if (!pci_dma_mapping_error(pdev, dma_addr)) {
316 mic_addr = mic_map(mdev, dma_addr, size); 315 mic_addr = mic_map(mdev, dma_addr, size);
317 if (!mic_addr) { 316 if (!mic_addr) {
318 dev_err(mdev->sdev->parent, 317 dev_err(&mdev->pdev->dev,
319 "mic_map failed dma_addr 0x%llx size 0x%lx\n", 318 "mic_map failed dma_addr 0x%llx size 0x%lx\n",
320 dma_addr, size); 319 dma_addr, size);
321 pci_unmap_single(pdev, dma_addr, 320 pci_unmap_single(pdev, dma_addr,
@@ -339,8 +338,7 @@ dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size)
339void 338void
340mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) 339mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size)
341{ 340{
342 struct pci_dev *pdev = container_of(mdev->sdev->parent, 341 struct pci_dev *pdev = mdev->pdev;
343 struct pci_dev, dev);
344 dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr); 342 dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr);
345 mic_unmap(mdev, mic_addr, size); 343 mic_unmap(mdev, mic_addr, size);
346 pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); 344 pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
@@ -399,18 +397,18 @@ void mic_smpt_uninit(struct mic_device *mdev)
399 struct mic_smpt_info *smpt_info = mdev->smpt; 397 struct mic_smpt_info *smpt_info = mdev->smpt;
400 int i; 398 int i;
401 399
402 dev_dbg(mdev->sdev->parent, 400 dev_dbg(&mdev->pdev->dev,
403 "nodeid %d SMPT ref count %lld map %lld unmap %lld\n", 401 "nodeid %d SMPT ref count %lld map %lld unmap %lld\n",
404 mdev->id, smpt_info->ref_count, 402 mdev->id, smpt_info->ref_count,
405 smpt_info->map_count, smpt_info->unmap_count); 403 smpt_info->map_count, smpt_info->unmap_count);
406 404
407 for (i = 0; i < smpt_info->info.num_reg; i++) { 405 for (i = 0; i < smpt_info->info.num_reg; i++) {
408 dev_dbg(mdev->sdev->parent, 406 dev_dbg(&mdev->pdev->dev,
409 "SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n", 407 "SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n",
410 i, smpt_info->entry[i].dma_addr, 408 i, smpt_info->entry[i].dma_addr,
411 smpt_info->entry[i].ref_count); 409 smpt_info->entry[i].ref_count);
412 if (smpt_info->entry[i].ref_count) 410 if (smpt_info->entry[i].ref_count)
413 dev_warn(mdev->sdev->parent, 411 dev_warn(&mdev->pdev->dev,
414 "ref count for entry %d is not zero\n", i); 412 "ref count for entry %d is not zero\n", i);
415 } 413 }
416 kfree(smpt_info->entry); 414 kfree(smpt_info->entry);
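[Editor's aside] The logging and mapping changes in this file all follow one conversion: the PCI device is reached through a pointer cached at init time instead of being recovered from the sysfs parent with container_of(). A hedged before/after sketch, using an illustrative stand-in structure rather than the driver's real types:

#include <linux/pci.h>
#include <linux/device.h>

/* Illustrative stand-in for the driver's per-board structure. */
struct demo_device {
	struct pci_dev *pdev;	/* cached in the init path, as in mic_device_init() */
};

/* Old pattern (removed): recover the pci_dev from the sysfs parent. */
static struct device *demo_dev_old(struct device *sysfs_parent)
{
	struct pci_dev *pdev = container_of(sysfs_parent, struct pci_dev, dev);

	return &pdev->dev;
}

/* New pattern: dereference the cached pointer directly. */
static struct device *demo_dev_new(struct demo_device *ddev)
{
	return &ddev->pdev->dev;	/* used for dev_dbg()/dev_err() and DMA mapping */
}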
diff --git a/drivers/misc/mic/host/mic_sysfs.c b/drivers/misc/mic/host/mic_sysfs.c
deleted file mode 100644
index 6dd864e4a617..000000000000
--- a/drivers/misc/mic/host/mic_sysfs.c
+++ /dev/null
@@ -1,459 +0,0 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC Host driver.
19 *
20 */
21#include <linux/pci.h>
22
23#include <linux/mic_common.h>
24#include "../common/mic_dev.h"
25#include "mic_device.h"
26
27/*
28 * A state-to-string lookup table, for exposing a human readable state
29 * via sysfs. Always keep in sync with enum mic_states
30 */
31static const char * const mic_state_string[] = {
32 [MIC_OFFLINE] = "offline",
33 [MIC_ONLINE] = "online",
34 [MIC_SHUTTING_DOWN] = "shutting_down",
35 [MIC_RESET_FAILED] = "reset_failed",
36 [MIC_SUSPENDING] = "suspending",
37 [MIC_SUSPENDED] = "suspended",
38};
39
40/*
41 * A shutdown-status-to-string lookup table, for exposing a human
42 * readable state via sysfs. Always keep in sync with enum mic_shutdown_status
43 */
44static const char * const mic_shutdown_status_string[] = {
45 [MIC_NOP] = "nop",
46 [MIC_CRASHED] = "crashed",
47 [MIC_HALTED] = "halted",
48 [MIC_POWER_OFF] = "poweroff",
49 [MIC_RESTART] = "restart",
50};
51
52void mic_set_shutdown_status(struct mic_device *mdev, u8 shutdown_status)
53{
54 dev_dbg(mdev->sdev->parent, "Shutdown Status %s -> %s\n",
55 mic_shutdown_status_string[mdev->shutdown_status],
56 mic_shutdown_status_string[shutdown_status]);
57 mdev->shutdown_status = shutdown_status;
58}
59
60void mic_set_state(struct mic_device *mdev, u8 state)
61{
62 dev_dbg(mdev->sdev->parent, "State %s -> %s\n",
63 mic_state_string[mdev->state],
64 mic_state_string[state]);
65 mdev->state = state;
66 sysfs_notify_dirent(mdev->state_sysfs);
67}
68
69static ssize_t
70family_show(struct device *dev, struct device_attribute *attr, char *buf)
71{
72 static const char x100[] = "x100";
73 static const char unknown[] = "Unknown";
74 const char *card = NULL;
75 struct mic_device *mdev = dev_get_drvdata(dev->parent);
76
77 if (!mdev)
78 return -EINVAL;
79
80 switch (mdev->family) {
81 case MIC_FAMILY_X100:
82 card = x100;
83 break;
84 default:
85 card = unknown;
86 break;
87 }
88 return scnprintf(buf, PAGE_SIZE, "%s\n", card);
89}
90static DEVICE_ATTR_RO(family);
91
92static ssize_t
93stepping_show(struct device *dev, struct device_attribute *attr, char *buf)
94{
95 struct mic_device *mdev = dev_get_drvdata(dev->parent);
96 char *string = "??";
97
98 if (!mdev)
99 return -EINVAL;
100
101 switch (mdev->stepping) {
102 case MIC_A0_STEP:
103 string = "A0";
104 break;
105 case MIC_B0_STEP:
106 string = "B0";
107 break;
108 case MIC_B1_STEP:
109 string = "B1";
110 break;
111 case MIC_C0_STEP:
112 string = "C0";
113 break;
114 default:
115 break;
116 }
117 return scnprintf(buf, PAGE_SIZE, "%s\n", string);
118}
119static DEVICE_ATTR_RO(stepping);
120
121static ssize_t
122state_show(struct device *dev, struct device_attribute *attr, char *buf)
123{
124 struct mic_device *mdev = dev_get_drvdata(dev->parent);
125
126 if (!mdev || mdev->state >= MIC_LAST)
127 return -EINVAL;
128
129 return scnprintf(buf, PAGE_SIZE, "%s\n",
130 mic_state_string[mdev->state]);
131}
132
133static ssize_t
134state_store(struct device *dev, struct device_attribute *attr,
135 const char *buf, size_t count)
136{
137 int rc = 0;
138 struct mic_device *mdev = dev_get_drvdata(dev->parent);
139 if (!mdev)
140 return -EINVAL;
141 if (sysfs_streq(buf, "boot")) {
142 rc = mic_start(mdev, buf);
143 if (rc) {
144 dev_err(mdev->sdev->parent,
145 "mic_boot failed rc %d\n", rc);
146 count = rc;
147 }
148 goto done;
149 }
150
151 if (sysfs_streq(buf, "reset")) {
152 schedule_work(&mdev->reset_trigger_work);
153 goto done;
154 }
155
156 if (sysfs_streq(buf, "shutdown")) {
157 mic_shutdown(mdev);
158 goto done;
159 }
160
161 if (sysfs_streq(buf, "suspend")) {
162 mic_suspend(mdev);
163 goto done;
164 }
165
166 count = -EINVAL;
167done:
168 return count;
169}
170static DEVICE_ATTR_RW(state);
171
172static ssize_t shutdown_status_show(struct device *dev,
173 struct device_attribute *attr, char *buf)
174{
175 struct mic_device *mdev = dev_get_drvdata(dev->parent);
176
177 if (!mdev || mdev->shutdown_status >= MIC_STATUS_LAST)
178 return -EINVAL;
179
180 return scnprintf(buf, PAGE_SIZE, "%s\n",
181 mic_shutdown_status_string[mdev->shutdown_status]);
182}
183static DEVICE_ATTR_RO(shutdown_status);
184
185static ssize_t
186cmdline_show(struct device *dev, struct device_attribute *attr, char *buf)
187{
188 struct mic_device *mdev = dev_get_drvdata(dev->parent);
189 char *cmdline;
190
191 if (!mdev)
192 return -EINVAL;
193
194 cmdline = mdev->cmdline;
195
196 if (cmdline)
197 return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline);
198 return 0;
199}
200
201static ssize_t
202cmdline_store(struct device *dev, struct device_attribute *attr,
203 const char *buf, size_t count)
204{
205 struct mic_device *mdev = dev_get_drvdata(dev->parent);
206
207 if (!mdev)
208 return -EINVAL;
209
210 mutex_lock(&mdev->mic_mutex);
211 kfree(mdev->cmdline);
212
213 mdev->cmdline = kmalloc(count + 1, GFP_KERNEL);
214 if (!mdev->cmdline) {
215 count = -ENOMEM;
216 goto unlock;
217 }
218
219 strncpy(mdev->cmdline, buf, count);
220
221 if (mdev->cmdline[count - 1] == '\n')
222 mdev->cmdline[count - 1] = '\0';
223 else
224 mdev->cmdline[count] = '\0';
225unlock:
226 mutex_unlock(&mdev->mic_mutex);
227 return count;
228}
229static DEVICE_ATTR_RW(cmdline);
230
231static ssize_t
232firmware_show(struct device *dev, struct device_attribute *attr, char *buf)
233{
234 struct mic_device *mdev = dev_get_drvdata(dev->parent);
235 char *firmware;
236
237 if (!mdev)
238 return -EINVAL;
239
240 firmware = mdev->firmware;
241
242 if (firmware)
243 return scnprintf(buf, PAGE_SIZE, "%s\n", firmware);
244 return 0;
245}
246
247static ssize_t
248firmware_store(struct device *dev, struct device_attribute *attr,
249 const char *buf, size_t count)
250{
251 struct mic_device *mdev = dev_get_drvdata(dev->parent);
252
253 if (!mdev)
254 return -EINVAL;
255
256 mutex_lock(&mdev->mic_mutex);
257 kfree(mdev->firmware);
258
259 mdev->firmware = kmalloc(count + 1, GFP_KERNEL);
260 if (!mdev->firmware) {
261 count = -ENOMEM;
262 goto unlock;
263 }
264 strncpy(mdev->firmware, buf, count);
265
266 if (mdev->firmware[count - 1] == '\n')
267 mdev->firmware[count - 1] = '\0';
268 else
269 mdev->firmware[count] = '\0';
270unlock:
271 mutex_unlock(&mdev->mic_mutex);
272 return count;
273}
274static DEVICE_ATTR_RW(firmware);
275
276static ssize_t
277ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf)
278{
279 struct mic_device *mdev = dev_get_drvdata(dev->parent);
280 char *ramdisk;
281
282 if (!mdev)
283 return -EINVAL;
284
285 ramdisk = mdev->ramdisk;
286
287 if (ramdisk)
288 return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk);
289 return 0;
290}
291
292static ssize_t
293ramdisk_store(struct device *dev, struct device_attribute *attr,
294 const char *buf, size_t count)
295{
296 struct mic_device *mdev = dev_get_drvdata(dev->parent);
297
298 if (!mdev)
299 return -EINVAL;
300
301 mutex_lock(&mdev->mic_mutex);
302 kfree(mdev->ramdisk);
303
304 mdev->ramdisk = kmalloc(count + 1, GFP_KERNEL);
305 if (!mdev->ramdisk) {
306 count = -ENOMEM;
307 goto unlock;
308 }
309
310 strncpy(mdev->ramdisk, buf, count);
311
312 if (mdev->ramdisk[count - 1] == '\n')
313 mdev->ramdisk[count - 1] = '\0';
314 else
315 mdev->ramdisk[count] = '\0';
316unlock:
317 mutex_unlock(&mdev->mic_mutex);
318 return count;
319}
320static DEVICE_ATTR_RW(ramdisk);
321
322static ssize_t
323bootmode_show(struct device *dev, struct device_attribute *attr, char *buf)
324{
325 struct mic_device *mdev = dev_get_drvdata(dev->parent);
326 char *bootmode;
327
328 if (!mdev)
329 return -EINVAL;
330
331 bootmode = mdev->bootmode;
332
333 if (bootmode)
334 return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode);
335 return 0;
336}
337
338static ssize_t
339bootmode_store(struct device *dev, struct device_attribute *attr,
340 const char *buf, size_t count)
341{
342 struct mic_device *mdev = dev_get_drvdata(dev->parent);
343
344 if (!mdev)
345 return -EINVAL;
346
347 if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "elf"))
348 return -EINVAL;
349
350 mutex_lock(&mdev->mic_mutex);
351 kfree(mdev->bootmode);
352
353 mdev->bootmode = kmalloc(count + 1, GFP_KERNEL);
354 if (!mdev->bootmode) {
355 count = -ENOMEM;
356 goto unlock;
357 }
358
359 strncpy(mdev->bootmode, buf, count);
360
361 if (mdev->bootmode[count - 1] == '\n')
362 mdev->bootmode[count - 1] = '\0';
363 else
364 mdev->bootmode[count] = '\0';
365unlock:
366 mutex_unlock(&mdev->mic_mutex);
367 return count;
368}
369static DEVICE_ATTR_RW(bootmode);
370
371static ssize_t
372log_buf_addr_show(struct device *dev, struct device_attribute *attr,
373 char *buf)
374{
375 struct mic_device *mdev = dev_get_drvdata(dev->parent);
376
377 if (!mdev)
378 return -EINVAL;
379
380 return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_addr);
381}
382
383static ssize_t
384log_buf_addr_store(struct device *dev, struct device_attribute *attr,
385 const char *buf, size_t count)
386{
387 struct mic_device *mdev = dev_get_drvdata(dev->parent);
388 int ret;
389 unsigned long addr;
390
391 if (!mdev)
392 return -EINVAL;
393
394 ret = kstrtoul(buf, 16, &addr);
395 if (ret)
396 goto exit;
397
398 mdev->log_buf_addr = (void *)addr;
399 ret = count;
400exit:
401 return ret;
402}
403static DEVICE_ATTR_RW(log_buf_addr);
404
405static ssize_t
406log_buf_len_show(struct device *dev, struct device_attribute *attr,
407 char *buf)
408{
409 struct mic_device *mdev = dev_get_drvdata(dev->parent);
410
411 if (!mdev)
412 return -EINVAL;
413
414 return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_len);
415}
416
417static ssize_t
418log_buf_len_store(struct device *dev, struct device_attribute *attr,
419 const char *buf, size_t count)
420{
421 struct mic_device *mdev = dev_get_drvdata(dev->parent);
422 int ret;
423 unsigned long addr;
424
425 if (!mdev)
426 return -EINVAL;
427
428 ret = kstrtoul(buf, 16, &addr);
429 if (ret)
430 goto exit;
431
432 mdev->log_buf_len = (int *)addr;
433 ret = count;
434exit:
435 return ret;
436}
437static DEVICE_ATTR_RW(log_buf_len);
438
439static struct attribute *mic_default_attrs[] = {
440 &dev_attr_family.attr,
441 &dev_attr_stepping.attr,
442 &dev_attr_state.attr,
443 &dev_attr_shutdown_status.attr,
444 &dev_attr_cmdline.attr,
445 &dev_attr_firmware.attr,
446 &dev_attr_ramdisk.attr,
447 &dev_attr_bootmode.attr,
448 &dev_attr_log_buf_addr.attr,
449 &dev_attr_log_buf_len.attr,
450
451 NULL
452};
453
454ATTRIBUTE_GROUPS(mic_default);
455
456void mic_sysfs_init(struct mic_device *mdev)
457{
458 mdev->attr_group = mic_default_groups;
459}
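[Editor's aside] The deleted sysfs file was built from the stock DEVICE_ATTR_RO/DEVICE_ATTR_RW and ATTRIBUTE_GROUPS helpers; per the diffstat, the equivalent attributes move into the new cosm_sysfs.c. For reference, a minimal sketch of that attribute-group pattern (the attribute name is hypothetical):

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	/* scnprintf() bounds the output to one page, as the deleted code did. */
	return scnprintf(buf, PAGE_SIZE, "%s\n", "value");
}
static DEVICE_ATTR_RO(example);

static struct attribute *demo_attrs[] = {
	&dev_attr_example.attr,
	NULL
};
ATTRIBUTE_GROUPS(demo);

/* demo_groups can then be handed to device_create_with_groups() or stored in
 * an attribute-group pointer, much as mic_sysfs_init() used to do.
 */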
diff --git a/drivers/misc/mic/host/mic_virtio.c b/drivers/misc/mic/host/mic_virtio.c
index cc08e9f733c9..58b107a24a8b 100644
--- a/drivers/misc/mic/host/mic_virtio.c
+++ b/drivers/misc/mic/host/mic_virtio.c
@@ -23,7 +23,6 @@
23#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/dmaengine.h> 24#include <linux/dmaengine.h>
25#include <linux/mic_common.h> 25#include <linux/mic_common.h>
26
27#include "../common/mic_dev.h" 26#include "../common/mic_dev.h"
28#include "mic_device.h" 27#include "mic_device.h"
29#include "mic_smpt.h" 28#include "mic_smpt.h"
@@ -62,7 +61,7 @@ static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst,
62 } 61 }
63error: 62error:
64 if (err) 63 if (err)
65 dev_err(mdev->sdev->parent, "%s %d err %d\n", 64 dev_err(&mdev->pdev->dev, "%s %d err %d\n",
66 __func__, __LINE__, err); 65 __func__, __LINE__, err);
67 return err; 66 return err;
68} 67}
@@ -440,7 +439,7 @@ void mic_virtio_reset_devices(struct mic_device *mdev)
440 struct list_head *pos, *tmp; 439 struct list_head *pos, *tmp;
441 struct mic_vdev *mvdev; 440 struct mic_vdev *mvdev;
442 441
443 dev_dbg(mdev->sdev->parent, "%s\n", __func__); 442 dev_dbg(&mdev->pdev->dev, "%s\n", __func__);
444 443
445 list_for_each_safe(pos, tmp, &mdev->vdev_list) { 444 list_for_each_safe(pos, tmp, &mdev->vdev_list) {
446 mvdev = list_entry(pos, struct mic_vdev, list); 445 mvdev = list_entry(pos, struct mic_vdev, list);
@@ -686,7 +685,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev,
686 mvr->head = USHRT_MAX; 685 mvr->head = USHRT_MAX;
687 mvr->mvdev = mvdev; 686 mvr->mvdev = mvdev;
688 mvr->vrh.notify = mic_notify; 687 mvr->vrh.notify = mic_notify;
689 dev_dbg(mdev->sdev->parent, 688 dev_dbg(&mdev->pdev->dev,
690 "%s %d index %d va %p info %p vr_size 0x%x\n", 689 "%s %d index %d va %p info %p vr_size 0x%x\n",
691 __func__, __LINE__, i, vr->va, vr->info, vr_size); 690 __func__, __LINE__, i, vr->va, vr->info, vr_size);
692 mvr->buf = (void *)__get_free_pages(GFP_KERNEL, 691 mvr->buf = (void *)__get_free_pages(GFP_KERNEL,
@@ -704,7 +703,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev,
704 mvdev->virtio_db, MIC_INTR_DB); 703 mvdev->virtio_db, MIC_INTR_DB);
705 if (IS_ERR(mvdev->virtio_cookie)) { 704 if (IS_ERR(mvdev->virtio_cookie)) {
706 ret = PTR_ERR(mvdev->virtio_cookie); 705 ret = PTR_ERR(mvdev->virtio_cookie);
707 dev_dbg(mdev->sdev->parent, "request irq failed\n"); 706 dev_dbg(&mdev->pdev->dev, "request irq failed\n");
708 goto err; 707 goto err;
709 } 708 }
710 709
@@ -720,7 +719,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev,
720 smp_wmb(); 719 smp_wmb();
721 dd->type = type; 720 dd->type = type;
722 721
723 dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type); 722 dev_dbg(&mdev->pdev->dev, "Added virtio device id %d\n", dd->type);
724 723
725 db = bootparam->h2c_config_db; 724 db = bootparam->h2c_config_db;
726 if (db != -1) 725 if (db != -1)
@@ -755,7 +754,7 @@ void mic_virtio_del_device(struct mic_vdev *mvdev)
755 db = bootparam->h2c_config_db; 754 db = bootparam->h2c_config_db;
756 if (db == -1) 755 if (db == -1)
757 goto skip_hot_remove; 756 goto skip_hot_remove;
758 dev_dbg(mdev->sdev->parent, 757 dev_dbg(&mdev->pdev->dev,
759 "Requesting hot remove id %d\n", mvdev->virtio_id); 758 "Requesting hot remove id %d\n", mvdev->virtio_id);
760 mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE; 759 mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
761 mdev->ops->send_intr(mdev, db); 760 mdev->ops->send_intr(mdev, db);
@@ -765,7 +764,7 @@ void mic_virtio_del_device(struct mic_vdev *mvdev)
765 if (ret) 764 if (ret)
766 break; 765 break;
767 } 766 }
768 dev_dbg(mdev->sdev->parent, 767 dev_dbg(&mdev->pdev->dev,
769 "Device id %d config_change %d guest_ack %d retry %d\n", 768 "Device id %d config_change %d guest_ack %d retry %d\n",
770 mvdev->virtio_id, mvdev->dc->config_change, 769 mvdev->virtio_id, mvdev->dc->config_change,
771 mvdev->dc->guest_ack, retry); 770 mvdev->dc->guest_ack, retry);
@@ -794,7 +793,7 @@ skip_hot_remove:
794 tmp_mvdev = list_entry(pos, struct mic_vdev, list); 793 tmp_mvdev = list_entry(pos, struct mic_vdev, list);
795 if (tmp_mvdev == mvdev) { 794 if (tmp_mvdev == mvdev) {
796 list_del(pos); 795 list_del(pos);
797 dev_dbg(mdev->sdev->parent, 796 dev_dbg(&mdev->pdev->dev,
798 "Removing virtio device id %d\n", 797 "Removing virtio device id %d\n",
799 mvdev->virtio_id); 798 mvdev->virtio_id);
800 break; 799 break;
diff --git a/drivers/misc/mic/host/mic_virtio.h b/drivers/misc/mic/host/mic_virtio.h
index d574efb853d9..a80631f2790d 100644
--- a/drivers/misc/mic/host/mic_virtio.h
+++ b/drivers/misc/mic/host/mic_virtio.h
@@ -124,7 +124,7 @@ void mic_bh_handler(struct work_struct *work);
124/* Helper API to obtain the MIC PCIe device */ 124/* Helper API to obtain the MIC PCIe device */
125static inline struct device *mic_dev(struct mic_vdev *mvdev) 125static inline struct device *mic_dev(struct mic_vdev *mvdev)
126{ 126{
127 return mvdev->mdev->sdev->parent; 127 return &mvdev->mdev->pdev->dev;
128} 128}
129 129
130/* Helper API to check if a virtio device is initialized */ 130/* Helper API to check if a virtio device is initialized */
diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c
index 3341e90dede4..8118ac48c764 100644
--- a/drivers/misc/mic/host/mic_x100.c
+++ b/drivers/misc/mic/host/mic_x100.c
@@ -43,7 +43,7 @@
43static void 43static void
44mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val) 44mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val)
45{ 45{
46 dev_dbg(mdev->sdev->parent, "Writing 0x%x to scratch pad index %d\n", 46 dev_dbg(&mdev->pdev->dev, "Writing 0x%x to scratch pad index %d\n",
47 val, idx); 47 val, idx);
48 mic_mmio_write(&mdev->mmio, val, 48 mic_mmio_write(&mdev->mmio, val,
49 MIC_X100_SBOX_BASE_ADDRESS + 49 MIC_X100_SBOX_BASE_ADDRESS +
@@ -66,7 +66,7 @@ mic_x100_read_spad(struct mic_device *mdev, unsigned int idx)
66 MIC_X100_SBOX_BASE_ADDRESS + 66 MIC_X100_SBOX_BASE_ADDRESS +
67 MIC_X100_SBOX_SPAD0 + idx * 4); 67 MIC_X100_SBOX_SPAD0 + idx * 4);
68 68
69 dev_dbg(mdev->sdev->parent, 69 dev_dbg(&mdev->pdev->dev,
70 "Reading 0x%x from scratch pad index %d\n", val, idx); 70 "Reading 0x%x from scratch pad index %d\n", val, idx);
71 return val; 71 return val;
72} 72}
@@ -126,7 +126,7 @@ static void mic_x100_disable_interrupts(struct mic_device *mdev)
126 * @mdev: pointer to mic_device instance 126 * @mdev: pointer to mic_device instance
127 */ 127 */
128static void mic_x100_send_sbox_intr(struct mic_device *mdev, 128static void mic_x100_send_sbox_intr(struct mic_device *mdev,
129 int doorbell) 129 int doorbell)
130{ 130{
131 struct mic_mw *mw = &mdev->mmio; 131 struct mic_mw *mw = &mdev->mmio;
132 u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8; 132 u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8;
@@ -147,7 +147,7 @@ static void mic_x100_send_sbox_intr(struct mic_device *mdev,
147 * @mdev: pointer to mic_device instance 147 * @mdev: pointer to mic_device instance
148 */ 148 */
149static void mic_x100_send_rdmasr_intr(struct mic_device *mdev, 149static void mic_x100_send_rdmasr_intr(struct mic_device *mdev,
150 int doorbell) 150 int doorbell)
151{ 151{
152 int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2); 152 int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2);
153 /* Ensure that the interrupt is ordered w.r.t. previous stores. */ 153 /* Ensure that the interrupt is ordered w.r.t. previous stores. */
@@ -359,15 +359,14 @@ mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw)
359 359
360 boot_mem = mdev->aper.len >> 20; 360 boot_mem = mdev->aper.len >> 20;
361 buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL); 361 buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL);
362 if (!buf) { 362 if (!buf)
363 dev_err(mdev->sdev->parent,
364 "%s %d allocation failed\n", __func__, __LINE__);
365 return -ENOMEM; 363 return -ENOMEM;
366 } 364
367 len += snprintf(buf, CMDLINE_SIZE - len, 365 len += snprintf(buf, CMDLINE_SIZE - len,
368 " mem=%dM", boot_mem); 366 " mem=%dM", boot_mem);
369 if (mdev->cmdline) 367 if (mdev->cosm_dev->cmdline)
370 snprintf(buf + len, CMDLINE_SIZE - len, " %s", mdev->cmdline); 368 snprintf(buf + len, CMDLINE_SIZE - len, " %s",
369 mdev->cosm_dev->cmdline);
371 memcpy_toio(cmd_line_va, buf, strlen(buf) + 1); 370 memcpy_toio(cmd_line_va, buf, strlen(buf) + 1);
372 kfree(buf); 371 kfree(buf);
373 return 0; 372 return 0;
@@ -386,12 +385,11 @@ mic_x100_load_ramdisk(struct mic_device *mdev)
386 int rc; 385 int rc;
387 struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr; 386 struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr;
388 387
389 rc = request_firmware(&fw, 388 rc = request_firmware(&fw, mdev->cosm_dev->ramdisk, &mdev->pdev->dev);
390 mdev->ramdisk, mdev->sdev->parent);
391 if (rc < 0) { 389 if (rc < 0) {
392 dev_err(mdev->sdev->parent, 390 dev_err(&mdev->pdev->dev,
393 "ramdisk request_firmware failed: %d %s\n", 391 "ramdisk request_firmware failed: %d %s\n",
394 rc, mdev->ramdisk); 392 rc, mdev->cosm_dev->ramdisk);
395 goto error; 393 goto error;
396 } 394 }
397 /* 395 /*
@@ -423,10 +421,10 @@ mic_x100_get_boot_addr(struct mic_device *mdev)
423 421
424 scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO); 422 scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO);
425 boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2); 423 boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2);
426 dev_dbg(mdev->sdev->parent, "%s %d boot_addr 0x%x\n", 424 dev_dbg(&mdev->pdev->dev, "%s %d boot_addr 0x%x\n",
427 __func__, __LINE__, boot_addr); 425 __func__, __LINE__, boot_addr);
428 if (boot_addr > (1 << 31)) { 426 if (boot_addr > (1 << 31)) {
429 dev_err(mdev->sdev->parent, 427 dev_err(&mdev->pdev->dev,
430 "incorrect bootaddr 0x%x\n", 428 "incorrect bootaddr 0x%x\n",
431 boot_addr); 429 boot_addr);
432 rc = -EINVAL; 430 rc = -EINVAL;
@@ -454,37 +452,37 @@ mic_x100_load_firmware(struct mic_device *mdev, const char *buf)
454 if (rc) 452 if (rc)
455 goto error; 453 goto error;
456 /* load OS */ 454 /* load OS */
457 rc = request_firmware(&fw, mdev->firmware, mdev->sdev->parent); 455 rc = request_firmware(&fw, mdev->cosm_dev->firmware, &mdev->pdev->dev);
458 if (rc < 0) { 456 if (rc < 0) {
459 dev_err(mdev->sdev->parent, 457 dev_err(&mdev->pdev->dev,
460 "ramdisk request_firmware failed: %d %s\n", 458 "ramdisk request_firmware failed: %d %s\n",
461 rc, mdev->firmware); 459 rc, mdev->cosm_dev->firmware);
462 goto error; 460 goto error;
463 } 461 }
464 if (mdev->bootaddr > mdev->aper.len - fw->size) { 462 if (mdev->bootaddr > mdev->aper.len - fw->size) {
465 rc = -EINVAL; 463 rc = -EINVAL;
466 dev_err(mdev->sdev->parent, "%s %d rc %d bootaddr 0x%x\n", 464 dev_err(&mdev->pdev->dev, "%s %d rc %d bootaddr 0x%x\n",
467 __func__, __LINE__, rc, mdev->bootaddr); 465 __func__, __LINE__, rc, mdev->bootaddr);
468 release_firmware(fw); 466 release_firmware(fw);
469 goto error; 467 goto error;
470 } 468 }
471 memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size); 469 memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size);
472 mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size); 470 mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size);
473 if (!strcmp(mdev->bootmode, "elf")) 471 if (!strcmp(mdev->cosm_dev->bootmode, "flash"))
474 goto done; 472 goto done;
475 /* load command line */ 473 /* load command line */
476 rc = mic_x100_load_command_line(mdev, fw); 474 rc = mic_x100_load_command_line(mdev, fw);
477 if (rc) { 475 if (rc) {
478 dev_err(mdev->sdev->parent, "%s %d rc %d\n", 476 dev_err(&mdev->pdev->dev, "%s %d rc %d\n",
479 __func__, __LINE__, rc); 477 __func__, __LINE__, rc);
480 goto error; 478 goto error;
481 } 479 }
482 release_firmware(fw); 480 release_firmware(fw);
483 /* load ramdisk */ 481 /* load ramdisk */
484 if (mdev->ramdisk) 482 if (mdev->cosm_dev->ramdisk)
485 rc = mic_x100_load_ramdisk(mdev); 483 rc = mic_x100_load_ramdisk(mdev);
486error: 484error:
487 dev_dbg(mdev->sdev->parent, "%s %d rc %d\n", __func__, __LINE__, rc); 485 dev_dbg(&mdev->pdev->dev, "%s %d rc %d\n", __func__, __LINE__, rc);
488done: 486done:
489 return rc; 487 return rc;
490} 488}
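[Editor's aside] The mic_x100 firmware hunks keep the usual request_firmware()/release_firmware() flow; only the source of the image name (now the cosm_dev fields) and the parent device change. A hedged sketch of that basic flow, with an illustrative firmware name:

#include <linux/firmware.h>
#include <linux/device.h>
#include <linux/io.h>

/* Load "demo.img" from /lib/firmware and copy it into a device aperture.
 * 'dev' is the PCI device's struct device, 'dst' an __iomem mapping.
 */
static int demo_load_image(struct device *dev, void __iomem *dst, size_t max)
{
	const struct firmware *fw;
	int rc;

	rc = request_firmware(&fw, "demo.img", dev);
	if (rc < 0) {
		dev_err(dev, "request_firmware failed: %d\n", rc);
		return rc;
	}
	if (fw->size > max) {
		rc = -EINVAL;
	} else {
		/* MMIO apertures must be written with memcpy_toio(). */
		memcpy_toio(dst, fw->data, fw->size);
	}
	release_firmware(fw);
	return rc;
}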
diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile
index bf10bb7e2b91..29cfc3e51ac9 100644
--- a/drivers/misc/mic/scif/Makefile
+++ b/drivers/misc/mic/scif/Makefile
@@ -13,3 +13,8 @@ scif-objs += scif_epd.o
13scif-objs += scif_rb.o 13scif-objs += scif_rb.o
14scif-objs += scif_nodeqp.o 14scif-objs += scif_nodeqp.o
15scif-objs += scif_nm.o 15scif-objs += scif_nm.o
16scif-objs += scif_dma.o
17scif-objs += scif_fence.o
18scif-objs += scif_mmap.o
19scif-objs += scif_rma.o
20scif-objs += scif_rma_list.o
diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c
index f39d3135a9ef..ddc9e4b08b5c 100644
--- a/drivers/misc/mic/scif/scif_api.c
+++ b/drivers/misc/mic/scif/scif_api.c
@@ -37,9 +37,21 @@ enum conn_async_state {
37 ASYNC_CONN_FLUSH_WORK /* async work flush in progress */ 37 ASYNC_CONN_FLUSH_WORK /* async work flush in progress */
38}; 38};
39 39
40/*
41 * File operations for anonymous inode file associated with a SCIF endpoint,
42 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
43 * poll API in the kernel and these take in a struct file *. Since a struct
44 * file is not available to kernel mode SCIF, it uses an anonymous file for
45 * this purpose.
46 */
47const struct file_operations scif_anon_fops = {
48 .owner = THIS_MODULE,
49};
50
40scif_epd_t scif_open(void) 51scif_epd_t scif_open(void)
41{ 52{
42 struct scif_endpt *ep; 53 struct scif_endpt *ep;
54 int err;
43 55
44 might_sleep(); 56 might_sleep();
45 ep = kzalloc(sizeof(*ep), GFP_KERNEL); 57 ep = kzalloc(sizeof(*ep), GFP_KERNEL);
@@ -50,15 +62,22 @@ scif_epd_t scif_open(void)
50 if (!ep->qp_info.qp) 62 if (!ep->qp_info.qp)
51 goto err_qp_alloc; 63 goto err_qp_alloc;
52 64
65 err = scif_anon_inode_getfile(ep);
66 if (err)
67 goto err_anon_inode;
68
53 spin_lock_init(&ep->lock); 69 spin_lock_init(&ep->lock);
54 mutex_init(&ep->sendlock); 70 mutex_init(&ep->sendlock);
55 mutex_init(&ep->recvlock); 71 mutex_init(&ep->recvlock);
56 72
73 scif_rma_ep_init(ep);
57 ep->state = SCIFEP_UNBOUND; 74 ep->state = SCIFEP_UNBOUND;
58 dev_dbg(scif_info.mdev.this_device, 75 dev_dbg(scif_info.mdev.this_device,
59 "SCIFAPI open: ep %p success\n", ep); 76 "SCIFAPI open: ep %p success\n", ep);
60 return ep; 77 return ep;
61 78
79err_anon_inode:
80 kfree(ep->qp_info.qp);
62err_qp_alloc: 81err_qp_alloc:
63 kfree(ep); 82 kfree(ep);
64err_ep_alloc: 83err_ep_alloc:
@@ -166,8 +185,11 @@ int scif_close(scif_epd_t epd)
166 185
167 switch (oldstate) { 186 switch (oldstate) {
168 case SCIFEP_ZOMBIE: 187 case SCIFEP_ZOMBIE:
188 dev_err(scif_info.mdev.this_device,
189 "SCIFAPI close: zombie state unexpected\n");
169 case SCIFEP_DISCONNECTED: 190 case SCIFEP_DISCONNECTED:
170 spin_unlock(&ep->lock); 191 spin_unlock(&ep->lock);
192 scif_unregister_all_windows(epd);
171 /* Remove from the disconnected list */ 193 /* Remove from the disconnected list */
172 mutex_lock(&scif_info.connlock); 194 mutex_lock(&scif_info.connlock);
173 list_for_each_safe(pos, tmpq, &scif_info.disconnected) { 195 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
@@ -189,6 +211,7 @@ int scif_close(scif_epd_t epd)
189 case SCIFEP_CLOSING: 211 case SCIFEP_CLOSING:
190 { 212 {
191 spin_unlock(&ep->lock); 213 spin_unlock(&ep->lock);
214 scif_unregister_all_windows(epd);
192 scif_disconnect_ep(ep); 215 scif_disconnect_ep(ep);
193 break; 216 break;
194 } 217 }
@@ -200,7 +223,7 @@ int scif_close(scif_epd_t epd)
200 struct scif_endpt *aep; 223 struct scif_endpt *aep;
201 224
202 spin_unlock(&ep->lock); 225 spin_unlock(&ep->lock);
203 spin_lock(&scif_info.eplock); 226 mutex_lock(&scif_info.eplock);
204 227
205 /* remove from listen list */ 228 /* remove from listen list */
206 list_for_each_safe(pos, tmpq, &scif_info.listen) { 229 list_for_each_safe(pos, tmpq, &scif_info.listen) {
@@ -222,7 +245,7 @@ int scif_close(scif_epd_t epd)
222 break; 245 break;
223 } 246 }
224 } 247 }
225 spin_unlock(&scif_info.eplock); 248 mutex_unlock(&scif_info.eplock);
226 mutex_lock(&scif_info.connlock); 249 mutex_lock(&scif_info.connlock);
227 list_for_each_safe(pos, tmpq, &scif_info.connected) { 250 list_for_each_safe(pos, tmpq, &scif_info.connected) {
228 tmpep = list_entry(pos, 251 tmpep = list_entry(pos,
@@ -242,13 +265,13 @@ int scif_close(scif_epd_t epd)
242 } 265 }
243 mutex_unlock(&scif_info.connlock); 266 mutex_unlock(&scif_info.connlock);
244 scif_teardown_ep(aep); 267 scif_teardown_ep(aep);
245 spin_lock(&scif_info.eplock); 268 mutex_lock(&scif_info.eplock);
246 scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD); 269 scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
247 ep->acceptcnt--; 270 ep->acceptcnt--;
248 } 271 }
249 272
250 spin_lock(&ep->lock); 273 spin_lock(&ep->lock);
251 spin_unlock(&scif_info.eplock); 274 mutex_unlock(&scif_info.eplock);
252 275
253 /* Remove and reject any pending connection requests. */ 276 /* Remove and reject any pending connection requests. */
254 while (ep->conreqcnt) { 277 while (ep->conreqcnt) {
@@ -279,6 +302,7 @@ int scif_close(scif_epd_t epd)
279 } 302 }
280 } 303 }
281 scif_put_port(ep->port.port); 304 scif_put_port(ep->port.port);
305 scif_anon_inode_fput(ep);
282 scif_teardown_ep(ep); 306 scif_teardown_ep(ep);
283 scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD); 307 scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
284 return 0; 308 return 0;
@@ -409,9 +433,9 @@ int scif_listen(scif_epd_t epd, int backlog)
409 scif_teardown_ep(ep); 433 scif_teardown_ep(ep);
410 ep->qp_info.qp = NULL; 434 ep->qp_info.qp = NULL;
411 435
412 spin_lock(&scif_info.eplock); 436 mutex_lock(&scif_info.eplock);
413 list_add_tail(&ep->list, &scif_info.listen); 437 list_add_tail(&ep->list, &scif_info.listen);
414 spin_unlock(&scif_info.eplock); 438 mutex_unlock(&scif_info.eplock);
415 return 0; 439 return 0;
416} 440}
417EXPORT_SYMBOL_GPL(scif_listen); 441EXPORT_SYMBOL_GPL(scif_listen);
@@ -450,6 +474,13 @@ static int scif_conn_func(struct scif_endpt *ep)
450 struct scifmsg msg; 474 struct scifmsg msg;
451 struct device *spdev; 475 struct device *spdev;
452 476
477 err = scif_reserve_dma_chan(ep);
478 if (err) {
479 dev_err(&ep->remote_dev->sdev->dev,
480 "%s %d err %d\n", __func__, __LINE__, err);
481 ep->state = SCIFEP_BOUND;
482 goto connect_error_simple;
483 }
453 /* Initiate the first part of the endpoint QP setup */ 484 /* Initiate the first part of the endpoint QP setup */
454 err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset, 485 err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
455 SCIF_ENDPT_QP_SIZE, ep->remote_dev); 486 SCIF_ENDPT_QP_SIZE, ep->remote_dev);
@@ -558,8 +589,10 @@ void scif_conn_handler(struct work_struct *work)
558 list_del(&ep->conn_list); 589 list_del(&ep->conn_list);
559 } 590 }
560 spin_unlock(&scif_info.nb_connect_lock); 591 spin_unlock(&scif_info.nb_connect_lock);
561 if (ep) 592 if (ep) {
562 ep->conn_err = scif_conn_func(ep); 593 ep->conn_err = scif_conn_func(ep);
594 wake_up_interruptible(&ep->conn_pend_wq);
595 }
563 } while (ep); 596 } while (ep);
564} 597}
565 598
@@ -660,6 +693,7 @@ int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
660 ep->remote_dev = &scif_dev[dst->node]; 693 ep->remote_dev = &scif_dev[dst->node];
661 ep->qp_info.qp->magic = SCIFEP_MAGIC; 694 ep->qp_info.qp->magic = SCIFEP_MAGIC;
662 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { 695 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
696 init_waitqueue_head(&ep->conn_pend_wq);
663 spin_lock(&scif_info.nb_connect_lock); 697 spin_lock(&scif_info.nb_connect_lock);
664 list_add_tail(&ep->conn_list, &scif_info.nb_connect_list); 698 list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
665 spin_unlock(&scif_info.nb_connect_lock); 699 spin_unlock(&scif_info.nb_connect_lock);
@@ -782,12 +816,25 @@ retry_connection:
782 cep->remote_dev = &scif_dev[peer->node]; 816 cep->remote_dev = &scif_dev[peer->node];
783 cep->remote_ep = conreq->msg.payload[0]; 817 cep->remote_ep = conreq->msg.payload[0];
784 818
819 scif_rma_ep_init(cep);
820
821 err = scif_reserve_dma_chan(cep);
822 if (err) {
823 dev_err(scif_info.mdev.this_device,
824 "%s %d err %d\n", __func__, __LINE__, err);
825 goto scif_accept_error_qpalloc;
826 }
827
785 cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL); 828 cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
786 if (!cep->qp_info.qp) { 829 if (!cep->qp_info.qp) {
787 err = -ENOMEM; 830 err = -ENOMEM;
788 goto scif_accept_error_qpalloc; 831 goto scif_accept_error_qpalloc;
789 } 832 }
790 833
834 err = scif_anon_inode_getfile(cep);
835 if (err)
836 goto scif_accept_error_anon_inode;
837
791 cep->qp_info.qp->magic = SCIFEP_MAGIC; 838 cep->qp_info.qp->magic = SCIFEP_MAGIC;
792 spdev = scif_get_peer_dev(cep->remote_dev); 839 spdev = scif_get_peer_dev(cep->remote_dev);
793 if (IS_ERR(spdev)) { 840 if (IS_ERR(spdev)) {
@@ -858,6 +905,8 @@ retry:
858 spin_unlock(&cep->lock); 905 spin_unlock(&cep->lock);
859 return 0; 906 return 0;
860scif_accept_error_map: 907scif_accept_error_map:
908 scif_anon_inode_fput(cep);
909scif_accept_error_anon_inode:
861 scif_teardown_ep(cep); 910 scif_teardown_ep(cep);
862scif_accept_error_qpalloc: 911scif_accept_error_qpalloc:
863 kfree(cep); 912 kfree(cep);
@@ -1247,6 +1296,134 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
1247} 1296}
1248EXPORT_SYMBOL_GPL(scif_recv); 1297EXPORT_SYMBOL_GPL(scif_recv);
1249 1298
1299static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
1300 poll_table *p, struct scif_endpt *ep)
1301{
1302 /*
1303 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
1304 * and regrab it afterwards. Because the endpoint state might have
1305 * changed while the lock was given up, the state must be checked
1306 * again after re-acquiring the lock. The code in __scif_pollfd(..)
1307 * does this.
1308 */
1309 spin_unlock(&ep->lock);
1310 poll_wait(f, wq, p);
1311 spin_lock(&ep->lock);
1312}
1313
1314unsigned int
1315__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
1316{
1317 unsigned int mask = 0;
1318
1319 dev_dbg(scif_info.mdev.this_device,
1320 "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
1321
1322 spin_lock(&ep->lock);
1323
1324 /* Endpoint is waiting for a non-blocking connect to complete */
1325 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1326 _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
1327 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1328 if (ep->state == SCIFEP_CONNECTED ||
1329 ep->state == SCIFEP_DISCONNECTED ||
1330 ep->conn_err)
1331 mask |= POLLOUT;
1332 goto exit;
1333 }
1334 }
1335
1336 /* Endpoint is listening for incoming connection requests */
1337 if (ep->state == SCIFEP_LISTENING) {
1338 _scif_poll_wait(f, &ep->conwq, wait, ep);
1339 if (ep->state == SCIFEP_LISTENING) {
1340 if (ep->conreqcnt)
1341 mask |= POLLIN;
1342 goto exit;
1343 }
1344 }
1345
1346 /* Endpoint is connected or disconnected */
1347 if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
1348 if (poll_requested_events(wait) & POLLIN)
1349 _scif_poll_wait(f, &ep->recvwq, wait, ep);
1350 if (poll_requested_events(wait) & POLLOUT)
1351 _scif_poll_wait(f, &ep->sendwq, wait, ep);
1352 if (ep->state == SCIFEP_CONNECTED ||
1353 ep->state == SCIFEP_DISCONNECTED) {
1354 /* Data can be read without blocking */
1355 if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
1356 mask |= POLLIN;
1357 /* Data can be written without blocking */
1358 if (scif_rb_space(&ep->qp_info.qp->outbound_q))
1359 mask |= POLLOUT;
1360 /* Return POLLHUP if endpoint is disconnected */
1361 if (ep->state == SCIFEP_DISCONNECTED)
1362 mask |= POLLHUP;
1363 goto exit;
1364 }
1365 }
1366
1367 /* Return POLLERR if the endpoint is in none of the above states */
1368 mask |= POLLERR;
1369exit:
1370 spin_unlock(&ep->lock);
1371 return mask;
1372}
1373
1374/**
1375 * scif_poll() - Kernel mode SCIF poll
1376 * @ufds: Array of scif_pollepd structures containing the end points
1377 * and events to poll on
1378 * @nfds: Size of the ufds array
1379 * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
1380 *
1381 * The code flow in this function is based on do_poll(..) in select.c
1382 *
1383 * Returns the number of endpoints which have pending events or 0 in
1384 * the event of a timeout. If a signal is used for wake up, -EINTR is
1385 * returned.
1386 */
1387int
1388scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
1389{
1390 struct poll_wqueues table;
1391 poll_table *pt;
1392 int i, mask, count = 0, timed_out = timeout_msecs == 0;
1393 u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
1394 : msecs_to_jiffies(timeout_msecs);
1395
1396 poll_initwait(&table);
1397 pt = &table.pt;
1398 while (1) {
1399 for (i = 0; i < nfds; i++) {
1400 pt->_key = ufds[i].events | POLLERR | POLLHUP;
1401 mask = __scif_pollfd(ufds[i].epd->anon,
1402 pt, ufds[i].epd);
1403 mask &= ufds[i].events | POLLERR | POLLHUP;
1404 if (mask) {
1405 count++;
1406 pt->_qproc = NULL;
1407 }
1408 ufds[i].revents = mask;
1409 }
1410 pt->_qproc = NULL;
1411 if (!count) {
1412 count = table.error;
1413 if (signal_pending(current))
1414 count = -EINTR;
1415 }
1416 if (count || timed_out)
1417 break;
1418
1419 if (!schedule_timeout_interruptible(timeout))
1420 timed_out = 1;
1421 }
1422 poll_freewait(&table);
1423 return count;
1424}
1425EXPORT_SYMBOL_GPL(scif_poll);
1426
1250int scif_get_node_ids(u16 *nodes, int len, u16 *self) 1427int scif_get_node_ids(u16 *nodes, int len, u16 *self)
1251{ 1428{
1252 int online = 0; 1429 int online = 0;
@@ -1274,3 +1451,46 @@ int scif_get_node_ids(u16 *nodes, int len, u16 *self)
1274 return online; 1451 return online;
1275} 1452}
1276EXPORT_SYMBOL_GPL(scif_get_node_ids); 1453EXPORT_SYMBOL_GPL(scif_get_node_ids);
1454
1455static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
1456{
1457 struct scif_client *client =
1458 container_of(si, struct scif_client, si);
1459 struct scif_peer_dev *spdev =
1460 container_of(dev, struct scif_peer_dev, dev);
1461
1462 if (client->probe)
1463 client->probe(spdev);
1464 return 0;
1465}
1466
1467static void scif_remove_client_dev(struct device *dev,
1468 struct subsys_interface *si)
1469{
1470 struct scif_client *client =
1471 container_of(si, struct scif_client, si);
1472 struct scif_peer_dev *spdev =
1473 container_of(dev, struct scif_peer_dev, dev);
1474
1475 if (client->remove)
1476 client->remove(spdev);
1477}
1478
1479void scif_client_unregister(struct scif_client *client)
1480{
1481 subsys_interface_unregister(&client->si);
1482}
1483EXPORT_SYMBOL_GPL(scif_client_unregister);
1484
1485int scif_client_register(struct scif_client *client)
1486{
1487 struct subsys_interface *si = &client->si;
1488
1489 si->name = client->name;
1490 si->subsys = &scif_peer_bus;
1491 si->add_dev = scif_add_client_dev;
1492 si->remove_dev = scif_remove_client_dev;
1493
1494 return subsys_interface_register(&client->si);
1495}
1496EXPORT_SYMBOL_GPL(scif_client_register);
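[Editor's aside] The new kernel-mode poll support added above is exported as scif_poll(); a short usage sketch based on the kernel-doc and structure fields visible in this hunk (the endpoint setup is assumed to have happened elsewhere):

#include <linux/scif.h>
#include <linux/poll.h>

/* Wait up to two seconds for 'epd' to become readable. Returns true when
 * data can be read without blocking.
 */
static bool demo_wait_readable(scif_epd_t epd)
{
	struct scif_pollepd pollepd = {
		.epd    = epd,
		.events = POLLIN,
	};
	int rc;

	/* A negative timeout means "wait forever"; 0 polls exactly once. */
	rc = scif_poll(&pollepd, 1, 2000);

	return rc > 0 && (pollepd.revents & POLLIN);
}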
diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c
index 51f14e2a1196..6884dad97e17 100644
--- a/drivers/misc/mic/scif/scif_debugfs.c
+++ b/drivers/misc/mic/scif/scif_debugfs.c
@@ -62,10 +62,87 @@ static const struct file_operations scif_dev_ops = {
62 .release = scif_dev_test_release 62 .release = scif_dev_test_release
63}; 63};
64 64
65void __init scif_init_debugfs(void) 65static void scif_display_window(struct scif_window *window, struct seq_file *s)
66{
67 int j;
68 struct scatterlist *sg;
69 scif_pinned_pages_t pin = window->pinned_pages;
70
71 seq_printf(s, "window %p type %d temp %d offset 0x%llx ",
72 window, window->type, window->temp, window->offset);
73 seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ",
74 window->nr_pages, window->nr_contig_chunks, window->prot);
75 seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ",
76 window->ref_count, window->magic, window->peer_window);
77 seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n",
78 window->unreg_state, window->va_for_temp);
79
80 for (j = 0; j < window->nr_contig_chunks; j++)
81 seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j,
82 window->dma_addr[j], window->num_pages[j]);
83
84 if (window->type == SCIF_WINDOW_SELF && pin)
85 for (j = 0; j < window->nr_pages; j++)
86 seq_printf(s, "page[%d] = pinned_pages %p address %p\n",
87 j, pin->pages[j],
88 page_address(pin->pages[j]));
89
90 if (window->st)
91 for_each_sg(window->st->sgl, sg, window->st->nents, j)
92 seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n",
93 j, sg_dma_address(sg), sg_dma_len(sg));
94}
95
96static void scif_display_all_windows(struct list_head *head, struct seq_file *s)
66{ 97{
67 struct dentry *d; 98 struct list_head *item;
99 struct scif_window *window;
68 100
101 list_for_each(item, head) {
102 window = list_entry(item, struct scif_window, list);
103 scif_display_window(window, s);
104 }
105}
106
107static int scif_rma_test(struct seq_file *s, void *unused)
108{
109 struct scif_endpt *ep;
110 struct list_head *pos;
111
112 mutex_lock(&scif_info.connlock);
113 list_for_each(pos, &scif_info.connected) {
114 ep = list_entry(pos, struct scif_endpt, list);
115 seq_printf(s, "ep %p self windows\n", ep);
116 mutex_lock(&ep->rma_info.rma_lock);
117 scif_display_all_windows(&ep->rma_info.reg_list, s);
118 seq_printf(s, "ep %p remote windows\n", ep);
119 scif_display_all_windows(&ep->rma_info.remote_reg_list, s);
120 mutex_unlock(&ep->rma_info.rma_lock);
121 }
122 mutex_unlock(&scif_info.connlock);
123 return 0;
124}
125
126static int scif_rma_test_open(struct inode *inode, struct file *file)
127{
128 return single_open(file, scif_rma_test, inode->i_private);
129}
130
131static int scif_rma_test_release(struct inode *inode, struct file *file)
132{
133 return single_release(inode, file);
134}
135
136static const struct file_operations scif_rma_ops = {
137 .owner = THIS_MODULE,
138 .open = scif_rma_test_open,
139 .read = seq_read,
140 .llseek = seq_lseek,
141 .release = scif_rma_test_release
142};
143
144void __init scif_init_debugfs(void)
145{
69 146 	scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
70 147 	if (!scif_dbg) {
71 148 		dev_err(scif_info.mdev.this_device,
@@ -73,8 +150,8 @@ void __init scif_init_debugfs(void)
73 150 			return;
74 151 	}
75 152
153	debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops);
154	debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops);
78 155 	debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log);
79 156 	debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable);
80 157 }
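The new scif_rma file uses the stock single_open()/seq_file idiom. For reference, the same pattern in its smallest form (names here are purely illustrative); the patch's scif_rma_test_release() is a thin wrapper around single_release(), which can also be used directly, as below.

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *s, void *unused)
{
	seq_puts(s, "state dump goes here\n");
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	/* inode->i_private becomes the seq_file's private pointer */
	return single_open(file, demo_show, inode->i_private);
}

static const struct file_operations demo_fops = {
	.owner   = THIS_MODULE,
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

/* During init, e.g.: debugfs_create_file("demo", 0444, parent, NULL, &demo_fops); */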
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
new file mode 100644
index 000000000000..95a13c629a8e
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -0,0 +1,1979 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * Intel SCIF driver.
16 *
17 */
18#include "scif_main.h"
19#include "scif_map.h"
20
21/*
22 * struct scif_dma_comp_cb - SCIF DMA completion callback
23 *
24 * @dma_completion_func: DMA completion callback
25 * @cb_cookie: DMA completion callback cookie
26 * @temp_buf: Temporary buffer
27 * @temp_buf_to_free: Temporary buffer to be freed
28 * @is_cache: Is a kmem_cache allocated buffer
29 * @dst_offset: Destination registration offset
30 * @dst_window: Destination registration window
31 * @len: Length of the temp buffer
32 * @temp_phys: DMA address of the temp buffer
33 * @sdev: The SCIF device
34 * @header_padding: padding for cache line alignment
35 */
36struct scif_dma_comp_cb {
37 void (*dma_completion_func)(void *cookie);
38 void *cb_cookie;
39 u8 *temp_buf;
40 u8 *temp_buf_to_free;
41 bool is_cache;
42 s64 dst_offset;
43 struct scif_window *dst_window;
44 size_t len;
45 dma_addr_t temp_phys;
46 struct scif_dev *sdev;
47 int header_padding;
48};
49
50/**
51 * struct scif_copy_work - Work for DMA copy
52 *
53 * @src_offset: Starting source offset
54 * @dst_offset: Starting destination offset
55 * @src_window: Starting src registered window
56 * @dst_window: Starting dst registered window
57 * @loopback: true if this is a loopback DMA transfer
58 * @len: Length of the transfer
59 * @comp_cb: DMA copy completion callback
60 * @remote_dev: The remote SCIF peer device
61 * @fence_type: polling or interrupt based
62 * @ordered: is this a tail byte ordered DMA transfer
63 */
64struct scif_copy_work {
65 s64 src_offset;
66 s64 dst_offset;
67 struct scif_window *src_window;
68 struct scif_window *dst_window;
69 int loopback;
70 size_t len;
71 struct scif_dma_comp_cb *comp_cb;
72 struct scif_dev *remote_dev;
73 int fence_type;
74 bool ordered;
75};
76
77#ifndef list_entry_next
78#define list_entry_next(pos, member) \
79 list_entry(pos->member.next, typeof(*pos), member)
80#endif
81
82/**
83 * scif_reserve_dma_chan:
84 * @ep: Endpoint Descriptor.
85 *
86 * This routine reserves a DMA channel for a particular
87 * endpoint. All DMA transfers for an endpoint are always
88 * programmed on the same DMA channel.
89 */
90int scif_reserve_dma_chan(struct scif_endpt *ep)
91{
92 int err = 0;
93 struct scif_dev *scifdev;
94 struct scif_hw_dev *sdev;
95 struct dma_chan *chan;
96
97 /* Loopback DMAs are not supported on the management node */
98 if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
99 return 0;
100 if (scif_info.nodeid)
101 scifdev = &scif_dev[0];
102 else
103 scifdev = ep->remote_dev;
104 sdev = scifdev->sdev;
105 if (!sdev->num_dma_ch)
106 return -ENODEV;
107 chan = sdev->dma_ch[scifdev->dma_ch_idx];
108 scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
109 mutex_lock(&ep->rma_info.rma_lock);
110 ep->rma_info.dma_chan = chan;
111 mutex_unlock(&ep->rma_info.rma_lock);
112 return err;
113}
114
115#ifdef CONFIG_MMU_NOTIFIER
116/**
 117 * __scif_rma_destroy_tcw:
 118 *
 119 * This routine destroys temporary cached windows overlapping [start, start + len)
120 */
121static
122void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
123 struct scif_endpt *ep,
124 u64 start, u64 len)
125{
126 struct list_head *item, *tmp;
127 struct scif_window *window;
128 u64 start_va, end_va;
129 u64 end = start + len;
130
131 if (end <= start)
132 return;
133
134 list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
135 window = list_entry(item, struct scif_window, list);
136 ep = (struct scif_endpt *)window->ep;
137 if (!len)
138 break;
139 start_va = window->va_for_temp;
140 end_va = start_va + (window->nr_pages << PAGE_SHIFT);
141 if (start < start_va && end <= start_va)
142 break;
143 if (start >= end_va)
144 continue;
145 __scif_rma_destroy_tcw_helper(window);
146 }
147}
148
149static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
150{
151 struct scif_endpt *ep = mmn->ep;
152
153 spin_lock(&ep->rma_info.tc_lock);
154 __scif_rma_destroy_tcw(mmn, ep, start, len);
155 spin_unlock(&ep->rma_info.tc_lock);
156}
157
158static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
159{
160 struct list_head *item, *tmp;
161 struct scif_mmu_notif *mmn;
162
163 list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
164 mmn = list_entry(item, struct scif_mmu_notif, list);
165 scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
166 }
167}
168
169static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
170{
171 struct list_head *item, *tmp;
172 struct scif_mmu_notif *mmn;
173
174 spin_lock(&ep->rma_info.tc_lock);
175 list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
176 mmn = list_entry(item, struct scif_mmu_notif, list);
177 __scif_rma_destroy_tcw(mmn, ep, 0, ULONG_MAX);
178 }
179 spin_unlock(&ep->rma_info.tc_lock);
180}
181
182static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
183{
184 if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit)
185 return false;
186 if ((atomic_read(&ep->rma_info.tcw_total_pages)
187 + (cur_bytes >> PAGE_SHIFT)) >
188 scif_info.rma_tc_limit) {
189 dev_info(scif_info.mdev.this_device,
190 "%s %d total=%d, current=%zu reached max\n",
191 __func__, __LINE__,
192 atomic_read(&ep->rma_info.tcw_total_pages),
193 (1 + (cur_bytes >> PAGE_SHIFT)));
194 scif_rma_destroy_tcw_invalid();
195 __scif_rma_destroy_tcw_ep(ep);
196 }
197 return true;
198}
199
200static void scif_mmu_notifier_release(struct mmu_notifier *mn,
201 struct mm_struct *mm)
202{
203 struct scif_mmu_notif *mmn;
204
205 mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
206 scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
207 schedule_work(&scif_info.misc_work);
208}
209
210static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
211 struct mm_struct *mm,
212 unsigned long address)
213{
214 struct scif_mmu_notif *mmn;
215
216 mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
217 scif_rma_destroy_tcw(mmn, address, PAGE_SIZE);
218}
219
220static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
221 struct mm_struct *mm,
222 unsigned long start,
223 unsigned long end)
224{
225 struct scif_mmu_notif *mmn;
226
227 mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
228 scif_rma_destroy_tcw(mmn, start, end - start);
229}
230
231static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
232 struct mm_struct *mm,
233 unsigned long start,
234 unsigned long end)
235{
236 /*
237 * Nothing to do here, everything needed was done in
238 * invalidate_range_start.
239 */
240}
241
242static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
243 .release = scif_mmu_notifier_release,
244 .clear_flush_young = NULL,
245 .invalidate_page = scif_mmu_notifier_invalidate_page,
246 .invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
247 .invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
248
249static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
250{
251 struct scif_endpt_rma_info *rma = &ep->rma_info;
252 struct scif_mmu_notif *mmn = NULL;
253 struct list_head *item, *tmp;
254
255 mutex_lock(&ep->rma_info.mmn_lock);
256 list_for_each_safe(item, tmp, &rma->mmn_list) {
257 mmn = list_entry(item, struct scif_mmu_notif, list);
258 mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
259 list_del(item);
260 kfree(mmn);
261 }
262 mutex_unlock(&ep->rma_info.mmn_lock);
263}
264
265static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
266 struct mm_struct *mm, struct scif_endpt *ep)
267{
268 mmn->ep = ep;
269 mmn->mm = mm;
270 mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
271 INIT_LIST_HEAD(&mmn->list);
272 INIT_LIST_HEAD(&mmn->tc_reg_list);
273}
274
275static struct scif_mmu_notif *
276scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
277{
278 struct scif_mmu_notif *mmn;
279 struct list_head *item;
280
281 list_for_each(item, &rma->mmn_list) {
282 mmn = list_entry(item, struct scif_mmu_notif, list);
283 if (mmn->mm == mm)
284 return mmn;
285 }
286 return NULL;
287}
288
289static struct scif_mmu_notif *
290scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
291{
292 struct scif_mmu_notif *mmn
293 = kzalloc(sizeof(*mmn), GFP_KERNEL);
294
295 if (!mmn)
 296		return ERR_PTR(-ENOMEM);
297
298 scif_init_mmu_notifier(mmn, current->mm, ep);
299 if (mmu_notifier_register(&mmn->ep_mmu_notifier,
300 current->mm)) {
301 kfree(mmn);
 302		return ERR_PTR(-EBUSY);
303 }
304 list_add(&mmn->list, &ep->rma_info.mmn_list);
305 return mmn;
306}
307
308/*
309 * Called from the misc thread to destroy temporary cached windows and
310 * unregister the MMU notifier for the SCIF endpoint.
311 */
312void scif_mmu_notif_handler(struct work_struct *work)
313{
314 struct list_head *pos, *tmpq;
315 struct scif_endpt *ep;
316restart:
317 scif_rma_destroy_tcw_invalid();
318 spin_lock(&scif_info.rmalock);
319 list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) {
320 ep = list_entry(pos, struct scif_endpt, mmu_list);
321 list_del(&ep->mmu_list);
322 spin_unlock(&scif_info.rmalock);
323 scif_rma_destroy_tcw_ep(ep);
324 scif_ep_unregister_mmu_notifier(ep);
325 goto restart;
326 }
327 spin_unlock(&scif_info.rmalock);
328}
329
330static bool scif_is_set_reg_cache(int flags)
331{
332 return !!(flags & SCIF_RMA_USECACHE);
333}
334#else
335static struct scif_mmu_notif *
336scif_find_mmu_notifier(struct mm_struct *mm,
337 struct scif_endpt_rma_info *rma)
338{
339 return NULL;
340}
341
342static struct scif_mmu_notif *
343scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
344{
345 return NULL;
346}
347
348void scif_mmu_notif_handler(struct work_struct *work)
349{
350}
351
352static bool scif_is_set_reg_cache(int flags)
353{
354 return false;
355}
356
357static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
358{
359 return false;
360}
361#endif
362
363/**
364 * scif_register_temp:
365 * @epd: End Point Descriptor.
366 * @addr: virtual address to/from which to copy
367 * @len: length of range to copy
368 * @out_offset: computed offset returned by reference.
369 * @out_window: allocated registered window returned by reference.
370 *
371 * Create a temporary registered window. The peer will not know about this
372 * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's.
373 */
374static int
375scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
376 off_t *out_offset, struct scif_window **out_window)
377{
378 struct scif_endpt *ep = (struct scif_endpt *)epd;
379 int err;
380 scif_pinned_pages_t pinned_pages;
381 size_t aligned_len;
382
383 aligned_len = ALIGN(len, PAGE_SIZE);
384
385 err = __scif_pin_pages((void *)(addr & PAGE_MASK),
386 aligned_len, &prot, 0, &pinned_pages);
387 if (err)
388 return err;
389
390 pinned_pages->prot = prot;
391
392 /* Compute the offset for this registration */
393 err = scif_get_window_offset(ep, 0, 0,
394 aligned_len >> PAGE_SHIFT,
395 (s64 *)out_offset);
396 if (err)
397 goto error_unpin;
398
399 /* Allocate and prepare self registration window */
400 *out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
401 *out_offset, true);
402 if (!*out_window) {
403 scif_free_window_offset(ep, NULL, *out_offset);
404 err = -ENOMEM;
405 goto error_unpin;
406 }
407
408 (*out_window)->pinned_pages = pinned_pages;
409 (*out_window)->nr_pages = pinned_pages->nr_pages;
410 (*out_window)->prot = pinned_pages->prot;
411
412 (*out_window)->va_for_temp = addr & PAGE_MASK;
413 err = scif_map_window(ep->remote_dev, *out_window);
414 if (err) {
415 /* Something went wrong! Rollback */
416 scif_destroy_window(ep, *out_window);
417 *out_window = NULL;
418 } else {
419 *out_offset |= (addr - (*out_window)->va_for_temp);
420 }
421 return err;
422error_unpin:
423 if (err)
424 dev_err(&ep->remote_dev->sdev->dev,
425 "%s %d err %d\n", __func__, __LINE__, err);
426 scif_unpin_pages(pinned_pages);
427 return err;
428}
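The offset handed back here is later combined with the caller's unaligned address (see scif_rma_copy() below), so the copy loops always see an offset that preserves the buffer's sub-page alignment. A tiny worked example with made-up values:

/*
 * Illustration only (values are invented):
 *
 *   addr (caller's VA)  = 0x7f30d234
 *   va_for_temp         = addr & PAGE_MASK = 0x7f30d000
 *   window->offset      = 0x80000   (from scif_get_window_offset())
 *
 *   loffset = window->offset + (addr - va_for_temp)
 *           = 0x80000 + 0x234
 *           = 0x80234
 */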
429
430#define SCIF_DMA_TO (3 * HZ)
431
432/*
433 * scif_sync_dma - Program a DMA without an interrupt descriptor
434 *
 435 * @sdev - The SCIF hardware device (used here
 436 * for error reporting).
 437 * @chan - DMA channel to be used.
 438 * @sync_wait - Wait for DMA to complete?
439 *
440 * Return 0 on success and -errno on error.
441 */
442static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
443 bool sync_wait)
444{
445 int err = 0;
446 struct dma_async_tx_descriptor *tx = NULL;
447 enum dma_ctrl_flags flags = DMA_PREP_FENCE;
448 dma_cookie_t cookie;
449 struct dma_device *ddev;
450
451 if (!chan) {
452 err = -EIO;
453 dev_err(&sdev->dev, "%s %d err %d\n",
454 __func__, __LINE__, err);
455 return err;
456 }
457 ddev = chan->device;
458
459 tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
460 if (!tx) {
461 err = -ENOMEM;
462 dev_err(&sdev->dev, "%s %d err %d\n",
463 __func__, __LINE__, err);
464 goto release;
465 }
466 cookie = tx->tx_submit(tx);
467
468 if (dma_submit_error(cookie)) {
469 err = -ENOMEM;
470 dev_err(&sdev->dev, "%s %d err %d\n",
471 __func__, __LINE__, err);
472 goto release;
473 }
474 if (!sync_wait) {
475 dma_async_issue_pending(chan);
476 } else {
477 if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) {
478 err = 0;
479 } else {
480 err = -EIO;
481 dev_err(&sdev->dev, "%s %d err %d\n",
482 __func__, __LINE__, err);
483 }
484 }
485release:
486 return err;
487}
488
489static void scif_dma_callback(void *arg)
490{
491 struct completion *done = (struct completion *)arg;
492
493 complete(done);
494}
495
496#define SCIF_DMA_SYNC_WAIT true
497#define SCIF_DMA_POLL BIT(0)
498#define SCIF_DMA_INTR BIT(1)
499
500/*
501 * scif_async_dma - Program a DMA with an interrupt descriptor
502 *
 503 * @sdev - The SCIF hardware device (used here
 504 * for error reporting).
505 * @chan - DMA channel to be used.
506 * Return 0 on success and -errno on error.
507 */
508static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
509{
510 int err = 0;
511 struct dma_device *ddev;
512 struct dma_async_tx_descriptor *tx = NULL;
513 enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
514 DECLARE_COMPLETION_ONSTACK(done_wait);
515 dma_cookie_t cookie;
516 enum dma_status status;
517
518 if (!chan) {
519 err = -EIO;
520 dev_err(&sdev->dev, "%s %d err %d\n",
521 __func__, __LINE__, err);
522 return err;
523 }
524 ddev = chan->device;
525
526 tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
527 if (!tx) {
528 err = -ENOMEM;
529 dev_err(&sdev->dev, "%s %d err %d\n",
530 __func__, __LINE__, err);
531 goto release;
532 }
533 reinit_completion(&done_wait);
534 tx->callback = scif_dma_callback;
535 tx->callback_param = &done_wait;
536 cookie = tx->tx_submit(tx);
537
538 if (dma_submit_error(cookie)) {
539 err = -ENOMEM;
540 dev_err(&sdev->dev, "%s %d err %d\n",
541 __func__, __LINE__, err);
542 goto release;
543 }
544 dma_async_issue_pending(chan);
545
546 err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
547 if (!err) {
548 err = -EIO;
549 dev_err(&sdev->dev, "%s %d err %d\n",
550 __func__, __LINE__, err);
551 goto release;
552 }
553 err = 0;
554 status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
555 if (status != DMA_COMPLETE) {
556 err = -EIO;
557 dev_err(&sdev->dev, "%s %d err %d\n",
558 __func__, __LINE__, err);
559 goto release;
560 }
561release:
562 return err;
563}
564
565/*
566 * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
567 * DMA channel via polling.
568 *
569 * @sdev - The SCIF device
570 * @chan - DMA channel
571 * Return 0 on success and -errno on error.
572 */
573static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
574{
575 if (!chan)
576 return -EINVAL;
577 return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT);
578}
579
580/*
581 * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
582 * DMA channel via interrupt based blocking wait.
583 *
584 * @sdev - The SCIF device
585 * @chan - DMA channel
586 * Return 0 on success and -errno on error.
587 */
588int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
589{
590 if (!chan)
591 return -EINVAL;
592 return scif_async_dma(sdev, chan);
593}
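Both drain helpers are effectively fences: callers (see the tail-byte handling further down) issue one of them before any CPU access that must be ordered after previously queued DMA. A hedged sketch of that call pattern, with error handling trimmed:

/* Sketch only: make a CPU copy visible strictly after all earlier DMA
 * queued on this endpoint's channel has completed.
 */
static int demo_fenced_cpu_copy(struct scif_hw_dev *sdev,
				struct dma_chan *chan,
				void *dst, const void *src, size_t len)
{
	int err;

	err = scif_drain_dma_intr(sdev, chan);	/* blocks until the channel is idle */
	if (err)
		return err;

	memcpy(dst, src, len);			/* ordered after the drained DMA */
	return 0;
}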
594
595/**
596 * scif_rma_destroy_windows:
597 *
598 * This routine destroys all windows queued for cleanup
599 */
600void scif_rma_destroy_windows(void)
601{
602 struct list_head *item, *tmp;
603 struct scif_window *window;
604 struct scif_endpt *ep;
605 struct dma_chan *chan;
606
607 might_sleep();
608restart:
609 spin_lock(&scif_info.rmalock);
610 list_for_each_safe(item, tmp, &scif_info.rma) {
611 window = list_entry(item, struct scif_window,
612 list);
613 ep = (struct scif_endpt *)window->ep;
614 chan = ep->rma_info.dma_chan;
615
616 list_del_init(&window->list);
617 spin_unlock(&scif_info.rmalock);
618 if (!chan || !scifdev_alive(ep) ||
619 !scif_drain_dma_intr(ep->remote_dev->sdev,
620 ep->rma_info.dma_chan))
621 /* Remove window from global list */
622 window->unreg_state = OP_COMPLETED;
623 else
624 dev_warn(&ep->remote_dev->sdev->dev,
625 "DMA engine hung?\n");
626 if (window->unreg_state == OP_COMPLETED) {
627 if (window->type == SCIF_WINDOW_SELF)
628 scif_destroy_window(ep, window);
629 else
630 scif_destroy_remote_window(window);
631 atomic_dec(&ep->rma_info.tw_refcount);
632 }
633 goto restart;
634 }
635 spin_unlock(&scif_info.rmalock);
636}
637
638/**
 639 * scif_rma_destroy_tcw_invalid:
640 *
641 * This routine destroys temporary cached registered windows
642 * which have been queued for cleanup.
643 */
644void scif_rma_destroy_tcw_invalid(void)
645{
646 struct list_head *item, *tmp;
647 struct scif_window *window;
648 struct scif_endpt *ep;
649 struct dma_chan *chan;
650
651 might_sleep();
652restart:
653 spin_lock(&scif_info.rmalock);
654 list_for_each_safe(item, tmp, &scif_info.rma_tc) {
655 window = list_entry(item, struct scif_window, list);
656 ep = (struct scif_endpt *)window->ep;
657 chan = ep->rma_info.dma_chan;
658 list_del_init(&window->list);
659 spin_unlock(&scif_info.rmalock);
660 mutex_lock(&ep->rma_info.rma_lock);
661 if (!chan || !scifdev_alive(ep) ||
662 !scif_drain_dma_intr(ep->remote_dev->sdev,
663 ep->rma_info.dma_chan)) {
664 atomic_sub(window->nr_pages,
665 &ep->rma_info.tcw_total_pages);
666 scif_destroy_window(ep, window);
667 atomic_dec(&ep->rma_info.tcw_refcount);
668 } else {
669 dev_warn(&ep->remote_dev->sdev->dev,
670 "DMA engine hung?\n");
671 }
672 mutex_unlock(&ep->rma_info.rma_lock);
673 goto restart;
674 }
675 spin_unlock(&scif_info.rmalock);
676}
677
678static inline
679void *_get_local_va(off_t off, struct scif_window *window, size_t len)
680{
681 int page_nr = (off - window->offset) >> PAGE_SHIFT;
682 off_t page_off = off & ~PAGE_MASK;
683 void *va = NULL;
684
685 if (window->type == SCIF_WINDOW_SELF) {
686 struct page **pages = window->pinned_pages->pages;
687
688 va = page_address(pages[page_nr]) + page_off;
689 }
690 return va;
691}
692
693static inline
694void *ioremap_remote(off_t off, struct scif_window *window,
695 size_t len, struct scif_dev *dev,
696 struct scif_window_iter *iter)
697{
698 dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter);
699
700 /*
701 * If the DMA address is not card relative then we need the DMA
702 * addresses to be an offset into the bar. The aperture base was already
703 * added so subtract it here since scif_ioremap is going to add it again
704 */
705 if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
706 dev->sdev->aper && !dev->sdev->card_rel_da)
707 phys = phys - dev->sdev->aper->pa;
708 return scif_ioremap(phys, len, dev);
709}
710
711static inline void
712iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
713{
714 scif_iounmap(virt, size, work->remote_dev);
715}
716
717/*
718 * Takes care of ordering issue caused by
719 * 1. Hardware: Only in the case of cpu copy from mgmt node to card
720 * because of WC memory.
721 * 2. Software: If memcpy reorders copy instructions for optimization.
722 * This could happen at both mgmt node and card.
723 */
724static inline void
725scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
726{
727 if (!count)
728 return;
729
730 memcpy_toio((void __iomem __force *)dst, src, --count);
731 /* Order the last byte with the previous stores */
732 wmb();
733 *(dst + count) = *(src + count);
734}
735
736static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
737 size_t count, bool ordered)
738{
739 if (ordered)
740 scif_ordered_memcpy_toio(dst, src, count);
741 else
742 memcpy_toio((void __iomem __force *)dst, src, count);
743}
744
745static inline
746void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
747{
748 if (!count)
749 return;
750
751 memcpy_fromio(dst, (void __iomem __force *)src, --count);
752 /* Order the last byte with the previous loads */
753 rmb();
754 *(dst + count) = *(src + count);
755}
756
757static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
758 size_t count, bool ordered)
759{
760 if (ordered)
761 scif_ordered_memcpy_fromio(dst, src, count);
762 else
763 memcpy_fromio(dst, (void __iomem __force *)src, count);
764}
765
766#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)
767
768/*
769 * scif_off_to_dma_addr:
770 * Obtain the dma_addr given the window and the offset.
771 * @window: Registered window.
772 * @off: Window offset.
773 * @nr_bytes: Return the number of contiguous bytes till next DMA addr index.
 774 * @iter: Optional window iterator; on return it caches the index into
 775 * the dma_addr array and the start offset of the matching chunk.
 776 * The nr_bytes value gives the caller an estimate of the largest
 777 * contiguous DMA transfer possible, while the cached iterator state
 778 * speeds up the lookup on the next iteration.
779 */
780dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
781 size_t *nr_bytes, struct scif_window_iter *iter)
782{
783 int i, page_nr;
784 s64 start, end;
785 off_t page_off;
786
787 if (window->nr_pages == window->nr_contig_chunks) {
788 page_nr = (off - window->offset) >> PAGE_SHIFT;
789 page_off = off & ~PAGE_MASK;
790
791 if (nr_bytes)
792 *nr_bytes = PAGE_SIZE - page_off;
793 return window->dma_addr[page_nr] | page_off;
794 }
795 if (iter) {
796 i = iter->index;
797 start = iter->offset;
798 } else {
799 i = 0;
800 start = window->offset;
801 }
802 for (; i < window->nr_contig_chunks; i++) {
803 end = start + (window->num_pages[i] << PAGE_SHIFT);
804 if (off >= start && off < end) {
805 if (iter) {
806 iter->index = i;
807 iter->offset = start;
808 }
809 if (nr_bytes)
810 *nr_bytes = end - off;
811 return (window->dma_addr[i] + (off - start));
812 }
813 start += (window->num_pages[i] << PAGE_SHIFT);
814 }
815 dev_err(scif_info.mdev.this_device,
816 "%s %d BUG. Addr not found? window %p off 0x%llx\n",
817 __func__, __LINE__, window, off);
818 return SCIF_RMA_ERROR_CODE;
819}
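A worked example may help with the chunk lookup above; assume PAGE_SIZE is 4 KiB and the values below are invented:

/*
 * Illustration only. A window registered at offset 0x40000 with three
 * contiguous chunks:
 *
 *   i   num_pages[i]   window offsets covered   dma_addr[i]
 *   0   2              0x40000 - 0x41fff        0x90000000
 *   1   1              0x42000 - 0x42fff        0x98000000
 *   2   4              0x43000 - 0x46fff        0xa0000000
 *
 * For off = 0x42380 the loop stops at i = 1, returns
 * 0x98000000 + 0x380, and sets *nr_bytes = 0x43000 - 0x42380 = 0xc80,
 * i.e. how far one DMA descriptor can run before hitting the next
 * chunk boundary.  The iterator caches i = 1 and start = 0x42000 so
 * the next lookup does not rescan from the first chunk.
 */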
820
821/*
822 * Copy between rma window and temporary buffer
823 */
824static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
825 u8 *temp, size_t rem_len, bool to_temp)
826{
827 void *window_virt;
828 size_t loop_len;
829 int offset_in_page;
830 s64 end_offset;
831
832 offset_in_page = offset & ~PAGE_MASK;
833 loop_len = PAGE_SIZE - offset_in_page;
834
835 if (rem_len < loop_len)
836 loop_len = rem_len;
837
838 window_virt = _get_local_va(offset, window, loop_len);
839 if (!window_virt)
840 return;
841 if (to_temp)
842 memcpy(temp, window_virt, loop_len);
843 else
844 memcpy(window_virt, temp, loop_len);
845
846 offset += loop_len;
847 temp += loop_len;
848 rem_len -= loop_len;
849
850 end_offset = window->offset +
851 (window->nr_pages << PAGE_SHIFT);
852 while (rem_len) {
853 if (offset == end_offset) {
854 window = list_entry_next(window, list);
855 end_offset = window->offset +
856 (window->nr_pages << PAGE_SHIFT);
857 }
858 loop_len = min(PAGE_SIZE, rem_len);
859 window_virt = _get_local_va(offset, window, loop_len);
860 if (!window_virt)
861 return;
862 if (to_temp)
863 memcpy(temp, window_virt, loop_len);
864 else
865 memcpy(window_virt, temp, loop_len);
866 offset += loop_len;
867 temp += loop_len;
868 rem_len -= loop_len;
869 }
870}
871
872/**
873 * scif_rma_completion_cb:
874 * @data: RMA cookie
875 *
876 * RMA interrupt completion callback.
877 */
878static void scif_rma_completion_cb(void *data)
879{
880 struct scif_dma_comp_cb *comp_cb = data;
881
 882	/* Copy back from the temp buffer if needed, then release it */
883 if (comp_cb->dst_window)
884 scif_rma_local_cpu_copy(comp_cb->dst_offset,
885 comp_cb->dst_window,
886 comp_cb->temp_buf +
887 comp_cb->header_padding,
888 comp_cb->len, false);
889 scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
890 SCIF_KMEM_UNALIGNED_BUF_SIZE);
891 if (comp_cb->is_cache)
892 kmem_cache_free(unaligned_cache,
893 comp_cb->temp_buf_to_free);
894 else
895 kfree(comp_cb->temp_buf_to_free);
896}
897
898/* Copies between temporary buffer and offsets provided in work */
899static int
900scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
901 u8 *temp, struct dma_chan *chan,
902 bool src_local)
903{
904 struct scif_dma_comp_cb *comp_cb = work->comp_cb;
905 dma_addr_t window_dma_addr, temp_dma_addr;
906 dma_addr_t temp_phys = comp_cb->temp_phys;
907 size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
908 int offset_in_ca, ret = 0;
909 s64 end_offset, offset;
910 struct scif_window *window;
911 void *window_virt_addr;
912 size_t tail_len;
913 struct dma_async_tx_descriptor *tx;
914 struct dma_device *dev = chan->device;
915 dma_cookie_t cookie;
916
917 if (src_local) {
918 offset = work->dst_offset;
919 window = work->dst_window;
920 } else {
921 offset = work->src_offset;
922 window = work->src_window;
923 }
924
925 offset_in_ca = offset & (L1_CACHE_BYTES - 1);
926 if (offset_in_ca) {
927 loop_len = L1_CACHE_BYTES - offset_in_ca;
928 loop_len = min(loop_len, remaining_len);
929 window_virt_addr = ioremap_remote(offset, window,
930 loop_len,
931 work->remote_dev,
932 NULL);
933 if (!window_virt_addr)
934 return -ENOMEM;
935 if (src_local)
936 scif_unaligned_cpy_toio(window_virt_addr, temp,
937 loop_len,
938 work->ordered &&
939 !(remaining_len - loop_len));
940 else
941 scif_unaligned_cpy_fromio(temp, window_virt_addr,
942 loop_len, work->ordered &&
943 !(remaining_len - loop_len));
944 iounmap_remote(window_virt_addr, loop_len, work);
945
946 offset += loop_len;
947 temp += loop_len;
948 temp_phys += loop_len;
949 remaining_len -= loop_len;
950 }
951
952 offset_in_ca = offset & ~PAGE_MASK;
953 end_offset = window->offset +
954 (window->nr_pages << PAGE_SHIFT);
955
956 tail_len = remaining_len & (L1_CACHE_BYTES - 1);
957 remaining_len -= tail_len;
958 while (remaining_len) {
959 if (offset == end_offset) {
960 window = list_entry_next(window, list);
961 end_offset = window->offset +
962 (window->nr_pages << PAGE_SHIFT);
963 }
964 if (scif_is_mgmt_node())
965 temp_dma_addr = temp_phys;
966 else
967 /* Fix if we ever enable IOMMU on the card */
968 temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
969 window_dma_addr = scif_off_to_dma_addr(window, offset,
970 &nr_contig_bytes,
971 NULL);
972 loop_len = min(nr_contig_bytes, remaining_len);
973 if (src_local) {
974 if (work->ordered && !tail_len &&
975 !(remaining_len - loop_len) &&
976 loop_len != L1_CACHE_BYTES) {
977 /*
978 * Break up the last chunk of the transfer into
 979 * two steps if there is no tail, to guarantee
980 * DMA ordering. SCIF_DMA_POLLING inserts
981 * a status update descriptor in step 1 which
982 * acts as a double sided synchronization fence
983 * for the DMA engine to ensure that the last
984 * cache line in step 2 is updated last.
985 */
986 /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
987 tx =
988 dev->device_prep_dma_memcpy(chan,
989 window_dma_addr,
990 temp_dma_addr,
991 loop_len -
992 L1_CACHE_BYTES,
993 DMA_PREP_FENCE);
994 if (!tx) {
995 ret = -ENOMEM;
996 goto err;
997 }
998 cookie = tx->tx_submit(tx);
999 if (dma_submit_error(cookie)) {
1000 ret = -ENOMEM;
1001 goto err;
1002 }
1003 dma_async_issue_pending(chan);
1004 offset += (loop_len - L1_CACHE_BYTES);
1005 temp_dma_addr += (loop_len - L1_CACHE_BYTES);
1006 window_dma_addr += (loop_len - L1_CACHE_BYTES);
1007 remaining_len -= (loop_len - L1_CACHE_BYTES);
1008 loop_len = remaining_len;
1009
1010 /* Step 2) DMA: L1_CACHE_BYTES */
1011 tx =
1012 dev->device_prep_dma_memcpy(chan,
1013 window_dma_addr,
1014 temp_dma_addr,
1015 loop_len, 0);
1016 if (!tx) {
1017 ret = -ENOMEM;
1018 goto err;
1019 }
1020 cookie = tx->tx_submit(tx);
1021 if (dma_submit_error(cookie)) {
1022 ret = -ENOMEM;
1023 goto err;
1024 }
1025 dma_async_issue_pending(chan);
1026 } else {
1027 tx =
1028 dev->device_prep_dma_memcpy(chan,
1029 window_dma_addr,
1030 temp_dma_addr,
1031 loop_len, 0);
1032 if (!tx) {
1033 ret = -ENOMEM;
1034 goto err;
1035 }
1036 cookie = tx->tx_submit(tx);
1037 if (dma_submit_error(cookie)) {
1038 ret = -ENOMEM;
1039 goto err;
1040 }
1041 dma_async_issue_pending(chan);
1042 }
1043 } else {
1044 tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
1045 window_dma_addr, loop_len, 0);
1046 if (!tx) {
1047 ret = -ENOMEM;
1048 goto err;
1049 }
1050 cookie = tx->tx_submit(tx);
1051 if (dma_submit_error(cookie)) {
1052 ret = -ENOMEM;
1053 goto err;
1054 }
1055 dma_async_issue_pending(chan);
1056 }
1057 if (ret < 0)
1058 goto err;
1059 offset += loop_len;
1060 temp += loop_len;
1061 temp_phys += loop_len;
1062 remaining_len -= loop_len;
1063 offset_in_ca = 0;
1064 }
1065 if (tail_len) {
1066 if (offset == end_offset) {
1067 window = list_entry_next(window, list);
1068 end_offset = window->offset +
1069 (window->nr_pages << PAGE_SHIFT);
1070 }
1071 window_virt_addr = ioremap_remote(offset, window, tail_len,
1072 work->remote_dev,
1073 NULL);
1074 if (!window_virt_addr)
1075 return -ENOMEM;
1076 /*
1077 * The CPU copy for the tail bytes must be initiated only once
1078 * previous DMA transfers for this endpoint have completed
1079 * to guarantee ordering.
1080 */
1081 if (work->ordered) {
1082 struct scif_dev *rdev = work->remote_dev;
1083
1084 ret = scif_drain_dma_intr(rdev->sdev, chan);
1085 if (ret)
1086 return ret;
1087 }
1088 if (src_local)
1089 scif_unaligned_cpy_toio(window_virt_addr, temp,
1090 tail_len, work->ordered);
1091 else
1092 scif_unaligned_cpy_fromio(temp, window_virt_addr,
1093 tail_len, work->ordered);
1094 iounmap_remote(window_virt_addr, tail_len, work);
1095 }
1096 tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
1097 if (!tx) {
1098 ret = -ENOMEM;
1099 return ret;
1100 }
1101 tx->callback = &scif_rma_completion_cb;
1102 tx->callback_param = comp_cb;
1103 cookie = tx->tx_submit(tx);
1104
1105 if (dma_submit_error(cookie)) {
1106 ret = -ENOMEM;
1107 return ret;
1108 }
1109 dma_async_issue_pending(chan);
1110 return 0;
1111err:
1112 dev_err(scif_info.mdev.this_device,
1113 "%s %d Desc Prog Failed ret %d\n",
1114 __func__, __LINE__, ret);
1115 return ret;
1116}
1117
1118/*
1119 * _scif_rma_list_dma_copy_aligned:
1120 *
1121 * Traverse all the windows and perform DMA copy.
1122 */
1123static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
1124 struct dma_chan *chan)
1125{
1126 dma_addr_t src_dma_addr, dst_dma_addr;
1127 size_t loop_len, remaining_len, src_contig_bytes = 0;
1128 size_t dst_contig_bytes = 0;
1129 struct scif_window_iter src_win_iter;
1130 struct scif_window_iter dst_win_iter;
1131 s64 end_src_offset, end_dst_offset;
1132 struct scif_window *src_window = work->src_window;
1133 struct scif_window *dst_window = work->dst_window;
1134 s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
1135 int ret = 0;
1136 struct dma_async_tx_descriptor *tx;
1137 struct dma_device *dev = chan->device;
1138 dma_cookie_t cookie;
1139
1140 remaining_len = work->len;
1141
1142 scif_init_window_iter(src_window, &src_win_iter);
1143 scif_init_window_iter(dst_window, &dst_win_iter);
1144 end_src_offset = src_window->offset +
1145 (src_window->nr_pages << PAGE_SHIFT);
1146 end_dst_offset = dst_window->offset +
1147 (dst_window->nr_pages << PAGE_SHIFT);
1148 while (remaining_len) {
1149 if (src_offset == end_src_offset) {
1150 src_window = list_entry_next(src_window, list);
1151 end_src_offset = src_window->offset +
1152 (src_window->nr_pages << PAGE_SHIFT);
1153 scif_init_window_iter(src_window, &src_win_iter);
1154 }
1155 if (dst_offset == end_dst_offset) {
1156 dst_window = list_entry_next(dst_window, list);
1157 end_dst_offset = dst_window->offset +
1158 (dst_window->nr_pages << PAGE_SHIFT);
1159 scif_init_window_iter(dst_window, &dst_win_iter);
1160 }
1161
1162 /* compute dma addresses for transfer */
1163 src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
1164 &src_contig_bytes,
1165 &src_win_iter);
1166 dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
1167 &dst_contig_bytes,
1168 &dst_win_iter);
1169 loop_len = min(src_contig_bytes, dst_contig_bytes);
1170 loop_len = min(loop_len, remaining_len);
1171 if (work->ordered && !(remaining_len - loop_len)) {
1172 /*
1173 * Break up the last chunk of the transfer into two
1174 * steps to ensure that the last byte in step 2 is
1175 * updated last.
1176 */
1177 /* Step 1) DMA: Body Length - 1 */
1178 tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1179 src_dma_addr,
1180 loop_len - 1,
1181 DMA_PREP_FENCE);
1182 if (!tx) {
1183 ret = -ENOMEM;
1184 goto err;
1185 }
1186 cookie = tx->tx_submit(tx);
1187 if (dma_submit_error(cookie)) {
1188 ret = -ENOMEM;
1189 goto err;
1190 }
1191 src_offset += (loop_len - 1);
1192 dst_offset += (loop_len - 1);
1193 src_dma_addr += (loop_len - 1);
1194 dst_dma_addr += (loop_len - 1);
1195 remaining_len -= (loop_len - 1);
1196 loop_len = remaining_len;
1197
 1198 /* Step 2) DMA: last 1 byte */
1199 tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1200 src_dma_addr, loop_len, 0);
1201 if (!tx) {
1202 ret = -ENOMEM;
1203 goto err;
1204 }
1205 cookie = tx->tx_submit(tx);
1206 if (dma_submit_error(cookie)) {
1207 ret = -ENOMEM;
1208 goto err;
1209 }
1210 dma_async_issue_pending(chan);
1211 } else {
1212 tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1213 src_dma_addr, loop_len, 0);
1214 if (!tx) {
1215 ret = -ENOMEM;
1216 goto err;
1217 }
1218 cookie = tx->tx_submit(tx);
1219 if (dma_submit_error(cookie)) {
1220 ret = -ENOMEM;
1221 goto err;
1222 }
1223 }
1224 src_offset += loop_len;
1225 dst_offset += loop_len;
1226 remaining_len -= loop_len;
1227 }
1228 return ret;
1229err:
1230 dev_err(scif_info.mdev.this_device,
1231 "%s %d Desc Prog Failed ret %d\n",
1232 __func__, __LINE__, ret);
1233 return ret;
1234}
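The two-step trick above (and in the unaligned path) is easier to see without the window bookkeeping. A hedged sketch of the core pattern, assuming len is larger than one cache line and using only the dmaengine calls this file already relies on:

/* Sketch only: guarantee that the last cache line of a transfer lands
 * last, by splitting it into a fenced body and a trailing descriptor.
 */
static int demo_ordered_memcpy(struct dma_chan *chan,
			       dma_addr_t dst, dma_addr_t src, size_t len)
{
	struct dma_device *dev = chan->device;
	struct dma_async_tx_descriptor *tx;
	size_t body = len - L1_CACHE_BYTES;	/* assumes len > one cache line */
	dma_cookie_t cookie;

	/* Step 1: body, fenced so later descriptors cannot pass it */
	tx = dev->device_prep_dma_memcpy(chan, dst, src, body, DMA_PREP_FENCE);
	if (!tx)
		return -ENOMEM;
	cookie = tx->tx_submit(tx);
	if (dma_submit_error(cookie))
		return -ENOMEM;

	/* Step 2: final cache line, ordered behind the fence */
	tx = dev->device_prep_dma_memcpy(chan, dst + body, src + body,
					 L1_CACHE_BYTES, 0);
	if (!tx)
		return -ENOMEM;
	cookie = tx->tx_submit(tx);
	if (dma_submit_error(cookie))
		return -ENOMEM;

	dma_async_issue_pending(chan);
	return 0;
}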
1235
1236/*
1237 * scif_rma_list_dma_copy_aligned:
1238 *
1239 * Traverse all the windows and perform DMA copy.
1240 */
1241static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
1242 struct dma_chan *chan)
1243{
1244 dma_addr_t src_dma_addr, dst_dma_addr;
1245 size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
1246 size_t dst_contig_bytes = 0;
1247 int src_cache_off;
1248 s64 end_src_offset, end_dst_offset;
1249 struct scif_window_iter src_win_iter;
1250 struct scif_window_iter dst_win_iter;
1251 void *src_virt, *dst_virt;
1252 struct scif_window *src_window = work->src_window;
1253 struct scif_window *dst_window = work->dst_window;
1254 s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
1255 int ret = 0;
1256 struct dma_async_tx_descriptor *tx;
1257 struct dma_device *dev = chan->device;
1258 dma_cookie_t cookie;
1259
1260 remaining_len = work->len;
1261 scif_init_window_iter(src_window, &src_win_iter);
1262 scif_init_window_iter(dst_window, &dst_win_iter);
1263
1264 src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
1265 if (src_cache_off != 0) {
1266 /* Head */
1267 loop_len = L1_CACHE_BYTES - src_cache_off;
1268 loop_len = min(loop_len, remaining_len);
1269 src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
1270 dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
1271 if (src_window->type == SCIF_WINDOW_SELF)
1272 src_virt = _get_local_va(src_offset, src_window,
1273 loop_len);
1274 else
1275 src_virt = ioremap_remote(src_offset, src_window,
1276 loop_len,
1277 work->remote_dev, NULL);
1278 if (!src_virt)
1279 return -ENOMEM;
1280 if (dst_window->type == SCIF_WINDOW_SELF)
1281 dst_virt = _get_local_va(dst_offset, dst_window,
1282 loop_len);
1283 else
1284 dst_virt = ioremap_remote(dst_offset, dst_window,
1285 loop_len,
1286 work->remote_dev, NULL);
1287 if (!dst_virt) {
1288 if (src_window->type != SCIF_WINDOW_SELF)
1289 iounmap_remote(src_virt, loop_len, work);
1290 return -ENOMEM;
1291 }
1292 if (src_window->type == SCIF_WINDOW_SELF)
1293 scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
1294 remaining_len == loop_len ?
1295 work->ordered : false);
1296 else
1297 scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
1298 remaining_len == loop_len ?
1299 work->ordered : false);
1300 if (src_window->type != SCIF_WINDOW_SELF)
1301 iounmap_remote(src_virt, loop_len, work);
1302 if (dst_window->type != SCIF_WINDOW_SELF)
1303 iounmap_remote(dst_virt, loop_len, work);
1304 src_offset += loop_len;
1305 dst_offset += loop_len;
1306 remaining_len -= loop_len;
1307 }
1308
1309 end_src_offset = src_window->offset +
1310 (src_window->nr_pages << PAGE_SHIFT);
1311 end_dst_offset = dst_window->offset +
1312 (dst_window->nr_pages << PAGE_SHIFT);
1313 tail_len = remaining_len & (L1_CACHE_BYTES - 1);
1314 remaining_len -= tail_len;
1315 while (remaining_len) {
1316 if (src_offset == end_src_offset) {
1317 src_window = list_entry_next(src_window, list);
1318 end_src_offset = src_window->offset +
1319 (src_window->nr_pages << PAGE_SHIFT);
1320 scif_init_window_iter(src_window, &src_win_iter);
1321 }
1322 if (dst_offset == end_dst_offset) {
1323 dst_window = list_entry_next(dst_window, list);
1324 end_dst_offset = dst_window->offset +
1325 (dst_window->nr_pages << PAGE_SHIFT);
1326 scif_init_window_iter(dst_window, &dst_win_iter);
1327 }
1328
1329 /* compute dma addresses for transfer */
1330 src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
1331 &src_contig_bytes,
1332 &src_win_iter);
1333 dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
1334 &dst_contig_bytes,
1335 &dst_win_iter);
1336 loop_len = min(src_contig_bytes, dst_contig_bytes);
1337 loop_len = min(loop_len, remaining_len);
1338 if (work->ordered && !tail_len &&
1339 !(remaining_len - loop_len)) {
1340 /*
1341 * Break up the last chunk of the transfer into two
 1342 * steps if there is no tail, to guarantee DMA ordering.
1343 * Passing SCIF_DMA_POLLING inserts a status update
1344 * descriptor in step 1 which acts as a double sided
1345 * synchronization fence for the DMA engine to ensure
1346 * that the last cache line in step 2 is updated last.
1347 */
1348 /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
1349 tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1350 src_dma_addr,
1351 loop_len -
1352 L1_CACHE_BYTES,
1353 DMA_PREP_FENCE);
1354 if (!tx) {
1355 ret = -ENOMEM;
1356 goto err;
1357 }
1358 cookie = tx->tx_submit(tx);
1359 if (dma_submit_error(cookie)) {
1360 ret = -ENOMEM;
1361 goto err;
1362 }
1363 dma_async_issue_pending(chan);
1364 src_offset += (loop_len - L1_CACHE_BYTES);
1365 dst_offset += (loop_len - L1_CACHE_BYTES);
1366 src_dma_addr += (loop_len - L1_CACHE_BYTES);
1367 dst_dma_addr += (loop_len - L1_CACHE_BYTES);
1368 remaining_len -= (loop_len - L1_CACHE_BYTES);
1369 loop_len = remaining_len;
1370
1371 /* Step 2) DMA: L1_CACHE_BYTES */
1372 tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1373 src_dma_addr,
1374 loop_len, 0);
1375 if (!tx) {
1376 ret = -ENOMEM;
1377 goto err;
1378 }
1379 cookie = tx->tx_submit(tx);
1380 if (dma_submit_error(cookie)) {
1381 ret = -ENOMEM;
1382 goto err;
1383 }
1384 dma_async_issue_pending(chan);
1385 } else {
1386 tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1387 src_dma_addr,
1388 loop_len, 0);
1389 if (!tx) {
1390 ret = -ENOMEM;
1391 goto err;
1392 }
1393 cookie = tx->tx_submit(tx);
1394 if (dma_submit_error(cookie)) {
1395 ret = -ENOMEM;
1396 goto err;
1397 }
1398 dma_async_issue_pending(chan);
1399 }
1400 src_offset += loop_len;
1401 dst_offset += loop_len;
1402 remaining_len -= loop_len;
1403 }
1404 remaining_len = tail_len;
1405 if (remaining_len) {
1406 loop_len = remaining_len;
1407 if (src_offset == end_src_offset)
1408 src_window = list_entry_next(src_window, list);
1409 if (dst_offset == end_dst_offset)
1410 dst_window = list_entry_next(dst_window, list);
1411
1412 src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
1413 dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
1414 /*
1415 * The CPU copy for the tail bytes must be initiated only once
1416 * previous DMA transfers for this endpoint have completed to
1417 * guarantee ordering.
1418 */
1419 if (work->ordered) {
1420 struct scif_dev *rdev = work->remote_dev;
1421
1422 ret = scif_drain_dma_poll(rdev->sdev, chan);
1423 if (ret)
1424 return ret;
1425 }
1426 if (src_window->type == SCIF_WINDOW_SELF)
1427 src_virt = _get_local_va(src_offset, src_window,
1428 loop_len);
1429 else
1430 src_virt = ioremap_remote(src_offset, src_window,
1431 loop_len,
1432 work->remote_dev, NULL);
1433 if (!src_virt)
1434 return -ENOMEM;
1435
1436 if (dst_window->type == SCIF_WINDOW_SELF)
1437 dst_virt = _get_local_va(dst_offset, dst_window,
1438 loop_len);
1439 else
1440 dst_virt = ioremap_remote(dst_offset, dst_window,
1441 loop_len,
1442 work->remote_dev, NULL);
1443 if (!dst_virt) {
1444 if (src_window->type != SCIF_WINDOW_SELF)
1445 iounmap_remote(src_virt, loop_len, work);
1446 return -ENOMEM;
1447 }
1448
1449 if (src_window->type == SCIF_WINDOW_SELF)
1450 scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
1451 work->ordered);
1452 else
1453 scif_unaligned_cpy_fromio(dst_virt, src_virt,
1454 loop_len, work->ordered);
1455 if (src_window->type != SCIF_WINDOW_SELF)
1456 iounmap_remote(src_virt, loop_len, work);
1457
1458 if (dst_window->type != SCIF_WINDOW_SELF)
1459 iounmap_remote(dst_virt, loop_len, work);
1460 remaining_len -= loop_len;
1461 }
1462 return ret;
1463err:
1464 dev_err(scif_info.mdev.this_device,
1465 "%s %d Desc Prog Failed ret %d\n",
1466 __func__, __LINE__, ret);
1467 return ret;
1468}
1469
1470/*
1471 * scif_rma_list_cpu_copy:
1472 *
1473 * Traverse all the windows and perform CPU copy.
1474 */
1475static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
1476{
1477 void *src_virt, *dst_virt;
1478 size_t loop_len, remaining_len;
1479 int src_page_off, dst_page_off;
1480 s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
1481 struct scif_window *src_window = work->src_window;
1482 struct scif_window *dst_window = work->dst_window;
1483 s64 end_src_offset, end_dst_offset;
1484 int ret = 0;
1485 struct scif_window_iter src_win_iter;
1486 struct scif_window_iter dst_win_iter;
1487
1488 remaining_len = work->len;
1489
1490 scif_init_window_iter(src_window, &src_win_iter);
1491 scif_init_window_iter(dst_window, &dst_win_iter);
1492 while (remaining_len) {
1493 src_page_off = src_offset & ~PAGE_MASK;
1494 dst_page_off = dst_offset & ~PAGE_MASK;
1495 loop_len = min(PAGE_SIZE -
1496 max(src_page_off, dst_page_off),
1497 remaining_len);
1498
1499 if (src_window->type == SCIF_WINDOW_SELF)
1500 src_virt = _get_local_va(src_offset, src_window,
1501 loop_len);
1502 else
1503 src_virt = ioremap_remote(src_offset, src_window,
1504 loop_len,
1505 work->remote_dev,
1506 &src_win_iter);
1507 if (!src_virt) {
1508 ret = -ENOMEM;
1509 goto error;
1510 }
1511
1512 if (dst_window->type == SCIF_WINDOW_SELF)
1513 dst_virt = _get_local_va(dst_offset, dst_window,
1514 loop_len);
1515 else
1516 dst_virt = ioremap_remote(dst_offset, dst_window,
1517 loop_len,
1518 work->remote_dev,
1519 &dst_win_iter);
1520 if (!dst_virt) {
1521 if (src_window->type == SCIF_WINDOW_PEER)
1522 iounmap_remote(src_virt, loop_len, work);
1523 ret = -ENOMEM;
1524 goto error;
1525 }
1526
1527 if (work->loopback) {
1528 memcpy(dst_virt, src_virt, loop_len);
1529 } else {
1530 if (src_window->type == SCIF_WINDOW_SELF)
1531 memcpy_toio((void __iomem __force *)dst_virt,
1532 src_virt, loop_len);
1533 else
1534 memcpy_fromio(dst_virt,
1535 (void __iomem __force *)src_virt,
1536 loop_len);
1537 }
1538 if (src_window->type == SCIF_WINDOW_PEER)
1539 iounmap_remote(src_virt, loop_len, work);
1540
1541 if (dst_window->type == SCIF_WINDOW_PEER)
1542 iounmap_remote(dst_virt, loop_len, work);
1543
1544 src_offset += loop_len;
1545 dst_offset += loop_len;
1546 remaining_len -= loop_len;
1547 if (remaining_len) {
1548 end_src_offset = src_window->offset +
1549 (src_window->nr_pages << PAGE_SHIFT);
1550 end_dst_offset = dst_window->offset +
1551 (dst_window->nr_pages << PAGE_SHIFT);
1552 if (src_offset == end_src_offset) {
1553 src_window = list_entry_next(src_window, list);
1554 scif_init_window_iter(src_window,
1555 &src_win_iter);
1556 }
1557 if (dst_offset == end_dst_offset) {
1558 dst_window = list_entry_next(dst_window, list);
1559 scif_init_window_iter(dst_window,
1560 &dst_win_iter);
1561 }
1562 }
1563 }
1564error:
1565 return ret;
1566}
1567
1568static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
1569 struct scif_copy_work *work,
1570 struct dma_chan *chan, off_t loffset)
1571{
1572 int src_cache_off, dst_cache_off;
1573 s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
1574 u8 *temp = NULL;
1575 bool src_local = true, dst_local = false;
1576 struct scif_dma_comp_cb *comp_cb;
1577 dma_addr_t src_dma_addr, dst_dma_addr;
1578 int err;
1579
1580 if (is_dma_copy_aligned(chan->device, 1, 1, 1))
1581 return _scif_rma_list_dma_copy_aligned(work, chan);
1582
1583 src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
1584 dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
1585
1586 if (dst_cache_off == src_cache_off)
1587 return scif_rma_list_dma_copy_aligned(work, chan);
1588
1589 if (work->loopback)
1590 return scif_rma_list_cpu_copy(work);
1591 src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset);
1592 dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset);
1593 src_local = work->src_window->type == SCIF_WINDOW_SELF;
1594 dst_local = work->dst_window->type == SCIF_WINDOW_SELF;
1595
1596 dst_local = dst_local;
1597 /* Allocate dma_completion cb */
1598 comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
1599 if (!comp_cb)
1600 goto error;
1601
1602 work->comp_cb = comp_cb;
1603 comp_cb->cb_cookie = comp_cb;
1604 comp_cb->dma_completion_func = &scif_rma_completion_cb;
1605
1606 if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
1607 comp_cb->is_cache = false;
1608 /* Allocate padding bytes to align to a cache line */
1609 temp = kmalloc(work->len + (L1_CACHE_BYTES << 1),
1610 GFP_KERNEL);
1611 if (!temp)
1612 goto free_comp_cb;
1613 comp_cb->temp_buf_to_free = temp;
1614 /* kmalloc(..) does not guarantee cache line alignment */
1615 if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
1616 temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
1617 } else {
1618 comp_cb->is_cache = true;
1619 temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
1620 if (!temp)
1621 goto free_comp_cb;
1622 comp_cb->temp_buf_to_free = temp;
1623 }
1624
1625 if (src_local) {
1626 temp += dst_cache_off;
1627 scif_rma_local_cpu_copy(work->src_offset, work->src_window,
1628 temp, work->len, true);
1629 } else {
1630 comp_cb->dst_window = work->dst_window;
1631 comp_cb->dst_offset = work->dst_offset;
1632 work->src_offset = work->src_offset - src_cache_off;
1633 comp_cb->len = work->len;
1634 work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
1635 comp_cb->header_padding = src_cache_off;
1636 }
1637 comp_cb->temp_buf = temp;
1638
1639 err = scif_map_single(&comp_cb->temp_phys, temp,
1640 work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
1641 if (err)
1642 goto free_temp_buf;
1643 comp_cb->sdev = work->remote_dev;
1644 if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0)
1645 goto free_temp_buf;
1646 if (!src_local)
1647 work->fence_type = SCIF_DMA_INTR;
1648 return 0;
1649free_temp_buf:
1650 if (comp_cb->is_cache)
1651 kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
1652 else
1653 kfree(comp_cb->temp_buf_to_free);
1654free_comp_cb:
1655 kfree(comp_cb);
1656error:
1657 return -ENOMEM;
1658}
1659
1660/**
1661 * scif_rma_copy:
1662 * @epd: end point descriptor.
1663 * @loffset: offset in local registered address space to/from which to copy
1664 * @addr: user virtual address to/from which to copy
1665 * @len: length of range to copy
1666 * @roffset: offset in remote registered address space to/from which to copy
1667 * @flags: flags
1668 * @dir: LOCAL->REMOTE or vice versa.
1669 * @last_chunk: true if this is the last chunk of a larger transfer
1670 *
 1671 * Validate parameters, check that the src/dst registered ranges requested
 1672 * for the copy are valid, and initiate either a CPU or a DMA copy.
1673 */
1674static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
1675 size_t len, off_t roffset, int flags,
1676 enum scif_rma_dir dir, bool last_chunk)
1677{
1678 struct scif_endpt *ep = (struct scif_endpt *)epd;
1679 struct scif_rma_req remote_req;
1680 struct scif_rma_req req;
1681 struct scif_window *local_window = NULL;
1682 struct scif_window *remote_window = NULL;
1683 struct scif_copy_work copy_work;
1684 bool loopback;
1685 int err = 0;
1686 struct dma_chan *chan;
1687 struct scif_mmu_notif *mmn = NULL;
1688 bool cache = false;
1689 struct device *spdev;
1690
1691 err = scif_verify_epd(ep);
1692 if (err)
1693 return err;
1694
1695 if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
1696 SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
1697 return -EINVAL;
1698
1699 loopback = scifdev_self(ep->remote_dev) ? true : false;
1700 copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
1701 SCIF_DMA_POLL : 0;
1702 copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);
1703
1704 /* Use CPU for Mgmt node <-> Mgmt node copies */
1705 if (loopback && scif_is_mgmt_node()) {
1706 flags |= SCIF_RMA_USECPU;
1707 copy_work.fence_type = 0x0;
1708 }
1709
1710 cache = scif_is_set_reg_cache(flags);
1711
1712 remote_req.out_window = &remote_window;
1713 remote_req.offset = roffset;
1714 remote_req.nr_bytes = len;
1715 /*
1716 * If transfer is from local to remote then the remote window
1717 * must be writeable and vice versa.
1718 */
1719 remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
1720 remote_req.type = SCIF_WINDOW_PARTIAL;
1721 remote_req.head = &ep->rma_info.remote_reg_list;
1722
1723 spdev = scif_get_peer_dev(ep->remote_dev);
1724 if (IS_ERR(spdev)) {
1725 err = PTR_ERR(spdev);
1726 return err;
1727 }
1728
1729 if (addr && cache) {
1730 mutex_lock(&ep->rma_info.mmn_lock);
1731 mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
1732 if (!mmn)
 1733			mmn = scif_add_mmu_notifier(current->mm, ep);
1734 mutex_unlock(&ep->rma_info.mmn_lock);
1735 if (IS_ERR(mmn)) {
1736 scif_put_peer_dev(spdev);
1737 return PTR_ERR(mmn);
1738 }
1739 cache = cache && !scif_rma_tc_can_cache(ep, len);
1740 }
1741 mutex_lock(&ep->rma_info.rma_lock);
1742 if (addr) {
1743 req.out_window = &local_window;
1744 req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK),
1745 PAGE_SIZE);
1746 req.va_for_temp = addr & PAGE_MASK;
1747 req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
1748 VM_READ : VM_WRITE | VM_READ);
1749 /* Does a valid local window exist? */
1750 if (mmn) {
1751 spin_lock(&ep->rma_info.tc_lock);
1752 req.head = &mmn->tc_reg_list;
1753 err = scif_query_tcw(ep, &req);
1754 spin_unlock(&ep->rma_info.tc_lock);
1755 }
1756 if (!mmn || err) {
1757 err = scif_register_temp(epd, req.va_for_temp,
1758 req.nr_bytes, req.prot,
1759 &loffset, &local_window);
1760 if (err) {
1761 mutex_unlock(&ep->rma_info.rma_lock);
1762 goto error;
1763 }
1764 if (!cache)
1765 goto skip_cache;
1766 atomic_inc(&ep->rma_info.tcw_refcount);
1767 atomic_add_return(local_window->nr_pages,
1768 &ep->rma_info.tcw_total_pages);
1769 if (mmn) {
1770 spin_lock(&ep->rma_info.tc_lock);
1771 scif_insert_tcw(local_window,
1772 &mmn->tc_reg_list);
1773 spin_unlock(&ep->rma_info.tc_lock);
1774 }
1775 }
1776skip_cache:
1777 loffset = local_window->offset +
1778 (addr - local_window->va_for_temp);
1779 } else {
1780 req.out_window = &local_window;
1781 req.offset = loffset;
1782 /*
1783 * If transfer is from local to remote then the self window
1784 * must be readable and vice versa.
1785 */
1786 req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
1787 req.nr_bytes = len;
1788 req.type = SCIF_WINDOW_PARTIAL;
1789 req.head = &ep->rma_info.reg_list;
1790 /* Does a valid local window exist? */
1791 err = scif_query_window(&req);
1792 if (err) {
1793 mutex_unlock(&ep->rma_info.rma_lock);
1794 goto error;
1795 }
1796 }
1797
1798 /* Does a valid remote window exist? */
1799 err = scif_query_window(&remote_req);
1800 if (err) {
1801 mutex_unlock(&ep->rma_info.rma_lock);
1802 goto error;
1803 }
1804
1805 /*
1806 * Prepare copy_work for submitting work to the DMA kernel thread
1807 * or CPU copy routine.
1808 */
1809 copy_work.len = len;
1810 copy_work.loopback = loopback;
1811 copy_work.remote_dev = ep->remote_dev;
1812 if (dir == SCIF_LOCAL_TO_REMOTE) {
1813 copy_work.src_offset = loffset;
1814 copy_work.src_window = local_window;
1815 copy_work.dst_offset = roffset;
1816 copy_work.dst_window = remote_window;
1817 } else {
1818 copy_work.src_offset = roffset;
1819 copy_work.src_window = remote_window;
1820 copy_work.dst_offset = loffset;
1821 copy_work.dst_window = local_window;
1822 }
1823
1824 if (flags & SCIF_RMA_USECPU) {
1825 scif_rma_list_cpu_copy(&copy_work);
1826 } else {
1827 chan = ep->rma_info.dma_chan;
1828 err = scif_rma_list_dma_copy_wrapper(epd, &copy_work,
1829 chan, loffset);
1830 }
1831 if (addr && !cache)
1832 atomic_inc(&ep->rma_info.tw_refcount);
1833
1834 mutex_unlock(&ep->rma_info.rma_lock);
1835
1836 if (last_chunk) {
1837 struct scif_dev *rdev = ep->remote_dev;
1838
1839 if (copy_work.fence_type == SCIF_DMA_POLL)
1840 err = scif_drain_dma_poll(rdev->sdev,
1841 ep->rma_info.dma_chan);
1842 else if (copy_work.fence_type == SCIF_DMA_INTR)
1843 err = scif_drain_dma_intr(rdev->sdev,
1844 ep->rma_info.dma_chan);
1845 }
1846
1847 if (addr && !cache)
1848 scif_queue_for_cleanup(local_window, &scif_info.rma);
1849 scif_put_peer_dev(spdev);
1850 return err;
1851error:
1852 if (err) {
1853 if (addr && local_window && !cache)
1854 scif_destroy_window(ep, local_window);
1855 dev_err(scif_info.mdev.this_device,
1856 "%s %d err %d len 0x%lx\n",
1857 __func__, __LINE__, err, len);
1858 }
1859 scif_put_peer_dev(spdev);
1860 return err;
1861}
1862
1863int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
1864 off_t roffset, int flags)
1865{
1866 int err;
1867
1868 dev_dbg(scif_info.mdev.this_device,
1869 "SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
1870 epd, loffset, len, roffset, flags);
1871 if (scif_unaligned(loffset, roffset)) {
1872 while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
1873 err = scif_rma_copy(epd, loffset, 0x0,
1874 SCIF_MAX_UNALIGNED_BUF_SIZE,
1875 roffset, flags,
1876 SCIF_REMOTE_TO_LOCAL, false);
1877 if (err)
1878 goto readfrom_err;
1879 loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1880 roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1881 len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
1882 }
1883 }
1884 err = scif_rma_copy(epd, loffset, 0x0, len,
1885 roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
1886readfrom_err:
1887 return err;
1888}
1889EXPORT_SYMBOL_GPL(scif_readfrom);
1890
1891int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
1892 off_t roffset, int flags)
1893{
1894 int err;
1895
1896 dev_dbg(scif_info.mdev.this_device,
1897 "SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
1898 epd, loffset, len, roffset, flags);
1899 if (scif_unaligned(loffset, roffset)) {
1900 while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
1901 err = scif_rma_copy(epd, loffset, 0x0,
1902 SCIF_MAX_UNALIGNED_BUF_SIZE,
1903 roffset, flags,
1904 SCIF_LOCAL_TO_REMOTE, false);
1905 if (err)
1906 goto writeto_err;
1907 loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1908 roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1909 len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
1910 }
1911 }
1912 err = scif_rma_copy(epd, loffset, 0x0, len,
1913 roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
1914writeto_err:
1915 return err;
1916}
1917EXPORT_SYMBOL_GPL(scif_writeto);
1918
1919int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
1920 off_t roffset, int flags)
1921{
1922 int err;
1923
1924 dev_dbg(scif_info.mdev.this_device,
1925 "SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
1926 epd, addr, len, roffset, flags);
1927 if (scif_unaligned((off_t __force)addr, roffset)) {
1928 if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
1929 flags &= ~SCIF_RMA_USECACHE;
1930
1931 while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
1932 err = scif_rma_copy(epd, 0, (u64)addr,
1933 SCIF_MAX_UNALIGNED_BUF_SIZE,
1934 roffset, flags,
1935 SCIF_REMOTE_TO_LOCAL, false);
1936 if (err)
1937 goto vreadfrom_err;
1938 addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
1939 roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1940 len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
1941 }
1942 }
1943 err = scif_rma_copy(epd, 0, (u64)addr, len,
1944 roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
1945vreadfrom_err:
1946 return err;
1947}
1948EXPORT_SYMBOL_GPL(scif_vreadfrom);
1949
1950int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
1951 off_t roffset, int flags)
1952{
1953 int err;
1954
1955 dev_dbg(scif_info.mdev.this_device,
1956 "SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
1957 epd, addr, len, roffset, flags);
1958 if (scif_unaligned((off_t __force)addr, roffset)) {
1959 if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
1960 flags &= ~SCIF_RMA_USECACHE;
1961
1962 while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
1963 err = scif_rma_copy(epd, 0, (u64)addr,
1964 SCIF_MAX_UNALIGNED_BUF_SIZE,
1965 roffset, flags,
1966 SCIF_LOCAL_TO_REMOTE, false);
1967 if (err)
1968 goto vwriteto_err;
1969 addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
1970 roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1971 len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
1972 }
1973 }
1974 err = scif_rma_copy(epd, 0, (u64)addr, len,
1975 roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
1976vwriteto_err:
1977 return err;
1978}
1979EXPORT_SYMBOL_GPL(scif_vwriteto);
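The four exported entry points above funnel into scif_rma_copy(): scif_readfrom()/scif_writeto() operate on registered offsets, while scif_vreadfrom()/scif_vwriteto() take a raw virtual address and may go through the temporary-window cache. A minimal kernel-client sketch of the offset-based calls follows; the endpoint setup and the offsets are illustrative assumptions, not part of this patch.

#include <linux/scif.h>

/* Illustrative kernel-client sketch (epd, local_off and remote_off are
 * assumed to refer to an already connected endpoint with windows
 * registered on both sides).
 */
static int rma_copy_example(scif_epd_t epd, off_t local_off, off_t remote_off,
			    size_t len)
{
	int err;

	/* Push local window contents to the peer via the DMA path. */
	err = scif_writeto(epd, local_off, len, remote_off, 0);
	if (err)
		return err;

	/* Read back, forcing the CPU copy path instead of DMA. */
	return scif_readfrom(epd, local_off, len, remote_off, SCIF_RMA_USECPU);
}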
diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c
index b4bfbb08a8e3..00e5d6d66e7b 100644
--- a/drivers/misc/mic/scif/scif_epd.c
+++ b/drivers/misc/mic/scif/scif_epd.c
@@ -65,14 +65,14 @@ void scif_teardown_ep(void *endpt)
65void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held) 65void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held)
66{ 66{
67 if (!eplock_held) 67 if (!eplock_held)
68 spin_lock(&scif_info.eplock); 68 mutex_lock(&scif_info.eplock);
69 spin_lock(&ep->lock); 69 spin_lock(&ep->lock);
70 ep->state = SCIFEP_ZOMBIE; 70 ep->state = SCIFEP_ZOMBIE;
71 spin_unlock(&ep->lock); 71 spin_unlock(&ep->lock);
72 list_add_tail(&ep->list, &scif_info.zombie); 72 list_add_tail(&ep->list, &scif_info.zombie);
73 scif_info.nr_zombies++; 73 scif_info.nr_zombies++;
74 if (!eplock_held) 74 if (!eplock_held)
75 spin_unlock(&scif_info.eplock); 75 mutex_unlock(&scif_info.eplock);
76 schedule_work(&scif_info.misc_work); 76 schedule_work(&scif_info.misc_work);
77} 77}
78 78
@@ -81,16 +81,15 @@ static struct scif_endpt *scif_find_listen_ep(u16 port)
81 struct scif_endpt *ep = NULL; 81 struct scif_endpt *ep = NULL;
82 struct list_head *pos, *tmpq; 82 struct list_head *pos, *tmpq;
83 83
84 spin_lock(&scif_info.eplock); 84 mutex_lock(&scif_info.eplock);
85 list_for_each_safe(pos, tmpq, &scif_info.listen) { 85 list_for_each_safe(pos, tmpq, &scif_info.listen) {
86 ep = list_entry(pos, struct scif_endpt, list); 86 ep = list_entry(pos, struct scif_endpt, list);
87 if (ep->port.port == port) { 87 if (ep->port.port == port) {
88 spin_lock(&ep->lock); 88 mutex_unlock(&scif_info.eplock);
89 spin_unlock(&scif_info.eplock);
90 return ep; 89 return ep;
91 } 90 }
92 } 91 }
93 spin_unlock(&scif_info.eplock); 92 mutex_unlock(&scif_info.eplock);
94 return NULL; 93 return NULL;
95} 94}
96 95
@@ -99,14 +98,17 @@ void scif_cleanup_zombie_epd(void)
99 struct list_head *pos, *tmpq; 98 struct list_head *pos, *tmpq;
100 struct scif_endpt *ep; 99 struct scif_endpt *ep;
101 100
102 spin_lock(&scif_info.eplock); 101 mutex_lock(&scif_info.eplock);
103 list_for_each_safe(pos, tmpq, &scif_info.zombie) { 102 list_for_each_safe(pos, tmpq, &scif_info.zombie) {
104 ep = list_entry(pos, struct scif_endpt, list); 103 ep = list_entry(pos, struct scif_endpt, list);
105 list_del(pos); 104 if (scif_rma_ep_can_uninit(ep)) {
106 scif_info.nr_zombies--; 105 list_del(pos);
107 kfree(ep); 106 scif_info.nr_zombies--;
107 put_iova_domain(&ep->rma_info.iovad);
108 kfree(ep);
109 }
108 } 110 }
109 spin_unlock(&scif_info.eplock); 111 mutex_unlock(&scif_info.eplock);
110} 112}
111 113
112/** 114/**
@@ -137,6 +139,8 @@ void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg)
137 if (!ep) 139 if (!ep)
138 /* Send reject due to no listening ports */ 140 /* Send reject due to no listening ports */
139 goto conreq_sendrej_free; 141 goto conreq_sendrej_free;
142 else
143 spin_lock(&ep->lock);
140 144
141 if (ep->backlog <= ep->conreqcnt) { 145 if (ep->backlog <= ep->conreqcnt) {
142 /* Send reject due to too many pending requests */ 146 /* Send reject due to too many pending requests */
diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h
index 331322a25213..1771d7a9b8d0 100644
--- a/drivers/misc/mic/scif/scif_epd.h
+++ b/drivers/misc/mic/scif/scif_epd.h
@@ -96,7 +96,11 @@ struct scif_endpt_qp_info {
96 * @conn_port: Connection port 96 * @conn_port: Connection port
97 * @conn_err: Errors during connection 97 * @conn_err: Errors during connection
98 * @conn_async_state: Async connection 98 * @conn_async_state: Async connection
99 * @conn_pend_wq: Used by poll while waiting for incoming connections
99 * @conn_list: List of async connection requests 100 * @conn_list: List of async connection requests
101 * @rma_info: Information for triggering SCIF RMA and DMA operations
102 * @mmu_list: link to list of MMU notifier cleanup work
103 * @anon: anonymous file for use in kernel mode scif poll
100 */ 104 */
101struct scif_endpt { 105struct scif_endpt {
102 enum scif_epd_state state; 106 enum scif_epd_state state;
@@ -125,7 +129,11 @@ struct scif_endpt {
125 struct scif_port_id conn_port; 129 struct scif_port_id conn_port;
126 int conn_err; 130 int conn_err;
127 int conn_async_state; 131 int conn_async_state;
132 wait_queue_head_t conn_pend_wq;
128 struct list_head conn_list; 133 struct list_head conn_list;
134 struct scif_endpt_rma_info rma_info;
135 struct list_head mmu_list;
136 struct file *anon;
129}; 137};
130 138
131static inline int scifdev_alive(struct scif_endpt *ep) 139static inline int scifdev_alive(struct scif_endpt *ep)
@@ -133,6 +141,43 @@ static inline int scifdev_alive(struct scif_endpt *ep)
133 return _scifdev_alive(ep->remote_dev); 141 return _scifdev_alive(ep->remote_dev);
134} 142}
135 143
144/*
145 * scif_verify_epd:
146 * ep: SCIF endpoint
147 *
148 * Checks several generic error conditions and returns the
149 * appropriate error.
150 */
151static inline int scif_verify_epd(struct scif_endpt *ep)
152{
153 if (ep->state == SCIFEP_DISCONNECTED)
154 return -ECONNRESET;
155
156 if (ep->state != SCIFEP_CONNECTED)
157 return -ENOTCONN;
158
159 if (!scifdev_alive(ep))
160 return -ENODEV;
161
162 return 0;
163}
164
165static inline int scif_anon_inode_getfile(scif_epd_t epd)
166{
167 epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0);
168 if (IS_ERR(epd->anon))
169 return PTR_ERR(epd->anon);
170 return 0;
171}
172
173static inline void scif_anon_inode_fput(scif_epd_t epd)
174{
175 if (epd->anon) {
176 fput(epd->anon);
177 epd->anon = NULL;
178 }
179}
180
136void scif_cleanup_zombie_epd(void); 181void scif_cleanup_zombie_epd(void);
137void scif_teardown_ep(void *endpt); 182void scif_teardown_ep(void *endpt);
138void scif_cleanup_ep_qp(struct scif_endpt *ep); 183void scif_cleanup_ep_qp(struct scif_endpt *ep);
@@ -157,4 +202,9 @@ void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg);
157void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg); 202void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg);
158int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block); 203int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block);
159int __scif_flush(scif_epd_t epd); 204int __scif_flush(scif_epd_t epd);
205int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
206unsigned int __scif_pollfd(struct file *f, poll_table *wait,
207 struct scif_endpt *ep);
208int __scif_pin_pages(void *addr, size_t len, int *out_prot,
209 int map_flags, scif_pinned_pages_t *pages);
160#endif /* SCIF_EPD_H */ 210#endif /* SCIF_EPD_H */
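The scif_verify_epd() helper added above centralizes the state and liveness checks that every new RMA and fence entry point repeats; scif_fence_mark() later in this series uses exactly this pattern. A sketch of the intended caller shape follows; my_scif_op() is a placeholder name.

/* Placeholder sketch of the guard pattern used by the new API entry points. */
static int my_scif_op(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err;

	err = scif_verify_epd(ep);	/* -ECONNRESET, -ENOTCONN or -ENODEV */
	if (err)
		return err;

	/* ... proceed only on a connected endpoint whose peer node is alive ... */
	return 0;
}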
diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c
index eccf7e7135f9..f7e826142a72 100644
--- a/drivers/misc/mic/scif/scif_fd.c
+++ b/drivers/misc/mic/scif/scif_fd.c
@@ -34,6 +34,20 @@ static int scif_fdclose(struct inode *inode, struct file *f)
34 return scif_close(priv); 34 return scif_close(priv);
35} 35}
36 36
37static int scif_fdmmap(struct file *f, struct vm_area_struct *vma)
38{
39 struct scif_endpt *priv = f->private_data;
40
41 return scif_mmap(vma, priv);
42}
43
44static unsigned int scif_fdpoll(struct file *f, poll_table *wait)
45{
46 struct scif_endpt *priv = f->private_data;
47
48 return __scif_pollfd(f, wait, priv);
49}
50
37static int scif_fdflush(struct file *f, fl_owner_t id) 51static int scif_fdflush(struct file *f, fl_owner_t id)
38{ 52{
39 struct scif_endpt *ep = f->private_data; 53 struct scif_endpt *ep = f->private_data;
@@ -140,12 +154,12 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
140 * Add to the list of user mode eps where the second half 154 * Add to the list of user mode eps where the second half
141 * of the accept is not yet completed. 155 * of the accept is not yet completed.
142 */ 156 */
143 spin_lock(&scif_info.eplock); 157 mutex_lock(&scif_info.eplock);
144 list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept); 158 list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept);
145 list_add_tail(&((*ep)->liacceptlist), &priv->li_accept); 159 list_add_tail(&((*ep)->liacceptlist), &priv->li_accept);
146 (*ep)->listenep = priv; 160 (*ep)->listenep = priv;
147 priv->acceptcnt++; 161 priv->acceptcnt++;
148 spin_unlock(&scif_info.eplock); 162 mutex_unlock(&scif_info.eplock);
149 163
150 return 0; 164 return 0;
151 } 165 }
@@ -163,7 +177,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
163 return -EFAULT; 177 return -EFAULT;
164 178
165 /* Remove from the user accept queue */ 179 /* Remove from the user accept queue */
166 spin_lock(&scif_info.eplock); 180 mutex_lock(&scif_info.eplock);
167 list_for_each_safe(pos, tmpq, &scif_info.uaccept) { 181 list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
168 tmpep = list_entry(pos, 182 tmpep = list_entry(pos,
169 struct scif_endpt, miacceptlist); 183 struct scif_endpt, miacceptlist);
@@ -175,7 +189,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
175 } 189 }
176 190
177 if (!fep) { 191 if (!fep) {
178 spin_unlock(&scif_info.eplock); 192 mutex_unlock(&scif_info.eplock);
179 return -ENOENT; 193 return -ENOENT;
180 } 194 }
181 195
@@ -190,9 +204,10 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg)
190 } 204 }
191 } 205 }
192 206
193 spin_unlock(&scif_info.eplock); 207 mutex_unlock(&scif_info.eplock);
194 208
195 /* Free the resources automatically created from the open. */ 209 /* Free the resources automatically created from the open. */
210 scif_anon_inode_fput(priv);
196 scif_teardown_ep(priv); 211 scif_teardown_ep(priv);
197 scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD); 212 scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD);
198 f->private_data = newep; 213 f->private_data = newep;
@@ -290,6 +305,157 @@ getnodes_err1:
290getnodes_err2: 305getnodes_err2:
291 return err; 306 return err;
292 } 307 }
308 case SCIF_REG:
309 {
310 struct scif_endpt *priv = f->private_data;
311 struct scifioctl_reg reg;
312 off_t ret;
313
314 if (copy_from_user(&reg, argp, sizeof(reg))) {
315 err = -EFAULT;
316 goto reg_err;
317 }
318 if (reg.flags & SCIF_MAP_KERNEL) {
319 err = -EINVAL;
320 goto reg_err;
321 }
322 ret = scif_register(priv, (void *)reg.addr, reg.len,
323 reg.offset, reg.prot, reg.flags);
324 if (ret < 0) {
325 err = (int)ret;
326 goto reg_err;
327 }
328
329 if (copy_to_user(&((struct scifioctl_reg __user *)argp)
330 ->out_offset, &ret, sizeof(reg.out_offset))) {
331 err = -EFAULT;
332 goto reg_err;
333 }
334 err = 0;
335reg_err:
336 scif_err_debug(err, "scif_register");
337 return err;
338 }
339 case SCIF_UNREG:
340 {
341 struct scif_endpt *priv = f->private_data;
342 struct scifioctl_unreg unreg;
343
344 if (copy_from_user(&unreg, argp, sizeof(unreg))) {
345 err = -EFAULT;
346 goto unreg_err;
347 }
348 err = scif_unregister(priv, unreg.offset, unreg.len);
349unreg_err:
350 scif_err_debug(err, "scif_unregister");
351 return err;
352 }
353 case SCIF_READFROM:
354 {
355 struct scif_endpt *priv = f->private_data;
356 struct scifioctl_copy copy;
357
358 if (copy_from_user(&copy, argp, sizeof(copy))) {
359 err = -EFAULT;
360 goto readfrom_err;
361 }
362 err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset,
363 copy.flags);
364readfrom_err:
365 scif_err_debug(err, "scif_readfrom");
366 return err;
367 }
368 case SCIF_WRITETO:
369 {
370 struct scif_endpt *priv = f->private_data;
371 struct scifioctl_copy copy;
372
373 if (copy_from_user(&copy, argp, sizeof(copy))) {
374 err = -EFAULT;
375 goto writeto_err;
376 }
377 err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset,
378 copy.flags);
379writeto_err:
380 scif_err_debug(err, "scif_writeto");
381 return err;
382 }
383 case SCIF_VREADFROM:
384 {
385 struct scif_endpt *priv = f->private_data;
386 struct scifioctl_copy copy;
387
388 if (copy_from_user(&copy, argp, sizeof(copy))) {
389 err = -EFAULT;
390 goto vreadfrom_err;
391 }
392 err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len,
393 copy.roffset, copy.flags);
394vreadfrom_err:
395 scif_err_debug(err, "scif_vreadfrom");
396 return err;
397 }
398 case SCIF_VWRITETO:
399 {
400 struct scif_endpt *priv = f->private_data;
401 struct scifioctl_copy copy;
402
403 if (copy_from_user(&copy, argp, sizeof(copy))) {
404 err = -EFAULT;
405 goto vwriteto_err;
406 }
407 err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len,
408 copy.roffset, copy.flags);
409vwriteto_err:
410 scif_err_debug(err, "scif_vwriteto");
411 return err;
412 }
413 case SCIF_FENCE_MARK:
414 {
415 struct scif_endpt *priv = f->private_data;
416 struct scifioctl_fence_mark mark;
417 int tmp_mark = 0;
418
419 if (copy_from_user(&mark, argp, sizeof(mark))) {
420 err = -EFAULT;
421 goto fence_mark_err;
422 }
423 err = scif_fence_mark(priv, mark.flags, &tmp_mark);
424 if (err)
425 goto fence_mark_err;
426 if (copy_to_user((void __user *)mark.mark, &tmp_mark,
427 sizeof(tmp_mark))) {
428 err = -EFAULT;
429 goto fence_mark_err;
430 }
431fence_mark_err:
432 scif_err_debug(err, "scif_fence_mark");
433 return err;
434 }
435 case SCIF_FENCE_WAIT:
436 {
437 struct scif_endpt *priv = f->private_data;
438
439 err = scif_fence_wait(priv, arg);
440 scif_err_debug(err, "scif_fence_wait");
441 return err;
442 }
443 case SCIF_FENCE_SIGNAL:
444 {
445 struct scif_endpt *priv = f->private_data;
446 struct scifioctl_fence_signal signal;
447
448 if (copy_from_user(&signal, argp, sizeof(signal))) {
449 err = -EFAULT;
450 goto fence_signal_err;
451 }
452
453 err = scif_fence_signal(priv, signal.loff, signal.lval,
454 signal.roff, signal.rval, signal.flags);
455fence_signal_err:
456 scif_err_debug(err, "scif_fence_signal");
457 return err;
458 }
293 } 459 }
294 return -EINVAL; 460 return -EINVAL;
295} 461}
@@ -298,6 +464,8 @@ const struct file_operations scif_fops = {
298 .open = scif_fdopen, 464 .open = scif_fdopen,
299 .release = scif_fdclose, 465 .release = scif_fdclose,
300 .unlocked_ioctl = scif_fdioctl, 466 .unlocked_ioctl = scif_fdioctl,
467 .mmap = scif_fdmmap,
468 .poll = scif_fdpoll,
301 .flush = scif_fdflush, 469 .flush = scif_fdflush,
302 .owner = THIS_MODULE, 470 .owner = THIS_MODULE,
303}; 471};
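The new ioctl cases route the register/unregister, copy and fence calls through the /dev/scif character device. A hedged user-space sketch of driving SCIF_READFROM is shown below; it assumes the UAPI header exports struct scifioctl_copy with the fields referenced above (loffset, len, roffset, flags) and the corresponding SCIF_READFROM ioctl number.

/* User-space sketch; scif_fd is assumed to be an open, connected /dev/scif
 * descriptor and the offsets refer to windows registered on both sides.
 */
#include <sys/ioctl.h>
#include <linux/scif_ioctl.h>

static int read_from_peer(int scif_fd, long loffset, long roffset,
			  unsigned long len)
{
	struct scifioctl_copy copy = {
		.loffset = loffset,
		.roffset = roffset,
		.len	 = len,
		.flags	 = 0,	/* default DMA path */
	};

	return ioctl(scif_fd, SCIF_READFROM, &copy);
}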
diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c
new file mode 100644
index 000000000000..7f2c96f57066
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_fence.c
@@ -0,0 +1,771 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * Intel SCIF driver.
16 *
17 */
18
19#include "scif_main.h"
20
21/**
22 * scif_recv_mark: Handle SCIF_MARK request
23 * @msg: Interrupt message
24 *
25 * The peer has requested a mark.
26 */
27void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg)
28{
29 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
30 int mark, err;
31
32 err = _scif_fence_mark(ep, &mark);
33 if (err)
34 msg->uop = SCIF_MARK_NACK;
35 else
36 msg->uop = SCIF_MARK_ACK;
37 msg->payload[0] = ep->remote_ep;
38 msg->payload[2] = mark;
39 scif_nodeqp_send(ep->remote_dev, msg);
40}
41
42/**
43 * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages.
44 * @msg: Interrupt message
45 *
46 * The peer has responded to a SCIF_MARK message.
47 */
48void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg)
49{
50 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
51 struct scif_fence_info *fence_req =
52 (struct scif_fence_info *)msg->payload[1];
53
54 mutex_lock(&ep->rma_info.rma_lock);
55 if (msg->uop == SCIF_MARK_ACK) {
56 fence_req->state = OP_COMPLETED;
57 fence_req->dma_mark = (int)msg->payload[2];
58 } else {
59 fence_req->state = OP_FAILED;
60 }
61 mutex_unlock(&ep->rma_info.rma_lock);
62 complete(&fence_req->comp);
63}
64
65/**
66 * scif_recv_wait: Handle SCIF_WAIT request
67 * @msg: Interrupt message
68 *
69 * The peer has requested waiting on a fence.
70 */
71void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg)
72{
73 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
74 struct scif_remote_fence_info *fence;
75
76 /*
77 * Allocate structure for remote fence information and
78 * send a NACK if the allocation failed. The peer will
79 * return ENOMEM upon receiving a NACK.
80 */
81 fence = kmalloc(sizeof(*fence), GFP_KERNEL);
82 if (!fence) {
83 msg->payload[0] = ep->remote_ep;
84 msg->uop = SCIF_WAIT_NACK;
85 scif_nodeqp_send(ep->remote_dev, msg);
86 return;
87 }
88
89 /* Prepare the fence request */
90 memcpy(&fence->msg, msg, sizeof(struct scifmsg));
91 INIT_LIST_HEAD(&fence->list);
92
93 /* Insert to the global remote fence request list */
94 mutex_lock(&scif_info.fencelock);
95 atomic_inc(&ep->rma_info.fence_refcount);
96 list_add_tail(&fence->list, &scif_info.fence);
97 mutex_unlock(&scif_info.fencelock);
98
99 schedule_work(&scif_info.misc_work);
100}
101
102/**
103 * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages.
104 * @msg: Interrupt message
105 *
106 * The peer has responded to a SCIF_WAIT message.
107 */
108void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg)
109{
110 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
111 struct scif_fence_info *fence_req =
112 (struct scif_fence_info *)msg->payload[1];
113
114 mutex_lock(&ep->rma_info.rma_lock);
115 if (msg->uop == SCIF_WAIT_ACK)
116 fence_req->state = OP_COMPLETED;
117 else
118 fence_req->state = OP_FAILED;
119 mutex_unlock(&ep->rma_info.rma_lock);
120 complete(&fence_req->comp);
121}
122
123/**
124 * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request
125 * @msg: Interrupt message
126 *
127 * The peer has requested a signal on a local offset.
128 */
129void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg)
130{
131 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
132 int err;
133
134 err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
135 SCIF_WINDOW_SELF);
136 if (err)
137 msg->uop = SCIF_SIG_NACK;
138 else
139 msg->uop = SCIF_SIG_ACK;
140 msg->payload[0] = ep->remote_ep;
141 scif_nodeqp_send(ep->remote_dev, msg);
142}
143
144/**
145 * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request
146 * @msg: Interrupt message
147 *
148 * The peer has requested a signal on a remote offset.
149 */
150void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg)
151{
152 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
153 int err;
154
155 err = scif_prog_signal(ep, msg->payload[1], msg->payload[2],
156 SCIF_WINDOW_PEER);
157 if (err)
158 msg->uop = SCIF_SIG_NACK;
159 else
160 msg->uop = SCIF_SIG_ACK;
161 msg->payload[0] = ep->remote_ep;
162 scif_nodeqp_send(ep->remote_dev, msg);
163}
164
165/**
166 * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages.
167 * @msg: Interrupt message
168 *
169 * The peer has responded to a signal request.
170 */
171void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg)
172{
173 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
174 struct scif_fence_info *fence_req =
175 (struct scif_fence_info *)msg->payload[3];
176
177 mutex_lock(&ep->rma_info.rma_lock);
178 if (msg->uop == SCIF_SIG_ACK)
179 fence_req->state = OP_COMPLETED;
180 else
181 fence_req->state = OP_FAILED;
182 mutex_unlock(&ep->rma_info.rma_lock);
183 complete(&fence_req->comp);
184}
185
186static inline void *scif_get_local_va(off_t off, struct scif_window *window)
187{
188 struct page **pages = window->pinned_pages->pages;
189 int page_nr = (off - window->offset) >> PAGE_SHIFT;
190 off_t page_off = off & ~PAGE_MASK;
191
192 return page_address(pages[page_nr]) + page_off;
193}
194
195static void scif_prog_signal_cb(void *arg)
196{
197 struct scif_status *status = arg;
198
199 dma_pool_free(status->ep->remote_dev->signal_pool, status,
200 status->src_dma_addr);
201}
202
203static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val)
204{
205 struct scif_endpt *ep = (struct scif_endpt *)epd;
206 struct dma_chan *chan = ep->rma_info.dma_chan;
207 struct dma_device *ddev = chan->device;
208 bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1);
209 struct dma_async_tx_descriptor *tx;
210 struct scif_status *status = NULL;
211 dma_addr_t src;
212 dma_cookie_t cookie;
213 int err;
214
215 tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
216 if (!tx) {
217 err = -ENOMEM;
218 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
219 __func__, __LINE__, err);
220 goto alloc_fail;
221 }
222 cookie = tx->tx_submit(tx);
223 if (dma_submit_error(cookie)) {
224 err = (int)cookie;
225 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
226 __func__, __LINE__, err);
227 goto alloc_fail;
228 }
229 dma_async_issue_pending(chan);
230 if (x100) {
231 /*
232 * For X100 use the status descriptor to write the value to
233 * the destination.
234 */
235 tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0);
236 } else {
237 status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL,
238 &src);
239 if (!status) {
240 err = -ENOMEM;
241 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
242 __func__, __LINE__, err);
243 goto alloc_fail;
244 }
245 status->val = val;
246 status->src_dma_addr = src;
247 status->ep = ep;
248 src += offsetof(struct scif_status, val);
249 tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val),
250 DMA_PREP_INTERRUPT);
251 }
252 if (!tx) {
253 err = -ENOMEM;
254 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
255 __func__, __LINE__, err);
256 goto dma_fail;
257 }
258 if (!x100) {
259 tx->callback = scif_prog_signal_cb;
260 tx->callback_param = status;
261 }
262 cookie = tx->tx_submit(tx);
263 if (dma_submit_error(cookie)) {
264 err = -EIO;
265 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
266 __func__, __LINE__, err);
267 goto dma_fail;
268 }
269 dma_async_issue_pending(chan);
270 return 0;
271dma_fail:
272 if (!x100)
273 dma_pool_free(ep->remote_dev->signal_pool, status,
274 status->src_dma_addr);
275alloc_fail:
276 return err;
277}
278
279/*
280 * scif_prog_signal:
281 * @epd - Endpoint Descriptor
282 * @offset - registered address to write @val to
283 * @val - Value to be written at @offset
284 * @type - Type of the window.
285 *
286 * Arrange to write a value to the registered offset after ensuring that the
287 * offset provided is indeed valid.
288 */
289int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
290 enum scif_window_type type)
291{
292 struct scif_endpt *ep = (struct scif_endpt *)epd;
293 struct scif_window *window = NULL;
294 struct scif_rma_req req;
295 dma_addr_t dst_dma_addr;
296 int err;
297
298 mutex_lock(&ep->rma_info.rma_lock);
299 req.out_window = &window;
300 req.offset = offset;
301 req.nr_bytes = sizeof(u64);
302 req.prot = SCIF_PROT_WRITE;
303 req.type = SCIF_WINDOW_SINGLE;
304 if (type == SCIF_WINDOW_SELF)
305 req.head = &ep->rma_info.reg_list;
306 else
307 req.head = &ep->rma_info.remote_reg_list;
308 /* Does a valid window exist? */
309 err = scif_query_window(&req);
310 if (err) {
311 dev_err(scif_info.mdev.this_device,
312 "%s %d err %d\n", __func__, __LINE__, err);
313 goto unlock_ret;
314 }
315
316 if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) {
317 u64 *dst_virt;
318
319 if (type == SCIF_WINDOW_SELF)
320 dst_virt = scif_get_local_va(offset, window);
321 else
322 dst_virt =
323 scif_get_local_va(offset, (struct scif_window *)
324 window->peer_window);
325 *dst_virt = val;
326 } else {
327 dst_dma_addr = __scif_off_to_dma_addr(window, offset);
328 err = _scif_prog_signal(epd, dst_dma_addr, val);
329 }
330unlock_ret:
331 mutex_unlock(&ep->rma_info.rma_lock);
332 return err;
333}
334
335static int _scif_fence_wait(scif_epd_t epd, int mark)
336{
337 struct scif_endpt *ep = (struct scif_endpt *)epd;
338 dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE;
339 int err;
340
341 /* Wait for DMA callback in scif_fence_mark_cb(..) */
342 err = wait_event_interruptible_timeout(ep->rma_info.markwq,
343 dma_async_is_tx_complete(
344 ep->rma_info.dma_chan,
345 cookie, NULL, NULL) ==
346 DMA_COMPLETE,
347 SCIF_NODE_ALIVE_TIMEOUT);
348 if (!err)
349 err = -ETIMEDOUT;
350 else if (err > 0)
351 err = 0;
352 return err;
353}
354
355/**
356 * scif_rma_handle_remote_fences:
357 *
358 * This routine services remote fence requests.
359 */
360void scif_rma_handle_remote_fences(void)
361{
362 struct list_head *item, *tmp;
363 struct scif_remote_fence_info *fence;
364 struct scif_endpt *ep;
365 int mark, err;
366
367 might_sleep();
368 mutex_lock(&scif_info.fencelock);
369 list_for_each_safe(item, tmp, &scif_info.fence) {
370 fence = list_entry(item, struct scif_remote_fence_info,
371 list);
372 /* Remove fence from global list */
373 list_del(&fence->list);
374
375 /* Initiate the fence operation */
376 ep = (struct scif_endpt *)fence->msg.payload[0];
377 mark = fence->msg.payload[2];
378 err = _scif_fence_wait(ep, mark);
379 if (err)
380 fence->msg.uop = SCIF_WAIT_NACK;
381 else
382 fence->msg.uop = SCIF_WAIT_ACK;
383 fence->msg.payload[0] = ep->remote_ep;
384 scif_nodeqp_send(ep->remote_dev, &fence->msg);
385 kfree(fence);
386 if (!atomic_sub_return(1, &ep->rma_info.fence_refcount))
387 schedule_work(&scif_info.misc_work);
388 }
389 mutex_unlock(&scif_info.fencelock);
390}
391
392static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark)
393{
394 int err;
395 struct scifmsg msg;
396 struct scif_fence_info *fence_req;
397 struct scif_endpt *ep = (struct scif_endpt *)epd;
398
399 fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
400 if (!fence_req) {
401 err = -ENOMEM;
402 goto error;
403 }
404
405 fence_req->state = OP_IN_PROGRESS;
406 init_completion(&fence_req->comp);
407
408 msg.src = ep->port;
409 msg.uop = uop;
410 msg.payload[0] = ep->remote_ep;
411 msg.payload[1] = (u64)fence_req;
412 if (uop == SCIF_WAIT)
413 msg.payload[2] = mark;
414 spin_lock(&ep->lock);
415 if (ep->state == SCIFEP_CONNECTED)
416 err = scif_nodeqp_send(ep->remote_dev, &msg);
417 else
418 err = -ENOTCONN;
419 spin_unlock(&ep->lock);
420 if (err)
421 goto error_free;
422retry:
423 /* Wait for a SCIF_WAIT_(N)ACK message */
424 err = wait_for_completion_timeout(&fence_req->comp,
425 SCIF_NODE_ALIVE_TIMEOUT);
426 if (!err && scifdev_alive(ep))
427 goto retry;
428 if (!err)
429 err = -ENODEV;
430 if (err > 0)
431 err = 0;
432 mutex_lock(&ep->rma_info.rma_lock);
433 if (err < 0) {
434 if (fence_req->state == OP_IN_PROGRESS)
435 fence_req->state = OP_FAILED;
436 }
437 if (fence_req->state == OP_FAILED && !err)
438 err = -ENOMEM;
439 if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED)
440 *out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark;
441 mutex_unlock(&ep->rma_info.rma_lock);
442error_free:
443 kfree(fence_req);
444error:
445 return err;
446}
447
448/**
449 * scif_send_fence_mark:
450 * @epd: end point descriptor.
451 * @out_mark: Output DMA mark reported by peer.
452 *
453 * Send a remote fence mark request.
454 */
455static int scif_send_fence_mark(scif_epd_t epd, int *out_mark)
456{
457 return _scif_send_fence(epd, SCIF_MARK, 0, out_mark);
458}
459
460/**
461 * scif_send_fence_wait:
462 * @epd: end point descriptor.
463 * @mark: DMA mark to wait for.
464 *
465 * Send a remote fence wait request.
466 */
467static int scif_send_fence_wait(scif_epd_t epd, int mark)
468{
469 return _scif_send_fence(epd, SCIF_WAIT, mark, NULL);
470}
471
472static int _scif_send_fence_signal_wait(struct scif_endpt *ep,
473 struct scif_fence_info *fence_req)
474{
475 int err;
476
477retry:
478 /* Wait for a SCIF_SIG_(N)ACK message */
479 err = wait_for_completion_timeout(&fence_req->comp,
480 SCIF_NODE_ALIVE_TIMEOUT);
481 if (!err && scifdev_alive(ep))
482 goto retry;
483 if (!err)
484 err = -ENODEV;
485 if (err > 0)
486 err = 0;
487 if (err < 0) {
488 mutex_lock(&ep->rma_info.rma_lock);
489 if (fence_req->state == OP_IN_PROGRESS)
490 fence_req->state = OP_FAILED;
491 mutex_unlock(&ep->rma_info.rma_lock);
492 }
493 if (fence_req->state == OP_FAILED && !err)
494 err = -ENXIO;
495 return err;
496}
497
498/**
499 * scif_send_fence_signal:
500 * @epd - endpoint descriptor
501 * @roff - remote offset
502 * @rval - remote value to write to roff
503 * @loff - local offset
504 * @lval - local value to write to loff
505 * @flags - flags
506 *
507 * Sends a remote fence signal request
508 */
509static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval,
510 off_t loff, u64 lval, int flags)
511{
512 int err = 0;
513 struct scifmsg msg;
514 struct scif_fence_info *fence_req;
515 struct scif_endpt *ep = (struct scif_endpt *)epd;
516
517 fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL);
518 if (!fence_req) {
519 err = -ENOMEM;
520 goto error;
521 }
522
523 fence_req->state = OP_IN_PROGRESS;
524 init_completion(&fence_req->comp);
525 msg.src = ep->port;
526 if (flags & SCIF_SIGNAL_LOCAL) {
527 msg.uop = SCIF_SIG_LOCAL;
528 msg.payload[0] = ep->remote_ep;
529 msg.payload[1] = roff;
530 msg.payload[2] = rval;
531 msg.payload[3] = (u64)fence_req;
532 spin_lock(&ep->lock);
533 if (ep->state == SCIFEP_CONNECTED)
534 err = scif_nodeqp_send(ep->remote_dev, &msg);
535 else
536 err = -ENOTCONN;
537 spin_unlock(&ep->lock);
538 if (err)
539 goto error_free;
540 err = _scif_send_fence_signal_wait(ep, fence_req);
541 if (err)
542 goto error_free;
543 }
544 fence_req->state = OP_IN_PROGRESS;
545
546 if (flags & SCIF_SIGNAL_REMOTE) {
547 msg.uop = SCIF_SIG_REMOTE;
548 msg.payload[0] = ep->remote_ep;
549 msg.payload[1] = loff;
550 msg.payload[2] = lval;
551 msg.payload[3] = (u64)fence_req;
552 spin_lock(&ep->lock);
553 if (ep->state == SCIFEP_CONNECTED)
554 err = scif_nodeqp_send(ep->remote_dev, &msg);
555 else
556 err = -ENOTCONN;
557 spin_unlock(&ep->lock);
558 if (err)
559 goto error_free;
560 err = _scif_send_fence_signal_wait(ep, fence_req);
561 }
562error_free:
563 kfree(fence_req);
564error:
565 return err;
566}
567
568static void scif_fence_mark_cb(void *arg)
569{
570 struct scif_endpt *ep = (struct scif_endpt *)arg;
571
572 wake_up_interruptible(&ep->rma_info.markwq);
573 atomic_dec(&ep->rma_info.fence_refcount);
574}
575
576/*
577 * _scif_fence_mark:
578 *
579 * @epd - endpoint descriptor
580 * Set up a mark for this endpoint and return the value of the mark.
581 */
582int _scif_fence_mark(scif_epd_t epd, int *mark)
583{
584 struct scif_endpt *ep = (struct scif_endpt *)epd;
585 struct dma_chan *chan = ep->rma_info.dma_chan;
586 struct dma_device *ddev = chan->device;
587 struct dma_async_tx_descriptor *tx;
588 dma_cookie_t cookie;
589 int err;
590
591 tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE);
592 if (!tx) {
593 err = -ENOMEM;
594 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
595 __func__, __LINE__, err);
596 return err;
597 }
598 cookie = tx->tx_submit(tx);
599 if (dma_submit_error(cookie)) {
600 err = (int)cookie;
601 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
602 __func__, __LINE__, err);
603 return err;
604 }
605 dma_async_issue_pending(chan);
606 tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
607 if (!tx) {
608 err = -ENOMEM;
609 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
610 __func__, __LINE__, err);
611 return err;
612 }
613 tx->callback = scif_fence_mark_cb;
614 tx->callback_param = ep;
615 *mark = cookie = tx->tx_submit(tx);
616 if (dma_submit_error(cookie)) {
617 err = (int)cookie;
618 dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n",
619 __func__, __LINE__, err);
620 return err;
621 }
622 atomic_inc(&ep->rma_info.fence_refcount);
623 dma_async_issue_pending(chan);
624 return 0;
625}
626
627#define SCIF_LOOPB_MAGIC_MARK 0xdead
628
629int scif_fence_mark(scif_epd_t epd, int flags, int *mark)
630{
631 struct scif_endpt *ep = (struct scif_endpt *)epd;
632 int err = 0;
633
634 dev_dbg(scif_info.mdev.this_device,
635 "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n",
636 ep, flags, *mark);
637 err = scif_verify_epd(ep);
638 if (err)
639 return err;
640
641 /* Invalid flags? */
642 if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))
643 return -EINVAL;
644
645 /* At least one of init self or peer RMA should be set */
646 if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
647 return -EINVAL;
648
649 /* Exactly one of init self or peer RMA should be set but not both */
650 if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
651 return -EINVAL;
652
653 /*
654 * Management node loopback does not need to use DMA.
655 * Return a valid mark to be symmetric.
656 */
657 if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
658 *mark = SCIF_LOOPB_MAGIC_MARK;
659 return 0;
660 }
661
662 if (flags & SCIF_FENCE_INIT_SELF)
663 err = _scif_fence_mark(epd, mark);
664 else
665 err = scif_send_fence_mark(ep, mark);
666
667 if (err)
668 dev_err(scif_info.mdev.this_device,
669 "%s %d err %d\n", __func__, __LINE__, err);
670 dev_dbg(scif_info.mdev.this_device,
671 "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n",
672 ep, flags, *mark, err);
673 return err;
674}
675EXPORT_SYMBOL_GPL(scif_fence_mark);
676
677int scif_fence_wait(scif_epd_t epd, int mark)
678{
679 struct scif_endpt *ep = (struct scif_endpt *)epd;
680 int err = 0;
681
682 dev_dbg(scif_info.mdev.this_device,
683 "SCIFAPI fence_wait: ep %p mark 0x%x\n",
684 ep, mark);
685 err = scif_verify_epd(ep);
686 if (err)
687 return err;
688 /*
689 * Management node loopback does not need to use DMA.
690 * The only valid mark provided is SCIF_LOOPB_MAGIC_MARK so simply
691 * return success if the mark matches.
692 */
693 if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) {
694 if (mark == SCIF_LOOPB_MAGIC_MARK)
695 return 0;
696 else
697 return -EINVAL;
698 }
699 if (mark & SCIF_REMOTE_FENCE)
700 err = scif_send_fence_wait(epd, mark);
701 else
702 err = _scif_fence_wait(epd, mark);
703 if (err < 0)
704 dev_err(scif_info.mdev.this_device,
705 "%s %d err %d\n", __func__, __LINE__, err);
706 return err;
707}
708EXPORT_SYMBOL_GPL(scif_fence_wait);
709
710int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval,
711 off_t roff, u64 rval, int flags)
712{
713 struct scif_endpt *ep = (struct scif_endpt *)epd;
714 int err = 0;
715
716 dev_dbg(scif_info.mdev.this_device,
717 "SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n",
718 ep, loff, lval, roff, rval, flags);
719 err = scif_verify_epd(ep);
720 if (err)
721 return err;
722
723 /* Invalid flags? */
724 if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER |
725 SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))
726 return -EINVAL;
727
728 /* At least one of init self or peer RMA should be set */
729 if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)))
730 return -EINVAL;
731
732 /* Exactly one of init self or peer RMA should be set but not both */
733 if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER))
734 return -EINVAL;
735
736 /* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */
737 if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)))
738 return -EINVAL;
739
740 /* Only Dword offsets allowed */
741 if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1)))
742 return -EINVAL;
743
744 /* Only Dword aligned offsets allowed */
745 if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1)))
746 return -EINVAL;
747
748 if (flags & SCIF_FENCE_INIT_PEER) {
749 err = scif_send_fence_signal(epd, roff, rval, loff,
750 lval, flags);
751 } else {
752 /* Local Signal in Local RAS */
753 if (flags & SCIF_SIGNAL_LOCAL) {
754 err = scif_prog_signal(epd, loff, lval,
755 SCIF_WINDOW_SELF);
756 if (err)
757 goto error_ret;
758 }
759
760 /* Signal in Remote RAS */
761 if (flags & SCIF_SIGNAL_REMOTE)
762 err = scif_prog_signal(epd, roff,
763 rval, SCIF_WINDOW_PEER);
764 }
765error_ret:
766 if (err)
767 dev_err(scif_info.mdev.this_device,
768 "%s %d err %d\n", __func__, __LINE__, err);
769 return err;
770}
771EXPORT_SYMBOL_GPL(scif_fence_signal);
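scif_fence.c adds three ordering primitives: mark the RMAs issued so far on an endpoint, wait for such a mark to retire, and have a signal value land only after the preceding RMAs complete. A hedged kernel-client sketch of the common mark-then-wait-then-signal sequence follows; the doorbell offset and signal value are illustrative, not taken from this patch.

/* Illustrative ordering sketch: drain this endpoint's outstanding RMAs and
 * then write a completion flag into the peer's registered window.
 */
static int flush_and_notify(scif_epd_t epd, off_t remote_doorbell_off)
{
	int mark, err;

	err = scif_fence_mark(epd, SCIF_FENCE_INIT_SELF, &mark);
	if (err)
		return err;

	err = scif_fence_wait(epd, mark);	/* block until marked DMAs retire */
	if (err)
		return err;

	/* remote_doorbell_off is assumed to be u32-aligned as required above. */
	return scif_fence_signal(epd, 0, 0, remote_doorbell_off, 1,
				 SCIF_FENCE_INIT_SELF | SCIF_SIGNAL_REMOTE);
}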
diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c
index 6ce851f5c7e6..36d847af1209 100644
--- a/drivers/misc/mic/scif/scif_main.c
+++ b/drivers/misc/mic/scif/scif_main.c
@@ -34,6 +34,7 @@ struct scif_info scif_info = {
34}; 34};
35 35
36struct scif_dev *scif_dev; 36struct scif_dev *scif_dev;
37struct kmem_cache *unaligned_cache;
37static atomic_t g_loopb_cnt; 38static atomic_t g_loopb_cnt;
38 39
39/* Runs in the context of intr_wq */ 40/* Runs in the context of intr_wq */
@@ -80,35 +81,6 @@ irqreturn_t scif_intr_handler(int irq, void *data)
80 return IRQ_HANDLED; 81 return IRQ_HANDLED;
81} 82}
82 83
83static int scif_peer_probe(struct scif_peer_dev *spdev)
84{
85 struct scif_dev *scifdev = &scif_dev[spdev->dnode];
86
87 mutex_lock(&scif_info.conflock);
88 scif_info.total++;
89 scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
90 mutex_unlock(&scif_info.conflock);
91 rcu_assign_pointer(scifdev->spdev, spdev);
92
93 /* In the future SCIF kernel client devices will be added here */
94 return 0;
95}
96
97static void scif_peer_remove(struct scif_peer_dev *spdev)
98{
99 struct scif_dev *scifdev = &scif_dev[spdev->dnode];
100
101 /* In the future SCIF kernel client devices will be removed here */
102 spdev = rcu_dereference(scifdev->spdev);
103 if (spdev)
104 RCU_INIT_POINTER(scifdev->spdev, NULL);
105 synchronize_rcu();
106
107 mutex_lock(&scif_info.conflock);
108 scif_info.total--;
109 mutex_unlock(&scif_info.conflock);
110}
111
112static void scif_qp_setup_handler(struct work_struct *work) 84static void scif_qp_setup_handler(struct work_struct *work)
113{ 85{
114 struct scif_dev *scifdev = container_of(work, struct scif_dev, 86 struct scif_dev *scifdev = container_of(work, struct scif_dev,
@@ -139,20 +111,13 @@ static void scif_qp_setup_handler(struct work_struct *work)
139 } 111 }
140} 112}
141 113
142static int scif_setup_scifdev(struct scif_hw_dev *sdev) 114static int scif_setup_scifdev(void)
143{ 115{
116 /* We support a maximum of 129 SCIF nodes including the mgmt node */
117#define MAX_SCIF_NODES 129
144 int i; 118 int i;
145 u8 num_nodes; 119 u8 num_nodes = MAX_SCIF_NODES;
146
147 if (sdev->snode) {
148 struct mic_bootparam __iomem *bp = sdev->rdp;
149
150 num_nodes = ioread8(&bp->tot_nodes);
151 } else {
152 struct mic_bootparam *bp = sdev->dp;
153 120
154 num_nodes = bp->tot_nodes;
155 }
156 scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL); 121 scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL);
157 if (!scif_dev) 122 if (!scif_dev)
158 return -ENOMEM; 123 return -ENOMEM;
@@ -163,7 +128,7 @@ static int scif_setup_scifdev(struct scif_hw_dev *sdev)
163 scifdev->exit = OP_IDLE; 128 scifdev->exit = OP_IDLE;
164 init_waitqueue_head(&scifdev->disconn_wq); 129 init_waitqueue_head(&scifdev->disconn_wq);
165 mutex_init(&scifdev->lock); 130 mutex_init(&scifdev->lock);
166 INIT_WORK(&scifdev->init_msg_work, scif_qp_response_ack); 131 INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device);
167 INIT_DELAYED_WORK(&scifdev->p2p_dwork, 132 INIT_DELAYED_WORK(&scifdev->p2p_dwork,
168 scif_poll_qp_state); 133 scif_poll_qp_state);
169 INIT_DELAYED_WORK(&scifdev->qp_dwork, 134 INIT_DELAYED_WORK(&scifdev->qp_dwork,
@@ -181,27 +146,21 @@ static void scif_destroy_scifdev(void)
181 146
182static int scif_probe(struct scif_hw_dev *sdev) 147static int scif_probe(struct scif_hw_dev *sdev)
183{ 148{
184 struct scif_dev *scifdev; 149 struct scif_dev *scifdev = &scif_dev[sdev->dnode];
185 int rc; 150 int rc;
186 151
187 dev_set_drvdata(&sdev->dev, sdev); 152 dev_set_drvdata(&sdev->dev, sdev);
153 scifdev->sdev = sdev;
154
188 if (1 == atomic_add_return(1, &g_loopb_cnt)) { 155 if (1 == atomic_add_return(1, &g_loopb_cnt)) {
189 struct scif_dev *loopb_dev; 156 struct scif_dev *loopb_dev = &scif_dev[sdev->snode];
190 157
191 rc = scif_setup_scifdev(sdev);
192 if (rc)
193 goto exit;
194 scifdev = &scif_dev[sdev->dnode];
195 scifdev->sdev = sdev;
196 loopb_dev = &scif_dev[sdev->snode];
197 loopb_dev->sdev = sdev; 158 loopb_dev->sdev = sdev;
198 rc = scif_setup_loopback_qp(loopb_dev); 159 rc = scif_setup_loopback_qp(loopb_dev);
199 if (rc) 160 if (rc)
200 goto free_sdev; 161 goto exit;
201 } else {
202 scifdev = &scif_dev[sdev->dnode];
203 scifdev->sdev = sdev;
204 } 162 }
163
205 rc = scif_setup_intr_wq(scifdev); 164 rc = scif_setup_intr_wq(scifdev);
206 if (rc) 165 if (rc)
207 goto destroy_loopb; 166 goto destroy_loopb;
@@ -237,8 +196,6 @@ destroy_intr:
237destroy_loopb: 196destroy_loopb:
238 if (atomic_dec_and_test(&g_loopb_cnt)) 197 if (atomic_dec_and_test(&g_loopb_cnt))
239 scif_destroy_loopback_qp(&scif_dev[sdev->snode]); 198 scif_destroy_loopback_qp(&scif_dev[sdev->snode]);
240free_sdev:
241 scif_destroy_scifdev();
242exit: 199exit:
243 return rc; 200 return rc;
244} 201}
@@ -290,13 +247,6 @@ static void scif_remove(struct scif_hw_dev *sdev)
290 scifdev->sdev = NULL; 247 scifdev->sdev = NULL;
291} 248}
292 249
293static struct scif_peer_driver scif_peer_driver = {
294 .driver.name = KBUILD_MODNAME,
295 .driver.owner = THIS_MODULE,
296 .probe = scif_peer_probe,
297 .remove = scif_peer_remove,
298};
299
300static struct scif_hw_dev_id id_table[] = { 250static struct scif_hw_dev_id id_table[] = {
301 { MIC_SCIF_DEV, SCIF_DEV_ANY_ID }, 251 { MIC_SCIF_DEV, SCIF_DEV_ANY_ID },
302 { 0 }, 252 { 0 },
@@ -312,29 +262,54 @@ static struct scif_driver scif_driver = {
312 262
313static int _scif_init(void) 263static int _scif_init(void)
314{ 264{
315 spin_lock_init(&scif_info.eplock); 265 int rc;
266
267 mutex_init(&scif_info.eplock);
268 spin_lock_init(&scif_info.rmalock);
316 spin_lock_init(&scif_info.nb_connect_lock); 269 spin_lock_init(&scif_info.nb_connect_lock);
317 spin_lock_init(&scif_info.port_lock); 270 spin_lock_init(&scif_info.port_lock);
318 mutex_init(&scif_info.conflock); 271 mutex_init(&scif_info.conflock);
319 mutex_init(&scif_info.connlock); 272 mutex_init(&scif_info.connlock);
273 mutex_init(&scif_info.fencelock);
320 INIT_LIST_HEAD(&scif_info.uaccept); 274 INIT_LIST_HEAD(&scif_info.uaccept);
321 INIT_LIST_HEAD(&scif_info.listen); 275 INIT_LIST_HEAD(&scif_info.listen);
322 INIT_LIST_HEAD(&scif_info.zombie); 276 INIT_LIST_HEAD(&scif_info.zombie);
323 INIT_LIST_HEAD(&scif_info.connected); 277 INIT_LIST_HEAD(&scif_info.connected);
324 INIT_LIST_HEAD(&scif_info.disconnected); 278 INIT_LIST_HEAD(&scif_info.disconnected);
279 INIT_LIST_HEAD(&scif_info.rma);
280 INIT_LIST_HEAD(&scif_info.rma_tc);
281 INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup);
282 INIT_LIST_HEAD(&scif_info.fence);
325 INIT_LIST_HEAD(&scif_info.nb_connect_list); 283 INIT_LIST_HEAD(&scif_info.nb_connect_list);
326 init_waitqueue_head(&scif_info.exitwq); 284 init_waitqueue_head(&scif_info.exitwq);
285 scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT;
327 scif_info.en_msg_log = 0; 286 scif_info.en_msg_log = 0;
328 scif_info.p2p_enable = 1; 287 scif_info.p2p_enable = 1;
288 rc = scif_setup_scifdev();
289 if (rc)
290 goto error;
291 unaligned_cache = kmem_cache_create("Unaligned_DMA",
292 SCIF_KMEM_UNALIGNED_BUF_SIZE,
293 0, SLAB_HWCACHE_ALIGN, NULL);
294 if (!unaligned_cache) {
295 rc = -ENOMEM;
296 goto free_sdev;
297 }
329 INIT_WORK(&scif_info.misc_work, scif_misc_handler); 298 INIT_WORK(&scif_info.misc_work, scif_misc_handler);
299 INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler);
330 INIT_WORK(&scif_info.conn_work, scif_conn_handler); 300 INIT_WORK(&scif_info.conn_work, scif_conn_handler);
331 idr_init(&scif_ports); 301 idr_init(&scif_ports);
332 return 0; 302 return 0;
303free_sdev:
304 scif_destroy_scifdev();
305error:
306 return rc;
333} 307}
334 308
335static void _scif_exit(void) 309static void _scif_exit(void)
336{ 310{
337 idr_destroy(&scif_ports); 311 idr_destroy(&scif_ports);
312 kmem_cache_destroy(unaligned_cache);
338 scif_destroy_scifdev(); 313 scif_destroy_scifdev();
339} 314}
340 315
@@ -344,15 +319,13 @@ static int __init scif_init(void)
344 int rc; 319 int rc;
345 320
346 _scif_init(); 321 _scif_init();
322 iova_cache_get();
347 rc = scif_peer_bus_init(); 323 rc = scif_peer_bus_init();
348 if (rc) 324 if (rc)
349 goto exit; 325 goto exit;
350 rc = scif_peer_register_driver(&scif_peer_driver);
351 if (rc)
352 goto peer_bus_exit;
353 rc = scif_register_driver(&scif_driver); 326 rc = scif_register_driver(&scif_driver);
354 if (rc) 327 if (rc)
355 goto unreg_scif_peer; 328 goto peer_bus_exit;
356 rc = misc_register(mdev); 329 rc = misc_register(mdev);
357 if (rc) 330 if (rc)
358 goto unreg_scif; 331 goto unreg_scif;
@@ -360,8 +333,6 @@ static int __init scif_init(void)
360 return 0; 333 return 0;
361unreg_scif: 334unreg_scif:
362 scif_unregister_driver(&scif_driver); 335 scif_unregister_driver(&scif_driver);
363unreg_scif_peer:
364 scif_peer_unregister_driver(&scif_peer_driver);
365peer_bus_exit: 336peer_bus_exit:
366 scif_peer_bus_exit(); 337 scif_peer_bus_exit();
367exit: 338exit:
@@ -374,8 +345,8 @@ static void __exit scif_exit(void)
374 scif_exit_debugfs(); 345 scif_exit_debugfs();
375 misc_deregister(&scif_info.mdev); 346 misc_deregister(&scif_info.mdev);
376 scif_unregister_driver(&scif_driver); 347 scif_unregister_driver(&scif_driver);
377 scif_peer_unregister_driver(&scif_peer_driver);
378 scif_peer_bus_exit(); 348 scif_peer_bus_exit();
349 iova_cache_put();
379 _scif_exit(); 350 _scif_exit();
380} 351}
381 352
diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h
index 580bc63e1b23..a08f0b600a9e 100644
--- a/drivers/misc/mic/scif/scif_main.h
+++ b/drivers/misc/mic/scif/scif_main.h
@@ -22,15 +22,18 @@
22#include <linux/pci.h> 22#include <linux/pci.h>
23#include <linux/miscdevice.h> 23#include <linux/miscdevice.h>
24#include <linux/dmaengine.h> 24#include <linux/dmaengine.h>
25#include <linux/iova.h>
26#include <linux/anon_inodes.h>
25#include <linux/file.h> 27#include <linux/file.h>
28#include <linux/vmalloc.h>
26#include <linux/scif.h> 29#include <linux/scif.h>
27
28#include "../common/mic_dev.h" 30#include "../common/mic_dev.h"
29 31
30#define SCIF_MGMT_NODE 0 32#define SCIF_MGMT_NODE 0
31#define SCIF_DEFAULT_WATCHDOG_TO 30 33#define SCIF_DEFAULT_WATCHDOG_TO 30
32#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ) 34#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ)
33#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ) 35#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ)
36#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000
34 37
35/* 38/*
36 * Generic state used for certain node QP message exchanges 39 * Generic state used for certain node QP message exchanges
@@ -73,13 +76,21 @@ enum scif_msg_state {
73 * @loopb_work: Used for submitting work to loopb_wq 76 * @loopb_work: Used for submitting work to loopb_wq
74 * @loopb_recv_q: List of messages received on the loopb_wq 77 * @loopb_recv_q: List of messages received on the loopb_wq
75 * @card_initiated_exit: set when the card has initiated the exit 78 * @card_initiated_exit: set when the card has initiated the exit
79 * @rmalock: Synchronize access to RMA operations
80 * @fencelock: Synchronize access to list of remote fences requested.
81 * @rma: List of temporary registered windows to be destroyed.
82 * @rma_tc: List of temporary registered & cached Windows to be destroyed
83 * @fence: List of remote fence requests
84 * @mmu_notif_work: Work for registration caching MMU notifier workqueue
85 * @mmu_notif_cleanup: List of temporary cached windows for reg cache
86 * @rma_tc_limit: RMA temporary cache limit
76 */ 87 */
77struct scif_info { 88struct scif_info {
78 u8 nodeid; 89 u8 nodeid;
79 u8 maxid; 90 u8 maxid;
80 u8 total; 91 u8 total;
81 u32 nr_zombies; 92 u32 nr_zombies;
82 spinlock_t eplock; 93 struct mutex eplock;
83 struct mutex connlock; 94 struct mutex connlock;
84 spinlock_t nb_connect_lock; 95 spinlock_t nb_connect_lock;
85 spinlock_t port_lock; 96 spinlock_t port_lock;
@@ -102,6 +113,14 @@ struct scif_info {
102 struct work_struct loopb_work; 113 struct work_struct loopb_work;
103 struct list_head loopb_recv_q; 114 struct list_head loopb_recv_q;
104 bool card_initiated_exit; 115 bool card_initiated_exit;
116 spinlock_t rmalock;
117 struct mutex fencelock;
118 struct list_head rma;
119 struct list_head rma_tc;
120 struct list_head fence;
121 struct work_struct mmu_notif_work;
122 struct list_head mmu_notif_cleanup;
123 unsigned long rma_tc_limit;
105}; 124};
106 125
107/* 126/*
@@ -139,7 +158,7 @@ struct scif_p2p_info {
139 * @db: doorbell the peer will trigger to generate an interrupt on self 158 * @db: doorbell the peer will trigger to generate an interrupt on self
140 * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer 159 * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer
141 * @cookie: Cookie received while registering the interrupt handler 160 * @cookie: Cookie received while registering the interrupt handler
142 * init_msg_work: work scheduled for SCIF_INIT message processing 161 * @peer_add_work: Work for handling device_add for peer devices
143 * @p2p_dwork: Delayed work to enable polling for P2P state 162 * @p2p_dwork: Delayed work to enable polling for P2P state
144 * @qp_dwork: Delayed work for enabling polling for remote QP information 163 * @qp_dwork: Delayed work for enabling polling for remote QP information
145 * @p2p_retry: Number of times to retry polling of P2P state 164 * @p2p_retry: Number of times to retry polling of P2P state
@@ -152,6 +171,8 @@ struct scif_p2p_info {
152 * @disconn_rescnt: Keeps track of number of node remove requests sent 171 * @disconn_rescnt: Keeps track of number of node remove requests sent
153 * @exit: Status of exit message 172 * @exit: Status of exit message
154 * @qp_dma_addr: Queue pair DMA address passed to the peer 173 * @qp_dma_addr: Queue pair DMA address passed to the peer
174 * @dma_ch_idx: Round robin index for DMA channels
175 * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's
155*/ 176*/
156struct scif_dev { 177struct scif_dev {
157 u8 node; 178 u8 node;
@@ -165,7 +186,7 @@ struct scif_dev {
165 int db; 186 int db;
166 int rdb; 187 int rdb;
167 struct mic_irq *cookie; 188 struct mic_irq *cookie;
168 struct work_struct init_msg_work; 189 struct work_struct peer_add_work;
169 struct delayed_work p2p_dwork; 190 struct delayed_work p2p_dwork;
170 struct delayed_work qp_dwork; 191 struct delayed_work qp_dwork;
171 int p2p_retry; 192 int p2p_retry;
@@ -178,17 +199,25 @@ struct scif_dev {
178 atomic_t disconn_rescnt; 199 atomic_t disconn_rescnt;
179 enum scif_msg_state exit; 200 enum scif_msg_state exit;
180 dma_addr_t qp_dma_addr; 201 dma_addr_t qp_dma_addr;
202 int dma_ch_idx;
203 struct dma_pool *signal_pool;
181}; 204};
182 205
206extern bool scif_reg_cache_enable;
207extern bool scif_ulimit_check;
183extern struct scif_info scif_info; 208extern struct scif_info scif_info;
184extern struct idr scif_ports; 209extern struct idr scif_ports;
210extern struct bus_type scif_peer_bus;
185extern struct scif_dev *scif_dev; 211extern struct scif_dev *scif_dev;
186extern const struct file_operations scif_fops; 212extern const struct file_operations scif_fops;
213extern const struct file_operations scif_anon_fops;
187 214
188/* Size of the RB for the Node QP */ 215/* Size of the RB for the Node QP */
189#define SCIF_NODE_QP_SIZE 0x10000 216#define SCIF_NODE_QP_SIZE 0x10000
190 217
191#include "scif_nodeqp.h" 218#include "scif_nodeqp.h"
219#include "scif_rma.h"
220#include "scif_rma_list.h"
192 221
193/* 222/*
194 * scifdev_self: 223 * scifdev_self:
diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h
index 20e50b4e19b2..3e86360ba5a6 100644
--- a/drivers/misc/mic/scif/scif_map.h
+++ b/drivers/misc/mic/scif/scif_map.h
@@ -80,7 +80,7 @@ scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev,
80 size_t size) 80 size_t size)
81{ 81{
82 if (!scifdev_self(scifdev)) { 82 if (!scifdev_self(scifdev)) {
83 if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr) 83 if (scifdev_is_p2p(scifdev))
84 local = local - scifdev->base_addr; 84 local = local - scifdev->base_addr;
85 dma_unmap_single(&scifdev->sdev->dev, local, 85 dma_unmap_single(&scifdev->sdev->dev, local,
86 size, DMA_BIDIRECTIONAL); 86 size, DMA_BIDIRECTIONAL);
@@ -110,4 +110,27 @@ scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev)
110 sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt); 110 sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt);
111 } 111 }
112} 112}
113
114static __always_inline int
115scif_map_page(dma_addr_t *dma_handle, struct page *page,
116 struct scif_dev *scifdev)
117{
118 int err = 0;
119
120 if (scifdev_self(scifdev)) {
121 *dma_handle = page_to_phys(page);
122 } else {
123 struct scif_hw_dev *sdev = scifdev->sdev;
124 *dma_handle = dma_map_page(&sdev->dev,
125 page, 0x0, PAGE_SIZE,
126 DMA_BIDIRECTIONAL);
127 if (dma_mapping_error(&sdev->dev, *dma_handle))
128 err = -ENOMEM;
129 else if (scifdev_is_p2p(scifdev))
130 *dma_handle = *dma_handle + scifdev->base_addr;
131 }
132 if (err)
133 *dma_handle = 0;
134 return err;
135}
113#endif /* SCIF_MAP_H */ 136#endif /* SCIF_MAP_H */
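
For reference, a minimal sketch of how the new scif_map_page() helper pairs with the existing scif_unmap_single(); the wrapper function below is hypothetical and assumes the page has already been pinned by the caller:

static int scif_map_example(struct scif_dev *scifdev, struct page *page)
{
	dma_addr_t da;
	int err;

	/* Loopback: da is simply page_to_phys(); no DMA mapping is created */
	err = scif_map_page(&da, page, scifdev);
	if (err)
		return err;	/* -ENOMEM when dma_map_page() fails */

	/*
	 * For P2P peers scif_map_page() already added scifdev->base_addr,
	 * and scif_unmap_single() subtracts it again before unmapping.
	 */
	scif_unmap_single(da, scifdev, PAGE_SIZE);
	return 0;
}
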
diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c
new file mode 100644
index 000000000000..49cb8f7b4672
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_mmap.c
@@ -0,0 +1,699 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * Intel SCIF driver.
16 *
17 */
18#include "scif_main.h"
19
20/*
21 * struct scif_vma_info - Information about a remote memory mapping
22 * created via scif_mmap(..)
23 * @vma: VM area struct
24 * @list: link to list of active vmas
25 */
26struct scif_vma_info {
27 struct vm_area_struct *vma;
28 struct list_head list;
29};
30
31void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
32{
33 struct scif_rma_req req;
34 struct scif_window *window = NULL;
35 struct scif_window *recv_window =
36 (struct scif_window *)msg->payload[0];
37 struct scif_endpt *ep;
38
39 ep = (struct scif_endpt *)recv_window->ep;
40 req.out_window = &window;
41 req.offset = recv_window->offset;
42 req.prot = recv_window->prot;
43 req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
44 req.type = SCIF_WINDOW_FULL;
45 req.head = &ep->rma_info.reg_list;
46 msg->payload[0] = ep->remote_ep;
47
48 mutex_lock(&ep->rma_info.rma_lock);
49 /* Does a valid window exist? */
50 if (scif_query_window(&req)) {
51 dev_err(&scifdev->sdev->dev,
52 "%s %d -ENXIO\n", __func__, __LINE__);
53 msg->uop = SCIF_UNREGISTER_ACK;
54 goto error;
55 }
56
57 scif_put_window(window, window->nr_pages);
58
59 if (!window->ref_count) {
60 atomic_inc(&ep->rma_info.tw_refcount);
61 ep->rma_info.async_list_del = 1;
62 list_del_init(&window->list);
63 scif_free_window_offset(ep, window, window->offset);
64 }
65error:
66 mutex_unlock(&ep->rma_info.rma_lock);
67 if (window && !window->ref_count)
68 scif_queue_for_cleanup(window, &scif_info.rma);
69}
70
71/*
72 * Remove valid remote memory mappings created via scif_mmap(..) from the
73 * process address space since the remote node is lost
74 */
75static void __scif_zap_mmaps(struct scif_endpt *ep)
76{
77 struct list_head *item;
78 struct scif_vma_info *info;
79 struct vm_area_struct *vma;
80 unsigned long size;
81
82 spin_lock(&ep->lock);
83 list_for_each(item, &ep->rma_info.vma_list) {
84 info = list_entry(item, struct scif_vma_info, list);
85 vma = info->vma;
86 size = vma->vm_end - vma->vm_start;
87 zap_vma_ptes(vma, vma->vm_start, size);
88 dev_dbg(scif_info.mdev.this_device,
89 "%s ep %p zap vma %p size 0x%lx\n",
90 __func__, ep, info->vma, size);
91 }
92 spin_unlock(&ep->lock);
93}
94
95/*
96 * Traverse the list of endpoints for a particular remote node and
97 * zap valid remote memory mappings since the remote node is lost
98 */
99static void _scif_zap_mmaps(int node, struct list_head *head)
100{
101 struct scif_endpt *ep;
102 struct list_head *item;
103
104 mutex_lock(&scif_info.connlock);
105 list_for_each(item, head) {
106 ep = list_entry(item, struct scif_endpt, list);
107 if (ep->remote_dev->node == node)
108 __scif_zap_mmaps(ep);
109 }
110 mutex_unlock(&scif_info.connlock);
111}
112
113/*
114 * Wrapper for removing remote memory mappings for a particular node. This API
115 * is called by peer nodes as part of handling a lost node.
116 */
117void scif_zap_mmaps(int node)
118{
119 _scif_zap_mmaps(node, &scif_info.connected);
120 _scif_zap_mmaps(node, &scif_info.disconnected);
121}
122
123/*
124 * This API is only called while handling a lost node:
125 * a) Remote node is dead.
 126 * b) Remote memory mappings have been zapped.
127 * So we can traverse the remote_reg_list without any locks. Since
128 * the window has not yet been unregistered we can drop the ref count
129 * and queue it to the cleanup thread.
130 */
131static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
132{
133 struct list_head *pos, *tmp;
134 struct scif_window *window;
135
136 list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
137 window = list_entry(pos, struct scif_window, list);
138 if (window->ref_count)
139 scif_put_window(window, window->nr_pages);
140 else
141 dev_err(scif_info.mdev.this_device,
142 "%s %d unexpected\n",
143 __func__, __LINE__);
144 if (!window->ref_count) {
145 atomic_inc(&ep->rma_info.tw_refcount);
146 list_del_init(&window->list);
147 scif_queue_for_cleanup(window, &scif_info.rma);
148 }
149 }
150}
151
152/* Cleanup remote registration lists for zombie endpoints */
153void scif_cleanup_rma_for_zombies(int node)
154{
155 struct scif_endpt *ep;
156 struct list_head *item;
157
158 mutex_lock(&scif_info.eplock);
159 list_for_each(item, &scif_info.zombie) {
160 ep = list_entry(item, struct scif_endpt, list);
161 if (ep->remote_dev && ep->remote_dev->node == node)
162 __scif_cleanup_rma_for_zombies(ep);
163 }
164 mutex_unlock(&scif_info.eplock);
165 flush_work(&scif_info.misc_work);
166}
167
168/* Insert the VMA into the per endpoint VMA list */
169static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
170{
171 struct scif_vma_info *info;
172 int err = 0;
173
174 info = kzalloc(sizeof(*info), GFP_KERNEL);
175 if (!info) {
176 err = -ENOMEM;
177 goto done;
178 }
179 info->vma = vma;
180 spin_lock(&ep->lock);
181 list_add_tail(&info->list, &ep->rma_info.vma_list);
182 spin_unlock(&ep->lock);
183done:
184 return err;
185}
186
187/* Delete the VMA from the per endpoint VMA list */
188static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
189{
190 struct list_head *item;
191 struct scif_vma_info *info;
192
193 spin_lock(&ep->lock);
194 list_for_each(item, &ep->rma_info.vma_list) {
195 info = list_entry(item, struct scif_vma_info, list);
196 if (info->vma == vma) {
197 list_del(&info->list);
198 kfree(info);
199 break;
200 }
201 }
202 spin_unlock(&ep->lock);
203}
204
205static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
206{
207 struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
208 struct scif_hw_dev *sdev = scifdev->sdev;
209 phys_addr_t out_phys, apt_base = 0;
210
211 /*
212 * If the DMA address is card relative then we need to add the
213 * aperture base for mmap to work correctly
214 */
215 if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
216 apt_base = sdev->aper->pa;
217 out_phys = apt_base + phys;
218 return out_phys;
219}
220
221int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
222 struct scif_range **pages)
223{
224 struct scif_endpt *ep = (struct scif_endpt *)epd;
225 struct scif_rma_req req;
226 struct scif_window *window = NULL;
227 int nr_pages, err, i;
228
229 dev_dbg(scif_info.mdev.this_device,
230 "SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n",
231 ep, offset, len);
232 err = scif_verify_epd(ep);
233 if (err)
234 return err;
235
236 if (!len || (offset < 0) ||
237 (offset + len < offset) ||
238 (ALIGN(offset, PAGE_SIZE) != offset) ||
239 (ALIGN(len, PAGE_SIZE) != len))
240 return -EINVAL;
241
242 nr_pages = len >> PAGE_SHIFT;
243
244 req.out_window = &window;
245 req.offset = offset;
246 req.prot = 0;
247 req.nr_bytes = len;
248 req.type = SCIF_WINDOW_SINGLE;
249 req.head = &ep->rma_info.remote_reg_list;
250
251 mutex_lock(&ep->rma_info.rma_lock);
252 /* Does a valid window exist? */
253 err = scif_query_window(&req);
254 if (err) {
255 dev_err(&ep->remote_dev->sdev->dev,
256 "%s %d err %d\n", __func__, __LINE__, err);
257 goto error;
258 }
259
260 /* Allocate scif_range */
261 *pages = kzalloc(sizeof(**pages), GFP_KERNEL);
262 if (!*pages) {
263 err = -ENOMEM;
264 goto error;
265 }
266
267 /* Allocate phys addr array */
268 (*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
269 if (!((*pages)->phys_addr)) {
270 err = -ENOMEM;
271 goto error;
272 }
273
274 if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
275 /* Allocate virtual address array */
 276 (*pages)->va = scif_zalloc(nr_pages * sizeof(void *));
277 if (!(*pages)->va) {
278 err = -ENOMEM;
279 goto error;
280 }
281 }
282 /* Populate the values */
283 (*pages)->cookie = window;
284 (*pages)->nr_pages = nr_pages;
285 (*pages)->prot_flags = window->prot;
286
287 for (i = 0; i < nr_pages; i++) {
288 (*pages)->phys_addr[i] =
289 __scif_off_to_dma_addr(window, offset +
290 (i * PAGE_SIZE));
291 (*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
292 ep);
293 if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
294 (*pages)->va[i] =
295 ep->remote_dev->sdev->aper->va +
296 (*pages)->phys_addr[i] -
297 ep->remote_dev->sdev->aper->pa;
298 }
299
300 scif_get_window(window, nr_pages);
301error:
302 mutex_unlock(&ep->rma_info.rma_lock);
303 if (err) {
304 if (*pages) {
305 scif_free((*pages)->phys_addr,
306 nr_pages * sizeof(dma_addr_t));
307 scif_free((*pages)->va,
308 nr_pages * sizeof(void *));
309 kfree(*pages);
310 *pages = NULL;
311 }
312 dev_err(&ep->remote_dev->sdev->dev,
313 "%s %d err %d\n", __func__, __LINE__, err);
314 }
315 return err;
316}
317EXPORT_SYMBOL_GPL(scif_get_pages);
318
319int scif_put_pages(struct scif_range *pages)
320{
321 struct scif_endpt *ep;
322 struct scif_window *window;
323 struct scifmsg msg;
324
325 if (!pages || !pages->cookie)
326 return -EINVAL;
327
328 window = pages->cookie;
329
330 if (!window || window->magic != SCIFEP_MAGIC)
331 return -EINVAL;
332
333 ep = (struct scif_endpt *)window->ep;
334 /*
335 * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
336 * callee should be allowed to release references to the pages,
337 * else the endpoint was not connected in the first place,
338 * hence the ENOTCONN.
339 */
340 if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
341 return -ENOTCONN;
342
343 mutex_lock(&ep->rma_info.rma_lock);
344
345 scif_put_window(window, pages->nr_pages);
346
347 /* Initiate window destruction if ref count is zero */
348 if (!window->ref_count) {
349 list_del(&window->list);
350 mutex_unlock(&ep->rma_info.rma_lock);
351 scif_drain_dma_intr(ep->remote_dev->sdev,
352 ep->rma_info.dma_chan);
353 /* Inform the peer about this window being destroyed. */
354 msg.uop = SCIF_MUNMAP;
355 msg.src = ep->port;
356 msg.payload[0] = window->peer_window;
357 /* No error handling for notification messages */
358 scif_nodeqp_send(ep->remote_dev, &msg);
359 /* Destroy this window from the peer's registered AS */
360 scif_destroy_remote_window(window);
361 } else {
362 mutex_unlock(&ep->rma_info.rma_lock);
363 }
364
365 scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
366 scif_free(pages->va, pages->nr_pages * sizeof(void *));
367 kfree(pages);
368 return 0;
369}
370EXPORT_SYMBOL_GPL(scif_put_pages);
371
372/*
373 * scif_rma_list_mmap:
374 *
375 * Traverse the remote registration list starting from start_window:
376 * 1) Create VtoP mappings via remap_pfn_range(..)
 377 * 2) Once step 1) completes successfully for the whole range, traverse the
 378 * windows again and bump the reference count.
379 * RMA lock must be held.
380 */
381static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
382 int nr_pages, struct vm_area_struct *vma)
383{
384 s64 end_offset, loop_offset = offset;
385 struct scif_window *window = start_window;
386 int loop_nr_pages, nr_pages_left = nr_pages;
387 struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
388 struct list_head *head = &ep->rma_info.remote_reg_list;
389 int i, err = 0;
390 dma_addr_t phys_addr;
391 struct scif_window_iter src_win_iter;
392 size_t contig_bytes = 0;
393
394 might_sleep();
395 list_for_each_entry_from(window, head, list) {
396 end_offset = window->offset +
397 (window->nr_pages << PAGE_SHIFT);
398 loop_nr_pages = min_t(int,
399 (end_offset - loop_offset) >> PAGE_SHIFT,
400 nr_pages_left);
401 scif_init_window_iter(window, &src_win_iter);
402 for (i = 0; i < loop_nr_pages; i++) {
403 phys_addr = scif_off_to_dma_addr(window, loop_offset,
404 &contig_bytes,
405 &src_win_iter);
406 phys_addr = scif_get_phys(phys_addr, ep);
407 err = remap_pfn_range(vma,
408 vma->vm_start +
409 loop_offset - offset,
410 phys_addr >> PAGE_SHIFT,
411 PAGE_SIZE,
412 vma->vm_page_prot);
413 if (err)
414 goto error;
415 loop_offset += PAGE_SIZE;
416 }
417 nr_pages_left -= loop_nr_pages;
418 if (!nr_pages_left)
419 break;
420 }
421 /*
422 * No more failures expected. Bump up the ref count for all
423 * the windows. Another traversal from start_window required
424 * for handling errors encountered across windows during
425 * remap_pfn_range(..).
426 */
427 loop_offset = offset;
428 nr_pages_left = nr_pages;
429 window = start_window;
430 head = &ep->rma_info.remote_reg_list;
431 list_for_each_entry_from(window, head, list) {
432 end_offset = window->offset +
433 (window->nr_pages << PAGE_SHIFT);
434 loop_nr_pages = min_t(int,
435 (end_offset - loop_offset) >> PAGE_SHIFT,
436 nr_pages_left);
437 scif_get_window(window, loop_nr_pages);
438 nr_pages_left -= loop_nr_pages;
439 loop_offset += (loop_nr_pages << PAGE_SHIFT);
440 if (!nr_pages_left)
441 break;
442 }
443error:
444 if (err)
445 dev_err(scif_info.mdev.this_device,
446 "%s %d err %d\n", __func__, __LINE__, err);
447 return err;
448}
449
450/*
451 * scif_rma_list_munmap:
452 *
453 * Traverse the remote registration list starting from window:
454 * 1) Decrement ref count.
455 * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
456 * RMA lock must be held.
457 */
458static void scif_rma_list_munmap(struct scif_window *start_window,
459 s64 offset, int nr_pages)
460{
461 struct scifmsg msg;
462 s64 loop_offset = offset, end_offset;
463 int loop_nr_pages, nr_pages_left = nr_pages;
464 struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
465 struct list_head *head = &ep->rma_info.remote_reg_list;
466 struct scif_window *window = start_window, *_window;
467
468 msg.uop = SCIF_MUNMAP;
469 msg.src = ep->port;
470 loop_offset = offset;
471 nr_pages_left = nr_pages;
472 list_for_each_entry_safe_from(window, _window, head, list) {
473 end_offset = window->offset +
474 (window->nr_pages << PAGE_SHIFT);
475 loop_nr_pages = min_t(int,
476 (end_offset - loop_offset) >> PAGE_SHIFT,
477 nr_pages_left);
478 scif_put_window(window, loop_nr_pages);
479 if (!window->ref_count) {
480 struct scif_dev *rdev = ep->remote_dev;
481
482 scif_drain_dma_intr(rdev->sdev,
483 ep->rma_info.dma_chan);
484 /* Inform the peer about this munmap */
485 msg.payload[0] = window->peer_window;
486 /* No error handling for Notification messages. */
487 scif_nodeqp_send(ep->remote_dev, &msg);
488 list_del(&window->list);
489 /* Destroy this window from the peer's registered AS */
490 scif_destroy_remote_window(window);
491 }
492 nr_pages_left -= loop_nr_pages;
493 loop_offset += (loop_nr_pages << PAGE_SHIFT);
494 if (!nr_pages_left)
495 break;
496 }
497}
498
499/*
500 * The private data field of each VMA used to mmap a remote window
501 * points to an instance of struct vma_pvt
502 */
503struct vma_pvt {
504 struct scif_endpt *ep; /* End point for remote window */
505 s64 offset; /* offset within remote window */
506 bool valid_offset; /* offset is valid only if the original
507 * mmap request was for a single page
508 * else the offset within the vma is
509 * the correct offset
510 */
511 struct kref ref;
512};
513
514static void vma_pvt_release(struct kref *ref)
515{
516 struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);
517
518 kfree(vmapvt);
519}
520
521/**
522 * scif_vma_open - VMA open driver callback
523 * @vma: VMM memory area.
524 * The open method is called by the kernel to allow the subsystem implementing
525 * the VMA to initialize the area. This method is invoked any time a new
526 * reference to the VMA is made (when a process forks, for example).
527 * The one exception happens when the VMA is first created by mmap;
528 * in this case, the driver's mmap method is called instead.
529 * This function is also invoked when an existing VMA is split by the kernel
530 * due to a call to munmap on a subset of the VMA resulting in two VMAs.
531 * The kernel invokes this function only on one of the two VMAs.
532 */
533static void scif_vma_open(struct vm_area_struct *vma)
534{
535 struct vma_pvt *vmapvt = vma->vm_private_data;
536
537 dev_dbg(scif_info.mdev.this_device,
538 "SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
539 vma->vm_start, vma->vm_end);
540 scif_insert_vma(vmapvt->ep, vma);
541 kref_get(&vmapvt->ref);
542}
543
544/**
545 * scif_munmap - VMA close driver callback.
546 * @vma: VMM memory area.
547 * When an area is destroyed, the kernel calls its close operation.
 548 * Note that there's no usage count associated with VMAs; the area
549 * is opened and closed exactly once by each process that uses it.
550 */
551static void scif_munmap(struct vm_area_struct *vma)
552{
553 struct scif_endpt *ep;
554 struct vma_pvt *vmapvt = vma->vm_private_data;
555 int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
556 s64 offset;
557 struct scif_rma_req req;
558 struct scif_window *window = NULL;
559 int err;
560
561 might_sleep();
562 dev_dbg(scif_info.mdev.this_device,
563 "SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
564 vma->vm_start, vma->vm_end);
565 ep = vmapvt->ep;
566 offset = vmapvt->valid_offset ? vmapvt->offset :
567 (vma->vm_pgoff) << PAGE_SHIFT;
568 dev_dbg(scif_info.mdev.this_device,
569 "SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
570 ep, nr_pages, offset);
571 req.out_window = &window;
572 req.offset = offset;
573 req.nr_bytes = vma->vm_end - vma->vm_start;
574 req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
575 req.type = SCIF_WINDOW_PARTIAL;
576 req.head = &ep->rma_info.remote_reg_list;
577
578 mutex_lock(&ep->rma_info.rma_lock);
579
580 err = scif_query_window(&req);
581 if (err)
582 dev_err(scif_info.mdev.this_device,
583 "%s %d err %d\n", __func__, __LINE__, err);
584 else
585 scif_rma_list_munmap(window, offset, nr_pages);
586
587 mutex_unlock(&ep->rma_info.rma_lock);
588 /*
589 * The kernel probably zeroes these out but we still want
590 * to clean up our own mess just in case.
591 */
592 vma->vm_ops = NULL;
593 vma->vm_private_data = NULL;
594 kref_put(&vmapvt->ref, vma_pvt_release);
595 scif_delete_vma(ep, vma);
596}
597
598static const struct vm_operations_struct scif_vm_ops = {
599 .open = scif_vma_open,
600 .close = scif_munmap,
601};
602
603/**
604 * scif_mmap - Map pages in virtual address space to a remote window.
605 * @vma: VMM memory area.
606 * @epd: endpoint descriptor
607 *
608 * Return: Upon successful completion, scif_mmap() returns zero
 609 * else an appropriate error code is returned as documented in scif.h
610 */
611int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
612{
613 struct scif_rma_req req;
614 struct scif_window *window = NULL;
615 struct scif_endpt *ep = (struct scif_endpt *)epd;
616 s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
617 int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
618 int err;
619 struct vma_pvt *vmapvt;
620
621 dev_dbg(scif_info.mdev.this_device,
622 "SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
623 ep, start_offset, nr_pages);
624 err = scif_verify_epd(ep);
625 if (err)
626 return err;
627
628 might_sleep();
629
630 err = scif_insert_vma(ep, vma);
631 if (err)
632 return err;
633
634 vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
635 if (!vmapvt) {
636 scif_delete_vma(ep, vma);
637 return -ENOMEM;
638 }
639
640 vmapvt->ep = ep;
641 kref_init(&vmapvt->ref);
642
643 req.out_window = &window;
644 req.offset = start_offset;
645 req.nr_bytes = vma->vm_end - vma->vm_start;
646 req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
647 req.type = SCIF_WINDOW_PARTIAL;
648 req.head = &ep->rma_info.remote_reg_list;
649
650 mutex_lock(&ep->rma_info.rma_lock);
651 /* Does a valid window exist? */
652 err = scif_query_window(&req);
653 if (err) {
654 dev_err(&ep->remote_dev->sdev->dev,
655 "%s %d err %d\n", __func__, __LINE__, err);
656 goto error_unlock;
657 }
658
 659 /* Loopback keeps the default page protection; use write-combining otherwise */
660 if (!scifdev_self(ep->remote_dev))
661 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
662
663 /*
664 * VM_DONTCOPY - Do not copy this vma on fork
665 * VM_DONTEXPAND - Cannot expand with mremap()
 666 * VM_DONTDUMP - Do not include this vma in core dumps
667 * VM_PFNMAP - Page-ranges managed without "struct page"
668 * VM_IO - Memory mapped I/O or similar
669 *
670 * We do not want to copy this VMA automatically on a fork(),
671 * expand this VMA due to mremap() or swap out these pages since
672 * the VMA is actually backed by physical pages in the remote
673 * node's physical memory and not via a struct page.
674 */
675 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
676
677 if (!scifdev_self(ep->remote_dev))
678 vma->vm_flags |= VM_IO | VM_PFNMAP;
679
680 /* Map this range of windows */
681 err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
682 if (err) {
683 dev_err(&ep->remote_dev->sdev->dev,
684 "%s %d err %d\n", __func__, __LINE__, err);
685 goto error_unlock;
686 }
 687 /* Set up the driver callback */
688 vma->vm_ops = &scif_vm_ops;
689 vma->vm_private_data = vmapvt;
690error_unlock:
691 mutex_unlock(&ep->rma_info.rma_lock);
692 if (err) {
693 kfree(vmapvt);
694 dev_err(&ep->remote_dev->sdev->dev,
695 "%s %d err %d\n", __func__, __LINE__, err);
696 scif_delete_vma(ep, vma);
697 }
698 return err;
699}
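
The two exported helpers above are intended for in-kernel SCIF clients; a minimal, hypothetical caller might look like the sketch below (the endpoint is assumed to be connected, and offset/len must be page aligned as enforced in scif_get_pages()):

static int scif_dump_remote_pages(scif_epd_t epd, off_t offset, size_t len)
{
	struct scif_range *range;
	int i, err;

	err = scif_get_pages(epd, offset, len, &range);
	if (err)
		return err;	/* e.g. -EINVAL for bad args, or no matching window */

	for (i = 0; i < range->nr_pages; i++)
		pr_debug("remote page %d at %pad\n", i, &range->phys_addr[i]);

	/* Drops the window reference; may send SCIF_MUNMAP to the peer */
	return scif_put_pages(range);
}
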
diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c
index 9b4c5382d6a7..79f26a02a1cb 100644
--- a/drivers/misc/mic/scif/scif_nm.c
+++ b/drivers/misc/mic/scif/scif_nm.c
@@ -34,6 +34,7 @@ static void scif_invalidate_ep(int node)
34 list_for_each_safe(pos, tmpq, &scif_info.disconnected) { 34 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
35 ep = list_entry(pos, struct scif_endpt, list); 35 ep = list_entry(pos, struct scif_endpt, list);
36 if (ep->remote_dev->node == node) { 36 if (ep->remote_dev->node == node) {
37 scif_unmap_all_windows(ep);
37 spin_lock(&ep->lock); 38 spin_lock(&ep->lock);
38 scif_cleanup_ep_qp(ep); 39 scif_cleanup_ep_qp(ep);
39 spin_unlock(&ep->lock); 40 spin_unlock(&ep->lock);
@@ -50,6 +51,7 @@ static void scif_invalidate_ep(int node)
50 wake_up_interruptible(&ep->sendwq); 51 wake_up_interruptible(&ep->sendwq);
51 wake_up_interruptible(&ep->recvwq); 52 wake_up_interruptible(&ep->recvwq);
52 spin_unlock(&ep->lock); 53 spin_unlock(&ep->lock);
54 scif_unmap_all_windows(ep);
53 } 55 }
54 } 56 }
55 mutex_unlock(&scif_info.connlock); 57 mutex_unlock(&scif_info.connlock);
@@ -61,8 +63,8 @@ void scif_free_qp(struct scif_dev *scifdev)
61 63
62 if (!qp) 64 if (!qp)
63 return; 65 return;
64 scif_free_coherent((void *)qp->inbound_q.rb_base, 66 scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size);
65 qp->local_buf, scifdev, qp->inbound_q.size); 67 kfree(qp->inbound_q.rb_base);
66 scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp)); 68 scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp));
67 kfree(scifdev->qpairs); 69 kfree(scifdev->qpairs);
68 scifdev->qpairs = NULL; 70 scifdev->qpairs = NULL;
@@ -125,8 +127,12 @@ void scif_cleanup_scifdev(struct scif_dev *dev)
125 } 127 }
126 scif_destroy_intr_wq(dev); 128 scif_destroy_intr_wq(dev);
127 } 129 }
130 flush_work(&scif_info.misc_work);
128 scif_destroy_p2p(dev); 131 scif_destroy_p2p(dev);
129 scif_invalidate_ep(dev->node); 132 scif_invalidate_ep(dev->node);
133 scif_zap_mmaps(dev->node);
134 scif_cleanup_rma_for_zombies(dev->node);
135 flush_work(&scif_info.misc_work);
130 scif_send_acks(dev); 136 scif_send_acks(dev);
131 if (!dev->node && scif_info.card_initiated_exit) { 137 if (!dev->node && scif_info.card_initiated_exit) {
132 /* 138 /*
@@ -147,14 +153,8 @@ void scif_cleanup_scifdev(struct scif_dev *dev)
147void scif_handle_remove_node(int node) 153void scif_handle_remove_node(int node)
148{ 154{
149 struct scif_dev *scifdev = &scif_dev[node]; 155 struct scif_dev *scifdev = &scif_dev[node];
150 struct scif_peer_dev *spdev; 156
151 157 if (scif_peer_unregister_device(scifdev))
152 rcu_read_lock();
153 spdev = rcu_dereference(scifdev->spdev);
154 rcu_read_unlock();
155 if (spdev)
156 scif_peer_unregister_device(spdev);
157 else
158 scif_send_acks(scifdev); 158 scif_send_acks(scifdev);
159} 159}
160 160
diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c
index 6dfdae3452d6..c66ca1a5814e 100644
--- a/drivers/misc/mic/scif/scif_nodeqp.c
+++ b/drivers/misc/mic/scif/scif_nodeqp.c
@@ -105,18 +105,22 @@
105int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset, 105int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset,
106 int local_size, struct scif_dev *scifdev) 106 int local_size, struct scif_dev *scifdev)
107{ 107{
108 void *local_q = NULL; 108 void *local_q = qp->inbound_q.rb_base;
109 int err = 0; 109 int err = 0;
110 u32 tmp_rd = 0; 110 u32 tmp_rd = 0;
111 111
112 spin_lock_init(&qp->send_lock); 112 spin_lock_init(&qp->send_lock);
113 spin_lock_init(&qp->recv_lock); 113 spin_lock_init(&qp->recv_lock);
114 114
115 local_q = kzalloc(local_size, GFP_KERNEL); 115 /* Allocate rb only if not already allocated */
116 if (!local_q) { 116 if (!local_q) {
117 err = -ENOMEM; 117 local_q = kzalloc(local_size, GFP_KERNEL);
118 return err; 118 if (!local_q) {
119 err = -ENOMEM;
120 return err;
121 }
119 } 122 }
123
120 err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size); 124 err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size);
121 if (err) 125 if (err)
122 goto kfree; 126 goto kfree;
@@ -260,6 +264,11 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev,
260 r_buf, 264 r_buf,
261 get_count_order(remote_size)); 265 get_count_order(remote_size));
262 /* 266 /*
267 * Because the node QP may already be processing an INIT message, set
268 * the read pointer so the cached read offset isn't lost
269 */
270 qp->remote_qp->local_read = qp->inbound_q.current_read_offset;
271 /*
263 * resetup the inbound_q now that we know where the 272 * resetup the inbound_q now that we know where the
264 * inbound_read really is. 273 * inbound_read really is.
265 */ 274 */
@@ -426,6 +435,21 @@ free_p2p:
426 return NULL; 435 return NULL;
427} 436}
428 437
438/* Uninitialize and release resources from a p2p mapping */
439static void scif_deinit_p2p_info(struct scif_dev *scifdev,
440 struct scif_p2p_info *p2p)
441{
442 struct scif_hw_dev *sdev = scifdev->sdev;
443
444 dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO],
445 p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL);
446 dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER],
447 p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL);
448 scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]);
449 scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]);
450 kfree(p2p);
451}
452
429/** 453/**
430 * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message 454 * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message
431 * @dst: Destination node 455 * @dst: Destination node
@@ -468,8 +492,10 @@ static void scif_node_connect(struct scif_dev *scifdev, int dst)
468 if (!p2p_ij) 492 if (!p2p_ij)
469 return; 493 return;
470 p2p_ji = scif_init_p2p_info(dev_j, dev_i); 494 p2p_ji = scif_init_p2p_info(dev_j, dev_i);
471 if (!p2p_ji) 495 if (!p2p_ji) {
496 scif_deinit_p2p_info(dev_i, p2p_ij);
472 return; 497 return;
498 }
473 list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p); 499 list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p);
474 list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p); 500 list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p);
475 501
@@ -529,27 +555,6 @@ static void scif_p2p_setup(void)
529 } 555 }
530} 556}
531 557
532void scif_qp_response_ack(struct work_struct *work)
533{
534 struct scif_dev *scifdev = container_of(work, struct scif_dev,
535 init_msg_work);
536 struct scif_peer_dev *spdev;
537
538 /* Drop the INIT message if it has already been received */
539 if (_scifdev_alive(scifdev))
540 return;
541
542 spdev = scif_peer_register_device(scifdev);
543 if (IS_ERR(spdev))
544 return;
545
546 if (scif_is_mgmt_node()) {
547 mutex_lock(&scif_info.conflock);
548 scif_p2p_setup();
549 mutex_unlock(&scif_info.conflock);
550 }
551}
552
553static char *message_types[] = {"BAD", 558static char *message_types[] = {"BAD",
554 "INIT", 559 "INIT",
555 "EXIT", 560 "EXIT",
@@ -568,7 +573,29 @@ static char *message_types[] = {"BAD",
568 "DISCNT_ACK", 573 "DISCNT_ACK",
569 "CLIENT_SENT", 574 "CLIENT_SENT",
570 "CLIENT_RCVD", 575 "CLIENT_RCVD",
571 "SCIF_GET_NODE_INFO"}; 576 "SCIF_GET_NODE_INFO",
577 "REGISTER",
578 "REGISTER_ACK",
579 "REGISTER_NACK",
580 "UNREGISTER",
581 "UNREGISTER_ACK",
582 "UNREGISTER_NACK",
583 "ALLOC_REQ",
584 "ALLOC_GNT",
585 "ALLOC_REJ",
586 "FREE_PHYS",
587 "FREE_VIRT",
588 "MUNMAP",
589 "MARK",
590 "MARK_ACK",
591 "MARK_NACK",
592 "WAIT",
593 "WAIT_ACK",
594 "WAIT_NACK",
595 "SIGNAL_LOCAL",
596 "SIGNAL_REMOTE",
597 "SIG_ACK",
598 "SIG_NACK"};
572 599
573static void 600static void
574scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg, 601scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg,
@@ -662,10 +689,16 @@ int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg)
662 * 689 *
663 * Work queue handler for servicing miscellaneous SCIF tasks. 690 * Work queue handler for servicing miscellaneous SCIF tasks.
664 * Examples include: 691 * Examples include:
665 * 1) Cleanup of zombie endpoints. 692 * 1) Remote fence requests.
693 * 2) Destruction of temporary registered windows
694 * created during scif_vreadfrom()/scif_vwriteto().
695 * 3) Cleanup of zombie endpoints.
666 */ 696 */
667void scif_misc_handler(struct work_struct *work) 697void scif_misc_handler(struct work_struct *work)
668{ 698{
699 scif_rma_handle_remote_fences();
700 scif_rma_destroy_windows();
701 scif_rma_destroy_tcw_invalid();
669 scif_cleanup_zombie_epd(); 702 scif_cleanup_zombie_epd();
670} 703}
671 704
@@ -682,13 +715,14 @@ scif_init(struct scif_dev *scifdev, struct scifmsg *msg)
682 * address to complete initializing the inbound_q. 715 * address to complete initializing the inbound_q.
683 */ 716 */
684 flush_delayed_work(&scifdev->qp_dwork); 717 flush_delayed_work(&scifdev->qp_dwork);
685 /* 718
686 * Delegate the peer device registration to a workqueue, otherwise if 719 scif_peer_register_device(scifdev);
687 * SCIF client probe (called during peer device registration) calls 720
688 * scif_connect(..), it will block the message processing thread causing 721 if (scif_is_mgmt_node()) {
689 * a deadlock. 722 mutex_lock(&scif_info.conflock);
690 */ 723 scif_p2p_setup();
691 schedule_work(&scifdev->init_msg_work); 724 mutex_unlock(&scif_info.conflock);
725 }
692} 726}
693 727
694/** 728/**
@@ -838,13 +872,13 @@ void scif_poll_qp_state(struct work_struct *work)
838 msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT)); 872 msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT));
839 return; 873 return;
840 } 874 }
841 scif_peer_register_device(peerdev);
842 return; 875 return;
843timeout: 876timeout:
844 dev_err(&peerdev->sdev->dev, 877 dev_err(&peerdev->sdev->dev,
845 "%s %d remote node %d offline, state = 0x%x\n", 878 "%s %d remote node %d offline, state = 0x%x\n",
846 __func__, __LINE__, peerdev->node, qp->qp_state); 879 __func__, __LINE__, peerdev->node, qp->qp_state);
847 qp->remote_qp->qp_state = SCIF_QP_OFFLINE; 880 qp->remote_qp->qp_state = SCIF_QP_OFFLINE;
881 scif_peer_unregister_device(peerdev);
848 scif_cleanup_scifdev(peerdev); 882 scif_cleanup_scifdev(peerdev);
849} 883}
850 884
@@ -894,6 +928,9 @@ scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg)
894 goto local_error; 928 goto local_error;
895 peerdev->rdb = msg->payload[2]; 929 peerdev->rdb = msg->payload[2];
896 qp->remote_qp->qp_state = SCIF_QP_ONLINE; 930 qp->remote_qp->qp_state = SCIF_QP_ONLINE;
931
932 scif_peer_register_device(peerdev);
933
897 schedule_delayed_work(&peerdev->p2p_dwork, 0); 934 schedule_delayed_work(&peerdev->p2p_dwork, 0);
898 return; 935 return;
899local_error: 936local_error:
@@ -1007,6 +1044,27 @@ static void (*scif_intr_func[SCIF_MAX_MSG + 1])
1007 scif_clientsend, /* SCIF_CLIENT_SENT */ 1044 scif_clientsend, /* SCIF_CLIENT_SENT */
1008 scif_clientrcvd, /* SCIF_CLIENT_RCVD */ 1045 scif_clientrcvd, /* SCIF_CLIENT_RCVD */
1009 scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */ 1046 scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */
1047 scif_recv_reg, /* SCIF_REGISTER */
1048 scif_recv_reg_ack, /* SCIF_REGISTER_ACK */
1049 scif_recv_reg_nack, /* SCIF_REGISTER_NACK */
1050 scif_recv_unreg, /* SCIF_UNREGISTER */
1051 scif_recv_unreg_ack, /* SCIF_UNREGISTER_ACK */
1052 scif_recv_unreg_nack, /* SCIF_UNREGISTER_NACK */
1053 scif_alloc_req, /* SCIF_ALLOC_REQ */
1054 scif_alloc_gnt_rej, /* SCIF_ALLOC_GNT */
1055 scif_alloc_gnt_rej, /* SCIF_ALLOC_REJ */
1056 scif_free_virt, /* SCIF_FREE_VIRT */
1057 scif_recv_munmap, /* SCIF_MUNMAP */
1058 scif_recv_mark, /* SCIF_MARK */
1059 scif_recv_mark_resp, /* SCIF_MARK_ACK */
1060 scif_recv_mark_resp, /* SCIF_MARK_NACK */
1061 scif_recv_wait, /* SCIF_WAIT */
1062 scif_recv_wait_resp, /* SCIF_WAIT_ACK */
1063 scif_recv_wait_resp, /* SCIF_WAIT_NACK */
1064 scif_recv_sig_local, /* SCIF_SIG_LOCAL */
1065 scif_recv_sig_remote, /* SCIF_SIG_REMOTE */
1066 scif_recv_sig_resp, /* SCIF_SIG_ACK */
1067 scif_recv_sig_resp, /* SCIF_SIG_NACK */
1010}; 1068};
1011 1069
1012/** 1070/**
@@ -1169,7 +1227,6 @@ int scif_setup_loopback_qp(struct scif_dev *scifdev)
1169 int err = 0; 1227 int err = 0;
1170 void *local_q; 1228 void *local_q;
1171 struct scif_qp *qp; 1229 struct scif_qp *qp;
1172 struct scif_peer_dev *spdev;
1173 1230
1174 err = scif_setup_intr_wq(scifdev); 1231 err = scif_setup_intr_wq(scifdev);
1175 if (err) 1232 if (err)
@@ -1216,15 +1273,11 @@ int scif_setup_loopback_qp(struct scif_dev *scifdev)
1216 &qp->local_write, 1273 &qp->local_write,
1217 local_q, get_count_order(SCIF_NODE_QP_SIZE)); 1274 local_q, get_count_order(SCIF_NODE_QP_SIZE));
1218 scif_info.nodeid = scifdev->node; 1275 scif_info.nodeid = scifdev->node;
1219 spdev = scif_peer_register_device(scifdev); 1276
1220 if (IS_ERR(spdev)) { 1277 scif_peer_register_device(scifdev);
1221 err = PTR_ERR(spdev); 1278
1222 goto free_local_q;
1223 }
1224 scif_info.loopb_dev = scifdev; 1279 scif_info.loopb_dev = scifdev;
1225 return err; 1280 return err;
1226free_local_q:
1227 kfree(local_q);
1228free_qpairs: 1281free_qpairs:
1229 kfree(scifdev->qpairs); 1282 kfree(scifdev->qpairs);
1230destroy_loopb_wq: 1283destroy_loopb_wq:
@@ -1243,13 +1296,7 @@ exit:
1243 */ 1296 */
1244int scif_destroy_loopback_qp(struct scif_dev *scifdev) 1297int scif_destroy_loopback_qp(struct scif_dev *scifdev)
1245{ 1298{
1246 struct scif_peer_dev *spdev; 1299 scif_peer_unregister_device(scifdev);
1247
1248 rcu_read_lock();
1249 spdev = rcu_dereference(scifdev->spdev);
1250 rcu_read_unlock();
1251 if (spdev)
1252 scif_peer_unregister_device(spdev);
1253 destroy_workqueue(scif_info.loopb_wq); 1300 destroy_workqueue(scif_info.loopb_wq);
1254 scif_destroy_intr_wq(scifdev); 1301 scif_destroy_intr_wq(scifdev);
1255 kfree(scifdev->qpairs->outbound_q.rb_base); 1302 kfree(scifdev->qpairs->outbound_q.rb_base);
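
With the dispatch table extended above for the new RMA messages, a node QP message is handled by indexing the table with its uop after a bounds check. The dispatcher below only illustrates that pattern under the types visible in this diff; the function name is hypothetical, not the driver's actual handler:

static void scif_nodeqp_dispatch(struct scif_dev *scifdev, struct scifmsg *msg)
{
	if (msg->uop > SCIF_MAX_MSG)
		return;		/* unknown message, drop it */
	/* Every slot from SCIF_INIT up to SCIF_SIG_NACK has a handler */
	scif_intr_func[msg->uop](scifdev, msg);
}
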
diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h
index 6c0ed6783479..95896273138e 100644
--- a/drivers/misc/mic/scif/scif_nodeqp.h
+++ b/drivers/misc/mic/scif/scif_nodeqp.h
@@ -74,7 +74,28 @@
74#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */ 74#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */
75#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */ 75#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */
76#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/ 76#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/
77#define SCIF_MAX_MSG SCIF_GET_NODE_INFO 77#define SCIF_REGISTER 19 /* Tell peer about a new registered window */
 78#define SCIF_REGISTER_ACK 20 /* Notify peer about registration success */
 79#define SCIF_REGISTER_NACK 21 /* Notify peer about registration failure */
 80#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */
 81#define SCIF_UNREGISTER_ACK 23 /* Notify peer about unregistration success */
82#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */
83#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */
84#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */
85#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */
86#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */
 87#define SCIF_MUNMAP 29 /* Notify peer that a mapping of its window was released */
88#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */
89#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */
90#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */
91#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */
92#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */
93#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */
94#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */
95#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */
96#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */
97#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */
98#define SCIF_MAX_MSG SCIF_SIG_NACK
78 99
79/* 100/*
80 * struct scifmsg - Node QP message format 101 * struct scifmsg - Node QP message format
@@ -92,6 +113,24 @@ struct scifmsg {
92} __packed; 113} __packed;
93 114
94/* 115/*
116 * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request
 117 * the remote node to allocate memory
118 *
119 * phys_addr: Physical address of the buffer
120 * vaddr: Virtual address of the buffer
121 * size: Size of the buffer
122 * state: Current state
123 * allocwq: wait queue for status
124 */
125struct scif_allocmsg {
126 dma_addr_t phys_addr;
127 unsigned long vaddr;
128 size_t size;
129 enum scif_msg_state state;
130 wait_queue_head_t allocwq;
131};
132
133/*
95 * struct scif_qp - Node Queue Pair 134 * struct scif_qp - Node Queue Pair
96 * 135 *
97 * Interesting structure -- a little difficult because we can only 136 * Interesting structure -- a little difficult because we can only
@@ -158,7 +197,6 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev,
158int scif_setup_loopback_qp(struct scif_dev *scifdev); 197int scif_setup_loopback_qp(struct scif_dev *scifdev);
159int scif_destroy_loopback_qp(struct scif_dev *scifdev); 198int scif_destroy_loopback_qp(struct scif_dev *scifdev);
160void scif_poll_qp_state(struct work_struct *work); 199void scif_poll_qp_state(struct work_struct *work);
161void scif_qp_response_ack(struct work_struct *work);
162void scif_destroy_p2p(struct scif_dev *scifdev); 200void scif_destroy_p2p(struct scif_dev *scifdev);
163void scif_send_exit(struct scif_dev *scifdev); 201void scif_send_exit(struct scif_dev *scifdev);
164static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev) 202static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev)
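
The new struct scif_allocmsg above is what the registration path sleeps on while the peer services a SCIF_ALLOC_REQ: the requester waits on allocwq until state leaves OP_IN_PROGRESS, retrying as long as the remote node is alive, which is the same pattern scif_destroy_incomplete_window() uses in scif_rma.c further below. A condensed sketch, where only the wrapper name is hypothetical:

static int scif_wait_for_alloc_reply(struct scif_endpt *ep,
				     struct scif_allocmsg *alloc)
{
	int err;

	do {
		/* Woken by the SCIF_ALLOC_GNT / SCIF_ALLOC_REJ handler */
		err = wait_event_timeout(alloc->allocwq,
					 alloc->state != OP_IN_PROGRESS,
					 SCIF_NODE_ALIVE_TIMEOUT);
	} while (!err && scifdev_alive(ep));

	return alloc->state == OP_COMPLETED ? 0 : -ENOMEM;
}
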
diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c
index 589ae9ad2501..6ffa3bdbd45b 100644
--- a/drivers/misc/mic/scif/scif_peer_bus.c
+++ b/drivers/misc/mic/scif/scif_peer_bus.c
@@ -24,93 +24,152 @@ dev_to_scif_peer(struct device *dev)
24 return container_of(dev, struct scif_peer_dev, dev); 24 return container_of(dev, struct scif_peer_dev, dev);
25} 25}
26 26
27static inline struct scif_peer_driver * 27struct bus_type scif_peer_bus = {
28drv_to_scif_peer(struct device_driver *drv) 28 .name = "scif_peer_bus",
29{ 29};
30 return container_of(drv, struct scif_peer_driver, driver);
31}
32 30
33static int scif_peer_dev_match(struct device *dv, struct device_driver *dr) 31static void scif_peer_release_dev(struct device *d)
34{ 32{
35 return !strncmp(dev_name(dv), dr->name, 4); 33 struct scif_peer_dev *sdev = dev_to_scif_peer(d);
34 struct scif_dev *scifdev = &scif_dev[sdev->dnode];
35
36 scif_cleanup_scifdev(scifdev);
37 kfree(sdev);
36} 38}
37 39
38static int scif_peer_dev_probe(struct device *d) 40static int scif_peer_initialize_device(struct scif_dev *scifdev)
39{ 41{
40 struct scif_peer_dev *dev = dev_to_scif_peer(d); 42 struct scif_peer_dev *spdev;
41 struct scif_peer_driver *drv = drv_to_scif_peer(dev->dev.driver); 43 int ret;
42 44
43 return drv->probe(dev); 45 spdev = kzalloc(sizeof(*spdev), GFP_KERNEL);
44} 46 if (!spdev) {
47 ret = -ENOMEM;
48 goto err;
49 }
45 50
46static int scif_peer_dev_remove(struct device *d) 51 spdev->dev.parent = scifdev->sdev->dev.parent;
47{ 52 spdev->dev.release = scif_peer_release_dev;
48 struct scif_peer_dev *dev = dev_to_scif_peer(d); 53 spdev->dnode = scifdev->node;
49 struct scif_peer_driver *drv = drv_to_scif_peer(dev->dev.driver); 54 spdev->dev.bus = &scif_peer_bus;
55 dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
56
57 device_initialize(&spdev->dev);
58 get_device(&spdev->dev);
59 rcu_assign_pointer(scifdev->spdev, spdev);
50 60
51 drv->remove(dev); 61 mutex_lock(&scif_info.conflock);
62 scif_info.total++;
63 scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid);
64 mutex_unlock(&scif_info.conflock);
52 return 0; 65 return 0;
66err:
67 dev_err(&scifdev->sdev->dev,
68 "dnode %d: initialize_device rc %d\n", scifdev->node, ret);
69 return ret;
53} 70}
54 71
55static struct bus_type scif_peer_bus = { 72static int scif_peer_add_device(struct scif_dev *scifdev)
56 .name = "scif_peer_bus",
57 .match = scif_peer_dev_match,
58 .probe = scif_peer_dev_probe,
59 .remove = scif_peer_dev_remove,
60};
61
62int scif_peer_register_driver(struct scif_peer_driver *driver)
63{ 73{
64 driver->driver.bus = &scif_peer_bus; 74 struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev);
65 return driver_register(&driver->driver); 75 char pool_name[16];
76 int ret;
77
78 ret = device_add(&spdev->dev);
79 put_device(&spdev->dev);
80 if (ret) {
81 dev_err(&scifdev->sdev->dev,
82 "dnode %d: peer device_add failed\n", scifdev->node);
83 goto put_spdev;
84 }
85
86 scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode);
87 scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev,
88 sizeof(struct scif_status), 1,
89 0);
90 if (!scifdev->signal_pool) {
91 dev_err(&scifdev->sdev->dev,
92 "dnode %d: dmam_pool_create failed\n", scifdev->node);
93 ret = -ENOMEM;
94 goto del_spdev;
95 }
96 dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode);
97 return 0;
98del_spdev:
99 device_del(&spdev->dev);
100put_spdev:
101 RCU_INIT_POINTER(scifdev->spdev, NULL);
102 synchronize_rcu();
103 put_device(&spdev->dev);
104
105 mutex_lock(&scif_info.conflock);
106 scif_info.total--;
107 mutex_unlock(&scif_info.conflock);
108 return ret;
66} 109}
67 110
68void scif_peer_unregister_driver(struct scif_peer_driver *driver) 111void scif_add_peer_device(struct work_struct *work)
69{ 112{
70 driver_unregister(&driver->driver); 113 struct scif_dev *scifdev = container_of(work, struct scif_dev,
114 peer_add_work);
115
116 scif_peer_add_device(scifdev);
71} 117}
72 118
73static void scif_peer_release_dev(struct device *d) 119/*
120 * Peer device registration is split into a device_initialize and a device_add.
121 * The reason for doing this is as follows: First, peer device registration
122 * itself cannot be done in the message processing thread and must be delegated
123 * to another workqueue, otherwise if SCIF client probe, called during peer
124 * device registration, calls scif_connect(..), it will block the message
125 * processing thread causing a deadlock. Next, device_initialize is done in the
126 * "top-half" message processing thread and device_add in the "bottom-half"
127 * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing
128 * concurrently with SCIF_INIT message processing is unable to get a reference
129 * on the peer device, thereby failing the connect request.
130 */
131void scif_peer_register_device(struct scif_dev *scifdev)
74{ 132{
75 struct scif_peer_dev *sdev = dev_to_scif_peer(d); 133 int ret;
76 struct scif_dev *scifdev = &scif_dev[sdev->dnode];
77 134
78 scif_cleanup_scifdev(scifdev); 135 mutex_lock(&scifdev->lock);
79 kfree(sdev); 136 ret = scif_peer_initialize_device(scifdev);
137 if (ret)
138 goto exit;
139 schedule_work(&scifdev->peer_add_work);
140exit:
141 mutex_unlock(&scifdev->lock);
80} 142}
81 143
82struct scif_peer_dev * 144int scif_peer_unregister_device(struct scif_dev *scifdev)
83scif_peer_register_device(struct scif_dev *scifdev)
84{ 145{
85 int ret;
86 struct scif_peer_dev *spdev; 146 struct scif_peer_dev *spdev;
87 147
88 spdev = kzalloc(sizeof(*spdev), GFP_KERNEL); 148 mutex_lock(&scifdev->lock);
89 if (!spdev) 149 /* Flush work to ensure device register is complete */
90 return ERR_PTR(-ENOMEM); 150 flush_work(&scifdev->peer_add_work);
91
92 spdev->dev.parent = scifdev->sdev->dev.parent;
93 spdev->dev.release = scif_peer_release_dev;
94 spdev->dnode = scifdev->node;
95 spdev->dev.bus = &scif_peer_bus;
96 151
97 dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode);
98 /* 152 /*
99 * device_register() causes the bus infrastructure to look for a 153 * Continue holding scifdev->lock since theoretically unregister_device
100 * matching driver. 154 * can be called simultaneously from multiple threads
101 */ 155 */
102 ret = device_register(&spdev->dev); 156 spdev = rcu_dereference(scifdev->spdev);
103 if (ret) 157 if (!spdev) {
104 goto free_spdev; 158 mutex_unlock(&scifdev->lock);
105 return spdev; 159 return -ENODEV;
106free_spdev: 160 }
107 kfree(spdev); 161
108 return ERR_PTR(ret); 162 RCU_INIT_POINTER(scifdev->spdev, NULL);
109} 163 synchronize_rcu();
110 164 mutex_unlock(&scifdev->lock);
111void scif_peer_unregister_device(struct scif_peer_dev *sdev) 165
112{ 166 dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode);
113 device_unregister(&sdev->dev); 167 device_unregister(&spdev->dev);
168
169 mutex_lock(&scif_info.conflock);
170 scif_info.total--;
171 mutex_unlock(&scif_info.conflock);
172 return 0;
114} 173}
115 174
116int scif_peer_bus_init(void) 175int scif_peer_bus_init(void)
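
Since the peer device pointer is now published with rcu_assign_pointer() and torn down with RCU_INIT_POINTER() plus synchronize_rcu(), readers are expected to pin the device inside an RCU read-side section before using it. A minimal sketch of that access pattern, with a hypothetical helper name:

static struct scif_peer_dev *scif_peer_get(struct scif_dev *scifdev)
{
	struct scif_peer_dev *spdev;

	rcu_read_lock();
	spdev = rcu_dereference(scifdev->spdev);
	if (spdev)
		get_device(&spdev->dev);	/* hold it past the RCU section */
	rcu_read_unlock();
	return spdev;	/* caller does put_device(&spdev->dev) when done */
}
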
diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h
index 33f0dbb30152..a3b8dd2edaa5 100644
--- a/drivers/misc/mic/scif/scif_peer_bus.h
+++ b/drivers/misc/mic/scif/scif_peer_bus.h
@@ -19,47 +19,13 @@
19 19
20#include <linux/device.h> 20#include <linux/device.h>
21#include <linux/mic_common.h> 21#include <linux/mic_common.h>
22 22#include <linux/scif.h>
23/*
24 * Peer devices show up as PCIe devices for the mgmt node but not the cards.
25 * The mgmt node discovers all the cards on the PCIe bus and informs the other
26 * cards about their peers. Upon notification of a peer a node adds a peer
27 * device to the peer bus to maintain symmetry in the way devices are
28 * discovered across all nodes in the SCIF network.
29 */
30/**
31 * scif_peer_dev - representation of a peer SCIF device
32 * @dev: underlying device
33 * @dnode - The destination node which this device will communicate with.
34 */
35struct scif_peer_dev {
36 struct device dev;
37 u8 dnode;
38};
39
40/**
41 * scif_peer_driver - operations for a scif_peer I/O driver
42 * @driver: underlying device driver (populate name and owner).
43 * @id_table: the ids serviced by this driver.
44 * @probe: the function to call when a device is found. Returns 0 or -errno.
45 * @remove: the function to call when a device is removed.
46 */
47struct scif_peer_driver {
48 struct device_driver driver;
49 const struct scif_peer_dev_id *id_table;
50
51 int (*probe)(struct scif_peer_dev *dev);
52 void (*remove)(struct scif_peer_dev *dev);
53};
54 23
55struct scif_dev; 24struct scif_dev;
56 25
57int scif_peer_register_driver(struct scif_peer_driver *driver); 26void scif_add_peer_device(struct work_struct *work);
58void scif_peer_unregister_driver(struct scif_peer_driver *driver); 27void scif_peer_register_device(struct scif_dev *sdev);
59 28int scif_peer_unregister_device(struct scif_dev *scifdev);
60struct scif_peer_dev *scif_peer_register_device(struct scif_dev *sdev);
61void scif_peer_unregister_device(struct scif_peer_dev *sdev);
62
63int scif_peer_bus_init(void); 29int scif_peer_bus_init(void);
64void scif_peer_bus_exit(void); 30void scif_peer_bus_exit(void);
65#endif /* _SCIF_PEER_BUS_H */ 31#endif /* _SCIF_PEER_BUS_H */
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
new file mode 100644
index 000000000000..8310b4dbff06
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -0,0 +1,1775 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * Intel SCIF driver.
16 *
17 */
18#include <linux/dma_remapping.h>
19#include <linux/pagemap.h>
20#include "scif_main.h"
21#include "scif_map.h"
22
23/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
24#define SCIF_MAP_ULIMIT 0x40
25
26bool scif_ulimit_check = 1;
27
28/**
29 * scif_rma_ep_init:
30 * @ep: end point
31 *
32 * Initialize RMA per EP data structures.
33 */
34void scif_rma_ep_init(struct scif_endpt *ep)
35{
36 struct scif_endpt_rma_info *rma = &ep->rma_info;
37
38 mutex_init(&rma->rma_lock);
39 init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
40 SCIF_DMA_64BIT_PFN);
41 spin_lock_init(&rma->tc_lock);
42 mutex_init(&rma->mmn_lock);
43 INIT_LIST_HEAD(&rma->reg_list);
44 INIT_LIST_HEAD(&rma->remote_reg_list);
45 atomic_set(&rma->tw_refcount, 0);
46 atomic_set(&rma->tcw_refcount, 0);
47 atomic_set(&rma->tcw_total_pages, 0);
48 atomic_set(&rma->fence_refcount, 0);
49
50 rma->async_list_del = 0;
51 rma->dma_chan = NULL;
52 INIT_LIST_HEAD(&rma->mmn_list);
53 INIT_LIST_HEAD(&rma->vma_list);
54 init_waitqueue_head(&rma->markwq);
55}
56
57/**
58 * scif_rma_ep_can_uninit:
59 * @ep: end point
60 *
61 * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
62 */
63int scif_rma_ep_can_uninit(struct scif_endpt *ep)
64{
65 int ret = 0;
66
67 mutex_lock(&ep->rma_info.rma_lock);
68 /* Destroy RMA Info only if both lists are empty */
69 if (list_empty(&ep->rma_info.reg_list) &&
70 list_empty(&ep->rma_info.remote_reg_list) &&
71 list_empty(&ep->rma_info.mmn_list) &&
72 !atomic_read(&ep->rma_info.tw_refcount) &&
73 !atomic_read(&ep->rma_info.tcw_refcount) &&
74 !atomic_read(&ep->rma_info.fence_refcount))
75 ret = 1;
76 mutex_unlock(&ep->rma_info.rma_lock);
77 return ret;
78}
79
80/**
81 * scif_create_pinned_pages:
82 * @nr_pages: number of pages in window
83 * @prot: read/write protection
84 *
85 * Allocate and prepare a set of pinned pages.
86 */
87static struct scif_pinned_pages *
88scif_create_pinned_pages(int nr_pages, int prot)
89{
90 struct scif_pinned_pages *pin;
91
92 might_sleep();
93 pin = scif_zalloc(sizeof(*pin));
94 if (!pin)
95 goto error;
96
97 pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
98 if (!pin->pages)
99 goto error_free_pinned_pages;
100
101 pin->prot = prot;
102 pin->magic = SCIFEP_MAGIC;
103 return pin;
104
105error_free_pinned_pages:
106 scif_free(pin, sizeof(*pin));
107error:
108 return NULL;
109}
110
111/**
112 * scif_destroy_pinned_pages:
113 * @pin: A set of pinned pages.
114 *
115 * Deallocate resources for pinned pages.
116 */
117static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
118{
119 int j;
120 int writeable = pin->prot & SCIF_PROT_WRITE;
121 int kernel = SCIF_MAP_KERNEL & pin->map_flags;
122
123 for (j = 0; j < pin->nr_pages; j++) {
124 if (pin->pages[j] && !kernel) {
125 if (writeable)
126 SetPageDirty(pin->pages[j]);
127 put_page(pin->pages[j]);
128 }
129 }
130
131 scif_free(pin->pages,
132 pin->nr_pages * sizeof(*pin->pages));
133 scif_free(pin, sizeof(*pin));
134 return 0;
135}
136
137/*
138 * scif_create_window:
139 * @ep: end point
140 * @nr_pages: number of pages
141 * @offset: registration offset
142 * @temp: true if a temporary window is being created
143 *
144 * Allocate and prepare a self registration window.
145 */
146struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
147 s64 offset, bool temp)
148{
149 struct scif_window *window;
150
151 might_sleep();
152 window = scif_zalloc(sizeof(*window));
153 if (!window)
154 goto error;
155
156 window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
157 if (!window->dma_addr)
158 goto error_free_window;
159
160 window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
161 if (!window->num_pages)
162 goto error_free_window;
163
164 window->offset = offset;
165 window->ep = (u64)ep;
166 window->magic = SCIFEP_MAGIC;
167 window->reg_state = OP_IDLE;
168 init_waitqueue_head(&window->regwq);
169 window->unreg_state = OP_IDLE;
170 init_waitqueue_head(&window->unregwq);
171 INIT_LIST_HEAD(&window->list);
172 window->type = SCIF_WINDOW_SELF;
173 window->temp = temp;
174 return window;
175
176error_free_window:
177 scif_free(window->dma_addr,
178 nr_pages * sizeof(*window->dma_addr));
179 scif_free(window, sizeof(*window));
180error:
181 return NULL;
182}
183
184/**
185 * scif_destroy_incomplete_window:
186 * @ep: end point
187 * @window: registration window
188 *
189 * Deallocate resources for self window.
190 */
191static void scif_destroy_incomplete_window(struct scif_endpt *ep,
192 struct scif_window *window)
193{
194 int err;
195 int nr_pages = window->nr_pages;
196 struct scif_allocmsg *alloc = &window->alloc_handle;
197 struct scifmsg msg;
198
199retry:
200 /* Wait for a SCIF_ALLOC_GNT/REJ message */
201 err = wait_event_timeout(alloc->allocwq,
202 alloc->state != OP_IN_PROGRESS,
203 SCIF_NODE_ALIVE_TIMEOUT);
204 if (!err && scifdev_alive(ep))
205 goto retry;
206
207 mutex_lock(&ep->rma_info.rma_lock);
208 if (alloc->state == OP_COMPLETED) {
209 msg.uop = SCIF_FREE_VIRT;
210 msg.src = ep->port;
211 msg.payload[0] = ep->remote_ep;
212 msg.payload[1] = window->alloc_handle.vaddr;
213 msg.payload[2] = (u64)window;
214 msg.payload[3] = SCIF_REGISTER;
215 _scif_nodeqp_send(ep->remote_dev, &msg);
216 }
217 mutex_unlock(&ep->rma_info.rma_lock);
218
219 scif_free_window_offset(ep, window, window->offset);
220 scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
221 scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
222 scif_free(window, sizeof(*window));
223}
224
225/**
226 * scif_unmap_window:
227 * @remote_dev: SCIF remote device
228 * @window: registration window
229 *
230 * Delete any DMA mappings created for a registered self window
231 */
232void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
233{
234 int j;
235
236 if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
237 if (window->st) {
238 dma_unmap_sg(&remote_dev->sdev->dev,
239 window->st->sgl, window->st->nents,
240 DMA_BIDIRECTIONAL);
241 sg_free_table(window->st);
242 kfree(window->st);
243 window->st = NULL;
244 }
245 } else {
246 for (j = 0; j < window->nr_contig_chunks; j++) {
247 if (window->dma_addr[j]) {
248 scif_unmap_single(window->dma_addr[j],
249 remote_dev,
250 window->num_pages[j] <<
251 PAGE_SHIFT);
252 window->dma_addr[j] = 0x0;
253 }
254 }
255 }
256}
257
258static inline struct mm_struct *__scif_acquire_mm(void)
259{
260 if (scif_ulimit_check)
261 return get_task_mm(current);
262 return NULL;
263}
264
265static inline void __scif_release_mm(struct mm_struct *mm)
266{
267 if (mm)
268 mmput(mm);
269}
270
271static inline int
272__scif_dec_pinned_vm_lock(struct mm_struct *mm,
273 int nr_pages, bool try_lock)
274{
275 if (!mm || !nr_pages || !scif_ulimit_check)
276 return 0;
277 if (try_lock) {
278 if (!down_write_trylock(&mm->mmap_sem)) {
279 dev_err(scif_info.mdev.this_device,
280 "%s %d err\n", __func__, __LINE__);
281 return -1;
282 }
283 } else {
284 down_write(&mm->mmap_sem);
285 }
286 mm->pinned_vm -= nr_pages;
287 up_write(&mm->mmap_sem);
288 return 0;
289}
290
291static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
292 int nr_pages)
293{
294 unsigned long locked, lock_limit;
295
296 if (!mm || !nr_pages || !scif_ulimit_check)
297 return 0;
298
299 locked = nr_pages;
300 locked += mm->pinned_vm;
301 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
302 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
303 dev_err(scif_info.mdev.this_device,
304 "locked(%lu) > lock_limit(%lu)\n",
305 locked, lock_limit);
306 return -ENOMEM;
307 }
308 mm->pinned_vm = locked;
309 return 0;
310}
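
The two helpers above implement the usual RLIMIT_MEMLOCK accounting: charge pinned_vm under mmap_sem before pinning and undo the charge if the pin fails. A minimal sketch of that flow follows; scif_example_charge_and_pin() is a hypothetical name and the actual pin step is elided.

static int scif_example_charge_and_pin(struct mm_struct *mm, int nr_pages)
{
	int pinned, err;

	down_write(&mm->mmap_sem);
	err = __scif_check_inc_pinned_vm(mm, nr_pages);	/* charge quota */
	if (err) {
		up_write(&mm->mmap_sem);
		return err;	/* over RLIMIT_MEMLOCK without CAP_IPC_LOCK */
	}
	pinned = 0;		/* get_user_pages() would run here */
	up_write(&mm->mmap_sem);
	if (pinned != nr_pages) {
		/* undo the charge if the pin did not fully succeed */
		__scif_dec_pinned_vm_lock(mm, nr_pages, 0);
		return -EFAULT;
	}
	return 0;
}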
311
312/**
313 * scif_destroy_window:
314 * @ep: end point
315 * @window: registration window
316 *
317 * Deallocate resources for self window.
318 */
319int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
320{
321 int j;
322 struct scif_pinned_pages *pinned_pages = window->pinned_pages;
323 int nr_pages = window->nr_pages;
324
325 might_sleep();
326 if (!window->temp && window->mm) {
327 __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
328 __scif_release_mm(window->mm);
329 window->mm = NULL;
330 }
331
332 scif_free_window_offset(ep, window, window->offset);
333 scif_unmap_window(ep->remote_dev, window);
334 /*
335 * Decrement references for this set of pinned pages from
336 * this window.
337 */
338 j = atomic_sub_return(1, &pinned_pages->ref_count);
339 if (j < 0)
340 dev_err(scif_info.mdev.this_device,
341 "%s %d incorrect ref count %d\n",
342 __func__, __LINE__, j);
343 /*
344 * If the ref count for pinned_pages is zero then someone
345 * has already called scif_unpin_pages() for it and we should
346 * destroy the page cache.
347 */
348 if (!j)
349 scif_destroy_pinned_pages(window->pinned_pages);
350 scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
351 scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
352 window->magic = 0;
353 scif_free(window, sizeof(*window));
354 return 0;
355}
356
357/**
358 * scif_create_remote_lookup:
359 * @remote_dev: SCIF remote device
360 * @window: remote window
361 *
362 * Allocate and prepare lookup entries for the remote
363 * end to copy over the physical addresses.
364 * Returns 0 on success and appropriate errno on failure.
365 */
366static int scif_create_remote_lookup(struct scif_dev *remote_dev,
367 struct scif_window *window)
368{
369 int i, j, err = 0;
370 int nr_pages = window->nr_pages;
371 bool vmalloc_dma_phys, vmalloc_num_pages;
372
373 might_sleep();
374 /* Map window */
375 err = scif_map_single(&window->mapped_offset,
376 window, remote_dev, sizeof(*window));
377 if (err)
378 goto error_window;
379
380 /* Compute the number of lookup entries. 21 == 2MB Shift */
381 window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
382 ((2) * 1024 * 1024)) >> 21;
383
384 window->dma_addr_lookup.lookup =
385 scif_alloc_coherent(&window->dma_addr_lookup.offset,
386 remote_dev, window->nr_lookup *
387 sizeof(*window->dma_addr_lookup.lookup),
388 GFP_KERNEL | __GFP_ZERO);
389 if (!window->dma_addr_lookup.lookup) {
390 err = -ENOMEM;
391 goto error_window;
392 }
393
394 window->num_pages_lookup.lookup =
395 scif_alloc_coherent(&window->num_pages_lookup.offset,
396 remote_dev, window->nr_lookup *
397 sizeof(*window->num_pages_lookup.lookup),
398 GFP_KERNEL | __GFP_ZERO);
399 if (!window->num_pages_lookup.lookup) {
400 err = -ENOMEM;
401 goto error_window;
402 }
403
404 vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
405 vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
406
407 /* Now map each of the pages containing physical addresses */
408 for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
409 err = scif_map_page(&window->dma_addr_lookup.lookup[j],
410 vmalloc_dma_phys ?
411 vmalloc_to_page(&window->dma_addr[i]) :
412 virt_to_page(&window->dma_addr[i]),
413 remote_dev);
414 if (err)
415 goto error_window;
416 err = scif_map_page(&window->num_pages_lookup.lookup[j],
417 vmalloc_dma_phys ?
418 vmalloc_to_page(&window->num_pages[i]) :
419 virt_to_page(&window->num_pages[i]),
420 remote_dev);
421 if (err)
422 goto error_window;
423 }
424 return 0;
425error_window:
426 return err;
427}
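
For reference, the lookup sizing used above works out as follows (illustration only; scif_example_nr_lookup() is not part of the driver): each lookup entry maps one 4K page of addresses, i.e. SCIF_NR_ADDR_IN_PAGE (512) registered pages or 2MB, so a 10MB (2560 page) window needs 5 entries.

static inline s64 scif_example_nr_lookup(s64 nr_pages)
{
	/* equivalent to ALIGN(nr_pages * PAGE_SIZE, 2MB) >> 21 above */
	return ALIGN(nr_pages * PAGE_SIZE, SCIF_NR_ADDR_IN_PAGE * PAGE_SIZE)
		>> ilog2(SCIF_NR_ADDR_IN_PAGE * PAGE_SIZE);
}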
428
429/**
430 * scif_destroy_remote_lookup:
431 * @remote_dev: SCIF remote device
432 * @window: remote window
433 *
434 * Destroy lookup entries used for the remote
435 * end to copy over the physical addresses.
436 */
437static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
438 struct scif_window *window)
439{
440 int i, j;
441
442 if (window->nr_lookup) {
443 struct scif_rma_lookup *lup = &window->dma_addr_lookup;
444 struct scif_rma_lookup *npup = &window->num_pages_lookup;
445
446 for (i = 0, j = 0; i < window->nr_pages;
447 i += SCIF_NR_ADDR_IN_PAGE, j++) {
448 if (lup->lookup && lup->lookup[j])
449 scif_unmap_single(lup->lookup[j],
450 remote_dev,
451 PAGE_SIZE);
452 if (npup->lookup && npup->lookup[j])
453 scif_unmap_single(npup->lookup[j],
454 remote_dev,
455 PAGE_SIZE);
456 }
457 if (lup->lookup)
458 scif_free_coherent(lup->lookup, lup->offset,
459 remote_dev, window->nr_lookup *
460 sizeof(*lup->lookup));
461 if (npup->lookup)
462 scif_free_coherent(npup->lookup, npup->offset,
463 remote_dev, window->nr_lookup *
464 sizeof(*npup->lookup));
465 if (window->mapped_offset)
466 scif_unmap_single(window->mapped_offset,
467 remote_dev, sizeof(*window));
468 window->nr_lookup = 0;
469 }
470}
471
472/**
473 * scif_create_remote_window:
474 * @scifdev: SCIF device
475 * @nr_pages: number of pages in window
476 *
477 * Allocate and prepare a remote registration window.
478 */
479static struct scif_window *
480scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
481{
482 struct scif_window *window;
483
484 might_sleep();
485 window = scif_zalloc(sizeof(*window));
486 if (!window)
487 goto error_ret;
488
489 window->magic = SCIFEP_MAGIC;
490 window->nr_pages = nr_pages;
491
492 window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
493 if (!window->dma_addr)
494 goto error_window;
495
496 window->num_pages = scif_zalloc(nr_pages *
497 sizeof(*window->num_pages));
498 if (!window->num_pages)
499 goto error_window;
500
501 if (scif_create_remote_lookup(scifdev, window))
502 goto error_window;
503
504 window->type = SCIF_WINDOW_PEER;
505 window->unreg_state = OP_IDLE;
506 INIT_LIST_HEAD(&window->list);
507 return window;
508error_window:
509 scif_destroy_remote_window(window);
510error_ret:
511 return NULL;
512}
513
514/**
515 * scif_destroy_remote_window:
517 * @window: remote registration window
518 *
519 * Deallocate resources for remote window.
520 */
521void
522scif_destroy_remote_window(struct scif_window *window)
523{
524 scif_free(window->dma_addr, window->nr_pages *
525 sizeof(*window->dma_addr));
526 scif_free(window->num_pages, window->nr_pages *
527 sizeof(*window->num_pages));
528 window->magic = 0;
529 scif_free(window, sizeof(*window));
530}
531
532/**
533 * scif_iommu_map: create DMA mappings if the IOMMU is enabled
534 * @remote_dev: SCIF remote device
535 * @window: remote registration window
536 *
537 * Map the physical pages using dma_map_sg(..) and then detect the number
538 * of contiguous DMA mappings allocated
539 */
540static int scif_iommu_map(struct scif_dev *remote_dev,
541 struct scif_window *window)
542{
543 struct scatterlist *sg;
544 int i, err;
545 scif_pinned_pages_t pin = window->pinned_pages;
546
547 window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
548 if (!window->st)
549 return -ENOMEM;
550
551 err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
552 if (err)
553 return err;
554
555 for_each_sg(window->st->sgl, sg, window->st->nents, i)
556 sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);
557
558 err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
559 window->st->nents, DMA_BIDIRECTIONAL);
560 if (!err)
561 return -ENOMEM;
562 /* Detect contiguous ranges of DMA mappings */
563 sg = window->st->sgl;
564 for (i = 0; sg; i++) {
565 dma_addr_t last_da;
566
567 window->dma_addr[i] = sg_dma_address(sg);
568 window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
569 last_da = sg_dma_address(sg) + sg_dma_len(sg);
570 while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
571 window->num_pages[i] +=
572 (sg_dma_len(sg) >> PAGE_SHIFT);
573 last_da = window->dma_addr[i] +
574 sg_dma_len(sg);
575 }
576 window->nr_contig_chunks++;
577 }
578 return 0;
579}
580
581/**
582 * scif_map_window:
583 * @remote_dev: SCIF remote device
584 * @window: self registration window
585 *
586 * Map pages of a window into the aperture/PCI.
587 * Also determine addresses required for DMA.
588 */
589int
590scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
591{
592 int i, j, k, err = 0, nr_contig_pages;
593 scif_pinned_pages_t pin;
594 phys_addr_t phys_prev, phys_curr;
595
596 might_sleep();
597
598 pin = window->pinned_pages;
599
600 if (intel_iommu_enabled && !scifdev_self(remote_dev))
601 return scif_iommu_map(remote_dev, window);
602
603 for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
604 phys_prev = page_to_phys(pin->pages[i]);
605 nr_contig_pages = 1;
606
607 /* Detect physically contiguous chunks */
608 for (k = i + 1; k < window->nr_pages; k++) {
609 phys_curr = page_to_phys(pin->pages[k]);
610 if (phys_curr != (phys_prev + PAGE_SIZE))
611 break;
612 phys_prev = phys_curr;
613 nr_contig_pages++;
614 }
615 window->num_pages[j] = nr_contig_pages;
616 window->nr_contig_chunks++;
617 if (scif_is_mgmt_node()) {
618 /*
619 * Management node has to deal with SMPT on X100 and
620 * hence the DMA mapping is required
621 */
622 err = scif_map_single(&window->dma_addr[j],
623 phys_to_virt(page_to_phys(
624 pin->pages[i])),
625 remote_dev,
626 nr_contig_pages << PAGE_SHIFT);
627 if (err)
628 return err;
629 } else {
630 window->dma_addr[j] = page_to_phys(pin->pages[i]);
631 }
632 }
633 return err;
634}
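
The loop above coalesces physically contiguous pages so that one DMA mapping covers a whole run. A stripped-down sketch of the same contiguity test (scif_example_count_chunks() is not driver code): pages at PFNs {100, 101, 102, 200, 300, 301} break into three chunks of 3, 1 and 2 pages.

static int scif_example_count_chunks(struct page **pages, int nr_pages)
{
	int i, chunks = 0;

	for (i = 0; i < nr_pages; i++) {
		if (!i || page_to_phys(pages[i]) !=
			  page_to_phys(pages[i - 1]) + PAGE_SIZE)
			chunks++;	/* a new contiguous run starts here */
	}
	return chunks;
}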
635
636/**
637 * scif_send_scif_unregister:
638 * @ep: end point
639 * @window: self registration window
640 *
641 * Send a SCIF_UNREGISTER message.
642 */
643static int scif_send_scif_unregister(struct scif_endpt *ep,
644 struct scif_window *window)
645{
646 struct scifmsg msg;
647
648 msg.uop = SCIF_UNREGISTER;
649 msg.src = ep->port;
650 msg.payload[0] = window->alloc_handle.vaddr;
651 msg.payload[1] = (u64)window;
652 return scif_nodeqp_send(ep->remote_dev, &msg);
653}
654
655/**
656 * scif_unregister_window:
657 * @window: self registration window
658 *
659 * Send an unregistration request and wait for a response.
660 */
661int scif_unregister_window(struct scif_window *window)
662{
663 int err = 0;
664 struct scif_endpt *ep = (struct scif_endpt *)window->ep;
665 bool send_msg = false;
666
667 might_sleep();
668 switch (window->unreg_state) {
669 case OP_IDLE:
670 {
671 window->unreg_state = OP_IN_PROGRESS;
672 send_msg = true;
673 /* fall through */
674 }
675 case OP_IN_PROGRESS:
676 {
677 scif_get_window(window, 1);
678 mutex_unlock(&ep->rma_info.rma_lock);
679 if (send_msg) {
680 err = scif_send_scif_unregister(ep, window);
681 if (err) {
682 window->unreg_state = OP_COMPLETED;
683 goto done;
684 }
685 } else {
686 /* Return ENXIO since unregistration is in progress */
687 mutex_lock(&ep->rma_info.rma_lock);
688 return -ENXIO;
689 }
690retry:
691 /* Wait for a SCIF_UNREGISTER_(N)ACK message */
692 err = wait_event_timeout(window->unregwq,
693 window->unreg_state != OP_IN_PROGRESS,
694 SCIF_NODE_ALIVE_TIMEOUT);
695 if (!err && scifdev_alive(ep))
696 goto retry;
697 if (!err) {
698 err = -ENODEV;
699 window->unreg_state = OP_COMPLETED;
700 dev_err(scif_info.mdev.this_device,
701 "%s %d err %d\n", __func__, __LINE__, err);
702 }
703 if (err > 0)
704 err = 0;
705done:
706 mutex_lock(&ep->rma_info.rma_lock);
707 scif_put_window(window, 1);
708 break;
709 }
710 case OP_FAILED:
711 {
712 if (!scifdev_alive(ep)) {
713 err = -ENODEV;
714 window->unreg_state = OP_COMPLETED;
715 }
716 break;
717 }
718 case OP_COMPLETED:
719 break;
720 default:
721 err = -ENODEV;
722 }
723
724 if (window->unreg_state == OP_COMPLETED && window->ref_count)
725 scif_put_window(window, window->nr_pages);
726
727 if (!window->ref_count) {
728 atomic_inc(&ep->rma_info.tw_refcount);
729 list_del_init(&window->list);
730 scif_free_window_offset(ep, window, window->offset);
731 mutex_unlock(&ep->rma_info.rma_lock);
732 if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
733 scifdev_alive(ep)) {
734 scif_drain_dma_intr(ep->remote_dev->sdev,
735 ep->rma_info.dma_chan);
736 } else {
737 if (!__scif_dec_pinned_vm_lock(window->mm,
738 window->nr_pages, 1)) {
739 __scif_release_mm(window->mm);
740 window->mm = NULL;
741 }
742 }
743 scif_queue_for_cleanup(window, &scif_info.rma);
744 mutex_lock(&ep->rma_info.rma_lock);
745 }
746 return err;
747}
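
scif_unregister_window(), like the other message paths in this file, waits for the peer's (N)ACK with a timeout and keeps retrying while the remote node is still alive, so a slow peer is not mistaken for a dead one. A condensed sketch of that wait pattern (scif_example_wait_for_state() is a hypothetical helper):

static int scif_example_wait_for_state(struct scif_endpt *ep,
				       wait_queue_head_t *wq,
				       enum scif_msg_state *state)
{
	int err;

	do {
		err = wait_event_timeout(*wq, *state != OP_IN_PROGRESS,
					 SCIF_NODE_ALIVE_TIMEOUT);
	} while (!err && scifdev_alive(ep));

	return err ? 0 : -ENODEV;	/* timed out and the peer is gone */
}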
748
749/**
750 * scif_send_alloc_request:
751 * @ep: end point
752 * @window: self registration window
753 *
754 * Send a remote window allocation request
755 */
756static int scif_send_alloc_request(struct scif_endpt *ep,
757 struct scif_window *window)
758{
759 struct scifmsg msg;
760 struct scif_allocmsg *alloc = &window->alloc_handle;
761
762 /* Set up the Alloc Handle */
763 alloc->state = OP_IN_PROGRESS;
764 init_waitqueue_head(&alloc->allocwq);
765
766 /* Send out an allocation request */
767 msg.uop = SCIF_ALLOC_REQ;
768 msg.payload[1] = window->nr_pages;
769 msg.payload[2] = (u64)&window->alloc_handle;
770 return _scif_nodeqp_send(ep->remote_dev, &msg);
771}
772
773/**
774 * scif_prep_remote_window:
775 * @ep: end point
776 * @window: self registration window
777 *
778 * Send a remote window allocation request, wait for an allocation response,
779 * and prepare the remote window by copying over the page lists.
780 */
781static int scif_prep_remote_window(struct scif_endpt *ep,
782 struct scif_window *window)
783{
784 struct scifmsg msg;
785 struct scif_window *remote_window;
786 struct scif_allocmsg *alloc = &window->alloc_handle;
787 dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
788 int i = 0, j = 0;
789 int nr_contig_chunks, loop_nr_contig_chunks;
790 int remaining_nr_contig_chunks, nr_lookup;
791 int err, map_err;
792
793 map_err = scif_map_window(ep->remote_dev, window);
794 if (map_err)
795 dev_err(&ep->remote_dev->sdev->dev,
796 "%s %d map_err %d\n", __func__, __LINE__, map_err);
797 remaining_nr_contig_chunks = window->nr_contig_chunks;
798 nr_contig_chunks = window->nr_contig_chunks;
799retry:
800 /* Wait for a SCIF_ALLOC_GNT/REJ message */
801 err = wait_event_timeout(alloc->allocwq,
802 alloc->state != OP_IN_PROGRESS,
803 SCIF_NODE_ALIVE_TIMEOUT);
804 mutex_lock(&ep->rma_info.rma_lock);
805 /* Synchronize with the thread waking up allocwq */
806 mutex_unlock(&ep->rma_info.rma_lock);
807 if (!err && scifdev_alive(ep))
808 goto retry;
809
810 if (!err)
811 err = -ENODEV;
812
813 if (err > 0)
814 err = 0;
815 else
816 return err;
817
818 /* Bail out. The remote end rejected this request */
819 if (alloc->state == OP_FAILED)
820 return -ENOMEM;
821
822 if (map_err) {
823 dev_err(&ep->remote_dev->sdev->dev,
824 "%s %d err %d\n", __func__, __LINE__, map_err);
825 msg.uop = SCIF_FREE_VIRT;
826 msg.src = ep->port;
827 msg.payload[0] = ep->remote_ep;
828 msg.payload[1] = window->alloc_handle.vaddr;
829 msg.payload[2] = (u64)window;
830 msg.payload[3] = SCIF_REGISTER;
831 spin_lock(&ep->lock);
832 if (ep->state == SCIFEP_CONNECTED)
833 err = _scif_nodeqp_send(ep->remote_dev, &msg);
834 else
835 err = -ENOTCONN;
836 spin_unlock(&ep->lock);
837 return err;
838 }
839
840 remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
841 ep->remote_dev);
842
843 /* Compute the number of lookup entries. 21 == 2MB Shift */
844 nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
845 >> ilog2(SCIF_NR_ADDR_IN_PAGE);
846
847 dma_phys_lookup =
848 scif_ioremap(remote_window->dma_addr_lookup.offset,
849 nr_lookup *
850 sizeof(*remote_window->dma_addr_lookup.lookup),
851 ep->remote_dev);
852 num_pages_lookup =
853 scif_ioremap(remote_window->num_pages_lookup.offset,
854 nr_lookup *
855 sizeof(*remote_window->num_pages_lookup.lookup),
856 ep->remote_dev);
857
858 while (remaining_nr_contig_chunks) {
859 loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
860 (int)SCIF_NR_ADDR_IN_PAGE);
861 /* #1/2 - Copy physical addresses over to the remote side */
862
863		/* #2/2 - Copy DMA addresses (addresses that are fed into the
864		 * DMA engine). We transfer bus addresses, which are then
865		 * converted into MIC physical addresses on the remote side
866		 * if the remote node is a MIC; if the remote node is the
867		 * mgmt node we transfer the MIC physical address directly.
868		 */
869 tmp = scif_ioremap(dma_phys_lookup[j],
870 loop_nr_contig_chunks *
871 sizeof(*window->dma_addr),
872 ep->remote_dev);
873 tmp1 = scif_ioremap(num_pages_lookup[j],
874 loop_nr_contig_chunks *
875 sizeof(*window->num_pages),
876 ep->remote_dev);
877 if (scif_is_mgmt_node()) {
878 memcpy_toio((void __force __iomem *)tmp,
879 &window->dma_addr[i], loop_nr_contig_chunks
880 * sizeof(*window->dma_addr));
881 memcpy_toio((void __force __iomem *)tmp1,
882 &window->num_pages[i], loop_nr_contig_chunks
883 * sizeof(*window->num_pages));
884 } else {
885 if (scifdev_is_p2p(ep->remote_dev)) {
886 /*
887 * add remote node's base address for this node
888 * to convert it into a MIC address
889 */
890 int m;
891 dma_addr_t dma_addr;
892
893 for (m = 0; m < loop_nr_contig_chunks; m++) {
894 dma_addr = window->dma_addr[i + m] +
895 ep->remote_dev->base_addr;
896 writeq(dma_addr,
897 (void __force __iomem *)&tmp[m]);
898 }
899 memcpy_toio((void __force __iomem *)tmp1,
900 &window->num_pages[i],
901 loop_nr_contig_chunks
902 * sizeof(*window->num_pages));
903 } else {
904 /* Mgmt node or loopback - transfer DMA
905 * addresses as is, this is the same as a
906 * MIC physical address (we use the dma_addr
907 * and not the phys_addr array since the
908 * phys_addr is only setup if there is a mmap()
909 * request from the mgmt node)
910 */
911 memcpy_toio((void __force __iomem *)tmp,
912 &window->dma_addr[i],
913 loop_nr_contig_chunks *
914 sizeof(*window->dma_addr));
915 memcpy_toio((void __force __iomem *)tmp1,
916 &window->num_pages[i],
917 loop_nr_contig_chunks *
918 sizeof(*window->num_pages));
919 }
920 }
921 remaining_nr_contig_chunks -= loop_nr_contig_chunks;
922 i += loop_nr_contig_chunks;
923 j++;
924 scif_iounmap(tmp, loop_nr_contig_chunks *
925 sizeof(*window->dma_addr), ep->remote_dev);
926 scif_iounmap(tmp1, loop_nr_contig_chunks *
927 sizeof(*window->num_pages), ep->remote_dev);
928 }
929
930 /* Prepare the remote window for the peer */
931 remote_window->peer_window = (u64)window;
932 remote_window->offset = window->offset;
933 remote_window->prot = window->prot;
934 remote_window->nr_contig_chunks = nr_contig_chunks;
935 remote_window->ep = ep->remote_ep;
936 scif_iounmap(num_pages_lookup,
937 nr_lookup *
938 sizeof(*remote_window->num_pages_lookup.lookup),
939 ep->remote_dev);
940 scif_iounmap(dma_phys_lookup,
941 nr_lookup *
942 sizeof(*remote_window->dma_addr_lookup.lookup),
943 ep->remote_dev);
944 scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
945 window->peer_window = alloc->vaddr;
946 return err;
947}
948
949/**
950 * scif_send_scif_register:
951 * @ep: end point
952 * @window: self registration window
953 *
954 * Send a SCIF_REGISTER message if EP is connected and wait for a
955 * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
956 * message so that the peer can free its remote window allocated earlier.
957 */
958static int scif_send_scif_register(struct scif_endpt *ep,
959 struct scif_window *window)
960{
961 int err = 0;
962 struct scifmsg msg;
963
964 msg.src = ep->port;
965 msg.payload[0] = ep->remote_ep;
966 msg.payload[1] = window->alloc_handle.vaddr;
967 msg.payload[2] = (u64)window;
968 spin_lock(&ep->lock);
969 if (ep->state == SCIFEP_CONNECTED) {
970 msg.uop = SCIF_REGISTER;
971 window->reg_state = OP_IN_PROGRESS;
972 err = _scif_nodeqp_send(ep->remote_dev, &msg);
973 spin_unlock(&ep->lock);
974 if (!err) {
975retry:
976 /* Wait for a SCIF_REGISTER_(N)ACK message */
977 err = wait_event_timeout(window->regwq,
978 window->reg_state !=
979 OP_IN_PROGRESS,
980 SCIF_NODE_ALIVE_TIMEOUT);
981 if (!err && scifdev_alive(ep))
982 goto retry;
983 err = !err ? -ENODEV : 0;
984 if (window->reg_state == OP_FAILED)
985 err = -ENOTCONN;
986 }
987 } else {
988 msg.uop = SCIF_FREE_VIRT;
989 msg.payload[3] = SCIF_REGISTER;
990 err = _scif_nodeqp_send(ep->remote_dev, &msg);
991 spin_unlock(&ep->lock);
992 if (!err)
993 err = -ENOTCONN;
994 }
995 return err;
996}
997
998/**
999 * scif_get_window_offset:
1000 * @ep: end point descriptor
1001 * @flags: flags
1002 * @offset: offset hint
1003 * @num_pages: number of pages
1004 * @out_offset: computed offset returned by reference.
1005 *
1006 * Compute/Claim a new offset for this EP.
1007 */
1008int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
1009 int num_pages, s64 *out_offset)
1010{
1011 s64 page_index;
1012 struct iova *iova_ptr;
1013 int err = 0;
1014
1015 if (flags & SCIF_MAP_FIXED) {
1016 page_index = SCIF_IOVA_PFN(offset);
1017 iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
1018 page_index + num_pages - 1);
1019 if (!iova_ptr)
1020 err = -EADDRINUSE;
1021 } else {
1022 iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
1023 SCIF_DMA_63BIT_PFN - 1, 0);
1024 if (!iova_ptr)
1025 err = -ENOMEM;
1026 }
1027 if (!err)
1028 *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
1029 return err;
1030}
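
Registered offsets are handed out by the per-endpoint IOVA allocator in ep->rma_info.iovad. A hedged usage sketch (scif_example_claim_offset() is not driver code) mirroring how scif_register() below claims an offset and releases it again on a later failure:

static int scif_example_claim_offset(struct scif_endpt *ep, size_t len,
				     s64 *out)
{
	/* no SCIF_MAP_FIXED: let the allocator pick any free offset */
	int err = scif_get_window_offset(ep, 0, 0, len >> PAGE_SHIFT, out);

	if (err)
		return err;
	/* if a later step fails: scif_free_window_offset(ep, NULL, *out); */
	return 0;
}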
1031
1032/**
1033 * scif_free_window_offset:
1034 * @ep: end point descriptor
1035 * @window: registration window
1036 * @offset: Offset to be freed
1037 *
1038 * Free offset for this EP. The caller is supposed to grab
1039 * the RMA mutex before calling this API.
1040 */
1041void scif_free_window_offset(struct scif_endpt *ep,
1042 struct scif_window *window, s64 offset)
1043{
1044 if ((window && !window->offset_freed) || !window) {
1045 free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
1046 if (window)
1047 window->offset_freed = true;
1048 }
1049}
1050
1051/**
1052 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
1053 * @msg: Interrupt message
1054 *
1055 * Remote side is requesting a memory allocation.
1056 */
1057void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
1058{
1059 int err;
1060 struct scif_window *window = NULL;
1061 int nr_pages = msg->payload[1];
1062
1063 window = scif_create_remote_window(scifdev, nr_pages);
1064 if (!window) {
1065 err = -ENOMEM;
1066 goto error;
1067 }
1068
1069 /* The peer's allocation request is granted */
1070 msg->uop = SCIF_ALLOC_GNT;
1071 msg->payload[0] = (u64)window;
1072 msg->payload[1] = window->mapped_offset;
1073 err = scif_nodeqp_send(scifdev, msg);
1074 if (err)
1075 scif_destroy_remote_window(window);
1076 return;
1077error:
1078 /* The peer's allocation request is rejected */
1079 dev_err(&scifdev->sdev->dev,
1080 "%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
1081 __func__, __LINE__, err, window, nr_pages);
1082 msg->uop = SCIF_ALLOC_REJ;
1083 scif_nodeqp_send(scifdev, msg);
1084}
1085
1086/**
1087 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
1088 * @msg: Interrupt message
1089 *
1090 * Remote side responded to a memory allocation.
1091 */
1092void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
1093{
1094 struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
1095 struct scif_window *window = container_of(handle, struct scif_window,
1096 alloc_handle);
1097 struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1098
1099 mutex_lock(&ep->rma_info.rma_lock);
1100 handle->vaddr = msg->payload[0];
1101 handle->phys_addr = msg->payload[1];
1102 if (msg->uop == SCIF_ALLOC_GNT)
1103 handle->state = OP_COMPLETED;
1104 else
1105 handle->state = OP_FAILED;
1106 wake_up(&handle->allocwq);
1107 mutex_unlock(&ep->rma_info.rma_lock);
1108}
1109
1110/**
1111 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
1112 * @msg: Interrupt message
1113 *
1114 * Free up the remote window memory allocated earlier.
1115 */
1116void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
1117{
1118 struct scif_window *window = (struct scif_window *)msg->payload[1];
1119
1120 scif_destroy_remote_window(window);
1121}
1122
1123static void
1124scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
1125{
1126 int j;
1127 struct scif_hw_dev *sdev = dev->sdev;
1128 phys_addr_t apt_base = 0;
1129
1130 /*
1131 * Add the aperture base if the DMA address is not card relative
1132	 * since the DMA addresses need to be an offset into the BAR.
1133 */
1134 if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
1135 sdev->aper && !sdev->card_rel_da)
1136 apt_base = sdev->aper->pa;
1137 else
1138 return;
1139
1140 for (j = 0; j < window->nr_contig_chunks; j++) {
1141 if (window->num_pages[j])
1142 window->dma_addr[j] += apt_base;
1143 else
1144 break;
1145 }
1146}
1147
1148/**
1149 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
1150 * @msg: Interrupt message
1151 *
1152 * Update remote window list with a new registered window.
1153 */
1154void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
1155{
1156 struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
1157 struct scif_window *window =
1158 (struct scif_window *)msg->payload[1];
1159
1160 mutex_lock(&ep->rma_info.rma_lock);
1161 spin_lock(&ep->lock);
1162 if (ep->state == SCIFEP_CONNECTED) {
1163 msg->uop = SCIF_REGISTER_ACK;
1164 scif_nodeqp_send(ep->remote_dev, msg);
1165 scif_fixup_aper_base(ep->remote_dev, window);
1166 /* No further failures expected. Insert new window */
1167 scif_insert_window(window, &ep->rma_info.remote_reg_list);
1168 } else {
1169 msg->uop = SCIF_REGISTER_NACK;
1170 scif_nodeqp_send(ep->remote_dev, msg);
1171 }
1172 spin_unlock(&ep->lock);
1173 mutex_unlock(&ep->rma_info.rma_lock);
1174 /* free up any lookup resources now that page lists are transferred */
1175 scif_destroy_remote_lookup(ep->remote_dev, window);
1176 /*
1177 * We could not insert the window but we need to
1178 * destroy the window.
1179 */
1180 if (msg->uop == SCIF_REGISTER_NACK)
1181 scif_destroy_remote_window(window);
1182}
1183
1184/**
1185 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
1186 * @msg: Interrupt message
1187 *
1188 * Remove window from the remote registration list.
1189 */
1190void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
1191{
1192 struct scif_rma_req req;
1193 struct scif_window *window = NULL;
1194 struct scif_window *recv_window =
1195 (struct scif_window *)msg->payload[0];
1196 struct scif_endpt *ep;
1197 int del_window = 0;
1198
1199 ep = (struct scif_endpt *)recv_window->ep;
1200 req.out_window = &window;
1201 req.offset = recv_window->offset;
1202 req.prot = 0;
1203 req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
1204 req.type = SCIF_WINDOW_FULL;
1205 req.head = &ep->rma_info.remote_reg_list;
1206 msg->payload[0] = ep->remote_ep;
1207
1208 mutex_lock(&ep->rma_info.rma_lock);
1209 /* Does a valid window exist? */
1210 if (scif_query_window(&req)) {
1211 dev_err(&scifdev->sdev->dev,
1212 "%s %d -ENXIO\n", __func__, __LINE__);
1213 msg->uop = SCIF_UNREGISTER_ACK;
1214 goto error;
1215 }
1216 if (window) {
1217 if (window->ref_count)
1218 scif_put_window(window, window->nr_pages);
1219 else
1220 dev_err(&scifdev->sdev->dev,
1221 "%s %d ref count should be +ve\n",
1222 __func__, __LINE__);
1223 window->unreg_state = OP_COMPLETED;
1224 if (!window->ref_count) {
1225 msg->uop = SCIF_UNREGISTER_ACK;
1226 atomic_inc(&ep->rma_info.tw_refcount);
1227 ep->rma_info.async_list_del = 1;
1228 list_del_init(&window->list);
1229 del_window = 1;
1230 } else {
1231 /* NACK! There are valid references to this window */
1232 msg->uop = SCIF_UNREGISTER_NACK;
1233 }
1234 } else {
1235 /* The window did not make its way to the list at all. ACK */
1236 msg->uop = SCIF_UNREGISTER_ACK;
1237 scif_destroy_remote_window(recv_window);
1238 }
1239error:
1240 mutex_unlock(&ep->rma_info.rma_lock);
1241 if (del_window)
1242 scif_drain_dma_intr(ep->remote_dev->sdev,
1243 ep->rma_info.dma_chan);
1244 scif_nodeqp_send(ep->remote_dev, msg);
1245 if (del_window)
1246 scif_queue_for_cleanup(window, &scif_info.rma);
1247}
1248
1249/**
1250 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
1251 * @msg: Interrupt message
1252 *
1253 * Wake up the thread waiting for window registration to complete.
1254 */
1255void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
1256{
1257 struct scif_window *window =
1258 (struct scif_window *)msg->payload[2];
1259 struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1260
1261 mutex_lock(&ep->rma_info.rma_lock);
1262 window->reg_state = OP_COMPLETED;
1263 wake_up(&window->regwq);
1264 mutex_unlock(&ep->rma_info.rma_lock);
1265}
1266
1267/**
1268 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
1269 * @msg: Interrupt message
1270 *
1271 * Wake up the thread waiting on the window to inform it that
1272 * registration cannot be completed.
1273 */
1274void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
1275{
1276 struct scif_window *window =
1277 (struct scif_window *)msg->payload[2];
1278 struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1279
1280 mutex_lock(&ep->rma_info.rma_lock);
1281 window->reg_state = OP_FAILED;
1282 wake_up(&window->regwq);
1283 mutex_unlock(&ep->rma_info.rma_lock);
1284}
1285
1286/**
1287 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
1288 * @msg: Interrupt message
1289 *
1290 * Wake up the thread waiting for window unregistration to complete.
1291 */
1292void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
1293{
1294 struct scif_window *window =
1295 (struct scif_window *)msg->payload[1];
1296 struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1297
1298 mutex_lock(&ep->rma_info.rma_lock);
1299 window->unreg_state = OP_COMPLETED;
1300 wake_up(&window->unregwq);
1301 mutex_unlock(&ep->rma_info.rma_lock);
1302}
1303
1304/**
1305 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
1306 * @msg: Interrupt message
1307 *
1308 * Wake up the thread waiting on the window to inform it that
1309 * unregistration cannot be completed immediately.
1310 */
1311void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
1312{
1313 struct scif_window *window =
1314 (struct scif_window *)msg->payload[1];
1315 struct scif_endpt *ep = (struct scif_endpt *)window->ep;
1316
1317 mutex_lock(&ep->rma_info.rma_lock);
1318 window->unreg_state = OP_FAILED;
1319 wake_up(&window->unregwq);
1320 mutex_unlock(&ep->rma_info.rma_lock);
1321}
1322
1323int __scif_pin_pages(void *addr, size_t len, int *out_prot,
1324 int map_flags, scif_pinned_pages_t *pages)
1325{
1326 struct scif_pinned_pages *pinned_pages;
1327 int nr_pages, err = 0, i;
1328 bool vmalloc_addr = false;
1329 bool try_upgrade = false;
1330 int prot = *out_prot;
1331 int ulimit = 0;
1332 struct mm_struct *mm = NULL;
1333
1334 /* Unsupported flags */
1335 if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
1336 return -EINVAL;
1337 ulimit = !!(map_flags & SCIF_MAP_ULIMIT);
1338
1339 /* Unsupported protection requested */
1340 if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
1341 return -EINVAL;
1342
1343 /* addr/len must be page aligned. len should be non zero */
1344 if (!len ||
1345 (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1346 (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1347 return -EINVAL;
1348
1349 might_sleep();
1350
1351 nr_pages = len >> PAGE_SHIFT;
1352
1353 /* Allocate a set of pinned pages */
1354 pinned_pages = scif_create_pinned_pages(nr_pages, prot);
1355 if (!pinned_pages)
1356 return -ENOMEM;
1357
1358 if (map_flags & SCIF_MAP_KERNEL) {
1359 if (is_vmalloc_addr(addr))
1360 vmalloc_addr = true;
1361
1362 for (i = 0; i < nr_pages; i++) {
1363 if (vmalloc_addr)
1364 pinned_pages->pages[i] =
1365 vmalloc_to_page(addr + (i * PAGE_SIZE));
1366 else
1367 pinned_pages->pages[i] =
1368 virt_to_page(addr + (i * PAGE_SIZE));
1369 }
1370 pinned_pages->nr_pages = nr_pages;
1371 pinned_pages->map_flags = SCIF_MAP_KERNEL;
1372 } else {
1373 /*
1374 * SCIF supports registration caching. If a registration has
1375 * been requested with read only permissions, then we try
1376 * to pin the pages with RW permissions so that a subsequent
1377 * transfer with RW permission can hit the cache instead of
1378		 * invalidating it. If the upgrade to RW fails, we fall
1379		 * back to read-only permission and retry.
1380 */
1381 if (prot == SCIF_PROT_READ)
1382 try_upgrade = true;
1383 prot |= SCIF_PROT_WRITE;
1384retry:
1385 mm = current->mm;
1386 down_write(&mm->mmap_sem);
1387 if (ulimit) {
1388 err = __scif_check_inc_pinned_vm(mm, nr_pages);
1389 if (err) {
1390 up_write(&mm->mmap_sem);
1391 pinned_pages->nr_pages = 0;
1392 goto error_unmap;
1393 }
1394 }
1395
1396 pinned_pages->nr_pages = get_user_pages(
1397 current,
1398 mm,
1399 (u64)addr,
1400 nr_pages,
1401 !!(prot & SCIF_PROT_WRITE),
1402 0,
1403 pinned_pages->pages,
1404 NULL);
1405 up_write(&mm->mmap_sem);
1406 if (nr_pages != pinned_pages->nr_pages) {
1407 if (try_upgrade) {
1408 if (ulimit)
1409 __scif_dec_pinned_vm_lock(mm,
1410 nr_pages, 0);
1411 /* Roll back any pinned pages */
1412 for (i = 0; i < pinned_pages->nr_pages; i++) {
1413 if (pinned_pages->pages[i])
1414 put_page(
1415 pinned_pages->pages[i]);
1416 }
1417 prot &= ~SCIF_PROT_WRITE;
1418 try_upgrade = false;
1419 goto retry;
1420 }
1421 }
1422 pinned_pages->map_flags = 0;
1423 }
1424
1425 if (pinned_pages->nr_pages < nr_pages) {
1426 err = -EFAULT;
1427 pinned_pages->nr_pages = nr_pages;
1428 goto dec_pinned;
1429 }
1430
1431 *out_prot = prot;
1432 atomic_set(&pinned_pages->ref_count, 1);
1433 *pages = pinned_pages;
1434 return err;
1435dec_pinned:
1436 if (ulimit)
1437 __scif_dec_pinned_vm_lock(mm, nr_pages, 0);
1438 /* Something went wrong! Rollback */
1439error_unmap:
1440 pinned_pages->nr_pages = nr_pages;
1441 scif_destroy_pinned_pages(pinned_pages);
1442 *pages = NULL;
1443 dev_dbg(scif_info.mdev.this_device,
1444 "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
1445 return err;
1446}
1447
1448int scif_pin_pages(void *addr, size_t len, int prot,
1449 int map_flags, scif_pinned_pages_t *pages)
1450{
1451 return __scif_pin_pages(addr, len, &prot, map_flags, pages);
1452}
1453EXPORT_SYMBOL_GPL(scif_pin_pages);
1454
1455int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
1456{
1457 int err = 0, ret;
1458
1459 if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
1460 return -EINVAL;
1461
1462 ret = atomic_sub_return(1, &pinned_pages->ref_count);
1463 if (ret < 0) {
1464 dev_err(scif_info.mdev.this_device,
1465 "%s %d scif_unpin_pages called without pinning? rc %d\n",
1466 __func__, __LINE__, ret);
1467 return -EINVAL;
1468 }
1469 /*
1470 * Destroy the window if the ref count for this set of pinned
1471 * pages has dropped to zero. If it is positive then there is
1472 * a valid registered window which is backed by these pages and
1473 * it will be destroyed once all such windows are unregistered.
1474 */
1475 if (!ret)
1476 err = scif_destroy_pinned_pages(pinned_pages);
1477
1478 return err;
1479}
1480EXPORT_SYMBOL_GPL(scif_unpin_pages);
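
A brief usage sketch of the exported pin/unpin pair for a kernel-mode SCIF client (illustration only; the buffer, length and flags are assumptions, and a real client would include <linux/scif.h>):

static int scif_example_pin_buffer(void *buf, size_t len)
{
	scif_pinned_pages_t pages;
	int err;

	err = scif_pin_pages(buf, len, SCIF_PROT_READ | SCIF_PROT_WRITE,
			     SCIF_MAP_KERNEL, &pages);
	if (err)
		return err;

	/* ... hand 'pages' to scif_register_pinned_pages() ... */

	return scif_unpin_pages(pages);
}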
1481
1482static inline void
1483scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
1484{
1485 mutex_lock(&ep->rma_info.rma_lock);
1486 scif_insert_window(window, &ep->rma_info.reg_list);
1487 mutex_unlock(&ep->rma_info.rma_lock);
1488}
1489
1490off_t scif_register_pinned_pages(scif_epd_t epd,
1491 scif_pinned_pages_t pinned_pages,
1492 off_t offset, int map_flags)
1493{
1494 struct scif_endpt *ep = (struct scif_endpt *)epd;
1495 s64 computed_offset;
1496 struct scif_window *window;
1497 int err;
1498 size_t len;
1499 struct device *spdev;
1500
1501 /* Unsupported flags */
1502 if (map_flags & ~SCIF_MAP_FIXED)
1503 return -EINVAL;
1504
1505 len = pinned_pages->nr_pages << PAGE_SHIFT;
1506
1507 /*
1508	 * With SCIF_MAP_FIXED, fail if the offset is not page aligned,
1509	 * is negative, or if offset + len wraps around.
1510 */
1511 if ((map_flags & SCIF_MAP_FIXED) &&
1512 ((ALIGN(offset, PAGE_SIZE) != offset) ||
1513 (offset < 0) ||
1514 (offset + (off_t)len < offset)))
1515 return -EINVAL;
1516
1517 might_sleep();
1518
1519 err = scif_verify_epd(ep);
1520 if (err)
1521 return err;
1522 /*
1523 * It is an error to pass pinned_pages to scif_register_pinned_pages()
1524 * after calling scif_unpin_pages().
1525 */
1526 if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
1527 return -EINVAL;
1528
1529 /* Compute the offset for this registration */
1530 err = scif_get_window_offset(ep, map_flags, offset,
1531 len, &computed_offset);
1532 if (err) {
1533 atomic_sub(1, &pinned_pages->ref_count);
1534 return err;
1535 }
1536
1537 /* Allocate and prepare self registration window */
1538 window = scif_create_window(ep, pinned_pages->nr_pages,
1539 computed_offset, false);
1540 if (!window) {
1541 atomic_sub(1, &pinned_pages->ref_count);
1542 scif_free_window_offset(ep, NULL, computed_offset);
1543 return -ENOMEM;
1544 }
1545
1546 window->pinned_pages = pinned_pages;
1547 window->nr_pages = pinned_pages->nr_pages;
1548 window->prot = pinned_pages->prot;
1549
1550 spdev = scif_get_peer_dev(ep->remote_dev);
1551 if (IS_ERR(spdev)) {
1552 err = PTR_ERR(spdev);
1553 scif_destroy_window(ep, window);
1554 return err;
1555 }
1556 err = scif_send_alloc_request(ep, window);
1557 if (err) {
1558 dev_err(&ep->remote_dev->sdev->dev,
1559 "%s %d err %d\n", __func__, __LINE__, err);
1560 goto error_unmap;
1561 }
1562
1563 /* Prepare the remote registration window */
1564 err = scif_prep_remote_window(ep, window);
1565 if (err) {
1566 dev_err(&ep->remote_dev->sdev->dev,
1567 "%s %d err %d\n", __func__, __LINE__, err);
1568 goto error_unmap;
1569 }
1570
1571 /* Tell the peer about the new window */
1572 err = scif_send_scif_register(ep, window);
1573 if (err) {
1574 dev_err(&ep->remote_dev->sdev->dev,
1575 "%s %d err %d\n", __func__, __LINE__, err);
1576 goto error_unmap;
1577 }
1578
1579 scif_put_peer_dev(spdev);
1580 /* No further failures expected. Insert new window */
1581 scif_insert_local_window(window, ep);
1582 return computed_offset;
1583error_unmap:
1584 scif_destroy_window(ep, window);
1585 scif_put_peer_dev(spdev);
1586 dev_err(&ep->remote_dev->sdev->dev,
1587 "%s %d err %d\n", __func__, __LINE__, err);
1588 return err;
1589}
1590EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
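
A matching sketch for registering an already pinned set of pages (again not driver code): passing offset 0 without SCIF_MAP_FIXED lets the allocator choose the window offset, and the returned value is the offset to use for subsequent RMA operations.

static off_t scif_example_register_pinned(scif_epd_t epd,
					  scif_pinned_pages_t pages)
{
	/* offset 0 and no SCIF_MAP_FIXED: the implementation picks one */
	return scif_register_pinned_pages(epd, pages, 0, 0);
}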
1591
1592off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
1593 int prot, int map_flags)
1594{
1595 scif_pinned_pages_t pinned_pages;
1596 off_t err;
1597 struct scif_endpt *ep = (struct scif_endpt *)epd;
1598 s64 computed_offset;
1599 struct scif_window *window;
1600 struct mm_struct *mm = NULL;
1601 struct device *spdev;
1602
1603 dev_dbg(scif_info.mdev.this_device,
1604 "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
1605 epd, addr, len, offset, prot, map_flags);
1606 /* Unsupported flags */
1607 if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
1608 return -EINVAL;
1609
1610 /*
1611	 * With SCIF_MAP_FIXED, fail if the offset is not page aligned,
1612	 * is negative, or if offset + len wraps around.
1613 */
1614 if ((map_flags & SCIF_MAP_FIXED) &&
1615 ((ALIGN(offset, PAGE_SIZE) != offset) ||
1616 (offset < 0) ||
1617 (offset + (off_t)len < offset)))
1618 return -EINVAL;
1619
1620 /* Unsupported protection requested */
1621 if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
1622 return -EINVAL;
1623
1624 /* addr/len must be page aligned. len should be non zero */
1625 if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
1626 (ALIGN(len, PAGE_SIZE) != len))
1627 return -EINVAL;
1628
1629 might_sleep();
1630
1631 err = scif_verify_epd(ep);
1632 if (err)
1633 return err;
1634
1635 /* Compute the offset for this registration */
1636 err = scif_get_window_offset(ep, map_flags, offset,
1637 len >> PAGE_SHIFT, &computed_offset);
1638 if (err)
1639 return err;
1640
1641 spdev = scif_get_peer_dev(ep->remote_dev);
1642 if (IS_ERR(spdev)) {
1643 err = PTR_ERR(spdev);
1644 scif_free_window_offset(ep, NULL, computed_offset);
1645 return err;
1646 }
1647 /* Allocate and prepare self registration window */
1648 window = scif_create_window(ep, len >> PAGE_SHIFT,
1649 computed_offset, false);
1650 if (!window) {
1651 scif_free_window_offset(ep, NULL, computed_offset);
1652 scif_put_peer_dev(spdev);
1653 return -ENOMEM;
1654 }
1655
1656 window->nr_pages = len >> PAGE_SHIFT;
1657
1658 err = scif_send_alloc_request(ep, window);
1659 if (err) {
1660 scif_destroy_incomplete_window(ep, window);
1661 scif_put_peer_dev(spdev);
1662 return err;
1663 }
1664
1665 if (!(map_flags & SCIF_MAP_KERNEL)) {
1666 mm = __scif_acquire_mm();
1667 map_flags |= SCIF_MAP_ULIMIT;
1668 }
1669 /* Pin down the pages */
1670 err = __scif_pin_pages(addr, len, &prot,
1671 map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
1672 &pinned_pages);
1673 if (err) {
1674 scif_destroy_incomplete_window(ep, window);
1675 __scif_release_mm(mm);
1676 goto error;
1677 }
1678
1679 window->pinned_pages = pinned_pages;
1680 window->prot = pinned_pages->prot;
1681 window->mm = mm;
1682
1683 /* Prepare the remote registration window */
1684 err = scif_prep_remote_window(ep, window);
1685 if (err) {
1686 dev_err(&ep->remote_dev->sdev->dev,
1687 "%s %d err %ld\n", __func__, __LINE__, err);
1688 goto error_unmap;
1689 }
1690
1691 /* Tell the peer about the new window */
1692 err = scif_send_scif_register(ep, window);
1693 if (err) {
1694 dev_err(&ep->remote_dev->sdev->dev,
1695 "%s %d err %ld\n", __func__, __LINE__, err);
1696 goto error_unmap;
1697 }
1698
1699 scif_put_peer_dev(spdev);
1700 /* No further failures expected. Insert new window */
1701 scif_insert_local_window(window, ep);
1702 dev_dbg(&ep->remote_dev->sdev->dev,
1703 "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
1704 epd, addr, len, computed_offset);
1705 return computed_offset;
1706error_unmap:
1707 scif_destroy_window(ep, window);
1708error:
1709 scif_put_peer_dev(spdev);
1710 dev_err(&ep->remote_dev->sdev->dev,
1711 "%s %d err %ld\n", __func__, __LINE__, err);
1712 return err;
1713}
1714EXPORT_SYMBOL_GPL(scif_register);
1715
1716int
1717scif_unregister(scif_epd_t epd, off_t offset, size_t len)
1718{
1719 struct scif_endpt *ep = (struct scif_endpt *)epd;
1720 struct scif_window *window = NULL;
1721 struct scif_rma_req req;
1722 int nr_pages, err;
1723 struct device *spdev;
1724
1725 dev_dbg(scif_info.mdev.this_device,
1726 "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
1727 ep, offset, len);
1728 /* len must be page aligned. len should be non zero */
1729 if (!len ||
1730 (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
1731 return -EINVAL;
1732
1733 /* Offset is not page aligned or offset+len wraps around */
1734 if ((ALIGN(offset, PAGE_SIZE) != offset) ||
1735 (offset + (off_t)len < offset))
1736 return -EINVAL;
1737
1738 err = scif_verify_epd(ep);
1739 if (err)
1740 return err;
1741
1742 might_sleep();
1743 nr_pages = len >> PAGE_SHIFT;
1744
1745 req.out_window = &window;
1746 req.offset = offset;
1747 req.prot = 0;
1748 req.nr_bytes = len;
1749 req.type = SCIF_WINDOW_FULL;
1750 req.head = &ep->rma_info.reg_list;
1751
1752 spdev = scif_get_peer_dev(ep->remote_dev);
1753 if (IS_ERR(spdev)) {
1754 err = PTR_ERR(spdev);
1755 return err;
1756 }
1757 mutex_lock(&ep->rma_info.rma_lock);
1758 /* Does a valid window exist? */
1759 err = scif_query_window(&req);
1760 if (err) {
1761 dev_err(&ep->remote_dev->sdev->dev,
1762 "%s %d err %d\n", __func__, __LINE__, err);
1763 goto error;
1764 }
1765 /* Unregister all the windows in this range */
1766 err = scif_rma_list_unregister(window, offset, nr_pages);
1767 if (err)
1768 dev_err(&ep->remote_dev->sdev->dev,
1769 "%s %d err %d\n", __func__, __LINE__, err);
1770error:
1771 mutex_unlock(&ep->rma_info.rma_lock);
1772 scif_put_peer_dev(spdev);
1773 return err;
1774}
1775EXPORT_SYMBOL_GPL(scif_unregister);
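
For completeness, a hedged end-to-end sketch of the exported scif_register()/scif_unregister() pair from a kernel client's point of view (the buffer, length and transfer step are assumptions; scif_readfrom()/scif_writeto() are the RMA calls a client would typically issue against the returned offset):

static int scif_example_window(scif_epd_t epd, void *buf, size_t len)
{
	off_t off;

	off = scif_register(epd, buf, len, 0,
			    SCIF_PROT_READ | SCIF_PROT_WRITE,
			    SCIF_MAP_KERNEL);
	if (off < 0)
		return (int)off;

	/* ... scif_readfrom()/scif_writeto() against 'off' ... */

	return scif_unregister(epd, off, len);
}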
diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h
new file mode 100644
index 000000000000..fa6722279196
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma.h
@@ -0,0 +1,464 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * Copyright(c) 2015 Intel Corporation.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * BSD LICENSE
21 *
22 * Copyright(c) 2015 Intel Corporation.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 *
28 * * Redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer.
30 * * Redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in
32 * the documentation and/or other materials provided with the
33 * distribution.
34 * * Neither the name of Intel Corporation nor the names of its
35 * contributors may be used to endorse or promote products derived
36 * from this software without specific prior written permission.
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
39 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
40 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
41 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
42 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
44 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
45 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
46 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
47 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
48 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
49 *
50 * Intel SCIF driver.
51 *
52 */
53#ifndef SCIF_RMA_H
54#define SCIF_RMA_H
55
56#include <linux/dma_remapping.h>
57#include <linux/mmu_notifier.h>
58
59#include "../bus/scif_bus.h"
60
61/* If this bit is set then the mark is a remote fence mark */
62#define SCIF_REMOTE_FENCE_BIT 31
63/* Magic value used to indicate a remote fence request */
64#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT)
65
66#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL)
67#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \
68 (L1_CACHE_BYTES << 1))
69
70#define SCIF_IOVA_START_PFN (1)
71#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
72#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64))
73#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63))
74
75/*
76 * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information
77 *
78 * @reg_list: List of registration windows for self
79 * @remote_reg_list: List of registration windows for peer
80 * @iovad: Offset generator
81 * @rma_lock: Synchronizes access to self/remote list and also protects the
82 * window from being destroyed while RMAs are in progress.
83 * @tc_lock: Synchronizes access to temporary cached windows list
84 * for SCIF Registration Caching.
85 * @mmn_lock: Synchronizes access to the list of MMU notifiers registered
86 * @tw_refcount: Keeps track of number of outstanding temporary registered
87 * windows created by scif_vreadfrom/scif_vwriteto which have
88 * not been destroyed.
89 * @tcw_refcount: Same as tw_refcount but for temporary cached windows
90 * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned
91 * @mmn_list: MMU notifier so that we can destroy the windows when required
92 * @fence_refcount: Keeps track of number of outstanding remote fence
93 * requests which have been received by the peer.
94 * @dma_chan: DMA channel used for all DMA transfers for this endpoint.
95 * @async_list_del: Detect asynchronous list entry deletion
96 * @vma_list: List of vmas with remote memory mappings
97 * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait
98*/
99struct scif_endpt_rma_info {
100 struct list_head reg_list;
101 struct list_head remote_reg_list;
102 struct iova_domain iovad;
103 struct mutex rma_lock;
104 spinlock_t tc_lock;
105 struct mutex mmn_lock;
106 atomic_t tw_refcount;
107 atomic_t tcw_refcount;
108 atomic_t tcw_total_pages;
109 struct list_head mmn_list;
110 atomic_t fence_refcount;
111 struct dma_chan *dma_chan;
112 int async_list_del;
113 struct list_head vma_list;
114 wait_queue_head_t markwq;
115};
116
117/*
118 * struct scif_fence_info - used for tracking fence requests
119 *
120 * @state: State of this transfer
121 * @wq: Fences wait on this queue
122 * @dma_mark: Used for storing the DMA mark
123 */
124struct scif_fence_info {
125 enum scif_msg_state state;
126 struct completion comp;
127 int dma_mark;
128};
129
130/*
131 * struct scif_remote_fence_info - used for tracking remote fence requests
132 *
133 * @msg: List of SCIF node QP fence messages
134 * @list: Link to list of remote fence requests
135 */
136struct scif_remote_fence_info {
137 struct scifmsg msg;
138 struct list_head list;
139};
140
141/*
142 * Specifies whether an RMA operation can span across partial windows, a single
143 * window or multiple contiguous windows. Mmaps can span across partial windows.
144 * Unregistration can span across complete windows. scif_get_pages() can span a
145 * single window. A window can also be of type self or peer.
146 */
147enum scif_window_type {
148 SCIF_WINDOW_PARTIAL,
149 SCIF_WINDOW_SINGLE,
150 SCIF_WINDOW_FULL,
151 SCIF_WINDOW_SELF,
152 SCIF_WINDOW_PEER
153};
154
155/* The number of physical addresses that can be stored in a PAGE. */
156#define SCIF_NR_ADDR_IN_PAGE (0x1000 >> 3)
157
158/*
159 * struct scif_rma_lookup - RMA lookup data structure for page list transfers
160 *
161 * Store an array of lookup offsets. Each offset in this array maps
162 * one 4K page containing 512 physical addresses i.e. 2MB. 512 such
163 * offsets in a 4K page will correspond to 1GB of registered address space.
164 *
165 * @lookup: Array of offsets
166 * @offset: DMA offset of lookup array
167 */
168struct scif_rma_lookup {
169 dma_addr_t *lookup;
170 dma_addr_t offset;
171};
172
173/*
174 * struct scif_pinned_pages - A set of pinned pages obtained with
175 * scif_pin_pages() which could be part of multiple registered
176 * windows across different end points.
177 *
178 * @nr_pages: Number of pages which is defined as a s64 instead of an int
179 * to avoid sign extension with buffers >= 2GB
180 * @prot: read/write protections
181 * @map_flags: Flags specified during the pin operation
182 * @ref_count: Reference count bumped in terms of number of pages
183 * @magic: A magic value
184 * @pages: Array of pointers to struct pages populated with get_user_pages(..)
185 */
186struct scif_pinned_pages {
187 s64 nr_pages;
188 int prot;
189 int map_flags;
190 atomic_t ref_count;
191 u64 magic;
192 struct page **pages;
193};
194
195/*
196 * struct scif_status - Stores DMA status update information
197 *
198 * @src_dma_addr: Source buffer DMA address
199 * @val: src location for value to be written to the destination
200 * @ep: SCIF endpoint
201 */
202struct scif_status {
203 dma_addr_t src_dma_addr;
204 u64 val;
205 struct scif_endpt *ep;
206};
207
208/*
209 * struct scif_window - Registration Window for Self and Remote
210 *
211 * @nr_pages: Number of pages which is defined as a s64 instead of an int
212 * to avoid sign extension with buffers >= 2GB
213 * @nr_contig_chunks: Number of contiguous physical chunks
214 * @prot: read/write protections
215 * @ref_count: reference count in terms of number of pages
216 * @magic: Cookie to detect corruption
217 * @offset: registered offset
218 * @va_for_temp: va address that this window represents
219 * @dma_mark: Used to determine if all DMAs against the window are done
220 * @ep: Pointer to EP. Useful for passing EP around with messages to
221 *	  avoid expensive list traversals.
222 * @list: link to list of windows for the endpoint
223 * @type: self or peer window
224 * @peer_window: Pointer to peer window. Useful for sending messages to peer
225 * without requiring an extra list traversal
226 * @unreg_state: unregistration state
227 * @offset_freed: True if the offset has been freed
228 * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto
229 * @mm: memory descriptor for the task_struct which initiated the RMA
230 * @st: scatter gather table for DMA mappings with IOMMU enabled
231 * @pinned_pages: The set of pinned_pages backing this window
232 * @alloc_handle: Handle for sending ALLOC_REQ
233 * @regwq: Wait Queue for a registration (N)ACK
234 * @reg_state: Registration state
235 * @unregwq: Wait Queue for an unregistration (N)ACK
236 * @dma_addr_lookup: Lookup for physical addresses used for DMA
237 * @nr_lookup: Number of entries in lookup
238 * @mapped_offset: Offset used to map the window by the peer
239 * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA
240 * @num_pages: Array specifying number of pages for each physical address
241 */
242struct scif_window {
243 s64 nr_pages;
244 int nr_contig_chunks;
245 int prot;
246 int ref_count;
247 u64 magic;
248 s64 offset;
249 unsigned long va_for_temp;
250 int dma_mark;
251 u64 ep;
252 struct list_head list;
253 enum scif_window_type type;
254 u64 peer_window;
255 enum scif_msg_state unreg_state;
256 bool offset_freed;
257 bool temp;
258 struct mm_struct *mm;
259 struct sg_table *st;
260 union {
261 struct {
262 struct scif_pinned_pages *pinned_pages;
263 struct scif_allocmsg alloc_handle;
264 wait_queue_head_t regwq;
265 enum scif_msg_state reg_state;
266 wait_queue_head_t unregwq;
267 };
268 struct {
269 struct scif_rma_lookup dma_addr_lookup;
270 struct scif_rma_lookup num_pages_lookup;
271 int nr_lookup;
272 dma_addr_t mapped_offset;
273 };
274 };
275 dma_addr_t *dma_addr;
276 u64 *num_pages;
277} __packed;
278
279/*
280 * scif_mmu_notif - SCIF mmu notifier information
281 *
282 * @mmu_notifier ep_mmu_notifier: MMU notifier operations
283 * @tc_reg_list: List of temp registration windows for self
284 * @mm: memory descriptor for the task_struct which initiated the RMA
285 * @ep: SCIF endpoint
286 * @list: link to list of MMU notifier information
287 */
288struct scif_mmu_notif {
289#ifdef CONFIG_MMU_NOTIFIER
290 struct mmu_notifier ep_mmu_notifier;
291#endif
292 struct list_head tc_reg_list;
293 struct mm_struct *mm;
294 struct scif_endpt *ep;
295 struct list_head list;
296};
297
298enum scif_rma_dir {
299 SCIF_LOCAL_TO_REMOTE,
300 SCIF_REMOTE_TO_LOCAL
301};
302
303extern struct kmem_cache *unaligned_cache;
304/* Initialize RMA for this EP */
305void scif_rma_ep_init(struct scif_endpt *ep);
306/* Check if epd can be uninitialized */
307int scif_rma_ep_can_uninit(struct scif_endpt *ep);
308/* Obtain a new offset. Caller must grab RMA lock */
309int scif_get_window_offset(struct scif_endpt *ep, int flags,
310 s64 offset, int nr_pages, s64 *out_offset);
311/* Free offset. Caller must grab RMA lock */
312void scif_free_window_offset(struct scif_endpt *ep,
313 struct scif_window *window, s64 offset);
314/* Create self registration window */
315struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
316 s64 offset, bool temp);
317/* Destroy self registration window.*/
318int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window);
319void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window);
320/* Map pages of self window to Aperture/PCI */
321int scif_map_window(struct scif_dev *remote_dev,
322 struct scif_window *window);
323/* Unregister a self window */
324int scif_unregister_window(struct scif_window *window);
325/* Destroy remote registration window */
326void
327scif_destroy_remote_window(struct scif_window *window);
328/* remove valid remote memory mappings from process address space */
329void scif_zap_mmaps(int node);
330/* Query if any applications have remote memory mappings */
331bool scif_rma_do_apps_have_mmaps(int node);
332/* Cleanup remote registration lists for zombie endpoints */
333void scif_cleanup_rma_for_zombies(int node);
334/* Reserve a DMA channel for a particular endpoint */
335int scif_reserve_dma_chan(struct scif_endpt *ep);
336/* Setup a DMA mark for an endpoint */
337int _scif_fence_mark(scif_epd_t epd, int *mark);
338int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
339 enum scif_window_type type);
340void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg);
341void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg);
342void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg);
343void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg);
344void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg);
345void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
346void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
347void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
348void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
349void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg);
350void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg);
351void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg);
352void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg);
353void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg);
354void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg);
355void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg);
356void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg);
357void scif_mmu_notif_handler(struct work_struct *work);
358void scif_rma_handle_remote_fences(void);
359void scif_rma_destroy_windows(void);
360void scif_rma_destroy_tcw_invalid(void);
361int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan);
362
363struct scif_window_iter {
364 s64 offset;
365 int index;
366};
367
368static inline void
369scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter)
370{
371 iter->offset = window->offset;
372 iter->index = 0;
373}
374
375dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
376 size_t *nr_bytes,
377 struct scif_window_iter *iter);
378static inline
379dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off)
380{
381 return scif_off_to_dma_addr(window, off, NULL, NULL);
382}
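A minimal sketch of how the iterator and the lookup helper above are intended to be combined; the caller, the pr_debug() reporting, and the assumption that the window is already DMA-mapped are illustrative and not part of this patch:

static void example_walk_window(struct scif_window *window, s64 off, size_t len)
{
	struct scif_window_iter iter;
	size_t nr_bytes;
	dma_addr_t addr;

	scif_init_window_iter(window, &iter);
	while (len) {
		/* nr_bytes reports how many contiguous bytes follow 'off' */
		addr = scif_off_to_dma_addr(window, off, &nr_bytes, &iter);
		nr_bytes = min(nr_bytes, len);
		pr_debug("off 0x%llx -> dma %pad, %zu bytes\n", off, &addr, nr_bytes);
		off += nr_bytes;
		len -= nr_bytes;
	}
}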
383
384static inline bool scif_unaligned(off_t src_offset, off_t dst_offset)
385{
386 src_offset = src_offset & (L1_CACHE_BYTES - 1);
387 dst_offset = dst_offset & (L1_CACHE_BYTES - 1);
388 return !(src_offset == dst_offset);
389}
390
391/*
392 * scif_zalloc:
393 * @size: Size of the allocation request.
394 *
395 * Helper API which attempts to allocate zeroed pages via
396 * __get_free_pages(..) first and then falls back on
397 * vzalloc(..) if that fails.
398 */
399static inline void *scif_zalloc(size_t size)
400{
401 void *ret = NULL;
402 size_t align = ALIGN(size, PAGE_SIZE);
403
404 if (align && get_order(align) < MAX_ORDER)
405 ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
406 get_order(align));
407 return ret ? ret : vzalloc(align);
408}
409
410/*
411 * scif_free:
412 * @addr: Address to be freed.
413 * @size: Size of the allocation.
414 * Helper API which frees memory allocated via scif_zalloc().
415 */
416static inline void scif_free(void *addr, size_t size)
417{
418 size_t align = ALIGN(size, PAGE_SIZE);
419
420 if (is_vmalloc_addr(addr))
421 vfree(addr);
422 else
423 free_pages((unsigned long)addr, get_order(align));
424}
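As a hedged usage sketch (the lookup-table purpose and names below are assumptions), scif_zalloc() and scif_free() are meant to be used as a pair, with the caller remembering the size so the matching release path is taken:

static int example_alloc_table(size_t nr_entries)
{
	size_t sz = nr_entries * sizeof(dma_addr_t);
	dma_addr_t *table = scif_zalloc(sz);

	if (!table)
		return -ENOMEM;
	/* ... populate and use the zeroed table ... */
	scif_free(table, sz);	/* picks vfree() or free_pages() as needed */
	return 0;
}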
425
426static inline void scif_get_window(struct scif_window *window, int nr_pages)
427{
428 window->ref_count += nr_pages;
429}
430
431static inline void scif_put_window(struct scif_window *window, int nr_pages)
432{
433 window->ref_count -= nr_pages;
434}
435
436static inline void scif_set_window_ref(struct scif_window *window, int nr_pages)
437{
438 window->ref_count = nr_pages;
439}
440
441static inline void
442scif_queue_for_cleanup(struct scif_window *window, struct list_head *list)
443{
444 spin_lock(&scif_info.rmalock);
445 list_add_tail(&window->list, list);
446 spin_unlock(&scif_info.rmalock);
447 schedule_work(&scif_info.misc_work);
448}
449
450static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window)
451{
452 list_del_init(&window->list);
453 scif_queue_for_cleanup(window, &scif_info.rma_tc);
454}
455
456static inline bool scif_is_iommu_enabled(void)
457{
458#ifdef CONFIG_INTEL_IOMMU
459 return intel_iommu_enabled;
460#else
461 return false;
462#endif
463}
464#endif /* SCIF_RMA_H */
diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c
new file mode 100644
index 000000000000..e1ef8daedd5a
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.c
@@ -0,0 +1,291 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * Intel SCIF driver.
16 *
17 */
18#include "scif_main.h"
19#include <linux/mmu_notifier.h>
20#include <linux/highmem.h>
21
22/*
23 * scif_insert_tcw:
24 *
25 * Insert a temp window to the temp registration list sorted by va_for_temp.
26 * RMA lock must be held.
27 */
28void scif_insert_tcw(struct scif_window *window, struct list_head *head)
29{
30 struct scif_window *curr = NULL;
31 struct scif_window *prev = list_entry(head, struct scif_window, list);
32 struct list_head *item;
33
34 INIT_LIST_HEAD(&window->list);
35 /* Compare with tail and if the entry is new tail add it to the end */
36 if (!list_empty(head)) {
37 curr = list_entry(head->prev, struct scif_window, list);
38 if (curr->va_for_temp < window->va_for_temp) {
39 list_add_tail(&window->list, head);
40 return;
41 }
42 }
43 list_for_each(item, head) {
44 curr = list_entry(item, struct scif_window, list);
45 if (curr->va_for_temp > window->va_for_temp)
46 break;
47 prev = curr;
48 }
49 list_add(&window->list, &prev->list);
50}
51
52/*
53 * scif_insert_window:
54 *
55 * Insert a window to the self registration list sorted by offset.
56 * RMA lock must be held.
57 */
58void scif_insert_window(struct scif_window *window, struct list_head *head)
59{
60 struct scif_window *curr = NULL, *prev = NULL;
61 struct list_head *item;
62
63 INIT_LIST_HEAD(&window->list);
64 list_for_each(item, head) {
65 curr = list_entry(item, struct scif_window, list);
66 if (curr->offset > window->offset)
67 break;
68 prev = curr;
69 }
70 if (!prev)
71 list_add(&window->list, head);
72 else
73 list_add(&window->list, &prev->list);
74 scif_set_window_ref(window, window->nr_pages);
75}
76
77/*
78 * scif_query_tcw:
79 *
 80 * Query the temp cached registration list of ep for an overlapping window.
 81 * In case of permission mismatch, destroy the previous window. If permissions
 82 * match and the overlap is partial, destroy the window but return the new range.
83 * RMA lock must be held.
84 */
85int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req)
86{
87 struct list_head *item, *temp, *head = req->head;
88 struct scif_window *window;
89 u64 start_va_window, start_va_req = req->va_for_temp;
90 u64 end_va_window, end_va_req = start_va_req + req->nr_bytes;
91
92 if (!req->nr_bytes)
93 return -EINVAL;
94 /*
95 * Avoid traversing the entire list to find out that there
96 * is no entry that matches
97 */
98 if (!list_empty(head)) {
99 window = list_last_entry(head, struct scif_window, list);
100 end_va_window = window->va_for_temp +
101 (window->nr_pages << PAGE_SHIFT);
102 if (start_va_req > end_va_window)
103 return -ENXIO;
104 }
105 list_for_each_safe(item, temp, head) {
106 window = list_entry(item, struct scif_window, list);
107 start_va_window = window->va_for_temp;
108 end_va_window = window->va_for_temp +
109 (window->nr_pages << PAGE_SHIFT);
110 if (start_va_req < start_va_window &&
111 end_va_req < start_va_window)
112 break;
113 if (start_va_req >= end_va_window)
114 continue;
115 if ((window->prot & req->prot) == req->prot) {
116 if (start_va_req >= start_va_window &&
117 end_va_req <= end_va_window) {
118 *req->out_window = window;
119 return 0;
120 }
121 /* expand window */
122 if (start_va_req < start_va_window) {
123 req->nr_bytes +=
124 start_va_window - start_va_req;
125 req->va_for_temp = start_va_window;
126 }
127 if (end_va_req >= end_va_window)
128 req->nr_bytes += end_va_window - end_va_req;
129 }
130 /* Destroy the old window to create a new one */
131 __scif_rma_destroy_tcw_helper(window);
132 break;
133 }
134 return -ENXIO;
135}
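A hedged sketch of a scif_query_tcw() caller; the wrapper name, the list head passed in, and the way the -ENXIO result is interpreted are illustrative assumptions only:

static int example_lookup_tcw(struct scif_endpt *ep, struct list_head *tcw_head,
			      unsigned long va, size_t len, int prot,
			      struct scif_window **out)
{
	struct scif_rma_req req = {
		.out_window = out,
		.va_for_temp = va,
		.nr_bytes = len,
		.prot = prot,
		.head = tcw_head,
	};
	int err;

	/* RMA lock must be held, as documented above */
	err = scif_query_tcw(ep, &req);
	if (err == -ENXIO) {
		/* No reusable window: register req.va_for_temp/req.nr_bytes,
		 * which may have been expanded to cover a destroyed overlap.
		 */
	}
	return err;
}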
136
137/*
138 * scif_query_window:
139 *
140 * Query the registration list and check if a valid contiguous
141 * range of windows exists.
142 * RMA lock must be held.
143 */
144int scif_query_window(struct scif_rma_req *req)
145{
146 struct list_head *item;
147 struct scif_window *window;
148 s64 end_offset, offset = req->offset;
149 u64 tmp_min, nr_bytes_left = req->nr_bytes;
150
151 if (!req->nr_bytes)
152 return -EINVAL;
153
154 list_for_each(item, req->head) {
155 window = list_entry(item, struct scif_window, list);
156 end_offset = window->offset +
157 (window->nr_pages << PAGE_SHIFT);
158 if (offset < window->offset)
159 /* Offset not found! */
160 return -ENXIO;
161 if (offset >= end_offset)
162 continue;
163 /* Check read/write protections. */
164 if ((window->prot & req->prot) != req->prot)
165 return -EPERM;
166 if (nr_bytes_left == req->nr_bytes)
167 /* Store the first window */
168 *req->out_window = window;
169 tmp_min = min((u64)end_offset - offset, nr_bytes_left);
170 nr_bytes_left -= tmp_min;
171 offset += tmp_min;
172 /*
173 * Range requested encompasses
174 * multiple windows contiguously.
175 */
176 if (!nr_bytes_left) {
177 /* Done for partial window */
178 if (req->type == SCIF_WINDOW_PARTIAL ||
179 req->type == SCIF_WINDOW_SINGLE)
180 return 0;
181 /* Extra logic for full windows */
182 if (offset == end_offset)
183 /* Spanning multiple whole windows */
184 return 0;
185 /* Not spanning multiple whole windows */
186 return -ENXIO;
187 }
188 if (req->type == SCIF_WINDOW_SINGLE)
189 break;
190 }
191 dev_err(scif_info.mdev.this_device,
192 "%s %d ENXIO\n", __func__, __LINE__);
193 return -ENXIO;
194}
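And a corresponding sketch for the offset-based lookup; apart from the scif_query_window() call, the reg_list head, and the struct fields shown above, everything here is an assumption for illustration:

static int example_find_windows(struct scif_endpt *ep, s64 offset, size_t len,
				struct scif_window **out)
{
	struct scif_rma_req req = {
		.out_window = out,
		.offset = offset,
		.nr_bytes = len,
		.prot = SCIF_PROT_READ | SCIF_PROT_WRITE,
		.type = SCIF_WINDOW_PARTIAL,
		.head = &ep->rma_info.reg_list,
	};

	/* RMA lock must be held, as documented above */
	return scif_query_window(&req);
}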
195
196/*
197 * scif_rma_list_unregister:
198 *
199 * Traverse the self registration list starting from window:
200 * 1) Call scif_unregister_window(..)
201 * RMA lock must be held.
202 */
203int scif_rma_list_unregister(struct scif_window *window,
204 s64 offset, int nr_pages)
205{
206 struct scif_endpt *ep = (struct scif_endpt *)window->ep;
207 struct list_head *head = &ep->rma_info.reg_list;
208 s64 end_offset;
209 int err = 0;
210 int loop_nr_pages;
211 struct scif_window *_window;
212
213 list_for_each_entry_safe_from(window, _window, head, list) {
214 end_offset = window->offset + (window->nr_pages << PAGE_SHIFT);
215 loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT),
216 nr_pages);
217 err = scif_unregister_window(window);
218 if (err)
219 return err;
220 nr_pages -= loop_nr_pages;
221 offset += (loop_nr_pages << PAGE_SHIFT);
222 if (!nr_pages)
223 break;
224 }
225 return 0;
226}
227
228/*
229 * scif_unmap_all_windows:
230 *
231 * Traverse all the windows in the self registration list and:
232 * 1) Delete any DMA mappings created
233 */
234void scif_unmap_all_windows(scif_epd_t epd)
235{
236 struct list_head *item, *tmp;
237 struct scif_window *window;
238 struct scif_endpt *ep = (struct scif_endpt *)epd;
239 struct list_head *head = &ep->rma_info.reg_list;
240
241 mutex_lock(&ep->rma_info.rma_lock);
242 list_for_each_safe(item, tmp, head) {
243 window = list_entry(item, struct scif_window, list);
244 scif_unmap_window(ep->remote_dev, window);
245 }
246 mutex_unlock(&ep->rma_info.rma_lock);
247}
248
249/*
250 * scif_unregister_all_windows:
251 *
252 * Traverse all the windows in the self registration list and:
253 * 1) Call scif_unregister_window(..)
254 * RMA lock must be held.
255 */
256int scif_unregister_all_windows(scif_epd_t epd)
257{
258 struct list_head *item, *tmp;
259 struct scif_window *window;
260 struct scif_endpt *ep = (struct scif_endpt *)epd;
261 struct list_head *head = &ep->rma_info.reg_list;
262 int err = 0;
263
264 mutex_lock(&ep->rma_info.rma_lock);
265retry:
266 item = NULL;
267 tmp = NULL;
268 list_for_each_safe(item, tmp, head) {
269 window = list_entry(item, struct scif_window, list);
270 ep->rma_info.async_list_del = 0;
271 err = scif_unregister_window(window);
272 if (err)
273 dev_err(scif_info.mdev.this_device,
274 "%s %d err %d\n",
275 __func__, __LINE__, err);
276 /*
277 * Need to restart list traversal if there has been
278 * an asynchronous list entry deletion.
279 */
280 if (ACCESS_ONCE(ep->rma_info.async_list_del))
281 goto retry;
282 }
283 mutex_unlock(&ep->rma_info.rma_lock);
284 if (!list_empty(&ep->rma_info.mmn_list)) {
285 spin_lock(&scif_info.rmalock);
286 list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup);
287 spin_unlock(&scif_info.rmalock);
288 schedule_work(&scif_info.mmu_notif_work);
289 }
290 return err;
291}
diff --git a/drivers/misc/mic/scif/scif_rma_list.h b/drivers/misc/mic/scif/scif_rma_list.h
new file mode 100644
index 000000000000..7d58d1d551b0
--- /dev/null
+++ b/drivers/misc/mic/scif/scif_rma_list.h
@@ -0,0 +1,57 @@
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2015 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * Intel SCIF driver.
16 *
17 */
18#ifndef SCIF_RMA_LIST_H
19#define SCIF_RMA_LIST_H
20
21/*
22 * struct scif_rma_req - Self Registration list RMA Request query
23 *
24 * @out_window: Returns the window if found
25 * @offset: Starting offset
26 * @nr_bytes: number of bytes
27 * @prot: protection requested i.e. read or write or both
28 * @type: Specify single, partial or multiple windows
29 * @head: Head of list on which to search
30 * @va_for_temp: VA for searching temporary cached windows
31 */
32struct scif_rma_req {
33 struct scif_window **out_window;
34 union {
35 s64 offset;
36 unsigned long va_for_temp;
37 };
38 size_t nr_bytes;
39 int prot;
40 enum scif_window_type type;
41 struct list_head *head;
42};
43
44/* Insert */
45void scif_insert_window(struct scif_window *window, struct list_head *head);
46void scif_insert_tcw(struct scif_window *window,
47 struct list_head *head);
48/* Query */
49int scif_query_window(struct scif_rma_req *request);
50int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request);
51/* Called from close to unregister all self windows */
52int scif_unregister_all_windows(scif_epd_t epd);
53void scif_unmap_all_windows(scif_epd_t epd);
54/* Traverse list and unregister */
55int scif_rma_list_unregister(struct scif_window *window, s64 offset,
56 int nr_pages);
57#endif /* SCIF_RMA_LIST_H */
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
index 2f30badc6ffd..1ee8e82ba710 100644
--- a/drivers/misc/sgi-gru/gruhandles.c
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -196,12 +196,6 @@ void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
196 start_instruction(tfh); 196 start_instruction(tfh);
197} 197}
198 198
199void tfh_restart(struct gru_tlb_fault_handle *tfh)
200{
201 tfh->opc = TFHOP_RESTART;
202 start_instruction(tfh);
203}
204
205void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) 199void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
206{ 200{
207 tfh->opc = TFHOP_USER_POLLING_MODE; 201 tfh->opc = TFHOP_USER_POLLING_MODE;
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
index 3f998b924d8f..3d7bd36a1c89 100644
--- a/drivers/misc/sgi-gru/gruhandles.h
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -524,7 +524,6 @@ int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
524 int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); 524 int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
525void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, 525void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
526 int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); 526 int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
527void tfh_restart(struct gru_tlb_fault_handle *tfh);
528void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh); 527void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh);
529void tfh_exception(struct gru_tlb_fault_handle *tfh); 528void tfh_exception(struct gru_tlb_fault_handle *tfh);
530 529
diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
index a3700a56b8ff..313da3150262 100644
--- a/drivers/misc/sgi-gru/grukdump.c
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -78,11 +78,10 @@ static int gru_dump_tfm(struct gru_state *gru,
78 void __user *ubuf, void __user *ubufend) 78 void __user *ubuf, void __user *ubufend)
79{ 79{
80 struct gru_tlb_fault_map *tfm; 80 struct gru_tlb_fault_map *tfm;
81 int i, ret, bytes; 81 int i;
82 82
83 bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; 83 if (GRU_NUM_TFM * GRU_CACHE_LINE_BYTES > ubufend - ubuf)
84 if (bytes > ubufend - ubuf) 84 return -EFBIG;
85 ret = -EFBIG;
86 85
87 for (i = 0; i < GRU_NUM_TFM; i++) { 86 for (i = 0; i < GRU_NUM_TFM; i++) {
88 tfm = get_tfm(gru->gs_gru_base_vaddr, i); 87 tfm = get_tfm(gru->gs_gru_base_vaddr, i);
@@ -99,11 +98,10 @@ static int gru_dump_tgh(struct gru_state *gru,
99 void __user *ubuf, void __user *ubufend) 98 void __user *ubuf, void __user *ubufend)
100{ 99{
101 struct gru_tlb_global_handle *tgh; 100 struct gru_tlb_global_handle *tgh;
102 int i, ret, bytes; 101 int i;
103 102
104 bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; 103 if (GRU_NUM_TGH * GRU_CACHE_LINE_BYTES > ubufend - ubuf)
105 if (bytes > ubufend - ubuf) 104 return -EFBIG;
106 ret = -EFBIG;
107 105
108 for (i = 0; i < GRU_NUM_TGH; i++) { 106 for (i = 0; i < GRU_NUM_TGH; i++) {
109 tgh = get_tgh(gru->gs_gru_base_vaddr, i); 107 tgh = get_tgh(gru->gs_gru_base_vaddr, i);
@@ -196,7 +194,7 @@ int gru_dump_chiplet_request(unsigned long arg)
196 return -EFAULT; 194 return -EFAULT;
197 195
198 /* Currently, only dump by gid is implemented */ 196 /* Currently, only dump by gid is implemented */
199 if (req.gid >= gru_max_gids || req.gid < 0) 197 if (req.gid >= gru_max_gids)
200 return -EINVAL; 198 return -EINVAL;
201 199
202 gru = GID_TO_GRU(req.gid); 200 gru = GID_TO_GRU(req.gid);
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index 913de07e577c..967b9dd24fe9 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -160,7 +160,12 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
160 down_write(&bs->bs_kgts_sema); 160 down_write(&bs->bs_kgts_sema);
161 161
162 if (!bs->bs_kgts) { 162 if (!bs->bs_kgts) {
163 bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); 163 do {
164 bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
165 if (!IS_ERR(bs->bs_kgts))
166 break;
167 msleep(1);
168 } while (true);
164 bs->bs_kgts->ts_user_blade_id = blade_id; 169 bs->bs_kgts->ts_user_blade_id = blade_id;
165 } 170 }
166 kgts = bs->bs_kgts; 171 kgts = bs->bs_kgts;
@@ -429,8 +434,8 @@ int gru_get_cb_exception_detail(void *cb,
429 return 0; 434 return 0;
430} 435}
431 436
432char *gru_get_cb_exception_detail_str(int ret, void *cb, 437static char *gru_get_cb_exception_detail_str(int ret, void *cb,
433 char *buf, int size) 438 char *buf, int size)
434{ 439{
435 struct gru_control_block_status *gen = (void *)cb; 440 struct gru_control_block_status *gen = (void *)cb;
436 struct control_block_extended_exc_detail excdet; 441 struct control_block_extended_exc_detail excdet;
@@ -505,7 +510,7 @@ int gru_wait_proc(void *cb)
505 return ret; 510 return ret;
506} 511}
507 512
508void gru_abort(int ret, void *cb, char *str) 513static void gru_abort(int ret, void *cb, char *str)
509{ 514{
510 char buf[GRU_EXC_STR_SIZE]; 515 char buf[GRU_EXC_STR_SIZE];
511 516
@@ -997,7 +1002,6 @@ static int quicktest1(unsigned long arg)
997{ 1002{
998 struct gru_message_queue_desc mqd; 1003 struct gru_message_queue_desc mqd;
999 void *p, *mq; 1004 void *p, *mq;
1000 unsigned long *dw;
1001 int i, ret = -EIO; 1005 int i, ret = -EIO;
1002 char mes[GRU_CACHE_LINE_BYTES], *m; 1006 char mes[GRU_CACHE_LINE_BYTES], *m;
1003 1007
@@ -1007,7 +1011,6 @@ static int quicktest1(unsigned long arg)
1007 return -ENOMEM; 1011 return -ENOMEM;
1008 mq = ALIGNUP(p, 1024); 1012 mq = ALIGNUP(p, 1024);
1009 memset(mes, 0xee, sizeof(mes)); 1013 memset(mes, 0xee, sizeof(mes));
1010 dw = mq;
1011 1014
1012 gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); 1015 gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0);
1013 for (i = 0; i < 6; i++) { 1016 for (i = 0; i < 6; i++) {
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index ae16c8cb4f3e..1525870f460a 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -930,6 +930,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
930{ 930{
931 struct gru_thread_state *gts; 931 struct gru_thread_state *gts;
932 unsigned long paddr, vaddr; 932 unsigned long paddr, vaddr;
933 unsigned long expires;
933 934
934 vaddr = (unsigned long)vmf->virtual_address; 935 vaddr = (unsigned long)vmf->virtual_address;
935 gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", 936 gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
@@ -954,7 +955,8 @@ again:
954 mutex_unlock(&gts->ts_ctxlock); 955 mutex_unlock(&gts->ts_ctxlock);
955 set_current_state(TASK_INTERRUPTIBLE); 956 set_current_state(TASK_INTERRUPTIBLE);
956 schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ 957 schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
957 if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies) 958 expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY;
959 if (time_before(expires, jiffies))
958 gru_steal_context(gts); 960 gru_steal_context(gts);
959 goto again; 961 goto again;
960 } 962 }
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index 2129274ef7ab..e936d43895d2 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -306,19 +306,20 @@ struct gru_mm_struct *gru_register_mmu_notifier(void)
306 atomic_inc(&gms->ms_refcnt); 306 atomic_inc(&gms->ms_refcnt);
307 } else { 307 } else {
308 gms = kzalloc(sizeof(*gms), GFP_KERNEL); 308 gms = kzalloc(sizeof(*gms), GFP_KERNEL);
309 if (gms) { 309 if (!gms)
310 STAT(gms_alloc); 310 return ERR_PTR(-ENOMEM);
311 spin_lock_init(&gms->ms_asid_lock); 311 STAT(gms_alloc);
312 gms->ms_notifier.ops = &gru_mmuops; 312 spin_lock_init(&gms->ms_asid_lock);
313 atomic_set(&gms->ms_refcnt, 1); 313 gms->ms_notifier.ops = &gru_mmuops;
314 init_waitqueue_head(&gms->ms_wait_queue); 314 atomic_set(&gms->ms_refcnt, 1);
315 err = __mmu_notifier_register(&gms->ms_notifier, current->mm); 315 init_waitqueue_head(&gms->ms_wait_queue);
316 if (err) 316 err = __mmu_notifier_register(&gms->ms_notifier, current->mm);
317 goto error; 317 if (err)
318 } 318 goto error;
319 } 319 }
320 gru_dbg(grudev, "gms %p, refcnt %d\n", gms, 320 if (gms)
321 atomic_read(&gms->ms_refcnt)); 321 gru_dbg(grudev, "gms %p, refcnt %d\n", gms,
322 atomic_read(&gms->ms_refcnt));
322 return gms; 323 return gms;
323error: 324error:
324 kfree(gms); 325 kfree(gms);
diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c
index 431e1dd528bc..736dae715dbf 100644
--- a/drivers/misc/sram.c
+++ b/drivers/misc/sram.c
@@ -28,20 +28,144 @@
28 28
29#define SRAM_GRANULARITY 32 29#define SRAM_GRANULARITY 32
30 30
31struct sram_partition {
32 void __iomem *base;
33
34 struct gen_pool *pool;
35 struct bin_attribute battr;
36 struct mutex lock;
37};
38
31struct sram_dev { 39struct sram_dev {
32 struct device *dev; 40 struct device *dev;
33 void __iomem *virt_base; 41 void __iomem *virt_base;
34 42
35 struct gen_pool *pool; 43 struct gen_pool *pool;
36 struct clk *clk; 44 struct clk *clk;
45
46 struct sram_partition *partition;
47 u32 partitions;
37}; 48};
38 49
39struct sram_reserve { 50struct sram_reserve {
40 struct list_head list; 51 struct list_head list;
41 u32 start; 52 u32 start;
42 u32 size; 53 u32 size;
54 bool export;
55 bool pool;
56 const char *label;
43}; 57};
44 58
59static ssize_t sram_read(struct file *filp, struct kobject *kobj,
60 struct bin_attribute *attr,
61 char *buf, loff_t pos, size_t count)
62{
63 struct sram_partition *part;
64
65 part = container_of(attr, struct sram_partition, battr);
66
67 mutex_lock(&part->lock);
68 memcpy_fromio(buf, part->base + pos, count);
69 mutex_unlock(&part->lock);
70
71 return count;
72}
73
74static ssize_t sram_write(struct file *filp, struct kobject *kobj,
75 struct bin_attribute *attr,
76 char *buf, loff_t pos, size_t count)
77{
78 struct sram_partition *part;
79
80 part = container_of(attr, struct sram_partition, battr);
81
82 mutex_lock(&part->lock);
83 memcpy_toio(part->base + pos, buf, count);
84 mutex_unlock(&part->lock);
85
86 return count;
87}
88
89static int sram_add_pool(struct sram_dev *sram, struct sram_reserve *block,
90 phys_addr_t start, struct sram_partition *part)
91{
92 int ret;
93
94 part->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
95 NUMA_NO_NODE, block->label);
96 if (IS_ERR(part->pool))
97 return PTR_ERR(part->pool);
98
99 ret = gen_pool_add_virt(part->pool, (unsigned long)part->base, start,
100 block->size, NUMA_NO_NODE);
101 if (ret < 0) {
102 dev_err(sram->dev, "failed to register subpool: %d\n", ret);
103 return ret;
104 }
105
106 return 0;
107}
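For context, a hedged sketch of how another driver might consume one of these labeled partition pools via the genalloc API; the "sram" phandle property, the allocation size, and the probe wiring are assumptions, not part of this change:

static int example_use_sram(struct platform_device *pdev)
{
	struct gen_pool *pool;
	unsigned long va;

	pool = of_gen_pool_get(pdev->dev.of_node, "sram", 0);
	if (!pool)
		return -EPROBE_DEFER;

	va = gen_pool_alloc(pool, SZ_4K);
	if (!va)
		return -ENOMEM;

	/* ... use the SRAM mapping at 'va' ... */

	gen_pool_free(pool, va, SZ_4K);
	return 0;
}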
108
109static int sram_add_export(struct sram_dev *sram, struct sram_reserve *block,
110 phys_addr_t start, struct sram_partition *part)
111{
112 sysfs_bin_attr_init(&part->battr);
113 part->battr.attr.name = devm_kasprintf(sram->dev, GFP_KERNEL,
114 "%llx.sram",
115 (unsigned long long)start);
116 if (!part->battr.attr.name)
117 return -ENOMEM;
118
119 part->battr.attr.mode = S_IRUSR | S_IWUSR;
120 part->battr.read = sram_read;
121 part->battr.write = sram_write;
122 part->battr.size = block->size;
123
124 return device_create_bin_file(sram->dev, &part->battr);
125}
126
127static int sram_add_partition(struct sram_dev *sram, struct sram_reserve *block,
128 phys_addr_t start)
129{
130 int ret;
131 struct sram_partition *part = &sram->partition[sram->partitions];
132
133 mutex_init(&part->lock);
134 part->base = sram->virt_base + block->start;
135
136 if (block->pool) {
137 ret = sram_add_pool(sram, block, start, part);
138 if (ret)
139 return ret;
140 }
141 if (block->export) {
142 ret = sram_add_export(sram, block, start, part);
143 if (ret)
144 return ret;
145 }
146 sram->partitions++;
147
148 return 0;
149}
150
151static void sram_free_partitions(struct sram_dev *sram)
152{
153 struct sram_partition *part;
154
155 if (!sram->partitions)
156 return;
157
158 part = &sram->partition[sram->partitions - 1];
159 for (; sram->partitions; sram->partitions--, part--) {
160 if (part->battr.size)
161 device_remove_bin_file(sram->dev, &part->battr);
162
163 if (part->pool &&
164 gen_pool_avail(part->pool) < gen_pool_size(part->pool))
165 dev_err(sram->dev, "removed pool while SRAM allocated\n");
166 }
167}
168
45static int sram_reserve_cmp(void *priv, struct list_head *a, 169static int sram_reserve_cmp(void *priv, struct list_head *a,
46 struct list_head *b) 170 struct list_head *b)
47{ 171{
@@ -57,7 +181,8 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
57 unsigned long size, cur_start, cur_size; 181 unsigned long size, cur_start, cur_size;
58 struct sram_reserve *rblocks, *block; 182 struct sram_reserve *rblocks, *block;
59 struct list_head reserve_list; 183 struct list_head reserve_list;
60 unsigned int nblocks; 184 unsigned int nblocks, exports = 0;
185 const char *label;
61 int ret = 0; 186 int ret = 0;
62 187
63 INIT_LIST_HEAD(&reserve_list); 188 INIT_LIST_HEAD(&reserve_list);
@@ -69,7 +194,7 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
69 * after the reserved blocks from the dt are processed. 194 * after the reserved blocks from the dt are processed.
70 */ 195 */
71 nblocks = (np) ? of_get_available_child_count(np) + 1 : 1; 196 nblocks = (np) ? of_get_available_child_count(np) + 1 : 1;
72 rblocks = kmalloc((nblocks) * sizeof(*rblocks), GFP_KERNEL); 197 rblocks = kzalloc((nblocks) * sizeof(*rblocks), GFP_KERNEL);
73 if (!rblocks) 198 if (!rblocks)
74 return -ENOMEM; 199 return -ENOMEM;
75 200
@@ -82,7 +207,6 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
82 dev_err(sram->dev, 207 dev_err(sram->dev,
83 "could not get address for node %s\n", 208 "could not get address for node %s\n",
84 child->full_name); 209 child->full_name);
85 of_node_put(child);
86 goto err_chunks; 210 goto err_chunks;
87 } 211 }
88 212
@@ -91,7 +215,6 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
91 "reserved block %s outside the sram area\n", 215 "reserved block %s outside the sram area\n",
92 child->full_name); 216 child->full_name);
93 ret = -EINVAL; 217 ret = -EINVAL;
94 of_node_put(child);
95 goto err_chunks; 218 goto err_chunks;
96 } 219 }
97 220
@@ -99,11 +222,42 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
99 block->size = resource_size(&child_res); 222 block->size = resource_size(&child_res);
100 list_add_tail(&block->list, &reserve_list); 223 list_add_tail(&block->list, &reserve_list);
101 224
102 dev_dbg(sram->dev, "found reserved block 0x%x-0x%x\n", 225 if (of_find_property(child, "export", NULL))
103 block->start, block->start + block->size); 226 block->export = true;
227
228 if (of_find_property(child, "pool", NULL))
229 block->pool = true;
230
231 if ((block->export || block->pool) && block->size) {
232 exports++;
233
234 label = NULL;
235 ret = of_property_read_string(child, "label", &label);
236 if (ret && ret != -EINVAL) {
237 dev_err(sram->dev,
238 "%s has invalid label name\n",
239 child->full_name);
240 goto err_chunks;
241 }
242 if (!label)
243 label = child->name;
244
245 block->label = devm_kstrdup(sram->dev,
246 label, GFP_KERNEL);
247 if (!block->label)
248 goto err_chunks;
249
250 dev_dbg(sram->dev, "found %sblock '%s' 0x%x-0x%x\n",
251 block->export ? "exported " : "", block->label,
252 block->start, block->start + block->size);
253 } else {
254 dev_dbg(sram->dev, "found reserved block 0x%x-0x%x\n",
255 block->start, block->start + block->size);
256 }
104 257
105 block++; 258 block++;
106 } 259 }
260 child = NULL;
107 261
108 /* the last chunk marks the end of the region */ 262 /* the last chunk marks the end of the region */
109 rblocks[nblocks - 1].start = size; 263 rblocks[nblocks - 1].start = size;
@@ -112,8 +266,17 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
112 266
113 list_sort(NULL, &reserve_list, sram_reserve_cmp); 267 list_sort(NULL, &reserve_list, sram_reserve_cmp);
114 268
115 cur_start = 0; 269 if (exports) {
270 sram->partition = devm_kzalloc(sram->dev,
271 exports * sizeof(*sram->partition),
272 GFP_KERNEL);
273 if (!sram->partition) {
274 ret = -ENOMEM;
275 goto err_chunks;
276 }
277 }
116 278
279 cur_start = 0;
117 list_for_each_entry(block, &reserve_list, list) { 280 list_for_each_entry(block, &reserve_list, list) {
118 /* can only happen if sections overlap */ 281 /* can only happen if sections overlap */
119 if (block->start < cur_start) { 282 if (block->start < cur_start) {
@@ -121,9 +284,19 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
121 "block at 0x%x starts after current offset 0x%lx\n", 284 "block at 0x%x starts after current offset 0x%lx\n",
122 block->start, cur_start); 285 block->start, cur_start);
123 ret = -EINVAL; 286 ret = -EINVAL;
287 sram_free_partitions(sram);
124 goto err_chunks; 288 goto err_chunks;
125 } 289 }
126 290
291 if ((block->export || block->pool) && block->size) {
292 ret = sram_add_partition(sram, block,
293 res->start + block->start);
294 if (ret) {
295 sram_free_partitions(sram);
296 goto err_chunks;
297 }
298 }
299
127 /* current start is in a reserved block, so continue after it */ 300 /* current start is in a reserved block, so continue after it */
128 if (block->start == cur_start) { 301 if (block->start == cur_start) {
129 cur_start = block->start + block->size; 302 cur_start = block->start + block->size;
@@ -143,14 +316,19 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
143 ret = gen_pool_add_virt(sram->pool, 316 ret = gen_pool_add_virt(sram->pool,
144 (unsigned long)sram->virt_base + cur_start, 317 (unsigned long)sram->virt_base + cur_start,
145 res->start + cur_start, cur_size, -1); 318 res->start + cur_start, cur_size, -1);
146 if (ret < 0) 319 if (ret < 0) {
320 sram_free_partitions(sram);
147 goto err_chunks; 321 goto err_chunks;
322 }
148 323
149 /* next allocation after this reserved block */ 324 /* next allocation after this reserved block */
150 cur_start = block->start + block->size; 325 cur_start = block->start + block->size;
151 } 326 }
152 327
153 err_chunks: 328 err_chunks:
329 if (child)
330 of_node_put(child);
331
154 kfree(rblocks); 332 kfree(rblocks);
155 333
156 return ret; 334 return ret;
@@ -213,6 +391,8 @@ static int sram_remove(struct platform_device *pdev)
213{ 391{
214 struct sram_dev *sram = platform_get_drvdata(pdev); 392 struct sram_dev *sram = platform_get_drvdata(pdev);
215 393
394 sram_free_partitions(sram);
395
216 if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool)) 396 if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool))
217 dev_err(sram->dev, "removed while SRAM allocated\n"); 397 dev_err(sram->dev, "removed while SRAM allocated\n");
218 398
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index c8c6a363069c..6e3af8b42cdd 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -460,6 +460,13 @@ static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb)
460 * - TTY layer when write's finished 460 * - TTY layer when write's finished
461 * - st_write (in context of the protocol stack) 461 * - st_write (in context of the protocol stack)
462 */ 462 */
463static void work_fn_write_wakeup(struct work_struct *work)
464{
465 struct st_data_s *st_gdata = container_of(work, struct st_data_s,
466 work_write_wakeup);
467
468 st_tx_wakeup((void *)st_gdata);
469}
463void st_tx_wakeup(struct st_data_s *st_data) 470void st_tx_wakeup(struct st_data_s *st_data)
464{ 471{
465 struct sk_buff *skb; 472 struct sk_buff *skb;
@@ -812,8 +819,12 @@ static void st_tty_wakeup(struct tty_struct *tty)
812 /* don't do a wakeup for now */ 819 /* don't do a wakeup for now */
813 clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); 820 clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
814 821
815 /* call our internal wakeup */ 822 /*
816 st_tx_wakeup((void *)st_gdata); 823 * schedule the internal wakeup instead of calling directly to
824 * avoid lockup (port->lock needed in tty->ops->write is
825 * already taken here)
826 */
827 schedule_work(&st_gdata->work_write_wakeup);
817} 828}
818 829
819static void st_tty_flush_buffer(struct tty_struct *tty) 830static void st_tty_flush_buffer(struct tty_struct *tty)
@@ -881,6 +892,9 @@ int st_core_init(struct st_data_s **core_data)
881 pr_err("unable to un-register ldisc"); 892 pr_err("unable to un-register ldisc");
882 return err; 893 return err;
883 } 894 }
895
896 INIT_WORK(&st_gdata->work_write_wakeup, work_fn_write_wakeup);
897
884 *core_data = st_gdata; 898 *core_data = st_gdata;
885 return 0; 899 return 0;
886} 900}
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index ffb56340d0c7..89300870fefb 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * VMware Balloon driver. 2 * VMware Balloon driver.
3 * 3 *
4 * Copyright (C) 2000-2010, VMware, Inc. All Rights Reserved. 4 * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the 7 * under the terms of the GNU General Public License as published by the
@@ -37,16 +37,19 @@
37#include <linux/types.h> 37#include <linux/types.h>
38#include <linux/kernel.h> 38#include <linux/kernel.h>
39#include <linux/mm.h> 39#include <linux/mm.h>
40#include <linux/vmalloc.h>
40#include <linux/sched.h> 41#include <linux/sched.h>
41#include <linux/module.h> 42#include <linux/module.h>
42#include <linux/workqueue.h> 43#include <linux/workqueue.h>
43#include <linux/debugfs.h> 44#include <linux/debugfs.h>
44#include <linux/seq_file.h> 45#include <linux/seq_file.h>
46#include <linux/vmw_vmci_defs.h>
47#include <linux/vmw_vmci_api.h>
45#include <asm/hypervisor.h> 48#include <asm/hypervisor.h>
46 49
47MODULE_AUTHOR("VMware, Inc."); 50MODULE_AUTHOR("VMware, Inc.");
48MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); 51MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
49MODULE_VERSION("1.3.0.0-k"); 52MODULE_VERSION("1.5.0.0-k");
50MODULE_ALIAS("dmi:*:svnVMware*:*"); 53MODULE_ALIAS("dmi:*:svnVMware*:*");
51MODULE_ALIAS("vmware_vmmemctl"); 54MODULE_ALIAS("vmware_vmmemctl");
52MODULE_LICENSE("GPL"); 55MODULE_LICENSE("GPL");
@@ -57,12 +60,6 @@ MODULE_LICENSE("GPL");
57 */ 60 */
58 61
59/* 62/*
60 * Rate of allocating memory when there is no memory pressure
61 * (driver performs non-sleeping allocations).
62 */
63#define VMW_BALLOON_NOSLEEP_ALLOC_MAX 16384U
64
65/*
66 * Rates of memory allocation when guest experiences memory pressure 63 * Rates of memory allocation
67 * (driver performs sleeping allocations). 64 * (driver performs sleeping allocations).
68 */ 65 */
@@ -71,13 +68,6 @@ MODULE_LICENSE("GPL");
71#define VMW_BALLOON_RATE_ALLOC_INC 16U 68#define VMW_BALLOON_RATE_ALLOC_INC 16U
72 69
73/* 70/*
74 * Rates for releasing pages while deflating balloon.
75 */
76#define VMW_BALLOON_RATE_FREE_MIN 512U
77#define VMW_BALLOON_RATE_FREE_MAX 16384U
78#define VMW_BALLOON_RATE_FREE_INC 16U
79
80/*
81 * When guest is under memory pressure, use a reduced page allocation 71 * When guest is under memory pressure, use a reduced page allocation
82 * rate for next several cycles. 72 * rate for next several cycles.
83 */ 73 */
@@ -99,9 +89,6 @@ MODULE_LICENSE("GPL");
99 */ 89 */
100#define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER) 90#define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER)
101 91
102/* Maximum number of page allocations without yielding processor */
103#define VMW_BALLOON_YIELD_THRESHOLD 1024
104
105/* Maximum number of refused pages we accumulate during inflation cycle */ 92/* Maximum number of refused pages we accumulate during inflation cycle */
106#define VMW_BALLOON_MAX_REFUSED 16 93#define VMW_BALLOON_MAX_REFUSED 16
107 94
@@ -116,17 +103,45 @@ enum vmwballoon_capabilities {
116 /* 103 /*
117 * Bit 0 is reserved and not associated to any capability. 104 * Bit 0 is reserved and not associated to any capability.
118 */ 105 */
119 VMW_BALLOON_BASIC_CMDS = (1 << 1), 106 VMW_BALLOON_BASIC_CMDS = (1 << 1),
120 VMW_BALLOON_BATCHED_CMDS = (1 << 2) 107 VMW_BALLOON_BATCHED_CMDS = (1 << 2),
108 VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3),
109 VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4),
121}; 110};
122 111
123#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS) 112#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \
113 | VMW_BALLOON_BATCHED_CMDS \
114 | VMW_BALLOON_BATCHED_2M_CMDS \
115 | VMW_BALLOON_SIGNALLED_WAKEUP_CMD)
116
117#define VMW_BALLOON_2M_SHIFT (9)
118#define VMW_BALLOON_NUM_PAGE_SIZES (2)
119
120/*
121 * Backdoor commands availability:
122 *
123 * START, GET_TARGET and GUEST_ID are always available,
124 *
125 * VMW_BALLOON_BASIC_CMDS:
126 * LOCK and UNLOCK commands,
127 * VMW_BALLOON_BATCHED_CMDS:
128 * BATCHED_LOCK and BATCHED_UNLOCK commands,
129 * VMW_BALLOON_BATCHED_2M_CMDS:
130 * BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
131 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
132 * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
133 */
134#define VMW_BALLOON_CMD_START 0
135#define VMW_BALLOON_CMD_GET_TARGET 1
136#define VMW_BALLOON_CMD_LOCK 2
137#define VMW_BALLOON_CMD_UNLOCK 3
138#define VMW_BALLOON_CMD_GUEST_ID 4
139#define VMW_BALLOON_CMD_BATCHED_LOCK 6
140#define VMW_BALLOON_CMD_BATCHED_UNLOCK 7
141#define VMW_BALLOON_CMD_BATCHED_2M_LOCK 8
142#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 9
143#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET 10
124 144
125#define VMW_BALLOON_CMD_START 0
126#define VMW_BALLOON_CMD_GET_TARGET 1
127#define VMW_BALLOON_CMD_LOCK 2
128#define VMW_BALLOON_CMD_UNLOCK 3
129#define VMW_BALLOON_CMD_GUEST_ID 4
130 145
131/* error codes */ 146/* error codes */
132#define VMW_BALLOON_SUCCESS 0 147#define VMW_BALLOON_SUCCESS 0
@@ -142,18 +157,60 @@ enum vmwballoon_capabilities {
142 157
143#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000) 158#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000)
144 159
145#define VMWARE_BALLOON_CMD(cmd, data, result) \ 160/* Batch page description */
161
162/*
163 * Layout of a page in the batch page:
164 *
165 * +-------------+----------+--------+
166 * | | | |
167 * | Page number | Reserved | Status |
168 * | | | |
169 * +-------------+----------+--------+
170 * 64 PAGE_SHIFT 6 0
171 *
172 * The reserved field should be set to 0.
173 */
174#define VMW_BALLOON_BATCH_MAX_PAGES (PAGE_SIZE / sizeof(u64))
175#define VMW_BALLOON_BATCH_STATUS_MASK ((1UL << 5) - 1)
176#define VMW_BALLOON_BATCH_PAGE_MASK (~((1UL << PAGE_SHIFT) - 1))
177
178struct vmballoon_batch_page {
179 u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
180};
181
182static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
183{
184 return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
185}
186
187static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
188 int idx)
189{
190 return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
191}
192
193static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
194 u64 pa)
195{
196 batch->pages[idx] = pa;
197}
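A hedged sketch of how one slot of the batch page is used around a batched lock; the helper name and the pr_debug() reporting are illustrative, only the accessors and VMW_BALLOON_SUCCESS come from this file:

static void example_batch_entry(struct vmballoon_batch_page *batch,
				int idx, struct page *p)
{
	u64 pa = (u64)page_to_pfn(p) << PAGE_SHIFT;

	/* low status bits start out as 0; the host fills them in */
	vmballoon_batch_set_pa(batch, idx, pa);

	/* ... VMWARE_BALLOON_CMD(BATCHED_LOCK, ...) runs here ... */

	if (vmballoon_batch_get_status(batch, idx) != VMW_BALLOON_SUCCESS)
		pr_debug("page %#llx refused by hypervisor\n",
			 vmballoon_batch_get_pa(batch, idx));
}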
198
199
200#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result) \
146({ \ 201({ \
147 unsigned long __status, __dummy1, __dummy2; \ 202 unsigned long __status, __dummy1, __dummy2, __dummy3; \
148 __asm__ __volatile__ ("inl %%dx" : \ 203 __asm__ __volatile__ ("inl %%dx" : \
149 "=a"(__status), \ 204 "=a"(__status), \
150 "=c"(__dummy1), \ 205 "=c"(__dummy1), \
151 "=d"(__dummy2), \ 206 "=d"(__dummy2), \
152 "=b"(result) : \ 207 "=b"(result), \
208 "=S" (__dummy3) : \
153 "0"(VMW_BALLOON_HV_MAGIC), \ 209 "0"(VMW_BALLOON_HV_MAGIC), \
154 "1"(VMW_BALLOON_CMD_##cmd), \ 210 "1"(VMW_BALLOON_CMD_##cmd), \
155 "2"(VMW_BALLOON_HV_PORT), \ 211 "2"(VMW_BALLOON_HV_PORT), \
156 "3"(data) : \ 212 "3"(arg1), \
213 "4" (arg2) : \
157 "memory"); \ 214 "memory"); \
158 if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \ 215 if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \
159 result = __dummy1; \ 216 result = __dummy1; \
@@ -164,27 +221,30 @@ enum vmwballoon_capabilities {
164#ifdef CONFIG_DEBUG_FS 221#ifdef CONFIG_DEBUG_FS
165struct vmballoon_stats { 222struct vmballoon_stats {
166 unsigned int timer; 223 unsigned int timer;
224 unsigned int doorbell;
167 225
168 /* allocation statistics */ 226 /* allocation statistics */
169 unsigned int alloc; 227 unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
170 unsigned int alloc_fail; 228 unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
171 unsigned int sleep_alloc; 229 unsigned int sleep_alloc;
172 unsigned int sleep_alloc_fail; 230 unsigned int sleep_alloc_fail;
173 unsigned int refused_alloc; 231 unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
174 unsigned int refused_free; 232 unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
175 unsigned int free; 233 unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
176 234
177 /* monitor operations */ 235 /* monitor operations */
178 unsigned int lock; 236 unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
179 unsigned int lock_fail; 237 unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
180 unsigned int unlock; 238 unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
181 unsigned int unlock_fail; 239 unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
182 unsigned int target; 240 unsigned int target;
183 unsigned int target_fail; 241 unsigned int target_fail;
184 unsigned int start; 242 unsigned int start;
185 unsigned int start_fail; 243 unsigned int start_fail;
186 unsigned int guest_type; 244 unsigned int guest_type;
187 unsigned int guest_type_fail; 245 unsigned int guest_type_fail;
246 unsigned int doorbell_set;
247 unsigned int doorbell_unset;
188}; 248};
189 249
190#define STATS_INC(stat) (stat)++ 250#define STATS_INC(stat) (stat)++
@@ -192,14 +252,30 @@ struct vmballoon_stats {
192#define STATS_INC(stat) 252#define STATS_INC(stat)
193#endif 253#endif
194 254
195struct vmballoon { 255struct vmballoon;
196 256
257struct vmballoon_ops {
258 void (*add_page)(struct vmballoon *b, int idx, struct page *p);
259 int (*lock)(struct vmballoon *b, unsigned int num_pages,
260 bool is_2m_pages, unsigned int *target);
261 int (*unlock)(struct vmballoon *b, unsigned int num_pages,
262 bool is_2m_pages, unsigned int *target);
263};
264
265struct vmballoon_page_size {
197 /* list of reserved physical pages */ 266 /* list of reserved physical pages */
198 struct list_head pages; 267 struct list_head pages;
199 268
200 /* transient list of non-balloonable pages */ 269 /* transient list of non-balloonable pages */
201 struct list_head refused_pages; 270 struct list_head refused_pages;
202 unsigned int n_refused_pages; 271 unsigned int n_refused_pages;
272};
273
274struct vmballoon {
275 struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];
276
277 /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
278 unsigned supported_page_sizes;
203 279
204 /* balloon size in pages */ 280 /* balloon size in pages */
205 unsigned int size; 281 unsigned int size;
@@ -210,11 +286,18 @@ struct vmballoon {
210 286
211 /* adjustment rates (pages per second) */ 287 /* adjustment rates (pages per second) */
212 unsigned int rate_alloc; 288 unsigned int rate_alloc;
213 unsigned int rate_free;
214 289
215 /* slowdown page allocations for next few cycles */ 290 /* slowdown page allocations for next few cycles */
216 unsigned int slow_allocation_cycles; 291 unsigned int slow_allocation_cycles;
217 292
293 unsigned long capabilities;
294
295 struct vmballoon_batch_page *batch_page;
296 unsigned int batch_max_pages;
297 struct page *page;
298
299 const struct vmballoon_ops *ops;
300
218#ifdef CONFIG_DEBUG_FS 301#ifdef CONFIG_DEBUG_FS
219 /* statistics */ 302 /* statistics */
220 struct vmballoon_stats stats; 303 struct vmballoon_stats stats;
@@ -226,6 +309,8 @@ struct vmballoon {
226 struct sysinfo sysinfo; 309 struct sysinfo sysinfo;
227 310
228 struct delayed_work dwork; 311 struct delayed_work dwork;
312
313 struct vmci_handle vmci_doorbell;
229}; 314};
230 315
231static struct vmballoon balloon; 316static struct vmballoon balloon;
@@ -234,20 +319,38 @@ static struct vmballoon balloon;
234 * Send "start" command to the host, communicating supported version 319 * Send "start" command to the host, communicating supported version
235 * of the protocol. 320 * of the protocol.
236 */ 321 */
237static bool vmballoon_send_start(struct vmballoon *b) 322static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
238{ 323{
239 unsigned long status, capabilities; 324 unsigned long status, capabilities, dummy = 0;
325 bool success;
240 326
241 STATS_INC(b->stats.start); 327 STATS_INC(b->stats.start);
242 328
243 status = VMWARE_BALLOON_CMD(START, VMW_BALLOON_CAPABILITIES, 329 status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);
244 capabilities);
245 if (status == VMW_BALLOON_SUCCESS)
246 return true;
247 330
248 pr_debug("%s - failed, hv returns %ld\n", __func__, status); 331 switch (status) {
249 STATS_INC(b->stats.start_fail); 332 case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
250 return false; 333 b->capabilities = capabilities;
334 success = true;
335 break;
336 case VMW_BALLOON_SUCCESS:
337 b->capabilities = VMW_BALLOON_BASIC_CMDS;
338 success = true;
339 break;
340 default:
341 success = false;
342 }
343
344 if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
345 b->supported_page_sizes = 2;
346 else
347 b->supported_page_sizes = 1;
348
349 if (!success) {
350 pr_debug("%s - failed, hv returns %ld\n", __func__, status);
351 STATS_INC(b->stats.start_fail);
352 }
353 return success;
251} 354}
252 355
253static bool vmballoon_check_status(struct vmballoon *b, unsigned long status) 356static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
@@ -273,9 +376,10 @@ static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
273 */ 376 */
274static bool vmballoon_send_guest_id(struct vmballoon *b) 377static bool vmballoon_send_guest_id(struct vmballoon *b)
275{ 378{
276 unsigned long status, dummy; 379 unsigned long status, dummy = 0;
277 380
278 status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy); 381 status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
382 dummy);
279 383
280 STATS_INC(b->stats.guest_type); 384 STATS_INC(b->stats.guest_type);
281 385
@@ -287,6 +391,14 @@ static bool vmballoon_send_guest_id(struct vmballoon *b)
287 return false; 391 return false;
288} 392}
289 393
394static u16 vmballoon_page_size(bool is_2m_page)
395{
396 if (is_2m_page)
397 return 1 << VMW_BALLOON_2M_SHIFT;
398
399 return 1;
400}
401
290/* 402/*
291 * Retrieve desired balloon size from the host. 403 * Retrieve desired balloon size from the host.
292 */ 404 */
@@ -295,6 +407,7 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
295 unsigned long status; 407 unsigned long status;
296 unsigned long target; 408 unsigned long target;
297 unsigned long limit; 409 unsigned long limit;
410 unsigned long dummy = 0;
298 u32 limit32; 411 u32 limit32;
299 412
300 /* 413 /*
@@ -313,7 +426,7 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
313 /* update stats */ 426 /* update stats */
314 STATS_INC(b->stats.target); 427 STATS_INC(b->stats.target);
315 428
316 status = VMWARE_BALLOON_CMD(GET_TARGET, limit, target); 429 status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
317 if (vmballoon_check_status(b, status)) { 430 if (vmballoon_check_status(b, status)) {
318 *new_target = target; 431 *new_target = target;
319 return true; 432 return true;
@@ -330,23 +443,46 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
330 * check the return value and maybe submit a different page. 443 * check the return value and maybe submit a different page.
331 */ 444 */
332static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, 445static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
333 unsigned int *hv_status) 446 unsigned int *hv_status, unsigned int *target)
334{ 447{
335 unsigned long status, dummy; 448 unsigned long status, dummy = 0;
336 u32 pfn32; 449 u32 pfn32;
337 450
338 pfn32 = (u32)pfn; 451 pfn32 = (u32)pfn;
339 if (pfn32 != pfn) 452 if (pfn32 != pfn)
340 return -1; 453 return -1;
341 454
342 STATS_INC(b->stats.lock); 455 STATS_INC(b->stats.lock[false]);
343 456
344 *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy); 457 *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
345 if (vmballoon_check_status(b, status)) 458 if (vmballoon_check_status(b, status))
346 return 0; 459 return 0;
347 460
348 pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); 461 pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
349 STATS_INC(b->stats.lock_fail); 462 STATS_INC(b->stats.lock_fail[false]);
463 return 1;
464}
465
466static int vmballoon_send_batched_lock(struct vmballoon *b,
467 unsigned int num_pages, bool is_2m_pages, unsigned int *target)
468{
469 unsigned long status;
470 unsigned long pfn = page_to_pfn(b->page);
471
472 STATS_INC(b->stats.lock[is_2m_pages]);
473
474 if (is_2m_pages)
475 status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
476 *target);
477 else
478 status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
479 *target);
480
481 if (vmballoon_check_status(b, status))
482 return 0;
483
484 pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
485 STATS_INC(b->stats.lock_fail[is_2m_pages]);
350 return 1; 486 return 1;
351} 487}
352 488
@@ -354,26 +490,66 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
354 * Notify the host that guest intends to release given page back into 490 * Notify the host that guest intends to release given page back into
355 * the pool of available (to the guest) pages. 491 * the pool of available (to the guest) pages.
356 */ 492 */
357static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn) 493static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
494 unsigned int *target)
358{ 495{
359 unsigned long status, dummy; 496 unsigned long status, dummy = 0;
360 u32 pfn32; 497 u32 pfn32;
361 498
362 pfn32 = (u32)pfn; 499 pfn32 = (u32)pfn;
363 if (pfn32 != pfn) 500 if (pfn32 != pfn)
364 return false; 501 return false;
365 502
366 STATS_INC(b->stats.unlock); 503 STATS_INC(b->stats.unlock[false]);
367 504
368 status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy); 505 status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
369 if (vmballoon_check_status(b, status)) 506 if (vmballoon_check_status(b, status))
370 return true; 507 return true;
371 508
372 pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); 509 pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
373 STATS_INC(b->stats.unlock_fail); 510 STATS_INC(b->stats.unlock_fail[false]);
511 return false;
512}
513
514static bool vmballoon_send_batched_unlock(struct vmballoon *b,
515 unsigned int num_pages, bool is_2m_pages, unsigned int *target)
516{
517 unsigned long status;
518 unsigned long pfn = page_to_pfn(b->page);
519
520 STATS_INC(b->stats.unlock[is_2m_pages]);
521
522 if (is_2m_pages)
523 status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
524 *target);
525 else
526 status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
527 *target);
528
529 if (vmballoon_check_status(b, status))
530 return true;
531
532 pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
533 STATS_INC(b->stats.unlock_fail[is_2m_pages]);
374 return false; 534 return false;
375} 535}
376 536
537static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
538{
539 if (is_2m_page)
540 return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);
541
542 return alloc_page(flags);
543}
544
545static void vmballoon_free_page(struct page *page, bool is_2m_page)
546{
547 if (is_2m_page)
548 __free_pages(page, VMW_BALLOON_2M_SHIFT);
549 else
550 __free_page(page);
551}
552
377/* 553/*
378 * Quickly release all pages allocated for the balloon. This function is 554 * Quickly release all pages allocated for the balloon. This function is
379 * called when host decides to "reset" balloon for one reason or another. 555 * called when host decides to "reset" balloon for one reason or another.
@@ -383,35 +559,31 @@ static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn)
383static void vmballoon_pop(struct vmballoon *b) 559static void vmballoon_pop(struct vmballoon *b)
384{ 560{
385 struct page *page, *next; 561 struct page *page, *next;
386 unsigned int count = 0; 562 unsigned is_2m_pages;
387 563
388 list_for_each_entry_safe(page, next, &b->pages, lru) { 564 for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
389 list_del(&page->lru); 565 is_2m_pages++) {
390 __free_page(page); 566 struct vmballoon_page_size *page_size =
391 STATS_INC(b->stats.free); 567 &b->page_sizes[is_2m_pages];
392 b->size--; 568 u16 size_per_page = vmballoon_page_size(is_2m_pages);
393 569
394 if (++count >= b->rate_free) { 570 list_for_each_entry_safe(page, next, &page_size->pages, lru) {
395 count = 0; 571 list_del(&page->lru);
572 vmballoon_free_page(page, is_2m_pages);
573 STATS_INC(b->stats.free[is_2m_pages]);
574 b->size -= size_per_page;
396 cond_resched(); 575 cond_resched();
397 } 576 }
398 } 577 }
399}
400 578
401/* 579 if (b->batch_page) {
402 * Perform standard reset sequence by popping the balloon (in case it 580 vunmap(b->batch_page);
403 * is not empty) and then restarting protocol. This operation normally 581 b->batch_page = NULL;
404 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. 582 }
405 */
406static void vmballoon_reset(struct vmballoon *b)
407{
408 /* free all pages, skipping monitor unlock */
409 vmballoon_pop(b);
410 583
411 if (vmballoon_send_start(b)) { 584 if (b->page) {
412 b->reset_required = false; 585 __free_page(b->page);
413 if (!vmballoon_send_guest_id(b)) 586 b->page = NULL;
414 pr_err("failed to send guest ID to the host\n");
415 } 587 }
416} 588}
417 589
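vmballoon_pop() now drains one page list per supported size and also tears down the batching state (the vmapped batch_page and the struct page backing it), so a later reset can renegotiate capabilities from a clean slate. The drain relies on the safe list-iteration idiom, since each entry is unlinked and freed inside the loop; a self-contained sketch of that idiom with a made-up element type:

        #include <linux/list.h>
        #include <linux/sched.h>
        #include <linux/slab.h>

        struct demo_entry {                     /* hypothetical element type */
                struct list_head lru;
                unsigned long pfn;
        };

        static void demo_drain(struct list_head *head)
        {
                struct demo_entry *e, *next;

                /* The _safe variant caches 'next' before 'e' is unlinked and freed. */
                list_for_each_entry_safe(e, next, head, lru) {
                        list_del(&e->lru);
                        kfree(e);
                        cond_resched();         /* yield during long drains, as the driver does */
                }
        }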
@@ -420,17 +592,23 @@ static void vmballoon_reset(struct vmballoon *b)
420 * refuse list, those refused page are then released at the end of the 592 * refuse list, those refused page are then released at the end of the
421 * inflation cycle. 593 * inflation cycle.
422 */ 594 */
423static int vmballoon_lock_page(struct vmballoon *b, struct page *page) 595static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
596 bool is_2m_pages, unsigned int *target)
424{ 597{
425 int locked, hv_status; 598 int locked, hv_status;
599 struct page *page = b->page;
600 struct vmballoon_page_size *page_size = &b->page_sizes[false];
601
602 /* is_2m_pages can never happen as 2m pages support implies batching */
426 603
427 locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status); 604 locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
605 target);
428 if (locked > 0) { 606 if (locked > 0) {
429 STATS_INC(b->stats.refused_alloc); 607 STATS_INC(b->stats.refused_alloc[false]);
430 608
431 if (hv_status == VMW_BALLOON_ERROR_RESET || 609 if (hv_status == VMW_BALLOON_ERROR_RESET ||
432 hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) { 610 hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
433 __free_page(page); 611 vmballoon_free_page(page, false);
434 return -EIO; 612 return -EIO;
435 } 613 }
436 614
@@ -439,17 +617,17 @@ static int vmballoon_lock_page(struct vmballoon *b, struct page *page)
439 * and retry allocation, unless we already accumulated 617 * and retry allocation, unless we already accumulated
440 * too many of them, in which case take a breather. 618 * too many of them, in which case take a breather.
441 */ 619 */
442 if (b->n_refused_pages < VMW_BALLOON_MAX_REFUSED) { 620 if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
443 b->n_refused_pages++; 621 page_size->n_refused_pages++;
444 list_add(&page->lru, &b->refused_pages); 622 list_add(&page->lru, &page_size->refused_pages);
445 } else { 623 } else {
446 __free_page(page); 624 vmballoon_free_page(page, false);
447 } 625 }
448 return -EIO; 626 return -EIO;
449 } 627 }
450 628
451 /* track allocated page */ 629 /* track allocated page */
452 list_add(&page->lru, &b->pages); 630 list_add(&page->lru, &page_size->pages);
453 631
454 /* update balloon size */ 632 /* update balloon size */
455 b->size++; 633 b->size++;
@@ -457,21 +635,81 @@ static int vmballoon_lock_page(struct vmballoon *b, struct page *page)
457 return 0; 635 return 0;
458} 636}
459 637
638static int vmballoon_lock_batched_page(struct vmballoon *b,
639 unsigned int num_pages, bool is_2m_pages, unsigned int *target)
640{
641 int locked, i;
642 u16 size_per_page = vmballoon_page_size(is_2m_pages);
643
644 locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
645 target);
646 if (locked > 0) {
647 for (i = 0; i < num_pages; i++) {
648 u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
649 struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
650
651 vmballoon_free_page(p, is_2m_pages);
652 }
653
654 return -EIO;
655 }
656
657 for (i = 0; i < num_pages; i++) {
658 u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
659 struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
660 struct vmballoon_page_size *page_size =
661 &b->page_sizes[is_2m_pages];
662
663 locked = vmballoon_batch_get_status(b->batch_page, i);
664
665 switch (locked) {
666 case VMW_BALLOON_SUCCESS:
667 list_add(&p->lru, &page_size->pages);
668 b->size += size_per_page;
669 break;
670 case VMW_BALLOON_ERROR_PPN_PINNED:
671 case VMW_BALLOON_ERROR_PPN_INVALID:
672 if (page_size->n_refused_pages
673 < VMW_BALLOON_MAX_REFUSED) {
674 list_add(&p->lru, &page_size->refused_pages);
675 page_size->n_refused_pages++;
676 break;
677 }
678 /* Fallthrough */
679 case VMW_BALLOON_ERROR_RESET:
680 case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
681 vmballoon_free_page(p, is_2m_pages);
682 break;
683 default:
684 /* This should never happen */
685 WARN_ON_ONCE(true);
686 }
687 }
688
689 return 0;
690}
691
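vmballoon_lock_batched_page() reads two things back from each batch slot it previously filled through vmballoon_add_batched_page(): the physical address of the candidate page and a per-page status written by the hypervisor. Those accessors are defined earlier in vmballoon.c and are not part of this hunk; the sketch below is only a plausible shape for them, assuming a 64-bit slot with a page-aligned physical address in the upper bits and the status code in the low 12 bits (the real field widths and names may differ):

        #include <linux/types.h>

        /* Sketch only: assumed slot format, not the driver's actual layout. */
        #define DEMO_BATCH_STATUS_MASK  0xfffULL

        struct demo_batch_page {
                u64 slots[512];         /* assumed: one 4 KiB page worth of slots */
        };

        static u64 demo_batch_get_pa(struct demo_batch_page *batch, int idx)
        {
                return batch->slots[idx] & ~DEMO_BATCH_STATUS_MASK;
        }

        static int demo_batch_get_status(struct demo_batch_page *batch, int idx)
        {
                return (int)(batch->slots[idx] & DEMO_BATCH_STATUS_MASK);
        }

        static void demo_batch_set_pa(struct demo_batch_page *batch, int idx, u64 pa)
        {
                batch->slots[idx] = pa; /* status bits are zero when handed to the host */
        }

Packing address and status into one word lets a single shared page describe a whole batch, which is what allows one balloon command to lock or unlock many pages at once.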
460/* 692/*
461 * Release the page allocated for the balloon. Note that we first notify 693 * Release the page allocated for the balloon. Note that we first notify
462 * the host so it can make sure the page will be available for the guest 694 * the host so it can make sure the page will be available for the guest
463 * to use, if needed. 695 * to use, if needed.
464 */ 696 */
465static int vmballoon_release_page(struct vmballoon *b, struct page *page) 697static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
698 bool is_2m_pages, unsigned int *target)
466{ 699{
467 if (!vmballoon_send_unlock_page(b, page_to_pfn(page))) 700 struct page *page = b->page;
468 return -EIO; 701 struct vmballoon_page_size *page_size = &b->page_sizes[false];
702
703 /* is_2m_pages can never happen as 2m pages support implies batching */
469 704
470 list_del(&page->lru); 705 if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
706 list_add(&page->lru, &page_size->pages);
707 return -EIO;
708 }
471 709
472 /* deallocate page */ 710 /* deallocate page */
473 __free_page(page); 711 vmballoon_free_page(page, false);
474 STATS_INC(b->stats.free); 712 STATS_INC(b->stats.free[false]);
475 713
476 /* update balloon size */ 714 /* update balloon size */
477 b->size--; 715 b->size--;
@@ -479,21 +717,76 @@ static int vmballoon_release_page(struct vmballoon *b, struct page *page)
479 return 0; 717 return 0;
480} 718}
481 719
720static int vmballoon_unlock_batched_page(struct vmballoon *b,
721 unsigned int num_pages, bool is_2m_pages,
722 unsigned int *target)
723{
724 int locked, i, ret = 0;
725 bool hv_success;
726 u16 size_per_page = vmballoon_page_size(is_2m_pages);
727
728 hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
729 target);
730 if (!hv_success)
731 ret = -EIO;
732
733 for (i = 0; i < num_pages; i++) {
734 u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
735 struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
736 struct vmballoon_page_size *page_size =
737 &b->page_sizes[is_2m_pages];
738
739 locked = vmballoon_batch_get_status(b->batch_page, i);
740 if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
741 /*
742 * That page wasn't successfully unlocked by the
743 * hypervisor, re-add it to the list of pages owned by
744 * the balloon driver.
745 */
746 list_add(&p->lru, &page_size->pages);
747 } else {
748 /* deallocate page */
749 vmballoon_free_page(p, is_2m_pages);
750 STATS_INC(b->stats.free[is_2m_pages]);
751
752 /* update balloon size */
753 b->size -= size_per_page;
754 }
755 }
756
757 return ret;
758}
759
482/* 760/*
483 * Release pages that were allocated while attempting to inflate the 761 * Release pages that were allocated while attempting to inflate the
484 * balloon but were refused by the host for one reason or another. 762 * balloon but were refused by the host for one reason or another.
485 */ 763 */
486static void vmballoon_release_refused_pages(struct vmballoon *b) 764static void vmballoon_release_refused_pages(struct vmballoon *b,
765 bool is_2m_pages)
487{ 766{
488 struct page *page, *next; 767 struct page *page, *next;
768 struct vmballoon_page_size *page_size =
769 &b->page_sizes[is_2m_pages];
489 770
490 list_for_each_entry_safe(page, next, &b->refused_pages, lru) { 771 list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
491 list_del(&page->lru); 772 list_del(&page->lru);
492 __free_page(page); 773 vmballoon_free_page(page, is_2m_pages);
493 STATS_INC(b->stats.refused_free); 774 STATS_INC(b->stats.refused_free[is_2m_pages]);
494 } 775 }
495 776
496 b->n_refused_pages = 0; 777 page_size->n_refused_pages = 0;
778}
779
780static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
781{
782 b->page = p;
783}
784
785static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
786 struct page *p)
787{
788 vmballoon_batch_set_pa(b->batch_page, idx,
789 (u64)page_to_pfn(p) << PAGE_SHIFT);
497} 790}
498 791
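vmballoon_add_page() and vmballoon_add_batched_page() are the two implementations behind b->ops->add_page; together with ->lock and ->unlock they keep the inflate/deflate paths below agnostic of whether the host negotiated the batched protocol. The ops table itself is defined later in this patch (vmballoon_basic_ops / vmballoon_batched_ops); its shape, reconstructed here from the call sites and function signatures visible in these hunks:

        /* Reconstructed from the visible call sites; a sketch, not the literal header. */
        struct vmballoon_ops {
                void (*add_page)(struct vmballoon *b, int idx, struct page *p);
                int (*lock)(struct vmballoon *b, unsigned int num_pages,
                            bool is_2m_pages, unsigned int *target);
                int (*unlock)(struct vmballoon *b, unsigned int num_pages,
                              bool is_2m_pages, unsigned int *target);
        };

In basic mode batch_max_pages is 1, so every allocated page is flushed to the host immediately through vmballoon_lock_page(); in batched mode up to VMW_BALLOON_BATCH_MAX_PAGES entries are staged before a single lock command is issued.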
499/* 792/*
@@ -503,12 +796,12 @@ static void vmballoon_release_refused_pages(struct vmballoon *b)
503 */ 796 */
504static void vmballoon_inflate(struct vmballoon *b) 797static void vmballoon_inflate(struct vmballoon *b)
505{ 798{
506 unsigned int goal; 799 unsigned rate;
507 unsigned int rate;
508 unsigned int i;
509 unsigned int allocations = 0; 800 unsigned int allocations = 0;
801 unsigned int num_pages = 0;
510 int error = 0; 802 int error = 0;
511 gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP; 803 gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
804 bool is_2m_pages;
512 805
513 pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); 806 pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
514 807
@@ -527,27 +820,50 @@ static void vmballoon_inflate(struct vmballoon *b)
527 * slowdown page allocations considerably. 820 * slowdown page allocations considerably.
528 */ 821 */
529 822
530 goal = b->target - b->size;
531 /* 823 /*
532 * Start with no sleep allocation rate which may be higher 824 * Start with no sleep allocation rate which may be higher
533 * than sleeping allocation rate. 825 * than sleeping allocation rate.
534 */ 826 */
535 rate = b->slow_allocation_cycles ? 827 if (b->slow_allocation_cycles) {
536 b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX; 828 rate = b->rate_alloc;
829 is_2m_pages = false;
830 } else {
831 rate = UINT_MAX;
832 is_2m_pages =
833 b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
834 }
537 835
538 pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n", 836 pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n",
539 __func__, goal, rate, b->rate_alloc); 837 __func__, b->target - b->size, rate, b->rate_alloc);
540 838
541 for (i = 0; i < goal; i++) { 839 while (!b->reset_required &&
840 b->size + num_pages * vmballoon_page_size(is_2m_pages)
841 < b->target) {
542 struct page *page; 842 struct page *page;
543 843
544 if (flags == VMW_PAGE_ALLOC_NOSLEEP) 844 if (flags == VMW_PAGE_ALLOC_NOSLEEP)
545 STATS_INC(b->stats.alloc); 845 STATS_INC(b->stats.alloc[is_2m_pages]);
546 else 846 else
547 STATS_INC(b->stats.sleep_alloc); 847 STATS_INC(b->stats.sleep_alloc);
548 848
549 page = alloc_page(flags); 849 page = vmballoon_alloc_page(flags, is_2m_pages);
550 if (!page) { 850 if (!page) {
851 STATS_INC(b->stats.alloc_fail[is_2m_pages]);
852
853 if (is_2m_pages) {
854 b->ops->lock(b, num_pages, true, &b->target);
855
856 /*
857 * ignore errors from locking as we now switch
858 * to 4k pages and we might get different
859 * errors.
860 */
861
862 num_pages = 0;
863 is_2m_pages = false;
864 continue;
865 }
866
551 if (flags == VMW_PAGE_ALLOC_CANSLEEP) { 867 if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
552 /* 868 /*
553 * CANSLEEP page allocation failed, so guest 869 * CANSLEEP page allocation failed, so guest
@@ -559,7 +875,6 @@ static void vmballoon_inflate(struct vmballoon *b)
559 STATS_INC(b->stats.sleep_alloc_fail); 875 STATS_INC(b->stats.sleep_alloc_fail);
560 break; 876 break;
561 } 877 }
562 STATS_INC(b->stats.alloc_fail);
563 878
564 /* 879 /*
565 * NOSLEEP page allocation failed, so the guest is 880 * NOSLEEP page allocation failed, so the guest is
@@ -571,7 +886,7 @@ static void vmballoon_inflate(struct vmballoon *b)
571 */ 886 */
572 b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES; 887 b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;
573 888
574 if (i >= b->rate_alloc) 889 if (allocations >= b->rate_alloc)
575 break; 890 break;
576 891
577 flags = VMW_PAGE_ALLOC_CANSLEEP; 892 flags = VMW_PAGE_ALLOC_CANSLEEP;
@@ -580,34 +895,40 @@ static void vmballoon_inflate(struct vmballoon *b)
580 continue; 895 continue;
581 } 896 }
582 897
583 error = vmballoon_lock_page(b, page); 898 b->ops->add_page(b, num_pages++, page);
584 if (error) 899 if (num_pages == b->batch_max_pages) {
585 break; 900 error = b->ops->lock(b, num_pages, is_2m_pages,
586 901 &b->target);
587 if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) { 902 num_pages = 0;
588 cond_resched(); 903 if (error)
589 allocations = 0; 904 break;
590 } 905 }
591 906
592 if (i >= rate) { 907 cond_resched();
908
909 if (allocations >= rate) {
593 /* We allocated enough pages, let's take a break. */ 910 /* We allocated enough pages, let's take a break. */
594 break; 911 break;
595 } 912 }
596 } 913 }
597 914
915 if (num_pages > 0)
916 b->ops->lock(b, num_pages, is_2m_pages, &b->target);
917
598 /* 918 /*
599 * We reached our goal without failures so try increasing 919 * We reached our goal without failures so try increasing
600 * allocation rate. 920 * allocation rate.
601 */ 921 */
602 if (error == 0 && i >= b->rate_alloc) { 922 if (error == 0 && allocations >= b->rate_alloc) {
603 unsigned int mult = i / b->rate_alloc; 923 unsigned int mult = allocations / b->rate_alloc;
604 924
605 b->rate_alloc = 925 b->rate_alloc =
606 min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC, 926 min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
607 VMW_BALLOON_RATE_ALLOC_MAX); 927 VMW_BALLOON_RATE_ALLOC_MAX);
608 } 928 }
609 929
610 vmballoon_release_refused_pages(b); 930 vmballoon_release_refused_pages(b, true);
931 vmballoon_release_refused_pages(b, false);
611} 932}
612 933
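The rewritten inflate loop stages pages with ops->add_page() and flushes a full batch whenever num_pages reaches b->batch_max_pages; note that the loop condition, b->size + num_pages * vmballoon_page_size(is_2m_pages) < b->target, counts staged-but-not-yet-locked pages toward the goal, and the trailing "if (num_pages > 0)" lock flushes whatever partial batch remains. As an illustrative example with made-up numbers: with 4 KiB pages, a size of 1000, a target of 1200 and a batch limit of 64, the loop stages and locks 64 pages three times (size 1064, 1128, 1192, assuming every lock succeeds), then stages 8 more before 1192 + 8 < 1200 fails, and the final partial flush brings the balloon to exactly 1200. The 2 MiB path also degrades gracefully: if a huge-page allocation fails, the partial batch is locked as-is and the loop restarts with 4 KiB pages.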
613/* 934/*
@@ -615,35 +936,176 @@ static void vmballoon_inflate(struct vmballoon *b)
615 */ 936 */
616static void vmballoon_deflate(struct vmballoon *b) 937static void vmballoon_deflate(struct vmballoon *b)
617{ 938{
618 struct page *page, *next; 939 unsigned is_2m_pages;
619 unsigned int i = 0;
620 unsigned int goal;
621 int error;
622 940
623 pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); 941 pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);
624 942
625 /* limit deallocation rate */ 943 /* free pages to reach target */
626 goal = min(b->size - b->target, b->rate_free); 944 for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
945 is_2m_pages++) {
946 struct page *page, *next;
947 unsigned int num_pages = 0;
948 struct vmballoon_page_size *page_size =
949 &b->page_sizes[is_2m_pages];
950
951 list_for_each_entry_safe(page, next, &page_size->pages, lru) {
952 if (b->reset_required ||
953 (b->target > 0 &&
954 b->size - num_pages
955 * vmballoon_page_size(is_2m_pages)
956 < b->target + vmballoon_page_size(true)))
957 break;
958
959 list_del(&page->lru);
960 b->ops->add_page(b, num_pages++, page);
627 961
628 pr_debug("%s - goal: %d, rate: %d\n", __func__, goal, b->rate_free); 962 if (num_pages == b->batch_max_pages) {
963 int error;
629 964
630 /* free pages to reach target */ 965 error = b->ops->unlock(b, num_pages,
631 list_for_each_entry_safe(page, next, &b->pages, lru) { 966 is_2m_pages, &b->target);
632 error = vmballoon_release_page(b, page); 967 num_pages = 0;
633 if (error) { 968 if (error)
634 /* quickly decrease rate in case of error */ 969 return;
635 b->rate_free = max(b->rate_free / 2, 970 }
636 VMW_BALLOON_RATE_FREE_MIN); 971
637 return; 972 cond_resched();
638 } 973 }
639 974
640 if (++i >= goal) 975 if (num_pages > 0)
641 break; 976 b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
977 }
978}
979
980static const struct vmballoon_ops vmballoon_basic_ops = {
981 .add_page = vmballoon_add_page,
982 .lock = vmballoon_lock_page,
983 .unlock = vmballoon_unlock_page
984};
985
986static const struct vmballoon_ops vmballoon_batched_ops = {
987 .add_page = vmballoon_add_batched_page,
988 .lock = vmballoon_lock_batched_page,
989 .unlock = vmballoon_unlock_batched_page
990};
991
992static bool vmballoon_init_batching(struct vmballoon *b)
993{
994 b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
995 if (!b->page)
996 return false;
997
998 b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
999 if (!b->batch_page) {
1000 __free_page(b->page);
1001 return false;
1002 }
1003
1004 return true;
1005}
1006
1007/*
1008 * Receive notification and resize balloon
1009 */
1010static void vmballoon_doorbell(void *client_data)
1011{
1012 struct vmballoon *b = client_data;
1013
1014 STATS_INC(b->stats.doorbell);
1015
1016 mod_delayed_work(system_freezable_wq, &b->dwork, 0);
1017}
1018
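The doorbell callback does no balloon work itself; it only pulls the existing delayed work item forward, so resize requests are handled by the same single worker as the periodic timer and the callback needs no locking. mod_delayed_work() with a delay of 0 runs the work as soon as possible and does not queue a second instance if one is already pending. A small self-contained sketch of that pattern (names are made up):

        #include <linux/workqueue.h>

        static void demo_work_fn(struct work_struct *work)
        {
                /* ... poll the device and adjust state ... */
        }

        static DECLARE_DELAYED_WORK(demo_dwork, demo_work_fn);

        static void demo_kick(void)
        {
                /*
                 * Delay 0: run as soon as possible.  If the work is already
                 * pending with a longer delay, this shortens it rather than
                 * queueing a second instance.
                 */
                mod_delayed_work(system_freezable_wq, &demo_dwork, 0);
        }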
1019/*
1020 * Clean up vmci doorbell
1021 */
1022static void vmballoon_vmci_cleanup(struct vmballoon *b)
1023{
1024 int error;
1025
1026 VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
1027 VMCI_INVALID_ID, error);
1028 STATS_INC(b->stats.doorbell_unset);
1029
1030 if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
1031 vmci_doorbell_destroy(b->vmci_doorbell);
1032 b->vmci_doorbell = VMCI_INVALID_HANDLE;
1033 }
1034}
1035
1036/*
1037 * Initialize vmci doorbell, to get notified as soon as balloon changes
1038 */
1039static int vmballoon_vmci_init(struct vmballoon *b)
1040{
1041 int error = 0;
1042
1043 if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) {
1044 error = vmci_doorbell_create(&b->vmci_doorbell,
1045 VMCI_FLAG_DELAYED_CB,
1046 VMCI_PRIVILEGE_FLAG_RESTRICTED,
1047 vmballoon_doorbell, b);
1048
1049 if (error == VMCI_SUCCESS) {
1050 VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET,
1051 b->vmci_doorbell.context,
1052 b->vmci_doorbell.resource, error);
1053 STATS_INC(b->stats.doorbell_set);
1054 }
1055 }
1056
1057 if (error != 0) {
1058 vmballoon_vmci_cleanup(b);
1059
1060 return -EIO;
642 } 1061 }
643 1062
644 /* slowly increase rate if there were no errors */ 1063 return 0;
645 b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC, 1064}
646 VMW_BALLOON_RATE_FREE_MAX); 1065
1066/*
1067 * Perform standard reset sequence by popping the balloon (in case it
1068 * is not empty) and then restarting protocol. This operation normally
1069 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
1070 */
1071static void vmballoon_reset(struct vmballoon *b)
1072{
1073 int error;
1074
1075 vmballoon_vmci_cleanup(b);
1076
1077 /* free all pages, skipping monitor unlock */
1078 vmballoon_pop(b);
1079
1080 if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
1081 return;
1082
1083 if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
1084 b->ops = &vmballoon_batched_ops;
1085 b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
1086 if (!vmballoon_init_batching(b)) {
1087 /*
1088 * We failed to initialize batching, inform the monitor
1089 * about it by sending a null capability.
1090 *
1091 * The guest will retry in one second.
1092 */
1093 vmballoon_send_start(b, 0);
1094 return;
1095 }
1096 } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
1097 b->ops = &vmballoon_basic_ops;
1098 b->batch_max_pages = 1;
1099 }
1100
1101 b->reset_required = false;
1102
1103 error = vmballoon_vmci_init(b);
1104 if (error)
1105 pr_err("failed to initialize vmci doorbell\n");
1106
1107 if (!vmballoon_send_guest_id(b))
1108 pr_err("failed to send guest ID to the host\n");
647} 1109}
648 1110
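vmballoon_reset() now re-runs the entire handshake: it removes the VMCI doorbell, empties the balloon, renegotiates capabilities via the start command, and only then selects an ops table; if batching was negotiated but vmballoon_init_batching() fails, the guest reports an empty capability set and relies on the host retrying about a second later. The selection itself is a plain bitmask test over the negotiated capabilities, distilled here into a hypothetical helper for clarity (the flag and ops names come from this patch; the helper does not exist in the driver, which performs these tests inline):

        #include <linux/errno.h>

        /* Sketch only: vmballoon_reset() does this inline. */
        static int demo_pick_ops(struct vmballoon *b)
        {
                if (b->capabilities & VMW_BALLOON_BATCHED_CMDS) {
                        b->ops = &vmballoon_batched_ops;
                        b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
                        return vmballoon_init_batching(b) ? 0 : -ENOMEM;
                }

                if (b->capabilities & VMW_BALLOON_BASIC_CMDS) {
                        b->ops = &vmballoon_basic_ops;
                        b->batch_max_pages = 1; /* each page is its own batch */
                        return 0;
                }

                return -EINVAL;                 /* no usable command set */
        }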
649/* 1111/*
@@ -664,13 +1126,14 @@ static void vmballoon_work(struct work_struct *work)
664 if (b->slow_allocation_cycles > 0) 1126 if (b->slow_allocation_cycles > 0)
665 b->slow_allocation_cycles--; 1127 b->slow_allocation_cycles--;
666 1128
667 if (vmballoon_send_get_target(b, &target)) { 1129 if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
668 /* update target, adjust size */ 1130 /* update target, adjust size */
669 b->target = target; 1131 b->target = target;
670 1132
671 if (b->size < target) 1133 if (b->size < target)
672 vmballoon_inflate(b); 1134 vmballoon_inflate(b);
673 else if (b->size > target) 1135 else if (target == 0 ||
1136 b->size > target + vmballoon_page_size(true))
674 vmballoon_deflate(b); 1137 vmballoon_deflate(b);
675 } 1138 }
676 1139
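The deflate trigger also gains hysteresis: rather than deflating whenever size exceeds target, the worker now waits until the surplus exceeds vmballoon_page_size(true) small pages, i.e. one 2 MiB entry, unless the target is zero, which always deflates completely. With 4 KiB base pages that threshold is 512: a balloon of 10,100 pages against a target of 10,000 stays put, while one of 10,600 deflates (the numbers are illustrative). This keeps the driver from locking a 2 MiB page only to unlock it on the next pass when size and target straddle a huge-page boundary.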
@@ -692,6 +1155,14 @@ static int vmballoon_debug_show(struct seq_file *f, void *offset)
692 struct vmballoon *b = f->private; 1155 struct vmballoon *b = f->private;
693 struct vmballoon_stats *stats = &b->stats; 1156 struct vmballoon_stats *stats = &b->stats;
694 1157
1158 /* format capabilities info */
1159 seq_printf(f,
1160 "balloon capabilities: %#4x\n"
1161 "used capabilities: %#4lx\n"
1162 "is resetting: %c\n",
1163 VMW_BALLOON_CAPABILITIES, b->capabilities,
1164 b->reset_required ? 'y' : 'n');
1165
695 /* format size info */ 1166 /* format size info */
696 seq_printf(f, 1167 seq_printf(f,
697 "target: %8d pages\n" 1168 "target: %8d pages\n"
@@ -700,35 +1171,48 @@ static int vmballoon_debug_show(struct seq_file *f, void *offset)
700 1171
701 /* format rate info */ 1172 /* format rate info */
702 seq_printf(f, 1173 seq_printf(f,
703 "rateNoSleepAlloc: %8d pages/sec\n" 1174 "rateSleepAlloc: %8d pages/sec\n",
704 "rateSleepAlloc: %8d pages/sec\n" 1175 b->rate_alloc);
705 "rateFree: %8d pages/sec\n",
706 VMW_BALLOON_NOSLEEP_ALLOC_MAX,
707 b->rate_alloc, b->rate_free);
708 1176
709 seq_printf(f, 1177 seq_printf(f,
710 "\n" 1178 "\n"
711 "timer: %8u\n" 1179 "timer: %8u\n"
1180 "doorbell: %8u\n"
712 "start: %8u (%4u failed)\n" 1181 "start: %8u (%4u failed)\n"
713 "guestType: %8u (%4u failed)\n" 1182 "guestType: %8u (%4u failed)\n"
1183 "2m-lock: %8u (%4u failed)\n"
714 "lock: %8u (%4u failed)\n" 1184 "lock: %8u (%4u failed)\n"
1185 "2m-unlock: %8u (%4u failed)\n"
715 "unlock: %8u (%4u failed)\n" 1186 "unlock: %8u (%4u failed)\n"
716 "target: %8u (%4u failed)\n" 1187 "target: %8u (%4u failed)\n"
1188 "prim2mAlloc: %8u (%4u failed)\n"
717 "primNoSleepAlloc: %8u (%4u failed)\n" 1189 "primNoSleepAlloc: %8u (%4u failed)\n"
718 "primCanSleepAlloc: %8u (%4u failed)\n" 1190 "primCanSleepAlloc: %8u (%4u failed)\n"
1191 "prim2mFree: %8u\n"
719 "primFree: %8u\n" 1192 "primFree: %8u\n"
1193 "err2mAlloc: %8u\n"
720 "errAlloc: %8u\n" 1194 "errAlloc: %8u\n"
721 "errFree: %8u\n", 1195 "err2mFree: %8u\n"
1196 "errFree: %8u\n"
1197 "doorbellSet: %8u\n"
1198 "doorbellUnset: %8u\n",
722 stats->timer, 1199 stats->timer,
1200 stats->doorbell,
723 stats->start, stats->start_fail, 1201 stats->start, stats->start_fail,
724 stats->guest_type, stats->guest_type_fail, 1202 stats->guest_type, stats->guest_type_fail,
725 stats->lock, stats->lock_fail, 1203 stats->lock[true], stats->lock_fail[true],
726 stats->unlock, stats->unlock_fail, 1204 stats->lock[false], stats->lock_fail[false],
1205 stats->unlock[true], stats->unlock_fail[true],
1206 stats->unlock[false], stats->unlock_fail[false],
727 stats->target, stats->target_fail, 1207 stats->target, stats->target_fail,
728 stats->alloc, stats->alloc_fail, 1208 stats->alloc[true], stats->alloc_fail[true],
1209 stats->alloc[false], stats->alloc_fail[false],
729 stats->sleep_alloc, stats->sleep_alloc_fail, 1210 stats->sleep_alloc, stats->sleep_alloc_fail,
730 stats->free, 1211 stats->free[true],
731 stats->refused_alloc, stats->refused_free); 1212 stats->free[false],
1213 stats->refused_alloc[true], stats->refused_alloc[false],
1214 stats->refused_free[true], stats->refused_free[false],
1215 stats->doorbell_set, stats->doorbell_unset);
732 1216
733 return 0; 1217 return 0;
734} 1218}
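The debugfs dump now prints most counters twice, once per page size, which reflects the stats fields having become two-element arrays indexed by is_2m_pages ([false] for 4 KiB, [true] for 2 MiB). The full structure is declared earlier in vmballoon.c; an abridged sketch of that layout, with field names taken from the counters used above:

        #define VMW_BALLOON_NUM_PAGE_SIZES      2       /* 4 KiB and 2 MiB */

        /* Abridged sketch; the real declaration lives earlier in vmballoon.c. */
        struct vmballoon_stats {
                unsigned int timer;
                unsigned int doorbell;

                /* [false] = 4 KiB pages, [true] = 2 MiB pages */
                unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
                unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
                unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
                unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
                unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
                unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
                unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
                unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
                unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];

                unsigned int sleep_alloc;       /* CANSLEEP path is 4 KiB only */
                unsigned int sleep_alloc_fail;
                unsigned int doorbell_set;
                unsigned int doorbell_unset;
                /* ... start/guest_type/target counters unchanged ... */
        };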
@@ -782,7 +1266,7 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
782static int __init vmballoon_init(void) 1266static int __init vmballoon_init(void)
783{ 1267{
784 int error; 1268 int error;
785 1269 unsigned is_2m_pages;
786 /* 1270 /*
787 * Check if we are running on VMware's hypervisor and bail out 1271 * Check if we are running on VMware's hypervisor and bail out
788 * if we are not. 1272 * if we are not.
@@ -790,32 +1274,26 @@ static int __init vmballoon_init(void)
790 if (x86_hyper != &x86_hyper_vmware) 1274 if (x86_hyper != &x86_hyper_vmware)
791 return -ENODEV; 1275 return -ENODEV;
792 1276
793 INIT_LIST_HEAD(&balloon.pages); 1277 for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
794 INIT_LIST_HEAD(&balloon.refused_pages); 1278 is_2m_pages++) {
1279 INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
1280 INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
1281 }
795 1282
796 /* initialize rates */ 1283 /* initialize rates */
797 balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX; 1284 balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;
798 balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX;
799 1285
800 INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); 1286 INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
801 1287
802 /*
803 * Start balloon.
804 */
805 if (!vmballoon_send_start(&balloon)) {
806 pr_err("failed to send start command to the host\n");
807 return -EIO;
808 }
809
810 if (!vmballoon_send_guest_id(&balloon)) {
811 pr_err("failed to send guest ID to the host\n");
812 return -EIO;
813 }
814
815 error = vmballoon_debugfs_init(&balloon); 1288 error = vmballoon_debugfs_init(&balloon);
816 if (error) 1289 if (error)
817 return error; 1290 return error;
818 1291
1292 balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
1293 balloon.batch_page = NULL;
1294 balloon.page = NULL;
1295 balloon.reset_required = true;
1296
819 queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); 1297 queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
820 1298
821 return 0; 1299 return 0;
@@ -824,6 +1302,7 @@ module_init(vmballoon_init);
824 1302
825static void __exit vmballoon_exit(void) 1303static void __exit vmballoon_exit(void)
826{ 1304{
1305 vmballoon_vmci_cleanup(&balloon);
827 cancel_delayed_work_sync(&balloon.dwork); 1306 cancel_delayed_work_sync(&balloon.dwork);
828 1307
829 vmballoon_debugfs_exit(&balloon); 1308 vmballoon_debugfs_exit(&balloon);
@@ -833,7 +1312,7 @@ static void __exit vmballoon_exit(void)
833 * Reset connection before deallocating memory to avoid potential for 1312 * Reset connection before deallocating memory to avoid potential for
834 * additional spurious resets from guest touching deallocated pages. 1313 * additional spurious resets from guest touching deallocated pages.
835 */ 1314 */
836 vmballoon_send_start(&balloon); 1315 vmballoon_send_start(&balloon, 0);
837 vmballoon_pop(&balloon); 1316 vmballoon_pop(&balloon);
838} 1317}
839module_exit(vmballoon_exit); 1318module_exit(vmballoon_exit);
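Module init no longer talks to the host at all: it initializes the per-size lists, leaves reset_required set, and queues the worker, so the first vmballoon_work() pass performs the handshake and is simply retried on a later tick if it fails; previously a failed start or guest-ID command aborted module load with -EIO. Module exit gains the matching doorbell cleanup and still sends a start command, now with an empty capability set, to detach from the host before freeing pages. Distilled to its core, the deferred-start pattern is just:

        /* Sketch of the new init flow: defer all host communication to the worker. */
        static int __init demo_balloon_init(void)
        {
                balloon.reset_required = true;  /* first worker pass performs the reset */
                INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);
                queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);
                return 0;                       /* host errors surface via retries */
        }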
diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c
index 822665245588..8a4b6bbe1bee 100644
--- a/drivers/misc/vmw_vmci/vmci_datagram.c
+++ b/drivers/misc/vmw_vmci/vmci_datagram.c
@@ -276,11 +276,10 @@ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg)
276 } 276 }
277 277
278 /* We make a copy to enqueue. */ 278 /* We make a copy to enqueue. */
279 new_dg = kmalloc(dg_size, GFP_KERNEL); 279 new_dg = kmemdup(dg, dg_size, GFP_KERNEL);
280 if (new_dg == NULL) 280 if (new_dg == NULL)
281 return VMCI_ERROR_NO_MEM; 281 return VMCI_ERROR_NO_MEM;
282 282
283 memcpy(new_dg, dg, dg_size);
284 retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg); 283 retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg);
285 if (retval < VMCI_SUCCESS) { 284 if (retval < VMCI_SUCCESS) {
286 kfree(new_dg); 285 kfree(new_dg);
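The vmci_datagram change is a straight simplification: kmemdup() allocates and copies in one call, replacing the open-coded kmalloc()/memcpy() pair, with identical behavior including the NULL return on allocation failure. A minimal usage sketch outside the driver (the helper and its arguments are made up):

        #include <linux/slab.h>
        #include <linux/string.h>

        /* Duplicate an arbitrary blob; caller frees the copy with kfree(). */
        static void *demo_dup(const void *src, size_t len)
        {
                /* Equivalent to kmalloc(len, GFP_KERNEL) followed by memcpy(). */
                return kmemdup(src, len, GFP_KERNEL);
        }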