From 8b063441b7417a79b0c27efc401479748ccf8ad1 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 13 Sep 2019 00:52:27 +0800 Subject: drivers/misc: ti-st: Remove unneeded variable in st_tty_open st_tty_open does not need a local variable to store different values, hence just remove it. Signed-off-by: zhong jiang Link: https://lore.kernel.org/r/1568307147-43468-1-git-send-email-zhongjiang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 7d9e23aa0b92..2ae9948a91e1 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -708,7 +708,6 @@ EXPORT_SYMBOL_GPL(st_unregister); */ static int st_tty_open(struct tty_struct *tty) { - int err = 0; struct st_data_s *st_gdata; pr_info("%s ", __func__); @@ -731,7 +730,8 @@ static int st_tty_open(struct tty_struct *tty) */ st_kim_complete(st_gdata->kim_data); pr_debug("done %s", __func__); - return err; + + return 0; } static void st_tty_close(struct tty_struct *tty) -- cgit From 780ee709bdb49c0d3562890855d7ff7919e64075 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Fri, 4 Oct 2019 21:26:59 +0300 Subject: mei: buf: drop 'running hook' debug messages. Drop 'running hook' debug messages, as this info can be already retrieved via ftrace. 
Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191004182659.2933-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus-fixup.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 32e9b1aed2ca..e52c8d02c234 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -46,8 +46,6 @@ static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO; */ static void number_of_connections(struct mei_cl_device *cldev) { - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - if (cldev->me_cl->props.max_number_of_connections > 1) cldev->do_match = 0; } @@ -59,8 +57,6 @@ static void number_of_connections(struct mei_cl_device *cldev) */ static void blacklist(struct mei_cl_device *cldev) { - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - cldev->do_match = 0; } @@ -71,8 +67,6 @@ static void blacklist(struct mei_cl_device *cldev) */ static void whitelist(struct mei_cl_device *cldev) { - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - cldev->do_match = 1; } @@ -248,7 +242,6 @@ static void mei_wd(struct mei_cl_device *cldev) { struct pci_dev *pdev = to_pci_dev(cldev->dev.parent); - dev_dbg(&cldev->dev, "running hook %s\n", __func__); if (pdev->device == MEI_DEV_ID_WPT_LP || pdev->device == MEI_DEV_ID_SPT || pdev->device == MEI_DEV_ID_SPT_H) @@ -402,8 +395,6 @@ static void mei_nfc(struct mei_cl_device *cldev) bus = cldev->bus; - dev_dbg(&cldev->dev, "running hook %s\n", __func__); - mutex_lock(&bus->device_lock); /* we need to connect to INFO GUID */ cl = mei_cl_alloc_linked(bus); -- cgit From 3079b54aa9a0f0432b88a52e205cfa3486898330 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 2 Oct 2019 10:48:44 +0200 Subject: eeprom: Warn that the driver is deprecated Deprecating the driver in Kconfig is one thing, but we also need to let the users themselves know. 
Log a warning each time a device is bound to the deprecated eeprom driver. Signed-off-by: Jean Delvare Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20191002104844.1dc4d8f3@endymion Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/eeprom.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/misc') diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c index 2cfe3d4ae144..226b5efa6a77 100644 --- a/drivers/misc/eeprom/eeprom.c +++ b/drivers/misc/eeprom/eeprom.c @@ -175,6 +175,10 @@ static int eeprom_probe(struct i2c_client *client, } } + /* Let the users know they are using deprecated driver */ + dev_notice(&client->dev, + "eeprom driver is deprecated, please use at24 instead\n"); + /* create the sysfs eeprom file */ return sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr); } -- cgit From 3e917975b7cdef6cfe92931e04677d8cf1d3df98 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 8 Oct 2019 03:57:35 +0300 Subject: mei: me: fix me_intr_clear function name in KDoc Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191008005735.12707-2-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/misc') diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index abe1b1f4362f..47e6d173fa66 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -269,7 +269,7 @@ static inline void me_intr_disable(struct mei_device *dev, u32 hcsr) } /** - * mei_me_intr_clear - clear and stop interrupts + * me_intr_clear - clear and stop interrupts * * @dev: the device structure * @hcsr: supplied hcsr register value -- cgit From ad90ff6964d733ee59d1679f692c23fb1dab7f30 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 9 Oct 2019 22:37:52 +0800 Subject: misc: atmel_tclib: use devm_platform_ioremap_resource() to simplify code Use 
devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20191009143752.11236-1-yuehaibing@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/atmel_tclib.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index 08b5b639d77f..7de7840f613c 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -109,7 +109,6 @@ static int __init tc_probe(struct platform_device *pdev) struct atmel_tc *tc; struct clk *clk; int irq; - struct resource *r; unsigned int i; if (of_get_child_count(pdev->dev.of_node)) @@ -133,8 +132,7 @@ static int __init tc_probe(struct platform_device *pdev) if (IS_ERR(tc->slow_clk)) return PTR_ERR(tc->slow_clk); - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - tc->regs = devm_ioremap_resource(&pdev->dev, r); + tc->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(tc->regs)) return PTR_ERR(tc->regs); -- cgit From fa6f90f349ac3c79bf085b2b4f5212ea102724d5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 7 Oct 2019 11:30:46 -0700 Subject: sgi-gru: simplify procfs code some more Use seq_puts and simple string output and not seq_printf with formats and individual strings to reduce overall object size. 
$ size drivers/misc/sgi-gru/gruprocfs.o* (x86-64 defconfig with gru) text data bss dec hex filename 7006 8 0 7014 1b66 drivers/misc/sgi-gru/gruprocfs.o.new 7472 8 0 7480 1d38 drivers/misc/sgi-gru/gruprocfs.o.old Signed-off-by: Joe Perches Acked-by: Dimitri Sivanich Link: https://lore.kernel.org/r/cce61906a5f7f42f5b2b8b947fc61357bcb56e71.camel@perches.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/sgi-gru/gruprocfs.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 3a8d76d1ccae..2817f4751306 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -119,7 +119,7 @@ static int mcs_statistics_show(struct seq_file *s, void *p) "cch_interrupt_sync", "cch_deallocate", "tfh_write_only", "tfh_write_restart", "tgh_invalidate"}; - seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks"); + seq_puts(s, "#id count aver-clks max-clks\n"); for (op = 0; op < mcsop_last; op++) { count = atomic_long_read(&mcs_op_statistics[op].count); total = atomic_long_read(&mcs_op_statistics[op].total); @@ -165,8 +165,7 @@ static int cch_seq_show(struct seq_file *file, void *data) const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; if (gid == 0) - seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid", - "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode"); + seq_puts(file, "# gid bid ctx# asid pid cbrs dsbytes mode\n"); if (gru) for (i = 0; i < GRU_NUM_CCH; i++) { ts = gru->gs_gts[i]; @@ -191,10 +190,8 @@ static int gru_seq_show(struct seq_file *file, void *data) struct gru_state *gru = GID_TO_GRU(gid); if (gid == 0) { - seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid", - "ctx", "cbr", "dsr", "ctx", "cbr", "dsr"); - seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy", - "busy", "busy", "free", "free", "free"); + seq_puts(file, "# gid nid ctx cbr dsr ctx cbr dsr\n"); + 
seq_puts(file, "# busy busy busy free free free\n"); } if (gru) { ctxfree = GRU_NUM_CCH - gru->gs_active_contexts; -- cgit From bb4d6e0ee83c91b34585d1ce2aceed1ae2d3729f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 2 Oct 2019 14:56:58 -0700 Subject: lis3lv02d: switch to using input device polling mode Now that instances of input_dev support polling mode natively, we no longer need to create input_polled_dev instance. Signed-off-by: Dmitry Torokhov Link: https://lore.kernel.org/r/20191002215658.GA134561@dtor-ws Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 1 - drivers/misc/lis3lv02d/lis3lv02d.c | 80 +++++++++++++++++++++----------------- drivers/misc/lis3lv02d/lis3lv02d.h | 4 +- 3 files changed, 46 insertions(+), 39 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index c55b63750757..2fefecef6e06 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -8,7 +8,6 @@ menu "Misc devices" config SENSORS_LIS3LV02D tristate depends on INPUT - select INPUT_POLLDEV config AD525X_DPOT tristate "Analog Devices Digital Potentiometers" diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index 057d7bbde402..dd65cedf3b12 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -434,23 +434,23 @@ int lis3lv02d_poweron(struct lis3lv02d *lis3) EXPORT_SYMBOL_GPL(lis3lv02d_poweron); -static void lis3lv02d_joystick_poll(struct input_polled_dev *pidev) +static void lis3lv02d_joystick_poll(struct input_dev *input) { - struct lis3lv02d *lis3 = pidev->private; + struct lis3lv02d *lis3 = input_get_drvdata(input); int x, y, z; mutex_lock(&lis3->mutex); lis3lv02d_get_xyz(lis3, &x, &y, &z); - input_report_abs(pidev->input, ABS_X, x); - input_report_abs(pidev->input, ABS_Y, y); - input_report_abs(pidev->input, ABS_Z, z); - input_sync(pidev->input); + 
input_report_abs(input, ABS_X, x); + input_report_abs(input, ABS_Y, y); + input_report_abs(input, ABS_Z, z); + input_sync(input); mutex_unlock(&lis3->mutex); } -static void lis3lv02d_joystick_open(struct input_polled_dev *pidev) +static int lis3lv02d_joystick_open(struct input_dev *input) { - struct lis3lv02d *lis3 = pidev->private; + struct lis3lv02d *lis3 = input_get_drvdata(input); if (lis3->pm_dev) pm_runtime_get_sync(lis3->pm_dev); @@ -461,12 +461,14 @@ static void lis3lv02d_joystick_open(struct input_polled_dev *pidev) * Update coordinates for the case where poll interval is 0 and * the chip in running purely under interrupt control */ - lis3lv02d_joystick_poll(pidev); + lis3lv02d_joystick_poll(input); + + return 0; } -static void lis3lv02d_joystick_close(struct input_polled_dev *pidev) +static void lis3lv02d_joystick_close(struct input_dev *input) { - struct lis3lv02d *lis3 = pidev->private; + struct lis3lv02d *lis3 = input_get_drvdata(input); atomic_set(&lis3->wake_thread, 0); if (lis3->pm_dev) @@ -497,7 +499,7 @@ out: static void lis302dl_interrupt_handle_click(struct lis3lv02d *lis3) { - struct input_dev *dev = lis3->idev->input; + struct input_dev *dev = lis3->idev; u8 click_src; mutex_lock(&lis3->mutex); @@ -677,26 +679,19 @@ int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) if (lis3->idev) return -EINVAL; - lis3->idev = input_allocate_polled_device(); - if (!lis3->idev) + input_dev = input_allocate_device(); + if (!input_dev) return -ENOMEM; - lis3->idev->poll = lis3lv02d_joystick_poll; - lis3->idev->open = lis3lv02d_joystick_open; - lis3->idev->close = lis3lv02d_joystick_close; - lis3->idev->poll_interval = MDPS_POLL_INTERVAL; - lis3->idev->poll_interval_min = MDPS_POLL_MIN; - lis3->idev->poll_interval_max = MDPS_POLL_MAX; - lis3->idev->private = lis3; - input_dev = lis3->idev->input; - input_dev->name = "ST LIS3LV02DL Accelerometer"; input_dev->phys = DRIVER_NAME "/input0"; input_dev->id.bustype = BUS_HOST; input_dev->id.vendor = 0; 
input_dev->dev.parent = &lis3->pdev->dev; - set_bit(EV_ABS, input_dev->evbit); + input_dev->open = lis3lv02d_joystick_open; + input_dev->close = lis3lv02d_joystick_close; + max_val = (lis3->mdps_max_val * lis3->scale) / LIS3_ACCURACY; if (lis3->whoami == WAI_12B) { fuzz = LIS3_DEFAULT_FUZZ_12B; @@ -712,17 +707,32 @@ int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) input_set_abs_params(input_dev, ABS_Y, -max_val, max_val, fuzz, flat); input_set_abs_params(input_dev, ABS_Z, -max_val, max_val, fuzz, flat); + input_set_drvdata(input_dev, lis3); + lis3->idev = input_dev; + + err = input_setup_polling(input_dev, lis3lv02d_joystick_poll); + if (err) + goto err_free_input; + + input_set_poll_interval(input_dev, MDPS_POLL_INTERVAL); + input_set_min_poll_interval(input_dev, MDPS_POLL_MIN); + input_set_max_poll_interval(input_dev, MDPS_POLL_MAX); + lis3->mapped_btns[0] = lis3lv02d_get_axis(abs(lis3->ac.x), btns); lis3->mapped_btns[1] = lis3lv02d_get_axis(abs(lis3->ac.y), btns); lis3->mapped_btns[2] = lis3lv02d_get_axis(abs(lis3->ac.z), btns); - err = input_register_polled_device(lis3->idev); - if (err) { - input_free_polled_device(lis3->idev); - lis3->idev = NULL; - } + err = input_register_device(lis3->idev); + if (err) + goto err_free_input; + return 0; + +err_free_input: + input_free_device(input_dev); + lis3->idev = NULL; return err; + } EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable); @@ -738,8 +748,7 @@ void lis3lv02d_joystick_disable(struct lis3lv02d *lis3) if (lis3->irq) misc_deregister(&lis3->miscdev); - input_unregister_polled_device(lis3->idev); - input_free_polled_device(lis3->idev); + input_unregister_device(lis3->idev); lis3->idev = NULL; } EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable); @@ -895,10 +904,9 @@ static void lis3lv02d_8b_configure(struct lis3lv02d *lis3, (p->click_thresh_y << 4)); if (lis3->idev) { - struct input_dev *input_dev = lis3->idev->input; - input_set_capability(input_dev, EV_KEY, BTN_X); - input_set_capability(input_dev, EV_KEY, BTN_Y); - 
input_set_capability(input_dev, EV_KEY, BTN_Z); + input_set_capability(lis3->idev, EV_KEY, BTN_X); + input_set_capability(lis3->idev, EV_KEY, BTN_Y); + input_set_capability(lis3->idev, EV_KEY, BTN_Z); } } diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h index 1b0c99883c57..c394c0b08519 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.h +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -6,7 +6,7 @@ * Copyright (C) 2008-2009 Eric Piel */ #include -#include +#include #include #include @@ -281,7 +281,7 @@ struct lis3lv02d { * (1/1000th of earth gravity) */ - struct input_polled_dev *idev; /* input device */ + struct input_dev *idev; /* input device */ struct platform_device *pdev; /* platform device */ struct regulator_bulk_data regulators[2]; atomic_t count; /* interrupt count after last read */ -- cgit From 9964f8c899fc66539f1fb57b9dc6c401b77b5cdc Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 13 Sep 2019 01:04:01 +0800 Subject: misc: rtsx: Remove unneeded variable in rts5260_card_power_on rts5260_card_power_on do not need local variable to store different value, Hence just remove it. 
Signed-off-by: zhong jiang Link: https://lore.kernel.org/r/1568307841-44065-1-git-send-email-zhongjiang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rts5260.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c index 40a6d199f2ea..4214f02a17fd 100644 --- a/drivers/misc/cardreader/rts5260.c +++ b/drivers/misc/cardreader/rts5260.c @@ -191,7 +191,6 @@ static int sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card) { - int err = 0; struct rtsx_cr_option *option = &pcr->option; if (option->ocp_en) @@ -231,7 +230,7 @@ static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card) rtsx_pci_write_register(pcr, REG_PRE_RW_MODE, EN_INFINITE_MODE, 0); - return err; + return 0; } static int rts5260_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) -- cgit From 689e3557a2d1245475bb6ecfccf4ca5ab03e3a74 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 Sep 2019 17:58:56 -0700 Subject: misc: MIC: drop all 'comment' lines from its Kconfig The "comment" Kconfig lines for the Intel MIC drivers are redundant, and nowhere else do we use this kind of Kconfig style, so remove them. 
Signed-off-by: Randy Dunlap Cc: Sudeep Dutt Cc: Ashutosh Dixit Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Link: https://lore.kernel.org/r/3aa90a0f-1576-d38b-8382-6ed623ed5466@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/Kconfig | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index 948f45bbf135..b6841ba6d922 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menu "Intel MIC & related support" -comment "Intel MIC Bus Driver" - config INTEL_MIC_BUS tristate "Intel MIC Bus Driver" depends on 64BIT && PCI && X86 @@ -18,8 +16,6 @@ config INTEL_MIC_BUS OS and tools for MIC to use with this driver are available from . -comment "SCIF Bus Driver" - config SCIF_BUS tristate "SCIF Bus Driver" depends on 64BIT && PCI && X86 @@ -35,8 +31,6 @@ config SCIF_BUS OS and tools for MIC to use with this driver are available from . -comment "VOP Bus Driver" - config VOP_BUS tristate "VOP Bus Driver" help @@ -51,8 +45,6 @@ config VOP_BUS OS and tools for MIC to use with this driver are available from . -comment "Intel MIC Host Driver" - config INTEL_MIC_HOST tristate "Intel MIC Host Driver" depends on 64BIT && PCI && X86 @@ -71,8 +63,6 @@ config INTEL_MIC_HOST OS and tools for MIC to use with this driver are available from . -comment "Intel MIC Card Driver" - config INTEL_MIC_CARD tristate "Intel MIC Card Driver" depends on 64BIT && X86 @@ -90,8 +80,6 @@ config INTEL_MIC_CARD For more information see . -comment "SCIF Driver" - config SCIF tristate "SCIF Driver" depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT @@ -110,8 +98,6 @@ config SCIF OS and tools for MIC to use with this driver are available from . 
-comment "Intel MIC Coprocessor State Management (COSM) Drivers" - config MIC_COSM tristate "Intel MIC Coprocessor State Management (COSM) Drivers" depends on 64BIT && PCI && X86 && SCIF @@ -128,8 +114,6 @@ config MIC_COSM OS and tools for MIC to use with this driver are available from . -comment "VOP Driver" - config VOP tristate "VOP Driver" depends on VOP_BUS -- cgit From 2419e55e532de14fdf336e09e453aa2831c73a25 Mon Sep 17 00:00:00 2001 From: Jorge Ramirez-Ortiz Date: Wed, 9 Oct 2019 15:41:19 +0100 Subject: misc: fastrpc: add mmap/unmap support Support the allocation/deallocation of buffers mapped to the DSP. When the memory mapped to the DSP at process creation is not enough, the fastrpc library can extend it at runtime. This avoids having to do large preallocations by default. Signed-off-by: Jorge Ramirez-Ortiz Reviewed-by: Srinivas Kandagatla Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20191009144123.24583-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 181 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) (limited to 'drivers/misc') diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 47ae84afac2e..aa1249bb581a 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -34,6 +34,7 @@ #define FASTRPC_CTXID_MASK (0xFF0) #define INIT_FILELEN_MAX (64 * 1024 * 1024) #define FASTRPC_DEVICE_NAME "fastrpc" +#define ADSP_MMAP_ADD_PAGES 0x1000 /* Retrives number of input buffers from the scalars parameter */ #define REMOTE_SCALARS_INBUFS(sc) (((sc) >> 16) & 0x0ff) @@ -66,6 +67,8 @@ /* Remote Method id table */ #define FASTRPC_RMID_INIT_ATTACH 0 #define FASTRPC_RMID_INIT_RELEASE 1 +#define FASTRPC_RMID_INIT_MMAP 4 +#define FASTRPC_RMID_INIT_MUNMAP 5 #define FASTRPC_RMID_INIT_CREATE 6 #define FASTRPC_RMID_INIT_CREATE_ATTR 7 #define FASTRPC_RMID_INIT_CREATE_STATIC 8 @@ -89,6 +92,23 @@ struct fastrpc_remote_arg { u64 len; }; +struct 
fastrpc_mmap_rsp_msg { + u64 vaddr; +}; + +struct fastrpc_mmap_req_msg { + s32 pgid; + u32 flags; + u64 vaddr; + s32 num; +}; + +struct fastrpc_munmap_req_msg { + s32 pgid; + u64 vaddr; + u64 size; +}; + struct fastrpc_msg { int pid; /* process group id */ int tid; /* thread id */ @@ -123,6 +143,9 @@ struct fastrpc_buf { /* Lock for dma buf attachments */ struct mutex lock; struct list_head attachments; + /* mmap support */ + struct list_head node; /* list of user requested mmaps */ + uintptr_t raddr; }; struct fastrpc_dma_buf_attachment { @@ -192,6 +215,7 @@ struct fastrpc_user { struct list_head user; struct list_head maps; struct list_head pending; + struct list_head mmaps; struct fastrpc_channel_ctx *cctx; struct fastrpc_session_ctx *sctx; @@ -269,6 +293,7 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, return -ENOMEM; INIT_LIST_HEAD(&buf->attachments); + INIT_LIST_HEAD(&buf->node); mutex_init(&buf->lock); buf->fl = fl; @@ -276,6 +301,7 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev, buf->phys = 0; buf->size = size; buf->dev = dev; + buf->raddr = 0; buf->virt = dma_alloc_coherent(dev, buf->size, (dma_addr_t *)&buf->phys, GFP_KERNEL); @@ -1130,6 +1156,7 @@ static int fastrpc_device_release(struct inode *inode, struct file *file) struct fastrpc_channel_ctx *cctx = fl->cctx; struct fastrpc_invoke_ctx *ctx, *n; struct fastrpc_map *map, *m; + struct fastrpc_buf *buf, *b; unsigned long flags; fastrpc_release_current_dsp_process(fl); @@ -1151,6 +1178,11 @@ static int fastrpc_device_release(struct inode *inode, struct file *file) fastrpc_map_put(map); } + list_for_each_entry_safe(buf, b, &fl->mmaps, node) { + list_del(&buf->node); + fastrpc_buf_free(buf); + } + fastrpc_session_free(cctx, fl->sctx); fastrpc_channel_ctx_put(cctx); @@ -1179,6 +1211,7 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp) mutex_init(&fl->mutex); INIT_LIST_HEAD(&fl->pending); INIT_LIST_HEAD(&fl->maps); + 
INIT_LIST_HEAD(&fl->mmaps); INIT_LIST_HEAD(&fl->user); fl->tgid = current->tgid; fl->cctx = cctx; @@ -1284,6 +1317,148 @@ static int fastrpc_invoke(struct fastrpc_user *fl, char __user *argp) return err; } +static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, + struct fastrpc_req_munmap *req) +{ + struct fastrpc_invoke_args args[1] = { [0] = { 0 } }; + struct fastrpc_buf *buf, *b; + struct fastrpc_munmap_req_msg req_msg; + struct device *dev = fl->sctx->dev; + int err; + u32 sc; + + spin_lock(&fl->lock); + list_for_each_entry_safe(buf, b, &fl->mmaps, node) { + if ((buf->raddr == req->vaddrout) && (buf->size == req->size)) + break; + buf = NULL; + } + spin_unlock(&fl->lock); + + if (!buf) { + dev_err(dev, "mmap not in list\n"); + return -EINVAL; + } + + req_msg.pgid = fl->tgid; + req_msg.size = buf->size; + req_msg.vaddr = buf->raddr; + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MUNMAP, 1, 0); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, + &args[0]); + if (!err) { + dev_dbg(dev, "unmmap\tpt 0x%09lx OK\n", buf->raddr); + spin_lock(&fl->lock); + list_del(&buf->node); + spin_unlock(&fl->lock); + fastrpc_buf_free(buf); + } else { + dev_err(dev, "unmmap\tpt 0x%09lx ERROR\n", buf->raddr); + } + + return err; +} + +static int fastrpc_req_munmap(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_req_munmap req; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + return fastrpc_req_munmap_impl(fl, &req); +} + +static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) +{ + struct fastrpc_invoke_args args[3] = { [0 ... 
2] = { 0 } }; + struct fastrpc_buf *buf = NULL; + struct fastrpc_mmap_req_msg req_msg; + struct fastrpc_mmap_rsp_msg rsp_msg; + struct fastrpc_req_munmap req_unmap; + struct fastrpc_phy_page pages; + struct fastrpc_req_mmap req; + struct device *dev = fl->sctx->dev; + int err; + u32 sc; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + if (req.flags != ADSP_MMAP_ADD_PAGES) { + dev_err(dev, "flag not supported 0x%x\n", req.flags); + return -EINVAL; + } + + if (req.vaddrin) { + dev_err(dev, "adding user allocated pages is not supported\n"); + return -EINVAL; + } + + err = fastrpc_buf_alloc(fl, fl->sctx->dev, req.size, &buf); + if (err) { + dev_err(dev, "failed to allocate buffer\n"); + return err; + } + + req_msg.pgid = fl->tgid; + req_msg.flags = req.flags; + req_msg.vaddr = req.vaddrin; + req_msg.num = sizeof(pages); + + args[0].ptr = (u64) (uintptr_t) &req_msg; + args[0].length = sizeof(req_msg); + + pages.addr = buf->phys; + pages.size = buf->size; + + args[1].ptr = (u64) (uintptr_t) &pages; + args[1].length = sizeof(pages); + + args[2].ptr = (u64) (uintptr_t) &rsp_msg; + args[2].length = sizeof(rsp_msg); + + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MMAP, 2, 1); + err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, + &args[0]); + if (err) { + dev_err(dev, "mmap error (len 0x%08llx)\n", buf->size); + goto err_invoke; + } + + /* update the buffer to be able to deallocate the memory on the DSP */ + buf->raddr = (uintptr_t) rsp_msg.vaddr; + + /* let the client know the address to use */ + req.vaddrout = rsp_msg.vaddr; + + spin_lock(&fl->lock); + list_add_tail(&buf->node, &fl->mmaps); + spin_unlock(&fl->lock); + + if (copy_to_user((void __user *)argp, &req, sizeof(req))) { + /* unmap the memory and release the buffer */ + req_unmap.vaddrout = buf->raddr; + req_unmap.size = buf->size; + fastrpc_req_munmap_impl(fl, &req_unmap); + return -EFAULT; + } + + dev_dbg(dev, "mmap\t\tpt 0x%09lx OK [len 0x%08llx]\n", + buf->raddr, buf->size); + + 
return 0; + +err_invoke: + fastrpc_buf_free(buf); + + return err; +} + static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -1304,6 +1479,12 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, case FASTRPC_IOCTL_ALLOC_DMA_BUFF: err = fastrpc_dmabuf_alloc(fl, argp); break; + case FASTRPC_IOCTL_MMAP: + err = fastrpc_req_mmap(fl, argp); + break; + case FASTRPC_IOCTL_MUNMAP: + err = fastrpc_req_munmap(fl, argp); + break; default: err = -ENOTTY; break; -- cgit From 2d10d2d170723e9278282458a6704552dcb77eac Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 9 Oct 2019 15:41:20 +0100 Subject: misc: fastrpc: fix memory leak from miscdev->name Fix a memory leak in miscdev->name by using the devm_ variant (devm_kasprintf). Originally reported by kmemleak: [] kmemleak_alloc+0x50/0x84 [] __kmalloc_track_caller+0xe8/0x168 [] kvasprintf+0x78/0x100 [] kasprintf+0x50/0x74 [] fastrpc_rpmsg_probe+0xd8/0x20c [] rpmsg_dev_probe+0xa8/0x148 [] really_probe+0x208/0x248 [] driver_probe_device+0x98/0xc0 [] __device_attach_driver+0x9c/0xac [] bus_for_each_drv+0x60/0x8c [] __device_attach+0x8c/0x100 [] device_initial_probe+0x20/0x28 [] bus_probe_device+0x34/0x7c [] device_add+0x420/0x498 [] device_register+0x24/0x2c Signed-off-by: Srinivas Kandagatla Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20191009144123.24583-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index aa1249bb581a..c0fe3419c685 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1610,8 +1610,8 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev) return -ENOMEM; data->miscdev.minor = MISC_DYNAMIC_MINOR; - data->miscdev.name = kasprintf(GFP_KERNEL, "fastrpc-%s", - domains[domain_id]); + data->miscdev.name = devm_kasprintf(rdev, GFP_KERNEL, 
"fastrpc-%s", + domains[domain_id]); data->miscdev.fops = &fastrpc_fops; err = misc_register(&data->miscdev); if (err) -- cgit From 55bcda35584c995d2544902e77c2ad5bee6e729b Mon Sep 17 00:00:00 2001 From: Jorge Ramirez-Ortiz Date: Wed, 9 Oct 2019 15:41:21 +0100 Subject: misc: fastrpc: do not interrupt kernel calls The DSP firmware requires some calls to be held until processing has completed: this is to guarantee that memory continues to be accessible. Nevertheless, the fastrpc driver chooses not to support the case where requests need to be held for unbounded amounts of time. If such a use-case becomes necessary, this timeout will need to be revisited. Signed-off-by: Jorge Ramirez-Ortiz Reviewed-by: Srinivas Kandagatla Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20191009144123.24583-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index c0fe3419c685..666c431380ce 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -959,8 +959,13 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, if (err) goto bail; - /* Wait for remote dsp to respond or time out */ - err = wait_for_completion_interruptible(&ctx->work); + if (kernel) { + if (!wait_for_completion_timeout(&ctx->work, 10 * HZ)) + err = -ETIMEDOUT; + } else { + err = wait_for_completion_interruptible(&ctx->work); + } + if (err) goto bail; -- cgit From 387f625585d1a59e5dc7fbd6bd4002360cad78b0 Mon Sep 17 00:00:00 2001 From: Jorge Ramirez-Ortiz Date: Wed, 9 Oct 2019 15:41:22 +0100 Subject: misc: fastrpc: handle interrupted contexts Buffers owned by a context that has been interrupted either by a signal or a timeout might still be being accessed by the DSP. Delegate returning the associated memory to a later time when the device is released. 
Signed-off-by: Jorge Ramirez-Ortiz Reviewed-by: Srinivas Kandagatla Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20191009144123.24583-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 666c431380ce..eef2cdc00672 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -984,12 +984,13 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, } bail: - /* We are done with this compute context, remove it from pending list */ - spin_lock(&fl->lock); - list_del(&ctx->node); - spin_unlock(&fl->lock); - fastrpc_context_put(ctx); - + if (err != -ERESTARTSYS && err != -ETIMEDOUT) { + /* We are done with this compute context */ + spin_lock(&fl->lock); + list_del(&ctx->node); + spin_unlock(&fl->lock); + fastrpc_context_put(ctx); + } if (err) dev_dbg(fl->sctx->dev, "Error: Invoke Failed %d\n", err); -- cgit From efcd2390f56ba979e0d1022c46f48d7beec0bdd6 Mon Sep 17 00:00:00 2001 From: Jorge Ramirez-Ortiz Date: Wed, 9 Oct 2019 15:41:23 +0100 Subject: misc: fastrpc: revert max init file size back to 2MB With the integration of the mmap/unmap functionality, it is no longer necessary to allow large memory allocations upfront since they can be handled during runtime. Tested on QCS404 with CDSP Neural Processing test suite. 
Signed-off-by: Jorge Ramirez-Ortiz Reviewed-by: Srinivas Kandagatla Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20191009144123.24583-6-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/misc') diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index eef2cdc00672..b6420aae45b9 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -32,7 +32,7 @@ #define FASTRPC_CTX_MAX (256) #define FASTRPC_INIT_HANDLE 1 #define FASTRPC_CTXID_MASK (0xFF0) -#define INIT_FILELEN_MAX (64 * 1024 * 1024) +#define INIT_FILELEN_MAX (2 * 1024 * 1024) #define FASTRPC_DEVICE_NAME "fastrpc" #define ADSP_MMAP_ADD_PAGES 0x1000 -- cgit From 7ed42113ce0379197c44232429e309bcc72424b0 Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Fri, 20 Sep 2019 21:48:30 +0530 Subject: ocxl: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files for Open Coherent Accelerator (OCXL) compatible device drivers. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Acked-by: Andrew Donnellan Link: https://lore.kernel.org/r/20190920161826.GA6894@nishad Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ocxl/ocxl_internal.h | 2 +- drivers/misc/ocxl/trace.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h index 97415afd79f3..345bf843a38e 100644 --- a/drivers/misc/ocxl/ocxl_internal.h +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright 2017 IBM Corp. 
#ifndef _OCXL_INTERNAL_H_ #define _OCXL_INTERNAL_H_ diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h index 024f417e7e01..17e21cb2addd 100644 --- a/drivers/misc/ocxl/trace.h +++ b/drivers/misc/ocxl/trace.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright 2017 IBM Corp. #undef TRACE_SYSTEM #define TRACE_SYSTEM ocxl -- cgit From 40688bd58df71269d146996e10e84ca3888fd330 Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Fri, 20 Sep 2019 21:29:34 +0530 Subject: misc: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in header files for Miscellaneous device drivers. For C header files Documentation/process/license-rules.rst mandates C-like comments (opposed to C source files where C++ style should be used) Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Link: https://lore.kernel.org/r/20190920155931.GA6251@nishad Signed-off-by: Greg Kroah-Hartman --- drivers/misc/hpilo.h | 2 +- drivers/misc/ibmvmc.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h index 94dfb9e40e29..1aa433a7f66c 100644 --- a/drivers/misc/hpilo.h +++ b/drivers/misc/hpilo.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/drivers/char/hpilo.h * diff --git a/drivers/misc/ibmvmc.h b/drivers/misc/ibmvmc.h index e140ada8fe2c..0e1756fffeae 100644 --- a/drivers/misc/ibmvmc.h +++ b/drivers/misc/ibmvmc.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0+ - * +/* SPDX-License-Identifier: GPL-2.0+ */ +/* * linux/drivers/misc/ibmvmc.h * * IBM Power Systems Virtual Management Channel Support. 
-- cgit From 7a2b9e6ec84588b0be65cc0ae45a65bac431496b Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 5 Nov 2019 17:05:13 +0200 Subject: mei: bus: prefix device names on bus with the bus name Add parent device name to the name of devices on bus to avoid device names collisions for same client UUID available from different MEI heads. Namely this prevents sysfs collision under /sys/bus/mei/device/ In the device part leave just UUID other parameters that are required for device matching are not required here and are just bloating the name. Cc: Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191105150514.14010-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 985bd4fd3328..53bb394ccba6 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -873,15 +873,16 @@ static const struct device_type mei_cl_device_type = { /** * mei_cl_bus_set_name - set device name for me client device + * - + * Example: 0000:00:16.0-55213584-9a29-4916-badf-0fb7ed682aeb * * @cldev: me client device */ static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev) { - dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X", - cldev->name, - mei_me_cl_uuid(cldev->me_cl), - mei_me_cl_ver(cldev->me_cl)); + dev_set_name(&cldev->dev, "%s-%pUl", + dev_name(cldev->bus->dev), + mei_me_cl_uuid(cldev->me_cl)); } /** -- cgit From 82b29b9f72afdccb40ea5f3c13c6a3cb65a597bc Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 5 Nov 2019 17:05:14 +0200 Subject: mei: me: add comet point V device id Comet Point (Comet Lake) V device id. 
Cc: Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191105150514.14010-2-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 1 + drivers/misc/mei/pci-me.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers/misc') diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index c09f8bb49495..b359f06f05e7 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -81,6 +81,7 @@ #define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */ #define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ +#define MEI_DEV_ID_CMP_V 0xA3BA /* Comet Point Lake V */ #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 3dca63eddaa0..ce43415a536c 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -98,6 +98,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_V, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, -- cgit From 907b471ca228a5fc95f7ee8b3d189e64ade7ce9b Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 7 Nov 2019 00:38:39 +0200 Subject: mei: me: mei_me_dev_init() use struct device instead of struct pci_dev. It's enough to bind mei_device with associated 'struct device' instead of actual 'struct pci_dev'. This is to allow working with mei devices embedded within another pci device, usually via MFD framework, where mei device is represented as a platform device. Bump copyright year to 2019 on effected files. 
Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191106223841.15802-2-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 10 +++++----- drivers/misc/mei/hw-me.h | 4 ++-- drivers/misc/mei/pci-me.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 5ef30c7c92b3..7d241c70e3e0 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2003-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2003-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -1461,19 +1461,19 @@ const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) /** * mei_me_dev_init - allocates and initializes the mei device structure * - * @pdev: The pci device structure + * @parent: device associated with physical device (pci/platform) * @cfg: per device generation config * * Return: The mei_device pointer on success, NULL on failure. 
*/ -struct mei_device *mei_me_dev_init(struct pci_dev *pdev, +struct mei_device *mei_me_dev_init(struct device *parent, const struct mei_cfg *cfg) { struct mei_device *dev; struct mei_me_hw *hw; int i; - dev = devm_kzalloc(&pdev->dev, sizeof(struct mei_device) + + dev = devm_kzalloc(parent, sizeof(struct mei_device) + sizeof(struct mei_me_hw), GFP_KERNEL); if (!dev) return NULL; @@ -1483,7 +1483,7 @@ struct mei_device *mei_me_dev_init(struct pci_dev *pdev, for (i = 0; i < DMA_DSCR_NUM; i++) dev->dr_dscr[i].size = cfg->dma_size[i]; - mei_device_init(dev, &pdev->dev, &mei_me_hw_ops); + mei_device_init(dev, parent, &mei_me_hw_ops); hw->cfg = cfg; dev->fw_f_fw_ver_supported = cfg->fw_ver_supported; diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index 1d8794828cbc..b39347faadf5 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2012-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2012-2019, Intel Corporation. All rights reserved. 
* Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -91,7 +91,7 @@ enum mei_cfg_idx { const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx); -struct mei_device *mei_me_dev_init(struct pci_dev *pdev, +struct mei_device *mei_me_dev_init(struct device *parent, const struct mei_cfg *cfg); int mei_me_pg_enter_sync(struct mei_device *dev); diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index ce43415a536c..e382ecca96d7 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -192,7 +192,7 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* allocates and initializes the mei dev structure */ - dev = mei_me_dev_init(pdev, cfg); + dev = mei_me_dev_init(&pdev->dev, cfg); if (!dev) { err = -ENOMEM; goto end; -- cgit From 261b3e1f2a01c72b1882cf5bccfbd4bf40ea62e8 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 7 Nov 2019 00:38:40 +0200 Subject: mei: me: store irq number in the hw struct. Store irq number in hw struct to be used by synchronize_irq(). This is to allow working with mei devices embedded within another pci device, via MFD framework, where mei device is represented as a platform device.
Bump the copyright year to 2019 on hw-me.c and hw-me.h Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191106223841.15802-3-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 4 ++-- drivers/misc/mei/hw-me.h | 2 ++ drivers/misc/mei/pci-me.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 7d241c70e3e0..23606d0ddcd6 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -323,9 +323,9 @@ static void mei_me_intr_disable(struct mei_device *dev) */ static void mei_me_synchronize_irq(struct mei_device *dev) { - struct pci_dev *pdev = to_pci_dev(dev->dev); + struct mei_me_hw *hw = to_me_hw(dev); - synchronize_irq(pdev->irq); + synchronize_irq(hw->irq); } /** diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index b39347faadf5..c45b32a7cc46 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -42,6 +42,7 @@ struct mei_cfg { * * @cfg: per device generation config and ops * @mem_addr: io memory address + * @irq: irq number * @pg_state: power gating state * @d0i3_supported: di03 support * @hbuf_depth: depth of hardware host/write buffer in slots @@ -49,6 +50,7 @@ struct mei_cfg { struct mei_me_hw { const struct mei_cfg *cfg; void __iomem *mem_addr; + int irq; enum mei_pg_state pg_state; bool d0i3_supported; u8 hbuf_depth; diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index e382ecca96d7..6233b3ca1c1d 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -199,6 +199,7 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } hw = to_me_hw(dev); hw->mem_addr = pcim_iomap_table(pdev)[0]; + hw->irq = pdev->irq; pci_enable_msi(pdev); -- cgit From 261e071acd9bcbcfbc30652640385615ced27f4f Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 7 Nov 2019 00:38:41 +0200 Subject: 
mei: abstract fw status register read. This is to allow working with mei devices embedded within another pci device, where mei device is represented as a platform child device and fw status registers are not necessarily resident in the device pci config space. Bump the copyright year to 2019 on the modified files. Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191106223841.15802-4-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me.c | 24 ++++++++++++++++-------- drivers/misc/mei/hw-me.h | 2 ++ drivers/misc/mei/hw-txe.c | 10 +++++++--- drivers/misc/mei/init.c | 6 ++++-- drivers/misc/mei/mei_dev.h | 8 ++++---- drivers/misc/mei/pci-me.c | 8 ++++++++ 6 files changed, 41 insertions(+), 17 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 23606d0ddcd6..0ec55431e26b 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -183,20 +183,19 @@ static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg) static int mei_me_fw_status(struct mei_device *dev, struct mei_fw_status *fw_status) { - struct pci_dev *pdev = to_pci_dev(dev->dev); struct mei_me_hw *hw = to_me_hw(dev); const struct mei_fw_status *fw_src = &hw->cfg->fw_status; int ret; int i; - if (!fw_status) + if (!fw_status || !hw->read_fws) return -EINVAL; fw_status->count = fw_src->count; for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) { - ret = pci_read_config_dword(pdev, fw_src->status[i], - &fw_status->status[i]); - trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HSF_X", + ret = hw->read_fws(dev, fw_src->status[i], + &fw_status->status[i]); + trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_X", fw_src->status[i], fw_status->status[i]); if (ret) @@ -210,19 +209,26 @@ static int mei_me_fw_status(struct mei_device *dev, * mei_me_hw_config - configure hw dependent settings * * @dev: mei device + * + * Return: + * * -EINVAL when read_fws is not set + * * 0 on success + * */ -static 
void mei_me_hw_config(struct mei_device *dev) +static int mei_me_hw_config(struct mei_device *dev) { - struct pci_dev *pdev = to_pci_dev(dev->dev); struct mei_me_hw *hw = to_me_hw(dev); u32 hcsr, reg; + if (WARN_ON(!hw->read_fws)) + return -EINVAL; + /* Doesn't change in runtime */ hcsr = mei_hcsr_read(dev); hw->hbuf_depth = (hcsr & H_CBD) >> 24; reg = 0; - pci_read_config_dword(pdev, PCI_CFG_HFS_1, &reg); + hw->read_fws(dev, PCI_CFG_HFS_1, &reg); trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg); hw->d0i3_supported = ((reg & PCI_CFG_HFS_1_D0I3_MSK) == PCI_CFG_HFS_1_D0I3_MSK); @@ -233,6 +239,8 @@ static void mei_me_hw_config(struct mei_device *dev) if (reg & H_D0I3C_I3) hw->pg_state = MEI_PG_ON; } + + return 0; } /** diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index c45b32a7cc46..3352d19b8e85 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -46,6 +46,7 @@ struct mei_cfg { * @pg_state: power gating state * @d0i3_supported: di03 support * @hbuf_depth: depth of hardware host/write buffer in slots + * @read_fws: read FW status register handler */ struct mei_me_hw { const struct mei_cfg *cfg; @@ -54,6 +55,7 @@ struct mei_me_hw { enum mei_pg_state pg_state; bool d0i3_supported; u8 hbuf_depth; + int (*read_fws)(const struct mei_device *dev, int where, u32 *val); }; #define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw) diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index 5e58656b8e19..785b260b3ae9 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2013-2014, Intel Corporation. All rights reserved. + * Copyright (c) 2013-2019, Intel Corporation. All rights reserved.
* Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -660,14 +660,16 @@ static int mei_txe_fw_status(struct mei_device *dev, } /** - * mei_txe_hw_config - configure hardware at the start of the devices + * mei_txe_hw_config - configure hardware at the start of the devices * * @dev: the device structure * * Configure hardware at the start of the device should be done only * once at the device probe time + * + * Return: always 0 */ -static void mei_txe_hw_config(struct mei_device *dev) +static int mei_txe_hw_config(struct mei_device *dev) { struct mei_txe_hw *hw = to_txe_hw(dev); @@ -677,6 +679,8 @@ static void mei_txe_hw_config(struct mei_device *dev) dev_dbg(dev->dev, "aliveness_resp = 0x%08x, readiness = 0x%08x.\n", hw->aliveness, hw->readiness); + + return 0; } /** diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index b9fef773e71b..bcee77768b91 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2012-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2012-2019, Intel Corporation. All rights reserved. * Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -190,7 +190,9 @@ int mei_start(struct mei_device *dev) /* acknowledge interrupt and stop interrupts */ mei_clear_interrupts(dev); - mei_hw_config(dev); + ret = mei_hw_config(dev); + if (ret) + goto err; dev_dbg(dev->dev, "reset in start the mei device.\n"); diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index 0f2141178299..e0ac660c96e7 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2003-2018, Intel Corporation. All rights reserved. + * Copyright (c) 2003-2019, Intel Corporation. All rights reserved. 
* Intel Management Engine Interface (Intel MEI) Linux driver */ @@ -287,7 +287,7 @@ struct mei_hw_ops { bool (*hw_is_ready)(struct mei_device *dev); int (*hw_reset)(struct mei_device *dev, bool enable); int (*hw_start)(struct mei_device *dev); - void (*hw_config)(struct mei_device *dev); + int (*hw_config)(struct mei_device *dev); int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); enum mei_pg_state (*pg_state)(struct mei_device *dev); @@ -614,9 +614,9 @@ void mei_irq_compl_handler(struct mei_device *dev, struct list_head *cmpl_list); */ -static inline void mei_hw_config(struct mei_device *dev) +static inline int mei_hw_config(struct mei_device *dev) { - dev->ops->hw_config(dev); + return dev->ops->hw_config(dev); } static inline enum mei_pg_state mei_pg_state(struct mei_device *dev) diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 6233b3ca1c1d..1de6daf38602 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -121,6 +121,13 @@ static inline void mei_me_set_pm_domain(struct mei_device *dev) {} static inline void mei_me_unset_pm_domain(struct mei_device *dev) {} #endif /* CONFIG_PM */ +static int mei_me_read_fws(const struct mei_device *dev, int where, u32 *val) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + return pci_read_config_dword(pdev, where, val); +} + /** * mei_me_quirk_probe - probe for devices that doesn't valid ME interface * @@ -200,6 +207,7 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw = to_me_hw(dev); hw->mem_addr = pcim_iomap_table(pdev)[0]; hw->irq = pdev->irq; + hw->read_fws = mei_me_read_fws; pci_enable_msi(pdev); -- cgit From 52f6efdf80924449023c559c3134258c2c6da43b Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 7 Nov 2019 12:44:45 +0200 Subject: mei: add trc detection register to sysfs The glitch detection HW (TRC) save it status information into TRC status register. Make it available to user-space via read-only sysfs file. 
The TRC register is available for PCH15 gen and newer, for older platforms reading the sysfs file will fail with EOPNOTSUPP. Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191107104445.19101-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 3 ++- drivers/misc/mei/hw-me.c | 34 ++++++++++++++++++++++++++++++++++ drivers/misc/mei/hw-me.h | 4 ++++ drivers/misc/mei/main.c | 24 ++++++++++++++++++++++++ drivers/misc/mei/mei_dev.h | 10 ++++++++++ drivers/misc/mei/pci-me.c | 4 ++-- 6 files changed, 76 insertions(+), 3 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index b359f06f05e7..7cd67fb2365d 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -163,7 +163,8 @@ access to ME_CBD */ #define ME_IS_HRA 0x00000002 /* ME Interrupt Enable HRA - host read only access to ME_IE */ #define ME_IE_HRA 0x00000001 - +/* TRC control shadow register */ +#define ME_TRC 0x00000030 /* H_HPG_CSR register bits */ #define H_HPG_CSR_PGIHEXR 0x00000001 diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 0ec55431e26b..668418d7ea77 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -172,6 +172,27 @@ static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg) mei_me_reg_write(to_me_hw(dev), H_D0I3C, reg); } +/** + * mei_me_trc_status - read trc status register + * + * @dev: mei device + * @trc: trc status register value + * + * Return: 0 on success, error otherwise + */ +static int mei_me_trc_status(struct mei_device *dev, u32 *trc) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (!hw->cfg->hw_trc_supported) + return -EOPNOTSUPP; + + *trc = mei_me_reg_read(hw, ME_TRC); + trace_mei_reg_read(dev->dev, "ME_TRC", ME_TRC, *trc); + + return 0; +} + /** * mei_me_fw_status - read fw status register from pci config space * @@ -1302,6 +1323,7 @@ end: static const
struct mei_hw_ops mei_me_hw_ops = { + .trc_status = mei_me_trc_status, .fw_status = mei_me_fw_status, .pg_state = mei_me_pg_state, @@ -1392,6 +1414,9 @@ static bool mei_me_fw_type_sps(struct pci_dev *pdev) .dma_size[DMA_DSCR_DEVICE] = SZ_128K, \ .dma_size[DMA_DSCR_CTRL] = PAGE_SIZE +#define MEI_CFG_TRC \ + .hw_trc_supported = 1 + /* ICH Legacy devices */ static const struct mei_cfg mei_me_ich_cfg = { MEI_CFG_ICH_HFS, @@ -1440,6 +1465,14 @@ static const struct mei_cfg mei_me_pch12_cfg = { MEI_CFG_DMA_128, }; +/* Tiger Lake and newer devices */ +static const struct mei_cfg mei_me_pch15_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_DMA_128, + MEI_CFG_TRC, +}; + /* * mei_cfg_list - A list of platform platform specific configurations. * Note: has to be synchronized with enum mei_cfg_idx. @@ -1454,6 +1487,7 @@ static const struct mei_cfg *const mei_cfg_list[] = { [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg, [MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg, [MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg, + [MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg, }; const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h index 3352d19b8e85..4a8d4dcd5a91 100644 --- a/drivers/misc/mei/hw-me.h +++ b/drivers/misc/mei/hw-me.h @@ -21,12 +21,14 @@ * @quirk_probe: device exclusion quirk * @dma_size: device DMA buffers size * @fw_ver_supported: is fw version retrievable from FW + * @hw_trc_supported: does the hw support trc register */ struct mei_cfg { const struct mei_fw_status fw_status; bool (*quirk_probe)(struct pci_dev *pdev); size_t dma_size[DMA_DSCR_NUM]; u32 fw_ver_supported:1; + u32 hw_trc_supported:1; }; @@ -78,6 +80,7 @@ struct mei_me_hw { * servers platforms with quirk for * SPS firmware exclusion. * @MEI_ME_PCH12_CFG: Platform Controller Hub Gen12 and newer + * @MEI_ME_PCH15_CFG: Platform Controller Hub Gen15 and newer * @MEI_ME_NUM_CFG: Upper Sentinel. 
*/ enum mei_cfg_idx { @@ -90,6 +93,7 @@ enum mei_cfg_idx { MEI_ME_PCH8_CFG, MEI_ME_PCH8_SPS_CFG, MEI_ME_PCH12_CFG, + MEI_ME_PCH15_CFG, MEI_ME_NUM_CFG, }; diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 7310b476323c..4ef6e37caafc 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -700,6 +700,29 @@ static int mei_fasync(int fd, struct file *file, int band) return fasync_helper(fd, file, band, &cl->ev_async); } +/** + * trc_show - mei device trc attribute show method + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t trc_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + u32 trc; + int ret; + + ret = mei_trc_status(dev, &trc); + if (ret) + return ret; + return sprintf(buf, "%08X\n", trc); +} +static DEVICE_ATTR_RO(trc); + /** * fw_status_show - mei device fw_status attribute show method * @@ -887,6 +910,7 @@ static struct attribute *mei_attrs[] = { &dev_attr_tx_queue_limit.attr, &dev_attr_fw_ver.attr, &dev_attr_dev_state.attr, + &dev_attr_trc.attr, NULL }; ATTRIBUTE_GROUPS(mei); diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index e0ac660c96e7..76f8ff5ff974 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -260,6 +260,7 @@ struct mei_cl { * @hw_config : configure hw * * @fw_status : get fw status registers + * @trc_status : get trc status register * @pg_state : power gating state of the device * @pg_in_transition : is device now in pg transition * @pg_is_enabled : is power gating enabled @@ -290,6 +291,8 @@ struct mei_hw_ops { int (*hw_config)(struct mei_device *dev); int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); + int (*trc_status)(struct mei_device *dev, u32 *trc); + enum mei_pg_state (*pg_state)(struct mei_device *dev); bool (*pg_in_transition)(struct 
mei_device *dev); bool (*pg_is_enabled)(struct mei_device *dev); @@ -711,6 +714,13 @@ static inline int mei_count_full_read_slots(struct mei_device *dev) return dev->ops->rdbuf_full_slots(dev); } +static inline int mei_trc_status(struct mei_device *dev, u32 *trc) +{ + if (dev->ops->trc_status) + return dev->ops->trc_status(dev, trc); + return -EOPNOTSUPP; +} + static inline int mei_fw_status(struct mei_device *dev, struct mei_fw_status *fw_status) { diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 1de6daf38602..c845b7e40f26 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -102,9 +102,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, - {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)}, /* required last entry */ -- cgit From c0e5f4e73a7148e18b763067d181661987cb4c09 Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Mon, 21 Oct 2019 16:05:05 +0800 Subject: misc: rtsx: Add support for RTS5261 Add support for new chip rts5261. In order to support rts5261, the definitions of some internal registers and workflow have to be modified and are different from its predecessors. So we need this patch to ensure RTS5261 can work. 
Signed-off-by: Rui Feng Link: https://lore.kernel.org/r/1571645105-5028-1-git-send-email-rui_feng@realsil.com.cn Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/Makefile | 2 +- drivers/misc/cardreader/rts5261.c | 792 +++++++++++++++++++++++++++++++++++++ drivers/misc/cardreader/rts5261.h | 233 +++++++++++ drivers/misc/cardreader/rtsx_pcr.c | 43 +- drivers/misc/cardreader/rtsx_pcr.h | 1 + 5 files changed, 1063 insertions(+), 8 deletions(-) create mode 100644 drivers/misc/cardreader/rts5261.c create mode 100644 drivers/misc/cardreader/rts5261.h (limited to 'drivers/misc') diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile index d9bff5a2217e..1f56267ed2f4 100644 --- a/drivers/misc/cardreader/Makefile +++ b/drivers/misc/cardreader/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_MISC_ALCOR_PCI) += alcor_pci.o obj-$(CONFIG_MISC_RTSX_PCI) += rtsx_pci.o -rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o rts5261.o obj-$(CONFIG_MISC_RTSX_USB) += rtsx_usb.o diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c new file mode 100644 index 000000000000..32dcec2e9dfd --- /dev/null +++ b/drivers/misc/cardreader/rts5261.c @@ -0,0 +1,792 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved. 
+ * + * Author: + * Rui FENG + * Wei WANG + */ + +#include +#include +#include + +#include "rts5261.h" +#include "rtsx_pcr.h" + +static u8 rts5261_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & IC_VERSION_MASK; +} + +static void rts5261_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x13, 0x13, 0x13}, + {0x96, 0x96, 0x96}, + {0x7F, 0x7F, 0x7F}, + {0x96, 0x96, 0x96}, + }; + u8 driving_1v8[4][3] = { + {0x99, 0x99, 0x99}, + {0x3A, 0x3A, 0x3A}, + {0xE6, 0xE6, 0xE6}, + {0xB3, 0xB3, 0xB3}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_write_register(pcr, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + + rtsx_pci_write_register(pcr, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + + rtsx_pci_write_register(pcr, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rtsx5261_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + /* 0x814~0x817 */ + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + + if (!rts5261_vendor_setting_valid(reg)) { + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rts5261_reg_to_card_drive_sel(reg); + + if (rts5261_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; + + /* 0x724~0x727 */ + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + pcr->aspm_en = rts5261_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rts5261_reg_to_sd30_drive_sel_1v8(reg); + pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(reg); +} + +static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + /* Set relink_time to 0 */ +
rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, + RELINK_TIME_MASK, 0); + + if (pm_state == HOST_ENTER_S3) + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL, + SSC_POWER_DOWN, SSC_POWER_DOWN); +} + +static int rts5261_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_EN); +} + +static int rts5261_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_DISABLE); +} + +static int rts5261_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, + 0x02, 0x02); +} + +static int rts5261_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, + 0x02, 0x00); +} + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5261_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5261_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +static int rts5261_sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, SD_CFG1, SD_MODE_SELECT_MASK + | SD_ASYNC_FIFO_NOT_RST, SD_30_MODE | SD_ASYNC_FIFO_NOT_RST); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_write_register(pcr, CARD_CLK_SOURCE, 0xFF, + CRC_VAR_CLK0 | SD30_FIX_CLK | SAMPLE_VAR_CLK1); + 
rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + + return 0; +} + +static int rts5261_card_power_on(struct rtsx_pcr *pcr, int card) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) + rtsx_pci_enable_ocp(pcr); + + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG1, + RTS5261_LDO1_TUNE_MASK, RTS5261_LDO1_33); + rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO1_POWERON, RTS5261_LDO1_POWERON); + + rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO3318_POWERON, RTS5261_LDO3318_POWERON); + + msleep(20); + + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, SD_OUTPUT_EN); + + /* Initialize SD_CFG1 register */ + rtsx_pci_write_register(pcr, SD_CFG1, 0xFF, + SD_CLK_DIVIDE_128 | SD_20_MODE | SD_BUS_WIDTH_1BIT); + + rtsx_pci_write_register(pcr, SD_SAMPLE_POINT_CTL, + 0xFF, SD20_RX_POS_EDGE); + rtsx_pci_write_register(pcr, SD_PUSH_POINT_CTL, 0xFF, 0); + rtsx_pci_write_register(pcr, CARD_STOP, SD_STOP | SD_CLR_ERR, + SD_STOP | SD_CLR_ERR); + + /* Reset SD_CFG3 register */ + rtsx_pci_write_register(pcr, SD_CFG3, SD30_CLK_END_EN, 0); + rtsx_pci_write_register(pcr, REG_SD_STOP_SDCLK_CFG, + SD30_CLK_STOP_CFG_EN | SD30_CLK_STOP_CFG1 | + SD30_CLK_STOP_CFG0, 0); + + if (pcr->extra_caps & EXTRA_CAPS_SD_SDR50 || + pcr->extra_caps & EXTRA_CAPS_SD_SDR104) + rts5261_sd_set_sample_push_timing_sd30(pcr); + + return 0; +} + +static int rts5261_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + u16 val = 0; + + rtsx_pci_write_register(pcr, RTS5261_CARD_PWR_CTL, + RTS5261_PUPDC, RTS5261_PUPDC); + + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val); + val |= PHY_TUNE_SDBUS_33; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val); + if (err < 0) + return err; + + rtsx_pci_write_register(pcr, RTS5261_DV3318_CFG, + RTS5261_DV3318_TUNE_MASK, RTS5261_DV3318_33); + rtsx_pci_write_register(pcr, SD_PAD_CTL, + SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + 
rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val); + val &= ~PHY_TUNE_SDBUS_33; + err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val); + if (err < 0) + return err; + + rtsx_pci_write_register(pcr, RTS5261_DV3318_CFG, + RTS5261_DV3318_TUNE_MASK, RTS5261_DV3318_18); + rtsx_pci_write_register(pcr, SD_PAD_CTL, + SD_IO_USING_1V8, SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + /* set pad drive */ + rts5261_fill_driving(pcr, voltage); + + return 0; +} + +static void rts5261_stop_cmd(struct rtsx_pcr *pcr) +{ + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + rtsx_pci_write_register(pcr, RTS5260_DMA_RST_CTL_0, + RTS5260_DMA_RST | RTS5260_ADMA3_RST, + RTS5260_DMA_RST | RTS5260_ADMA3_RST); + rtsx_pci_write_register(pcr, RBCTL, RB_FLUSH, RB_FLUSH); +} + +static void rts5261_card_before_power_off(struct rtsx_pcr *pcr) +{ + rts5261_stop_cmd(pcr); + rts5261_switch_output_voltage(pcr, OUTPUT_3V3); + +} + +static void rts5261_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + val = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + +} + +static void rts5261_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + + mask = SD_OCP_INT_EN | SD_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, 0); + +} + +static int rts5261_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err = 0; + + rts5261_card_before_power_off(pcr); + err = rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL, + RTS5261_LDO_POWERON_MASK, 0); + + if (pcr->option.ocp_en) + rtsx_pci_disable_ocp(pcr); + + return err; +} + +static void rts5261_init_ocp(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) { + u8 mask, val; + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, + RTS5261_LDO1_OCP_EN | 
RTS5261_LDO1_OCP_LMT_EN); + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_THD_MASK, option->sd_800mA_ocp_thd); + + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_LMT_THD_MASK, + RTS5261_LDO1_LMT_THD_2000); + + mask = SD_OCP_GLITCH_MASK; + val = pcr->hw_param.ocp_glitch; + rtsx_pci_write_register(pcr, REG_OCPGLITCH, mask, val); + + rts5261_enable_ocp(pcr); + } else { + rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0, + RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, 0); + } +} + +static void rts5261_clear_ocpstat(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + u8 val = 0; + + mask = SD_OCP_INT_CLR | SD_OC_CLR; + val = SD_OCP_INT_CLR | SD_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + + udelay(10); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + +} + +static void rts5261_process_ocp(struct rtsx_pcr *pcr) +{ + if (!pcr->option.ocp_en) + return; + + rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat); + + if (pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER)) { + rts5261_card_power_off(pcr, RTSX_SD_CARD); + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + rts5261_clear_ocpstat(pcr); + pcr->ocp_stat = 0; + } + +} + +static int rts5261_init_from_hw(struct rtsx_pcr *pcr) +{ + int retval; + u32 lval, i; + u8 valid, efuse_valid, tmp; + + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POR | REG_EFUSE_POWER_MASK, + REG_EFUSE_POR | REG_EFUSE_POWERON); + udelay(1); + rtsx_pci_write_register(pcr, RTS5261_EFUSE_ADDR, + RTS5261_EFUSE_ADDR_MASK, 0x00); + rtsx_pci_write_register(pcr, RTS5261_EFUSE_CTL, + RTS5261_EFUSE_ENABLE | RTS5261_EFUSE_MODE_MASK, + RTS5261_EFUSE_ENABLE); + + /* Wait transfer end */ + for (i = 0; i < MAX_RW_REG_CNT; i++) { + rtsx_pci_read_register(pcr, RTS5261_EFUSE_CTL, &tmp); + if ((tmp & 0x80) == 0) + break; + } + rtsx_pci_read_register(pcr, RTS5261_EFUSE_READ_DATA, &tmp); + efuse_valid = ((tmp & 0x0C) >> 2); + pcr_dbg(pcr, "Load efuse valid: 0x%x\n", efuse_valid); + + if (efuse_valid == 
0) { + retval = rtsx_pci_read_config_dword(pcr, + PCR_SETTING_REG2, &lval); + if (retval != 0) + pcr_dbg(pcr, "read 0x814 DW fail\n"); + pcr_dbg(pcr, "DW from 0x814: 0x%x\n", lval); + /* 0x816 */ + valid = (u8)((lval >> 16) & 0x03); + pcr_dbg(pcr, "0x816: %d\n", valid); + } + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POR, 0); + pcr_dbg(pcr, "Disable efuse por!\n"); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &lval); + lval = lval & 0x00FFFFFF; + retval = rtsx_pci_write_config_dword(pcr, PCR_SETTING_REG2, lval); + if (retval != 0) + pcr_dbg(pcr, "write config fail\n"); + + return retval; +} + +static void rts5261_init_from_cfg(struct rtsx_pcr *pcr) +{ + u32 lval; + struct rtsx_cr_option *option = &pcr->option; + + rtsx_pci_read_config_dword(pcr, PCR_ASPM_SETTING_REG1, &lval); + + if (lval & ASPM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); + else + rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN); + + if (lval & ASPM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); + else + rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN); + + if (lval & PM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_1_EN); + else + rtsx_clear_dev_flag(pcr, PM_L1_1_EN); + + if (lval & PM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_2_EN); + else + rtsx_clear_dev_flag(pcr, PM_L1_2_EN); + + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0xFF, 0); + if (option->ltr_en) { + u16 val; + + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val); + if (val & PCI_EXP_DEVCTL2_LTR_EN) { + option->ltr_enabled = true; + option->ltr_active = true; + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } else { + option->ltr_enabled = false; + } + } + + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + option->force_clkreq_0 = false; + else + option->force_clkreq_0 = true; +} + +static int rts5261_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1, + 
CD_RESUME_EN_MASK, CD_RESUME_EN_MASK); + + rts5261_init_from_cfg(pcr); + rts5261_init_from_hw(pcr); + + /* power off efuse */ + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + REG_EFUSE_POWER_MASK, REG_EFUSE_POWEROFF); + rtsx_pci_write_register(pcr, L1SUB_CONFIG1, + AUX_CLK_ACTIVE_SEL_MASK, MAC_CKSW_DONE); + rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, 0); + + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4, + RTS5261_AUX_CLK_16M_EN, 0); + + /* Release PRSNT# */ + rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4, + RTS5261_FORCE_PRSNT_LOW, 0); + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, + FUNC_FORCE_UPME_XMT_DBG, FUNC_FORCE_UPME_XMT_DBG); + + rtsx_pci_write_register(pcr, PCLK_CTL, + PCLK_MODE_SEL, PCLK_MODE_SEL); + + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, CLK_PM_EN, CLK_PM_EN); + + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x0F, 0x02); + + /* Configure driving */ + rts5261_fill_driving(pcr, OUTPUT_3V3); + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. 
+ */ + if (option->force_clkreq_0) + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_write_register(pcr, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00); + rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL, + FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL); + + /* Clear Enter RTD3_cold Information*/ + rtsx_pci_write_register(pcr, RTS5261_FW_CTL, + RTS5261_INFORM_RTD3_COLD, 0); + + return 0; +} + +static void rts5261_enable_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + u8 val = 0; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + val = pcr->aspm_en; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; + + val = FORCE_ASPM_CTL0; + val |= (pcr->aspm_en & 0x02); + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + val = pcr->aspm_en; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } + pcr->aspm_enabled = enable; + +} + +static void rts5261_disable_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + u8 val = 0; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + val = 0; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; + + val = 0; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + val = FORCE_ASPM_CTL0; + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + } + rtsx_pci_write_register(pcr, SD_CFG1, SD_ASYNC_FIFO_NOT_RST, 0); + udelay(10); + pcr->aspm_enabled = enable; 
+} + +static void rts5261_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + if (enable) + rts5261_enable_aspm(pcr, true); + else + rts5261_disable_aspm(pcr, false); +} + +static void rts5261_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &pcr->option; + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* l1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops rts5261_pcr_ops = { + .fetch_vendor_settings = rtsx5261_fetch_vendor_settings, + .turn_on_led = rts5261_turn_on_led, + .turn_off_led = rts5261_turn_off_led, + .extra_init_hw = rts5261_extra_init_hw, + .enable_auto_blink = rts5261_enable_auto_blink, + .disable_auto_blink = rts5261_disable_auto_blink, + .card_power_on = rts5261_card_power_on, + .card_power_off = rts5261_card_power_off, + .switch_output_voltage = rts5261_switch_output_voltage, + .force_power_down = rts5261_force_power_down, + .stop_cmd = rts5261_stop_cmd, + .set_aspm = rts5261_set_aspm, + .set_l1off_cfg_sub_d0 = rts5261_set_l1off_cfg_sub_d0, + .enable_ocp = rts5261_enable_ocp, + .disable_ocp = rts5261_disable_ocp, + .init_ocp = rts5261_init_ocp, + .process_ocp = rts5261_process_ocp, + .clear_ocpstat = rts5261_clear_ocpstat, +}; + +static inline u8 double_ssc_depth(u8 depth) +{ + return ((depth > 1) ? 
(depth - 1) : depth); +} + +int rts5261_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int err, clk; + u8 n, clk_divider, mcu_cnt, div; + static const u8 depth[] = { + [RTSX_SSC_DEPTH_4M] = RTS5261_SSC_DEPTH_4M, + [RTSX_SSC_DEPTH_2M] = RTS5261_SSC_DEPTH_2M, + [RTSX_SSC_DEPTH_1M] = RTS5261_SSC_DEPTH_1M, + [RTSX_SSC_DEPTH_500K] = RTS5261_SSC_DEPTH_512K, + }; + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + err = rtsx_pci_write_register(pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (err < 0) + return err; + + card_clock /= 1000000; + pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock); + + clk = card_clock; + if (!initial_mode && double_clk) + clk = card_clock * 2; + pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n", + clk, pcr->cur_clock); + + if (clk == pcr->cur_clock) + return 0; + + if (pcr->ops->conv_clk_and_div_n) + n = (u8)pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N); + else + n = (u8)(clk - 4); + if ((clk <= 4) || (n > 396)) + return -EINVAL; + + mcu_cnt = (u8)(125/clk + 3); + if (mcu_cnt > 15) + mcu_cnt = 15; + + div = CLK_DIV_1; + while ((n < MIN_DIV_N_PCR - 4) && (div < CLK_DIV_8)) { + if (pcr->ops->conv_clk_and_div_n) { + int dbl_clk = pcr->ops->conv_clk_and_div_n(n, + DIV_N_TO_CLK) * 2; + n = (u8)pcr->ops->conv_clk_and_div_n(dbl_clk, + CLK_TO_DIV_N); + } else { + n = (n + 4) * 2 - 4; + } + div++; + } + + n = (n / 2); + pcr_dbg(pcr, "n = %d, div = %d\n", n, div); + + ssc_depth = depth[ssc_depth]; + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + if (ssc_depth) { + if (div == CLK_DIV_2) { + if (ssc_depth > 1) + ssc_depth -= 1; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } else if (div == CLK_DIV_4) { + if (ssc_depth > 2) + ssc_depth -= 2; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } else if (div 
== CLK_DIV_8) { + if (ssc_depth > 3) + ssc_depth -= 3; + else + ssc_depth = RTS5261_SSC_DEPTH_8M; + } + } else { + ssc_depth = 0; + } + pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, + CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, + 0xFF, (div << 4) | mcu_cnt); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + err = rtsx_pci_send_cmd(pcr, 2000); + if (err < 0) + return err; + + /* Wait SSC clock stable */ + udelay(SSC_CLOCK_STABLE_WAIT); + err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + if (err < 0) + return err; + + pcr->cur_clock = clk; + return 0; + +} + +void rts5261_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + struct rtsx_hw_param *hw_param = &pcr->hw_param; + + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 1; + pcr->ops = &rts5261_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 27, 16); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5261_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5261_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = 
rts5261_sd_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = RTS5261_AUTOLOAD_CFG3; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = 0x7F; + option->ltr_l1off_snooze_sspwrgate = 0x78; + option->dev_aspm_mode = DEV_ASPM_DYNAMIC; + + option->ocp_en = 1; + hw_param->interrupt_en |= SD_OC_INT_EN; + hw_param->ocp_glitch = SD_OCP_GLITCH_800U; + option->sd_800mA_ocp_thd = RTS5261_LDO1_OCP_THD_1040; +} diff --git a/drivers/misc/cardreader/rts5261.h b/drivers/misc/cardreader/rts5261.h new file mode 100644 index 000000000000..ebfdd236a553 --- /dev/null +++ b/drivers/misc/cardreader/rts5261.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved. 
+ * + * Author: + * Rui FENG + * Wei WANG + */ +#ifndef RTS5261_H +#define RTS5261_H + +/*New add*/ +#define rts5261_vendor_setting_valid(reg) ((reg) & 0x010000) +#define rts5261_reg_to_aspm(reg) (((reg) >> 28) ^ 0x03) +#define rts5261_reg_check_reverse_socket(reg) ((reg) & 0x04) +#define rts5261_reg_to_card_drive_sel(reg) ((((reg) >> 6) & 0x01) << 6) +#define rts5261_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 22) ^ 0x03) +#define rts5261_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 16) ^ 0x03) + + +#define RTS5261_AUTOLOAD_CFG0 0xFF7B +#define RTS5261_AUTOLOAD_CFG1 0xFF7C +#define RTS5261_AUTOLOAD_CFG2 0xFF7D +#define RTS5261_AUTOLOAD_CFG3 0xFF7E +#define RTS5261_AUTOLOAD_CFG4 0xFF7F +#define RTS5261_FORCE_PRSNT_LOW (1 << 6) +#define RTS5261_AUX_CLK_16M_EN (1 << 5) + +#define RTS5261_REG_VREF 0xFE97 +#define RTS5261_PWD_SUSPND_EN (1 << 4) + +#define RTS5261_PAD_H3L1 0xFF79 +#define PAD_GPIO_H3L1 (1 << 3) + +/* SSC_CTL2 0xFC12 */ +#define RTS5261_SSC_DEPTH_MASK 0x07 +#define RTS5261_SSC_DEPTH_DISALBE 0x00 +#define RTS5261_SSC_DEPTH_8M 0x01 +#define RTS5261_SSC_DEPTH_4M 0x02 +#define RTS5261_SSC_DEPTH_2M 0x03 +#define RTS5261_SSC_DEPTH_1M 0x04 +#define RTS5261_SSC_DEPTH_512K 0x05 +#define RTS5261_SSC_DEPTH_256K 0x06 +#define RTS5261_SSC_DEPTH_128K 0x07 + +/* efuse control register*/ +#define RTS5261_EFUSE_CTL 0xFC30 +#define RTS5261_EFUSE_ENABLE 0x80 +/* EFUSE_MODE: 0=READ 1=PROGRAM */ +#define RTS5261_EFUSE_MODE_MASK 0x40 +#define RTS5261_EFUSE_PROGRAM 0x40 + +#define RTS5261_EFUSE_ADDR 0xFC31 +#define RTS5261_EFUSE_ADDR_MASK 0x3F + +#define RTS5261_EFUSE_WRITE_DATA 0xFC32 +#define RTS5261_EFUSE_READ_DATA 0xFC34 + +/* DMACTL 0xFE2C */ +#define RTS5261_DMA_PACK_SIZE_MASK 0xF0 + +/* FW config info register */ +#define RTS5261_FW_CFG_INFO0 0xFF50 +#define RTS5261_FW_EXPRESS_TEST_MASK (0x01<<0) +#define RTS5261_FW_EA_MODE_MASK (0x01<<5) + +/* FW config register */ +#define RTS5261_FW_CFG0 0xFF54 +#define RTS5261_FW_ENTER_EXPRESS (0x01<<0) + +#define RTS5261_FW_CFG1 0xFF55 
+#define RTS5261_SYS_CLK_SEL_MCU_CLK (0x01<<7) +#define RTS5261_CRC_CLK_SEL_MCU_CLK (0x01<<6) +#define RTS5261_FAKE_MCU_CLOCK_GATING (0x01<<5) +/*MCU_bus_mode_sel: 0=real 8051 1=fake mcu*/ +#define RTS5261_MCU_BUS_SEL_MASK (0x01<<4) +/*MCU_clock_sel:VerA 00=aux16M 01=aux400K 1x=REFCLK100M*/ +/*MCU_clock_sel:VerB 00=aux400K 01=aux16M 10=REFCLK100M*/ +#define RTS5261_MCU_CLOCK_SEL_MASK (0x03<<2) +#define RTS5261_MCU_CLOCK_SEL_16M (0x01<<2) +#define RTS5261_MCU_CLOCK_GATING (0x01<<1) +#define RTS5261_DRIVER_ENABLE_FW (0x01<<0) + +/* FW status register */ +#define RTS5261_FW_STATUS 0xFF56 +#define RTS5261_EXPRESS_LINK_FAIL_MASK (0x01<<7) + +/* FW control register */ +#define RTS5261_FW_CTL 0xFF5F +#define RTS5261_INFORM_RTD3_COLD (0x01<<5) + +#define RTS5261_REG_FPDCTL 0xFF60 + +#define RTS5261_REG_LDO12_CFG 0xFF6E +#define RTS5261_LDO12_VO_TUNE_MASK (0x07<<1) +#define RTS5261_LDO12_115 (0x03<<1) +#define RTS5261_LDO12_120 (0x04<<1) +#define RTS5261_LDO12_125 (0x05<<1) +#define RTS5261_LDO12_130 (0x06<<1) +#define RTS5261_LDO12_135 (0x07<<1) + +/* LDO control register */ +#define RTS5261_CARD_PWR_CTL 0xFD50 +#define RTS5261_SD_CLK_ISO (0x01<<7) +#define RTS5261_PAD_SD_DAT_FW_CTRL (0x01<<6) +#define RTS5261_PUPDC (0x01<<5) +#define RTS5261_SD_CMD_ISO (0x01<<4) +#define RTS5261_SD_DAT_ISO_MASK (0x0F<<0) + +#define RTS5261_LDO1233318_POW_CTL 0xFF70 +#define RTS5261_LDO3318_POWERON (0x01<<3) +#define RTS5261_LDO3_POWERON (0x01<<2) +#define RTS5261_LDO2_POWERON (0x01<<1) +#define RTS5261_LDO1_POWERON (0x01<<0) +#define RTS5261_LDO_POWERON_MASK (0x0F<<0) + +#define RTS5261_DV3318_CFG 0xFF71 +#define RTS5261_DV3318_TUNE_MASK (0x07<<4) +#define RTS5261_DV3318_18 (0x02<<4) +#define RTS5261_DV3318_19 (0x04<<4) +#define RTS5261_DV3318_33 (0x07<<4) + +#define RTS5261_LDO1_CFG0 0xFF72 +#define RTS5261_LDO1_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO1_OCP_EN (0x01<<4) +#define RTS5261_LDO1_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO1_OCP_LMT_EN (0x01<<1) + +/* CRD6603-433 190319 
request changed */ +#define RTS5261_LDO1_OCP_THD_740 (0x00<<5) +#define RTS5261_LDO1_OCP_THD_800 (0x01<<5) +#define RTS5261_LDO1_OCP_THD_860 (0x02<<5) +#define RTS5261_LDO1_OCP_THD_920 (0x03<<5) +#define RTS5261_LDO1_OCP_THD_980 (0x04<<5) +#define RTS5261_LDO1_OCP_THD_1040 (0x05<<5) +#define RTS5261_LDO1_OCP_THD_1100 (0x06<<5) +#define RTS5261_LDO1_OCP_THD_1160 (0x07<<5) + +#define RTS5261_LDO1_LMT_THD_450 (0x00<<2) +#define RTS5261_LDO1_LMT_THD_1000 (0x01<<2) +#define RTS5261_LDO1_LMT_THD_1500 (0x02<<2) +#define RTS5261_LDO1_LMT_THD_2000 (0x03<<2) + +#define RTS5261_LDO1_CFG1 0xFF73 +#define RTS5261_LDO1_TUNE_MASK (0x07<<1) +#define RTS5261_LDO1_18 (0x05<<1) +#define RTS5261_LDO1_33 (0x07<<1) +#define RTS5261_LDO1_PWD_MASK (0x01<<0) + +#define RTS5261_LDO2_CFG0 0xFF74 +#define RTS5261_LDO2_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO2_OCP_EN (0x01<<4) +#define RTS5261_LDO2_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO2_OCP_LMT_EN (0x01<<1) + +#define RTS5261_LDO2_OCP_THD_620 (0x00<<5) +#define RTS5261_LDO2_OCP_THD_650 (0x01<<5) +#define RTS5261_LDO2_OCP_THD_680 (0x02<<5) +#define RTS5261_LDO2_OCP_THD_720 (0x03<<5) +#define RTS5261_LDO2_OCP_THD_750 (0x04<<5) +#define RTS5261_LDO2_OCP_THD_780 (0x05<<5) +#define RTS5261_LDO2_OCP_THD_810 (0x06<<5) +#define RTS5261_LDO2_OCP_THD_840 (0x07<<5) + +#define RTS5261_LDO2_CFG1 0xFF75 +#define RTS5261_LDO2_TUNE_MASK (0x07<<1) +#define RTS5261_LDO2_18 (0x05<<1) +#define RTS5261_LDO2_33 (0x07<<1) +#define RTS5261_LDO2_PWD_MASK (0x01<<0) + +#define RTS5261_LDO3_CFG0 0xFF76 +#define RTS5261_LDO3_OCP_THD_MASK (0x07<<5) +#define RTS5261_LDO3_OCP_EN (0x01<<4) +#define RTS5261_LDO3_OCP_LMT_THD_MASK (0x03<<2) +#define RTS5261_LDO3_OCP_LMT_EN (0x01<<1) + +#define RTS5261_LDO3_OCP_THD_620 (0x00<<5) +#define RTS5261_LDO3_OCP_THD_650 (0x01<<5) +#define RTS5261_LDO3_OCP_THD_680 (0x02<<5) +#define RTS5261_LDO3_OCP_THD_720 (0x03<<5) +#define RTS5261_LDO3_OCP_THD_750 (0x04<<5) +#define RTS5261_LDO3_OCP_THD_780 (0x05<<5) +#define 
RTS5261_LDO3_OCP_THD_810 (0x06<<5) +#define RTS5261_LDO3_OCP_THD_840 (0x07<<5) + +#define RTS5261_LDO3_CFG1 0xFF77 +#define RTS5261_LDO3_TUNE_MASK (0x07<<1) +#define RTS5261_LDO3_18 (0x05<<1) +#define RTS5261_LDO3_33 (0x07<<1) +#define RTS5261_LDO3_PWD_MASK (0x01<<0) + +#define RTS5261_REG_PME_FORCE_CTL 0xFF78 +#define FORCE_PM_CONTROL 0x20 +#define FORCE_PM_VALUE 0x10 +#define REG_EFUSE_BYPASS 0x08 +#define REG_EFUSE_POR 0x04 +#define REG_EFUSE_POWER_MASK 0x03 +#define REG_EFUSE_POWERON 0x03 +#define REG_EFUSE_POWEROFF 0x00 + + +/* Single LUN, support SD/SD EXPRESS */ +#define DEFAULT_SINGLE 0 +#define SD_LUN 1 +#define SD_EXPRESS_LUN 2 + +/* For Change_FPGA_SSCClock Function */ +#define MULTIPLY_BY_1 0x00 +#define MULTIPLY_BY_2 0x01 +#define MULTIPLY_BY_3 0x02 +#define MULTIPLY_BY_4 0x03 +#define MULTIPLY_BY_5 0x04 +#define MULTIPLY_BY_6 0x05 +#define MULTIPLY_BY_7 0x06 +#define MULTIPLY_BY_8 0x07 +#define MULTIPLY_BY_9 0x08 +#define MULTIPLY_BY_10 0x09 + +#define DIVIDE_BY_2 0x01 +#define DIVIDE_BY_3 0x02 +#define DIVIDE_BY_4 0x03 +#define DIVIDE_BY_5 0x04 +#define DIVIDE_BY_6 0x05 +#define DIVIDE_BY_7 0x06 +#define DIVIDE_BY_8 0x07 +#define DIVIDE_BY_9 0x08 +#define DIVIDE_BY_10 0x09 + +int rts5261_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); + +#endif /* RTS5261_H */ diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index b4a66b64f742..fd7b2167103d 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -22,6 +22,7 @@ #include #include "rtsx_pcr.h" +#include "rts5261.h" static bool msi_en = true; module_param(msi_en, bool, S_IRUGO | S_IWUSR); @@ -34,9 +35,6 @@ static struct mfd_cell rtsx_pcr_cells[] = { [RTSX_SD_CARD] = { .name = DRV_NAME_RTSX_PCI_SDMMC, }, - [RTSX_MS_CARD] = { - .name = DRV_NAME_RTSX_PCI_MS, - }, }; static const struct pci_device_id rtsx_pci_ids[] = { @@ -51,6 +49,7 @@ static const struct 
pci_device_id rtsx_pci_ids[] = { { PCI_DEVICE(0x10EC, 0x524A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x525A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { PCI_DEVICE(0x10EC, 0x5260), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5261), PCI_CLASS_OTHERS << 16, 0xFF0000 }, { 0, } }; @@ -438,8 +437,16 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, if (end) option |= RTSX_SG_END; - val = ((u64)addr << 32) | ((u64)len << 12) | option; + if (PCI_PID(pcr) == PID_5261) { + if (len > 0xFFFF) + val = ((u64)addr << 32) | (((u64)len & 0xFFFF) << 16) + | (((u64)len >> 16) << 6) | option; + else + val = ((u64)addr << 32) | ((u64)len << 16) | option; + } else { + val = ((u64)addr << 32) | ((u64)len << 12) | option; + } put_unaligned_le64(val, ptr); pcr->sgi++; } @@ -684,7 +691,6 @@ int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card) else return -EINVAL; - return rtsx_pci_set_pull_ctl(pcr, tbl); } EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable); @@ -735,6 +741,10 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, [RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K, }; + if (PCI_PID(pcr) == PID_5261) + return rts5261_pci_switch_clock(pcr, card_clock, + ssc_depth, initial_mode, double_clk, vpclk); + if (initial_mode) { /* We use 250k(around) here, in initial stage */ clk_divider = SD_CLK_DIVIDE_128; @@ -1253,7 +1263,15 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) rtsx_pci_enable_bus_int(pcr); /* Power on SSC */ - err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); + if (PCI_PID(pcr) == PID_5261) { + /* Gating real mcu clock */ + err = rtsx_pci_write_register(pcr, RTS5261_FW_CFG1, + RTS5261_MCU_CLOCK_GATING, 0); + err = rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL, + SSC_POWER_DOWN, 0); + } else { + err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); + } if (err < 0) return err; @@ -1283,7 +1301,12 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) /* Enable SSC Clock */ 
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, 0xFF, SSC_8X_EN | SSC_SEL_4M); - rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); + if (PCI_PID(pcr) == PID_5261) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, + RTS5261_SSC_DEPTH_2M); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); + /* Disable cd_pwr_save */ rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10); /* Clear Link Ready Interrupt */ @@ -1314,6 +1337,7 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) case PID_524A: case PID_525A: case PID_5260: + case PID_5261: rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 1, 1); break; default: @@ -1393,9 +1417,14 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) case 0x5286: rtl8402_init_params(pcr); break; + case 0x5260: rts5260_init_params(pcr); break; + + case 0x5261: + rts5261_init_params(pcr); + break; } pcr_dbg(pcr, "PID: 0x%04x, IC version: 0x%02x\n", diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h index 98f729263dc1..ed391df52f4f 100644 --- a/drivers/misc/cardreader/rtsx_pcr.h +++ b/drivers/misc/cardreader/rtsx_pcr.h @@ -53,6 +53,7 @@ void rts524a_init_params(struct rtsx_pcr *pcr); void rts525a_init_params(struct rtsx_pcr *pcr); void rtl8411b_init_params(struct rtsx_pcr *pcr); void rts5260_init_params(struct rtsx_pcr *pcr); +void rts5261_init_params(struct rtsx_pcr *pcr); static inline u8 map_sd_drive(int idx) { -- cgit From 64498695dd800bcb494d96b94f99356ece91ea90 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sat, 16 Nov 2019 16:21:36 +0200 Subject: mei: bus: add more client attributes to sysfs Export more client attributes via sysfs that are usually obtained upon connection. In some cases, for example a monitoring application may wish to know the attributes without actually performing the connection. Added attributes: max number of connections, fixed address, max message length. 
Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20191116142136.17535-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 33 +++++++++++++++++++++++++++++++++ drivers/misc/mei/client.h | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) (limited to 'drivers/misc') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 53bb394ccba6..a0a495c95e3c 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -791,11 +791,44 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a, } static DEVICE_ATTR_RO(modalias); +static ssize_t max_conn_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 maxconn = mei_me_cl_max_conn(cldev->me_cl); + + return scnprintf(buf, PAGE_SIZE, "%d", maxconn); +} +static DEVICE_ATTR_RO(max_conn); + +static ssize_t fixed_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 fixed = mei_me_cl_fixed(cldev->me_cl); + + return scnprintf(buf, PAGE_SIZE, "%d", fixed); +} +static DEVICE_ATTR_RO(fixed); + +static ssize_t max_len_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u32 maxlen = mei_me_cl_max_len(cldev->me_cl); + + return scnprintf(buf, PAGE_SIZE, "%u", maxlen); +} +static DEVICE_ATTR_RO(max_len); + static struct attribute *mei_cldev_attrs[] = { &dev_attr_name.attr, &dev_attr_uuid.attr, &dev_attr_version.attr, &dev_attr_modalias.attr, + &dev_attr_max_conn.attr, + &dev_attr_fixed.attr, + &dev_attr_max_len.attr, NULL, }; ATTRIBUTE_GROUPS(mei_cldev); diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index c1f9e810cf81..2f8954def591 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -69,6 +69,42 @@ static inline u8 mei_me_cl_ver(const 
struct mei_me_client *me_cl) return me_cl->props.protocol_version; } +/** + * mei_me_cl_max_conn - return me client max number of connections + * + * @me_cl: me client + * + * Return: me client max number of connections + */ +static inline u8 mei_me_cl_max_conn(const struct mei_me_client *me_cl) +{ + return me_cl->props.max_number_of_connections; +} + +/** + * mei_me_cl_fixed - return me client fixed address, if any + * + * @me_cl: me client + * + * Return: me client fixed address + */ +static inline u8 mei_me_cl_fixed(const struct mei_me_client *me_cl) +{ + return me_cl->props.fixed_address; +} + +/** + * mei_me_cl_max_len - return me client max msg length + * + * @me_cl: me client + * + * Return: me client max msg length + */ +static inline u32 mei_me_cl_max_len(const struct mei_me_client *me_cl) +{ + return me_cl->props.max_msg_length; +} + /* * MEI IO Functions */ -- cgit From ab64ec1db25e0cceab0bad15b03fd57e2b461b15 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 20 Nov 2019 21:40:56 +0800 Subject: misc: Fix Kconfig indentation Adjust indentation from spaces to tab (+optional two spaces) as in coding style with command like: $ sed -e 's/^ /\t/' -i */Kconfig Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20191120134056.14677-1-krzk@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 2fefecef6e06..7f0d48f406e3 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -325,14 +325,14 @@ config SENSORS_TSL2550 will be called tsl2550. 
config SENSORS_BH1770 - tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" - depends on I2C - ---help--- - Say Y here if you want to build a driver for BH1770GLC (ROHM) or + tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" + depends on I2C + ---help--- + Say Y here if you want to build a driver for BH1770GLC (ROHM) or SFH7770 (Osram) combined ambient light and proximity sensor chip. - To compile this driver as a module, choose M here: the - module will be called bh1770glc. If unsure, say N here. + To compile this driver as a module, choose M here: the + module will be called bh1770glc. If unsure, say N here. config SENSORS_APDS990X tristate "APDS990X combined als and proximity sensors" @@ -437,8 +437,8 @@ config PCI_ENDPOINT_TEST select CRC32 tristate "PCI Endpoint Test driver" ---help--- - Enable this configuration option to enable the host side test driver - for PCI Endpoint. + Enable this configuration option to enable the host side test driver + for PCI Endpoint. config XILINX_SDFEC tristate "Xilinx SDFEC 16" -- cgit From abb7e16fb6dfc32d0c0e63787409fdb4c348915c Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 16 Sep 2019 09:16:29 +0300 Subject: habanalabs: handle F/W failure for sensor initialization In case the F/W fails to initialize the thermal sensors, print an appropriate error message to kernel log and fail the device initialization. 
Reviewed-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 5 +++++ drivers/misc/habanalabs/include/hl_boot_if.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 6fba14b81f90..09caef7642fd 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2328,6 +2328,11 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) "ARM status %d - u-boot stopped by user\n", status); break; + case CPU_BOOT_STATUS_TS_INIT_FAIL: + dev_err(hdev->dev, + "ARM status %d - Thermal Sensor initialization failed\n", + status); + break; default: dev_err(hdev->dev, "ARM status %d - Invalid status code\n", diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h index 4cd04c090285..2853a2de8cf6 100644 --- a/drivers/misc/habanalabs/include/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/hl_boot_if.h @@ -20,6 +20,8 @@ enum cpu_boot_status { CPU_BOOT_STATUS_DRAM_INIT_FAIL, CPU_BOOT_STATUS_FIT_CORRUPTED, CPU_BOOT_STATUS_UBOOT_NOT_READY, + CPU_BOOT_STATUS_RESERVED, + CPU_BOOT_STATUS_TS_INIT_FAIL, }; enum kmd_msg { -- cgit From 1e295d4dd5b2e3d9dab68bfcab1f5666cde3804d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 28 Sep 2019 12:18:04 +0800 Subject: habanalabs: remove set but not used variable 'ctx' Fixes gcc '-Wunused-but-set-variable' warning: drivers/misc/habanalabs/device.c: In function hpriv_release: drivers/misc/habanalabs/device.c:45:17: warning: variable ctx set but not used [-Wunused-but-set-variable] It is never used since commit eb7caf84b029 ("habanalabs: maintain a list of file private data objects") Reported-by: Hulk Robot Signed-off-by: YueHaibing Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/misc') diff --git 
a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 459fee70a597..2f5a4da707e7 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -42,12 +42,10 @@ static void hpriv_release(struct kref *ref) { struct hl_fpriv *hpriv; struct hl_device *hdev; - struct hl_ctx *ctx; hpriv = container_of(ref, struct hl_fpriv, refcount); hdev = hpriv->hdev; - ctx = hpriv->ctx; put_pid(hpriv->taskpid); -- cgit From f435614ff55c6783919028cb914ffd7422e0b03b Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 2 Oct 2019 13:53:52 +0000 Subject: habanalabs: Fix typos s/paerser/parser/ s/requeusted/requested/ s/an JOB/a JOB/ Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/habanalabs.h | 2 +- drivers/misc/habanalabs/hw_queue.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 75862be53c60..c3d24ffad9fa 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -774,7 +774,7 @@ struct hl_cs_job { }; /** - * struct hl_cs_parser - command submission paerser properties. + * struct hl_cs_parser - command submission parser properties. * @user_cb: the CB we got from the user. * @patched_cb: in case of patching, this is internal CB which is submitted on * the queue instead of the CB we got from the IOCTL. 
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c index 55b383b2a116..f733b534f738 100644 --- a/drivers/misc/habanalabs/hw_queue.c +++ b/drivers/misc/habanalabs/hw_queue.c @@ -220,7 +220,7 @@ out: } /* - * ext_hw_queue_schedule_job - submit an JOB to an external queue + * ext_hw_queue_schedule_job - submit a JOB to an external queue * * @job: pointer to the job that needs to be submitted to the queue * @@ -278,7 +278,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job) } /* - * int_hw_queue_schedule_job - submit an JOB to an internal queue + * int_hw_queue_schedule_job - submit a JOB to an internal queue * * @job: pointer to the job that needs to be submitted to the queue * -- cgit From df762375f17e1765bc3a0b345378e1726d85ca75 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 3 Oct 2019 15:22:35 +0000 Subject: habanalabs: Mark queue as expecting CB handle or address Jobs on some queues must be provided with a handle to a driver command buffer object, while for other queues, jobs must be provided with an address to a command buffer. Currently the distinction is done based on the queue type, which is less flexible if the same queue type behaves differently on different types of ASICs. This patch adds a new queue property for this target, which is configured per queue type per ASIC type. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/command_submission.c | 4 +++- drivers/misc/habanalabs/goya/goya.c | 3 +++ drivers/misc/habanalabs/habanalabs.h | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index a9ac045dcfde..f44205540520 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -414,7 +414,9 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev, "Queue index %d is restricted for the kernel driver\n", chunk->queue_index); return NULL; - } else if (hw_queue_prop->type == QUEUE_TYPE_INT) { + } + + if (!hw_queue_prop->requires_kernel_cb) { *ext_queue = false; return (struct hl_cb *) (uintptr_t) chunk->cb_handle; } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 09caef7642fd..71693fcffb16 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -337,17 +337,20 @@ void goya_get_fixed_properties(struct hl_device *hdev) for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 1; } for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; prop->hw_queues_props[i].driver_only = 1; + prop->hw_queues_props[i].requires_kernel_cb = 0; } for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES + NUMBER_OF_INT_HW_QUEUES; i++) { prop->hw_queues_props[i].type = QUEUE_TYPE_INT; prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 0; } for (; i < HL_MAX_QUEUES; i++) diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index c3d24ffad9fa..f47f4b22cb6b 100644 
--- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -98,10 +98,13 @@ enum hl_queue_type { * @type: queue type. * @driver_only: true if only the driver is allowed to send a job to this queue, * false otherwise. + * @requires_kernel_cb: true if a CB handle must be provided for jobs on this + * queue, false otherwise (a CB address must be provided). */ struct hw_queue_properties { enum hl_queue_type type; u8 driver_only; + u8 requires_kernel_cb; }; /** -- cgit From cb596aee8842c87605ea1a9062af2ab435a742d4 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 3 Oct 2019 15:22:36 +0000 Subject: habanalabs: Add a new H/W queue type This patch adds support for a new H/W queue type. This type of queue is for DMA and compute engines jobs, for which completion notifications are sent by H/W. A command buffer for this queue can be created either through the CB IOCTL and using the retrieved CB handle, or by preparing a buffer on the host or device SRAM/DRAM, and using the device address to that buffer. The patch includes the handling of the 2 options, as well as the initialization of the H/W queue and its job scheduling.
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/command_submission.c | 120 +++++++++---- drivers/misc/habanalabs/goya/goya.c | 4 +- drivers/misc/habanalabs/habanalabs.h | 24 ++- drivers/misc/habanalabs/hw_queue.c | 249 +++++++++++++++++++++------ drivers/misc/habanalabs/include/qman_if.h | 12 ++ 5 files changed, 308 insertions(+), 101 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index f44205540520..776ddafc47fb 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -65,6 +65,18 @@ static void cs_put(struct hl_cs *cs) kref_put(&cs->refcount, cs_do_release); } +static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) +{ + /* + * Patched CB is created for external queues jobs, and for H/W queues + * jobs if the user CB was allocated by driver and MMU is disabled. 
+ */ + return (job->queue_type == QUEUE_TYPE_EXT || + (job->queue_type == QUEUE_TYPE_HW && + job->is_kernel_allocated_cb && + !hdev->mmu_enable)); +} + /* * cs_parser - parse the user command submission * @@ -91,11 +103,13 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) parser.patched_cb = NULL; parser.user_cb = job->user_cb; parser.user_cb_size = job->user_cb_size; - parser.ext_queue = job->ext_queue; + parser.queue_type = job->queue_type; + parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; job->patched_cb = NULL; rc = hdev->asic_funcs->cs_parser(hdev, &parser); - if (job->ext_queue) { + + if (is_cb_patched(hdev, job)) { if (!rc) { job->patched_cb = parser.patched_cb; job->job_cb_size = parser.patched_cb_size; @@ -124,7 +138,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) { struct hl_cs *cs = job->cs; - if (job->ext_queue) { + if (is_cb_patched(hdev, job)) { hl_userptr_delete_list(hdev, &job->userptr_list); /* @@ -140,6 +154,19 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) } } + /* For H/W queue jobs, if a user CB was allocated by driver and MMU is + * enabled, the user CB isn't released in cs_parser() and thus should be + * released here. 
+ */ + if (job->queue_type == QUEUE_TYPE_HW && + job->is_kernel_allocated_cb && hdev->mmu_enable) { + spin_lock(&job->user_cb->lock); + job->user_cb->cs_cnt--; + spin_unlock(&job->user_cb->lock); + + hl_cb_put(job->user_cb); + } + /* * This is the only place where there can be multiple threads * modifying the list at the same time @@ -150,7 +177,8 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) hl_debugfs_remove_job(hdev, job); - if (job->ext_queue) + if (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW) cs_put(cs); kfree(job); @@ -387,18 +415,13 @@ static void job_wq_completion(struct work_struct *work) free_job(hdev, job); } -static struct hl_cb *validate_queue_index(struct hl_device *hdev, - struct hl_cb_mgr *cb_mgr, - struct hl_cs_chunk *chunk, - bool *ext_queue) +static int validate_queue_index(struct hl_device *hdev, + struct hl_cs_chunk *chunk, + enum hl_queue_type *queue_type, + bool *is_kernel_allocated_cb) { struct asic_fixed_properties *asic = &hdev->asic_prop; struct hw_queue_properties *hw_queue_prop; - u32 cb_handle; - struct hl_cb *cb; - - /* Assume external queue */ - *ext_queue = true; hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; @@ -406,22 +429,29 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev, (hw_queue_prop->type == QUEUE_TYPE_NA)) { dev_err(hdev->dev, "Queue index %d is invalid\n", chunk->queue_index); - return NULL; + return -EINVAL; } if (hw_queue_prop->driver_only) { dev_err(hdev->dev, "Queue index %d is restricted for the kernel driver\n", chunk->queue_index); - return NULL; + return -EINVAL; } - if (!hw_queue_prop->requires_kernel_cb) { - *ext_queue = false; - return (struct hl_cb *) (uintptr_t) chunk->cb_handle; - } + *queue_type = hw_queue_prop->type; + *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb; + + return 0; +} + +static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, + struct hl_cb_mgr *cb_mgr, + struct hl_cs_chunk *chunk) 
+{ + struct hl_cb *cb; + u32 cb_handle; - /* Retrieve CB object */ cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); cb = hl_cb_get(hdev, cb_mgr, cb_handle); @@ -446,7 +476,8 @@ release_cb: return NULL; } -struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue) +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, + enum hl_queue_type queue_type, bool is_kernel_allocated_cb) { struct hl_cs_job *job; @@ -454,12 +485,14 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue) if (!job) return NULL; - job->ext_queue = ext_queue; + job->queue_type = queue_type; + job->is_kernel_allocated_cb = is_kernel_allocated_cb; - if (job->ext_queue) { + if (is_cb_patched(hdev, job)) INIT_LIST_HEAD(&job->userptr_list); + + if (job->queue_type == QUEUE_TYPE_EXT) INIT_WORK(&job->finish_work, job_wq_completion); - } return job; } @@ -472,7 +505,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, struct hl_cs_job *job; struct hl_cs *cs; struct hl_cb *cb; - bool ext_queue_present = false; + bool int_queues_only = true; u32 size_to_copy; int rc, i, parse_cnt; @@ -516,23 +549,33 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, /* Validate ALL the CS chunks before submitting the CS */ for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) { struct hl_cs_chunk *chunk = &cs_chunk_array[i]; - bool ext_queue; + enum hl_queue_type queue_type; + bool is_kernel_allocated_cb; - cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk, - &ext_queue); - if (ext_queue) { - ext_queue_present = true; + rc = validate_queue_index(hdev, chunk, &queue_type, + &is_kernel_allocated_cb); + if (rc) + goto free_cs_object; + + if (is_kernel_allocated_cb) { + cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); if (!cb) { rc = -EINVAL; goto free_cs_object; } + } else { + cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; } - job = hl_cs_allocate_job(hdev, ext_queue); + if (queue_type == QUEUE_TYPE_EXT || 
queue_type == QUEUE_TYPE_HW) + int_queues_only = false; + + job = hl_cs_allocate_job(hdev, queue_type, + is_kernel_allocated_cb); if (!job) { dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; - if (ext_queue) + if (is_kernel_allocated_cb) goto release_cb; else goto free_cs_object; @@ -542,7 +585,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, job->cs = cs; job->user_cb = cb; job->user_cb_size = chunk->cb_size; - if (job->ext_queue) + if (is_kernel_allocated_cb) job->job_cb_size = cb->size; else job->job_cb_size = chunk->cb_size; @@ -555,10 +598,11 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, /* * Increment CS reference. When CS reference is 0, CS is * done and can be signaled to user and free all its resources - * Only increment for JOB on external queues, because only - * for those JOBs we get completion + * Only increment for JOB on external or H/W queues, because + * only for those JOBs we get completion */ - if (job->ext_queue) + if (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW) cs_get(cs); hl_debugfs_add_job(hdev, job); @@ -572,9 +616,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, } } - if (!ext_queue_present) { + if (int_queues_only) { dev_err(hdev->dev, - "Reject CS %d.%llu because no external queues jobs\n", + "Reject CS %d.%llu because only internal queues jobs are present\n", cs->ctx->asid, cs->sequence); rc = -EINVAL; goto free_cs_object; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 71693fcffb16..0b40915bede2 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3943,7 +3943,7 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) { struct goya_device *goya = hdev->asic_specific; - if (!parser->ext_queue) + if (parser->queue_type == QUEUE_TYPE_INT) return goya_parse_cb_no_ext_queue(hdev, parser); if (goya->hw_cap_initialized 
& HW_CAP_MMU) @@ -4614,7 +4614,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, lin_dma_pkt++; } while (--lin_dma_pkts_cnt); - job = hl_cs_allocate_job(hdev, true); + job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); if (!job) { dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index f47f4b22cb6b..371d1ec15697 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -85,12 +85,15 @@ struct hl_fpriv; * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's * memories and/or operates the compute engines. * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU. + * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion + * notifications are sent by H/W. */ enum hl_queue_type { QUEUE_TYPE_NA, QUEUE_TYPE_EXT, QUEUE_TYPE_INT, - QUEUE_TYPE_CPU + QUEUE_TYPE_CPU, + QUEUE_TYPE_HW }; /** @@ -755,11 +758,14 @@ struct hl_cs { * @userptr_list: linked-list of userptr mappings that belong to this job and * wait for completion. * @debugfs_list: node in debugfs list of command submission jobs. + * @queue_type: the type of the H/W queue this job is submitted to. * @id: the id of this job inside a CS. * @hw_queue_id: the id of the H/W queue this job is submitted to. * @user_cb_size: the actual size of the CB we got from the user. * @job_cb_size: the actual size of the CB that we put on the queue. - * @ext_queue: whether the job is for external queue or internal queue. + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a + * handle to a kernel-allocated CB object, false + * otherwise (SRAM/DRAM/host address). 
*/ struct hl_cs_job { struct list_head cs_node; @@ -769,11 +775,12 @@ struct hl_cs_job { struct work_struct finish_work; struct list_head userptr_list; struct list_head debugfs_list; + enum hl_queue_type queue_type; u32 id; u32 hw_queue_id; u32 user_cb_size; u32 job_cb_size; - u8 ext_queue; + u8 is_kernel_allocated_cb; }; /** @@ -784,24 +791,28 @@ struct hl_cs_job { * @job_userptr_list: linked-list of userptr mappings that belong to the related * job and wait for completion. * @cs_sequence: the sequence number of the related CS. + * @queue_type: the type of the H/W queue this job is submitted to. * @ctx_id: the ID of the context the related CS belongs to. * @hw_queue_id: the id of the H/W queue this job is submitted to. * @user_cb_size: the actual size of the CB we got from the user. * @patched_cb_size: the size of the CB after parsing. - * @ext_queue: whether the job is for external queue or internal queue. * @job_id: the id of the related job inside the related CS. + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a + * handle to a kernel-allocated CB object, false + * otherwise (SRAM/DRAM/host address). 
*/ struct hl_cs_parser { struct hl_cb *user_cb; struct hl_cb *patched_cb; struct list_head *job_userptr_list; u64 cs_sequence; + enum hl_queue_type queue_type; u32 ctx_id; u32 hw_queue_id; u32 user_cb_size; u32 patched_cb_size; - u8 ext_queue; u8 job_id; + u8 is_kernel_allocated_cb; }; @@ -1504,7 +1515,8 @@ int hl_cb_pool_init(struct hl_device *hdev); int hl_cb_pool_fini(struct hl_device *hdev); void hl_cs_rollback_all(struct hl_device *hdev); -struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue); +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, + enum hl_queue_type queue_type, bool is_kernel_allocated_cb); void goya_set_asic_funcs(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c index f733b534f738..91579dde9262 100644 --- a/drivers/misc/habanalabs/hw_queue.c +++ b/drivers/misc/habanalabs/hw_queue.c @@ -58,8 +58,8 @@ out: } /* - * ext_queue_submit_bd - Submit a buffer descriptor to an external queue - * + * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a + * H/W queue. * @hdev: pointer to habanalabs device structure * @q: pointer to habanalabs queue structure * @ctl: BD's control word @@ -73,8 +73,8 @@ out: * This function must be called when the scheduler mutex is taken * */ -static void ext_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q, - u32 ctl, u32 len, u64 ptr) +static void ext_and_hw_queue_submit_bd(struct hl_device *hdev, + struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr) { struct hl_bd *bd; @@ -173,6 +173,45 @@ static int int_queue_sanity_checks(struct hl_device *hdev, return 0; } +/* + * hw_queue_sanity_checks() - Perform some sanity checks on a H/W queue. + * @hdev: Pointer to hl_device structure. + * @q: Pointer to hl_hw_queue structure. + * @num_of_entries: How many entries to check for space. + * + * Perform the following: + * - Make sure we have enough space in the completion queue. 
+ * This check also ensures that there is enough space in the h/w queue, as + * both queues are of the same size. + * - Reserve space in the completion queue (needs to be reversed if there + * is a failure down the road before the actual submission of work). + * + * Both operations are done using the "free_slots_cnt" field of the completion + * queue. The CI counters of the queue and the completion queue are not + * needed/used for the H/W queue type. + */ +static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q, + int num_of_entries) +{ + atomic_t *free_slots = + &hdev->completion_queue[q->hw_queue_id].free_slots_cnt; + + /* + * Check we have enough space in the completion queue. + * Add -1 to counter (decrement) unless counter was already 0. + * In that case, CQ is full so we can't submit a new CB. + * atomic_add_unless will return 0 if counter was already 0. + */ + if (atomic_add_negative(num_of_entries * -1, free_slots)) { + dev_dbg(hdev->dev, "No space for %d entries on CQ %d\n", + num_of_entries, q->hw_queue_id); + atomic_add(num_of_entries, free_slots); + return -EAGAIN; + } + + return 0; +} + /* * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion * @@ -188,7 +227,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, u32 cb_size, u64 cb_ptr) { struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; - int rc; + int rc = 0; /* * The CPU queue is a synchronous queue with an effective depth of @@ -206,11 +245,18 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, goto out; } - rc = ext_queue_sanity_checks(hdev, q, 1, false); - if (rc) - goto out; + /* + * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue + * type only on init phase, when the queues are empty and being tested, + * so there is no need for sanity checks. 
+ */ + if (q->queue_type != QUEUE_TYPE_HW) { + rc = ext_queue_sanity_checks(hdev, q, 1, false); + if (rc) + goto out; + } - ext_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); + ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); out: if (q->queue_type != QUEUE_TYPE_CPU) @@ -220,14 +266,14 @@ out: } /* - * ext_hw_queue_schedule_job - submit a JOB to an external queue + * ext_queue_schedule_job - submit a JOB to an external queue * * @job: pointer to the job that needs to be submitted to the queue * * This function must be called when the scheduler mutex is taken * */ -static void ext_hw_queue_schedule_job(struct hl_cs_job *job) +static void ext_queue_schedule_job(struct hl_cs_job *job) { struct hl_device *hdev = job->cs->ctx->hdev; struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; @@ -260,7 +306,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job) * H/W queues is done under the scheduler mutex * * No need to check if CQ is full because it was already - * checked in hl_queue_sanity_checks + * checked in ext_queue_sanity_checks */ cq = &hdev->completion_queue[q->hw_queue_id]; cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry); @@ -274,18 +320,18 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job) cq->pi = hl_cq_inc_ptr(cq->pi); - ext_queue_submit_bd(hdev, q, ctl, len, ptr); + ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); } /* - * int_hw_queue_schedule_job - submit a JOB to an internal queue + * int_queue_schedule_job - submit a JOB to an internal queue * * @job: pointer to the job that needs to be submitted to the queue * * This function must be called when the scheduler mutex is taken * */ -static void int_hw_queue_schedule_job(struct hl_cs_job *job) +static void int_queue_schedule_job(struct hl_cs_job *job) { struct hl_device *hdev = job->cs->ctx->hdev; struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; @@ -307,6 +353,60 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job) 
hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); } +/* + * hw_queue_schedule_job - submit a JOB to a H/W queue + * + * @job: pointer to the job that needs to be submitted to the queue + * + * This function must be called when the scheduler mutex is taken + * + */ +static void hw_queue_schedule_job(struct hl_cs_job *job) +{ + struct hl_device *hdev = job->cs->ctx->hdev; + struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; + struct hl_cq *cq; + u64 ptr; + u32 offset, ctl, len; + + /* + * Upon PQE completion, COMP_DATA is used as the write data to the + * completion queue (QMAN HBW message), and COMP_OFFSET is used as the + * write address offset in the SM block (QMAN LBW message). + * The write address offset is calculated as "COMP_OFFSET << 2". + */ + offset = job->cs->sequence & (HL_MAX_PENDING_CS - 1); + ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) | + ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK); + + len = job->job_cb_size; + + /* + * A patched CB is created only if a user CB was allocated by driver and + * MMU is disabled. If MMU is enabled, the user CB should be used + * instead. If the user CB wasn't allocated by driver, assume that it + * holds an address. 
+ */ + if (job->patched_cb) + ptr = job->patched_cb->bus_address; + else if (job->is_kernel_allocated_cb) + ptr = job->user_cb->bus_address; + else + ptr = (u64) (uintptr_t) job->user_cb; + + /* + * No need to protect pi_offset because scheduling to the + * H/W queues is done under the scheduler mutex + * + * No need to check if CQ is full because it was already + * checked in hw_queue_sanity_checks + */ + cq = &hdev->completion_queue[q->hw_queue_id]; + cq->pi = hl_cq_inc_ptr(cq->pi); + + ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); +} + /* * hl_hw_queue_schedule_cs - schedule a command submission * @@ -330,23 +430,34 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) } q = &hdev->kernel_queues[0]; - /* This loop assumes all external queues are consecutive */ for (i = 0, cq_cnt = 0 ; i < HL_MAX_QUEUES ; i++, q++) { - if (q->queue_type == QUEUE_TYPE_EXT) { - if (cs->jobs_in_queue_cnt[i]) { + if (cs->jobs_in_queue_cnt[i]) { + switch (q->queue_type) { + case QUEUE_TYPE_EXT: rc = ext_queue_sanity_checks(hdev, q, - cs->jobs_in_queue_cnt[i], true); - if (rc) - goto unroll_cq_resv; - cq_cnt++; - } - } else if (q->queue_type == QUEUE_TYPE_INT) { - if (cs->jobs_in_queue_cnt[i]) { + cs->jobs_in_queue_cnt[i], true); + break; + case QUEUE_TYPE_INT: rc = int_queue_sanity_checks(hdev, q, - cs->jobs_in_queue_cnt[i]); - if (rc) - goto unroll_cq_resv; + cs->jobs_in_queue_cnt[i]); + break; + case QUEUE_TYPE_HW: + rc = hw_queue_sanity_checks(hdev, q, + cs->jobs_in_queue_cnt[i]); + break; + default: + dev_err(hdev->dev, "Queue type %d is invalid\n", + q->queue_type); + rc = -EINVAL; + break; } + + if (rc) + goto unroll_cq_resv; + + if (q->queue_type == QUEUE_TYPE_EXT || + q->queue_type == QUEUE_TYPE_HW) + cq_cnt++; } } @@ -373,21 +484,30 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) } list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) - if (job->ext_queue) - ext_hw_queue_schedule_job(job); - else - int_hw_queue_schedule_job(job); + switch (job->queue_type) { + case 
QUEUE_TYPE_EXT: + ext_queue_schedule_job(job); + break; + case QUEUE_TYPE_INT: + int_queue_schedule_job(job); + break; + case QUEUE_TYPE_HW: + hw_queue_schedule_job(job); + break; + default: + break; + } cs->submitted = true; goto out; unroll_cq_resv: - /* This loop assumes all external queues are consecutive */ q = &hdev->kernel_queues[0]; for (i = 0 ; (i < HL_MAX_QUEUES) && (cq_cnt > 0) ; i++, q++) { - if ((q->queue_type == QUEUE_TYPE_EXT) && - (cs->jobs_in_queue_cnt[i])) { + if ((q->queue_type == QUEUE_TYPE_EXT || + q->queue_type == QUEUE_TYPE_HW) && + cs->jobs_in_queue_cnt[i]) { atomic_t *free_slots = &hdev->completion_queue[i].free_slots_cnt; atomic_add(cs->jobs_in_queue_cnt[i], free_slots); @@ -414,8 +534,8 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id) q->ci = hl_queue_inc_ptr(q->ci); } -static int ext_and_cpu_hw_queue_init(struct hl_device *hdev, - struct hl_hw_queue *q, bool is_cpu_queue) +static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, + bool is_cpu_queue) { void *p; int rc; @@ -465,7 +585,7 @@ free_queue: return rc; } -static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { void *p; @@ -485,18 +605,38 @@ static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) return 0; } -static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + return ext_and_cpu_queue_init(hdev, q, true); +} + +static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { - return ext_and_cpu_hw_queue_init(hdev, q, true); + return ext_and_cpu_queue_init(hdev, q, false); } -static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) { - return ext_and_cpu_hw_queue_init(hdev, q, false); + void *p; + + p = 
hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address, + GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + q->kernel_address = (u64) (uintptr_t) p; + + /* Make sure read/write pointers are initialized to start of queue */ + q->ci = 0; + q->pi = 0; + + return 0; } /* - * hw_queue_init - main initialization function for H/W queue object + * queue_init - main initialization function for H/W queue object * * @hdev: pointer to hl_device device structure * @q: pointer to hl_hw_queue queue structure @@ -505,7 +645,7 @@ static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) * Allocate dma-able memory for the queue and initialize fields * Returns 0 on success */ -static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, +static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q, u32 hw_queue_id) { int rc; @@ -516,21 +656,20 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, switch (q->queue_type) { case QUEUE_TYPE_EXT: - rc = ext_hw_queue_init(hdev, q); + rc = ext_queue_init(hdev, q); break; - case QUEUE_TYPE_INT: - rc = int_hw_queue_init(hdev, q); + rc = int_queue_init(hdev, q); break; - case QUEUE_TYPE_CPU: - rc = cpu_hw_queue_init(hdev, q); + rc = cpu_queue_init(hdev, q); + break; + case QUEUE_TYPE_HW: + rc = hw_queue_init(hdev, q); break; - case QUEUE_TYPE_NA: q->valid = 0; return 0; - default: dev_crit(hdev->dev, "wrong queue type %d during init\n", q->queue_type); @@ -554,7 +693,7 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, * * Free the queue memory */ -static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) +static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) { if (!q->valid) return; @@ -612,7 +751,7 @@ int hl_hw_queues_create(struct hl_device *hdev) i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) { q->queue_type = asic->hw_queues_props[i].type; - rc = hw_queue_init(hdev, q, i); + rc = 
queue_init(hdev, q, i); if (rc) { dev_err(hdev->dev, "failed to initialize queue %d\n", i); @@ -624,7 +763,7 @@ int hl_hw_queues_create(struct hl_device *hdev) release_queues: for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++) - hw_queue_fini(hdev, q); + queue_fini(hdev, q); kfree(hdev->kernel_queues); @@ -637,7 +776,7 @@ void hl_hw_queues_destroy(struct hl_device *hdev) int i; for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++) - hw_queue_fini(hdev, q); + queue_fini(hdev, q); kfree(hdev->kernel_queues); } diff --git a/drivers/misc/habanalabs/include/qman_if.h b/drivers/misc/habanalabs/include/qman_if.h index bf59bbe27fdc..0fdb49188ed7 100644 --- a/drivers/misc/habanalabs/include/qman_if.h +++ b/drivers/misc/habanalabs/include/qman_if.h @@ -23,6 +23,8 @@ struct hl_bd { #define HL_BD_SIZE sizeof(struct hl_bd) /* + * S/W CTL FIELDS. + * * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is * valid. 1 means the repeat field is valid, 0 means not-valid, * i.e. repeat == 1 @@ -33,6 +35,16 @@ struct hl_bd { #define BD_CTL_SHADOW_INDEX_SHIFT 0 #define BD_CTL_SHADOW_INDEX_MASK 0x00000FFF +/* + * H/W CTL FIELDS + */ + +#define BD_CTL_COMP_OFFSET_SHIFT 16 +#define BD_CTL_COMP_OFFSET_MASK 0x00FF0000 + +#define BD_CTL_COMP_DATA_SHIFT 0 +#define BD_CTL_COMP_DATA_MASK 0x0000FFFF + /* * COMPLETION QUEUE */ -- cgit From 8fdacf2a530f36f6f0621a95ef0e37d8db2d2f89 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 2 Oct 2019 14:14:08 +0300 Subject: habanalabs: set TPC Icache to 16 cache lines Reduce latency to memory during TPC kernel execution. 
Signed-off-by: Oded Gabbay Reviewed-by: Tomer Tayar --- drivers/misc/habanalabs/goya/goya.c | 3 +++ drivers/misc/habanalabs/habanalabs.h | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 0b40915bede2..d49f5ecd903b 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -1457,6 +1457,9 @@ static void goya_init_golden_registers(struct hl_device *hdev) 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT); WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset, 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT); + + WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset, + ICACHE_FETCH_LINE_NUM, 2); } WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT); diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 371d1ec15697..91445371b08b 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -1062,9 +1062,10 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); #define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT #define REG_FIELD_MASK(reg, field) reg##_##field##_MASK -#define WREG32_FIELD(reg, field, val) \ - WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \ - (val) << REG_FIELD_SHIFT(reg, field)) +#define WREG32_FIELD(reg, offset, field, val) \ + WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \ + ~REG_FIELD_MASK(reg, field)) | \ + (val) << REG_FIELD_SHIFT(reg, field)) /* Timeout should be longer when working with simulator but cap the * increased timeout to some maximum -- cgit From 62c1e124a9e03ccb8bb39efe1d092c2376967528 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 10 Oct 2019 15:48:59 +0300 Subject: habanalabs: add opcode to INFO IOCTL to return clock rate Add a new opcode to the INFO IOCTL to allow the user application to retrieve the ASIC's current and maximum clock rate. The rate is returned in MHz. 
Signed-off-by: Oded Gabbay Reviewed-by: Tomer Tayar --- drivers/misc/habanalabs/goya/goya.c | 3 ++- drivers/misc/habanalabs/goya/goyaP.h | 2 ++ drivers/misc/habanalabs/goya/goya_hwmgr.c | 31 ++++++++++++++++++++++++++++++ drivers/misc/habanalabs/habanalabs.h | 2 ++ drivers/misc/habanalabs/habanalabs_ioctl.c | 23 ++++++++++++++++++++++ 5 files changed, 60 insertions(+), 1 deletion(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index d49f5ecd903b..ac574d18c139 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5148,7 +5148,8 @@ static const struct hl_asic_funcs goya_funcs = { .init_iatu = goya_init_iatu, .rreg = hl_rreg, .wreg = hl_wreg, - .halt_coresight = goya_halt_coresight + .halt_coresight = goya_halt_coresight, + .get_clk_rate = goya_get_clk_rate }; /* diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 89b6574f8e4f..c3230cb6e25c 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -233,4 +233,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev); +int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); + #endif /* GOYAP_H_ */ diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c index a2a700c3d597..b2ebc01e27f4 100644 --- a/drivers/misc/habanalabs/goya/goya_hwmgr.c +++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c @@ -32,6 +32,37 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) } } +int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) +{ + long value; + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + value = hl_get_frequency(hdev, MME_PLL, false); + + if (value < 0) { + dev_err(hdev->dev, "Failed to retrieve device max clock 
%ld\n", + value); + return value; + } + + *max_clk = (value / 1000 / 1000); + + value = hl_get_frequency(hdev, MME_PLL, true); + + if (value < 0) { + dev_err(hdev->dev, + "Failed to retrieve device current clock %ld\n", + value); + return value; + } + + *cur_clk = (value / 1000 / 1000); + + return 0; +} + static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 91445371b08b..4ff2da859653 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -508,6 +508,7 @@ enum hl_pll_frequency { * @rreg: Read a register. Needed for simulator support. * @wreg: Write a register. Needed for simulator support. * @halt_coresight: stop the ETF and ETR traces. + * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -590,6 +591,7 @@ struct hl_asic_funcs { u32 (*rreg)(struct hl_device *hdev, u32 reg); void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); void (*halt_coresight)(struct hl_device *hdev); + int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); }; diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 66d9c710073c..cd4b5a9ceac1 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -221,6 +221,25 @@ static int device_utilization(struct hl_device *hdev, struct hl_info_args *args) min((size_t) max_size, sizeof(device_util))) ? 
-EFAULT : 0; } +static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_clk_rate clk_rate = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, + &clk_rate.max_clk_rate_mhz); + if (rc) + return rc; + + return copy_to_user(out, &clk_rate, + min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -271,6 +290,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, rc = hw_events_info(hdev, true, args); break; + case HL_INFO_CLK_RATE: + rc = get_clk_rate(hdev, args); + break; + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; -- cgit From 8d6de52866dcf1d6cbdec9aa10f722dd43b2431f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 16 Oct 2019 16:46:32 +0800 Subject: habanalabs: remove set but not used variable 'qman_base_addr' Fixes gcc '-Wunused-but-set-variable' warning: drivers/misc/habanalabs/goya/goya.c: In function 'goya_init_mme_cmdq': drivers/misc/habanalabs/goya/goya.c:1536:6: warning: variable 'qman_base_addr' set but not used [-Wunused-but-set-variable] It is never used, so can be removed. 
Reported-by: Hulk Robot Signed-off-by: YueHaibing Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index ac574d18c139..e8812154343f 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -1539,7 +1539,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev) u32 mtr_base_lo, mtr_base_hi; u32 so_base_lo, so_base_hi; u32 gic_base_lo, gic_base_hi; - u64 qman_base_addr; mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); @@ -1551,9 +1550,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev) gic_base_hi = upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); - qman_base_addr = hdev->asic_prop.sram_base_address + - MME_QMAN_BASE_OFFSET; - WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo); WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi); WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo); -- cgit From 91edbf2cf8f0416b854674e891d7a5274f4b1702 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 16 Oct 2019 11:53:52 +0300 Subject: habanalabs: expose card name in INFO IOCTL To enable userspace processes, e.g. management utilities, to display the card name to the user, add the card name property to the HW_IP structure that is copied to the user in the INFO IOCTL. 
Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 3 +++ drivers/misc/habanalabs/habanalabs_ioctl.c | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index e8812154343f..d3ee9e2aa57e 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -396,6 +396,9 @@ void goya_get_fixed_properties(struct hl_device *hdev) prop->tpc_enabled_mask = TPC_ENABLED_MASK; prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; + + strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, + CARD_NAME_MAX_LEN); } /* diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index cd4b5a9ceac1..02d7491fa28f 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -63,8 +63,13 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) if (hw_ip.dram_size > 0) hw_ip.dram_enabled = 1; hw_ip.num_of_events = prop->num_of_events; - memcpy(hw_ip.armcp_version, - prop->armcp_info.armcp_version, VERSION_MAX_LEN); + + memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version, + min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN)); + + memcpy(hw_ip.card_name, prop->armcp_info.card_name, + min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN)); + hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version); hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr; hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf; -- cgit From f05912d8f16bf303e293d4add2caecb8a9231c41 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 20 Oct 2019 11:07:11 +0300 Subject: habanalabs: read F/W versions before failure Move the read of the F/W boot versions before exiting on possible failures of the F/W boot. This will help debug boot failures as we will be able to know the F/W boot version. 
Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/goya/goya.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index d3ee9e2aa57e..4e767e1d78e4 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2296,6 +2296,10 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) 10000, cpu_timeout); + /* Read U-Boot version now in case we will later fail */ + goya_read_device_fw_version(hdev, FW_COMP_UBOOT); + goya_read_device_fw_version(hdev, FW_COMP_PREBOOT); + if (rc) { dev_err(hdev->dev, "Error in ARM u-boot!"); switch (status) { @@ -2347,10 +2351,6 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) return -EIO; } - /* Read U-Boot version now in case we will later fail */ - goya_read_device_fw_version(hdev, FW_COMP_UBOOT); - goya_read_device_fw_version(hdev, FW_COMP_PREBOOT); - if (!hdev->fw_loading) { dev_info(hdev->dev, "Skip loading FW\n"); goto out; -- cgit From e1a84d56fcb92d4551692cbec4bada1cec00e620 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 24 Oct 2019 09:52:25 +0300 Subject: habanalabs: use registers name defines for ETR block We have a single ETR block in the SOC, so use explicit register name defines for initializing this block. This makes it more readable and maintainable. 
Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/goya/goya_coresight.c | 51 +++++---- .../habanalabs/include/goya/asic_reg/goya_regs.h | 1 + .../include/goya/asic_reg/psoc_etr_regs.h | 114 +++++++++++++++++++++ 3 files changed, 140 insertions(+), 26 deletions(-) create mode 100644 drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index b4d406af1bed..16bcd60b111f 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -377,33 +377,32 @@ static int goya_config_etr(struct hl_device *hdev, struct hl_debug_params *params) { struct hl_debug_params_etr *input; - u64 base_reg = mmPSOC_ETR_BASE - CFG_BASE; u32 val; int rc; - WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK); - val = RREG32(base_reg + 0x304); + val = RREG32(mmPSOC_ETR_FFCR); val |= 0x1000; - WREG32(base_reg + 0x304, val); + WREG32(mmPSOC_ETR_FFCR, val); val |= 0x40; - WREG32(base_reg + 0x304, val); + WREG32(mmPSOC_ETR_FFCR, val); - rc = goya_coresight_timeout(hdev, base_reg + 0x304, 6, false); + rc = goya_coresight_timeout(hdev, mmPSOC_ETR_FFCR, 6, false); if (rc) { dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n", params->enable ? "enable" : "disable", rc); return rc; } - rc = goya_coresight_timeout(hdev, base_reg + 0xC, 2, true); + rc = goya_coresight_timeout(hdev, mmPSOC_ETR_STS, 2, true); if (rc) { dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n", params->enable ? 
"enable" : "disable", rc); return rc; } - WREG32(base_reg + 0x20, 0); + WREG32(mmPSOC_ETR_CTL, 0); if (params->enable) { input = params->input; @@ -423,25 +422,25 @@ static int goya_config_etr(struct hl_device *hdev, return -EINVAL; } - WREG32(base_reg + 0x34, 0x3FFC); - WREG32(base_reg + 0x4, input->buffer_size); - WREG32(base_reg + 0x28, input->sink_mode); - WREG32(base_reg + 0x110, 0x700); - WREG32(base_reg + 0x118, + WREG32(mmPSOC_ETR_BUFWM, 0x3FFC); + WREG32(mmPSOC_ETR_RSZ, input->buffer_size); + WREG32(mmPSOC_ETR_MODE, input->sink_mode); + WREG32(mmPSOC_ETR_AXICTL, 0x700); + WREG32(mmPSOC_ETR_DBALO, lower_32_bits(input->buffer_address)); - WREG32(base_reg + 0x11C, + WREG32(mmPSOC_ETR_DBAHI, upper_32_bits(input->buffer_address)); - WREG32(base_reg + 0x304, 3); - WREG32(base_reg + 0x308, 0xA); - WREG32(base_reg + 0x20, 1); + WREG32(mmPSOC_ETR_FFCR, 3); + WREG32(mmPSOC_ETR_PSCR, 0xA); + WREG32(mmPSOC_ETR_CTL, 1); } else { - WREG32(base_reg + 0x34, 0); - WREG32(base_reg + 0x4, 0x400); - WREG32(base_reg + 0x118, 0); - WREG32(base_reg + 0x11C, 0); - WREG32(base_reg + 0x308, 0); - WREG32(base_reg + 0x28, 0); - WREG32(base_reg + 0x304, 0); + WREG32(mmPSOC_ETR_BUFWM, 0); + WREG32(mmPSOC_ETR_RSZ, 0x400); + WREG32(mmPSOC_ETR_DBALO, 0); + WREG32(mmPSOC_ETR_DBAHI, 0); + WREG32(mmPSOC_ETR_PSCR, 0); + WREG32(mmPSOC_ETR_MODE, 0); + WREG32(mmPSOC_ETR_FFCR, 0); if (params->output_size >= sizeof(u64)) { u32 rwp, rwphi; @@ -451,8 +450,8 @@ static int goya_config_etr(struct hl_device *hdev, * the buffer is set in the RWP register (lower 32 * bits), and in the RWPHI register (upper 8 bits). 
*/ - rwp = RREG32(base_reg + 0x18); - rwphi = RREG32(base_reg + 0x3c) & 0xff; + rwp = RREG32(mmPSOC_ETR_RWP); + rwphi = RREG32(mmPSOC_ETR_RWPHI) & 0xff; *(u64 *) params->output = ((u64) rwphi << 32) | rwp; } } diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h index 19b0f0ef1d0b..fce490e6a231 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h @@ -84,6 +84,7 @@ #include "tpc6_rtr_regs.h" #include "tpc7_nrtr_regs.h" #include "tpc0_eml_cfg_regs.h" +#include "psoc_etr_regs.h" #include "psoc_global_conf_masks.h" #include "dma_macro_masks.h" diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h new file mode 100644 index 000000000000..b7c33e025db5 --- /dev/null +++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2018 HabanaLabs, Ltd. + * All Rights Reserved. 
+ * + */ + +/************************************ + ** This is an auto-generated file ** + ** DO NOT EDIT BELOW ** + ************************************/ + +#ifndef ASIC_REG_PSOC_ETR_REGS_H_ +#define ASIC_REG_PSOC_ETR_REGS_H_ + +/* + ***************************************** + * PSOC_ETR (Prototype: ETR) + ***************************************** + */ + +#define mmPSOC_ETR_RSZ 0x2C43004 + +#define mmPSOC_ETR_STS 0x2C4300C + +#define mmPSOC_ETR_RRD 0x2C43010 + +#define mmPSOC_ETR_RRP 0x2C43014 + +#define mmPSOC_ETR_RWP 0x2C43018 + +#define mmPSOC_ETR_TRG 0x2C4301C + +#define mmPSOC_ETR_CTL 0x2C43020 + +#define mmPSOC_ETR_RWD 0x2C43024 + +#define mmPSOC_ETR_MODE 0x2C43028 + +#define mmPSOC_ETR_LBUFLEVEL 0x2C4302C + +#define mmPSOC_ETR_CBUFLEVEL 0x2C43030 + +#define mmPSOC_ETR_BUFWM 0x2C43034 + +#define mmPSOC_ETR_RRPHI 0x2C43038 + +#define mmPSOC_ETR_RWPHI 0x2C4303C + +#define mmPSOC_ETR_AXICTL 0x2C43110 + +#define mmPSOC_ETR_DBALO 0x2C43118 + +#define mmPSOC_ETR_DBAHI 0x2C4311C + +#define mmPSOC_ETR_FFSR 0x2C43300 + +#define mmPSOC_ETR_FFCR 0x2C43304 + +#define mmPSOC_ETR_PSCR 0x2C43308 + +#define mmPSOC_ETR_ITMISCOP0 0x2C43EE0 + +#define mmPSOC_ETR_ITTRFLIN 0x2C43EE8 + +#define mmPSOC_ETR_ITATBDATA0 0x2C43EEC + +#define mmPSOC_ETR_ITATBCTR2 0x2C43EF0 + +#define mmPSOC_ETR_ITATBCTR1 0x2C43EF4 + +#define mmPSOC_ETR_ITATBCTR0 0x2C43EF8 + +#define mmPSOC_ETR_ITCTRL 0x2C43F00 + +#define mmPSOC_ETR_CLAIMSET 0x2C43FA0 + +#define mmPSOC_ETR_CLAIMCLR 0x2C43FA4 + +#define mmPSOC_ETR_LAR 0x2C43FB0 + +#define mmPSOC_ETR_LSR 0x2C43FB4 + +#define mmPSOC_ETR_AUTHSTATUS 0x2C43FB8 + +#define mmPSOC_ETR_DEVID 0x2C43FC8 + +#define mmPSOC_ETR_DEVTYPE 0x2C43FCC + +#define mmPSOC_ETR_PERIPHID4 0x2C43FD0 + +#define mmPSOC_ETR_PERIPHID5 0x2C43FD4 + +#define mmPSOC_ETR_PERIPHID6 0x2C43FD8 + +#define mmPSOC_ETR_PERIPHID7 0x2C43FDC + +#define mmPSOC_ETR_PERIPHID0 0x2C43FE0 + +#define mmPSOC_ETR_PERIPHID1 0x2C43FE4 + +#define mmPSOC_ETR_PERIPHID2 0x2C43FE8 + +#define mmPSOC_ETR_PERIPHID3 
0x2C43FEC + +#define mmPSOC_ETR_COMPID0 0x2C43FF0 + +#define mmPSOC_ETR_COMPID1 0x2C43FF4 + +#define mmPSOC_ETR_COMPID2 0x2C43FF8 + +#define mmPSOC_ETR_COMPID3 0x2C43FFC + +#endif /* ASIC_REG_PSOC_ETR_REGS_H_ */ -- cgit From 6476b472437de2c41dc8873134c60d2928f806ce Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 24 Oct 2019 10:12:35 +0300 Subject: habanalabs: set ETR as non-secured ETR should always be non-secured as it is used by the users to record profiling/trace data. Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/goya/goya_coresight.c | 4 +++- drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index 16bcd60b111f..c1ee6e2b5dff 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -8,6 +8,7 @@ #include "goyaP.h" #include "include/goya/goya_coresight.h" #include "include/goya/asic_reg/goya_regs.h" +#include "include/goya/asic_reg/goya_masks.h" #include @@ -425,7 +426,8 @@ static int goya_config_etr(struct hl_device *hdev, WREG32(mmPSOC_ETR_BUFWM, 0x3FFC); WREG32(mmPSOC_ETR_RSZ, input->buffer_size); WREG32(mmPSOC_ETR_MODE, input->sink_mode); - WREG32(mmPSOC_ETR_AXICTL, 0x700); + WREG32(mmPSOC_ETR_AXICTL, + 0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT); WREG32(mmPSOC_ETR_DBALO, lower_32_bits(input->buffer_address)); WREG32(mmPSOC_ETR_DBAHI, diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h index 8618891d5afa..3c44ef3a23ed 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h @@ -260,4 +260,6 @@ #define DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT #define 
DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT +#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT 1 + #endif /* ASIC_REG_GOYA_MASKS_H_ */ -- cgit From bd4c8cb17d4e8f9e01ce48e3f2009307a58e60d2 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 9 Nov 2019 23:16:33 +0200 Subject: habanalabs: increase max jobs number to 512 In training, there is a need for a large amount of patching to the recipe. This results in many command buffers that contain a lot of DMA packets. The number of command buffers per CS is larger than the current maximum of 64, which is an arbitrary number that is enough for inference, but it has no real effect on the code and/or resources of the host machine. Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/habanalabs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 4ff2da859653..0813041f669a 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -40,7 +40,7 @@ #define HL_MAX_QUEUES 128 -#define HL_MAX_JOBS_PER_CS 64 +#define HL_MAX_JOBS_PER_CS 512 /* MUST BE POWER OF 2 and larger than 1 */ #define HL_MAX_PENDING_CS 64 -- cgit From eda58bf7860a028f207e8d4201d86191b898bbee Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 10 Nov 2019 18:48:06 +0200 Subject: habanalabs: don't print error when queues are full If the queues are full and we return -EAGAIN to the user, there is no need to print an error, as that case isn't an error and the user is expected to re-submit the work.
Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/command_submission.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index 776ddafc47fb..8850f475a413 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -626,9 +626,10 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, rc = hl_hw_queue_schedule_cs(cs); if (rc) { - dev_err(hdev->dev, - "Failed to submit CS %d.%llu to H/W queues, error %d\n", - cs->ctx->asid, cs->sequence, rc); + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to submit CS %d.%llu to H/W queues, error %d\n", + cs->ctx->asid, cs->sequence, rc); goto free_cs_object; } -- cgit From 5d1012576d20dd7cb70e00ea1b4c2af11a6c9156 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 10 Nov 2019 16:08:26 +0200 Subject: habanalabs: export uapi defines to user-space The two defines that control the maximum size of a command buffer and the maximum number of JOBS per CS need to be exported to the user as they are part of the API towards user-space. Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/habanalabs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 0813041f669a..2a5344cc1a60 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -40,8 +40,6 @@ #define HL_MAX_QUEUES 128 -#define HL_MAX_JOBS_PER_CS 512 - /* MUST BE POWER OF 2 and larger than 1 */ #define HL_MAX_PENDING_CS 64 @@ -242,8 +240,6 @@ struct hl_dma_fence { * Command Buffers */ -#define HL_MAX_CB_SIZE 0x200000 /* 2MB */ - /** * struct hl_cb_mgr - describes a Command Buffer Manager. * @cb_lock: protects cb_handles. 
-- cgit From 7f74d4d335f1bdcb51fca584d5ad065c4ff996ac Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Mon, 12 Aug 2019 11:48:46 +0300 Subject: habanalabs: re-factor memory module code Some of the functions in the memory module code were too long and/or contained multiple operations that are not always done together. Re-factor the code by dividing those functions to smaller functions which are more readable and maintainable. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 2 +- drivers/misc/habanalabs/habanalabs.h | 4 +- drivers/misc/habanalabs/memory.c | 281 +++++++++++++++++++---------------- 3 files changed, 158 insertions(+), 129 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 4e767e1d78e4..9712122d6cb1 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3935,7 +3935,7 @@ static int goya_parse_cb_no_ext_queue(struct hl_device *hdev, return 0; dev_err(hdev->dev, - "Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n", + "Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n", parser->user_cb, parser->user_cb_size); return -EFAULT; diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 2a5344cc1a60..78aef59e690b 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -692,7 +692,7 @@ struct hl_ctx_mgr { * @sgt: pointer to the scatter-gather table that holds the pages. * @dir: for DMA unmapping, the direction must be supplied, so save it. * @debugfs_list: node in debugfs list of command submissions. - * @addr: user-space virtual pointer to the start of the memory area. + * @addr: user-space virtual address of the start of the memory area. * @size: size of the memory area to pin & map. * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise. 
*/ @@ -1527,7 +1527,7 @@ void hl_vm_fini(struct hl_device *hdev); int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, struct hl_userptr *userptr); -int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr); +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr); void hl_userptr_delete_list(struct hl_device *hdev, struct list_head *userptr_list); bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size, diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 365fb0cb8dff..8ade9886a5a7 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -159,20 +159,19 @@ pages_pack_err: } /* - * get_userptr_from_host_va - initialize userptr structure from given host - * virtual address - * - * @hdev : habanalabs device structure - * @args : parameters containing the virtual address and size - * @p_userptr : pointer to result userptr structure + * dma_map_host_va - DMA mapping of the given host virtual address. 
+ * @hdev: habanalabs device structure + * @addr: the host virtual address of the memory area + * @size: the size of the memory area + * @p_userptr: pointer to result userptr structure * * This function does the following: * - Allocate userptr structure * - Pin the given host memory using the userptr structure * - Perform DMA mapping to have the DMA addresses of the pages */ -static int get_userptr_from_host_va(struct hl_device *hdev, - struct hl_mem_in *args, struct hl_userptr **p_userptr) +static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, + struct hl_userptr **p_userptr) { struct hl_userptr *userptr; int rc; @@ -183,8 +182,7 @@ static int get_userptr_from_host_va(struct hl_device *hdev, goto userptr_err; } - rc = hl_pin_host_memory(hdev, args->map_host.host_virt_addr, - args->map_host.mem_size, userptr); + rc = hl_pin_host_memory(hdev, addr, size, userptr); if (rc) { dev_err(hdev->dev, "Failed to pin host memory\n"); goto pin_err; @@ -215,16 +213,16 @@ userptr_err: } /* - * free_userptr - free userptr structure - * - * @hdev : habanalabs device structure - * @userptr : userptr to free + * dma_unmap_host_va - DMA unmapping of the given host virtual address. 
+ * @hdev: habanalabs device structure + * @userptr: userptr to free * * This function does the following: * - Unpins the physical pages * - Frees the userptr structure */ -static void free_userptr(struct hl_device *hdev, struct hl_userptr *userptr) +static void dma_unmap_host_va(struct hl_device *hdev, + struct hl_userptr *userptr) { hl_unpin_host_memory(hdev, userptr); kfree(userptr); @@ -253,10 +251,9 @@ static void dram_pg_pool_do_release(struct kref *ref) } /* - * free_phys_pg_pack - free physical page pack - * - * @hdev : habanalabs device structure - * @phys_pg_pack : physical page pack to free + * free_phys_pg_pack - free physical page pack + * @hdev: habanalabs device structure + * @phys_pg_pack: physical page pack to free * * This function does the following: * - For DRAM memory only, iterate over the pack and free each physical block @@ -264,7 +261,7 @@ static void dram_pg_pool_do_release(struct kref *ref) * - Free the hl_vm_phys_pg_pack structure */ static void free_phys_pg_pack(struct hl_device *hdev, - struct hl_vm_phys_pg_pack *phys_pg_pack) + struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_vm *vm = &hdev->vm; u64 i; @@ -631,20 +628,18 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) /* * init_phys_pg_pack_from_userptr - initialize physical page pack from host - * memory - * - * @ctx : current context - * @userptr : userptr to initialize from - * @pphys_pg_pack : res pointer + * memory + * @asid: current context ASID + * @userptr: userptr to initialize from + * @pphys_pg_pack: result pointer * * This function does the following: * - Pin the physical pages related to the given virtual block * - Create a physical page pack from the physical pages related to the given * virtual block */ -static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, - struct hl_userptr *userptr, - struct hl_vm_phys_pg_pack **pphys_pg_pack) +static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr, + struct 
hl_vm_phys_pg_pack **pphys_pg_pack) { struct hl_vm_phys_pg_pack *phys_pg_pack; struct scatterlist *sg; @@ -660,7 +655,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, phys_pg_pack->vm_type = userptr->vm_type; phys_pg_pack->created_from_userptr = true; - phys_pg_pack->asid = ctx->asid; + phys_pg_pack->asid = asid; atomic_set(&phys_pg_pack->mapping_cnt, 1); /* Only if all dma_addrs are aligned to 2MB and their @@ -731,19 +726,18 @@ page_pack_arr_mem_err: } /* - * map_phys_page_pack - maps the physical page pack - * - * @ctx : current context - * @vaddr : start address of the virtual area to map from - * @phys_pg_pack : the pack of physical pages to map to + * map_phys_pg_pack - maps the physical page pack. + * @ctx: current context + * @vaddr: start address of the virtual area to map from + * @phys_pg_pack: the pack of physical pages to map to * * This function does the following: * - Maps each chunk of virtual memory to matching physical chunk * - Stores number of successful mappings in the given argument - * - Returns 0 on success, error code otherwise. 
+ * - Returns 0 on success, error code otherwise */ -static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr, - struct hl_vm_phys_pg_pack *phys_pg_pack) +static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, + struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_device *hdev = ctx->hdev; u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; @@ -783,6 +777,36 @@ err: return rc; } +/* + * unmap_phys_pg_pack - unmaps the physical page pack + * @ctx: current context + * @vaddr: start address of the virtual area to unmap + * @phys_pg_pack: the pack of physical pages to unmap + */ +static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, + struct hl_vm_phys_pg_pack *phys_pg_pack) +{ + struct hl_device *hdev = ctx->hdev; + u64 next_vaddr, i; + u32 page_size; + + page_size = phys_pg_pack->page_size; + next_vaddr = vaddr; + + for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { + if (hl_mmu_unmap(ctx, next_vaddr, page_size)) + dev_warn_ratelimited(hdev->dev, + "unmap failed for vaddr: 0x%llx\n", next_vaddr); + + /* + * unmapping on Palladium can be really long, so avoid a CPU + * soft lockup bug by sleeping a little between unmapping pages + */ + if (hdev->pldm) + usleep_range(500, 1000); + } +} + static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *paddr) { @@ -839,18 +863,21 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, *device_addr = 0; if (is_userptr) { - rc = get_userptr_from_host_va(hdev, args, &userptr); + u64 addr = args->map_host.host_virt_addr, + size = args->map_host.mem_size; + + rc = dma_map_host_va(hdev, addr, size, &userptr); if (rc) { dev_err(hdev->dev, "failed to get userptr from va\n"); return rc; } - rc = init_phys_pg_pack_from_userptr(ctx, userptr, + rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr, &phys_pg_pack); if (rc) { dev_err(hdev->dev, "unable to init page pack for vaddr 0x%llx\n", - args->map_host.host_virt_addr); + addr); goto init_page_pack_err; } @@ 
-909,7 +936,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, mutex_lock(&ctx->mmu_lock); - rc = map_phys_page_pack(ctx, ret_vaddr, phys_pg_pack); + rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); if (rc) { mutex_unlock(&ctx->mmu_lock); dev_err(hdev->dev, "mapping page pack failed for handle %u\n", @@ -955,7 +982,7 @@ shared_err: free_phys_pg_pack(hdev, phys_pg_pack); init_page_pack_err: if (is_userptr) - free_userptr(hdev, userptr); + dma_unmap_host_va(hdev, userptr); return rc; } @@ -977,8 +1004,6 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) struct hl_vm_hash_node *hnode = NULL; struct hl_userptr *userptr = NULL; enum vm_type_t *vm_type; - u64 next_vaddr, i; - u32 page_size; bool is_userptr; int rc; @@ -1004,8 +1029,8 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) if (*vm_type == VM_TYPE_USERPTR) { is_userptr = true; userptr = hnode->ptr; - rc = init_phys_pg_pack_from_userptr(ctx, userptr, - &phys_pg_pack); + rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr, + &phys_pg_pack); if (rc) { dev_err(hdev->dev, "unable to init page pack for vaddr 0x%llx\n", @@ -1029,24 +1054,11 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) goto mapping_cnt_err; } - page_size = phys_pg_pack->page_size; - vaddr &= ~(((u64) page_size) - 1); - - next_vaddr = vaddr; + vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); mutex_lock(&ctx->mmu_lock); - for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { - if (hl_mmu_unmap(ctx, next_vaddr, page_size)) - dev_warn_ratelimited(hdev->dev, - "unmap failed for vaddr: 0x%llx\n", next_vaddr); - - /* unmapping on Palladium can be really long, so avoid a CPU - * soft lockup bug by sleeping a little between unmapping pages - */ - if (hdev->pldm) - usleep_range(500, 1000); - } + unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); hdev->asic_funcs->mmu_invalidate_cache(hdev, true); @@ -1064,7 +1076,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) if 
(is_userptr) { free_phys_pg_pack(hdev, phys_pg_pack); - free_userptr(hdev, userptr); + dma_unmap_host_va(hdev, userptr); } return 0; @@ -1203,20 +1215,72 @@ out: return rc; } +static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, + u32 npages, u64 start, u32 offset, + struct hl_userptr *userptr) +{ + int rc; + + if (!access_ok((void __user *) (uintptr_t) addr, size)) { + dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); + return -EFAULT; + } + + userptr->vec = frame_vector_create(npages); + if (!userptr->vec) { + dev_err(hdev->dev, "Failed to create frame vector\n"); + return -ENOMEM; + } + + rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, + userptr->vec); + + if (rc != npages) { + dev_err(hdev->dev, + "Failed to map host memory, user ptr probably wrong\n"); + if (rc < 0) + goto destroy_framevec; + rc = -EFAULT; + goto put_framevec; + } + + if (frame_vector_to_pages(userptr->vec) < 0) { + dev_err(hdev->dev, + "Failed to translate frame vector to pages\n"); + rc = -EFAULT; + goto put_framevec; + } + + rc = sg_alloc_table_from_pages(userptr->sgt, + frame_vector_pages(userptr->vec), + npages, offset, size, GFP_ATOMIC); + if (rc < 0) { + dev_err(hdev->dev, "failed to create SG table from pages\n"); + goto put_framevec; + } + + return 0; + +put_framevec: + put_vaddr_frames(userptr->vec); +destroy_framevec: + frame_vector_destroy(userptr->vec); + return rc; +} + /* - * hl_pin_host_memory - pins a chunk of host memory - * - * @hdev : pointer to the habanalabs device structure - * @addr : the user-space virtual address of the memory area - * @size : the size of the memory area - * @userptr : pointer to hl_userptr structure + * hl_pin_host_memory - pins a chunk of host memory. 
+ * @hdev: pointer to the habanalabs device structure + * @addr: the host virtual address of the memory area + * @size: the size of the memory area + * @userptr: pointer to hl_userptr structure * * This function does the following: * - Pins the physical pages - * - Create a SG list from those pages + * - Create an SG list from those pages */ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, - struct hl_userptr *userptr) + struct hl_userptr *userptr) { u64 start, end; u32 npages, offset; @@ -1227,11 +1291,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, return -EINVAL; } - if (!access_ok((void __user *) (uintptr_t) addr, size)) { - dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); - return -EFAULT; - } - /* * If the combination of the address and size requested for this memory * region causes an integer overflow, return error. @@ -1244,6 +1303,14 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, return -EINVAL; } + /* + * This function can be called also from data path, hence use atomic + * always as it is not a big allocation. 
+ */ + userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); + if (!userptr->sgt) + return -ENOMEM; + start = addr & PAGE_MASK; offset = addr & ~PAGE_MASK; end = PAGE_ALIGN(addr + size); @@ -1254,42 +1321,12 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, userptr->dma_mapped = false; INIT_LIST_HEAD(&userptr->job_node); - userptr->vec = frame_vector_create(npages); - if (!userptr->vec) { - dev_err(hdev->dev, "Failed to create frame vector\n"); - return -ENOMEM; - } - - rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, - userptr->vec); - - if (rc != npages) { - dev_err(hdev->dev, - "Failed to map host memory, user ptr probably wrong\n"); - if (rc < 0) - goto destroy_framevec; - rc = -EFAULT; - goto put_framevec; - } - - if (frame_vector_to_pages(userptr->vec) < 0) { + rc = get_user_memory(hdev, addr, size, npages, start, offset, + userptr); + if (rc) { dev_err(hdev->dev, - "Failed to translate frame vector to pages\n"); - rc = -EFAULT; - goto put_framevec; - } - - userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); - if (!userptr->sgt) { - rc = -ENOMEM; - goto put_framevec; - } - - rc = sg_alloc_table_from_pages(userptr->sgt, - frame_vector_pages(userptr->vec), - npages, offset, size, GFP_ATOMIC); - if (rc < 0) { - dev_err(hdev->dev, "failed to create SG table from pages\n"); + "failed to get user memory for address 0x%llx\n", + addr); goto free_sgt; } @@ -1299,34 +1336,28 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, free_sgt: kfree(userptr->sgt); -put_framevec: - put_vaddr_frames(userptr->vec); -destroy_framevec: - frame_vector_destroy(userptr->vec); return rc; } /* - * hl_unpin_host_memory - unpins a chunk of host memory - * - * @hdev : pointer to the habanalabs device structure - * @userptr : pointer to hl_userptr structure + * hl_unpin_host_memory - unpins a chunk of host memory. 
+ * @hdev: pointer to the habanalabs device structure + * @userptr: pointer to hl_userptr structure * * This function does the following: * - Unpins the physical pages related to the host memory * - Free the SG list */ -int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) { struct page **pages; hl_debugfs_remove_userptr(hdev, userptr); if (userptr->dma_mapped) - hdev->asic_funcs->hl_dma_unmap_sg(hdev, - userptr->sgt->sgl, - userptr->sgt->nents, - userptr->dir); + hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl, + userptr->sgt->nents, + userptr->dir); pages = frame_vector_pages(userptr->vec); if (!IS_ERR(pages)) { @@ -1342,8 +1373,6 @@ int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) sg_free_table(userptr->sgt); kfree(userptr->sgt); - - return 0; } /* @@ -1627,7 +1656,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) if (phys_pg_list->asid == ctx->asid) { dev_dbg(hdev->dev, - "page list 0x%p of asid %d is still alive\n", + "page list 0x%px of asid %d is still alive\n", phys_pg_list, ctx->asid); atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem); -- cgit From 7b6e4ea0f7b16ab292df5e67f5d847929f8e4d3e Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 14 Nov 2019 18:23:53 +0000 Subject: habanalabs: type specific MMU cache invalidation Add the ability to invalidate the necessary MMU cache only. This ability is a prerequisite for future ASICs support. Note that in Goya ASIC, a single cache is used for both host/DRAM mappings and hence this patch should not have any effect on current behavior. 
Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 6 ++++-- drivers/misc/habanalabs/habanalabs.h | 11 ++++++----- drivers/misc/habanalabs/memory.c | 4 ++-- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 9712122d6cb1..3c22fb96a26f 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2463,7 +2463,8 @@ int goya_mmu_init(struct hl_device *hdev) WREG32_AND(mmSTLB_STLB_FEATURE_EN, (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK)); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, + VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK); WREG32(mmMMU_MMU_ENABLE, 1); WREG32(mmMMU_SPI_MASK, 0xF); @@ -4845,7 +4846,8 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid) goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid); } -static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard) +static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, + u32 flags) { struct goya_device *goya = hdev->asic_specific; u32 status, timeout_usec; diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 78aef59e690b..36d05c32f7ec 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -114,8 +114,8 @@ struct hw_queue_properties { * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address. */ enum vm_type_t { - VM_TYPE_USERPTR, - VM_TYPE_PHYS_PACK + VM_TYPE_USERPTR = 0x1, + VM_TYPE_PHYS_PACK = 0x2 }; /** @@ -483,8 +483,8 @@ enum hl_pll_frequency { * @get_events_stat: retrieve event queue entries histogram. * @read_pte: read MMU page table entry from DRAM. * @write_pte: write MMU page table entry to DRAM. 
- * @mmu_invalidate_cache: flush MMU STLB cache, either with soft (L1 only) or - * hard (L0 & L1) flush. + * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft + * (L1 only) or hard (L0 & L1) flush. * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with * ASID-VA-size mask. * @send_heartbeat: send is-alive packet to ArmCP and verify response. @@ -565,7 +565,8 @@ struct hl_asic_funcs { u32 *size); u64 (*read_pte)(struct hl_device *hdev, u64 addr); void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val); - void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard); + void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard, + u32 flags); void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 8ade9886a5a7..12db6609da27 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -944,7 +944,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, goto map_err; } - hdev->asic_funcs->mmu_invalidate_cache(hdev, false); + hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type); mutex_unlock(&ctx->mmu_lock); @@ -1060,7 +1060,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, *vm_type); mutex_unlock(&ctx->mmu_lock); -- cgit From 30919edef243e9dc91a3c65e5b1059d481e597e9 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 14 Nov 2019 18:23:54 +0000 Subject: habanalabs: re-factor MMU masks and documentation Some cosmetics around the MMU code to make it more self-explanatory. 
Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/debugfs.c | 6 ++-- .../habanalabs/include/hw_ip/mmu/mmu_general.h | 6 ++-- drivers/misc/habanalabs/mmu.c | 36 +++++++++++----------- 3 files changed, 23 insertions(+), 25 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 87f37ac31ccd..1e1fa619a225 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -345,7 +345,7 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, static inline u64 get_next_hop_addr(u64 curr_pte) { if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & PHYS_ADDR_MASK; + return curr_pte & HOP_PHYS_ADDR_MASK; else return ULLONG_MAX; } @@ -535,7 +535,7 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, { struct hl_ctx *ctx = hdev->compute_ctx; u64 hop_addr, hop_pte_addr, hop_pte; - u64 offset_mask = HOP4_MASK | OFFSET_MASK; + u64 offset_mask = HOP4_MASK | FLAGS_MASK; int rc = 0; if (!ctx) { @@ -579,7 +579,7 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - offset_mask = OFFSET_MASK; + offset_mask = FLAGS_MASK; } if (!(hop_pte & PAGE_PRESENT_MASK)) diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h index 71ea3c3e8ba3..74a5502b8c4e 100644 --- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h +++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h @@ -17,13 +17,12 @@ #define PAGE_PRESENT_MASK 0x0000000000001ull #define SWAP_OUT_MASK 0x0000000000004ull #define LAST_MASK 0x0000000000800ull -#define PHYS_ADDR_MASK 0xFFFFFFFFFFFFF000ull #define HOP0_MASK 0x3000000000000ull #define HOP1_MASK 0x0FF8000000000ull #define HOP2_MASK 0x0007FC0000000ull #define HOP3_MASK 
0x000003FE00000ull #define HOP4_MASK 0x00000001FF000ull -#define OFFSET_MASK 0x0000000000FFFull +#define FLAGS_MASK 0x0000000000FFFull #define HOP0_SHIFT 48 #define HOP1_SHIFT 39 @@ -31,8 +30,7 @@ #define HOP3_SHIFT 21 #define HOP4_SHIFT 12 -#define PTE_PHYS_ADDR_SHIFT 12 -#define PTE_PHYS_ADDR_MASK ~OFFSET_MASK +#define HOP_PHYS_ADDR_MASK (~FLAGS_MASK) #define HL_PTE_SIZE sizeof(u64) #define HOP_TABLE_SIZE PAGE_SIZE_4KB diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c index 176c315836f1..21b4e3281b3e 100644 --- a/drivers/misc/habanalabs/mmu.c +++ b/drivers/misc/habanalabs/mmu.c @@ -105,8 +105,8 @@ static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) * clear the 12 LSBs and translate the shadow hop to its associated * physical hop, and add back the original 12 LSBs. */ - u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) | - (val & OFFSET_MASK); + u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) | + (val & FLAGS_MASK); ctx->hdev->asic_funcs->write_pte(ctx->hdev, get_phys_addr(ctx, shadow_pte_addr), @@ -199,7 +199,7 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) { if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & PHYS_ADDR_MASK; + return curr_pte & HOP_PHYS_ADDR_MASK; else return ULLONG_MAX; } @@ -288,23 +288,23 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) } /* need only pte 0 in hops 0 and 1 */ - pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop0_addr, pte_val); - pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop1_addr, pte_val); get_pte(ctx, hop1_addr); hop2_pte_addr = hop2_addr; for (i = 0 ; i < num_of_hop3 ; i++) { - pte_val = (ctx->dram_default_hops[i] & 
PTE_PHYS_ADDR_MASK) | + pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop2_pte_addr, pte_val); get_pte(ctx, hop2_addr); hop2_pte_addr += HL_PTE_SIZE; } - pte_val = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) | + pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; for (i = 0 ; i < num_of_hop3 ; i++) { @@ -400,8 +400,6 @@ int hl_mmu_init(struct hl_device *hdev) if (!hdev->mmu_enable) return 0; - /* MMU H/W init was already done in device hw_init() */ - hdev->mmu_pgt_pool = gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); @@ -427,6 +425,8 @@ int hl_mmu_init(struct hl_device *hdev) goto err_pool_add; } + /* MMU H/W init will be done in device hw_init() */ + return 0; err_pool_add: @@ -450,10 +450,10 @@ void hl_mmu_fini(struct hl_device *hdev) if (!hdev->mmu_enable) return; + /* MMU H/W fini was already done in device hw_fini() */ + kvfree(hdev->mmu_shadow_hop0); gen_pool_destroy(hdev->mmu_pgt_pool); - - /* MMU H/W fini will be done in device hw_fini() */ } /** @@ -584,7 +584,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hdev->dram_default_page_mapping && is_dram_addr) { u64 default_pte = (prop->mmu_dram_default_page_addr & - PTE_PHYS_ADDR_MASK) | LAST_MASK | + HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; if (curr_pte == default_pte) { dev_err(hdev->dev, @@ -773,7 +773,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (hdev->dram_default_page_mapping && is_dram_addr) { u64 default_pte = (prop->mmu_dram_default_page_addr & - PTE_PHYS_ADDR_MASK) | LAST_MASK | + HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; if (curr_pte != default_pte) { @@ -813,7 +813,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, goto err; } - curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK + curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK | PAGE_PRESENT_MASK; if (is_huge) @@ 
-823,25 +823,25 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (hop1_new) { curr_pte = - (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop0_pte_addr, curr_pte); } if (hop2_new) { curr_pte = - (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop1_pte_addr, curr_pte); get_pte(ctx, hop1_addr); } if (hop3_new) { curr_pte = - (hop3_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop2_pte_addr, curr_pte); get_pte(ctx, hop2_addr); } if (!is_huge) { if (hop4_new) { - curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) | + curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; write_pte(ctx, hop3_pte_addr, curr_pte); get_pte(ctx, hop3_addr); -- cgit From 54bb67444ea3f388756c5955db52ef62eb4ba3b9 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 14 Nov 2019 18:23:55 +0000 Subject: habanalabs: split MMU properties to PCI/DRAM Split the properties used for MMU mappings to DRAM and PCI (host) types. This is a prerequisite for future ASICs support. Note that in Goya ASIC, the PMMU and DMMU are the same (except of page sizes) as only one MMU mechanism is used for both of the mapping types. Hence this patch should not have any effect on current behavior. 
Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/debugfs.c | 90 +++++++++---- drivers/misc/habanalabs/goya/goya.c | 17 +++ drivers/misc/habanalabs/habanalabs.h | 114 ++++++++++------ .../habanalabs/include/hw_ip/mmu/mmu_general.h | 1 - drivers/misc/habanalabs/memory.c | 45 ++++--- drivers/misc/habanalabs/mmu.c | 149 +++++++++++++-------- 6 files changed, 268 insertions(+), 148 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 1e1fa619a225..1cf75010a379 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -307,39 +307,51 @@ static inline u64 get_hop0_addr(struct hl_ctx *ctx) (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); } -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, + u64 virt_addr, u64 mask, u64 shift) { return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP0_MASK) >> HOP0_SHIFT); + ((virt_addr & mask) >> shift); } -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP1_MASK) >> HOP1_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask, + mmu_specs->hop0_shift); } -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP2_MASK) >> HOP2_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask, + mmu_specs->hop1_shift); } -static inline u64 
get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP3_MASK) >> HOP3_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask, + mmu_specs->hop2_shift); } -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr) +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) { - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & HOP4_MASK) >> HOP4_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask, + mmu_specs->hop3_shift); +} + +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask, + mmu_specs->hop4_shift); } static inline u64 get_next_hop_addr(u64 curr_pte) @@ -355,7 +367,10 @@ static int mmu_show(struct seq_file *s, void *data) struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; struct hl_ctx *ctx; + bool is_dram_addr; u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, @@ -377,33 +392,39 @@ static int mmu_show(struct seq_file *s, void *data) return 0; } + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? 
&prop->dmmu : &prop->pmmu; + mutex_lock(&ctx->mmu_lock); /* the following lookup is copied from unmap() in mmu.c */ hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); hop1_addr = get_next_hop_addr(hop0_pte); if (hop1_addr == ULLONG_MAX) goto not_mapped; - hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); hop2_addr = get_next_hop_addr(hop1_pte); if (hop2_addr == ULLONG_MAX) goto not_mapped; - hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); hop3_addr = get_next_hop_addr(hop2_pte); if (hop3_addr == ULLONG_MAX) goto not_mapped; - hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); if (!(hop3_pte & LAST_MASK)) { @@ -412,7 +433,8 @@ static int mmu_show(struct seq_file *s, void *data) if (hop4_addr == ULLONG_MAX) goto not_mapped; - hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr); if (!(hop4_pte & PAGE_PRESENT_MASK)) goto not_mapped; @@ -534,41 +556,50 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u64 *phys_addr) { struct hl_ctx *ctx = hdev->compute_ctx; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 hop_addr, hop_pte_addr, hop_pte; u64 offset_mask = HOP4_MASK | FLAGS_MASK; int rc = 0; + bool is_dram_addr; if (!ctx) { dev_err(hdev->dev, "no ctx 
available\n"); return -EINVAL; } + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + mutex_lock(&ctx->mmu_lock); /* hop 0 */ hop_addr = get_hop0_addr(ctx); - hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); /* hop 1 */ hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); /* hop 2 */ hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); /* hop 3 */ hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); if (!(hop_pte & LAST_MASK)) { @@ -576,7 +607,8 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, hop_addr = get_next_hop_addr(hop_pte); if (hop_addr == ULLONG_MAX) goto not_mapped; - hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr); + hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr, + virt_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); offset_mask = FLAGS_MASK; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3c22fb96a26f..3294a6a92f75 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c 
@@ -380,6 +380,23 @@ void goya_get_fixed_properties(struct hl_device *hdev) prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; + prop->dmmu.hop0_shift = HOP0_SHIFT; + prop->dmmu.hop1_shift = HOP1_SHIFT; + prop->dmmu.hop2_shift = HOP2_SHIFT; + prop->dmmu.hop3_shift = HOP3_SHIFT; + prop->dmmu.hop4_shift = HOP4_SHIFT; + prop->dmmu.hop0_mask = HOP0_MASK; + prop->dmmu.hop1_mask = HOP1_MASK; + prop->dmmu.hop2_mask = HOP2_MASK; + prop->dmmu.hop3_mask = HOP3_MASK; + prop->dmmu.hop4_mask = HOP4_MASK; + prop->dmmu.huge_page_size = PAGE_SIZE_2MB; + + /* No difference between PMMU and DMMU except of page size */ + memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); + prop->dmmu.page_size = PAGE_SIZE_2MB; + prop->pmmu.page_size = PAGE_SIZE_4KB; + prop->va_space_host_start_address = VA_HOST_SPACE_START; prop->va_space_host_end_address = VA_HOST_SPACE_END; prop->va_space_dram_start_address = VA_DDR_SPACE_START; diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 36d05c32f7ec..00c949f4ccd1 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -130,6 +130,36 @@ enum hl_device_hw_state { HL_DEVICE_HW_STATE_DIRTY }; +/** + * struct hl_mmu_properties - ASIC specific MMU address translation properties. + * @hop0_shift: shift of hop 0 mask. + * @hop1_shift: shift of hop 1 mask. + * @hop2_shift: shift of hop 2 mask. + * @hop3_shift: shift of hop 3 mask. + * @hop4_shift: shift of hop 4 mask. + * @hop0_mask: mask to get the PTE address in hop 0. + * @hop1_mask: mask to get the PTE address in hop 1. + * @hop2_mask: mask to get the PTE address in hop 2. + * @hop3_mask: mask to get the PTE address in hop 3. + * @hop4_mask: mask to get the PTE address in hop 4. + * @page_size: default page size used to allocate memory. + * @huge_page_size: page size used to allocate memory with huge pages. 
+ */ +struct hl_mmu_properties { + u64 hop0_shift; + u64 hop1_shift; + u64 hop2_shift; + u64 hop3_shift; + u64 hop4_shift; + u64 hop0_mask; + u64 hop1_mask; + u64 hop2_mask; + u64 hop3_mask; + u64 hop4_mask; + u32 page_size; + u32 huge_page_size; +}; + /** * struct asic_fixed_properties - ASIC specific immutable properties. * @hw_queues_props: H/W queues properties. @@ -137,6 +167,8 @@ enum hl_device_hw_state { * available sensors. * @uboot_ver: F/W U-boot version. * @preboot_ver: F/W Preboot version. + * @dmmu: DRAM MMU address translation properties. + * @pmmu: PCI (host) MMU address translation properties. * @sram_base_address: SRAM physical start address. * @sram_end_address: SRAM physical end address. * @sram_user_base_address - SRAM physical start address for user access. @@ -173,53 +205,55 @@ enum hl_device_hw_state { * @psoc_pci_pll_nf: PCI PLL NF value. * @psoc_pci_pll_od: PCI PLL OD value. * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value. - * @completion_queues_count: number of completion queues. * @high_pll: high PLL frequency used by the device. * @cb_pool_cb_cnt: number of CBs in the CB pool. * @cb_pool_cb_size: size of each CB in the CB pool. * @tpc_enabled_mask: which TPCs are enabled. + * @completion_queues_count: number of completion queues. 
*/ struct asic_fixed_properties { struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES]; - struct armcp_info armcp_info; - char uboot_ver[VERSION_MAX_LEN]; - char preboot_ver[VERSION_MAX_LEN]; - u64 sram_base_address; - u64 sram_end_address; - u64 sram_user_base_address; - u64 dram_base_address; - u64 dram_end_address; - u64 dram_user_base_address; - u64 dram_size; - u64 dram_pci_bar_size; - u64 max_power_default; - u64 va_space_host_start_address; - u64 va_space_host_end_address; - u64 va_space_dram_start_address; - u64 va_space_dram_end_address; - u64 dram_size_for_default_page_mapping; - u64 pcie_dbi_base_address; - u64 pcie_aux_dbi_reg_addr; - u64 mmu_pgt_addr; - u64 mmu_dram_default_page_addr; - u32 mmu_pgt_size; - u32 mmu_pte_size; - u32 mmu_hop_table_size; - u32 mmu_hop0_tables_total_size; - u32 dram_page_size; - u32 cfg_size; - u32 sram_size; - u32 max_asid; - u32 num_of_events; - u32 psoc_pci_pll_nr; - u32 psoc_pci_pll_nf; - u32 psoc_pci_pll_od; - u32 psoc_pci_pll_div_factor; - u32 high_pll; - u32 cb_pool_cb_cnt; - u32 cb_pool_cb_size; - u8 completion_queues_count; - u8 tpc_enabled_mask; + struct armcp_info armcp_info; + char uboot_ver[VERSION_MAX_LEN]; + char preboot_ver[VERSION_MAX_LEN]; + struct hl_mmu_properties dmmu; + struct hl_mmu_properties pmmu; + u64 sram_base_address; + u64 sram_end_address; + u64 sram_user_base_address; + u64 dram_base_address; + u64 dram_end_address; + u64 dram_user_base_address; + u64 dram_size; + u64 dram_pci_bar_size; + u64 max_power_default; + u64 va_space_host_start_address; + u64 va_space_host_end_address; + u64 va_space_dram_start_address; + u64 va_space_dram_end_address; + u64 dram_size_for_default_page_mapping; + u64 pcie_dbi_base_address; + u64 pcie_aux_dbi_reg_addr; + u64 mmu_pgt_addr; + u64 mmu_dram_default_page_addr; + u32 mmu_pgt_size; + u32 mmu_pte_size; + u32 mmu_hop_table_size; + u32 mmu_hop0_tables_total_size; + u32 dram_page_size; + u32 cfg_size; + u32 sram_size; + u32 max_asid; + u32 num_of_events; + 
u32 psoc_pci_pll_nr; + u32 psoc_pci_pll_nf; + u32 psoc_pci_pll_od; + u32 psoc_pci_pll_div_factor; + u32 high_pll; + u32 cb_pool_cb_cnt; + u32 cb_pool_cb_size; + u8 tpc_enabled_mask; + u8 completion_queues_count; }; /** diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h index 74a5502b8c4e..a6851a9d3f03 100644 --- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h +++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h @@ -12,7 +12,6 @@ #define PAGE_SHIFT_2MB 21 #define PAGE_SIZE_2MB (_AC(1, UL) << PAGE_SHIFT_2MB) #define PAGE_SIZE_4KB (_AC(1, UL) << PAGE_SHIFT_4KB) -#define PAGE_MASK_2MB (~(PAGE_SIZE_2MB - 1)) #define PAGE_PRESENT_MASK 0x0000000000001ull #define SWAP_OUT_MASK 0x0000000000004ull diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 12db6609da27..cce6bdb6e655 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -13,7 +13,6 @@ #include #include -#define PGS_IN_2MB_PAGE (PAGE_SIZE_2MB >> PAGE_SHIFT) #define HL_MMU_DEBUG 0 /* @@ -516,8 +515,8 @@ static inline int add_va_block(struct hl_device *hdev, * - Return the start address of the virtual block */ static u64 get_va_block(struct hl_device *hdev, - struct hl_va_range *va_range, u64 size, u64 hint_addr, - bool is_userptr) + struct hl_va_range *va_range, u64 size, u64 hint_addr, + bool is_userptr) { struct hl_vm_va_block *va_block, *new_va_block = NULL; u64 valid_start, valid_size, prev_start, prev_end, page_mask, @@ -525,18 +524,17 @@ static u64 get_va_block(struct hl_device *hdev, u32 page_size; bool add_prev = false; - if (is_userptr) { + if (is_userptr) /* * We cannot know if the user allocated memory with huge pages * or not, hence we continue with the biggest possible * granularity. 
*/ - page_size = PAGE_SIZE_2MB; - page_mask = PAGE_MASK_2MB; - } else { - page_size = hdev->asic_prop.dram_page_size; - page_mask = ~((u64)page_size - 1); - } + page_size = hdev->asic_prop.pmmu.huge_page_size; + else + page_size = hdev->asic_prop.dmmu.page_size; + + page_mask = ~((u64)page_size - 1); mutex_lock(&va_range->lock); @@ -558,7 +556,6 @@ static u64 get_va_block(struct hl_device *hdev, if (valid_size >= size && (!new_va_block || valid_size < res_valid_size)) { - new_va_block = va_block; res_valid_start = valid_start; res_valid_size = valid_size; @@ -629,7 +626,7 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) /* * init_phys_pg_pack_from_userptr - initialize physical page pack from host * memory - * @asid: current context ASID + * @ctx: current context * @userptr: userptr to initialize from * @pphys_pg_pack: result pointer * @@ -638,16 +635,20 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) * - Create a physical page pack from the physical pages related to the given * virtual block */ -static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr, +static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, + struct hl_userptr *userptr, struct hl_vm_phys_pg_pack **pphys_pg_pack) { + struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu; struct hl_vm_phys_pg_pack *phys_pg_pack; struct scatterlist *sg; dma_addr_t dma_addr; u64 page_mask, total_npages; - u32 npages, page_size = PAGE_SIZE; + u32 npages, page_size = PAGE_SIZE, + huge_page_size = mmu_prop->huge_page_size; bool first = true, is_huge_page_opt = true; int rc, i, j; + u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); if (!phys_pg_pack) @@ -655,7 +656,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr, phys_pg_pack->vm_type = userptr->vm_type; phys_pg_pack->created_from_userptr = true; - phys_pg_pack->asid = asid; + 
phys_pg_pack->asid = ctx->asid; atomic_set(&phys_pg_pack->mapping_cnt, 1); /* Only if all dma_addrs are aligned to 2MB and their @@ -670,14 +671,14 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr, total_npages += npages; - if ((npages % PGS_IN_2MB_PAGE) || - (dma_addr & (PAGE_SIZE_2MB - 1))) + if ((npages % pgs_in_huge_page) || + (dma_addr & (huge_page_size - 1))) is_huge_page_opt = false; } if (is_huge_page_opt) { - page_size = PAGE_SIZE_2MB; - total_npages /= PGS_IN_2MB_PAGE; + page_size = huge_page_size; + do_div(total_npages, pgs_in_huge_page); } page_mask = ~(((u64) page_size) - 1); @@ -709,7 +710,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr, dma_addr += page_size; if (is_huge_page_opt) - npages -= PGS_IN_2MB_PAGE; + npages -= pgs_in_huge_page; else npages--; } @@ -872,7 +873,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, return rc; } - rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr, + rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack); if (rc) { dev_err(hdev->dev, @@ -1029,7 +1030,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) if (*vm_type == VM_TYPE_USERPTR) { is_userptr = true; userptr = hnode->ptr; - rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr, + rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack); if (rc) { dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c index 21b4e3281b3e..3a7f8ff19eb2 100644 --- a/drivers/misc/habanalabs/mmu.c +++ b/drivers/misc/habanalabs/mmu.c @@ -171,29 +171,44 @@ static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, ((virt_addr & mask) >> shift); } -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, 
HOP0_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask, + mmu_prop->hop0_shift); } -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask, + mmu_prop->hop1_shift); } -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask, + mmu_prop->hop2_shift); } -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask, + mmu_prop->hop3_shift); } -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) { - return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT); + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask, + mmu_prop->hop4_shift); } static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) @@ -513,24 +528,23 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) mutex_destroy(&ctx->mmu_lock); } -static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) +static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = 
&hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 hop0_addr = 0, hop0_pte_addr = 0, hop1_addr = 0, hop1_pte_addr = 0, hop2_addr = 0, hop2_pte_addr = 0, hop3_addr = 0, hop3_pte_addr = 0, hop4_addr = 0, hop4_pte_addr = 0, curr_pte; - bool is_dram_addr, is_huge, clear_hop3 = true; + bool is_huge, clear_hop3 = true; - is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; @@ -539,7 +553,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop1_addr == ULLONG_MAX) goto not_mapped; - hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; @@ -548,7 +562,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop2_addr == ULLONG_MAX) goto not_mapped; - hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; @@ -557,7 +571,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop3_addr == ULLONG_MAX) goto not_mapped; - hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; @@ -575,7 +589,8 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) if (hop4_addr == ULLONG_MAX) goto not_mapped; - hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; @@ -667,25 
+682,36 @@ not_mapped: int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) { struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 real_virt_addr; u32 real_page_size, npages; int i, rc; + bool is_dram_addr; if (!hdev->mmu_enable) return 0; + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + /* - * The H/W handles mapping of 4KB/2MB page. Hence if the host page size - * is bigger, we break it to sub-pages and unmap them separately. + * The H/W handles mapping of specific page sizes. Hence if the page + * size is bigger, we break it to sub-pages and unmap them separately. */ - if ((page_size % PAGE_SIZE_2MB) == 0) { - real_page_size = PAGE_SIZE_2MB; - } else if ((page_size % PAGE_SIZE_4KB) == 0) { - real_page_size = PAGE_SIZE_4KB; + if ((page_size % mmu_prop->huge_page_size) == 0) { + real_page_size = mmu_prop->huge_page_size; + } else if ((page_size % mmu_prop->page_size) == 0) { + real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not 4KB nor 2MB aligned, can't unmap\n", - page_size); + "page size of %u is not %uKB nor %uMB aligned, can't unmap\n", + page_size, + mmu_prop->page_size >> 10, + mmu_prop->huge_page_size >> 20); return -EFAULT; } @@ -694,7 +720,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) real_virt_addr = virt_addr; for (i = 0 ; i < npages ; i++) { - rc = _hl_mmu_unmap(ctx, real_virt_addr); + rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr); if (rc) return rc; @@ -705,10 +731,11 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) } static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, - u32 page_size) + u32 page_size, bool is_dram_addr) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties 
*prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 hop0_addr = 0, hop0_pte_addr = 0, hop1_addr = 0, hop1_pte_addr = 0, hop2_addr = 0, hop2_pte_addr = 0, @@ -716,21 +743,19 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, hop4_addr = 0, hop4_pte_addr = 0, curr_pte = 0; bool hop1_new = false, hop2_new = false, hop3_new = false, - hop4_new = false, is_huge, is_dram_addr; + hop4_new = false, is_huge; int rc = -ENOMEM; + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + /* - * This mapping function can map a 4KB/2MB page. For 2MB page there are - * only 3 hops rather than 4. Currently the DRAM allocation uses 2MB - * pages only but user memory could have been allocated with one of the - * two page sizes. Since this is a common code for all the three cases, - * we need this hugs page check. + * This mapping function can map a page or a huge page. For huge page + * there are only 3 hops rather than 4. Currently the DRAM allocation + * uses huge pages only but user memory could have been allocated with + * one of the two page sizes. Since this is a common code for all the + * three cases, we need this hugs page check. 
*/ - is_huge = page_size == PAGE_SIZE_2MB; - - is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + is_huge = page_size == mmu_prop->huge_page_size; if (is_dram_addr && !is_huge) { dev_err(hdev->dev, "DRAM mapping should use huge pages only\n"); @@ -738,28 +763,28 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, } hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); if (hop1_addr == ULLONG_MAX) goto err; - hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new); if (hop2_addr == ULLONG_MAX) goto err; - hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new); if (hop3_addr == ULLONG_MAX) goto err; - hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; if (!is_huge) { @@ -767,7 +792,8 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (hop4_addr == ULLONG_MAX) goto err; - hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; } @@ -890,25 +916,36 @@ err: int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) { struct hl_device *hdev = ctx->hdev; + struct 
asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; u64 real_virt_addr, real_phys_addr; u32 real_page_size, npages; int i, rc, mapped_cnt = 0; + bool is_dram_addr; if (!hdev->mmu_enable) return 0; + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->va_space_dram_start_address, + prop->va_space_dram_end_address); + + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + /* - * The H/W handles mapping of 4KB/2MB page. Hence if the host page size - * is bigger, we break it to sub-pages and map them separately. + * The H/W handles mapping of specific page sizes. Hence if the page + * size is bigger, we break it to sub-pages and map them separately. */ - if ((page_size % PAGE_SIZE_2MB) == 0) { - real_page_size = PAGE_SIZE_2MB; - } else if ((page_size % PAGE_SIZE_4KB) == 0) { - real_page_size = PAGE_SIZE_4KB; + if ((page_size % mmu_prop->huge_page_size) == 0) { + real_page_size = mmu_prop->huge_page_size; + } else if ((page_size % mmu_prop->page_size) == 0) { + real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not 4KB nor 2MB aligned, can't map\n", - page_size); + "page size of %u is not %dKB nor %dMB aligned, can't unmap\n", + page_size, + mmu_prop->page_size >> 10, + mmu_prop->huge_page_size >> 20); return -EFAULT; } @@ -923,7 +960,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) for (i = 0 ; i < npages ; i++) { rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr, - real_page_size); + real_page_size, is_dram_addr); if (rc) goto err; @@ -937,7 +974,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) err: real_virt_addr = virt_addr; for (i = 0 ; i < mapped_cnt ; i++) { - if (_hl_mmu_unmap(ctx, real_virt_addr)) + if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr)) dev_warn_ratelimited(hdev->dev, "failed to unmap va: 0x%llx\n", real_virt_addr); -- cgit From bc75d799f9531fe112f7d78a5547ad549293fb7e 
Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 14 Nov 2019 18:23:56 +0000 Subject: habanalabs: prevent read/write from/to the device during hard reset During hard reset we should not access the device except for necessary reset operations because the device might be stuck or unresponsive. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3294a6a92f75..2935e84fe7d8 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4870,7 +4870,8 @@ static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 status, timeout_usec; int rc; - if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + if (!(goya->hw_cap_initialized & HW_CAP_MMU) || + hdev->hard_reset_pending) return; /* no need in L1 only invalidation in Goya */ @@ -4909,7 +4910,8 @@ static void goya_mmu_invalidate_cache_range(struct hl_device *hdev, u32 status, timeout_usec, inv_data, pi; int rc; - if (!(goya->hw_cap_initialized & HW_CAP_MMU)) + if (!(goya->hw_cap_initialized & HW_CAP_MMU) || + hdev->hard_reset_pending) return; /* no need in L1 only invalidation in Goya */ -- cgit From 1b98d8b23f29e5ff82fe84ca0c8acffaa971b9d6 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 14 Nov 2019 18:23:57 +0000 Subject: habanalabs: optimize MMU unmap Reduce context close time by skipping hash table lookup if possible in order to avoid hard reset with open contexts. Reset with open contexts can potentially lead to a kernel crash as the generic pool of the MMU hops is destroyed while it is not empty because some unmap operations are not done. This commit mainly affects runs on the simulator. 
Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/mmu.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c index 3a7f8ff19eb2..6262b26e2086 100644 --- a/drivers/misc/habanalabs/mmu.c +++ b/drivers/misc/habanalabs/mmu.c @@ -25,10 +25,9 @@ static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) return pgt_info; } -static void free_hop(struct hl_ctx *ctx, u64 hop_addr) +static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) { struct hl_device *hdev = ctx->hdev; - struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr, hdev->asic_prop.mmu_hop_table_size); @@ -37,6 +36,13 @@ static void free_hop(struct hl_ctx *ctx, u64 hop_addr) kfree(pgt_info); } +static void free_hop(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); + + _free_hop(ctx, pgt_info); +} + static u64 alloc_hop(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; @@ -159,7 +165,7 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) */ num_of_ptes_left = pgt_info->num_of_ptes; if (!num_of_ptes_left) - free_hop(ctx, hop_addr); + _free_hop(ctx, pgt_info); return num_of_ptes_left; } @@ -516,13 +522,14 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) dram_default_mapping_fini(ctx); if (!hash_empty(ctx->mmu_shadow_hash)) - dev_err(hdev->dev, "ctx is freed while it has pgts in use\n"); + dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n", + ctx->asid); hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) { - dev_err(hdev->dev, + dev_err_ratelimited(hdev->dev, "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); - free_hop(ctx, pgt_info->shadow_addr); + _free_hop(ctx, pgt_info); } 
mutex_destroy(&ctx->mmu_lock); -- cgit From 71c5e55e7c077fa17c42fbda91a8d14322825c44 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 14 Nov 2019 18:23:57 +0000 Subject: habanalabs: skip VA block list update in reset flow Reduce context close time by skipping the VA block free list update in order to avoid hard reset with open contexts. Reset with open contexts can potentially lead to a kernel crash as the generic pool of the MMU hops is destroyed while it is not empty because some unmap operations are not done. This commit mainly affects runs on the simulator. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/memory.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index cce6bdb6e655..e6412e13145e 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -993,17 +993,19 @@ init_page_pack_err: * * @ctx : current context * @vaddr : device virtual address to unmap + * @ctx_free : true if in context free flow, false otherwise. 
* * This function does the following: * - Unmap the physical pages related to the given virtual address * - return the device virtual block to the virtual block list */ -static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) +static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) { struct hl_device *hdev = ctx->hdev; struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; struct hl_vm_hash_node *hnode = NULL; struct hl_userptr *userptr = NULL; + struct hl_va_range *va_range; enum vm_type_t *vm_type; bool is_userptr; int rc; @@ -1029,6 +1031,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) if (*vm_type == VM_TYPE_USERPTR) { is_userptr = true; + va_range = &ctx->host_va_range; userptr = hnode->ptr; rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack); @@ -1040,6 +1043,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) } } else if (*vm_type == VM_TYPE_PHYS_PACK) { is_userptr = false; + va_range = &ctx->dram_va_range; phys_pg_pack = hnode->ptr; } else { dev_warn(hdev->dev, @@ -1065,12 +1069,18 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) mutex_unlock(&ctx->mmu_lock); - if (add_va_block(hdev, - is_userptr ? 
&ctx->host_va_range : &ctx->dram_va_range, - vaddr, - vaddr + phys_pg_pack->total_size - 1)) - dev_warn(hdev->dev, "add va block failed for vaddr: 0x%llx\n", - vaddr); + /* + * No point in maintaining the free VA block list if the context is + * closing as the list will be freed anyway + */ + if (!ctx_free) { + rc = add_va_block(hdev, va_range, vaddr, + vaddr + phys_pg_pack->total_size - 1); + if (rc) + dev_warn(hdev->dev, + "add va block failed for vaddr: 0x%llx\n", + vaddr); + } atomic_dec(&phys_pg_pack->mapping_cnt); kfree(hnode); @@ -1202,8 +1212,8 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) break; case HL_MEM_OP_UNMAP: - rc = unmap_device_va(ctx, - args->in.unmap.device_virt_addr); + rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr, + false); break; default: @@ -1650,7 +1660,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) dev_dbg(hdev->dev, "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n", hnode->vaddr, ctx->asid); - unmap_device_va(ctx, hnode->vaddr); + unmap_device_va(ctx, hnode->vaddr, true); } spin_lock(&vm->idr_lock); -- cgit From bea84c4d67e5efd1078ef234ef1304a4d1788008 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 14 Nov 2019 18:23:58 +0000 Subject: habanalabs: invalidate MMU cache only once Reduce context close time by performing MMU cache invalidation once at the end of the unmap loop rather than in each iteration, in order to avoid hard reset with open contexts. Reset with open contexts can potentially lead to a kernel crash as the generic pool of the MMU hops is destroyed while it is not empty because some unmap operations are not done. This commit mainly affects runs on the simulator. 
Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/memory.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index e6412e13145e..47e38c6f2d64 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -1065,7 +1065,13 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); - hdev->asic_funcs->mmu_invalidate_cache(hdev, true, *vm_type); + /* + * During context free this function is called in a loop to clean all + * the context mappings. Hence the cache invalidation can be called once + * at the loop end rather than for each iteration + */ + if (!ctx_free) + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, *vm_type); mutex_unlock(&ctx->mmu_lock); @@ -1663,6 +1669,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) unmap_device_va(ctx, hnode->vaddr, true); } + /* invalidate the cache once after the unmapping loop */ + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK); + spin_lock(&vm->idr_lock); idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) if (phys_pg_list->asid == ctx->asid) { -- cgit From e604f551cdce07e45b6ca34eab58648185b3fba0 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 14 Nov 2019 18:23:59 +0000 Subject: habanalabs: remove unnecessary checks Now that the VA block free list is not updated on context close in order to optimize this flow, no need in the sanity checks of the list contents as these will fail for sure. In addition, remove the "context closing with VA in use" print during hard reset as this situation is a side effect of the failure that caused the hard reset. 
Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/memory.c | 40 +++++++++------------------------------- 1 file changed, 9 insertions(+), 31 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 47e38c6f2d64..6c72cb4eff54 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -544,7 +544,6 @@ static u64 get_va_block(struct hl_device *hdev, /* calc the first possible aligned addr */ valid_start = va_block->start; - if (valid_start & (page_size - 1)) { valid_start &= page_mask; valid_start += page_size; @@ -1588,43 +1587,16 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) * @hdev : pointer to the habanalabs structure * va_range : pointer to virtual addresses range * - * This function initializes the following: - * - Checks that the given range contains the whole initial range + * This function does the following: * - Frees the virtual addresses block list and its lock */ static void hl_va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range) { - struct hl_vm_va_block *va_block; - - if (list_empty(&va_range->list)) { - dev_warn(hdev->dev, - "va list should not be empty on cleanup!\n"); - goto out; - } - - if (!list_is_singular(&va_range->list)) { - dev_warn(hdev->dev, - "va list should not contain multiple blocks on cleanup!\n"); - goto free_va_list; - } - - va_block = list_first_entry(&va_range->list, typeof(*va_block), node); - - if (va_block->start != va_range->start_addr || - va_block->end != va_range->end_addr) { - dev_warn(hdev->dev, - "wrong va block on cleanup, from 0x%llx to 0x%llx\n", - va_block->start, va_block->end); - goto free_va_list; - } - -free_va_list: mutex_lock(&va_range->lock); clear_va_list_locked(hdev, &va_range->list); mutex_unlock(&va_range->lock); -out: mutex_destroy(&va_range->lock); } @@ -1659,8 +1631,14 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) 
hl_debugfs_remove_ctx_mem_hash(hdev, ctx); - if (!hash_empty(ctx->mem_hash)) - dev_notice(hdev->dev, "ctx is freed while it has va in use\n"); + /* + * Clearly something went wrong on hard reset so no point in printing + * another side effect error + */ + if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash)) + dev_notice(hdev->dev, + "ctx %d is freed while it has va in use\n", + ctx->asid); hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { dev_dbg(hdev->dev, -- cgit From 7fbdc12b91110d21e15b84d5acf8402ae608d8c1 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 16 Nov 2019 12:24:19 +0200 Subject: habanalabs: remove prints on successful device initialization Successful device initialization is mentioned in kernel log with the message "Successfully added device to habanalabs driver". There is no point of spamming the log with additional messages about successful queue testing, which are implied by the above mentioned message. Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/firmware_if.c | 5 +---- drivers/misc/habanalabs/goya/goya.c | 3 --- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index ea2ca67fbfbf..f5bd03171dac 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -143,10 +143,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev) sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); if (!rc) { - if (result == ARMCP_PACKET_FENCE_VAL) - dev_info(hdev->dev, - "queue test on CPU queue succeeded\n"); - else + if (result != ARMCP_PACKET_FENCE_VAL) dev_err(hdev->dev, "CPU queue test failed (0x%08lX)\n", result); } else { diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 2935e84fe7d8..70bdaeffb6ce 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3006,9 +3006,6 @@ 
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", hw_queue_id, (unsigned long long) fence_dma_addr, tmp); rc = -EIO; - } else { - dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n", - hw_queue_id); } free_pkt: -- cgit From da1342a0eec038dc466742e662218f6be349d1b7 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 16 Nov 2019 12:26:30 +0200 Subject: habanalabs: use defines for F/W files Make the code more concise and maintainable by using defines for the F/W files. Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/goya/goya.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 70bdaeffb6ce..c8d16aa4382c 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -72,6 +72,9 @@ * */ +#define GOYA_UBOOT_FW_FILE "habanalabs/goya/goya-u-boot.bin" +#define GOYA_LINUX_FW_FILE "habanalabs/goya/goya-fit.itb" + #define GOYA_MMU_REGS_NUM 63 #define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ @@ -2163,13 +2166,11 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) */ static int goya_push_uboot_to_device(struct hl_device *hdev) { - char fw_name[200]; void __iomem *dst; - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin"); dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET; - return hl_fw_push_fw_to_device(hdev, fw_name, dst); + return hl_fw_push_fw_to_device(hdev, GOYA_UBOOT_FW_FILE, dst); } /* @@ -2182,13 +2183,11 @@ static int goya_push_uboot_to_device(struct hl_device *hdev) */ static int goya_push_linux_to_device(struct hl_device *hdev) { - char fw_name[200]; void __iomem *dst; - snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb"); dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET; - return hl_fw_push_fw_to_device(hdev, fw_name, 
dst); + return hl_fw_push_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst); } static int goya_pldm_init_cpu(struct hl_device *hdev) -- cgit From e16ee4103770acf365372886eac7c750017c918e Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 16 Nov 2019 12:29:50 +0200 Subject: habanalabs: make code more concise Instead of doing if inside if, just write them with && operator. Signed-off-by: Oded Gabbay Reviewed-by: Omer Shpigelman --- drivers/misc/habanalabs/habanalabs_ioctl.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 02d7491fa28f..5d9c269d99db 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -60,7 +60,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask; hw_ip.sram_size = prop->sram_size - sram_kmd_size; hw_ip.dram_size = prop->dram_size - dram_kmd_size; - if (hw_ip.dram_size > 0) + if (hw_ip.dram_size > PAGE_SIZE) hw_ip.dram_enabled = 1; hw_ip.num_of_events = prop->num_of_events; @@ -184,17 +184,14 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args) goto out; } - if (output) { - if (copy_to_user((void __user *) (uintptr_t) args->output_ptr, - output, - args->output_size)) { - dev_err(hdev->dev, - "copy to user failed in debug ioctl\n"); - rc = -EFAULT; - goto out; - } + if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr, + output, args->output_size)) { + dev_err(hdev->dev, "copy to user failed in debug ioctl\n"); + rc = -EFAULT; + goto out; } + out: kfree(params); kfree(output); @@ -434,9 +431,8 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, retcode = func(hpriv, kdata); - if (cmd & IOC_OUT) - if (copy_to_user((void __user *)arg, kdata, usize)) - retcode = -EFAULT; + if ((cmd & IOC_OUT) && 
copy_to_user((void __user *)arg, kdata, usize)) + retcode = -EFAULT; out_err: if (retcode) -- cgit From 52c01b0137193ab0c9282ec8d09c6338446e6e9f Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Sun, 3 Nov 2019 16:26:44 +0200 Subject: habanalabs: expose reset counters via existing INFO IOCTL Expose both soft and hard reset counts via INFO IOCTL. This will allow system management applications to easily check if the device has undergone reset. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/habanalabs_ioctl.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c index 5d9c269d99db..6474b868ef27 100644 --- a/drivers/misc/habanalabs/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/habanalabs_ioctl.c @@ -242,6 +242,22 @@ static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args) min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0; } +static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_reset_count reset_count = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + reset_count.hard_reset_cnt = hdev->hard_reset_cnt; + reset_count.soft_reset_cnt = hdev->soft_reset_cnt; + + return copy_to_user(out, &reset_count, + min((size_t) max_size, sizeof(reset_count))) ? 
-EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -260,6 +276,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_DEVICE_STATUS: return device_status_info(hdev, args); + case HL_INFO_RESET_COUNT: + return get_reset_count(hdev, args); + default: break; } -- cgit From 1af69d30c41d0b0f15d8be80c100cefaa909816c Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 17 Nov 2019 17:35:49 +0200 Subject: habanalabs: make the reset code more consistent In the hl_device_reset we ask about the hard_reset argument when we want to differentiate between soft and hard reset, except for three places where we use "from_hard_reset_thread". Replace one of those locations with the hard_reset argument as it is guaranteed that if we reach that line in the code during hard_reset, it is from a kernel thread. Signed-off-by: Oded Gabbay Reviewed-by: Tomer Tayar --- drivers/misc/habanalabs/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 2f5a4da707e7..80205d8584ce 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -891,7 +891,7 @@ again: * can't really exit until all its CSs are done, which is what we * do in cs rollback */ - if (from_hard_reset_thread) + if (hard_reset) device_kill_open_processes(hdev); /* Release kernel context */ -- cgit From 55f6d680970ea922d4ee23d5ac88d3a8046221fb Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 17 Nov 2019 17:41:57 +0200 Subject: habanalabs: flush EQ workers in hard reset During hard-reset, there can be multiple events received from the H/W. For each event, the driver opens a worker thread to handle it. For some of the events, the driver will read/write registers in the code that handles the event. 
In case of hard-reset, we must prevent reads/writes to the registers during the reset operation because the device might get stuck if that happens. Therefore, flush the EQ workers before resetting the device (in hard-reset only). Additional events won't arrive as we synced and disabled the interrupts. Signed-off-by: Oded Gabbay Reviewed-by: Tomer Tayar --- drivers/misc/habanalabs/device.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 80205d8584ce..b155e9549076 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -887,13 +887,19 @@ again: /* Go over all the queues, release all CS and their jobs */ hl_cs_rollback_all(hdev); - /* Kill processes here after CS rollback. This is because the process - * can't really exit until all its CSs are done, which is what we - * do in cs rollback - */ - if (hard_reset) + if (hard_reset) { + /* Kill processes here after CS rollback. This is because the + * process can't really exit until all its CSs are done, which + * is what we do in cs rollback + */ device_kill_open_processes(hdev); + /* Flush the Event queue workers to make sure no other thread is + * reading or writing to registers during the reset + */ + flush_workqueue(hdev->eq_wq); + } + /* Release kernel context */ if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1)) hdev->kernel_ctx = NULL; -- cgit From 5feccddcf9922ee3c25587d5e609bf58503ad93e Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 18 Nov 2019 09:41:08 +0200 Subject: habanalabs: add more protection of device during reset Prevent accesses to the device (register read/write) from debugfs entries during reset as that can cause the device to get stuck. 
Signed-off-by: Oded Gabbay Reviewed-by: Tomer Tayar --- drivers/misc/habanalabs/debugfs.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers/misc') diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 1cf75010a379..20413e350343 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -528,6 +528,12 @@ static int engines_show(struct seq_file *s, void *data) struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, + "Can't check device idle during reset\n"); + return 0; + } + hdev->asic_funcs->is_device_idle(hdev, NULL, s); return 0; @@ -640,6 +646,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, u32 val; ssize_t rc; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); + return 0; + } + if (*ppos) return 0; @@ -669,6 +680,11 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, u32 value; ssize_t rc; + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); + return 0; + } + rc = kstrtouint_from_user(buf, count, 16, &value); if (rc) return rc; -- cgit