diff options
author | Gustavo Padovan <gustavo.padovan@collabora.com> | 2018-06-20 13:22:22 -0300 |
---|---|---|
committer | Gustavo Padovan <gustavo.padovan@collabora.com> | 2018-06-20 13:22:22 -0300 |
commit | d98c71dadc2d0debdb80beb5a478baf1e6f98758 (patch) | |
tree | bf873c28d3acced1814f8b2dba4ae69d2ed77333 /drivers | |
parent | d67b6a2065076d763c7df626b8c54f16038ad862 (diff) | |
parent | daf0678c2036c918f01e4aa6035629d2debc2f30 (diff) |
Merge drm-upstream/drm-next into drm-misc-next
We got a few conflicts in drm_atomic.c after merging the DRM writeback support,
now we need a backmerge to unlock develop development on drm-misc-next.
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Diffstat (limited to 'drivers')
1170 files changed, 54832 insertions, 22864 deletions
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 1ff17799769d..6389c88b3500 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -698,7 +698,7 @@ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, DPRINTK("ENTER\n"); - ahci_stop_engine(ap); + hpriv->stop_engine(ap); rc = sata_link_hardreset(link, sata_ehc_deb_timing(&link->eh_context), deadline, &online, NULL); @@ -724,7 +724,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class, bool online; int rc; - ahci_stop_engine(ap); + hpriv->stop_engine(ap); /* clear D2H reception area to properly wait for D2H FIS */ ata_tf_init(link->device, &tf); @@ -788,7 +788,7 @@ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, DPRINTK("ENTER\n"); - ahci_stop_engine(ap); + hpriv->stop_engine(ap); for (i = 0; i < 2; i++) { u16 val; diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 4356ef1d28a8..824bd399f02e 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -350,7 +350,6 @@ struct ahci_host_priv { u32 em_msg_type; /* EM message type */ bool got_runtime_pm; /* Did we do pm_runtime_get? */ struct clk *clks[AHCI_MAX_CLKS]; /* Optional */ - struct reset_control *rsts; /* Optional */ struct regulator **target_pwrs; /* Optional */ /* * If platform uses PHYs. There is a 1:1 relation between the port number and @@ -366,6 +365,13 @@ struct ahci_host_priv { * be overridden anytime before the host is activated. */ void (*start_engine)(struct ata_port *ap); + /* + * Optional ahci_stop_engine override, if not set this gets set to the + * default ahci_stop_engine during ahci_save_initial_config, this can + * be overridden anytime before the host is activated. + */ + int (*stop_engine)(struct ata_port *ap); + irqreturn_t (*irq_handler)(int irq, void *dev_instance); /* only required for per-port MSI(-X) support */ diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index de7128d81e9c..0045dacd814b 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -62,6 +62,60 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv) writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); } +/** + * ahci_mvebu_stop_engine + * + * @ap: Target ata port + * + * Errata Ref#226 - SATA Disk HOT swap issue when connected through + * Port Multiplier in FIS-based Switching mode. + * + * To avoid the issue, according to design, the bits[11:8, 0] of + * register PxFBS are cleared when Port Command and Status (0x18) bit[0] + * changes its value from 1 to 0, i.e. falling edge of Port + * Command and Status bit[0] sends PULSE that resets PxFBS + * bits[11:8; 0]. + * + * This function is used to override function of "ahci_stop_engine" + * from libahci.c by adding the mvebu work around(WA) to save PxFBS + * value before the PxCMD ST write of 0, then restore PxFBS value. + * + * Return: 0 on success; Error code otherwise. + */ +int ahci_mvebu_stop_engine(struct ata_port *ap) +{ + void __iomem *port_mmio = ahci_port_base(ap); + u32 tmp, port_fbs; + + tmp = readl(port_mmio + PORT_CMD); + + /* check if the HBA is idle */ + if ((tmp & (PORT_CMD_START | PORT_CMD_LIST_ON)) == 0) + return 0; + + /* save the port PxFBS register for later restore */ + port_fbs = readl(port_mmio + PORT_FBS); + + /* setting HBA to idle */ + tmp &= ~PORT_CMD_START; + writel(tmp, port_mmio + PORT_CMD); + + /* + * bit #15 PxCMD signal doesn't clear PxFBS, + * restore the PxFBS register right after clearing the PxCMD ST, + * no need to wait for the PxCMD bit #15. + */ + writel(port_fbs, port_mmio + PORT_FBS); + + /* wait for engine to stop. This could be as long as 500 msec */ + tmp = ata_wait_register(ap, port_mmio + PORT_CMD, + PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500); + if (tmp & PORT_CMD_LIST_ON) + return -EIO; + + return 0; +} + #ifdef CONFIG_PM_SLEEP static int ahci_mvebu_suspend(struct platform_device *pdev, pm_message_t state) { @@ -112,6 +166,8 @@ static int ahci_mvebu_probe(struct platform_device *pdev) if (rc) return rc; + hpriv->stop_engine = ahci_mvebu_stop_engine; + if (of_device_is_compatible(pdev->dev.of_node, "marvell,armada-380-ahci")) { dram = mv_mbus_dram_info(); diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c index 2685f28160f7..cfdef4d44ae9 100644 --- a/drivers/ata/ahci_qoriq.c +++ b/drivers/ata/ahci_qoriq.c @@ -96,7 +96,7 @@ static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class, DPRINTK("ENTER\n"); - ahci_stop_engine(ap); + hpriv->stop_engine(ap); /* * There is a errata on ls1021a Rev1.0 and Rev2.0 which is: diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index c2b5941d9184..ad58da7c9aff 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -165,7 +165,7 @@ static int xgene_ahci_restart_engine(struct ata_port *ap) PORT_CMD_ISSUE, 0x0, 1, 100)) return -EBUSY; - ahci_stop_engine(ap); + hpriv->stop_engine(ap); ahci_start_fis_rx(ap); /* @@ -421,7 +421,7 @@ static int xgene_ahci_hardreset(struct ata_link *link, unsigned int *class, portrxfis_saved = readl(port_mmio + PORT_FIS_ADDR); portrxfishi_saved = readl(port_mmio + PORT_FIS_ADDR_HI); - ahci_stop_engine(ap); + hpriv->stop_engine(ap); rc = xgene_ahci_do_hardreset(link, deadline, &online); diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 7adcf3caabd0..e5d90977caec 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -560,6 +560,9 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) if (!hpriv->start_engine) hpriv->start_engine = ahci_start_engine; + if (!hpriv->stop_engine) + hpriv->stop_engine = ahci_stop_engine; + if (!hpriv->irq_handler) hpriv->irq_handler = ahci_single_level_irq_intr; } @@ -897,9 +900,10 @@ static void ahci_start_port(struct ata_port *ap) static int ahci_deinit_port(struct ata_port *ap, const char **emsg) { int rc; + struct ahci_host_priv *hpriv = ap->host->private_data; /* disable DMA */ - rc = ahci_stop_engine(ap); + rc = hpriv->stop_engine(ap); if (rc) { *emsg = "failed to stop engine"; return rc; @@ -1310,7 +1314,7 @@ int ahci_kick_engine(struct ata_port *ap) int busy, rc; /* stop engine */ - rc = ahci_stop_engine(ap); + rc = hpriv->stop_engine(ap); if (rc) goto out_restart; @@ -1549,7 +1553,7 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class, DPRINTK("ENTER\n"); - ahci_stop_engine(ap); + hpriv->stop_engine(ap); /* clear D2H reception area to properly wait for D2H FIS */ ata_tf_init(link->device, &tf); @@ -2075,14 +2079,14 @@ void ahci_error_handler(struct ata_port *ap) if (!(ap->pflags & ATA_PFLAG_FROZEN)) { /* restart engine */ - ahci_stop_engine(ap); + hpriv->stop_engine(ap); hpriv->start_engine(ap); } sata_pmp_error_handler(ap); if (!ata_dev_enabled(ap->link.device)) - ahci_stop_engine(ap); + hpriv->stop_engine(ap); } EXPORT_SYMBOL_GPL(ahci_error_handler); @@ -2129,7 +2133,7 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep) return; /* set DITO, MDAT, DETO and enable DevSlp, need to stop engine first */ - rc = ahci_stop_engine(ap); + rc = hpriv->stop_engine(ap); if (rc) return; @@ -2189,7 +2193,7 @@ static void ahci_enable_fbs(struct ata_port *ap) return; } - rc = ahci_stop_engine(ap); + rc = hpriv->stop_engine(ap); if (rc) return; @@ -2222,7 +2226,7 @@ static void ahci_disable_fbs(struct ata_port *ap) return; } - rc = ahci_stop_engine(ap); + rc = hpriv->stop_engine(ap); if (rc) return; diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 46a762442dc5..30cc8f1a31e1 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -25,7 +25,6 @@ #include <linux/phy/phy.h> #include <linux/pm_runtime.h> #include <linux/of_platform.h> -#include <linux/reset.h> #include "ahci.h" static void ahci_host_stop(struct ata_host *host); @@ -196,8 +195,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators); * following order: * 1) Regulator * 2) Clocks (through ahci_platform_enable_clks) - * 3) Resets - * 4) Phys + * 3) Phys * * If resource enabling fails at any point the previous enabled resources * are disabled in reverse order. @@ -217,19 +215,12 @@ int ahci_platform_enable_resources(struct ahci_host_priv *hpriv) if (rc) goto disable_regulator; - rc = reset_control_deassert(hpriv->rsts); - if (rc) - goto disable_clks; - rc = ahci_platform_enable_phys(hpriv); if (rc) - goto disable_resets; + goto disable_clks; return 0; -disable_resets: - reset_control_assert(hpriv->rsts); - disable_clks: ahci_platform_disable_clks(hpriv); @@ -248,15 +239,12 @@ EXPORT_SYMBOL_GPL(ahci_platform_enable_resources); * following order: * 1) Phys * 2) Clocks (through ahci_platform_disable_clks) - * 3) Resets - * 4) Regulator + * 3) Regulator */ void ahci_platform_disable_resources(struct ahci_host_priv *hpriv) { ahci_platform_disable_phys(hpriv); - reset_control_assert(hpriv->rsts); - ahci_platform_disable_clks(hpriv); ahci_platform_disable_regulators(hpriv); @@ -405,12 +393,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev) hpriv->clks[i] = clk; } - hpriv->rsts = devm_reset_control_array_get_optional_shared(dev); - if (IS_ERR(hpriv->rsts)) { - rc = PTR_ERR(hpriv->rsts); - goto err_out; - } - hpriv->nports = child_nodes = of_get_child_count(dev->of_node); /* diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8bc71ca61e7f..68596bd4cf06 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4549,6 +4549,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM | ATA_HORKAGE_NOLPM, }, + /* This specific Samsung model/firmware-rev does not handle LPM well */ + { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, + + /* Sandisk devices which are known to not handle LPM well */ + { "SanDisk SD7UB3Q*G1001", NULL, ATA_HORKAGE_NOLPM, }, + /* devices that don't properly handle queued TRIM commands */ { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c016829a38fd..513b260bcff1 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -175,8 +175,8 @@ static void ata_eh_handle_port_resume(struct ata_port *ap) { } #endif /* CONFIG_PM */ -static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt, - va_list args) +static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, + const char *fmt, va_list args) { ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, ATA_EH_DESC_LEN - ehi->desc_len, diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index aafb8cc03523..e67815b896fc 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -410,7 +410,7 @@ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class, int rc; int retry = 100; - ahci_stop_engine(ap); + hpriv->stop_engine(ap); /* clear D2H reception area to properly wait for D2H FIS */ ata_tf_init(link->device, &tf); diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index 4b1995e2d044..010ca101d412 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -285,13 +285,13 @@ static const struct sil24_cerr_info { [PORT_CERR_INCONSISTENT] = { AC_ERR_HSM, ATA_EH_RESET, "protocol mismatch" }, [PORT_CERR_DIRECTION] = { AC_ERR_HSM, ATA_EH_RESET, - "data directon mismatch" }, + "data direction mismatch" }, [PORT_CERR_UNDERRUN] = { AC_ERR_HSM, ATA_EH_RESET, "ran out of SGEs while writing" }, [PORT_CERR_OVERRUN] = { AC_ERR_HSM, ATA_EH_RESET, "ran out of SGEs while reading" }, [PORT_CERR_PKT_PROT] = { AC_ERR_HSM, ATA_EH_RESET, - "invalid data directon for ATAPI CDB" }, + "invalid data direction for ATAPI CDB" }, [PORT_CERR_SGT_BOUNDARY] = { AC_ERR_SYSTEM, ATA_EH_RESET, "SGT not on qword boundary" }, [PORT_CERR_SGT_TGTABRT] = { AC_ERR_HOST_BUS, ATA_EH_RESET, diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index d97c05690faa..4e46dc9e41ad 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -191,7 +191,7 @@ static char *res_strings[] = { "reserved 37", "reserved 38", "reserved 39", - "reseverd 40", + "reserved 40", "reserved 41", "reserved 42", "reserved 43", diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 1ef67db03c8e..9c9a22958717 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -28,6 +28,7 @@ #include <asm/io.h> #include <linux/atomic.h> #include <linux/uaccess.h> +#include <linux/nospec.h> #include "uPD98401.h" #include "uPD98402.h" @@ -1458,6 +1459,8 @@ static int zatm_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) return -EFAULT; if (pool < 0 || pool > ZATM_LAST_POOL) return -EINVAL; + pool = array_index_nospec(pool, + ZATM_LAST_POOL + 1); spin_lock_irqsave(&zatm_dev->lock, flags); info = zatm_dev->pool_info[pool]; if (cmd == ZATM_GETPOOLZ) { diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8e8b04cc569a..33b36fea1d73 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2366,7 +2366,9 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup"); osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0, - obj_req->copyup_bvecs, bytes); + obj_req->copyup_bvecs, + obj_req->copyup_bvec_count, + bytes); switch (obj_req->img_request->op_type) { case OBJ_OP_WRITE: diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c8c8b0b8d333..b937cc1e2c07 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -231,6 +231,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 }, @@ -263,7 +264,6 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, /* QCA ROME chipset */ - { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME }, @@ -399,6 +399,13 @@ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 3060"), }, }, + { + /* Dell XPS 9360 (QCA ROME device 0cf3:e300) */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"), + }, + }, {} }; @@ -2852,6 +2859,12 @@ static int btusb_config_oob_wake(struct hci_dev *hdev) } #endif +static void btusb_check_needs_reset_resume(struct usb_interface *intf) +{ + if (dmi_check_system(btusb_needs_reset_resume_table)) + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; +} + static int btusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { @@ -2974,9 +2987,6 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame; hdev->notify = btusb_notify; - if (dmi_check_system(btusb_needs_reset_resume_table)) - interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; - #ifdef CONFIG_PM err = btusb_config_oob_wake(hdev); if (err) @@ -3064,6 +3074,7 @@ static int btusb_probe(struct usb_interface *intf, data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + btusb_check_needs_reset_resume(intf); } #ifdef CONFIG_BT_HCIBTUSB_RTL diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c index c381c8e396fc..79d8c84693a1 100644 --- a/drivers/char/agp/uninorth-agp.c +++ b/drivers/char/agp/uninorth-agp.c @@ -195,7 +195,7 @@ static int uninorth_insert_memory(struct agp_memory *mem, off_t pg_start, int ty return 0; } -int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int type) +static int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int type) { size_t i; u32 *gp; @@ -470,7 +470,7 @@ static int uninorth_free_gatt_table(struct agp_bridge_data *bridge) return 0; } -void null_cache_flush(void) +static void null_cache_flush(void) { mb(); } diff --git a/drivers/clk/clk-cs2000-cp.c b/drivers/clk/clk-cs2000-cp.c index c58019750b7e..a2f8c42e527a 100644 --- a/drivers/clk/clk-cs2000-cp.c +++ b/drivers/clk/clk-cs2000-cp.c @@ -541,7 +541,7 @@ probe_err: return ret; } -static int cs2000_resume(struct device *dev) +static int __maybe_unused cs2000_resume(struct device *dev) { struct cs2000_priv *priv = dev_get_drvdata(dev); diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index ac4a042f8658..1628b93655ed 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -112,10 +112,18 @@ static int clk_mux_set_parent(struct clk_hw *hw, u8 index) return 0; } +static int clk_mux_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct clk_mux *mux = to_clk_mux(hw); + + return clk_mux_determine_rate_flags(hw, req, mux->flags); +} + const struct clk_ops clk_mux_ops = { .get_parent = clk_mux_get_parent, .set_parent = clk_mux_set_parent, - .determine_rate = __clk_mux_determine_rate, + .determine_rate = clk_mux_determine_rate, }; EXPORT_SYMBOL_GPL(clk_mux_ops); diff --git a/drivers/clk/clk-stm32mp1.c b/drivers/clk/clk-stm32mp1.c index f1d5967b4b39..edd3cf451401 100644 --- a/drivers/clk/clk-stm32mp1.c +++ b/drivers/clk/clk-stm32mp1.c @@ -216,7 +216,7 @@ static const char * const usart1_src[] = { "pclk5", "pll3_q", "ck_hsi", "ck_csi", "pll4_q", "ck_hse" }; -const char * const usart234578_src[] = { +static const char * const usart234578_src[] = { "pclk1", "pll4_q", "ck_hsi", "ck_csi", "ck_hse" }; @@ -224,10 +224,6 @@ static const char * const usart6_src[] = { "pclk2", "pll4_q", "ck_hsi", "ck_csi", "ck_hse" }; -static const char * const dfsdm_src[] = { - "pclk2", "ck_mcu" -}; - static const char * const fdcan_src[] = { "ck_hse", "pll3_q", "pll4_q" }; @@ -316,10 +312,8 @@ struct stm32_clk_mgate { struct clock_config { u32 id; const char *name; - union { - const char *parent_name; - const char * const *parent_names; - }; + const char *parent_name; + const char * const *parent_names; int num_parents; unsigned long flags; void *cfg; @@ -469,7 +463,7 @@ static void mp1_gate_clk_disable(struct clk_hw *hw) } } -const struct clk_ops mp1_gate_clk_ops = { +static const struct clk_ops mp1_gate_clk_ops = { .enable = mp1_gate_clk_enable, .disable = mp1_gate_clk_disable, .is_enabled = clk_gate_is_enabled, @@ -698,7 +692,7 @@ static void mp1_mgate_clk_disable(struct clk_hw *hw) mp1_gate_clk_disable(hw); } -const struct clk_ops mp1_mgate_clk_ops = { +static const struct clk_ops mp1_mgate_clk_ops = { .enable = mp1_mgate_clk_enable, .disable = mp1_mgate_clk_disable, .is_enabled = clk_gate_is_enabled, @@ -732,7 +726,7 @@ static int clk_mmux_set_parent(struct clk_hw *hw, u8 index) return 0; } -const struct clk_ops clk_mmux_ops = { +static const struct clk_ops clk_mmux_ops = { .get_parent = clk_mmux_get_parent, .set_parent = clk_mmux_set_parent, .determine_rate = __clk_mux_determine_rate, @@ -1048,10 +1042,10 @@ struct stm32_pll_cfg { u32 offset; }; -struct clk_hw *_clk_register_pll(struct device *dev, - struct clk_hw_onecell_data *clk_data, - void __iomem *base, spinlock_t *lock, - const struct clock_config *cfg) +static struct clk_hw *_clk_register_pll(struct device *dev, + struct clk_hw_onecell_data *clk_data, + void __iomem *base, spinlock_t *lock, + const struct clock_config *cfg) { struct stm32_pll_cfg *stm_pll_cfg = cfg->cfg; @@ -1405,7 +1399,8 @@ enum { G_USBH, G_ETHSTP, G_RTCAPB, - G_TZC, + G_TZC1, + G_TZC2, G_TZPC, G_IWDG1, G_BSEC, @@ -1417,7 +1412,7 @@ enum { G_LAST }; -struct stm32_mgate mp1_mgate[G_LAST]; +static struct stm32_mgate mp1_mgate[G_LAST]; #define _K_GATE(_id, _gate_offset, _gate_bit_idx, _gate_flags,\ _mgate, _ops)\ @@ -1440,7 +1435,7 @@ struct stm32_mgate mp1_mgate[G_LAST]; &mp1_mgate[_id], &mp1_mgate_clk_ops) /* Peripheral gates */ -struct stm32_gate_cfg per_gate_cfg[G_LAST] = { +static struct stm32_gate_cfg per_gate_cfg[G_LAST] = { /* Multi gates */ K_GATE(G_MDIO, RCC_APB1ENSETR, 31, 0), K_MGATE(G_DAC12, RCC_APB1ENSETR, 29, 0), @@ -1506,7 +1501,8 @@ struct stm32_gate_cfg per_gate_cfg[G_LAST] = { K_GATE(G_BSEC, RCC_APB5ENSETR, 16, 0), K_GATE(G_IWDG1, RCC_APB5ENSETR, 15, 0), K_GATE(G_TZPC, RCC_APB5ENSETR, 13, 0), - K_GATE(G_TZC, RCC_APB5ENSETR, 12, 0), + K_GATE(G_TZC2, RCC_APB5ENSETR, 12, 0), + K_GATE(G_TZC1, RCC_APB5ENSETR, 11, 0), K_GATE(G_RTCAPB, RCC_APB5ENSETR, 8, 0), K_MGATE(G_USART1, RCC_APB5ENSETR, 4, 0), K_MGATE(G_I2C6, RCC_APB5ENSETR, 3, 0), @@ -1600,7 +1596,7 @@ enum { M_LAST }; -struct stm32_mmux ker_mux[M_LAST]; +static struct stm32_mmux ker_mux[M_LAST]; #define _K_MUX(_id, _offset, _shift, _width, _mux_flags, _mmux, _ops)\ [_id] = {\ @@ -1623,7 +1619,7 @@ struct stm32_mmux ker_mux[M_LAST]; _K_MUX(_id, _offset, _shift, _width, _mux_flags,\ &ker_mux[_id], &clk_mmux_ops) -const struct stm32_mux_cfg ker_mux_cfg[M_LAST] = { +static const struct stm32_mux_cfg ker_mux_cfg[M_LAST] = { /* Kernel multi mux */ K_MMUX(M_SDMMC12, RCC_SDMMC12CKSELR, 0, 3, 0), K_MMUX(M_SPI23, RCC_SPI2S23CKSELR, 0, 3, 0), @@ -1860,7 +1856,8 @@ static const struct clock_config stm32mp1_clock_cfg[] = { PCLK(USART1, "usart1", "pclk5", 0, G_USART1), PCLK(RTCAPB, "rtcapb", "pclk5", CLK_IGNORE_UNUSED | CLK_IS_CRITICAL, G_RTCAPB), - PCLK(TZC, "tzc", "pclk5", CLK_IGNORE_UNUSED, G_TZC), + PCLK(TZC1, "tzc1", "ck_axi", CLK_IGNORE_UNUSED, G_TZC1), + PCLK(TZC2, "tzc2", "ck_axi", CLK_IGNORE_UNUSED, G_TZC2), PCLK(TZPC, "tzpc", "pclk5", CLK_IGNORE_UNUSED, G_TZPC), PCLK(IWDG1, "iwdg1", "pclk5", 0, G_IWDG1), PCLK(BSEC, "bsec", "pclk5", CLK_IGNORE_UNUSED, G_BSEC), @@ -1916,8 +1913,7 @@ static const struct clock_config stm32mp1_clock_cfg[] = { KCLK(RNG1_K, "rng1_k", rng_src, 0, G_RNG1, M_RNG1), KCLK(RNG2_K, "rng2_k", rng_src, 0, G_RNG2, M_RNG2), KCLK(USBPHY_K, "usbphy_k", usbphy_src, 0, G_USBPHY, M_USBPHY), - KCLK(STGEN_K, "stgen_k", stgen_src, CLK_IGNORE_UNUSED, - G_STGEN, M_STGEN), + KCLK(STGEN_K, "stgen_k", stgen_src, CLK_IS_CRITICAL, G_STGEN, M_STGEN), KCLK(SPDIF_K, "spdif_k", spdif_src, 0, G_SPDIF, M_SPDIF), KCLK(SPI1_K, "spi1_k", spi123_src, 0, G_SPI1, M_SPI1), KCLK(SPI2_K, "spi2_k", spi123_src, 0, G_SPI2, M_SPI23), @@ -1948,8 +1944,8 @@ static const struct clock_config stm32mp1_clock_cfg[] = { KCLK(FDCAN_K, "fdcan_k", fdcan_src, 0, G_FDCAN, M_FDCAN), KCLK(SAI1_K, "sai1_k", sai_src, 0, G_SAI1, M_SAI1), KCLK(SAI2_K, "sai2_k", sai2_src, 0, G_SAI2, M_SAI2), - KCLK(SAI3_K, "sai3_k", sai_src, 0, G_SAI2, M_SAI3), - KCLK(SAI4_K, "sai4_k", sai_src, 0, G_SAI2, M_SAI4), + KCLK(SAI3_K, "sai3_k", sai_src, 0, G_SAI3, M_SAI3), + KCLK(SAI4_K, "sai4_k", sai_src, 0, G_SAI4, M_SAI4), KCLK(ADC12_K, "adc12_k", adc12_src, 0, G_ADC12, M_ADC12), KCLK(DSI_K, "dsi_k", dsi_src, 0, G_DSI, M_DSI), KCLK(ADFSDM_K, "adfsdm_k", sai_src, 0, G_ADFSDM, M_SAI1), @@ -1992,10 +1988,6 @@ static const struct clock_config stm32mp1_clock_cfg[] = { _DIV(RCC_MCO2CFGR, 4, 4, 0, NULL)), /* Debug clocks */ - FIXED_FACTOR(NO_ID, "ck_axi_div2", "ck_axi", 0, 1, 2), - - GATE(DBG, "ck_apb_dbg", "ck_axi_div2", 0, RCC_DBGCFGR, 8, 0), - GATE(CK_DBG, "ck_sys_dbg", "ck_axi", 0, RCC_DBGCFGR, 8, 0), COMPOSITE(CK_TRACE, "ck_trace", ck_trace_src, CLK_OPS_PARENT_ENABLE, diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index ea67ac81c6f9..7af555f0e60c 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -426,9 +426,9 @@ static bool mux_is_better_rate(unsigned long rate, unsigned long now, return now <= rate && now > best; } -static int -clk_mux_determine_rate_flags(struct clk_hw *hw, struct clk_rate_request *req, - unsigned long flags) +int clk_mux_determine_rate_flags(struct clk_hw *hw, + struct clk_rate_request *req, + unsigned long flags) { struct clk_core *core = hw->core, *parent, *best_parent = NULL; int i, num_parents, ret; @@ -488,6 +488,7 @@ out: return 0; } +EXPORT_SYMBOL_GPL(clk_mux_determine_rate_flags); struct clk *__clk_lookup(const char *name) { diff --git a/drivers/clk/meson/clk-regmap.c b/drivers/clk/meson/clk-regmap.c index 3645fdb62343..ab7a3556f5b2 100644 --- a/drivers/clk/meson/clk-regmap.c +++ b/drivers/clk/meson/clk-regmap.c @@ -153,10 +153,19 @@ static int clk_regmap_mux_set_parent(struct clk_hw *hw, u8 index) val << mux->shift); } +static int clk_regmap_mux_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + struct clk_regmap *clk = to_clk_regmap(hw); + struct clk_regmap_mux_data *mux = clk_get_regmap_mux_data(clk); + + return clk_mux_determine_rate_flags(hw, req, mux->flags); +} + const struct clk_ops clk_regmap_mux_ops = { .get_parent = clk_regmap_mux_get_parent, .set_parent = clk_regmap_mux_set_parent, - .determine_rate = __clk_mux_determine_rate, + .determine_rate = clk_regmap_mux_determine_rate, }; EXPORT_SYMBOL_GPL(clk_regmap_mux_ops); diff --git a/drivers/clk/meson/gxbb-aoclk.h b/drivers/clk/meson/gxbb-aoclk.h index 0be78383f257..badc4c22b4ee 100644 --- a/drivers/clk/meson/gxbb-aoclk.h +++ b/drivers/clk/meson/gxbb-aoclk.h @@ -17,8 +17,6 @@ #define AO_RTC_ALT_CLK_CNTL0 0x94 #define AO_RTC_ALT_CLK_CNTL1 0x98 -extern const struct clk_ops meson_aoclk_gate_regmap_ops; - struct aoclk_cec_32k { struct clk_hw hw; struct regmap *regmap; diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c index cc2992493e0b..d0524ec71aad 100644 --- a/drivers/clk/meson/meson8b.c +++ b/drivers/clk/meson/meson8b.c @@ -253,7 +253,7 @@ static struct clk_fixed_factor meson8b_fclk_div3_div = { .mult = 1, .div = 3, .hw.init = &(struct clk_init_data){ - .name = "fclk_div_div3", + .name = "fclk_div3_div", .ops = &clk_fixed_factor_ops, .parent_names = (const char *[]){ "fixed_pll" }, .num_parents = 1, @@ -632,7 +632,8 @@ static struct clk_regmap meson8b_cpu_clk = { .hw.init = &(struct clk_init_data){ .name = "cpu_clk", .ops = &clk_regmap_mux_ro_ops, - .parent_names = (const char *[]){ "xtal", "cpu_out_sel" }, + .parent_names = (const char *[]){ "xtal", + "cpu_scale_out_sel" }, .num_parents = 2, .flags = (CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT), diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index bc5fc1630876..b15115a48775 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -126,6 +126,49 @@ static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy) cpu->perf_caps.lowest_perf, cpu_num, ret); } +/* + * The PCC subspace describes the rate at which platform can accept commands + * on the shared PCC channel (including READs which do not count towards freq + * trasition requests), so ideally we need to use the PCC values as a fallback + * if we don't have a platform specific transition_delay_us + */ +#ifdef CONFIG_ARM64 +#include <asm/cputype.h> + +static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu) +{ + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_num = read_cpuid_part_number(); + unsigned int delay_us = 0; + + switch (implementor) { + case ARM_CPU_IMP_QCOM: + switch (part_num) { + case QCOM_CPU_PART_FALKOR_V1: + case QCOM_CPU_PART_FALKOR: + delay_us = 10000; + break; + default: + delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC; + break; + } + break; + default: + delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC; + break; + } + + return delay_us; +} + +#else + +static unsigned int cppc_cpufreq_get_transition_delay_us(int cpu) +{ + return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; +} +#endif + static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) { struct cppc_cpudata *cpu; @@ -162,8 +205,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu->perf_caps.highest_perf; policy->cpuinfo.max_freq = cppc_dmi_max_khz; - policy->transition_delay_us = cppc_get_transition_latency(cpu_num) / - NSEC_PER_USEC; + policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu_num); policy->shared_type = cpu->shared_type; if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 77e485557498..6f693b7d5220 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -384,7 +384,7 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set) if (set) reg |= bit; else - reg &= bit; + reg &= ~bit; iowrite32(reg, addr); spin_unlock_irqrestore(&gpio->lock, flags); diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c index 1948724d8c36..25d16b2af1c3 100644 --- a/drivers/gpio/gpio-pci-idio-16.c +++ b/drivers/gpio/gpio-pci-idio-16.c @@ -116,9 +116,9 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip, unsigned long word_mask; const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0); unsigned long port_state; - u8 __iomem ports[] = { - idio16gpio->reg->out0_7, idio16gpio->reg->out8_15, - idio16gpio->reg->in0_7, idio16gpio->reg->in8_15, + void __iomem *ports[] = { + &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15, + &idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15, }; /* clear bits array to a clean slate */ @@ -143,7 +143,7 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip, } /* read bits from current gpio port */ - port_state = ioread8(ports + i); + port_state = ioread8(ports[i]); /* store acquired bits at respective bits array offset */ bits[word_index] |= port_state << word_offset; diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index 835607ecf658..f953541e7890 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -206,10 +206,10 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip, unsigned long word_mask; const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0); unsigned long port_state; - u8 __iomem ports[] = { - idio24gpio->reg->out0_7, idio24gpio->reg->out8_15, - idio24gpio->reg->out16_23, idio24gpio->reg->in0_7, - idio24gpio->reg->in8_15, idio24gpio->reg->in16_23, + void __iomem *ports[] = { + &idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15, + &idio24gpio->reg->out16_23, &idio24gpio->reg->in0_7, + &idio24gpio->reg->in8_15, &idio24gpio->reg->in16_23, }; const unsigned long out_mode_mask = BIT(1); @@ -217,7 +217,7 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip, bitmap_zero(bits, chip->ngpio); /* get bits are evaluated a gpio port register at a time */ - for (i = 0; i < ARRAY_SIZE(ports); i++) { + for (i = 0; i < ARRAY_SIZE(ports) + 1; i++) { /* gpio offset in bits array */ bits_offset = i * gpio_reg_size; @@ -236,7 +236,7 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip, /* read bits from current gpio port (port 6 is TTL GPIO) */ if (i < 6) - port_state = ioread8(ports + i); + port_state = ioread8(ports[i]); else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask) port_state = ioread8(&idio24gpio->reg->ttl_out0_7); else @@ -301,9 +301,9 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip, const unsigned long port_mask = GENMASK(gpio_reg_size, 0); unsigned long flags; unsigned int out_state; - u8 __iomem ports[] = { - idio24gpio->reg->out0_7, idio24gpio->reg->out8_15, - idio24gpio->reg->out16_23 + void __iomem *ports[] = { + &idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15, + &idio24gpio->reg->out16_23 }; const unsigned long out_mode_mask = BIT(1); const unsigned int ttl_offset = 48; @@ -327,9 +327,9 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip, raw_spin_lock_irqsave(&idio24gpio->lock, flags); /* process output lines */ - out_state = ioread8(ports + i) & ~gpio_mask; + out_state = ioread8(ports[i]) & ~gpio_mask; out_state |= (*bits >> bits_offset) & gpio_mask; - iowrite8(out_state, ports + i); + iowrite8(out_state, ports[i]); raw_spin_unlock_irqrestore(&idio24gpio->lock, flags); } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 43aeb07343ec..d8ccb500872f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -497,7 +497,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) struct gpiohandle_request handlereq; struct linehandle_state *lh; struct file *file; - int fd, i, ret; + int fd, i, count = 0, ret; u32 lflags; if (copy_from_user(&handlereq, ip, sizeof(handlereq))) @@ -558,6 +558,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) if (ret) goto out_free_descs; lh->descs[i] = desc; + count = i; if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); @@ -628,7 +629,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) out_put_unused_fd: put_unused_fd(fd); out_free_descs: - for (; i >= 0; i--) + for (i = 0; i < count; i++) gpiod_free(lh->descs[i]); kfree(lh->label); out_free_lh: @@ -902,7 +903,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) desc = &gdev->descs[offset]; ret = gpiod_request(desc, le->label); if (ret) - goto out_free_desc; + goto out_free_label; le->desc = desc; le->eflags = eflags; diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2ca2b5154d52..bfd332c95b61 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -56,13 +56,18 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ - ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \ - amdgpu_amdkfd_gfx_v7.o + ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o amdgpu-y += \ - vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o + vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ + vega20_reg_init.o + +# add DF block +amdgpu-y += \ + df_v1_7.o \ + df_v3_6.o # add GMC block amdgpu-y += \ @@ -126,11 +131,20 @@ amdgpu-y += \ vcn_v1_0.o # add amdkfd interfaces +amdgpu-y += amdgpu_amdkfd.o + +ifneq ($(CONFIG_HSA_AMD),) amdgpu-y += \ - amdgpu_amdkfd.o \ amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_gpuvm.o \ - amdgpu_amdkfd_gfx_v8.o + amdgpu_amdkfd_gfx_v8.o \ + amdgpu_amdkfd_gfx_v9.o + +ifneq ($(CONFIG_DRM_AMDGPU_CIK),) +amdgpu-y += amdgpu_amdkfd_gfx_v7.o +endif + +endif # add cgs amdgpu-y += amdgpu_cgs.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c8b605f3dc05..a59c07590cee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -129,6 +129,7 @@ extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; extern int amdgpu_emu_mode; +extern uint amdgpu_smu_memory_pool_size; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -137,6 +138,7 @@ extern int amdgpu_si_support; extern int amdgpu_cik_support; #endif +#define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ @@ -222,10 +224,10 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_LAST }; -int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state); -int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state); void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, @@ -681,6 +683,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); @@ -771,9 +775,18 @@ struct amdgpu_rlc { u32 starting_offsets_start; u32 reg_list_format_size_bytes; u32 reg_list_size_bytes; + u32 reg_list_format_direct_reg_list_length; + u32 save_restore_list_cntl_size_bytes; + u32 save_restore_list_gpm_size_bytes; + u32 save_restore_list_srm_size_bytes; u32 *register_list_format; u32 *register_restore; + u8 *save_restore_list_cntl; + u8 *save_restore_list_gpm; + u8 *save_restore_list_srm; + + bool is_rlc_v2_1; }; #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES @@ -867,6 +880,8 @@ struct amdgpu_gfx_config { /* gfx configure feature */ uint32_t double_offchip_lds_buf; + /* cached value of DB_DEBUG2 */ + uint32_t db_debug2; }; struct amdgpu_cu_info { @@ -938,6 +953,12 @@ struct amdgpu_gfx { uint32_t ce_feature_version; uint32_t pfp_feature_version; uint32_t rlc_feature_version; + uint32_t rlc_srlc_fw_version; + uint32_t rlc_srlc_feature_version; + uint32_t rlc_srlg_fw_version; + uint32_t rlc_srlg_feature_version; + uint32_t rlc_srls_fw_version; + uint32_t rlc_srls_feature_version; uint32_t mec_feature_version; uint32_t mec2_feature_version; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; @@ -1204,6 +1225,8 @@ struct amdgpu_asic_funcs { /* invalidate hdp read cache */ void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + /* check if the asic needs a full reset of if soft reset will work */ + bool (*need_full_reset)(struct amdgpu_device *adev); }; /* @@ -1368,7 +1391,19 @@ struct amdgpu_nbio_funcs { void (*detect_hw_virt)(struct amdgpu_device *adev); }; - +struct amdgpu_df_funcs { + void (*init)(struct amdgpu_device *adev); + void (*enable_broadcast_mode)(struct amdgpu_device *adev, + bool enable); + u32 (*get_fb_channel_number)(struct amdgpu_device *adev); + u32 (*get_hbm_channel_number)(struct amdgpu_device *adev); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, + bool enable); +}; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { GC_HWIP = 1, @@ -1398,6 +1433,7 @@ enum amd_hw_ip_block_type { struct amd_powerplay { void *pp_handle; const struct amd_pm_funcs *pp_funcs; + uint32_t pp_feature; }; #define AMDGPU_RESET_MAGIC_NUM 64 @@ -1590,6 +1626,7 @@ struct amdgpu_device { uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; const struct amdgpu_nbio_funcs *nbio_funcs; + const struct amdgpu_df_funcs *df_funcs; /* delayed work_func for deferring clockgating during resume */ struct delayed_work late_init_work; @@ -1764,6 +1801,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) +#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) #define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) @@ -1790,6 +1828,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) +#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index a29362f9ef41..03ee36739efe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -290,12 +290,11 @@ static int acp_hw_init(void *handle) else if (r) return r; - r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO, - 0x5289, 0, &acp_base); - if (r == -ENODEV) - return 0; - else if (r) - return r; + if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) + return -EINVAL; + + acp_base = adev->rmmio_base; + if (adev->asic_type != CHIP_STONEY) { adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); if (adev->acp.acp_genpd == NULL) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 4d36203ffb11..305143fcc1ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -50,15 +50,21 @@ int amdgpu_amdkfd_init(void) kgd2kfd = NULL; } + #elif defined(CONFIG_HSA_AMD) + ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); if (ret) kgd2kfd = NULL; #else + kgd2kfd = NULL; ret = -ENOENT; #endif + +#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD) amdgpu_amdkfd_gpuvm_init_mem_limits(); +#endif return ret; } @@ -92,8 +98,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) case CHIP_POLARIS11: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; + case CHIP_VEGA10: + case CHIP_RAVEN: + kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); + break; default: - dev_dbg(adev->dev, "kfd not supported on this ASIC\n"); + dev_info(adev->dev, "kfd not supported on this ASIC\n"); return; } @@ -175,6 +185,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_physical_address, &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); + if (adev->asic_type >= CHIP_VEGA10) { + /* On SOC15 the BIF is involved in routing + * doorbells using the low 12 bits of the + * address. Communicate the assignments to + * KFD. KFD uses two doorbell pages per + * process in case of 64-bit doorbells so we + * can use each doorbell assignment twice. + */ + gpu_resources.sdma_doorbell[0][0] = + AMDGPU_DOORBELL64_sDMA_ENGINE0; + gpu_resources.sdma_doorbell[0][1] = + AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200; + gpu_resources.sdma_doorbell[1][0] = + AMDGPU_DOORBELL64_sDMA_ENGINE1; + gpu_resources.sdma_doorbell[1][1] = + AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200; + /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for + * SDMA, IH and VCN. So don't use them for the CP. + */ + gpu_resources.reserved_doorbell_mask = 0x1f0; + gpu_resources.reserved_doorbell_val = 0x0f0; + } kgd2kfd->device_init(adev->kfd, &gpu_resources); } @@ -217,13 +249,19 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = NULL; + struct amdgpu_bo_param bp; int r; uint64_t gpu_addr_tmp = 0; void *cpu_ptr_tmp = NULL; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel, - NULL, &bo); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + r = amdgpu_bo_create(adev, &bp, &bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); @@ -304,15 +342,12 @@ void get_local_mem_info(struct kgd_dev *kgd, mem_info->local_mem_size_public, mem_info->local_mem_size_private); - if (amdgpu_emu_mode == 1) { - mem_info->mem_clk_max = 100; - return; - } - if (amdgpu_sriov_vf(adev)) mem_info->mem_clk_max = adev->clock.default_mclk / 100; - else + else if (adev->powerplay.pp_funcs) mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; + else + mem_info->mem_clk_max = 100; } uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) @@ -329,13 +364,12 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) struct amdgpu_device *adev = (struct amdgpu_device *)kgd; /* the sclk is in quantas of 10kHz */ - if (amdgpu_emu_mode == 1) - return 100; - if (amdgpu_sriov_vf(adev)) return adev->clock.default_sclk / 100; - - return amdgpu_dpm_get_sclk(adev, false) / 100; + else if (adev->powerplay.pp_funcs) + return amdgpu_dpm_get_sclk(adev, false) / 100; + else + return 100; } void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) @@ -432,3 +466,44 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } + +#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD) +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +{ + return false; +} + +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +{ +} + +void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ +} + +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +{ + return NULL; +} + +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) +{ + return 0; +} + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) +{ + return NULL; +} + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) +{ + return NULL; +} + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return NULL; +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index c2c2bea731e0..a8418a3f4e9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -28,6 +28,7 @@ #include <linux/types.h> #include <linux/mm.h> #include <linux/mmu_context.h> +#include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> #include "amdgpu_sync.h" @@ -59,7 +60,9 @@ struct kgd_mem { uint32_t mapping_flags; + atomic_t invalid; struct amdkfd_process_info *process_info; + struct page **user_pages; struct amdgpu_sync sync; @@ -84,6 +87,9 @@ struct amdkfd_process_info { struct list_head vm_list_head; /* List head for all KFD BOs that belong to a KFD process. */ struct list_head kfd_bo_list; + /* List of userptr BOs that are valid or invalid */ + struct list_head userptr_valid_list; + struct list_head userptr_inval_list; /* Lock to protect kfd_bo_list */ struct mutex lock; @@ -91,6 +97,11 @@ struct amdkfd_process_info { unsigned int n_vms; /* Eviction Fence */ struct amdgpu_amdkfd_fence *eviction_fence; + + /* MMU-notifier related fields */ + atomic_t evicted_bos; + struct delayed_work restore_userptr_work; + struct pid *pid; }; int amdgpu_amdkfd_init(void); @@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); @@ -143,14 +156,14 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); /* GPUVM API */ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, - void **process_info, - struct dma_fence **ef); + void **process_info, + struct dma_fence **ef); int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, - struct file *filp, - void **vm, void **process_info, - struct dma_fence **ef); + struct file *filp, + void **vm, void **process_info, + struct dma_fence **ef); void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm); + struct amdgpu_vm *vm); void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ea54e53172b9..0ff36d45a597 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* amdgpu owns the per-pipe state */ - return 0; -} - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 89264c9a5e9f..6ef9762b4b00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* amdgpu owns the per-pipe state */ - return 0; -} - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c new file mode 100644 index 000000000000..f0c0d3953f69 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -0,0 +1,1043 @@ +/* + * Copyright 2014-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "soc15_hw_ip.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "vega10_enum.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "sdma0/sdma0_4_0_sh_mask.h" +#include "sdma1/sdma1_4_0_offset.h" +#include "sdma1/sdma1_4_0_sh_mask.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" +#include "soc15_common.h" +#include "v9_structs.h" +#include "soc15.h" +#include "soc15d.h" + +/* HACK: MMHUB and GC both have VM-related register with the same + * names but different offsets. Define the MMHUB register we need here + * with a prefix. A proper solution would be to move the functions + * programming these registers into gfx_v9_0.c and mmhub_v1_0.c + * respectively. + */ +#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3 +#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0 + +#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705 +#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 + +#define V9_PIPE_PER_MEC (4) +#define V9_QUEUES_PER_PIPE_MEC (8) + +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES +}; + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} + +static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, + .free_gtt_mem = free_gtt_mem, + .get_local_mem_info = get_local_mem_info, + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .alloc_pasid = amdgpu_pasid_alloc, + .free_pasid = amdgpu_pasid_free, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .get_fw_version = get_fw_version, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = amdgpu_amdkfd_get_tile_config, + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, mec, pipe, queue, vmid); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static uint32_t get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id) +{ + unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id) & 31; + + return ((uint32_t)1) << bit; +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + /* APE1 no longer exists on GFX9 */ + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + /* + * need to do this twice, once for gfx and once for mmhub + * for ATC add 16 to VMID for mmhub, for IH different registers. + * ATC_VMID0..15 registers are separate from ATC_VMID16..31. + */ + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, + pasid_mapping); + + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << vmid))) + cpu_relax(); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << vmid); + + /* Mapping vmid to pasid also for IH block */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, + pasid_mapping); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid, + pasid_mapping); + + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << (vmid + 16)))) + cpu_relax(); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << (vmid + 16)); + + /* Mapping vmid to pasid also for IH block */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid, + pasid_mapping); + return 0; +} + +/* TODO - RING0 form of field is obsolete, seems to date back to SI + * but still works + */ + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t base[2] = { + SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL + }; + uint32_t retval; + + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); + + pr_debug("sdma base address: 0x%x\n", retval); + + return retval; +} + +static inline struct v9_mqd *get_mqd(void *mqd) +{ + return (struct v9_mqd *)mqd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, hqd_base, data; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + /* HIQ is set during driver init period with vmid set to 0*/ + if (m->cp_hqd_vmid == 0) { + uint32_t value, mec, pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + } + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + + for (reg = hqd_base; + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + WREG32(reg, mqd_hqd[reg - hqd_base]); + + + /* Activate doorbell logic before triggering WPTR poll. */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + + if (wptr) { + /* Don't read wptr with get_user because the user + * context may not be accessible (if this function + * runs in a work queue). Instead trigger a one-shot + * polling read from memory in the CP. This assumes + * that wptr is GPU-accessible in the queue's VMID via + * ATC or SVM. WPTR==RPTR before starting the poll so + * the CP starts fetching new commands from the right + * place. + * + * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit + * tricky. Assume that the queue didn't overflow. The + * number of valid bits in the 32-bit RPTR depends on + * the queue size. The remaining bits are taken from + * the saved 64-bit WPTR. If the WPTR wrapped, add the + * queue size. + */ + uint32_t queue_size = + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, + CP_HQD_PQ_CONTROL, QUEUE_SIZE); + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); + + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) + guessed_wptr += queue_size; + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uintptr_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uintptr_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + get_queue_mask(adev, pipe_id, queue_id)); + } + + /* Start the EOP fetcher */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + REG_SET_FIELD(m->cp_hqd_eop_rptr, + CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + sdmax_gfx_context_cntl = m->sdma_engine_id ? + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_base_addr + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && + high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + enum hqd_dequeue_request_type type; + unsigned long end_jiffies; + uint32_t temp; + struct v9_mqd *m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + if (m->cp_hqd_vmid == 0) + WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out.\n"); + release_queue(kgd); + return -ETIME; + } + usleep_range(500, 1000); + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + uint32_t req = (1 << vmid) | + (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; + + mutex_lock(&adev->srbm_mutex); + + /* Use legacy mode tlb invalidation. + * + * Currently on Raven the code below is broken for anything but + * legacy mode due to a MMHUB power gating problem. A workaround + * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ + * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack + * bit. + * + * TODO 1: agree on the right set of invalidation registers for + * KFD use. Use the last one for now. Invalidate both GC and + * MMHUB. + * + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), + req); + + while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); + + while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); + + mutex_unlock(&adev->srbm_mutex); + +} + +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +{ + signed long r; + uint32_t seq; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + if (ring->ready) + return invalidate_tlbs_with_kiq(adev, pasid); + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { + if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + == pasid) { + write_vmid_invalidate_request(kgd, vmid); + break; + } + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return 0; + } + + write_vmid_invalidate_request(kgd, vmid); + return 0; +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return 0; +} + +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + /* No longer needed on GFXv9. The scratch base address is + * passed to the shader by the CP. It's the user mode driver's + * responsibility. + */ +} + +/* FIXME: Does this need to be ASIC-specific code? */ +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + const union amdgpu_firmware_header *hdr; + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT | + AMDGPU_PTE_VALID; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + /* TODO: take advantage of per-process address space size. For + * now, all processes share the same address space size, like + * on GFX8 and older. + */ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1d6e1479da38..ff8fd75f7ca5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -23,6 +23,8 @@ #define pr_fmt(fmt) "kfd2kgd: " fmt #include <linux/list.h> +#include <linux/pagemap.h> +#include <linux/sched/mm.h> #include <drm/drmP.h> #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -33,10 +35,20 @@ */ #define VI_BO_SIZE_ALIGN (0x8000) +/* BO flag to indicate a KFD userptr BO */ +#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) + +/* Userptr restore delay, just long enough to allow consecutive VM + * changes to accumulate + */ +#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; + uint64_t max_userptr_mem_limit; int64_t system_mem_used; + int64_t userptr_mem_used; spinlock_t mem_limit_lock; } kfd_mem_limit; @@ -57,6 +69,7 @@ static const char * const domain_bit_to_string[] = { #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) @@ -78,6 +91,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, /* Set memory usage limits. Current, limits are * System (kernel) memory - 3/8th System RAM + * Userptr memory - 3/4th System RAM */ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) { @@ -90,8 +104,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) spin_lock_init(&kfd_mem_limit.mem_limit_lock); kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); - pr_debug("Kernel memory limit %lluM\n", - (kfd_mem_limit.max_system_mem_limit >> 20)); + kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); + pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", + (kfd_mem_limit.max_system_mem_limit >> 20), + (kfd_mem_limit.max_userptr_mem_limit >> 20)); } static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, @@ -111,6 +127,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, goto err_no_mem; } kfd_mem_limit.system_mem_used += (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + if ((kfd_mem_limit.system_mem_used + acc_size > + kfd_mem_limit.max_system_mem_limit) || + (kfd_mem_limit.userptr_mem_used + (size + acc_size) > + kfd_mem_limit.max_userptr_mem_limit)) { + ret = -ENOMEM; + goto err_no_mem; + } + kfd_mem_limit.system_mem_used += acc_size; + kfd_mem_limit.userptr_mem_used += size; } err_no_mem: spin_unlock(&kfd_mem_limit.mem_limit_lock); @@ -126,10 +152,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, sizeof(struct amdgpu_bo)); spin_lock(&kfd_mem_limit.mem_limit_lock); - if (domain == AMDGPU_GEM_DOMAIN_GTT) + if (domain == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + kfd_mem_limit.system_mem_used -= acc_size; + kfd_mem_limit.userptr_mem_used -= size; + } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, + "kfd userptr memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } @@ -138,12 +170,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) { spin_lock(&kfd_mem_limit.mem_limit_lock); - if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { + if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { + kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; + kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); + } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (bo->tbo.acc_size + amdgpu_bo_size(bo)); } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, + "kfd userptr memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } @@ -506,7 +543,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev, } static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, - struct amdkfd_process_info *process_info) + struct amdkfd_process_info *process_info, + bool userptr) { struct ttm_validate_buffer *entry = &mem->validate_list; struct amdgpu_bo *bo = mem->bo; @@ -515,10 +553,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, entry->shared = true; entry->bo = &bo->tbo; mutex_lock(&process_info->lock); - list_add_tail(&entry->head, &process_info->kfd_bo_list); + if (userptr) + list_add_tail(&entry->head, &process_info->userptr_valid_list); + else + list_add_tail(&entry->head, &process_info->kfd_bo_list); mutex_unlock(&process_info->lock); } +/* Initializes user pages. It registers the MMU notifier and validates + * the userptr BO in the GTT domain. + * + * The BO must already be on the userptr_valid_list. Otherwise an + * eviction and restore may happen that leaves the new BO unmapped + * with the user mode queues running. + * + * Takes the process_info->lock to protect against concurrent restore + * workers. + * + * Returns 0 for success, negative errno for errors. + */ +static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, + uint64_t user_addr) +{ + struct amdkfd_process_info *process_info = mem->process_info; + struct amdgpu_bo *bo = mem->bo; + struct ttm_operation_ctx ctx = { true, false }; + int ret = 0; + + mutex_lock(&process_info->lock); + + ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); + if (ret) { + pr_err("%s: Failed to set userptr: %d\n", __func__, ret); + goto out; + } + + ret = amdgpu_mn_register(bo, user_addr); + if (ret) { + pr_err("%s: Failed to register MMU notifier: %d\n", + __func__, ret); + goto out; + } + + /* If no restore worker is running concurrently, user_pages + * should not be allocated + */ + WARN(mem->user_pages, "Leaking user_pages array"); + + mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", __func__); + ret = -ENOMEM; + goto unregister_out; + } + + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + if (ret) { + pr_err("%s: Failed to get user pages: %d\n", __func__, ret); + goto free_out; + } + + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("%s: Failed to reserve BO\n", __func__); + goto release_out; + } + amdgpu_ttm_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + pr_err("%s: failed to validate BO\n", __func__); + amdgpu_bo_unreserve(bo); + +release_out: + if (ret) + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); +free_out: + kvfree(mem->user_pages); + mem->user_pages = NULL; +unregister_out: + if (ret) + amdgpu_mn_unregister(bo); +out: + mutex_unlock(&process_info->lock); + return ret; +} + /* Reserving a BO and its page table BOs must happen atomically to * avoid deadlocks. Some operations update multiple VMs at once. Track * all the reservation info in a context structure. Optionally a sync @@ -748,7 +871,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, } static int map_bo_to_gpuvm(struct amdgpu_device *adev, - struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) + struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, + bool no_update_pte) { int ret; @@ -762,6 +886,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, return ret; } + if (no_update_pte) + return 0; + ret = update_gpuvm_pte(adev, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); @@ -820,6 +947,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, mutex_init(&info->lock); INIT_LIST_HEAD(&info->vm_list_head); INIT_LIST_HEAD(&info->kfd_bo_list); + INIT_LIST_HEAD(&info->userptr_valid_list); + INIT_LIST_HEAD(&info->userptr_inval_list); info->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), @@ -830,6 +959,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, goto create_evict_fence_fail; } + info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); + atomic_set(&info->evicted_bos, 0); + INIT_DELAYED_WORK(&info->restore_userptr_work, + amdgpu_amdkfd_restore_userptr_worker); + *process_info = info; *ef = dma_fence_get(&info->eviction_fence->base); } @@ -872,6 +1006,7 @@ reserve_pd_fail: dma_fence_put(*ef); *ef = NULL; *process_info = NULL; + put_pid(info->pid); create_evict_fence_fail: mutex_destroy(&info->lock); kfree(info); @@ -967,8 +1102,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, /* Release per-process resources when last compute VM is destroyed */ if (!process_info->n_vms) { WARN_ON(!list_empty(&process_info->kfd_bo_list)); + WARN_ON(!list_empty(&process_info->userptr_valid_list)); + WARN_ON(!list_empty(&process_info->userptr_inval_list)); dma_fence_put(&process_info->eviction_fence->base); + cancel_delayed_work_sync(&process_info->restore_userptr_work); + put_pid(process_info->pid); mutex_destroy(&process_info->lock); kfree(process_info); } @@ -1003,9 +1142,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + uint64_t user_addr = 0; struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; int byte_align; - u32 alloc_domain; + u32 domain, alloc_domain; u64 alloc_flags; uint32_t mapping_flags; int ret; @@ -1014,14 +1155,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( * Check on which domain to allocate BO */ if (flags & ALLOC_MEM_FLAGS_VRAM) { - alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; + domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; } else if (flags & ALLOC_MEM_FLAGS_GTT) { - alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_flags = 0; + } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; alloc_flags = 0; + if (!offset || !*offset) + return -EINVAL; + user_addr = *offset; } else { return -EINVAL; } @@ -1069,8 +1217,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", va, size, domain_string(alloc_domain)); - ret = amdgpu_bo_create(adev, size, byte_align, - alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = byte_align; + bp.domain = alloc_domain; + bp.flags = alloc_flags; + bp.type = ttm_bo_type_device; + bp.resv = NULL; + ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) { pr_debug("Failed to create BO on domain %s. ret %d\n", domain_string(alloc_domain), ret); @@ -1078,18 +1232,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } bo->kfd_bo = *mem; (*mem)->bo = bo; + if (user_addr) + bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; (*mem)->va = va; - (*mem)->domain = alloc_domain; + (*mem)->domain = domain; (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; - add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info); + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); + + if (user_addr) { + ret = init_user_pages(*mem, current->mm, user_addr); + if (ret) { + mutex_lock(&avm->process_info->lock); + list_del(&(*mem)->validate_list.head); + mutex_unlock(&avm->process_info->lock); + goto allocate_init_user_pages_failed; + } + } if (offset) *offset = amdgpu_bo_mmap_offset(bo); return 0; +allocate_init_user_pages_failed: + amdgpu_bo_unref(&bo); + /* Don't unreserve system mem limit twice */ + goto err_reserve_system_mem; err_bo_create: unreserve_system_mem_limit(adev, size, alloc_domain); err_reserve_system_mem: @@ -1122,12 +1292,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( * be freed anyway */ + /* No more MMU notifiers */ + amdgpu_mn_unregister(mem->bo); + /* Make sure restore workers don't access the BO any more */ bo_list_entry = &mem->validate_list; mutex_lock(&process_info->lock); list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); + /* Free user pages if necessary */ + if (mem->user_pages) { + pr_debug("%s: Freeing user_pages array\n", __func__); + if (mem->user_pages[0]) + release_pages(mem->user_pages, + mem->bo->tbo.ttm->num_pages); + kvfree(mem->user_pages); + } + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1173,21 +1355,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kfd_bo_va_list *bo_va_entry = NULL; struct kfd_bo_va_list *bo_va_entry_aql = NULL; unsigned long bo_size; - - /* Make sure restore is not running concurrently. - */ - mutex_lock(&mem->process_info->lock); - - mutex_lock(&mem->lock); + bool is_invalid_userptr = false; bo = mem->bo; - if (!bo) { pr_err("Invalid BO when mapping memory to GPU\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } + /* Make sure restore is not running concurrently. Since we + * don't map invalid userptr BOs, we rely on the next restore + * worker to do the mapping + */ + mutex_lock(&mem->process_info->lock); + + /* Lock mmap-sem. If we find an invalid userptr BO, we can be + * sure that the MMU notifier is no longer running + * concurrently and the queues are actually stopped + */ + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { + down_write(¤t->mm->mmap_sem); + is_invalid_userptr = atomic_read(&mem->invalid); + up_write(¤t->mm->mmap_sem); + } + + mutex_lock(&mem->lock); + domain = mem->domain; bo_size = bo->tbo.mem.size; @@ -1200,6 +1393,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( if (unlikely(ret)) goto out; + /* Userptr can be marked as "not invalid", but not actually be + * validated yet (still in the system domain). In that case + * the queues are still stopped and we can leave mapping for + * the next restore worker + */ + if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) + is_invalid_userptr = true; + if (check_if_add_bo_to_vm(avm, mem)) { ret = add_bo_to_vm(adev, mem, avm, false, &bo_va_entry); @@ -1217,7 +1418,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( goto add_bo_to_vm_failed; } - if (mem->mapped_to_gpu_memory == 0) { + if (mem->mapped_to_gpu_memory == 0 && + !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { /* Validate BO only once. The eviction fence gets added to BO * the first time it is mapped. Validate will wait for all * background evictions to complete. @@ -1235,7 +1437,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); - ret = map_bo_to_gpuvm(adev, entry, ctx.sync); + ret = map_bo_to_gpuvm(adev, entry, ctx.sync, + is_invalid_userptr); if (ret) { pr_err("Failed to map radeon bo to gpuvm\n"); goto map_bo_to_gpuvm_failed; @@ -1418,6 +1621,337 @@ bo_reserve_failed: return ret; } +/* Evict a userptr BO by stopping the queues if necessary + * + * Runs in MMU notifier, may be in RECLAIM_FS context. This means it + * cannot do any memory allocations, and cannot take any locks that + * are held elsewhere while allocating memory. Therefore this is as + * simple as possible, using atomic counters. + * + * It doesn't do anything to the BO itself. The real work happens in + * restore, where we get updated page addresses. This function only + * ensures that GPU access to the BO is stopped. + */ +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, + struct mm_struct *mm) +{ + struct amdkfd_process_info *process_info = mem->process_info; + int invalid, evicted_bos; + int r = 0; + + invalid = atomic_inc_return(&mem->invalid); + evicted_bos = atomic_inc_return(&process_info->evicted_bos); + if (evicted_bos == 1) { + /* First eviction, stop the queues */ + r = kgd2kfd->quiesce_mm(mm); + if (r) + pr_err("Failed to quiesce KFD\n"); + schedule_delayed_work(&process_info->restore_userptr_work, + msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); + } + + return r; +} + +/* Update invalid userptr BOs + * + * Moves invalidated (evicted) userptr BOs from userptr_valid_list to + * userptr_inval_list and updates user pages for all BOs that have + * been invalidated since their last update. + */ +static int update_invalid_user_pages(struct amdkfd_process_info *process_info, + struct mm_struct *mm) +{ + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + struct ttm_operation_ctx ctx = { false, false }; + int invalid, ret; + + /* Move all invalidated BOs to the userptr_inval_list and + * release their user pages by migration to the CPU domain + */ + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_valid_list, + validate_list.head) { + if (!atomic_read(&mem->invalid)) + continue; /* BO is still valid */ + + bo = mem->bo; + + if (amdgpu_bo_reserve(bo, true)) + return -EAGAIN; + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + amdgpu_bo_unreserve(bo); + if (ret) { + pr_err("%s: Failed to invalidate userptr BO\n", + __func__); + return -EAGAIN; + } + + list_move_tail(&mem->validate_list.head, + &process_info->userptr_inval_list); + } + + if (list_empty(&process_info->userptr_inval_list)) + return 0; /* All evicted userptr BOs were freed */ + + /* Go through userptr_inval_list and update any invalid user_pages */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list.head) { + invalid = atomic_read(&mem->invalid); + if (!invalid) + /* BO hasn't been invalidated since the last + * revalidation attempt. Keep its BO list. + */ + continue; + + bo = mem->bo; + + if (!mem->user_pages) { + mem->user_pages = + kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", + __func__); + return -ENOMEM; + } + } else if (mem->user_pages[0]) { + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); + } + + /* Get updated user pages */ + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, + mem->user_pages); + if (ret) { + mem->user_pages[0] = NULL; + pr_info("%s: Failed to get user pages: %d\n", + __func__, ret); + /* Pretend it succeeded. It will fail later + * with a VM fault if the GPU tries to access + * it. Better than hanging indefinitely with + * stalled user mode queues. + */ + } + + /* Mark the BO as valid unless it was invalidated + * again concurrently + */ + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) + return -EAGAIN; + } + + return 0; +} + +/* Validate invalid userptr BOs + * + * Validates BOs on the userptr_inval_list, and moves them back to the + * userptr_valid_list. Also updates GPUVM page tables with new page + * addresses and waits for the page table updates to complete. + */ +static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) +{ + struct amdgpu_bo_list_entry *pd_bo_list_entries; + struct list_head resv_list, duplicates; + struct ww_acquire_ctx ticket; + struct amdgpu_sync sync; + + struct amdgpu_vm *peer_vm; + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + struct ttm_operation_ctx ctx = { false, false }; + int i, ret; + + pd_bo_list_entries = kcalloc(process_info->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); + if (!pd_bo_list_entries) { + pr_err("%s: Failed to allocate PD BO list entries\n", __func__); + return -ENOMEM; + } + + INIT_LIST_HEAD(&resv_list); + INIT_LIST_HEAD(&duplicates); + + /* Get all the page directory BOs that need to be reserved */ + i = 0; + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_vm_get_pd_bo(peer_vm, &resv_list, + &pd_bo_list_entries[i++]); + /* Add the userptr_inval_list entries to resv_list */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list.head) { + list_add_tail(&mem->resv_list.head, &resv_list); + mem->resv_list.bo = mem->validate_list.bo; + mem->resv_list.shared = mem->validate_list.shared; + } + + /* Reserve all BOs and page tables for validation */ + ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); + WARN(!list_empty(&duplicates), "Duplicates should be empty"); + if (ret) + goto out; + + amdgpu_sync_create(&sync); + + /* Avoid triggering eviction fences when unmapping invalid + * userptr BOs (waits for all fences, doesn't use + * FENCE_OWNER_VM) + */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, + process_info->eviction_fence, + NULL, NULL); + + ret = process_validate_vms(process_info); + if (ret) + goto unreserve_out; + + /* Validate BOs and update GPUVM page tables */ + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + struct kfd_bo_va_list *bo_va_entry; + + bo = mem->bo; + + /* Copy pages array and validate the BO if we got user pages */ + if (mem->user_pages[0]) { + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, + mem->user_pages); + amdgpu_ttm_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) { + pr_err("%s: failed to validate BO\n", __func__); + goto unreserve_out; + } + } + + /* Validate succeeded, now the BO owns the pages, free + * our copy of the pointer array. Put this BO back on + * the userptr_valid_list. If we need to revalidate + * it, we need to start from scratch. + */ + kvfree(mem->user_pages); + mem->user_pages = NULL; + list_move_tail(&mem->validate_list.head, + &process_info->userptr_valid_list); + + /* Update mapping. If the BO was not validated + * (because we couldn't get user pages), this will + * clear the page table entries, which will result in + * VM faults if the GPU tries to access the invalid + * memory. + */ + list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { + if (!bo_va_entry->is_mapped) + continue; + + ret = update_gpuvm_pte((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo_va_entry, &sync); + if (ret) { + pr_err("%s: update PTE failed\n", __func__); + /* make sure this gets validated again */ + atomic_inc(&mem->invalid); + goto unreserve_out; + } + } + } + + /* Update page directories */ + ret = process_update_pds(process_info, &sync); + +unreserve_out: + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_bo_fence(peer_vm->root.base.bo, + &process_info->eviction_fence->base, true); + ttm_eu_backoff_reservation(&ticket, &resv_list); + amdgpu_sync_wait(&sync, false); + amdgpu_sync_free(&sync); +out: + kfree(pd_bo_list_entries); + + return ret; +} + +/* Worker callback to restore evicted userptr BOs + * + * Tries to update and validate all userptr BOs. If successful and no + * concurrent evictions happened, the queues are restarted. Otherwise, + * reschedule for another attempt later. + */ +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct amdkfd_process_info *process_info = + container_of(dwork, struct amdkfd_process_info, + restore_userptr_work); + struct task_struct *usertask; + struct mm_struct *mm; + int evicted_bos; + + evicted_bos = atomic_read(&process_info->evicted_bos); + if (!evicted_bos) + return; + + /* Reference task and mm in case of concurrent process termination */ + usertask = get_pid_task(process_info->pid, PIDTYPE_PID); + if (!usertask) + return; + mm = get_task_mm(usertask); + if (!mm) { + put_task_struct(usertask); + return; + } + + mutex_lock(&process_info->lock); + + if (update_invalid_user_pages(process_info, mm)) + goto unlock_out; + /* userptr_inval_list can be empty if all evicted userptr BOs + * have been freed. In that case there is nothing to validate + * and we can just restart the queues. + */ + if (!list_empty(&process_info->userptr_inval_list)) { + if (atomic_read(&process_info->evicted_bos) != evicted_bos) + goto unlock_out; /* Concurrent eviction, try again */ + + if (validate_invalid_user_pages(process_info)) + goto unlock_out; + } + /* Final check for concurrent evicton and atomic update. If + * another eviction happens after successful update, it will + * be a first eviction that calls quiesce_mm. The eviction + * reference counting inside KFD will handle this case. + */ + if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != + evicted_bos) + goto unlock_out; + evicted_bos = 0; + if (kgd2kfd->resume_mm(mm)) { + pr_err("%s: Failed to resume KFD\n", __func__); + /* No recovery from this failure. Probably the CP is + * hanging. No point trying again. + */ + } +unlock_out: + mutex_unlock(&process_info->lock); + mmput(mm); + put_task_struct(usertask); + + /* If validation failed, reschedule another attempt */ + if (evicted_bos) + schedule_delayed_work(&process_info->restore_userptr_work, + msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); +} + /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given * KFD process identified by process_info * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index a0f48cb9b8f0..236915849cfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -322,3 +322,47 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) return ret; } + +union gfx_info { + struct atom_gfx_info_v2_4 v24; +}; + +int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + uint8_t frev, crev; + uint16_t data_offset; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + gfx_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + union gfx_info *gfx_info = (union gfx_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 4: + adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se; + adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh; + adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se; + adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se; + adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs; + adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs); + adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds; + adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth; + adev->gfx.config.gs_prim_buffer_depth = + le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = + gfx_info->v24.gc_double_offchip_lds_buffer; + adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu; + adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size); + return 0; + default: + return -EINVAL; + } + + } + return -EINVAL; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 7689c961c4ef..20f158fd3b76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -30,5 +30,6 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 1ae5ae8c45a4..daa06e7c5bb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -550,7 +550,7 @@ static int amdgpu_atpx_init(void) * look up whether we are the integrated or discrete GPU (all asics). * Returns the client id. */ -static int amdgpu_atpx_get_client_id(struct pci_dev *pdev) +static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev) { if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev)) return VGA_SWITCHEROO_IGD; @@ -569,7 +569,6 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, - { 0x1002, 0x67DF, 0x1028, 0x0774, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 02b849be083b..19cfff31f2e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -75,13 +75,20 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, { struct amdgpu_bo *dobj = NULL; struct amdgpu_bo *sobj = NULL; + struct amdgpu_bo_param bp; uint64_t saddr, daddr; int r, n; int time; + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = sdomain; + bp.flags = 0; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; n = AMDGPU_BENCHMARK_ITERATIONS; - r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0, - ttm_bo_type_kernel, NULL, &sobj); + r = amdgpu_bo_create(adev, &bp, &sobj); if (r) { goto out_cleanup; } @@ -93,8 +100,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, if (r) { goto out_cleanup; } - r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0, - ttm_bo_type_kernel, NULL, &dobj); + bp.domain = ddomain; + r = amdgpu_bo_create(adev, &bp, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 71a57b2f7f04..e950730f1933 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -23,7 +23,6 @@ */ #include <linux/list.h> #include <linux/slab.h> -#include <linux/pci.h> #include <drm/drmP.h> #include <linux/firmware.h> #include <drm/amdgpu_drm.h> @@ -109,121 +108,6 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, WARN(1, "Invalid indirect register space"); } -static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device, - enum cgs_resource_type resource_type, - uint64_t size, - uint64_t offset, - uint64_t *resource_base) -{ - CGS_FUNC_ADEV; - - if (resource_base == NULL) - return -EINVAL; - - switch (resource_type) { - case CGS_RESOURCE_TYPE_MMIO: - if (adev->rmmio_size == 0) - return -ENOENT; - if ((offset + size) > adev->rmmio_size) - return -EINVAL; - *resource_base = adev->rmmio_base; - return 0; - case CGS_RESOURCE_TYPE_DOORBELL: - if (adev->doorbell.size == 0) - return -ENOENT; - if ((offset + size) > adev->doorbell.size) - return -EINVAL; - *resource_base = adev->doorbell.base; - return 0; - case CGS_RESOURCE_TYPE_FB: - case CGS_RESOURCE_TYPE_IO: - case CGS_RESOURCE_TYPE_ROM: - default: - return -EINVAL; - } -} - -static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device, - unsigned table, uint16_t *size, - uint8_t *frev, uint8_t *crev) -{ - CGS_FUNC_ADEV; - uint16_t data_start; - - if (amdgpu_atom_parse_data_header( - adev->mode_info.atom_context, table, size, - frev, crev, &data_start)) - return (uint8_t*)adev->mode_info.atom_context->bios + - data_start; - - return NULL; -} - -static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table, - uint8_t *frev, uint8_t *crev) -{ - CGS_FUNC_ADEV; - - if (amdgpu_atom_parse_cmd_header( - adev->mode_info.atom_context, table, - frev, crev)) - return 0; - - return -EINVAL; -} - -static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table, - void *args) -{ - CGS_FUNC_ADEV; - - return amdgpu_atom_execute_table( - adev->mode_info.atom_context, table, args); -} - -static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, - enum amd_ip_block_type block_type, - enum amd_clockgating_state state) -{ - CGS_FUNC_ADEV; - int i, r = -1; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - - if (adev->ip_blocks[i].version->type == block_type) { - r = adev->ip_blocks[i].version->funcs->set_clockgating_state( - (void *)adev, - state); - break; - } - } - return r; -} - -static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device, - enum amd_ip_block_type block_type, - enum amd_powergating_state state) -{ - CGS_FUNC_ADEV; - int i, r = -1; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; - - if (adev->ip_blocks[i].version->type == block_type) { - r = adev->ip_blocks[i].version->funcs->set_powergating_state( - (void *)adev, - state); - break; - } - } - return r; -} - - static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) { CGS_FUNC_ADEV; @@ -271,18 +155,6 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) return result; } -static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type) -{ - CGS_FUNC_ADEV; - if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) { - release_firmware(adev->pm.fw); - adev->pm.fw = NULL; - return 0; - } - /* cannot release other firmware because they are not created by cgs */ - return -EINVAL; -} - static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, enum cgs_ucode_id type) { @@ -326,34 +198,6 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, return fw_version; } -static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device, - bool en) -{ - CGS_FUNC_ADEV; - - if (adev->gfx.rlc.funcs->enter_safe_mode == NULL || - adev->gfx.rlc.funcs->exit_safe_mode == NULL) - return 0; - - if (en) - adev->gfx.rlc.funcs->enter_safe_mode(adev); - else - adev->gfx.rlc.funcs->exit_safe_mode(adev); - - return 0; -} - -static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device, - bool lock) -{ - CGS_FUNC_ADEV; - - if (lock) - mutex_lock(&adev->grbm_idx_mutex); - else - mutex_unlock(&adev->grbm_idx_mutex); -} - static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info) @@ -541,6 +385,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, case CHIP_POLARIS12: strcpy(fw_name, "amdgpu/polaris12_smc.bin"); break; + case CHIP_VEGAM: + strcpy(fw_name, "amdgpu/vegam_smc.bin"); + break; case CHIP_VEGA10: if ((adev->pdev->device == 0x687f) && ((adev->pdev->revision == 0xc0) || @@ -553,6 +400,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, case CHIP_VEGA12: strcpy(fw_name, "amdgpu/vega12_smc.bin"); break; + case CHIP_VEGA20: + strcpy(fw_name, "amdgpu/vega20_smc.bin"); + break; default: DRM_ERROR("SMC firmware not supported\n"); return -EINVAL; @@ -598,97 +448,12 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return 0; } -static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device) -{ - CGS_FUNC_ADEV; - return amdgpu_sriov_vf(adev); -} - -static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, - struct cgs_display_info *info) -{ - CGS_FUNC_ADEV; - struct cgs_mode_info *mode_info; - - if (info == NULL) - return -EINVAL; - - mode_info = info->mode_info; - if (mode_info) - /* if the displays are off, vblank time is max */ - mode_info->vblank_time_us = 0xffffffff; - - if (!amdgpu_device_has_dc_support(adev)) { - struct amdgpu_crtc *amdgpu_crtc; - struct drm_device *ddev = adev->ddev; - struct drm_crtc *crtc; - uint32_t line_time_us, vblank_lines; - - if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, - &ddev->mode_config.crtc_list, head) { - amdgpu_crtc = to_amdgpu_crtc(crtc); - if (crtc->enabled) { - info->active_display_mask |= (1 << amdgpu_crtc->crtc_id); - info->display_count++; - } - if (mode_info != NULL && - crtc->enabled && amdgpu_crtc->enabled && - amdgpu_crtc->hw_mode.clock) { - line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) / - amdgpu_crtc->hw_mode.clock; - vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end - - amdgpu_crtc->hw_mode.crtc_vdisplay + - (amdgpu_crtc->v_border * 2); - mode_info->vblank_time_us = vblank_lines * line_time_us; - mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); - /* we have issues with mclk switching with refresh rates - * over 120 hz on the non-DC code. - */ - if (mode_info->refresh_rate > 120) - mode_info->vblank_time_us = 0; - mode_info = NULL; - } - } - } - } else { - info->display_count = adev->pm.pm_display_cfg.num_display; - if (mode_info != NULL) { - mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time; - mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh; - } - } - return 0; -} - - -static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled) -{ - CGS_FUNC_ADEV; - - adev->pm.dpm_enabled = enabled; - - return 0; -} - static const struct cgs_ops amdgpu_cgs_ops = { .read_register = amdgpu_cgs_read_register, .write_register = amdgpu_cgs_write_register, .read_ind_register = amdgpu_cgs_read_ind_register, .write_ind_register = amdgpu_cgs_write_ind_register, - .get_pci_resource = amdgpu_cgs_get_pci_resource, - .atom_get_data_table = amdgpu_cgs_atom_get_data_table, - .atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs, - .atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table, .get_firmware_info = amdgpu_cgs_get_firmware_info, - .rel_firmware = amdgpu_cgs_rel_firmware, - .set_powergating_state = amdgpu_cgs_set_powergating_state, - .set_clockgating_state = amdgpu_cgs_set_clockgating_state, - .get_active_displays_info = amdgpu_cgs_get_active_displays_info, - .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled, - .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, - .enter_safe_mode = amdgpu_cgs_enter_safe_mode, - .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, }; struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 96501ff0e55b..8e66851eb427 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -691,7 +691,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector) return ret; } -static int amdgpu_connector_lvds_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector); @@ -843,7 +843,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector) return ret; } -static int amdgpu_connector_vga_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1172,7 +1172,7 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector) amdgpu_connector->use_digital = true; } -static int amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1448,7 +1448,7 @@ out: return ret; } -static int amdgpu_connector_dp_mode_valid(struct drm_connector *connector, +static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dc34b50e6b29..82312a7bc6ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -382,8 +382,7 @@ retry: p->bytes_moved += ctx.bytes_moved; if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) + amdgpu_bo_in_cpu_visible_vram(bo)) p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { @@ -411,7 +410,6 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *candidate = p->evictable; struct amdgpu_bo *bo = candidate->robj; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved, bytes_moved; bool update_bytes_moved_vis; uint32_t other; @@ -435,18 +433,14 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, continue; /* Good we can try to move this BO somewhere else */ - amdgpu_ttm_placement_from_domain(bo, other); update_bytes_moved_vis = adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT; - initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); + amdgpu_bo_in_cpu_visible_vram(bo); + amdgpu_ttm_placement_from_domain(bo, other); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - bytes_moved = atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; - p->bytes_moved += bytes_moved; + p->bytes_moved += ctx.bytes_moved; if (update_bytes_moved_vis) - p->bytes_moved_vis += bytes_moved; + p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r)) break; @@ -528,6 +522,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *e; struct list_head duplicates; unsigned i, tries = 10; + struct amdgpu_bo *gds; + struct amdgpu_bo *gws; + struct amdgpu_bo *oa; int r; INIT_LIST_HEAD(&p->validated); @@ -536,7 +533,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->bo_list) { amdgpu_bo_list_get_list(p->bo_list, &p->validated); if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev); + p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); } INIT_LIST_HEAD(&duplicates); @@ -658,31 +655,36 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); + if (p->bo_list) { - struct amdgpu_bo *gds = p->bo_list->gds_obj; - struct amdgpu_bo *gws = p->bo_list->gws_obj; - struct amdgpu_bo *oa = p->bo_list->oa_obj; struct amdgpu_vm *vm = &fpriv->vm; unsigned i; + gds = p->bo_list->gds_obj; + gws = p->bo_list->gws_obj; + oa = p->bo_list->oa_obj; for (i = 0; i < p->bo_list->num_entries; i++) { struct amdgpu_bo *bo = p->bo_list->array[i].robj; p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); } + } else { + gds = p->adev->gds.gds_gfx_bo; + gws = p->adev->gds.gws_gfx_bo; + oa = p->adev->gds.oa_gfx_bo; + } - if (gds) { - p->job->gds_base = amdgpu_bo_gpu_offset(gds); - p->job->gds_size = amdgpu_bo_size(gds); - } - if (gws) { - p->job->gws_base = amdgpu_bo_gpu_offset(gws); - p->job->gws_size = amdgpu_bo_size(gws); - } - if (oa) { - p->job->oa_base = amdgpu_bo_gpu_offset(oa); - p->job->oa_size = amdgpu_bo_size(oa); - } + if (gds) { + p->job->gds_base = amdgpu_bo_gpu_offset(gds); + p->job->gds_size = amdgpu_bo_size(gds); + } + if (gws) { + p->job->gws_base = amdgpu_bo_gpu_offset(gws); + p->job->gws_size = amdgpu_bo_size(gws); + } + if (oa) { + p->job->oa_base = amdgpu_bo_gpu_offset(oa); + p->job->oa_size = amdgpu_bo_size(oa); } if (!r && p->uf_entry.robj) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 09d35051fdd6..c5bb36275e93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -91,7 +91,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, continue; r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity, - rq, amdgpu_sched_jobs, &ctx->guilty); + rq, &ctx->guilty); if (r) goto failed; } @@ -111,8 +111,9 @@ failed: return r; } -static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) +static void amdgpu_ctx_fini(struct kref *ref) { + struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); struct amdgpu_device *adev = ctx->adev; unsigned i, j; @@ -125,13 +126,11 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) kfree(ctx->fences); ctx->fences = NULL; - for (i = 0; i < adev->num_rings; i++) - drm_sched_entity_fini(&adev->rings[i]->sched, - &ctx->rings[i].entity); - amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); mutex_destroy(&ctx->lock); + + kfree(ctx); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@ -170,12 +169,20 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; + u32 i; ctx = container_of(ref, struct amdgpu_ctx, refcount); - amdgpu_ctx_fini(ctx); + for (i = 0; i < ctx->adev->num_rings; i++) { - kfree(ctx); + if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) + continue; + + drm_sched_entity_fini(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); + } + + amdgpu_ctx_fini(ref); } static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) @@ -419,9 +426,11 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id) if (other) { signed long r; - r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + r = dma_fence_wait(other, true); if (r < 0) { - DRM_ERROR("Error (%ld) waiting for fence!\n", r); + if (r != -ERESTARTSYS) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + return r; } } @@ -435,16 +444,72 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) idr_init(&mgr->ctx_handles); } +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) +{ + struct amdgpu_ctx *ctx; + struct idr *idp; + uint32_t id, i; + + idp = &mgr->ctx_handles; + + idr_for_each_entry(idp, ctx, id) { + + if (!ctx->adev) + return; + + for (i = 0; i < ctx->adev->num_rings; i++) { + + if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) + continue; + + if (kref_read(&ctx->refcount) == 1) + drm_sched_entity_do_release(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); + else + DRM_ERROR("ctx %p is still alive\n", ctx); + } + } +} + +void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr) +{ + struct amdgpu_ctx *ctx; + struct idr *idp; + uint32_t id, i; + + idp = &mgr->ctx_handles; + + idr_for_each_entry(idp, ctx, id) { + + if (!ctx->adev) + return; + + for (i = 0; i < ctx->adev->num_rings; i++) { + + if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) + continue; + + if (kref_read(&ctx->refcount) == 1) + drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity); + else + DRM_ERROR("ctx %p is still alive\n", ctx); + } + } +} + void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) { struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id; + amdgpu_ctx_mgr_entity_cleanup(mgr); + idp = &mgr->ctx_handles; idr_for_each_entry(idp, ctx, id) { - if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1) + if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1) DRM_ERROR("ctx %p is still alive\n", ctx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 448d69fe3756..f5fb93795a69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -28,8 +28,13 @@ #include <linux/debugfs.h> #include "amdgpu.h" -/* - * Debugfs +/** + * amdgpu_debugfs_add_files - Add simple debugfs entries + * + * @adev: Device to attach debugfs entries to + * @files: Array of function callbacks that respond to reads + * @nfiles: Number of callbacks to register + * */ int amdgpu_debugfs_add_files(struct amdgpu_device *adev, const struct drm_info_list *files, @@ -64,7 +69,33 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev, #if defined(CONFIG_DEBUG_FS) - +/** + * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes + * + * @read: True if reading + * @f: open file handle + * @buf: User buffer to write/read to + * @size: Number of bytes to write/read + * @pos: Offset to seek to + * + * This debugfs entry has special meaning on the offset being sought. + * Various bits have different meanings: + * + * Bit 62: Indicates a GRBM bank switch is needed + * Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is + * zero) + * Bits 24..33: The SE or ME selector if needed + * Bits 34..43: The SH (or SA) or PIPE selector if needed + * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed + * + * Bit 23: Indicates that the PM power gating lock should be held + * This is necessary to read registers that might be + * unreliable during a power gating transistion. + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that. + */ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -164,19 +195,37 @@ end: return result; } - +/** + * amdgpu_debugfs_regs_read - Callback for reading MMIO registers + */ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos); } +/** + * amdgpu_debugfs_regs_write - Callback for writing MMIO registers + */ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos); } + +/** + * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -204,6 +253,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to write. This + * allows writing multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -232,6 +293,18 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user return result; } +/** + * amdgpu_debugfs_regs_didt_read - Read from a DIDT register + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -259,6 +332,18 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_regs_didt_write - Write to a DIDT register + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to write. This + * allows writing multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -287,6 +372,18 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user return result; } +/** + * amdgpu_debugfs_regs_smc_read - Read from a SMC register + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to read. This + * allows reading multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -314,6 +411,18 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_regs_smc_write - Write to a SMC register + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * The lower bits are the BYTE offset of the register to write. This + * allows writing multiple registers in a single call and having + * the returned size reflect that. + */ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -342,6 +451,20 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * return result; } +/** + * amdgpu_debugfs_gca_config_read - Read from gfx config data + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * This file is used to access configuration data in a somewhat + * stable fashion. The format is a series of DWORDs with the first + * indicating which revision it is. New content is appended to the + * end so that older software can still read the data. + */ + static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -418,6 +541,19 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_debugfs_sensor_read - Read from the powerplay sensors + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The offset is treated as the BYTE address of one of the sensors + * enumerated in amd/include/kgd_pp_interface.h under the + * 'amd_pp_sensors' enumeration. For instance to read the UVD VCLK + * you would use the offset 3 * 4 = 12. + */ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -428,7 +564,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, if (size & 3 || *pos & 0x3) return -EINVAL; - if (amdgpu_dpm == 0) + if (!adev->pm.dpm_enabled) return -EINVAL; /* convert offset to sensor number */ @@ -457,6 +593,27 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, return !r ? outsize : r; } +/** amdgpu_debugfs_wave_read - Read WAVE STATUS data + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The offset being sought changes which wave that the status data + * will be returned for. The bits are used as follows: + * + * Bits 0..6: Byte offset into data + * Bits 7..14: SE selector + * Bits 15..22: SH/SA selector + * Bits 23..30: CU/{WGP+SIMD} selector + * Bits 31..36: WAVE ID selector + * Bits 37..44: SIMD ID selector + * + * The returned data begins with one DWORD of version information + * Followed by WAVE STATUS registers relevant to the GFX IP version + * being used. See gfx_v8_0_read_wave_data() for an example output. + */ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -507,6 +664,28 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, return result; } +/** amdgpu_debugfs_gpr_read - Read wave gprs + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * The offset being sought changes which wave that the status data + * will be returned for. The bits are used as follows: + * + * Bits 0..11: Byte offset into data + * Bits 12..19: SE selector + * Bits 20..27: SH/SA selector + * Bits 28..35: CU/{WGP+SIMD} selector + * Bits 36..43: WAVE ID selector + * Bits 37..44: SIMD ID selector + * Bits 52..59: Thread selector + * Bits 60..61: Bank selector (VGPR=0,SGPR=1) + * + * The return data comes from the SGPR or VGPR register bank for + * the selected operational unit. + */ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -637,6 +816,12 @@ static const char *debugfs_regs_names[] = { "amdgpu_gpr", }; +/** + * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide + * register access. + * + * @adev: The device to attach the debugfs entries to + */ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { struct drm_minor *minor = adev->ddev->primary; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 34af664b9f93..3317d1536f4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -83,8 +83,10 @@ static const char *amdgpu_asic_name[] = { "POLARIS10", "POLARIS11", "POLARIS12", + "VEGAM", "VEGA10", "VEGA12", + "VEGA20", "RAVEN", "LAST", }; @@ -690,6 +692,8 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev, { u64 size_af, size_bf; + mc->gart_size += adev->pm.smu_prv_buffer_size; + size_af = adev->gmc.mc_mask - mc->vram_end; size_bf = mc->vram_start; if (size_bf > size_af) { @@ -907,6 +911,46 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) } } +static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) +{ + struct sysinfo si; + bool is_os_64 = (sizeof(void *) == 8) ? true : false; + uint64_t total_memory; + uint64_t dram_size_seven_GB = 0x1B8000000; + uint64_t dram_size_three_GB = 0xB8000000; + + if (amdgpu_smu_memory_pool_size == 0) + return; + + if (!is_os_64) { + DRM_WARN("Not 64-bit OS, feature not supported\n"); + goto def_value; + } + si_meminfo(&si); + total_memory = (uint64_t)si.totalram * si.mem_unit; + + if ((amdgpu_smu_memory_pool_size == 1) || + (amdgpu_smu_memory_pool_size == 2)) { + if (total_memory < dram_size_three_GB) + goto def_value1; + } else if ((amdgpu_smu_memory_pool_size == 4) || + (amdgpu_smu_memory_pool_size == 8)) { + if (total_memory < dram_size_seven_GB) + goto def_value1; + } else { + DRM_WARN("Smu memory pool size not supported\n"); + goto def_value; + } + adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; + + return; + +def_value1: + DRM_WARN("No enough system memory\n"); +def_value: + adev->pm.smu_prv_buffer_size = 0; +} + /** * amdgpu_device_check_arguments - validate module params * @@ -948,6 +992,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_vm_fragment_size = -1; } + amdgpu_device_check_smu_prv_buffer_size(adev); + amdgpu_device_check_vm_size(adev); amdgpu_device_check_block_size(adev); @@ -1039,10 +1085,11 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { * the hardware IP specified. * Returns the error code from the last instance. */ -int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state) { + struct amdgpu_device *adev = dev; int i, r = 0; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1072,10 +1119,11 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, * the hardware IP specified. * Returns the error code from the last instance. */ -int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, +int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state) { + struct amdgpu_device *adev = dev; int i, r = 0; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1320,9 +1368,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_TOPAZ: case CHIP_TONGA: case CHIP_FIJI: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: case CHIP_CARRIZO: case CHIP_STONEY: #ifdef CONFIG_DRM_AMDGPU_SI @@ -1339,6 +1388,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_KABINI: case CHIP_MULLINS: #endif + case CHIP_VEGA20: default: return 0; case CHIP_VEGA10: @@ -1428,9 +1478,10 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_TOPAZ: case CHIP_TONGA: case CHIP_FIJI: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: case CHIP_CARRIZO: case CHIP_STONEY: if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) @@ -1472,6 +1523,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) #endif case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: if (adev->asic_type == CHIP_RAVEN) adev->family = AMDGPU_FAMILY_RV; @@ -1499,6 +1551,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return -EAGAIN; } + adev->powerplay.pp_feature = amdgpu_pp_feature_mask; + for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { DRM_ERROR("disabled ip block: %d <%s>\n", @@ -1654,12 +1708,17 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) if (amdgpu_emu_mode == 1) return 0; + r = amdgpu_ib_ring_tests(adev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; /* skip CG for VCE/UVD, it's handled specially */ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, @@ -1671,6 +1730,18 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) } } } + + if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) { + /* enable gfx powergating */ + amdgpu_device_ip_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_GATE); + /* enable gfxoff */ + amdgpu_device_ip_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_SMC, + AMD_PG_STATE_GATE); + } + return 0; } @@ -1704,8 +1775,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) } } - mod_delayed_work(system_wq, &adev->late_init_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); + queue_delayed_work(system_wq, &adev->late_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); amdgpu_device_fill_reset_magic(adev); @@ -1759,6 +1830,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* ungate blocks before hw fini so that we can shutdown the blocks safely */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, @@ -1850,6 +1922,12 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) amdgpu_virt_request_full_gpu(adev, false); + /* ungate SMC block powergating */ + if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) + amdgpu_device_ip_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_SMC, + AMD_CG_STATE_UNGATE); + /* ungate SMC block first */ r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, AMD_CG_STATE_UNGATE); @@ -2086,16 +2164,15 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_MULLINS: case CHIP_CARRIZO: case CHIP_STONEY: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: case CHIP_TONGA: case CHIP_FIJI: -#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA) - return amdgpu_dc != 0; -#endif case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case CHIP_RAVEN: #endif @@ -2375,10 +2452,6 @@ fence_driver_init: goto failed; } - r = amdgpu_ib_ring_tests(adev); - if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); - if (amdgpu_sriov_vf(adev)) amdgpu_virt_init_data_exchange(adev); @@ -2539,7 +2612,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb); + struct drm_framebuffer *fb = crtc->primary->fb; struct amdgpu_bo *robj; if (amdgpu_crtc->cursor_bo) { @@ -2551,10 +2624,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) } } - if (rfb == NULL || rfb->obj == NULL) { + if (fb == NULL || fb->obj[0] == NULL) { continue; } - robj = gem_to_amdgpu_bo(rfb->obj); + robj = gem_to_amdgpu_bo(fb->obj[0]); /* don't unpin kernel fb objects */ if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { r = amdgpu_bo_reserve(robj, true); @@ -2640,11 +2713,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } amdgpu_fence_driver_resume(adev); - if (resume) { - r = amdgpu_ib_ring_tests(adev); - if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); - } r = amdgpu_device_ip_late_init(adev); if (r) @@ -2736,6 +2804,9 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return true; + if (amdgpu_asic_need_full_reset(adev)) + return true; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -2792,6 +2863,9 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) { int i; + if (amdgpu_asic_need_full_reset(adev)) + return true; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -3087,20 +3161,19 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, /* now we are okay to resume SMC/CP/SDMA */ r = amdgpu_device_ip_reinit_late_sriov(adev); - amdgpu_virt_release_full_gpu(adev, true); if (r) goto error; amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); +error: + amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { atomic_inc(&adev->vram_lost_counter); r = amdgpu_device_handle_vram_lost(adev); } -error: - return r; } @@ -3117,7 +3190,6 @@ error: int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, bool force) { - struct drm_atomic_state *state = NULL; int i, r, resched; if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { @@ -3140,10 +3212,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); - /* store modesetting */ - if (amdgpu_device_has_dc_support(adev)) - state = drm_atomic_helper_suspend(adev->ddev); - /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -3183,10 +3251,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, kthread_unpark(ring->sched.thread); } - if (amdgpu_device_has_dc_support(adev)) { - if (drm_atomic_helper_resume(adev->ddev, state)) - dev_info(adev->dev, "drm resume failed:%d\n", r); - } else { + if (!amdgpu_device_has_dc_support(adev)) { drm_helper_resume_force_mode(adev->ddev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 93f700ab1bfb..76ee8e04ff11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -35,6 +35,7 @@ #include <linux/pm_runtime.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fb_helper.h> static void amdgpu_display_flip_callback(struct dma_fence *f, @@ -151,8 +152,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - struct amdgpu_framebuffer *old_amdgpu_fb; - struct amdgpu_framebuffer *new_amdgpu_fb; struct drm_gem_object *obj; struct amdgpu_flip_work *work; struct amdgpu_bo *new_abo; @@ -174,15 +173,13 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; /* schedule unpin of the old buffer */ - old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - obj = old_amdgpu_fb->obj; + obj = crtc->primary->fb->obj[0]; /* take a reference to the old object */ work->old_abo = gem_to_amdgpu_bo(obj); amdgpu_bo_ref(work->old_abo); - new_amdgpu_fb = to_amdgpu_framebuffer(fb); - obj = new_amdgpu_fb->obj; + obj = fb->obj[0]; new_abo = gem_to_amdgpu_bo(obj); /* pin the new buffer */ @@ -192,7 +189,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto cleanup; } - r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base); + r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base); if (unlikely(r != 0)) { DRM_ERROR("failed to pin new abo buffer before flip\n"); goto unreserve; @@ -482,31 +479,12 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, return true; } -static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb) -{ - struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); - - drm_gem_object_put_unlocked(amdgpu_fb->obj); - drm_framebuffer_cleanup(fb); - kfree(amdgpu_fb); -} - -static int amdgpu_display_user_framebuffer_create_handle( - struct drm_framebuffer *fb, - struct drm_file *file_priv, - unsigned int *handle) -{ - struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); - - return drm_gem_handle_create(file_priv, amdgpu_fb->obj, handle); -} - static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { - .destroy = amdgpu_display_user_framebuffer_destroy, - .create_handle = amdgpu_display_user_framebuffer_create_handle, + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, }; -uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev) +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev) { uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; @@ -526,11 +504,11 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, struct drm_gem_object *obj) { int ret; - rfb->obj = obj; + rfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); if (ret) { - rfb->obj = NULL; + rfb->base.obj[0] = NULL; return ret; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 2b11d808f297..f66e3e3fef0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -23,7 +23,7 @@ #ifndef __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__ -uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev); +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index e997ebbe43ea..def1010ac05e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -115,6 +115,26 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, pr_cont("\n"); } +void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev) +{ + struct drm_device *ddev = adev->ddev; + struct drm_crtc *crtc; + struct amdgpu_crtc *amdgpu_crtc; + + adev->pm.dpm.new_active_crtcs = 0; + adev->pm.dpm.new_active_crtc_count = 0; + if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { + list_for_each_entry(crtc, + &ddev->mode_config.crtc_list, head) { + amdgpu_crtc = to_amdgpu_crtc(crtc); + if (amdgpu_crtc->enabled) { + adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id); + adev->pm.dpm.new_active_crtc_count++; + } + } + } +} + u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 643d008410c6..dd6203a0a6b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -52,8 +52,6 @@ enum amdgpu_dpm_event_src { AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 }; -#define SCLK_DEEP_SLEEP_MASK 0x8 - struct amdgpu_ps { u32 caps; /* vbios flags */ u32 class; /* vbios flags */ @@ -349,12 +347,6 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) -#define amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \ - virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \ - ((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \ - (adev)->powerplay.pp_handle, virtual_addr_low, \ - virtual_addr_hi, mc_addr_low, mc_addr_hi, size) - #define amdgpu_dpm_get_power_profile_mode(adev, buf) \ ((adev)->powerplay.pp_funcs->get_power_profile_mode(\ (adev)->powerplay.pp_handle, buf)) @@ -445,6 +437,8 @@ struct amdgpu_pm { uint32_t pcie_gen_mask; uint32_t pcie_mlw_mask; struct amd_pp_display_configuration pm_display_cfg;/* set by dc */ + uint32_t smu_prv_buffer_size; + struct amdgpu_bo *smu_prv_buffer; }; #define R600_SSTU_DFLT 0 @@ -482,6 +476,7 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, struct amdgpu_ps *rps); u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); +void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev); bool amdgpu_is_uvd_state(u32 class, u32 class2); void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, u32 *p, u32 *u); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0b19482b36b8..b0bf2f24da48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -75,9 +75,10 @@ * - 3.23.0 - Add query for VRAM lost counter * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). + * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 25 +#define KMS_DRIVER_MINOR 26 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -121,7 +122,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; -uint amdgpu_pp_feature_mask = 0xffffbfff; +uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */ int amdgpu_ngg = 0; int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; @@ -132,6 +133,7 @@ int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ int amdgpu_emu_mode = 0; +uint amdgpu_smu_memory_pool_size = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -316,6 +318,11 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled) module_param_named(cik_support, amdgpu_cik_support, int, 0444); #endif +MODULE_PARM_DESC(smu_memory_pool_size, + "reserve gtt for smu debug usage, 0 = disable," + "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); +module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -534,6 +541,9 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, + /* VEGAM */ + {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, + {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, /* Vega 10 */ {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, @@ -550,6 +560,13 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, + /* Vega 20 */ + {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, /* Raven */ {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 12063019751b..bc5fd8ebab5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -137,7 +137,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, /* need to align pitch with crtc limits */ mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, fb_tiled); - domain = amdgpu_display_framebuffer_domains(adev); + domain = amdgpu_display_supported_domains(adev); height = ALIGN(mode_cmd->height, 8); size = mode_cmd->pitches[0] * height; @@ -292,9 +292,9 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb drm_fb_helper_unregister_fbi(&rfbdev->helper); - if (rfb->obj) { - amdgpufb_destroy_pinned_object(rfb->obj); - rfb->obj = NULL; + if (rfb->base.obj[0]) { + amdgpufb_destroy_pinned_object(rfb->base.obj[0]); + rfb->base.obj[0] = NULL; drm_framebuffer_unregister_private(&rfb->base); drm_framebuffer_cleanup(&rfb->base); } @@ -377,7 +377,7 @@ int amdgpu_fbdev_total_size(struct amdgpu_device *adev) if (!adev->mode_info.rfbdev) return 0; - robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj); + robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]); size += amdgpu_bo_size(robj); return size; } @@ -386,7 +386,7 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) { if (!adev->mode_info.rfbdev) return false; - if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj)) + if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0])) return true; return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 97449e06a242..39ec6b8890a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -131,7 +131,8 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, + unsigned flags) { struct amdgpu_device *adev = ring->adev; struct amdgpu_fence *fence; @@ -149,7 +150,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) adev->fence_context + ring->idx, seq); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - seq, AMDGPU_FENCE_FLAG_INT); + seq, flags | AMDGPU_FENCE_FLAG_INT); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; /* This function can't be called concurrently anyway, otherwise @@ -375,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; uint64_t index; - if (ring != &adev->uvd.ring) { + if (ring != &adev->uvd.inst[ring->me].ring) { ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs]; ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4); } else { /* put fence directly behind firmware */ index = ALIGN(adev->uvd.fw->size, 8); - ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index; - ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index; + ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index; + ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index; } amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); amdgpu_irq_get(adev, irq_src, irq_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index cf0f186c6092..17d6b9fb6d77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -113,12 +113,17 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) int r; if (adev->gart.robj == NULL) { - r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - ttm_bo_type_kernel, NULL, - &adev->gart.robj); + struct amdgpu_bo_param bp; + + memset(&bp, 0, sizeof(bp)); + bp.size = adev->gart.table_size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + r = amdgpu_bo_create(adev, &bp, &adev->gart.robj); if (r) { return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 46b9ea4e6103..5fb156a01774 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -30,6 +30,7 @@ #include <drm/drmP.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" +#include "amdgpu_display.h" void amdgpu_gem_object_free(struct drm_gem_object *gobj) { @@ -48,17 +49,25 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, struct drm_gem_object **obj) { struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; int r; + memset(&bp, 0, sizeof(bp)); *obj = NULL; /* At least align on page size */ if (alignment < PAGE_SIZE) { alignment = PAGE_SIZE; } + bp.size = size; + bp.byte_align = alignment; + bp.type = type; + bp.resv = resv; + bp.preferred_domain = initial_domain; retry: - r = amdgpu_bo_create(adev, size, alignment, initial_domain, - flags, type, resv, &bo); + bp.flags = flags; + bp.domain = initial_domain; + r = amdgpu_bo_create(adev, &bp, &bo); if (r) { if (r != -ERESTARTSYS) { if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { @@ -221,17 +230,19 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; /* reject invalid gem domains */ - if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | - AMDGPU_GEM_DOMAIN_GTT | - AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GDS | - AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA)) + if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) return -EINVAL; /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { + if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { + /* if gds bo is created from user space, it must be + * passed to bo list + */ + DRM_ERROR("GDS bo cannot be per-vm-bo\n"); + return -EINVAL; + } flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) size = size << AMDGPU_GDS_SHIFT; @@ -746,15 +757,16 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct amdgpu_device *adev = dev->dev_private; struct drm_gem_object *gobj; uint32_t handle; + u32 domain; int r; args->pitch = amdgpu_align_pitch(adev, args->width, DIV_ROUND_UP(args->bpp, 8), 0); args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, PAGE_SIZE); - - r = amdgpu_gem_object_create(adev, args->size, 0, - AMDGPU_GEM_DOMAIN_VRAM, + domain = amdgpu_bo_get_preferred_pin_domain(adev, + amdgpu_display_supported_domains(adev)); + r = amdgpu_gem_object_create(adev, args->size, 0, domain, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, false, NULL, &gobj); if (r) @@ -771,16 +783,23 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, } #if defined(CONFIG_DEBUG_FS) + +#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag) \ + if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \ + seq_printf((m), " " #flag); \ + } + static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) { struct drm_gem_object *gobj = ptr; struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); struct seq_file *m = data; + struct dma_buf_attachment *attachment; + struct dma_buf *dma_buf; unsigned domain; const char *placement; unsigned pin_count; - uint64_t offset; domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); switch (domain) { @@ -798,13 +817,27 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) seq_printf(m, "\t0x%08x: %12ld byte %s", id, amdgpu_bo_size(bo), placement); - offset = READ_ONCE(bo->tbo.mem.start); - if (offset != AMDGPU_BO_INVALID_OFFSET) - seq_printf(m, " @ 0x%010Lx", offset); - pin_count = READ_ONCE(bo->pin_count); if (pin_count) seq_printf(m, " pin count %d", pin_count); + + dma_buf = READ_ONCE(bo->gem_base.dma_buf); + attachment = READ_ONCE(bo->gem_base.import_attach); + + if (attachment) + seq_printf(m, " imported from %p", dma_buf); + else if (dma_buf) + seq_printf(m, " exported as %p", dma_buf); + + amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED); + amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS); + amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC); + amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED); + amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW); + amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS); + amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID); + amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC); + seq_printf(m, "\n"); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 311589e02d17..f70eeed9ed76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -127,6 +127,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; + unsigned fence_flags = 0; unsigned i; int r = 0; @@ -227,7 +228,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, #endif amdgpu_asic_invalidate_hdp(adev, ring); - r = amdgpu_fence_emit(ring, f); + if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) + fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; + + r = amdgpu_fence_emit(ring, f, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); if (job && job->vmid) @@ -242,7 +246,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* wrap the last IB with fence */ if (job && job->uf_addr) { amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, - AMDGPU_FENCE_FLAG_64BIT); + fence_flags | AMDGPU_FENCE_FLAG_64BIT); } if (patch_offset != ~0 && ring->funcs->patch_cond_exec) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4b7824d30e73..91517b166a3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -31,6 +31,7 @@ #include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" +#include "atom.h" #include <linux/vga_switcheroo.h> #include <linux/slab.h> @@ -214,6 +215,18 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->gfx.rlc_fw_version; fw_info->feature = adev->gfx.rlc_feature_version; break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL: + fw_info->ver = adev->gfx.rlc_srlc_fw_version; + fw_info->feature = adev->gfx.rlc_srlc_feature_version; + break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM: + fw_info->ver = adev->gfx.rlc_srlg_fw_version; + fw_info->feature = adev->gfx.rlc_srlg_feature_version; + break; + case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM: + fw_info->ver = adev->gfx.rlc_srls_fw_version; + fw_info->feature = adev->gfx.rlc_srls_feature_version; + break; case AMDGPU_INFO_FW_GFX_MEC: if (query_fw->index == 0) { fw_info->ver = adev->gfx.mec_fw_version; @@ -273,12 +286,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_crtc *crtc; uint32_t ui32 = 0; uint64_t ui64 = 0; - int i, found; + int i, j, found; int ui32_size = sizeof(ui32); if (!info->return_size || !info->return_pointer) return -EINVAL; + /* Ensure IB tests are run on ring */ + flush_delayed_work(&adev->late_init_work); + switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: ui32 = adev->accel_working; @@ -332,7 +348,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file break; case AMDGPU_HW_IP_UVD: type = AMD_IP_BLOCK_TYPE_UVD; - ring_mask = adev->uvd.ring.ready ? 1 : 0; + for (i = 0; i < adev->uvd.num_uvd_inst; i++) + ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i); ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; ib_size_alignment = 16; break; @@ -345,8 +362,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file break; case AMDGPU_HW_IP_UVD_ENC: type = AMD_IP_BLOCK_TYPE_UVD; - for (i = 0; i < adev->uvd.num_enc_rings; i++) - ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i); + for (i = 0; i < adev->uvd.num_uvd_inst; i++) + for (j = 0; j < adev->uvd.num_enc_rings; j++) + ring_mask |= + ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) << + (j + i * adev->uvd.num_enc_rings)); ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; ib_size_alignment = 1; break; @@ -701,10 +721,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file } } case AMDGPU_INFO_SENSOR: { - struct pp_gpu_power query = {0}; - int query_size = sizeof(query); - - if (amdgpu_dpm == 0) + if (!adev->pm.dpm_enabled) return -ENOENT; switch (info->sensor_info.type) { @@ -746,10 +763,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file /* get average GPU power */ if (amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, - (void *)&query, &query_size)) { + (void *)&ui32, &ui32_size)) { return -EINVAL; } - ui32 = query.average_gpu_power >> 8; + ui32 >>= 8; break; case AMDGPU_INFO_SENSOR_VDDNB: /* get VDDNB in millivolts */ @@ -913,8 +930,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, return; pm_runtime_get_sync(dev->dev); - - amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); + amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr); if (adev->asic_type != CHIP_RAVEN) { amdgpu_uvd_free_handles(adev, file_priv); @@ -935,6 +951,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); amdgpu_vm_fini(adev, &fpriv->vm); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); + if (pasid) amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); amdgpu_bo_unref(&pd); @@ -1088,6 +1106,7 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; struct drm_amdgpu_info_firmware fw_info; struct drm_amdgpu_query_fw query_fw; + struct atom_context *ctx = adev->mode_info.atom_context; int ret, i; /* VCE */ @@ -1146,6 +1165,30 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* RLC SAVE RESTORE LIST CNTL */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* RLC SAVE RESTORE LIST GPM MEM */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + /* RLC SAVE RESTORE LIST SRM MEM */ + query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + /* MEC */ query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC; query_fw.index = 0; @@ -1210,6 +1253,9 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + + seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index bd67f4cb8e6c..83e344fbb50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -36,12 +36,14 @@ #include <drm/drm.h> #include "amdgpu.h" +#include "amdgpu_amdkfd.h" struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; struct mm_struct *mm; struct mmu_notifier mn; + enum amdgpu_mn_type type; /* only used on destruction */ struct work_struct work; @@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, } /** - * amdgpu_mn_invalidate_range_start - callback to notify about mm change + * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change * * @mn: our notifier * @mn: the mm this callback is about @@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * We block for all BOs between start and end to be idle and * unmap them by move them into system domain again. */ -static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) { struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; @@ -220,6 +222,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, } /** + * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * + * @mn: our notifier + * @mn: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * We temporarily evict all BOs between start and end. This + * necessitates evicting all user-mode queues of the process. The BOs + * are restorted in amdgpu_mn_invalidate_range_end_hsa. + */ +static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct interval_tree_node *it; + + /* notification is exclusive, but interval is inclusive */ + end -= 1; + + amdgpu_mn_read_lock(rmn); + + it = interval_tree_iter_first(&rmn->objects, start, end); + while (it) { + struct amdgpu_mn_node *node; + struct amdgpu_bo *bo; + + node = container_of(it, struct amdgpu_mn_node, it); + it = interval_tree_iter_next(it, start, end); + + list_for_each_entry(bo, &node->bos, mn_list) { + struct kgd_mem *mem = bo->kfd_bo; + + if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, + start, end)) + amdgpu_amdkfd_evict_userptr(mem, mm); + } + } +} + +/** * amdgpu_mn_invalidate_range_end - callback to notify about mm change * * @mn: our notifier @@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, amdgpu_mn_read_unlock(rmn); } -static const struct mmu_notifier_ops amdgpu_mn_ops = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, +static const struct mmu_notifier_ops amdgpu_mn_ops[] = { + [AMDGPU_MN_TYPE_GFX] = { + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, + [AMDGPU_MN_TYPE_HSA] = { + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, }; +/* Low bits of any reasonable mm pointer will be unused due to struct + * alignment. Use these bits to make a unique key from the mm pointer + * and notifier type. + */ +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) + /** * amdgpu_mn_get - create notifier context * * @adev: amdgpu device pointer + * @type: type of MMU notifier context * * Creates a notifier context for current->mm. */ -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) { struct mm_struct *mm = current->mm; struct amdgpu_mn *rmn; + unsigned long key = AMDGPU_MN_KEY(mm, type); int r; mutex_lock(&adev->mn_lock); @@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) return ERR_PTR(-EINTR); } - hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) - if (rmn->mm == mm) + hash_for_each_possible(adev->mn_hash, rmn, node, key) + if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key) goto release_locks; rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); @@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) rmn->adev = adev; rmn->mm = mm; - rmn->mn.ops = &amdgpu_mn_ops; init_rwsem(&rmn->lock); + rmn->type = type; + rmn->mn.ops = &amdgpu_mn_ops[type]; rmn->objects = RB_ROOT_CACHED; mutex_init(&rmn->read_lock); atomic_set(&rmn->recursion, 0); @@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) if (r) goto free_rmn; - hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); + hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type)); release_locks: up_write(&mm->mmap_sem); @@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { unsigned long end = addr + amdgpu_bo_size(bo) - 1; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + enum amdgpu_mn_type type = + bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; struct amdgpu_mn *rmn; - struct amdgpu_mn_node *node = NULL; + struct amdgpu_mn_node *node = NULL, *new_node; struct list_head bos; struct interval_tree_node *it; - rmn = amdgpu_mn_get(adev); + rmn = amdgpu_mn_get(adev, type); if (IS_ERR(rmn)) return PTR_ERR(rmn); + new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + INIT_LIST_HEAD(&bos); down_write(&rmn->lock); @@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) list_splice(&node->bos, &bos); } - if (!node) { - node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); - if (!node) { - up_write(&rmn->lock); - return -ENOMEM; - } - } + if (!node) + node = new_node; + else + kfree(new_node); bo->mn = rmn; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index d0095a3793b8..eb0f432f78fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -29,16 +29,23 @@ */ struct amdgpu_mn; +enum amdgpu_mn_type { + AMDGPU_MN_TYPE_GFX, + AMDGPU_MN_TYPE_HSA, +}; + #if defined(CONFIG_MMU_NOTIFIER) void amdgpu_mn_lock(struct amdgpu_mn *mn); void amdgpu_mn_unlock(struct amdgpu_mn *mn); -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev); +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) { return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d6416ee52e32..b9e9e8b02fb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -308,7 +308,6 @@ struct amdgpu_display_funcs { struct amdgpu_framebuffer { struct drm_framebuffer base; - struct drm_gem_object *obj; /* caching for later use */ uint64_t address; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6d08cde8443c..5e4e1bd90383 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -191,14 +191,21 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr) { + struct amdgpu_bo_param bp; bool free = false; int r; + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = align; + bp.domain = domain; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + if (!*bo_ptr) { - r = amdgpu_bo_create(adev, size, align, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - ttm_bo_type_kernel, NULL, bo_ptr); + r = amdgpu_bo_create(adev, &bp, bo_ptr); if (r) { dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); @@ -341,27 +348,25 @@ fail: return false; } -static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, - int byte_align, u32 domain, - u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, +static int amdgpu_bo_do_create(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { struct ttm_operation_ctx ctx = { - .interruptible = (type != ttm_bo_type_kernel), + .interruptible = (bp->type != ttm_bo_type_kernel), .no_wait_gpu = false, - .resv = resv, + .resv = bp->resv, .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT }; struct amdgpu_bo *bo; - unsigned long page_align; + unsigned long page_align, size = bp->size; size_t acc_size; int r; - page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT; + page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; size = ALIGN(size, PAGE_SIZE); - if (!amdgpu_bo_validate_size(adev, size, domain)) + if (!amdgpu_bo_validate_size(adev, size, bp->domain)) return -ENOMEM; *bo_ptr = NULL; @@ -375,18 +380,14 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); - bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT | - AMDGPU_GEM_DOMAIN_CPU | - AMDGPU_GEM_DOMAIN_GDS | - AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA); + bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : + bp->domain; bo->allowed_domains = bo->preferred_domains; - if (type != ttm_bo_type_kernel && + if (bp->type != ttm_bo_type_kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; - bo->flags = flags; + bo->flags = bp->flags; #ifdef CONFIG_X86_32 /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit @@ -417,11 +418,13 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, #endif bo->tbo.bdev = &adev->mman.bdev; - amdgpu_ttm_placement_from_domain(bo, domain); + amdgpu_ttm_placement_from_domain(bo, bp->domain); + if (bp->type == ttm_bo_type_kernel) + bo->tbo.priority = 1; - r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, + r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type, &bo->placement, page_align, &ctx, acc_size, - NULL, resv, &amdgpu_ttm_bo_destroy); + NULL, bp->resv, &amdgpu_ttm_bo_destroy); if (unlikely(r != 0)) return r; @@ -433,10 +436,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, else amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); - if (type == ttm_bo_type_kernel) - bo->tbo.priority = 1; - - if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && + if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; @@ -449,20 +449,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, bo->tbo.moving = dma_fence_get(fence); dma_fence_put(fence); } - if (!resv) + if (!bp->resv) amdgpu_bo_unreserve(bo); *bo_ptr = bo; trace_amdgpu_bo_create(bo); /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */ - if (type == ttm_bo_type_device) + if (bp->type == ttm_bo_type_device) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; return 0; fail_unreserve: - if (!resv) + if (!bp->resv) ww_mutex_unlock(&bo->tbo.resv->lock); amdgpu_bo_unref(&bo); return r; @@ -472,16 +472,22 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, unsigned long size, int byte_align, struct amdgpu_bo *bo) { + struct amdgpu_bo_param bp; int r; if (bo->shadow) return 0; - r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_SHADOW, - ttm_bo_type_kernel, - bo->tbo.resv, &bo->shadow); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = byte_align; + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW; + bp.type = ttm_bo_type_kernel; + bp.resv = bo->tbo.resv; + + r = amdgpu_bo_do_create(adev, &bp, &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); mutex_lock(&adev->shadow_list_lock); @@ -492,28 +498,26 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } -int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, - int byte_align, u32 domain, - u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, +int amdgpu_bo_create(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { - uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; + u64 flags = bp->flags; int r; - r = amdgpu_bo_do_create(adev, size, byte_align, domain, - parent_flags, type, resv, bo_ptr); + bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; + r = amdgpu_bo_do_create(adev, bp, bo_ptr); if (r) return r; if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { - if (!resv) + if (!bp->resv) WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, NULL)); - r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); + r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr)); - if (!resv) + if (!bp->resv) reservation_object_unlock((*bo_ptr)->tbo.resv); if (r) @@ -689,8 +693,17 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; /* A shared bo cannot be migrated to VRAM */ - if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM)) - return -EINVAL; + if (bo->prime_shared_count) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) + domain = AMDGPU_GEM_DOMAIN_GTT; + else + return -EINVAL; + } + + /* This assumes only APU display buffers are pinned with (VRAM|GTT). + * See function amdgpu_display_supported_domains() + */ + domain = amdgpu_bo_get_preferred_pin_domain(adev, domain); if (bo->pin_count) { uint32_t mem_type = bo->tbo.mem.mem_type; @@ -838,6 +851,13 @@ int amdgpu_bo_init(struct amdgpu_device *adev) return amdgpu_ttm_init(adev); } +int amdgpu_bo_late_init(struct amdgpu_device *adev) +{ + amdgpu_ttm_late_init(adev); + + return 0; +} + void amdgpu_bo_fini(struct amdgpu_device *adev) { amdgpu_ttm_fini(adev); @@ -1042,3 +1062,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) return bo->tbo.offset; } + +uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, + uint32_t domain) +{ + if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { + domain = AMDGPU_GEM_DOMAIN_VRAM; + if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD) + domain = AMDGPU_GEM_DOMAIN_GTT; + } + return domain; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 546f77cb7882..731748033878 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -33,6 +33,16 @@ #define AMDGPU_BO_INVALID_OFFSET LONG_MAX +struct amdgpu_bo_param { + unsigned long size; + int byte_align; + u32 domain; + u32 preferred_domain; + u64 flags; + enum ttm_bo_type type; + struct reservation_object *resv; +}; + /* bo virtual addresses in a vm */ struct amdgpu_bo_va_mapping { struct amdgpu_bo_va *bo_va; @@ -196,6 +206,27 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } /** + * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM + */ +static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; + struct drm_mm_node *node = bo->tbo.mem.mm_node; + unsigned long pages_left; + + if (bo->tbo.mem.mem_type != TTM_PL_VRAM) + return false; + + for (pages_left = bo->tbo.mem.num_pages; pages_left; + pages_left -= node->size, node++) + if (node->start < fpfn) + return true; + + return false; +} + +/** * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced */ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) @@ -203,10 +234,8 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; } -int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, - int byte_align, u32 domain, - u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, +int amdgpu_bo_create(struct amdgpu_device *adev, + struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, @@ -230,6 +259,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, int amdgpu_bo_unpin(struct amdgpu_bo *bo); int amdgpu_bo_evict_vram(struct amdgpu_device *adev); int amdgpu_bo_init(struct amdgpu_device *adev); +int amdgpu_bo_late_init(struct amdgpu_device *adev); void amdgpu_bo_fini(struct amdgpu_device *adev); int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, struct vm_area_struct *vma); @@ -259,7 +289,8 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, struct reservation_object *resv, struct dma_fence **fence, bool direct); - +uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, + uint32_t domain); /* * sub allocation diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 361975cf45a9..b455da487782 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -77,6 +77,37 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) } } +/** + * DOC: power_dpm_state + * + * This is a legacy interface and is only provided for backwards compatibility. + * The amdgpu driver provides a sysfs API for adjusting certain power + * related parameters. The file power_dpm_state is used for this. + * It accepts the following arguments: + * - battery + * - balanced + * - performance + * + * battery + * + * On older GPUs, the vbios provided a special power state for battery + * operation. Selecting battery switched to this state. This is no + * longer provided on newer GPUs so the option does nothing in that case. + * + * balanced + * + * On older GPUs, the vbios provided a special power state for balanced + * operation. Selecting balanced switched to this state. This is no + * longer provided on newer GPUs so the option does nothing in that case. + * + * performance + * + * On older GPUs, the vbios provided a special power state for performance + * operation. Selecting performance switched to this state. This is no + * longer provided on newer GPUs so the option does nothing in that case. + * + */ + static ssize_t amdgpu_get_dpm_state(struct device *dev, struct device_attribute *attr, char *buf) @@ -131,6 +162,59 @@ fail: return count; } + +/** + * DOC: power_dpm_force_performance_level + * + * The amdgpu driver provides a sysfs API for adjusting certain power + * related parameters. The file power_dpm_force_performance_level is + * used for this. It accepts the following arguments: + * - auto + * - low + * - high + * - manual + * - GPU fan + * - profile_standard + * - profile_min_sclk + * - profile_min_mclk + * - profile_peak + * + * auto + * + * When auto is selected, the driver will attempt to dynamically select + * the optimal power profile for current conditions in the driver. + * + * low + * + * When low is selected, the clocks are forced to the lowest power state. + * + * high + * + * When high is selected, the clocks are forced to the highest power state. + * + * manual + * + * When manual is selected, the user can manually adjust which power states + * are enabled for each clock domain via the sysfs pp_dpm_mclk, pp_dpm_sclk, + * and pp_dpm_pcie files and adjust the power state transition heuristics + * via the pp_power_profile_mode sysfs file. + * + * profile_standard + * profile_min_sclk + * profile_min_mclk + * profile_peak + * + * When the profiling modes are selected, clock and power gating are + * disabled and the clocks are set for different profiling cases. This + * mode is recommended for profiling specific work loads where you do + * not want clock or power gating for clock fluctuation to interfere + * with your results. profile_standard sets the clocks to a fixed clock + * level which varies from asic to asic. profile_min_sclk forces the sclk + * to the lowest level. profile_min_mclk forces the mclk to the lowest level. + * profile_peak sets all clocks (mclk, sclk, pcie) to the highest levels. + * + */ + static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, struct device_attribute *attr, char *buf) @@ -324,6 +408,17 @@ fail: return count; } +/** + * DOC: pp_table + * + * The amdgpu driver provides a sysfs API for uploading new powerplay + * tables. The file pp_table is used for this. Reading the file + * will dump the current power play table. Writing to the file + * will attempt to upload a new powerplay table and re-initialize + * powerplay using that new table. + * + */ + static ssize_t amdgpu_get_pp_table(struct device *dev, struct device_attribute *attr, char *buf) @@ -360,6 +455,29 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, return count; } +/** + * DOC: pp_od_clk_voltage + * + * The amdgpu driver provides a sysfs API for adjusting the clocks and voltages + * in each power level within a power state. The pp_od_clk_voltage is used for + * this. + * + * Reading the file will display: + * - a list of engine clock levels and voltages labeled OD_SCLK + * - a list of memory clock levels and voltages labeled OD_MCLK + * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE + * + * To manually adjust these settings, first select manual using + * power_dpm_force_performance_level. Enter a new value for each + * level by writing a string that contains "s/m level clock voltage" to + * the file. E.g., "s 1 500 820" will update sclk level 1 to be 500 MHz + * at 820 mV; "m 0 350 810" will update mclk level 0 to be 350 MHz at + * 810 mV. When you have edited all of the states as needed, write + * "c" (commit) to the file to commit your changes. If you want to reset to the + * default power levels, write "r" (reset) to the file to reset them. + * + */ + static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, struct device_attribute *attr, const char *buf, @@ -437,6 +555,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); + size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); return size; } else { return snprintf(buf, PAGE_SIZE, "\n"); @@ -444,6 +563,23 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, } +/** + * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie + * + * The amdgpu driver provides a sysfs API for adjusting what power levels + * are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk, + * and pp_dpm_pcie are used for this. + * + * Reading back the files will show you the available power levels within + * the power state and the clock information for those levels. + * + * To manually adjust these states, first select manual using + * power_dpm_force_performance_level. + * Secondly,Enter a new value for each level by inputing a string that + * contains " echo xx xx xx > pp_dpm_sclk/mclk/pcie" + * E.g., echo 4 5 6 to > pp_dpm_sclk will enable sclk levels 4, 5, and 6. + */ + static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, struct device_attribute *attr, char *buf) @@ -466,23 +602,27 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret; long level; - uint32_t i, mask = 0; - char sub_str[2]; + uint32_t mask = 0; + char *sub_str = NULL; + char *tmp; + char buf_cpy[count]; + const char delimiter[3] = {' ', '\n', '\0'}; - for (i = 0; i < strlen(buf); i++) { - if (*(buf + i) == '\n') - continue; - sub_str[0] = *(buf + i); - sub_str[1] = '\0'; - ret = kstrtol(sub_str, 0, &level); + memcpy(buf_cpy, buf, count+1); + tmp = buf_cpy; + while (tmp[0]) { + sub_str = strsep(&tmp, delimiter); + if (strlen(sub_str)) { + ret = kstrtol(sub_str, 0, &level); - if (ret) { - count = -EINVAL; - goto fail; - } - mask |= 1 << level; + if (ret) { + count = -EINVAL; + goto fail; + } + mask |= 1 << level; + } else + break; } - if (adev->powerplay.pp_funcs->force_clock_level) amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); @@ -512,21 +652,26 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret; long level; - uint32_t i, mask = 0; - char sub_str[2]; + uint32_t mask = 0; + char *sub_str = NULL; + char *tmp; + char buf_cpy[count]; + const char delimiter[3] = {' ', '\n', '\0'}; - for (i = 0; i < strlen(buf); i++) { - if (*(buf + i) == '\n') - continue; - sub_str[0] = *(buf + i); - sub_str[1] = '\0'; - ret = kstrtol(sub_str, 0, &level); + memcpy(buf_cpy, buf, count+1); + tmp = buf_cpy; + while (tmp[0]) { + sub_str = strsep(&tmp, delimiter); + if (strlen(sub_str)) { + ret = kstrtol(sub_str, 0, &level); - if (ret) { - count = -EINVAL; - goto fail; - } - mask |= 1 << level; + if (ret) { + count = -EINVAL; + goto fail; + } + mask |= 1 << level; + } else + break; } if (adev->powerplay.pp_funcs->force_clock_level) amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); @@ -557,21 +702,27 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret; long level; - uint32_t i, mask = 0; - char sub_str[2]; + uint32_t mask = 0; + char *sub_str = NULL; + char *tmp; + char buf_cpy[count]; + const char delimiter[3] = {' ', '\n', '\0'}; - for (i = 0; i < strlen(buf); i++) { - if (*(buf + i) == '\n') - continue; - sub_str[0] = *(buf + i); - sub_str[1] = '\0'; - ret = kstrtol(sub_str, 0, &level); + memcpy(buf_cpy, buf, count+1); + tmp = buf_cpy; - if (ret) { - count = -EINVAL; - goto fail; - } - mask |= 1 << level; + while (tmp[0]) { + sub_str = strsep(&tmp, delimiter); + if (strlen(sub_str)) { + ret = kstrtol(sub_str, 0, &level); + + if (ret) { + count = -EINVAL; + goto fail; + } + mask |= 1 << level; + } else + break; } if (adev->powerplay.pp_funcs->force_clock_level) amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); @@ -668,6 +819,26 @@ fail: return count; } +/** + * DOC: pp_power_profile_mode + * + * The amdgpu driver provides a sysfs API for adjusting the heuristics + * related to switching between power levels in a power state. The file + * pp_power_profile_mode is used for this. + * + * Reading this file outputs a list of all of the predefined power profiles + * and the relevant heuristics settings for that profile. + * + * To select a profile or create a custom profile, first select manual using + * power_dpm_force_performance_level. Writing the number of a predefined + * profile to pp_power_profile_mode will enable those heuristics. To + * create a custom set of heuristics, write a string of numbers to the file + * starting with the number of the custom profile along with a setting + * for each heuristic parameter. Due to differences across asic families + * the heuristic parameters vary from family to family. + * + */ + static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, struct device_attribute *attr, char *buf) @@ -1020,8 +1191,8 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; - struct pp_gpu_power query = {0}; - int r, size = sizeof(query); + u32 query = 0; + int r, size = sizeof(u32); unsigned uw; /* Can't get power when the card is off */ @@ -1041,7 +1212,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, return r; /* convert to microwatts */ - uw = (query.average_gpu_power >> 8) * 1000000; + uw = (query >> 8) * 1000000 + (query & 0xff) * 1000; return snprintf(buf, PAGE_SIZE, "%u\n", uw); } @@ -1109,6 +1280,46 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, return count; } + +/** + * DOC: hwmon + * + * The amdgpu driver exposes the following sensor interfaces: + * - GPU temperature (via the on-die sensor) + * - GPU voltage + * - Northbridge voltage (APUs only) + * - GPU power + * - GPU fan + * + * hwmon interfaces for GPU temperature: + * - temp1_input: the on die GPU temperature in millidegrees Celsius + * - temp1_crit: temperature critical max value in millidegrees Celsius + * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius + * + * hwmon interfaces for GPU voltage: + * - in0_input: the voltage on the GPU in millivolts + * - in1_input: the voltage on the Northbridge in millivolts + * + * hwmon interfaces for GPU power: + * - power1_average: average power used by the GPU in microWatts + * - power1_cap_min: minimum cap supported in microWatts + * - power1_cap_max: maximum cap supported in microWatts + * - power1_cap: selected power cap in microWatts + * + * hwmon interfaces for GPU fan: + * - pwm1: pulse width modulation fan level (0-255) + * - pwm1_enable: pulse width modulation fan control method + * 0: no fan speed control + * 1: manual fan speed control using pwm interface + * 2: automatic fan speed control + * - pwm1_min: pulse width modulation fan control minimum level (0) + * - pwm1_max: pulse width modulation fan control maximum level (255) + * - fan1_input: fan speed in RPM + * + * You can use hwmon tools like sensors to view this information on your system. + * + */ + static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); @@ -1153,19 +1364,14 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct amdgpu_device *adev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; - /* handle non-powerplay limitations */ - if (!adev->powerplay.pp_handle) { - /* Skip fan attributes if fan is not present */ - if (adev->pm.no_fan && - (attr == &sensor_dev_attr_pwm1.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) - return 0; - /* requires powerplay */ - if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr) - return 0; - } + + /* Skip fan attributes if fan is not present */ + if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_input.dev_attr.attr)) + return 0; /* Skip limit attributes if DPM is not enabled */ if (!adev->pm.dpm_enabled && @@ -1658,9 +1864,6 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) { - struct drm_device *ddev = adev->ddev; - struct drm_crtc *crtc; - struct amdgpu_crtc *amdgpu_crtc; int i = 0; if (!adev->pm.dpm_enabled) @@ -1676,21 +1879,25 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) } if (adev->powerplay.pp_funcs->dispatch_tasks) { + if (!amdgpu_device_has_dc_support(adev)) { + mutex_lock(&adev->pm.mutex); + amdgpu_dpm_get_active_displays(adev); + adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtcs; + adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); + adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); + /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */ + if (adev->pm.pm_display_cfg.vrefresh > 120) + adev->pm.pm_display_cfg.min_vblank_time = 0; + if (adev->powerplay.pp_funcs->display_configuration_change) + adev->powerplay.pp_funcs->display_configuration_change( + adev->powerplay.pp_handle, + &adev->pm.pm_display_cfg); + mutex_unlock(&adev->pm.mutex); + } amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); } else { mutex_lock(&adev->pm.mutex); - adev->pm.dpm.new_active_crtcs = 0; - adev->pm.dpm.new_active_crtc_count = 0; - if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) { - list_for_each_entry(crtc, - &ddev->mode_config.crtc_list, head) { - amdgpu_crtc = to_amdgpu_crtc(crtc); - if (amdgpu_crtc->enabled) { - adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id); - adev->pm.dpm.new_active_crtc_count++; - } - } - } + amdgpu_dpm_get_active_displays(adev); /* update battery/ac status */ if (power_supply_is_system_supplied() > 0) adev->pm.dpm.ac_power = true; @@ -1711,7 +1918,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) { uint32_t value; - struct pp_gpu_power query = {0}; + uint32_t query = 0; int size; /* sanity check PP is enabled */ @@ -1734,17 +1941,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a seq_printf(m, "\t%u mV (VDDGFX)\n", value); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) seq_printf(m, "\t%u mV (VDDNB)\n", value); - size = sizeof(query); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) { - seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8, - query.vddc_power & 0xff); - seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8, - query.vddci_power & 0xff); - seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8, - query.max_gpu_power & 0xff); - seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8, - query.average_gpu_power & 0xff); - } + size = sizeof(uint32_t); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) + seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff); size = sizeof(value); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 0558da049827..a156b3891a3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -102,12 +102,18 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, struct reservation_object *resv = attach->dmabuf->resv; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; int ret; + memset(&bp, 0, sizeof(bp)); + bp.size = attach->dmabuf->size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_CPU; + bp.flags = 0; + bp.type = ttm_bo_type_sg; + bp.resv = resv; ww_mutex_lock(&resv->lock, NULL); - ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg, - resv, &bo); + ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) goto error; @@ -208,7 +214,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { true, false }; - u32 domain = amdgpu_display_framebuffer_domains(adev); + u32 domain = amdgpu_display_supported_domains(adev); int ret; bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index c7d43e064fc7..9f1a5bd39ae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -52,6 +52,7 @@ static int psp_sw_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: psp_v3_1_set_psp_funcs(psp); break; case CHIP_RAVEN: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index 262c1267249e..8af16e81c7d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -66,6 +66,8 @@ static int amdgpu_identity_map(struct amdgpu_device *adev, u32 ring, struct amdgpu_ring **out_ring) { + u32 instance; + switch (mapper->hw_ip) { case AMDGPU_HW_IP_GFX: *out_ring = &adev->gfx.gfx_ring[ring]; @@ -77,13 +79,16 @@ static int amdgpu_identity_map(struct amdgpu_device *adev, *out_ring = &adev->sdma.instance[ring].ring; break; case AMDGPU_HW_IP_UVD: - *out_ring = &adev->uvd.ring; + instance = ring; + *out_ring = &adev->uvd.inst[instance].ring; break; case AMDGPU_HW_IP_VCE: *out_ring = &adev->vce.ring[ring]; break; case AMDGPU_HW_IP_UVD_ENC: - *out_ring = &adev->uvd.ring_enc[ring]; + instance = ring / adev->uvd.num_enc_rings; + *out_ring = + &adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings]; break; case AMDGPU_HW_IP_VCN_DEC: *out_ring = &adev->vcn.ring_dec; @@ -240,13 +245,14 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, ip_num_rings = adev->sdma.num_instances; break; case AMDGPU_HW_IP_UVD: - ip_num_rings = 1; + ip_num_rings = adev->uvd.num_uvd_inst; break; case AMDGPU_HW_IP_VCE: ip_num_rings = adev->vce.num_rings; break; case AMDGPU_HW_IP_UVD_ENC: - ip_num_rings = adev->uvd.num_enc_rings; + ip_num_rings = + adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst; break; case AMDGPU_HW_IP_VCN_DEC: ip_num_rings = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d5f526f38e50..c6850b629d0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -362,6 +362,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; + ring->me = 0; ring->adev->rings[ring->idx] = NULL; } @@ -459,6 +460,26 @@ void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) spin_unlock(&adev->ring_lru_list_lock); } +/** + * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper + * + * @adev: amdgpu_device pointer + * @reg0: register to write + * @reg1: register to wait on + * @ref: reference value to write/wait on + * @mask: mask to wait on + * + * Helper for rings that don't support write and wait in a + * single oneshot packet. + */ +void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + /* * Debugfs info */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1a5911882657..1513124c5659 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -29,7 +29,7 @@ #include <drm/drm_print.h> /* max number of rings */ -#define AMDGPU_MAX_RINGS 18 +#define AMDGPU_MAX_RINGS 21 #define AMDGPU_MAX_GFX_RINGS 1 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 @@ -42,6 +42,7 @@ #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) +#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) enum amdgpu_ring_type { AMDGPU_RING_TYPE_GFX, @@ -90,7 +91,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, unsigned irq_type); void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, + unsigned flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); @@ -154,6 +156,9 @@ struct amdgpu_ring_funcs { void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); + void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask); void (*emit_tmz)(struct amdgpu_ring *ring, bool start); /* priority functions */ void (*set_priority) (struct amdgpu_ring *ring, @@ -228,6 +233,10 @@ int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, int num_blacklist, bool lru_pipe_order, struct amdgpu_ring **ring); void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); +void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t val0, + uint32_t reg1, uint32_t val1); + static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { int i = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 2dbe87591f81..d167e8ab76d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -33,6 +33,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_bo *vram_obj = NULL; struct amdgpu_bo **gtt_obj = NULL; + struct amdgpu_bo_param bp; uint64_t gart_addr, vram_addr; unsigned n, size; int i, r; @@ -58,9 +59,15 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = 1; goto out_cleanup; } - - r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0, - ttm_bo_type_kernel, NULL, &vram_obj); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = 0; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + + r = amdgpu_bo_create(adev, &bp, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -79,9 +86,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) void **vram_start, **vram_end; struct dma_fence *fence = NULL; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, 0, - ttm_bo_type_kernel, NULL, gtt_obj + i); + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + r = amdgpu_bo_create(adev, &bp, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 532263ab6e16..e96e26d3f3b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -275,7 +275,7 @@ TRACE_EVENT(amdgpu_vm_bo_unmap, ), TP_fast_assign( - __entry->bo = bo_va->base.bo; + __entry->bo = bo_va ? bo_va->base.bo : NULL; __entry->start = mapping->start; __entry->last = mapping->last; __entry->offset = mapping->offset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 205da3ff9cd0..e93a0a237dc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -63,16 +63,44 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); /* * Global memory. */ + +/** + * amdgpu_ttm_mem_global_init - Initialize and acquire reference to + * memory object + * + * @ref: Object for initialization. + * + * This is called by drm_global_item_ref() when an object is being + * initialized. + */ static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref) { return ttm_mem_global_init(ref->object); } +/** + * amdgpu_ttm_mem_global_release - Drop reference to a memory object + * + * @ref: Object being removed + * + * This is called by drm_global_item_unref() when an object is being + * released. + */ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) { ttm_mem_global_release(ref->object); } +/** + * amdgpu_ttm_global_init - Initialize global TTM memory reference + * structures. + * + * @adev: AMDGPU device for which the global structures need to be + * registered. + * + * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init() + * during bring up. + */ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) { struct drm_global_reference *global_ref; @@ -80,7 +108,9 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) struct drm_sched_rq *rq; int r; + /* ensure reference is false in case init fails */ adev->mman.mem_global_referenced = false; + global_ref = &adev->mman.mem_global_ref; global_ref->global_type = DRM_GLOBAL_TTM_MEM; global_ref->size = sizeof(struct ttm_mem_global); @@ -111,7 +141,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) ring = adev->mman.buffer_funcs_ring; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r) { DRM_ERROR("Failed setting up TTM BO move run queue.\n"); goto error_entity; @@ -146,6 +176,18 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) return 0; } +/** + * amdgpu_init_mem_type - Initialize a memory manager for a specific + * type of memory request. + * + * @bdev: The TTM BO device object (contains a reference to + * amdgpu_device) + * @type: The type of memory requested + * @man: + * + * This is called by ttm_bo_init_mm() when a buffer object is being + * initialized. + */ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, struct ttm_mem_type_manager *man) { @@ -161,6 +203,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, man->default_caching = TTM_PL_FLAG_CACHED; break; case TTM_PL_TT: + /* GTT memory */ man->func = &amdgpu_gtt_mgr_func; man->gpu_offset = adev->gmc.gart_start; man->available_caching = TTM_PL_MASK_CACHING; @@ -193,6 +236,14 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, return 0; } +/** + * amdgpu_evict_flags - Compute placement flags + * + * @bo: The buffer object to evict + * @placement: Possible destination(s) for evicted BO + * + * Fill in placement data when ttm_bo_evict() is called + */ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *placement) { @@ -204,12 +255,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM }; + /* Don't handle scatter gather BOs */ if (bo->type == ttm_bo_type_sg) { placement->num_placement = 0; placement->num_busy_placement = 0; return; } + /* Object isn't an AMDGPU object so ignore */ if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { placement->placement = &placements; placement->busy_placement = &placements; @@ -217,26 +270,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, placement->num_busy_placement = 1; return; } + abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: if (!adev->mman.buffer_funcs_enabled) { + /* Move to system memory */ amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { - unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; - struct drm_mm_node *node = bo->mem.mm_node; - unsigned long pages_left; - - for (pages_left = bo->mem.num_pages; - pages_left; - pages_left -= node->size, node++) { - if (node->start < fpfn) - break; - } - - if (!pages_left) - goto gtt; + !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && + amdgpu_bo_in_cpu_visible_vram(abo)) { /* Try evicting to the CPU inaccessible part of VRAM * first, but only set GTT as busy placement, so this @@ -245,12 +288,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, */ amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - abo->placements[0].fpfn = fpfn; + abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; abo->placements[0].lpfn = 0; abo->placement.busy_placement = &abo->placements[1]; abo->placement.num_busy_placement = 1; } else { -gtt: + /* Move to GTT memory */ amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); } break; @@ -261,6 +304,15 @@ gtt: *placement = abo->placement; } +/** + * amdgpu_verify_access - Verify access for a mmap call + * + * @bo: The buffer object to map + * @filp: The file pointer from the process performing the mmap + * + * This is called by ttm_bo_mmap() to verify whether a process + * has the right to mmap a BO to their process space. + */ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); @@ -278,6 +330,15 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) filp->private_data); } +/** + * amdgpu_move_null - Register memory for a buffer object + * + * @bo: The bo to assign the memory to + * @new_mem: The memory to be assigned. + * + * Assign the memory from new_mem to the memory of the buffer object + * bo. + */ static void amdgpu_move_null(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem) { @@ -288,6 +349,10 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo, new_mem->mm_node = NULL; } +/** + * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT + * buffer. + */ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, struct drm_mm_node *mm_node, struct ttm_mem_reg *mem) @@ -302,9 +367,10 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, } /** - * amdgpu_find_mm_node - Helper function finds the drm_mm_node - * corresponding to @offset. It also modifies the offset to be - * within the drm_mm_node returned + * amdgpu_find_mm_node - Helper function finds the drm_mm_node + * corresponding to @offset. It also modifies + * the offset to be within the drm_mm_node + * returned */ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, unsigned long *offset) @@ -443,7 +509,12 @@ error: return r; } - +/** + * amdgpu_move_blit - Copy an entire buffer to another buffer + * + * This is a helper called by amdgpu_bo_move() and + * amdgpu_move_vram_ram() to help move buffers to and from VRAM. + */ static int amdgpu_move_blit(struct ttm_buffer_object *bo, bool evict, bool no_wait_gpu, struct ttm_mem_reg *new_mem, @@ -478,6 +549,11 @@ error: return r; } +/** + * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer + * + * Called by amdgpu_bo_move(). + */ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) @@ -490,6 +566,8 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, int r; adev = amdgpu_ttm_adev(bo->bdev); + + /* create space/pages for new_mem in GTT space */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; placement.num_placement = 1; @@ -504,25 +582,36 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, return r; } + /* set caching flags */ r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement); if (unlikely(r)) { goto out_cleanup; } + /* Bind the memory to the GTT space */ r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx); if (unlikely(r)) { goto out_cleanup; } + + /* blit VRAM to GTT */ r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem); if (unlikely(r)) { goto out_cleanup; } + + /* move BO (in tmp_mem) to new_mem */ r = ttm_bo_move_ttm(bo, ctx, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; } +/** + * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM + * + * Called by amdgpu_bo_move(). + */ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) @@ -535,6 +624,8 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, int r; adev = amdgpu_ttm_adev(bo->bdev); + + /* make space in GTT for old_mem buffer */ tmp_mem = *new_mem; tmp_mem.mm_node = NULL; placement.num_placement = 1; @@ -548,10 +639,14 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, if (unlikely(r)) { return r; } + + /* move/bind old memory to GTT space */ r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } + + /* copy to VRAM */ r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem); if (unlikely(r)) { goto out_cleanup; @@ -561,6 +656,11 @@ out_cleanup: return r; } +/** + * amdgpu_bo_move - Move a buffer object to a new memory location + * + * Called by ttm_bo_handle_move_mem() + */ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) @@ -626,6 +726,11 @@ memcpy: return 0; } +/** + * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault + * + * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault() + */ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) { struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; @@ -695,7 +800,7 @@ struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; - struct mm_struct *usermm; + struct task_struct *usertask; uint32_t userflags; spinlock_t guptasklock; struct list_head guptasks; @@ -703,17 +808,29 @@ struct amdgpu_ttm_tt { uint32_t last_set_pages; }; +/** + * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to + * by a USERPTR pointer to memory + * + * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). + * This provides a wrapper around the get_user_pages() call to provide + * device accessible pages that back user memory. + */ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct mm_struct *mm = gtt->usertask->mm; unsigned int flags = 0; unsigned pinned = 0; int r; + if (!mm) /* Happens during process shutdown */ + return -ESRCH; + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) flags |= FOLL_WRITE; - down_read(¤t->mm->mmap_sem); + down_read(&mm->mmap_sem); if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { /* check that we only use anonymous memory @@ -721,13 +838,14 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; struct vm_area_struct *vma; - vma = find_vma(gtt->usermm, gtt->userptr); + vma = find_vma(mm, gtt->userptr); if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); return -EPERM; } } + /* loop enough times using contiguous pages of memory */ do { unsigned num_pages = ttm->num_pages - pinned; uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; @@ -739,7 +857,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) list_add(&guptask.list, >t->guptasks); spin_unlock(>t->guptasklock); - r = get_user_pages(userptr, num_pages, flags, p, NULL); + if (mm == current->mm) + r = get_user_pages(userptr, num_pages, flags, p, NULL); + else + r = get_user_pages_remote(gtt->usertask, + mm, userptr, num_pages, + flags, p, NULL, NULL); spin_lock(>t->guptasklock); list_del(&guptask.list); @@ -752,15 +875,23 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) } while (pinned < ttm->num_pages); - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); return 0; release_pages: release_pages(pages, pinned); - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); return r; } +/** + * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages + * as necessary. + * + * Called by amdgpu_cs_list_validate(). This creates the page list + * that backs user memory and will ultimately be mapped into the device + * address space. + */ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -775,6 +906,11 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) } } +/** + * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty + * + * Called while unpinning userptr pages + */ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -793,7 +929,12 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) } } -/* prepare the sg table with the user pages */ +/** + * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the + * user pages + * + * Called by amdgpu_ttm_backend_bind() + **/ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); @@ -805,17 +946,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + /* Allocate an SG array and squash pages into it */ r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, ttm->num_pages << PAGE_SHIFT, GFP_KERNEL); if (r) goto release_sg; + /* Map SG to device */ r = -ENOMEM; nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); if (nents != ttm->sg->nents) goto release_sg; + /* convert SG to linear array of pages and dma addresses */ drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); @@ -826,6 +970,9 @@ release_sg: return r; } +/** + * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages + */ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); @@ -839,14 +986,60 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) if (!ttm->sg->sgl) return; - /* free the sg table and pages again */ + /* unmap the pages mapped to the device */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); + /* mark the pages as dirty */ amdgpu_ttm_tt_mark_user_pages(ttm); sg_free_table(ttm->sg); } +int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, + struct ttm_buffer_object *tbo, + uint64_t flags) +{ + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); + struct ttm_tt *ttm = tbo->ttm; + struct amdgpu_ttm_tt *gtt = (void *)ttm; + int r; + + if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) { + uint64_t page_idx = 1; + + r = amdgpu_gart_bind(adev, gtt->offset, page_idx, + ttm->pages, gtt->ttm.dma_address, flags); + if (r) + goto gart_bind_fail; + + /* Patch mtype of the second part BO */ + flags &= ~AMDGPU_PTE_MTYPE_MASK; + flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC); + + r = amdgpu_gart_bind(adev, + gtt->offset + (page_idx << PAGE_SHIFT), + ttm->num_pages - page_idx, + &ttm->pages[page_idx], + &(gtt->ttm.dma_address[page_idx]), flags); + } else { + r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, + ttm->pages, gtt->ttm.dma_address, flags); + } + +gart_bind_fail: + if (r) + DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", + ttm->num_pages, gtt->offset); + + return r; +} + +/** + * amdgpu_ttm_backend_bind - Bind GTT memory + * + * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem(). + * This handles binding GTT memory to the device address space. + */ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { @@ -877,7 +1070,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, return 0; } + /* compute PTE flags relevant to this BO memory */ flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); + + /* bind pages into GART page tables */ gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); @@ -888,6 +1084,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, return r; } +/** + * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object + */ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); @@ -903,6 +1102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) amdgpu_gtt_mgr_has_gart_addr(&bo->mem)) return 0; + /* allocate GTT space */ tmp = bo->mem; tmp.mm_node = NULL; placement.num_placement = 1; @@ -918,10 +1118,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) if (unlikely(r)) return r; + /* compute PTE flags for this buffer object */ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); + + /* Bind pages */ gtt->offset = (u64)tmp.start << PAGE_SHIFT; - r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages, - bo->ttm->pages, gtt->ttm.dma_address, flags); + r = amdgpu_ttm_gart_bind(adev, bo, flags); if (unlikely(r)) { ttm_bo_mem_put(bo, &tmp); return r; @@ -935,31 +1137,40 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) return 0; } +/** + * amdgpu_ttm_recover_gart - Rebind GTT pages + * + * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to + * rebind GTT pages during a GPU reset. + */ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm; uint64_t flags; int r; - if (!gtt) + if (!tbo->ttm) return 0; - flags = amdgpu_ttm_tt_pte_flags(adev, >t->ttm.ttm, &tbo->mem); - r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages, - gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags); - if (r) - DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", - gtt->ttm.ttm.num_pages, gtt->offset); + flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem); + r = amdgpu_ttm_gart_bind(adev, tbo, flags); + return r; } +/** + * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages + * + * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and + * ttm_tt_destroy(). + */ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; int r; + /* if the pages have userptr pinning then clear that first */ if (gtt->userptr) amdgpu_ttm_tt_unpin_userptr(ttm); @@ -978,6 +1189,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + if (gtt->usertask) + put_task_struct(gtt->usertask); + ttm_dma_tt_fini(>t->ttm); kfree(gtt); } @@ -988,6 +1202,13 @@ static struct ttm_backend_func amdgpu_backend_func = { .destroy = &amdgpu_ttm_backend_destroy, }; +/** + * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO + * + * @bo: The buffer object to create a GTT ttm_tt object around + * + * Called by ttm_tt_create(). + */ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { @@ -1001,6 +1222,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return NULL; } gtt->ttm.ttm.func = &amdgpu_backend_func; + + /* allocate space for the uninitialized page entries */ if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { kfree(gtt); return NULL; @@ -1008,6 +1231,12 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return >t->ttm.ttm; } +/** + * amdgpu_ttm_tt_populate - Map GTT pages visible to the device + * + * Map the pages of a ttm_tt object to an address space visible + * to the underlying device. + */ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) { @@ -1015,6 +1244,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, struct amdgpu_ttm_tt *gtt = (void *)ttm; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + /* user pages are bound by amdgpu_ttm_tt_pin_userptr() */ if (gtt && gtt->userptr) { ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) @@ -1039,9 +1269,17 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, } #endif + /* fall back to generic helper to populate the page array + * and map them to the device */ return ttm_populate_and_map_pages(adev->dev, >t->ttm, ctx); } +/** + * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays + * + * Unmaps pages of a ttm_tt object from the device address space and + * unpopulates the page array backing it. + */ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) { struct amdgpu_device *adev; @@ -1067,9 +1305,21 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) } #endif + /* fall back to generic helper to unmap and unpopulate array */ ttm_unmap_and_unpopulate_pages(adev->dev, >t->ttm); } +/** + * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt + * for the current task + * + * @ttm: The ttm_tt object to bind this userptr object to + * @addr: The address in the current tasks VM space to use + * @flags: Requirements of userptr object. + * + * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages + * to current task + */ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags) { @@ -1079,8 +1329,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, return -EINVAL; gtt->userptr = addr; - gtt->usermm = current->mm; gtt->userflags = flags; + + if (gtt->usertask) + put_task_struct(gtt->usertask); + gtt->usertask = current->group_leader; + get_task_struct(gtt->usertask); + spin_lock_init(>t->guptasklock); INIT_LIST_HEAD(>t->guptasks); atomic_set(>t->mmu_invalidations, 0); @@ -1089,6 +1344,9 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, return 0; } +/** + * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object + */ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -1096,9 +1354,18 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) if (gtt == NULL) return NULL; - return gtt->usermm; + if (gtt->usertask == NULL) + return NULL; + + return gtt->usertask->mm; } +/** + * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays + * inside an address range for the + * current task. + * + */ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end) { @@ -1109,10 +1376,16 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt == NULL || !gtt->userptr) return false; + /* Return false if no part of the ttm_tt object lies within + * the range + */ size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; if (gtt->userptr > end || gtt->userptr + size <= start) return false; + /* Search the lists of tasks that hold this mapping and see + * if current is one of them. If it is return false. + */ spin_lock(>t->guptasklock); list_for_each_entry(entry, >t->guptasks, list) { if (entry->task == current) { @@ -1127,6 +1400,10 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, return true; } +/** + * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been + * invalidated? + */ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated) { @@ -1137,6 +1414,12 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, return prev_invalidated != *last_invalidated; } +/** + * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this + * ttm_tt object been invalidated + * since the last time they've + * been set? + */ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -1147,6 +1430,9 @@ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; } +/** + * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only? + */ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -1157,6 +1443,12 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); } +/** + * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object + * + * @ttm: The ttm_tt object to compute the flags for + * @mem: The memory registry backing this ttm_tt object + */ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem) { @@ -1181,6 +1473,16 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, return flags; } +/** + * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict + * a buffer object. + * + * Return true if eviction is sensible. Called by + * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space() + * which tries to evict buffer objects until it can find space + * for a new object and by ttm_bo_force_list_clean() which is + * used to clean out a memory space. + */ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { @@ -1227,6 +1529,19 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, return ttm_bo_eviction_valuable(bo, place); } +/** + * amdgpu_ttm_access_memory - Read or Write memory that backs a + * buffer object. + * + * @bo: The buffer object to read/write + * @offset: Offset into buffer object + * @buf: Secondary buffer to write/read from + * @len: Length in bytes of access + * @write: true if writing + * + * This is used to access VRAM that backs a buffer object via MMIO + * access for debugging purposes. + */ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len, int write) @@ -1329,6 +1644,7 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) { struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_param bp; int r = 0; int i; u64 vram_size = adev->gmc.visible_vram_size; @@ -1336,17 +1652,21 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) u64 size = adev->fw_vram_usage.size; struct amdgpu_bo *bo; + memset(&bp, 0, sizeof(bp)); + bp.size = adev->fw_vram_usage.size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; adev->fw_vram_usage.va = NULL; adev->fw_vram_usage.reserved_bo = NULL; if (adev->fw_vram_usage.size > 0 && adev->fw_vram_usage.size <= vram_size) { - r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - ttm_bo_type_kernel, NULL, + r = amdgpu_bo_create(adev, &bp, &adev->fw_vram_usage.reserved_bo); if (r) goto error_create; @@ -1398,13 +1718,22 @@ error_create: adev->fw_vram_usage.reserved_bo = NULL; return r; } - +/** + * amdgpu_ttm_init - Init the memory management (ttm) as well as + * various gtt/vram related fields. + * + * This initializes all of the memory space pools that the TTM layer + * will need such as the GTT space (system memory mapped to the device), + * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which + * can be mapped per VMID. + */ int amdgpu_ttm_init(struct amdgpu_device *adev) { uint64_t gtt_size; int r; u64 vis_vram_limit; + /* initialize global references for vram/gtt */ r = amdgpu_ttm_global_init(adev); if (r) { return r; @@ -1425,6 +1754,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* We opt to avoid OOM on system pages allocations */ adev->mman.bdev.no_retry = true; + /* Initialize VRAM pool with all of VRAM divided into pages */ r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, adev->gmc.real_vram_size >> PAGE_SHIFT); if (r) { @@ -1454,15 +1784,23 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } - r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->stolen_vga_memory, - NULL, NULL); - if (r) - return r; + /* allocate memory as required for VGA + * This is used for VGA emulation and pre-OS scanout buffers to + * avoid display artifacts while transitioning between pre-OS + * and driver. */ + if (adev->gmc.stolen_size) { + r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->stolen_vga_memory, + NULL, NULL); + if (r) + return r; + } DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); + /* Compute GTT size, either bsaed on 3/4th the size of RAM size + * or whatever the user passed on module init */ if (amdgpu_gtt_size == -1) { struct sysinfo si; @@ -1473,6 +1811,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } else gtt_size = (uint64_t)amdgpu_gtt_size << 20; + + /* Initialize GTT memory pool */ r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); if (r) { DRM_ERROR("Failed initializing GTT heap.\n"); @@ -1481,6 +1821,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of GTT memory ready.\n", (unsigned)(gtt_size / (1024 * 1024))); + /* Initialize various on-chip memory pools */ adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; @@ -1520,6 +1861,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } } + /* Register debugfs entries for amdgpu_ttm */ r = amdgpu_ttm_debugfs_init(adev); if (r) { DRM_ERROR("Failed to init debugfs\n"); @@ -1528,13 +1870,25 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return 0; } +/** + * amdgpu_ttm_late_init - Handle any late initialization for + * amdgpu_ttm + */ +void amdgpu_ttm_late_init(struct amdgpu_device *adev) +{ + /* return the VGA stolen memory (if any) back to VRAM */ + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); +} + +/** + * amdgpu_ttm_fini - De-initialize the TTM memory pools + */ void amdgpu_ttm_fini(struct amdgpu_device *adev) { if (!adev->mman.initialized) return; amdgpu_ttm_debugfs_fini(adev); - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); if (adev->mman.aper_base_kaddr) iounmap(adev->mman.aper_base_kaddr); @@ -1856,6 +2210,11 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { #endif }; +/** + * amdgpu_ttm_vram_read - Linear read access to VRAM + * + * Accesses VRAM via MMIO for debugging purposes. + */ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -1895,6 +2254,11 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_ttm_vram_write - Linear write access to VRAM + * + * Accesses VRAM via MMIO for debugging purposes. + */ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { @@ -1943,6 +2307,9 @@ static const struct file_operations amdgpu_ttm_vram_fops = { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS +/** + * amdgpu_ttm_gtt_read - Linear read access to GTT memory + */ static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -1990,6 +2357,13 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { #endif +/** + * amdgpu_iomem_read - Virtual read access to GPU mapped memory + * + * This function is used to read memory that has been mapped to the + * GPU and the known addresses are not physical addresses but instead + * bus addresses (e.g., what you'd put in an IB or ring buffer). + */ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -1998,6 +2372,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, ssize_t result = 0; int r; + /* retrieve the IOMMU domain if any for this device */ dom = iommu_get_domain_for_dev(adev->dev); while (size) { @@ -2010,6 +2385,10 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, bytes = bytes < size ? bytes : size; + /* Translate the bus address to a physical address. If + * the domain is NULL it means there is no IOMMU active + * and the address translation is the identity + */ addr = dom ? iommu_iova_to_phys(dom, addr) : addr; pfn = addr >> PAGE_SHIFT; @@ -2034,6 +2413,13 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, return result; } +/** + * amdgpu_iomem_write - Virtual write access to GPU mapped memory + * + * This function is used to write memory that has been mapped to the + * GPU and the known addresses are not physical addresses but instead + * bus addresses (e.g., what you'd put in an IB or ring buffer). + */ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6ea7de863041..e969c879d87e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -77,6 +77,7 @@ uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); int amdgpu_ttm_init(struct amdgpu_device *adev); +void amdgpu_ttm_late_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 5916cc25e28b..f55f72a37ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -161,8 +161,38 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr) le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes)); DRM_DEBUG("reg_list_separate_size_bytes: %u\n", le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); - DRM_DEBUG("reg_list_separate_size_bytes: %u\n", - le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes)); + DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n", + le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes)); + if (version_minor == 1) { + const struct rlc_firmware_header_v2_1 *v2_1 = + container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0); + DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n", + le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length)); + DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver)); + DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver)); + DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes)); + DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n", + le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes)); + DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver)); + DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver)); + DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes)); + DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n", + le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes)); + DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver)); + DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_feature_ver)); + DRM_DEBUG("save_restore_list_srm_size_bytes %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_size_bytes)); + DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n", + le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes)); + } } else { DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor); } @@ -265,6 +295,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else @@ -276,6 +307,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_VEGA20: + return AMDGPU_FW_LOAD_DIRECT; default: DRM_ERROR("Unknown firmware load type\n"); } @@ -307,7 +340,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 && ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT && - ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) { + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM && + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) { ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + @@ -329,6 +365,18 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, le32_to_cpu(header->ucode_array_offset_bytes) + le32_to_cpu(cp_hdr->jt_offset) * 4), ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl, + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm, + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) { + ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes; + memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm, + ucode->ucode_size); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 30b5500dc152..08e38579af24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -98,6 +98,24 @@ struct rlc_firmware_header_v2_0 { uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */ }; +/* version_major=2, version_minor=1 */ +struct rlc_firmware_header_v2_1 { + struct rlc_firmware_header_v2_0 v2_0; + uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */ + uint32_t save_restore_list_cntl_ucode_ver; + uint32_t save_restore_list_cntl_feature_ver; + uint32_t save_restore_list_cntl_size_bytes; + uint32_t save_restore_list_cntl_offset_bytes; + uint32_t save_restore_list_gpm_ucode_ver; + uint32_t save_restore_list_gpm_feature_ver; + uint32_t save_restore_list_gpm_size_bytes; + uint32_t save_restore_list_gpm_offset_bytes; + uint32_t save_restore_list_srm_ucode_ver; + uint32_t save_restore_list_srm_feature_ver; + uint32_t save_restore_list_srm_size_bytes; + uint32_t save_restore_list_srm_offset_bytes; +}; + /* version_major=1, version_minor=0 */ struct sdma_firmware_header_v1_0 { struct common_firmware_header header; @@ -148,6 +166,7 @@ union amdgpu_firmware_header { struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; struct rlc_firmware_header_v2_0 rlc_v2_0; + struct rlc_firmware_header_v2_1 rlc_v2_1; struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_1 sdma_v1_1; struct gpu_info_firmware_header_v1_0 gpu_info; @@ -168,6 +187,9 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MEC2, AMDGPU_UCODE_ID_CP_MEC2_JT, AMDGPU_UCODE_ID_RLC_G, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, + AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, AMDGPU_UCODE_ID_STORAGE, AMDGPU_UCODE_ID_SMC, AMDGPU_UCODE_ID_UVD, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 627542b22ae4..bcf68f80bbf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -66,15 +66,18 @@ #define FIRMWARE_POLARIS10 "amdgpu/polaris10_uvd.bin" #define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin" #define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin" +#define FIRMWARE_VEGAM "amdgpu/vegam_uvd.bin" #define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin" #define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin" +#define FIRMWARE_VEGA20 "amdgpu/vega20_uvd.bin" -#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00) -#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00) -#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00) -#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00) -#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00) +/* These are common relative offsets for all asics, from uvd_7_0_offset.h, */ +#define UVD_GPCOM_VCPU_CMD 0x03c3 +#define UVD_GPCOM_VCPU_DATA0 0x03c4 +#define UVD_GPCOM_VCPU_DATA1 0x03c5 +#define UVD_NO_OP 0x03ff +#define UVD_BASE_SI 0x3800 /** * amdgpu_uvd_cs_ctx - Command submission parser context @@ -109,9 +112,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY); MODULE_FIRMWARE(FIRMWARE_POLARIS10); MODULE_FIRMWARE(FIRMWARE_POLARIS11); MODULE_FIRMWARE(FIRMWARE_POLARIS12); +MODULE_FIRMWARE(FIRMWARE_VEGAM); MODULE_FIRMWARE(FIRMWARE_VEGA10); MODULE_FIRMWARE(FIRMWARE_VEGA12); +MODULE_FIRMWARE(FIRMWARE_VEGA20); static void amdgpu_uvd_idle_work_handler(struct work_struct *work); @@ -123,9 +128,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) const char *fw_name; const struct common_firmware_header *hdr; unsigned version_major, version_minor, family_id; - int i, r; + int i, j, r; - INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); + INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler); switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK @@ -172,6 +177,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) case CHIP_VEGA12: fw_name = FIRMWARE_VEGA12; break; + case CHIP_VEGAM: + fw_name = FIRMWARE_VEGAM; + break; + case CHIP_VEGA20: + fw_name = FIRMWARE_VEGA20; + break; default: return -EINVAL; } @@ -226,28 +237,30 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo, - &adev->uvd.gpu_addr, &adev->uvd.cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); - return r; - } + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { - ring = &adev->uvd.ring; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity, - rq, amdgpu_sched_jobs, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up UVD run queue.\n"); - return r; - } + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo, + &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); + return r; + } - for (i = 0; i < adev->uvd.max_handles; ++i) { - atomic_set(&adev->uvd.handles[i], 0); - adev->uvd.filp[i] = NULL; - } + ring = &adev->uvd.inst[j].ring; + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity, + rq, NULL); + if (r != 0) { + DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j); + return r; + } + for (i = 0; i < adev->uvd.max_handles; ++i) { + atomic_set(&adev->uvd.inst[j].handles[i], 0); + adev->uvd.inst[j].filp[i] = NULL; + } + } /* from uvd v5.0 HW addressing capacity increased to 64 bits */ if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) adev->uvd.address_64_bit = true; @@ -274,20 +287,22 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { - int i; - kfree(adev->uvd.saved_bo); + int i, j; - drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + kfree(adev->uvd.inst[j].saved_bo); - amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo, - &adev->uvd.gpu_addr, - (void **)&adev->uvd.cpu_addr); + drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity); - amdgpu_ring_fini(&adev->uvd.ring); + amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo, + &adev->uvd.inst[j].gpu_addr, + (void **)&adev->uvd.inst[j].cpu_addr); - for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) - amdgpu_ring_fini(&adev->uvd.ring_enc[i]); + amdgpu_ring_fini(&adev->uvd.inst[j].ring); + for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) + amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); + } release_firmware(adev->uvd.fw); return 0; @@ -297,32 +312,33 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) { unsigned size; void *ptr; - int i; - - if (adev->uvd.vcpu_bo == NULL) - return 0; + int i, j; - cancel_delayed_work_sync(&adev->uvd.idle_work); + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + if (adev->uvd.inst[j].vcpu_bo == NULL) + continue; - /* only valid for physical mode */ - if (adev->asic_type < CHIP_POLARIS10) { - for (i = 0; i < adev->uvd.max_handles; ++i) - if (atomic_read(&adev->uvd.handles[i])) - break; + cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work); - if (i == adev->uvd.max_handles) - return 0; - } + /* only valid for physical mode */ + if (adev->asic_type < CHIP_POLARIS10) { + for (i = 0; i < adev->uvd.max_handles; ++i) + if (atomic_read(&adev->uvd.inst[j].handles[i])) + break; - size = amdgpu_bo_size(adev->uvd.vcpu_bo); - ptr = adev->uvd.cpu_addr; + if (i == adev->uvd.max_handles) + continue; + } - adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL); - if (!adev->uvd.saved_bo) - return -ENOMEM; + size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo); + ptr = adev->uvd.inst[j].cpu_addr; - memcpy_fromio(adev->uvd.saved_bo, ptr, size); + adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL); + if (!adev->uvd.inst[j].saved_bo) + return -ENOMEM; + memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + } return 0; } @@ -330,59 +346,65 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; - if (adev->uvd.vcpu_bo == NULL) - return -EINVAL; + for (i = 0; i < adev->uvd.num_uvd_inst; i++) { + if (adev->uvd.inst[i].vcpu_bo == NULL) + return -EINVAL; - size = amdgpu_bo_size(adev->uvd.vcpu_bo); - ptr = adev->uvd.cpu_addr; + size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo); + ptr = adev->uvd.inst[i].cpu_addr; - if (adev->uvd.saved_bo != NULL) { - memcpy_toio(ptr, adev->uvd.saved_bo, size); - kfree(adev->uvd.saved_bo); - adev->uvd.saved_bo = NULL; - } else { - const struct common_firmware_header *hdr; - unsigned offset; - - hdr = (const struct common_firmware_header *)adev->uvd.fw->data; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); + if (adev->uvd.inst[i].saved_bo != NULL) { + memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size); + kfree(adev->uvd.inst[i].saved_bo); + adev->uvd.inst[i].saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->uvd.fw->data; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + } + memset_io(ptr, 0, size); + /* to restore uvd fence seq */ + amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring); } - memset_io(ptr, 0, size); - /* to restore uvd fence seq */ - amdgpu_fence_driver_force_completion(&adev->uvd.ring); } - return 0; } void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) { - struct amdgpu_ring *ring = &adev->uvd.ring; - int i, r; + struct amdgpu_ring *ring; + int i, j, r; - for (i = 0; i < adev->uvd.max_handles; ++i) { - uint32_t handle = atomic_read(&adev->uvd.handles[i]); - if (handle != 0 && adev->uvd.filp[i] == filp) { - struct dma_fence *fence; - - r = amdgpu_uvd_get_destroy_msg(ring, handle, - false, &fence); - if (r) { - DRM_ERROR("Error destroying UVD (%d)!\n", r); - continue; - } + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + ring = &adev->uvd.inst[j].ring; + + for (i = 0; i < adev->uvd.max_handles; ++i) { + uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]); + if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) { + struct dma_fence *fence; + + r = amdgpu_uvd_get_destroy_msg(ring, handle, + false, &fence); + if (r) { + DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r); + continue; + } - dma_fence_wait(fence, false); - dma_fence_put(fence); + dma_fence_wait(fence, false); + dma_fence_put(fence); - adev->uvd.filp[i] = NULL; - atomic_set(&adev->uvd.handles[i], 0); + adev->uvd.inst[j].filp[i] = NULL; + atomic_set(&adev->uvd.inst[j].handles[i], 0); + } } } } @@ -657,15 +679,16 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, void *ptr; long r; int i; + uint32_t ip_instance = ctx->parser->job->ring->me; if (offset & 0x3F) { - DRM_ERROR("UVD messages must be 64 byte aligned!\n"); + DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance); return -EINVAL; } r = amdgpu_bo_kmap(bo, &ptr); if (r) { - DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); + DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r); return r; } @@ -675,7 +698,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, handle = msg[2]; if (handle == 0) { - DRM_ERROR("Invalid UVD handle!\n"); + DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance); return -EINVAL; } @@ -686,18 +709,18 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, /* try to alloc a new handle */ for (i = 0; i < adev->uvd.max_handles; ++i) { - if (atomic_read(&adev->uvd.handles[i]) == handle) { - DRM_ERROR("Handle 0x%x already in use!\n", handle); + if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) { + DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle); return -EINVAL; } - if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) { - adev->uvd.filp[i] = ctx->parser->filp; + if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) { + adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp; return 0; } } - DRM_ERROR("No more free UVD handles!\n"); + DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance); return -ENOSPC; case 1: @@ -709,27 +732,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, /* validate the handle */ for (i = 0; i < adev->uvd.max_handles; ++i) { - if (atomic_read(&adev->uvd.handles[i]) == handle) { - if (adev->uvd.filp[i] != ctx->parser->filp) { - DRM_ERROR("UVD handle collision detected!\n"); + if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) { + if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) { + DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance); return -EINVAL; } return 0; } } - DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); + DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle); return -ENOENT; case 2: /* it's a destroy msg, free the handle */ for (i = 0; i < adev->uvd.max_handles; ++i) - atomic_cmpxchg(&adev->uvd.handles[i], handle, 0); + atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0); amdgpu_bo_kunmap(bo); return 0; default: - DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); + DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type); return -EINVAL; } BUG(); @@ -800,7 +823,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) } if ((cmd == 0 || cmd == 0x3) && - (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) { + (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) { DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n", start, end); return -EINVAL; @@ -968,6 +991,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, uint64_t addr; long r; int i; + unsigned offset_idx = 0; + unsigned offset[3] = { UVD_BASE_SI, 0, 0 }; amdgpu_bo_kunmap(bo); amdgpu_bo_unpin(bo); @@ -987,17 +1012,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, goto err; if (adev->asic_type >= CHIP_VEGA10) { - data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0); - data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0); - data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0); - data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0); - } else { - data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0); - data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0); - data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0); - data[3] = PACKET0(mmUVD_NO_OP, 0); + offset_idx = 1 + ring->me; + offset[1] = adev->reg_offset[UVD_HWIP][0][1]; + offset[2] = adev->reg_offset[UVD_HWIP][1][1]; } + data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0); + data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0); + data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0); + data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0); + ib = &job->ibs[0]; addr = amdgpu_bo_gpu_offset(bo); ib->ptr[0] = data[0]; @@ -1033,7 +1057,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (r) goto err_free; - r = amdgpu_job_submit(job, ring, &adev->uvd.entity, + r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity, AMDGPU_FENCE_OWNER_UNDEFINED, &f); if (r) goto err_free; @@ -1121,8 +1145,15 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, static void amdgpu_uvd_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = - container_of(work, struct amdgpu_device, uvd.idle_work.work); - unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring); + container_of(work, struct amdgpu_device, uvd.inst->idle_work.work); + unsigned fences = 0, i, j; + + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring); + for (j = 0; j < adev->uvd.num_enc_rings; ++j) { + fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]); + } + } if (fences == 0) { if (adev->pm.dpm_enabled) { @@ -1136,7 +1167,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) AMD_CG_STATE_GATE); } } else { - schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT); + schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT); } } @@ -1148,7 +1179,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) if (amdgpu_sriov_vf(adev)) return; - set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); + set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work); if (set_clocks) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, true); @@ -1165,7 +1196,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) { if (!amdgpu_sriov_vf(ring->adev)) - schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); + schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT); } /** @@ -1179,27 +1210,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence; long r; + uint32_t ip_instance = ring->me; r = amdgpu_uvd_get_create_msg(ring, 1, NULL); if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r); goto error; } r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r); goto error; } r = dma_fence_wait_timeout(fence, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance); r = -ETIMEDOUT; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r); } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx); r = 0; } @@ -1227,7 +1259,7 @@ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev) * necessarily linear. So we need to count * all non-zero handles. */ - if (atomic_read(&adev->uvd.handles[i])) + if (atomic_read(&adev->uvd.inst->handles[i])) used_handles++; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 32ea20b99e53..b1579fba134c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -31,30 +31,37 @@ #define AMDGPU_UVD_SESSION_SIZE (50*1024) #define AMDGPU_UVD_FIRMWARE_OFFSET 256 +#define AMDGPU_MAX_UVD_INSTANCES 2 + #define AMDGPU_UVD_FIRMWARE_SIZE(adev) \ (AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \ 8) - AMDGPU_UVD_FIRMWARE_OFFSET) -struct amdgpu_uvd { +struct amdgpu_uvd_inst { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; - unsigned fw_version; void *saved_bo; - unsigned max_handles; atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; struct delayed_work idle_work; - const struct firmware *fw; /* UVD firmware */ struct amdgpu_ring ring; struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS]; struct amdgpu_irq_src irq; - bool address_64_bit; - bool use_ctx_buf; struct drm_sched_entity entity; struct drm_sched_entity entity_enc; uint32_t srbm_soft_reset; +}; + +struct amdgpu_uvd { + const struct firmware *fw; /* UVD firmware */ + unsigned fw_version; + unsigned max_handles; unsigned num_enc_rings; + uint8_t num_uvd_inst; + bool address_64_bit; + bool use_ctx_buf; + struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES]; }; int amdgpu_uvd_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a33804bd3314..23d960ec1cf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -51,11 +51,13 @@ #define FIRMWARE_FIJI "amdgpu/fiji_vce.bin" #define FIRMWARE_STONEY "amdgpu/stoney_vce.bin" #define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin" -#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" -#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" +#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" +#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" +#define FIRMWARE_VEGAM "amdgpu/vegam_vce.bin" #define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin" #define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin" +#define FIRMWARE_VEGA20 "amdgpu/vega20_vce.bin" #ifdef CONFIG_DRM_AMDGPU_CIK MODULE_FIRMWARE(FIRMWARE_BONAIRE); @@ -71,9 +73,11 @@ MODULE_FIRMWARE(FIRMWARE_STONEY); MODULE_FIRMWARE(FIRMWARE_POLARIS10); MODULE_FIRMWARE(FIRMWARE_POLARIS11); MODULE_FIRMWARE(FIRMWARE_POLARIS12); +MODULE_FIRMWARE(FIRMWARE_VEGAM); MODULE_FIRMWARE(FIRMWARE_VEGA10); MODULE_FIRMWARE(FIRMWARE_VEGA12); +MODULE_FIRMWARE(FIRMWARE_VEGA20); static void amdgpu_vce_idle_work_handler(struct work_struct *work); @@ -132,12 +136,18 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) case CHIP_POLARIS12: fw_name = FIRMWARE_POLARIS12; break; + case CHIP_VEGAM: + fw_name = FIRMWARE_VEGAM; + break; case CHIP_VEGA10: fw_name = FIRMWARE_VEGA10; break; case CHIP_VEGA12: fw_name = FIRMWARE_VEGA12; break; + case CHIP_VEGA20: + fw_name = FIRMWARE_VEGA20; + break; default: return -EINVAL; @@ -181,7 +191,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) ring = &adev->vce.ring[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCE run queue.\n"); return r; @@ -755,6 +765,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) if (r) goto out; break; + + case 0x0500000d: /* MV buffer */ + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, + idx + 2, 0, 0); + if (r) + goto out; + + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8, + idx + 7, 0, 0); + if (r) + goto out; + break; } idx += len / 4; @@ -860,6 +882,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) goto out; break; + case 0x0500000d: /* MV buffer */ + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, + idx + 2, *size, 0); + if (r) + goto out; + + r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8, + idx + 7, *size / 12, 0); + if (r) + goto out; + break; + default: DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); r = -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 58e495330b38..127e87b470ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -49,8 +49,6 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work); int amdgpu_vcn_sw_init(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; - struct drm_sched_rq *rq; unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; @@ -84,6 +82,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); family_id = le32_to_cpu(hdr->ucode_version) & 0xff; version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; @@ -102,24 +101,6 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) return r; } - ring = &adev->vcn.ring_dec; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, - rq, amdgpu_sched_jobs, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up VCN dec run queue.\n"); - return r; - } - - ring = &adev->vcn.ring_enc[0]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, - rq, amdgpu_sched_jobs, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up VCN enc run queue.\n"); - return r; - } - return 0; } @@ -129,10 +110,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) kfree(adev->vcn.saved_bo); - drm_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); - - drm_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc); - amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, &adev->vcn.gpu_addr, (void **)&adev->vcn.cpu_addr); @@ -205,13 +182,18 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vcn.idle_work.work); unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); + unsigned i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); + } if (fences == 0) { - if (adev->pm.dpm_enabled) { - /* might be used when with pg/cg + if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); - */ - } + else + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_GATE); } else { schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); } @@ -223,9 +205,11 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); if (set_clocks && adev->pm.dpm_enabled) { - /* might be used when with pg/cg - amdgpu_dpm_enable_uvd(adev, true); - */ + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + else + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); } } @@ -271,7 +255,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) } static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, - struct amdgpu_bo *bo, bool direct, + struct amdgpu_bo *bo, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; @@ -299,19 +283,12 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, } ib->length_dw = 16; - if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); - job->fence = dma_fence_get(f); - if (r) - goto err_free; + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err_free; - amdgpu_job_free(job); - } else { - r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec, - AMDGPU_FENCE_OWNER_UNDEFINED, &f); - if (r) - goto err_free; - } + amdgpu_job_free(job); amdgpu_bo_fence(bo, f, false); amdgpu_bo_unreserve(bo); @@ -363,11 +340,11 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = 14; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - return amdgpu_vcn_dec_send_msg(ring, bo, true, fence); + return amdgpu_vcn_dec_send_msg(ring, bo, fence); } static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence) + struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_bo *bo = NULL; @@ -389,7 +366,7 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = 6; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence); + return amdgpu_vcn_dec_send_msg(ring, bo, fence); } int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) @@ -403,7 +380,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto error; } - r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence); + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence); if (r) { DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 2fd7db891689..773010b9ff15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -45,6 +45,17 @@ #define VCN_ENC_CMD_REG_WRITE 0x0000000b #define VCN_ENC_CMD_REG_WAIT 0x0000000c +enum engine_status_constants { + UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002, + UVD_STATUS__UVD_BUSY = 0x00000004, + GB_ADDR_CONFIG_DEFAULT = 0x26010011, + UVD_STATUS__IDLE = 0x2, + UVD_STATUS__BUSY = 0x5, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1, + UVD_STATUS__RBC_BUSY = 0x1, +}; + struct amdgpu_vcn { struct amdgpu_bo *vcpu_bo; void *cpu_addr; @@ -56,8 +67,6 @@ struct amdgpu_vcn { struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; struct amdgpu_irq_src irq; - struct drm_sched_entity entity_dec; - struct drm_sched_entity entity_enc; unsigned num_enc_rings; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index da55a78d7380..b0eb2f537392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -94,6 +94,34 @@ struct amdgpu_prt_cb { struct dma_fence_cb cb; }; +static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo) +{ + base->vm = vm; + base->bo = bo; + INIT_LIST_HEAD(&base->bo_list); + INIT_LIST_HEAD(&base->vm_status); + + if (!bo) + return; + list_add_tail(&base->bo_list, &bo->va); + + if (bo->tbo.resv != vm->root.base.bo->tbo.resv) + return; + + if (bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) + return; + + /* + * we checked all the prerequisites, but it looks like this per vm bo + * is currently evicted. add the bo to the evicted list to make sure it + * is validated on next vm use to avoid fault. + * */ + list_move_tail(&base->vm_status, &vm->evicted); +} + /** * amdgpu_vm_level_shift - return the addr shift for each level * @@ -196,24 +224,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, void *param) { struct ttm_bo_global *glob = adev->mman.bdev.glob; - int r; + struct amdgpu_vm_bo_base *bo_base, *tmp; + int r = 0; - spin_lock(&vm->status_lock); - while (!list_empty(&vm->evicted)) { - struct amdgpu_vm_bo_base *bo_base; - struct amdgpu_bo *bo; + list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { + struct amdgpu_bo *bo = bo_base->bo; - bo_base = list_first_entry(&vm->evicted, - struct amdgpu_vm_bo_base, - vm_status); - spin_unlock(&vm->status_lock); - - bo = bo_base->bo; - BUG_ON(!bo); if (bo->parent) { r = validate(param, bo); if (r) - return r; + break; spin_lock(&glob->lru_lock); ttm_bo_move_to_lru_tail(&bo->tbo); @@ -222,22 +242,29 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_unlock(&glob->lru_lock); } - if (bo->tbo.type == ttm_bo_type_kernel && - vm->use_cpu_for_update) { - r = amdgpu_bo_kmap(bo, NULL); - if (r) - return r; - } - - spin_lock(&vm->status_lock); - if (bo->tbo.type != ttm_bo_type_kernel) + if (bo->tbo.type != ttm_bo_type_kernel) { + spin_lock(&vm->moved_lock); list_move(&bo_base->vm_status, &vm->moved); - else + spin_unlock(&vm->moved_lock); + } else { list_move(&bo_base->vm_status, &vm->relocated); + } } - spin_unlock(&vm->status_lock); - return 0; + spin_lock(&glob->lru_lock); + list_for_each_entry(bo_base, &vm->idle, vm_status) { + struct amdgpu_bo *bo = bo_base->bo; + + if (!bo->parent) + continue; + + ttm_bo_move_to_lru_tail(&bo->tbo); + if (bo->shadow) + ttm_bo_move_to_lru_tail(&bo->shadow->tbo); + } + spin_unlock(&glob->lru_lock); + + return r; } /** @@ -249,13 +276,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, */ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { - bool ready; - - spin_lock(&vm->status_lock); - ready = list_empty(&vm->evicted); - spin_unlock(&vm->status_lock); - - return ready; + return list_empty(&vm->evicted); } /** @@ -412,11 +433,16 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, struct amdgpu_bo *pt; if (!entry->base.bo) { - r = amdgpu_bo_create(adev, - amdgpu_vm_bo_size(adev, level), - AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, flags, - ttm_bo_type_kernel, resv, &pt); + struct amdgpu_bo_param bp; + + memset(&bp, 0, sizeof(bp)); + bp.size = amdgpu_vm_bo_size(adev, level); + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = flags; + bp.type = ttm_bo_type_kernel; + bp.resv = resv; + r = amdgpu_bo_create(adev, &bp, &pt); if (r) return r; @@ -441,12 +467,8 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, */ pt->parent = amdgpu_bo_ref(parent->base.bo); - entry->base.vm = vm; - entry->base.bo = pt; - list_add_tail(&entry->base.bo_list, &pt->va); - spin_lock(&vm->status_lock); - list_add(&entry->base.vm_status, &vm->relocated); - spin_unlock(&vm->status_lock); + amdgpu_vm_bo_base_init(&entry->base, vm, pt); + list_move(&entry->base.vm_status, &vm->relocated); } if (level < AMDGPU_VM_PTB) { @@ -628,7 +650,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (vm_flush_needed || pasid_mapping_needed) { - r = amdgpu_fence_emit(ring, &fence); + r = amdgpu_fence_emit(ring, &fence, 0); if (r) return r; } @@ -893,10 +915,8 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, if (!entry->base.bo) continue; - spin_lock(&vm->status_lock); - if (list_empty(&entry->base.vm_status)) - list_add(&entry->base.vm_status, &vm->relocated); - spin_unlock(&vm->status_lock); + if (!entry->base.moved) + list_move(&entry->base.vm_status, &vm->relocated); amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); } } @@ -926,6 +946,14 @@ restart: params.adev = adev; if (vm->use_cpu_for_update) { + struct amdgpu_vm_bo_base *bo_base; + + list_for_each_entry(bo_base, &vm->relocated, vm_status) { + r = amdgpu_bo_kmap(bo_base->bo, NULL); + if (unlikely(r)) + return r; + } + r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); if (unlikely(r)) return r; @@ -941,7 +969,6 @@ restart: params.func = amdgpu_vm_do_set_ptes; } - spin_lock(&vm->status_lock); while (!list_empty(&vm->relocated)) { struct amdgpu_vm_bo_base *bo_base, *parent; struct amdgpu_vm_pt *pt, *entry; @@ -950,14 +977,12 @@ restart: bo_base = list_first_entry(&vm->relocated, struct amdgpu_vm_bo_base, vm_status); - list_del_init(&bo_base->vm_status); - spin_unlock(&vm->status_lock); + bo_base->moved = false; + list_move(&bo_base->vm_status, &vm->idle); bo = bo_base->bo->parent; - if (!bo) { - spin_lock(&vm->status_lock); + if (!bo) continue; - } parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base, bo_list); @@ -966,12 +991,10 @@ restart: amdgpu_vm_update_pde(¶ms, vm, pt, entry); - spin_lock(&vm->status_lock); if (!vm->use_cpu_for_update && (ndw - params.ib->length_dw) < 32) break; } - spin_unlock(&vm->status_lock); if (vm->use_cpu_for_update) { /* Flush HDP */ @@ -1074,9 +1097,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, if (entry->huge) { /* Add the entry to the relocated list to update it. */ entry->huge = false; - spin_lock(&p->vm->status_lock); list_move(&entry->base.vm_status, &p->vm->relocated); - spin_unlock(&p->vm->status_lock); } return; } @@ -1555,9 +1576,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, amdgpu_asic_flush_hdp(adev, NULL); } - spin_lock(&vm->status_lock); + spin_lock(&vm->moved_lock); list_del_init(&bo_va->base.vm_status); - spin_unlock(&vm->status_lock); + spin_unlock(&vm->moved_lock); + + /* If the BO is not in its preferred location add it back to + * the evicted list so that it gets validated again on the + * next command submission. + */ + if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + uint32_t mem_type = bo->tbo.mem.mem_type; + + if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) + list_add_tail(&bo_va->base.vm_status, &vm->evicted); + else + list_add(&bo_va->base.vm_status, &vm->idle); + } list_splice_init(&bo_va->invalids, &bo_va->valids); bo_va->cleared = clear; @@ -1766,19 +1800,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm) { + struct amdgpu_bo_va *bo_va, *tmp; + struct list_head moved; bool clear; - int r = 0; - - spin_lock(&vm->status_lock); - while (!list_empty(&vm->moved)) { - struct amdgpu_bo_va *bo_va; - struct reservation_object *resv; + int r; - bo_va = list_first_entry(&vm->moved, - struct amdgpu_bo_va, base.vm_status); - spin_unlock(&vm->status_lock); + INIT_LIST_HEAD(&moved); + spin_lock(&vm->moved_lock); + list_splice_init(&vm->moved, &moved); + spin_unlock(&vm->moved_lock); - resv = bo_va->base.bo->tbo.resv; + list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) { + struct reservation_object *resv = bo_va->base.bo->tbo.resv; /* Per VM BOs never need to bo cleared in the page tables */ if (resv == vm->root.base.bo->tbo.resv) @@ -1791,17 +1824,19 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, clear = true; r = amdgpu_vm_bo_update(adev, bo_va, clear); - if (r) + if (r) { + spin_lock(&vm->moved_lock); + list_splice(&moved, &vm->moved); + spin_unlock(&vm->moved_lock); return r; + } if (!clear && resv != vm->root.base.bo->tbo.resv) reservation_object_unlock(resv); - spin_lock(&vm->status_lock); } - spin_unlock(&vm->status_lock); - return r; + return 0; } /** @@ -1827,36 +1862,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, if (bo_va == NULL) { return NULL; } - bo_va->base.vm = vm; - bo_va->base.bo = bo; - INIT_LIST_HEAD(&bo_va->base.bo_list); - INIT_LIST_HEAD(&bo_va->base.vm_status); + amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); bo_va->ref_count = 1; INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); - if (!bo) - return bo_va; - - list_add_tail(&bo_va->base.bo_list, &bo->va); - - if (bo->tbo.resv != vm->root.base.bo->tbo.resv) - return bo_va; - - if (bo->preferred_domains & - amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) - return bo_va; - - /* - * We checked all the prerequisites, but it looks like this per VM BO - * is currently evicted. add the BO to the evicted list to make sure it - * is validated on next VM use to avoid fault. - * */ - spin_lock(&vm->status_lock); - list_move_tail(&bo_va->base.vm_status, &vm->evicted); - spin_unlock(&vm->status_lock); - return bo_va; } @@ -1884,11 +1895,11 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, if (mapping->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { - spin_lock(&vm->status_lock); - if (list_empty(&bo_va->base.vm_status)) - list_add(&bo_va->base.vm_status, &vm->moved); - spin_unlock(&vm->status_lock); + if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && + !bo_va->base.moved) { + spin_lock(&vm->moved_lock); + list_move(&bo_va->base.vm_status, &vm->moved); + spin_unlock(&vm->moved_lock); } trace_amdgpu_vm_bo_map(bo_va, mapping); } @@ -2112,7 +2123,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, before->last = saddr - 1; before->offset = tmp->offset; before->flags = tmp->flags; - list_add(&before->list, &tmp->list); + before->bo_va = tmp->bo_va; + list_add(&before->list, &tmp->bo_va->invalids); } /* Remember mapping split at the end */ @@ -2122,7 +2134,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, after->offset = tmp->offset; after->offset += after->start - tmp->start; after->flags = tmp->flags; - list_add(&after->list, &tmp->list); + after->bo_va = tmp->bo_va; + list_add(&after->list, &tmp->bo_va->invalids); } list_del(&tmp->list); @@ -2198,9 +2211,9 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_del(&bo_va->base.bo_list); - spin_lock(&vm->status_lock); + spin_lock(&vm->moved_lock); list_del(&bo_va->base.vm_status); - spin_unlock(&vm->status_lock); + spin_unlock(&vm->moved_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); @@ -2234,33 +2247,34 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, { struct amdgpu_vm_bo_base *bo_base; + /* shadow bo doesn't have bo base, its validation needs its parent */ + if (bo->parent && bo->parent->shadow == bo) + bo = bo->parent; + list_for_each_entry(bo_base, &bo->va, bo_list) { struct amdgpu_vm *vm = bo_base->vm; + bool was_moved = bo_base->moved; bo_base->moved = true; if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { - spin_lock(&bo_base->vm->status_lock); if (bo->tbo.type == ttm_bo_type_kernel) list_move(&bo_base->vm_status, &vm->evicted); else list_move_tail(&bo_base->vm_status, &vm->evicted); - spin_unlock(&bo_base->vm->status_lock); continue; } - if (bo->tbo.type == ttm_bo_type_kernel) { - spin_lock(&bo_base->vm->status_lock); - if (list_empty(&bo_base->vm_status)) - list_add(&bo_base->vm_status, &vm->relocated); - spin_unlock(&bo_base->vm->status_lock); + if (was_moved) continue; - } - spin_lock(&bo_base->vm->status_lock); - if (list_empty(&bo_base->vm_status)) - list_add(&bo_base->vm_status, &vm->moved); - spin_unlock(&bo_base->vm->status_lock); + if (bo->tbo.type == ttm_bo_type_kernel) { + list_move(&bo_base->vm_status, &vm->relocated); + } else { + spin_lock(&bo_base->vm->moved_lock); + list_move(&bo_base->vm_status, &vm->moved); + spin_unlock(&bo_base->vm->moved_lock); + } } } @@ -2355,6 +2369,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid) { + struct amdgpu_bo_param bp; + struct amdgpu_bo *root; const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); unsigned ring_instance; @@ -2367,10 +2383,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->va = RB_ROOT_CACHED; for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; - spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->evicted); INIT_LIST_HEAD(&vm->relocated); + spin_lock_init(&vm->moved_lock); INIT_LIST_HEAD(&vm->moved); + INIT_LIST_HEAD(&vm->idle); INIT_LIST_HEAD(&vm->freed); /* create scheduler entity for page table updates */ @@ -2380,7 +2397,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ring = adev->vm_manager.vm_pte_rings[ring_instance]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; r = drm_sched_entity_init(&ring->sched, &vm->entity, - rq, amdgpu_sched_jobs, NULL); + rq, NULL); if (r) return r; @@ -2409,24 +2426,28 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, flags |= AMDGPU_GEM_CREATE_SHADOW; size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); - r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, - ttm_bo_type_kernel, NULL, &vm->root.base.bo); + memset(&bp, 0, sizeof(bp)); + bp.size = size; + bp.byte_align = align; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.flags = flags; + bp.type = ttm_bo_type_kernel; + bp.resv = NULL; + r = amdgpu_bo_create(adev, &bp, &root); if (r) goto error_free_sched_entity; - r = amdgpu_bo_reserve(vm->root.base.bo, true); + r = amdgpu_bo_reserve(root, true); if (r) goto error_free_root; - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, + r = amdgpu_vm_clear_bo(adev, vm, root, adev->vm_manager.root_level, vm->pte_support_ats); if (r) goto error_unreserve; - vm->root.base.vm = vm; - list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); - list_add_tail(&vm->root.base.vm_status, &vm->evicted); + amdgpu_vm_bo_base_init(&vm->root.base, vm, root); amdgpu_bo_unreserve(vm->root.base.bo); if (pasid) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 30f080364c97..061b99a18cb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -75,11 +75,12 @@ struct amdgpu_bo_list_entry; /* PDE Block Fragment Size for VEGA10 */ #define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) -/* VEGA10 only */ + +/* For GFX9 */ #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) -/* For Raven */ +#define AMDGPU_MTYPE_NC 0 #define AMDGPU_MTYPE_CC 2 #define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ @@ -167,9 +168,6 @@ struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; - /* protecting invalidated */ - spinlock_t status_lock; - /* BOs who needs a validation */ struct list_head evicted; @@ -178,6 +176,10 @@ struct amdgpu_vm { /* BOs moved, but not yet updated in the PT */ struct list_head moved; + spinlock_t moved_lock; + + /* All BOs of this VM not currently in the state machine */ + struct list_head idle; /* BO mappings freed, but not yet updated in the PT */ struct list_head freed; @@ -186,9 +188,6 @@ struct amdgpu_vm { struct amdgpu_vm_pt root; struct dma_fence *last_update; - /* protecting freed */ - spinlock_t freed_lock; - /* Scheduler entity for page table updates */ struct drm_sched_entity entity; diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 47ef3e6e7178..a266dcf5daed 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -5903,7 +5903,7 @@ static int ci_dpm_init(struct amdgpu_device *adev) pi->pcie_dpm_key_disabled = 0; pi->thermal_sclk_dpm_enabled = 0; - if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK) + if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) pi->caps_sclk_ds = true; else pi->caps_sclk_ds = false; @@ -6255,7 +6255,7 @@ static int ci_dpm_late_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_dpm) + if (!adev->pm.dpm_enabled) return 0; /* init the sysfs and debugfs files late */ diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 0df22030e713..8ff4c60d1b59 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1735,6 +1735,12 @@ static void cik_invalidate_hdp(struct amdgpu_device *adev, } } +static bool cik_need_full_reset(struct amdgpu_device *adev) +{ + /* change this when we support soft reset */ + return true; +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1748,6 +1754,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_config_memsize = &cik_get_config_memsize, .flush_hdp = &cik_flush_hdp, .invalidate_hdp = &cik_invalidate_hdp, + .need_full_reset = &cik_need_full_reset, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 452f88ea46a2..ada241bfeee9 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1823,7 +1823,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1842,18 +1841,15 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) @@ -2043,8 +2039,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2526,11 +2521,9 @@ static void dce_v10_0_crtc_disable(struct drm_crtc *crtc) dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index a7c1c584a191..a5b96eac3033 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -173,6 +173,7 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) ARRAY_SIZE(polaris11_golden_settings_a11)); break; case CHIP_POLARIS10: + case CHIP_VEGAM: amdgpu_device_program_register_sequence(adev, polaris10_golden_settings_a11, ARRAY_SIZE(polaris10_golden_settings_a11)); @@ -473,6 +474,7 @@ static int dce_v11_0_get_num_crtc (struct amdgpu_device *adev) num_crtc = 2; break; case CHIP_POLARIS10: + case CHIP_VEGAM: num_crtc = 6; break; case CHIP_POLARIS11: @@ -1445,6 +1447,7 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev) adev->mode_info.audio.num_pins = 7; break; case CHIP_POLARIS10: + case CHIP_VEGAM: adev->mode_info.audio.num_pins = 8; break; case CHIP_POLARIS11: @@ -1862,7 +1865,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1881,18 +1883,15 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) @@ -2082,8 +2081,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2253,7 +2251,8 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc) if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(amdgpu_crtc->encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; @@ -2601,11 +2600,9 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc) dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); @@ -2673,7 +2670,8 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc, if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(amdgpu_crtc->encoder); int encoder_mode = @@ -2830,6 +2828,7 @@ static int dce_v11_0_early_init(void *handle) adev->mode_info.num_dig = 9; break; case CHIP_POLARIS10: + case CHIP_VEGAM: adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; break; @@ -2949,7 +2948,8 @@ static int dce_v11_0_hw_init(void *handle) amdgpu_atombios_encoder_init_dig(adev); if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk, DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS); amdgpu_atombios_crtc_set_dce_clock(adev, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 9f67b7fd3487..394cc1e8fe20 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1780,7 +1780,6 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1798,18 +1797,15 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) @@ -1978,8 +1974,7 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2414,11 +2409,9 @@ static void dce_v6_0_crtc_disable(struct drm_crtc *crtc) dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index f55422cbd77a..c9b9ab8f1b05 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1754,7 +1754,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_framebuffer *amdgpu_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct amdgpu_bo *abo; @@ -1773,18 +1772,15 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - amdgpu_fb = to_amdgpu_framebuffer(fb); + if (atomic) target_fb = fb; - } else { - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = amdgpu_fb->obj; + obj = target_fb->obj[0]; abo = gem_to_amdgpu_bo(obj); r = amdgpu_bo_reserve(abo, false); if (unlikely(r != 0)) @@ -1955,8 +1951,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - amdgpu_fb = to_amdgpu_framebuffer(fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r != 0)) return r; @@ -2430,11 +2425,9 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc) dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index b51f05dc9582..dbf2ccd0c744 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -168,11 +168,9 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc) dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct amdgpu_framebuffer *amdgpu_fb; struct amdgpu_bo *abo; - amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); - abo = gem_to_amdgpu_bo(amdgpu_fb->obj); + abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]); r = amdgpu_bo_reserve(abo, true); if (unlikely(r)) DRM_ERROR("failed to reserve abo before unpin\n"); @@ -329,7 +327,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector) return 0; } -static int dce_virtual_mode_valid(struct drm_connector *connector, +static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { return MODE_OK; @@ -462,8 +460,9 @@ static int dce_virtual_hw_init(void *handle) break; case CHIP_CARRIZO: case CHIP_STONEY: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_VEGAM: dce_v11_0_disable_dce(adev); break; case CHIP_TOPAZ: @@ -474,6 +473,7 @@ static int dce_virtual_hw_init(void *handle) break; case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: break; default: DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c new file mode 100644 index 000000000000..9935371db7ce --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -0,0 +1,120 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v1_7.h" + +#include "df/df_1_7_default.h" +#include "df/df_1_7_offset.h" +#include "df/df_1_7_sh_mask.h" + +static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; + +static void df_v1_7_init (struct amdgpu_device *adev) +{ +} + +static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + if (enable) { + tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl); + tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; + WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp); + } else + WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, + mmFabricConfigAccessControl_DEFAULT); +} + +static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev) +{ + u32 tmp; + + tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); + tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; + tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + + return tmp; +} + +static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev) +{ + int fb_channel_number; + + fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + + return df_v1_7_channel_number[fb_channel_number]; +} + +static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + /* Put DF on broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, true); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY; + WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } else { + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V1_7_MGCG_DISABLE; + WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } + + /* Exit boradcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, false); +} + +static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + u32 tmp; + + /* AMD_CG_SUPPORT_DF_MGCG */ + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY) + *flags |= AMD_CG_SUPPORT_DF_MGCG; +} + +static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0, + ForceParWrRMW, enable); +} + +const struct amdgpu_df_funcs df_v1_7_funcs = { + .init = df_v1_7_init, + .enable_broadcast_mode = df_v1_7_enable_broadcast_mode, + .get_fb_channel_number = df_v1_7_get_fb_channel_number, + .get_hbm_channel_number = df_v1_7_get_hbm_channel_number, + .update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating, + .get_clockgating_state = df_v1_7_get_clockgating_state, + .enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk104.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h index 780a1d973634..74621104c487 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk104.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h @@ -1,5 +1,5 @@ /* - * Copyright 2015 Red Hat Inc. + * Copyright 2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,20 +19,22 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * - * Authors: Ben Skeggs <bskeggs@redhat.com> */ -#include "dmacnv50.h" -#include "rootnv50.h" -#include <nvif/class.h> +#ifndef __DF_V1_7_H__ +#define __DF_V1_7_H__ -const struct nv50_disp_dmac_oclass -gk104_disp_base_oclass = { - .base.oclass = GK104_DISP_BASE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_base_new, - .func = &gf119_disp_dmac_func, - .mthd = &gf119_disp_base_chan_mthd, - .chid = 1, +#include "soc15_common.h" +enum DF_V1_7_MGCG +{ + DF_V1_7_MGCG_DISABLE = 0, + DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY =1, + DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY =2, + DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY =13, + DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY =14, + DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY =15 }; + +extern const struct amdgpu_df_funcs df_v1_7_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c new file mode 100644 index 000000000000..d5ebe566809b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -0,0 +1,116 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v3_6.h" + +#include "df/df_3_6_default.h" +#include "df/df_3_6_offset.h" +#include "df/df_3_6_sh_mask.h" + +static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, + 16, 32, 0, 0, 0, 2, 4, 8}; + +static void df_v3_6_init(struct amdgpu_device *adev) +{ +} + +static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + if (enable) { + tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl); + tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; + WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp); + } else + WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, + mmFabricConfigAccessControl_DEFAULT); +} + +static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev) +{ + u32 tmp; + + tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0); + tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK; + tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + + return tmp; +} + +static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev) +{ + int fb_channel_number; + + fb_channel_number = adev->df_funcs->get_fb_channel_number(adev); + if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number)) + fb_channel_number = 0; + + return df_v3_6_channel_number[fb_channel_number]; +} + +static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + /* Put DF on broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, true); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY; + WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } else { + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + tmp |= DF_V3_6_MGCG_DISABLE; + WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp); + } + + /* Exit broadcast mode */ + adev->df_funcs->enable_broadcast_mode(adev, false); +} + +static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + u32 tmp; + + /* AMD_CG_SUPPORT_DF_MGCG */ + tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater); + if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY) + *flags |= AMD_CG_SUPPORT_DF_MGCG; +} + +const struct amdgpu_df_funcs df_v3_6_funcs = { + .init = df_v3_6_init, + .enable_broadcast_mode = df_v3_6_enable_broadcast_mode, + .get_fb_channel_number = df_v3_6_get_fb_channel_number, + .get_hbm_channel_number = df_v3_6_get_hbm_channel_number, + .update_medium_grain_clock_gating = + df_v3_6_update_medium_grain_clock_gating, + .get_clockgating_state = df_v3_6_get_clockgating_state, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk110.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h index d8bdd246c8ed..e79c58e5efcb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk110.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -1,5 +1,5 @@ /* - * Copyright 2015 Red Hat Inc. + * Copyright 2018 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,20 +19,22 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * - * Authors: Ben Skeggs <bskeggs@redhat.com> */ -#include "dmacnv50.h" -#include "rootnv50.h" -#include <nvif/class.h> +#ifndef __DF_V3_6_H__ +#define __DF_V3_6_H__ -const struct nv50_disp_dmac_oclass -gk110_disp_base_oclass = { - .base.oclass = GK110_DISP_BASE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_base_new, - .func = &gf119_disp_dmac_func, - .mthd = &gf119_disp_base_chan_mthd, - .chid = 1, +#include "soc15_common.h" + +enum DF_V3_6_MGCG { + DF_V3_6_MGCG_DISABLE = 0, + DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1, + DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2, + DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13, + DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14, + DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15 }; + +extern const struct amdgpu_df_funcs df_v3_6_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index e14263fca1c9..818874b13c99 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -125,18 +125,6 @@ MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); -MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); - MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin"); MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); @@ -149,6 +137,18 @@ MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin"); MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); + MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin"); MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); @@ -161,6 +161,13 @@ MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin"); MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); +MODULE_FIRMWARE("amdgpu/vegam_ce.bin"); +MODULE_FIRMWARE("amdgpu/vegam_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vegam_me.bin"); +MODULE_FIRMWARE("amdgpu/vegam_mec.bin"); +MODULE_FIRMWARE("amdgpu/vegam_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vegam_rlc.bin"); + static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = { {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, @@ -292,6 +299,37 @@ static const u32 tonga_mgcg_cgcg_init[] = mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, }; +static const u32 golden_settings_vegam_a11[] = +{ + mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, + mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000, + mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, + mmDB_DEBUG2, 0xf00fffff, 0x00000400, + mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, + mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, + mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a, + mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, + mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, + mmSQ_CONFIG, 0x07f80000, 0x01180000, + mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, + mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, + mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, + mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054, + mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, +}; + +static const u32 vegam_golden_common_all[] = +{ + mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, + mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, + mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, +}; + static const u32 golden_settings_polaris11_a11[] = { mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, @@ -712,6 +750,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) tonga_golden_common_all, ARRAY_SIZE(tonga_golden_common_all)); break; + case CHIP_VEGAM: + amdgpu_device_program_register_sequence(adev, + golden_settings_vegam_a11, + ARRAY_SIZE(golden_settings_vegam_a11)); + amdgpu_device_program_register_sequence(adev, + vegam_golden_common_all, + ARRAY_SIZE(vegam_golden_common_all)); + break; case CHIP_POLARIS11: case CHIP_POLARIS12: amdgpu_device_program_register_sequence(adev, @@ -918,17 +964,20 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: chip_name = "fiji"; break; - case CHIP_POLARIS11: - chip_name = "polaris11"; + case CHIP_STONEY: + chip_name = "stoney"; break; case CHIP_POLARIS10: chip_name = "polaris10"; break; + case CHIP_POLARIS11: + chip_name = "polaris11"; + break; case CHIP_POLARIS12: chip_name = "polaris12"; break; - case CHIP_STONEY: - chip_name = "stoney"; + case CHIP_VEGAM: + chip_name = "vegam"; break; default: BUG(); @@ -1770,6 +1819,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_POLARIS10: + case CHIP_VEGAM: ret = amdgpu_atombios_get_gfx_info(adev); if (ret) return ret; @@ -1957,12 +2007,13 @@ static int gfx_v8_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (adev->asic_type) { - case CHIP_FIJI: case CHIP_TONGA: + case CHIP_CARRIZO: + case CHIP_FIJI: + case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: - case CHIP_POLARIS10: - case CHIP_CARRIZO: + case CHIP_VEGAM: adev->gfx.mec.num_mec = 2; break; case CHIP_TOPAZ: @@ -2323,6 +2374,7 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) break; case CHIP_FIJI: + case CHIP_VEGAM: modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | @@ -3504,6 +3556,7 @@ gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) { switch (adev->asic_type) { case CHIP_FIJI: + case CHIP_VEGAM: *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) | PKR_YSEL(1) | @@ -4071,7 +4124,8 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev) gfx_v8_0_init_power_gating(adev); WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); } else if ((adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) { + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) { gfx_v8_0_init_csb(adev); gfx_v8_0_init_save_restore_list(adev); gfx_v8_0_enable_save_restore_machine(adev); @@ -4146,7 +4200,8 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); if (adev->asic_type == CHIP_POLARIS11 || adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS12) { + adev->asic_type == CHIP_POLARIS12 || + adev->asic_type == CHIP_VEGAM) { tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); tmp &= ~0x3; WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); @@ -5498,7 +5553,8 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade bool enable) { if ((adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) /* Send msg to SMU via Powerplay */ amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_SMC, @@ -5588,6 +5644,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, break; case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); else @@ -6154,6 +6211,7 @@ static int gfx_v8_0_set_clockgating_state(void *handle, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9d39fd5b1822..a69153435ea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -27,6 +27,7 @@ #include "amdgpu_gfx.h" #include "soc15.h" #include "soc15d.h" +#include "amdgpu_atomfirmware.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" @@ -41,7 +42,6 @@ #define GFX9_MEC_HPD_SIZE 2048 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L -#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34 #define mmPWR_MISC_CNTL_STATUS 0x0183 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 @@ -64,6 +64,13 @@ MODULE_FIRMWARE("amdgpu/vega12_mec.bin"); MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); +MODULE_FIRMWARE("amdgpu/vega20_ce.bin"); +MODULE_FIRMWARE("amdgpu/vega20_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vega20_me.bin"); +MODULE_FIRMWARE("amdgpu/vega20_mec.bin"); +MODULE_FIRMWARE("amdgpu/vega20_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vega20_rlc.bin"); + MODULE_FIRMWARE("amdgpu/raven_ce.bin"); MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); MODULE_FIRMWARE("amdgpu/raven_me.bin"); @@ -73,29 +80,22 @@ MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_9_0[] = { - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = @@ -109,6 +109,21 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800) }; +static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) +}; + static const struct soc15_reg_golden golden_settings_gc_9_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), @@ -185,6 +200,30 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000) }; +static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = +{ + mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, + mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, +}; + +static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = +{ + mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, + mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, +}; + #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 @@ -218,6 +257,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_9_2_1_vg12, ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); break; + case CHIP_VEGA20: + soc15_program_register_sequence(adev, + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_9_0_vg20, + ARRAY_SIZE(golden_settings_gc_9_0_vg20)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_gc_9_1, @@ -401,6 +448,27 @@ static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } +static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_1 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); + adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); + adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); + adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); + adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); + adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); + adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); + adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); + adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); + adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); + adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); + adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); + adev->gfx.rlc.reg_list_format_direct_reg_list_length = + le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -412,6 +480,8 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) const struct rlc_firmware_header_v2_0 *rlc_hdr; unsigned int *tmp = NULL; unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; DRM_DEBUG("\n"); @@ -422,6 +492,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) case CHIP_VEGA12: chip_name = "vega12"; break; + case CHIP_VEGA20: + chip_name = "vega20"; + break; case CHIP_RAVEN: chip_name = "raven"; break; @@ -468,6 +541,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + + version_major = le16_to_cpu(rlc_hdr->header.header_version_major); + version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); + if (version_major == 2 && version_minor == 1) + adev->gfx.rlc.is_rlc_v2_1 = true; + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); adev->gfx.rlc.save_and_restore_offset = @@ -508,6 +587,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); + if (adev->gfx.rlc.is_rlc_v2_1) + gfx_v9_0_init_rlc_ext_microcode(adev); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) @@ -566,6 +648,26 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + if (adev->gfx.rlc.is_rlc_v2_1) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); + } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; @@ -1013,9 +1115,10 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q }; -static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) +static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; + int err; adev->gfx.funcs = &gfx_v9_0_gfx_funcs; @@ -1037,6 +1140,20 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; DRM_INFO("fix gfx.config for vega12\n"); break; + case CHIP_VEGA20: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + gb_addr_config &= ~0xf3e777ff; + gb_addr_config |= 0x22014042; + /* check vbios table if gpu info is not available */ + err = amdgpu_atomfirmware_get_gfx_info(adev); + if (err) + return err; + break; case CHIP_RAVEN: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; @@ -1086,6 +1203,8 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config, GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); + + return 0; } static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, @@ -1319,6 +1438,7 @@ static int gfx_v9_0_sw_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: adev->gfx.mec.num_mec = 2; break; @@ -1446,7 +1566,9 @@ static int gfx_v9_0_sw_init(void *handle) adev->gfx.ce_ram_size = 0x8000; - gfx_v9_0_gpu_early_init(adev); + r = gfx_v9_0_gpu_early_init(adev); + if (r) + return r; r = gfx_v9_0_ngg_init(adev); if (r) @@ -1600,6 +1722,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) gfx_v9_0_setup_rb(adev); gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); + adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ @@ -1616,7 +1739,10 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); - tmp = adev->gmc.shared_aperture_start >> 48; + tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, + (adev->gmc.private_aperture_start >> 48)); + tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, + (adev->gmc.shared_aperture_start >> 48)); WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); } } @@ -1708,55 +1834,44 @@ static void gfx_v9_0_init_csb(struct amdgpu_device *adev) adev->gfx.rlc.clear_state_size); } -static void gfx_v9_0_parse_ind_reg_list(int *register_list_format, +static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, int indirect_offset, int list_size, int *unique_indirect_regs, - int *unique_indirect_reg_count, - int max_indirect_reg_count, + int unique_indirect_reg_count, int *indirect_start_offsets, int *indirect_start_offsets_count, - int max_indirect_start_offsets_count) + int max_start_offsets_count) { int idx; - bool new_entry = true; for (; indirect_offset < list_size; indirect_offset++) { + WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); + indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; + *indirect_start_offsets_count = *indirect_start_offsets_count + 1; + + while (register_list_format[indirect_offset] != 0xFFFFFFFF) { + indirect_offset += 2; + + /* look for the matching indice */ + for (idx = 0; idx < unique_indirect_reg_count; idx++) { + if (unique_indirect_regs[idx] == + register_list_format[indirect_offset] || + !unique_indirect_regs[idx]) + break; + } - if (new_entry) { - new_entry = false; - indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; - *indirect_start_offsets_count = *indirect_start_offsets_count + 1; - BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count); - } - - if (register_list_format[indirect_offset] == 0xFFFFFFFF) { - new_entry = true; - continue; - } - - indirect_offset += 2; + BUG_ON(idx >= unique_indirect_reg_count); - /* look for the matching indice */ - for (idx = 0; idx < *unique_indirect_reg_count; idx++) { - if (unique_indirect_regs[idx] == - register_list_format[indirect_offset]) - break; - } + if (!unique_indirect_regs[idx]) + unique_indirect_regs[idx] = register_list_format[indirect_offset]; - if (idx >= *unique_indirect_reg_count) { - unique_indirect_regs[*unique_indirect_reg_count] = - register_list_format[indirect_offset]; - idx = *unique_indirect_reg_count; - *unique_indirect_reg_count = *unique_indirect_reg_count + 1; - BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count); + indirect_offset++; } - - register_list_format[indirect_offset] = idx; } } -static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) +static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) { int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; int unique_indirect_reg_count = 0; @@ -1765,7 +1880,7 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) int indirect_start_offsets_count = 0; int list_size = 0; - int i = 0; + int i = 0, j = 0; u32 tmp = 0; u32 *register_list_format = @@ -1776,15 +1891,15 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.reg_list_format_size_bytes); /* setup unique_indirect_regs array and indirect_start_offsets array */ - gfx_v9_0_parse_ind_reg_list(register_list_format, - GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, - adev->gfx.rlc.reg_list_format_size_bytes >> 2, - unique_indirect_regs, - &unique_indirect_reg_count, - ARRAY_SIZE(unique_indirect_regs), - indirect_start_offsets, - &indirect_start_offsets_count, - ARRAY_SIZE(indirect_start_offsets)); + unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); + gfx_v9_1_parse_ind_reg_list(register_list_format, + adev->gfx.rlc.reg_list_format_direct_reg_list_length, + adev->gfx.rlc.reg_list_format_size_bytes >> 2, + unique_indirect_regs, + unique_indirect_reg_count, + indirect_start_offsets, + &indirect_start_offsets_count, + ARRAY_SIZE(indirect_start_offsets)); /* enable auto inc in case it is disabled */ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); @@ -1798,19 +1913,37 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), adev->gfx.rlc.register_restore[i]); - /* load direct register */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0); - for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), - adev->gfx.rlc.register_restore[i]); - /* load indirect register */ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), adev->gfx.rlc.reg_list_format_start); - for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) + + /* direct register portion */ + for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), register_list_format[i]); + /* indirect register portion */ + while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { + if (register_list_format[i] == 0xFFFFFFFF) { + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + continue; + } + + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); + + for (j = 0; j < unique_indirect_reg_count; j++) { + if (register_list_format[i] == unique_indirect_regs[j]) { + WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); + break; + } + } + + BUG_ON(j >= unique_indirect_reg_count); + + i++; + } + /* set save/restore list size */ list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; list_size = list_size >> 1; @@ -1823,14 +1956,19 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.starting_offsets_start); for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), - indirect_start_offsets[i]); + indirect_start_offsets[i]); /* load unique indirect regs*/ for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, - unique_indirect_regs[i] & 0x3FFFF); - WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, - unique_indirect_regs[i] >> 20); + if (unique_indirect_regs[i] != 0) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], + unique_indirect_regs[i] & 0x3FFFF); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], + unique_indirect_regs[i] >> 20); + } } kfree(register_list_format); @@ -2010,6 +2148,9 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad static void gfx_v9_0_init_pg(struct amdgpu_device *adev) { + if (!adev->gfx.rlc.is_rlc_v2_1) + return; + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_DMG | @@ -2017,27 +2158,12 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { gfx_v9_0_init_csb(adev); - gfx_v9_0_init_rlc_save_restore_list(adev); + gfx_v9_1_init_rlc_save_restore_list(adev); gfx_v9_0_enable_save_restore_machine(adev); - if (adev->asic_type == CHIP_RAVEN) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); - gfx_v9_0_init_gfx_power_gating(adev); - - if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { - gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); - gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); - } else { - gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); - gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); - } - - if (adev->pg_flags & AMD_PG_SUPPORT_CP) - gfx_v9_0_enable_cp_power_gating(adev, true); - else - gfx_v9_0_enable_cp_power_gating(adev, false); - } + WREG32(mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); + gfx_v9_0_init_gfx_power_gating(adev); } } @@ -3061,6 +3187,9 @@ static int gfx_v9_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_UNGATE); + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -3339,8 +3468,7 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, bool enable) { - /* TODO: double check if we need to perform under safe mdoe */ - /* gfx_v9_0_enter_rlc_safe_mode(adev); */ + gfx_v9_0_enter_rlc_safe_mode(adev); if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); @@ -3351,7 +3479,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } - /* gfx_v9_0_exit_rlc_safe_mode(adev); */ + gfx_v9_0_exit_rlc_safe_mode(adev); } static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, @@ -3605,6 +3733,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: gfx_v9_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -3742,7 +3871,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } amdgpu_ring_write(ring, header); -BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -3774,13 +3903,16 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); - amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | - EOP_TC_ACTION_EN | - EOP_TC_WB_ACTION_EN | - EOP_TC_MD_ACTION_EN | + amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | + EOP_TC_NC_ACTION_EN) : + (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EOP_TC_MD_ACTION_EN)) | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5))); amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); @@ -4137,6 +4269,20 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); } +static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + + if (amdgpu_sriov_vf(ring->adev)) + gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); + else + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { @@ -4458,6 +4604,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_tmz = gfx_v9_0_ring_emit_tmz, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -4492,6 +4639,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .set_priority = gfx_v9_0_ring_set_priority_compute, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { @@ -4522,6 +4670,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .emit_rreg = gfx_v9_0_ring_emit_rreg, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) @@ -4577,6 +4726,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; break; @@ -4686,6 +4836,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 5617cf62c566..79f9ac29019b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -819,12 +819,33 @@ static int gmc_v6_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_bo_late_init(adev); + if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } +static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32(mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + return size; +} + static int gmc_v6_0_sw_init(void *handle) { int r; @@ -851,8 +872,6 @@ static int gmc_v6_0_sw_init(void *handle) adev->gmc.mc_mask = 0xffffffffffULL; - adev->gmc.stolen_size = 256 * 1024; - adev->need_dma32 = false; dma_bits = adev->need_dma32 ? 32 : 40; r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); @@ -878,6 +897,8 @@ static int gmc_v6_0_sw_init(void *handle) if (r) return r; + adev->gmc.stolen_size = gmc_v6_0_get_vbios_fb_size(adev); + r = amdgpu_bo_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 80054f36e487..7147bfe25a23 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -958,12 +958,33 @@ static int gmc_v7_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_bo_late_init(adev); + if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } +static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32(mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + return size; +} + static int gmc_v7_0_sw_init(void *handle) { int r; @@ -998,8 +1019,6 @@ static int gmc_v7_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - adev->gmc.stolen_size = 256 * 1024; - /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. * IGP - can handle 40-bits @@ -1030,6 +1049,8 @@ static int gmc_v7_0_sw_init(void *handle) if (r) return r; + adev->gmc.stolen_size = gmc_v7_0_get_vbios_fb_size(adev); + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index d71d4cb68f9c..1edbe6b477b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -138,6 +138,7 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) break; case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: amdgpu_device_program_register_sequence(adev, golden_settings_polaris11_a11, ARRAY_SIZE(golden_settings_polaris11_a11)); @@ -231,6 +232,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: case CHIP_CARRIZO: case CHIP_STONEY: + case CHIP_VEGAM: return 0; default: BUG(); } @@ -567,9 +569,10 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) /* set the gart size */ if (amdgpu_gart_size == -1) { switch (adev->asic_type) { - case CHIP_POLARIS11: /* all engines support GPUVM */ case CHIP_POLARIS10: /* all engines support GPUVM */ + case CHIP_POLARIS11: /* all engines support GPUVM */ case CHIP_POLARIS12: /* all engines support GPUVM */ + case CHIP_VEGAM: /* all engines support GPUVM */ default: adev->gmc.gart_size = 256ULL << 20; break; @@ -1049,12 +1052,33 @@ static int gmc_v8_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_bo_late_init(adev); + if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else return 0; } +static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ + u32 d1vga_control = RREG32(mmD1VGA_CONTROL); + unsigned size; + + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + return size; +} + #define mmMC_SEQ_MISC0_FIJI 0xA71 static int gmc_v8_0_sw_init(void *handle) @@ -1068,7 +1092,8 @@ static int gmc_v8_0_sw_init(void *handle) } else { u32 tmp; - if (adev->asic_type == CHIP_FIJI) + if ((adev->asic_type == CHIP_FIJI) || + (adev->asic_type == CHIP_VEGAM)) tmp = RREG32(mmMC_SEQ_MISC0_FIJI); else tmp = RREG32(mmMC_SEQ_MISC0); @@ -1096,8 +1121,6 @@ static int gmc_v8_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - adev->gmc.stolen_size = 256 * 1024; - /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. * IGP - can handle 40-bits @@ -1128,6 +1151,8 @@ static int gmc_v8_0_sw_init(void *handle) if (r) return r; + adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev); + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e687363900bb..3c0a85d4e4ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -43,19 +43,13 @@ #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" -#define mmDF_CS_AON0_DramBaseAddress0 0x0044 -#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0 -//DF_CS_AON0_DramBaseAddress0 -#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 -#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 -#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4 -#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8 -#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc -#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L -#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L -#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L -#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L -#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L +/* add these here since we already include dce12 headers and these are for DCN */ +#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d +#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2 +#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT 0x0 +#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT 0x10 +#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK 0x00003FFFL +#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK 0x3FFF0000L /* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ #define AMDGPU_NUM_OF_VMIDS 8 @@ -385,11 +379,9 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); - - /* wait for the invalidate to complete */ - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, - 1 << vmid, 1 << vmid); + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); return pd_addr; } @@ -556,8 +548,7 @@ static int gmc_v9_0_early_init(void *handle) adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = adev->gmc.shared_aperture_start + (4ULL << 30) - 1; - adev->gmc.private_aperture_start = - adev->gmc.shared_aperture_end + 1; + adev->gmc.private_aperture_start = 0x1000000000000000ULL; adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; @@ -659,6 +650,11 @@ static int gmc_v9_0_late_init(void *handle) unsigned i; int r; + /* + * TODO - Uncomment once GART corruption issue is fixed. + */ + /* amdgpu_bo_late_init(adev); */ + for(i = 0; i < adev->num_rings; ++i) { struct amdgpu_ring *ring = adev->rings[i]; unsigned vmhub = ring->funcs->vmhub; @@ -679,6 +675,7 @@ static int gmc_v9_0_late_init(void *handle) DRM_INFO("ECC is active.\n"); } else if (r == 0) { DRM_INFO("ECC is not present.\n"); + adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false); } else { DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r); return r; @@ -697,10 +694,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_device_vram_location(adev, &adev->gmc, base); amdgpu_device_gart_location(adev, mc); /* base offset of vram pages */ - if (adev->flags & AMD_IS_APU) - adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); - else - adev->vm_manager.vram_base_offset = 0; + adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); } /** @@ -714,7 +708,6 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, */ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) { - u32 tmp; int chansize, numchan; int r; @@ -727,39 +720,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) else chansize = 128; - tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); - tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; - tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; - switch (tmp) { - case 0: - default: - numchan = 1; - break; - case 1: - numchan = 2; - break; - case 2: - numchan = 0; - break; - case 3: - numchan = 4; - break; - case 4: - numchan = 0; - break; - case 5: - numchan = 8; - break; - case 6: - numchan = 0; - break; - case 7: - numchan = 16; - break; - case 8: - numchan = 2; - break; - } + numchan = adev->df_funcs->get_hbm_channel_number(adev); adev->gmc.vram_width = numchan * chansize; } @@ -792,6 +753,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: /* all engines support GPUVM */ case CHIP_VEGA12: /* all engines support GPUVM */ + case CHIP_VEGA20: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -826,6 +788,52 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) return amdgpu_gart_table_vram_alloc(adev); } +static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) +{ +#if 0 + u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); +#endif + unsigned size; + + /* + * TODO Remove once GART corruption is resolved + * Check related code in gmc_v9_0_sw_fini + * */ + size = 9 * 1024 * 1024; + +#if 0 + if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { + size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ + } else { + u32 viewport; + + switch (adev->asic_type) { + case CHIP_RAVEN: + viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); + size = (REG_GET_FIELD(viewport, + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, + HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) * + 4); + break; + case CHIP_VEGA10: + case CHIP_VEGA12: + default: + viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) * + REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) * + 4); + break; + } + } + /* return 0 if the pre-OS buffer uses up most of vram */ + if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) + return 0; + +#endif + return size; +} + static int gmc_v9_0_sw_init(void *handle) { int r; @@ -851,6 +859,7 @@ static int gmc_v9_0_sw_init(void *handle) break; case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Vega10, @@ -877,12 +886,6 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - /* - * It needs to reserve 8M stolen memory for vega10 - * TODO: Figure out how to avoid that... - */ - adev->gmc.stolen_size = 8 * 1024 * 1024; - /* set DMA mask + need_dma32 flags. * PCIE - can handle 44-bits. * IGP - can handle 44-bits @@ -907,6 +910,8 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev); + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) @@ -950,6 +955,18 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); gmc_v9_0_gart_fini(adev); + + /* + * TODO: + * Currently there is a bug where some memory client outside + * of the driver writes to first 8M of VRAM on S3 resume, + * this overrides GART which by default gets placed in first 8M and + * causes VM_FAULTS once GTT is accessed. + * Keep the stolen memory reservation until the while this is not solved. + * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init + */ + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_bo_fini(adev); return 0; @@ -960,6 +977,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_VEGA20: soc15_program_register_sequence(adev, golden_settings_mmhub_1_0_0, ARRAY_SIZE(golden_settings_mmhub_1_0_0)); diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 26ba984ab2b7..17f7f074cedc 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -2817,7 +2817,7 @@ static int kv_dpm_init(struct amdgpu_device *adev) pi->caps_tcp_ramping = true; } - if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK) + if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK) pi->caps_sclk_ds = true; else pi->caps_sclk_ds = false; @@ -2974,7 +2974,7 @@ static int kv_dpm_late_init(void *handle) /* powerdown unused blocks for now */ struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_dpm) + if (!adev->pm.dpm_enabled) return 0; kv_dpm_powergate_acp(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 43f925773b57..3d53c4413f13 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -734,6 +734,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: mmhub_v1_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 493348672475..078f70faedcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -260,8 +260,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } while (timeout > 1); flr_done: - if (locked) + if (locked) { + adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); + } /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_lockup_timeout == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index df34dc79d444..365517c0121e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -34,10 +34,19 @@ #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 +/* vega20 */ +#define mmRCC_DEV0_EPF0_STRAP0_VG20 0x0011 +#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX 2 + static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + if (adev->asic_type == CHIP_VEGA20) + tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20); + else + tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -75,10 +84,14 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); u32 doorbell_range = RREG32(reg); + u32 range = 2; + + if (adev->asic_type == CHIP_VEGA20) + range = 8; if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); - doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range); } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); @@ -133,6 +146,9 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; + if (adev->asic_type == CHIP_VEGA20) + return; + /* NBIF_MGCG_CTRL_LCLK */ def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 8da6da90b1c9..0cf48d26c676 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -40,11 +40,20 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */ GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */ GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialized the rings */ + GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */ + GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ + GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ }; +/*----------------------------------------------------------------------------- + NOTE: All physical addresses used in this interface are actually + GPU Virtual Addresses. +*/ + + /* Control registers of the TEE Gfx interface. These are located in * SRBM-to-PSP mailbox registers (total 8 registers). */ @@ -55,8 +64,8 @@ struct psp_gfx_ctrl volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */ volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */ volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */ - volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of physical address of ring buffer */ - volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of physical address of ring buffer */ + volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of GPU Virtual of ring buffer (VMID=0)*/ + volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of GPU Virtual of ring buffer (VMID=0) */ volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */ }; @@ -78,6 +87,8 @@ enum psp_gfx_cmd_id GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */ GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */ GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ + GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */ + GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ }; @@ -85,11 +96,11 @@ enum psp_gfx_cmd_id /* Command to load Trusted Application binary into PSP OS. */ struct psp_gfx_cmd_load_ta { - uint32_t app_phy_addr_lo; /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */ - uint32_t app_phy_addr_hi; /* bits [63:32] of the physical address of the TA binary */ + uint32_t app_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */ + uint32_t app_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of the TA binary */ uint32_t app_len; /* length of the TA binary in bytes */ - uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */ - uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the physical address of CMD buffer */ + uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */ + uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the GPU Virtual address of CMD buffer */ uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */ /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided @@ -111,8 +122,8 @@ struct psp_gfx_cmd_unload_ta */ struct psp_gfx_buf_desc { - uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */ - uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of the buffer */ + uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */ + uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of the buffer */ uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */ }; @@ -145,8 +156,8 @@ struct psp_gfx_cmd_invoke_cmd /* Command to setup TMR region. */ struct psp_gfx_cmd_setup_tmr { - uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */ - uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of TMR buffer */ + uint32_t buf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */ + uint32_t buf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */ uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */ }; @@ -174,18 +185,32 @@ enum psp_gfx_fw_type GFX_FW_TYPE_ISP = 16, GFX_FW_TYPE_ACP = 17, GFX_FW_TYPE_SMU = 18, + GFX_FW_TYPE_MMSCH = 19, + GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, + GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, + GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22, + GFX_FW_TYPE_MAX = 23 }; /* Command to load HW IP FW. */ struct psp_gfx_cmd_load_ip_fw { - uint32_t fw_phy_addr_lo; /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */ - uint32_t fw_phy_addr_hi; /* bits [63:32] of physical address of FW location */ + uint32_t fw_phy_addr_lo; /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */ + uint32_t fw_phy_addr_hi; /* bits [63:32] of GPU Virtual address of FW location */ uint32_t fw_size; /* FW buffer size in bytes */ enum psp_gfx_fw_type fw_type; /* FW type */ }; +/* Command to save/restore HW IP FW. */ +struct psp_gfx_cmd_save_restore_ip_fw +{ + uint32_t save_fw; /* if set, command is used for saving fw otherwise for resetoring*/ + uint32_t save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */ + uint32_t save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */ + uint32_t buf_size; /* Size of the save/restore buffer in bytes */ + enum psp_gfx_fw_type fw_type; /* FW type */ +}; /* All GFX ring buffer commands. */ union psp_gfx_commands @@ -195,7 +220,7 @@ union psp_gfx_commands struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd; struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; - + struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw; }; @@ -226,8 +251,8 @@ struct psp_gfx_cmd_resp /* These fields are used for RBI only. They are all 0 in GPCOM commands */ - uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */ - uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of physical address of response buffer */ + uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */ + uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of GPU Virtual address of response buffer */ uint32_t resp_offset; /* +20 offset within response buffer */ uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */ @@ -251,19 +276,19 @@ struct psp_gfx_cmd_resp /* Structure of the Ring Buffer Frame */ struct psp_gfx_rb_frame { - uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of physical address of command buffer (must be 4 KB aligned) */ - uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of physical address of command buffer */ + uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */ + uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of GPU Virtual address of command buffer */ uint32_t cmd_buf_size; /* +8 command buffer size in bytes */ - uint32_t fence_addr_lo; /* +12 bits [31:0] of physical address of Fence for this frame */ - uint32_t fence_addr_hi; /* +16 bits [63:32] of physical address of Fence for this frame */ + uint32_t fence_addr_lo; /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */ + uint32_t fence_addr_hi; /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */ uint32_t fence_value; /* +20 Fence value */ uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */ uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */ uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */ uint8_t frame_type; /* +33 1: destory context frame, 0: all other frames; used only for RBI frames */ uint8_t reserved1[2]; /* +34 reserved, must be 0 */ - uint32_t reserved2[7]; /* +40 reserved, must be 0 */ - /* total 64 bytes */ + uint32_t reserved2[7]; /* +36 reserved, must be 0 */ + /* total 64 bytes */ }; #endif /* _PSP_TEE_GFX_IF_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 8873d833a7f7..0ff136d02d9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -70,6 +70,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * case AMDGPU_UCODE_ID_RLC_G: *type = GFX_FW_TYPE_RLC_G; break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM; + break; + case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM: + *type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM; + break; case AMDGPU_UCODE_ID_SMC: *type = GFX_FW_TYPE_SMU; break; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 196e75def1f2..727071fee6f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -41,9 +41,14 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); MODULE_FIRMWARE("amdgpu/vega12_sos.bin"); MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); +MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); +MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); + #define smnMP1_FIRMWARE_FLAGS 0x3010028 +static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554}; + static int psp_v3_1_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) { @@ -207,12 +212,31 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) return ret; } +static bool psp_v3_1_match_version(struct amdgpu_device *adev, uint32_t ver) +{ + int i; + + if (ver == adev->psp.sos_fw_version) + return true; + + /* + * Double check if the latest four legacy versions. + * If yes, it is still the right version. + */ + for (i = 0; i < sizeof(sos_old_versions) / sizeof(uint32_t); i++) { + if (sos_old_versions[i] == adev->psp.sos_fw_version) + return true; + } + + return false; +} + static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) { int ret; unsigned int psp_gfxdrv_command_reg = 0; struct amdgpu_device *adev = psp->adev; - uint32_t sol_reg; + uint32_t sol_reg, ver; /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. @@ -245,6 +269,10 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, true); + ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + if (!psp_v3_1_match_version(adev, ver)) + DRM_WARN("SOS version doesn't match\n"); + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index be20a387d961..aa9ab299fd32 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -62,6 +62,8 @@ MODULE_FIRMWARE("amdgpu/polaris11_sdma.bin"); MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin"); MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin"); MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/vegam_sdma.bin"); +MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin"); static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = @@ -209,6 +211,7 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) break; case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: amdgpu_device_program_register_sequence(adev, golden_settings_polaris11_a11, ARRAY_SIZE(golden_settings_polaris11_a11)); @@ -275,15 +278,18 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: chip_name = "fiji"; break; - case CHIP_POLARIS11: - chip_name = "polaris11"; - break; case CHIP_POLARIS10: chip_name = "polaris10"; break; + case CHIP_POLARIS11: + chip_name = "polaris11"; + break; case CHIP_POLARIS12: chip_name = "polaris12"; break; + case CHIP_VEGAM: + chip_name = "vegam"; + break; case CHIP_CARRIZO: chip_name = "carrizo"; break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 399f876f9cad..ca53b3fba422 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -42,6 +42,8 @@ MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); MODULE_FIRMWARE("amdgpu/vega12_sdma.bin"); MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/vega20_sdma.bin"); +MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin"); MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L @@ -107,6 +109,28 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] = SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0) }; +static const struct soc15_reg_golden golden_settings_sdma_4_2[] = +{ + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0) +}; + static const struct soc15_reg_golden golden_settings_sdma_rv1[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), @@ -139,6 +163,11 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_vg12, ARRAY_SIZE(golden_settings_sdma_vg12)); break; + case CHIP_VEGA20: + soc15_program_register_sequence(adev, + golden_settings_sdma_4_2, + ARRAY_SIZE(golden_settings_sdma_4_2)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_sdma_4_1, @@ -182,6 +211,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) case CHIP_VEGA12: chip_name = "vega12"; break; + case CHIP_VEGA20: + chip_name = "vega20"; + break; case CHIP_RAVEN: chip_name = "raven"; break; @@ -360,6 +392,31 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, } +static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring, + int mem_space, int hdp, + uint32_t addr0, uint32_t addr1, + uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) | + SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + if (mem_space) { + /* memory */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + } else { + /* registers */ + amdgpu_ring_write(ring, addr0 << 2); + amdgpu_ring_write(ring, addr1 << 2); + } + amdgpu_ring_write(ring, ref); /* reference */ + amdgpu_ring_write(ring, mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */ +} + /** * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring * @@ -378,15 +435,10 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) else ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + sdma_v4_0_wait_reg_mem(ring, 0, 1, + adev->nbio_funcs->get_hdp_flush_done_offset(adev), + adev->nbio_funcs->get_hdp_flush_req_offset(adev), + ref_and_mask, ref_and_mask, 10); } /** @@ -1114,16 +1166,10 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) uint64_t addr = ring->fence_drv.gpu_addr; /* wait for idle */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ - SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); - amdgpu_ring_write(ring, addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); - amdgpu_ring_write(ring, seq); /* reference */ - amdgpu_ring_write(ring, 0xffffffff); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ + sdma_v4_0_wait_reg_mem(ring, 1, 0, + addr & 0xfffffffc, + upper_32_bits(addr) & 0xffffffff, + seq, 0xffffffff, 4); } @@ -1154,15 +1200,7 @@ static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring, static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ - amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, val); /* reference */ - amdgpu_ring_write(ring, mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); + sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10); } static int sdma_v4_0_early_init(void *handle) @@ -1510,6 +1548,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: sdma_v4_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -1605,6 +1644,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .pad_ib = sdma_v4_0_ring_pad_ib, .emit_wreg = sdma_v4_0_ring_emit_wreg, .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index a675ec6d2811..c364ef94cc36 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1252,6 +1252,12 @@ static void si_invalidate_hdp(struct amdgpu_device *adev, } } +static bool si_need_full_reset(struct amdgpu_device *adev) +{ + /* change this when we support soft reset */ + return true; +} + static int si_get_pcie_lanes(struct amdgpu_device *adev) { u32 link_width_cntl; @@ -1332,6 +1338,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .get_config_memsize = &si_get_config_memsize, .flush_hdp = &si_flush_hdp, .invalidate_hdp = &si_invalidate_hdp, + .need_full_reset = &si_need_full_reset, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 797d505bf9ee..b12d7c9d42a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -7580,7 +7580,7 @@ static int si_dpm_late_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!amdgpu_dpm) + if (!adev->pm.dpm_enabled) return 0; ret = si_set_temperature_range(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 51cf8a30f6c2..83f2717fcf81 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -41,8 +41,6 @@ #include "sdma1/sdma1_4_0_offset.h" #include "hdp/hdp_4_0_offset.h" #include "hdp/hdp_4_0_sh_mask.h" -#include "mp/mp_9_0_offset.h" -#include "mp/mp_9_0_sh_mask.h" #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" @@ -52,6 +50,8 @@ #include "gmc_v9_0.h" #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" +#include "df_v1_7.h" +#include "df_v3_6.h" #include "vega10_ih.h" #include "sdma_v4_0.h" #include "uvd_v7_0.h" @@ -60,33 +60,6 @@ #include "dce_virtual.h" #include "mxgpu_ai.h" -#define mmFabricConfigAccessControl 0x0410 -#define mmFabricConfigAccessControl_BASE_IDX 0 -#define mmFabricConfigAccessControl_DEFAULT 0x00000000 -//FabricConfigAccessControl -#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0 -#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1 -#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10 -#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L -#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L -#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L - - -#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc -#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 -//DF_PIE_AON0_DfGlobalClkGater -#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0 -#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL - -enum { - DF_MGCG_DISABLE = 0, - DF_MGCG_ENABLE_00_CYCLE_DELAY =1, - DF_MGCG_ENABLE_01_CYCLE_DELAY =2, - DF_MGCG_ENABLE_15_CYCLE_DELAY =13, - DF_MGCG_ENABLE_31_CYCLE_DELAY =14, - DF_MGCG_ENABLE_63_CYCLE_DELAY =15 -}; - #define mmMP0_MISC_CGTT_CTRL0 0x01b9 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 #define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba @@ -313,6 +286,7 @@ static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, + { SOC15_REG_ENTRY(GC, 0, mmDB_DEBUG2)}, }; static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, @@ -341,6 +315,8 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev, } else { if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)) return adev->gfx.config.gb_addr_config; + else if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2)) + return adev->gfx.config.db_debug2; return RREG32(reg_offset); } } @@ -512,15 +488,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) case CHIP_RAVEN: vega10_reg_base_init(adev); break; + case CHIP_VEGA20: + vega20_reg_base_init(adev); + break; default: return -EINVAL; } if (adev->flags & AMD_IS_APU) adev->nbio_funcs = &nbio_v7_0_funcs; + else if (adev->asic_type == CHIP_VEGA20) + adev->nbio_funcs = &nbio_v7_0_funcs; else adev->nbio_funcs = &nbio_v6_1_funcs; + if (adev->asic_type == CHIP_VEGA20) + adev->df_funcs = &df_v3_6_funcs; + else + adev->df_funcs = &df_v1_7_funcs; adev->nbio_funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) @@ -529,12 +514,15 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + if (adev->asic_type != CHIP_VEGA20) { + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + } if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -593,6 +581,12 @@ static void soc15_invalidate_hdp(struct amdgpu_device *adev, HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); } +static bool soc15_need_full_reset(struct amdgpu_device *adev) +{ + /* change this when we implement soft reset */ + return true; +} + static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, @@ -606,6 +600,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .get_config_memsize = &soc15_get_config_memsize, .flush_hdp = &soc15_flush_hdp, .invalidate_hdp = &soc15_invalidate_hdp, + .need_full_reset = &soc15_need_full_reset, }; static int soc15_common_early_init(void *handle) @@ -675,6 +670,28 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x14; break; + case CHIP_VEGA20: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_VCE_MGCG | + AMD_CG_SUPPORT_UVD_MGCG; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x28; + break; case CHIP_RAVEN: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | @@ -694,8 +711,15 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | - AMD_CG_SUPPORT_SDMA_LS; - adev->pg_flags = AMD_PG_SUPPORT_SDMA; + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_VCN_MGCG; + + adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + + if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; adev->external_rev_id = 0x1; break; @@ -871,32 +895,6 @@ static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *ade WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data); } -static void soc15_update_df_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) -{ - uint32_t data; - - /* Put DF on broadcast mode */ - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl)); - data &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; - WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), data); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); - data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - data |= DF_MGCG_ENABLE_15_CYCLE_DELAY; - WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data); - } else { - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); - data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; - data |= DF_MGCG_DISABLE; - WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data); - } - - WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), - mmFabricConfigAccessControl_DEFAULT); -} - static int soc15_common_set_clockgating_state(void *handle, enum amd_clockgating_state state) { @@ -908,6 +906,7 @@ static int soc15_common_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); adev->nbio_funcs->update_medium_grain_light_sleep(adev, @@ -920,7 +919,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE ? true : false); soc15_update_rom_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - soc15_update_df_medium_grain_clock_gating(adev, + adev->df_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; case CHIP_RAVEN: @@ -973,10 +972,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) *flags |= AMD_CG_SUPPORT_ROM_MGCG; - /* AMD_CG_SUPPORT_DF_MGCG */ - data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); - if (data & DF_MGCG_ENABLE_15_CYCLE_DELAY) - *flags |= AMD_CG_SUPPORT_DF_MGCG; + adev->df_funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index f70da8a29f86..1f714b7af520 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -55,5 +55,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, const u32 array_size); int vega10_reg_base_init(struct amdgpu_device *adev); +int vega20_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index def865067edd..0942f492d2e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -47,6 +47,21 @@ #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value) +#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \ + do { \ + uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ + uint32_t loop = adev->usec_timeout; \ + while ((tmp_ & (mask)) != (expected_value)) { \ + udelay(2); \ + tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ + loop--; \ + if (!loop) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + } \ + } while (0) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 7f408f85fdb6..8dc29107228f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -159,6 +159,7 @@ #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ #define EOP_TCL1_ACTION_EN (1 << 16) #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ +#define EOP_TC_NC_ACTION_EN (1 << 19) #define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ #define DATA_SEL(x) ((x) << 29) @@ -268,6 +269,11 @@ * x=1: tmz_end */ +#define PACKET3_INVALIDATE_TLBS 0x98 +# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) +# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) +# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) +# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) #define PACKET3_SET_RESOURCES 0xA0 /* 1. header * 2. CONTROL diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 948bb9437757..6fed3d7797a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -93,6 +93,7 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring) static int uvd_v4_2_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->uvd.num_uvd_inst = 1; uvd_v4_2_set_ring_funcs(adev); uvd_v4_2_set_irq_funcs(adev); @@ -107,7 +108,7 @@ static int uvd_v4_2_sw_init(void *handle) int r; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); if (r) return r; @@ -119,9 +120,9 @@ static int uvd_v4_2_sw_init(void *handle) if (r) return r; - ring = &adev->uvd.ring; + ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); return r; } @@ -150,7 +151,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev, static int uvd_v4_2_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t tmp; int r; @@ -208,7 +209,7 @@ done: static int uvd_v4_2_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; if (RREG32(mmUVD_STATUS) != 0) uvd_v4_2_stop(adev); @@ -251,7 +252,7 @@ static int uvd_v4_2_resume(void *handle) */ static int uvd_v4_2_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t rb_bufsz; int i, j, r; u32 tmp; @@ -523,6 +524,18 @@ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } +static void uvd_v4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0)); + amdgpu_ring_write(ring, 0); + } +} + /** * uvd_v4_2_mc_resume - memory controller programming * @@ -536,7 +549,7 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) uint32_t size; /* programm the VCPU memory controller bits 0-27 */ - addr = (adev->uvd.gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; + addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); WREG32(mmUVD_VCPU_CACHE_SIZE0, size); @@ -553,11 +566,11 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) WREG32(mmUVD_VCPU_CACHE_SIZE2, size); /* bits 28-31 */ - addr = (adev->uvd.gpu_addr >> 28) & 0xF; + addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF; WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); /* bits 32-39 */ - addr = (adev->uvd.gpu_addr >> 32) & 0xFF; + addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF; WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); @@ -664,7 +677,7 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_DEBUG("IH: UVD TRAP\n"); - amdgpu_fence_process(&adev->uvd.ring); + amdgpu_fence_process(&adev->uvd.inst->ring); return 0; } @@ -688,7 +701,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, if (state == AMD_PG_STATE_GATE) { uvd_v4_2_stop(adev); - if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) { + if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) { if (!(RREG32_SMC(ixCURRENT_PG_STATUS) & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) { WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | @@ -699,7 +712,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, } return 0; } else { - if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) { + if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) { if (RREG32_SMC(ixCURRENT_PG_STATUS) & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) { WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK | @@ -732,7 +745,6 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = { static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(mmUVD_NO_OP, 0), .support_64bit_ptrs = false, .get_rptr = uvd_v4_2_ring_get_rptr, .get_wptr = uvd_v4_2_ring_get_wptr, @@ -745,7 +757,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .emit_fence = uvd_v4_2_ring_emit_fence, .test_ring = uvd_v4_2_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = uvd_v4_2_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, @@ -753,7 +765,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) { - adev->uvd.ring.funcs = &uvd_v4_2_ring_funcs; + adev->uvd.inst->ring.funcs = &uvd_v4_2_ring_funcs; } static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = { @@ -763,8 +775,8 @@ static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = { static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev) { - adev->uvd.irq.num_types = 1; - adev->uvd.irq.funcs = &uvd_v4_2_irq_funcs; + adev->uvd.inst->irq.num_types = 1; + adev->uvd.inst->irq.funcs = &uvd_v4_2_irq_funcs; } const struct amdgpu_ip_block_version uvd_v4_2_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 6445d55e7d5a..341ee6d55ce8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -89,6 +89,7 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring) static int uvd_v5_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->uvd.num_uvd_inst = 1; uvd_v5_0_set_ring_funcs(adev); uvd_v5_0_set_irq_funcs(adev); @@ -103,7 +104,7 @@ static int uvd_v5_0_sw_init(void *handle) int r; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); if (r) return r; @@ -115,9 +116,9 @@ static int uvd_v5_0_sw_init(void *handle) if (r) return r; - ring = &adev->uvd.ring; + ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); return r; } @@ -144,7 +145,7 @@ static int uvd_v5_0_sw_fini(void *handle) static int uvd_v5_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t tmp; int r; @@ -204,7 +205,7 @@ done: static int uvd_v5_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; if (RREG32(mmUVD_STATUS) != 0) uvd_v5_0_stop(adev); @@ -253,9 +254,9 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) /* programm memory controller bits 0-27 */ WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.gpu_addr)); + lower_32_bits(adev->uvd.inst->gpu_addr)); WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.gpu_addr)); + upper_32_bits(adev->uvd.inst->gpu_addr)); offset = AMDGPU_UVD_FIRMWARE_OFFSET; size = AMDGPU_UVD_FIRMWARE_SIZE(adev); @@ -287,7 +288,7 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) */ static int uvd_v5_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; uint32_t mp_swap_cntl; @@ -540,6 +541,18 @@ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ib->length_dw); } +static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0)); + amdgpu_ring_write(ring, 0); + } +} + static bool uvd_v5_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -586,7 +599,7 @@ static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_DEBUG("IH: UVD TRAP\n"); - amdgpu_fence_process(&adev->uvd.ring); + amdgpu_fence_process(&adev->uvd.inst->ring); return 0; } @@ -840,7 +853,6 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = { static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(mmUVD_NO_OP, 0), .support_64bit_ptrs = false, .get_rptr = uvd_v5_0_ring_get_rptr, .get_wptr = uvd_v5_0_ring_get_wptr, @@ -853,7 +865,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .emit_fence = uvd_v5_0_ring_emit_fence, .test_ring = uvd_v5_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = uvd_v5_0_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, @@ -861,7 +873,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) { - adev->uvd.ring.funcs = &uvd_v5_0_ring_funcs; + adev->uvd.inst->ring.funcs = &uvd_v5_0_ring_funcs; } static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = { @@ -871,8 +883,8 @@ static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = { static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->uvd.irq.num_types = 1; - adev->uvd.irq.funcs = &uvd_v5_0_irq_funcs; + adev->uvd.inst->irq.num_types = 1; + adev->uvd.inst->irq.funcs = &uvd_v5_0_irq_funcs; } const struct amdgpu_ip_block_version uvd_v5_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index f26f515db2fb..bfddf97dd13e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -62,7 +62,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev) { return ((adev->asic_type >= CHIP_POLARIS10) && - (adev->asic_type <= CHIP_POLARIS12) && + (adev->asic_type <= CHIP_VEGAM) && (!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16)); } @@ -91,7 +91,7 @@ static uint64_t uvd_v6_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->uvd.ring_enc[0]) + if (ring == &adev->uvd.inst->ring_enc[0]) return RREG32(mmUVD_RB_RPTR); else return RREG32(mmUVD_RB_RPTR2); @@ -121,7 +121,7 @@ static uint64_t uvd_v6_0_enc_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->uvd.ring_enc[0]) + if (ring == &adev->uvd.inst->ring_enc[0]) return RREG32(mmUVD_RB_WPTR); else return RREG32(mmUVD_RB_WPTR2); @@ -152,7 +152,7 @@ static void uvd_v6_0_enc_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->uvd.ring_enc[0]) + if (ring == &adev->uvd.inst->ring_enc[0]) WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else @@ -375,6 +375,7 @@ error: static int uvd_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->uvd.num_uvd_inst = 1; if (!(adev->flags & AMD_IS_APU) && (RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK)) @@ -399,14 +400,14 @@ static int uvd_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); if (r) return r; /* UVD ENC TRAP */ if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.inst->irq); if (r) return r; } @@ -418,18 +419,18 @@ static int uvd_v6_0_sw_init(void *handle) if (!uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) - adev->uvd.ring_enc[i].funcs = NULL; + adev->uvd.inst->ring_enc[i].funcs = NULL; - adev->uvd.irq.num_types = 1; + adev->uvd.inst->irq.num_types = 1; adev->uvd.num_enc_rings = 0; DRM_INFO("UVD ENC is disabled\n"); } else { struct drm_sched_rq *rq; - ring = &adev->uvd.ring_enc[0]; + ring = &adev->uvd.inst->ring_enc[0]; rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, - rq, amdgpu_sched_jobs, NULL); + r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc, + rq, NULL); if (r) { DRM_ERROR("Failed setting up UVD ENC run queue.\n"); return r; @@ -440,17 +441,17 @@ static int uvd_v6_0_sw_init(void *handle) if (r) return r; - ring = &adev->uvd.ring; + ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r; if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - ring = &adev->uvd.ring_enc[i]; + ring = &adev->uvd.inst->ring_enc[i]; sprintf(ring->name, "uvd_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r; } @@ -469,10 +470,10 @@ static int uvd_v6_0_sw_fini(void *handle) return r; if (uvd_v6_0_enc_support(adev)) { - drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); + drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc); for (i = 0; i < adev->uvd.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->uvd.ring_enc[i]); + amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]); } return amdgpu_uvd_sw_fini(adev); @@ -488,7 +489,7 @@ static int uvd_v6_0_sw_fini(void *handle) static int uvd_v6_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t tmp; int i, r; @@ -532,7 +533,7 @@ static int uvd_v6_0_hw_init(void *handle) if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - ring = &adev->uvd.ring_enc[i]; + ring = &adev->uvd.inst->ring_enc[i]; ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) { @@ -563,7 +564,7 @@ done: static int uvd_v6_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; if (RREG32(mmUVD_STATUS) != 0) uvd_v6_0_stop(adev); @@ -611,9 +612,9 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev) /* programm memory controller bits 0-27 */ WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.gpu_addr)); + lower_32_bits(adev->uvd.inst->gpu_addr)); WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.gpu_addr)); + upper_32_bits(adev->uvd.inst->gpu_addr)); offset = AMDGPU_UVD_FIRMWARE_OFFSET; size = AMDGPU_UVD_FIRMWARE_SIZE(adev); @@ -726,7 +727,7 @@ static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev, */ static int uvd_v6_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring = &adev->uvd.inst->ring; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; uint32_t mp_swap_cntl; @@ -866,14 +867,14 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0); if (uvd_v6_0_enc_support(adev)) { - ring = &adev->uvd.ring_enc[0]; + ring = &adev->uvd.inst->ring_enc[0]; WREG32(mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32(mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32(mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32(mmUVD_RB_SIZE, ring->ring_size / 4); - ring = &adev->uvd.ring_enc[1]; + ring = &adev->uvd.inst->ring_enc[1]; WREG32(mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32(mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32(mmUVD_RB_BASE_LO2, ring->gpu_addr); @@ -964,6 +965,16 @@ static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, } /** + * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing + * + * @ring: amdgpu_ring pointer + */ +static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* The firmware doesn't seem to like touching registers at this point. */ +} + +/** * uvd_v6_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -1089,6 +1100,18 @@ static void uvd_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0xE); } +static void uvd_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0)); + amdgpu_ring_write(ring, 0); + } +} + static void uvd_v6_0_enc_ring_emit_pipeline_sync(struct amdgpu_ring *ring) { uint32_t seq = ring->fence_drv.sync_seq; @@ -1148,10 +1171,10 @@ static bool uvd_v6_0_check_soft_reset(void *handle) srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); if (srbm_soft_reset) { - adev->uvd.srbm_soft_reset = srbm_soft_reset; + adev->uvd.inst->srbm_soft_reset = srbm_soft_reset; return true; } else { - adev->uvd.srbm_soft_reset = 0; + adev->uvd.inst->srbm_soft_reset = 0; return false; } } @@ -1160,7 +1183,7 @@ static int uvd_v6_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst->srbm_soft_reset) return 0; uvd_v6_0_stop(adev); @@ -1172,9 +1195,9 @@ static int uvd_v6_0_soft_reset(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 srbm_soft_reset; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst->srbm_soft_reset) return 0; - srbm_soft_reset = adev->uvd.srbm_soft_reset; + srbm_soft_reset = adev->uvd.inst->srbm_soft_reset; if (srbm_soft_reset) { u32 tmp; @@ -1202,7 +1225,7 @@ static int uvd_v6_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst->srbm_soft_reset) return 0; mdelay(5); @@ -1228,17 +1251,17 @@ static int uvd_v6_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case 124: - amdgpu_fence_process(&adev->uvd.ring); + amdgpu_fence_process(&adev->uvd.inst->ring); break; case 119: if (likely(uvd_v6_0_enc_support(adev))) - amdgpu_fence_process(&adev->uvd.ring_enc[0]); + amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]); else int_handled = false; break; case 120: if (likely(uvd_v6_0_enc_support(adev))) - amdgpu_fence_process(&adev->uvd.ring_enc[1]); + amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]); else int_handled = false; break; @@ -1521,22 +1544,22 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = { static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(mmUVD_NO_OP, 0), .support_64bit_ptrs = false, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, .parse_cs = amdgpu_uvd_ring_parse_cs, .emit_frame_size = - 6 + 6 + /* hdp flush / invalidate */ + 6 + /* hdp invalidate */ 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ 14, /* uvd_v6_0_ring_emit_fence x1 no user fence */ .emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */ .emit_ib = uvd_v6_0_ring_emit_ib, .emit_fence = uvd_v6_0_ring_emit_fence, + .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush, .test_ring = uvd_v6_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = uvd_v6_0_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, @@ -1552,7 +1575,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, .emit_frame_size = - 6 + 6 + /* hdp flush / invalidate */ + 6 + /* hdp invalidate */ 10 + /* uvd_v6_0_ring_emit_pipeline_sync */ VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */ 14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */ @@ -1561,6 +1584,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .emit_fence = uvd_v6_0_ring_emit_fence, .emit_vm_flush = uvd_v6_0_ring_emit_vm_flush, .emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync, + .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush, .test_ring = uvd_v6_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, @@ -1600,10 +1624,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = { static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) { if (adev->asic_type >= CHIP_POLARIS10) { - adev->uvd.ring.funcs = &uvd_v6_0_ring_vm_funcs; + adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_vm_funcs; DRM_INFO("UVD is enabled in VM mode\n"); } else { - adev->uvd.ring.funcs = &uvd_v6_0_ring_phys_funcs; + adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_phys_funcs; DRM_INFO("UVD is enabled in physical mode\n"); } } @@ -1613,7 +1637,7 @@ static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->uvd.num_enc_rings; ++i) - adev->uvd.ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs; + adev->uvd.inst->ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs; DRM_INFO("UVD ENC is enabled in VM mode\n"); } @@ -1626,11 +1650,11 @@ static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = { static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev) { if (uvd_v6_0_enc_support(adev)) - adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1; + adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1; else - adev->uvd.irq.num_types = 1; + adev->uvd.inst->irq.num_types = 1; - adev->uvd.irq.funcs = &uvd_v6_0_irq_funcs; + adev->uvd.inst->irq.funcs = &uvd_v6_0_irq_funcs; } const struct amdgpu_ip_block_version uvd_v6_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index eddc57f3b72a..57d32f21b3a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -40,6 +40,8 @@ #include "mmhub/mmhub_1_0_offset.h" #include "mmhub/mmhub_1_0_sh_mask.h" +#define UVD7_MAX_HW_INSTANCES_VEGA20 2 + static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev); @@ -47,6 +49,11 @@ static int uvd_v7_0_start(struct amdgpu_device *adev); static void uvd_v7_0_stop(struct amdgpu_device *adev); static int uvd_v7_0_sriov_start(struct amdgpu_device *adev); +static int amdgpu_ih_clientid_uvds[] = { + SOC15_IH_CLIENTID_UVD, + SOC15_IH_CLIENTID_UVD1 +}; + /** * uvd_v7_0_ring_get_rptr - get read pointer * @@ -58,7 +65,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); } /** @@ -72,10 +79,10 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->uvd.ring_enc[0]) - return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); + if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); else - return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); } /** @@ -89,7 +96,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); } /** @@ -106,10 +113,10 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; - if (ring == &adev->uvd.ring_enc[0]) - return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); + if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); else - return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); } /** @@ -123,7 +130,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } /** @@ -144,11 +151,11 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) return; } - if (ring == &adev->uvd.ring_enc[0]) - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, + if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); } @@ -170,8 +177,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 16); if (r) { - DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n", - ring->idx, r); + DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n", + ring->me, ring->idx, r); return r; } amdgpu_ring_write(ring, HEVC_ENC_CMD_END); @@ -184,11 +191,11 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); + DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", + ring->me, ring->idx, i); } else { - DRM_ERROR("amdgpu: ring %d test failed\n", - ring->idx); + DRM_ERROR("amdgpu: (%d)ring %d test failed\n", + ring->me, ring->idx); r = -ETIMEDOUT; } @@ -342,24 +349,24 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r); goto error; } r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence); if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r); goto error; } r = dma_fence_wait_timeout(fence, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me); r = -ETIMEDOUT; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r); } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx); r = 0; } error: @@ -370,6 +377,10 @@ error: static int uvd_v7_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_VEGA20) + adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20; + else + adev->uvd.num_uvd_inst = 1; if (amdgpu_sriov_vf(adev)) adev->uvd.num_enc_rings = 1; @@ -386,19 +397,21 @@ static int uvd_v7_0_sw_init(void *handle) { struct amdgpu_ring *ring; struct drm_sched_rq *rq; - int i, r; + int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.irq); - if (r) - return r; - - /* UVD ENC TRAP */ - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq); + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + /* UVD TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], 124, &adev->uvd.inst[j].irq); if (r) return r; + + /* UVD ENC TRAP */ + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + 119, &adev->uvd.inst[j].irq); + if (r) + return r; + } } r = amdgpu_uvd_sw_init(adev); @@ -415,43 +428,48 @@ static int uvd_v7_0_sw_init(void *handle) DRM_INFO("PSP loading UVD firmware\n"); } - ring = &adev->uvd.ring_enc[0]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, - rq, amdgpu_sched_jobs, NULL); - if (r) { - DRM_ERROR("Failed setting up UVD ENC run queue.\n"); - return r; + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + ring = &adev->uvd.inst[j].ring_enc[0]; + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc, + rq, NULL); + if (r) { + DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j); + return r; + } } r = amdgpu_uvd_resume(adev); if (r) return r; - if (!amdgpu_sriov_vf(adev)) { - ring = &adev->uvd.ring; - sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); - if (r) - return r; - } - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - ring = &adev->uvd.ring_enc[i]; - sprintf(ring->name, "uvd_enc%d", i); - if (amdgpu_sriov_vf(adev)) { - ring->use_doorbell = true; - - /* currently only use the first enconding ring for - * sriov, so set unused location for other unused rings. - */ - if (i == 0) - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; - else - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + if (!amdgpu_sriov_vf(adev)) { + ring = &adev->uvd.inst[j].ring; + sprintf(ring->name, "uvd<%d>", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); + if (r) + return r; + } + + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + ring = &adev->uvd.inst[j].ring_enc[i]; + sprintf(ring->name, "uvd_enc%d<%d>", i, j); + if (amdgpu_sriov_vf(adev)) { + ring->use_doorbell = true; + + /* currently only use the first enconding ring for + * sriov, so set unused location for other unused rings. + */ + if (i == 0) + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; + else + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; + } + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); + if (r) + return r; } - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); - if (r) - return r; } r = amdgpu_virt_alloc_mm_table(adev); @@ -463,7 +481,7 @@ static int uvd_v7_0_sw_init(void *handle) static int uvd_v7_0_sw_fini(void *handle) { - int i, r; + int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_virt_free_mm_table(adev); @@ -472,11 +490,12 @@ static int uvd_v7_0_sw_fini(void *handle) if (r) return r; - drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); - - for (i = 0; i < adev->uvd.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->uvd.ring_enc[i]); + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc); + for (i = 0; i < adev->uvd.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); + } return amdgpu_uvd_sw_fini(adev); } @@ -490,9 +509,9 @@ static int uvd_v7_0_sw_fini(void *handle) static int uvd_v7_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring; uint32_t tmp; - int i, r; + int i, j, r; if (amdgpu_sriov_vf(adev)) r = uvd_v7_0_sriov_start(adev); @@ -501,57 +520,60 @@ static int uvd_v7_0_hw_init(void *handle) if (r) goto done; - if (!amdgpu_sriov_vf(adev)) { - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + ring = &adev->uvd.inst[j].ring; + + if (!amdgpu_sriov_vf(adev)) { + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } + + r = amdgpu_ring_alloc(ring, 10); + if (r) { + DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r); + goto done; + } + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + /* Clear timeout status bits */ + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_TIMEOUT_STATUS), 0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_CNTL), 0)); + amdgpu_ring_write(ring, 3); + + amdgpu_ring_commit(ring); } - r = amdgpu_ring_alloc(ring, 10); - if (r) { - DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); - goto done; + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + ring = &adev->uvd.inst[j].ring_enc[i]; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } } - - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); - - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); - - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); - - /* Clear timeout status bits */ - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_TIMEOUT_STATUS), 0)); - amdgpu_ring_write(ring, 0x8); - - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_CNTL), 0)); - amdgpu_ring_write(ring, 3); - - amdgpu_ring_commit(ring); } - - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - ring = &adev->uvd.ring_enc[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; - } - } - done: if (!r) DRM_INFO("UVD and UVD ENC initialized successfully.\n"); @@ -569,7 +591,7 @@ done: static int uvd_v7_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.ring; + int i; if (!amdgpu_sriov_vf(adev)) uvd_v7_0_stop(adev); @@ -578,7 +600,8 @@ static int uvd_v7_0_hw_fini(void *handle) DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); } - ring->ready = false; + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) + adev->uvd.inst[i].ring.ready = false; return 0; } @@ -618,48 +641,51 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) { uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev); uint32_t offset; + int i; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - offset = 0; - } else { - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.gpu_addr)); - offset = size; - } + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + offset = 0; + } else { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->uvd.inst[i].gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->uvd.inst[i].gpu_addr)); + offset = size; + } - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.gpu_addr + offset)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.gpu_addr + offset)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, - AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); - - WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); + + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->uvd.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->uvd.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); + + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, + AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); + + WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); + } } static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, @@ -669,6 +695,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, uint64_t addr = table->gpu_addr; struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; uint32_t size; + int i; size = header->header_size + header->vce_table_size + header->uvd_table_size; @@ -688,11 +715,12 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, /* 4, set resp to zero */ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); - WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0); - adev->wb.wb[adev->uvd.ring_enc[0].wptr_offs] = 0; - adev->uvd.ring_enc[0].wptr = 0; - adev->uvd.ring_enc[0].wptr_old = 0; - + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0); + adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0; + adev->uvd.inst[i].ring_enc[0].wptr = 0; + adev->uvd.inst[i].ring_enc[0].wptr_old = 0; + } /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); @@ -725,6 +753,7 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) struct mmsch_v1_0_cmd_end end = { {0} }; uint32_t *init_table = adev->virt.mm_table.cpu_addr; struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; + uint8_t i = 0; direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; @@ -742,120 +771,121 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) init_table += header->uvd_table_offset; - ring = &adev->uvd.ring; - ring->wptr = 0; - size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); - - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), - 0xFFFFFFFF, 0x00000004); - /* mc resume*/ - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - offset = 0; - } else { - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->uvd.gpu_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->uvd.gpu_addr)); - offset = size; + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + ring = &adev->uvd.inst[i].ring; + ring->wptr = 0; + size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); + + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), + 0xFFFFFFFF, 0x00000004); + /* mc resume*/ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + offset = 0; + } else { + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.inst[i].gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.inst[i].gpu_addr)); + offset = size; + } + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size); + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.inst[i].gpu_addr + offset)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.inst[i].gpu_addr + offset)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2), + AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); + + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); + /* mc resume end*/ + + /* disable clock gating */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL), + ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0); + + /* disable interupt */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), + ~UVD_MASTINT_EN__VCPU_EN_MASK, 0); + + /* stall UMC and register bus before resetting VCPU */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* put LMI, VCPU, RBC etc... into reset */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), + (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | + UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | + UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | + UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | + UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | + UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK)); + + /* initialize UVD memory controller */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL), + (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + 0x00100000L)); + + /* take all subblocks out of reset, except VCPU */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + + /* enable VCPU clock */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK); + + /* enable master interrupt */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), + ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), + (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); + + /* clear the bit 4 of UVD_STATUS */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0); + + /* force RBC into idle state */ + size = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp); + + ring = &adev->uvd.inst[i].ring_enc[0]; + ring->wptr = 0; + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4); + + /* boot up the VCPU */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0); + + /* enable UMC */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); + + MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02); } - - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); - - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), - lower_32_bits(adev->uvd.gpu_addr + offset)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), - upper_32_bits(adev->uvd.gpu_addr + offset)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); - - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), - AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); - - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); - /* mc resume end*/ - - /* disable clock gating */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), - ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0); - - /* disable interupt */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), - ~UVD_MASTINT_EN__VCPU_EN_MASK, 0); - - /* stall UMC and register bus before resetting VCPU */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, - UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - - /* put LMI, VCPU, RBC etc... into reset */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), - (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | - UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | - UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | - UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | - UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | - UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | - UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK)); - - /* initialize UVD memory controller */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL), - (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__REQ_MODE_MASK | - 0x00100000L)); - - /* take all subblocks out of reset, except VCPU */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - - /* enable VCPU clock */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK); - - /* enable master interrupt */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), - ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), - (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); - - /* clear the bit 4 of UVD_STATUS */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0); - - /* force RBC into idle state */ - size = order_base_2(ring->ring_size); - tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); - - ring = &adev->uvd.ring_enc[0]; - ring->wptr = 0; - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); - - /* boot up the VCPU */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); - - /* enable UMC */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); - - MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02); - /* add end packet */ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; @@ -874,15 +904,17 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) */ static int uvd_v7_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->uvd.ring; + struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; uint32_t mp_swap_cntl; - int i, j, r; + int i, j, k, r; - /* disable DPG */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, - ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + for (k = 0; k < adev->uvd.num_uvd_inst; ++k) { + /* disable DPG */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + } /* disable byte swapping */ lmi_swap_cntl = 0; @@ -890,157 +922,159 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) uvd_v7_0_mc_resume(adev); - /* disable clock gating */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 0, - ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK); - - /* disable interupt */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); - - /* stall UMC and register bus before resetting VCPU */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - mdelay(1); - - /* put LMI, VCPU, RBC etc... into reset */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, - UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | - UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | - UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | - UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | - UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | - UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | - UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); - mdelay(5); + for (k = 0; k < adev->uvd.num_uvd_inst; ++k) { + ring = &adev->uvd.inst[k].ring; + /* disable clock gating */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0, + ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK); - /* initialize UVD memory controller */ - WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, - (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__REQ_MODE_MASK | - 0x00100000L); + /* disable interupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* stall UMC and register bus before resetting VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + mdelay(1); + + /* put LMI, VCPU, RBC etc... into reset */ + WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | + UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | + UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | + UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | + UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | + UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); + mdelay(5); + + /* initialize UVD memory controller */ + WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL, + (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + 0x00100000L); #ifdef __BIG_ENDIAN - /* swap (8 in 32) RB and IB */ - lmi_swap_cntl = 0xa; - mp_swap_cntl = 0; + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; + mp_swap_cntl = 0; #endif - WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); - WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); - - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88); - - /* take all subblocks out of reset, except VCPU */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - mdelay(5); + WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); + WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); - /* enable VCPU clock */ - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, - UVD_VCPU_CNTL__CLK_EN_MASK); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0); + WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88); - /* enable UMC */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + /* take all subblocks out of reset, except VCPU */ + WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(5); - /* boot up the VCPU */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0); - mdelay(10); + /* enable VCPU clock */ + WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL, + UVD_VCPU_CNTL__CLK_EN_MASK); - for (i = 0; i < 10; ++i) { - uint32_t status; + /* enable UMC */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - for (j = 0; j < 100; ++j) { - status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); + /* boot up the VCPU */ + WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0); + mdelay(10); + + for (i = 0; i < 10; ++i) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(UVD, k, mmUVD_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; if (status & 2) break; + + DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k); + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + r = -1; } - r = 0; - if (status & 2) - break; - - DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, - ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - mdelay(10); - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, - ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - mdelay(10); - r = -1; - } - - if (r) { - DRM_ERROR("UVD not responding, giving up!!!\n"); - return r; - } - /* enable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), - (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), - ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); - - /* clear the bit 4 of UVD_STATUS */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - - /* force RBC into idle state */ - rb_bufsz = order_base_2(ring->ring_size); - tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); - - /* set the write pointer delay */ - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); - - /* set the wb address */ - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, - (upper_32_bits(ring->gpu_addr) >> 2)); - - /* programm the RB_BASE for ring buffer */ - WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - - /* Initialize the ring buffer's read and write pointers */ - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); - - ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, - lower_32_bits(ring->wptr)); - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, - ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); - - ring = &adev->uvd.ring_enc[0]; - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + if (r) { + DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k); + return r; + } + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), + (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), + ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); - ring = &adev->uvd.ring_enc[1]; - WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + /* clear the bit 4 of UVD_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0, + ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); + + ring = &adev->uvd.inst[k].ring_enc[0]; + WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->uvd.inst[k].ring_enc[1]; + WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4); + } return 0; } @@ -1053,26 +1087,30 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) */ static void uvd_v7_0_stop(struct amdgpu_device *adev) { - /* force RBC into idle state */ - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101); - - /* Stall UMC and register bus before resetting VCPU */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - mdelay(1); - - /* put VCPU into reset */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - mdelay(5); + uint8_t i = 0; - /* disable VCPU clock */ - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0); + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + /* force RBC into idle state */ + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101); - /* Unstall UMC and register bus */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + mdelay(1); + + /* put VCPU into reset */ + WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET, + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(5); + + /* disable VCPU clock */ + WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0); + + /* Unstall UMC and register bus */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + } } /** @@ -1091,26 +1129,26 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); amdgpu_ring_write(ring, seq); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, addr & 0xffffffff); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, 2); } @@ -1136,6 +1174,16 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, } /** + * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing + * + * @ring: amdgpu_ring pointer + */ +static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + /* The firmware doesn't seem to like touching registers at this point. */ +} + +/** * uvd_v7_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -1149,30 +1197,30 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD); + WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n", + ring->me, ring->idx, r); return r; } amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID); + tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); + DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", + ring->me, ring->idx, i); } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); + DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n", + ring->me, ring->idx, tmp); r = -EINVAL; } return r; @@ -1193,17 +1241,17 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0)); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0)); amdgpu_ring_write(ring, ib->length_dw); } @@ -1231,13 +1279,13 @@ static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); amdgpu_ring_write(ring, val); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, 8); } @@ -1247,16 +1295,16 @@ static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, struct amdgpu_device *adev = ring->adev; amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0)); amdgpu_ring_write(ring, val); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0)); amdgpu_ring_write(ring, mask); amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, 12); } @@ -1277,12 +1325,15 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - int i; struct amdgpu_device *adev = ring->adev; + int i; - for (i = 0; i < count; i++) - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); + WARN_ON(ring->wptr % 2 || count % 2); + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0)); + amdgpu_ring_write(ring, 0); + } } static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) @@ -1349,16 +1400,16 @@ static bool uvd_v7_0_check_soft_reset(void *handle) if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || - (RREG32_SOC15(UVD, 0, mmUVD_STATUS) & + (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) & AMDGPU_UVD_STATUS_BUSY_MASK)) srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); if (srbm_soft_reset) { - adev->uvd.srbm_soft_reset = srbm_soft_reset; + adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset; return true; } else { - adev->uvd.srbm_soft_reset = 0; + adev->uvd.inst[ring->me].srbm_soft_reset = 0; return false; } } @@ -1367,7 +1418,7 @@ static int uvd_v7_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst[ring->me].srbm_soft_reset) return 0; uvd_v7_0_stop(adev); @@ -1379,9 +1430,9 @@ static int uvd_v7_0_soft_reset(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 srbm_soft_reset; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst[ring->me].srbm_soft_reset) return 0; - srbm_soft_reset = adev->uvd.srbm_soft_reset; + srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset; if (srbm_soft_reset) { u32 tmp; @@ -1409,7 +1460,7 @@ static int uvd_v7_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (!adev->uvd.srbm_soft_reset) + if (!adev->uvd.inst[ring->me].srbm_soft_reset) return 0; mdelay(5); @@ -1431,17 +1482,32 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_UVD: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_UVD1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + DRM_DEBUG("IH: UVD TRAP\n"); + switch (entry->src_id) { case 124: - amdgpu_fence_process(&adev->uvd.ring); + amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring); break; case 119: - amdgpu_fence_process(&adev->uvd.ring_enc[0]); + amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]); break; case 120: if (!amdgpu_sriov_vf(adev)) - amdgpu_fence_process(&adev->uvd.ring_enc[1]); + amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -1457,9 +1523,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) { uint32_t data, data1, data2, suvd_flags; - data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL); - data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); - data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL); + data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL); + data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE); + data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL); data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); @@ -1503,18 +1569,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); data1 |= suvd_flags; - WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data); - WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0); - WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); - WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2); + WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data); + WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0); + WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1); + WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2); } static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) { uint32_t data, data1, cgc_flags, suvd_flags; - data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE); - data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); + data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE); + data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE); cgc_flags = UVD_CGC_GATE__SYS_MASK | UVD_CGC_GATE__UDEC_MASK | @@ -1546,8 +1612,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) data |= cgc_flags; data1 |= suvd_flags; - WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data); - WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); + WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data); + WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1); } static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) @@ -1606,7 +1672,7 @@ static int uvd_v7_0_set_powergating_state(void *handle, if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) return 0; - WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); + WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); if (state == AMD_PG_STATE_GATE) { uvd_v7_0_stop(adev); @@ -1647,14 +1713,13 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = { static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_ring_get_rptr, .get_wptr = uvd_v7_0_ring_get_wptr, .set_wptr = uvd_v7_0_ring_set_wptr, .emit_frame_size = - 6 + 6 + /* hdp flush / invalidate */ + 6 + /* hdp invalidate */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + 8 + /* uvd_v7_0_ring_emit_vm_flush */ @@ -1663,6 +1728,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .emit_ib = uvd_v7_0_ring_emit_ib, .emit_fence = uvd_v7_0_ring_emit_fence, .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush, + .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush, .test_ring = uvd_v7_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, .insert_nop = uvd_v7_0_ring_insert_nop, @@ -1671,6 +1737,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .end_use = amdgpu_uvd_ring_end_use, .emit_wreg = uvd_v7_0_ring_emit_wreg, .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { @@ -1702,22 +1769,32 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { .end_use = amdgpu_uvd_ring_end_use, .emit_wreg = uvd_v7_0_enc_ring_emit_wreg, .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) { - adev->uvd.ring.funcs = &uvd_v7_0_ring_vm_funcs; - DRM_INFO("UVD is enabled in VM mode\n"); + int i; + + for (i = 0; i < adev->uvd.num_uvd_inst; i++) { + adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs; + adev->uvd.inst[i].ring.me = i; + DRM_INFO("UVD(%d) is enabled in VM mode\n", i); + } } static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev) { - int i; + int i, j; - for (i = 0; i < adev->uvd.num_enc_rings; ++i) - adev->uvd.ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs; + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs; + adev->uvd.inst[j].ring_enc[i].me = j; + } - DRM_INFO("UVD ENC is enabled in VM mode\n"); + DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j); + } } static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { @@ -1727,8 +1804,12 @@ static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1; - adev->uvd.irq.funcs = &uvd_v7_0_irq_funcs; + int i; + + for (i = 0; i < adev->uvd.num_uvd_inst; i++) { + adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1; + adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs; + } } const struct amdgpu_ip_block_version uvd_v7_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 428d1928e44e..0999c843f623 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -388,7 +388,8 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev) default: if ((adev->asic_type == CHIP_POLARIS10) || (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS12)) + (adev->asic_type == CHIP_POLARIS12) || + (adev->asic_type == CHIP_VEGAM)) return AMDGPU_VCE_HARVEST_VCE1; return 0; @@ -467,8 +468,8 @@ static int vce_v3_0_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; vce_v3_0_override_vce_clock_gating(adev, true); - if (!(adev->flags & AMD_IS_APU)) - amdgpu_asic_set_vce_clocks(adev, 10000, 10000); + + amdgpu_asic_set_vce_clocks(adev, 10000, 10000); for (i = 0; i < adev->vce.num_rings; i++) adev->vce.ring[i].ready = false; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 73fd48d6c756..8fd1b742985a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -1081,6 +1081,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .end_use = amdgpu_vce_ring_end_use, .emit_wreg = vce_v4_0_emit_wreg, .emit_reg_wait = vce_v4_0_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 8c132673bc79..29684c3ea4ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -35,7 +35,6 @@ #include "mmhub/mmhub_9_1_offset.h" #include "mmhub/mmhub_9_1_sh_mask.h" -static int vcn_v1_0_start(struct amdgpu_device *adev); static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -146,10 +145,6 @@ static int vcn_v1_0_hw_init(void *handle) struct amdgpu_ring *ring = &adev->vcn.ring_dec; int i, r; - r = vcn_v1_0_start(adev); - if (r) - goto done; - ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) { @@ -185,11 +180,9 @@ static int vcn_v1_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring = &adev->vcn.ring_dec; - int r; - r = vcn_v1_0_stop(adev); - if (r) - return r; + if (RREG32_SOC15(VCN, 0, mmUVD_STATUS)) + vcn_v1_0_stop(adev); ring->ready = false; @@ -288,14 +281,14 @@ static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) * * Disable clock gating for VCN block */ -static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) +static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev) { uint32_t data; /* JPEG disable CGC */ data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); - if (sw) + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; else data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK; @@ -310,7 +303,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) /* UVD disable CGC */ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); - if (sw) + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; else data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; @@ -415,13 +408,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw) * * Enable clock gating for VCN block */ -static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) +static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev) { uint32_t data = 0; /* enable JPEG CGC */ data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL); - if (sw) + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; else data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; @@ -435,7 +428,7 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) /* enable UVD CGC */ data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL); - if (sw) + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; else data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; @@ -480,6 +473,94 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw) WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); } +static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int ret; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); + + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF, ret); + } else { + data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT + | 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0, 0xFFFFFFFF, ret); + } + + /* polling UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS , UVDU_PWR_STATUS are 0 (power on) */ + + data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS); + data &= ~0x103; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) + data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | UVD_POWER_STATUS__UVD_PG_EN_MASK; + + WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); +} + +static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int ret; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + /* Before power off, this indicator has to be turned on */ + data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS); + data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK; + data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); + + + data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT + | 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT); + + WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data); + + data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT + | 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF, ret); + } +} + /** * vcn_v1_0_start - start VCN block * @@ -499,8 +580,9 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) vcn_v1_0_mc_resume(adev); + vcn_1_0_disable_static_power_gating(adev); /* disable clock gating */ - vcn_v1_0_disable_clock_gating(adev, true); + vcn_v1_0_disable_clock_gating(adev); /* disable interupt */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, @@ -680,16 +762,45 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - /* enable clock gating */ - vcn_v1_0_enable_clock_gating(adev, true); + WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0); + vcn_v1_0_enable_clock_gating(adev); + vcn_1_0_enable_static_power_gating(adev); return 0; } +static bool vcn_v1_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == 0x2); +} + +static int vcn_v1_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 0; + + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, 0x2, 0x2, ret); + + return ret; +} + static int vcn_v1_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { - /* needed for driver unload*/ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + if (enable) { + /* wait for STATUS to clear */ + if (vcn_v1_0_is_idle(handle)) + return -EBUSY; + vcn_v1_0_enable_clock_gating(adev); + } else { + /* disable HW gating and enable Sw gating */ + vcn_v1_0_disable_clock_gating(adev); + } return 0; } @@ -1048,16 +1159,36 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static void vcn_v1_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - int i; struct amdgpu_device *adev = ring->adev; + int i; - for (i = 0; i < count; i++) - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); + WARN_ON(ring->wptr % 2 || count % 2); + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); + amdgpu_ring_write(ring, 0); + } } +static int vcn_v1_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* This doesn't actually powergate the VCN block. + * That's done in the dpm code via the SMC. This + * just re-inits the block as necessary. The actual + * gating still happens in the dpm code. We should + * revisit this when there is a cleaner line between + * the smc and the hw blocks + */ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (state == AMD_PG_STATE_GATE) + return vcn_v1_0_stop(adev); + else + return vcn_v1_0_start(adev); +} static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", @@ -1069,20 +1200,19 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .hw_fini = vcn_v1_0_hw_fini, .suspend = vcn_v1_0_suspend, .resume = vcn_v1_0_resume, - .is_idle = NULL /* vcn_v1_0_is_idle */, - .wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */, + .is_idle = vcn_v1_0_is_idle, + .wait_for_idle = vcn_v1_0_wait_for_idle, .check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */, .pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */, .soft_reset = NULL /* vcn_v1_0_soft_reset */, .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, .set_clockgating_state = vcn_v1_0_set_clockgating_state, - .set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */, + .set_powergating_state = vcn_v1_0_set_powergating_state, }; static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, - .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .vmhub = AMDGPU_MMHUB, .get_rptr = vcn_v1_0_dec_ring_get_rptr, @@ -1101,7 +1231,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush, .test_ring = amdgpu_vcn_dec_ring_test_ring, .test_ib = amdgpu_vcn_dec_ring_test_ib, - .insert_nop = vcn_v1_0_ring_insert_nop, + .insert_nop = vcn_v1_0_dec_ring_insert_nop, .insert_start = vcn_v1_0_dec_ring_insert_start, .insert_end = vcn_v1_0_dec_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, @@ -1109,6 +1239,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { @@ -1139,6 +1270,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c new file mode 100644 index 000000000000..52778de93ab0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -0,0 +1,53 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "vega20_ip_offset.h" + +int vega20_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the blocke beend by our driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); + adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + } + return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 126f1276d347..4ac1288ab7df 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -305,9 +305,10 @@ static void vi_init_golden_registers(struct amdgpu_device *adev) stoney_mgcg_cgcg_init, ARRAY_SIZE(stoney_mgcg_cgcg_init)); break; - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: default: break; } @@ -728,33 +729,59 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock, return r; tmp = RREG32_SMC(cntl_reg); - tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | - CG_DCLK_CNTL__DCLK_DIVIDER_MASK); + + if (adev->flags & AMD_IS_APU) + tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK; + else + tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK | + CG_DCLK_CNTL__DCLK_DIVIDER_MASK); tmp |= dividers.post_divider; WREG32_SMC(cntl_reg, tmp); for (i = 0; i < 100; i++) { - if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK) - break; + tmp = RREG32_SMC(status_reg); + if (adev->flags & AMD_IS_APU) { + if (tmp & 0x10000) + break; + } else { + if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK) + break; + } mdelay(10); } if (i == 100) return -ETIMEDOUT; - return 0; } +#define ixGNB_CLK1_DFS_CNTL 0xD82200F0 +#define ixGNB_CLK1_STATUS 0xD822010C +#define ixGNB_CLK2_DFS_CNTL 0xD8220110 +#define ixGNB_CLK2_STATUS 0xD822012C +#define ixGNB_CLK3_DFS_CNTL 0xD8220130 +#define ixGNB_CLK3_STATUS 0xD822014C + static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) { int r; - r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); - if (r) - return r; + if (adev->flags & AMD_IS_APU) { + r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS); + if (r) + return r; - r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); - if (r) - return r; + r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS); + if (r) + return r; + } else { + r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); + if (r) + return r; + + r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); + if (r) + return r; + } return 0; } @@ -764,6 +791,22 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) int r, i; struct atom_clock_dividers dividers; u32 tmp; + u32 reg_ctrl; + u32 reg_status; + u32 status_mask; + u32 reg_mask; + + if (adev->flags & AMD_IS_APU) { + reg_ctrl = ixGNB_CLK3_DFS_CNTL; + reg_status = ixGNB_CLK3_STATUS; + status_mask = 0x00010000; + reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK; + } else { + reg_ctrl = ixCG_ECLK_CNTL; + reg_status = ixCG_ECLK_STATUS; + status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK; + reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK; + } r = amdgpu_atombios_get_clock_dividers(adev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, @@ -772,24 +815,25 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) return r; for (i = 0; i < 100; i++) { - if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) + if (RREG32_SMC(reg_status) & status_mask) break; mdelay(10); } + if (i == 100) return -ETIMEDOUT; - tmp = RREG32_SMC(ixCG_ECLK_CNTL); - tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | - CG_ECLK_CNTL__ECLK_DIVIDER_MASK); + tmp = RREG32_SMC(reg_ctrl); + tmp &= ~reg_mask; tmp |= dividers.post_divider; - WREG32_SMC(ixCG_ECLK_CNTL, tmp); + WREG32_SMC(reg_ctrl, tmp); for (i = 0; i < 100; i++) { - if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK) + if (RREG32_SMC(reg_status) & status_mask) break; mdelay(10); } + if (i == 100) return -ETIMEDOUT; @@ -876,6 +920,27 @@ static void vi_invalidate_hdp(struct amdgpu_device *adev, } } +static bool vi_need_full_reset(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + /* CZ has hang issues with full reset at the moment */ + return false; + case CHIP_FIJI: + case CHIP_TONGA: + /* XXX: soft reset should work on fiji and tonga */ + return true; + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_TOPAZ: + default: + /* change this when we support soft reset */ + return true; + } +} + static const struct amdgpu_asic_funcs vi_asic_funcs = { .read_disabled_bios = &vi_read_disabled_bios, @@ -889,6 +954,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .get_config_memsize = &vi_get_config_memsize, .flush_hdp = &vi_flush_hdp, .invalidate_hdp = &vi_invalidate_hdp, + .need_full_reset = &vi_need_full_reset, }; #define CZ_REV_BRISTOL(rev) \ @@ -1031,6 +1097,30 @@ static int vi_common_early_init(void *handle) adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x64; break; + case CHIP_VEGAM: + adev->cg_flags = 0; + /*AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_DRM_LS | + AMD_CG_SUPPORT_UVD_MGCG | + AMD_CG_SUPPORT_VCE_MGCG;*/ + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x6E; + break; case CHIP_CARRIZO: adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG | AMD_CG_SUPPORT_GFX_MGCG | @@ -1422,6 +1512,7 @@ static int vi_common_set_clockgating_state(void *handle, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: vi_common_set_clockgating_state_by_smu(adev, state); default: break; @@ -1551,9 +1642,10 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); } break; - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: amdgpu_device_ip_block_add(adev, &vi_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block); amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 0d0242240c47..ffd096fffc1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -30,12 +30,14 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ kfd_process.o kfd_queue.o kfd_mqd_manager.o \ kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \ + kfd_mqd_manager_v9.o \ kfd_kernel_queue.o kfd_kernel_queue_cik.o \ - kfd_kernel_queue_vi.o kfd_packet_manager.o \ - kfd_process_queue_manager.o kfd_device_queue_manager.o \ - kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ + kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \ + kfd_packet_manager.o kfd_process_queue_manager.o \ + kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \ + kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ - kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o + kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o ifneq ($(CONFIG_AMD_IOMMU_V2),) amdkfd-y += kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 3d5ccb3755d4..49df6c791cfc 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -27,18 +27,28 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { - unsigned int pasid; const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; + unsigned int vmid, pasid; + + /* Only handle interrupts from KFD VMIDs */ + vmid = (ihre->ring_id & 0x0000ff00) >> 8; + if (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd) + return 0; + /* If there is no valid PASID, it's likely a firmware bug */ pasid = (ihre->ring_id & 0xffff0000) >> 16; + if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) + return 0; - /* Do not process in ISR, just request it to be forwarded to WQ. */ - return (pasid != 0) && - (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). + */ + return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || - ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE); + ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE; } static void cik_event_interrupt_wq(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h index 48769d12dd7b..37ce6dd65391 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h @@ -33,7 +33,8 @@ #define APE1_MTYPE(x) ((x) << 7) /* valid for both DEFAULT_MTYPE and APE1_MTYPE */ -#define MTYPE_CACHED 0 +#define MTYPE_CACHED_NV 0 +#define MTYPE_CACHED 1 #define MTYPE_NONCACHED 3 #define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h new file mode 100644 index 000000000000..f68aef02fc1f --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -0,0 +1,560 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +static const uint32_t cwsr_trap_gfx8_hex[] = { + 0xbf820001, 0xbf820125, + 0xb8f4f802, 0x89748674, + 0xb8f5f803, 0x8675ff75, + 0x00000400, 0xbf850011, + 0xc00a1e37, 0x00000000, + 0xbf8c007f, 0x87777978, + 0xbf840002, 0xb974f802, + 0xbe801d78, 0xb8f5f803, + 0x8675ff75, 0x000001ff, + 0xbf850002, 0x80708470, + 0x82718071, 0x8671ff71, + 0x0000ffff, 0xb974f802, + 0xbe801f70, 0xb8f5f803, + 0x8675ff75, 0x00000100, + 0xbf840006, 0xbefa0080, + 0xb97a0203, 0x8671ff71, + 0x0000ffff, 0x80f08870, + 0x82f18071, 0xbefa0080, + 0xb97a0283, 0xbef60068, + 0xbef70069, 0xb8fa1c07, + 0x8e7a9c7a, 0x87717a71, + 0xb8fa03c7, 0x8e7a9b7a, + 0x87717a71, 0xb8faf807, + 0x867aff7a, 0x00007fff, + 0xb97af807, 0xbef2007e, + 0xbef3007f, 0xbefe0180, + 0xbf900004, 0x877a8474, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x867aff7f, + 0x08000000, 0x8f7a837a, + 0x877b7a7b, 0x867aff7f, + 0x70000000, 0x8f7a817a, + 0x877b7a7b, 0xbeef007c, + 0xbeee0080, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cbc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611d3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xb8f5f803, + 0xbefe007c, 0xbefc006e, + 0xc0611d7c, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dbc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dfc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xb8eff801, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbef30080, + 0x8773737a, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8f51605, 0x80758175, + 0x8e758475, 0x8e7a8275, + 0xbefa00ff, 0x01000000, + 0xbef60178, 0x80786e78, + 0x82798079, 0xbefc0080, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003c, 0x00000000, + 0xc06b013c, 0x00000010, + 0xc06b023c, 0x00000020, + 0xc06b033c, 0x00000030, + 0x8078c078, 0x82798079, + 0x807c907c, 0xbf0a757c, + 0xbf85ffeb, 0xbef80176, + 0xbeee0080, 0xbefe00c1, + 0xbeff00c1, 0xbefa00ff, + 0x01000000, 0xe0724000, + 0x6e1e0000, 0xe0724100, + 0x6e1e0100, 0xe0724200, + 0x6e1e0200, 0xe0724300, + 0x6e1e0300, 0xbefe00c1, + 0xbeff00c1, 0xb8f54306, + 0x8675c175, 0xbf84002c, + 0xbf8a0000, 0x867aff73, + 0x04000000, 0xbf840028, + 0x8e758675, 0x8e758275, + 0xbefa0075, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0x806eff6e, 0x00000080, + 0xbefa00ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe80007b, + 0x867bff7b, 0xff7fffff, + 0x877bff7b, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8c007f, 0xe0765000, + 0x6e1e0002, 0x32040702, + 0xd0c9006a, 0x0000eb02, + 0xbf87fff7, 0xbefb0000, + 0xbeee00ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8f52a05, 0x80758175, + 0x8e758275, 0x8e7a8875, + 0xbefa00ff, 0x01000000, + 0xbefc0084, 0xbf0a757c, + 0xbf840015, 0xbf11017c, + 0x8075ff75, 0x00001000, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x6e1e0000, + 0xe0724100, 0x6e1e0100, + 0xe0724200, 0x6e1e0200, + 0xe0724300, 0x6e1e0300, + 0x807c847c, 0x806eff6e, + 0x00000400, 0xbf0a757c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200ca, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x8676ff7f, + 0x08000000, 0x8f768376, + 0x877b767b, 0x8676ff7f, + 0x70000000, 0x8f768176, + 0x877b767b, 0x8676ff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8f34306, 0x8673c173, + 0xbf840019, 0x8e738673, + 0x8e738273, 0xbefa0073, + 0xb8f22a05, 0x80728172, + 0x8e728a72, 0xb8f61605, + 0x80768176, 0x8e768676, + 0x80727672, 0x8072ff72, + 0x00000080, 0xbefa00ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x721e0000, + 0xe0510100, 0x721e0000, + 0x807cff7c, 0x00000200, + 0x8072ff72, 0x00000200, + 0xbf0a737c, 0xbf85fff6, + 0xbef20080, 0xbefe00c1, + 0xbeff00c1, 0xb8f32a05, + 0x80738173, 0x8e738273, + 0x8e7a8873, 0xbefa00ff, + 0x01000000, 0xbef60072, + 0x8072ff72, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0x8073ff73, 0x00008000, + 0xe0524000, 0x721e0000, + 0xe0524100, 0x721e0100, + 0xe0524200, 0x721e0200, + 0xe0524300, 0x721e0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8072ff72, 0x00000400, + 0xbf0a737c, 0xbf85ffee, + 0xbf9c0000, 0xe0524000, + 0x761e0000, 0xe0524100, + 0x761e0100, 0xe0524200, + 0x761e0200, 0xe0524300, + 0x761e0300, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0x80f2c072, 0xb8f31605, + 0x80738173, 0x8e738473, + 0x8e7a8273, 0xbefa00ff, + 0x01000000, 0xbefc0073, + 0xc031003c, 0x00000072, + 0x80f2c072, 0xbf8c007f, + 0x80fc907c, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff1, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xc0211cfc, + 0x00000072, 0x80728472, + 0xc0211c3c, 0x00000072, + 0x80728472, 0xc0211c7c, + 0x00000072, 0x80728472, + 0xc0211bbc, 0x00000072, + 0x80728472, 0xc0211bfc, + 0x00000072, 0x80728472, + 0xc0211d3c, 0x00000072, + 0x80728472, 0xc0211d7c, + 0x00000072, 0x80728472, + 0xc0211a3c, 0x00000072, + 0x80728472, 0xc0211a7c, + 0x00000072, 0x80728472, + 0xc0211dfc, 0x00000072, + 0x80728472, 0xc0211b3c, + 0x00000072, 0x80728472, + 0xc0211b7c, 0x00000072, + 0x80728472, 0xbf8c007f, + 0xbefc0073, 0xbefe006e, + 0xbeff006f, 0x867375ff, + 0x000003ff, 0xb9734803, + 0x867375ff, 0xfffff800, + 0x8f738b73, 0xb973a2c3, + 0xb977f801, 0x8673ff71, + 0xf0000000, 0x8f739c73, + 0x8e739073, 0xbef60080, + 0x87767376, 0x8673ff71, + 0x08000000, 0x8f739b73, + 0x8e738f73, 0x87767376, + 0x8673ff74, 0x00800000, + 0x8f739773, 0xb976f807, + 0x8671ff71, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0xb974f802, 0xbf8a0000, + 0x95807370, 0xbf810000, +}; + + +static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xbf820001, 0xbf82015a, + 0xb8f8f802, 0x89788678, + 0xb8f1f803, 0x866eff71, + 0x00000400, 0xbf850034, + 0x866eff71, 0x00000800, + 0xbf850003, 0x866eff71, + 0x00000100, 0xbf840008, + 0x866eff78, 0x00002000, + 0xbf840001, 0xbf810000, + 0x8778ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xb8eef807, 0x866fff6e, + 0x001f8000, 0x8e6f8b6f, + 0x8977ff77, 0xfc000000, + 0x87776f77, 0x896eff6e, + 0x001f8000, 0xb96ef807, + 0xb8f0f812, 0xb8f1f813, + 0x8ef08870, 0xc0071bb8, + 0x00000000, 0xbf8cc07f, + 0xc0071c38, 0x00000008, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0xb8f1f803, 0x8671ff71, + 0x000001ff, 0xbf850002, + 0x806c846c, 0x826d806d, + 0x866dff6d, 0x0000ffff, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x86fe7e7e, 0x86ea6a6a, + 0xb978f802, 0xbe801f6c, + 0x866dff6d, 0x0000ffff, + 0xbef00080, 0xb9700283, + 0xb8f02407, 0x8e709c70, + 0x876d706d, 0xb8f003c7, + 0x8e709b70, 0x876d706d, + 0xb8f0f807, 0x8670ff70, + 0x00007fff, 0xb970f807, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x87708478, 0xb970f802, + 0xbf8e0002, 0xbf88fffe, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8f11605, + 0x80718171, 0x8e718671, + 0x80707170, 0x80707e70, + 0x8271807f, 0x8671ff71, + 0x0000ffff, 0xc0471cb8, + 0x00000040, 0xbf8cc07f, + 0xc04b1d38, 0x00000048, + 0xbf8cc07f, 0xc0431e78, + 0x00000058, 0xbf8cc07f, + 0xc0471eb8, 0x0000005c, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x8670ff7f, + 0x08000000, 0x8f708370, + 0x87777077, 0x8670ff7f, + 0x70000000, 0x8f708170, + 0x87777077, 0xbefb007c, + 0xbefa0080, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f01605, 0x80708170, + 0x8e708670, 0x807a707a, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc007a, 0xc0611efa, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611b7a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611bfa, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8f1f803, + 0xbefe007c, 0xbefc007a, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611a3a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8fbf801, + 0xbefe007c, 0xbefc007a, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0x8670ff7f, + 0x04000000, 0xbeef0080, + 0x876f6f70, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f11605, 0x80718171, + 0x8e718471, 0x8e768271, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747a74, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a717c, 0xbf85ffe7, + 0xbef40172, 0xbefa0080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0xe0724000, 0x7a1d0000, + 0xe0724100, 0x7a1d0100, + 0xe0724200, 0x7a1d0200, + 0xe0724300, 0x7a1d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8f14306, 0x8671c171, + 0xbf84002c, 0xbf8a0000, + 0x8670ff6f, 0x04000000, + 0xbf840028, 0x8e718671, + 0x8e718271, 0xbef60071, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0xb8f01605, + 0x80708170, 0x8e708670, + 0x807a707a, 0x807aff7a, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x7a1d0002, + 0x68040702, 0xd0c9006a, + 0x0000e302, 0xbf87fff7, + 0xbef70000, 0xbefa00ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8f12a05, + 0x80718171, 0x8e718271, + 0x8e768871, 0xbef600ff, + 0x01000000, 0xbefc0084, + 0xbf0a717c, 0xbf840015, + 0xbf11017c, 0x8071ff71, + 0x00001000, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0xe0724000, + 0x7a1d0000, 0xe0724100, + 0x7a1d0100, 0xe0724200, + 0x7a1d0200, 0xe0724300, + 0x7a1d0300, 0x807c847c, + 0x807aff7a, 0x00000400, + 0xbf0a717c, 0xbf85ffef, + 0xbf9c0000, 0xbf8200d9, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x08000000, + 0x8f6e836e, 0x87776e77, + 0x866eff7f, 0x70000000, + 0x8f6e816e, 0x87776e77, + 0x866eff7f, 0x04000000, + 0xbf84001e, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf840019, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbef80080, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x8e76886f, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0x806fff6f, + 0x00008000, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, + 0xbef600ff, 0x01000000, + 0xbefc006f, 0xc031003a, + 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, + 0x00000078, 0x80788478, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, + 0x00000078, 0x80788478, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, + 0x00000078, 0x80788478, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, + 0x00000078, 0x80788478, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, + 0x00000078, 0x80788478, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe007a, + 0xbeff007b, 0x866f71ff, + 0x000003ff, 0xb96f4803, + 0x866f71ff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8ef1605, 0x806f816f, + 0x8e6f866f, 0x806e6f6e, + 0x806e746e, 0x826f8075, + 0x866fff6f, 0x0000ffff, + 0xc0071cb7, 0x00000040, + 0xc00b1d37, 0x00000048, + 0xc0031e77, 0x00000058, + 0xc0071eb7, 0x0000005c, + 0xbf8cc07f, 0x866fff6d, + 0xf0000000, 0x8f6f9c6f, + 0x8e6f906f, 0xbeee0080, + 0x876e6f6e, 0x866fff6d, + 0x08000000, 0x8f6f9b6f, + 0x8e6f8f6f, 0x876e6f6e, + 0x866fff70, 0x00800000, + 0x8f6f976f, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0xb970f802, 0xbf8a0000, + 0x95806f6c, 0xbf810000, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index 997a383dcb8b..a2a04bb64096 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -20,9 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#if 0 -HW (VI) source code for CWSR trap handler -#Version 18 + multiple trap handler +/* To compile this assembly code: + * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex + */ + +/* HW (VI) source code for CWSR trap handler */ +/* Version 18 + multiple trap handler */ // this performance-optimal version was originally from Seven Xu at SRDC @@ -98,6 +101,7 @@ var SWIZZLE_EN = 0 //whether we use swi /**************************************************************************/ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 @@ -149,7 +153,7 @@ var s_save_spi_init_lo = exec_lo var s_save_spi_init_hi = exec_hi //tba_lo and tba_hi need to be saved/restored -var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} +var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} var s_save_pc_hi = ttmp1 var s_save_exec_lo = ttmp2 var s_save_exec_hi = ttmp3 @@ -319,6 +323,10 @@ end s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC end + // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. + s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp + L_SLEEP: s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 @@ -1007,8 +1015,6 @@ end s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS - s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS - //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) @@ -1044,6 +1050,7 @@ end s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu @@ -1127,258 +1134,3 @@ end function get_hwreg_size_bytes return 128 //HWREG size 128 bytes end - - -#endif - -static const uint32_t cwsr_trap_gfx8_hex[] = { - 0xbf820001, 0xbf820123, - 0xb8f4f802, 0x89748674, - 0xb8f5f803, 0x8675ff75, - 0x00000400, 0xbf850011, - 0xc00a1e37, 0x00000000, - 0xbf8c007f, 0x87777978, - 0xbf840002, 0xb974f802, - 0xbe801d78, 0xb8f5f803, - 0x8675ff75, 0x000001ff, - 0xbf850002, 0x80708470, - 0x82718071, 0x8671ff71, - 0x0000ffff, 0xb974f802, - 0xbe801f70, 0xb8f5f803, - 0x8675ff75, 0x00000100, - 0xbf840006, 0xbefa0080, - 0xb97a0203, 0x8671ff71, - 0x0000ffff, 0x80f08870, - 0x82f18071, 0xbefa0080, - 0xb97a0283, 0xbef60068, - 0xbef70069, 0xb8fa1c07, - 0x8e7a9c7a, 0x87717a71, - 0xb8fa03c7, 0x8e7a9b7a, - 0x87717a71, 0xb8faf807, - 0x867aff7a, 0x00007fff, - 0xb97af807, 0xbef2007e, - 0xbef3007f, 0xbefe0180, - 0xbf900004, 0xbf8e0002, - 0xbf88fffe, 0xbef8007e, - 0x8679ff7f, 0x0000ffff, - 0x8779ff79, 0x00040000, - 0xbefa0080, 0xbefb00ff, - 0x00807fac, 0x867aff7f, - 0x08000000, 0x8f7a837a, - 0x877b7a7b, 0x867aff7f, - 0x70000000, 0x8f7a817a, - 0x877b7a7b, 0xbeef007c, - 0xbeee0080, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x806e7a6e, - 0xbefa0084, 0xbefa00ff, - 0x01000000, 0xbefe007c, - 0xbefc006e, 0xc0611bfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611c3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611c7c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611cbc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611cfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611d3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xb8f5f803, - 0xbefe007c, 0xbefc006e, - 0xc0611d7c, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xbefe007c, 0xbefc006e, - 0xc0611dbc, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xbefe007c, 0xbefc006e, - 0xc0611dfc, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xb8eff801, 0xbefe007c, - 0xbefc006e, 0xc0611bfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611b3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611b7c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbef30080, - 0x8773737a, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8f51605, 0x80758175, - 0x8e758475, 0x8e7a8275, - 0xbefa00ff, 0x01000000, - 0xbef60178, 0x80786e78, - 0x82798079, 0xbefc0080, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003c, 0x00000000, - 0xc06b013c, 0x00000010, - 0xc06b023c, 0x00000020, - 0xc06b033c, 0x00000030, - 0x8078c078, 0x82798079, - 0x807c907c, 0xbf0a757c, - 0xbf85ffeb, 0xbef80176, - 0xbeee0080, 0xbefe00c1, - 0xbeff00c1, 0xbefa00ff, - 0x01000000, 0xe0724000, - 0x6e1e0000, 0xe0724100, - 0x6e1e0100, 0xe0724200, - 0x6e1e0200, 0xe0724300, - 0x6e1e0300, 0xbefe00c1, - 0xbeff00c1, 0xb8f54306, - 0x8675c175, 0xbf84002c, - 0xbf8a0000, 0x867aff73, - 0x04000000, 0xbf840028, - 0x8e758675, 0x8e758275, - 0xbefa0075, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x806e7a6e, - 0x806eff6e, 0x00000080, - 0xbefa00ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe80007b, - 0x867bff7b, 0xff7fffff, - 0x877bff7b, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8c007f, 0xe0765000, - 0x6e1e0002, 0x32040702, - 0xd0c9006a, 0x0000eb02, - 0xbf87fff7, 0xbefb0000, - 0xbeee00ff, 0x00000400, - 0xbefe00c1, 0xbeff00c1, - 0xb8f52a05, 0x80758175, - 0x8e758275, 0x8e7a8875, - 0xbefa00ff, 0x01000000, - 0xbefc0084, 0xbf0a757c, - 0xbf840015, 0xbf11017c, - 0x8075ff75, 0x00001000, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0xe0724000, 0x6e1e0000, - 0xe0724100, 0x6e1e0100, - 0xe0724200, 0x6e1e0200, - 0xe0724300, 0x6e1e0300, - 0x807c847c, 0x806eff6e, - 0x00000400, 0xbf0a757c, - 0xbf85ffef, 0xbf9c0000, - 0xbf8200ca, 0xbef8007e, - 0x8679ff7f, 0x0000ffff, - 0x8779ff79, 0x00040000, - 0xbefa0080, 0xbefb00ff, - 0x00807fac, 0x8676ff7f, - 0x08000000, 0x8f768376, - 0x877b767b, 0x8676ff7f, - 0x70000000, 0x8f768176, - 0x877b767b, 0x8676ff7f, - 0x04000000, 0xbf84001e, - 0xbefe00c1, 0xbeff00c1, - 0xb8f34306, 0x8673c173, - 0xbf840019, 0x8e738673, - 0x8e738273, 0xbefa0073, - 0xb8f22a05, 0x80728172, - 0x8e728a72, 0xb8f61605, - 0x80768176, 0x8e768676, - 0x80727672, 0x8072ff72, - 0x00000080, 0xbefa00ff, - 0x01000000, 0xbefc0080, - 0xe0510000, 0x721e0000, - 0xe0510100, 0x721e0000, - 0x807cff7c, 0x00000200, - 0x8072ff72, 0x00000200, - 0xbf0a737c, 0xbf85fff6, - 0xbef20080, 0xbefe00c1, - 0xbeff00c1, 0xb8f32a05, - 0x80738173, 0x8e738273, - 0x8e7a8873, 0xbefa00ff, - 0x01000000, 0xbef60072, - 0x8072ff72, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0x8073ff73, 0x00008000, - 0xe0524000, 0x721e0000, - 0xe0524100, 0x721e0100, - 0xe0524200, 0x721e0200, - 0xe0524300, 0x721e0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8072ff72, 0x00000400, - 0xbf0a737c, 0xbf85ffee, - 0xbf9c0000, 0xe0524000, - 0x761e0000, 0xe0524100, - 0x761e0100, 0xe0524200, - 0x761e0200, 0xe0524300, - 0x761e0300, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0x80f2c072, 0xb8f31605, - 0x80738173, 0x8e738473, - 0x8e7a8273, 0xbefa00ff, - 0x01000000, 0xbefc0073, - 0xc031003c, 0x00000072, - 0x80f2c072, 0xbf8c007f, - 0x80fc907c, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff1, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0xbefa0084, 0xbefa00ff, - 0x01000000, 0xc0211cfc, - 0x00000072, 0x80728472, - 0xc0211c3c, 0x00000072, - 0x80728472, 0xc0211c7c, - 0x00000072, 0x80728472, - 0xc0211bbc, 0x00000072, - 0x80728472, 0xc0211bfc, - 0x00000072, 0x80728472, - 0xc0211d3c, 0x00000072, - 0x80728472, 0xc0211d7c, - 0x00000072, 0x80728472, - 0xc0211a3c, 0x00000072, - 0x80728472, 0xc0211a7c, - 0x00000072, 0x80728472, - 0xc0211dfc, 0x00000072, - 0x80728472, 0xc0211b3c, - 0x00000072, 0x80728472, - 0xc0211b7c, 0x00000072, - 0x80728472, 0xbf8c007f, - 0x8671ff71, 0x0000ffff, - 0xbefc0073, 0xbefe006e, - 0xbeff006f, 0x867375ff, - 0x000003ff, 0xb9734803, - 0x867375ff, 0xfffff800, - 0x8f738b73, 0xb973a2c3, - 0xb977f801, 0x8673ff71, - 0xf0000000, 0x8f739c73, - 0x8e739073, 0xbef60080, - 0x87767376, 0x8673ff71, - 0x08000000, 0x8f739b73, - 0x8e738f73, 0x87767376, - 0x8673ff74, 0x00800000, - 0x8f739773, 0xb976f807, - 0x86fe7e7e, 0x86ea6a6a, - 0xb974f802, 0xbf8a0000, - 0x95807370, 0xbf810000, -}; - diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm new file mode 100644 index 000000000000..998be96be736 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -0,0 +1,1214 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* To compile this assembly code: + * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex + */ + +/* HW (GFX9) source code for CWSR trap handler */ +/* Version 18 + multiple trap handler */ + +// this performance-optimal version was originally from Seven Xu at SRDC + +// Revison #18 --... +/* Rev History +** #1. Branch from gc dv. //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) +** #4. SR Memory Layout: +** 1. VGPR-SGPR-HWREG-{LDS} +** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. +** #5. Update: 1. Accurate g8sr_ts_save_d timestamp +** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) +** #7. Update: 1. don't barrier if noLDS +** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version +** 2. Fix SQ issue by s_sleep 2 +** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last +** 2. optimize s_buffer save by burst 16sgprs... +** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs. +** #11. Update 1. Add 2 more timestamp for debug version +** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance +** #13. Integ 1. Always use MUBUF for PV trap shader... +** #14. Update 1. s_buffer_store soft clause... +** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot. +** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree +** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part] +** 2. PERF - Save LDS before save VGPR to cover LDS save long latency... +** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32 +** 2. FUNC - Handle non-CWSR traps +*/ + +var G8SR_WDMEM_HWREG_OFFSET = 0 +var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes + +// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore. + +var G8SR_DEBUG_TIMESTAMP = 0 +var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset +var s_g8sr_ts_save_s = s[34:35] // save start +var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi +var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ +var s_g8sr_ts_save_d = s[40:41] // save end +var s_g8sr_ts_restore_s = s[42:43] // restore start +var s_g8sr_ts_restore_d = s[44:45] // restore end + +var G8SR_VGPR_SR_IN_DWX4 = 0 +var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes +var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 + + +/*************************************************************************/ +/* control on how to run the shader */ +/*************************************************************************/ +//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) +var EMU_RUN_HACK = 0 +var EMU_RUN_HACK_RESTORE_NORMAL = 0 +var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 +var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 +var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +var SAVE_LDS = 1 +var WG_BASE_ADDR_LO = 0x9000a000 +var WG_BASE_ADDR_HI = 0x0 +var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem +var CTX_SAVE_CONTROL = 0x0 +var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL +var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) +var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write +var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes +var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing +var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency + +/**************************************************************************/ +/* variables */ +/**************************************************************************/ +var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 +var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 +var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 +var SQ_WAVE_STATUS_HALT_MASK = 0x2000 + +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits + +var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 +var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask +var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 +var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 +var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 + +var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME +var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME + +var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 +var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 + +var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data +var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 + +/* Save */ +var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes +var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE + +var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_SAVE_SPI_INIT_ATC_SHIFT = 27 +var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used +var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME + +var s_save_spi_init_lo = exec_lo +var s_save_spi_init_hi = exec_hi + +var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} +var s_save_pc_hi = ttmp1 +var s_save_exec_lo = ttmp2 +var s_save_exec_hi = ttmp3 +var s_save_tmp = ttmp4 +var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine +var s_save_xnack_mask_lo = ttmp6 +var s_save_xnack_mask_hi = ttmp7 +var s_save_buf_rsrc0 = ttmp8 +var s_save_buf_rsrc1 = ttmp9 +var s_save_buf_rsrc2 = ttmp10 +var s_save_buf_rsrc3 = ttmp11 +var s_save_status = ttmp12 +var s_save_mem_offset = ttmp14 +var s_save_alloc_size = s_save_trapsts //conflict +var s_save_m0 = ttmp15 +var s_save_ttmps_lo = s_save_tmp //no conflict +var s_save_ttmps_hi = s_save_trapsts //no conflict + +/* Restore */ +var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC + +var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 +var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 +var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT +var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK +var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT +var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK + +var s_restore_spi_init_lo = exec_lo +var s_restore_spi_init_hi = exec_hi + +var s_restore_mem_offset = ttmp12 +var s_restore_alloc_size = ttmp3 +var s_restore_tmp = ttmp2 +var s_restore_mem_offset_save = s_restore_tmp //no conflict + +var s_restore_m0 = s_restore_alloc_size //no conflict + +var s_restore_mode = ttmp7 + +var s_restore_pc_lo = ttmp0 +var s_restore_pc_hi = ttmp1 +var s_restore_exec_lo = ttmp14 +var s_restore_exec_hi = ttmp15 +var s_restore_status = ttmp4 +var s_restore_trapsts = ttmp5 +var s_restore_xnack_mask_lo = xnack_mask_lo +var s_restore_xnack_mask_hi = xnack_mask_hi +var s_restore_buf_rsrc0 = ttmp8 +var s_restore_buf_rsrc1 = ttmp9 +var s_restore_buf_rsrc2 = ttmp10 +var s_restore_buf_rsrc3 = ttmp11 +var s_restore_ttmps_lo = s_restore_tmp //no conflict +var s_restore_ttmps_hi = s_restore_alloc_size //no conflict + +/**************************************************************************/ +/* trap handler entry points */ +/**************************************************************************/ +/* Shader Main*/ + +shader main + asic(GFX9) + type(CS) + + + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore + //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC + s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. + s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE + //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE + s_branch L_SKIP_RESTORE //NOT restore, SAVE actually + else + s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save + end + +L_JUMP_TO_RESTORE: + s_branch L_RESTORE //restore + +L_SKIP_RESTORE: + + s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + s_cbranch_scc1 L_SAVE //this is the operation for save + + // ********* Handle non-CWSR traps ******************* +if (!EMU_RUN_HACK) + // Illegal instruction is a non-maskable exception which blocks context save. + // Halt the wavefront and return from the trap. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_cbranch_scc1 L_HALT_WAVE + + // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA. + // Instead, halt the wavefront and return from the trap. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK + s_cbranch_scc0 L_FETCH_2ND_TRAP + +L_HALT_WAVE: + // If STATUS.HALT is set then this fault must come from SQC instruction fetch. + // We cannot prevent further faults so just terminate the wavefront. + s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_cbranch_scc0 L_NOT_ALREADY_HALTED + s_endpgm +L_NOT_ALREADY_HALTED: + s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + + // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. + // Rewind the PC to prevent this from occurring. The debugger compensates for this. + s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + +L_FETCH_2ND_TRAP: + // Preserve and clear scalar XNACK state before issuing scalar reads. + // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. + s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) + s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK + s_or_b32 ttmp11, ttmp11, ttmp3 + + s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + + // Read second-level TBA/TMA from first-level TMA and jump if available. + // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) + // ttmp12 holds SQ_WAVE_STATUS + s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) + s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) + s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA + s_waitcnt lgkmcnt(0) + s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA + s_waitcnt lgkmcnt(0) + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] + s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set + s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler + +L_NO_NEXT_TRAP: + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception + s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. + s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 + s_addc_u32 ttmp1, ttmp1, 0 +L_EXCP_CASE: + s_and_b32 ttmp1, ttmp1, 0xFFFF + + // Restore SQ_WAVE_IB_STS. + s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + + // Restore SQ_WAVE_STATUS. + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status + + s_rfe_b64 [ttmp0, ttmp1] +end + // ********* End handling of non-CWSR traps ******************* + +/**************************************************************************/ +/* save routine */ +/**************************************************************************/ + +L_SAVE: + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_s + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? +end + + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + + s_mov_b32 s_save_tmp, 0 //clear saveCtx bit + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS + s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG + + s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp + + /* inform SPI the readiness and wait for SPI's go signal */ + s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI + s_mov_b32 s_save_exec_hi, exec_hi + s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_sq_save_msg + s_waitcnt lgkmcnt(0) +end + + if (EMU_RUN_HACK) + + else + s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC + end + + // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. + s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp + + L_SLEEP: + s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 + + if (EMU_RUN_HACK) + + else + s_cbranch_execz L_SLEEP + end + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_spi_wrexec + s_waitcnt lgkmcnt(0) +end + + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_save_tmp, v9, 0 + s_lshr_b32 s_save_tmp, s_save_tmp, 6 + s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE + s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL + else + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL + else + end + + // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + get_vgpr_size_bytes(s_save_ttmps_lo) + get_sgpr_size_bytes(s_save_ttmps_hi) + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo + s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0 + s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF + s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1 + ack_sqc_store_workaround() + s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1 + ack_sqc_store_workaround() + s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1 + ack_sqc_store_workaround() + s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1 + ack_sqc_store_workaround() + + /* setup Resource Contants */ + s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo + s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited + s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK + s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK + s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE + + //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?) + s_mov_b32 s_save_m0, m0 //save M0 + + /* global mem offset */ + s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0 + + + + + /* save HW registers */ + ////////////////////////////// + + L_SAVE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SGPR) + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + + + s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0 + + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + end + + write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC + write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC + write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS + + //s_save_trapsts conflicts with s_save_alloc_size + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS + + write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO + write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI + + //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 + s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) + + + + /* the first wave in the threadgroup */ + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit + s_mov_b32 s_save_exec_hi, 0x0 + s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26] + + + /* save SGPRs */ + // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save... + ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0 + //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0 + s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0 + s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset + s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0 + + s_mov_b32 m0, 0x0 //SGPR initial index value =0 + s_nop 0x0 //Manually inserted wait states + L_SAVE_SGPR_LOOP: + // SGPR is allocated in 16 SGPR granularity + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] + s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] + s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] + + write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4 + s_add_u32 m0, m0, 16 //next sgpr index + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete? + // restore s_save_buf_rsrc0,1 + //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo + s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo + + + + + /* save first 4 VGPR, then LDS save could use */ + // each wave will alloc 4 vgprs at least... + ///////////////////////////////////////////////////////////////////////////////////// + + s_mov_b32 s_save_mem_offset, 0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + s_mov_b32 xnack_mask_lo, 0x0 + s_mov_b32 xnack_mask_hi, 0x0 + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 +end + + + + /* save LDS */ + ////////////////////////////// + + L_SAVE_LDS: + + // Change EXEC to all threads... + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE + + s_barrier //LDS is used? wait for other waves in the same TG + s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here + s_cbranch_scc0 L_SAVE_LDS_DONE + + // first wave do LDS save; + + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() + + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + +var LDS_DMA_ENABLE = 0 +var UNROLL = 0 +if UNROLL==0 && LDS_DMA_ENABLE==1 + s_mov_b32 s3, 256*2 + s_nop 0 + s_nop 0 + s_nop 0 + L_SAVE_LDS_LOOP: + //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.??? + if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + end + + s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? + +elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss + // store from higest LDS address to lowest + s_mov_b32 s3, 256*2 + s_sub_u32 m0, s_save_alloc_size, s3 + s_add_u32 s_save_mem_offset, s_save_mem_offset, m0 + s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks... + s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest + s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction + s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc + s_nop 0 + s_nop 0 + s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes + s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved + s_add_u32 s0, s0,s_save_alloc_size + s_addc_u32 s1, s1, 0 + s_setpc_b64 s[0:1] + + + for var i =0; i< 128; i++ + // be careful to make here a 64Byte aligned address, which could improve performance... + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + + if i!=127 + s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline + s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3 + end + end + +else // BUFFER_STORE + v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0 + v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid + v_mul_i32_i24 v2, v3, 8 // tid*8 + v_mov_b32 v3, 256*2 + s_mov_b32 m0, 0x10000 + s_mov_b32 s0, s_save_buf_rsrc3 + s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF // disable add_tid + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000 //DFMT + +L_SAVE_LDS_LOOP_VECTOR: + ds_read_b64 v[0:1], v2 //x =LDS[a], byte address + s_waitcnt lgkmcnt(0) + buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1 +// s_waitcnt vmcnt(0) +// v_add_u32 v2, vcc[0:1], v2, v3 + v_add_u32 v2, v2, v3 + v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size + s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR + + // restore rsrc3 + s_mov_b32 s_save_buf_rsrc3, s0 + +end + +L_SAVE_LDS_DONE: + + + /* save VGPRs - set the Rest VGPRs */ + ////////////////////////////////////////////////////////////////////////////////////// + L_SAVE_VGPR: + // VGPR SR memory offset: 0 + // TODO rearrange the RSRC words to use swizzle for VGPR save... + + s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, 4 // skip first 4 VGPRs + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs + + s_set_gpr_idx_on m0, 0x1 // This will change M0 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0 +L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 // v0 = v[0+m0] + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + s_add_u32 m0, m0, 4 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +L_SAVE_VGPR_LOOP_END: + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + // VGPR store using dw burst + s_mov_b32 m0, 0x4 //VGPR initial index value =0 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_END + + + s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later + + L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 //v0 = v[0+m0] + v_mov_b32 v1, v1 //v0 = v[0+m0] + v_mov_b32 v2, v2 //v0 = v[0+m0] + v_mov_b32 v3, v3 //v0 = v[0+m0] + + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 + end + + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +end + +L_SAVE_VGPR_END: + + + + + + + /* S_PGM_END_SAVED */ //FIXME graphics ONLY + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + s_rfe_b64 s_save_pc_lo //Return to the main shader program + else + end + +// Save Done timestamp +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_d + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? + // Need reset rsrc2?? + s_mov_b32 m0, s_save_mem_offset + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1 +end + + + s_branch L_END_PGM + + + +/**************************************************************************/ +/* restore routine */ +/**************************************************************************/ + +L_RESTORE: + /* Setup Resource Contants */ + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_restore_tmp, v9, 0 + s_lshr_b32 s_restore_tmp, s_restore_tmp, 6 + s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE + s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL + else + end + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_s + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? + // tma_lo/hi are sgpr 110, 111, which will not used for 112 SGPR allocated case... + s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0] + s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //backup ts to ttmp0/1, sicne exec will be finally restored.. +end + + + + s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo + s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) + s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK + s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position + s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK + s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position + s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE + + /* global mem offset */ +// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0 + + /* the first wave in the threadgroup */ + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK + s_cbranch_scc0 L_RESTORE_VGPR + + /* restore LDS */ + ////////////////////////////// + L_RESTORE_LDS: + + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow??? + + + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + L_RESTORE_LDS_LOOP: + if (SAVE_LDS) + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW + end + s_add_u32 m0, m0, 256*2 // 128 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete? + + + /* restore VGPRs */ + ////////////////////////////// + L_RESTORE_VGPR: + // VGPR SR memory offset : 0 + s_mov_b32 s_restore_mem_offset, 0x0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + +if G8SR_VGPR_SR_IN_DWX4 + get_vgpr_size_bytes(s_restore_mem_offset) + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, s_restore_alloc_size + s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0 + +L_RESTORE_VGPR_LOOP: + buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + s_waitcnt vmcnt(0) + s_sub_u32 m0, m0, 4 + v_mov_b32 v0, v0 // v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_cmp_eq_u32 m0, 0x8000 + s_cbranch_scc0 L_RESTORE_VGPR_LOOP + s_set_gpr_idx_off + + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes + +else + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_mov_b32 m0, 4 //VGPR initial index value = 1 + s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later + + L_RESTORE_VGPR_LOOP: + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 + end + s_waitcnt vmcnt(0) //ensure data ready + v_mov_b32 v0, v0 //v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? + s_set_gpr_idx_off + /* VGPR restore on v0 */ + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 + end + +end + + /* restore SGPRs */ + ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4 // restore SGPR from S[n] to S[0], by 16 sgprs group + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + s_mov_b32 m0, s_restore_alloc_size + + L_RESTORE_SGPR_LOOP: + read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) //PV: further performance improvement can be made + s_waitcnt lgkmcnt(0) //ensure data ready + + s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + s_movreld_b64 s8, s8 + s_movreld_b64 s10, s10 + s_movreld_b64 s12, s12 + s_movreld_b64 s14, s14 + + s_cmp_eq_u32 m0, 0 //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc0 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete? + + /* restore HW registers */ + ////////////////////////////// + L_RESTORE_HWREG: + + +if G8SR_DEBUG_TIMESTAMP + s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo + s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi +end + + // HWREG SR memory offset : size(VGPR)+size(SGPR) + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + + + s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0 + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //EXEC + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) //STATUS + read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) //TRAPSTS + read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO + read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE + + s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS + + //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi + + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 + //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore + s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode + + // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + get_vgpr_size_bytes(s_restore_ttmps_lo) + get_sgpr_size_bytes(s_restore_ttmps_hi) + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 + s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 + s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF + s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 + s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1 + s_waitcnt lgkmcnt(0) + + //reuse s_restore_m0 as a temp register + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT + s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero + s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT + s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 + s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT + s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + + s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_d + s_waitcnt lgkmcnt(0) +end + +// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution + s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc + + +/**************************************************************************/ +/* the END */ +/**************************************************************************/ +L_END_PGM: + s_endpgm + +end + + +/**************************************************************************/ +/* the helper functions */ +/**************************************************************************/ + +//Only for save hwreg to mem +function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) + s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on + s_mov_b32 m0, s_mem_offset + s_buffer_store_dword s, s_rsrc, m0 glc:1 + ack_sqc_store_workaround() + s_add_u32 s_mem_offset, s_mem_offset, 4 + s_mov_b32 m0, exec_lo +end + + +// HWREG are saved before SGPRs, so all HWREG could be use. +function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset) + + s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1 + ack_sqc_store_workaround() + s_add_u32 s_rsrc[0], s_rsrc[0], 4*16 + s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc +end + + +function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 + s_add_u32 s_mem_offset, s_mem_offset, 4 +end + +function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1 + s_sub_u32 s_mem_offset, s_mem_offset, 4*16 +end + + + +function get_lds_size_bytes(s_lds_size_byte) + // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW + s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) // lds_size + s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW +end + +function get_vgpr_size_bytes(s_vgpr_size_byte) + s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible +end + +function get_sgpr_size_bytes(s_sgpr_size_byte) + s_getreg_b32 s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_sgpr_size_byte, s_sgpr_size_byte, 1 + s_lshl_b32 s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4 (non-zero value) +end + +function get_hwreg_size_bytes + return 128 //HWREG size 128 bytes +end + +function ack_sqc_store_workaround + if ACK_SQC_STORE + s_waitcnt lgkmcnt(0) + end +end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 59808a39ecf4..f64c5551cdba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -233,7 +233,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, pr_debug("Queue Size: 0x%llX, %u\n", q_properties->queue_size, args->ring_size); - pr_debug("Queue r/w Pointers: %p, %p\n", + pr_debug("Queue r/w Pointers: %px, %px\n", q_properties->read_ptr, q_properties->write_ptr); @@ -292,8 +292,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, /* Return gpu_id as doorbell offset for mmap usage */ - args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id); + args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL; + args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); args->doorbell_offset <<= PAGE_SHIFT; + if (KFD_IS_SOC15(dev->device_info->asic_family)) + /* On SOC15 ASICs, doorbell allocation must be + * per-device, and independent from the per-process + * queue_id. Return the doorbell offset within the + * doorbell aperture to user mode. + */ + args->doorbell_offset |= q_properties.doorbell_off; mutex_unlock(&p->mutex); @@ -1296,8 +1304,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, return -EINVAL; } - devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), - GFP_KERNEL); + devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), + GFP_KERNEL); if (!devices_arr) return -ENOMEM; @@ -1405,8 +1413,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, return -EINVAL; } - devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), - GFP_KERNEL); + devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), + GFP_KERNEL); if (!devices_arr) return -ENOMEM; @@ -1645,23 +1653,33 @@ err_i1: static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) { struct kfd_process *process; + struct kfd_dev *dev = NULL; + unsigned long vm_pgoff; + unsigned int gpu_id; process = kfd_get_process(current); if (IS_ERR(process)) return PTR_ERR(process); - if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) == - KFD_MMAP_DOORBELL_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK; - return kfd_doorbell_mmap(process, vma); - } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) == - KFD_MMAP_EVENTS_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK; + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff); + gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff); + if (gpu_id) + dev = kfd_device_by_id(gpu_id); + + switch (vm_pgoff & KFD_MMAP_TYPE_MASK) { + case KFD_MMAP_TYPE_DOORBELL: + if (!dev) + return -ENODEV; + return kfd_doorbell_mmap(dev, process, vma); + + case KFD_MMAP_TYPE_EVENTS: return kfd_event_mmap(process, vma); - } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) == - KFD_MMAP_RESERVED_MEM_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK; - return kfd_reserved_mem_mmap(process, vma); + + case KFD_MMAP_TYPE_RESERVED_MEM: + if (!dev) + return -ENODEV; + return kfd_reserved_mem_mmap(dev, process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 4f126ef6139b..296b3f230280 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -132,6 +132,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { #define fiji_cache_info carrizo_cache_info #define polaris10_cache_info carrizo_cache_info #define polaris11_cache_info carrizo_cache_info +/* TODO - check & update Vega10 cache details */ +#define vega10_cache_info carrizo_cache_info +#define raven_cache_info carrizo_cache_info static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) @@ -603,6 +606,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = polaris11_cache_info; num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); break; + case CHIP_VEGA10: + pcache_info = vega10_cache_info; + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + break; + case CHIP_RAVEN: + pcache_info = raven_cache_info; + num_of_cache_types = ARRAY_SIZE(raven_cache_info); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3346699960dd..7ee6cec2c060 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -20,16 +20,13 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) -#include <linux/amd-iommu.h> -#endif #include <linux/bsearch.h> #include <linux/pci.h> #include <linux/slab.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" -#include "cwsr_trap_handler_gfx8.asm" +#include "cwsr_trap_handler.h" #include "kfd_iommu.h" #define MQD_SIZE_ALIGNED 768 @@ -41,6 +38,7 @@ static const struct kfd_device_info kaveri_device_info = { .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -55,6 +53,7 @@ static const struct kfd_device_info carrizo_device_info = { .max_pasid_bits = 16, /* max num of queues for CZ.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -70,6 +69,7 @@ static const struct kfd_device_info hawaii_device_info = { .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -83,6 +83,7 @@ static const struct kfd_device_info tonga_device_info = { .asic_family = CHIP_TONGA, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -96,6 +97,7 @@ static const struct kfd_device_info tonga_vf_device_info = { .asic_family = CHIP_TONGA, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -109,6 +111,7 @@ static const struct kfd_device_info fiji_device_info = { .asic_family = CHIP_FIJI, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -122,6 +125,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .asic_family = CHIP_FIJI, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -136,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = { .asic_family = CHIP_POLARIS10, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -149,6 +154,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .asic_family = CHIP_POLARIS10, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -162,6 +168,7 @@ static const struct kfd_device_info polaris11_device_info = { .asic_family = CHIP_POLARIS11, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -171,6 +178,34 @@ static const struct kfd_device_info polaris11_device_info = { .needs_pci_atomics = true, }; +static const struct kfd_device_info vega10_device_info = { + .asic_family = CHIP_VEGA10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info vega10_vf_device_info = { + .asic_family = CHIP_VEGA10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + struct kfd_deviceid { unsigned short did; @@ -250,6 +285,15 @@ static const struct kfd_deviceid supported_devices[] = { { 0x67EB, &polaris11_device_info }, /* Polaris11 */ { 0x67EF, &polaris11_device_info }, /* Polaris11 */ { 0x67FF, &polaris11_device_info }, /* Polaris11 */ + { 0x6860, &vega10_device_info }, /* Vega10 */ + { 0x6861, &vega10_device_info }, /* Vega10 */ + { 0x6862, &vega10_device_info }, /* Vega10 */ + { 0x6863, &vega10_device_info }, /* Vega10 */ + { 0x6864, &vega10_device_info }, /* Vega10 */ + { 0x6867, &vega10_device_info }, /* Vega10 */ + { 0x6868, &vega10_device_info }, /* Vega10 */ + { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/ + { 0x687F, &vega10_device_info }, /* Vega10 */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -279,7 +323,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) { struct kfd_dev *kfd; - + int ret; const struct kfd_device_info *device_info = lookup_device_info(pdev->device); @@ -288,19 +332,18 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return NULL; } - if (device_info->needs_pci_atomics) { - /* Allow BIF to recode atomics to PCIe 3.0 - * AtomicOps. 32 and 64-bit requests are possible and - * must be supported. - */ - if (pci_enable_atomic_ops_to_root(pdev, - PCI_EXP_DEVCAP2_ATOMIC_COMP32 | - PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { - dev_info(kfd_device, - "skipped device %x:%x, PCI rejects atomics", - pdev->vendor, pdev->device); - return NULL; - } + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. + * 32 and 64-bit requests are possible and must be + * supported. + */ + ret = pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64); + if (device_info->needs_pci_atomics && ret < 0) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics\n", + pdev->vendor, pdev->device); + return NULL; } kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); @@ -323,10 +366,16 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, static void kfd_cwsr_init(struct kfd_dev *kfd) { if (cwsr_enable && kfd->device_info->supports_cwsr) { - BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + if (kfd->device_info->asic_family < CHIP_VEGA10) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx8_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); + } else { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx9_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); + } - kfd->cwsr_isa = cwsr_trap_gfx8_hex; - kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); kfd->cwsr_enabled = true; } } @@ -541,6 +590,44 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_unlock(&kfd->interrupt_lock); } +int kgd2kfd_quiesce_mm(struct mm_struct *mm) +{ + struct kfd_process *p; + int r; + + /* Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function increments the process ref count. + */ + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ESRCH; + + r = kfd_process_evict_queues(p); + + kfd_unref_process(p); + return r; +} + +int kgd2kfd_resume_mm(struct mm_struct *mm) +{ + struct kfd_process *p; + int r; + + /* Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function increments the process ref count. + */ + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ESRCH; + + r = kfd_process_restore_queues(p); + + kfd_unref_process(p); + return r; +} + /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will * prepare for safe eviction of KFD BOs that belong to the specified * process. @@ -652,7 +739,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) return -ENOMEM; - *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); if ((*mem_obj) == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d55d29d31da4..668ad07ebe1f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -110,6 +110,57 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, qpd->sh_mem_bases); } +static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) +{ + struct kfd_dev *dev = qpd->dqm->dev; + + if (!KFD_IS_SOC15(dev->device_info->asic_family)) { + /* On pre-SOC15 chips we need to use the queue ID to + * preserve the user mode ABI. + */ + q->doorbell_id = q->properties.queue_id; + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + /* For SDMA queues on SOC15, use static doorbell + * assignments based on the engine and queue. + */ + q->doorbell_id = dev->shared_resources.sdma_doorbell + [q->properties.sdma_engine_id] + [q->properties.sdma_queue_id]; + } else { + /* For CP queues on SOC15 reserve a free doorbell ID */ + unsigned int found; + + found = find_first_zero_bit(qpd->doorbell_bitmap, + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); + if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { + pr_debug("No doorbells available"); + return -EBUSY; + } + set_bit(found, qpd->doorbell_bitmap); + q->doorbell_id = found; + } + + q->properties.doorbell_off = + kfd_doorbell_id_to_offset(dev, q->process, + q->doorbell_id); + + return 0; +} + +static void deallocate_doorbell(struct qcm_process_device *qpd, + struct queue *q) +{ + unsigned int old; + struct kfd_dev *dev = qpd->dqm->dev; + + if (!KFD_IS_SOC15(dev->device_info->asic_family) || + q->properties.type == KFD_QUEUE_TYPE_SDMA) + return; + + old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); + WARN_ON(!old); +} + static int allocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -145,15 +196,19 @@ static int allocate_vmid(struct device_queue_manager *dqm, static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, struct qcm_process_device *qpd) { - uint32_t len; + const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf; + int ret; if (!qpd->ib_kaddr) return -ENOMEM; - len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); + ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); + if (ret) + return ret; return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, - qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len); + qpd->ib_base, (uint32_t *)qpd->ib_kaddr, + pmf->release_mem_size / sizeof(uint32_t)); } static void deallocate_vmid(struct device_queue_manager *dqm, @@ -301,10 +356,14 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, if (retval) return retval; + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_hqd; + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_hqd; + goto out_deallocate_doorbell; pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", q->pipe, q->queue); @@ -324,6 +383,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, out_uninit_mqd: mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_hqd: deallocate_hqd(dqm, q); @@ -357,6 +418,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, } dqm->total_queue_count--; + deallocate_doorbell(qpd, q); + retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, @@ -861,6 +924,10 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_sdma_queue; + pr_debug("SDMA id is: %d\n", q->sdma_id); pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -869,7 +936,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_sdma_queue; + goto out_deallocate_doorbell; retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); if (retval) @@ -879,6 +946,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, out_uninit_mqd: mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: deallocate_sdma_queue(dqm, q->sdma_id); @@ -1070,12 +1139,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; } + + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_sdma_queue; + mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; - goto out_deallocate_sdma_queue; + goto out_deallocate_doorbell; } /* * Eviction state logic: we only mark active queues as evicted @@ -1093,7 +1167,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_sdma_queue; + goto out_deallocate_doorbell; list_add(&q->list, &qpd->queues_list); qpd->queue_count++; @@ -1117,6 +1191,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mutex_unlock(&dqm->lock); return retval; +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q->sdma_id); @@ -1257,6 +1333,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, goto failed; } + deallocate_doorbell(qpd, q); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; deallocate_sdma_queue(dqm, q->sdma_id); @@ -1308,7 +1386,10 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, void __user *alternate_aperture_base, uint64_t alternate_aperture_size) { - bool retval; + bool retval = true; + + if (!dqm->asic_ops.set_cache_memory_policy) + return retval; mutex_lock(&dqm->lock); @@ -1577,6 +1658,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_POLARIS11: device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; + + case CHIP_VEGA10: + case CHIP_RAVEN: + device_queue_manager_init_v9(&dqm->asic_ops); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); @@ -1627,6 +1713,18 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) int pipe, queue; int r = 0; + r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, + KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); + if (!r) { + seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", + KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, + KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), + KFD_CIK_HIQ_QUEUE); + seq_reg_dump(m, dump, n_regs); + + kfree(dump); + } + for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 412beff3281d..59a6b1956932 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -200,6 +200,8 @@ void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops); void device_queue_manager_init_vi_tonga( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_v9( + struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); unsigned int get_queues_num(struct device_queue_manager *dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c new file mode 100644 index 000000000000..79e5bcf6367c --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -0,0 +1,84 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_device_queue_manager.h" +#include "vega10_enum.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "sdma0/sdma0_4_0_sh_mask.h" + +static int update_qpd_v9(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd); + +void device_queue_manager_init_v9( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->update_qpd = update_qpd_v9; + asic_ops->init_sdma_vm = init_sdma_vm_v9; +} + +static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) +{ + uint32_t shared_base = pdd->lds_base >> 48; + uint32_t private_base = pdd->scratch_base >> 48; + + return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) | + private_base; +} + +static int update_qpd_v9(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + if (vega10_noretry && + !dqm->dev->device_info->needs_iommu_device) + qpd->sh_mem_config |= + 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; + + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd); + + pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases); + + return 0; +} + +static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + /* Not needed on SDMAv4 any more */ + q->properties.sdma_vm_addr = 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index ebb4da14e3df..c3744d89352c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -33,7 +33,6 @@ static DEFINE_IDA(doorbell_ida); static unsigned int max_doorbell_slices; -#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4 /* * Each device exposes a doorbell aperture, a PCI MMIO aperture that @@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices; */ /* # of doorbell bytes allocated for each process. */ -static inline size_t doorbell_process_allocation(void) +size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) { - return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES * + return roundup(kfd->device_info->doorbell_size * KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, PAGE_SIZE); } @@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd) doorbell_start_offset = roundup(kfd->shared_resources.doorbell_start_offset, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); doorbell_aperture_size = rounddown(kfd->shared_resources.doorbell_aperture_size, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); if (doorbell_aperture_size > doorbell_start_offset) doorbell_process_limit = (doorbell_aperture_size - doorbell_start_offset) / - doorbell_process_allocation(); + kfd_doorbell_process_slice(kfd); else return -ENOSPC; @@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32); kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); if (!kfd->doorbell_kernel_ptr) return -ENOMEM; @@ -127,21 +126,16 @@ void kfd_doorbell_fini(struct kfd_dev *kfd) iounmap(kfd->doorbell_kernel_ptr); } -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma) { phys_addr_t address; - struct kfd_dev *dev; /* * For simplicitly we only allow mapping of the entire doorbell * allocation of a single device & process. */ - if (vma->vm_end - vma->vm_start != doorbell_process_allocation()) - return -EINVAL; - - /* Find kfd device according to gpu id */ - dev = kfd_device_by_id(vma->vm_pgoff); - if (!dev) + if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) return -EINVAL; /* Calculate physical address of doorbell */ @@ -158,19 +152,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) " vm_flags == 0x%04lX\n" " size == 0x%04lX\n", (unsigned long long) vma->vm_start, address, vma->vm_flags, - doorbell_process_allocation()); + kfd_doorbell_process_slice(dev)); return io_remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, - doorbell_process_allocation(), + kfd_doorbell_process_slice(dev), vma->vm_page_prot); } /* get kernel iomem pointer for a doorbell */ -u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, +void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off) { u32 inx; @@ -185,6 +179,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return NULL; + inx *= kfd->device_info->doorbell_size / sizeof(u32); + /* * Calculating the kernel doorbell offset using the first * doorbell page. @@ -210,7 +206,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) mutex_unlock(&kfd->doorbell_mutex); } -inline void write_kernel_doorbell(u32 __iomem *db, u32 value) +void write_kernel_doorbell(void __iomem *db, u32 value) { if (db) { writel(value, db); @@ -218,30 +214,37 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value) } } -/* - * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1 - * to doorbells with the process's doorbell page - */ -unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, +void write_kernel_doorbell64(void __iomem *db, u64 value) +{ + if (db) { + WARN(((unsigned long)db & 7) != 0, + "Unaligned 64-bit doorbell"); + writeq(value, (u64 __iomem *)db); + pr_debug("writing %llu to doorbell address %p\n", value, db); + } +} + +unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, - unsigned int queue_id) + unsigned int doorbell_id) { /* * doorbell_id_offset accounts for doorbells taken by KGD. - * index * doorbell_process_allocation/sizeof(u32) adjusts to - * the process's doorbells. + * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to + * the process's doorbells. The offset returned is in dword + * units regardless of the ASIC-dependent doorbell size. */ return kfd->doorbell_id_offset + process->doorbell_index - * doorbell_process_allocation() / sizeof(u32) + - queue_id; + * kfd_doorbell_process_slice(kfd) / sizeof(u32) + + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); } uint64_t kfd_get_number_elems(struct kfd_dev *kfd) { uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size - kfd->shared_resources.doorbell_start_offset) / - doorbell_process_allocation() + 1; + kfd_doorbell_process_slice(kfd) + 1; return num_of_elems; @@ -251,7 +254,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, struct kfd_process *process) { return dev->doorbell_base + - process->doorbell_index * doorbell_process_allocation(); + process->doorbell_index * kfd_doorbell_process_slice(dev); } int kfd_alloc_process_doorbells(struct kfd_process *process) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 4890a90f1e44..5562e94e786a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, case KFD_EVENT_TYPE_DEBUG: ret = create_signal_event(devkfd, p, ev); if (!ret) { - *event_page_offset = KFD_MMAP_EVENTS_MASK; + *event_page_offset = KFD_MMAP_TYPE_EVENTS; *event_page_offset <<= PAGE_SHIFT; *event_slot_index = ev->event_id; } @@ -496,7 +496,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", partial_id, valid_id_bits); - if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) { /* With relatively few events, it's faster to * iterate over the event IDR */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 66852de410c8..97d5423c5673 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -275,23 +275,35 @@ * for FLAT_* / S_LOAD operations. */ -#define MAKE_GPUVM_APP_BASE(gpu_num) \ +#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \ (((uint64_t)(gpu_num) << 61) + 0x1000000000000L) #define MAKE_GPUVM_APP_LIMIT(base, size) \ (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1) -#define MAKE_SCRATCH_APP_BASE() \ +#define MAKE_SCRATCH_APP_BASE_VI() \ (((uint64_t)(0x1UL) << 61) + 0x100000000L) #define MAKE_SCRATCH_APP_LIMIT(base) \ (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) -#define MAKE_LDS_APP_BASE() \ +#define MAKE_LDS_APP_BASE_VI() \ (((uint64_t)(0x1UL) << 61) + 0x0) #define MAKE_LDS_APP_LIMIT(base) \ (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) +/* On GFXv9 the LDS and scratch apertures are programmed independently + * using the high 16 bits of the 64-bit virtual address. They must be + * in the hole, which will be the case as long as the high 16 bits are + * not 0. + * + * The aperture sizes are still 4GB implicitly. + * + * A GPUVM aperture is not applicable on GFXv9. + */ +#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48) +#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48) + /* User mode manages most of the SVM aperture address space. The low * 16MB are reserved for kernel use (CWSR trap handler and kernel IB * for now). @@ -300,6 +312,55 @@ #define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE) #define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE) +static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) +{ + /* + * node id couldn't be 0 - the three MSB bits of + * aperture shoudn't be 0 + */ + pdd->lds_base = MAKE_LDS_APP_BASE_VI(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + + if (!pdd->dev->device_info->needs_iommu_device) { + /* dGPUs: SVM aperture starting at 0 + * with small reserved space for kernel. + * Set them to CANONICAL addresses. + */ + pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_limit = + pdd->dev->shared_resources.gpuvm_size - 1; + } else { + /* set them to non CANONICAL addresses, and no SVM is + * allocated. + */ + pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1); + pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base, + pdd->dev->shared_resources.gpuvm_size); + } + + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI(); + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); +} + +static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) +{ + pdd->lds_base = MAKE_LDS_APP_BASE_V9(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + + /* Raven needs SVM to support graphic handle, etc. Leave the small + * reserved space before SVM on Raven as well, even though we don't + * have to. + * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they + * are used in Thunk to reserve SVM. + */ + pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_limit = + pdd->dev->shared_resources.gpuvm_size - 1; + + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9(); + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); +} + int kfd_init_apertures(struct kfd_process *process) { uint8_t id = 0; @@ -307,9 +368,7 @@ int kfd_init_apertures(struct kfd_process *process) struct kfd_process_device *pdd; /*Iterating over all devices*/ - while (kfd_topology_enum_kfd_devices(id, &dev) == 0 && - id < NUM_OF_SUPPORTED_GPUS) { - + while (kfd_topology_enum_kfd_devices(id, &dev) == 0) { if (!dev) { id++; /* Skip non GPU devices */ continue; @@ -318,7 +377,7 @@ int kfd_init_apertures(struct kfd_process *process) pdd = kfd_create_process_device_data(dev, process); if (!pdd) { pr_err("Failed to create process device data\n"); - return -1; + return -ENOMEM; } /* * For 64 bit process apertures will be statically reserved in @@ -330,32 +389,30 @@ int kfd_init_apertures(struct kfd_process *process) pdd->gpuvm_base = pdd->gpuvm_limit = 0; pdd->scratch_base = pdd->scratch_limit = 0; } else { - /* Same LDS and scratch apertures can be used - * on all GPUs. This allows using more dGPUs - * than placement options for apertures. - */ - pdd->lds_base = MAKE_LDS_APP_BASE(); - pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); - - pdd->scratch_base = MAKE_SCRATCH_APP_BASE(); - pdd->scratch_limit = - MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + switch (dev->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: + case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + kfd_init_apertures_vi(pdd, id); + break; + case CHIP_VEGA10: + case CHIP_RAVEN: + kfd_init_apertures_v9(pdd, id); + break; + default: + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); + return -EINVAL; + } - if (dev->device_info->needs_iommu_device) { - /* APUs: GPUVM aperture in - * non-canonical address space - */ - pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); - pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT( - pdd->gpuvm_base, - dev->shared_resources.gpuvm_size); - } else { - /* dGPUs: SVM aperture starting at 0 - * with small reserved space for kernel + if (!dev->device_info->needs_iommu_device) { + /* dGPUs: the reserved space for kernel + * before SVM */ - pdd->gpuvm_base = SVM_USER_BASE; - pdd->gpuvm_limit = - dev->shared_resources.gpuvm_size - 1; pdd->qpd.cwsr_base = SVM_CWSR_BASE; pdd->qpd.ib_base = SVM_IB_BASE; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c new file mode 100644 index 000000000000..37029baa3346 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -0,0 +1,92 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "kfd_priv.h" +#include "kfd_events.h" +#include "soc15_int.h" + + +static bool event_interrupt_isr_v9(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + uint16_t source_id, client_id, pasid, vmid; + const uint32_t *data = ih_ring_entry; + + /* Only handle interrupts from KFD VMIDs */ + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + if (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd) + return 0; + + /* If there is no valid PASID, it's likely a firmware bug */ + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) + return 0; + + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + + pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n", + client_id, source_id, pasid); + pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7]); + + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). + */ + return source_id == SOC15_INTSRC_CP_END_OF_PIPE || + source_id == SOC15_INTSRC_SDMA_TRAP || + source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || + source_id == SOC15_INTSRC_CP_BAD_OPCODE; +} + +static void event_interrupt_wq_v9(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + uint16_t source_id, client_id, pasid, vmid; + uint32_t context_id; + + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); + + if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) + kfd_signal_event_interrupt(pasid, context_id, 32); + else if (source_id == SOC15_INTSRC_SDMA_TRAP) + kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28); + else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) + kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24); + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + kfd_signal_hw_exception_event(pasid); + else if (client_id == SOC15_IH_CLIENTID_VMC || + client_id == SOC15_IH_CLIENTID_UTCL2) { + /* TODO */ + } +} + +const struct kfd_event_interrupt_class event_interrupt_class_v9 = { + .interrupt_isr = event_interrupt_isr_v9, + .interrupt_wq = event_interrupt_wq_v9, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 035c351f47c5..db6d9336b80d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -139,10 +139,12 @@ static void interrupt_wq(struct work_struct *work) { struct kfd_dev *dev = container_of(work, struct kfd_dev, interrupt_work); + uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; - uint32_t ih_ring_entry[DIV_ROUND_UP( - dev->device_info->ih_ring_entry_size, - sizeof(uint32_t))]; + if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) { + dev_err_once(kfd_chardev(), "Ring entry too small\n"); + return; + } while (dequeue_ih_ring_entry(dev, ih_ring_entry)) dev->device_info->event_interrupt_class->interrupt_wq(dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 69f496485331..476951d8c91c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->rptr_kernel = kq->rptr_mem->cpu_ptr; kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel), + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, &kq->wptr_mem); if (retval != 0) @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, size_t available_size; size_t queue_size_dwords; uint32_t wptr, rptr; + uint64_t wptr64; unsigned int *queue_address; /* When rptr == wptr, the buffer is empty. @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, * the opposite. So we can only use up to queue_size_dwords - 1 dwords. */ rptr = *kq->rptr_kernel; - wptr = *kq->wptr_kernel; + wptr = kq->pending_wptr; + wptr64 = kq->pending_wptr64; queue_address = (unsigned int *)kq->pq_kernel_addr; queue_size_dwords = kq->queue->properties.queue_size / 4; @@ -232,29 +234,33 @@ static int acquire_packet_buffer(struct kernel_queue *kq, * make sure calling functions know * acquire_packet_buffer() failed */ - *buffer_ptr = NULL; - return -ENOMEM; + goto err_no_space; } if (wptr + packet_size_in_dwords >= queue_size_dwords) { /* make sure after rolling back to position 0, there is * still enough space. */ - if (packet_size_in_dwords >= rptr) { - *buffer_ptr = NULL; - return -ENOMEM; - } + if (packet_size_in_dwords >= rptr) + goto err_no_space; + /* fill nops, roll back and start at position 0 */ while (wptr > 0) { queue_address[wptr] = kq->nop_packet; wptr = (wptr + 1) % queue_size_dwords; + wptr64++; } } *buffer_ptr = &queue_address[wptr]; kq->pending_wptr = wptr + packet_size_in_dwords; + kq->pending_wptr64 = wptr64 + packet_size_in_dwords; return 0; + +err_no_space: + *buffer_ptr = NULL; + return -ENOMEM; } static void submit_packet(struct kernel_queue *kq) @@ -270,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq) pr_debug("\n"); #endif - *kq->wptr_kernel = kq->pending_wptr; - write_kernel_doorbell(kq->queue->properties.doorbell_ptr, - kq->pending_wptr); + kq->ops_asic_specific.submit_packet(kq); } static void rollback_packet(struct kernel_queue *kq) { - kq->pending_wptr = *kq->queue->properties.write_ptr; + if (kq->dev->device_info->doorbell_size == 8) { + kq->pending_wptr64 = *kq->wptr64_kernel; + kq->pending_wptr = *kq->wptr_kernel % + (kq->queue->properties.queue_size / 4); + } else { + kq->pending_wptr = *kq->wptr_kernel; + } } struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, @@ -308,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_HAWAII: kernel_queue_init_cik(&kq->ops_asic_specific); break; + + case CHIP_VEGA10: + case CHIP_RAVEN: + kernel_queue_init_v9(&kq->ops_asic_specific); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index 594053136ee4..97aff2041a5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -72,6 +72,7 @@ struct kernel_queue { struct kfd_dev *dev; struct mqd_manager *mqd; struct queue *queue; + uint64_t pending_wptr64; uint32_t pending_wptr; unsigned int nop_packet; @@ -79,7 +80,10 @@ struct kernel_queue { uint32_t *rptr_kernel; uint64_t rptr_gpu_addr; struct kfd_mem_obj *wptr_mem; - uint32_t *wptr_kernel; + union { + uint64_t *wptr64_kernel; + uint32_t *wptr_kernel; + }; uint64_t wptr_gpu_addr; struct kfd_mem_obj *pq; uint64_t pq_gpu_addr; @@ -97,5 +101,6 @@ struct kernel_queue { void kernel_queue_init_cik(struct kernel_queue_ops *ops); void kernel_queue_init_vi(struct kernel_queue_ops *ops); +void kernel_queue_init_v9(struct kernel_queue_ops *ops); #endif /* KFD_KERNEL_QUEUE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c index a90eb440b1fb..19e54acb4125 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c @@ -26,11 +26,13 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); static void uninitialize_cik(struct kernel_queue *kq); +static void submit_packet_cik(struct kernel_queue *kq); void kernel_queue_init_cik(struct kernel_queue_ops *ops) { ops->initialize = initialize_cik; ops->uninitialize = uninitialize_cik; + ops->submit_packet = submit_packet_cik; } static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, @@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, static void uninitialize_cik(struct kernel_queue *kq) { } + +static void submit_packet_cik(struct kernel_queue *kq) +{ + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c new file mode 100644 index 000000000000..684a3bf07efd --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -0,0 +1,340 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_kernel_queue.h" +#include "kfd_device_queue_manager.h" +#include "kfd_pm4_headers_ai.h" +#include "kfd_pm4_opcodes.h" + +static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +static void uninitialize_v9(struct kernel_queue *kq); +static void submit_packet_v9(struct kernel_queue *kq); + +void kernel_queue_init_v9(struct kernel_queue_ops *ops) +{ + ops->initialize = initialize_v9; + ops->uninitialize = uninitialize_v9; + ops->submit_packet = submit_packet_v9; +} + +static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + int retval; + + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); + if (retval) + return false; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; + kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; + + memset(kq->eop_kernel_addr, 0, PAGE_SIZE); + + return true; +} + +static void uninitialize_v9(struct kernel_queue *kq) +{ + kfd_gtt_sa_free(kq->dev, kq->eop_mem); +} + +static void submit_packet_v9(struct kernel_queue *kq) +{ + *kq->wptr64_kernel = kq->pending_wptr64; + write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, + kq->pending_wptr64); +} + +static int pm_map_process_v9(struct packet_manager *pm, + uint32_t *buffer, struct qcm_process_device *qpd) +{ + struct pm4_mes_map_process *packet; + uint64_t vm_page_table_base_addr = + (uint64_t)(qpd->page_table_base) << 12; + + packet = (struct pm4_mes_map_process *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 1; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields14.gds_size = qpd->gds_size; + packet->bitfields14.num_gws = qpd->num_gws; + packet->bitfields14.num_oac = qpd->num_oac; + packet->bitfields14.sdma_enable = 1; + packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); + packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8); + packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); + packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + packet->vm_context_page_table_base_addr_lo32 = + lower_32_bits(vm_page_table_base_addr); + packet->vm_context_page_table_base_addr_hi32 = + upper_32_bits(vm_page_table_base_addr); + + return 0; +} + +static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) +{ + struct pm4_mes_runlist *packet; + + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; + + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number + * of processes in the runlist and kfd module parameter + * hws_max_conc_proc. + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); + packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_mes_runlist)); + + packet->bitfields4.ib_size = ib_size_in_dwords; + packet->bitfields4.chain = chain ? 1 : 0; + packet->bitfields4.offload_polling = 0; + packet->bitfields4.valid = 1; + packet->bitfields4.process_cnt = concurrent_proc_cnt; + packet->ordinal2 = lower_32_bits(ib); + packet->ib_base_hi = upper_32_bits(ib); + + return 0; +} + +static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) +{ + struct pm4_mes_map_queues *packet; + bool use_static = is_static; + + packet = (struct pm4_mes_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_mes_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; + + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_compute_vi; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + if (use_static) + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_latency_static_queue_vi; + break; + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.queue_type = + queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ + break; + default: + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; + } + packet->bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + +static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, bool reset, + unsigned int sdma_engine) +{ + struct pm4_mes_unmap_queues *packet; + + packet = (struct pm4_mes_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_mes_unmap_queues)); + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; + default: + WARN(1, "queue type %d", type); + return -EINVAL; + } + + if (reset) + packet->bitfields2.action = + action__mes_unmap_queues__reset_queues; + else + packet->bitfields2.action = + action__mes_unmap_queues__preempt_queues; + + switch (filter) { + case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_specified_queues; + packet->bitfields2.num_queues = 1; + packet->bitfields3b.doorbell_offset0 = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_BY_PASID: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; + packet->bitfields3a.pasid = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_queues; + break; + case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_non_static_queues; + break; + default: + WARN(1, "filter %d", filter); + return -EINVAL; + } + + return 0; + +} + +static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value) +{ + struct pm4_mes_query_status *packet; + + packet = (struct pm4_mes_query_status *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_query_status)); + + + packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); + + packet->bitfields2.context_id = 0; + packet->bitfields2.interrupt_sel = + interrupt_sel__mes_query_status__completion_status; + packet->bitfields2.command = + command__mes_query_status__fence_only_after_write_ack; + + packet->addr_hi = upper_32_bits((uint64_t)fence_address); + packet->addr_lo = lower_32_bits((uint64_t)fence_address); + packet->data_hi = upper_32_bits((uint64_t)fence_value); + packet->data_lo = lower_32_bits((uint64_t)fence_value); + + return 0; +} + + +static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer) +{ + struct pm4_mec_release_mem *packet; + + packet = (struct pm4_mec_release_mem *)buffer; + memset(buffer, 0, sizeof(struct pm4_mec_release_mem)); + + packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, + sizeof(struct pm4_mec_release_mem)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe; + packet->bitfields2.tcl1_action_ena = 1; + packet->bitfields2.tc_action_ena = 1; + packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru; + + packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low; + packet->bitfields3.int_sel = + int_sel__mec_release_mem__send_interrupt_after_write_confirm; + + packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; + packet->address_hi = upper_32_bits(gpu_addr); + + packet->data_lo = 0; + + return 0; +} + +const struct packet_manager_funcs kfd_v9_pm_funcs = { + .map_process = pm_map_process_v9, + .runlist = pm_runlist_v9, + .set_resources = pm_set_resources_vi, + .map_queues = pm_map_queues_v9, + .unmap_queues = pm_unmap_queues_v9, + .query_status = pm_query_status_v9, + .release_mem = pm_release_mem_v9, + .map_process_size = sizeof(struct pm4_mes_map_process), + .runlist_size = sizeof(struct pm4_mes_runlist), + .set_resources_size = sizeof(struct pm4_mes_set_resources), + .map_queues_size = sizeof(struct pm4_mes_map_queues), + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .query_status_size = sizeof(struct pm4_mes_query_status), + .release_mem_size = sizeof(struct pm4_mec_release_mem) +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index f1d48281e322..bf20c6d32ef3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -22,15 +22,20 @@ */ #include "kfd_kernel_queue.h" +#include "kfd_device_queue_manager.h" +#include "kfd_pm4_headers_vi.h" +#include "kfd_pm4_opcodes.h" static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); static void uninitialize_vi(struct kernel_queue *kq); +static void submit_packet_vi(struct kernel_queue *kq); void kernel_queue_init_vi(struct kernel_queue_ops *ops) { ops->initialize = initialize_vi; ops->uninitialize = uninitialize_vi; + ops->submit_packet = submit_packet_vi; } static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, @@ -54,3 +59,317 @@ static void uninitialize_vi(struct kernel_queue *kq) { kfd_gtt_sa_free(kq->dev, kq->eop_mem); } + +static void submit_packet_vi(struct kernel_queue *kq) +{ + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); +} + +unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) +{ + union PM4_MES_TYPE_3_HEADER header; + + header.u32All = 0; + header.opcode = opcode; + header.count = packet_size / 4 - 2; + header.type = PM4_TYPE_3; + + return header.u32All; +} + +static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd) +{ + struct pm4_mes_map_process *packet; + + packet = (struct pm4_mes_map_process *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 1; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields3.page_table_base = qpd->page_table_base; + packet->bitfields10.gds_size = qpd->gds_size; + packet->bitfields10.num_gws = qpd->num_gws; + packet->bitfields10.num_oac = qpd->num_oac; + packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; + packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; + + packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base; + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + return 0; +} + +static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) +{ + struct pm4_mes_runlist *packet; + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; + + if (WARN_ON(!ib)) + return -EFAULT; + + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number + * of processes in the runlist and kfd module parameter + * hws_max_conc_proc. + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); + packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_mes_runlist)); + + packet->bitfields4.ib_size = ib_size_in_dwords; + packet->bitfields4.chain = chain ? 1 : 0; + packet->bitfields4.offload_polling = 0; + packet->bitfields4.valid = 1; + packet->bitfields4.process_cnt = concurrent_proc_cnt; + packet->ordinal2 = lower_32_bits(ib); + packet->bitfields3.ib_base_hi = upper_32_bits(ib); + + return 0; +} + +int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res) +{ + struct pm4_mes_set_resources *packet; + + packet = (struct pm4_mes_set_resources *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); + + packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, + sizeof(struct pm4_mes_set_resources)); + + packet->bitfields2.queue_type = + queue_type__mes_set_resources__hsa_interface_queue_hiq; + packet->bitfields2.vmid_mask = res->vmid_mask; + packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; + packet->bitfields7.oac_mask = res->oac_mask; + packet->bitfields8.gds_heap_base = res->gds_heap_base; + packet->bitfields8.gds_heap_size = res->gds_heap_size; + + packet->gws_mask_lo = lower_32_bits(res->gws_mask); + packet->gws_mask_hi = upper_32_bits(res->gws_mask); + + packet->queue_mask_lo = lower_32_bits(res->queue_mask); + packet->queue_mask_hi = upper_32_bits(res->queue_mask); + + return 0; +} + +static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) +{ + struct pm4_mes_map_queues *packet; + bool use_static = is_static; + + packet = (struct pm4_mes_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_mes_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; + + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_compute_vi; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + if (use_static) + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_latency_static_queue_vi; + break; + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.queue_type = + queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ + break; + default: + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; + } + packet->bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + +static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, bool reset, + unsigned int sdma_engine) +{ + struct pm4_mes_unmap_queues *packet; + + packet = (struct pm4_mes_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_mes_unmap_queues)); + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; + default: + WARN(1, "queue type %d", type); + return -EINVAL; + } + + if (reset) + packet->bitfields2.action = + action__mes_unmap_queues__reset_queues; + else + packet->bitfields2.action = + action__mes_unmap_queues__preempt_queues; + + switch (filter) { + case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_specified_queues; + packet->bitfields2.num_queues = 1; + packet->bitfields3b.doorbell_offset0 = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_BY_PASID: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; + packet->bitfields3a.pasid = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_queues; + break; + case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_non_static_queues; + break; + default: + WARN(1, "filter %d", filter); + return -EINVAL; + } + + return 0; + +} + +static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value) +{ + struct pm4_mes_query_status *packet; + + packet = (struct pm4_mes_query_status *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_query_status)); + + packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); + + packet->bitfields2.context_id = 0; + packet->bitfields2.interrupt_sel = + interrupt_sel__mes_query_status__completion_status; + packet->bitfields2.command = + command__mes_query_status__fence_only_after_write_ack; + + packet->addr_hi = upper_32_bits((uint64_t)fence_address); + packet->addr_lo = lower_32_bits((uint64_t)fence_address); + packet->data_hi = upper_32_bits((uint64_t)fence_value); + packet->data_lo = lower_32_bits((uint64_t)fence_value); + + return 0; +} + +static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer) +{ + struct pm4_mec_release_mem *packet; + + packet = (struct pm4_mec_release_mem *)buffer; + memset(buffer, 0, sizeof(*packet)); + + packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, + sizeof(*packet)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; + packet->bitfields2.tcl1_action_ena = 1; + packet->bitfields2.tc_action_ena = 1; + packet->bitfields2.cache_policy = cache_policy___release_mem__lru; + packet->bitfields2.atc = 0; + + packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; + packet->bitfields3.int_sel = + int_sel___release_mem__send_interrupt_after_write_confirm; + + packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; + packet->address_hi = upper_32_bits(gpu_addr); + + packet->data_lo = 0; + + return 0; +} + +const struct packet_manager_funcs kfd_vi_pm_funcs = { + .map_process = pm_map_process_vi, + .runlist = pm_runlist_vi, + .set_resources = pm_set_resources_vi, + .map_queues = pm_map_queues_vi, + .unmap_queues = pm_unmap_queues_vi, + .query_status = pm_query_status_vi, + .release_mem = pm_release_mem_vi, + .map_process_size = sizeof(struct pm4_mes_map_process), + .runlist_size = sizeof(struct pm4_mes_runlist), + .set_resources_size = sizeof(struct pm4_mes_set_resources), + .map_queues_size = sizeof(struct pm4_mes_map_queues), + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .query_status_size = sizeof(struct pm4_mes_query_status), + .release_mem_size = sizeof(struct pm4_mec_release_mem) +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index e0c07d24d251..76bf2dc8aec4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = { .interrupt = kgd2kfd_interrupt, .suspend = kgd2kfd_suspend, .resume = kgd2kfd_resume, + .quiesce_mm = kgd2kfd_quiesce_mm, + .resume_mm = kgd2kfd_resume_mm, .schedule_evict_and_restore_process = kgd2kfd_schedule_evict_and_restore_process, }; @@ -81,6 +83,11 @@ module_param(ignore_crat, int, 0444); MODULE_PARM_DESC(ignore_crat, "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); +int vega10_noretry; +module_param_named(noretry, vega10_noretry, int, 0644); +MODULE_PARM_DESC(noretry, + "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); + static int amdkfd_init_completed; int kgd2kfd_init(unsigned int interface_version, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index ee7061e1c466..4b8eb506642b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, case CHIP_POLARIS10: case CHIP_POLARIS11: return mqd_manager_init_vi_tonga(type, dev); + case CHIP_VEGA10: + case CHIP_RAVEN: + return mqd_manager_init_v9(type, dev); default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index c00c325ed3c9..06eaa218eba6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -79,10 +79,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, m->cp_mqd_base_addr_lo = lower_32_bits(addr); m->cp_mqd_base_addr_hi = upper_32_bits(addr); - m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN; - /* Although WinKFD writes this, I suspect it should not be necessary */ - m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE; - m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10); @@ -412,7 +408,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c new file mode 100644 index 000000000000..684054ff02cd --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -0,0 +1,443 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include "kfd_priv.h" +#include "kfd_mqd_manager.h" +#include "v9_structs.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "sdma0/sdma0_4_0_sh_mask.h" + +static inline struct v9_mqd *get_mqd(void *mqd) +{ + return (struct v9_mqd *)mqd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static int init_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + uint64_t addr; + struct v9_mqd *m; + struct kfd_dev *kfd = mm->dev; + + /* From V9, for CWSR, the control stack is located on the next page + * boundary after the mqd, we will use the gtt allocation function + * instead of sub-allocation function. + */ + if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { + *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + if (!*mqd_mem_obj) + return -ENOMEM; + retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd, + ALIGN(q->ctl_stack_size, PAGE_SIZE) + + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), + &((*mqd_mem_obj)->gtt_mem), + &((*mqd_mem_obj)->gpu_addr), + (void *)&((*mqd_mem_obj)->cpu_ptr)); + } else + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), + mqd_mem_obj); + if (retval != 0) + return -ENOMEM; + + m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, sizeof(struct v9_mqd)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | + 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + + m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; + + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT | + 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | + 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_aql_control = + 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; + } + + if (q->tba_addr) { + m->compute_pgm_rsrc2 |= + (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = + lower_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_base_addr_hi = + upper_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size; + m->cp_hqd_cntl_stack_size = q->ctl_stack_size; + m->cp_hqd_cntl_stack_offset = q->ctl_stack_size; + m->cp_hqd_wg_state_offset = q->ctl_stack_size; + } + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static int load_mqd(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); + + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + (uint32_t __user *)p->write_ptr, + wptr_shift, 0, mms); +} + +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_mqd *m; + + m = get_mqd(mqd); + + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); + m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); + + m->cp_hqd_pq_doorbell_control = + q->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + m->cp_hqd_ib_control = + 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | + 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT; + + /* + * HW does not clamp this field correctly. Maximum EOP queue size + * is constrained by per-SE EOP done signal count, which is 8-bit. + * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit + * more than (EOP entry count - 1) so a queue size of 0x800 dwords + * is safe, giving a maximum field value of 0xA. + */ + m->cp_hqd_eop_control = min(0xA, + order_base_2(q->eop_ring_buffer_size / 4) - 1); + m->cp_hqd_eop_base_addr_lo = + lower_32_bits(q->eop_ring_buffer_address >> 8); + m->cp_hqd_eop_base_addr_hi = + upper_32_bits(q->eop_ring_buffer_address >> 8); + + m->cp_hqd_iq_timer = 0; + + m->cp_hqd_vmid = q->vmid; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | + 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | + 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | + 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; + } + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) + m->cp_hqd_ctx_save_control = 0; + + q->is_active = (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0 && + !q->is_evicted); + + return 0; +} + + +static int destroy_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_destroy + (mm->dev->kgd, mqd, type, timeout, + pipe_id, queue_id); +} + +static void uninit_mqd(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + struct kfd_dev *kfd = mm->dev; + + if (mqd_mem_obj->gtt_mem) { + kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); + kfree(mqd_mem_obj); + } else { + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); + } +} + +static bool is_occupied(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_is_occupied( + mm->dev->kgd, queue_address, + pipe_id, queue_id); +} + +static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + struct v9_mqd *m; + int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q); + + if (retval != 0) + return retval; + + m = get_mqd(*mqd); + + m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT | + 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; + + return retval; +} + +static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_mqd *m; + int retval = update_mqd(mm, mqd, q); + + if (retval != 0) + return retval; + + /* TODO: what's the point? update_mqd already does this. */ + m = get_mqd(mqd); + m->cp_hqd_vmid = q->vmid; + return retval; +} + +static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + struct v9_sdma_mqd *m; + + + retval = kfd_gtt_sa_allocate(mm->dev, + sizeof(struct v9_sdma_mqd), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; + + memset(m, 0, sizeof(struct v9_sdma_mqd)); + + *mqd = m; + if (gart_addr) + *gart_addr = (*mqd_mem_obj)->gpu_addr; + + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); +} + +static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + (uint32_t __user *)p->write_ptr, + mms); +} + +#define SDMA_RLC_DUMMY_DEFAULT 0xf + +static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_sdma_mqd *m; + + m = get_sdma_mqd(mqd); + m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | + 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | + 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; + + m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8); + m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8); + m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_doorbell_offset = + q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT; + + m->sdma_engine_id = q->sdma_engine_id; + m->sdma_queue_id = q->sdma_queue_id; + m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT; + + q->is_active = (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0 && + !q->is_evicted); + + return 0; +} + +/* + * * preempt type here is ignored because there is only one way + * * to preempt sdma queue + */ +static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +} + +static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +} + +#if defined(CONFIG_DEBUG_FS) + +static int debugfs_show_mqd(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct v9_mqd), false); + return 0; +} + +static int debugfs_show_mqd_sdma(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct v9_sdma_mqd), false); + return 0; +} + +#endif + +struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; + + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + if (!mqd) + return NULL; + + mqd->dev = dev; + + switch (type) { + case KFD_MQD_TYPE_CP: + case KFD_MQD_TYPE_COMPUTE: + mqd->init_mqd = init_mqd; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_HIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_SDMA: + mqd->init_mqd = init_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_sdma; + mqd->load_mqd = load_mqd_sdma; + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; +#endif + break; + default: + kfree(mqd); + return NULL; + } + + return mqd; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 89e4242e43e7..481307b8b4db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 89ba4c670ec5..c317feb43f69 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -26,8 +26,6 @@ #include "kfd_device_queue_manager.h" #include "kfd_kernel_queue.h" #include "kfd_priv.h" -#include "kfd_pm4_headers_vi.h" -#include "kfd_pm4_opcodes.h" static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes) @@ -39,18 +37,6 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, *wptr = temp; } -static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) -{ - union PM4_MES_TYPE_3_HEADER header; - - header.u32All = 0; - header.opcode = opcode; - header.count = packet_size / 4 - 2; - header.type = PM4_TYPE_3; - - return header.u32All; -} - static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, bool *over_subscription) @@ -80,9 +66,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm, pr_debug("Over subscribed runlist\n"); } - map_queue_size = sizeof(struct pm4_mes_map_queues); + map_queue_size = pm->pmf->map_queues_size; /* calculate run list ib allocation size */ - *rlib_size = process_count * sizeof(struct pm4_mes_map_process) + + *rlib_size = process_count * pm->pmf->map_process_size + queue_count * map_queue_size; /* @@ -90,7 +76,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * when over subscription */ if (*over_subscription) - *rlib_size += sizeof(struct pm4_mes_runlist); + *rlib_size += pm->pmf->runlist_size; pr_debug("runlist ib size %d\n", *rlib_size); } @@ -108,12 +94,14 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); + mutex_lock(&pm->lock); + retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, &pm->ib_buffer_obj); if (retval) { pr_err("Failed to allocate runlist IB\n"); - return retval; + goto out; } *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr; @@ -121,138 +109,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, memset(*rl_buffer, 0, *rl_buffer_size); pm->allocated = true; - return retval; -} - -static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, - uint64_t ib, size_t ib_size_in_dwords, bool chain) -{ - struct pm4_mes_runlist *packet; - int concurrent_proc_cnt = 0; - struct kfd_dev *kfd = pm->dqm->dev; - - if (WARN_ON(!ib)) - return -EFAULT; - - /* Determine the number of processes to map together to HW: - * it can not exceed the number of VMIDs available to the - * scheduler, and it is determined by the smaller of the number - * of processes in the runlist and kfd module parameter - * hws_max_conc_proc. - * Note: the arbitration between the number of VMIDs and - * hws_max_conc_proc has been done in - * kgd2kfd_device_init(). - */ - concurrent_proc_cnt = min(pm->dqm->processes_count, - kfd->max_proc_per_quantum); - - packet = (struct pm4_mes_runlist *)buffer; - - memset(buffer, 0, sizeof(struct pm4_mes_runlist)); - packet->header.u32All = build_pm4_header(IT_RUN_LIST, - sizeof(struct pm4_mes_runlist)); - - packet->bitfields4.ib_size = ib_size_in_dwords; - packet->bitfields4.chain = chain ? 1 : 0; - packet->bitfields4.offload_polling = 0; - packet->bitfields4.valid = 1; - packet->bitfields4.process_cnt = concurrent_proc_cnt; - packet->ordinal2 = lower_32_bits(ib); - packet->bitfields3.ib_base_hi = upper_32_bits(ib); - - return 0; -} - -static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, - struct qcm_process_device *qpd) -{ - struct pm4_mes_map_process *packet; - - packet = (struct pm4_mes_map_process *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_process)); - - packet->header.u32All = build_pm4_header(IT_MAP_PROCESS, - sizeof(struct pm4_mes_map_process)); - packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; - packet->bitfields2.process_quantum = 1; - packet->bitfields2.pasid = qpd->pqm->process->pasid; - packet->bitfields3.page_table_base = qpd->page_table_base; - packet->bitfields10.gds_size = qpd->gds_size; - packet->bitfields10.num_gws = qpd->num_gws; - packet->bitfields10.num_oac = qpd->num_oac; - packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; - - packet->sh_mem_config = qpd->sh_mem_config; - packet->sh_mem_bases = qpd->sh_mem_bases; - packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; - packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; - - packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base; - - packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); - packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); - - return 0; -} - -static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, - struct queue *q, bool is_static) -{ - struct pm4_mes_map_queues *packet; - bool use_static = is_static; - - packet = (struct pm4_mes_map_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); - - packet->header.u32All = build_pm4_header(IT_MAP_QUEUES, - sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe_vi; - packet->bitfields2.num_queues = 1; - packet->bitfields2.queue_sel = - queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; - - packet->bitfields2.engine_sel = - engine_sel__mes_map_queues__compute_vi; - packet->bitfields2.queue_type = - queue_type__mes_map_queues__normal_compute_vi; - - switch (q->properties.type) { - case KFD_QUEUE_TYPE_COMPUTE: - if (use_static) - packet->bitfields2.queue_type = - queue_type__mes_map_queues__normal_latency_static_queue_vi; - break; - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.queue_type = - queue_type__mes_map_queues__debug_interface_queue_vi; - break; - case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = q->properties.sdma_engine_id + - engine_sel__mes_map_queues__sdma0_vi; - use_static = false; /* no static queues under SDMA */ - break; - default: - WARN(1, "queue type %d", q->properties.type); - return -EINVAL; - } - packet->bitfields3.doorbell_offset = - q->properties.doorbell_off; - - packet->mqd_addr_lo = - lower_32_bits(q->gart_mqd_addr); - - packet->mqd_addr_hi = - upper_32_bits(q->gart_mqd_addr); - - packet->wptr_addr_lo = - lower_32_bits((uint64_t)q->properties.write_ptr); - - packet->wptr_addr_hi = - upper_32_bits((uint64_t)q->properties.write_ptr); - - return 0; +out: + mutex_unlock(&pm->lock); + return retval; } static int pm_create_runlist_ib(struct packet_manager *pm, @@ -292,12 +152,12 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return -ENOMEM; } - retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); + retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd); if (retval) return retval; proccesses_mapped++; - inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process), + inc_wptr(&rl_wptr, pm->pmf->map_process_size, alloc_size_bytes); list_for_each_entry(kq, &qpd->priv_queue_list, list) { @@ -307,7 +167,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("static_queue, mapping kernel q %d, is debug status %d\n", kq->queue->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, + retval = pm->pmf->map_queues(pm, &rl_buffer[rl_wptr], kq->queue, qpd->is_debug); @@ -315,7 +175,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_mes_map_queues), + pm->pmf->map_queues_size, alloc_size_bytes); } @@ -326,7 +186,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("static_queue, mapping user queue %d, is debug status %d\n", q->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, + retval = pm->pmf->map_queues(pm, &rl_buffer[rl_wptr], q, qpd->is_debug); @@ -335,7 +195,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_mes_map_queues), + pm->pmf->map_queues_size, alloc_size_bytes); } } @@ -343,7 +203,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("Finished map process and queues to runlist\n"); if (is_over_subscription) - retval = pm_create_runlist(pm, &rl_buffer[rl_wptr], + retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, alloc_size_bytes / sizeof(uint32_t), true); @@ -355,45 +215,29 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; } -/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size - * of this packet - * @gpu_addr - GPU address of the packet. It's a virtual address. - * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer - * Return - length of the packet - */ -uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer) -{ - struct pm4_mec_release_mem *packet; - - WARN_ON(!buffer); - - packet = (struct pm4_mec_release_mem *)buffer; - memset(buffer, 0, sizeof(*packet)); - - packet->header.u32All = build_pm4_header(IT_RELEASE_MEM, - sizeof(*packet)); - - packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; - packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; - packet->bitfields2.tcl1_action_ena = 1; - packet->bitfields2.tc_action_ena = 1; - packet->bitfields2.cache_policy = cache_policy___release_mem__lru; - packet->bitfields2.atc = 0; - - packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; - packet->bitfields3.int_sel = - int_sel___release_mem__send_interrupt_after_write_confirm; - - packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; - packet->address_hi = upper_32_bits(gpu_addr); - - packet->data_lo = 0; - - return sizeof(*packet) / sizeof(unsigned int); -} - int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) { + switch (dqm->dev->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: + /* PM4 packet structures on CIK are the same as on VI */ + case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + pm->pmf = &kfd_vi_pm_funcs; + break; + case CHIP_VEGA10: + case CHIP_RAVEN: + pm->pmf = &kfd_v9_pm_funcs; + break; + default: + WARN(1, "Unexpected ASIC family %u", + dqm->dev->device_info->asic_family); + return -EINVAL; + } + pm->dqm = dqm; mutex_init(&pm->lock); pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); @@ -415,38 +259,25 @@ void pm_uninit(struct packet_manager *pm) int pm_send_set_resources(struct packet_manager *pm, struct scheduling_resources *res) { - struct pm4_mes_set_resources *packet; + uint32_t *buffer, size; int retval = 0; + size = pm->pmf->set_resources_size; mutex_lock(&pm->lock); pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, - sizeof(*packet) / sizeof(uint32_t), - (unsigned int **)&packet); - if (!packet) { + size / sizeof(uint32_t), + (unsigned int **)&buffer); + if (!buffer) { pr_err("Failed to allocate buffer on kernel queue\n"); retval = -ENOMEM; goto out; } - memset(packet, 0, sizeof(struct pm4_mes_set_resources)); - packet->header.u32All = build_pm4_header(IT_SET_RESOURCES, - sizeof(struct pm4_mes_set_resources)); - - packet->bitfields2.queue_type = - queue_type__mes_set_resources__hsa_interface_queue_hiq; - packet->bitfields2.vmid_mask = res->vmid_mask; - packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; - packet->bitfields7.oac_mask = res->oac_mask; - packet->bitfields8.gds_heap_base = res->gds_heap_base; - packet->bitfields8.gds_heap_size = res->gds_heap_size; - - packet->gws_mask_lo = lower_32_bits(res->gws_mask); - packet->gws_mask_hi = upper_32_bits(res->gws_mask); - - packet->queue_mask_lo = lower_32_bits(res->queue_mask); - packet->queue_mask_hi = upper_32_bits(res->queue_mask); - - pm->priv_queue->ops.submit_packet(pm->priv_queue); + retval = pm->pmf->set_resources(pm, buffer, res); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); + else + pm->priv_queue->ops.rollback_packet(pm->priv_queue); out: mutex_unlock(&pm->lock); @@ -468,7 +299,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); - packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t); + packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, @@ -476,7 +307,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval) goto fail_acquire_packet_buffer; - retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr, + retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr, rl_ib_size / sizeof(uint32_t), false); if (retval) goto fail_create_runlist; @@ -499,37 +330,29 @@ fail_create_runlist_ib: int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint32_t fence_value) { - int retval; - struct pm4_mes_query_status *packet; + uint32_t *buffer, size; + int retval = 0; if (WARN_ON(!fence_address)) return -EFAULT; + size = pm->pmf->query_status_size; mutex_lock(&pm->lock); - retval = pm->priv_queue->ops.acquire_packet_buffer( - pm->priv_queue, - sizeof(struct pm4_mes_query_status) / sizeof(uint32_t), - (unsigned int **)&packet); - if (retval) - goto fail_acquire_packet_buffer; - - packet->header.u32All = build_pm4_header(IT_QUERY_STATUS, - sizeof(struct pm4_mes_query_status)); - - packet->bitfields2.context_id = 0; - packet->bitfields2.interrupt_sel = - interrupt_sel__mes_query_status__completion_status; - packet->bitfields2.command = - command__mes_query_status__fence_only_after_write_ack; - - packet->addr_hi = upper_32_bits((uint64_t)fence_address); - packet->addr_lo = lower_32_bits((uint64_t)fence_address); - packet->data_hi = upper_32_bits((uint64_t)fence_value); - packet->data_lo = lower_32_bits((uint64_t)fence_value); + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; + } - pm->priv_queue->ops.submit_packet(pm->priv_queue); + retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); + else + pm->priv_queue->ops.rollback_packet(pm->priv_queue); -fail_acquire_packet_buffer: +out: mutex_unlock(&pm->lock); return retval; } @@ -539,82 +362,27 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, uint32_t filter_param, bool reset, unsigned int sdma_engine) { - int retval; - uint32_t *buffer; - struct pm4_mes_unmap_queues *packet; + uint32_t *buffer, size; + int retval = 0; + size = pm->pmf->unmap_queues_size; mutex_lock(&pm->lock); - retval = pm->priv_queue->ops.acquire_packet_buffer( - pm->priv_queue, - sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t), - &buffer); - if (retval) - goto err_acquire_packet_buffer; - - packet = (struct pm4_mes_unmap_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); - pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n", - filter, reset, type); - packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES, - sizeof(struct pm4_mes_unmap_queues)); - switch (type) { - case KFD_QUEUE_TYPE_COMPUTE: - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__compute; - break; - case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__sdma0 + sdma_engine; - break; - default: - WARN(1, "queue type %d", type); - retval = -EINVAL; - goto err_invalid; + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; } - if (reset) - packet->bitfields2.action = - action__mes_unmap_queues__reset_queues; + retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param, + reset, sdma_engine); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); else - packet->bitfields2.action = - action__mes_unmap_queues__preempt_queues; - - switch (filter) { - case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_specified_queues; - packet->bitfields2.num_queues = 1; - packet->bitfields3b.doorbell_offset0 = filter_param; - break; - case KFD_UNMAP_QUEUES_FILTER_BY_PASID: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; - packet->bitfields3a.pasid = filter_param; - break; - case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_queues; - break; - case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: - /* in this case, we do not preempt static queues */ - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_non_static_queues; - break; - default: - WARN(1, "filter %d", filter); - retval = -EINVAL; - goto err_invalid; - } + pm->priv_queue->ops.rollback_packet(pm->priv_queue); - pm->priv_queue->ops.submit_packet(pm->priv_queue); - - mutex_unlock(&pm->lock); - return 0; - -err_invalid: - pm->priv_queue->ops.rollback_packet(pm->priv_queue); -err_acquire_packet_buffer: +out: mutex_unlock(&pm->lock); return retval; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h new file mode 100644 index 000000000000..f2bcf5c092ea --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -0,0 +1,583 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef F32_MES_PM4_PACKETS_H +#define F32_MES_PM4_PACKETS_H + +#ifndef PM4_MES_HEADER_DEFINED +#define PM4_MES_HEADER_DEFINED +union PM4_MES_TYPE_3_HEADER { + struct { + uint32_t reserved1 : 8; /* < reserved */ + uint32_t opcode : 8; /* < IT opcode */ + uint32_t count : 14;/* < number of DWORDs - 1 in the + * information body. + */ + uint32_t type : 2; /* < packet identifier. + * It should be 3 for type 3 packets + */ + }; + uint32_t u32All; +}; +#endif /* PM4_MES_HEADER_DEFINED */ + +/*--------------------MES_SET_RESOURCES--------------------*/ + +#ifndef PM4_MES_SET_RESOURCES_DEFINED +#define PM4_MES_SET_RESOURCES_DEFINED +enum mes_set_resources_queue_type_enum { + queue_type__mes_set_resources__kernel_interface_queue_kiq = 0, + queue_type__mes_set_resources__hsa_interface_queue_hiq = 1, + queue_type__mes_set_resources__hsa_debug_interface_queue = 4 +}; + + +struct pm4_mes_set_resources { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t vmid_mask:16; + uint32_t unmap_latency:8; + uint32_t reserved1:5; + enum mes_set_resources_queue_type_enum queue_type:3; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t queue_mask_lo; + uint32_t queue_mask_hi; + uint32_t gws_mask_lo; + uint32_t gws_mask_hi; + + union { + struct { + uint32_t oac_mask:16; + uint32_t reserved2:16; + } bitfields7; + uint32_t ordinal7; + }; + + union { + struct { + uint32_t gds_heap_base:6; + uint32_t reserved3:5; + uint32_t gds_heap_size:6; + uint32_t reserved4:15; + } bitfields8; + uint32_t ordinal8; + }; + +}; +#endif + +/*--------------------MES_RUN_LIST--------------------*/ + +#ifndef PM4_MES_RUN_LIST_DEFINED +#define PM4_MES_RUN_LIST_DEFINED + +struct pm4_mes_runlist { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:2; + uint32_t ib_base_lo:30; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t ib_base_hi; + + union { + struct { + uint32_t ib_size:20; + uint32_t chain:1; + uint32_t offload_polling:1; + uint32_t reserved2:1; + uint32_t valid:1; + uint32_t process_cnt:4; + uint32_t reserved3:4; + } bitfields4; + uint32_t ordinal4; + }; + +}; +#endif + +/*--------------------MES_MAP_PROCESS--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_DEFINED +#define PM4_MES_MAP_PROCESS_DEFINED + +struct pm4_mes_map_process { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:8; + uint32_t diq_enable:1; + uint32_t process_quantum:7; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t vm_context_page_table_base_addr_lo32; + + uint32_t vm_context_page_table_base_addr_hi32; + + uint32_t sh_mem_bases; + + uint32_t sh_mem_config; + + uint32_t sq_shader_tba_lo; + + uint32_t sq_shader_tba_hi; + + uint32_t sq_shader_tma_lo; + + uint32_t sq_shader_tma_hi; + + uint32_t reserved6; + + uint32_t gds_addr_lo; + + uint32_t gds_addr_hi; + + union { + struct { + uint32_t num_gws:6; + uint32_t reserved7:1; + uint32_t sdma_enable:1; + uint32_t num_oac:4; + uint32_t reserved8:4; + uint32_t gds_size:6; + uint32_t num_queues:10; + } bitfields14; + uint32_t ordinal14; + }; + + uint32_t completion_signal_lo; + + uint32_t completion_signal_hi; + +}; + +#endif + +/*--------------------MES_MAP_PROCESS_VM--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED +#define PM4_MES_MAP_PROCESS_VM_DEFINED + +struct PM4_MES_MAP_PROCESS_VM { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + uint32_t reserved1; + + uint32_t vm_context_cntl; + + uint32_t reserved2; + + uint32_t vm_context_page_table_end_addr_lo32; + + uint32_t vm_context_page_table_end_addr_hi32; + + uint32_t vm_context_page_table_start_addr_lo32; + + uint32_t vm_context_page_table_start_addr_hi32; + + uint32_t reserved3; + + uint32_t reserved4; + + uint32_t reserved5; + + uint32_t reserved6; + + uint32_t reserved7; + + uint32_t reserved8; + + uint32_t completion_signal_lo32; + + uint32_t completion_signal_hi32; + +}; +#endif + +/*--------------------MES_MAP_QUEUES--------------------*/ + +#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED +#define PM4_MES_MAP_QUEUES_VI_DEFINED +enum mes_map_queues_queue_sel_enum { + queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0, +queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1 +}; + +enum mes_map_queues_queue_type_enum { + queue_type__mes_map_queues__normal_compute_vi = 0, + queue_type__mes_map_queues__debug_interface_queue_vi = 1, + queue_type__mes_map_queues__normal_latency_static_queue_vi = 2, +queue_type__mes_map_queues__low_latency_static_queue_vi = 3 +}; + +enum mes_map_queues_alloc_format_enum { + alloc_format__mes_map_queues__one_per_pipe_vi = 0, +alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 +}; + +enum mes_map_queues_engine_sel_enum { + engine_sel__mes_map_queues__compute_vi = 0, + engine_sel__mes_map_queues__sdma0_vi = 2, + engine_sel__mes_map_queues__sdma1_vi = 3 +}; + + +struct pm4_mes_map_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:4; + enum mes_map_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:15; + enum mes_map_queues_queue_type_enum queue_type:3; + enum mes_map_queues_alloc_format_enum alloc_format:2; + enum mes_map_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t reserved3:1; + uint32_t check_disable:1; + uint32_t doorbell_offset:26; + uint32_t reserved4:4; + } bitfields3; + uint32_t ordinal3; + }; + + uint32_t mqd_addr_lo; + uint32_t mqd_addr_hi; + uint32_t wptr_addr_lo; + uint32_t wptr_addr_hi; +}; +#endif + +/*--------------------MES_QUERY_STATUS--------------------*/ + +#ifndef PM4_MES_QUERY_STATUS_DEFINED +#define PM4_MES_QUERY_STATUS_DEFINED +enum mes_query_status_interrupt_sel_enum { + interrupt_sel__mes_query_status__completion_status = 0, + interrupt_sel__mes_query_status__process_status = 1, + interrupt_sel__mes_query_status__queue_status = 2 +}; + +enum mes_query_status_command_enum { + command__mes_query_status__interrupt_only = 0, + command__mes_query_status__fence_only_immediate = 1, + command__mes_query_status__fence_only_after_write_ack = 2, + command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3 +}; + +enum mes_query_status_engine_sel_enum { + engine_sel__mes_query_status__compute = 0, + engine_sel__mes_query_status__sdma0_queue = 2, + engine_sel__mes_query_status__sdma1_queue = 3 +}; + +struct pm4_mes_query_status { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t context_id:28; + enum mes_query_status_interrupt_sel_enum interrupt_sel:2; + enum mes_query_status_command_enum command:2; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:16; + } bitfields3a; + struct { + uint32_t reserved2:2; + uint32_t doorbell_offset:26; + enum mes_query_status_engine_sel_enum engine_sel:3; + uint32_t reserved3:1; + } bitfields3b; + uint32_t ordinal3; + }; + + uint32_t addr_lo; + uint32_t addr_hi; + uint32_t data_lo; + uint32_t data_hi; +}; +#endif + +/*--------------------MES_UNMAP_QUEUES--------------------*/ + +#ifndef PM4_MES_UNMAP_QUEUES_DEFINED +#define PM4_MES_UNMAP_QUEUES_DEFINED +enum mes_unmap_queues_action_enum { + action__mes_unmap_queues__preempt_queues = 0, + action__mes_unmap_queues__reset_queues = 1, + action__mes_unmap_queues__disable_process_queues = 2, + action__mes_unmap_queues__reserved = 3 +}; + +enum mes_unmap_queues_queue_sel_enum { + queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, + queue_sel__mes_unmap_queues__unmap_all_queues = 2, + queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3 +}; + +enum mes_unmap_queues_engine_sel_enum { + engine_sel__mes_unmap_queues__compute = 0, + engine_sel__mes_unmap_queues__sdma0 = 2, + engine_sel__mes_unmap_queues__sdmal = 3 +}; + +struct pm4_mes_unmap_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + enum mes_unmap_queues_action_enum action:2; + uint32_t reserved1:2; + enum mes_unmap_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:20; + enum mes_unmap_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved3:16; + } bitfields3a; + struct { + uint32_t reserved4:2; + uint32_t doorbell_offset0:26; + int32_t reserved5:4; + } bitfields3b; + uint32_t ordinal3; + }; + + union { + struct { + uint32_t reserved6:2; + uint32_t doorbell_offset1:26; + uint32_t reserved7:4; + } bitfields4; + uint32_t ordinal4; + }; + + union { + struct { + uint32_t reserved8:2; + uint32_t doorbell_offset2:26; + uint32_t reserved9:4; + } bitfields5; + uint32_t ordinal5; + }; + + union { + struct { + uint32_t reserved10:2; + uint32_t doorbell_offset3:26; + uint32_t reserved11:4; + } bitfields6; + uint32_t ordinal6; + }; +}; +#endif + +#ifndef PM4_MEC_RELEASE_MEM_DEFINED +#define PM4_MEC_RELEASE_MEM_DEFINED + +enum mec_release_mem_event_index_enum { + event_index__mec_release_mem__end_of_pipe = 5, + event_index__mec_release_mem__shader_done = 6 +}; + +enum mec_release_mem_cache_policy_enum { + cache_policy__mec_release_mem__lru = 0, + cache_policy__mec_release_mem__stream = 1 +}; + +enum mec_release_mem_pq_exe_status_enum { + pq_exe_status__mec_release_mem__default = 0, + pq_exe_status__mec_release_mem__phase_update = 1 +}; + +enum mec_release_mem_dst_sel_enum { + dst_sel__mec_release_mem__memory_controller = 0, + dst_sel__mec_release_mem__tc_l2 = 1, + dst_sel__mec_release_mem__queue_write_pointer_register = 2, + dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3 +}; + +enum mec_release_mem_int_sel_enum { + int_sel__mec_release_mem__none = 0, + int_sel__mec_release_mem__send_interrupt_only = 1, + int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2, + int_sel__mec_release_mem__send_data_after_write_confirm = 3, + int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4, + int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5, + int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6 +}; + +enum mec_release_mem_data_sel_enum { + data_sel__mec_release_mem__none = 0, + data_sel__mec_release_mem__send_32_bit_low = 1, + data_sel__mec_release_mem__send_64_bit_data = 2, + data_sel__mec_release_mem__send_gpu_clock_counter = 3, + data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4, + data_sel__mec_release_mem__store_gds_data_to_memory = 5 +}; + +struct pm4_mec_release_mem { + union { + union PM4_MES_TYPE_3_HEADER header; /*header */ + unsigned int ordinal1; + }; + + union { + struct { + unsigned int event_type:6; + unsigned int reserved1:2; + enum mec_release_mem_event_index_enum event_index:4; + unsigned int tcl1_vol_action_ena:1; + unsigned int tc_vol_action_ena:1; + unsigned int reserved2:1; + unsigned int tc_wb_action_ena:1; + unsigned int tcl1_action_ena:1; + unsigned int tc_action_ena:1; + uint32_t reserved3:1; + uint32_t tc_nc_action_ena:1; + uint32_t tc_wc_action_ena:1; + uint32_t tc_md_action_ena:1; + uint32_t reserved4:3; + enum mec_release_mem_cache_policy_enum cache_policy:2; + uint32_t reserved5:2; + enum mec_release_mem_pq_exe_status_enum pq_exe_status:1; + uint32_t reserved6:2; + } bitfields2; + unsigned int ordinal2; + }; + + union { + struct { + uint32_t reserved7:16; + enum mec_release_mem_dst_sel_enum dst_sel:2; + uint32_t reserved8:6; + enum mec_release_mem_int_sel_enum int_sel:3; + uint32_t reserved9:2; + enum mec_release_mem_data_sel_enum data_sel:3; + } bitfields3; + unsigned int ordinal3; + }; + + union { + struct { + uint32_t reserved10:2; + unsigned int address_lo_32b:30; + } bitfields4; + struct { + uint32_t reserved11:3; + uint32_t address_lo_64b:29; + } bitfields4b; + uint32_t reserved12; + unsigned int ordinal4; + }; + + union { + uint32_t address_hi; + uint32_t reserved13; + uint32_t ordinal5; + }; + + union { + uint32_t data_lo; + uint32_t cmp_data_lo; + struct { + uint32_t dw_offset:16; + uint32_t num_dwords:16; + } bitfields6c; + uint32_t reserved14; + uint32_t ordinal6; + }; + + union { + uint32_t data_hi; + uint32_t cmp_data_hi; + uint32_t reserved15; + uint32_t reserved16; + uint32_t ordinal7; + }; + + uint32_t int_ctxid; + +}; + +#endif + +enum { + CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 +}; +#endif + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 96a9cc0f02c9..5e3990bb4c4b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -39,11 +39,37 @@ #include "amd_shared.h" +#define KFD_MAX_RING_ENTRY_SIZE 8 + #define KFD_SYSFS_FILE_MODE 0444 -#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull -#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull -#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull +/* GPU ID hash width in bits */ +#define KFD_GPU_ID_HASH_WIDTH 16 + +/* Use upper bits of mmap offset to store KFD driver specific information. + * BITS[63:62] - Encode MMAP type + * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to + * BITS[45:0] - MMAP offset value + * + * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these + * defines are w.r.t to PAGE_SIZE + */ +#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT) +#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) + +#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) +#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ + << KFD_MMAP_GPU_ID_SHIFT) +#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\ + & KFD_MMAP_GPU_ID_MASK) +#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \ + >> KFD_MMAP_GPU_ID_SHIFT) + +#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT) +#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK) /* * When working with cp scheduler we should assign the HIQ manually or via @@ -55,9 +81,6 @@ #define KFD_CIK_HIQ_PIPE 4 #define KFD_CIK_HIQ_QUEUE 0 -/* GPU ID hash width in bits */ -#define KFD_GPU_ID_HASH_WIDTH 16 - /* Macro for allocating structures */ #define kfd_alloc_struct(ptr_to_struct) \ ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) @@ -116,6 +139,11 @@ extern int debug_largebar; */ extern int ignore_crat; +/* + * Set sh_mem_config.retry_disable on Vega10 + */ +extern int vega10_noretry; + /** * enum kfd_sched_policy * @@ -148,6 +176,8 @@ enum cache_policy { cache_policy_noncoherent }; +#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) + struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry); @@ -160,6 +190,7 @@ struct kfd_device_info { const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; unsigned int max_no_of_hqd; + unsigned int doorbell_size; size_t ih_ring_entry_size; uint8_t num_of_watch_points; uint16_t mqd_size_aligned; @@ -173,6 +204,7 @@ struct kfd_mem_obj { uint32_t range_end; uint64_t gpu_addr; uint32_t *cpu_ptr; + void *gtt_mem; }; struct kfd_vmid_info { @@ -364,7 +396,7 @@ struct queue_properties { uint32_t queue_percent; uint32_t *read_ptr; uint32_t *write_ptr; - uint32_t __iomem *doorbell_ptr; + void __iomem *doorbell_ptr; uint32_t doorbell_off; bool is_interop; bool is_evicted; @@ -427,6 +459,7 @@ struct queue { uint32_t queue; unsigned int sdma_id; + unsigned int doorbell_id; struct kfd_process *process; struct kfd_dev *device; @@ -501,6 +534,9 @@ struct qcm_process_device { /* IB memory */ uint64_t ib_base; void *ib_kaddr; + + /* doorbell resources per process per device */ + unsigned long *doorbell_bitmap; }; /* KFD Memory Eviction */ @@ -512,6 +548,8 @@ struct qcm_process_device { /* Approx. time before evicting the process again */ #define PROCESS_ACTIVE_TIME_MS 10 +int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct dma_fence *fence); @@ -681,6 +719,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); void kfd_unref_process(struct kfd_process *p); +int kfd_process_evict_queues(struct kfd_process *p); +int kfd_process_restore_queues(struct kfd_process *p); void kfd_suspend_all_processes(void); int kfd_resume_all_processes(void); @@ -693,7 +733,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); -int kfd_reserved_mem_mmap(struct kfd_process *process, +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma); /* KFD process API for creating and translating handles */ @@ -721,17 +761,20 @@ unsigned int kfd_pasid_alloc(void); void kfd_pasid_free(unsigned int pasid); /* Doorbells */ +size_t kfd_doorbell_process_slice(struct kfd_dev *kfd); int kfd_doorbell_init(struct kfd_dev *kfd); void kfd_doorbell_fini(struct kfd_dev *kfd); -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); -u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma); +void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off); void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); u32 read_kernel_doorbell(u32 __iomem *db); -void write_kernel_doorbell(u32 __iomem *db, u32 value); -unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, +void write_kernel_doorbell(void __iomem *db, u32 value); +void write_kernel_doorbell64(void __iomem *db, u64 value); +unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, - unsigned int queue_id); + unsigned int doorbell_id); phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, struct kfd_process *process); int kfd_alloc_process_doorbells(struct kfd_process *process); @@ -788,6 +831,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, @@ -832,8 +877,42 @@ struct packet_manager { bool allocated; struct kfd_mem_obj *ib_buffer_obj; unsigned int ib_size_bytes; + + const struct packet_manager_funcs *pmf; +}; + +struct packet_manager_funcs { + /* Support ASIC-specific packet formats for PM4 packets */ + int (*map_process)(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd); + int (*runlist)(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain); + int (*set_resources)(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res); + int (*map_queues)(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static); + int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter mode, + uint32_t filter_param, bool reset, + unsigned int sdma_engine); + int (*query_status)(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value); + int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); + + /* Packet sizes */ + int map_process_size; + int runlist_size; + int set_resources_size; + int map_queues_size; + int unmap_queues_size; + int query_status_size; + int release_mem_size; }; +extern const struct packet_manager_funcs kfd_vi_pm_funcs; +extern const struct packet_manager_funcs kfd_v9_pm_funcs; + int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); void pm_uninit(struct packet_manager *pm); int pm_send_set_resources(struct packet_manager *pm, @@ -849,12 +928,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, void pm_release_ib(struct packet_manager *pm); -uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer); +/* Following PM funcs can be shared among VI and AI */ +unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); +int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res); uint64_t kfd_get_number_elems(struct kfd_dev *kfd); /* Events */ extern const struct kfd_event_interrupt_class event_interrupt_class_cik; +extern const struct kfd_event_interrupt_class event_interrupt_class_v9; + extern const struct kfd_device_global_init_class device_global_init_class_cik; void kfd_event_init_process(struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1711ad0642f7..1d80b4f7c681 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -332,6 +332,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, get_order(KFD_CWSR_TBA_TMA_SIZE)); + kfree(pdd->qpd.doorbell_bitmap); idr_destroy(&pdd->alloc_idr); kfree(pdd); @@ -451,7 +452,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) continue; - offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; + offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id)) + << PAGE_SHIFT; qpd->tba_addr = (int64_t)vm_mmap(filep, 0, KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, MAP_SHARED, offset); @@ -585,6 +587,31 @@ err_alloc_process: return ERR_PTR(err); } +static int init_doorbell_bitmap(struct qcm_process_device *qpd, + struct kfd_dev *dev) +{ + unsigned int i; + + if (!KFD_IS_SOC15(dev->device_info->asic_family)) + return 0; + + qpd->doorbell_bitmap = + kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + BITS_PER_BYTE), GFP_KERNEL); + if (!qpd->doorbell_bitmap) + return -ENOMEM; + + /* Mask out any reserved doorbells */ + for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++) + if ((dev->shared_resources.reserved_doorbell_mask & i) == + dev->shared_resources.reserved_doorbell_val) { + set_bit(i, qpd->doorbell_bitmap); + pr_debug("reserved doorbell 0x%03x\n", i); + } + + return 0; +} + struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p) { @@ -606,6 +633,12 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, if (!pdd) return NULL; + if (init_doorbell_bitmap(&pdd->qpd, dev)) { + pr_err("Failed to init doorbell for process\n"); + kfree(pdd); + return NULL; + } + pdd->dev = dev; INIT_LIST_HEAD(&pdd->qpd.queues_list); INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); @@ -808,7 +841,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) * Eviction is reference-counted per process-device. This means multiple * evictions from different sources can be nested safely. */ -static int process_evict_queues(struct kfd_process *p) +int kfd_process_evict_queues(struct kfd_process *p) { struct kfd_process_device *pdd; int r = 0; @@ -844,7 +877,7 @@ fail: } /* process_restore_queues - Restore all user queues of a process */ -static int process_restore_queues(struct kfd_process *p) +int kfd_process_restore_queues(struct kfd_process *p) { struct kfd_process_device *pdd; int r, ret = 0; @@ -886,7 +919,7 @@ static void evict_process_worker(struct work_struct *work) flush_delayed_work(&p->restore_work); pr_debug("Started evicting pasid %d\n", p->pasid); - ret = process_evict_queues(p); + ret = kfd_process_evict_queues(p); if (!ret) { dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -946,7 +979,7 @@ static void restore_process_worker(struct work_struct *work) return; } - ret = process_restore_queues(p); + ret = kfd_process_restore_queues(p); if (!ret) pr_debug("Finished restoring pasid %d\n", p->pasid); else @@ -963,7 +996,7 @@ void kfd_suspend_all_processes(void) cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - if (process_evict_queues(p)) + if (kfd_process_evict_queues(p)) pr_err("Failed to suspend process %d\n", p->pasid); dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -989,15 +1022,12 @@ int kfd_resume_all_processes(void) return ret; } -int kfd_reserved_mem_mmap(struct kfd_process *process, +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma) { - struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff); struct kfd_process_device *pdd; struct qcm_process_device *qpd; - if (!dev) - return -EINVAL; if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { pr_err("Incorrect CWSR mapping size.\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 7817e327ea6d..d65ce0436b31 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -119,9 +119,6 @@ static int create_cp_queue(struct process_queue_manager *pqm, /* Doorbell initialized in user space*/ q_properties->doorbell_ptr = NULL; - q_properties->doorbell_off = - kfd_queue_id_to_doorbell(dev, pqm->process, qid); - /* let DQM handle it*/ q_properties->vmid = 0; q_properties->queue_id = qid; @@ -244,10 +241,20 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("DQM create queue failed\n"); + pr_err("Pasid %d DQM create queue %d failed. ret %d\n", + pqm->process->pasid, type, retval); goto err_create_queue; } + if (q) + /* Return the doorbell offset within the doorbell page + * to the caller so it can be passed up to user mode + * (in bytes). + */ + properties->doorbell_off = + (q->properties.doorbell_off * sizeof(uint32_t)) & + (kfd_doorbell_process_slice(dev) - 1); + pr_debug("PQM After DQM create queue\n"); list_add(&pqn->process_queue_list, &pqm->queues); @@ -313,8 +320,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval) { - pr_debug("Destroy queue failed, returned %d\n", retval); - goto err_destroy_queue; + pr_err("Pasid %d destroy queue %d failed, ret %d\n", + pqm->process->pasid, + pqn->q->properties.queue_id, retval); + if (retval != -ETIME) + goto err_destroy_queue; } uninit_queue(pqn->q); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index a5315d4f1c95..6dcd621e5b71 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -36,8 +36,8 @@ void print_queue_properties(struct queue_properties *q) pr_debug("Queue Address: 0x%llX\n", q->queue_address); pr_debug("Queue Id: %u\n", q->queue_id); pr_debug("Queue Process Vmid: %u\n", q->vmid); - pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr); - pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr); + pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr); + pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr); pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr); pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off); } @@ -53,8 +53,8 @@ void print_queue(struct queue *q) pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address); pr_debug("Queue Id: %u\n", q->properties.queue_id); pr_debug("Queue Process Vmid: %u\n", q->properties.vmid); - pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr); - pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr); + pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr); + pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr); pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr); pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off); pr_debug("Queue MQD Address: 0x%p\n", q->mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ac28abc94e57..bc95d4dfee2e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1239,6 +1239,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; + case CHIP_VEGA10: + case CHIP_RAVEN: + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + break; default: WARN(1, "Unexpected ASIC family %u", dev->gpu->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index eb54cfcaf039..7d9c3f948dff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -45,6 +45,7 @@ #define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 #define HSA_CAP_DOORBELL_TYPE_1_0 0x1 +#define HSA_CAP_DOORBELL_TYPE_2_0 0x2 #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 struct kfd_node_properties { diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h new file mode 100644 index 000000000000..0bc0b25cb410 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -0,0 +1,47 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef HSA_SOC15_INT_H_INCLUDED +#define HSA_SOC15_INT_H_INCLUDED + +#include "soc15_ih_clientid.h" + +#define SOC15_INTSRC_CP_END_OF_PIPE 181 +#define SOC15_INTSRC_CP_BAD_OPCODE 183 +#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 +#define SOC15_INTSRC_VMC_FAULT 0 +#define SOC15_INTSRC_SDMA_TRAP 224 + + +#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff) +#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff) +#define SOC15_RING_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 16 & 0xff) +#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf) +#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1) +#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff) +#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4])) +#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5])) +#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6])) +#define SOC15_CONTEXT_ID3_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[7])) + +#endif + diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 5b124a67404c..d5d4586e6176 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -9,14 +9,6 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. -config DRM_AMD_DC_PRE_VEGA - bool "DC support for Polaris and older ASICs" - default y - help - Choose this option to enable the new DC support for older asics - by default. This includes Polaris, Carrizo, Tonga, Bonaire, - and Hawaii. - config DRM_AMD_DC_FBC bool "AMD FBC - Enable Frame Buffer Compression" depends on DRM_AMD_DC diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3263031db44b..92eb181b63e9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -46,6 +46,7 @@ #include <linux/moduleparam.h> #include <linux/version.h> #include <linux/types.h> +#include <linux/pm_runtime.h> #include <drm/drmP.h> #include <drm/drm_atomic.h> @@ -433,11 +434,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.dce_environment = DCE_ENV_PRODUCTION_DRV; - if (amdgpu_dc_log) - init_data.log_mask = DC_DEFAULT_LOG_MASK; - else - init_data.log_mask = DC_MIN_LOG_MASK; - /* * TODO debug why this doesn't work on Raven */ @@ -649,18 +645,6 @@ amdgpu_dm_find_first_crtc_matching_connector(struct drm_atomic_state *state, static int dm_resume(void *handle) { struct amdgpu_device *adev = handle; - struct amdgpu_display_manager *dm = &adev->dm; - int ret = 0; - - /* power on hardware */ - dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); - - ret = amdgpu_dm_display_resume(adev); - return ret; -} - -int amdgpu_dm_display_resume(struct amdgpu_device *adev) -{ struct drm_device *ddev = adev->ddev; struct amdgpu_display_manager *dm = &adev->dm; struct amdgpu_dm_connector *aconnector; @@ -671,10 +655,12 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) struct drm_plane *plane; struct drm_plane_state *new_plane_state; struct dm_plane_state *dm_new_plane_state; - - int ret = 0; + int ret; int i; + /* power on hardware */ + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); + /* program HPD filter */ dc_resume(dm->dc); @@ -688,8 +674,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) amdgpu_dm_irq_resume_early(adev); /* Do detection*/ - list_for_each_entry(connector, - &ddev->mode_config.connector_list, head) { + list_for_each_entry(connector, &ddev->mode_config.connector_list, head) { aconnector = to_amdgpu_dm_connector(connector); /* @@ -711,7 +696,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) } /* Force mode set in atomic comit */ - for_each_new_crtc_in_state(adev->dm.cached_state, crtc, new_crtc_state, i) + for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) new_crtc_state->active_changed = true; /* @@ -719,7 +704,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) * them here, since they were duplicated as part of the suspend * procedure. */ - for_each_new_crtc_in_state(adev->dm.cached_state, crtc, new_crtc_state, i) { + for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) { dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); if (dm_new_crtc_state->stream) { WARN_ON(kref_read(&dm_new_crtc_state->stream->refcount) > 1); @@ -728,7 +713,7 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) } } - for_each_new_plane_in_state(adev->dm.cached_state, plane, new_plane_state, i) { + for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) { dm_new_plane_state = to_dm_plane_state(new_plane_state); if (dm_new_plane_state->dc_state) { WARN_ON(kref_read(&dm_new_plane_state->dc_state->refcount) > 1); @@ -737,9 +722,9 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev) } } - ret = drm_atomic_helper_resume(ddev, adev->dm.cached_state); + ret = drm_atomic_helper_resume(ddev, dm->cached_state); - adev->dm.cached_state = NULL; + dm->cached_state = NULL; amdgpu_dm_irq_resume_late(adev); @@ -927,6 +912,7 @@ amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector) drm_mode_connector_update_edid_property(connector, NULL); aconnector->num_modes = 0; aconnector->dc_sink = NULL; + aconnector->edid = NULL; } mutex_unlock(&dev->mode_config.mutex); @@ -1131,6 +1117,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev) if (adev->asic_type == CHIP_VEGA10 || adev->asic_type == CHIP_VEGA12 || + adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_RAVEN) client_id = SOC15_IH_CLIENTID_DCE; @@ -1529,8 +1516,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) case CHIP_POLARIS11: case CHIP_POLARIS10: case CHIP_POLARIS12: + case CHIP_VEGAM: case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: if (dce110_register_irq_handlers(dm->adev)) { DRM_ERROR("DM: Failed to initialize IRQ\n"); goto fail; @@ -1549,7 +1538,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) break; #endif default: - DRM_ERROR("Usupported ASIC type: 0x%X\n", adev->asic_type); + DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); goto fail; } @@ -1657,7 +1646,6 @@ static ssize_t s3_debug_store(struct device *device, if (ret == 0) { if (s3_state) { dm_resume(adev); - amdgpu_dm_display_resume(adev); drm_kms_helper_hotplug_event(adev->ddev); } else dm_suspend(adev); @@ -1722,6 +1710,7 @@ static int dm_early_init(void *handle) adev->mode_info.plane_type = dm_plane_type_default; break; case CHIP_POLARIS10: + case CHIP_VEGAM: adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; @@ -1729,6 +1718,7 @@ static int dm_early_init(void *handle) break; case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; @@ -1743,7 +1733,7 @@ static int dm_early_init(void *handle) break; #endif default: - DRM_ERROR("Usupported ASIC type: 0x%X\n", adev->asic_type); + DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); return -EINVAL; } @@ -1848,7 +1838,7 @@ static bool fill_rects_from_plane_state(const struct drm_plane_state *state, static int get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb, uint64_t *tiling_flags) { - struct amdgpu_bo *rbo = gem_to_amdgpu_bo(amdgpu_fb->obj); + struct amdgpu_bo *rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]); int r = amdgpu_bo_reserve(rbo, false); if (unlikely(r)) { @@ -1977,6 +1967,7 @@ static int fill_plane_attributes_from_fb(struct amdgpu_device *adev, if (adev->asic_type == CHIP_VEGA10 || adev->asic_type == CHIP_VEGA12 || + adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_RAVEN) { /* Fill GFX9 params */ plane_state->tiling_info.gfx9.num_pipes = @@ -2017,7 +2008,6 @@ static int fill_plane_attributes(struct amdgpu_device *adev, const struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(plane_state->fb); const struct drm_crtc *crtc = plane_state->crtc; - struct dc_transfer_func *input_tf; int ret = 0; if (!fill_rects_from_plane_state(plane_state, dc_plane_state)) @@ -2031,13 +2021,6 @@ static int fill_plane_attributes(struct amdgpu_device *adev, if (ret) return ret; - input_tf = dc_create_transfer_func(); - - if (input_tf == NULL) - return -ENOMEM; - - dc_plane_state->in_transfer_func = input_tf; - /* * Always set input transfer function, since plane state is refreshed * every time. @@ -2113,12 +2096,6 @@ convert_color_depth_from_display_info(const struct drm_connector *connector) { uint32_t bpc = connector->display_info.bpc; - /* Limited color depth to 8bit - * TODO: Still need to handle deep color - */ - if (bpc > 8) - bpc = 8; - switch (bpc) { case 0: /* Temporary Work around, DRM don't parse color depth for @@ -2206,7 +2183,6 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, const struct drm_connector *connector) { struct dc_crtc_timing *timing_out = &stream->timing; - struct dc_transfer_func *tf = dc_create_transfer_func(); memset(timing_out, 0, sizeof(struct dc_crtc_timing)); @@ -2250,9 +2226,8 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, stream->output_color_space = get_output_color_space(timing_out); - tf->type = TF_TYPE_PREDEFINED; - tf->tf = TRANSFER_FUNCTION_SRGB; - stream->out_transfer_func = tf; + stream->out_transfer_func->type = TF_TYPE_PREDEFINED; + stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; } static void fill_audio_info(struct audio_info *audio_info, @@ -2336,27 +2311,22 @@ decide_crtc_timing_for_drm_display_mode(struct drm_display_mode *drm_mode, } } -static int create_fake_sink(struct amdgpu_dm_connector *aconnector) +static struct dc_sink * +create_fake_sink(struct amdgpu_dm_connector *aconnector) { - struct dc_sink *sink = NULL; struct dc_sink_init_data sink_init_data = { 0 }; - + struct dc_sink *sink = NULL; sink_init_data.link = aconnector->dc_link; sink_init_data.sink_signal = aconnector->dc_link->connector_signal; sink = dc_sink_create(&sink_init_data); if (!sink) { DRM_ERROR("Failed to create sink!\n"); - return -ENOMEM; + return NULL; } - sink->sink_signal = SIGNAL_TYPE_VIRTUAL; - aconnector->fake_enable = true; - - aconnector->dc_sink = sink; - aconnector->dc_link->local_sink = sink; - return 0; + return sink; } static void set_multisync_trigger_params( @@ -2419,7 +2389,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, struct dc_stream_state *stream = NULL; struct drm_display_mode mode = *drm_mode; bool native_mode_found = false; - + struct dc_sink *sink = NULL; if (aconnector == NULL) { DRM_ERROR("aconnector is NULL!\n"); return stream; @@ -2437,15 +2407,18 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, return stream; } - if (create_fake_sink(aconnector)) + sink = create_fake_sink(aconnector); + if (!sink) return stream; + } else { + sink = aconnector->dc_sink; } - stream = dc_create_stream_for_sink(aconnector->dc_sink); + stream = dc_create_stream_for_sink(sink); if (stream == NULL) { DRM_ERROR("Failed to create stream for sink!\n"); - return stream; + goto finish; } list_for_each_entry(preferred_mode, &aconnector->base.modes, head) { @@ -2484,10 +2457,16 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, fill_audio_info( &stream->audio_info, drm_connector, - aconnector->dc_sink); + sink); update_stream_signal(stream); + if (dm_state && dm_state->freesync_capable) + stream->ignore_msa_timing_param = true; +finish: + if (sink && sink->sink_signal == SIGNAL_TYPE_VIRTUAL) + dc_sink_release(sink); + return stream; } @@ -2710,18 +2689,15 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) const struct dc_link *link = aconnector->dc_link; struct amdgpu_device *adev = connector->dev->dev_private; struct amdgpu_display_manager *dm = &adev->dm; + #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) && - link->type != dc_connection_none) { - amdgpu_dm_register_backlight_device(dm); - - if (dm->backlight_dev) { - backlight_device_unregister(dm->backlight_dev); - dm->backlight_dev = NULL; - } - + link->type != dc_connection_none && + dm->backlight_dev) { + backlight_device_unregister(dm->backlight_dev); + dm->backlight_dev = NULL; } #endif drm_connector_unregister(connector); @@ -2734,6 +2710,9 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) struct dm_connector_state *state = to_dm_connector_state(connector->state); + if (connector->state) + __drm_atomic_helper_connector_destroy_state(connector->state); + kfree(state); state = kzalloc(sizeof(*state), GFP_KERNEL); @@ -2744,8 +2723,7 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->underscan_hborder = 0; state->underscan_vborder = 0; - connector->state = &state->base; - connector->state->connector = connector; + __drm_atomic_helper_connector_reset(connector, &state->base); } } @@ -2855,7 +2833,7 @@ static void handle_edid_mgmt(struct amdgpu_dm_connector *aconnector) create_eml_sink(aconnector); } -int amdgpu_dm_connector_mode_valid(struct drm_connector *connector, +enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { int result = MODE_ERROR; @@ -3058,8 +3036,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, } afb = to_amdgpu_framebuffer(new_state->fb); - - obj = afb->obj; + obj = new_state->fb->obj[0]; rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); r = amdgpu_bo_reserve(rbo, false); @@ -3067,12 +3044,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, return r; if (plane->type != DRM_PLANE_TYPE_CURSOR) - domain = amdgpu_display_framebuffer_domains(adev); + domain = amdgpu_display_supported_domains(adev); else domain = AMDGPU_GEM_DOMAIN_VRAM; r = amdgpu_bo_pin(rbo, domain, &afb->address); - amdgpu_bo_unreserve(rbo); if (unlikely(r != 0)) { @@ -3105,17 +3081,6 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, } } - /* It's a hack for s3 since in 4.9 kernel filter out cursor buffer - * prepare and cleanup in drm_atomic_helper_prepare_planes - * and drm_atomic_helper_cleanup_planes because fb doens't in s3. - * IN 4.10 kernel this code should be removed and amdgpu_device_suspend - * code touching fram buffers should be avoided for DC. - */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) { - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(new_state->crtc); - - acrtc->cursor_bo = obj; - } return 0; } @@ -3123,14 +3088,12 @@ static void dm_plane_helper_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { struct amdgpu_bo *rbo; - struct amdgpu_framebuffer *afb; int r; if (!old_state->fb) return; - afb = to_amdgpu_framebuffer(old_state->fb); - rbo = gem_to_amdgpu_bo(afb->obj); + rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); r = amdgpu_bo_reserve(rbo, false); if (unlikely(r)) { DRM_ERROR("failed to reserve rbo before unpin\n"); @@ -3773,7 +3736,7 @@ static void remove_stream(struct amdgpu_device *adev, static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, struct dc_cursor_position *position) { - struct amdgpu_crtc *amdgpu_crtc = amdgpu_crtc = to_amdgpu_crtc(crtc); + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); int x, y; int xorigin = 0, yorigin = 0; @@ -3905,7 +3868,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, int r, vpos, hpos; struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(fb); - struct amdgpu_bo *abo = gem_to_amdgpu_bo(afb->obj); + struct amdgpu_bo *abo = gem_to_amdgpu_bo(fb->obj[0]); struct amdgpu_device *adev = crtc->dev->dev_private; bool async_flip = (crtc->state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; struct dc_flip_addrs addr = { {0} }; @@ -3984,6 +3947,96 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } +/* + * TODO this whole function needs to go + * + * dc_surface_update is needlessly complex. See if we can just replace this + * with a dc_plane_state and follow the atomic model a bit more closely here. + */ +static bool commit_planes_to_stream( + struct dc *dc, + struct dc_plane_state **plane_states, + uint8_t new_plane_count, + struct dm_crtc_state *dm_new_crtc_state, + struct dm_crtc_state *dm_old_crtc_state, + struct dc_state *state) +{ + /* no need to dynamically allocate this. it's pretty small */ + struct dc_surface_update updates[MAX_SURFACES]; + struct dc_flip_addrs *flip_addr; + struct dc_plane_info *plane_info; + struct dc_scaling_info *scaling_info; + int i; + struct dc_stream_state *dc_stream = dm_new_crtc_state->stream; + struct dc_stream_update *stream_update = + kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL); + + if (!stream_update) { + BREAK_TO_DEBUGGER(); + return false; + } + + flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs), + GFP_KERNEL); + plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info), + GFP_KERNEL); + scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info), + GFP_KERNEL); + + if (!flip_addr || !plane_info || !scaling_info) { + kfree(flip_addr); + kfree(plane_info); + kfree(scaling_info); + kfree(stream_update); + return false; + } + + memset(updates, 0, sizeof(updates)); + + stream_update->src = dc_stream->src; + stream_update->dst = dc_stream->dst; + stream_update->out_transfer_func = dc_stream->out_transfer_func; + + for (i = 0; i < new_plane_count; i++) { + updates[i].surface = plane_states[i]; + updates[i].gamma = + (struct dc_gamma *)plane_states[i]->gamma_correction; + updates[i].in_transfer_func = plane_states[i]->in_transfer_func; + flip_addr[i].address = plane_states[i]->address; + flip_addr[i].flip_immediate = plane_states[i]->flip_immediate; + plane_info[i].color_space = plane_states[i]->color_space; + plane_info[i].format = plane_states[i]->format; + plane_info[i].plane_size = plane_states[i]->plane_size; + plane_info[i].rotation = plane_states[i]->rotation; + plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror; + plane_info[i].stereo_format = plane_states[i]->stereo_format; + plane_info[i].tiling_info = plane_states[i]->tiling_info; + plane_info[i].visible = plane_states[i]->visible; + plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha; + plane_info[i].dcc = plane_states[i]->dcc; + scaling_info[i].scaling_quality = plane_states[i]->scaling_quality; + scaling_info[i].src_rect = plane_states[i]->src_rect; + scaling_info[i].dst_rect = plane_states[i]->dst_rect; + scaling_info[i].clip_rect = plane_states[i]->clip_rect; + + updates[i].flip_addr = &flip_addr[i]; + updates[i].plane_info = &plane_info[i]; + updates[i].scaling_info = &scaling_info[i]; + } + + dc_commit_updates_for_stream( + dc, + updates, + new_plane_count, + dc_stream, stream_update, plane_states, state); + + kfree(flip_addr); + kfree(plane_info); + kfree(scaling_info); + kfree(stream_update); + return true; +} + static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_device *dev, struct amdgpu_display_manager *dm, @@ -3999,6 +4052,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_crtc_state *new_pcrtc_state = drm_atomic_get_new_crtc_state(state, pcrtc); struct dm_crtc_state *acrtc_state = to_dm_crtc_state(new_pcrtc_state); + struct dm_crtc_state *dm_old_crtc_state = + to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc)); struct dm_atomic_state *dm_state = to_dm_atomic_state(state); int planes_count = 0; unsigned long flags; @@ -4035,7 +4090,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, } spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - if (!pflip_needed) { + if (!pflip_needed || plane->type == DRM_PLANE_TYPE_OVERLAY) { WARN_ON(!dm_new_plane_state->dc_state); plane_states_constructed[planes_count] = dm_new_plane_state->dc_state; @@ -4077,10 +4132,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } - if (false == dc_commit_planes_to_stream(dm->dc, + + if (false == commit_planes_to_stream(dm->dc, plane_states_constructed, planes_count, - dc_stream_attach, + acrtc_state, + dm_old_crtc_state, dm_state->context)) dm_error("%s: Failed to attach plane!\n", __func__); } else { @@ -4209,6 +4266,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (dm_old_crtc_state->stream) remove_stream(adev, acrtc, dm_old_crtc_state->stream); + pm_runtime_get_noresume(dev->dev); + acrtc->enabled = true; acrtc->hw_mode = new_crtc_state->mode; crtc->hwmode = new_crtc_state->mode; @@ -4305,8 +4364,10 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); struct dc_stream_status *status = NULL; - if (acrtc) + if (acrtc) { new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); + old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base); + } /* Skip any modesets/resets */ if (!acrtc || drm_atomic_crtc_needs_modeset(new_crtc_state)) @@ -4329,11 +4390,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) WARN_ON(!status->plane_count); /*TODO How it works with MPO ?*/ - if (!dc_commit_planes_to_stream( + if (!commit_planes_to_stream( dm->dc, status->plane_states, status->plane_count, - dm_new_crtc_state->stream, + dm_new_crtc_state, + to_dm_crtc_state(old_crtc_state), dm_state->context)) dm_error("%s: Failed to update stream scaling!\n", __func__); } @@ -4394,6 +4456,16 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_wait_for_flip_done(dev, state); drm_atomic_helper_cleanup_planes(dev, state); + + /* Finally, drop a runtime PM reference for each newly disabled CRTC, + * so we can put the GPU into runtime suspend if we're not driving any + * displays anymore + */ + pm_runtime_mark_last_busy(dev->dev); + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + if (old_crtc_state->active && !new_crtc_state->active) + pm_runtime_put_autosuspend(dev->dev); + } } @@ -4576,7 +4648,7 @@ static int dm_update_crtcs_state(struct dc *dc, if (aconnector && enable) { // Make sure fake sink is created in plug-in scenario new_con_state = drm_atomic_get_connector_state(state, - &aconnector->base); + &aconnector->base); if (IS_ERR(new_con_state)) { ret = PTR_ERR_OR_ZERO(new_con_state); @@ -4693,15 +4765,16 @@ next_crtc: * We want to do dc stream updates that do not require a * full modeset below. */ - if (!enable || !aconnector || modereset_required(new_crtc_state)) + if (!(enable && aconnector && new_crtc_state->enable && + new_crtc_state->active)) continue; /* * Given above conditions, the dc state cannot be NULL because: - * 1. We're attempting to enable a CRTC. Which has a... - * 2. Valid connector attached, and - * 3. User does not want to reset it (disable or mark inactive, - * which can happen on a CRTC that's already disabled). - * => It currently exists. + * 1. We're in the process of enabling CRTCs (just been added + * to the dc context, or already is on the context) + * 2. Has a valid connector attached, and + * 3. Is currently active and enabled. + * => The dc stream state currently exists. */ BUG_ON(dm_new_crtc_state->stream == NULL); @@ -4753,7 +4826,8 @@ static int dm_update_planes_state(struct dc *dc, /* Remove any changed/removed planes */ if (!enable) { - if (pflip_needed) + if (pflip_needed && + plane->type != DRM_PLANE_TYPE_OVERLAY) continue; if (!old_plane_crtc) @@ -4800,7 +4874,8 @@ static int dm_update_planes_state(struct dc *dc, if (!dm_new_crtc_state->stream) continue; - if (pflip_needed) + if (pflip_needed && + plane->type != DRM_PLANE_TYPE_OVERLAY) continue; WARN_ON(dm_new_plane_state->dc_state); @@ -5007,17 +5082,24 @@ void amdgpu_dm_add_sink_to_freesync_module(struct drm_connector *connector, struct edid *edid) { int i; - uint64_t val_capable; bool edid_check_required; struct detailed_timing *timing; struct detailed_non_pixel *data; struct detailed_data_monitor_range *range; struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); + struct dm_connector_state *dm_con_state; struct drm_device *dev = connector->dev; struct amdgpu_device *adev = dev->dev_private; + if (!connector->state) { + DRM_ERROR("%s - Connector has no state", __func__); + return; + } + + dm_con_state = to_dm_connector_state(connector->state); + edid_check_required = false; if (!amdgpu_dm_connector->dc_sink) { DRM_ERROR("dc_sink NULL, could not add free_sync module.\n"); @@ -5036,7 +5118,7 @@ void amdgpu_dm_add_sink_to_freesync_module(struct drm_connector *connector, amdgpu_dm_connector); } } - val_capable = 0; + dm_con_state->freesync_capable = false; if (edid_check_required == true && (edid->version > 1 || (edid->version == 1 && edid->revision > 1))) { for (i = 0; i < 4; i++) { @@ -5072,7 +5154,7 @@ void amdgpu_dm_add_sink_to_freesync_module(struct drm_connector *connector, amdgpu_dm_connector->min_vfreq * 1000000; amdgpu_dm_connector->caps.max_refresh_in_micro_hz = amdgpu_dm_connector->max_vfreq * 1000000; - val_capable = 1; + dm_con_state->freesync_capable = true; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index b68400c1154b..d5aa89ad5571 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -28,7 +28,6 @@ #include <drm/drmP.h> #include <drm/drm_atomic.h> -#include "dc.h" /* * This file contains the definition for amdgpu_display_manager @@ -53,6 +52,7 @@ struct amdgpu_device; struct drm_device; struct amdgpu_dm_irq_handler_data; +struct dc; struct amdgpu_dm_prev_state { struct drm_framebuffer *fb; @@ -220,6 +220,7 @@ struct dm_connector_state { uint8_t underscan_hborder; bool underscan_enable; struct mod_freesync_user_enable user_enable; + bool freesync_capable; }; #define to_dm_connector_state(x)\ @@ -246,7 +247,7 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, struct dc_link *link, int link_index); -int amdgpu_dm_connector_mode_valid(struct drm_connector *connector, +enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode); void dm_restore_drm_connector_state(struct drm_device *dev, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 25f064c01038..b329393307e5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -25,6 +25,7 @@ #include "amdgpu_mode.h" #include "amdgpu_dm.h" +#include "dc.h" #include "modules/color/color_gamma.h" #define MAX_DRM_LUT_VALUE 0xFFFF @@ -87,9 +88,9 @@ static void __drm_lut_to_dc_gamma(struct drm_color_lut *lut, g = drm_color_lut_extract(lut[i].green, 16); b = drm_color_lut_extract(lut[i].blue, 16); - gamma->entries.red[i] = dal_fixed31_32_from_int(r); - gamma->entries.green[i] = dal_fixed31_32_from_int(g); - gamma->entries.blue[i] = dal_fixed31_32_from_int(b); + gamma->entries.red[i] = dc_fixpt_from_int(r); + gamma->entries.green[i] = dc_fixpt_from_int(g); + gamma->entries.blue[i] = dc_fixpt_from_int(b); } return; } @@ -100,9 +101,9 @@ static void __drm_lut_to_dc_gamma(struct drm_color_lut *lut, g = drm_color_lut_extract(lut[i].green, 16); b = drm_color_lut_extract(lut[i].blue, 16); - gamma->entries.red[i] = dal_fixed31_32_from_fraction(r, MAX_DRM_LUT_VALUE); - gamma->entries.green[i] = dal_fixed31_32_from_fraction(g, MAX_DRM_LUT_VALUE); - gamma->entries.blue[i] = dal_fixed31_32_from_fraction(b, MAX_DRM_LUT_VALUE); + gamma->entries.red[i] = dc_fixpt_from_fraction(r, MAX_DRM_LUT_VALUE); + gamma->entries.green[i] = dc_fixpt_from_fraction(g, MAX_DRM_LUT_VALUE); + gamma->entries.blue[i] = dc_fixpt_from_fraction(b, MAX_DRM_LUT_VALUE); } } @@ -207,7 +208,7 @@ void amdgpu_dm_set_ctm(struct dm_crtc_state *crtc) for (i = 0; i < 12; i++) { /* Skip 4th element */ if (i % 4 == 3) { - stream->gamut_remap_matrix.matrix[i] = dal_fixed31_32_zero; + stream->gamut_remap_matrix.matrix[i] = dc_fixpt_zero; continue; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index ca0b08bfa2cf..bd449351803f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -330,11 +330,6 @@ bool dm_helpers_dp_mst_send_payload_allocation( return true; } -bool dm_helpers_dc_conn_log(struct dc_context *ctx, struct log_entry *entry, enum dc_log_type event) -{ - return true; -} - void dm_dtn_log_begin(struct dc_context *ctx) {} diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 4be21bf54749..a910f01838ab 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -555,6 +555,9 @@ static inline int dm_irq_state(struct amdgpu_device *adev, return 0; } + if (acrtc->otg_inst == -1) + return 0; + irq_source = dal_irq_type + acrtc->otg_inst; st = (state == AMDGPU_IRQ_STATE_ENABLE); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index ace9ad578ca0..4304d9e408b8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -83,21 +83,22 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, enum i2c_mot_mode mot = (msg->request & DP_AUX_I2C_MOT) ? I2C_MOT_TRUE : I2C_MOT_FALSE; enum ddc_result res; - ssize_t read_bytes; + uint32_t read_bytes = msg->size; if (WARN_ON(msg->size > 16)) return -E2BIG; switch (msg->request & ~DP_AUX_I2C_MOT) { case DP_AUX_NATIVE_READ: - read_bytes = dal_ddc_service_read_dpcd_data( + res = dal_ddc_service_read_dpcd_data( TO_DM_AUX(aux)->ddc_service, false, I2C_MOT_UNDEF, msg->address, msg->buffer, - msg->size); - return read_bytes; + msg->size, + &read_bytes); + break; case DP_AUX_NATIVE_WRITE: res = dal_ddc_service_write_dpcd_data( TO_DM_AUX(aux)->ddc_service, @@ -108,14 +109,15 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, msg->size); break; case DP_AUX_I2C_READ: - read_bytes = dal_ddc_service_read_dpcd_data( + res = dal_ddc_service_read_dpcd_data( TO_DM_AUX(aux)->ddc_service, true, mot, msg->address, msg->buffer, - msg->size); - return read_bytes; + msg->size, + &read_bytes); + break; case DP_AUX_I2C_WRITE: res = dal_ddc_service_write_dpcd_data( TO_DM_AUX(aux)->ddc_service, @@ -137,7 +139,9 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, r == DDC_RESULT_SUCESSFULL); #endif - return msg->size; + if (res != DDC_RESULT_SUCESSFULL) + return -EIO; + return read_bytes; } static enum drm_connector_status diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c index 89342b48be6b..5a3346124a01 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c @@ -37,8 +37,17 @@ unsigned long long dm_get_timestamp(struct dc_context *ctx) { - /* TODO: return actual timestamp */ - return 0; + struct timespec64 time; + + getrawmonotonic64(&time); + return timespec64_to_ns(&time); +} + +unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx, + unsigned long long current_time_stamp, + unsigned long long last_time_stamp) +{ + return current_time_stamp - last_time_stamp; } void dm_perf_trace_timestamp(const char *func_name, unsigned int line) @@ -225,6 +234,33 @@ static void pp_to_dc_clock_levels( } } +static void pp_to_dc_clock_levels_with_latency( + const struct pp_clock_levels_with_latency *pp_clks, + struct dm_pp_clock_levels_with_latency *clk_level_info, + enum dm_pp_clock_type dc_clk_type) +{ + uint32_t i; + + if (pp_clks->num_levels > DM_PP_MAX_CLOCK_LEVELS) { + DRM_INFO("DM_PPLIB: Warning: %s clock: number of levels %d exceeds maximum of %d!\n", + DC_DECODE_PP_CLOCK_TYPE(dc_clk_type), + pp_clks->num_levels, + DM_PP_MAX_CLOCK_LEVELS); + + clk_level_info->num_levels = DM_PP_MAX_CLOCK_LEVELS; + } else + clk_level_info->num_levels = pp_clks->num_levels; + + DRM_DEBUG("DM_PPLIB: values for %s clock\n", + DC_DECODE_PP_CLOCK_TYPE(dc_clk_type)); + + for (i = 0; i < clk_level_info->num_levels; i++) { + DRM_DEBUG("DM_PPLIB:\t %d\n", pp_clks->data[i].clocks_in_khz); + clk_level_info->data[i].clocks_in_khz = pp_clks->data[i].clocks_in_khz; + clk_level_info->data[i].latency_in_us = pp_clks->data[i].latency_in_us; + } +} + bool dm_pp_get_clock_levels_by_type( const struct dc_context *ctx, enum dm_pp_clock_type clk_type, @@ -302,8 +338,22 @@ bool dm_pp_get_clock_levels_by_type_with_latency( enum dm_pp_clock_type clk_type, struct dm_pp_clock_levels_with_latency *clk_level_info) { - /* TODO: to be implemented */ - return false; + struct amdgpu_device *adev = ctx->driver_context; + void *pp_handle = adev->powerplay.pp_handle; + struct pp_clock_levels_with_latency pp_clks = { 0 }; + const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; + + if (!pp_funcs || !pp_funcs->get_clock_by_type_with_latency) + return false; + + if (pp_funcs->get_clock_by_type_with_latency(pp_handle, + dc_to_pp_clock_type(clk_type), + &pp_clks)) + return false; + + pp_to_dc_clock_levels_with_latency(&pp_clks, clk_level_info, clk_type); + + return true; } bool dm_pp_get_clock_levels_by_type_with_voltage( diff --git a/drivers/gpu/drm/amd/display/dc/basics/Makefile b/drivers/gpu/drm/amd/display/dc/basics/Makefile index bca33bd9a0d2..b49ea96b5dae 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/Makefile +++ b/drivers/gpu/drm/amd/display/dc/basics/Makefile @@ -24,7 +24,7 @@ # It provides the general basic services required by other DAL # subcomponents. -BASICS = conversion.o fixpt31_32.o fixpt32_32.o \ +BASICS = conversion.o fixpt31_32.o \ logger.o log_helpers.o vector.o AMD_DAL_BASICS = $(addprefix $(AMDDALPATH)/dc/basics/,$(BASICS)) diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c index 310964915a83..50b47f11875c 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c +++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c @@ -41,22 +41,22 @@ uint16_t fixed_point_to_int_frac( uint16_t result; - uint16_t d = (uint16_t)dal_fixed31_32_floor( - dal_fixed31_32_abs( + uint16_t d = (uint16_t)dc_fixpt_floor( + dc_fixpt_abs( arg)); if (d <= (uint16_t)(1 << integer_bits) - (1 / (uint16_t)divisor)) - numerator = (uint16_t)dal_fixed31_32_round( - dal_fixed31_32_mul_int( + numerator = (uint16_t)dc_fixpt_round( + dc_fixpt_mul_int( arg, divisor)); else { - numerator = dal_fixed31_32_floor( - dal_fixed31_32_sub( - dal_fixed31_32_from_int( + numerator = dc_fixpt_floor( + dc_fixpt_sub( + dc_fixpt_from_int( 1LL << integer_bits), - dal_fixed31_32_recip( - dal_fixed31_32_from_int( + dc_fixpt_recip( + dc_fixpt_from_int( divisor)))); } @@ -66,8 +66,8 @@ uint16_t fixed_point_to_int_frac( result = (uint16_t)( (1 << (integer_bits + fractional_bits + 1)) + numerator); - if ((result != 0) && dal_fixed31_32_lt( - arg, dal_fixed31_32_zero)) + if ((result != 0) && dc_fixpt_lt( + arg, dc_fixpt_zero)) result |= 1 << (integer_bits + fractional_bits); return result; @@ -84,15 +84,15 @@ void convert_float_matrix( uint32_t buffer_size) { const struct fixed31_32 min_2_13 = - dal_fixed31_32_from_fraction(S2D13_MIN, DIVIDER); + dc_fixpt_from_fraction(S2D13_MIN, DIVIDER); const struct fixed31_32 max_2_13 = - dal_fixed31_32_from_fraction(S2D13_MAX, DIVIDER); + dc_fixpt_from_fraction(S2D13_MAX, DIVIDER); uint32_t i; for (i = 0; i < buffer_size; ++i) { uint32_t reg_value = fixed_point_to_int_frac( - dal_fixed31_32_clamp( + dc_fixpt_clamp( flt[i], min_2_13, max_2_13), diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 8a9bba879207..f28989860fd8 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -26,13 +26,13 @@ #include "dm_services.h" #include "include/fixed31_32.h" -static inline uint64_t abs_i64( - int64_t arg) +static inline unsigned long long abs_i64( + long long arg) { if (arg > 0) - return (uint64_t)arg; + return (unsigned long long)arg; else - return (uint64_t)(-arg); + return (unsigned long long)(-arg); } /* @@ -40,12 +40,12 @@ static inline uint64_t abs_i64( * result = dividend / divisor * *remainder = dividend % divisor */ -static inline uint64_t complete_integer_division_u64( - uint64_t dividend, - uint64_t divisor, - uint64_t *remainder) +static inline unsigned long long complete_integer_division_u64( + unsigned long long dividend, + unsigned long long divisor, + unsigned long long *remainder) { - uint64_t result; + unsigned long long result; ASSERT(divisor); @@ -64,30 +64,28 @@ static inline uint64_t complete_integer_division_u64( #define GET_FRACTIONAL_PART(x) \ (FRACTIONAL_PART_MASK & (x)) -struct fixed31_32 dal_fixed31_32_from_fraction( - int64_t numerator, - int64_t denominator) +struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator) { struct fixed31_32 res; bool arg1_negative = numerator < 0; bool arg2_negative = denominator < 0; - uint64_t arg1_value = arg1_negative ? -numerator : numerator; - uint64_t arg2_value = arg2_negative ? -denominator : denominator; + unsigned long long arg1_value = arg1_negative ? -numerator : numerator; + unsigned long long arg2_value = arg2_negative ? -denominator : denominator; - uint64_t remainder; + unsigned long long remainder; /* determine integer part */ - uint64_t res_value = complete_integer_division_u64( + unsigned long long res_value = complete_integer_division_u64( arg1_value, arg2_value, &remainder); ASSERT(res_value <= LONG_MAX); /* determine fractional part */ { - uint32_t i = FIXED31_32_BITS_PER_FRACTIONAL_PART; + unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; do { remainder <<= 1; @@ -103,14 +101,14 @@ struct fixed31_32 dal_fixed31_32_from_fraction( /* round up LSB */ { - uint64_t summand = (remainder << 1) >= arg2_value; + unsigned long long summand = (remainder << 1) >= arg2_value; ASSERT(res_value <= LLONG_MAX - summand); res_value += summand; } - res.value = (int64_t)res_value; + res.value = (long long)res_value; if (arg1_negative ^ arg2_negative) res.value = -res.value; @@ -118,79 +116,23 @@ struct fixed31_32 dal_fixed31_32_from_fraction( return res; } -struct fixed31_32 dal_fixed31_32_from_int_nonconst( - int64_t arg) -{ - struct fixed31_32 res; - - ASSERT((LONG_MIN <= arg) && (arg <= LONG_MAX)); - - res.value = arg << FIXED31_32_BITS_PER_FRACTIONAL_PART; - - return res; -} - -struct fixed31_32 dal_fixed31_32_shl( - struct fixed31_32 arg, - uint8_t shift) -{ - struct fixed31_32 res; - - ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) || - ((arg.value < 0) && (arg.value >= LLONG_MIN >> shift))); - - res.value = arg.value << shift; - - return res; -} - -struct fixed31_32 dal_fixed31_32_add( - struct fixed31_32 arg1, - struct fixed31_32 arg2) -{ - struct fixed31_32 res; - - ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) || - ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value))); - - res.value = arg1.value + arg2.value; - - return res; -} - -struct fixed31_32 dal_fixed31_32_sub( - struct fixed31_32 arg1, - struct fixed31_32 arg2) -{ - struct fixed31_32 res; - - ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) || - ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value))); - - res.value = arg1.value - arg2.value; - - return res; -} - -struct fixed31_32 dal_fixed31_32_mul( - struct fixed31_32 arg1, - struct fixed31_32 arg2) +struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2) { struct fixed31_32 res; bool arg1_negative = arg1.value < 0; bool arg2_negative = arg2.value < 0; - uint64_t arg1_value = arg1_negative ? -arg1.value : arg1.value; - uint64_t arg2_value = arg2_negative ? -arg2.value : arg2.value; + unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; + unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; - uint64_t arg1_int = GET_INTEGER_PART(arg1_value); - uint64_t arg2_int = GET_INTEGER_PART(arg2_value); + unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); + unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); - uint64_t arg1_fra = GET_FRACTIONAL_PART(arg1_value); - uint64_t arg2_fra = GET_FRACTIONAL_PART(arg2_value); + unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); + unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); - uint64_t tmp; + unsigned long long tmp; res.value = arg1_int * arg2_int; @@ -200,22 +142,22 @@ struct fixed31_32 dal_fixed31_32_mul( tmp = arg1_int * arg2_fra; - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; tmp = arg2_int * arg1_fra; - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; tmp = arg1_fra * arg2_fra; tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + - (tmp >= (uint64_t)dal_fixed31_32_half.value); + (tmp >= (unsigned long long)dc_fixpt_half.value); - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; @@ -225,18 +167,17 @@ struct fixed31_32 dal_fixed31_32_mul( return res; } -struct fixed31_32 dal_fixed31_32_sqr( - struct fixed31_32 arg) +struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg) { struct fixed31_32 res; - uint64_t arg_value = abs_i64(arg.value); + unsigned long long arg_value = abs_i64(arg.value); - uint64_t arg_int = GET_INTEGER_PART(arg_value); + unsigned long long arg_int = GET_INTEGER_PART(arg_value); - uint64_t arg_fra = GET_FRACTIONAL_PART(arg_value); + unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); - uint64_t tmp; + unsigned long long tmp; res.value = arg_int * arg_int; @@ -246,28 +187,27 @@ struct fixed31_32 dal_fixed31_32_sqr( tmp = arg_int * arg_fra; - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; tmp = arg_fra * arg_fra; tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + - (tmp >= (uint64_t)dal_fixed31_32_half.value); + (tmp >= (unsigned long long)dc_fixpt_half.value); - ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value)); + ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); res.value += tmp; return res; } -struct fixed31_32 dal_fixed31_32_recip( - struct fixed31_32 arg) +struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) { /* * @note @@ -276,41 +216,40 @@ struct fixed31_32 dal_fixed31_32_recip( ASSERT(arg.value); - return dal_fixed31_32_from_fraction( - dal_fixed31_32_one.value, + return dc_fixpt_from_fraction( + dc_fixpt_one.value, arg.value); } -struct fixed31_32 dal_fixed31_32_sinc( - struct fixed31_32 arg) +struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg) { struct fixed31_32 square; - struct fixed31_32 res = dal_fixed31_32_one; + struct fixed31_32 res = dc_fixpt_one; - int32_t n = 27; + int n = 27; struct fixed31_32 arg_norm = arg; - if (dal_fixed31_32_le( - dal_fixed31_32_two_pi, - dal_fixed31_32_abs(arg))) { - arg_norm = dal_fixed31_32_sub( + if (dc_fixpt_le( + dc_fixpt_two_pi, + dc_fixpt_abs(arg))) { + arg_norm = dc_fixpt_sub( arg_norm, - dal_fixed31_32_mul_int( - dal_fixed31_32_two_pi, - (int32_t)div64_s64( + dc_fixpt_mul_int( + dc_fixpt_two_pi, + (int)div64_s64( arg_norm.value, - dal_fixed31_32_two_pi.value))); + dc_fixpt_two_pi.value))); } - square = dal_fixed31_32_sqr(arg_norm); + square = dc_fixpt_sqr(arg_norm); do { - res = dal_fixed31_32_sub( - dal_fixed31_32_one, - dal_fixed31_32_div_int( - dal_fixed31_32_mul( + res = dc_fixpt_sub( + dc_fixpt_one, + dc_fixpt_div_int( + dc_fixpt_mul( square, res), n * (n - 1))); @@ -319,37 +258,35 @@ struct fixed31_32 dal_fixed31_32_sinc( } while (n > 2); if (arg.value != arg_norm.value) - res = dal_fixed31_32_div( - dal_fixed31_32_mul(res, arg_norm), + res = dc_fixpt_div( + dc_fixpt_mul(res, arg_norm), arg); return res; } -struct fixed31_32 dal_fixed31_32_sin( - struct fixed31_32 arg) +struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg) { - return dal_fixed31_32_mul( + return dc_fixpt_mul( arg, - dal_fixed31_32_sinc(arg)); + dc_fixpt_sinc(arg)); } -struct fixed31_32 dal_fixed31_32_cos( - struct fixed31_32 arg) +struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg) { /* TODO implement argument normalization */ - const struct fixed31_32 square = dal_fixed31_32_sqr(arg); + const struct fixed31_32 square = dc_fixpt_sqr(arg); - struct fixed31_32 res = dal_fixed31_32_one; + struct fixed31_32 res = dc_fixpt_one; - int32_t n = 26; + int n = 26; do { - res = dal_fixed31_32_sub( - dal_fixed31_32_one, - dal_fixed31_32_div_int( - dal_fixed31_32_mul( + res = dc_fixpt_sub( + dc_fixpt_one, + dc_fixpt_div_int( + dc_fixpt_mul( square, res), n * (n - 1))); @@ -367,37 +304,35 @@ struct fixed31_32 dal_fixed31_32_cos( * * Calculated as Taylor series. */ -static struct fixed31_32 fixed31_32_exp_from_taylor_series( - struct fixed31_32 arg) +static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg) { - uint32_t n = 9; + unsigned int n = 9; - struct fixed31_32 res = dal_fixed31_32_from_fraction( + struct fixed31_32 res = dc_fixpt_from_fraction( n + 2, n + 1); /* TODO find correct res */ - ASSERT(dal_fixed31_32_lt(arg, dal_fixed31_32_one)); + ASSERT(dc_fixpt_lt(arg, dc_fixpt_one)); do - res = dal_fixed31_32_add( - dal_fixed31_32_one, - dal_fixed31_32_div_int( - dal_fixed31_32_mul( + res = dc_fixpt_add( + dc_fixpt_one, + dc_fixpt_div_int( + dc_fixpt_mul( arg, res), n)); while (--n != 1); - return dal_fixed31_32_add( - dal_fixed31_32_one, - dal_fixed31_32_mul( + return dc_fixpt_add( + dc_fixpt_one, + dc_fixpt_mul( arg, res)); } -struct fixed31_32 dal_fixed31_32_exp( - struct fixed31_32 arg) +struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg) { /* * @brief @@ -406,44 +341,43 @@ struct fixed31_32 dal_fixed31_32_exp( * where m = round(x / ln(2)), r = x - m * ln(2) */ - if (dal_fixed31_32_le( - dal_fixed31_32_ln2_div_2, - dal_fixed31_32_abs(arg))) { - int32_t m = dal_fixed31_32_round( - dal_fixed31_32_div( + if (dc_fixpt_le( + dc_fixpt_ln2_div_2, + dc_fixpt_abs(arg))) { + int m = dc_fixpt_round( + dc_fixpt_div( arg, - dal_fixed31_32_ln2)); + dc_fixpt_ln2)); - struct fixed31_32 r = dal_fixed31_32_sub( + struct fixed31_32 r = dc_fixpt_sub( arg, - dal_fixed31_32_mul_int( - dal_fixed31_32_ln2, + dc_fixpt_mul_int( + dc_fixpt_ln2, m)); ASSERT(m != 0); - ASSERT(dal_fixed31_32_lt( - dal_fixed31_32_abs(r), - dal_fixed31_32_one)); + ASSERT(dc_fixpt_lt( + dc_fixpt_abs(r), + dc_fixpt_one)); if (m > 0) - return dal_fixed31_32_shl( + return dc_fixpt_shl( fixed31_32_exp_from_taylor_series(r), - (uint8_t)m); + (unsigned char)m); else - return dal_fixed31_32_div_int( + return dc_fixpt_div_int( fixed31_32_exp_from_taylor_series(r), 1LL << -m); } else if (arg.value != 0) return fixed31_32_exp_from_taylor_series(arg); else - return dal_fixed31_32_one; + return dc_fixpt_one; } -struct fixed31_32 dal_fixed31_32_log( - struct fixed31_32 arg) +struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg) { - struct fixed31_32 res = dal_fixed31_32_neg(dal_fixed31_32_one); + struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one); /* TODO improve 1st estimation */ struct fixed31_32 error; @@ -453,15 +387,15 @@ struct fixed31_32 dal_fixed31_32_log( /* TODO if arg is zero, return -INF */ do { - struct fixed31_32 res1 = dal_fixed31_32_add( - dal_fixed31_32_sub( + struct fixed31_32 res1 = dc_fixpt_add( + dc_fixpt_sub( res, - dal_fixed31_32_one), - dal_fixed31_32_div( + dc_fixpt_one), + dc_fixpt_div( arg, - dal_fixed31_32_exp(res))); + dc_fixpt_exp(res))); - error = dal_fixed31_32_sub( + error = dc_fixpt_sub( res, res1); @@ -472,78 +406,23 @@ struct fixed31_32 dal_fixed31_32_log( return res; } -struct fixed31_32 dal_fixed31_32_pow( - struct fixed31_32 arg1, - struct fixed31_32 arg2) -{ - return dal_fixed31_32_exp( - dal_fixed31_32_mul( - dal_fixed31_32_log(arg1), - arg2)); -} - -int32_t dal_fixed31_32_floor( - struct fixed31_32 arg) -{ - uint64_t arg_value = abs_i64(arg.value); - - if (arg.value >= 0) - return (int32_t)GET_INTEGER_PART(arg_value); - else - return -(int32_t)GET_INTEGER_PART(arg_value); -} - -int32_t dal_fixed31_32_round( - struct fixed31_32 arg) -{ - uint64_t arg_value = abs_i64(arg.value); - - const int64_t summand = dal_fixed31_32_half.value; - - ASSERT(LLONG_MAX - (int64_t)arg_value >= summand); - - arg_value += summand; - - if (arg.value >= 0) - return (int32_t)GET_INTEGER_PART(arg_value); - else - return -(int32_t)GET_INTEGER_PART(arg_value); -} - -int32_t dal_fixed31_32_ceil( - struct fixed31_32 arg) -{ - uint64_t arg_value = abs_i64(arg.value); - - const int64_t summand = dal_fixed31_32_one.value - - dal_fixed31_32_epsilon.value; - - ASSERT(LLONG_MAX - (int64_t)arg_value >= summand); - - arg_value += summand; - - if (arg.value >= 0) - return (int32_t)GET_INTEGER_PART(arg_value); - else - return -(int32_t)GET_INTEGER_PART(arg_value); -} /* this function is a generic helper to translate fixed point value to * specified integer format that will consist of integer_bits integer part and * fractional_bits fractional part. For example it is used in - * dal_fixed31_32_u2d19 to receive 2 bits integer part and 19 bits fractional + * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional * part in 32 bits. It is used in hw programming (scaler) */ -static inline uint32_t ux_dy( - int64_t value, - uint32_t integer_bits, - uint32_t fractional_bits) +static inline unsigned int ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits) { /* 1. create mask of integer part */ - uint32_t result = (1 << integer_bits) - 1; + unsigned int result = (1 << integer_bits) - 1; /* 2. mask out fractional part */ - uint32_t fractional_part = FRACTIONAL_PART_MASK & value; + unsigned int fractional_part = FRACTIONAL_PART_MASK & value; /* 3. shrink fixed point integer part to be of integer_bits width*/ result &= GET_INTEGER_PART(value); /* 4. make space for fractional part to be filled in after integer */ @@ -554,13 +433,13 @@ static inline uint32_t ux_dy( return result | fractional_part; } -static inline uint32_t clamp_ux_dy( - int64_t value, - uint32_t integer_bits, - uint32_t fractional_bits, - uint32_t min_clamp) +static inline unsigned int clamp_ux_dy( + long long value, + unsigned int integer_bits, + unsigned int fractional_bits, + unsigned int min_clamp) { - uint32_t truncated_val = ux_dy(value, integer_bits, fractional_bits); + unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) return (1 << (integer_bits + fractional_bits)) - 1; @@ -570,35 +449,35 @@ static inline uint32_t clamp_ux_dy( return min_clamp; } -uint32_t dal_fixed31_32_u2d19( - struct fixed31_32 arg) +unsigned int dc_fixpt_u3d19(struct fixed31_32 arg) +{ + return ux_dy(arg.value, 3, 19); +} + +unsigned int dc_fixpt_u2d19(struct fixed31_32 arg) { return ux_dy(arg.value, 2, 19); } -uint32_t dal_fixed31_32_u0d19( - struct fixed31_32 arg) +unsigned int dc_fixpt_u0d19(struct fixed31_32 arg) { return ux_dy(arg.value, 0, 19); } -uint32_t dal_fixed31_32_clamp_u0d14( - struct fixed31_32 arg) +unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg) { return clamp_ux_dy(arg.value, 0, 14, 1); } -uint32_t dal_fixed31_32_clamp_u0d10( - struct fixed31_32 arg) +unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg) { return clamp_ux_dy(arg.value, 0, 10, 1); } -int32_t dal_fixed31_32_s4d19( - struct fixed31_32 arg) +int dc_fixpt_s4d19(struct fixed31_32 arg) { if (arg.value < 0) - return -(int32_t)ux_dy(dal_fixed31_32_abs(arg).value, 4, 19); + return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19); else return ux_dy(arg.value, 4, 19); } diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt32_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt32_32.c deleted file mode 100644 index 4d3aaa82a07b..000000000000 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt32_32.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dm_services.h" -#include "include/fixed32_32.h" - -static uint64_t u64_div(uint64_t n, uint64_t d) -{ - uint32_t i = 0; - uint64_t r; - uint64_t q = div64_u64_rem(n, d, &r); - - for (i = 0; i < 32; ++i) { - uint64_t sbit = q & (1ULL<<63); - - r <<= 1; - r |= sbit ? 1 : 0; - q <<= 1; - if (r >= d) { - r -= d; - q |= 1; - } - } - - if (2*r >= d) - q += 1; - return q; -} - -struct fixed32_32 dal_fixed32_32_from_fraction(uint32_t n, uint32_t d) -{ - struct fixed32_32 fx; - - fx.value = u64_div((uint64_t)n << 32, (uint64_t)d << 32); - return fx; -} - -struct fixed32_32 dal_fixed32_32_add( - struct fixed32_32 lhs, - struct fixed32_32 rhs) -{ - struct fixed32_32 fx = {lhs.value + rhs.value}; - - ASSERT(fx.value >= rhs.value); - return fx; -} - -struct fixed32_32 dal_fixed32_32_add_int(struct fixed32_32 lhs, uint32_t rhs) -{ - struct fixed32_32 fx = {lhs.value + ((uint64_t)rhs << 32)}; - - ASSERT(fx.value >= (uint64_t)rhs << 32); - return fx; - -} -struct fixed32_32 dal_fixed32_32_sub( - struct fixed32_32 lhs, - struct fixed32_32 rhs) -{ - struct fixed32_32 fx; - - ASSERT(lhs.value >= rhs.value); - fx.value = lhs.value - rhs.value; - return fx; -} - -struct fixed32_32 dal_fixed32_32_sub_int(struct fixed32_32 lhs, uint32_t rhs) -{ - struct fixed32_32 fx; - - ASSERT(lhs.value >= ((uint64_t)rhs<<32)); - fx.value = lhs.value - ((uint64_t)rhs<<32); - return fx; -} - -struct fixed32_32 dal_fixed32_32_mul( - struct fixed32_32 lhs, - struct fixed32_32 rhs) -{ - struct fixed32_32 fx; - uint64_t lhs_int = lhs.value>>32; - uint64_t lhs_frac = (uint32_t)lhs.value; - uint64_t rhs_int = rhs.value>>32; - uint64_t rhs_frac = (uint32_t)rhs.value; - uint64_t ahbh = lhs_int * rhs_int; - uint64_t ahbl = lhs_int * rhs_frac; - uint64_t albh = lhs_frac * rhs_int; - uint64_t albl = lhs_frac * rhs_frac; - - ASSERT((ahbh>>32) == 0); - - fx.value = (ahbh<<32) + ahbl + albh + (albl>>32); - return fx; - -} - -struct fixed32_32 dal_fixed32_32_mul_int(struct fixed32_32 lhs, uint32_t rhs) -{ - struct fixed32_32 fx; - uint64_t lhsi = (lhs.value>>32) * (uint64_t)rhs; - uint64_t lhsf; - - ASSERT((lhsi>>32) == 0); - lhsf = ((uint32_t)lhs.value) * (uint64_t)rhs; - ASSERT((lhsi<<32) + lhsf >= lhsf); - fx.value = (lhsi<<32) + lhsf; - return fx; -} - -struct fixed32_32 dal_fixed32_32_div( - struct fixed32_32 lhs, - struct fixed32_32 rhs) -{ - struct fixed32_32 fx; - - fx.value = u64_div(lhs.value, rhs.value); - return fx; -} - -struct fixed32_32 dal_fixed32_32_div_int(struct fixed32_32 lhs, uint32_t rhs) -{ - struct fixed32_32 fx; - - fx.value = u64_div(lhs.value, (uint64_t)rhs << 32); - return fx; -} - -uint32_t dal_fixed32_32_ceil(struct fixed32_32 v) -{ - ASSERT((uint32_t)v.value ? (v.value >> 32) + 1 >= 1 : true); - return (v.value>>32) + ((uint32_t)v.value ? 1 : 0); -} - -uint32_t dal_fixed32_32_round(struct fixed32_32 v) -{ - ASSERT(v.value + (1ULL<<31) >= (1ULL<<31)); - return (v.value + (1ULL<<31))>>32; -} - diff --git a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c index 854678a0c54b..021451549ff7 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c @@ -94,7 +94,6 @@ void dc_conn_log(struct dc_context *ctx, dm_logger_append(&entry, "%2.2X ", hex_data[i]); dm_logger_append(&entry, "^\n"); - dm_helpers_dc_conn_log(ctx, &entry, event); fail: dm_logger_close(&entry); diff --git a/drivers/gpu/drm/amd/display/dc/basics/logger.c b/drivers/gpu/drm/amd/display/dc/basics/logger.c index 31bee054f43a..738a818d58d1 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/logger.c +++ b/drivers/gpu/drm/amd/display/dc/basics/logger.c @@ -61,7 +61,7 @@ static const struct dc_log_type_info log_type_info_tbl[] = { {LOG_EVENT_UNDERFLOW, "Underflow"}, {LOG_IF_TRACE, "InterfaceTrace"}, {LOG_DTN, "DTN"}, - {LOG_PROFILING, "Profiling"} + {LOG_DISPLAYSTATS, "DisplayStats"} }; @@ -402,3 +402,4 @@ cleanup: entry->max_buf_bytes = 0; } } + diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 985fe8c22875..b8cef7af3c4a 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -70,6 +70,10 @@ static enum bp_result get_firmware_info_v3_1( struct bios_parser *bp, struct dc_firmware_info *info); +static enum bp_result get_firmware_info_v3_2( + struct bios_parser *bp, + struct dc_firmware_info *info); + static struct atom_hpd_int_record *get_hpd_record(struct bios_parser *bp, struct atom_display_object_path_v2 *object); @@ -1321,9 +1325,14 @@ static enum bp_result bios_parser_get_firmware_info( case 3: switch (revision.minor) { case 1: - case 2: result = get_firmware_info_v3_1(bp, info); break; + case 2: + result = get_firmware_info_v3_2(bp, info); + break; + case 3: + result = get_firmware_info_v3_2(bp, info); + break; default: break; } @@ -1383,6 +1392,84 @@ static enum bp_result get_firmware_info_v3_1( return BP_RESULT_OK; } +static enum bp_result get_firmware_info_v3_2( + struct bios_parser *bp, + struct dc_firmware_info *info) +{ + struct atom_firmware_info_v3_2 *firmware_info; + struct atom_display_controller_info_v4_1 *dce_info = NULL; + struct atom_common_table_header *header; + struct atom_data_revision revision; + struct atom_smu_info_v3_2 *smu_info_v3_2 = NULL; + struct atom_smu_info_v3_3 *smu_info_v3_3 = NULL; + + if (!info) + return BP_RESULT_BADINPUT; + + firmware_info = GET_IMAGE(struct atom_firmware_info_v3_2, + DATA_TABLES(firmwareinfo)); + + dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1, + DATA_TABLES(dce_info)); + + if (!firmware_info || !dce_info) + return BP_RESULT_BADBIOSTABLE; + + memset(info, 0, sizeof(*info)); + + header = GET_IMAGE(struct atom_common_table_header, + DATA_TABLES(smu_info)); + get_atom_data_table_revision(header, &revision); + + if (revision.minor == 2) { + /* Vega12 */ + smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2, + DATA_TABLES(smu_info)); + + if (!smu_info_v3_2) + return BP_RESULT_BADBIOSTABLE; + + info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10; + } else if (revision.minor == 3) { + /* Vega20 */ + smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3, + DATA_TABLES(smu_info)); + + if (!smu_info_v3_3) + return BP_RESULT_BADBIOSTABLE; + + info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10; + } + + // We need to convert from 10KHz units into KHz units. + info->default_memory_clk = firmware_info->bootup_mclk_in10khz * 10; + + /* 27MHz for Vega10 & Vega12; 100MHz for Vega20 */ + info->pll_info.crystal_frequency = dce_info->dce_refclk_10khz * 10; + /* Hardcode frequency if BIOS gives no DCE Ref Clk */ + if (info->pll_info.crystal_frequency == 0) { + if (revision.minor == 2) + info->pll_info.crystal_frequency = 27000; + else if (revision.minor == 3) + info->pll_info.crystal_frequency = 100000; + } + /*dp_phy_ref_clk is not correct for atom_display_controller_info_v4_2, but we don't use it*/ + info->dp_phy_ref_clk = dce_info->dpphy_refclk_10khz * 10; + info->i2c_engine_ref_clk = dce_info->i2c_engine_refclk_10khz * 10; + + /* Get GPU PLL VCO Clock */ + if (bp->cmd_tbl.get_smu_clock_info != NULL) { + if (revision.minor == 2) + info->smu_gpu_pll_output_freq = + bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10; + else if (revision.minor == 3) + info->smu_gpu_pll_output_freq = + bp->cmd_tbl.get_smu_clock_info(bp, SMU11_SYSPLL3_0_ID) * 10; + } + + return BP_RESULT_OK; +} + static enum bp_result bios_parser_get_encoder_cap_info( struct dc_bios *dcb, struct graphics_object_id object_id, diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 4b5fdd577848..651e1fd4622f 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -24,7 +24,7 @@ */ #include "dm_services.h" - +#include "amdgpu.h" #include "atom.h" #include "include/bios_parser_interface.h" @@ -35,16 +35,16 @@ #include "bios_parser_types_internal.h" #define EXEC_BIOS_CMD_TABLE(command, params)\ - (cgs_atom_exec_cmd_table(bp->base.ctx->cgs_device, \ + (amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \ GetIndexIntoMasterTable(COMMAND, command), \ - ¶ms) == 0) + (uint32_t *)¶ms) == 0) #define BIOS_CMD_TABLE_REVISION(command, frev, crev)\ - cgs_atom_get_cmd_table_revs(bp->base.ctx->cgs_device, \ + amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \ GetIndexIntoMasterTable(COMMAND, command), &frev, &crev) #define BIOS_CMD_TABLE_PARA_REVISION(command)\ - bios_cmd_table_para_revision(bp->base.ctx->cgs_device, \ + bios_cmd_table_para_revision(bp->base.ctx->driver_context, \ GetIndexIntoMasterTable(COMMAND, command)) static void init_dig_encoder_control(struct bios_parser *bp); @@ -82,16 +82,18 @@ void dal_bios_parser_init_cmd_tbl(struct bios_parser *bp) init_set_dce_clock(bp); } -static uint32_t bios_cmd_table_para_revision(void *cgs_device, +static uint32_t bios_cmd_table_para_revision(void *dev, uint32_t index) { + struct amdgpu_device *adev = dev; uint8_t frev, crev; - if (cgs_atom_get_cmd_table_revs(cgs_device, + if (amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, - &frev, &crev) != 0) + &frev, &crev)) + return crev; + else return 0; - return crev; } /******************************************************************************* @@ -368,7 +370,7 @@ static void init_transmitter_control(struct bios_parser *bp) uint8_t crev; if (BIOS_CMD_TABLE_REVISION(UNIPHYTransmitterControl, - frev, crev) != 0) + frev, crev) == false) BREAK_TO_DEBUGGER(); switch (crev) { case 2: diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 3f63f712c8a4..752b08a42d3e 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -26,14 +26,18 @@ #include "dm_services.h" #include "ObjectID.h" -#include "atomfirmware.h" +#include "atomfirmware.h" +#include "atom.h" #include "include/bios_parser_interface.h" #include "command_table2.h" #include "command_table_helper2.h" #include "bios_parser_helper.h" #include "bios_parser_types_internal2.h" +#include "amdgpu.h" + + #define DC_LOGGER \ bp->base.ctx->logger @@ -43,16 +47,16 @@ ->FieldName)-(char *)0)/sizeof(uint16_t)) #define EXEC_BIOS_CMD_TABLE(fname, params)\ - (cgs_atom_exec_cmd_table(bp->base.ctx->cgs_device, \ + (amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \ GET_INDEX_INTO_MASTER_TABLE(command, fname), \ - ¶ms) == 0) + (uint32_t *)¶ms) == 0) #define BIOS_CMD_TABLE_REVISION(fname, frev, crev)\ - cgs_atom_get_cmd_table_revs(bp->base.ctx->cgs_device, \ + amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \ GET_INDEX_INTO_MASTER_TABLE(command, fname), &frev, &crev) #define BIOS_CMD_TABLE_PARA_REVISION(fname)\ - bios_cmd_table_para_revision(bp->base.ctx->cgs_device, \ + bios_cmd_table_para_revision(bp->base.ctx->driver_context, \ GET_INDEX_INTO_MASTER_TABLE(command, fname)) static void init_dig_encoder_control(struct bios_parser *bp); @@ -86,16 +90,18 @@ void dal_firmware_parser_init_cmd_tbl(struct bios_parser *bp) init_get_smu_clock_info(bp); } -static uint32_t bios_cmd_table_para_revision(void *cgs_device, +static uint32_t bios_cmd_table_para_revision(void *dev, uint32_t index) { + struct amdgpu_device *adev = dev; uint8_t frev, crev; - if (cgs_atom_get_cmd_table_revs(cgs_device, + if (amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, - &frev, &crev) != 0) + &frev, &crev)) + return crev; + else return 0; - return crev; } /****************************************************************************** @@ -201,7 +207,7 @@ static void init_transmitter_control(struct bios_parser *bp) uint8_t frev; uint8_t crev; - if (BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) != 0) + if (BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) == false) BREAK_TO_DEBUGGER(); switch (crev) { case 6: diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c index 2979358c6a55..253bbb1eea60 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c @@ -51,6 +51,7 @@ bool dal_bios_parser_init_cmd_tbl_helper( return true; case DCE_VERSION_11_2: + case DCE_VERSION_11_22: *h = dal_cmd_tbl_helper_dce112_get_table(); return true; diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c index 9a4d30dd4969..bbbcef566c55 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c @@ -52,6 +52,7 @@ bool dal_bios_parser_init_cmd_tbl_helper2( return true; case DCE_VERSION_11_2: + case DCE_VERSION_11_22: *h = dal_cmd_tbl_helper_dce112_get_table2(); return true; #if defined(CONFIG_DRM_AMD_DC_DCN1_0) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h b/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h new file mode 100644 index 000000000000..fc3f98fb09ea --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h @@ -0,0 +1,579 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef _CALCS_CALCS_LOGGER_H_ +#define _CALCS_CALCS_LOGGER_H_ +#define DC_LOGGER \ + logger + +static void print_bw_calcs_dceip(struct dal_logger *logger, const struct bw_calcs_dceip *dceip) +{ + + DC_LOG_BANDWIDTH_CALCS("#####################################################################"); + DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_dceip"); + DC_LOG_BANDWIDTH_CALCS("#####################################################################"); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_calcs_version version %d", dceip->version); + DC_LOG_BANDWIDTH_CALCS(" [bool] large_cursor: %d", dceip->large_cursor); + DC_LOG_BANDWIDTH_CALCS(" [bool] dmif_pipe_en_fbc_chunk_tracker: %d", dceip->dmif_pipe_en_fbc_chunk_tracker); + DC_LOG_BANDWIDTH_CALCS(" [bool] display_write_back_supported: %d", dceip->display_write_back_supported); + DC_LOG_BANDWIDTH_CALCS(" [bool] argb_compression_support: %d", dceip->argb_compression_support); + DC_LOG_BANDWIDTH_CALCS(" [bool] pre_downscaler_enabled: %d", dceip->pre_downscaler_enabled); + DC_LOG_BANDWIDTH_CALCS(" [bool] underlay_downscale_prefetch_enabled: %d", + dceip->underlay_downscale_prefetch_enabled); + DC_LOG_BANDWIDTH_CALCS(" [bool] graphics_lb_nodownscaling_multi_line_prefetching: %d", + dceip->graphics_lb_nodownscaling_multi_line_prefetching); + DC_LOG_BANDWIDTH_CALCS(" [bool] limit_excessive_outstanding_dmif_requests: %d", + dceip->limit_excessive_outstanding_dmif_requests); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] cursor_max_outstanding_group_num: %d", + dceip->cursor_max_outstanding_group_num); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] lines_interleaved_into_lb: %d", dceip->lines_interleaved_into_lb); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] low_power_tiling_mode: %d", dceip->low_power_tiling_mode); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] chunk_width: %d", dceip->chunk_width); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_graphics_pipes: %d", dceip->number_of_graphics_pipes); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_underlay_pipes: %d", dceip->number_of_underlay_pipes); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] max_dmif_buffer_allocated: %d", dceip->max_dmif_buffer_allocated); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] graphics_dmif_size: %d", dceip->graphics_dmif_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_luma_dmif_size: %d", dceip->underlay_luma_dmif_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_chroma_dmif_size: %d", dceip->underlay_chroma_dmif_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] scatter_gather_lines_of_pte_prefetching_in_linear_mode: %d", + dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] display_write_back420_luma_mcifwr_buffer_size: %d", + dceip->display_write_back420_luma_mcifwr_buffer_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] display_write_back420_chroma_mcifwr_buffer_size: %d", + dceip->display_write_back420_chroma_mcifwr_buffer_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] scatter_gather_pte_request_rows_in_tiling_mode: %d", + dceip->scatter_gather_pte_request_rows_in_tiling_mode); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_vscaler_efficiency10_bit_per_component: %d", + bw_fixed_to_int(dceip->underlay_vscaler_efficiency10_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_vscaler_efficiency12_bit_per_component: %d", + bw_fixed_to_int(dceip->underlay_vscaler_efficiency12_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency6_bit_per_component: %d", + bw_fixed_to_int(dceip->graphics_vscaler_efficiency6_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency8_bit_per_component: %d", + bw_fixed_to_int(dceip->graphics_vscaler_efficiency8_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency10_bit_per_component: %d", + bw_fixed_to_int(dceip->graphics_vscaler_efficiency10_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] graphics_vscaler_efficiency12_bit_per_component: %d", + bw_fixed_to_int(dceip->graphics_vscaler_efficiency12_bit_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] alpha_vscaler_efficiency: %d", + bw_fixed_to_int(dceip->alpha_vscaler_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_write_pixels_per_dispclk: %d", + bw_fixed_to_int(dceip->lb_write_pixels_per_dispclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_size_per_component444: %d", + bw_fixed_to_int(dceip->lb_size_per_component444)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_and_dram_clock_state_change_gated_before_cursor: %d", + bw_fixed_to_int(dceip->stutter_and_dram_clock_state_change_gated_before_cursor)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay420_luma_lb_size_per_component: %d", + bw_fixed_to_int(dceip->underlay420_luma_lb_size_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay420_chroma_lb_size_per_component: %d", + bw_fixed_to_int(dceip->underlay420_chroma_lb_size_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay422_lb_size_per_component: %d", + bw_fixed_to_int(dceip->underlay422_lb_size_per_component)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_chunk_width: %d", bw_fixed_to_int(dceip->cursor_chunk_width)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_dcp_buffer_lines: %d", + bw_fixed_to_int(dceip->cursor_dcp_buffer_lines)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_width_efficient_for_tiling: %d", + bw_fixed_to_int(dceip->underlay_maximum_width_efficient_for_tiling)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_height_efficient_for_tiling: %d", + bw_fixed_to_int(dceip->underlay_maximum_height_efficient_for_tiling)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display: %d", + bw_fixed_to_int(dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation: %d", + bw_fixed_to_int(dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_outstanding_pte_request_limit: %d", + bw_fixed_to_int(dceip->minimum_outstanding_pte_request_limit)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_total_outstanding_pte_requests_allowed_by_saw: %d", + bw_fixed_to_int(dceip->maximum_total_outstanding_pte_requests_allowed_by_saw)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] linear_mode_line_request_alternation_slice: %d", + bw_fixed_to_int(dceip->linear_mode_line_request_alternation_slice)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] request_efficiency: %d", bw_fixed_to_int(dceip->request_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_per_request: %d", bw_fixed_to_int(dceip->dispclk_per_request)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_ramping_factor: %d", + bw_fixed_to_int(dceip->dispclk_ramping_factor)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_pipe_throughput_factor: %d", + bw_fixed_to_int(dceip->display_pipe_throughput_factor)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_all_surfaces_burst_time: %d", + bw_fixed_to_int(dceip->mcifwr_all_surfaces_burst_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_request_buffer_size: %d", + bw_fixed_to_int(dceip->dmif_request_buffer_size)); + + +} + +static void print_bw_calcs_vbios(struct dal_logger *logger, const struct bw_calcs_vbios *vbios) +{ + + DC_LOG_BANDWIDTH_CALCS("#####################################################################"); + DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_vbios vbios"); + DC_LOG_BANDWIDTH_CALCS("#####################################################################"); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines memory_type: %d", vbios->memory_type); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines memory_type: %d", vbios->memory_type); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] dram_channel_width_in_bits: %d", vbios->dram_channel_width_in_bits); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_channels: %d", vbios->number_of_dram_channels); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_banks: %d", vbios->number_of_dram_banks); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_yclk: %d", bw_fixed_to_int(vbios->low_yclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_yclk: %d", bw_fixed_to_int(vbios->mid_yclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_yclk: %d", bw_fixed_to_int(vbios->high_yclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_sclk: %d", bw_fixed_to_int(vbios->low_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid1_sclk: %d", bw_fixed_to_int(vbios->mid1_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid2_sclk: %d", bw_fixed_to_int(vbios->mid2_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid3_sclk: %d", bw_fixed_to_int(vbios->mid3_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid4_sclk: %d", bw_fixed_to_int(vbios->mid4_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid5_sclk: %d", bw_fixed_to_int(vbios->mid5_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid6_sclk: %d", bw_fixed_to_int(vbios->mid6_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_sclk: %d", bw_fixed_to_int(vbios->high_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_voltage_max_dispclk: %d", + bw_fixed_to_int(vbios->low_voltage_max_dispclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_voltage_max_dispclk;: %d", + bw_fixed_to_int(vbios->mid_voltage_max_dispclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_voltage_max_dispclk;: %d", + bw_fixed_to_int(vbios->high_voltage_max_dispclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] low_voltage_max_phyclk: %d", + bw_fixed_to_int(vbios->low_voltage_max_phyclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mid_voltage_max_phyclk: %d", + bw_fixed_to_int(vbios->mid_voltage_max_phyclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] high_voltage_max_phyclk: %d", + bw_fixed_to_int(vbios->high_voltage_max_phyclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] data_return_bus_width: %d", bw_fixed_to_int(vbios->data_return_bus_width)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] trc: %d", bw_fixed_to_int(vbios->trc)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifmc_urgent_latency: %d", bw_fixed_to_int(vbios->dmifmc_urgent_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_self_refresh_exit_latency: %d", + bw_fixed_to_int(vbios->stutter_self_refresh_exit_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_self_refresh_entry_latency: %d", + bw_fixed_to_int(vbios->stutter_self_refresh_entry_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_change_latency: %d", + bw_fixed_to_int(vbios->nbp_state_change_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwrmc_urgent_latency: %d", + bw_fixed_to_int(vbios->mcifwrmc_urgent_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bool] scatter_gather_enable: %d", vbios->scatter_gather_enable); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] down_spread_percentage: %d", + bw_fixed_to_int(vbios->down_spread_percentage)); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] cursor_width: %d", vbios->cursor_width); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] average_compression_rate: %d", vbios->average_compression_rate); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_request_slots_gmc_reserves_for_dmif_per_channel: %d", + vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_duration: %d", bw_fixed_to_int(vbios->blackout_duration)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_blackout_recovery_time: %d", + bw_fixed_to_int(vbios->maximum_blackout_recovery_time)); + + +} + +static void print_bw_calcs_data(struct dal_logger *logger, struct bw_calcs_data *data) +{ + + int i, j, k; + + DC_LOG_BANDWIDTH_CALCS("#####################################################################"); + DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_data data"); + DC_LOG_BANDWIDTH_CALCS("#####################################################################"); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_displays: %d", data->number_of_displays); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_surface_type: %d", data->underlay_surface_type); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines panning_and_bezel_adjustment: %d", + data->panning_and_bezel_adjustment); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines graphics_tiling_mode: %d", data->graphics_tiling_mode); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] graphics_lb_bpc: %d", data->graphics_lb_bpc); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] underlay_lb_bpc: %d", data->underlay_lb_bpc); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_tiling_mode: %d", data->underlay_tiling_mode); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines d0_underlay_mode: %d", data->d0_underlay_mode); + DC_LOG_BANDWIDTH_CALCS(" [bool] d1_display_write_back_dwb_enable: %d", data->d1_display_write_back_dwb_enable); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines d1_underlay_mode: %d", data->d1_underlay_mode); + DC_LOG_BANDWIDTH_CALCS(" [bool] cpup_state_change_enable: %d", data->cpup_state_change_enable); + DC_LOG_BANDWIDTH_CALCS(" [bool] cpuc_state_change_enable: %d", data->cpuc_state_change_enable); + DC_LOG_BANDWIDTH_CALCS(" [bool] nbp_state_change_enable: %d", data->nbp_state_change_enable); + DC_LOG_BANDWIDTH_CALCS(" [bool] stutter_mode_enable: %d", data->stutter_mode_enable); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] y_clk_level: %d", data->y_clk_level); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] sclk_level: %d", data->sclk_level); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_underlay_surfaces: %d", data->number_of_underlay_surfaces); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_wrchannels: %d", data->number_of_dram_wrchannels); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] chunk_request_delay: %d", data->chunk_request_delay); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] number_of_dram_channels: %d", data->number_of_dram_channels); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines underlay_micro_tile_mode: %d", data->underlay_micro_tile_mode); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines graphics_micro_tile_mode: %d", data->graphics_micro_tile_mode); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] max_phyclk: %d", bw_fixed_to_int(data->max_phyclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_efficiency: %d", bw_fixed_to_int(data->dram_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width_after_surface_type: %d", + bw_fixed_to_int(data->src_width_after_surface_type)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height_after_surface_type: %d", + bw_fixed_to_int(data->src_height_after_surface_type)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_surface_type: %d", + bw_fixed_to_int(data->hsr_after_surface_type)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_surface_type: %d", bw_fixed_to_int(data->vsr_after_surface_type)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width_after_rotation: %d", + bw_fixed_to_int(data->src_width_after_rotation)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height_after_rotation: %d", + bw_fixed_to_int(data->src_height_after_rotation)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_rotation: %d", bw_fixed_to_int(data->hsr_after_rotation)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_rotation: %d", bw_fixed_to_int(data->vsr_after_rotation)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_height_pixels: %d", bw_fixed_to_int(data->source_height_pixels)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr_after_stereo: %d", bw_fixed_to_int(data->hsr_after_stereo)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr_after_stereo: %d", bw_fixed_to_int(data->vsr_after_stereo)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_in_lb: %d", bw_fixed_to_int(data->source_width_in_lb)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_line_pitch: %d", bw_fixed_to_int(data->lb_line_pitch)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] underlay_maximum_source_efficient_for_tiling: %d", + bw_fixed_to_int(data->underlay_maximum_source_efficient_for_tiling)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] num_lines_at_frame_start: %d", + bw_fixed_to_int(data->num_lines_at_frame_start)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_dmif_size_in_time: %d", bw_fixed_to_int(data->min_dmif_size_in_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_mcifwr_size_in_time: %d", + bw_fixed_to_int(data->min_mcifwr_size_in_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_requests_for_dmif_size: %d", + bw_fixed_to_int(data->total_requests_for_dmif_size)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] peak_pte_request_to_eviction_ratio_limiting: %d", + bw_fixed_to_int(data->peak_pte_request_to_eviction_ratio_limiting)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] useful_pte_per_pte_request: %d", + bw_fixed_to_int(data->useful_pte_per_pte_request)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_request_rows: %d", + bw_fixed_to_int(data->scatter_gather_pte_request_rows)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_row_height: %d", + bw_fixed_to_int(data->scatter_gather_row_height)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_requests_in_vblank: %d", + bw_fixed_to_int(data->scatter_gather_pte_requests_in_vblank)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] inefficient_linear_pitch_in_bytes: %d", + bw_fixed_to_int(data->inefficient_linear_pitch_in_bytes)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_total_data: %d", bw_fixed_to_int(data->cursor_total_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_total_request_groups: %d", + bw_fixed_to_int(data->cursor_total_request_groups)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_total_pte_requests: %d", + bw_fixed_to_int(data->scatter_gather_total_pte_requests)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_total_pte_request_groups: %d", + bw_fixed_to_int(data->scatter_gather_total_pte_request_groups)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] tile_width_in_pixels: %d", bw_fixed_to_int(data->tile_width_in_pixels)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_total_number_of_data_request_page_close_open: %d", + bw_fixed_to_int(data->dmif_total_number_of_data_request_page_close_open)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_total_number_of_data_request_page_close_open: %d", + bw_fixed_to_int(data->mcifwr_total_number_of_data_request_page_close_open)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] bytes_per_page_close_open: %d", + bw_fixed_to_int(data->bytes_per_page_close_open)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_total_page_close_open_time: %d", + bw_fixed_to_int(data->mcifwr_total_page_close_open_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_requests_for_adjusted_dmif_size: %d", + bw_fixed_to_int(data->total_requests_for_adjusted_dmif_size)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dmifmc_urgent_trips: %d", + bw_fixed_to_int(data->total_dmifmc_urgent_trips)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dmifmc_urgent_latency: %d", + bw_fixed_to_int(data->total_dmifmc_urgent_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_reads_required_data: %d", + bw_fixed_to_int(data->total_display_reads_required_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_reads_required_dram_access_data: %d", + bw_fixed_to_int(data->total_display_reads_required_dram_access_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_writes_required_data: %d", + bw_fixed_to_int(data->total_display_writes_required_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_display_writes_required_dram_access_data: %d", + bw_fixed_to_int(data->total_display_writes_required_dram_access_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_required_data: %d", + bw_fixed_to_int(data->display_reads_required_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_required_dram_access_data: %d", + bw_fixed_to_int(data->display_reads_required_dram_access_data)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_total_page_close_open_time: %d", + bw_fixed_to_int(data->dmif_total_page_close_open_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_cursor_memory_interface_buffer_size_in_time: %d", + bw_fixed_to_int(data->min_cursor_memory_interface_buffer_size_in_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_read_buffer_size_in_time: %d", + bw_fixed_to_int(data->min_read_buffer_size_in_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_time_for_data_transfer: %d", + bw_fixed_to_int(data->display_reads_time_for_data_transfer)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_writes_time_for_data_transfer: %d", + bw_fixed_to_int(data->display_writes_time_for_data_transfer)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_dram_bandwidth: %d", + bw_fixed_to_int(data->dmif_required_dram_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_required_dram_bandwidth: %d", + bw_fixed_to_int(data->mcifwr_required_dram_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_dmifmc_urgent_latency_for_page_close_open: %d", + bw_fixed_to_int(data->required_dmifmc_urgent_latency_for_page_close_open)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_mcifmcwr_urgent_latency: %d", + bw_fixed_to_int(data->required_mcifmcwr_urgent_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_dram_bandwidth_gbyte_per_second: %d", + bw_fixed_to_int(data->required_dram_bandwidth_gbyte_per_second)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_bandwidth: %d", bw_fixed_to_int(data->dram_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_sclk: %d", bw_fixed_to_int(data->dmif_required_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_required_sclk: %d", bw_fixed_to_int(data->mcifwr_required_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] required_sclk: %d", bw_fixed_to_int(data->required_sclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] downspread_factor: %d", bw_fixed_to_int(data->downspread_factor)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_scaler_efficiency: %d", bw_fixed_to_int(data->v_scaler_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scaler_limits_factor: %d", bw_fixed_to_int(data->scaler_limits_factor)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_pipe_pixel_throughput: %d", + bw_fixed_to_int(data->display_pipe_pixel_throughput)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_with_ramping: %d", + bw_fixed_to_int(data->total_dispclk_required_with_ramping)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_without_ramping: %d", + bw_fixed_to_int(data->total_dispclk_required_without_ramping)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_read_request_bandwidth: %d", + bw_fixed_to_int(data->total_read_request_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_write_request_bandwidth: %d", + bw_fixed_to_int(data->total_write_request_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_total_read_request_bandwidth: %d", + bw_fixed_to_int(data->dispclk_required_for_total_read_request_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_with_ramping_with_request_bandwidth: %d", + bw_fixed_to_int(data->total_dispclk_required_with_ramping_with_request_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_dispclk_required_without_ramping_with_request_bandwidth: %d", + bw_fixed_to_int(data->total_dispclk_required_without_ramping_with_request_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk: %d", bw_fixed_to_int(data->dispclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_recovery_time: %d", bw_fixed_to_int(data->blackout_recovery_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_pixels_per_data_fifo_entry: %d", + bw_fixed_to_int(data->min_pixels_per_data_fifo_entry)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] sclk_deep_sleep: %d", bw_fixed_to_int(data->sclk_deep_sleep)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] chunk_request_time: %d", bw_fixed_to_int(data->chunk_request_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_request_time: %d", bw_fixed_to_int(data->cursor_request_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] line_source_pixels_transfer_time: %d", + bw_fixed_to_int(data->line_source_pixels_transfer_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifdram_access_efficiency: %d", + bw_fixed_to_int(data->dmifdram_access_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwrdram_access_efficiency: %d", + bw_fixed_to_int(data->mcifwrdram_access_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_average_bandwidth_no_compression: %d", + bw_fixed_to_int(data->total_average_bandwidth_no_compression)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_average_bandwidth: %d", + bw_fixed_to_int(data->total_average_bandwidth)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] total_stutter_cycle_duration: %d", + bw_fixed_to_int(data->total_stutter_cycle_duration)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_burst_time: %d", bw_fixed_to_int(data->stutter_burst_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] time_in_self_refresh: %d", bw_fixed_to_int(data->time_in_self_refresh)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_efficiency: %d", bw_fixed_to_int(data->stutter_efficiency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] worst_number_of_trips_to_memory: %d", + bw_fixed_to_int(data->worst_number_of_trips_to_memory)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] immediate_flip_time: %d", bw_fixed_to_int(data->immediate_flip_time)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_for_non_dmif_clients: %d", + bw_fixed_to_int(data->latency_for_non_dmif_clients)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_for_non_mcifwr_clients: %d", + bw_fixed_to_int(data->latency_for_non_mcifwr_clients)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmifmc_urgent_latency_supported_in_high_sclk_and_yclk: %d", + bw_fixed_to_int(data->dmifmc_urgent_latency_supported_in_high_sclk_and_yclk)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_dram_speed_change_margin: %d", + bw_fixed_to_int(data->nbp_state_dram_speed_change_margin)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_reads_time_for_data_transfer_and_urgent_latency: %d", + bw_fixed_to_int(data->display_reads_time_for_data_transfer_and_urgent_latency)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_speed_change_margin: %d", + bw_fixed_to_int(data->dram_speed_change_margin)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_vblank_dram_speed_change_margin: %d", + bw_fixed_to_int(data->min_vblank_dram_speed_change_margin)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_stutter_refresh_duration: %d", + bw_fixed_to_int(data->min_stutter_refresh_duration)); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] total_stutter_dmif_buffer_size: %d", data->total_stutter_dmif_buffer_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] total_bytes_requested: %d", data->total_bytes_requested); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] min_stutter_dmif_buffer_size: %d", data->min_stutter_dmif_buffer_size); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] num_stutter_bursts: %d", data->num_stutter_bursts); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_blank_nbp_state_dram_speed_change_latency_supported: %d", + bw_fixed_to_int(data->v_blank_nbp_state_dram_speed_change_latency_supported)); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_dram_speed_change_latency_supported: %d", + bw_fixed_to_int(data->nbp_state_dram_speed_change_latency_supported)); + + for (i = 0; i < maximum_number_of_surfaces; i++) { + DC_LOG_BANDWIDTH_CALCS(" [bool] fbc_en[%d]:%d\n", i, data->fbc_en[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] lpt_en[%d]:%d", i, data->lpt_en[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] displays_match_flag[%d]:%d", i, data->displays_match_flag[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] use_alpha[%d]:%d", i, data->use_alpha[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] orthogonal_rotation[%d]:%d", i, data->orthogonal_rotation[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] enable[%d]:%d", i, data->enable[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] access_one_channel_only[%d]:%d", i, data->access_one_channel_only[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] scatter_gather_enable_for_pipe[%d]:%d", + i, data->scatter_gather_enable_for_pipe[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] interlace_mode[%d]:%d", + i, data->interlace_mode[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] display_pstate_change_enable[%d]:%d", + i, data->display_pstate_change_enable[i]); + DC_LOG_BANDWIDTH_CALCS(" [bool] line_buffer_prefetch[%d]:%d", i, data->line_buffer_prefetch[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] bytes_per_pixel[%d]:%d", i, data->bytes_per_pixel[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] max_chunks_non_fbc_mode[%d]:%d", + i, data->max_chunks_non_fbc_mode[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] lb_bpc[%d]:%d", i, data->lb_bpc[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bpphdmi[%d]:%d", i, data->output_bpphdmi[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr[%d]:%d", i, data->output_bppdp4_lane_hbr[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr2[%d]:%d", + i, data->output_bppdp4_lane_hbr2[i]); + DC_LOG_BANDWIDTH_CALCS(" [uint32_t] output_bppdp4_lane_hbr3[%d]:%d", + i, data->output_bppdp4_lane_hbr3[i]); + DC_LOG_BANDWIDTH_CALCS(" [enum] bw_defines stereo_mode[%d]:%d", i, data->stereo_mode[i]); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_buffer_transfer_time[%d]:%d", + i, bw_fixed_to_int(data->dmif_buffer_transfer_time[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] displays_with_same_mode[%d]:%d", + i, bw_fixed_to_int(data->displays_with_same_mode[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_dmif_buffer_size[%d]:%d", + i, bw_fixed_to_int(data->stutter_dmif_buffer_size[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_refresh_duration[%d]:%d", + i, bw_fixed_to_int(data->stutter_refresh_duration[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_exit_watermark[%d]:%d", + i, bw_fixed_to_int(data->stutter_exit_watermark[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_entry_watermark[%d]:%d", + i, bw_fixed_to_int(data->stutter_entry_watermark[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_total[%d]:%d", i, bw_fixed_to_int(data->h_total[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_total[%d]:%d", i, bw_fixed_to_int(data->v_total[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pixel_rate[%d]:%d", i, bw_fixed_to_int(data->pixel_rate[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_width[%d]:%d", i, bw_fixed_to_int(data->src_width[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pitch_in_pixels[%d]:%d", + i, bw_fixed_to_int(data->pitch_in_pixels[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pitch_in_pixels_after_surface_type[%d]:%d", + i, bw_fixed_to_int(data->pitch_in_pixels_after_surface_type[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_height[%d]:%d", i, bw_fixed_to_int(data->src_height[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scale_ratio[%d]:%d", i, bw_fixed_to_int(data->scale_ratio[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_taps[%d]:%d", i, bw_fixed_to_int(data->h_taps[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_taps[%d]:%d", i, bw_fixed_to_int(data->v_taps[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] h_scale_ratio[%d]:%d", i, bw_fixed_to_int(data->h_scale_ratio[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_scale_ratio[%d]:%d", i, bw_fixed_to_int(data->v_scale_ratio[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] rotation_angle[%d]:%d", + i, bw_fixed_to_int(data->rotation_angle[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] compression_rate[%d]:%d", + i, bw_fixed_to_int(data->compression_rate[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] hsr[%d]:%d", i, bw_fixed_to_int(data->hsr[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] vsr[%d]:%d", i, bw_fixed_to_int(data->vsr[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_rounded_up_to_chunks[%d]:%d", + i, bw_fixed_to_int(data->source_width_rounded_up_to_chunks[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_width_pixels[%d]:%d", + i, bw_fixed_to_int(data->source_width_pixels[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] source_height_rounded_up_to_chunks[%d]:%d", + i, bw_fixed_to_int(data->source_height_rounded_up_to_chunks[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] display_bandwidth[%d]:%d", + i, bw_fixed_to_int(data->display_bandwidth[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] request_bandwidth[%d]:%d", + i, bw_fixed_to_int(data->request_bandwidth[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] bytes_per_request[%d]:%d", + i, bw_fixed_to_int(data->bytes_per_request[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] useful_bytes_per_request[%d]:%d", + i, bw_fixed_to_int(data->useful_bytes_per_request[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lines_interleaved_in_mem_access[%d]:%d", + i, bw_fixed_to_int(data->lines_interleaved_in_mem_access[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] latency_hiding_lines[%d]:%d", + i, bw_fixed_to_int(data->latency_hiding_lines[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_partitions[%d]:%d", + i, bw_fixed_to_int(data->lb_partitions[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_partitions_max[%d]:%d", + i, bw_fixed_to_int(data->lb_partitions_max[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_with_ramping[%d]:%d", + i, bw_fixed_to_int(data->dispclk_required_with_ramping[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_without_ramping[%d]:%d", + i, bw_fixed_to_int(data->dispclk_required_without_ramping[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] data_buffer_size[%d]:%d", + i, bw_fixed_to_int(data->data_buffer_size[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] outstanding_chunk_request_limit[%d]:%d", + i, bw_fixed_to_int(data->outstanding_chunk_request_limit[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] urgent_watermark[%d]:%d", + i, bw_fixed_to_int(data->urgent_watermark[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] nbp_state_change_watermark[%d]:%d", + i, bw_fixed_to_int(data->nbp_state_change_watermark[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_filter_init[%d]:%d", i, bw_fixed_to_int(data->v_filter_init[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] stutter_cycle_duration[%d]:%d", + i, bw_fixed_to_int(data->stutter_cycle_duration[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] average_bandwidth[%d]:%d", + i, bw_fixed_to_int(data->average_bandwidth[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] average_bandwidth_no_compression[%d]:%d", + i, bw_fixed_to_int(data->average_bandwidth_no_compression[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_request_limit[%d]:%d", + i, bw_fixed_to_int(data->scatter_gather_pte_request_limit[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_size_per_component[%d]:%d", + i, bw_fixed_to_int(data->lb_size_per_component[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] memory_chunk_size_in_bytes[%d]:%d", + i, bw_fixed_to_int(data->memory_chunk_size_in_bytes[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pipe_chunk_size_in_bytes[%d]:%d", + i, bw_fixed_to_int(data->pipe_chunk_size_in_bytes[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] number_of_trips_to_memory_for_getting_apte_row[%d]:%d", + i, bw_fixed_to_int(data->number_of_trips_to_memory_for_getting_apte_row[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] adjusted_data_buffer_size[%d]:%d", + i, bw_fixed_to_int(data->adjusted_data_buffer_size[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] adjusted_data_buffer_size_in_memory[%d]:%d", + i, bw_fixed_to_int(data->adjusted_data_buffer_size_in_memory[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pixels_per_data_fifo_entry[%d]:%d", + i, bw_fixed_to_int(data->pixels_per_data_fifo_entry[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_pte_requests_in_row[%d]:%d", + i, bw_fixed_to_int(data->scatter_gather_pte_requests_in_row[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] pte_request_per_chunk[%d]:%d", + i, bw_fixed_to_int(data->pte_request_per_chunk[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_page_width[%d]:%d", + i, bw_fixed_to_int(data->scatter_gather_page_width[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] scatter_gather_page_height[%d]:%d", + i, bw_fixed_to_int(data->scatter_gather_page_height[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_lines_in_per_line_out_in_beginning_of_frame[%d]:%d", + i, bw_fixed_to_int(data->lb_lines_in_per_line_out_in_beginning_of_frame[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] lb_lines_in_per_line_out_in_middle_of_frame[%d]:%d", + i, bw_fixed_to_int(data->lb_lines_in_per_line_out_in_middle_of_frame[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_width_pixels[%d]:%d", + i, bw_fixed_to_int(data->cursor_width_pixels[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_latency_hiding[%d]:%d", + i, bw_fixed_to_int(data->minimum_latency_hiding[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_latency_hiding[%d]:%d", + i, bw_fixed_to_int(data->maximum_latency_hiding[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] minimum_latency_hiding_with_cursor[%d]:%d", + i, bw_fixed_to_int(data->minimum_latency_hiding_with_cursor[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] maximum_latency_hiding_with_cursor[%d]:%d", + i, bw_fixed_to_int(data->maximum_latency_hiding_with_cursor[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_pixels_for_first_output_pixel[%d]:%d", + i, bw_fixed_to_int(data->src_pixels_for_first_output_pixel[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_pixels_for_last_output_pixel[%d]:%d", + i, bw_fixed_to_int(data->src_pixels_for_last_output_pixel[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_data_for_first_output_pixel[%d]:%d", + i, bw_fixed_to_int(data->src_data_for_first_output_pixel[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] src_data_for_last_output_pixel[%d]:%d", + i, bw_fixed_to_int(data->src_data_for_last_output_pixel[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] active_time[%d]:%d", i, bw_fixed_to_int(data->active_time[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] horizontal_blank_and_chunk_granularity_factor[%d]:%d", + i, bw_fixed_to_int(data->horizontal_blank_and_chunk_granularity_factor[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] cursor_latency_hiding[%d]:%d", + i, bw_fixed_to_int(data->cursor_latency_hiding[i])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] v_blank_dram_speed_change_margin[%d]:%d", + i, bw_fixed_to_int(data->v_blank_dram_speed_change_margin[i])); + } + + for (i = 0; i < maximum_number_of_surfaces; i++) { + for (j = 0; j < 3; j++) { + for (k = 0; k < 8; k++) { + + DC_LOG_BANDWIDTH_CALCS("\n [bw_fixed] line_source_transfer_time[%d][%d][%d]:%d", + i, j, k, bw_fixed_to_int(data->line_source_transfer_time[i][j][k])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dram_speed_change_line_source_transfer_time[%d][%d][%d]:%d", + i, j, k, + bw_fixed_to_int(data->dram_speed_change_line_source_transfer_time[i][j][k])); + } + } + } + + for (i = 0; i < 3; i++) { + for (j = 0; j < 8; j++) { + + DC_LOG_BANDWIDTH_CALCS("\n [uint32_t] num_displays_with_margin[%d][%d]:%d", + i, j, data->num_displays_with_margin[i][j]); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_burst_time[%d][%d]:%d", + i, j, bw_fixed_to_int(data->dmif_burst_time[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] mcifwr_burst_time[%d][%d]:%d", + i, j, bw_fixed_to_int(data->mcifwr_burst_time[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] min_dram_speed_change_margin[%d][%d]:%d", + i, j, bw_fixed_to_int(data->min_dram_speed_change_margin[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_dram_speed_change[%d][%d]:%d", + i, j, bw_fixed_to_int(data->dispclk_required_for_dram_speed_change[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] blackout_duration_margin[%d][%d]:%d", + i, j, bw_fixed_to_int(data->blackout_duration_margin[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_blackout_duration[%d][%d]:%d", + i, j, bw_fixed_to_int(data->dispclk_required_for_blackout_duration[i][j])); + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dispclk_required_for_blackout_recovery[%d][%d]:%d", + i, j, bw_fixed_to_int(data->dispclk_required_for_blackout_recovery[i][j])); + } + } + + for (i = 0; i < 6; i++) { + DC_LOG_BANDWIDTH_CALCS(" [bw_fixed] dmif_required_sclk_for_urgent_latency[%d]:%d", + i, bw_fixed_to_int(data->dmif_required_sclk_for_urgent_latency[i])); + } +} +; + +#endif /* _CALCS_CALCS_LOGGER_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c b/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c index 7243c37f569e..31d167bc548f 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c @@ -36,41 +36,41 @@ static bool build_custom_float( uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1; const struct fixed31_32 mantissa_constant_plus_max_fraction = - dal_fixed31_32_from_fraction( + dc_fixpt_from_fraction( (1LL << (format->mantissa_bits + 1)) - 1, 1LL << format->mantissa_bits); struct fixed31_32 mantiss; - if (dal_fixed31_32_eq( + if (dc_fixpt_eq( value, - dal_fixed31_32_zero)) { + dc_fixpt_zero)) { *negative = false; *mantissa = 0; *exponenta = 0; return true; } - if (dal_fixed31_32_lt( + if (dc_fixpt_lt( value, - dal_fixed31_32_zero)) { + dc_fixpt_zero)) { *negative = format->sign; - value = dal_fixed31_32_neg(value); + value = dc_fixpt_neg(value); } else { *negative = false; } - if (dal_fixed31_32_lt( + if (dc_fixpt_lt( value, - dal_fixed31_32_one)) { + dc_fixpt_one)) { uint32_t i = 1; do { - value = dal_fixed31_32_shl(value, 1); + value = dc_fixpt_shl(value, 1); ++i; - } while (dal_fixed31_32_lt( + } while (dc_fixpt_lt( value, - dal_fixed31_32_one)); + dc_fixpt_one)); --i; @@ -81,15 +81,15 @@ static bool build_custom_float( } *exponenta = exp_offset - i; - } else if (dal_fixed31_32_le( + } else if (dc_fixpt_le( mantissa_constant_plus_max_fraction, value)) { uint32_t i = 1; do { - value = dal_fixed31_32_shr(value, 1); + value = dc_fixpt_shr(value, 1); ++i; - } while (dal_fixed31_32_lt( + } while (dc_fixpt_lt( mantissa_constant_plus_max_fraction, value)); @@ -98,23 +98,23 @@ static bool build_custom_float( *exponenta = exp_offset; } - mantiss = dal_fixed31_32_sub( + mantiss = dc_fixpt_sub( value, - dal_fixed31_32_one); + dc_fixpt_one); - if (dal_fixed31_32_lt( + if (dc_fixpt_lt( mantiss, - dal_fixed31_32_zero) || - dal_fixed31_32_lt( - dal_fixed31_32_one, + dc_fixpt_zero) || + dc_fixpt_lt( + dc_fixpt_one, mantiss)) - mantiss = dal_fixed31_32_zero; + mantiss = dc_fixpt_zero; else - mantiss = dal_fixed31_32_shl( + mantiss = dc_fixpt_shl( mantiss, format->mantissa_bits); - *mantissa = dal_fixed31_32_floor(mantiss); + *mantissa = dc_fixpt_floor(mantiss); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index 0cbab81ab304..2c4e8f0cb2dc 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -28,6 +28,7 @@ #include "dc.h" #include "core_types.h" #include "dal_asic_id.h" +#include "calcs_logger.h" /* * NOTE: @@ -52,11 +53,14 @@ static enum bw_calcs_version bw_calcs_version_from_asic_id(struct hw_asic_id asi return BW_CALCS_VERSION_CARRIZO; case FAMILY_VI: + if (ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev)) + return BW_CALCS_VERSION_POLARIS12; if (ASIC_REV_IS_POLARIS10_P(asic_id.hw_internal_rev)) return BW_CALCS_VERSION_POLARIS10; - if (ASIC_REV_IS_POLARIS11_M(asic_id.hw_internal_rev) || - ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev)) + if (ASIC_REV_IS_POLARIS11_M(asic_id.hw_internal_rev)) return BW_CALCS_VERSION_POLARIS11; + if (ASIC_REV_IS_VEGAM(asic_id.hw_internal_rev)) + return BW_CALCS_VERSION_VEGAM; return BW_CALCS_VERSION_INVALID; case FAMILY_AI: @@ -2145,6 +2149,9 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip, dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0); /* todo: this is a bug*/ break; case BW_CALCS_VERSION_POLARIS10: + /* TODO: Treat VEGAM the same as P10 for now + * Need to tune the para for VEGAM if needed */ + case BW_CALCS_VERSION_VEGAM: vbios.memory_type = bw_def_gddr5; vbios.dram_channel_width_in_bits = 32; vbios.number_of_dram_channels = asic_id.vram_width / vbios.dram_channel_width_in_bits; @@ -2373,6 +2380,122 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip, dceip.scatter_gather_pte_request_rows_in_tiling_mode = 2; dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0); break; + case BW_CALCS_VERSION_POLARIS12: + vbios.memory_type = bw_def_gddr5; + vbios.dram_channel_width_in_bits = 32; + vbios.number_of_dram_channels = asic_id.vram_width / vbios.dram_channel_width_in_bits; + vbios.number_of_dram_banks = 8; + vbios.high_yclk = bw_int_to_fixed(6000); + vbios.mid_yclk = bw_int_to_fixed(3200); + vbios.low_yclk = bw_int_to_fixed(1000); + vbios.low_sclk = bw_int_to_fixed(678); + vbios.mid1_sclk = bw_int_to_fixed(864); + vbios.mid2_sclk = bw_int_to_fixed(900); + vbios.mid3_sclk = bw_int_to_fixed(920); + vbios.mid4_sclk = bw_int_to_fixed(940); + vbios.mid5_sclk = bw_int_to_fixed(960); + vbios.mid6_sclk = bw_int_to_fixed(980); + vbios.high_sclk = bw_int_to_fixed(1049); + vbios.low_voltage_max_dispclk = bw_int_to_fixed(459); + vbios.mid_voltage_max_dispclk = bw_int_to_fixed(654); + vbios.high_voltage_max_dispclk = bw_int_to_fixed(1108); + vbios.low_voltage_max_phyclk = bw_int_to_fixed(540); + vbios.mid_voltage_max_phyclk = bw_int_to_fixed(810); + vbios.high_voltage_max_phyclk = bw_int_to_fixed(810); + vbios.data_return_bus_width = bw_int_to_fixed(32); + vbios.trc = bw_int_to_fixed(48); + if (vbios.number_of_dram_channels == 2) // 64-bit + vbios.dmifmc_urgent_latency = bw_int_to_fixed(4); + else + vbios.dmifmc_urgent_latency = bw_int_to_fixed(3); + vbios.stutter_self_refresh_exit_latency = bw_int_to_fixed(5); + vbios.stutter_self_refresh_entry_latency = bw_int_to_fixed(0); + vbios.nbp_state_change_latency = bw_int_to_fixed(250); + vbios.mcifwrmc_urgent_latency = bw_int_to_fixed(10); + vbios.scatter_gather_enable = false; + vbios.down_spread_percentage = bw_frc_to_fixed(5, 10); + vbios.cursor_width = 32; + vbios.average_compression_rate = 4; + vbios.number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256; + vbios.blackout_duration = bw_int_to_fixed(0); /* us */ + vbios.maximum_blackout_recovery_time = bw_int_to_fixed(0); + + dceip.max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation = 100; + dceip.max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation = 100; + dceip.percent_of_ideal_port_bw_received_after_urgent_latency = 100; + dceip.large_cursor = false; + dceip.dmif_request_buffer_size = bw_int_to_fixed(768); + dceip.dmif_pipe_en_fbc_chunk_tracker = false; + dceip.cursor_max_outstanding_group_num = 1; + dceip.lines_interleaved_into_lb = 2; + dceip.chunk_width = 256; + dceip.number_of_graphics_pipes = 5; + dceip.number_of_underlay_pipes = 0; + dceip.low_power_tiling_mode = 0; + dceip.display_write_back_supported = true; + dceip.argb_compression_support = true; + dceip.underlay_vscaler_efficiency6_bit_per_component = + bw_frc_to_fixed(35556, 10000); + dceip.underlay_vscaler_efficiency8_bit_per_component = + bw_frc_to_fixed(34286, 10000); + dceip.underlay_vscaler_efficiency10_bit_per_component = + bw_frc_to_fixed(32, 10); + dceip.underlay_vscaler_efficiency12_bit_per_component = + bw_int_to_fixed(3); + dceip.graphics_vscaler_efficiency6_bit_per_component = + bw_frc_to_fixed(35, 10); + dceip.graphics_vscaler_efficiency8_bit_per_component = + bw_frc_to_fixed(34286, 10000); + dceip.graphics_vscaler_efficiency10_bit_per_component = + bw_frc_to_fixed(32, 10); + dceip.graphics_vscaler_efficiency12_bit_per_component = + bw_int_to_fixed(3); + dceip.alpha_vscaler_efficiency = bw_int_to_fixed(3); + dceip.max_dmif_buffer_allocated = 4; + dceip.graphics_dmif_size = 12288; + dceip.underlay_luma_dmif_size = 19456; + dceip.underlay_chroma_dmif_size = 23552; + dceip.pre_downscaler_enabled = true; + dceip.underlay_downscale_prefetch_enabled = true; + dceip.lb_write_pixels_per_dispclk = bw_int_to_fixed(1); + dceip.lb_size_per_component444 = bw_int_to_fixed(245952); + dceip.graphics_lb_nodownscaling_multi_line_prefetching = true; + dceip.stutter_and_dram_clock_state_change_gated_before_cursor = + bw_int_to_fixed(1); + dceip.underlay420_luma_lb_size_per_component = bw_int_to_fixed( + 82176); + dceip.underlay420_chroma_lb_size_per_component = + bw_int_to_fixed(164352); + dceip.underlay422_lb_size_per_component = bw_int_to_fixed( + 82176); + dceip.cursor_chunk_width = bw_int_to_fixed(64); + dceip.cursor_dcp_buffer_lines = bw_int_to_fixed(4); + dceip.underlay_maximum_width_efficient_for_tiling = + bw_int_to_fixed(1920); + dceip.underlay_maximum_height_efficient_for_tiling = + bw_int_to_fixed(1080); + dceip.peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display = + bw_frc_to_fixed(3, 10); + dceip.peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation = + bw_int_to_fixed(25); + dceip.minimum_outstanding_pte_request_limit = bw_int_to_fixed( + 2); + dceip.maximum_total_outstanding_pte_requests_allowed_by_saw = + bw_int_to_fixed(128); + dceip.limit_excessive_outstanding_dmif_requests = true; + dceip.linear_mode_line_request_alternation_slice = + bw_int_to_fixed(64); + dceip.scatter_gather_lines_of_pte_prefetching_in_linear_mode = + 32; + dceip.display_write_back420_luma_mcifwr_buffer_size = 12288; + dceip.display_write_back420_chroma_mcifwr_buffer_size = 8192; + dceip.request_efficiency = bw_frc_to_fixed(8, 10); + dceip.dispclk_per_request = bw_int_to_fixed(2); + dceip.dispclk_ramping_factor = bw_frc_to_fixed(105, 100); + dceip.display_pipe_throughput_factor = bw_frc_to_fixed(105, 100); + dceip.scatter_gather_pte_request_rows_in_tiling_mode = 2; + dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0); + break; case BW_CALCS_VERSION_STONEY: vbios.memory_type = bw_def_gddr5; vbios.dram_channel_width_in_bits = 64; @@ -2815,6 +2938,19 @@ static void populate_initial_data( data->bytes_per_pixel[num_displays + 4] = 4; break; } + } else if (pipe[i].stream->dst.width != 0 && + pipe[i].stream->dst.height != 0 && + pipe[i].stream->src.width != 0 && + pipe[i].stream->src.height != 0) { + data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->src.width); + data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4]; + data->src_height[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->src.height); + data->h_taps[num_displays + 4] = pipe[i].stream->src.width == pipe[i].stream->dst.width ? bw_int_to_fixed(1) : bw_int_to_fixed(2); + data->v_taps[num_displays + 4] = pipe[i].stream->src.height == pipe[i].stream->dst.height ? bw_int_to_fixed(1) : bw_int_to_fixed(2); + data->h_scale_ratio[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->src.width, pipe[i].stream->dst.width); + data->v_scale_ratio[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->src.height, pipe[i].stream->dst.height); + data->rotation_angle[num_displays + 4] = bw_int_to_fixed(0); + data->bytes_per_pixel[num_displays + 4] = 4; } else { data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.h_addressable); data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4]; @@ -2873,6 +3009,11 @@ bool bw_calcs(struct dc_context *ctx, struct bw_fixed mid_yclk = vbios->mid_yclk; struct bw_fixed low_yclk = vbios->low_yclk; + if (ctx->dc->debug.bandwidth_calcs_trace) { + print_bw_calcs_dceip(ctx->logger, dceip); + print_bw_calcs_vbios(ctx->logger, vbios); + print_bw_calcs_data(ctx->logger, data); + } calculate_bandwidth(dceip, vbios, data); yclk_lvl = data->y_clk_level; @@ -2968,7 +3109,33 @@ bool bw_calcs(struct dc_context *ctx, bw_fixed_to_int(bw_mul(data-> stutter_exit_watermark[9], bw_int_to_fixed(1000))); - + calcs_output->stutter_entry_wm_ns[0].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[4], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[1].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[5], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[2].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[6], bw_int_to_fixed(1000))); + if (ctx->dc->caps.max_slave_planes) { + calcs_output->stutter_entry_wm_ns[3].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[0], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[1], bw_int_to_fixed(1000))); + } else { + calcs_output->stutter_entry_wm_ns[3].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[7], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[8], bw_int_to_fixed(1000))); + } + calcs_output->stutter_entry_wm_ns[5].a_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[9], bw_int_to_fixed(1000))); calcs_output->urgent_wm_ns[0].a_mark = bw_fixed_to_int(bw_mul(data-> @@ -3063,7 +3230,33 @@ bool bw_calcs(struct dc_context *ctx, bw_fixed_to_int(bw_mul(data-> stutter_exit_watermark[9], bw_int_to_fixed(1000))); - + calcs_output->stutter_entry_wm_ns[0].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[4], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[1].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[5], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[2].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[6], bw_int_to_fixed(1000))); + if (ctx->dc->caps.max_slave_planes) { + calcs_output->stutter_entry_wm_ns[3].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[0], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[1], bw_int_to_fixed(1000))); + } else { + calcs_output->stutter_entry_wm_ns[3].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[7], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[8], bw_int_to_fixed(1000))); + } + calcs_output->stutter_entry_wm_ns[5].b_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[9], bw_int_to_fixed(1000))); calcs_output->urgent_wm_ns[0].b_mark = bw_fixed_to_int(bw_mul(data-> @@ -3156,6 +3349,34 @@ bool bw_calcs(struct dc_context *ctx, bw_fixed_to_int(bw_mul(data-> stutter_exit_watermark[9], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[0].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[4], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[1].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[5], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[2].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[6], bw_int_to_fixed(1000))); + if (ctx->dc->caps.max_slave_planes) { + calcs_output->stutter_entry_wm_ns[3].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[0], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[1], bw_int_to_fixed(1000))); + } else { + calcs_output->stutter_entry_wm_ns[3].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[7], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[8], bw_int_to_fixed(1000))); + } + calcs_output->stutter_entry_wm_ns[5].c_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[9], bw_int_to_fixed(1000))); + calcs_output->urgent_wm_ns[0].c_mark = bw_fixed_to_int(bw_mul(data-> urgent_watermark[4], bw_int_to_fixed(1000))); @@ -3260,6 +3481,33 @@ bool bw_calcs(struct dc_context *ctx, bw_fixed_to_int(bw_mul(data-> stutter_exit_watermark[9], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[0].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[4], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[1].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[5], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[2].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[6], bw_int_to_fixed(1000))); + if (ctx->dc->caps.max_slave_planes) { + calcs_output->stutter_entry_wm_ns[3].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[0], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[1], bw_int_to_fixed(1000))); + } else { + calcs_output->stutter_entry_wm_ns[3].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[7], bw_int_to_fixed(1000))); + calcs_output->stutter_entry_wm_ns[4].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[8], bw_int_to_fixed(1000))); + } + calcs_output->stutter_entry_wm_ns[5].d_mark = + bw_fixed_to_int(bw_mul(data-> + stutter_entry_watermark[9], bw_int_to_fixed(1000))); calcs_output->urgent_wm_ns[0].d_mark = bw_fixed_to_int(bw_mul(data-> diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 4bb43a371292..49a4ea45466d 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -873,14 +873,14 @@ bool dcn_validate_bandwidth( } if (pipe->plane_state->rotation % 2 == 0) { - ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value + ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dc_fixpt_one.value || v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]); - ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value + ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dc_fixpt_one.value || v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]); } else { - ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value + ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dc_fixpt_one.value || v->scaler_recout_height[input_idx] == v->viewport_width[input_idx]); - ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value + ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dc_fixpt_one.value || v->scaler_rec_out_width[input_idx] == v->viewport_height[input_idx]); } v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no; @@ -1459,39 +1459,39 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) void dcn_bw_sync_calcs_and_dml(struct dc *dc) { kernel_fpu_begin(); - DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %d ns\n" - "sr_enter_plus_exit_time: %d ns\n" - "urgent_latency: %d ns\n" - "write_back_latency: %d ns\n" - "percent_of_ideal_drambw_received_after_urg_latency: %d %\n" + DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n" + "sr_enter_plus_exit_time: %f ns\n" + "urgent_latency: %f ns\n" + "write_back_latency: %f ns\n" + "percent_of_ideal_drambw_received_after_urg_latency: %f %%\n" "max_request_size: %d bytes\n" - "dcfclkv_max0p9: %d kHz\n" - "dcfclkv_nom0p8: %d kHz\n" - "dcfclkv_mid0p72: %d kHz\n" - "dcfclkv_min0p65: %d kHz\n" - "max_dispclk_vmax0p9: %d kHz\n" - "max_dispclk_vnom0p8: %d kHz\n" - "max_dispclk_vmid0p72: %d kHz\n" - "max_dispclk_vmin0p65: %d kHz\n" - "max_dppclk_vmax0p9: %d kHz\n" - "max_dppclk_vnom0p8: %d kHz\n" - "max_dppclk_vmid0p72: %d kHz\n" - "max_dppclk_vmin0p65: %d kHz\n" - "socclk: %d kHz\n" - "fabric_and_dram_bandwidth_vmax0p9: %d MB/s\n" - "fabric_and_dram_bandwidth_vnom0p8: %d MB/s\n" - "fabric_and_dram_bandwidth_vmid0p72: %d MB/s\n" - "fabric_and_dram_bandwidth_vmin0p65: %d MB/s\n" - "phyclkv_max0p9: %d kHz\n" - "phyclkv_nom0p8: %d kHz\n" - "phyclkv_mid0p72: %d kHz\n" - "phyclkv_min0p65: %d kHz\n" - "downspreading: %d %\n" + "dcfclkv_max0p9: %f kHz\n" + "dcfclkv_nom0p8: %f kHz\n" + "dcfclkv_mid0p72: %f kHz\n" + "dcfclkv_min0p65: %f kHz\n" + "max_dispclk_vmax0p9: %f kHz\n" + "max_dispclk_vnom0p8: %f kHz\n" + "max_dispclk_vmid0p72: %f kHz\n" + "max_dispclk_vmin0p65: %f kHz\n" + "max_dppclk_vmax0p9: %f kHz\n" + "max_dppclk_vnom0p8: %f kHz\n" + "max_dppclk_vmid0p72: %f kHz\n" + "max_dppclk_vmin0p65: %f kHz\n" + "socclk: %f kHz\n" + "fabric_and_dram_bandwidth_vmax0p9: %f MB/s\n" + "fabric_and_dram_bandwidth_vnom0p8: %f MB/s\n" + "fabric_and_dram_bandwidth_vmid0p72: %f MB/s\n" + "fabric_and_dram_bandwidth_vmin0p65: %f MB/s\n" + "phyclkv_max0p9: %f kHz\n" + "phyclkv_nom0p8: %f kHz\n" + "phyclkv_mid0p72: %f kHz\n" + "phyclkv_min0p65: %f kHz\n" + "downspreading: %f %%\n" "round_trip_ping_latency_cycles: %d DCFCLK Cycles\n" "urgent_out_of_order_return_per_channel: %d Bytes\n" "number_of_channels: %d\n" "vmm_page_size: %d Bytes\n" - "dram_clock_change_latency: %d ns\n" + "dram_clock_change_latency: %f ns\n" "return_bus_width: %d Bytes\n", dc->dcn_soc->sr_exit_time * 1000, dc->dcn_soc->sr_enter_plus_exit_time * 1000, @@ -1527,11 +1527,11 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc) dc->dcn_soc->vmm_page_size, dc->dcn_soc->dram_clock_change_latency * 1000, dc->dcn_soc->return_bus_width); - DC_LOG_BANDWIDTH_CALCS("rob_buffer_size_in_kbyte: %d\n" - "det_buffer_size_in_kbyte: %d\n" - "dpp_output_buffer_pixels: %d\n" - "opp_output_buffer_lines: %d\n" - "pixel_chunk_size_in_kbyte: %d\n" + DC_LOG_BANDWIDTH_CALCS("rob_buffer_size_in_kbyte: %f\n" + "det_buffer_size_in_kbyte: %f\n" + "dpp_output_buffer_pixels: %f\n" + "opp_output_buffer_lines: %f\n" + "pixel_chunk_size_in_kbyte: %f\n" "pte_enable: %d\n" "pte_chunk_size: %d kbytes\n" "meta_chunk_size: %d kbytes\n" @@ -1550,13 +1550,13 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc) "max_pscl_tolb_throughput: %d pixels/dppclk\n" "max_lb_tovscl_throughput: %d pixels/dppclk\n" "max_vscl_tohscl_throughput: %d pixels/dppclk\n" - "max_hscl_ratio: %d\n" - "max_vscl_ratio: %d\n" + "max_hscl_ratio: %f\n" + "max_vscl_ratio: %f\n" "max_hscl_taps: %d\n" "max_vscl_taps: %d\n" "pte_buffer_size_in_requests: %d\n" - "dispclk_ramping_margin: %d %\n" - "under_scan_factor: %d %\n" + "dispclk_ramping_margin: %f %%\n" + "under_scan_factor: %f %%\n" "max_inter_dcn_tile_repeaters: %d\n" "can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one: %d\n" "bug_forcing_luma_and_chroma_request_to_same_size_fixed: %d\n" diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 9cd3566def8d..644b2187507b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -936,95 +936,6 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc) return true; } -/* - * TODO this whole function needs to go - * - * dc_surface_update is needlessly complex. See if we can just replace this - * with a dc_plane_state and follow the atomic model a bit more closely here. - */ -bool dc_commit_planes_to_stream( - struct dc *dc, - struct dc_plane_state **plane_states, - uint8_t new_plane_count, - struct dc_stream_state *dc_stream, - struct dc_state *state) -{ - /* no need to dynamically allocate this. it's pretty small */ - struct dc_surface_update updates[MAX_SURFACES]; - struct dc_flip_addrs *flip_addr; - struct dc_plane_info *plane_info; - struct dc_scaling_info *scaling_info; - int i; - struct dc_stream_update *stream_update = - kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL); - - if (!stream_update) { - BREAK_TO_DEBUGGER(); - return false; - } - - flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs), - GFP_KERNEL); - plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info), - GFP_KERNEL); - scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info), - GFP_KERNEL); - - if (!flip_addr || !plane_info || !scaling_info) { - kfree(flip_addr); - kfree(plane_info); - kfree(scaling_info); - kfree(stream_update); - return false; - } - - memset(updates, 0, sizeof(updates)); - - stream_update->src = dc_stream->src; - stream_update->dst = dc_stream->dst; - stream_update->out_transfer_func = dc_stream->out_transfer_func; - - for (i = 0; i < new_plane_count; i++) { - updates[i].surface = plane_states[i]; - updates[i].gamma = - (struct dc_gamma *)plane_states[i]->gamma_correction; - updates[i].in_transfer_func = plane_states[i]->in_transfer_func; - flip_addr[i].address = plane_states[i]->address; - flip_addr[i].flip_immediate = plane_states[i]->flip_immediate; - plane_info[i].color_space = plane_states[i]->color_space; - plane_info[i].input_tf = plane_states[i]->input_tf; - plane_info[i].format = plane_states[i]->format; - plane_info[i].plane_size = plane_states[i]->plane_size; - plane_info[i].rotation = plane_states[i]->rotation; - plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror; - plane_info[i].stereo_format = plane_states[i]->stereo_format; - plane_info[i].tiling_info = plane_states[i]->tiling_info; - plane_info[i].visible = plane_states[i]->visible; - plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha; - plane_info[i].dcc = plane_states[i]->dcc; - scaling_info[i].scaling_quality = plane_states[i]->scaling_quality; - scaling_info[i].src_rect = plane_states[i]->src_rect; - scaling_info[i].dst_rect = plane_states[i]->dst_rect; - scaling_info[i].clip_rect = plane_states[i]->clip_rect; - - updates[i].flip_addr = &flip_addr[i]; - updates[i].plane_info = &plane_info[i]; - updates[i].scaling_info = &scaling_info[i]; - } - - dc_commit_updates_for_stream( - dc, - updates, - new_plane_count, - dc_stream, stream_update, plane_states, state); - - kfree(flip_addr); - kfree(plane_info); - kfree(scaling_info); - kfree(stream_update); - return true; -} - struct dc_state *dc_create_state(void) { struct dc_state *context = kzalloc(sizeof(struct dc_state), @@ -1107,9 +1018,6 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa if (u->plane_info->color_space != u->surface->color_space) update_flags->bits.color_space_change = 1; - if (u->plane_info->input_tf != u->surface->input_tf) - update_flags->bits.input_tf_change = 1; - if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) update_flags->bits.horizontal_mirror_change = 1; @@ -1243,12 +1151,20 @@ static enum surface_update_type det_surface_update(const struct dc *dc, if (u->input_csc_color_matrix) update_flags->bits.input_csc_change = 1; - if (update_flags->bits.in_transfer_func_change - || update_flags->bits.input_csc_change) { + if (u->coeff_reduction_factor) + update_flags->bits.coeff_reduction_change = 1; + + if (update_flags->bits.in_transfer_func_change) { type = UPDATE_TYPE_MED; elevate_update_type(&overall_type, type); } + if (update_flags->bits.input_csc_change + || update_flags->bits.coeff_reduction_change) { + type = UPDATE_TYPE_FULL; + elevate_update_type(&overall_type, type); + } + return overall_type; } @@ -1297,7 +1213,7 @@ enum surface_update_type dc_check_update_surfaces_for_stream( type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status); if (type == UPDATE_TYPE_FULL) for (i = 0; i < surface_count; i++) - updates[i].surface->update_flags.bits.full_update = 1; + updates[i].surface->update_flags.raw = 0xFFFFFFFF; return type; } @@ -1375,6 +1291,12 @@ static void commit_planes_for_stream(struct dc *dc, pipe_ctx->stream_res.abm->funcs->set_abm_level( pipe_ctx->stream_res.abm, stream->abm_level); } + + if (stream_update && stream_update->periodic_fn_vsync_delta && + pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) + pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( + pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, + pipe_ctx->stream->periodic_fn_vsync_delta); } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 5a552cb3f8a7..267c76766dea 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -36,8 +36,9 @@ #include "hw_sequencer.h" #include "resource.h" -#define DC_LOGGER \ - logger + +#define DC_LOGGER_INIT(logger) + #define SURFACE_TRACE(...) do {\ if (dc->debug.surface_trace) \ @@ -60,8 +61,7 @@ void pre_surface_trace( int surface_count) { int i; - struct dc *core_dc = dc; - struct dal_logger *logger = core_dc->ctx->logger; + DC_LOGGER_INIT(dc->ctx->logger); for (i = 0; i < surface_count; i++) { const struct dc_plane_state *plane_state = plane_states[i]; @@ -72,8 +72,8 @@ void pre_surface_trace( "plane_state->visible = %d;\n" "plane_state->flip_immediate = %d;\n" "plane_state->address.type = %d;\n" - "plane_state->address.grph.addr.quad_part = 0x%X;\n" - "plane_state->address.grph.meta_addr.quad_part = 0x%X;\n" + "plane_state->address.grph.addr.quad_part = 0x%llX;\n" + "plane_state->address.grph.meta_addr.quad_part = 0x%llX;\n" "plane_state->scaling_quality.h_taps = %d;\n" "plane_state->scaling_quality.v_taps = %d;\n" "plane_state->scaling_quality.h_taps_c = %d;\n" @@ -155,7 +155,6 @@ void pre_surface_trace( "plane_state->tiling_info.gfx8.pipe_config = %d;\n" "plane_state->tiling_info.gfx8.array_mode = %d;\n" "plane_state->color_space = %d;\n" - "plane_state->input_tf = %d;\n" "plane_state->dcc.enable = %d;\n" "plane_state->format = %d;\n" "plane_state->rotation = %d;\n" @@ -163,7 +162,6 @@ void pre_surface_trace( plane_state->tiling_info.gfx8.pipe_config, plane_state->tiling_info.gfx8.array_mode, plane_state->color_space, - plane_state->input_tf, plane_state->dcc.enable, plane_state->format, plane_state->rotation, @@ -183,8 +181,7 @@ void update_surface_trace( int surface_count) { int i; - struct dc *core_dc = dc; - struct dal_logger *logger = core_dc->ctx->logger; + DC_LOGGER_INIT(dc->ctx->logger); for (i = 0; i < surface_count; i++) { const struct dc_surface_update *update = &updates[i]; @@ -192,8 +189,8 @@ void update_surface_trace( SURFACE_TRACE("Update %d\n", i); if (update->flip_addr) { SURFACE_TRACE("flip_addr->address.type = %d;\n" - "flip_addr->address.grph.addr.quad_part = 0x%X;\n" - "flip_addr->address.grph.meta_addr.quad_part = 0x%X;\n" + "flip_addr->address.grph.addr.quad_part = 0x%llX;\n" + "flip_addr->address.grph.meta_addr.quad_part = 0x%llX;\n" "flip_addr->flip_immediate = %d;\n", update->flip_addr->address.type, update->flip_addr->address.grph.addr.quad_part, @@ -204,16 +201,15 @@ void update_surface_trace( if (update->plane_info) { SURFACE_TRACE( "plane_info->color_space = %d;\n" - "plane_info->input_tf = %d;\n" "plane_info->format = %d;\n" "plane_info->plane_size.grph.surface_pitch = %d;\n" "plane_info->plane_size.grph.surface_size.height = %d;\n" "plane_info->plane_size.grph.surface_size.width = %d;\n" "plane_info->plane_size.grph.surface_size.x = %d;\n" "plane_info->plane_size.grph.surface_size.y = %d;\n" - "plane_info->rotation = %d;\n", + "plane_info->rotation = %d;\n" + "plane_info->stereo_format = %d;\n", update->plane_info->color_space, - update->plane_info->input_tf, update->plane_info->format, update->plane_info->plane_size.grph.surface_pitch, update->plane_info->plane_size.grph.surface_size.height, @@ -303,8 +299,7 @@ void update_surface_trace( void post_surface_trace(struct dc *dc) { - struct dc *core_dc = dc; - struct dal_logger *logger = core_dc->ctx->logger; + DC_LOGGER_INIT(dc->ctx->logger); SURFACE_TRACE("post surface process.\n"); @@ -316,10 +311,10 @@ void context_timing_trace( { int i; struct dc *core_dc = dc; - struct dal_logger *logger = core_dc->ctx->logger; int h_pos[MAX_PIPES], v_pos[MAX_PIPES]; struct crtc_position position; unsigned int underlay_idx = core_dc->res_pool->underlay_pipe_index; + DC_LOGGER_INIT(dc->ctx->logger); for (i = 0; i < core_dc->res_pool->pipe_count; i++) { @@ -354,9 +349,7 @@ void context_clock_trace( struct dc_state *context) { #if defined(CONFIG_DRM_AMD_DC_DCN1_0) - struct dc *core_dc = dc; - struct dal_logger *logger = core_dc->ctx->logger; - + DC_LOGGER_INIT(dc->ctx->logger); CLOCK_TRACE("Current: dispclk_khz:%d max_dppclk_khz:%d dcfclk_khz:%d\n" "dcfclk_deep_sleep_khz:%d fclk_khz:%d socclk_khz:%d\n", context->bw.dcn.calc_clk.dispclk_khz, @@ -371,6 +364,7 @@ void context_clock_trace( context->bw.dcn.calc_clk.dppclk_khz, context->bw.dcn.calc_clk.dcfclk_khz, context->bw.dcn.calc_clk.dcfclk_deep_sleep_khz, - context->bw.dcn.calc_clk.fclk_khz); + context->bw.dcn.calc_clk.fclk_khz, + context->bw.dcn.calc_clk.socclk_khz); #endif } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index ebc96b720083..83d121510ef5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -208,6 +208,7 @@ void color_space_to_black_color( case COLOR_SPACE_YCBCR709: case COLOR_SPACE_YCBCR601_LIMITED: case COLOR_SPACE_YCBCR709_LIMITED: + case COLOR_SPACE_2020_YCBCR: *black_color = black_color_format[BLACK_COLOR_FORMAT_YUV_CV]; break; @@ -216,7 +217,25 @@ void color_space_to_black_color( black_color_format[BLACK_COLOR_FORMAT_RGB_LIMITED]; break; - default: + /** + * Remove default and add case for all color space + * so when we forget to add new color space + * compiler will give a warning + */ + case COLOR_SPACE_UNKNOWN: + case COLOR_SPACE_SRGB: + case COLOR_SPACE_XR_RGB: + case COLOR_SPACE_MSREF_SCRGB: + case COLOR_SPACE_XV_YCC_709: + case COLOR_SPACE_XV_YCC_601: + case COLOR_SPACE_2020_RGB_FULLRANGE: + case COLOR_SPACE_2020_RGB_LIMITEDRANGE: + case COLOR_SPACE_ADOBERGB: + case COLOR_SPACE_DCIP3: + case COLOR_SPACE_DISPLAYNATIVE: + case COLOR_SPACE_DOLBYVISION: + case COLOR_SPACE_APPCTRL: + case COLOR_SPACE_CUSTOMPOINTS: /* fefault is sRGB black (full range). */ *black_color = black_color_format[BLACK_COLOR_FORMAT_RGB_FULLRANGE]; @@ -230,6 +249,9 @@ bool hwss_wait_for_blank_complete( { int counter; + /* Not applicable if the pipe is not primary, save 300ms of boot time */ + if (!tg->funcs->is_blanked) + return true; for (counter = 0; counter < 100; counter++) { if (tg->funcs->is_blanked(tg)) break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 6d1c4981a185..2fa521812d23 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -45,8 +45,9 @@ #include "dce/dce_11_0_d.h" #include "dce/dce_11_0_enum.h" #include "dce/dce_11_0_sh_mask.h" -#define DC_LOGGER \ - dc_ctx->logger + +#define DC_LOGGER_INIT(logger) + #define LINK_INFO(...) \ DC_LOG_HW_HOTPLUG( \ @@ -468,6 +469,13 @@ static void link_disconnect_sink(struct dc_link *link) link->dpcd_sink_count = 0; } +static void link_disconnect_remap(struct dc_sink *prev_sink, struct dc_link *link) +{ + dc_sink_release(link->local_sink); + link->local_sink = prev_sink; +} + + static bool detect_dp( struct dc_link *link, struct display_sink_capability *sink_caps, @@ -550,6 +558,17 @@ static bool detect_dp( return true; } +static bool is_same_edid(struct dc_edid *old_edid, struct dc_edid *new_edid) +{ + if (old_edid->length != new_edid->length) + return false; + + if (new_edid->length == 0) + return false; + + return (memcmp(old_edid->raw_edid, new_edid->raw_edid, new_edid->length) == 0); +} + bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) { struct dc_sink_init_data sink_init_data = { 0 }; @@ -557,11 +576,15 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) uint8_t i; bool converter_disable_audio = false; struct audio_support *aud_support = &link->dc->res_pool->audio_support; + bool same_edid = false; enum dc_edid_status edid_status; struct dc_context *dc_ctx = link->ctx; struct dc_sink *sink = NULL; + struct dc_sink *prev_sink = NULL; + struct dpcd_caps prev_dpcd_caps; + bool same_dpcd = true; enum dc_connection_type new_connection_type = dc_connection_none; - + DC_LOGGER_INIT(link->ctx->logger); if (link->connector_signal == SIGNAL_TYPE_VIRTUAL) return false; @@ -574,6 +597,11 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) link->local_sink) return true; + prev_sink = link->local_sink; + if (prev_sink != NULL) { + dc_sink_retain(prev_sink); + memcpy(&prev_dpcd_caps, &link->dpcd_caps, sizeof(struct dpcd_caps)); + } link_disconnect_sink(link); if (new_connection_type != dc_connection_none) { @@ -615,14 +643,25 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) link, &sink_caps, &converter_disable_audio, - aud_support, reason)) + aud_support, reason)) { + if (prev_sink != NULL) + dc_sink_release(prev_sink); return false; + } + // Check if dpcp block is the same + if (prev_sink != NULL) { + if (memcmp(&link->dpcd_caps, &prev_dpcd_caps, sizeof(struct dpcd_caps))) + same_dpcd = false; + } /* Active dongle downstream unplug */ if (link->type == dc_connection_active_dongle && link->dpcd_caps.sink_count. - bits.SINK_COUNT == 0) + bits.SINK_COUNT == 0) { + if (prev_sink != NULL) + dc_sink_release(prev_sink); return true; + } if (link->type == dc_connection_mst_branch) { LINK_INFO("link=%d, mst branch is now Connected\n", @@ -630,9 +669,11 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) /* Need to setup mst link_cap struct here * otherwise dc_link_detect() will leave mst link_cap * empty which leads to allocate_mst_payload() has "0" - * pbn_per_slot value leading to exception on dal_fixed31_32_div() + * pbn_per_slot value leading to exception on dc_fixpt_div() */ link->verified_link_cap = link->reported_link_cap; + if (prev_sink != NULL) + dc_sink_release(prev_sink); return false; } @@ -642,6 +683,8 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) default: DC_ERROR("Invalid connector type! signal:%d\n", link->connector_signal); + if (prev_sink != NULL) + dc_sink_release(prev_sink); return false; } /* switch() */ @@ -664,6 +707,8 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) sink = dc_sink_create(&sink_init_data); if (!sink) { DC_ERROR("Failed to create sink!\n"); + if (prev_sink != NULL) + dc_sink_release(prev_sink); return false; } @@ -687,22 +732,33 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) break; } - if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT && - sink_caps.transaction_type == - DDC_TRANSACTION_TYPE_I2C_OVER_AUX) { - /* - * TODO debug why Dell 2413 doesn't like - * two link trainings - */ + // Check if edid is the same + if ((prev_sink != NULL) && ((edid_status == EDID_THE_SAME) || (edid_status == EDID_OK))) + same_edid = is_same_edid(&prev_sink->dc_edid, &sink->dc_edid); - /* deal with non-mst cases */ - dp_hbr_verify_link_cap(link, &link->reported_link_cap); - } + // If both edid and dpcd are the same, then discard new sink and revert back to original sink + if ((same_edid) && (same_dpcd)) { + link_disconnect_remap(prev_sink, link); + sink = prev_sink; + prev_sink = NULL; + } else { + if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT && + sink_caps.transaction_type == + DDC_TRANSACTION_TYPE_I2C_OVER_AUX) { + /* + * TODO debug why Dell 2413 doesn't like + * two link trainings + */ - /* HDMI-DVI Dongle */ - if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A && - !sink->edid_caps.edid_hdmi) - sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK; + /* deal with non-mst cases */ + dp_hbr_verify_link_cap(link, &link->reported_link_cap); + } + + /* HDMI-DVI Dongle */ + if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A && + !sink->edid_caps.edid_hdmi) + sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK; + } /* Connectivity log: detection */ for (i = 0; i < sink->dc_edid.length / EDID_BLOCK_SIZE; i++) { @@ -761,10 +817,14 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) sink_caps.signal = SIGNAL_TYPE_NONE; } - LINK_INFO("link=%d, dc_sink_in=%p is now %s\n", + LINK_INFO("link=%d, dc_sink_in=%p is now %s prev_sink=%p dpcd same=%d edid same=%d\n", link->link_index, sink, (sink_caps.signal == SIGNAL_TYPE_NONE ? - "Disconnected":"Connected")); + "Disconnected":"Connected"), prev_sink, + same_dpcd, same_edid); + + if (prev_sink != NULL) + dc_sink_release(prev_sink); return true; } @@ -927,6 +987,7 @@ static bool construct( struct integrated_info info = {{{ 0 }}}; struct dc_bios *bios = init_params->dc->ctx->dc_bios; const struct dc_vbios_funcs *bp_funcs = bios->funcs; + DC_LOGGER_INIT(dc_ctx->logger); link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; link->irq_source_hpd_rx = DC_IRQ_SOURCE_INVALID; @@ -1135,7 +1196,8 @@ static void dpcd_configure_panel_mode( { union dpcd_edp_config edp_config_set; bool panel_mode_edp = false; - struct dc_context *dc_ctx = link->ctx; + DC_LOGGER_INIT(link->ctx->logger); + memset(&edp_config_set, '\0', sizeof(union dpcd_edp_config)); if (DP_PANEL_MODE_DEFAULT != panel_mode) { @@ -1183,16 +1245,21 @@ static void enable_stream_features(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->sink->link; - union down_spread_ctrl downspread; + union down_spread_ctrl old_downspread; + union down_spread_ctrl new_downspread; core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL, - &downspread.raw, sizeof(downspread)); + &old_downspread.raw, sizeof(old_downspread)); - downspread.bits.IGNORE_MSA_TIMING_PARAM = + new_downspread.raw = old_downspread.raw; + + new_downspread.bits.IGNORE_MSA_TIMING_PARAM = (stream->ignore_msa_timing_param) ? 1 : 0; - core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, - &downspread.raw, sizeof(downspread)); + if (new_downspread.raw != old_downspread.raw) { + core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL, + &new_downspread.raw, sizeof(new_downspread)); + } } static enum dc_status enable_link_dp( @@ -1843,9 +1910,22 @@ static void disable_link(struct dc_link *link, enum signal_type signal) static bool dp_active_dongle_validate_timing( const struct dc_crtc_timing *timing, - const struct dc_dongle_caps *dongle_caps) + const struct dpcd_caps *dpcd_caps) { unsigned int required_pix_clk = timing->pix_clk_khz; + const struct dc_dongle_caps *dongle_caps = &dpcd_caps->dongle_caps; + + switch (dpcd_caps->dongle_type) { + case DISPLAY_DONGLE_DP_VGA_CONVERTER: + case DISPLAY_DONGLE_DP_DVI_CONVERTER: + case DISPLAY_DONGLE_DP_DVI_DONGLE: + if (timing->pixel_encoding == PIXEL_ENCODING_RGB) + return true; + else + return false; + default: + break; + } if (dongle_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER || dongle_caps->extendedCapValid == false) @@ -1911,7 +1991,7 @@ enum dc_status dc_link_validate_mode_timing( const struct dc_crtc_timing *timing) { uint32_t max_pix_clk = stream->sink->dongle_max_pix_clk; - struct dc_dongle_caps *dongle_caps = &link->dpcd_caps.dongle_caps; + struct dpcd_caps *dpcd_caps = &link->dpcd_caps; /* A hack to avoid failing any modes for EDID override feature on * topology change such as lower quality cable for DP or different dongle @@ -1924,7 +2004,7 @@ enum dc_status dc_link_validate_mode_timing( return DC_EXCEED_DONGLE_CAP; /* Active Dongle*/ - if (!dp_active_dongle_validate_timing(timing, dongle_caps)) + if (!dp_active_dongle_validate_timing(timing, dpcd_caps)) return DC_EXCEED_DONGLE_CAP; switch (stream->signal) { @@ -1950,10 +2030,10 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level, struct dc *core_dc = link->ctx->dc; struct abm *abm = core_dc->res_pool->abm; struct dmcu *dmcu = core_dc->res_pool->dmcu; - struct dc_context *dc_ctx = link->ctx; unsigned int controller_id = 0; bool use_smooth_brightness = true; int i; + DC_LOGGER_INIT(link->ctx->logger); if ((dmcu == NULL) || (abm == NULL) || @@ -1961,7 +2041,7 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level, return false; if (stream) { - if (stream->bl_pwm_level == 0) + if (stream->bl_pwm_level == EDP_BACKLIGHT_RAMP_DISABLE_LEVEL) frame_ramp = 0; ((struct dc_stream_state *)stream)->bl_pwm_level = level; @@ -2038,10 +2118,10 @@ static struct fixed31_32 get_pbn_per_slot(struct dc_stream_state *stream) &stream->sink->link->cur_link_settings; uint32_t link_rate_in_mbps = link_settings->link_rate * LINK_RATE_REF_FREQ_IN_MHZ; - struct fixed31_32 mbps = dal_fixed31_32_from_int( + struct fixed31_32 mbps = dc_fixpt_from_int( link_rate_in_mbps * link_settings->lane_count); - return dal_fixed31_32_div_int(mbps, 54); + return dc_fixpt_div_int(mbps, 54); } static int get_color_depth(enum dc_color_depth color_depth) @@ -2082,7 +2162,7 @@ static struct fixed31_32 get_pbn_from_timing(struct pipe_ctx *pipe_ctx) numerator = 64 * PEAK_FACTOR_X1000; denominator = 54 * 8 * 1000 * 1000; kbps *= numerator; - peak_kbps = dal_fixed31_32_from_fraction(kbps, denominator); + peak_kbps = dc_fixpt_from_fraction(kbps, denominator); return peak_kbps; } @@ -2149,8 +2229,8 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx) struct fixed31_32 avg_time_slots_per_mtp; struct fixed31_32 pbn; struct fixed31_32 pbn_per_slot; - struct dc_context *dc_ctx = link->ctx; uint8_t i; + DC_LOGGER_INIT(link->ctx->logger); /* enable_link_dp_mst already check link->enabled_stream_count * and stream is in link->stream[]. This is called during set mode, @@ -2178,11 +2258,11 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx) link->mst_stream_alloc_table.stream_count); for (i = 0; i < MAX_CONTROLLER_NUM; i++) { - DC_LOG_MST("stream_enc[%d]: 0x%x " + DC_LOG_MST("stream_enc[%d]: %p " "stream[%d].vcp_id: %d " "stream[%d].slot_count: %d\n", i, - link->mst_stream_alloc_table.stream_allocations[i].stream_enc, + (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, i, link->mst_stream_alloc_table.stream_allocations[i].vcp_id, i, @@ -2209,7 +2289,7 @@ static enum dc_status allocate_mst_payload(struct pipe_ctx *pipe_ctx) /* slot X.Y for only current stream */ pbn_per_slot = get_pbn_per_slot(stream); pbn = get_pbn_from_timing(pipe_ctx); - avg_time_slots_per_mtp = dal_fixed31_32_div(pbn, pbn_per_slot); + avg_time_slots_per_mtp = dc_fixpt_div(pbn, pbn_per_slot); stream_encoder->funcs->set_mst_bandwidth( stream_encoder, @@ -2226,10 +2306,10 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) struct link_encoder *link_encoder = link->link_enc; struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; struct dp_mst_stream_allocation_table proposed_table = {0}; - struct fixed31_32 avg_time_slots_per_mtp = dal_fixed31_32_from_int(0); + struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0); uint8_t i; bool mst_mode = (link->type == dc_connection_mst_branch); - struct dc_context *dc_ctx = link->ctx; + DC_LOGGER_INIT(link->ctx->logger); /* deallocate_mst_payload is called before disable link. When mode or * disable/enable monitor, new stream is created which is not in link @@ -2268,11 +2348,11 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) link->mst_stream_alloc_table.stream_count); for (i = 0; i < MAX_CONTROLLER_NUM; i++) { - DC_LOG_MST("stream_enc[%d]: 0x%x " + DC_LOG_MST("stream_enc[%d]: %p " "stream[%d].vcp_id: %d " "stream[%d].slot_count: %d\n", i, - link->mst_stream_alloc_table.stream_allocations[i].stream_enc, + (void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc, i, link->mst_stream_alloc_table.stream_allocations[i].vcp_id, i, @@ -2302,8 +2382,8 @@ void core_link_enable_stream( struct pipe_ctx *pipe_ctx) { struct dc *core_dc = pipe_ctx->stream->ctx->dc; - struct dc_context *dc_ctx = pipe_ctx->stream->ctx; enum dc_status status; + DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); /* eDP lit up by bios already, no need to enable again. */ if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 49c2face1e7a..ae48d603ebd6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -629,13 +629,14 @@ bool dal_ddc_service_query_ddc_data( return ret; } -ssize_t dal_ddc_service_read_dpcd_data( +enum ddc_result dal_ddc_service_read_dpcd_data( struct ddc_service *ddc, bool i2c, enum i2c_mot_mode mot, uint32_t address, uint8_t *data, - uint32_t len) + uint32_t len, + uint32_t *read) { struct aux_payload read_payload = { .i2c_over_aux = i2c, @@ -652,6 +653,8 @@ ssize_t dal_ddc_service_read_dpcd_data( .mot = mot }; + *read = 0; + if (len > DEFAULT_AUX_MAX_DATA_SIZE) { BREAK_TO_DEBUGGER(); return DDC_RESULT_FAILED_INVALID_OPERATION; @@ -661,7 +664,8 @@ ssize_t dal_ddc_service_read_dpcd_data( ddc->ctx->i2caux, ddc->ddc_pin, &command)) { - return (ssize_t)command.payloads->length; + *read = command.payloads->length; + return DDC_RESULT_SUCESSFULL; } return DDC_RESULT_FAILED_OPERATION; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 3b5053570229..7857cb42b3e6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1378,8 +1378,8 @@ static uint32_t bandwidth_in_kbps_from_timing( { uint32_t bits_per_channel = 0; uint32_t kbps; - switch (timing->display_color_depth) { + switch (timing->display_color_depth) { case COLOR_DEPTH_666: bits_per_channel = 6; break; @@ -1401,14 +1401,20 @@ static uint32_t bandwidth_in_kbps_from_timing( default: break; } + ASSERT(bits_per_channel != 0); kbps = timing->pix_clk_khz; kbps *= bits_per_channel; - if (timing->flags.Y_ONLY != 1) + if (timing->flags.Y_ONLY != 1) { /*Only YOnly make reduce bandwidth by 1/3 compares to RGB*/ kbps *= 3; + if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + kbps /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + kbps = kbps * 2 / 3; + } return kbps; @@ -1624,17 +1630,42 @@ static enum dc_status read_hpd_rx_irq_data( struct dc_link *link, union hpd_irq_data *irq_data) { + static enum dc_status retval; + /* The HW reads 16 bytes from 200h on HPD, * but if we get an AUX_DEFER, the HW cannot retry * and this causes the CTS tests 4.3.2.1 - 3.2.4 to * fail, so we now explicitly read 6 bytes which is * the req from the above mentioned test cases. + * + * For DP 1.4 we need to read those from 2002h range. */ - return core_link_read_dpcd( - link, - DP_SINK_COUNT, - irq_data->raw, - sizeof(union hpd_irq_data)); + if (link->dpcd_caps.dpcd_rev.raw < DPCD_REV_14) + retval = core_link_read_dpcd( + link, + DP_SINK_COUNT, + irq_data->raw, + sizeof(union hpd_irq_data)); + else { + /* Read 2 bytes at this location,... */ + retval = core_link_read_dpcd( + link, + DP_SINK_COUNT_ESI, + irq_data->raw, + 2); + + if (retval != DC_OK) + return retval; + + /* ... then read remaining 4 at the other location */ + retval = core_link_read_dpcd( + link, + DP_LANE0_1_STATUS_ESI, + &irq_data->raw[2], + 4); + } + + return retval; } static bool allow_hpd_rx_irq(const struct dc_link *link) @@ -2272,12 +2303,14 @@ static void dp_wa_power_up_0010FA(struct dc_link *link, uint8_t *dpcd_data, static bool retrieve_link_cap(struct dc_link *link) { - uint8_t dpcd_data[DP_TRAINING_AUX_RD_INTERVAL - DP_DPCD_REV + 1]; + uint8_t dpcd_data[DP_ADAPTER_CAP - DP_DPCD_REV + 1]; union down_stream_port_count down_strm_port_count; union edp_configuration_cap edp_config_cap; union dp_downstream_port_present ds_port = { 0 }; enum dc_status status = DC_ERROR_UNEXPECTED; + uint32_t read_dpcd_retry_cnt = 3; + int i; memset(dpcd_data, '\0', sizeof(dpcd_data)); memset(&down_strm_port_count, @@ -2285,11 +2318,15 @@ static bool retrieve_link_cap(struct dc_link *link) memset(&edp_config_cap, '\0', sizeof(union edp_configuration_cap)); - status = core_link_read_dpcd( - link, - DP_DPCD_REV, - dpcd_data, - sizeof(dpcd_data)); + for (i = 0; i < read_dpcd_retry_cnt; i++) { + status = core_link_read_dpcd( + link, + DP_DPCD_REV, + dpcd_data, + sizeof(dpcd_data)); + if (status == DC_OK) + break; + } if (status != DC_OK) { dm_error("%s: Read dpcd data failed.\n", __func__); @@ -2376,6 +2413,10 @@ bool detect_dp_sink_caps(struct dc_link *link) void detect_edp_sink_caps(struct dc_link *link) { retrieve_link_cap(link); + + if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN) + link->reported_link_cap.link_rate = LINK_RATE_HIGH2; + link->verified_link_cap = link->reported_link_cap; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index 7c866a7d5e77..82cd1d6e6e59 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -11,8 +11,6 @@ #include "dc_link_dp.h" #include "dc_link_ddc.h" #include "dm_helpers.h" -#include "dce/dce_link_encoder.h" -#include "dce/dce_stream_encoder.h" #include "dpcd_defs.h" enum dc_status core_link_read_dpcd( diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index ba3487e97361..751f3ac9d921 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -45,8 +45,9 @@ #include "dcn10/dcn10_resource.h" #endif #include "dce120/dce120_resource.h" -#define DC_LOGGER \ - ctx->logger + +#define DC_LOGGER_INIT(logger) + enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) { enum dce_version dc_version = DCE_VERSION_UNKNOWN; @@ -78,6 +79,8 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev)) { dc_version = DCE_VERSION_11_2; } + if (ASIC_REV_IS_VEGAM(asic_id.hw_internal_rev)) + dc_version = DCE_VERSION_11_22; break; case FAMILY_AI: dc_version = DCE_VERSION_12_0; @@ -124,6 +127,7 @@ struct resource_pool *dc_create_resource_pool( num_virtual_links, dc, asic_id); break; case DCE_VERSION_11_2: + case DCE_VERSION_11_22: res_pool = dce112_create_resource_pool( num_virtual_links, dc); break; @@ -492,9 +496,9 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx) data->viewport_c.x = data->viewport.x / vpc_div; data->viewport_c.y = data->viewport.y / vpc_div; data->inits.h_c = (data->viewport.x % vpc_div) != 0 ? - dal_fixed31_32_half : dal_fixed31_32_zero; + dc_fixpt_half : dc_fixpt_zero; data->inits.v_c = (data->viewport.y % vpc_div) != 0 ? - dal_fixed31_32_half : dal_fixed31_32_zero; + dc_fixpt_half : dc_fixpt_zero; /* Round up, assume original video size always even dimensions */ data->viewport_c.width = (data->viewport.width + vpc_div - 1) / vpc_div; data->viewport_c.height = (data->viewport.height + vpc_div - 1) / vpc_div; @@ -623,10 +627,10 @@ static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) rect_swap_helper(&surf_src); - pipe_ctx->plane_res.scl_data.ratios.horz = dal_fixed31_32_from_fraction( + pipe_ctx->plane_res.scl_data.ratios.horz = dc_fixpt_from_fraction( surf_src.width, plane_state->dst_rect.width); - pipe_ctx->plane_res.scl_data.ratios.vert = dal_fixed31_32_from_fraction( + pipe_ctx->plane_res.scl_data.ratios.vert = dc_fixpt_from_fraction( surf_src.height, plane_state->dst_rect.height); @@ -648,6 +652,14 @@ static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.ratios.horz_c.value /= 2; pipe_ctx->plane_res.scl_data.ratios.vert_c.value /= 2; } + pipe_ctx->plane_res.scl_data.ratios.horz = dc_fixpt_truncate( + pipe_ctx->plane_res.scl_data.ratios.horz, 19); + pipe_ctx->plane_res.scl_data.ratios.vert = dc_fixpt_truncate( + pipe_ctx->plane_res.scl_data.ratios.vert, 19); + pipe_ctx->plane_res.scl_data.ratios.horz_c = dc_fixpt_truncate( + pipe_ctx->plane_res.scl_data.ratios.horz_c, 19); + pipe_ctx->plane_res.scl_data.ratios.vert_c = dc_fixpt_truncate( + pipe_ctx->plane_res.scl_data.ratios.vert_c, 19); } static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *recout_skip) @@ -684,32 +696,33 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r * init_bot = init + scaling_ratio * init_c = init + truncated_vp_c_offset(from calculate viewport) */ - data->inits.h = dal_fixed31_32_div_int( - dal_fixed31_32_add_int(data->ratios.horz, data->taps.h_taps + 1), 2); + data->inits.h = dc_fixpt_truncate(dc_fixpt_div_int( + dc_fixpt_add_int(data->ratios.horz, data->taps.h_taps + 1), 2), 19); - data->inits.h_c = dal_fixed31_32_add(data->inits.h_c, dal_fixed31_32_div_int( - dal_fixed31_32_add_int(data->ratios.horz_c, data->taps.h_taps_c + 1), 2)); + data->inits.h_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.h_c, dc_fixpt_div_int( + dc_fixpt_add_int(data->ratios.horz_c, data->taps.h_taps_c + 1), 2)), 19); - data->inits.v = dal_fixed31_32_div_int( - dal_fixed31_32_add_int(data->ratios.vert, data->taps.v_taps + 1), 2); + data->inits.v = dc_fixpt_truncate(dc_fixpt_div_int( + dc_fixpt_add_int(data->ratios.vert, data->taps.v_taps + 1), 2), 19); + + data->inits.v_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.v_c, dc_fixpt_div_int( + dc_fixpt_add_int(data->ratios.vert_c, data->taps.v_taps_c + 1), 2)), 19); - data->inits.v_c = dal_fixed31_32_add(data->inits.v_c, dal_fixed31_32_div_int( - dal_fixed31_32_add_int(data->ratios.vert_c, data->taps.v_taps_c + 1), 2)); /* Adjust for viewport end clip-off */ if ((data->viewport.x + data->viewport.width) < (src.x + src.width) && !flip_horz_scan_dir) { int vp_clip = src.x + src.width - data->viewport.width - data->viewport.x; - int int_part = dal_fixed31_32_floor( - dal_fixed31_32_sub(data->inits.h, data->ratios.horz)); + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.h, data->ratios.horz)); int_part = int_part > 0 ? int_part : 0; data->viewport.width += int_part < vp_clip ? int_part : vp_clip; } if ((data->viewport.y + data->viewport.height) < (src.y + src.height) && !flip_vert_scan_dir) { int vp_clip = src.y + src.height - data->viewport.height - data->viewport.y; - int int_part = dal_fixed31_32_floor( - dal_fixed31_32_sub(data->inits.v, data->ratios.vert)); + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.v, data->ratios.vert)); int_part = int_part > 0 ? int_part : 0; data->viewport.height += int_part < vp_clip ? int_part : vp_clip; @@ -717,8 +730,8 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r if ((data->viewport_c.x + data->viewport_c.width) < (src.x + src.width) / vpc_div && !flip_horz_scan_dir) { int vp_clip = (src.x + src.width) / vpc_div - data->viewport_c.width - data->viewport_c.x; - int int_part = dal_fixed31_32_floor( - dal_fixed31_32_sub(data->inits.h_c, data->ratios.horz_c)); + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.h_c, data->ratios.horz_c)); int_part = int_part > 0 ? int_part : 0; data->viewport_c.width += int_part < vp_clip ? int_part : vp_clip; @@ -726,8 +739,8 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r if ((data->viewport_c.y + data->viewport_c.height) < (src.y + src.height) / vpc_div && !flip_vert_scan_dir) { int vp_clip = (src.y + src.height) / vpc_div - data->viewport_c.height - data->viewport_c.y; - int int_part = dal_fixed31_32_floor( - dal_fixed31_32_sub(data->inits.v_c, data->ratios.vert_c)); + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.v_c, data->ratios.vert_c)); int_part = int_part > 0 ? int_part : 0; data->viewport_c.height += int_part < vp_clip ? int_part : vp_clip; @@ -737,9 +750,9 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r if (data->viewport.x && !flip_horz_scan_dir) { int int_part; - data->inits.h = dal_fixed31_32_add(data->inits.h, dal_fixed31_32_mul_int( + data->inits.h = dc_fixpt_add(data->inits.h, dc_fixpt_mul_int( data->ratios.horz, recout_skip->width)); - int_part = dal_fixed31_32_floor(data->inits.h) - data->viewport.x; + int_part = dc_fixpt_floor(data->inits.h) - data->viewport.x; if (int_part < data->taps.h_taps) { int int_adj = data->viewport.x >= (data->taps.h_taps - int_part) ? (data->taps.h_taps - int_part) : data->viewport.x; @@ -752,15 +765,15 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r int_part = data->taps.h_taps; } data->inits.h.value &= 0xffffffff; - data->inits.h = dal_fixed31_32_add_int(data->inits.h, int_part); + data->inits.h = dc_fixpt_add_int(data->inits.h, int_part); } if (data->viewport_c.x && !flip_horz_scan_dir) { int int_part; - data->inits.h_c = dal_fixed31_32_add(data->inits.h_c, dal_fixed31_32_mul_int( + data->inits.h_c = dc_fixpt_add(data->inits.h_c, dc_fixpt_mul_int( data->ratios.horz_c, recout_skip->width)); - int_part = dal_fixed31_32_floor(data->inits.h_c) - data->viewport_c.x; + int_part = dc_fixpt_floor(data->inits.h_c) - data->viewport_c.x; if (int_part < data->taps.h_taps_c) { int int_adj = data->viewport_c.x >= (data->taps.h_taps_c - int_part) ? (data->taps.h_taps_c - int_part) : data->viewport_c.x; @@ -773,15 +786,15 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r int_part = data->taps.h_taps_c; } data->inits.h_c.value &= 0xffffffff; - data->inits.h_c = dal_fixed31_32_add_int(data->inits.h_c, int_part); + data->inits.h_c = dc_fixpt_add_int(data->inits.h_c, int_part); } if (data->viewport.y && !flip_vert_scan_dir) { int int_part; - data->inits.v = dal_fixed31_32_add(data->inits.v, dal_fixed31_32_mul_int( + data->inits.v = dc_fixpt_add(data->inits.v, dc_fixpt_mul_int( data->ratios.vert, recout_skip->height)); - int_part = dal_fixed31_32_floor(data->inits.v) - data->viewport.y; + int_part = dc_fixpt_floor(data->inits.v) - data->viewport.y; if (int_part < data->taps.v_taps) { int int_adj = data->viewport.y >= (data->taps.v_taps - int_part) ? (data->taps.v_taps - int_part) : data->viewport.y; @@ -794,15 +807,15 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r int_part = data->taps.v_taps; } data->inits.v.value &= 0xffffffff; - data->inits.v = dal_fixed31_32_add_int(data->inits.v, int_part); + data->inits.v = dc_fixpt_add_int(data->inits.v, int_part); } if (data->viewport_c.y && !flip_vert_scan_dir) { int int_part; - data->inits.v_c = dal_fixed31_32_add(data->inits.v_c, dal_fixed31_32_mul_int( + data->inits.v_c = dc_fixpt_add(data->inits.v_c, dc_fixpt_mul_int( data->ratios.vert_c, recout_skip->height)); - int_part = dal_fixed31_32_floor(data->inits.v_c) - data->viewport_c.y; + int_part = dc_fixpt_floor(data->inits.v_c) - data->viewport_c.y; if (int_part < data->taps.v_taps_c) { int int_adj = data->viewport_c.y >= (data->taps.v_taps_c - int_part) ? (data->taps.v_taps_c - int_part) : data->viewport_c.y; @@ -815,12 +828,12 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r int_part = data->taps.v_taps_c; } data->inits.v_c.value &= 0xffffffff; - data->inits.v_c = dal_fixed31_32_add_int(data->inits.v_c, int_part); + data->inits.v_c = dc_fixpt_add_int(data->inits.v_c, int_part); } /* Interlaced inits based on final vert inits */ - data->inits.v_bot = dal_fixed31_32_add(data->inits.v, data->ratios.vert); - data->inits.v_c_bot = dal_fixed31_32_add(data->inits.v_c, data->ratios.vert_c); + data->inits.v_bot = dc_fixpt_add(data->inits.v, data->ratios.vert); + data->inits.v_c_bot = dc_fixpt_add(data->inits.v_c, data->ratios.vert_c); if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) { @@ -835,7 +848,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; struct view recout_skip = { 0 }; bool res = false; - struct dc_context *ctx = pipe_ctx->stream->ctx; + DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); /* Important: scaling ratio calculation requires pixel format, * lb depth calculation requires recout and taps require scaling ratios. * Inits require viewport, taps, ratios and recout of split pipe @@ -843,6 +856,9 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface( pipe_ctx->plane_state->format); + if (pipe_ctx->stream->timing.flags.INTERLACE) + pipe_ctx->stream->dst.height *= 2; + calculate_scaling_ratios(pipe_ctx); calculate_viewport(pipe_ctx); @@ -863,6 +879,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; + if (pipe_ctx->stream->timing.flags.INTERLACE) + pipe_ctx->plane_res.scl_data.v_active *= 2; /* Taps calculations */ @@ -908,6 +926,9 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) plane_state->dst_rect.x, plane_state->dst_rect.y); + if (pipe_ctx->stream->timing.flags.INTERLACE) + pipe_ctx->stream->dst.height /= 2; + return res; } @@ -1294,6 +1315,19 @@ bool dc_add_all_planes_for_stream( } +static bool is_hdr_static_meta_changed(struct dc_stream_state *cur_stream, + struct dc_stream_state *new_stream) +{ + if (cur_stream == NULL) + return true; + + if (memcmp(&cur_stream->hdr_static_metadata, + &new_stream->hdr_static_metadata, + sizeof(struct dc_info_packet)) != 0) + return true; + + return false; +} static bool is_timing_changed(struct dc_stream_state *cur_stream, struct dc_stream_state *new_stream) @@ -1329,6 +1363,9 @@ static bool are_stream_backends_same( if (is_timing_changed(stream_a, stream_b)) return false; + if (is_hdr_static_meta_changed(stream_a, stream_b)) + return false; + return true; } @@ -1599,18 +1636,6 @@ enum dc_status dc_remove_stream_from_ctx( return DC_OK; } -static void copy_pipe_ctx( - const struct pipe_ctx *from_pipe_ctx, struct pipe_ctx *to_pipe_ctx) -{ - struct dc_plane_state *plane_state = to_pipe_ctx->plane_state; - struct dc_stream_state *stream = to_pipe_ctx->stream; - - *to_pipe_ctx = *from_pipe_ctx; - to_pipe_ctx->stream = stream; - if (plane_state != NULL) - to_pipe_ctx->plane_state = plane_state; -} - static struct dc_stream_state *find_pll_sharable_stream( struct dc_stream_state *stream_needs_pll, struct dc_state *context) @@ -1703,7 +1728,7 @@ enum dc_status resource_map_pool_resources( pipe_idx = acquire_first_split_pipe(&context->res_ctx, pool, stream); #endif - if (pipe_idx < 0) + if (pipe_idx < 0 || context->res_ctx.pipe_ctx[pipe_idx].stream_res.tg == NULL) return DC_NO_CONTROLLER_RESOURCE; pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx]; @@ -1752,26 +1777,6 @@ enum dc_status resource_map_pool_resources( return DC_ERROR_UNEXPECTED; } -/* first stream in the context is used to populate the rest */ -void validate_guaranteed_copy_streams( - struct dc_state *context, - int max_streams) -{ - int i; - - for (i = 1; i < max_streams; i++) { - context->streams[i] = context->streams[0]; - - copy_pipe_ctx(&context->res_ctx.pipe_ctx[0], - &context->res_ctx.pipe_ctx[i]); - context->res_ctx.pipe_ctx[i].stream = - context->res_ctx.pipe_ctx[0].stream; - - dc_stream_retain(context->streams[i]); - context->stream_count++; - } -} - void dc_resource_state_copy_construct_current( const struct dc *dc, struct dc_state *dst_ctx) @@ -1798,9 +1803,9 @@ enum dc_status dc_validate_global_state( return DC_ERROR_UNEXPECTED; if (dc->res_pool->funcs->validate_global) { - result = dc->res_pool->funcs->validate_global(dc, new_ctx); - if (result != DC_OK) - return result; + result = dc->res_pool->funcs->validate_global(dc, new_ctx); + if (result != DC_OK) + return result; } for (i = 0; i < new_ctx->stream_count; i++) { @@ -1843,7 +1848,7 @@ enum dc_status dc_validate_global_state( } static void patch_gamut_packet_checksum( - struct encoder_info_packet *gamut_packet) + struct dc_info_packet *gamut_packet) { /* For gamut we recalc checksum */ if (gamut_packet->valid) { @@ -1862,12 +1867,11 @@ static void patch_gamut_packet_checksum( } static void set_avi_info_frame( - struct encoder_info_packet *info_packet, + struct dc_info_packet *info_packet, struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; enum dc_color_space color_space = COLOR_SPACE_UNKNOWN; - struct info_frame info_frame = { {0} }; uint32_t pixel_encoding = 0; enum scanning_type scan_type = SCANNING_TYPE_NODATA; enum dc_aspect_ratio aspect = ASPECT_RATIO_NO_DATA; @@ -1877,22 +1881,24 @@ static void set_avi_info_frame( unsigned int cn0_cn1_value = 0; uint8_t *check_sum = NULL; uint8_t byte_index = 0; - union hdmi_info_packet *hdmi_info = &info_frame.avi_info_packet.info_packet_hdmi; + union hdmi_info_packet hdmi_info; union display_content_support support = {0}; unsigned int vic = pipe_ctx->stream->timing.vic; enum dc_timing_3d_format format; + memset(&hdmi_info, 0, sizeof(union hdmi_info_packet)); + color_space = pipe_ctx->stream->output_color_space; if (color_space == COLOR_SPACE_UNKNOWN) color_space = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? COLOR_SPACE_SRGB:COLOR_SPACE_YCBCR709; /* Initialize header */ - hdmi_info->bits.header.info_frame_type = HDMI_INFOFRAME_TYPE_AVI; + hdmi_info.bits.header.info_frame_type = HDMI_INFOFRAME_TYPE_AVI; /* InfoFrameVersion_3 is defined by CEA861F (Section 6.4), but shall * not be used in HDMI 2.0 (Section 10.1) */ - hdmi_info->bits.header.version = 2; - hdmi_info->bits.header.length = HDMI_AVI_INFOFRAME_SIZE; + hdmi_info.bits.header.version = 2; + hdmi_info.bits.header.length = HDMI_AVI_INFOFRAME_SIZE; /* * IDO-defined (Y2,Y1,Y0 = 1,1,1) shall not be used by devices built @@ -1918,39 +1924,39 @@ static void set_avi_info_frame( /* Y0_Y1_Y2 : The pixel encoding */ /* H14b AVI InfoFrame has extension on Y-field from 2 bits to 3 bits */ - hdmi_info->bits.Y0_Y1_Y2 = pixel_encoding; + hdmi_info.bits.Y0_Y1_Y2 = pixel_encoding; /* A0 = 1 Active Format Information valid */ - hdmi_info->bits.A0 = ACTIVE_FORMAT_VALID; + hdmi_info.bits.A0 = ACTIVE_FORMAT_VALID; /* B0, B1 = 3; Bar info data is valid */ - hdmi_info->bits.B0_B1 = BAR_INFO_BOTH_VALID; + hdmi_info.bits.B0_B1 = BAR_INFO_BOTH_VALID; - hdmi_info->bits.SC0_SC1 = PICTURE_SCALING_UNIFORM; + hdmi_info.bits.SC0_SC1 = PICTURE_SCALING_UNIFORM; /* S0, S1 : Underscan / Overscan */ /* TODO: un-hardcode scan type */ scan_type = SCANNING_TYPE_UNDERSCAN; - hdmi_info->bits.S0_S1 = scan_type; + hdmi_info.bits.S0_S1 = scan_type; /* C0, C1 : Colorimetry */ if (color_space == COLOR_SPACE_YCBCR709 || color_space == COLOR_SPACE_YCBCR709_LIMITED) - hdmi_info->bits.C0_C1 = COLORIMETRY_ITU709; + hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709; else if (color_space == COLOR_SPACE_YCBCR601 || color_space == COLOR_SPACE_YCBCR601_LIMITED) - hdmi_info->bits.C0_C1 = COLORIMETRY_ITU601; + hdmi_info.bits.C0_C1 = COLORIMETRY_ITU601; else { - hdmi_info->bits.C0_C1 = COLORIMETRY_NO_DATA; + hdmi_info.bits.C0_C1 = COLORIMETRY_NO_DATA; } if (color_space == COLOR_SPACE_2020_RGB_FULLRANGE || color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE || color_space == COLOR_SPACE_2020_YCBCR) { - hdmi_info->bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR; - hdmi_info->bits.C0_C1 = COLORIMETRY_EXTENDED; + hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR; + hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; } else if (color_space == COLOR_SPACE_ADOBERGB) { - hdmi_info->bits.EC0_EC2 = COLORIMETRYEX_ADOBERGB; - hdmi_info->bits.C0_C1 = COLORIMETRY_EXTENDED; + hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_ADOBERGB; + hdmi_info.bits.C0_C1 = COLORIMETRY_EXTENDED; } /* TODO: un-hardcode aspect ratio */ @@ -1959,18 +1965,18 @@ static void set_avi_info_frame( switch (aspect) { case ASPECT_RATIO_4_3: case ASPECT_RATIO_16_9: - hdmi_info->bits.M0_M1 = aspect; + hdmi_info.bits.M0_M1 = aspect; break; case ASPECT_RATIO_NO_DATA: case ASPECT_RATIO_64_27: case ASPECT_RATIO_256_135: default: - hdmi_info->bits.M0_M1 = 0; + hdmi_info.bits.M0_M1 = 0; } /* Active Format Aspect ratio - same as Picture Aspect Ratio. */ - hdmi_info->bits.R0_R3 = ACTIVE_FORMAT_ASPECT_RATIO_SAME_AS_PICTURE; + hdmi_info.bits.R0_R3 = ACTIVE_FORMAT_ASPECT_RATIO_SAME_AS_PICTURE; /* TODO: un-hardcode cn0_cn1 and itc */ @@ -2013,8 +2019,8 @@ static void set_avi_info_frame( } } } - hdmi_info->bits.CN0_CN1 = cn0_cn1_value; - hdmi_info->bits.ITC = itc_value; + hdmi_info.bits.CN0_CN1 = cn0_cn1_value; + hdmi_info.bits.ITC = itc_value; } /* TODO : We should handle YCC quantization */ @@ -2023,19 +2029,19 @@ static void set_avi_info_frame( stream->sink->edid_caps.qy_bit == 1) { if (color_space == COLOR_SPACE_SRGB || color_space == COLOR_SPACE_2020_RGB_FULLRANGE) { - hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_FULL_RANGE; - hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_FULL_RANGE; + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_FULL_RANGE; + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_FULL_RANGE; } else if (color_space == COLOR_SPACE_SRGB_LIMITED || color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) { - hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_LIMITED_RANGE; - hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_LIMITED_RANGE; + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; } else { - hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; - hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; } } else { - hdmi_info->bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; - hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; + hdmi_info.bits.Q0_Q1 = RGB_QUANTIZATION_DEFAULT_RANGE; + hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE; } ///VIC @@ -2060,51 +2066,49 @@ static void set_avi_info_frame( break; } } - hdmi_info->bits.VIC0_VIC7 = vic; + hdmi_info.bits.VIC0_VIC7 = vic; /* pixel repetition * PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel * repetition start from 1 */ - hdmi_info->bits.PR0_PR3 = 0; + hdmi_info.bits.PR0_PR3 = 0; /* Bar Info * barTop: Line Number of End of Top Bar. * barBottom: Line Number of Start of Bottom Bar. * barLeft: Pixel Number of End of Left Bar. * barRight: Pixel Number of Start of Right Bar. */ - hdmi_info->bits.bar_top = stream->timing.v_border_top; - hdmi_info->bits.bar_bottom = (stream->timing.v_total + hdmi_info.bits.bar_top = stream->timing.v_border_top; + hdmi_info.bits.bar_bottom = (stream->timing.v_total - stream->timing.v_border_bottom + 1); - hdmi_info->bits.bar_left = stream->timing.h_border_left; - hdmi_info->bits.bar_right = (stream->timing.h_total + hdmi_info.bits.bar_left = stream->timing.h_border_left; + hdmi_info.bits.bar_right = (stream->timing.h_total - stream->timing.h_border_right + 1); /* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */ - check_sum = &info_frame.avi_info_packet.info_packet_hdmi.packet_raw_data.sb[0]; + check_sum = &hdmi_info.packet_raw_data.sb[0]; *check_sum = HDMI_INFOFRAME_TYPE_AVI + HDMI_AVI_INFOFRAME_SIZE + 2; for (byte_index = 1; byte_index <= HDMI_AVI_INFOFRAME_SIZE; byte_index++) - *check_sum += hdmi_info->packet_raw_data.sb[byte_index]; + *check_sum += hdmi_info.packet_raw_data.sb[byte_index]; /* one byte complement */ *check_sum = (uint8_t) (0x100 - *check_sum); /* Store in hw_path_mode */ - info_packet->hb0 = hdmi_info->packet_raw_data.hb0; - info_packet->hb1 = hdmi_info->packet_raw_data.hb1; - info_packet->hb2 = hdmi_info->packet_raw_data.hb2; + info_packet->hb0 = hdmi_info.packet_raw_data.hb0; + info_packet->hb1 = hdmi_info.packet_raw_data.hb1; + info_packet->hb2 = hdmi_info.packet_raw_data.hb2; - for (byte_index = 0; byte_index < sizeof(info_frame.avi_info_packet. - info_packet_hdmi.packet_raw_data.sb); byte_index++) - info_packet->sb[byte_index] = info_frame.avi_info_packet. - info_packet_hdmi.packet_raw_data.sb[byte_index]; + for (byte_index = 0; byte_index < sizeof(hdmi_info.packet_raw_data.sb); byte_index++) + info_packet->sb[byte_index] = hdmi_info.packet_raw_data.sb[byte_index]; info_packet->valid = true; } static void set_vendor_info_packet( - struct encoder_info_packet *info_packet, + struct dc_info_packet *info_packet, struct dc_stream_state *stream) { uint32_t length = 0; @@ -2217,7 +2221,7 @@ static void set_vendor_info_packet( } static void set_spd_info_packet( - struct encoder_info_packet *info_packet, + struct dc_info_packet *info_packet, struct dc_stream_state *stream) { /* SPD info packet for FreeSync */ @@ -2338,104 +2342,19 @@ static void set_spd_info_packet( } static void set_hdr_static_info_packet( - struct encoder_info_packet *info_packet, + struct dc_info_packet *info_packet, struct dc_stream_state *stream) { - uint16_t i = 0; - enum signal_type signal = stream->signal; - uint32_t data; + /* HDR Static Metadata info packet for HDR10 */ - if (!stream->hdr_static_metadata.hdr_supported) + if (!stream->hdr_static_metadata.valid) return; - if (dc_is_hdmi_signal(signal)) { - info_packet->valid = true; - - info_packet->hb0 = 0x87; - info_packet->hb1 = 0x01; - info_packet->hb2 = 0x1A; - i = 1; - } else if (dc_is_dp_signal(signal)) { - info_packet->valid = true; - - info_packet->hb0 = 0x00; - info_packet->hb1 = 0x87; - info_packet->hb2 = 0x1D; - info_packet->hb3 = (0x13 << 2); - i = 2; - } - - data = stream->hdr_static_metadata.is_hdr; - info_packet->sb[i++] = data ? 0x02 : 0x00; - info_packet->sb[i++] = 0x00; - - data = stream->hdr_static_metadata.chromaticity_green_x / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_green_y / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_blue_x / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_blue_y / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_red_x / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_red_y / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_white_point_x / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.chromaticity_white_point_y / 2; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.max_luminance; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.min_luminance; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.maximum_content_light_level; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - data = stream->hdr_static_metadata.maximum_frame_average_light_level; - info_packet->sb[i++] = data & 0xFF; - info_packet->sb[i++] = (data & 0xFF00) >> 8; - - if (dc_is_hdmi_signal(signal)) { - uint32_t checksum = 0; - - checksum += info_packet->hb0; - checksum += info_packet->hb1; - checksum += info_packet->hb2; - - for (i = 1; i <= info_packet->hb2; i++) - checksum += info_packet->sb[i]; - - info_packet->sb[0] = 0x100 - checksum; - } else if (dc_is_dp_signal(signal)) { - info_packet->sb[0] = 0x01; - info_packet->sb[1] = 0x1A; - } + *info_packet = stream->hdr_static_metadata; } static void set_vsc_info_packet( - struct encoder_info_packet *info_packet, + struct dc_info_packet *info_packet, struct dc_stream_state *stream) { unsigned int vscPacketRevision = 0; @@ -2650,6 +2569,8 @@ bool pipe_need_reprogram( if (is_timing_changed(pipe_ctx_old->stream, pipe_ctx->stream)) return true; + if (is_hdr_static_meta_changed(pipe_ctx_old->stream, pipe_ctx->stream)) + return true; return false; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index ce0747ed0f00..3732a1de9d6c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -101,14 +101,16 @@ static void construct(struct dc_stream_state *stream, stream->status.link = stream->sink->link; update_stream_signal(stream); + + stream->out_transfer_func = dc_create_transfer_func(); + stream->out_transfer_func->type = TF_TYPE_BYPASS; } static void destruct(struct dc_stream_state *stream) { dc_sink_release(stream->sink); if (stream->out_transfer_func != NULL) { - dc_transfer_func_release( - stream->out_transfer_func); + dc_transfer_func_release(stream->out_transfer_func); stream->out_transfer_func = NULL; } } @@ -176,6 +178,7 @@ bool dc_stream_set_cursor_attributes( int i; struct dc *core_dc; struct resource_context *res_ctx; + struct pipe_ctx *pipe_to_program = NULL; if (NULL == stream) { dm_error("DC: dc_stream is NULL!\n"); @@ -203,9 +206,17 @@ bool dc_stream_set_cursor_attributes( if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state) continue; + if (!pipe_to_program) { + pipe_to_program = pipe_ctx; + core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true); + } core_dc->hwss.set_cursor_attribute(pipe_ctx); } + + if (pipe_to_program) + core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, false); + return true; } @@ -216,6 +227,7 @@ bool dc_stream_set_cursor_position( int i; struct dc *core_dc; struct resource_context *res_ctx; + struct pipe_ctx *pipe_to_program = NULL; if (NULL == stream) { dm_error("DC: dc_stream is NULL!\n"); @@ -241,9 +253,17 @@ bool dc_stream_set_cursor_position( !pipe_ctx->plane_res.ipp) continue; + if (!pipe_to_program) { + pipe_to_program = pipe_ctx; + core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true); + } + core_dc->hwss.set_cursor_position(pipe_ctx); } + if (pipe_to_program) + core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, false); + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index ade5b8ee9c3c..68a71adeb12e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -38,6 +38,12 @@ static void construct(struct dc_context *ctx, struct dc_plane_state *plane_state) { plane_state->ctx = ctx; + + plane_state->gamma_correction = dc_create_gamma(); + plane_state->gamma_correction->is_identity = true; + + plane_state->in_transfer_func = dc_create_transfer_func(); + plane_state->in_transfer_func->type = TF_TYPE_BYPASS; } static void destruct(struct dc_plane_state *plane_state) @@ -66,8 +72,8 @@ struct dc_plane_state *dc_create_plane_state(struct dc *dc) { struct dc *core_dc = dc; - struct dc_plane_state *plane_state = kzalloc(sizeof(*plane_state), - GFP_KERNEL); + struct dc_plane_state *plane_state = kvzalloc(sizeof(*plane_state), + GFP_KERNEL); if (NULL == plane_state) return NULL; @@ -120,7 +126,7 @@ static void dc_plane_state_free(struct kref *kref) { struct dc_plane_state *plane_state = container_of(kref, struct dc_plane_state, refcount); destruct(plane_state); - kfree(plane_state); + kvfree(plane_state); } void dc_plane_state_release(struct dc_plane_state *plane_state) @@ -136,7 +142,7 @@ void dc_gamma_retain(struct dc_gamma *gamma) static void dc_gamma_free(struct kref *kref) { struct dc_gamma *gamma = container_of(kref, struct dc_gamma, refcount); - kfree(gamma); + kvfree(gamma); } void dc_gamma_release(struct dc_gamma **gamma) @@ -147,7 +153,7 @@ void dc_gamma_release(struct dc_gamma **gamma) struct dc_gamma *dc_create_gamma(void) { - struct dc_gamma *gamma = kzalloc(sizeof(*gamma), GFP_KERNEL); + struct dc_gamma *gamma = kvzalloc(sizeof(*gamma), GFP_KERNEL); if (gamma == NULL) goto alloc_fail; @@ -167,7 +173,7 @@ void dc_transfer_func_retain(struct dc_transfer_func *tf) static void dc_transfer_func_free(struct kref *kref) { struct dc_transfer_func *tf = container_of(kref, struct dc_transfer_func, refcount); - kfree(tf); + kvfree(tf); } void dc_transfer_func_release(struct dc_transfer_func *tf) @@ -175,9 +181,9 @@ void dc_transfer_func_release(struct dc_transfer_func *tf) kref_put(&tf->refcount, dc_transfer_func_free); } -struct dc_transfer_func *dc_create_transfer_func(void) +struct dc_transfer_func *dc_create_transfer_func() { - struct dc_transfer_func *tf = kzalloc(sizeof(*tf), GFP_KERNEL); + struct dc_transfer_func *tf = kvzalloc(sizeof(*tf), GFP_KERNEL); if (tf == NULL) goto alloc_fail; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index fa4b3c8b3bb7..9cfde0ccf4e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -38,7 +38,7 @@ #include "inc/compressor.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.1.38" +#define DC_VER "3.1.44" #define MAX_SURFACES 3 #define MAX_STREAMS 6 @@ -75,6 +75,7 @@ struct dc_caps { bool dynamic_audio; bool is_apu; bool dual_link_dvi; + bool post_blend_color_processing; }; struct dc_dcc_surface_param { @@ -202,6 +203,8 @@ struct dc_debug { bool timing_trace; bool clock_trace; bool validation_trace; + bool bandwidth_calcs_trace; + int max_downscale_src_width; /* stutter efficiency related */ bool disable_stutter; @@ -238,6 +241,8 @@ struct dc_debug { bool az_endpoint_mute_only; bool always_use_regamma; bool p010_mpo_support; + bool recovery_enabled; + }; struct dc_state; struct resource_pool; @@ -332,20 +337,6 @@ enum { TRANSFER_FUNC_POINTS = 1025 }; -// Moved here from color module for linux -enum color_transfer_func { - transfer_func_unknown, - transfer_func_srgb, - transfer_func_bt709, - transfer_func_pq2084, - transfer_func_pq2084_interim, - transfer_func_linear_0_1, - transfer_func_linear_0_125, - transfer_func_dolbyvision, - transfer_func_gamma_22, - transfer_func_gamma_26 -}; - struct dc_hdr_static_metadata { /* display chromaticities and white point in units of 0.00001 */ unsigned int chromaticity_green_x; @@ -361,9 +352,6 @@ struct dc_hdr_static_metadata { uint32_t max_luminance; uint32_t maximum_content_light_level; uint32_t maximum_frame_average_light_level; - - bool hdr_supported; - bool is_hdr; }; enum dc_transfer_func_type { @@ -419,7 +407,6 @@ union surface_update_flags { /* Medium updates */ uint32_t dcc_change:1; uint32_t color_space_change:1; - uint32_t input_tf_change:1; uint32_t horizontal_mirror_change:1; uint32_t per_pixel_alpha_change:1; uint32_t rotation_change:1; @@ -428,6 +415,7 @@ union surface_update_flags { uint32_t position_change:1; uint32_t in_transfer_func_change:1; uint32_t input_csc_change:1; + uint32_t coeff_reduction_change:1; uint32_t output_tf_change:1; uint32_t pixel_format_change:1; @@ -460,7 +448,7 @@ struct dc_plane_state { struct dc_gamma *gamma_correction; struct dc_transfer_func *in_transfer_func; struct dc_bias_and_scale *bias_and_scale; - struct csc_transform input_csc_color_matrix; + struct dc_csc_transform input_csc_color_matrix; struct fixed31_32 coeff_reduction_factor; uint32_t sdr_white_level; @@ -468,7 +456,6 @@ struct dc_plane_state { struct dc_hdr_static_metadata hdr_static_ctx; enum dc_color_space color_space; - enum color_transfer_func input_tf; enum surface_pixel_format format; enum dc_rotation_angle rotation; @@ -498,7 +485,6 @@ struct dc_plane_info { enum dc_rotation_angle rotation; enum plane_stereo_format stereo_format; enum dc_color_space color_space; - enum color_transfer_func input_tf; unsigned int sdr_white_level; bool horizontal_mirror; bool visible; @@ -517,19 +503,18 @@ struct dc_surface_update { struct dc_plane_state *surface; /* isr safe update parameters. null means no updates */ - struct dc_flip_addrs *flip_addr; - struct dc_plane_info *plane_info; - struct dc_scaling_info *scaling_info; + const struct dc_flip_addrs *flip_addr; + const struct dc_plane_info *plane_info; + const struct dc_scaling_info *scaling_info; /* following updates require alloc/sleep/spin that is not isr safe, * null means no updates */ - struct dc_gamma *gamma; - enum color_transfer_func color_input_tf; - struct dc_transfer_func *in_transfer_func; + const struct dc_gamma *gamma; + const struct dc_transfer_func *in_transfer_func; - struct csc_transform *input_csc_color_matrix; - struct fixed31_32 *coeff_reduction_factor; + const struct dc_csc_transform *input_csc_color_matrix; + const struct fixed31_32 *coeff_reduction_factor; }; /* @@ -699,6 +684,7 @@ struct dc_cursor { struct dc_cursor_attributes attributes; }; + /******************************************************************************* * Interrupt interfaces ******************************************************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 2726b02e006b..90bccd5ccaa2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -26,6 +26,8 @@ #ifndef DC_DP_TYPES_H #define DC_DP_TYPES_H +#include "os_types.h" + enum dc_lane_count { LANE_COUNT_UNKNOWN = 0, LANE_COUNT_ONE = 1, diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 48e1fcf53d43..bd0fda0ceb91 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -117,6 +117,65 @@ uint32_t generic_reg_get5(const struct dc_context *ctx, uint32_t addr, return reg_val; } +uint32_t generic_reg_get6(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6) +{ + uint32_t reg_val = dm_read_reg(ctx, addr); + *field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1); + *field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2); + *field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3); + *field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4); + *field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5); + *field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6); + return reg_val; +} + +uint32_t generic_reg_get7(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6, + uint8_t shift7, uint32_t mask7, uint32_t *field_value7) +{ + uint32_t reg_val = dm_read_reg(ctx, addr); + *field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1); + *field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2); + *field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3); + *field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4); + *field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5); + *field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6); + *field_value7 = get_reg_field_value_ex(reg_val, mask7, shift7); + return reg_val; +} + +uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6, + uint8_t shift7, uint32_t mask7, uint32_t *field_value7, + uint8_t shift8, uint32_t mask8, uint32_t *field_value8) +{ + uint32_t reg_val = dm_read_reg(ctx, addr); + *field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1); + *field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2); + *field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3); + *field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4); + *field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5); + *field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6); + *field_value7 = get_reg_field_value_ex(reg_val, mask7, shift7); + *field_value8 = get_reg_field_value_ex(reg_val, mask8, shift8); + return reg_val; +} /* note: va version of this is pretty bad idea, since there is a output parameter pass by pointer * compiler won't be able to check for size match and is prone to stack corruption type of bugs diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index b83a7dc2f5a9..b1f70579d61b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -423,6 +423,11 @@ enum dc_gamma_type { GAMMA_CS_TFM_1D = 3, }; +struct dc_csc_transform { + uint16_t matrix[12]; + bool enable_adjustment; +}; + struct dc_gamma { struct kref refcount; enum dc_gamma_type type; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index dc34515ef01f..8a716baa1203 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -51,6 +51,14 @@ struct link_mst_stream_allocation_table { struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM]; }; +struct time_stamp { + uint64_t edp_poweroff; + uint64_t edp_poweron; +}; + +struct link_trace { + struct time_stamp time_stamp; +}; /* * A link contains one or more sinks and their connected status. * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported. @@ -114,6 +122,7 @@ struct dc_link { struct dc_link_status link_status; + struct link_trace link_trace; }; const struct dc_link_status *dc_link_get_status(const struct dc_link *dc_link); diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index d017df56b2ba..d7e6d53bb383 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -58,18 +58,20 @@ struct dc_stream_state { struct freesync_context freesync_ctx; - struct dc_hdr_static_metadata hdr_static_metadata; + struct dc_info_packet hdr_static_metadata; struct dc_transfer_func *out_transfer_func; struct colorspace_transform gamut_remap_matrix; - struct csc_transform csc_color_matrix; + struct dc_csc_transform csc_color_matrix; enum dc_color_space output_color_space; enum dc_dither_option dither_option; enum view_3d_format view_format; - enum color_transfer_func output_tf; bool ignore_msa_timing_param; + + unsigned long long periodic_fn_vsync_delta; + /* TODO: custom INFO packets */ /* TODO: ABM info (DMCU) */ /* PSR info */ @@ -110,9 +112,10 @@ struct dc_stream_update { struct rect src; struct rect dst; struct dc_transfer_func *out_transfer_func; - struct dc_hdr_static_metadata *hdr_static_metadata; - enum color_transfer_func color_output_tf; + struct dc_info_packet *hdr_static_metadata; unsigned int *abm_level; + + unsigned long long *periodic_fn_vsync_delta; }; bool dc_is_stream_unchanged( @@ -131,13 +134,6 @@ bool dc_is_stream_scaling_unchanged( * This does not trigger a flip. No surface address is programmed. */ -bool dc_commit_planes_to_stream( - struct dc *dc, - struct dc_plane_state **plane_states, - uint8_t new_plane_count, - struct dc_stream_state *dc_stream, - struct dc_state *state); - void dc_commit_updates_for_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, @@ -209,14 +205,6 @@ bool dc_add_all_planes_for_stream( enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream); /* - * This function takes a stream and checks if it is guaranteed to be supported. - * Guaranteed means that MAX_COFUNC similar streams are supported. - * - * After this call: - * No hardware is programmed for call. Only validation is done. - */ - -/* * Set up streams and links associated to drive sinks * The streams parameter is an absolute set of all active streams. * diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 9441305d3ab5..76df2534c4a4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -25,7 +25,7 @@ #ifndef DC_TYPES_H_ #define DC_TYPES_H_ -#include "fixed32_32.h" +#include "os_types.h" #include "fixed31_32.h" #include "irq_types.h" #include "dc_dp_types.h" @@ -370,12 +370,6 @@ struct dc_csc_adjustments { struct fixed31_32 hue; }; -enum { - MAX_LANES = 2, - MAX_COFUNC_PATH = 6, - LAYER_INDEX_PRIMARY = -1, -}; - enum dpcd_downstream_port_max_bpc { DOWN_STREAM_MAX_8BPC = 0, DOWN_STREAM_MAX_10BPC, @@ -530,6 +524,15 @@ struct vrr_params { uint32_t frame_counter; }; +struct dc_info_packet { + bool valid; + uint8_t hb0; + uint8_t hb1; + uint8_t hb2; + uint8_t hb3; + uint8_t sb[32]; +}; + #define DC_PLANE_UPDATE_TIMES_MAX 10 struct dc_plane_flip_time { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index fe92a1222803..29294db1a96b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -26,7 +26,7 @@ #include "dce_abm.h" #include "dm_services.h" #include "reg_helper.h" -#include "fixed32_32.h" +#include "fixed31_32.h" #include "dc.h" #include "atom.h" diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c index 6d5cdcdc8ec9..7f6d724686f1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c @@ -33,8 +33,9 @@ #define CTX \ aud->base.ctx -#define DC_LOGGER \ - aud->base.ctx->logger + +#define DC_LOGGER_INIT() + #define REG(reg)\ (aud->regs->reg) @@ -348,8 +349,8 @@ static void set_audio_latency( void dce_aud_az_enable(struct audio *audio) { - struct dce_audio *aud = DCE_AUD(audio); uint32_t value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); + DC_LOGGER_INIT(); set_reg_field_value(value, 1, AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, @@ -371,7 +372,7 @@ void dce_aud_az_enable(struct audio *audio) void dce_aud_az_disable(struct audio *audio) { uint32_t value; - struct dce_audio *aud = DCE_AUD(audio); + DC_LOGGER_INIT(); value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL); set_reg_field_value(value, 1, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 0aa2cda60890..599c7ab6befe 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -41,8 +41,9 @@ #define CTX \ clk_src->base.ctx -#define DC_LOGGER \ - calc_pll_cs->ctx->logger + +#define DC_LOGGER_INIT() + #undef FN #define FN(reg_name, field_name) \ clk_src->cs_shift->field_name, clk_src->cs_mask->field_name @@ -467,7 +468,7 @@ static uint32_t dce110_get_pix_clk_dividers_helper ( { uint32_t field = 0; uint32_t pll_calc_error = MAX_PLL_CALC_ERROR; - struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll; + DC_LOGGER_INIT(); /* Check if reference clock is external (not pcie/xtalin) * HW Dce80 spec: * 00 - PCIE_REFCLK, 01 - XTALIN, 02 - GENERICA, 03 - GENERICB @@ -557,8 +558,8 @@ static uint32_t dce110_get_pix_clk_dividers( struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(cs); - struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll; uint32_t pll_calc_error = MAX_PLL_CALC_ERROR; + DC_LOGGER_INIT(); if (pix_clk_params == NULL || pll_settings == NULL || pix_clk_params->requested_pix_clk == 0) { @@ -589,6 +590,7 @@ static uint32_t dce110_get_pix_clk_dividers( pll_settings, pix_clk_params); break; case DCE_VERSION_11_2: + case DCE_VERSION_11_22: case DCE_VERSION_12_0: #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case DCN_VERSION_1_0: @@ -655,12 +657,12 @@ static uint32_t dce110_get_d_to_pixel_rate_in_hz( return 0; } - pix_rate = dal_fixed31_32_from_int(clk_src->ref_freq_khz); - pix_rate = dal_fixed31_32_mul_int(pix_rate, 1000); - pix_rate = dal_fixed31_32_mul_int(pix_rate, phase); - pix_rate = dal_fixed31_32_div_int(pix_rate, modulo); + pix_rate = dc_fixpt_from_int(clk_src->ref_freq_khz); + pix_rate = dc_fixpt_mul_int(pix_rate, 1000); + pix_rate = dc_fixpt_mul_int(pix_rate, phase); + pix_rate = dc_fixpt_div_int(pix_rate, modulo); - return dal_fixed31_32_round(pix_rate); + return dc_fixpt_round(pix_rate); } else { return dce110_get_dp_pixel_rate_from_combo_phy_pll(cs, pix_clk_params, pll_settings); } @@ -709,12 +711,12 @@ static bool calculate_ss( const struct spread_spectrum_data *ss_data, struct delta_sigma_data *ds_data) { - struct fixed32_32 fb_div; - struct fixed32_32 ss_amount; - struct fixed32_32 ss_nslip_amount; - struct fixed32_32 ss_ds_frac_amount; - struct fixed32_32 ss_step_size; - struct fixed32_32 modulation_time; + struct fixed31_32 fb_div; + struct fixed31_32 ss_amount; + struct fixed31_32 ss_nslip_amount; + struct fixed31_32 ss_ds_frac_amount; + struct fixed31_32 ss_step_size; + struct fixed31_32 modulation_time; if (ds_data == NULL) return false; @@ -729,42 +731,42 @@ static bool calculate_ss( /* compute SS_AMOUNT_FBDIV & SS_AMOUNT_NFRAC_SLIP & SS_AMOUNT_DSFRAC*/ /* 6 decimal point support in fractional feedback divider */ - fb_div = dal_fixed32_32_from_fraction( + fb_div = dc_fixpt_from_fraction( pll_settings->fract_feedback_divider, 1000000); - fb_div = dal_fixed32_32_add_int(fb_div, pll_settings->feedback_divider); + fb_div = dc_fixpt_add_int(fb_div, pll_settings->feedback_divider); ds_data->ds_frac_amount = 0; /*spreadSpectrumPercentage is in the unit of .01%, * so have to divided by 100 * 100*/ - ss_amount = dal_fixed32_32_mul( - fb_div, dal_fixed32_32_from_fraction(ss_data->percentage, + ss_amount = dc_fixpt_mul( + fb_div, dc_fixpt_from_fraction(ss_data->percentage, 100 * ss_data->percentage_divider)); - ds_data->feedback_amount = dal_fixed32_32_floor(ss_amount); + ds_data->feedback_amount = dc_fixpt_floor(ss_amount); - ss_nslip_amount = dal_fixed32_32_sub(ss_amount, - dal_fixed32_32_from_int(ds_data->feedback_amount)); - ss_nslip_amount = dal_fixed32_32_mul_int(ss_nslip_amount, 10); - ds_data->nfrac_amount = dal_fixed32_32_floor(ss_nslip_amount); + ss_nslip_amount = dc_fixpt_sub(ss_amount, + dc_fixpt_from_int(ds_data->feedback_amount)); + ss_nslip_amount = dc_fixpt_mul_int(ss_nslip_amount, 10); + ds_data->nfrac_amount = dc_fixpt_floor(ss_nslip_amount); - ss_ds_frac_amount = dal_fixed32_32_sub(ss_nslip_amount, - dal_fixed32_32_from_int(ds_data->nfrac_amount)); - ss_ds_frac_amount = dal_fixed32_32_mul_int(ss_ds_frac_amount, 65536); - ds_data->ds_frac_amount = dal_fixed32_32_floor(ss_ds_frac_amount); + ss_ds_frac_amount = dc_fixpt_sub(ss_nslip_amount, + dc_fixpt_from_int(ds_data->nfrac_amount)); + ss_ds_frac_amount = dc_fixpt_mul_int(ss_ds_frac_amount, 65536); + ds_data->ds_frac_amount = dc_fixpt_floor(ss_ds_frac_amount); /* compute SS_STEP_SIZE_DSFRAC */ - modulation_time = dal_fixed32_32_from_fraction( + modulation_time = dc_fixpt_from_fraction( pll_settings->reference_freq * 1000, pll_settings->reference_divider * ss_data->modulation_freq_hz); if (ss_data->flags.CENTER_SPREAD) - modulation_time = dal_fixed32_32_div_int(modulation_time, 4); + modulation_time = dc_fixpt_div_int(modulation_time, 4); else - modulation_time = dal_fixed32_32_div_int(modulation_time, 2); + modulation_time = dc_fixpt_div_int(modulation_time, 2); - ss_step_size = dal_fixed32_32_div(ss_amount, modulation_time); + ss_step_size = dc_fixpt_div(ss_amount, modulation_time); /* SS_STEP_SIZE_DSFRAC_DEC = Int(SS_STEP_SIZE * 2 ^ 16 * 10)*/ - ss_step_size = dal_fixed32_32_mul_int(ss_step_size, 65536 * 10); - ds_data->ds_frac_size = dal_fixed32_32_floor(ss_step_size); + ss_step_size = dc_fixpt_mul_int(ss_step_size, 65536 * 10); + ds_data->ds_frac_size = dc_fixpt_floor(ss_step_size); return true; } @@ -978,6 +980,7 @@ static bool dce110_program_pix_clk( break; case DCE_VERSION_11_2: + case DCE_VERSION_11_22: case DCE_VERSION_12_0: #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case DCN_VERSION_1_0: @@ -1054,7 +1057,7 @@ static void get_ss_info_from_atombios( struct spread_spectrum_info *ss_info_cur; struct spread_spectrum_data *ss_data_cur; uint32_t i; - struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll; + DC_LOGGER_INIT(); if (ss_entries_num == NULL) { DC_LOG_SYNC( "Invalid entry !!!\n"); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c index 78e6beb6cf26..8a581c67bf2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c @@ -26,7 +26,7 @@ #include "dce_clocks.h" #include "dm_services.h" #include "reg_helper.h" -#include "fixed32_32.h" +#include "fixed31_32.h" #include "bios_parser_interface.h" #include "dc.h" #include "dmcu.h" @@ -35,7 +35,7 @@ #endif #include "core_types.h" #include "dc_types.h" - +#include "dal_asic_id.h" #define TO_DCE_CLOCKS(clocks)\ container_of(clocks, struct dce_disp_clk, base) @@ -228,19 +228,19 @@ static int dce_clocks_get_dp_ref_freq(struct display_clock *clk) generated according to average value (case as with previous ASICs) */ if (clk_dce->ss_on_dprefclk && clk_dce->dprefclk_ss_divider != 0) { - struct fixed32_32 ss_percentage = dal_fixed32_32_div_int( - dal_fixed32_32_from_fraction( + struct fixed31_32 ss_percentage = dc_fixpt_div_int( + dc_fixpt_from_fraction( clk_dce->dprefclk_ss_percentage, clk_dce->dprefclk_ss_divider), 200); - struct fixed32_32 adj_dp_ref_clk_khz; + struct fixed31_32 adj_dp_ref_clk_khz; - ss_percentage = dal_fixed32_32_sub(dal_fixed32_32_one, + ss_percentage = dc_fixpt_sub(dc_fixpt_one, ss_percentage); adj_dp_ref_clk_khz = - dal_fixed32_32_mul_int( + dc_fixpt_mul_int( ss_percentage, dp_ref_clk_khz); - dp_ref_clk_khz = dal_fixed32_32_floor(adj_dp_ref_clk_khz); + dp_ref_clk_khz = dc_fixpt_floor(adj_dp_ref_clk_khz); } return dp_ref_clk_khz; @@ -256,19 +256,19 @@ static int dce_clocks_get_dp_ref_freq_wrkaround(struct display_clock *clk) int dp_ref_clk_khz = 600000; if (clk_dce->ss_on_dprefclk && clk_dce->dprefclk_ss_divider != 0) { - struct fixed32_32 ss_percentage = dal_fixed32_32_div_int( - dal_fixed32_32_from_fraction( + struct fixed31_32 ss_percentage = dc_fixpt_div_int( + dc_fixpt_from_fraction( clk_dce->dprefclk_ss_percentage, clk_dce->dprefclk_ss_divider), 200); - struct fixed32_32 adj_dp_ref_clk_khz; + struct fixed31_32 adj_dp_ref_clk_khz; - ss_percentage = dal_fixed32_32_sub(dal_fixed32_32_one, + ss_percentage = dc_fixpt_sub(dc_fixpt_one, ss_percentage); adj_dp_ref_clk_khz = - dal_fixed32_32_mul_int( + dc_fixpt_mul_int( ss_percentage, dp_ref_clk_khz); - dp_ref_clk_khz = dal_fixed32_32_floor(adj_dp_ref_clk_khz); + dp_ref_clk_khz = dc_fixpt_floor(adj_dp_ref_clk_khz); } return dp_ref_clk_khz; @@ -413,9 +413,12 @@ static int dce112_set_clock( /*VBIOS will determine DPREFCLK frequency, so we don't set it*/ dce_clk_params.target_clock_frequency = 0; dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK; - dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = + if (!ASICREV_IS_VEGA20_P(clk->ctx->asic_id.hw_internal_rev)) + dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = (dce_clk_params.pll_id == CLOCK_SOURCE_COMBO_DISPLAY_PLL0); + else + dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false; bp->funcs->set_dce_clock(bp, &dce_clk_params); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index 2ee3d9bf1062..a576b8bbb3cd 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -28,7 +28,7 @@ #include "dce_dmcu.h" #include "dm_services.h" #include "reg_helper.h" -#include "fixed32_32.h" +#include "fixed31_32.h" #include "dc.h" #define TO_DCE_DMCU(dmcu)\ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c index 487724345d9d..0275d6d60da4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c @@ -53,7 +53,8 @@ void dce_pipe_control_lock(struct dc *dc, struct dce_hwseq *hws = dc->hwseq; /* Not lock pipe when blank */ - if (lock && pipe->stream_res.tg->funcs->is_blanked(pipe->stream_res.tg)) + if (lock && pipe->stream_res.tg->funcs->is_blanked && + pipe->stream_res.tg->funcs->is_blanked(pipe->stream_res.tg)) return; val = REG_GET_4(BLND_V_UPDATE_LOCK[pipe->stream_res.tg->inst], diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c index d737e911971b..5d9506b3d46b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c @@ -195,13 +195,13 @@ static void dce_ipp_program_input_lut( for (i = 0; i < gamma->num_entries; i++) { REG_SET(DC_LUT_SEQ_COLOR, 0, DC_LUT_SEQ_COLOR, - dal_fixed31_32_round( + dc_fixpt_round( gamma->entries.red[i])); REG_SET(DC_LUT_SEQ_COLOR, 0, DC_LUT_SEQ_COLOR, - dal_fixed31_32_round( + dc_fixpt_round( gamma->entries.green[i])); REG_SET(DC_LUT_SEQ_COLOR, 0, DC_LUT_SEQ_COLOR, - dal_fixed31_32_round( + dc_fixpt_round( gamma->entries.blue[i])); } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 8167cad7bcf7..dbe3b26b6d9e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -113,6 +113,7 @@ static const struct link_encoder_funcs dce110_lnk_enc_funcs = { .connect_dig_be_to_fe = dce110_link_encoder_connect_dig_be_to_fe, .enable_hpd = dce110_link_encoder_enable_hpd, .disable_hpd = dce110_link_encoder_disable_hpd, + .is_dig_enabled = dce110_is_dig_enabled, .destroy = dce110_link_encoder_destroy }; @@ -535,8 +536,9 @@ void dce110_psr_program_secondary_packet(struct link_encoder *enc, DP_SEC_GSP0_PRIORITY, 1); } -static bool is_dig_enabled(const struct dce110_link_encoder *enc110) +bool dce110_is_dig_enabled(struct link_encoder *enc) { + struct dce110_link_encoder *enc110 = TO_DCE110_LINK_ENC(enc); uint32_t value; REG_GET(DIG_BE_EN_CNTL, DIG_ENABLE, &value); @@ -1031,7 +1033,7 @@ void dce110_link_encoder_disable_output( struct bp_transmitter_control cntl = { 0 }; enum bp_result result; - if (!is_dig_enabled(enc110)) { + if (!dce110_is_dig_enabled(enc)) { /* OF_SKIP_POWER_DOWN_INACTIVE_ENCODER */ return; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h index 0ec3433d34b6..347069461a22 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h @@ -263,4 +263,6 @@ void dce110_psr_program_dp_dphy_fast_training(struct link_encoder *enc, void dce110_psr_program_secondary_packet(struct link_encoder *enc, unsigned int sdp_transmit_line_num_deadline); +bool dce110_is_dig_enabled(struct link_encoder *enc); + #endif /* __DC_LINK_ENCODER__DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c index 0790f25c7b3b..b235a75355b8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c @@ -174,6 +174,25 @@ static void program_urgency_watermark( URGENCY_HIGH_WATERMARK, urgency_high_wm); } +static void dce120_program_urgency_watermark( + struct dce_mem_input *dce_mi, + uint32_t wm_select, + uint32_t urgency_low_wm, + uint32_t urgency_high_wm) +{ + REG_UPDATE(DPG_WATERMARK_MASK_CONTROL, + URGENCY_WATERMARK_MASK, wm_select); + + REG_SET_2(DPG_PIPE_URGENCY_CONTROL, 0, + URGENCY_LOW_WATERMARK, urgency_low_wm, + URGENCY_HIGH_WATERMARK, urgency_high_wm); + + REG_SET_2(DPG_PIPE_URGENT_LEVEL_CONTROL, 0, + URGENT_LEVEL_LOW_WATERMARK, urgency_low_wm, + URGENT_LEVEL_HIGH_WATERMARK, urgency_high_wm); + +} + static void program_nbp_watermark( struct dce_mem_input *dce_mi, uint32_t wm_select, @@ -206,6 +225,25 @@ static void program_nbp_watermark( } } +static void dce120_program_stutter_watermark( + struct dce_mem_input *dce_mi, + uint32_t wm_select, + uint32_t stutter_mark, + uint32_t stutter_entry) +{ + REG_UPDATE(DPG_WATERMARK_MASK_CONTROL, + STUTTER_EXIT_SELF_REFRESH_WATERMARK_MASK, wm_select); + + if (REG(DPG_PIPE_STUTTER_CONTROL2)) + REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL2, + STUTTER_EXIT_SELF_REFRESH_WATERMARK, stutter_mark, + STUTTER_ENTER_SELF_REFRESH_WATERMARK, stutter_entry); + else + REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL, + STUTTER_EXIT_SELF_REFRESH_WATERMARK, stutter_mark, + STUTTER_ENTER_SELF_REFRESH_WATERMARK, stutter_entry); +} + static void program_stutter_watermark( struct dce_mem_input *dce_mi, uint32_t wm_select, @@ -225,7 +263,8 @@ static void program_stutter_watermark( static void dce_mi_program_display_marks( struct mem_input *mi, struct dce_watermarks nbp, - struct dce_watermarks stutter, + struct dce_watermarks stutter_exit, + struct dce_watermarks stutter_enter, struct dce_watermarks urgent, uint32_t total_dest_line_time_ns) { @@ -243,13 +282,14 @@ static void dce_mi_program_display_marks( program_nbp_watermark(dce_mi, 2, nbp.a_mark); /* set a */ program_nbp_watermark(dce_mi, 1, nbp.d_mark); /* set d */ - program_stutter_watermark(dce_mi, 2, stutter.a_mark); /* set a */ - program_stutter_watermark(dce_mi, 1, stutter.d_mark); /* set d */ + program_stutter_watermark(dce_mi, 2, stutter_exit.a_mark); /* set a */ + program_stutter_watermark(dce_mi, 1, stutter_exit.d_mark); /* set d */ } -static void dce120_mi_program_display_marks(struct mem_input *mi, +static void dce112_mi_program_display_marks(struct mem_input *mi, struct dce_watermarks nbp, - struct dce_watermarks stutter, + struct dce_watermarks stutter_exit, + struct dce_watermarks stutter_entry, struct dce_watermarks urgent, uint32_t total_dest_line_time_ns) { @@ -273,10 +313,43 @@ static void dce120_mi_program_display_marks(struct mem_input *mi, program_nbp_watermark(dce_mi, 2, nbp.c_mark); /* set c */ program_nbp_watermark(dce_mi, 3, nbp.d_mark); /* set d */ - program_stutter_watermark(dce_mi, 0, stutter.a_mark); /* set a */ - program_stutter_watermark(dce_mi, 1, stutter.b_mark); /* set b */ - program_stutter_watermark(dce_mi, 2, stutter.c_mark); /* set c */ - program_stutter_watermark(dce_mi, 3, stutter.d_mark); /* set d */ + program_stutter_watermark(dce_mi, 0, stutter_exit.a_mark); /* set a */ + program_stutter_watermark(dce_mi, 1, stutter_exit.b_mark); /* set b */ + program_stutter_watermark(dce_mi, 2, stutter_exit.c_mark); /* set c */ + program_stutter_watermark(dce_mi, 3, stutter_exit.d_mark); /* set d */ +} + +static void dce120_mi_program_display_marks(struct mem_input *mi, + struct dce_watermarks nbp, + struct dce_watermarks stutter_exit, + struct dce_watermarks stutter_entry, + struct dce_watermarks urgent, + uint32_t total_dest_line_time_ns) +{ + struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi); + uint32_t stutter_en = mi->ctx->dc->debug.disable_stutter ? 0 : 1; + + dce120_program_urgency_watermark(dce_mi, 0, /* set a */ + urgent.a_mark, total_dest_line_time_ns); + dce120_program_urgency_watermark(dce_mi, 1, /* set b */ + urgent.b_mark, total_dest_line_time_ns); + dce120_program_urgency_watermark(dce_mi, 2, /* set c */ + urgent.c_mark, total_dest_line_time_ns); + dce120_program_urgency_watermark(dce_mi, 3, /* set d */ + urgent.d_mark, total_dest_line_time_ns); + + REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL, + STUTTER_ENABLE, stutter_en, + STUTTER_IGNORE_FBC, 1); + program_nbp_watermark(dce_mi, 0, nbp.a_mark); /* set a */ + program_nbp_watermark(dce_mi, 1, nbp.b_mark); /* set b */ + program_nbp_watermark(dce_mi, 2, nbp.c_mark); /* set c */ + program_nbp_watermark(dce_mi, 3, nbp.d_mark); /* set d */ + + dce120_program_stutter_watermark(dce_mi, 0, stutter_exit.a_mark, stutter_entry.a_mark); /* set a */ + dce120_program_stutter_watermark(dce_mi, 1, stutter_exit.b_mark, stutter_entry.b_mark); /* set b */ + dce120_program_stutter_watermark(dce_mi, 2, stutter_exit.c_mark, stutter_entry.c_mark); /* set c */ + dce120_program_stutter_watermark(dce_mi, 3, stutter_exit.d_mark, stutter_entry.d_mark); /* set d */ } static void program_tiling( @@ -696,5 +769,17 @@ void dce112_mem_input_construct( const struct dce_mem_input_mask *mi_mask) { dce_mem_input_construct(dce_mi, ctx, inst, regs, mi_shift, mi_mask); + dce_mi->base.funcs->mem_input_program_display_marks = dce112_mi_program_display_marks; +} + +void dce120_mem_input_construct( + struct dce_mem_input *dce_mi, + struct dc_context *ctx, + int inst, + const struct dce_mem_input_registers *regs, + const struct dce_mem_input_shift *mi_shift, + const struct dce_mem_input_mask *mi_mask) +{ + dce_mem_input_construct(dce_mi, ctx, inst, regs, mi_shift, mi_mask); dce_mi->base.funcs->mem_input_program_display_marks = dce120_mi_program_display_marks; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h index 05d39c0cbe87..d15b0d7f47fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h @@ -106,6 +106,7 @@ struct dce_mem_input_registers { uint32_t DPG_PIPE_ARBITRATION_CONTROL1; uint32_t DPG_WATERMARK_MASK_CONTROL; uint32_t DPG_PIPE_URGENCY_CONTROL; + uint32_t DPG_PIPE_URGENT_LEVEL_CONTROL; uint32_t DPG_PIPE_NB_PSTATE_CHANGE_CONTROL; uint32_t DPG_PIPE_LOW_POWER_CONTROL; uint32_t DPG_PIPE_STUTTER_CONTROL; @@ -213,6 +214,11 @@ struct dce_mem_input_registers { #define MI_DCE12_DMIF_PG_MASK_SH_LIST(mask_sh, blk)\ SFB(blk, DPG_PIPE_STUTTER_CONTROL2, STUTTER_EXIT_SELF_REFRESH_WATERMARK, mask_sh),\ + SFB(blk, DPG_PIPE_STUTTER_CONTROL2, STUTTER_ENTER_SELF_REFRESH_WATERMARK, mask_sh),\ + SFB(blk, DPG_PIPE_URGENT_LEVEL_CONTROL, URGENT_LEVEL_LOW_WATERMARK, mask_sh),\ + SFB(blk, DPG_PIPE_URGENT_LEVEL_CONTROL, URGENT_LEVEL_HIGH_WATERMARK, mask_sh),\ + SFB(blk, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, mask_sh),\ + SFB(blk, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, mask_sh),\ SFB(blk, DPG_WATERMARK_MASK_CONTROL, PSTATE_CHANGE_WATERMARK_MASK, mask_sh),\ SFB(blk, DPG_PIPE_LOW_POWER_CONTROL, PSTATE_CHANGE_ENABLE, mask_sh),\ SFB(blk, DPG_PIPE_LOW_POWER_CONTROL, PSTATE_CHANGE_URGENT_DURING_REQUEST, mask_sh),\ @@ -286,6 +292,8 @@ struct dce_mem_input_registers { type STUTTER_EXIT_SELF_REFRESH_WATERMARK_MASK; \ type URGENCY_LOW_WATERMARK; \ type URGENCY_HIGH_WATERMARK; \ + type URGENT_LEVEL_LOW_WATERMARK;\ + type URGENT_LEVEL_HIGH_WATERMARK;\ type NB_PSTATE_CHANGE_ENABLE; \ type NB_PSTATE_CHANGE_URGENT_DURING_REQUEST; \ type NB_PSTATE_CHANGE_NOT_SELF_REFRESH_DURING_REQUEST; \ @@ -297,6 +305,7 @@ struct dce_mem_input_registers { type STUTTER_ENABLE; \ type STUTTER_IGNORE_FBC; \ type STUTTER_EXIT_SELF_REFRESH_WATERMARK; \ + type STUTTER_ENTER_SELF_REFRESH_WATERMARK; \ type DMIF_BUFFERS_ALLOCATED; \ type DMIF_BUFFERS_ALLOCATION_COMPLETED; \ type ENABLE; /* MC_HUB_RDREQ_DMIF_LIMIT */\ @@ -344,4 +353,12 @@ void dce112_mem_input_construct( const struct dce_mem_input_shift *mi_shift, const struct dce_mem_input_mask *mi_mask); +void dce120_mem_input_construct( + struct dce_mem_input *dce_mi, + struct dc_context *ctx, + int inst, + const struct dce_mem_input_registers *regs, + const struct dce_mem_input_shift *mi_shift, + const struct dce_mem_input_mask *mi_mask); + #endif /*__DCE_MEM_INPUT_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c b/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c index 6243450b41b7..48862bebf29e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c @@ -1014,11 +1014,11 @@ static const uint16_t filter_8tap_64p_183[264] = { const uint16_t *get_filter_3tap_16p(struct fixed31_32 ratio) { - if (ratio.value < dal_fixed31_32_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_3tap_16p_upscale; - else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_3tap_16p_117; - else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_3tap_16p_150; else return filter_3tap_16p_183; @@ -1026,11 +1026,11 @@ const uint16_t *get_filter_3tap_16p(struct fixed31_32 ratio) const uint16_t *get_filter_3tap_64p(struct fixed31_32 ratio) { - if (ratio.value < dal_fixed31_32_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_3tap_64p_upscale; - else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_3tap_64p_117; - else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_3tap_64p_150; else return filter_3tap_64p_183; @@ -1038,11 +1038,11 @@ const uint16_t *get_filter_3tap_64p(struct fixed31_32 ratio) const uint16_t *get_filter_4tap_16p(struct fixed31_32 ratio) { - if (ratio.value < dal_fixed31_32_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_4tap_16p_upscale; - else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_4tap_16p_117; - else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_4tap_16p_150; else return filter_4tap_16p_183; @@ -1050,11 +1050,11 @@ const uint16_t *get_filter_4tap_16p(struct fixed31_32 ratio) const uint16_t *get_filter_4tap_64p(struct fixed31_32 ratio) { - if (ratio.value < dal_fixed31_32_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_4tap_64p_upscale; - else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_4tap_64p_117; - else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_4tap_64p_150; else return filter_4tap_64p_183; @@ -1062,11 +1062,11 @@ const uint16_t *get_filter_4tap_64p(struct fixed31_32 ratio) const uint16_t *get_filter_5tap_64p(struct fixed31_32 ratio) { - if (ratio.value < dal_fixed31_32_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_5tap_64p_upscale; - else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_5tap_64p_117; - else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_5tap_64p_150; else return filter_5tap_64p_183; @@ -1074,11 +1074,11 @@ const uint16_t *get_filter_5tap_64p(struct fixed31_32 ratio) const uint16_t *get_filter_6tap_64p(struct fixed31_32 ratio) { - if (ratio.value < dal_fixed31_32_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_6tap_64p_upscale; - else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_6tap_64p_117; - else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_6tap_64p_150; else return filter_6tap_64p_183; @@ -1086,11 +1086,11 @@ const uint16_t *get_filter_6tap_64p(struct fixed31_32 ratio) const uint16_t *get_filter_7tap_64p(struct fixed31_32 ratio) { - if (ratio.value < dal_fixed31_32_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_7tap_64p_upscale; - else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_7tap_64p_117; - else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_7tap_64p_150; else return filter_7tap_64p_183; @@ -1098,11 +1098,11 @@ const uint16_t *get_filter_7tap_64p(struct fixed31_32 ratio) const uint16_t *get_filter_8tap_64p(struct fixed31_32 ratio) { - if (ratio.value < dal_fixed31_32_one.value) + if (ratio.value < dc_fixpt_one.value) return filter_8tap_64p_upscale; - else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(4, 3).value) return filter_8tap_64p_117; - else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value) + else if (ratio.value < dc_fixpt_from_fraction(5, 3).value) return filter_8tap_64p_150; else return filter_8tap_64p_183; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 162f6a6c4208..c0e813c7ddd4 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -26,27 +26,10 @@ #include "dc_bios_types.h" #include "dce_stream_encoder.h" #include "reg_helper.h" +#include "hw_shared.h" + #define DC_LOGGER \ enc110->base.ctx->logger -enum DP_PIXEL_ENCODING { -DP_PIXEL_ENCODING_RGB444 = 0x00000000, -DP_PIXEL_ENCODING_YCBCR422 = 0x00000001, -DP_PIXEL_ENCODING_YCBCR444 = 0x00000002, -DP_PIXEL_ENCODING_RGB_WIDE_GAMUT = 0x00000003, -DP_PIXEL_ENCODING_Y_ONLY = 0x00000004, -DP_PIXEL_ENCODING_YCBCR420 = 0x00000005, -DP_PIXEL_ENCODING_RESERVED = 0x00000006, -}; - - -enum DP_COMPONENT_DEPTH { -DP_COMPONENT_DEPTH_6BPC = 0x00000000, -DP_COMPONENT_DEPTH_8BPC = 0x00000001, -DP_COMPONENT_DEPTH_10BPC = 0x00000002, -DP_COMPONENT_DEPTH_12BPC = 0x00000003, -DP_COMPONENT_DEPTH_16BPC = 0x00000004, -DP_COMPONENT_DEPTH_RESERVED = 0x00000005, -}; #define REG(reg)\ @@ -80,7 +63,7 @@ enum { static void dce110_update_generic_info_packet( struct dce110_stream_encoder *enc110, uint32_t packet_index, - const struct encoder_info_packet *info_packet) + const struct dc_info_packet *info_packet) { uint32_t regval; /* TODOFPGA Figure out a proper number for max_retries polling for lock @@ -89,7 +72,8 @@ static void dce110_update_generic_info_packet( uint32_t max_retries = 50; /*we need turn on clock before programming AFMT block*/ - REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + if (REG(AFMT_CNTL)) + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); if (REG(AFMT_VBI_PACKET_CONTROL1)) { if (packet_index >= 8) @@ -196,7 +180,7 @@ static void dce110_update_generic_info_packet( static void dce110_update_hdmi_info_packet( struct dce110_stream_encoder *enc110, uint32_t packet_index, - const struct encoder_info_packet *info_packet) + const struct dc_info_packet *info_packet) { uint32_t cont, send, line; @@ -314,11 +298,11 @@ static void dce110_stream_encoder_dp_set_stream_attribute( switch (crtc_timing->pixel_encoding) { case PIXEL_ENCODING_YCBCR422: REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_YCBCR422); + DP_PIXEL_ENCODING_TYPE_YCBCR422); break; case PIXEL_ENCODING_YCBCR444: REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_YCBCR444); + DP_PIXEL_ENCODING_TYPE_YCBCR444); if (crtc_timing->flags.Y_ONLY) if (crtc_timing->display_color_depth != COLOR_DEPTH_666) @@ -326,7 +310,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute( * Color depth of Y-only could be * 8, 10, 12, 16 bits */ REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_Y_ONLY); + DP_PIXEL_ENCODING_TYPE_Y_ONLY); /* Note: DP_MSA_MISC1 bit 7 is the indicator * of Y-only mode. * This bit is set in HW if register @@ -334,7 +318,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute( break; case PIXEL_ENCODING_YCBCR420: REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_YCBCR420); + DP_PIXEL_ENCODING_TYPE_YCBCR420); if (enc110->se_mask->DP_VID_M_DOUBLE_VALUE_EN) REG_UPDATE(DP_VID_TIMING, DP_VID_M_DOUBLE_VALUE_EN, 1); @@ -345,7 +329,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute( break; default: REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_RGB444); + DP_PIXEL_ENCODING_TYPE_RGB444); break; } @@ -363,20 +347,20 @@ static void dce110_stream_encoder_dp_set_stream_attribute( break; case COLOR_DEPTH_888: REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_DEPTH_8BPC); + DP_COMPONENT_PIXEL_DEPTH_8BPC); break; case COLOR_DEPTH_101010: REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_DEPTH_10BPC); + DP_COMPONENT_PIXEL_DEPTH_10BPC); break; case COLOR_DEPTH_121212: REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_DEPTH_12BPC); + DP_COMPONENT_PIXEL_DEPTH_12BPC); break; default: REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_DEPTH_6BPC); + DP_COMPONENT_PIXEL_DEPTH_6BPC); break; } @@ -700,11 +684,11 @@ static void dce110_stream_encoder_set_mst_bandwidth( struct fixed31_32 avg_time_slots_per_mtp) { struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); - uint32_t x = dal_fixed31_32_floor( + uint32_t x = dc_fixpt_floor( avg_time_slots_per_mtp); - uint32_t y = dal_fixed31_32_ceil( - dal_fixed31_32_shl( - dal_fixed31_32_sub_int( + uint32_t y = dc_fixpt_ceil( + dc_fixpt_shl( + dc_fixpt_sub_int( avg_time_slots_per_mtp, x), 26)); @@ -736,7 +720,8 @@ static void dce110_stream_encoder_update_hdmi_info_packets( const uint32_t *content = (const uint32_t *) &info_frame->avi.sb[0]; /*we need turn on clock before programming AFMT block*/ - REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + if (REG(AFMT_CNTL)) + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); REG_WRITE(AFMT_AVI_INFO0, content[0]); @@ -836,7 +821,7 @@ static void dce110_stream_encoder_update_dp_info_packets( const struct encoder_info_frame *info_frame) { struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); - uint32_t value = REG_READ(DP_SEC_CNTL); + uint32_t value = 0; if (info_frame->vsc.valid) dce110_update_generic_info_packet( @@ -870,6 +855,7 @@ static void dce110_stream_encoder_update_dp_info_packets( * Therefore we need to enable master bit * if at least on of the fields is not 0 */ + value = REG_READ(DP_SEC_CNTL); if (value) REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); } @@ -879,7 +865,7 @@ static void dce110_stream_encoder_stop_dp_info_packets( { /* stop generic packets on DP */ struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); - uint32_t value = REG_READ(DP_SEC_CNTL); + uint32_t value = 0; if (enc110->se_mask->DP_SEC_AVI_ENABLE) { REG_SET_7(DP_SEC_CNTL, 0, @@ -892,25 +878,10 @@ static void dce110_stream_encoder_stop_dp_info_packets( DP_SEC_STREAM_ENABLE, 0); } -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) - if (enc110->se_mask->DP_SEC_GSP7_ENABLE) { - REG_SET_10(DP_SEC_CNTL, 0, - DP_SEC_GSP0_ENABLE, 0, - DP_SEC_GSP1_ENABLE, 0, - DP_SEC_GSP2_ENABLE, 0, - DP_SEC_GSP3_ENABLE, 0, - DP_SEC_GSP4_ENABLE, 0, - DP_SEC_GSP5_ENABLE, 0, - DP_SEC_GSP6_ENABLE, 0, - DP_SEC_GSP7_ENABLE, 0, - DP_SEC_MPG_ENABLE, 0, - DP_SEC_STREAM_ENABLE, 0); - } -#endif /* this register shared with audio info frame. * therefore we need to keep master enabled * if at least one of the fields is not 0 */ - + value = REG_READ(DP_SEC_CNTL); if (value) REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); @@ -1513,7 +1484,7 @@ static void dce110_se_disable_dp_audio( struct stream_encoder *enc) { struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); - uint32_t value = REG_READ(DP_SEC_CNTL); + uint32_t value = 0; /* Disable Audio packets */ REG_UPDATE_5(DP_SEC_CNTL, @@ -1525,6 +1496,7 @@ static void dce110_se_disable_dp_audio( /* This register shared with encoder info frame. Therefore we need to keep master enabled if at least on of the fields is not 0 */ + value = REG_READ(DP_SEC_CNTL); if (value != 0) REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c index 832c5daada35..a02e719d7794 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c @@ -41,7 +41,7 @@ #define DC_LOGGER \ xfm_dce->base.ctx->logger -#define IDENTITY_RATIO(ratio) (dal_fixed31_32_u2d19(ratio) == (1 << 19)) +#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) #define GAMUT_MATRIX_SIZE 12 #define SCL_PHASES 16 @@ -256,27 +256,27 @@ static void calculate_inits( struct fixed31_32 v_init; inits->h_int_scale_ratio = - dal_fixed31_32_u2d19(data->ratios.horz) << 5; + dc_fixpt_u2d19(data->ratios.horz) << 5; inits->v_int_scale_ratio = - dal_fixed31_32_u2d19(data->ratios.vert) << 5; + dc_fixpt_u2d19(data->ratios.vert) << 5; h_init = - dal_fixed31_32_div_int( - dal_fixed31_32_add( + dc_fixpt_div_int( + dc_fixpt_add( data->ratios.horz, - dal_fixed31_32_from_int(data->taps.h_taps + 1)), + dc_fixpt_from_int(data->taps.h_taps + 1)), 2); - inits->h_init.integer = dal_fixed31_32_floor(h_init); - inits->h_init.fraction = dal_fixed31_32_u0d19(h_init) << 5; + inits->h_init.integer = dc_fixpt_floor(h_init); + inits->h_init.fraction = dc_fixpt_u0d19(h_init) << 5; v_init = - dal_fixed31_32_div_int( - dal_fixed31_32_add( + dc_fixpt_div_int( + dc_fixpt_add( data->ratios.vert, - dal_fixed31_32_from_int(data->taps.v_taps + 1)), + dc_fixpt_from_int(data->taps.v_taps + 1)), 2); - inits->v_init.integer = dal_fixed31_32_floor(v_init); - inits->v_init.fraction = dal_fixed31_32_u0d19(v_init) << 5; + inits->v_init.integer = dc_fixpt_floor(v_init); + inits->v_init.fraction = dc_fixpt_u0d19(v_init) << 5; } static void program_scl_ratios_inits( diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 3092f76bdb75..38ec0d609297 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -733,38 +733,6 @@ enum dc_status dce100_add_stream_to_ctx( return result; } -enum dc_status dce100_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *dc_stream, - struct dc_state *context) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - context->streams[0] = dc_stream; - dc_stream_retain(context->streams[0]); - context->stream_count++; - - result = resource_map_pool_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = resource_map_clock_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, context, dc_stream); - - if (result == DC_OK) { - validate_guaranteed_copy_streams( - context, dc->caps.max_streams); - result = resource_build_scaling_params_for_context(dc, context); - } - - if (result == DC_OK) - if (!dce100_validate_bandwidth(dc, context)) - result = DC_FAIL_BANDWIDTH_VALIDATE; - - return result; -} - static void dce100_destroy_resource_pool(struct resource_pool **pool) { struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); @@ -786,7 +754,6 @@ enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, s static const struct resource_funcs dce100_res_pool_funcs = { .destroy = dce100_destroy_resource_pool, .link_enc_create = dce100_link_encoder_create, - .validate_guaranteed = dce100_validate_guaranteed, .validate_bandwidth = dce100_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c index 9150d2694450..e2994d337044 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c @@ -121,10 +121,10 @@ static void reset_lb_on_vblank(struct dc_context *ctx) frame_count = dm_read_reg(ctx, mmCRTC_STATUS_FRAME_COUNT); - for (retry = 100; retry > 0; retry--) { + for (retry = 10000; retry > 0; retry--) { if (frame_count != dm_read_reg(ctx, mmCRTC_STATUS_FRAME_COUNT)) break; - msleep(1); + udelay(10); } if (!retry) dm_error("Frame count did not increase for 100ms.\n"); @@ -147,14 +147,14 @@ static void wait_for_fbc_state_changed( uint32_t addr = mmFBC_STATUS; uint32_t value; - while (counter < 10) { + while (counter < 1000) { value = dm_read_reg(cp110->base.ctx, addr); if (get_reg_field_value( value, FBC_STATUS, FBC_ENABLE_STATUS) == enabled) break; - msleep(10); + udelay(100); counter++; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index d0575999f172..c29052b6da5a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -70,8 +70,9 @@ #define CTX \ hws->ctx -#define DC_LOGGER \ - ctx->logger + +#define DC_LOGGER_INIT() + #define REG(reg)\ hws->regs->reg @@ -279,7 +280,9 @@ dce110_set_input_transfer_func(struct pipe_ctx *pipe_ctx, build_prescale_params(&prescale_params, plane_state); ipp->funcs->ipp_program_prescale(ipp, &prescale_params); - if (plane_state->gamma_correction && dce_use_lut(plane_state->format)) + if (plane_state->gamma_correction && + !plane_state->gamma_correction->is_identity && + dce_use_lut(plane_state->format)) ipp->funcs->ipp_program_input_lut(ipp, plane_state->gamma_correction); if (tf == NULL) { @@ -506,19 +509,19 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; - arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(region_start)); - arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(region_end)); + arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2), + dc_fixpt_from_int(region_start)); + arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2), + dc_fixpt_from_int(region_end)); y_r = rgb_resulted[0].red; y_g = rgb_resulted[0].green; y_b = rgb_resulted[0].blue; - y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b)); + y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b)); arr_points[0].y = y1_min; - arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, + arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x); y_r = rgb_resulted[hw_points - 1].red; @@ -528,21 +531,21 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, /* see comment above, m_arrPoints[1].y should be the Y value for the * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) */ - y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b)); + y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b)); arr_points[1].y = y3_max; - arr_points[1].slope = dal_fixed31_32_zero; + arr_points[1].slope = dc_fixpt_zero; if (output_tf->tf == TRANSFER_FUNCTION_PQ) { /* for PQ, we want to have a straight line from last HW X point, * and the slope to be such that we hit 1.0 at 10000 nits. */ - const struct fixed31_32 end_value = dal_fixed31_32_from_int(125); + const struct fixed31_32 end_value = dc_fixpt_from_int(125); - arr_points[1].slope = dal_fixed31_32_div( - dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y), - dal_fixed31_32_sub(end_value, arr_points[1].x)); + arr_points[1].slope = dc_fixpt_div( + dc_fixpt_sub(dc_fixpt_one, arr_points[1].y), + dc_fixpt_sub(end_value, arr_points[1].x)); } regamma_params->hw_points_num = hw_points; @@ -566,16 +569,16 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, i = 1; while (i != hw_points + 1) { - if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red)) + if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) rgb_plus_1->red = rgb->red; - if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green)) + if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) rgb_plus_1->green = rgb->green; - if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue)) + if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) rgb_plus_1->blue = rgb->blue; - rgb->delta_red = dal_fixed31_32_sub(rgb_plus_1->red, rgb->red); - rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green); - rgb->delta_blue = dal_fixed31_32_sub(rgb_plus_1->blue, rgb->blue); + rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); + rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); + rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); ++rgb_plus_1; ++rgb; @@ -851,6 +854,28 @@ void hwss_edp_power_control( if (power_up != is_panel_powered_on(hwseq)) { /* Send VBIOS command to prompt eDP panel power */ + if (power_up) { + unsigned long long current_ts = dm_get_timestamp(ctx); + unsigned long long duration_in_ms = + dm_get_elapse_time_in_ns( + ctx, + current_ts, + div64_u64(link->link_trace.time_stamp.edp_poweroff, 1000000)); + unsigned long long wait_time_ms = 0; + + /* max 500ms from LCDVDD off to on */ + if (link->link_trace.time_stamp.edp_poweroff == 0) + wait_time_ms = 500; + else if (duration_in_ms < 500) + wait_time_ms = 500 - duration_in_ms; + + if (wait_time_ms) { + msleep(wait_time_ms); + dm_output_to_console("%s: wait %lld ms to power on eDP.\n", + __func__, wait_time_ms); + } + + } DC_LOG_HW_RESUME_S3( "%s: Panel Power action: %s\n", @@ -864,9 +889,14 @@ void hwss_edp_power_control( cntl.coherent = false; cntl.lanes_number = LANE_COUNT_FOUR; cntl.hpd_sel = link->link_enc->hpd_source; - bp_result = link_transmitter_control(ctx->dc_bios, &cntl); + if (!power_up) + /*save driver power off time stamp*/ + link->link_trace.time_stamp.edp_poweroff = dm_get_timestamp(ctx); + else + link->link_trace.time_stamp.edp_poweron = dm_get_timestamp(ctx); + if (bp_result != BP_RESULT_OK) DC_LOG_ERROR( "%s: Panel Power bp_result: %d\n", @@ -974,9 +1004,9 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) /*don't free audio if it is from retrain or internal disable stream*/ if (option == FREE_ACQUIRED_RESOURCE && dc->caps.dynamic_audio == true) { /*we have to dynamic arbitrate the audio endpoints*/ - pipe_ctx->stream_res.audio = NULL; /*we free the resource, need reset is_audio_acquired*/ update_audio_usage(&dc->current_state->res_ctx, dc->res_pool, pipe_ctx->stream_res.audio, false); + pipe_ctx->stream_res.audio = NULL; } /* TODO: notify audio driver for if audio modes list changed @@ -1011,7 +1041,7 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { link->dc->hwss.edp_backlight_control(link, true); - stream->bl_pwm_level = 0; + stream->bl_pwm_level = EDP_BACKLIGHT_RAMP_DISABLE_LEVEL; } } void dce110_blank_stream(struct pipe_ctx *pipe_ctx) @@ -1203,7 +1233,7 @@ static void program_scaler(const struct dc *dc, &pipe_ctx->plane_res.scl_data); } -static enum dc_status dce110_prog_pixclk_crtc_otg( +static enum dc_status dce110_enable_stream_timing( struct pipe_ctx *pipe_ctx, struct dc_state *context, struct dc *dc) @@ -1269,7 +1299,7 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx[pipe_ctx->pipe_idx]; /* */ - dc->hwss.prog_pixclk_crtc_otg(pipe_ctx, context, dc); + dc->hwss.enable_stream_timing(pipe_ctx, context, dc); /* FPGA does not program backend */ if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { @@ -1441,6 +1471,17 @@ static void disable_vga_and_power_gate_all_controllers( } } +static struct dc_link *get_link_for_edp(struct dc *dc) +{ + int i; + + for (i = 0; i < dc->link_count; i++) { + if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) + return dc->links[i]; + } + return NULL; +} + static struct dc_link *get_link_for_edp_not_in_use( struct dc *dc, struct dc_state *context) @@ -1475,20 +1516,21 @@ static struct dc_link *get_link_for_edp_not_in_use( */ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) { - struct dc_bios *dcb = dc->ctx->dc_bios; - - /* vbios already light up eDP, so we can leverage vbios and skip eDP - * programming - */ - bool can_eDP_fast_boot_optimize = - (dcb->funcs->get_vga_enabled_displays(dc->ctx->dc_bios) == ATOM_DISPLAY_LCD1_ACTIVE); - - /* if OS doesn't light up eDP and eDP link is available, we want to disable */ struct dc_link *edp_link_to_turnoff = NULL; + struct dc_link *edp_link = get_link_for_edp(dc); + bool can_eDP_fast_boot_optimize = false; + + if (edp_link) { + can_eDP_fast_boot_optimize = + edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc); + } if (can_eDP_fast_boot_optimize) { edp_link_to_turnoff = get_link_for_edp_not_in_use(dc, context); + /* if OS doesn't light up eDP and eDP link is available, we want to disable + * If resume from S4/S5, should optimization. + */ if (!edp_link_to_turnoff) dc->apply_edp_fast_boot_optimization = true; } @@ -1544,6 +1586,7 @@ static void dce110_set_displaymarks( pipe_ctx->plane_res.mi, context->bw.dce.nbp_state_change_wm_ns[num_pipes], context->bw.dce.stutter_exit_wm_ns[num_pipes], + context->bw.dce.stutter_entry_wm_ns[num_pipes], context->bw.dce.urgent_wm_ns[num_pipes], total_dest_line_time_ns); if (i == underlay_idx) { @@ -1569,6 +1612,7 @@ static void set_safe_displaymarks( MAX_WATERMARK, MAX_WATERMARK, MAX_WATERMARK, MAX_WATERMARK }; struct dce_watermarks nbp_marks = { SAFE_NBP_MARK, SAFE_NBP_MARK, SAFE_NBP_MARK, SAFE_NBP_MARK }; + struct dce_watermarks min_marks = { 0, 0, 0, 0}; for (i = 0; i < MAX_PIPES; i++) { if (res_ctx->pipe_ctx[i].stream == NULL || res_ctx->pipe_ctx[i].plane_res.mi == NULL) @@ -1578,6 +1622,7 @@ static void set_safe_displaymarks( res_ctx->pipe_ctx[i].plane_res.mi, nbp_marks, max_marks, + min_marks, max_marks, MAX_WATERMARK); @@ -1803,6 +1848,9 @@ static bool should_enable_fbc(struct dc *dc, } } + /* Pipe context should be found */ + ASSERT(pipe_ctx); + /* Only supports eDP */ if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP) return false; @@ -2221,74 +2269,6 @@ static void program_gamut_remap(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust); } - -/** - * TODO REMOVE, USE UPDATE INSTEAD - */ -static void set_plane_config( - const struct dc *dc, - struct pipe_ctx *pipe_ctx, - struct resource_context *res_ctx) -{ - struct mem_input *mi = pipe_ctx->plane_res.mi; - struct dc_plane_state *plane_state = pipe_ctx->plane_state; - struct xfm_grph_csc_adjustment adjust; - struct out_csc_color_matrix tbl_entry; - unsigned int i; - - memset(&adjust, 0, sizeof(adjust)); - memset(&tbl_entry, 0, sizeof(tbl_entry)); - adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; - - dce_enable_fe_clock(dc->hwseq, mi->inst, true); - - set_default_colors(pipe_ctx); - if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) { - tbl_entry.color_space = - pipe_ctx->stream->output_color_space; - - for (i = 0; i < 12; i++) - tbl_entry.regval[i] = - pipe_ctx->stream->csc_color_matrix.matrix[i]; - - pipe_ctx->plane_res.xfm->funcs->opp_set_csc_adjustment - (pipe_ctx->plane_res.xfm, &tbl_entry); - } - - if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) { - adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; - - for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) - adjust.temperature_matrix[i] = - pipe_ctx->stream->gamut_remap_matrix.matrix[i]; - } - - pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust); - - pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != 0; - program_scaler(dc, pipe_ctx); - - program_surface_visibility(dc, pipe_ctx); - - mi->funcs->mem_input_program_surface_config( - mi, - plane_state->format, - &plane_state->tiling_info, - &plane_state->plane_size, - plane_state->rotation, - NULL, - false); - if (mi->funcs->set_blank) - mi->funcs->set_blank(mi, pipe_ctx->plane_state->visible); - - if (dc->config.gpu_vm_support) - mi->funcs->mem_input_program_pte_vm( - pipe_ctx->plane_res.mi, - plane_state->format, - &plane_state->tiling_info, - plane_state->rotation); -} - static void update_plane_addr(const struct dc *dc, struct pipe_ctx *pipe_ctx) { @@ -2699,8 +2679,11 @@ static void dce110_program_front_end_for_pipe( struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct xfm_grph_csc_adjustment adjust; struct out_csc_color_matrix tbl_entry; +#if defined(CONFIG_DRM_AMD_DC_FBC) + unsigned int underlay_idx = dc->res_pool->underlay_pipe_index; +#endif unsigned int i; - struct dc_context *ctx = dc->ctx; + DC_LOGGER_INIT(); memset(&tbl_entry, 0, sizeof(tbl_entry)); if (dc->current_state) @@ -2740,7 +2723,9 @@ static void dce110_program_front_end_for_pipe( program_scaler(dc, pipe_ctx); #if defined(CONFIG_DRM_AMD_DC_FBC) - if (dc->fbc_compressor && old_pipe->stream) { + /* fbc not applicable on Underlay pipe */ + if (dc->fbc_compressor && old_pipe->stream && + pipe_ctx->pipe_idx != underlay_idx) { if (plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL) dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor); else @@ -2776,13 +2761,13 @@ static void dce110_program_front_end_for_pipe( dc->hwss.set_output_transfer_func(pipe_ctx, pipe_ctx->stream); DC_LOG_SURFACE( - "Pipe:%d 0x%x: addr hi:0x%x, " + "Pipe:%d %p: addr hi:0x%x, " "addr low:0x%x, " "src: %d, %d, %d," " %d; dst: %d, %d, %d, %d;" "clip: %d, %d, %d, %d\n", pipe_ctx->pipe_idx, - pipe_ctx->plane_state, + (void *) pipe_ctx->plane_state, pipe_ctx->plane_state->address.grph.addr.high_part, pipe_ctx->plane_state->address.grph.addr.low_part, pipe_ctx->plane_state->src_rect.x, @@ -2970,7 +2955,6 @@ static const struct hw_sequencer_funcs dce110_funcs = { .init_hw = init_hw, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dce110_apply_ctx_for_surface, - .set_plane_config = set_plane_config, .update_plane_addr = update_plane_addr, .update_pending_status = dce110_update_pending_status, .set_input_transfer_func = dce110_set_input_transfer_func, @@ -2993,7 +2977,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { .get_position = get_position, .set_static_screen_control = set_static_screen_control, .reset_hw_ctx_wrap = dce110_reset_hw_ctx_wrap, - .prog_pixclk_crtc_otg = dce110_prog_pixclk_crtc_otg, + .enable_stream_timing = dce110_enable_stream_timing, .setup_stereo = NULL, .set_avmute = dce110_set_avmute, .wait_for_mpcc_disconnect = dce110_wait_for_mpcc_disconnect, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c index 7bab8c6d2a73..0564c8e31252 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c @@ -923,6 +923,7 @@ void dce_mem_input_v_program_display_marks( struct mem_input *mem_input, struct dce_watermarks nbp, struct dce_watermarks stutter, + struct dce_watermarks stutter_enter, struct dce_watermarks urgent, uint32_t total_dest_line_time_ns) { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index b1f14be20fdf..ee33786bdef6 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -930,38 +930,6 @@ static enum dc_status dce110_add_stream_to_ctx( return result; } -static enum dc_status dce110_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *dc_stream, - struct dc_state *context) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - context->streams[0] = dc_stream; - dc_stream_retain(context->streams[0]); - context->stream_count++; - - result = resource_map_pool_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = resource_map_clock_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, context, dc_stream); - - if (result == DC_OK) { - validate_guaranteed_copy_streams( - context, dc->caps.max_streams); - result = resource_build_scaling_params_for_context(dc, context); - } - - if (result == DC_OK) - if (!dce110_validate_bandwidth(dc, context)) - result = DC_FAIL_BANDWIDTH_VALIDATE; - - return result; -} - static struct pipe_ctx *dce110_acquire_underlay( struct dc_state *context, const struct resource_pool *pool, @@ -1036,7 +1004,6 @@ static void dce110_destroy_resource_pool(struct resource_pool **pool) static const struct resource_funcs dce110_res_pool_funcs = { .destroy = dce110_destroy_resource_pool, .link_enc_create = dce110_link_encoder_create, - .validate_guaranteed = dce110_validate_guaranteed, .validate_bandwidth = dce110_validate_bandwidth, .validate_plane = dce110_validate_plane, .acquire_idle_pipe_for_layer = dce110_acquire_underlay, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c index be7153924a70..1b2fe0df347f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c @@ -431,14 +431,6 @@ void dce110_timing_generator_set_drr( 0, CRTC_V_TOTAL_CONTROL, CRTC_SET_V_TOTAL_MIN_MASK); - set_reg_field_value(v_total_min, - 0, - CRTC_V_TOTAL_MIN, - CRTC_V_TOTAL_MIN); - set_reg_field_value(v_total_max, - 0, - CRTC_V_TOTAL_MAX, - CRTC_V_TOTAL_MAX); set_reg_field_value(v_total_cntl, 0, CRTC_V_TOTAL_CONTROL, @@ -447,6 +439,14 @@ void dce110_timing_generator_set_drr( 0, CRTC_V_TOTAL_CONTROL, CRTC_V_TOTAL_MAX_SEL); + set_reg_field_value(v_total_min, + 0, + CRTC_V_TOTAL_MIN, + CRTC_V_TOTAL_MIN); + set_reg_field_value(v_total_max, + 0, + CRTC_V_TOTAL_MAX, + CRTC_V_TOTAL_MAX); set_reg_field_value(v_total_cntl, 0, CRTC_V_TOTAL_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c index 8ad04816e7d3..a3cef60380ed 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c @@ -648,12 +648,6 @@ static void dce110_timing_generator_v_disable_vga( return; } -static bool dce110_tg_v_is_blanked(struct timing_generator *tg) -{ - /* Signal comes from the primary pipe, underlay is never blanked. */ - return false; -} - /** ******************************************************************************************** * * DCE11 Timing Generator Constructor / Destructor @@ -670,7 +664,6 @@ static const struct timing_generator_funcs dce110_tg_v_funcs = { .set_early_control = dce110_timing_generator_v_set_early_control, .wait_for_state = dce110_timing_generator_v_wait_for_state, .set_blank = dce110_timing_generator_v_set_blank, - .is_blanked = dce110_tg_v_is_blanked, .set_colors = dce110_timing_generator_v_set_colors, .set_overscan_blank_color = dce110_timing_generator_v_set_overscan_color_black, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c index 8ba3c12fc608..a7dce060204f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c @@ -373,13 +373,13 @@ static void calculate_inits( struct rect *chroma_viewport) { inits->h_int_scale_ratio_luma = - dal_fixed31_32_u2d19(data->ratios.horz) << 5; + dc_fixpt_u2d19(data->ratios.horz) << 5; inits->v_int_scale_ratio_luma = - dal_fixed31_32_u2d19(data->ratios.vert) << 5; + dc_fixpt_u2d19(data->ratios.vert) << 5; inits->h_int_scale_ratio_chroma = - dal_fixed31_32_u2d19(data->ratios.horz_c) << 5; + dc_fixpt_u2d19(data->ratios.horz_c) << 5; inits->v_int_scale_ratio_chroma = - dal_fixed31_32_u2d19(data->ratios.vert_c) << 5; + dc_fixpt_u2d19(data->ratios.vert_c) << 5; inits->h_init_luma.integer = 1; inits->v_init_luma.integer = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index cd1e3f72c44e..00c0a1ef15eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -430,7 +430,7 @@ static struct stream_encoder *dce112_stream_encoder_create( if (!enc110) return NULL; - + dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, &stream_enc_regs[eng_id], &se_shift, &se_mask); @@ -867,38 +867,6 @@ enum dc_status dce112_add_stream_to_ctx( return result; } -enum dc_status dce112_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *stream, - struct dc_state *context) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - context->streams[0] = stream; - dc_stream_retain(context->streams[0]); - context->stream_count++; - - result = resource_map_pool_resources(dc, context, stream); - - if (result == DC_OK) - result = resource_map_phy_clock_resources(dc, context, stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, context, stream); - - if (result == DC_OK) { - validate_guaranteed_copy_streams( - context, dc->caps.max_streams); - result = resource_build_scaling_params_for_context(dc, context); - } - - if (result == DC_OK) - if (!dce112_validate_bandwidth(dc, context)) - result = DC_FAIL_BANDWIDTH_VALIDATE; - - return result; -} - enum dc_status dce112_validate_global( struct dc *dc, struct dc_state *context) @@ -921,7 +889,6 @@ static void dce112_destroy_resource_pool(struct resource_pool **pool) static const struct resource_funcs dce112_res_pool_funcs = { .destroy = dce112_destroy_resource_pool, .link_enc_create = dce112_link_encoder_create, - .validate_guaranteed = dce112_validate_guaranteed, .validate_bandwidth = dce112_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce112_add_stream_to_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h index d5c19d34eb0a..95a403396219 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h @@ -42,11 +42,6 @@ enum dc_status dce112_validate_with_context( struct dc_state *context, struct dc_state *old_context); -enum dc_status dce112_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *dc_stream, - struct dc_state *context); - bool dce112_validate_bandwidth( struct dc *dc, struct dc_state *context); diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 4659a4bfabaa..2d58daccc005 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -652,7 +652,7 @@ static struct mem_input *dce120_mem_input_create( return NULL; } - dce112_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); + dce120_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks); return &dce_mi->base; } @@ -684,7 +684,6 @@ static void dce120_destroy_resource_pool(struct resource_pool **pool) static const struct resource_funcs dce120_res_pool_funcs = { .destroy = dce120_destroy_resource_pool, .link_enc_create = dce120_link_encoder_create, - .validate_guaranteed = dce112_validate_guaranteed, .validate_bandwidth = dce112_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce112_add_stream_to_ctx @@ -815,14 +814,25 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) dm_pp_notify_wm_clock_changes(dc->ctx, &clk_ranges); } +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = dm_read_reg_soc15(ctx, mmCC_DC_PIPE_DIS, 0); + /* VG20 support max 6 pipes */ + value = value & 0x3f; + return value; +} + static bool construct( uint8_t num_virtual_links, struct dc *dc, struct dce110_resource_pool *pool) { unsigned int i; + int j; struct dc_context *ctx = dc->ctx; struct irq_service_init_data irq_init_data; + bool harvest_enabled = ASICREV_IS_VEGA20_P(ctx->asic_id.hw_internal_rev); + uint32_t pipe_fuses; ctx->dc_bios->regs = &bios_regs; @@ -916,28 +926,41 @@ static bool construct( if (!pool->base.irqs) goto irqs_create_fail; + /* retrieve valid pipe fuses */ + if (harvest_enabled) + pipe_fuses = read_pipe_fuses(ctx); + + /* index to valid pipe resource */ + j = 0; for (i = 0; i < pool->base.pipe_count; i++) { - pool->base.timing_generators[i] = + if (harvest_enabled) { + if ((pipe_fuses & (1 << i)) != 0) { + dm_error("DC: skip invalid pipe %d!\n", i); + continue; + } + } + + pool->base.timing_generators[j] = dce120_timing_generator_create( ctx, i, &dce120_tg_offsets[i]); - if (pool->base.timing_generators[i] == NULL) { + if (pool->base.timing_generators[j] == NULL) { BREAK_TO_DEBUGGER(); dm_error("DC: failed to create tg!\n"); goto controller_create_fail; } - pool->base.mis[i] = dce120_mem_input_create(ctx, i); + pool->base.mis[j] = dce120_mem_input_create(ctx, i); - if (pool->base.mis[i] == NULL) { + if (pool->base.mis[j] == NULL) { BREAK_TO_DEBUGGER(); dm_error( "DC: failed to create memory input!\n"); goto controller_create_fail; } - pool->base.ipps[i] = dce120_ipp_create(ctx, i); + pool->base.ipps[j] = dce120_ipp_create(ctx, i); if (pool->base.ipps[i] == NULL) { BREAK_TO_DEBUGGER(); dm_error( @@ -945,7 +968,7 @@ static bool construct( goto controller_create_fail; } - pool->base.transforms[i] = dce120_transform_create(ctx, i); + pool->base.transforms[j] = dce120_transform_create(ctx, i); if (pool->base.transforms[i] == NULL) { BREAK_TO_DEBUGGER(); dm_error( @@ -953,16 +976,23 @@ static bool construct( goto res_create_fail; } - pool->base.opps[i] = dce120_opp_create( + pool->base.opps[j] = dce120_opp_create( ctx, i); - if (pool->base.opps[i] == NULL) { + if (pool->base.opps[j] == NULL) { BREAK_TO_DEBUGGER(); dm_error( "DC: failed to create output pixel processor!\n"); } + + /* check next valid pipe */ + j++; } + /* valid pipe num */ + pool->base.pipe_count = j; + pool->base.timing_generator_count = j; + if (!resource_construct(num_virtual_links, dc, &pool->base, &res_create_funcs)) goto res_create_fail; diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c index 7bee78172d85..2ea490f8482e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c @@ -570,12 +570,6 @@ void dce120_timing_generator_set_drr( 0x180); } else { - CRTC_REG_UPDATE( - CRTC0_CRTC_V_TOTAL_MIN, - CRTC_V_TOTAL_MIN, 0); - CRTC_REG_UPDATE( - CRTC0_CRTC_V_TOTAL_MAX, - CRTC_V_TOTAL_MAX, 0); CRTC_REG_SET_N(CRTC0_CRTC_V_TOTAL_CONTROL, 5, FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_V_TOTAL_MIN_SEL), 0, FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_V_TOTAL_MAX_SEL), 0, @@ -583,6 +577,12 @@ void dce120_timing_generator_set_drr( FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_FORCE_LOCK_TO_MASTER_VSYNC), 0, FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_SET_V_TOTAL_MIN_MASK), 0); CRTC_REG_UPDATE( + CRTC0_CRTC_V_TOTAL_MIN, + CRTC_V_TOTAL_MIN, 0); + CRTC_REG_UPDATE( + CRTC0_CRTC_V_TOTAL_MAX, + CRTC_V_TOTAL_MAX, 0); + CRTC_REG_UPDATE( CRTC0_CRTC_STATIC_SCREEN_CONTROL, CRTC_STATIC_SCREEN_EVENT_MASK, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 5d854a37a978..48a068964722 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -691,23 +691,6 @@ static void destruct(struct dce110_resource_pool *pool) } } -static enum dc_status build_mapped_resource( - const struct dc *dc, - struct dc_state *context, - struct dc_stream_state *stream) -{ - struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - dce110_resource_build_pipe_hw_param(pipe_ctx); - - resource_build_info_frame(pipe_ctx); - - return DC_OK; -} - bool dce80_validate_bandwidth( struct dc *dc, struct dc_state *context) @@ -749,37 +732,6 @@ enum dc_status dce80_validate_global( return DC_OK; } -enum dc_status dce80_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *dc_stream, - struct dc_state *context) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - context->streams[0] = dc_stream; - dc_stream_retain(context->streams[0]); - context->stream_count++; - - result = resource_map_pool_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = resource_map_clock_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, context, dc_stream); - - if (result == DC_OK) { - validate_guaranteed_copy_streams( - context, dc->caps.max_streams); - result = resource_build_scaling_params_for_context(dc, context); - } - - if (result == DC_OK) - result = dce80_validate_bandwidth(dc, context); - - return result; -} - static void dce80_destroy_resource_pool(struct resource_pool **pool) { struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool); @@ -792,7 +744,6 @@ static void dce80_destroy_resource_pool(struct resource_pool **pool) static const struct resource_funcs dce80_res_pool_funcs = { .destroy = dce80_destroy_resource_pool, .link_enc_create = dce80_link_encoder_create, - .validate_guaranteed = dce80_validate_guaranteed, .validate_bandwidth = dce80_validate_bandwidth, .validate_plane = dce100_validate_plane, .add_stream_to_ctx = dce100_add_stream_to_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 5469bdfe19f3..84f52c63d95c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -26,7 +26,7 @@ DCN10 = dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \ dcn10_dpp.o dcn10_opp.o dcn10_optc.o \ dcn10_hubp.o dcn10_mpc.o \ dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \ - dcn10_hubbub.o + dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 881a1bff94d2..5d95a997fd9f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -169,7 +169,7 @@ bool cm_helper_convert_to_custom_float( } if (fixpoint == true) - arr_points[1].custom_float_y = dal_fixed31_32_clamp_u0d14(arr_points[1].y); + arr_points[1].custom_float_y = dc_fixpt_clamp_u0d14(arr_points[1].y); else if (!convert_to_custom_float_format(arr_points[1].y, &fmt, &arr_points[1].custom_float_y)) { BREAK_TO_DEBUGGER(); @@ -327,19 +327,19 @@ bool cm_helper_translate_curve_to_hw_format( rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; - arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(region_start)); - arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(region_end)); + arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2), + dc_fixpt_from_int(region_start)); + arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2), + dc_fixpt_from_int(region_end)); y_r = rgb_resulted[0].red; y_g = rgb_resulted[0].green; y_b = rgb_resulted[0].blue; - y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b)); + y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b)); arr_points[0].y = y1_min; - arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, arr_points[0].x); + arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x); y_r = rgb_resulted[hw_points - 1].red; y_g = rgb_resulted[hw_points - 1].green; y_b = rgb_resulted[hw_points - 1].blue; @@ -347,35 +347,35 @@ bool cm_helper_translate_curve_to_hw_format( /* see comment above, m_arrPoints[1].y should be the Y value for the * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) */ - y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b)); + y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b)); arr_points[1].y = y3_max; - arr_points[1].slope = dal_fixed31_32_zero; + arr_points[1].slope = dc_fixpt_zero; if (output_tf->tf == TRANSFER_FUNCTION_PQ) { /* for PQ, we want to have a straight line from last HW X point, * and the slope to be such that we hit 1.0 at 10000 nits. */ const struct fixed31_32 end_value = - dal_fixed31_32_from_int(125); + dc_fixpt_from_int(125); - arr_points[1].slope = dal_fixed31_32_div( - dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y), - dal_fixed31_32_sub(end_value, arr_points[1].x)); + arr_points[1].slope = dc_fixpt_div( + dc_fixpt_sub(dc_fixpt_one, arr_points[1].y), + dc_fixpt_sub(end_value, arr_points[1].x)); } lut_params->hw_points_num = hw_points; - i = 1; - for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) { + k = 0; + for (i = 1; i < MAX_REGIONS_NUMBER; i++) { if (seg_distr[k] != -1) { lut_params->arr_curve_points[k].segments_num = seg_distr[k]; lut_params->arr_curve_points[i].offset = lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); } - i++; + k++; } if (seg_distr[k] != -1) @@ -386,24 +386,24 @@ bool cm_helper_translate_curve_to_hw_format( i = 1; while (i != hw_points + 1) { - if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red)) + if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) rgb_plus_1->red = rgb->red; - if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green)) + if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) rgb_plus_1->green = rgb->green; - if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue)) + if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) rgb_plus_1->blue = rgb->blue; - rgb->delta_red = dal_fixed31_32_sub(rgb_plus_1->red, rgb->red); - rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green); - rgb->delta_blue = dal_fixed31_32_sub(rgb_plus_1->blue, rgb->blue); + rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); + rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); + rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); if (fixpoint == true) { - rgb->delta_red_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_red); - rgb->delta_green_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_green); - rgb->delta_blue_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_blue); - rgb->red_reg = dal_fixed31_32_clamp_u0d14(rgb->red); - rgb->green_reg = dal_fixed31_32_clamp_u0d14(rgb->green); - rgb->blue_reg = dal_fixed31_32_clamp_u0d14(rgb->blue); + rgb->delta_red_reg = dc_fixpt_clamp_u0d10(rgb->delta_red); + rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green); + rgb->delta_blue_reg = dc_fixpt_clamp_u0d10(rgb->delta_blue); + rgb->red_reg = dc_fixpt_clamp_u0d14(rgb->red); + rgb->green_reg = dc_fixpt_clamp_u0d14(rgb->green); + rgb->blue_reg = dc_fixpt_clamp_u0d14(rgb->blue); } ++rgb_plus_1; @@ -489,19 +489,19 @@ bool cm_helper_translate_curve_to_degamma_hw_format( rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; - arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(region_start)); - arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(region_end)); + arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2), + dc_fixpt_from_int(region_start)); + arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2), + dc_fixpt_from_int(region_end)); y_r = rgb_resulted[0].red; y_g = rgb_resulted[0].green; y_b = rgb_resulted[0].blue; - y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b)); + y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b)); arr_points[0].y = y1_min; - arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, arr_points[0].x); + arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x); y_r = rgb_resulted[hw_points - 1].red; y_g = rgb_resulted[hw_points - 1].green; y_b = rgb_resulted[hw_points - 1].blue; @@ -509,35 +509,35 @@ bool cm_helper_translate_curve_to_degamma_hw_format( /* see comment above, m_arrPoints[1].y should be the Y value for the * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) */ - y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b)); + y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b)); arr_points[1].y = y3_max; - arr_points[1].slope = dal_fixed31_32_zero; + arr_points[1].slope = dc_fixpt_zero; if (output_tf->tf == TRANSFER_FUNCTION_PQ) { /* for PQ, we want to have a straight line from last HW X point, * and the slope to be such that we hit 1.0 at 10000 nits. */ const struct fixed31_32 end_value = - dal_fixed31_32_from_int(125); + dc_fixpt_from_int(125); - arr_points[1].slope = dal_fixed31_32_div( - dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y), - dal_fixed31_32_sub(end_value, arr_points[1].x)); + arr_points[1].slope = dc_fixpt_div( + dc_fixpt_sub(dc_fixpt_one, arr_points[1].y), + dc_fixpt_sub(end_value, arr_points[1].x)); } lut_params->hw_points_num = hw_points; - i = 1; - for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) { + k = 0; + for (i = 1; i < MAX_REGIONS_NUMBER; i++) { if (seg_distr[k] != -1) { lut_params->arr_curve_points[k].segments_num = seg_distr[k]; lut_params->arr_curve_points[i].offset = lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); } - i++; + k++; } if (seg_distr[k] != -1) @@ -548,16 +548,16 @@ bool cm_helper_translate_curve_to_degamma_hw_format( i = 1; while (i != hw_points + 1) { - if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red)) + if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) rgb_plus_1->red = rgb->red; - if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green)) + if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) rgb_plus_1->green = rgb->green; - if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue)) + if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) rgb_plus_1->blue = rgb->blue; - rgb->delta_red = dal_fixed31_32_sub(rgb_plus_1->red, rgb->red); - rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green); - rgb->delta_blue = dal_fixed31_32_sub(rgb_plus_1->blue, rgb->blue); + rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); + rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); + rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); ++rgb_plus_1; ++rgb; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index e305c28c98de..c69fa4bfab0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -98,6 +98,30 @@ enum gamut_remap_select { GAMUT_REMAP_COMB_COEFF }; +void dpp_read_state(struct dpp *dpp_base, + struct dcn_dpp_state *s) +{ + struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); + + REG_GET(CM_IGAM_CONTROL, + CM_IGAM_LUT_MODE, &s->igam_lut_mode); + REG_GET(CM_IGAM_CONTROL, + CM_IGAM_INPUT_FORMAT, &s->igam_input_format); + REG_GET(CM_DGAM_CONTROL, + CM_DGAM_LUT_MODE, &s->dgam_lut_mode); + REG_GET(CM_RGAM_CONTROL, + CM_RGAM_LUT_MODE, &s->rgam_lut_mode); + REG_GET(CM_GAMUT_REMAP_CONTROL, + CM_GAMUT_REMAP_MODE, &s->gamut_remap_mode); + + s->gamut_remap_c11_c12 = REG_READ(CM_GAMUT_REMAP_C11_C12); + s->gamut_remap_c13_c14 = REG_READ(CM_GAMUT_REMAP_C13_C14); + s->gamut_remap_c21_c22 = REG_READ(CM_GAMUT_REMAP_C21_C22); + s->gamut_remap_c23_c24 = REG_READ(CM_GAMUT_REMAP_C23_C24); + s->gamut_remap_c31_c32 = REG_READ(CM_GAMUT_REMAP_C31_C32); + s->gamut_remap_c33_c34 = REG_READ(CM_GAMUT_REMAP_C33_C34); +} + /* Program gamut remap in bypass mode */ void dpp_set_gamut_remap_bypass(struct dcn10_dpp *dpp) { @@ -106,10 +130,9 @@ void dpp_set_gamut_remap_bypass(struct dcn10_dpp *dpp) /* Gamut remap in bypass */ } -#define IDENTITY_RATIO(ratio) (dal_fixed31_32_u2d19(ratio) == (1 << 19)) - +#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19)) -bool dpp_get_optimal_number_of_taps( +static bool dpp_get_optimal_number_of_taps( struct dpp *dpp, struct scaler_data *scl_data, const struct scaling_taps *in_taps) @@ -121,6 +144,18 @@ bool dpp_get_optimal_number_of_taps( else pixel_width = scl_data->viewport.width; + /* Some ASICs does not support FP16 scaling, so we reject modes require this*/ + if (scl_data->viewport.width != scl_data->h_active && + scl_data->viewport.height != scl_data->v_active && + dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT && + scl_data->format == PIXEL_FORMAT_FP16) + return false; + + if (scl_data->viewport.width > scl_data->h_active && + dpp->ctx->dc->debug.max_downscale_src_width != 0 && + scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width) + return false; + /* TODO: add lb check */ /* No support for programming ratio of 4, drop to 3.99999.. */ @@ -257,7 +292,7 @@ void dpp1_cnv_setup ( struct dpp *dpp_base, enum surface_pixel_format format, enum expansion_mode mode, - struct csc_transform input_csc_color_matrix, + struct dc_csc_transform input_csc_color_matrix, enum dc_color_space input_color_space) { uint32_t pixel_format; @@ -416,7 +451,7 @@ void dpp1_set_cursor_position( if (src_x_offset >= (int)param->viewport_width) cur_en = 0; /* not visible beyond right edge*/ - if (src_x_offset + (int)width < 0) + if (src_x_offset + (int)width <= 0) cur_en = 0; /* not visible beyond left edge*/ REG_UPDATE(CURSOR0_CONTROL, @@ -443,6 +478,7 @@ void dpp1_dppclk_control( } static const struct dpp_funcs dcn10_dpp_funcs = { + .dpp_read_state = dpp_read_state, .dpp_reset = dpp_reset, .dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale, .dpp_get_optimal_number_of_taps = dpp_get_optimal_number_of_taps, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h index 17b062a8f88a..e862cafa6501 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h @@ -44,6 +44,10 @@ #define TF_REG_LIST_DCN(id) \ SRI(CM_GAMUT_REMAP_CONTROL, CM, id),\ SRI(CM_GAMUT_REMAP_C11_C12, CM, id),\ + SRI(CM_GAMUT_REMAP_C13_C14, CM, id),\ + SRI(CM_GAMUT_REMAP_C21_C22, CM, id),\ + SRI(CM_GAMUT_REMAP_C23_C24, CM, id),\ + SRI(CM_GAMUT_REMAP_C31_C32, CM, id),\ SRI(CM_GAMUT_REMAP_C33_C34, CM, id),\ SRI(DSCL_EXT_OVERSCAN_LEFT_RIGHT, DSCL, id), \ SRI(DSCL_EXT_OVERSCAN_TOP_BOTTOM, DSCL, id), \ @@ -108,6 +112,8 @@ SRI(CM_DGAM_LUT_DATA, CM, id), \ SRI(CM_CONTROL, CM, id), \ SRI(CM_DGAM_CONTROL, CM, id), \ + SRI(CM_TEST_DEBUG_INDEX, CM, id), \ + SRI(CM_TEST_DEBUG_DATA, CM, id), \ SRI(FORMAT_CONTROL, CNVC_CFG, id), \ SRI(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \ SRI(CURSOR0_CONTROL, CNVC_CUR, id), \ @@ -175,6 +181,14 @@ TF_SF(CM0_CM_GAMUT_REMAP_CONTROL, CM_GAMUT_REMAP_MODE, mask_sh),\ TF_SF(CM0_CM_GAMUT_REMAP_C11_C12, CM_GAMUT_REMAP_C11, mask_sh),\ TF_SF(CM0_CM_GAMUT_REMAP_C11_C12, CM_GAMUT_REMAP_C12, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C13_C14, CM_GAMUT_REMAP_C13, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C13_C14, CM_GAMUT_REMAP_C14, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C21_C22, CM_GAMUT_REMAP_C21, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C21_C22, CM_GAMUT_REMAP_C22, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C23_C24, CM_GAMUT_REMAP_C23, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C23_C24, CM_GAMUT_REMAP_C24, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C31_C32, CM_GAMUT_REMAP_C31, mask_sh),\ + TF_SF(CM0_CM_GAMUT_REMAP_C31_C32, CM_GAMUT_REMAP_C32, mask_sh),\ TF_SF(CM0_CM_GAMUT_REMAP_C33_C34, CM_GAMUT_REMAP_C33, mask_sh),\ TF_SF(CM0_CM_GAMUT_REMAP_C33_C34, CM_GAMUT_REMAP_C34, mask_sh),\ TF_SF(DSCL0_DSCL_EXT_OVERSCAN_LEFT_RIGHT, EXT_OVERSCAN_LEFT, mask_sh),\ @@ -300,6 +314,7 @@ TF_SF(CM0_CM_DGAM_LUT_INDEX, CM_DGAM_LUT_INDEX, mask_sh), \ TF_SF(CM0_CM_DGAM_LUT_DATA, CM_DGAM_LUT_DATA, mask_sh), \ TF_SF(CM0_CM_DGAM_CONTROL, CM_DGAM_LUT_MODE, mask_sh), \ + TF_SF(CM0_CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_INDEX, mask_sh), \ TF_SF(CNVC_CFG0_FORMAT_CONTROL, CNVC_BYPASS, mask_sh), \ TF2_SF(CNVC_CFG0, FORMAT_CONTROL__ALPHA_EN, mask_sh), \ TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_EXPANSION_MODE, mask_sh), \ @@ -417,6 +432,41 @@ TF_SF(CURSOR0_CURSOR_CONTROL, CURSOR_ENABLE, mask_sh), \ TF_SF(DPP_TOP0_DPP_CONTROL, DPPCLK_RATE_CONTROL, mask_sh) +/* + * + DCN1 CM debug status register definition + + register :ID9_CM_STATUS do + implement_ref :cm + map to: :cmdebugind, at: j + width 32 + disclosure NEVER + + field :ID9_VUPDATE_CFG, [0], R + field :ID9_IGAM_LUT_MODE, [2..1], R + field :ID9_BNS_BYPASS, [3], R + field :ID9_ICSC_MODE, [5..4], R + field :ID9_DGAM_LUT_MODE, [8..6], R + field :ID9_HDR_BYPASS, [9], R + field :ID9_GAMUT_REMAP_MODE, [11..10], R + field :ID9_RGAM_LUT_MODE, [14..12], R + #1 free bit + field :ID9_OCSC_MODE, [18..16], R + field :ID9_DENORM_MODE, [21..19], R + field :ID9_ROUND_TRUNC_MODE, [25..22], R + field :ID9_DITHER_EN, [26], R + field :ID9_DITHER_MODE, [28..27], R + end +*/ + +#define TF_DEBUG_REG_LIST_SH_DCN10 \ + .CM_TEST_DEBUG_DATA_ID9_ICSC_MODE = 4, \ + .CM_TEST_DEBUG_DATA_ID9_OCSC_MODE = 16 + +#define TF_DEBUG_REG_LIST_MASK_DCN10 \ + .CM_TEST_DEBUG_DATA_ID9_ICSC_MODE = 0x30, \ + .CM_TEST_DEBUG_DATA_ID9_OCSC_MODE = 0x70000 + #define TF_REG_FIELD_LIST(type) \ type EXT_OVERSCAN_LEFT; \ type EXT_OVERSCAN_RIGHT; \ @@ -486,6 +536,14 @@ type CM_GAMUT_REMAP_MODE; \ type CM_GAMUT_REMAP_C11; \ type CM_GAMUT_REMAP_C12; \ + type CM_GAMUT_REMAP_C13; \ + type CM_GAMUT_REMAP_C14; \ + type CM_GAMUT_REMAP_C21; \ + type CM_GAMUT_REMAP_C22; \ + type CM_GAMUT_REMAP_C23; \ + type CM_GAMUT_REMAP_C24; \ + type CM_GAMUT_REMAP_C31; \ + type CM_GAMUT_REMAP_C32; \ type CM_GAMUT_REMAP_C33; \ type CM_GAMUT_REMAP_C34; \ type CM_COMA_C11; \ @@ -1010,6 +1068,9 @@ type CUR0_EXPANSION_MODE; \ type CUR0_ENABLE; \ type CM_BYPASS; \ + type CM_TEST_DEBUG_INDEX; \ + type CM_TEST_DEBUG_DATA_ID9_ICSC_MODE; \ + type CM_TEST_DEBUG_DATA_ID9_OCSC_MODE;\ type FORMAT_CONTROL__ALPHA_EN; \ type CUR0_COLOR0; \ type CUR0_COLOR1; \ @@ -1054,6 +1115,10 @@ struct dcn_dpp_mask { uint32_t RECOUT_SIZE; \ uint32_t CM_GAMUT_REMAP_CONTROL; \ uint32_t CM_GAMUT_REMAP_C11_C12; \ + uint32_t CM_GAMUT_REMAP_C13_C14; \ + uint32_t CM_GAMUT_REMAP_C21_C22; \ + uint32_t CM_GAMUT_REMAP_C23_C24; \ + uint32_t CM_GAMUT_REMAP_C31_C32; \ uint32_t CM_GAMUT_REMAP_C33_C34; \ uint32_t CM_COMA_C11_C12; \ uint32_t CM_COMA_C33_C34; \ @@ -1255,6 +1320,8 @@ struct dcn_dpp_mask { uint32_t CM_IGAM_LUT_RW_CONTROL; \ uint32_t CM_IGAM_LUT_RW_INDEX; \ uint32_t CM_IGAM_LUT_SEQ_COLOR; \ + uint32_t CM_TEST_DEBUG_INDEX; \ + uint32_t CM_TEST_DEBUG_DATA; \ uint32_t FORMAT_CONTROL; \ uint32_t CNVC_SURFACE_PIXEL_FORMAT; \ uint32_t CURSOR_CONTROL; \ @@ -1289,8 +1356,8 @@ struct dcn10_dpp { enum dcn10_input_csc_select { INPUT_CSC_SELECT_BYPASS = 0, - INPUT_CSC_SELECT_ICSC, - INPUT_CSC_SELECT_COMA + INPUT_CSC_SELECT_ICSC = 1, + INPUT_CSC_SELECT_COMA = 2 }; void dpp1_set_cursor_attributes( @@ -1357,12 +1424,11 @@ void dpp1_set_degamma( enum ipp_degamma_mode mode); void dpp1_set_degamma_pwl(struct dpp *dpp_base, - const struct pwl_params *params); + const struct pwl_params *params); + -bool dpp_get_optimal_number_of_taps( - struct dpp *dpp, - struct scaler_data *scl_data, - const struct scaling_taps *in_taps); +void dpp_read_state(struct dpp *dpp_base, + struct dcn_dpp_state *s); void dpp_reset(struct dpp *dpp_base); @@ -1408,7 +1474,7 @@ void dpp1_cnv_setup ( struct dpp *dpp_base, enum surface_pixel_format format, enum expansion_mode mode, - struct csc_transform input_csc_color_matrix, + struct dc_csc_transform input_csc_color_matrix, enum dc_color_space input_color_space); void dpp1_full_bypass(struct dpp *dpp_base); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index fb32975e4b67..116977eb24e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -216,41 +216,55 @@ static void dpp1_cm_program_color_matrix( struct dcn10_dpp *dpp, const uint16_t *regval) { - uint32_t mode; + uint32_t ocsc_mode; + uint32_t cur_mode; struct color_matrices_reg gam_regs; - REG_GET(CM_OCSC_CONTROL, CM_OCSC_MODE, &mode); - if (regval == NULL) { BREAK_TO_DEBUGGER(); return; } - mode = 4; + + /* determine which CSC matrix (ocsc or comb) we are using + * currently. select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + REG_SET(CM_TEST_DEBUG_INDEX, 0, + CM_TEST_DEBUG_INDEX, 9); + + REG_GET(CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_ID9_OCSC_MODE, &cur_mode); + + if (cur_mode != 4) + ocsc_mode = 4; + else + ocsc_mode = 5; + + gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_OCSC_C11; gam_regs.masks.csc_c11 = dpp->tf_mask->CM_OCSC_C11; gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_OCSC_C12; gam_regs.masks.csc_c12 = dpp->tf_mask->CM_OCSC_C12; - if (mode == 4) { + if (ocsc_mode == 4) { gam_regs.csc_c11_c12 = REG(CM_OCSC_C11_C12); gam_regs.csc_c33_c34 = REG(CM_OCSC_C33_C34); - cm_helper_program_color_matrices( - dpp->base.ctx, - regval, - &gam_regs); - } else { gam_regs.csc_c11_c12 = REG(CM_COMB_C11_C12); gam_regs.csc_c33_c34 = REG(CM_COMB_C33_C34); - cm_helper_program_color_matrices( - dpp->base.ctx, - regval, - &gam_regs); } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &gam_regs); + + REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); + } void dpp1_cm_set_output_csc_default( @@ -260,15 +274,14 @@ void dpp1_cm_set_output_csc_default( struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); const uint16_t *regval = NULL; int arr_size; - uint32_t ocsc_mode = 4; regval = find_color_matrix(colorspace, &arr_size); if (regval == NULL) { BREAK_TO_DEBUGGER(); return; } + dpp1_cm_program_color_matrix(dpp, regval); - REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); } static void dpp1_cm_get_reg_field( @@ -329,9 +342,8 @@ void dpp1_cm_set_output_csc_adjustment( const uint16_t *regval) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - uint32_t ocsc_mode = 4; + dpp1_cm_program_color_matrix(dpp, regval); - REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); } void dpp1_cm_power_on_regamma_lut(struct dpp *dpp_base, @@ -437,17 +449,18 @@ void dpp1_cm_program_regamma_lutb_settings( void dpp1_program_input_csc( struct dpp *dpp_base, enum dc_color_space color_space, - enum dcn10_input_csc_select select, + enum dcn10_input_csc_select input_select, const struct out_csc_color_matrix *tbl_entry) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); int i; int arr_size = sizeof(dcn10_input_csc_matrix)/sizeof(struct dcn10_input_csc_matrix); const uint16_t *regval = NULL; - uint32_t selection = 1; + uint32_t cur_select = 0; + enum dcn10_input_csc_select select; struct color_matrices_reg gam_regs; - if (select == INPUT_CSC_SELECT_BYPASS) { + if (input_select == INPUT_CSC_SELECT_BYPASS) { REG_SET(CM_ICSC_CONTROL, 0, CM_ICSC_MODE, 0); return; } @@ -467,36 +480,45 @@ void dpp1_program_input_csc( regval = tbl_entry->regval; } - if (select == INPUT_CSC_SELECT_COMA) - selection = 2; - REG_SET(CM_ICSC_CONTROL, 0, - CM_ICSC_MODE, selection); + /* determine which CSC matrix (icsc or coma) we are using + * currently. select the alternate set to double buffer + * the CSC update so CSC is updated on frame boundary + */ + REG_SET(CM_TEST_DEBUG_INDEX, 0, + CM_TEST_DEBUG_INDEX, 9); + + REG_GET(CM_TEST_DEBUG_DATA, + CM_TEST_DEBUG_DATA_ID9_ICSC_MODE, &cur_select); + + if (cur_select != INPUT_CSC_SELECT_ICSC) + select = INPUT_CSC_SELECT_ICSC; + else + select = INPUT_CSC_SELECT_COMA; gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_ICSC_C11; gam_regs.masks.csc_c11 = dpp->tf_mask->CM_ICSC_C11; gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_ICSC_C12; gam_regs.masks.csc_c12 = dpp->tf_mask->CM_ICSC_C12; - if (select == INPUT_CSC_SELECT_ICSC) { gam_regs.csc_c11_c12 = REG(CM_ICSC_C11_C12); gam_regs.csc_c33_c34 = REG(CM_ICSC_C33_C34); - cm_helper_program_color_matrices( - dpp->base.ctx, - regval, - &gam_regs); } else { gam_regs.csc_c11_c12 = REG(CM_COMA_C11_C12); gam_regs.csc_c33_c34 = REG(CM_COMA_C33_C34); - cm_helper_program_color_matrices( - dpp->base.ctx, - regval, - &gam_regs); } + + cm_helper_program_color_matrices( + dpp->base.ctx, + regval, + &gam_regs); + + REG_SET(CM_ICSC_CONTROL, 0, + CM_ICSC_MODE, select); } //keep here for now, decide multi dce support later @@ -789,13 +811,13 @@ void dpp1_program_input_lut( REG_UPDATE(CM_IGAM_LUT_RW_INDEX, CM_IGAM_LUT_RW_INDEX, 0); for (i = 0; i < gamma->num_entries; i++) { REG_SET(CM_IGAM_LUT_SEQ_COLOR, 0, CM_IGAM_LUT_SEQ_COLOR, - dal_fixed31_32_round( + dc_fixpt_round( gamma->entries.red[i])); REG_SET(CM_IGAM_LUT_SEQ_COLOR, 0, CM_IGAM_LUT_SEQ_COLOR, - dal_fixed31_32_round( + dc_fixpt_round( gamma->entries.green[i])); REG_SET(CM_IGAM_LUT_SEQ_COLOR, 0, CM_IGAM_LUT_SEQ_COLOR, - dal_fixed31_32_round( + dc_fixpt_round( gamma->entries.blue[i])); } // Power off LUT memory diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index 3eb824debf43..f862fd148cca 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -169,7 +169,7 @@ static enum dscl_mode_sel dpp1_dscl_get_dscl_mode( const struct scaler_data *data, bool dbg_always_scale) { - const long long one = dal_fixed31_32_one.value; + const long long one = dc_fixpt_one.value; if (dpp_base->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) { /* DSCL is processing data in fixed format */ @@ -464,8 +464,8 @@ static enum lb_memory_config dpp1_dscl_find_lb_memory_config(struct dcn10_dpp *d int num_part_y, num_part_c; int vtaps = scl_data->taps.v_taps; int vtaps_c = scl_data->taps.v_taps_c; - int ceil_vratio = dal_fixed31_32_ceil(scl_data->ratios.vert); - int ceil_vratio_c = dal_fixed31_32_ceil(scl_data->ratios.vert_c); + int ceil_vratio = dc_fixpt_ceil(scl_data->ratios.vert); + int ceil_vratio_c = dc_fixpt_ceil(scl_data->ratios.vert_c); enum lb_memory_config mem_cfg = LB_MEMORY_CONFIG_0; if (dpp->base.ctx->dc->debug.use_max_lb) @@ -565,52 +565,52 @@ static void dpp1_dscl_set_manual_ratio_init( uint32_t init_int = 0; REG_SET(SCL_HORZ_FILTER_SCALE_RATIO, 0, - SCL_H_SCALE_RATIO, dal_fixed31_32_u2d19(data->ratios.horz) << 5); + SCL_H_SCALE_RATIO, dc_fixpt_u3d19(data->ratios.horz) << 5); REG_SET(SCL_VERT_FILTER_SCALE_RATIO, 0, - SCL_V_SCALE_RATIO, dal_fixed31_32_u2d19(data->ratios.vert) << 5); + SCL_V_SCALE_RATIO, dc_fixpt_u3d19(data->ratios.vert) << 5); REG_SET(SCL_HORZ_FILTER_SCALE_RATIO_C, 0, - SCL_H_SCALE_RATIO_C, dal_fixed31_32_u2d19(data->ratios.horz_c) << 5); + SCL_H_SCALE_RATIO_C, dc_fixpt_u3d19(data->ratios.horz_c) << 5); REG_SET(SCL_VERT_FILTER_SCALE_RATIO_C, 0, - SCL_V_SCALE_RATIO_C, dal_fixed31_32_u2d19(data->ratios.vert_c) << 5); + SCL_V_SCALE_RATIO_C, dc_fixpt_u3d19(data->ratios.vert_c) << 5); /* * 0.24 format for fraction, first five bits zeroed */ - init_frac = dal_fixed31_32_u0d19(data->inits.h) << 5; - init_int = dal_fixed31_32_floor(data->inits.h); + init_frac = dc_fixpt_u0d19(data->inits.h) << 5; + init_int = dc_fixpt_floor(data->inits.h); REG_SET_2(SCL_HORZ_FILTER_INIT, 0, SCL_H_INIT_FRAC, init_frac, SCL_H_INIT_INT, init_int); - init_frac = dal_fixed31_32_u0d19(data->inits.h_c) << 5; - init_int = dal_fixed31_32_floor(data->inits.h_c); + init_frac = dc_fixpt_u0d19(data->inits.h_c) << 5; + init_int = dc_fixpt_floor(data->inits.h_c); REG_SET_2(SCL_HORZ_FILTER_INIT_C, 0, SCL_H_INIT_FRAC_C, init_frac, SCL_H_INIT_INT_C, init_int); - init_frac = dal_fixed31_32_u0d19(data->inits.v) << 5; - init_int = dal_fixed31_32_floor(data->inits.v); + init_frac = dc_fixpt_u0d19(data->inits.v) << 5; + init_int = dc_fixpt_floor(data->inits.v); REG_SET_2(SCL_VERT_FILTER_INIT, 0, SCL_V_INIT_FRAC, init_frac, SCL_V_INIT_INT, init_int); - init_frac = dal_fixed31_32_u0d19(data->inits.v_bot) << 5; - init_int = dal_fixed31_32_floor(data->inits.v_bot); + init_frac = dc_fixpt_u0d19(data->inits.v_bot) << 5; + init_int = dc_fixpt_floor(data->inits.v_bot); REG_SET_2(SCL_VERT_FILTER_INIT_BOT, 0, SCL_V_INIT_FRAC_BOT, init_frac, SCL_V_INIT_INT_BOT, init_int); - init_frac = dal_fixed31_32_u0d19(data->inits.v_c) << 5; - init_int = dal_fixed31_32_floor(data->inits.v_c); + init_frac = dc_fixpt_u0d19(data->inits.v_c) << 5; + init_int = dc_fixpt_floor(data->inits.v_c); REG_SET_2(SCL_VERT_FILTER_INIT_C, 0, SCL_V_INIT_FRAC_C, init_frac, SCL_V_INIT_INT_C, init_int); - init_frac = dal_fixed31_32_u0d19(data->inits.v_c_bot) << 5; - init_int = dal_fixed31_32_floor(data->inits.v_c_bot); + init_frac = dc_fixpt_u0d19(data->inits.v_c_bot) << 5; + init_int = dc_fixpt_floor(data->inits.v_c_bot); REG_SET_2(SCL_VERT_FILTER_INIT_BOT_C, 0, SCL_V_INIT_FRAC_BOT_C, init_frac, SCL_V_INIT_INT_BOT_C, init_int); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index 738f67ffd1b4..943143efbb82 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -476,8 +476,235 @@ void hubbub1_toggle_watermark_change_req(struct hubbub *hubbub) DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, watermark_change_req); } +void hubbub1_soft_reset(struct hubbub *hubbub, bool reset) +{ + uint32_t reset_en = reset ? 1 : 0; + + REG_UPDATE(DCHUBBUB_SOFT_RESET, + DCHUBBUB_GLOBAL_SOFT_RESET, reset_en); +} + +static bool hubbub1_dcc_support_swizzle( + enum swizzle_mode_values swizzle, + unsigned int bytes_per_element, + enum segment_order *segment_order_horz, + enum segment_order *segment_order_vert) +{ + bool standard_swizzle = false; + bool display_swizzle = false; + + switch (swizzle) { + case DC_SW_4KB_S: + case DC_SW_64KB_S: + case DC_SW_VAR_S: + case DC_SW_4KB_S_X: + case DC_SW_64KB_S_X: + case DC_SW_VAR_S_X: + standard_swizzle = true; + break; + case DC_SW_4KB_D: + case DC_SW_64KB_D: + case DC_SW_VAR_D: + case DC_SW_4KB_D_X: + case DC_SW_64KB_D_X: + case DC_SW_VAR_D_X: + display_swizzle = true; + break; + default: + break; + } + + if (bytes_per_element == 1 && standard_swizzle) { + *segment_order_horz = segment_order__contiguous; + *segment_order_vert = segment_order__na; + return true; + } + if (bytes_per_element == 2 && standard_swizzle) { + *segment_order_horz = segment_order__non_contiguous; + *segment_order_vert = segment_order__contiguous; + return true; + } + if (bytes_per_element == 4 && standard_swizzle) { + *segment_order_horz = segment_order__non_contiguous; + *segment_order_vert = segment_order__contiguous; + return true; + } + if (bytes_per_element == 8 && standard_swizzle) { + *segment_order_horz = segment_order__na; + *segment_order_vert = segment_order__contiguous; + return true; + } + if (bytes_per_element == 8 && display_swizzle) { + *segment_order_horz = segment_order__contiguous; + *segment_order_vert = segment_order__non_contiguous; + return true; + } + + return false; +} + +static bool hubbub1_dcc_support_pixel_format( + enum surface_pixel_format format, + unsigned int *bytes_per_element) +{ + /* DML: get_bytes_per_element */ + switch (format) { + case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: + case SURFACE_PIXEL_FORMAT_GRPH_RGB565: + *bytes_per_element = 2; + return true; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010: + *bytes_per_element = 4; + return true; + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: + *bytes_per_element = 8; + return true; + default: + return false; + } +} + +static void hubbub1_get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height, + unsigned int bytes_per_element) +{ + /* copied from DML. might want to refactor DML to leverage from DML */ + /* DML : get_blk256_size */ + if (bytes_per_element == 1) { + *blk256_width = 16; + *blk256_height = 16; + } else if (bytes_per_element == 2) { + *blk256_width = 16; + *blk256_height = 8; + } else if (bytes_per_element == 4) { + *blk256_width = 8; + *blk256_height = 8; + } else if (bytes_per_element == 8) { + *blk256_width = 8; + *blk256_height = 4; + } +} + +static void hubbub1_det_request_size( + unsigned int height, + unsigned int width, + unsigned int bpe, + bool *req128_horz_wc, + bool *req128_vert_wc) +{ + unsigned int detile_buf_size = 164 * 1024; /* 164KB for DCN1.0 */ + + unsigned int blk256_height = 0; + unsigned int blk256_width = 0; + unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc; + + hubbub1_get_blk256_size(&blk256_width, &blk256_height, bpe); + + swath_bytes_horz_wc = height * blk256_height * bpe; + swath_bytes_vert_wc = width * blk256_width * bpe; + + *req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128b request */ + + *req128_vert_wc = (2 * swath_bytes_vert_wc <= detile_buf_size) ? + false : /* full 256B request */ + true; /* half 128b request */ +} + +static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub, + const struct dc_dcc_surface_param *input, + struct dc_surface_dcc_cap *output) +{ + struct dc *dc = hubbub->ctx->dc; + /* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */ + enum dcc_control dcc_control; + unsigned int bpe; + enum segment_order segment_order_horz, segment_order_vert; + bool req128_horz_wc, req128_vert_wc; + + memset(output, 0, sizeof(*output)); + + if (dc->debug.disable_dcc == DCC_DISABLE) + return false; + + if (!hubbub->funcs->dcc_support_pixel_format(input->format, &bpe)) + return false; + + if (!hubbub->funcs->dcc_support_swizzle(input->swizzle_mode, bpe, + &segment_order_horz, &segment_order_vert)) + return false; + + hubbub1_det_request_size(input->surface_size.height, input->surface_size.width, + bpe, &req128_horz_wc, &req128_vert_wc); + + if (!req128_horz_wc && !req128_vert_wc) { + dcc_control = dcc_control__256_256_xxx; + } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) { + if (!req128_horz_wc) + dcc_control = dcc_control__256_256_xxx; + else if (segment_order_horz == segment_order__contiguous) + dcc_control = dcc_control__128_128_xxx; + else + dcc_control = dcc_control__256_64_64; + } else if (input->scan == SCAN_DIRECTION_VERTICAL) { + if (!req128_vert_wc) + dcc_control = dcc_control__256_256_xxx; + else if (segment_order_vert == segment_order__contiguous) + dcc_control = dcc_control__128_128_xxx; + else + dcc_control = dcc_control__256_64_64; + } else { + if ((req128_horz_wc && + segment_order_horz == segment_order__non_contiguous) || + (req128_vert_wc && + segment_order_vert == segment_order__non_contiguous)) + /* access_dir not known, must use most constraining */ + dcc_control = dcc_control__256_64_64; + else + /* reg128 is true for either horz and vert + * but segment_order is contiguous + */ + dcc_control = dcc_control__128_128_xxx; + } + + if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE && + dcc_control != dcc_control__256_256_xxx) + return false; + + switch (dcc_control) { + case dcc_control__256_256_xxx: + output->grph.rgb.max_uncompressed_blk_size = 256; + output->grph.rgb.max_compressed_blk_size = 256; + output->grph.rgb.independent_64b_blks = false; + break; + case dcc_control__128_128_xxx: + output->grph.rgb.max_uncompressed_blk_size = 128; + output->grph.rgb.max_compressed_blk_size = 128; + output->grph.rgb.independent_64b_blks = false; + break; + case dcc_control__256_64_64: + output->grph.rgb.max_uncompressed_blk_size = 256; + output->grph.rgb.max_compressed_blk_size = 64; + output->grph.rgb.independent_64b_blks = true; + break; + } + + output->capable = true; + output->const_color_support = false; + + return true; +} + static const struct hubbub_funcs hubbub1_funcs = { - .update_dchub = hubbub1_update_dchub + .update_dchub = hubbub1_update_dchub, + .dcc_support_swizzle = hubbub1_dcc_support_swizzle, + .dcc_support_pixel_format = hubbub1_dcc_support_pixel_format, + .get_dcc_compression_cap = hubbub1_get_dcc_compression_cap, }; void hubbub1_construct(struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h index a16e908821a0..6315a0e6b0d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h @@ -27,6 +27,7 @@ #define __DC_HUBBUB_DCN10_H__ #include "core_types.h" +#include "dchubbub.h" #define HUBHUB_REG_LIST_DCN()\ SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A),\ @@ -47,7 +48,8 @@ SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND),\ SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ SR(DCHUBBUB_TEST_DEBUG_INDEX), \ - SR(DCHUBBUB_TEST_DEBUG_DATA) + SR(DCHUBBUB_TEST_DEBUG_DATA),\ + SR(DCHUBBUB_SOFT_RESET) #define HUBBUB_SR_WATERMARK_REG_LIST()\ SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A),\ @@ -104,6 +106,7 @@ struct dcn_hubbub_registers { uint32_t DCHUBBUB_SDPIF_AGP_BOT; uint32_t DCHUBBUB_SDPIF_AGP_TOP; uint32_t DCHUBBUB_CRC_CTRL; + uint32_t DCHUBBUB_SOFT_RESET; }; /* set field name */ @@ -113,6 +116,7 @@ struct dcn_hubbub_registers { #define HUBBUB_MASK_SH_LIST_DCN(mask_sh)\ HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, mask_sh), \ + HUBBUB_SF(DCHUBBUB_SOFT_RESET, DCHUBBUB_GLOBAL_SOFT_RESET, mask_sh), \ HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, mask_sh), \ HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_DONE_INTERRUPT_DISABLE, mask_sh), \ HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_VALUE, mask_sh), \ @@ -142,6 +146,7 @@ struct dcn_hubbub_registers { type DCHUBBUB_ARB_SAT_LEVEL;\ type DCHUBBUB_ARB_MIN_REQ_OUTSTAND;\ type DCHUBBUB_GLOBAL_TIMER_REFDIV;\ + type DCHUBBUB_GLOBAL_SOFT_RESET; \ type SDPIF_FB_TOP;\ type SDPIF_FB_BASE;\ type SDPIF_FB_OFFSET;\ @@ -173,12 +178,6 @@ struct dcn_hubbub_wm { struct dcn_hubbub_wm_set sets[4]; }; -struct hubbub_funcs { - void (*update_dchub)( - struct hubbub *hubbub, - struct dchub_init_data *dh_data); -}; - struct hubbub { const struct hubbub_funcs *funcs; struct dc_context *ctx; @@ -206,6 +205,7 @@ void hubbub1_toggle_watermark_change_req( void hubbub1_wm_read_state(struct hubbub *hubbub, struct dcn_hubbub_wm *wm); +void hubbub1_soft_reset(struct hubbub *hubbub, bool reset); void hubbub1_construct(struct hubbub *hubbub, struct dc_context *ctx, const struct dcn_hubbub_registers *hubbub_regs, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 39b72f696ae9..c28085be39ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -78,6 +78,27 @@ static void hubp1_disconnect(struct hubp *hubp) CURSOR_ENABLE, 0); } +static void hubp1_disable_control(struct hubp *hubp, bool disable_hubp) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + uint32_t disable = disable_hubp ? 1 : 0; + + REG_UPDATE(DCHUBP_CNTL, + HUBP_DISABLE, disable); +} + +static unsigned int hubp1_get_underflow_status(struct hubp *hubp) +{ + uint32_t hubp_underflow = 0; + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_GET(DCHUBP_CNTL, + HUBP_UNDERFLOW_STATUS, + &hubp_underflow); + + return hubp_underflow; +} + static void hubp1_set_hubp_blank_en(struct hubp *hubp, bool blank) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); @@ -146,6 +167,9 @@ void hubp1_program_size_and_rotation( * 444 or 420 luma */ if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + ASSERT(plane_size->video.chroma_pitch != 0); + /* Chroma pitch zero can cause system hang! */ + pitch = plane_size->video.luma_pitch - 1; meta_pitch = dcc->video.meta_pitch_l - 1; pitch_c = plane_size->video.chroma_pitch - 1; @@ -372,11 +396,15 @@ bool hubp1_program_surface_flip_and_addr( if (address->grph_stereo.right_addr.quad_part == 0) break; - REG_UPDATE_4(DCSURF_SURFACE_CONTROL, + REG_UPDATE_8(DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_TMZ, address->tmz_surface, PRIMARY_SURFACE_TMZ_C, address->tmz_surface, PRIMARY_META_SURFACE_TMZ, address->tmz_surface, - PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface); + PRIMARY_META_SURFACE_TMZ_C, address->tmz_surface, + SECONDARY_SURFACE_TMZ, address->tmz_surface, + SECONDARY_SURFACE_TMZ_C, address->tmz_surface, + SECONDARY_META_SURFACE_TMZ, address->tmz_surface, + SECONDARY_META_SURFACE_TMZ_C, address->tmz_surface); if (address->grph_stereo.right_meta_addr.quad_part != 0) { @@ -435,9 +463,11 @@ void hubp1_dcc_control(struct hubp *hubp, bool enable, uint32_t dcc_ind_64b_blk = independent_64b_blks ? 1 : 0; struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); - REG_UPDATE_2(DCSURF_SURFACE_CONTROL, + REG_UPDATE_4(DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_DCC_EN, dcc_en, - PRIMARY_SURFACE_DCC_IND_64B_BLK, dcc_ind_64b_blk); + PRIMARY_SURFACE_DCC_IND_64B_BLK, dcc_ind_64b_blk, + SECONDARY_SURFACE_DCC_EN, dcc_en, + SECONDARY_SURFACE_DCC_IND_64B_BLK, dcc_ind_64b_blk); } void hubp1_program_surface_config( @@ -535,11 +565,13 @@ void hubp1_program_deadline( REG_SET(VBLANK_PARAMETERS_3, 0, REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l); - REG_SET(NOM_PARAMETERS_0, 0, - DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l); + if (REG(NOM_PARAMETERS_0)) + REG_SET(NOM_PARAMETERS_0, 0, + DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l); - REG_SET(NOM_PARAMETERS_1, 0, - REFCYC_PER_PTE_GROUP_NOM_L, dlg_attr->refcyc_per_pte_group_nom_l); + if (REG(NOM_PARAMETERS_1)) + REG_SET(NOM_PARAMETERS_1, 0, + REFCYC_PER_PTE_GROUP_NOM_L, dlg_attr->refcyc_per_pte_group_nom_l); REG_SET(NOM_PARAMETERS_4, 0, DST_Y_PER_META_ROW_NOM_L, dlg_attr->dst_y_per_meta_row_nom_l); @@ -568,11 +600,13 @@ void hubp1_program_deadline( REG_SET(VBLANK_PARAMETERS_4, 0, REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c); - REG_SET(NOM_PARAMETERS_2, 0, - DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c); + if (REG(NOM_PARAMETERS_2)) + REG_SET(NOM_PARAMETERS_2, 0, + DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c); - REG_SET(NOM_PARAMETERS_3, 0, - REFCYC_PER_PTE_GROUP_NOM_C, dlg_attr->refcyc_per_pte_group_nom_c); + if (REG(NOM_PARAMETERS_3)) + REG_SET(NOM_PARAMETERS_3, 0, + REFCYC_PER_PTE_GROUP_NOM_C, dlg_attr->refcyc_per_pte_group_nom_c); REG_SET(NOM_PARAMETERS_6, 0, DST_Y_PER_META_ROW_NOM_C, dlg_attr->dst_y_per_meta_row_nom_c); @@ -609,6 +643,13 @@ void hubp1_program_deadline( REG_SET(DCN_SURF1_TTU_CNTL1, 0, REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_c); + + REG_SET_3(DCN_CUR0_TTU_CNTL0, 0, + REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_cur0, + QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_cur0, + QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_cur0); + REG_SET(DCN_CUR0_TTU_CNTL1, 0, + REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0); } static void hubp1_setup( @@ -752,9 +793,159 @@ void min_set_viewport( PRI_VIEWPORT_Y_START_C, viewport_c->y); } -void hubp1_read_state(struct dcn10_hubp *hubp1, - struct dcn_hubp_state *s) +void hubp1_read_state(struct hubp *hubp) { + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + struct dcn_hubp_state *s = &hubp1->state; + struct _vcs_dpi_display_dlg_regs_st *dlg_attr = &s->dlg_attr; + struct _vcs_dpi_display_ttu_regs_st *ttu_attr = &s->ttu_attr; + struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs; + + /* Requester */ + REG_GET(HUBPRET_CONTROL, + DET_BUF_PLANE1_BASE_ADDRESS, &rq_regs->plane1_base_address); + REG_GET_4(DCN_EXPANSION_MODE, + DRQ_EXPANSION_MODE, &rq_regs->drq_expansion_mode, + PRQ_EXPANSION_MODE, &rq_regs->prq_expansion_mode, + MRQ_EXPANSION_MODE, &rq_regs->mrq_expansion_mode, + CRQ_EXPANSION_MODE, &rq_regs->crq_expansion_mode); + REG_GET_8(DCHUBP_REQ_SIZE_CONFIG, + CHUNK_SIZE, &rq_regs->rq_regs_l.chunk_size, + MIN_CHUNK_SIZE, &rq_regs->rq_regs_l.min_chunk_size, + META_CHUNK_SIZE, &rq_regs->rq_regs_l.meta_chunk_size, + MIN_META_CHUNK_SIZE, &rq_regs->rq_regs_l.min_meta_chunk_size, + DPTE_GROUP_SIZE, &rq_regs->rq_regs_l.dpte_group_size, + MPTE_GROUP_SIZE, &rq_regs->rq_regs_l.mpte_group_size, + SWATH_HEIGHT, &rq_regs->rq_regs_l.swath_height, + PTE_ROW_HEIGHT_LINEAR, &rq_regs->rq_regs_l.pte_row_height_linear); + REG_GET_8(DCHUBP_REQ_SIZE_CONFIG_C, + CHUNK_SIZE_C, &rq_regs->rq_regs_c.chunk_size, + MIN_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_chunk_size, + META_CHUNK_SIZE_C, &rq_regs->rq_regs_c.meta_chunk_size, + MIN_META_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_meta_chunk_size, + DPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.dpte_group_size, + MPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.mpte_group_size, + SWATH_HEIGHT_C, &rq_regs->rq_regs_c.swath_height, + PTE_ROW_HEIGHT_LINEAR_C, &rq_regs->rq_regs_c.pte_row_height_linear); + + /* DLG - Per hubp */ + REG_GET_2(BLANK_OFFSET_0, + REFCYC_H_BLANK_END, &dlg_attr->refcyc_h_blank_end, + DLG_V_BLANK_END, &dlg_attr->dlg_vblank_end); + + REG_GET(BLANK_OFFSET_1, + MIN_DST_Y_NEXT_START, &dlg_attr->min_dst_y_next_start); + + REG_GET(DST_DIMENSIONS, + REFCYC_PER_HTOTAL, &dlg_attr->refcyc_per_htotal); + + REG_GET_2(DST_AFTER_SCALER, + REFCYC_X_AFTER_SCALER, &dlg_attr->refcyc_x_after_scaler, + DST_Y_AFTER_SCALER, &dlg_attr->dst_y_after_scaler); + + if (REG(PREFETCH_SETTINS)) + REG_GET_2(PREFETCH_SETTINS, + DST_Y_PREFETCH, &dlg_attr->dst_y_prefetch, + VRATIO_PREFETCH, &dlg_attr->vratio_prefetch); + else + REG_GET_2(PREFETCH_SETTINGS, + DST_Y_PREFETCH, &dlg_attr->dst_y_prefetch, + VRATIO_PREFETCH, &dlg_attr->vratio_prefetch); + + REG_GET_2(VBLANK_PARAMETERS_0, + DST_Y_PER_VM_VBLANK, &dlg_attr->dst_y_per_vm_vblank, + DST_Y_PER_ROW_VBLANK, &dlg_attr->dst_y_per_row_vblank); + + REG_GET(REF_FREQ_TO_PIX_FREQ, + REF_FREQ_TO_PIX_FREQ, &dlg_attr->ref_freq_to_pix_freq); + + /* DLG - Per luma/chroma */ + REG_GET(VBLANK_PARAMETERS_1, + REFCYC_PER_PTE_GROUP_VBLANK_L, &dlg_attr->refcyc_per_pte_group_vblank_l); + + REG_GET(VBLANK_PARAMETERS_3, + REFCYC_PER_META_CHUNK_VBLANK_L, &dlg_attr->refcyc_per_meta_chunk_vblank_l); + + if (REG(NOM_PARAMETERS_0)) + REG_GET(NOM_PARAMETERS_0, + DST_Y_PER_PTE_ROW_NOM_L, &dlg_attr->dst_y_per_pte_row_nom_l); + + if (REG(NOM_PARAMETERS_1)) + REG_GET(NOM_PARAMETERS_1, + REFCYC_PER_PTE_GROUP_NOM_L, &dlg_attr->refcyc_per_pte_group_nom_l); + + REG_GET(NOM_PARAMETERS_4, + DST_Y_PER_META_ROW_NOM_L, &dlg_attr->dst_y_per_meta_row_nom_l); + + REG_GET(NOM_PARAMETERS_5, + REFCYC_PER_META_CHUNK_NOM_L, &dlg_attr->refcyc_per_meta_chunk_nom_l); + + REG_GET_2(PER_LINE_DELIVERY_PRE, + REFCYC_PER_LINE_DELIVERY_PRE_L, &dlg_attr->refcyc_per_line_delivery_pre_l, + REFCYC_PER_LINE_DELIVERY_PRE_C, &dlg_attr->refcyc_per_line_delivery_pre_c); + + REG_GET_2(PER_LINE_DELIVERY, + REFCYC_PER_LINE_DELIVERY_L, &dlg_attr->refcyc_per_line_delivery_l, + REFCYC_PER_LINE_DELIVERY_C, &dlg_attr->refcyc_per_line_delivery_c); + + if (REG(PREFETCH_SETTINS_C)) + REG_GET(PREFETCH_SETTINS_C, + VRATIO_PREFETCH_C, &dlg_attr->vratio_prefetch_c); + else + REG_GET(PREFETCH_SETTINGS_C, + VRATIO_PREFETCH_C, &dlg_attr->vratio_prefetch_c); + + REG_GET(VBLANK_PARAMETERS_2, + REFCYC_PER_PTE_GROUP_VBLANK_C, &dlg_attr->refcyc_per_pte_group_vblank_c); + + REG_GET(VBLANK_PARAMETERS_4, + REFCYC_PER_META_CHUNK_VBLANK_C, &dlg_attr->refcyc_per_meta_chunk_vblank_c); + + if (REG(NOM_PARAMETERS_2)) + REG_GET(NOM_PARAMETERS_2, + DST_Y_PER_PTE_ROW_NOM_C, &dlg_attr->dst_y_per_pte_row_nom_c); + + if (REG(NOM_PARAMETERS_3)) + REG_GET(NOM_PARAMETERS_3, + REFCYC_PER_PTE_GROUP_NOM_C, &dlg_attr->refcyc_per_pte_group_nom_c); + + REG_GET(NOM_PARAMETERS_6, + DST_Y_PER_META_ROW_NOM_C, &dlg_attr->dst_y_per_meta_row_nom_c); + + REG_GET(NOM_PARAMETERS_7, + REFCYC_PER_META_CHUNK_NOM_C, &dlg_attr->refcyc_per_meta_chunk_nom_c); + + /* TTU - per hubp */ + REG_GET_2(DCN_TTU_QOS_WM, + QoS_LEVEL_LOW_WM, &ttu_attr->qos_level_low_wm, + QoS_LEVEL_HIGH_WM, &ttu_attr->qos_level_high_wm); + + REG_GET_2(DCN_GLOBAL_TTU_CNTL, + MIN_TTU_VBLANK, &ttu_attr->min_ttu_vblank, + QoS_LEVEL_FLIP, &ttu_attr->qos_level_flip); + + /* TTU - per luma/chroma */ + /* Assumed surf0 is luma and 1 is chroma */ + + REG_GET_3(DCN_SURF0_TTU_CNTL0, + REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_l, + QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_l, + QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_l); + + REG_GET(DCN_SURF0_TTU_CNTL1, + REFCYC_PER_REQ_DELIVERY_PRE, + &ttu_attr->refcyc_per_req_delivery_pre_l); + + REG_GET_3(DCN_SURF1_TTU_CNTL0, + REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_c, + QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_c, + QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_c); + + REG_GET(DCN_SURF1_TTU_CNTL1, + REFCYC_PER_REQ_DELIVERY_PRE, + &ttu_attr->refcyc_per_req_delivery_pre_c); + + /* Rest of hubp */ REG_GET(DCSURF_SURFACE_CONFIG, SURFACE_PIXEL_FORMAT, &s->pixel_format); @@ -890,14 +1081,14 @@ void hubp1_cursor_set_position( ASSERT(param->h_scale_ratio.value); if (param->h_scale_ratio.value) - dst_x_offset = dal_fixed31_32_floor(dal_fixed31_32_div( - dal_fixed31_32_from_int(dst_x_offset), + dst_x_offset = dc_fixpt_floor(dc_fixpt_div( + dc_fixpt_from_int(dst_x_offset), param->h_scale_ratio)); if (src_x_offset >= (int)param->viewport_width) cur_en = 0; /* not visible beyond right edge*/ - if (src_x_offset + (int)hubp->curs_attr.width < 0) + if (src_x_offset + (int)hubp->curs_attr.width <= 0) cur_en = 0; /* not visible beyond left edge*/ if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) @@ -952,6 +1143,10 @@ static struct hubp_funcs dcn10_hubp_funcs = { .hubp_disconnect = hubp1_disconnect, .hubp_clk_cntl = hubp1_clk_cntl, .hubp_vtg_sel = hubp1_vtg_sel, + .hubp_read_state = hubp1_read_state, + .hubp_disable_control = hubp1_disable_control, + .hubp_get_underflow_status = hubp1_get_underflow_status, + }; /*****************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index 4a3703e12ea1..d901d5092969 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -30,6 +30,7 @@ #define TO_DCN10_HUBP(hubp)\ container_of(hubp, struct dcn10_hubp, base) +/* Register address initialization macro for all ASICs (including those with reduced functionality) */ #define HUBP_REG_LIST_DCN(id)\ SRI(DCHUBP_CNTL, HUBP, id),\ SRI(HUBPREQ_DEBUG_DB, HUBP, id),\ @@ -78,16 +79,12 @@ SRI(REF_FREQ_TO_PIX_FREQ, HUBPREQ, id),\ SRI(VBLANK_PARAMETERS_1, HUBPREQ, id),\ SRI(VBLANK_PARAMETERS_3, HUBPREQ, id),\ - SRI(NOM_PARAMETERS_0, HUBPREQ, id),\ - SRI(NOM_PARAMETERS_1, HUBPREQ, id),\ SRI(NOM_PARAMETERS_4, HUBPREQ, id),\ SRI(NOM_PARAMETERS_5, HUBPREQ, id),\ SRI(PER_LINE_DELIVERY_PRE, HUBPREQ, id),\ SRI(PER_LINE_DELIVERY, HUBPREQ, id),\ SRI(VBLANK_PARAMETERS_2, HUBPREQ, id),\ SRI(VBLANK_PARAMETERS_4, HUBPREQ, id),\ - SRI(NOM_PARAMETERS_2, HUBPREQ, id),\ - SRI(NOM_PARAMETERS_3, HUBPREQ, id),\ SRI(NOM_PARAMETERS_6, HUBPREQ, id),\ SRI(NOM_PARAMETERS_7, HUBPREQ, id),\ SRI(DCN_TTU_QOS_WM, HUBPREQ, id),\ @@ -96,11 +93,21 @@ SRI(DCN_SURF0_TTU_CNTL1, HUBPREQ, id),\ SRI(DCN_SURF1_TTU_CNTL0, HUBPREQ, id),\ SRI(DCN_SURF1_TTU_CNTL1, HUBPREQ, id),\ - SRI(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id),\ + SRI(DCN_CUR0_TTU_CNTL0, HUBPREQ, id),\ + SRI(DCN_CUR0_TTU_CNTL1, HUBPREQ, id),\ SRI(HUBP_CLK_CNTL, HUBP, id) +/* Register address initialization macro for ASICs with VM */ +#define HUBP_REG_LIST_DCN_VM(id)\ + SRI(NOM_PARAMETERS_0, HUBPREQ, id),\ + SRI(NOM_PARAMETERS_1, HUBPREQ, id),\ + SRI(NOM_PARAMETERS_2, HUBPREQ, id),\ + SRI(NOM_PARAMETERS_3, HUBPREQ, id),\ + SRI(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id) + #define HUBP_REG_LIST_DCN10(id)\ HUBP_REG_LIST_DCN(id),\ + HUBP_REG_LIST_DCN_VM(id),\ SRI(PREFETCH_SETTINS, HUBPREQ, id),\ SRI(PREFETCH_SETTINS_C, HUBPREQ, id),\ SRI(DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB, HUBPREQ, id),\ @@ -198,6 +205,8 @@ uint32_t DCN_SURF0_TTU_CNTL1; \ uint32_t DCN_SURF1_TTU_CNTL0; \ uint32_t DCN_SURF1_TTU_CNTL1; \ + uint32_t DCN_CUR0_TTU_CNTL0; \ + uint32_t DCN_CUR0_TTU_CNTL1; \ uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB; \ uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LSB; \ uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_MSB; \ @@ -237,12 +246,14 @@ #define HUBP_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix +/* Mask/shift struct generation macro for all ASICs (including those with reduced functionality) */ #define HUBP_MASK_SH_LIST_DCN(mask_sh)\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_BLANK_EN, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_TTU_DISABLE, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_STATUS, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_NO_OUTSTANDING_REQ, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_DISABLE, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PIPES, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_BANKS, mask_sh),\ HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, PIPE_INTERLEAVE, mask_sh),\ @@ -301,6 +312,12 @@ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_META_SURFACE_TMZ_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_DCC_EN, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, PRIMARY_SURFACE_DCC_IND_64B_BLK, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_TMZ, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_TMZ_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_META_SURFACE_TMZ, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_META_SURFACE_TMZ_C, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_EN, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_IND_64B_BLK, mask_sh),\ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, DET_BUF_PLANE1_BASE_ADDRESS, mask_sh),\ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CB_B, mask_sh),\ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CR_R, mask_sh),\ @@ -335,8 +352,6 @@ HUBP_SF(HUBPREQ0_REF_FREQ_TO_PIX_FREQ, REF_FREQ_TO_PIX_FREQ, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_1, REFCYC_PER_PTE_GROUP_VBLANK_L, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_3, REFCYC_PER_META_CHUNK_VBLANK_L, mask_sh),\ - HUBP_SF(HUBPREQ0_NOM_PARAMETERS_0, DST_Y_PER_PTE_ROW_NOM_L, mask_sh),\ - HUBP_SF(HUBPREQ0_NOM_PARAMETERS_1, REFCYC_PER_PTE_GROUP_NOM_L, mask_sh),\ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_4, DST_Y_PER_META_ROW_NOM_L, mask_sh),\ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_5, REFCYC_PER_META_CHUNK_NOM_L, mask_sh),\ HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY_PRE, REFCYC_PER_LINE_DELIVERY_PRE_L, mask_sh),\ @@ -345,8 +360,6 @@ HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY, REFCYC_PER_LINE_DELIVERY_C, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_2, REFCYC_PER_PTE_GROUP_VBLANK_C, mask_sh),\ HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_4, REFCYC_PER_META_CHUNK_VBLANK_C, mask_sh),\ - HUBP_SF(HUBPREQ0_NOM_PARAMETERS_2, DST_Y_PER_PTE_ROW_NOM_C, mask_sh),\ - HUBP_SF(HUBPREQ0_NOM_PARAMETERS_3, REFCYC_PER_PTE_GROUP_NOM_C, mask_sh),\ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_6, DST_Y_PER_META_ROW_NOM_C, mask_sh),\ HUBP_SF(HUBPREQ0_NOM_PARAMETERS_7, REFCYC_PER_META_CHUNK_NOM_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_TTU_QOS_WM, QoS_LEVEL_LOW_WM, mask_sh),\ @@ -357,12 +370,24 @@ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_LEVEL_FIXED, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh),\ + HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh) + +/* Mask/shift struct generation macro for ASICs with VM */ +#define HUBP_MASK_SH_LIST_DCN_VM(mask_sh)\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_0, DST_Y_PER_PTE_ROW_NOM_L, mask_sh),\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_1, REFCYC_PER_PTE_GROUP_NOM_L, mask_sh),\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_2, DST_Y_PER_PTE_ROW_NOM_C, mask_sh),\ + HUBP_SF(HUBPREQ0_NOM_PARAMETERS_3, REFCYC_PER_PTE_GROUP_NOM_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, mask_sh),\ HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, mask_sh),\ - HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh) + HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, REFCYC_PER_REQ_DELIVERY, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, QoS_LEVEL_FIXED, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\ + HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh) #define HUBP_MASK_SH_LIST_DCN10(mask_sh)\ HUBP_MASK_SH_LIST_DCN(mask_sh),\ + HUBP_MASK_SH_LIST_DCN_VM(mask_sh),\ HUBP_SF(HUBPREQ0_PREFETCH_SETTINS, DST_Y_PREFETCH, mask_sh),\ HUBP_SF(HUBPREQ0_PREFETCH_SETTINS, VRATIO_PREFETCH, mask_sh),\ HUBP_SF(HUBPREQ0_PREFETCH_SETTINS_C, VRATIO_PREFETCH_C, mask_sh),\ @@ -403,6 +428,7 @@ #define DCN_HUBP_REG_FIELD_LIST(type) \ type HUBP_BLANK_EN;\ + type HUBP_DISABLE;\ type HUBP_TTU_DISABLE;\ type HUBP_NO_OUTSTANDING_REQ;\ type HUBP_VTG_SEL;\ @@ -469,6 +495,8 @@ type SECONDARY_META_SURFACE_TMZ_C;\ type PRIMARY_SURFACE_DCC_EN;\ type PRIMARY_SURFACE_DCC_IND_64B_BLK;\ + type SECONDARY_SURFACE_DCC_EN;\ + type SECONDARY_SURFACE_DCC_IND_64B_BLK;\ type DET_BUF_PLANE1_BASE_ADDRESS;\ type CROSSBAR_SRC_CB_B;\ type CROSSBAR_SRC_CR_R;\ @@ -601,8 +629,29 @@ struct dcn_mi_mask { DCN_HUBP_REG_FIELD_LIST(uint32_t); }; +struct dcn_hubp_state { + struct _vcs_dpi_display_dlg_regs_st dlg_attr; + struct _vcs_dpi_display_ttu_regs_st ttu_attr; + struct _vcs_dpi_display_rq_regs_st rq_regs; + uint32_t pixel_format; + uint32_t inuse_addr_hi; + uint32_t viewport_width; + uint32_t viewport_height; + uint32_t rotation_angle; + uint32_t h_mirror_en; + uint32_t sw_mode; + uint32_t dcc_en; + uint32_t blank_en; + uint32_t underflow_status; + uint32_t ttu_disable; + uint32_t min_ttu_vblank; + uint32_t qos_level_low_wm; + uint32_t qos_level_high_wm; +}; + struct dcn10_hubp { struct hubp base; + struct dcn_hubp_state state; const struct dcn_mi_registers *hubp_regs; const struct dcn_mi_shift *hubp_shift; const struct dcn_mi_mask *hubp_mask; @@ -680,26 +729,9 @@ void dcn10_hubp_construct( const struct dcn_mi_shift *hubp_shift, const struct dcn_mi_mask *hubp_mask); - -struct dcn_hubp_state { - uint32_t pixel_format; - uint32_t inuse_addr_hi; - uint32_t viewport_width; - uint32_t viewport_height; - uint32_t rotation_angle; - uint32_t h_mirror_en; - uint32_t sw_mode; - uint32_t dcc_en; - uint32_t blank_en; - uint32_t underflow_status; - uint32_t ttu_disable; - uint32_t min_ttu_vblank; - uint32_t qos_level_low_wm; - uint32_t qos_level_high_wm; -}; -void hubp1_read_state(struct dcn10_hubp *hubp1, - struct dcn_hubp_state *s); +void hubp1_read_state(struct hubp *hubp); enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 8b0f6b8a5627..f8e0576af6e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -45,8 +45,8 @@ #include "dcn10_hubbub.h" #include "dcn10_cm_common.h" -#define DC_LOGGER \ - ctx->logger +#define DC_LOGGER_INIT(logger) + #define CTX \ hws->ctx #define REG(reg)\ @@ -56,16 +56,17 @@ #define FN(reg_name, field_name) \ hws->shifts->field_name, hws->masks->field_name +/*print is 17 wide, first two characters are spaces*/ #define DTN_INFO_MICRO_SEC(ref_cycle) \ print_microsec(dc_ctx, ref_cycle) void print_microsec(struct dc_context *dc_ctx, uint32_t ref_cycle) { - static const uint32_t ref_clk_mhz = 48; - static const unsigned int frac = 10; + const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 1000; + static const unsigned int frac = 1000; uint32_t us_x10 = (ref_cycle * frac) / ref_clk_mhz; - DTN_INFO("%d.%d \t ", + DTN_INFO(" %11d.%03d", us_x10 / frac, us_x10 % frac); } @@ -92,14 +93,14 @@ void dcn10_log_hubbub_state(struct dc *dc) hubbub1_wm_read_state(dc->res_pool->hubbub, &wm); - DTN_INFO("HUBBUB WM: \t data_urgent \t pte_meta_urgent \t " - "sr_enter \t sr_exit \t dram_clk_change \n"); + DTN_INFO("HUBBUB WM: data_urgent pte_meta_urgent" + " sr_enter sr_exit dram_clk_change\n"); for (i = 0; i < 4; i++) { struct dcn_hubbub_wm_set *s; s = &wm.sets[i]; - DTN_INFO("WM_Set[%d]:\t ", s->wm_set); + DTN_INFO("WM_Set[%d]:", s->wm_set); DTN_INFO_MICRO_SEC(s->data_urgent); DTN_INFO_MICRO_SEC(s->pte_meta_urgent); DTN_INFO_MICRO_SEC(s->sr_enter); @@ -111,6 +112,121 @@ void dcn10_log_hubbub_state(struct dc *dc) DTN_INFO("\n"); } +static void dcn10_log_hubp_states(struct dc *dc) +{ + struct dc_context *dc_ctx = dc->ctx; + struct resource_pool *pool = dc->res_pool; + int i; + + DTN_INFO("HUBP: format addr_hi width height" + " rot mir sw_mode dcc_en blank_en ttu_dis underflow" + " min_ttu_vblank qos_low_wm qos_high_wm\n"); + for (i = 0; i < pool->pipe_count; i++) { + struct hubp *hubp = pool->hubps[i]; + struct dcn_hubp_state *s = &(TO_DCN10_HUBP(hubp)->state); + + hubp->funcs->hubp_read_state(hubp); + + if (!s->blank_en) { + DTN_INFO("[%2d]: %5xh %6xh %5d %6d %2xh %2xh %6xh" + " %6d %8d %7d %8xh", + hubp->inst, + s->pixel_format, + s->inuse_addr_hi, + s->viewport_width, + s->viewport_height, + s->rotation_angle, + s->h_mirror_en, + s->sw_mode, + s->dcc_en, + s->blank_en, + s->ttu_disable, + s->underflow_status); + DTN_INFO_MICRO_SEC(s->min_ttu_vblank); + DTN_INFO_MICRO_SEC(s->qos_level_low_wm); + DTN_INFO_MICRO_SEC(s->qos_level_high_wm); + DTN_INFO("\n"); + } + } + + DTN_INFO("\n=========RQ========\n"); + DTN_INFO("HUBP: drq_exp_m prq_exp_m mrq_exp_m crq_exp_m plane1_ba L:chunk_s min_chu_s meta_ch_s" + " min_m_c_s dpte_gr_s mpte_gr_s swath_hei pte_row_h C:chunk_s min_chu_s meta_ch_s" + " min_m_c_s dpte_gr_s mpte_gr_s swath_hei pte_row_h\n"); + for (i = 0; i < pool->pipe_count; i++) { + struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state); + struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs; + + if (!s->blank_en) + DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n", + pool->hubps[i]->inst, rq_regs->drq_expansion_mode, rq_regs->prq_expansion_mode, rq_regs->mrq_expansion_mode, + rq_regs->crq_expansion_mode, rq_regs->plane1_base_address, rq_regs->rq_regs_l.chunk_size, + rq_regs->rq_regs_l.min_chunk_size, rq_regs->rq_regs_l.meta_chunk_size, + rq_regs->rq_regs_l.min_meta_chunk_size, rq_regs->rq_regs_l.dpte_group_size, + rq_regs->rq_regs_l.mpte_group_size, rq_regs->rq_regs_l.swath_height, + rq_regs->rq_regs_l.pte_row_height_linear, rq_regs->rq_regs_c.chunk_size, rq_regs->rq_regs_c.min_chunk_size, + rq_regs->rq_regs_c.meta_chunk_size, rq_regs->rq_regs_c.min_meta_chunk_size, + rq_regs->rq_regs_c.dpte_group_size, rq_regs->rq_regs_c.mpte_group_size, + rq_regs->rq_regs_c.swath_height, rq_regs->rq_regs_c.pte_row_height_linear); + } + + DTN_INFO("========DLG========\n"); + DTN_INFO("HUBP: rc_hbe dlg_vbe min_d_y_n rc_per_ht rc_x_a_s " + " dst_y_a_s dst_y_pf dst_y_vvb dst_y_rvb dst_y_vfl dst_y_rfl rf_pix_fq" + " vratio_pf vrat_pf_c rc_pg_vbl rc_pg_vbc rc_mc_vbl rc_mc_vbc rc_pg_fll" + " rc_pg_flc rc_mc_fll rc_mc_flc pr_nom_l pr_nom_c rc_pg_nl rc_pg_nc " + " mr_nom_l mr_nom_c rc_mc_nl rc_mc_nc rc_ld_pl rc_ld_pc rc_ld_l " + " rc_ld_c cha_cur0 ofst_cur1 cha_cur1 vr_af_vc0 ddrq_limt x_rt_dlay" + " x_rp_dlay x_rr_sfl\n"); + for (i = 0; i < pool->pipe_count; i++) { + struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state); + struct _vcs_dpi_display_dlg_regs_st *dlg_regs = &s->dlg_attr; + + if (!s->blank_en) + DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh" + "% 8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh" + " %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n", + pool->hubps[i]->inst, dlg_regs->refcyc_h_blank_end, dlg_regs->dlg_vblank_end, dlg_regs->min_dst_y_next_start, + dlg_regs->refcyc_per_htotal, dlg_regs->refcyc_x_after_scaler, dlg_regs->dst_y_after_scaler, + dlg_regs->dst_y_prefetch, dlg_regs->dst_y_per_vm_vblank, dlg_regs->dst_y_per_row_vblank, + dlg_regs->dst_y_per_vm_flip, dlg_regs->dst_y_per_row_flip, dlg_regs->ref_freq_to_pix_freq, + dlg_regs->vratio_prefetch, dlg_regs->vratio_prefetch_c, dlg_regs->refcyc_per_pte_group_vblank_l, + dlg_regs->refcyc_per_pte_group_vblank_c, dlg_regs->refcyc_per_meta_chunk_vblank_l, + dlg_regs->refcyc_per_meta_chunk_vblank_c, dlg_regs->refcyc_per_pte_group_flip_l, + dlg_regs->refcyc_per_pte_group_flip_c, dlg_regs->refcyc_per_meta_chunk_flip_l, + dlg_regs->refcyc_per_meta_chunk_flip_c, dlg_regs->dst_y_per_pte_row_nom_l, + dlg_regs->dst_y_per_pte_row_nom_c, dlg_regs->refcyc_per_pte_group_nom_l, + dlg_regs->refcyc_per_pte_group_nom_c, dlg_regs->dst_y_per_meta_row_nom_l, + dlg_regs->dst_y_per_meta_row_nom_c, dlg_regs->refcyc_per_meta_chunk_nom_l, + dlg_regs->refcyc_per_meta_chunk_nom_c, dlg_regs->refcyc_per_line_delivery_pre_l, + dlg_regs->refcyc_per_line_delivery_pre_c, dlg_regs->refcyc_per_line_delivery_l, + dlg_regs->refcyc_per_line_delivery_c, dlg_regs->chunk_hdl_adjust_cur0, dlg_regs->dst_y_offset_cur1, + dlg_regs->chunk_hdl_adjust_cur1, dlg_regs->vready_after_vcount0, dlg_regs->dst_y_delta_drq_limit, + dlg_regs->xfc_reg_transfer_delay, dlg_regs->xfc_reg_precharge_delay, + dlg_regs->xfc_reg_remote_surface_flip_latency); + } + + DTN_INFO("========TTU========\n"); + DTN_INFO("HUBP: qos_ll_wm qos_lh_wm mn_ttu_vb qos_l_flp rc_rd_p_l rc_rd_l rc_rd_p_c" + " rc_rd_c rc_rd_c0 rc_rd_pc0 rc_rd_c1 rc_rd_pc1 qos_lf_l qos_rds_l" + " qos_lf_c qos_rds_c qos_lf_c0 qos_rds_c0 qos_lf_c1 qos_rds_c1\n"); + for (i = 0; i < pool->pipe_count; i++) { + struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state); + struct _vcs_dpi_display_ttu_regs_st *ttu_regs = &s->ttu_attr; + + if (!s->blank_en) + DTN_INFO("[%2d]: %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh %8xh\n", + pool->hubps[i]->inst, ttu_regs->qos_level_low_wm, ttu_regs->qos_level_high_wm, ttu_regs->min_ttu_vblank, + ttu_regs->qos_level_flip, ttu_regs->refcyc_per_req_delivery_pre_l, ttu_regs->refcyc_per_req_delivery_l, + ttu_regs->refcyc_per_req_delivery_pre_c, ttu_regs->refcyc_per_req_delivery_c, ttu_regs->refcyc_per_req_delivery_cur0, + ttu_regs->refcyc_per_req_delivery_pre_cur0, ttu_regs->refcyc_per_req_delivery_cur1, + ttu_regs->refcyc_per_req_delivery_pre_cur1, ttu_regs->qos_level_fixed_l, ttu_regs->qos_ramp_disable_l, + ttu_regs->qos_level_fixed_c, ttu_regs->qos_ramp_disable_c, ttu_regs->qos_level_fixed_cur0, + ttu_regs->qos_ramp_disable_cur0, ttu_regs->qos_level_fixed_cur1, ttu_regs->qos_ramp_disable_cur1); + } + DTN_INFO("\n"); +} + void dcn10_log_hw_state(struct dc *dc) { struct dc_context *dc_ctx = dc->ctx; @@ -121,41 +237,64 @@ void dcn10_log_hw_state(struct dc *dc) dcn10_log_hubbub_state(dc); - DTN_INFO("HUBP:\t format \t addr_hi \t width \t height \t " - "rotation \t mirror \t sw_mode \t " - "dcc_en \t blank_en \t ttu_dis \t underflow \t " - "min_ttu_vblank \t qos_low_wm \t qos_high_wm \n"); + dcn10_log_hubp_states(dc); + DTN_INFO("DPP: IGAM format IGAM mode DGAM mode RGAM mode" + " GAMUT mode C11 C12 C13 C14 C21 C22 C23 C24 " + "C31 C32 C33 C34\n"); for (i = 0; i < pool->pipe_count; i++) { - struct hubp *hubp = pool->hubps[i]; - struct dcn_hubp_state s; - - hubp1_read_state(TO_DCN10_HUBP(hubp), &s); - - DTN_INFO("[%d]:\t %xh \t %xh \t %d \t %d \t " - "%xh \t %xh \t %xh \t " - "%d \t %d \t %d \t %xh \t", - hubp->inst, - s.pixel_format, - s.inuse_addr_hi, - s.viewport_width, - s.viewport_height, - s.rotation_angle, - s.h_mirror_en, - s.sw_mode, - s.dcc_en, - s.blank_en, - s.ttu_disable, - s.underflow_status); - DTN_INFO_MICRO_SEC(s.min_ttu_vblank); - DTN_INFO_MICRO_SEC(s.qos_level_low_wm); - DTN_INFO_MICRO_SEC(s.qos_level_high_wm); + struct dpp *dpp = pool->dpps[i]; + struct dcn_dpp_state s; + + dpp->funcs->dpp_read_state(dpp, &s); + + DTN_INFO("[%2d]: %11xh %-11s %-11s %-11s" + "%8x %08xh %08xh %08xh %08xh %08xh %08xh", + dpp->inst, + s.igam_input_format, + (s.igam_lut_mode == 0) ? "BypassFixed" : + ((s.igam_lut_mode == 1) ? "BypassFloat" : + ((s.igam_lut_mode == 2) ? "RAM" : + ((s.igam_lut_mode == 3) ? "RAM" : + "Unknown"))), + (s.dgam_lut_mode == 0) ? "Bypass" : + ((s.dgam_lut_mode == 1) ? "sRGB" : + ((s.dgam_lut_mode == 2) ? "Ycc" : + ((s.dgam_lut_mode == 3) ? "RAM" : + ((s.dgam_lut_mode == 4) ? "RAM" : + "Unknown")))), + (s.rgam_lut_mode == 0) ? "Bypass" : + ((s.rgam_lut_mode == 1) ? "sRGB" : + ((s.rgam_lut_mode == 2) ? "Ycc" : + ((s.rgam_lut_mode == 3) ? "RAM" : + ((s.rgam_lut_mode == 4) ? "RAM" : + "Unknown")))), + s.gamut_remap_mode, + s.gamut_remap_c11_c12, + s.gamut_remap_c13_c14, + s.gamut_remap_c21_c22, + s.gamut_remap_c23_c24, + s.gamut_remap_c31_c32, + s.gamut_remap_c33_c34); DTN_INFO("\n"); } DTN_INFO("\n"); - DTN_INFO("OTG:\t v_bs \t v_be \t v_ss \t v_se \t vpol \t vmax \t vmin \t " - "h_bs \t h_be \t h_ss \t h_se \t hpol \t htot \t vtot \t underflow\n"); + DTN_INFO("MPCC: OPP DPP MPCCBOT MODE ALPHA_MODE PREMULT OVERLAP_ONLY IDLE\n"); + for (i = 0; i < pool->pipe_count; i++) { + struct mpcc_state s = {0}; + + pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s); + if (s.opp_id != 0xf) + DTN_INFO("[%2d]: %2xh %2xh %6xh %4d %10d %7d %12d %4d\n", + i, s.opp_id, s.dpp_id, s.bot_mpcc_id, + s.mode, s.alpha_mode, s.pre_multiplied_alpha, s.overlap_only, + s.idle); + } + DTN_INFO("\n"); + + DTN_INFO("OTG: v_bs v_be v_ss v_se vpol vmax vmin vmax_sel vmin_sel" + " h_bs h_be h_ss h_se hpol htot vtot underflow\n"); for (i = 0; i < pool->timing_generator_count; i++) { struct timing_generator *tg = pool->timing_generators[i]; @@ -167,9 +306,8 @@ void dcn10_log_hw_state(struct dc *dc) if ((s.otg_enabled & 1) == 0) continue; - DTN_INFO("[%d]:\t %d \t %d \t %d \t %d \t " - "%d \t %d \t %d \t %d \t %d \t %d \t " - "%d \t %d \t %d \t %d \t %d \t ", + DTN_INFO("[%d]: %5d %5d %5d %5d %5d %5d %5d %9d %9d %5d %5d %5d" + " %5d %5d %5d %5d %9d\n", tg->inst, s.v_blank_start, s.v_blank_end, @@ -178,6 +316,8 @@ void dcn10_log_hw_state(struct dc *dc) s.v_sync_a_pol, s.v_total_max, s.v_total_min, + s.v_total_max_sel, + s.v_total_min_sel, s.h_blank_start, s.h_blank_end, s.h_sync_a_start, @@ -186,10 +326,25 @@ void dcn10_log_hw_state(struct dc *dc) s.h_total, s.v_total, s.underflow_occurred_status); - DTN_INFO("\n"); + + // Clear underflow for debug purposes + // We want to keep underflow sticky bit on for the longevity tests outside of test environment. + // This function is called only from Windows or Diags test environment, hence it's safe to clear + // it from here without affecting the original intent. + tg->funcs->clear_optc_underflow(tg); } DTN_INFO("\n"); + DTN_INFO("\nCALCULATED Clocks: dcfclk_khz:%d dcfclk_deep_sleep_khz:%d dispclk_khz:%d\n" + "dppclk_khz:%d max_supported_dppclk_khz:%d fclk_khz:%d socclk_khz:%d\n\n", + dc->current_state->bw.dcn.calc_clk.dcfclk_khz, + dc->current_state->bw.dcn.calc_clk.dcfclk_deep_sleep_khz, + dc->current_state->bw.dcn.calc_clk.dispclk_khz, + dc->current_state->bw.dcn.calc_clk.dppclk_khz, + dc->current_state->bw.dcn.calc_clk.max_supported_dppclk_khz, + dc->current_state->bw.dcn.calc_clk.fclk_khz, + dc->current_state->bw.dcn.calc_clk.socclk_khz); + log_mpc_crc(dc); DTN_INFO_END(); @@ -354,7 +509,7 @@ static void power_on_plane( struct dce_hwseq *hws, int plane_id) { - struct dc_context *ctx = hws->ctx; + DC_LOGGER_INIT(hws->ctx->logger); if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); @@ -461,7 +616,7 @@ static void false_optc_underflow_wa( tg->funcs->clear_optc_underflow(tg); } -static enum dc_status dcn10_prog_pixclk_crtc_otg( +static enum dc_status dcn10_enable_stream_timing( struct pipe_ctx *pipe_ctx, struct dc_state *context, struct dc *dc) @@ -553,7 +708,7 @@ static void reset_back_end_for_pipe( struct dc_state *context) { int i; - struct dc_context *ctx = dc->ctx; + DC_LOGGER_INIT(dc->ctx->logger); if (pipe_ctx->stream_res.stream_enc == NULL) { pipe_ctx->stream = NULL; return; @@ -603,6 +758,90 @@ static void reset_back_end_for_pipe( pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst); } +static bool dcn10_hw_wa_force_recovery(struct dc *dc) +{ + struct hubp *hubp ; + unsigned int i; + bool need_recover = true; + + if (!dc->debug.recovery_enabled) + return false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = + &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { + hubp = pipe_ctx->plane_res.hubp; + if (hubp != NULL) { + if (hubp->funcs->hubp_get_underflow_status(hubp) != 0) { + /* one pipe underflow, we will reset all the pipes*/ + need_recover = true; + } + } + } + } + if (!need_recover) + return false; + /* + DCHUBP_CNTL:HUBP_BLANK_EN=1 + DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=1 + DCHUBP_CNTL:HUBP_DISABLE=1 + DCHUBP_CNTL:HUBP_DISABLE=0 + DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=0 + DCSURF_PRIMARY_SURFACE_ADDRESS + DCHUBP_CNTL:HUBP_BLANK_EN=0 + */ + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = + &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { + hubp = pipe_ctx->plane_res.hubp; + /*DCHUBP_CNTL:HUBP_BLANK_EN=1*/ + if (hubp != NULL) + hubp->funcs->set_hubp_blank_en(hubp, true); + } + } + /*DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=1*/ + hubbub1_soft_reset(dc->res_pool->hubbub, true); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = + &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { + hubp = pipe_ctx->plane_res.hubp; + /*DCHUBP_CNTL:HUBP_DISABLE=1*/ + if (hubp != NULL) + hubp->funcs->hubp_disable_control(hubp, true); + } + } + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = + &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { + hubp = pipe_ctx->plane_res.hubp; + /*DCHUBP_CNTL:HUBP_DISABLE=0*/ + if (hubp != NULL) + hubp->funcs->hubp_disable_control(hubp, true); + } + } + /*DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=0*/ + hubbub1_soft_reset(dc->res_pool->hubbub, false); + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = + &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe_ctx != NULL) { + hubp = pipe_ctx->plane_res.hubp; + /*DCHUBP_CNTL:HUBP_BLANK_EN=0*/ + if (hubp != NULL) + hubp->funcs->set_hubp_blank_en(hubp, true); + } + } + return true; + +} + + static void dcn10_verify_allow_pstate_change_high(struct dc *dc) { static bool should_log_hw_state; /* prevent hw state log by default */ @@ -611,13 +850,17 @@ static void dcn10_verify_allow_pstate_change_high(struct dc *dc) if (should_log_hw_state) { dcn10_log_hw_state(dc); } - BREAK_TO_DEBUGGER(); + if (dcn10_hw_wa_force_recovery(dc)) { + /*check again*/ + if (!hubbub1_verify_allow_pstate_change_high(dc->res_pool->hubbub)) + BREAK_TO_DEBUGGER(); + } } } /* trigger HW to start disconnect plane from stream on the next vsync */ -static void plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) +void hwss1_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct hubp *hubp = pipe_ctx->plane_res.hubp; int dpp_id = pipe_ctx->plane_res.dpp->inst; @@ -649,7 +892,7 @@ static void plane_atomic_power_down(struct dc *dc, struct pipe_ctx *pipe_ctx) { struct dce_hwseq *hws = dc->hwseq; struct dpp *dpp = pipe_ctx->plane_res.dpp; - struct dc_context *ctx = dc->ctx; + DC_LOGGER_INIT(dc->ctx->logger); if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, @@ -699,7 +942,7 @@ static void plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) { - struct dc_context *ctx = dc->ctx; + DC_LOGGER_INIT(dc->ctx->logger); if (!pipe_ctx->plane_res.hubp || pipe_ctx->plane_res.hubp->power_gated) return; @@ -800,7 +1043,7 @@ static void dcn10_init_hw(struct dc *dc) dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true; pipe_ctx->stream_res.opp = dc->res_pool->opps[i]; - plane_atomic_disconnect(dc, pipe_ctx); + hwss1_plane_atomic_disconnect(dc, pipe_ctx); } for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -945,9 +1188,8 @@ static bool dcn10_set_input_transfer_func(struct pipe_ctx *pipe_ctx, tf = plane_state->in_transfer_func; if (plane_state->gamma_correction && - plane_state->gamma_correction->is_identity) - dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_BYPASS); - else if (plane_state->gamma_correction && dce_use_lut(plane_state->format)) + !plane_state->gamma_correction->is_identity + && dce_use_lut(plane_state->format)) dpp_base->funcs->dpp_program_input_lut(dpp_base, plane_state->gamma_correction); if (tf == NULL) @@ -1433,7 +1675,7 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx, } } -static void program_output_csc(struct dc *dc, +static void dcn10_program_output_csc(struct dc *dc, struct pipe_ctx *pipe_ctx, enum dc_color_space colorspace, uint16_t *matrix, @@ -1542,22 +1784,22 @@ static uint16_t fixed_point_to_int_frac( uint16_t result; - uint16_t d = (uint16_t)dal_fixed31_32_floor( - dal_fixed31_32_abs( + uint16_t d = (uint16_t)dc_fixpt_floor( + dc_fixpt_abs( arg)); if (d <= (uint16_t)(1 << integer_bits) - (1 / (uint16_t)divisor)) - numerator = (uint16_t)dal_fixed31_32_floor( - dal_fixed31_32_mul_int( + numerator = (uint16_t)dc_fixpt_floor( + dc_fixpt_mul_int( arg, divisor)); else { - numerator = dal_fixed31_32_floor( - dal_fixed31_32_sub( - dal_fixed31_32_from_int( + numerator = dc_fixpt_floor( + dc_fixpt_sub( + dc_fixpt_from_int( 1LL << integer_bits), - dal_fixed31_32_recip( - dal_fixed31_32_from_int( + dc_fixpt_recip( + dc_fixpt_from_int( divisor)))); } @@ -1567,8 +1809,8 @@ static uint16_t fixed_point_to_int_frac( result = (uint16_t)( (1 << (integer_bits + fractional_bits + 1)) + numerator); - if ((result != 0) && dal_fixed31_32_lt( - arg, dal_fixed31_32_zero)) + if ((result != 0) && dc_fixpt_lt( + arg, dc_fixpt_zero)) result |= 1 << (integer_bits + fractional_bits); return result; @@ -1582,8 +1824,8 @@ void build_prescale_params(struct dc_bias_and_scale *bias_and_scale, && plane_state->input_csc_color_matrix.enable_adjustment && plane_state->coeff_reduction_factor.value != 0) { bias_and_scale->scale_blue = fixed_point_to_int_frac( - dal_fixed31_32_mul(plane_state->coeff_reduction_factor, - dal_fixed31_32_from_fraction(256, 255)), + dc_fixpt_mul(plane_state->coeff_reduction_factor, + dc_fixpt_from_fraction(256, 255)), 2, 13); bias_and_scale->scale_red = bias_and_scale->scale_blue; @@ -1623,6 +1865,8 @@ static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); + + /* TODO: proper fix once fpga works */ if (dc->debug.surface_visual_confirm) @@ -1649,6 +1893,7 @@ static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) pipe_ctx->stream->output_color_space) && per_pixel_alpha; + /* * TODO: remove hack * Note: currently there is a bug in init_hw such that @@ -1659,6 +1904,12 @@ static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) */ mpcc_id = hubp->inst; + /* If there is no full update, don't need to touch MPC tree*/ + if (!pipe_ctx->plane_state->update_flags.bits.full_update) { + mpc->funcs->update_blending(mpc, &blnd_cfg, mpcc_id); + return; + } + /* check if this MPCC is already being used */ new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id); /* remove MPCC if being used */ @@ -1777,7 +2028,7 @@ static void update_dchubp_dpp( /*gamut remap*/ program_gamut_remap(pipe_ctx); - program_output_csc(dc, + dc->hwss.program_output_csc(dc, pipe_ctx, pipe_ctx->stream->output_color_space, pipe_ctx->stream->csc_color_matrix.matrix, @@ -1810,9 +2061,9 @@ static void update_dchubp_dpp( hubp->funcs->set_blank(hubp, false); } -static void dcn10_otg_blank( +static void dcn10_blank_pixel_data( struct dc *dc, - struct stream_resource stream_res, + struct stream_resource *stream_res, struct dc_stream_state *stream, bool blank) { @@ -1823,27 +2074,27 @@ static void dcn10_otg_blank( color_space = stream->output_color_space; color_space_to_black_color(dc, color_space, &black_color); - if (stream_res.tg->funcs->set_blank_color) - stream_res.tg->funcs->set_blank_color( - stream_res.tg, + if (stream_res->tg->funcs->set_blank_color) + stream_res->tg->funcs->set_blank_color( + stream_res->tg, &black_color); if (!blank) { - if (stream_res.tg->funcs->set_blank) - stream_res.tg->funcs->set_blank(stream_res.tg, blank); - if (stream_res.abm) - stream_res.abm->funcs->set_abm_level(stream_res.abm, stream->abm_level); + if (stream_res->tg->funcs->set_blank) + stream_res->tg->funcs->set_blank(stream_res->tg, blank); + if (stream_res->abm) + stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level); } else if (blank) { - if (stream_res.abm) - stream_res.abm->funcs->set_abm_immediate_disable(stream_res.abm); - if (stream_res.tg->funcs->set_blank) - stream_res.tg->funcs->set_blank(stream_res.tg, blank); + if (stream_res->abm) + stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm); + if (stream_res->tg->funcs->set_blank) + stream_res->tg->funcs->set_blank(stream_res->tg, blank); } } static void set_hdr_multiplier(struct pipe_ctx *pipe_ctx) { - struct fixed31_32 multiplier = dal_fixed31_32_from_fraction( + struct fixed31_32 multiplier = dc_fixpt_from_fraction( pipe_ctx->plane_state->sdr_white_level, 80); uint32_t hw_mult = 0x1f000; // 1.0 default multiplier struct custom_float_format fmt; @@ -1876,7 +2127,7 @@ static void program_all_pipe_in_tree( pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg); - dcn10_otg_blank(dc, pipe_ctx->stream_res, + dc->hwss.blank_pixel_data(dc, &pipe_ctx->stream_res, pipe_ctx->stream, blank); } @@ -1983,9 +2234,9 @@ static void dcn10_apply_ctx_for_surface( bool removed_pipe[4] = { false }; unsigned int ref_clk_mhz = dc->res_pool->ref_clock_inKhz/1000; bool program_water_mark = false; - struct dc_context *ctx = dc->ctx; struct pipe_ctx *top_pipe_to_program = find_top_pipe_for_stream(dc, context, stream); + DC_LOGGER_INIT(dc->ctx->logger); if (!top_pipe_to_program) return; @@ -1996,7 +2247,7 @@ static void dcn10_apply_ctx_for_surface( if (num_planes == 0) { /* OTG blank before remove all front end */ - dcn10_otg_blank(dc, top_pipe_to_program->stream_res, top_pipe_to_program->stream, true); + dc->hwss.blank_pixel_data(dc, &top_pipe_to_program->stream_res, top_pipe_to_program->stream, true); } /* Disconnect unused mpcc */ @@ -2027,7 +2278,7 @@ static void dcn10_apply_ctx_for_surface( old_pipe_ctx->plane_state && old_pipe_ctx->stream_res.tg == tg) { - plane_atomic_disconnect(dc, old_pipe_ctx); + hwss1_plane_atomic_disconnect(dc, old_pipe_ctx); removed_pipe[i] = true; DC_LOG_DC( @@ -2335,15 +2586,6 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx, set_static_screen_control(pipe_ctx[i]->stream_res.tg, value); } -static void set_plane_config( - const struct dc *dc, - struct pipe_ctx *pipe_ctx, - struct resource_context *res_ctx) -{ - /* TODO */ - program_gamut_remap(pipe_ctx); -} - static void dcn10_config_stereo_parameters( struct dc_stream_state *stream, struct crtc_stereo_flags *flags) { @@ -2521,12 +2763,12 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .init_hw = dcn10_init_hw, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dcn10_apply_ctx_for_surface, - .set_plane_config = set_plane_config, .update_plane_addr = dcn10_update_plane_addr, .update_dchub = dcn10_update_dchub, .update_pending_status = dcn10_update_pending_status, .set_input_transfer_func = dcn10_set_input_transfer_func, .set_output_transfer_func = dcn10_set_output_transfer_func, + .program_output_csc = dcn10_program_output_csc, .power_down = dce110_power_down, .enable_accelerated_mode = dce110_enable_accelerated_mode, .enable_timing_synchronization = dcn10_enable_timing_synchronization, @@ -2538,10 +2780,11 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .blank_stream = dce110_blank_stream, .enable_display_power_gating = dcn10_dummy_display_power_gating, .disable_plane = dcn10_disable_plane, + .blank_pixel_data = dcn10_blank_pixel_data, .pipe_control_lock = dcn10_pipe_control_lock, .set_bandwidth = dcn10_set_bandwidth, .reset_hw_ctx_wrap = reset_hw_ctx_wrap, - .prog_pixclk_crtc_otg = dcn10_prog_pixclk_crtc_otg, + .enable_stream_timing = dcn10_enable_stream_timing, .set_drr = set_drr, .get_position = get_position, .set_static_screen_control = set_static_screen_control, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index 6c526b5095d9..44f734b73f9e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -37,4 +37,6 @@ extern void fill_display_configs( bool is_rgb_cspace(enum dc_color_space output_color_space); +void hwss1_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx); + #endif /* __DC_HWSS_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c new file mode 100644 index 000000000000..21fa40ac0786 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -0,0 +1,1362 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "reg_helper.h" + +#include "core_types.h" +#include "link_encoder.h" +#include "dcn10_link_encoder.h" +#include "stream_encoder.h" +#include "i2caux_interface.h" +#include "dc_bios_types.h" + +#include "gpio_service_interface.h" + +#define CTX \ + enc10->base.ctx +#define DC_LOGGER \ + enc10->base.ctx->logger + +#define REG(reg)\ + (enc10->link_regs->reg) + +#undef FN +#define FN(reg_name, field_name) \ + enc10->link_shift->field_name, enc10->link_mask->field_name + + +/* + * @brief + * Trigger Source Select + * ASIC-dependent, actual values for register programming + */ +#define DCN10_DIG_FE_SOURCE_SELECT_INVALID 0x0 +#define DCN10_DIG_FE_SOURCE_SELECT_DIGA 0x1 +#define DCN10_DIG_FE_SOURCE_SELECT_DIGB 0x2 +#define DCN10_DIG_FE_SOURCE_SELECT_DIGC 0x4 +#define DCN10_DIG_FE_SOURCE_SELECT_DIGD 0x08 +#define DCN10_DIG_FE_SOURCE_SELECT_DIGE 0x10 +#define DCN10_DIG_FE_SOURCE_SELECT_DIGF 0x20 +#define DCN10_DIG_FE_SOURCE_SELECT_DIGG 0x40 + +enum { + DP_MST_UPDATE_MAX_RETRY = 50 +}; + + + +static void aux_initialize(struct dcn10_link_encoder *enc10); + + +static const struct link_encoder_funcs dcn10_lnk_enc_funcs = { + .validate_output_with_stream = + dcn10_link_encoder_validate_output_with_stream, + .hw_init = dcn10_link_encoder_hw_init, + .setup = dcn10_link_encoder_setup, + .enable_tmds_output = dcn10_link_encoder_enable_tmds_output, + .enable_dp_output = dcn10_link_encoder_enable_dp_output, + .enable_dp_mst_output = dcn10_link_encoder_enable_dp_mst_output, + .disable_output = dcn10_link_encoder_disable_output, + .dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings, + .dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern, + .update_mst_stream_allocation_table = + dcn10_link_encoder_update_mst_stream_allocation_table, + .psr_program_dp_dphy_fast_training = + dcn10_psr_program_dp_dphy_fast_training, + .psr_program_secondary_packet = dcn10_psr_program_secondary_packet, + .connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe, + .enable_hpd = dcn10_link_encoder_enable_hpd, + .disable_hpd = dcn10_link_encoder_disable_hpd, + .is_dig_enabled = dcn10_is_dig_enabled, + .destroy = dcn10_link_encoder_destroy +}; + +static enum bp_result link_transmitter_control( + struct dcn10_link_encoder *enc10, + struct bp_transmitter_control *cntl) +{ + enum bp_result result; + struct dc_bios *bp = enc10->base.ctx->dc_bios; + + result = bp->funcs->transmitter_control(bp, cntl); + + return result; +} + +static void enable_phy_bypass_mode( + struct dcn10_link_encoder *enc10, + bool enable) +{ + /* This register resides in DP back end block; + * transmitter is used for the offset + */ + REG_UPDATE(DP_DPHY_CNTL, DPHY_BYPASS, enable); + +} + +static void disable_prbs_symbols( + struct dcn10_link_encoder *enc10, + bool disable) +{ + /* This register resides in DP back end block; + * transmitter is used for the offset + */ + REG_UPDATE_4(DP_DPHY_CNTL, + DPHY_ATEST_SEL_LANE0, disable, + DPHY_ATEST_SEL_LANE1, disable, + DPHY_ATEST_SEL_LANE2, disable, + DPHY_ATEST_SEL_LANE3, disable); +} + +static void disable_prbs_mode( + struct dcn10_link_encoder *enc10) +{ + REG_UPDATE(DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, 0); +} + +static void program_pattern_symbols( + struct dcn10_link_encoder *enc10, + uint16_t pattern_symbols[8]) +{ + /* This register resides in DP back end block; + * transmitter is used for the offset + */ + REG_SET_3(DP_DPHY_SYM0, 0, + DPHY_SYM1, pattern_symbols[0], + DPHY_SYM2, pattern_symbols[1], + DPHY_SYM3, pattern_symbols[2]); + + /* This register resides in DP back end block; + * transmitter is used for the offset + */ + REG_SET_3(DP_DPHY_SYM1, 0, + DPHY_SYM4, pattern_symbols[3], + DPHY_SYM5, pattern_symbols[4], + DPHY_SYM6, pattern_symbols[5]); + + /* This register resides in DP back end block; + * transmitter is used for the offset + */ + REG_SET_2(DP_DPHY_SYM2, 0, + DPHY_SYM7, pattern_symbols[6], + DPHY_SYM8, pattern_symbols[7]); +} + +static void set_dp_phy_pattern_d102( + struct dcn10_link_encoder *enc10) +{ + /* Disable PHY Bypass mode to setup the test pattern */ + enable_phy_bypass_mode(enc10, false); + + /* For 10-bit PRBS or debug symbols + * please use the following sequence: + * + * Enable debug symbols on the lanes + */ + disable_prbs_symbols(enc10, true); + + /* Disable PRBS mode */ + disable_prbs_mode(enc10); + + /* Program debug symbols to be output */ + { + uint16_t pattern_symbols[8] = { + 0x2AA, 0x2AA, 0x2AA, 0x2AA, + 0x2AA, 0x2AA, 0x2AA, 0x2AA + }; + + program_pattern_symbols(enc10, pattern_symbols); + } + + /* Enable phy bypass mode to enable the test pattern */ + + enable_phy_bypass_mode(enc10, true); +} + +static void set_link_training_complete( + struct dcn10_link_encoder *enc10, + bool complete) +{ + /* This register resides in DP back end block; + * transmitter is used for the offset + */ + REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, complete); + +} + +void dcn10_link_encoder_set_dp_phy_pattern_training_pattern( + struct link_encoder *enc, + uint32_t index) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + /* Write Training Pattern */ + + REG_WRITE(DP_DPHY_TRAINING_PATTERN_SEL, index); + + /* Set HW Register Training Complete to false */ + + set_link_training_complete(enc10, false); + + /* Disable PHY Bypass mode to output Training Pattern */ + + enable_phy_bypass_mode(enc10, false); + + /* Disable PRBS mode */ + disable_prbs_mode(enc10); +} + +static void setup_panel_mode( + struct dcn10_link_encoder *enc10, + enum dp_panel_mode panel_mode) +{ + uint32_t value; + + ASSERT(REG(DP_DPHY_INTERNAL_CTRL)); + value = REG_READ(DP_DPHY_INTERNAL_CTRL); + + switch (panel_mode) { + case DP_PANEL_MODE_EDP: + value = 0x1; + break; + case DP_PANEL_MODE_SPECIAL: + value = 0x11; + break; + default: + value = 0x0; + break; + } + + REG_WRITE(DP_DPHY_INTERNAL_CTRL, value); +} + +static void set_dp_phy_pattern_symbol_error( + struct dcn10_link_encoder *enc10) +{ + /* Disable PHY Bypass mode to setup the test pattern */ + enable_phy_bypass_mode(enc10, false); + + /* program correct panel mode*/ + setup_panel_mode(enc10, DP_PANEL_MODE_DEFAULT); + + /* A PRBS23 pattern is used for most DP electrical measurements. */ + + /* Enable PRBS symbols on the lanes */ + disable_prbs_symbols(enc10, false); + + /* For PRBS23 Set bit DPHY_PRBS_SEL=1 and Set bit DPHY_PRBS_EN=1 */ + REG_UPDATE_2(DP_DPHY_PRBS_CNTL, + DPHY_PRBS_SEL, 1, + DPHY_PRBS_EN, 1); + + /* Enable phy bypass mode to enable the test pattern */ + enable_phy_bypass_mode(enc10, true); +} + +static void set_dp_phy_pattern_prbs7( + struct dcn10_link_encoder *enc10) +{ + /* Disable PHY Bypass mode to setup the test pattern */ + enable_phy_bypass_mode(enc10, false); + + /* A PRBS7 pattern is used for most DP electrical measurements. */ + + /* Enable PRBS symbols on the lanes */ + disable_prbs_symbols(enc10, false); + + /* For PRBS7 Set bit DPHY_PRBS_SEL=0 and Set bit DPHY_PRBS_EN=1 */ + REG_UPDATE_2(DP_DPHY_PRBS_CNTL, + DPHY_PRBS_SEL, 0, + DPHY_PRBS_EN, 1); + + /* Enable phy bypass mode to enable the test pattern */ + enable_phy_bypass_mode(enc10, true); +} + +static void set_dp_phy_pattern_80bit_custom( + struct dcn10_link_encoder *enc10, + const uint8_t *pattern) +{ + /* Disable PHY Bypass mode to setup the test pattern */ + enable_phy_bypass_mode(enc10, false); + + /* Enable debug symbols on the lanes */ + + disable_prbs_symbols(enc10, true); + + /* Enable PHY bypass mode to enable the test pattern */ + /* TODO is it really needed ? */ + + enable_phy_bypass_mode(enc10, true); + + /* Program 80 bit custom pattern */ + { + uint16_t pattern_symbols[8]; + + pattern_symbols[0] = + ((pattern[1] & 0x03) << 8) | pattern[0]; + pattern_symbols[1] = + ((pattern[2] & 0x0f) << 6) | ((pattern[1] >> 2) & 0x3f); + pattern_symbols[2] = + ((pattern[3] & 0x3f) << 4) | ((pattern[2] >> 4) & 0x0f); + pattern_symbols[3] = + (pattern[4] << 2) | ((pattern[3] >> 6) & 0x03); + pattern_symbols[4] = + ((pattern[6] & 0x03) << 8) | pattern[5]; + pattern_symbols[5] = + ((pattern[7] & 0x0f) << 6) | ((pattern[6] >> 2) & 0x3f); + pattern_symbols[6] = + ((pattern[8] & 0x3f) << 4) | ((pattern[7] >> 4) & 0x0f); + pattern_symbols[7] = + (pattern[9] << 2) | ((pattern[8] >> 6) & 0x03); + + program_pattern_symbols(enc10, pattern_symbols); + } + + /* Enable phy bypass mode to enable the test pattern */ + + enable_phy_bypass_mode(enc10, true); +} + +static void set_dp_phy_pattern_hbr2_compliance_cp2520_2( + struct dcn10_link_encoder *enc10, + unsigned int cp2520_pattern) +{ + + /* previously there is a register DP_HBR2_EYE_PATTERN + * that is enabled to get the pattern. + * But it does not work with the latest spec change, + * so we are programming the following registers manually. + * + * The following settings have been confirmed + * by Nick Chorney and Sandra Liu + */ + + /* Disable PHY Bypass mode to setup the test pattern */ + + enable_phy_bypass_mode(enc10, false); + + /* Setup DIG encoder in DP SST mode */ + enc10->base.funcs->setup(&enc10->base, SIGNAL_TYPE_DISPLAY_PORT); + + /* ensure normal panel mode. */ + setup_panel_mode(enc10, DP_PANEL_MODE_DEFAULT); + + /* no vbid after BS (SR) + * DP_LINK_FRAMING_CNTL changed history Sandra Liu + * 11000260 / 11000104 / 110000FC + */ + REG_UPDATE_3(DP_LINK_FRAMING_CNTL, + DP_IDLE_BS_INTERVAL, 0xFC, + DP_VBID_DISABLE, 1, + DP_VID_ENHANCED_FRAME_MODE, 1); + + /* swap every BS with SR */ + REG_UPDATE(DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, 0); + + /* select cp2520 patterns */ + if (REG(DP_DPHY_HBR2_PATTERN_CONTROL)) + REG_UPDATE(DP_DPHY_HBR2_PATTERN_CONTROL, + DP_DPHY_HBR2_PATTERN_CONTROL, cp2520_pattern); + else + /* pre-DCE11 can only generate CP2520 pattern 2 */ + ASSERT(cp2520_pattern == 2); + + /* set link training complete */ + set_link_training_complete(enc10, true); + + /* disable video stream */ + REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0); + + /* Disable PHY Bypass mode to setup the test pattern */ + enable_phy_bypass_mode(enc10, false); +} + +static void set_dp_phy_pattern_passthrough_mode( + struct dcn10_link_encoder *enc10, + enum dp_panel_mode panel_mode) +{ + /* program correct panel mode */ + setup_panel_mode(enc10, panel_mode); + + /* restore LINK_FRAMING_CNTL and DPHY_SCRAMBLER_BS_COUNT + * in case we were doing HBR2 compliance pattern before + */ + REG_UPDATE_3(DP_LINK_FRAMING_CNTL, + DP_IDLE_BS_INTERVAL, 0x2000, + DP_VBID_DISABLE, 0, + DP_VID_ENHANCED_FRAME_MODE, 1); + + REG_UPDATE(DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, 0x1FF); + + /* set link training complete */ + set_link_training_complete(enc10, true); + + /* Disable PHY Bypass mode to setup the test pattern */ + enable_phy_bypass_mode(enc10, false); + + /* Disable PRBS mode */ + disable_prbs_mode(enc10); +} + +/* return value is bit-vector */ +static uint8_t get_frontend_source( + enum engine_id engine) +{ + switch (engine) { + case ENGINE_ID_DIGA: + return DCN10_DIG_FE_SOURCE_SELECT_DIGA; + case ENGINE_ID_DIGB: + return DCN10_DIG_FE_SOURCE_SELECT_DIGB; + case ENGINE_ID_DIGC: + return DCN10_DIG_FE_SOURCE_SELECT_DIGC; + case ENGINE_ID_DIGD: + return DCN10_DIG_FE_SOURCE_SELECT_DIGD; + case ENGINE_ID_DIGE: + return DCN10_DIG_FE_SOURCE_SELECT_DIGE; + case ENGINE_ID_DIGF: + return DCN10_DIG_FE_SOURCE_SELECT_DIGF; + case ENGINE_ID_DIGG: + return DCN10_DIG_FE_SOURCE_SELECT_DIGG; + default: + ASSERT_CRITICAL(false); + return DCN10_DIG_FE_SOURCE_SELECT_INVALID; + } +} + +static void configure_encoder( + struct dcn10_link_encoder *enc10, + const struct dc_link_settings *link_settings) +{ + /* set number of lanes */ + + REG_SET(DP_CONFIG, 0, + DP_UDI_LANES, link_settings->lane_count - LANE_COUNT_ONE); + + /* setup scrambler */ + REG_UPDATE(DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, 1); +} + +void dcn10_psr_program_dp_dphy_fast_training(struct link_encoder *enc, + bool exit_link_training_required) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + + if (exit_link_training_required) + REG_UPDATE(DP_DPHY_FAST_TRAINING, + DPHY_RX_FAST_TRAINING_CAPABLE, 1); + else { + REG_UPDATE(DP_DPHY_FAST_TRAINING, + DPHY_RX_FAST_TRAINING_CAPABLE, 0); + /*In DCE 11, we are able to pre-program a Force SR register + * to be able to trigger SR symbol after 5 idle patterns + * transmitted. Upon PSR Exit, DMCU can trigger + * DPHY_LOAD_BS_COUNT_START = 1. Upon writing 1 to + * DPHY_LOAD_BS_COUNT_START and the internal counter + * reaches DPHY_LOAD_BS_COUNT, the next BS symbol will be + * replaced by SR symbol once. + */ + + REG_UPDATE(DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, 0x5); + } +} + +void dcn10_psr_program_secondary_packet(struct link_encoder *enc, + unsigned int sdp_transmit_line_num_deadline) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + + REG_UPDATE_2(DP_SEC_CNTL1, + DP_SEC_GSP0_LINE_NUM, sdp_transmit_line_num_deadline, + DP_SEC_GSP0_PRIORITY, 1); +} + +bool dcn10_is_dig_enabled(struct link_encoder *enc) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + uint32_t value; + + REG_GET(DIG_BE_EN_CNTL, DIG_ENABLE, &value); + return value; +} + +static void link_encoder_disable(struct dcn10_link_encoder *enc10) +{ + /* reset training pattern */ + REG_SET(DP_DPHY_TRAINING_PATTERN_SEL, 0, + DPHY_TRAINING_PATTERN_SEL, 0); + + /* reset training complete */ + REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, 0); + + /* reset panel mode */ + setup_panel_mode(enc10, DP_PANEL_MODE_DEFAULT); +} + +static void hpd_initialize( + struct dcn10_link_encoder *enc10) +{ + /* Associate HPD with DIG_BE */ + enum hpd_source_id hpd_source = enc10->base.hpd_source; + + REG_UPDATE(DIG_BE_CNTL, DIG_HPD_SELECT, hpd_source); +} + +bool dcn10_link_encoder_validate_dvi_output( + const struct dcn10_link_encoder *enc10, + enum signal_type connector_signal, + enum signal_type signal, + const struct dc_crtc_timing *crtc_timing) +{ + uint32_t max_pixel_clock = TMDS_MAX_PIXEL_CLOCK; + + if (signal == SIGNAL_TYPE_DVI_DUAL_LINK) + max_pixel_clock *= 2; + + /* This handles the case of HDMI downgrade to DVI we don't want to + * we don't want to cap the pixel clock if the DDI is not DVI. + */ + if (connector_signal != SIGNAL_TYPE_DVI_DUAL_LINK && + connector_signal != SIGNAL_TYPE_DVI_SINGLE_LINK) + max_pixel_clock = enc10->base.features.max_hdmi_pixel_clock; + + /* DVI only support RGB pixel encoding */ + if (crtc_timing->pixel_encoding != PIXEL_ENCODING_RGB) + return false; + + /*connect DVI via adpater's HDMI connector*/ + if ((connector_signal == SIGNAL_TYPE_DVI_SINGLE_LINK || + connector_signal == SIGNAL_TYPE_HDMI_TYPE_A) && + signal != SIGNAL_TYPE_HDMI_TYPE_A && + crtc_timing->pix_clk_khz > TMDS_MAX_PIXEL_CLOCK) + return false; + if (crtc_timing->pix_clk_khz < TMDS_MIN_PIXEL_CLOCK) + return false; + + if (crtc_timing->pix_clk_khz > max_pixel_clock) + return false; + + /* DVI supports 6/8bpp single-link and 10/16bpp dual-link */ + switch (crtc_timing->display_color_depth) { + case COLOR_DEPTH_666: + case COLOR_DEPTH_888: + break; + case COLOR_DEPTH_101010: + case COLOR_DEPTH_161616: + if (signal != SIGNAL_TYPE_DVI_DUAL_LINK) + return false; + break; + default: + return false; + } + + return true; +} + +static bool dcn10_link_encoder_validate_hdmi_output( + const struct dcn10_link_encoder *enc10, + const struct dc_crtc_timing *crtc_timing, + int adjusted_pix_clk_khz) +{ + enum dc_color_depth max_deep_color = + enc10->base.features.max_hdmi_deep_color; + + if (max_deep_color < crtc_timing->display_color_depth) + return false; + + if (crtc_timing->display_color_depth < COLOR_DEPTH_888) + return false; + if (adjusted_pix_clk_khz < TMDS_MIN_PIXEL_CLOCK) + return false; + + if ((adjusted_pix_clk_khz == 0) || + (adjusted_pix_clk_khz > enc10->base.features.max_hdmi_pixel_clock)) + return false; + + /* DCE11 HW does not support 420 */ + if (!enc10->base.features.ycbcr420_supported && + crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) + return false; + + if (!enc10->base.features.flags.bits.HDMI_6GB_EN && + adjusted_pix_clk_khz >= 300000) + return false; + return true; +} + +bool dcn10_link_encoder_validate_dp_output( + const struct dcn10_link_encoder *enc10, + const struct dc_crtc_timing *crtc_timing) +{ + /* default RGB only */ + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB) + return true; + + if (enc10->base.features.flags.bits.IS_YCBCR_CAPABLE) + return true; + + /* for DCE 8.x or later DP Y-only feature, + * we need ASIC cap + FeatureSupportDPYonly, not support 666 + */ + if (crtc_timing->flags.Y_ONLY && + enc10->base.features.flags.bits.IS_YCBCR_CAPABLE && + crtc_timing->display_color_depth != COLOR_DEPTH_666) + return true; + + return false; +} + +void dcn10_link_encoder_construct( + struct dcn10_link_encoder *enc10, + const struct encoder_init_data *init_data, + const struct encoder_feature_support *enc_features, + const struct dcn10_link_enc_registers *link_regs, + const struct dcn10_link_enc_aux_registers *aux_regs, + const struct dcn10_link_enc_hpd_registers *hpd_regs, + const struct dcn10_link_enc_shift *link_shift, + const struct dcn10_link_enc_mask *link_mask) +{ + struct bp_encoder_cap_info bp_cap_info = {0}; + const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs; + enum bp_result result = BP_RESULT_OK; + + enc10->base.funcs = &dcn10_lnk_enc_funcs; + enc10->base.ctx = init_data->ctx; + enc10->base.id = init_data->encoder; + + enc10->base.hpd_source = init_data->hpd_source; + enc10->base.connector = init_data->connector; + + enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; + + enc10->base.features = *enc_features; + + enc10->base.transmitter = init_data->transmitter; + + /* set the flag to indicate whether driver poll the I2C data pin + * while doing the DP sink detect + */ + +/* if (dal_adapter_service_is_feature_supported(as, + FEATURE_DP_SINK_DETECT_POLL_DATA_PIN)) + enc10->base.features.flags.bits. + DP_SINK_DETECT_POLL_DATA_PIN = true;*/ + + enc10->base.output_signals = + SIGNAL_TYPE_DVI_SINGLE_LINK | + SIGNAL_TYPE_DVI_DUAL_LINK | + SIGNAL_TYPE_LVDS | + SIGNAL_TYPE_DISPLAY_PORT | + SIGNAL_TYPE_DISPLAY_PORT_MST | + SIGNAL_TYPE_EDP | + SIGNAL_TYPE_HDMI_TYPE_A; + + /* For DCE 8.0 and 8.1, by design, UNIPHY is hardwired to DIG_BE. + * SW always assign DIG_FE 1:1 mapped to DIG_FE for non-MST UNIPHY. + * SW assign DIG_FE to non-MST UNIPHY first and MST last. So prefer + * DIG is per UNIPHY and used by SST DP, eDP, HDMI, DVI and LVDS. + * Prefer DIG assignment is decided by board design. + * For DCE 8.0, there are only max 6 UNIPHYs, we assume board design + * and VBIOS will filter out 7 UNIPHY for DCE 8.0. + * By this, adding DIGG should not hurt DCE 8.0. + * This will let DCE 8.1 share DCE 8.0 as much as possible + */ + + enc10->link_regs = link_regs; + enc10->aux_regs = aux_regs; + enc10->hpd_regs = hpd_regs; + enc10->link_shift = link_shift; + enc10->link_mask = link_mask; + + switch (enc10->base.transmitter) { + case TRANSMITTER_UNIPHY_A: + enc10->base.preferred_engine = ENGINE_ID_DIGA; + break; + case TRANSMITTER_UNIPHY_B: + enc10->base.preferred_engine = ENGINE_ID_DIGB; + break; + case TRANSMITTER_UNIPHY_C: + enc10->base.preferred_engine = ENGINE_ID_DIGC; + break; + case TRANSMITTER_UNIPHY_D: + enc10->base.preferred_engine = ENGINE_ID_DIGD; + break; + case TRANSMITTER_UNIPHY_E: + enc10->base.preferred_engine = ENGINE_ID_DIGE; + break; + case TRANSMITTER_UNIPHY_F: + enc10->base.preferred_engine = ENGINE_ID_DIGF; + break; + case TRANSMITTER_UNIPHY_G: + enc10->base.preferred_engine = ENGINE_ID_DIGG; + break; + default: + ASSERT_CRITICAL(false); + enc10->base.preferred_engine = ENGINE_ID_UNKNOWN; + } + + /* default to one to mirror Windows behavior */ + enc10->base.features.flags.bits.HDMI_6GB_EN = 1; + + result = bp_funcs->get_encoder_cap_info(enc10->base.ctx->dc_bios, + enc10->base.id, &bp_cap_info); + + /* Override features with DCE-specific values */ + if (result == BP_RESULT_OK) { + enc10->base.features.flags.bits.IS_HBR2_CAPABLE = + bp_cap_info.DP_HBR2_EN; + enc10->base.features.flags.bits.IS_HBR3_CAPABLE = + bp_cap_info.DP_HBR3_EN; + enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN; + } else { + DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n", + __func__, + result); + } +} + +bool dcn10_link_encoder_validate_output_with_stream( + struct link_encoder *enc, + const struct dc_stream_state *stream) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + bool is_valid; + + switch (stream->signal) { + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + is_valid = dcn10_link_encoder_validate_dvi_output( + enc10, + stream->sink->link->connector_signal, + stream->signal, + &stream->timing); + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + is_valid = dcn10_link_encoder_validate_hdmi_output( + enc10, + &stream->timing, + stream->phy_pix_clk); + break; + case SIGNAL_TYPE_DISPLAY_PORT: + case SIGNAL_TYPE_DISPLAY_PORT_MST: + is_valid = dcn10_link_encoder_validate_dp_output( + enc10, &stream->timing); + break; + case SIGNAL_TYPE_EDP: + is_valid = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ? true : false; + break; + case SIGNAL_TYPE_VIRTUAL: + is_valid = true; + break; + default: + is_valid = false; + break; + } + + return is_valid; +} + +void dcn10_link_encoder_hw_init( + struct link_encoder *enc) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + struct bp_transmitter_control cntl = { 0 }; + enum bp_result result; + + cntl.action = TRANSMITTER_CONTROL_INIT; + cntl.engine_id = ENGINE_ID_UNKNOWN; + cntl.transmitter = enc10->base.transmitter; + cntl.connector_obj_id = enc10->base.connector; + cntl.lanes_number = LANE_COUNT_FOUR; + cntl.coherent = false; + cntl.hpd_sel = enc10->base.hpd_source; + + if (enc10->base.connector.id == CONNECTOR_ID_EDP) + cntl.signal = SIGNAL_TYPE_EDP; + + result = link_transmitter_control(enc10, &cntl); + + if (result != BP_RESULT_OK) { + DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n", + __func__); + BREAK_TO_DEBUGGER(); + return; + } + + if (enc10->base.connector.id == CONNECTOR_ID_LVDS) { + cntl.action = TRANSMITTER_CONTROL_BACKLIGHT_BRIGHTNESS; + + result = link_transmitter_control(enc10, &cntl); + + ASSERT(result == BP_RESULT_OK); + + } + aux_initialize(enc10); + + /* reinitialize HPD. + * hpd_initialize() will pass DIG_FE id to HW context. + * All other routine within HW context will use fe_engine_offset + * as DIG_FE id even caller pass DIG_FE id. + * So this routine must be called first. + */ + hpd_initialize(enc10); +} + +void dcn10_link_encoder_destroy(struct link_encoder **enc) +{ + kfree(TO_DCN10_LINK_ENC(*enc)); + *enc = NULL; +} + +void dcn10_link_encoder_setup( + struct link_encoder *enc, + enum signal_type signal) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + + switch (signal) { + case SIGNAL_TYPE_EDP: + case SIGNAL_TYPE_DISPLAY_PORT: + /* DP SST */ + REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 0); + break; + case SIGNAL_TYPE_LVDS: + /* LVDS */ + REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 1); + break; + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + /* TMDS-DVI */ + REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 2); + break; + case SIGNAL_TYPE_HDMI_TYPE_A: + /* TMDS-HDMI */ + REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 3); + break; + case SIGNAL_TYPE_DISPLAY_PORT_MST: + /* DP MST */ + REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 5); + break; + default: + ASSERT_CRITICAL(false); + /* invalid mode ! */ + break; + } + +} + +/* TODO: still need depth or just pass in adjusted pixel clock? */ +void dcn10_link_encoder_enable_tmds_output( + struct link_encoder *enc, + enum clock_source_id clock_source, + enum dc_color_depth color_depth, + enum signal_type signal, + uint32_t pixel_clock) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + struct bp_transmitter_control cntl = { 0 }; + enum bp_result result; + + /* Enable the PHY */ + + cntl.action = TRANSMITTER_CONTROL_ENABLE; + cntl.engine_id = enc->preferred_engine; + cntl.transmitter = enc10->base.transmitter; + cntl.pll_id = clock_source; + cntl.signal = signal; + if (cntl.signal == SIGNAL_TYPE_DVI_DUAL_LINK) + cntl.lanes_number = 8; + else + cntl.lanes_number = 4; + + cntl.hpd_sel = enc10->base.hpd_source; + + cntl.pixel_clock = pixel_clock; + cntl.color_depth = color_depth; + + result = link_transmitter_control(enc10, &cntl); + + if (result != BP_RESULT_OK) { + DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n", + __func__); + BREAK_TO_DEBUGGER(); + } +} + +/* enables DP PHY output */ +void dcn10_link_encoder_enable_dp_output( + struct link_encoder *enc, + const struct dc_link_settings *link_settings, + enum clock_source_id clock_source) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + struct bp_transmitter_control cntl = { 0 }; + enum bp_result result; + + /* Enable the PHY */ + + /* number_of_lanes is used for pixel clock adjust, + * but it's not passed to asic_control. + * We need to set number of lanes manually. + */ + configure_encoder(enc10, link_settings); + + cntl.action = TRANSMITTER_CONTROL_ENABLE; + cntl.engine_id = enc->preferred_engine; + cntl.transmitter = enc10->base.transmitter; + cntl.pll_id = clock_source; + cntl.signal = SIGNAL_TYPE_DISPLAY_PORT; + cntl.lanes_number = link_settings->lane_count; + cntl.hpd_sel = enc10->base.hpd_source; + cntl.pixel_clock = link_settings->link_rate + * LINK_RATE_REF_FREQ_IN_KHZ; + /* TODO: check if undefined works */ + cntl.color_depth = COLOR_DEPTH_UNDEFINED; + + result = link_transmitter_control(enc10, &cntl); + + if (result != BP_RESULT_OK) { + DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n", + __func__); + BREAK_TO_DEBUGGER(); + } +} + +/* enables DP PHY output in MST mode */ +void dcn10_link_encoder_enable_dp_mst_output( + struct link_encoder *enc, + const struct dc_link_settings *link_settings, + enum clock_source_id clock_source) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + struct bp_transmitter_control cntl = { 0 }; + enum bp_result result; + + /* Enable the PHY */ + + /* number_of_lanes is used for pixel clock adjust, + * but it's not passed to asic_control. + * We need to set number of lanes manually. + */ + configure_encoder(enc10, link_settings); + + cntl.action = TRANSMITTER_CONTROL_ENABLE; + cntl.engine_id = ENGINE_ID_UNKNOWN; + cntl.transmitter = enc10->base.transmitter; + cntl.pll_id = clock_source; + cntl.signal = SIGNAL_TYPE_DISPLAY_PORT_MST; + cntl.lanes_number = link_settings->lane_count; + cntl.hpd_sel = enc10->base.hpd_source; + cntl.pixel_clock = link_settings->link_rate + * LINK_RATE_REF_FREQ_IN_KHZ; + /* TODO: check if undefined works */ + cntl.color_depth = COLOR_DEPTH_UNDEFINED; + + result = link_transmitter_control(enc10, &cntl); + + if (result != BP_RESULT_OK) { + DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n", + __func__); + BREAK_TO_DEBUGGER(); + } +} +/* + * @brief + * Disable transmitter and its encoder + */ +void dcn10_link_encoder_disable_output( + struct link_encoder *enc, + enum signal_type signal) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + struct bp_transmitter_control cntl = { 0 }; + enum bp_result result; + + if (!dcn10_is_dig_enabled(enc)) { + /* OF_SKIP_POWER_DOWN_INACTIVE_ENCODER */ + return; + } + /* Power-down RX and disable GPU PHY should be paired. + * Disabling PHY without powering down RX may cause + * symbol lock loss, on which we will get DP Sink interrupt. + */ + + /* There is a case for the DP active dongles + * where we want to disable the PHY but keep RX powered, + * for those we need to ignore DP Sink interrupt + * by checking lane count that has been set + * on the last do_enable_output(). + */ + + /* disable transmitter */ + cntl.action = TRANSMITTER_CONTROL_DISABLE; + cntl.transmitter = enc10->base.transmitter; + cntl.hpd_sel = enc10->base.hpd_source; + cntl.signal = signal; + cntl.connector_obj_id = enc10->base.connector; + + result = link_transmitter_control(enc10, &cntl); + + if (result != BP_RESULT_OK) { + DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n", + __func__); + BREAK_TO_DEBUGGER(); + return; + } + + /* disable encoder */ + if (dc_is_dp_signal(signal)) + link_encoder_disable(enc10); +} + +void dcn10_link_encoder_dp_set_lane_settings( + struct link_encoder *enc, + const struct link_training_settings *link_settings) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + union dpcd_training_lane_set training_lane_set = { { 0 } }; + int32_t lane = 0; + struct bp_transmitter_control cntl = { 0 }; + + if (!link_settings) { + BREAK_TO_DEBUGGER(); + return; + } + + cntl.action = TRANSMITTER_CONTROL_SET_VOLTAGE_AND_PREEMPASIS; + cntl.transmitter = enc10->base.transmitter; + cntl.connector_obj_id = enc10->base.connector; + cntl.lanes_number = link_settings->link_settings.lane_count; + cntl.hpd_sel = enc10->base.hpd_source; + cntl.pixel_clock = link_settings->link_settings.link_rate * + LINK_RATE_REF_FREQ_IN_KHZ; + + for (lane = 0; lane < link_settings->link_settings.lane_count; lane++) { + /* translate lane settings */ + + training_lane_set.bits.VOLTAGE_SWING_SET = + link_settings->lane_settings[lane].VOLTAGE_SWING; + training_lane_set.bits.PRE_EMPHASIS_SET = + link_settings->lane_settings[lane].PRE_EMPHASIS; + + /* post cursor 2 setting only applies to HBR2 link rate */ + if (link_settings->link_settings.link_rate == LINK_RATE_HIGH2) { + /* this is passed to VBIOS + * to program post cursor 2 level + */ + training_lane_set.bits.POST_CURSOR2_SET = + link_settings->lane_settings[lane].POST_CURSOR2; + } + + cntl.lane_select = lane; + cntl.lane_settings = training_lane_set.raw; + + /* call VBIOS table to set voltage swing and pre-emphasis */ + link_transmitter_control(enc10, &cntl); + } +} + +/* set DP PHY test and training patterns */ +void dcn10_link_encoder_dp_set_phy_pattern( + struct link_encoder *enc, + const struct encoder_set_dp_phy_pattern_param *param) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + + switch (param->dp_phy_pattern) { + case DP_TEST_PATTERN_TRAINING_PATTERN1: + dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 0); + break; + case DP_TEST_PATTERN_TRAINING_PATTERN2: + dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 1); + break; + case DP_TEST_PATTERN_TRAINING_PATTERN3: + dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 2); + break; + case DP_TEST_PATTERN_TRAINING_PATTERN4: + dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 3); + break; + case DP_TEST_PATTERN_D102: + set_dp_phy_pattern_d102(enc10); + break; + case DP_TEST_PATTERN_SYMBOL_ERROR: + set_dp_phy_pattern_symbol_error(enc10); + break; + case DP_TEST_PATTERN_PRBS7: + set_dp_phy_pattern_prbs7(enc10); + break; + case DP_TEST_PATTERN_80BIT_CUSTOM: + set_dp_phy_pattern_80bit_custom( + enc10, param->custom_pattern); + break; + case DP_TEST_PATTERN_CP2520_1: + set_dp_phy_pattern_hbr2_compliance_cp2520_2(enc10, 1); + break; + case DP_TEST_PATTERN_CP2520_2: + set_dp_phy_pattern_hbr2_compliance_cp2520_2(enc10, 2); + break; + case DP_TEST_PATTERN_CP2520_3: + set_dp_phy_pattern_hbr2_compliance_cp2520_2(enc10, 3); + break; + case DP_TEST_PATTERN_VIDEO_MODE: { + set_dp_phy_pattern_passthrough_mode( + enc10, param->dp_panel_mode); + break; + } + + default: + /* invalid phy pattern */ + ASSERT_CRITICAL(false); + break; + } +} + +static void fill_stream_allocation_row_info( + const struct link_mst_stream_allocation *stream_allocation, + uint32_t *src, + uint32_t *slots) +{ + const struct stream_encoder *stream_enc = stream_allocation->stream_enc; + + if (stream_enc) { + *src = stream_enc->id; + *slots = stream_allocation->slot_count; + } else { + *src = 0; + *slots = 0; + } +} + +/* programs DP MST VC payload allocation */ +void dcn10_link_encoder_update_mst_stream_allocation_table( + struct link_encoder *enc, + const struct link_mst_stream_allocation_table *table) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + uint32_t value0 = 0; + uint32_t value1 = 0; + uint32_t value2 = 0; + uint32_t slots = 0; + uint32_t src = 0; + uint32_t retries = 0; + + /* For CZ, there are only 3 pipes. So Virtual channel is up 3.*/ + + /* --- Set MSE Stream Attribute - + * Setup VC Payload Table on Tx Side, + * Issue allocation change trigger + * to commit payload on both tx and rx side + */ + + /* we should clean-up table each time */ + + if (table->stream_count >= 1) { + fill_stream_allocation_row_info( + &table->stream_allocations[0], + &src, + &slots); + } else { + src = 0; + slots = 0; + } + + REG_UPDATE_2(DP_MSE_SAT0, + DP_MSE_SAT_SRC0, src, + DP_MSE_SAT_SLOT_COUNT0, slots); + + if (table->stream_count >= 2) { + fill_stream_allocation_row_info( + &table->stream_allocations[1], + &src, + &slots); + } else { + src = 0; + slots = 0; + } + + REG_UPDATE_2(DP_MSE_SAT0, + DP_MSE_SAT_SRC1, src, + DP_MSE_SAT_SLOT_COUNT1, slots); + + if (table->stream_count >= 3) { + fill_stream_allocation_row_info( + &table->stream_allocations[2], + &src, + &slots); + } else { + src = 0; + slots = 0; + } + + REG_UPDATE_2(DP_MSE_SAT1, + DP_MSE_SAT_SRC2, src, + DP_MSE_SAT_SLOT_COUNT2, slots); + + if (table->stream_count >= 4) { + fill_stream_allocation_row_info( + &table->stream_allocations[3], + &src, + &slots); + } else { + src = 0; + slots = 0; + } + + REG_UPDATE_2(DP_MSE_SAT1, + DP_MSE_SAT_SRC3, src, + DP_MSE_SAT_SLOT_COUNT3, slots); + + /* --- wait for transaction finish */ + + /* send allocation change trigger (ACT) ? + * this step first sends the ACT, + * then double buffers the SAT into the hardware + * making the new allocation active on the DP MST mode link + */ + + /* DP_MSE_SAT_UPDATE: + * 0 - No Action + * 1 - Update SAT with trigger + * 2 - Update SAT without trigger + */ + REG_UPDATE(DP_MSE_SAT_UPDATE, + DP_MSE_SAT_UPDATE, 1); + + /* wait for update to complete + * (i.e. DP_MSE_SAT_UPDATE field is reset to 0) + * then wait for the transmission + * of at least 16 MTP headers on immediate local link. + * i.e. DP_MSE_16_MTP_KEEPOUT field (read only) is reset to 0 + * a value of 1 indicates that DP MST mode + * is in the 16 MTP keepout region after a VC has been added. + * MST stream bandwidth (VC rate) can be configured + * after this bit is cleared + */ + do { + udelay(10); + + value0 = REG_READ(DP_MSE_SAT_UPDATE); + + REG_GET(DP_MSE_SAT_UPDATE, + DP_MSE_SAT_UPDATE, &value1); + + REG_GET(DP_MSE_SAT_UPDATE, + DP_MSE_16_MTP_KEEPOUT, &value2); + + /* bit field DP_MSE_SAT_UPDATE is set to 1 already */ + if (!value1 && !value2) + break; + ++retries; + } while (retries < DP_MST_UPDATE_MAX_RETRY); +} + +void dcn10_link_encoder_connect_dig_be_to_fe( + struct link_encoder *enc, + enum engine_id engine, + bool connect) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + uint32_t field; + + if (engine != ENGINE_ID_UNKNOWN) { + + REG_GET(DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, &field); + + if (connect) + field |= get_frontend_source(engine); + else + field &= ~get_frontend_source(engine); + + REG_UPDATE(DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, field); + } +} + + +#define HPD_REG(reg)\ + (enc10->hpd_regs->reg) + +#define HPD_REG_READ(reg_name) \ + dm_read_reg(CTX, HPD_REG(reg_name)) + +#define HPD_REG_UPDATE_N(reg_name, n, ...) \ + generic_reg_update_ex(CTX, \ + HPD_REG(reg_name), \ + HPD_REG_READ(reg_name), \ + n, __VA_ARGS__) + +#define HPD_REG_UPDATE(reg_name, field, val) \ + HPD_REG_UPDATE_N(reg_name, 1, \ + FN(reg_name, field), val) + +void dcn10_link_encoder_enable_hpd(struct link_encoder *enc) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + + HPD_REG_UPDATE(DC_HPD_CONTROL, + DC_HPD_EN, 1); +} + +void dcn10_link_encoder_disable_hpd(struct link_encoder *enc) +{ + struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc); + + HPD_REG_UPDATE(DC_HPD_CONTROL, + DC_HPD_EN, 0); +} + + +#define AUX_REG(reg)\ + (enc10->aux_regs->reg) + +#define AUX_REG_READ(reg_name) \ + dm_read_reg(CTX, AUX_REG(reg_name)) + +#define AUX_REG_UPDATE_N(reg_name, n, ...) \ + generic_reg_update_ex(CTX, \ + AUX_REG(reg_name), \ + AUX_REG_READ(reg_name), \ + n, __VA_ARGS__) + +#define AUX_REG_UPDATE(reg_name, field, val) \ + AUX_REG_UPDATE_N(reg_name, 1, \ + FN(reg_name, field), val) + +#define AUX_REG_UPDATE_2(reg, f1, v1, f2, v2) \ + AUX_REG_UPDATE_N(reg, 2,\ + FN(reg, f1), v1,\ + FN(reg, f2), v2) + +static void aux_initialize( + struct dcn10_link_encoder *enc10) +{ + enum hpd_source_id hpd_source = enc10->base.hpd_source; + + AUX_REG_UPDATE_2(AUX_CONTROL, + AUX_HPD_SEL, hpd_source, + AUX_LS_READ_EN, 0); + + /* 1/4 window (the maximum allowed) */ + AUX_REG_UPDATE(AUX_DPHY_RX_CONTROL0, + AUX_RX_RECEIVE_WINDOW, 1); +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h new file mode 100644 index 000000000000..2a97cdb2cfbb --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h @@ -0,0 +1,330 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_LINK_ENCODER__DCN10_H__ +#define __DC_LINK_ENCODER__DCN10_H__ + +#include "link_encoder.h" + +#define TO_DCN10_LINK_ENC(link_encoder)\ + container_of(link_encoder, struct dcn10_link_encoder, base) + + +#define AUX_REG_LIST(id)\ + SRI(AUX_CONTROL, DP_AUX, id), \ + SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id) + +#define HPD_REG_LIST(id)\ + SRI(DC_HPD_CONTROL, HPD, id) + +#define LE_DCN_COMMON_REG_LIST(id) \ + SRI(DIG_BE_CNTL, DIG, id), \ + SRI(DIG_BE_EN_CNTL, DIG, id), \ + SRI(DP_CONFIG, DP, id), \ + SRI(DP_DPHY_CNTL, DP, id), \ + SRI(DP_DPHY_PRBS_CNTL, DP, id), \ + SRI(DP_DPHY_SCRAM_CNTL, DP, id),\ + SRI(DP_DPHY_SYM0, DP, id), \ + SRI(DP_DPHY_SYM1, DP, id), \ + SRI(DP_DPHY_SYM2, DP, id), \ + SRI(DP_DPHY_TRAINING_PATTERN_SEL, DP, id), \ + SRI(DP_LINK_CNTL, DP, id), \ + SRI(DP_LINK_FRAMING_CNTL, DP, id), \ + SRI(DP_MSE_SAT0, DP, id), \ + SRI(DP_MSE_SAT1, DP, id), \ + SRI(DP_MSE_SAT2, DP, id), \ + SRI(DP_MSE_SAT_UPDATE, DP, id), \ + SRI(DP_SEC_CNTL, DP, id), \ + SRI(DP_VID_STREAM_CNTL, DP, id), \ + SRI(DP_DPHY_FAST_TRAINING, DP, id), \ + SRI(DP_SEC_CNTL1, DP, id), \ + SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \ + SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \ + SRI(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id) + +#define LE_DCN10_REG_LIST(id)\ + LE_DCN_COMMON_REG_LIST(id) + +struct dcn10_link_enc_aux_registers { + uint32_t AUX_CONTROL; + uint32_t AUX_DPHY_RX_CONTROL0; +}; + +struct dcn10_link_enc_hpd_registers { + uint32_t DC_HPD_CONTROL; +}; + +struct dcn10_link_enc_registers { + uint32_t DIG_BE_CNTL; + uint32_t DIG_BE_EN_CNTL; + uint32_t DP_CONFIG; + uint32_t DP_DPHY_CNTL; + uint32_t DP_DPHY_INTERNAL_CTRL; + uint32_t DP_DPHY_PRBS_CNTL; + uint32_t DP_DPHY_SCRAM_CNTL; + uint32_t DP_DPHY_SYM0; + uint32_t DP_DPHY_SYM1; + uint32_t DP_DPHY_SYM2; + uint32_t DP_DPHY_TRAINING_PATTERN_SEL; + uint32_t DP_LINK_CNTL; + uint32_t DP_LINK_FRAMING_CNTL; + uint32_t DP_MSE_SAT0; + uint32_t DP_MSE_SAT1; + uint32_t DP_MSE_SAT2; + uint32_t DP_MSE_SAT_UPDATE; + uint32_t DP_SEC_CNTL; + uint32_t DP_VID_STREAM_CNTL; + uint32_t DP_DPHY_FAST_TRAINING; + uint32_t DP_DPHY_BS_SR_SWAP_CNTL; + uint32_t DP_DPHY_HBR2_PATTERN_CONTROL; + uint32_t DP_SEC_CNTL1; +}; + +#define LE_SF(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define LINK_ENCODER_MASK_SH_LIST_DCN10(mask_sh)\ + LE_SF(DIG0_DIG_BE_EN_CNTL, DIG_ENABLE, mask_sh),\ + LE_SF(DIG0_DIG_BE_CNTL, DIG_HPD_SELECT, mask_sh),\ + LE_SF(DIG0_DIG_BE_CNTL, DIG_MODE, mask_sh),\ + LE_SF(DIG0_DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_BYPASS, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE0, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE1, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE2, mask_sh),\ + LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE3, mask_sh),\ + LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, mask_sh),\ + LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_SEL, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM1, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM2, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM3, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM4, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM5, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM6, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM7, mask_sh),\ + LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM8, mask_sh),\ + LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, mask_sh),\ + LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, mask_sh),\ + LE_SF(DP0_DP_DPHY_FAST_TRAINING, DPHY_RX_FAST_TRAINING_CAPABLE, mask_sh),\ + LE_SF(DP0_DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, mask_sh),\ + LE_SF(DP0_DP_DPHY_TRAINING_PATTERN_SEL, DPHY_TRAINING_PATTERN_SEL, mask_sh),\ + LE_SF(DP0_DP_DPHY_HBR2_PATTERN_CONTROL, DP_DPHY_HBR2_PATTERN_CONTROL, mask_sh),\ + LE_SF(DP0_DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, mask_sh),\ + LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_IDLE_BS_INTERVAL, mask_sh),\ + LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VBID_DISABLE, mask_sh),\ + LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VID_ENHANCED_FRAME_MODE, mask_sh),\ + LE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\ + LE_SF(DP0_DP_CONFIG, DP_UDI_LANES, mask_sh),\ + LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_LINE_NUM, mask_sh),\ + LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_PRIORITY, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC0, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC1, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT0, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT1, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC2, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC3, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT2, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT3, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_SAT_UPDATE, mask_sh),\ + LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_16_MTP_KEEPOUT, mask_sh),\ + LE_SF(DP_AUX0_AUX_CONTROL, AUX_HPD_SEL, mask_sh),\ + LE_SF(DP_AUX0_AUX_CONTROL, AUX_LS_READ_EN, mask_sh),\ + LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_RECEIVE_WINDOW, mask_sh),\ + LE_SF(HPD0_DC_HPD_CONTROL, DC_HPD_EN, mask_sh) + +#define DCN_LINK_ENCODER_REG_FIELD_LIST(type) \ + type DIG_ENABLE;\ + type DIG_HPD_SELECT;\ + type DIG_MODE;\ + type DIG_FE_SOURCE_SELECT;\ + type DPHY_BYPASS;\ + type DPHY_ATEST_SEL_LANE0;\ + type DPHY_ATEST_SEL_LANE1;\ + type DPHY_ATEST_SEL_LANE2;\ + type DPHY_ATEST_SEL_LANE3;\ + type DPHY_PRBS_EN;\ + type DPHY_PRBS_SEL;\ + type DPHY_SYM1;\ + type DPHY_SYM2;\ + type DPHY_SYM3;\ + type DPHY_SYM4;\ + type DPHY_SYM5;\ + type DPHY_SYM6;\ + type DPHY_SYM7;\ + type DPHY_SYM8;\ + type DPHY_SCRAMBLER_BS_COUNT;\ + type DPHY_SCRAMBLER_ADVANCE;\ + type DPHY_RX_FAST_TRAINING_CAPABLE;\ + type DPHY_LOAD_BS_COUNT;\ + type DPHY_TRAINING_PATTERN_SEL;\ + type DP_DPHY_HBR2_PATTERN_CONTROL;\ + type DP_LINK_TRAINING_COMPLETE;\ + type DP_IDLE_BS_INTERVAL;\ + type DP_VBID_DISABLE;\ + type DP_VID_ENHANCED_FRAME_MODE;\ + type DP_VID_STREAM_ENABLE;\ + type DP_UDI_LANES;\ + type DP_SEC_GSP0_LINE_NUM;\ + type DP_SEC_GSP0_PRIORITY;\ + type DP_MSE_SAT_SRC0;\ + type DP_MSE_SAT_SRC1;\ + type DP_MSE_SAT_SRC2;\ + type DP_MSE_SAT_SRC3;\ + type DP_MSE_SAT_SLOT_COUNT0;\ + type DP_MSE_SAT_SLOT_COUNT1;\ + type DP_MSE_SAT_SLOT_COUNT2;\ + type DP_MSE_SAT_SLOT_COUNT3;\ + type DP_MSE_SAT_UPDATE;\ + type DP_MSE_16_MTP_KEEPOUT;\ + type AUX_HPD_SEL;\ + type AUX_LS_READ_EN;\ + type AUX_RX_RECEIVE_WINDOW;\ + type DC_HPD_EN + +struct dcn10_link_enc_shift { + DCN_LINK_ENCODER_REG_FIELD_LIST(uint8_t); +}; + +struct dcn10_link_enc_mask { + DCN_LINK_ENCODER_REG_FIELD_LIST(uint32_t); +}; + +struct dcn10_link_encoder { + struct link_encoder base; + const struct dcn10_link_enc_registers *link_regs; + const struct dcn10_link_enc_aux_registers *aux_regs; + const struct dcn10_link_enc_hpd_registers *hpd_regs; + const struct dcn10_link_enc_shift *link_shift; + const struct dcn10_link_enc_mask *link_mask; +}; + + +void dcn10_link_encoder_construct( + struct dcn10_link_encoder *enc10, + const struct encoder_init_data *init_data, + const struct encoder_feature_support *enc_features, + const struct dcn10_link_enc_registers *link_regs, + const struct dcn10_link_enc_aux_registers *aux_regs, + const struct dcn10_link_enc_hpd_registers *hpd_regs, + const struct dcn10_link_enc_shift *link_shift, + const struct dcn10_link_enc_mask *link_mask); + +bool dcn10_link_encoder_validate_dvi_output( + const struct dcn10_link_encoder *enc10, + enum signal_type connector_signal, + enum signal_type signal, + const struct dc_crtc_timing *crtc_timing); + +bool dcn10_link_encoder_validate_rgb_output( + const struct dcn10_link_encoder *enc10, + const struct dc_crtc_timing *crtc_timing); + +bool dcn10_link_encoder_validate_dp_output( + const struct dcn10_link_encoder *enc10, + const struct dc_crtc_timing *crtc_timing); + +bool dcn10_link_encoder_validate_wireless_output( + const struct dcn10_link_encoder *enc10, + const struct dc_crtc_timing *crtc_timing); + +bool dcn10_link_encoder_validate_output_with_stream( + struct link_encoder *enc, + const struct dc_stream_state *stream); + +/****************** HW programming ************************/ + +/* initialize HW */ /* why do we initialze aux in here? */ +void dcn10_link_encoder_hw_init(struct link_encoder *enc); + +void dcn10_link_encoder_destroy(struct link_encoder **enc); + +/* program DIG_MODE in DIG_BE */ +/* TODO can this be combined with enable_output? */ +void dcn10_link_encoder_setup( + struct link_encoder *enc, + enum signal_type signal); + +/* enables TMDS PHY output */ +/* TODO: still need depth or just pass in adjusted pixel clock? */ +void dcn10_link_encoder_enable_tmds_output( + struct link_encoder *enc, + enum clock_source_id clock_source, + enum dc_color_depth color_depth, + enum signal_type signal, + uint32_t pixel_clock); + +/* enables DP PHY output */ +void dcn10_link_encoder_enable_dp_output( + struct link_encoder *enc, + const struct dc_link_settings *link_settings, + enum clock_source_id clock_source); + +/* enables DP PHY output in MST mode */ +void dcn10_link_encoder_enable_dp_mst_output( + struct link_encoder *enc, + const struct dc_link_settings *link_settings, + enum clock_source_id clock_source); + +/* disable PHY output */ +void dcn10_link_encoder_disable_output( + struct link_encoder *enc, + enum signal_type signal); + +/* set DP lane settings */ +void dcn10_link_encoder_dp_set_lane_settings( + struct link_encoder *enc, + const struct link_training_settings *link_settings); + +void dcn10_link_encoder_dp_set_phy_pattern( + struct link_encoder *enc, + const struct encoder_set_dp_phy_pattern_param *param); + +/* programs DP MST VC payload allocation */ +void dcn10_link_encoder_update_mst_stream_allocation_table( + struct link_encoder *enc, + const struct link_mst_stream_allocation_table *table); + +void dcn10_link_encoder_connect_dig_be_to_fe( + struct link_encoder *enc, + enum engine_id engine, + bool connect); + +void dcn10_link_encoder_set_dp_phy_pattern_training_pattern( + struct link_encoder *enc, + uint32_t index); + +void dcn10_link_encoder_enable_hpd(struct link_encoder *enc); + +void dcn10_link_encoder_disable_hpd(struct link_encoder *enc); + +void dcn10_psr_program_dp_dphy_fast_training(struct link_encoder *enc, + bool exit_link_training_required); + +void dcn10_psr_program_secondary_packet(struct link_encoder *enc, + unsigned int sdp_transmit_line_num_deadline); + +bool dcn10_is_dig_enabled(struct link_encoder *enc); + +#endif /* __DC_LINK_ENCODER__DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 179890b1a8c4..9ca51ae46de7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -65,6 +65,7 @@ static void mpc1_update_blending( int mpcc_id) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + struct mpcc *mpcc = mpc1_get_mpcc(mpc, mpcc_id); REG_UPDATE_5(MPCC_CONTROL[mpcc_id], MPCC_ALPHA_BLND_MODE, blnd_cfg->alpha_mode, @@ -74,6 +75,7 @@ static void mpc1_update_blending( MPCC_GLOBAL_GAIN, blnd_cfg->global_gain); mpc1_set_bg_color(mpc, &blnd_cfg->black_color, mpcc_id); + mpcc->blnd_cfg = *blnd_cfg; } void mpc1_update_stereo_mix( @@ -235,8 +237,7 @@ struct mpcc *mpc1_insert_plane( } /* update the blending configuration */ - new_mpcc->blnd_cfg = *blnd_cfg; - mpc->funcs->update_blending(mpc, &new_mpcc->blnd_cfg, mpcc_id); + mpc->funcs->update_blending(mpc, blnd_cfg, mpcc_id); /* update the stereo mix settings, if provided */ if (sm_cfg != NULL) { @@ -409,7 +410,26 @@ void mpc1_init_mpcc_list_from_hw( } } +void mpc1_read_mpcc_state( + struct mpc *mpc, + int mpcc_inst, + struct mpcc_state *s) +{ + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + + REG_GET(MPCC_OPP_ID[mpcc_inst], MPCC_OPP_ID, &s->opp_id); + REG_GET(MPCC_TOP_SEL[mpcc_inst], MPCC_TOP_SEL, &s->dpp_id); + REG_GET(MPCC_BOT_SEL[mpcc_inst], MPCC_BOT_SEL, &s->bot_mpcc_id); + REG_GET_4(MPCC_CONTROL[mpcc_inst], MPCC_MODE, &s->mode, + MPCC_ALPHA_BLND_MODE, &s->alpha_mode, + MPCC_ALPHA_MULTIPLIED_MODE, &s->pre_multiplied_alpha, + MPCC_BLND_ACTIVE_OVERLAP_ONLY, &s->overlap_only); + REG_GET_2(MPCC_STATUS[mpcc_inst], MPCC_IDLE, &s->idle, + MPCC_BUSY, &s->busy); +} + const struct mpc_funcs dcn10_mpc_funcs = { + .read_mpcc_state = mpc1_read_mpcc_state, .insert_plane = mpc1_insert_plane, .remove_mpcc = mpc1_remove_mpcc, .mpc_init = mpc1_mpc_init, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h index 267a2995ef6e..d3d16c4cbea3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h @@ -183,4 +183,9 @@ struct mpcc *mpc1_get_mpcc_for_dpp( struct mpc_tree *tree, int dpp_id); +void mpc1_read_mpcc_state( + struct mpc *mpc, + int mpcc_inst, + struct mpcc_state *s); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 4bf64d1b2c60..f2fbce0e3fc5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -93,6 +93,81 @@ static void optc1_disable_stereo(struct timing_generator *optc) OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0); } +static uint32_t get_start_vline(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) +{ + struct dc_crtc_timing patched_crtc_timing; + int vesa_sync_start; + int asic_blank_end; + int interlace_factor; + int vertical_line_start; + + patched_crtc_timing = *dc_crtc_timing; + optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); + + vesa_sync_start = patched_crtc_timing.h_addressable + + patched_crtc_timing.h_border_right + + patched_crtc_timing.h_front_porch; + + asic_blank_end = patched_crtc_timing.h_total - + vesa_sync_start - + patched_crtc_timing.h_border_left; + + interlace_factor = patched_crtc_timing.flags.INTERLACE ? 2 : 1; + + vesa_sync_start = patched_crtc_timing.v_addressable + + patched_crtc_timing.v_border_bottom + + patched_crtc_timing.v_front_porch; + + asic_blank_end = (patched_crtc_timing.v_total - + vesa_sync_start - + patched_crtc_timing.v_border_top) + * interlace_factor; + + vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; + if (vertical_line_start < 0) { + ASSERT(0); + vertical_line_start = 0; + } + + return vertical_line_start; +} + +void optc1_program_vline_interrupt( + struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + unsigned long long vsync_delta) +{ + + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + unsigned long long req_delta_tens_of_usec = div64_u64((vsync_delta + 9999), 10000); + unsigned long long pix_clk_hundreds_khz = div64_u64((dc_crtc_timing->pix_clk_khz + 99), 100); + uint32_t req_delta_lines = (uint32_t) div64_u64( + (req_delta_tens_of_usec * pix_clk_hundreds_khz + dc_crtc_timing->h_total - 1), + dc_crtc_timing->h_total); + + uint32_t vsync_line = get_start_vline(optc, dc_crtc_timing); + uint32_t start_line = 0; + uint32_t endLine = 0; + + if (req_delta_lines != 0) + req_delta_lines--; + + if (req_delta_lines > vsync_line) + start_line = dc_crtc_timing->v_total - (req_delta_lines - vsync_line) - 1; + else + start_line = vsync_line - req_delta_lines; + + endLine = start_line + 2; + + if (endLine >= dc_crtc_timing->v_total) + endLine = 2; + + REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, + OTG_VERTICAL_INTERRUPT0_LINE_START, start_line, + OTG_VERTICAL_INTERRUPT0_LINE_END, endLine); +} + /** * program_timing_generator used by mode timing set * Program CRTC Timing Registers - OTG_H_*, OTG_V_*, Pixel repetition. @@ -285,7 +360,7 @@ void optc1_program_timing( } -static void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable) +void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -780,17 +855,17 @@ void optc1_set_drr( OTG_SET_V_TOTAL_MIN_MASK_EN, 0, OTG_SET_V_TOTAL_MIN_MASK, 0); } else { - REG_SET(OTG_V_TOTAL_MIN, 0, - OTG_V_TOTAL_MIN, 0); - - REG_SET(OTG_V_TOTAL_MAX, 0, - OTG_V_TOTAL_MAX, 0); - REG_UPDATE_4(OTG_V_TOTAL_CONTROL, OTG_SET_V_TOTAL_MIN_MASK, 0, OTG_V_TOTAL_MIN_SEL, 0, OTG_V_TOTAL_MAX_SEL, 0, OTG_FORCE_LOCK_ON_EVENT, 0); + + REG_SET(OTG_V_TOTAL_MIN, 0, + OTG_V_TOTAL_MIN, 0); + + REG_SET(OTG_V_TOTAL_MAX, 0, + OTG_V_TOTAL_MAX, 0); } } @@ -1154,6 +1229,12 @@ void optc1_read_otg_state(struct optc *optc1, REG_GET(OTG_V_TOTAL_MIN, OTG_V_TOTAL_MIN, &s->v_total_min); + REG_GET(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MAX_SEL, &s->v_total_max_sel); + + REG_GET(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, &s->v_total_min_sel); + REG_GET_2(OTG_V_SYNC_A, OTG_V_SYNC_A_START, &s->v_sync_a_start, OTG_V_SYNC_A_END, &s->v_sync_a_end); @@ -1176,20 +1257,20 @@ void optc1_read_otg_state(struct optc *optc1, OPTC_UNDERFLOW_OCCURRED_STATUS, &s->underflow_occurred_status); } -static void optc1_clear_optc_underflow(struct timing_generator *optc) +void optc1_clear_optc_underflow(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); REG_UPDATE(OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, 1); } -static void optc1_tg_init(struct timing_generator *optc) +void optc1_tg_init(struct timing_generator *optc) { optc1_set_blank_data_double_buffer(optc, true); optc1_clear_optc_underflow(optc); } -static bool optc1_is_tg_enabled(struct timing_generator *optc) +bool optc1_is_tg_enabled(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); uint32_t otg_enabled = 0; @@ -1200,7 +1281,7 @@ static bool optc1_is_tg_enabled(struct timing_generator *optc) } -static bool optc1_is_optc_underflow_occurred(struct timing_generator *optc) +bool optc1_is_optc_underflow_occurred(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); uint32_t underflow_occurred = 0; @@ -1215,6 +1296,7 @@ static bool optc1_is_optc_underflow_occurred(struct timing_generator *optc) static const struct timing_generator_funcs dcn10_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, + .program_vline_interrupt = optc1_program_vline_interrupt, .program_global_sync = optc1_program_global_sync, .enable_crtc = optc1_enable_crtc, .disable_crtc = optc1_disable_crtc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index d25e7bf0d0d7..c62052f46460 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -65,6 +65,8 @@ SRI(OTG_NOM_VERT_POSITION, OTG, inst),\ SRI(OTG_BLACK_COLOR, OTG, inst),\ SRI(OTG_CLOCK_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\ + SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\ SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\ SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\ SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ @@ -124,6 +126,8 @@ struct dcn_optc_registers { uint32_t OTG_TEST_PATTERN_CONTROL; uint32_t OTG_TEST_PATTERN_COLOR; uint32_t OTG_CLOCK_CONTROL; + uint32_t OTG_VERTICAL_INTERRUPT0_CONTROL; + uint32_t OTG_VERTICAL_INTERRUPT0_POSITION; uint32_t OTG_VERTICAL_INTERRUPT2_CONTROL; uint32_t OTG_VERTICAL_INTERRUPT2_POSITION; uint32_t OPTC_INPUT_CLOCK_CONTROL; @@ -206,6 +210,9 @@ struct dcn_optc_registers { SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\ SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\ SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\ + SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\ SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\ SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\ SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\ @@ -323,6 +330,9 @@ struct dcn_optc_registers { type OTG_CLOCK_EN;\ type OTG_CLOCK_ON;\ type OTG_CLOCK_GATE_DIS;\ + type OTG_VERTICAL_INTERRUPT0_INT_ENABLE;\ + type OTG_VERTICAL_INTERRUPT0_LINE_START;\ + type OTG_VERTICAL_INTERRUPT0_LINE_END;\ type OTG_VERTICAL_INTERRUPT2_INT_ENABLE;\ type OTG_VERTICAL_INTERRUPT2_LINE_START;\ type OPTC_INPUT_CLK_EN;\ @@ -396,6 +406,8 @@ struct dcn_otg_state { uint32_t v_total; uint32_t v_total_max; uint32_t v_total_min; + uint32_t v_total_min_sel; + uint32_t v_total_max_sel; uint32_t v_sync_a_start; uint32_t v_sync_a_end; uint32_t h_blank_start; @@ -420,6 +432,10 @@ void optc1_program_timing( const struct dc_crtc_timing *dc_crtc_timing, bool use_vbios); +void optc1_program_vline_interrupt(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + unsigned long long vsync_delta); + void optc1_program_global_sync( struct timing_generator *optc); @@ -481,4 +497,14 @@ void optc1_program_stereo(struct timing_generator *optc, bool optc1_is_stereo_left_eye(struct timing_generator *optc); +void optc1_clear_optc_underflow(struct timing_generator *optc); + +void optc1_tg_init(struct timing_generator *optc); + +bool optc1_is_tg_enabled(struct timing_generator *optc); + +bool optc1_is_optc_underflow_occurred(struct timing_generator *optc); + +void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable); + #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 02bd664aed3e..df5cb2d1d164 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -38,8 +38,8 @@ #include "dcn10/dcn10_hw_sequencer.h" #include "dce110/dce110_hw_sequencer.h" #include "dcn10/dcn10_opp.h" -#include "dce/dce_link_encoder.h" -#include "dce/dce_stream_encoder.h" +#include "dcn10/dcn10_link_encoder.h" +#include "dcn10/dcn10_stream_encoder.h" #include "dce/dce_clocks.h" #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" @@ -166,36 +166,22 @@ static const struct dce_abm_mask abm_mask = { #define stream_enc_regs(id)\ [id] = {\ - SE_DCN_REG_LIST(id),\ - .TMDS_CNTL = 0,\ - .AFMT_AVI_INFO0 = 0,\ - .AFMT_AVI_INFO1 = 0,\ - .AFMT_AVI_INFO2 = 0,\ - .AFMT_AVI_INFO3 = 0,\ + SE_DCN_REG_LIST(id)\ } -static const struct dce110_stream_enc_registers stream_enc_regs[] = { +static const struct dcn10_stream_enc_registers stream_enc_regs[] = { stream_enc_regs(0), stream_enc_regs(1), stream_enc_regs(2), stream_enc_regs(3), }; -static const struct dce_stream_encoder_shift se_shift = { +static const struct dcn10_stream_encoder_shift se_shift = { SE_COMMON_MASK_SH_LIST_DCN10(__SHIFT) }; -static const struct dce_stream_encoder_mask se_mask = { - SE_COMMON_MASK_SH_LIST_DCN10(_MASK), - .AFMT_GENERIC0_UPDATE = 0, - .AFMT_GENERIC2_UPDATE = 0, - .DP_DYN_RANGE = 0, - .DP_YCBCR_RANGE = 0, - .HDMI_AVI_INFO_SEND = 0, - .HDMI_AVI_INFO_CONT = 0, - .HDMI_AVI_INFO_LINE = 0, - .DP_SEC_AVI_ENABLE = 0, - .AFMT_AVI_INFO_VERSION = 0 +static const struct dcn10_stream_encoder_mask se_mask = { + SE_COMMON_MASK_SH_LIST_DCN10(_MASK) }; #define audio_regs(id)\ @@ -228,13 +214,11 @@ static const struct dce_aduio_mask audio_mask = { AUX_REG_LIST(id)\ } -static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { +static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = { aux_regs(0), aux_regs(1), aux_regs(2), - aux_regs(3), - aux_regs(4), - aux_regs(5) + aux_regs(3) }; #define hpd_regs(id)\ @@ -242,13 +226,11 @@ static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = { HPD_REG_LIST(id)\ } -static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { +static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { hpd_regs(0), hpd_regs(1), hpd_regs(2), - hpd_regs(3), - hpd_regs(4), - hpd_regs(5) + hpd_regs(3) }; #define link_regs(id)\ @@ -257,14 +239,19 @@ static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = { SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \ } -static const struct dce110_link_enc_registers link_enc_regs[] = { +static const struct dcn10_link_enc_registers link_enc_regs[] = { link_regs(0), link_regs(1), link_regs(2), - link_regs(3), - link_regs(4), - link_regs(5), - link_regs(6), + link_regs(3) +}; + +static const struct dcn10_link_enc_shift le_shift = { + LINK_ENCODER_MASK_SH_LIST_DCN10(__SHIFT) +}; + +static const struct dcn10_link_enc_mask le_mask = { + LINK_ENCODER_MASK_SH_LIST_DCN10(_MASK) }; #define ipp_regs(id)\ @@ -320,11 +307,14 @@ static const struct dcn_dpp_registers tf_regs[] = { }; static const struct dcn_dpp_shift tf_shift = { - TF_REG_LIST_SH_MASK_DCN10(__SHIFT) + TF_REG_LIST_SH_MASK_DCN10(__SHIFT), + TF_DEBUG_REG_LIST_SH_DCN10 + }; static const struct dcn_dpp_mask tf_mask = { TF_REG_LIST_SH_MASK_DCN10(_MASK), + TF_DEBUG_REG_LIST_MASK_DCN10 }; static const struct dcn_mpc_registers mpc_regs = { @@ -457,6 +447,8 @@ static const struct dc_debug debug_defaults_drv = { .vsr_support = true, .performance_trace = false, .az_endpoint_mute_only = true, + .recovery_enabled = false, /*enable this by default after testing.*/ + .max_downscale_src_width = 3840, }; static const struct dc_debug debug_defaults_diags = { @@ -592,20 +584,22 @@ static const struct encoder_feature_support link_enc_feature = { struct link_encoder *dcn10_link_encoder_create( const struct encoder_init_data *enc_init_data) { - struct dce110_link_encoder *enc110 = - kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL); + struct dcn10_link_encoder *enc10 = + kzalloc(sizeof(struct dcn10_link_encoder), GFP_KERNEL); - if (!enc110) + if (!enc10) return NULL; - dce110_link_encoder_construct(enc110, + dcn10_link_encoder_construct(enc10, enc_init_data, &link_enc_feature, &link_enc_regs[enc_init_data->transmitter], &link_enc_aux_regs[enc_init_data->channel - 1], - &link_enc_hpd_regs[enc_init_data->hpd_source]); + &link_enc_hpd_regs[enc_init_data->hpd_source], + &le_shift, + &le_mask); - return &enc110->base; + return &enc10->base; } struct clock_source *dcn10_clock_source_create( @@ -650,16 +644,16 @@ static struct stream_encoder *dcn10_stream_encoder_create( enum engine_id eng_id, struct dc_context *ctx) { - struct dce110_stream_encoder *enc110 = - kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL); + struct dcn10_stream_encoder *enc1 = + kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL); - if (!enc110) + if (!enc1) return NULL; - dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id, + dcn10_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, &stream_enc_regs[eng_id], &se_shift, &se_mask); - return &enc110->base; + return &enc1->base; } static const struct dce_hwseq_registers hwseq_reg = { @@ -918,36 +912,6 @@ enum dc_status dcn10_add_stream_to_ctx( return result; } -enum dc_status dcn10_validate_guaranteed( - struct dc *dc, - struct dc_stream_state *dc_stream, - struct dc_state *context) -{ - enum dc_status result = DC_ERROR_UNEXPECTED; - - context->streams[0] = dc_stream; - dc_stream_retain(context->streams[0]); - context->stream_count++; - - result = resource_map_pool_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = resource_map_phy_clock_resources(dc, context, dc_stream); - - if (result == DC_OK) - result = build_mapped_resource(dc, context, dc_stream); - - if (result == DC_OK) { - validate_guaranteed_copy_streams( - context, dc->caps.max_streams); - result = resource_build_scaling_params_for_context(dc, context); - } - if (result == DC_OK && !dcn_validate_bandwidth(dc, context)) - return DC_FAIL_BANDWIDTH_VALIDATE; - - return result; -} - static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer( struct dc_state *context, const struct resource_pool *pool, @@ -978,235 +942,16 @@ static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer( return idle_pipe; } -enum dcc_control { - dcc_control__256_256_xxx, - dcc_control__128_128_xxx, - dcc_control__256_64_64, -}; - -enum segment_order { - segment_order__na, - segment_order__contiguous, - segment_order__non_contiguous, -}; - -static bool dcc_support_pixel_format( - enum surface_pixel_format format, - unsigned int *bytes_per_element) -{ - /* DML: get_bytes_per_element */ - switch (format) { - case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: - case SURFACE_PIXEL_FORMAT_GRPH_RGB565: - *bytes_per_element = 2; - return true; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888: - case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010: - *bytes_per_element = 4; - return true; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: - *bytes_per_element = 8; - return true; - default: - return false; - } -} - -static bool dcc_support_swizzle( - enum swizzle_mode_values swizzle, - unsigned int bytes_per_element, - enum segment_order *segment_order_horz, - enum segment_order *segment_order_vert) -{ - bool standard_swizzle = false; - bool display_swizzle = false; - - switch (swizzle) { - case DC_SW_4KB_S: - case DC_SW_64KB_S: - case DC_SW_VAR_S: - case DC_SW_4KB_S_X: - case DC_SW_64KB_S_X: - case DC_SW_VAR_S_X: - standard_swizzle = true; - break; - case DC_SW_4KB_D: - case DC_SW_64KB_D: - case DC_SW_VAR_D: - case DC_SW_4KB_D_X: - case DC_SW_64KB_D_X: - case DC_SW_VAR_D_X: - display_swizzle = true; - break; - default: - break; - } - - if (bytes_per_element == 1 && standard_swizzle) { - *segment_order_horz = segment_order__contiguous; - *segment_order_vert = segment_order__na; - return true; - } - if (bytes_per_element == 2 && standard_swizzle) { - *segment_order_horz = segment_order__non_contiguous; - *segment_order_vert = segment_order__contiguous; - return true; - } - if (bytes_per_element == 4 && standard_swizzle) { - *segment_order_horz = segment_order__non_contiguous; - *segment_order_vert = segment_order__contiguous; - return true; - } - if (bytes_per_element == 8 && standard_swizzle) { - *segment_order_horz = segment_order__na; - *segment_order_vert = segment_order__contiguous; - return true; - } - if (bytes_per_element == 8 && display_swizzle) { - *segment_order_horz = segment_order__contiguous; - *segment_order_vert = segment_order__non_contiguous; - return true; - } - - return false; -} - -static void get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height, - unsigned int bytes_per_element) -{ - /* copied from DML. might want to refactor DML to leverage from DML */ - /* DML : get_blk256_size */ - if (bytes_per_element == 1) { - *blk256_width = 16; - *blk256_height = 16; - } else if (bytes_per_element == 2) { - *blk256_width = 16; - *blk256_height = 8; - } else if (bytes_per_element == 4) { - *blk256_width = 8; - *blk256_height = 8; - } else if (bytes_per_element == 8) { - *blk256_width = 8; - *blk256_height = 4; - } -} - -static void det_request_size( - unsigned int height, - unsigned int width, - unsigned int bpe, - bool *req128_horz_wc, - bool *req128_vert_wc) -{ - unsigned int detile_buf_size = 164 * 1024; /* 164KB for DCN1.0 */ - - unsigned int blk256_height = 0; - unsigned int blk256_width = 0; - unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc; - - get_blk256_size(&blk256_width, &blk256_height, bpe); - - swath_bytes_horz_wc = height * blk256_height * bpe; - swath_bytes_vert_wc = width * blk256_width * bpe; - - *req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ? - false : /* full 256B request */ - true; /* half 128b request */ - - *req128_vert_wc = (2 * swath_bytes_vert_wc <= detile_buf_size) ? - false : /* full 256B request */ - true; /* half 128b request */ -} - -static bool get_dcc_compression_cap(const struct dc *dc, +static bool dcn10_get_dcc_compression_cap(const struct dc *dc, const struct dc_dcc_surface_param *input, struct dc_surface_dcc_cap *output) { - /* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */ - enum dcc_control dcc_control; - unsigned int bpe; - enum segment_order segment_order_horz, segment_order_vert; - bool req128_horz_wc, req128_vert_wc; - - memset(output, 0, sizeof(*output)); - - if (dc->debug.disable_dcc == DCC_DISABLE) - return false; - - if (!dcc_support_pixel_format(input->format, - &bpe)) - return false; - - if (!dcc_support_swizzle(input->swizzle_mode, bpe, - &segment_order_horz, &segment_order_vert)) - return false; - - det_request_size(input->surface_size.height, input->surface_size.width, - bpe, &req128_horz_wc, &req128_vert_wc); - - if (!req128_horz_wc && !req128_vert_wc) { - dcc_control = dcc_control__256_256_xxx; - } else if (input->scan == SCAN_DIRECTION_HORIZONTAL) { - if (!req128_horz_wc) - dcc_control = dcc_control__256_256_xxx; - else if (segment_order_horz == segment_order__contiguous) - dcc_control = dcc_control__128_128_xxx; - else - dcc_control = dcc_control__256_64_64; - } else if (input->scan == SCAN_DIRECTION_VERTICAL) { - if (!req128_vert_wc) - dcc_control = dcc_control__256_256_xxx; - else if (segment_order_vert == segment_order__contiguous) - dcc_control = dcc_control__128_128_xxx; - else - dcc_control = dcc_control__256_64_64; - } else { - if ((req128_horz_wc && - segment_order_horz == segment_order__non_contiguous) || - (req128_vert_wc && - segment_order_vert == segment_order__non_contiguous)) - /* access_dir not known, must use most constraining */ - dcc_control = dcc_control__256_64_64; - else - /* reg128 is true for either horz and vert - * but segment_order is contiguous - */ - dcc_control = dcc_control__128_128_xxx; - } - - if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE && - dcc_control != dcc_control__256_256_xxx) - return false; - - switch (dcc_control) { - case dcc_control__256_256_xxx: - output->grph.rgb.max_uncompressed_blk_size = 256; - output->grph.rgb.max_compressed_blk_size = 256; - output->grph.rgb.independent_64b_blks = false; - break; - case dcc_control__128_128_xxx: - output->grph.rgb.max_uncompressed_blk_size = 128; - output->grph.rgb.max_compressed_blk_size = 128; - output->grph.rgb.independent_64b_blks = false; - break; - case dcc_control__256_64_64: - output->grph.rgb.max_uncompressed_blk_size = 256; - output->grph.rgb.max_compressed_blk_size = 64; - output->grph.rgb.independent_64b_blks = true; - break; - } - - output->capable = true; - output->const_color_support = false; - - return true; + return dc->res_pool->hubbub->funcs->get_dcc_compression_cap( + dc->res_pool->hubbub, + input, + output); } - static void dcn10_destroy_resource_pool(struct resource_pool **pool) { struct dcn10_resource_pool *dcn10_pool = TO_DCN10_RES_POOL(*pool); @@ -1227,13 +972,12 @@ static enum dc_status dcn10_validate_plane(const struct dc_plane_state *plane_st } static struct dc_cap_funcs cap_funcs = { - .get_dcc_compression_cap = get_dcc_compression_cap + .get_dcc_compression_cap = dcn10_get_dcc_compression_cap }; static struct resource_funcs dcn10_res_pool_funcs = { .destroy = dcn10_destroy_resource_pool, .link_enc_create = dcn10_link_encoder_create, - .validate_guaranteed = dcn10_validate_guaranteed, .validate_bandwidth = dcn_validate_bandwidth, .acquire_idle_pipe_for_layer = dcn10_acquire_idle_pipe_for_layer, .validate_plane = dcn10_validate_plane, @@ -1282,6 +1026,7 @@ static bool construct( dc->caps.max_cursor_size = 256; dc->caps.max_slave_planes = 1; dc->caps.is_apu = true; + dc->caps.post_blend_color_processing = false; if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c new file mode 100644 index 000000000000..c928ee4cd382 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -0,0 +1,1494 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + + +#include "dc_bios_types.h" +#include "dcn10_stream_encoder.h" +#include "reg_helper.h" +#include "hw_shared.h" + +#define DC_LOGGER \ + enc1->base.ctx->logger + + +#define REG(reg)\ + (enc1->regs->reg) + +#undef FN +#define FN(reg_name, field_name) \ + enc1->se_shift->field_name, enc1->se_mask->field_name + +#define VBI_LINE_0 0 +#define DP_BLANK_MAX_RETRY 20 +#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000 + + +enum { + DP_MST_UPDATE_MAX_RETRY = 50 +}; + +#define CTX \ + enc1->base.ctx + +void enc1_update_generic_info_packet( + struct dcn10_stream_encoder *enc1, + uint32_t packet_index, + const struct dc_info_packet *info_packet) +{ + uint32_t regval; + /* TODOFPGA Figure out a proper number for max_retries polling for lock + * use 50 for now. + */ + uint32_t max_retries = 50; + + /*we need turn on clock before programming AFMT block*/ + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1); + + if (packet_index >= 8) + ASSERT(0); + + /* poll dig_update_lock is not locked -> asic internal signal + * assume otg master lock will unlock it + */ +/* REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_LOCK_STATUS, + 0, 10, max_retries);*/ + + /* check if HW reading GSP memory */ + REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT, + 0, 10, max_retries); + + /* HW does is not reading GSP memory not reading too long -> + * something wrong. clear GPS memory access and notify? + * hw SW is writing to GSP memory + */ + REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1); + + /* choose which generic packet to use */ + regval = REG_READ(AFMT_VBI_PACKET_CONTROL); + REG_UPDATE(AFMT_VBI_PACKET_CONTROL, + AFMT_GENERIC_INDEX, packet_index); + + /* write generic packet header + * (4th byte is for GENERIC0 only) + */ + REG_SET_4(AFMT_GENERIC_HDR, 0, + AFMT_GENERIC_HB0, info_packet->hb0, + AFMT_GENERIC_HB1, info_packet->hb1, + AFMT_GENERIC_HB2, info_packet->hb2, + AFMT_GENERIC_HB3, info_packet->hb3); + + /* write generic packet contents + * (we never use last 4 bytes) + * there are 8 (0-7) mmDIG0_AFMT_GENERIC0_x registers + */ + { + const uint32_t *content = + (const uint32_t *) &info_packet->sb[0]; + + REG_WRITE(AFMT_GENERIC_0, *content++); + REG_WRITE(AFMT_GENERIC_1, *content++); + REG_WRITE(AFMT_GENERIC_2, *content++); + REG_WRITE(AFMT_GENERIC_3, *content++); + REG_WRITE(AFMT_GENERIC_4, *content++); + REG_WRITE(AFMT_GENERIC_5, *content++); + REG_WRITE(AFMT_GENERIC_6, *content++); + REG_WRITE(AFMT_GENERIC_7, *content); + } + + switch (packet_index) { + case 0: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC0_FRAME_UPDATE, 1); + break; + case 1: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC1_FRAME_UPDATE, 1); + break; + case 2: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC2_FRAME_UPDATE, 1); + break; + case 3: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC3_FRAME_UPDATE, 1); + break; + case 4: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC4_FRAME_UPDATE, 1); + break; + case 5: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC5_FRAME_UPDATE, 1); + break; + case 6: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC6_FRAME_UPDATE, 1); + break; + case 7: + REG_UPDATE(AFMT_VBI_PACKET_CONTROL1, + AFMT_GENERIC7_FRAME_UPDATE, 1); + break; + default: + break; + } +} + +static void enc1_update_hdmi_info_packet( + struct dcn10_stream_encoder *enc1, + uint32_t packet_index, + const struct dc_info_packet *info_packet) +{ + uint32_t cont, send, line; + + if (info_packet->valid) { + enc1_update_generic_info_packet( + enc1, + packet_index, + info_packet); + + /* enable transmission of packet(s) - + * packet transmission begins on the next frame + */ + cont = 1; + /* send packet(s) every frame */ + send = 1; + /* select line number to send packets on */ + line = 2; + } else { + cont = 0; + send = 0; + line = 0; + } + + /* choose which generic packet control to use */ + switch (packet_index) { + case 0: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL0, + HDMI_GENERIC0_CONT, cont, + HDMI_GENERIC0_SEND, send, + HDMI_GENERIC0_LINE, line); + break; + case 1: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL0, + HDMI_GENERIC1_CONT, cont, + HDMI_GENERIC1_SEND, send, + HDMI_GENERIC1_LINE, line); + break; + case 2: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL1, + HDMI_GENERIC0_CONT, cont, + HDMI_GENERIC0_SEND, send, + HDMI_GENERIC0_LINE, line); + break; + case 3: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL1, + HDMI_GENERIC1_CONT, cont, + HDMI_GENERIC1_SEND, send, + HDMI_GENERIC1_LINE, line); + break; + case 4: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL2, + HDMI_GENERIC0_CONT, cont, + HDMI_GENERIC0_SEND, send, + HDMI_GENERIC0_LINE, line); + break; + case 5: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL2, + HDMI_GENERIC1_CONT, cont, + HDMI_GENERIC1_SEND, send, + HDMI_GENERIC1_LINE, line); + break; + case 6: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL3, + HDMI_GENERIC0_CONT, cont, + HDMI_GENERIC0_SEND, send, + HDMI_GENERIC0_LINE, line); + break; + case 7: + REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL3, + HDMI_GENERIC1_CONT, cont, + HDMI_GENERIC1_SEND, send, + HDMI_GENERIC1_LINE, line); + break; + default: + /* invalid HW packet index */ + DC_LOG_WARNING( + "Invalid HW packet index: %s()\n", + __func__); + return; + } +} + +/* setup stream encoder in dp mode */ +void enc1_stream_encoder_dp_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + enum dc_color_space output_color_space) +{ + uint32_t h_active_start; + uint32_t v_active_start; + uint32_t misc0 = 0; + uint32_t misc1 = 0; + uint32_t h_blank; + uint32_t h_back_porch; + uint8_t synchronous_clock = 0; /* asynchronous mode */ + uint8_t colorimetry_bpc; + uint8_t dynamic_range_rgb = 0; /*full range*/ + uint8_t dynamic_range_ycbcr = 1; /*bt709*/ + + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + REG_UPDATE(DP_DB_CNTL, DP_DB_DISABLE, 1); + + /* set pixel encoding */ + switch (crtc_timing->pixel_encoding) { + case PIXEL_ENCODING_YCBCR422: + REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, + DP_PIXEL_ENCODING_TYPE_YCBCR422); + break; + case PIXEL_ENCODING_YCBCR444: + REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, + DP_PIXEL_ENCODING_TYPE_YCBCR444); + + if (crtc_timing->flags.Y_ONLY) + if (crtc_timing->display_color_depth != COLOR_DEPTH_666) + /* HW testing only, no use case yet. + * Color depth of Y-only could be + * 8, 10, 12, 16 bits + */ + REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, + DP_PIXEL_ENCODING_TYPE_Y_ONLY); + /* Note: DP_MSA_MISC1 bit 7 is the indicator + * of Y-only mode. + * This bit is set in HW if register + * DP_PIXEL_ENCODING is programmed to 0x4 + */ + break; + case PIXEL_ENCODING_YCBCR420: + REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, + DP_PIXEL_ENCODING_TYPE_YCBCR420); + REG_UPDATE(DP_VID_TIMING, DP_VID_N_MUL, 1); + break; + default: + REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, + DP_PIXEL_ENCODING_TYPE_RGB444); + break; + } + + misc1 = REG_READ(DP_MSA_MISC); + + /* set color depth */ + + switch (crtc_timing->display_color_depth) { + case COLOR_DEPTH_666: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + 0); + break; + case COLOR_DEPTH_888: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + DP_COMPONENT_PIXEL_DEPTH_8BPC); + break; + case COLOR_DEPTH_101010: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + DP_COMPONENT_PIXEL_DEPTH_10BPC); + + break; + case COLOR_DEPTH_121212: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + DP_COMPONENT_PIXEL_DEPTH_12BPC); + break; + case COLOR_DEPTH_161616: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + DP_COMPONENT_PIXEL_DEPTH_16BPC); + break; + default: + REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, + DP_COMPONENT_PIXEL_DEPTH_6BPC); + break; + } + + /* set dynamic range and YCbCr range */ + + switch (crtc_timing->display_color_depth) { + case COLOR_DEPTH_666: + colorimetry_bpc = 0; + break; + case COLOR_DEPTH_888: + colorimetry_bpc = 1; + break; + case COLOR_DEPTH_101010: + colorimetry_bpc = 2; + break; + case COLOR_DEPTH_121212: + colorimetry_bpc = 3; + break; + default: + colorimetry_bpc = 0; + break; + } + + misc0 = misc0 | synchronous_clock; + misc0 = colorimetry_bpc << 5; + + switch (output_color_space) { + case COLOR_SPACE_SRGB: + misc0 = misc0 | 0x0; + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + dynamic_range_rgb = 0; /*full range*/ + break; + case COLOR_SPACE_SRGB_LIMITED: + misc0 = misc0 | 0x8; /* bit3=1 */ + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + dynamic_range_rgb = 1; /*limited range*/ + break; + case COLOR_SPACE_YCBCR601: + case COLOR_SPACE_YCBCR601_LIMITED: + misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */ + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + dynamic_range_ycbcr = 0; /*bt601*/ + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ + else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444) + misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ + break; + case COLOR_SPACE_YCBCR709: + case COLOR_SPACE_YCBCR709_LIMITED: + misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ + misc1 = misc1 & ~0x80; /* bit7 = 0*/ + dynamic_range_ycbcr = 1; /*bt709*/ + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ + else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444) + misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ + break; + case COLOR_SPACE_2020_RGB_LIMITEDRANGE: + dynamic_range_rgb = 1; /*limited range*/ + break; + case COLOR_SPACE_2020_RGB_FULLRANGE: + case COLOR_SPACE_2020_YCBCR: + case COLOR_SPACE_XR_RGB: + case COLOR_SPACE_MSREF_SCRGB: + case COLOR_SPACE_ADOBERGB: + case COLOR_SPACE_DCIP3: + case COLOR_SPACE_XV_YCC_709: + case COLOR_SPACE_XV_YCC_601: + case COLOR_SPACE_DISPLAYNATIVE: + case COLOR_SPACE_DOLBYVISION: + case COLOR_SPACE_APPCTRL: + case COLOR_SPACE_CUSTOMPOINTS: + case COLOR_SPACE_UNKNOWN: + /* do nothing */ + break; + } + + REG_SET(DP_MSA_COLORIMETRY, 0, DP_MSA_MISC0, misc0); + REG_WRITE(DP_MSA_MISC, misc1); /* MSA_MISC1 */ + + /* dcn new register + * dc_crtc_timing is vesa dmt struct. data from edid + */ + REG_SET_2(DP_MSA_TIMING_PARAM1, 0, + DP_MSA_HTOTAL, crtc_timing->h_total, + DP_MSA_VTOTAL, crtc_timing->v_total); + + /* calculate from vesa timing parameters + * h_active_start related to leading edge of sync + */ + + h_blank = crtc_timing->h_total - crtc_timing->h_border_left - + crtc_timing->h_addressable - crtc_timing->h_border_right; + + h_back_porch = h_blank - crtc_timing->h_front_porch - + crtc_timing->h_sync_width; + + /* start at beginning of left border */ + h_active_start = crtc_timing->h_sync_width + h_back_porch; + + + v_active_start = crtc_timing->v_total - crtc_timing->v_border_top - + crtc_timing->v_addressable - crtc_timing->v_border_bottom - + crtc_timing->v_front_porch; + + + /* start at beginning of left border */ + REG_SET_2(DP_MSA_TIMING_PARAM2, 0, + DP_MSA_HSTART, h_active_start, + DP_MSA_VSTART, v_active_start); + + REG_SET_4(DP_MSA_TIMING_PARAM3, 0, + DP_MSA_HSYNCWIDTH, + crtc_timing->h_sync_width, + DP_MSA_HSYNCPOLARITY, + !crtc_timing->flags.HSYNC_POSITIVE_POLARITY, + DP_MSA_VSYNCWIDTH, + crtc_timing->v_sync_width, + DP_MSA_VSYNCPOLARITY, + !crtc_timing->flags.VSYNC_POSITIVE_POLARITY); + + /* HWDITH include border or overscan */ + REG_SET_2(DP_MSA_TIMING_PARAM4, 0, + DP_MSA_HWIDTH, crtc_timing->h_border_left + + crtc_timing->h_addressable + crtc_timing->h_border_right, + DP_MSA_VHEIGHT, crtc_timing->v_border_top + + crtc_timing->v_addressable + crtc_timing->v_border_bottom); +} + +static void enc1_stream_encoder_set_stream_attribute_helper( + struct dcn10_stream_encoder *enc1, + struct dc_crtc_timing *crtc_timing) +{ + switch (crtc_timing->pixel_encoding) { + case PIXEL_ENCODING_YCBCR422: + REG_UPDATE(DIG_FE_CNTL, TMDS_PIXEL_ENCODING, 1); + break; + default: + REG_UPDATE(DIG_FE_CNTL, TMDS_PIXEL_ENCODING, 0); + break; + } + REG_UPDATE(DIG_FE_CNTL, TMDS_COLOR_FORMAT, 0); +} + +/* setup stream encoder in hdmi mode */ +void enc1_stream_encoder_hdmi_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + int actual_pix_clk_khz, + bool enable_audio) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + struct bp_encoder_control cntl = {0}; + + cntl.action = ENCODER_CONTROL_SETUP; + cntl.engine_id = enc1->base.id; + cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A; + cntl.enable_dp_audio = enable_audio; + cntl.pixel_clock = actual_pix_clk_khz; + cntl.lanes_number = LANE_COUNT_FOUR; + + if (enc1->base.bp->funcs->encoder_control( + enc1->base.bp, &cntl) != BP_RESULT_OK) + return; + + enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing); + + /* setup HDMI engine */ + REG_UPDATE_5(HDMI_CONTROL, + HDMI_PACKET_GEN_VERSION, 1, + HDMI_KEEPOUT_MODE, 1, + HDMI_DEEP_COLOR_ENABLE, 0, + HDMI_DATA_SCRAMBLE_EN, 0, + HDMI_CLOCK_CHANNEL_RATE, 0); + + + switch (crtc_timing->display_color_depth) { + case COLOR_DEPTH_888: + REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0); + break; + case COLOR_DEPTH_101010: + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 1, + HDMI_DEEP_COLOR_ENABLE, 0); + } else { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 1, + HDMI_DEEP_COLOR_ENABLE, 1); + } + break; + case COLOR_DEPTH_121212: + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 2, + HDMI_DEEP_COLOR_ENABLE, 0); + } else { + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 2, + HDMI_DEEP_COLOR_ENABLE, 1); + } + break; + case COLOR_DEPTH_161616: + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DEEP_COLOR_DEPTH, 3, + HDMI_DEEP_COLOR_ENABLE, 1); + break; + default: + break; + } + + if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) { + /* enable HDMI data scrambler + * HDMI_CLOCK_CHANNEL_RATE_MORE_340M + * Clock channel frequency is 1/4 of character rate. + */ + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DATA_SCRAMBLE_EN, 1, + HDMI_CLOCK_CHANNEL_RATE, 1); + } else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) { + + /* TODO: New feature for DCE11, still need to implement */ + + /* enable HDMI data scrambler + * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE + * Clock channel frequency is the same + * as character rate + */ + REG_UPDATE_2(HDMI_CONTROL, + HDMI_DATA_SCRAMBLE_EN, 1, + HDMI_CLOCK_CHANNEL_RATE, 0); + } + + + REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL, + HDMI_GC_CONT, 1, + HDMI_GC_SEND, 1, + HDMI_NULL_SEND, 1); + + /* following belongs to audio */ + REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1); + + REG_UPDATE(AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1); + + REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, + VBI_LINE_0 + 2); + + REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0); +} + +/* setup stream encoder in dvi mode */ +void enc1_stream_encoder_dvi_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + bool is_dual_link) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + struct bp_encoder_control cntl = {0}; + + cntl.action = ENCODER_CONTROL_SETUP; + cntl.engine_id = enc1->base.id; + cntl.signal = is_dual_link ? + SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK; + cntl.enable_dp_audio = false; + cntl.pixel_clock = crtc_timing->pix_clk_khz; + cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR; + + if (enc1->base.bp->funcs->encoder_control( + enc1->base.bp, &cntl) != BP_RESULT_OK) + return; + + ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB); + ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888); + enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing); +} + +void enc1_stream_encoder_set_mst_bandwidth( + struct stream_encoder *enc, + struct fixed31_32 avg_time_slots_per_mtp) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t x = dc_fixpt_floor( + avg_time_slots_per_mtp); + uint32_t y = dc_fixpt_ceil( + dc_fixpt_shl( + dc_fixpt_sub_int( + avg_time_slots_per_mtp, + x), + 26)); + + REG_SET_2(DP_MSE_RATE_CNTL, 0, + DP_MSE_RATE_X, x, + DP_MSE_RATE_Y, y); + + /* wait for update to be completed on the link */ + /* i.e. DP_MSE_RATE_UPDATE_PENDING field (read only) */ + /* is reset to 0 (not pending) */ + REG_WAIT(DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, + 0, + 10, DP_MST_UPDATE_MAX_RETRY); +} + +static void enc1_stream_encoder_update_hdmi_info_packets( + struct stream_encoder *enc, + const struct encoder_info_frame *info_frame) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + /* for bring up, disable dp double TODO */ + REG_UPDATE(HDMI_DB_CONTROL, HDMI_DB_DISABLE, 1); + + enc1_update_hdmi_info_packet(enc1, 0, &info_frame->avi); + enc1_update_hdmi_info_packet(enc1, 1, &info_frame->vendor); + enc1_update_hdmi_info_packet(enc1, 2, &info_frame->gamut); + enc1_update_hdmi_info_packet(enc1, 3, &info_frame->spd); + enc1_update_hdmi_info_packet(enc1, 4, &info_frame->hdrsmd); +} + +static void enc1_stream_encoder_stop_hdmi_info_packets( + struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + /* stop generic packets 0 & 1 on HDMI */ + REG_SET_6(HDMI_GENERIC_PACKET_CONTROL0, 0, + HDMI_GENERIC1_CONT, 0, + HDMI_GENERIC1_LINE, 0, + HDMI_GENERIC1_SEND, 0, + HDMI_GENERIC0_CONT, 0, + HDMI_GENERIC0_LINE, 0, + HDMI_GENERIC0_SEND, 0); + + /* stop generic packets 2 & 3 on HDMI */ + REG_SET_6(HDMI_GENERIC_PACKET_CONTROL1, 0, + HDMI_GENERIC0_CONT, 0, + HDMI_GENERIC0_LINE, 0, + HDMI_GENERIC0_SEND, 0, + HDMI_GENERIC1_CONT, 0, + HDMI_GENERIC1_LINE, 0, + HDMI_GENERIC1_SEND, 0); + + /* stop generic packets 2 & 3 on HDMI */ + REG_SET_6(HDMI_GENERIC_PACKET_CONTROL2, 0, + HDMI_GENERIC0_CONT, 0, + HDMI_GENERIC0_LINE, 0, + HDMI_GENERIC0_SEND, 0, + HDMI_GENERIC1_CONT, 0, + HDMI_GENERIC1_LINE, 0, + HDMI_GENERIC1_SEND, 0); + + REG_SET_6(HDMI_GENERIC_PACKET_CONTROL3, 0, + HDMI_GENERIC0_CONT, 0, + HDMI_GENERIC0_LINE, 0, + HDMI_GENERIC0_SEND, 0, + HDMI_GENERIC1_CONT, 0, + HDMI_GENERIC1_LINE, 0, + HDMI_GENERIC1_SEND, 0); +} + +void enc1_stream_encoder_update_dp_info_packets( + struct stream_encoder *enc, + const struct encoder_info_frame *info_frame) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t value = 0; + + if (info_frame->vsc.valid) + enc1_update_generic_info_packet( + enc1, + 0, /* packetIndex */ + &info_frame->vsc); + + if (info_frame->spd.valid) + enc1_update_generic_info_packet( + enc1, + 2, /* packetIndex */ + &info_frame->spd); + + if (info_frame->hdrsmd.valid) + enc1_update_generic_info_packet( + enc1, + 3, /* packetIndex */ + &info_frame->hdrsmd); + + /* enable/disable transmission of packet(s). + * If enabled, packet transmission begins on the next frame + */ + REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid); + REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid); + REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid); + + + /* This bit is the master enable bit. + * When enabling secondary stream engine, + * this master bit must also be set. + * This register shared with audio info frame. + * Therefore we need to enable master bit + * if at least on of the fields is not 0 + */ + value = REG_READ(DP_SEC_CNTL); + if (value) + REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); +} + +void enc1_stream_encoder_stop_dp_info_packets( + struct stream_encoder *enc) +{ + /* stop generic packets on DP */ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t value = 0; + + REG_SET_10(DP_SEC_CNTL, 0, + DP_SEC_GSP0_ENABLE, 0, + DP_SEC_GSP1_ENABLE, 0, + DP_SEC_GSP2_ENABLE, 0, + DP_SEC_GSP3_ENABLE, 0, + DP_SEC_GSP4_ENABLE, 0, + DP_SEC_GSP5_ENABLE, 0, + DP_SEC_GSP6_ENABLE, 0, + DP_SEC_GSP7_ENABLE, 0, + DP_SEC_MPG_ENABLE, 0, + DP_SEC_STREAM_ENABLE, 0); + + /* this register shared with audio info frame. + * therefore we need to keep master enabled + * if at least one of the fields is not 0 */ + value = REG_READ(DP_SEC_CNTL); + if (value) + REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); + +} + +void enc1_stream_encoder_dp_blank( + struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t retries = 0; + uint32_t reg1 = 0; + uint32_t max_retries = DP_BLANK_MAX_RETRY * 10; + + /* Note: For CZ, we are changing driver default to disable + * stream deferred to next VBLANK. If results are positive, we + * will make the same change to all DCE versions. There are a + * handful of panels that cannot handle disable stream at + * HBLANK and will result in a white line flash across the + * screen on stream disable. + */ + REG_GET(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, ®1); + if ((reg1 & 0x1) == 0) + /*stream not enabled*/ + return; + /* Specify the video stream disable point + * (2 = start of the next vertical blank) + */ + REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, 2); + /* Larger delay to wait until VBLANK - use max retry of + * 10us*3000=30ms. This covers 16.6ms of typical 60 Hz mode + + * a little more because we may not trust delay accuracy. + */ + max_retries = DP_BLANK_MAX_RETRY * 150; + + /* disable DP stream */ + REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0); + + /* the encoder stops sending the video stream + * at the start of the vertical blanking. + * Poll for DP_VID_STREAM_STATUS == 0 + */ + + REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, + 0, + 10, max_retries); + + ASSERT(retries <= max_retries); + + /* Tell the DP encoder to ignore timing from CRTC, must be done after + * the polling. If we set DP_STEER_FIFO_RESET before DP stream blank is + * complete, stream status will be stuck in video stream enabled state, + * i.e. DP_VID_STREAM_STATUS stuck at 1. + */ + + REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, true); +} + +/* output video stream to link encoder */ +void enc1_stream_encoder_dp_unblank( + struct stream_encoder *enc, + const struct encoder_unblank_param *param) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) { + uint32_t n_vid = 0x8000; + uint32_t m_vid; + + /* M / N = Fstream / Flink + * m_vid / n_vid = pixel rate / link rate + */ + + uint64_t m_vid_l = n_vid; + + m_vid_l *= param->pixel_clk_khz; + m_vid_l = div_u64(m_vid_l, + param->link_settings.link_rate + * LINK_RATE_REF_FREQ_IN_KHZ); + + m_vid = (uint32_t) m_vid_l; + + /* enable auto measurement */ + + REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0); + + /* auto measurement need 1 full 0x8000 symbol cycle to kick in, + * therefore program initial value for Mvid and Nvid + */ + + REG_UPDATE(DP_VID_N, DP_VID_N, n_vid); + + REG_UPDATE(DP_VID_M, DP_VID_M, m_vid); + + REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 1); + } + + /* set DIG_START to 0x1 to resync FIFO */ + + REG_UPDATE(DIG_FE_CNTL, DIG_START, 1); + + /* switch DP encoder to CRTC data */ + + REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0); + + /* wait 100us for DIG/DP logic to prime + * (i.e. a few video lines) + */ + udelay(100); + + /* the hardware would start sending video at the start of the next DP + * frame (i.e. rising edge of the vblank). + * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this + * register has no effect on enable transition! HW always guarantees + * VID_STREAM enable at start of next frame, and this is not + * programmable + */ + + REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true); +} + +void enc1_stream_encoder_set_avmute( + struct stream_encoder *enc, + bool enable) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + unsigned int value = enable ? 1 : 0; + + REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, value); +} + + +#define DP_SEC_AUD_N__DP_SEC_AUD_N__DEFAULT 0x8000 +#define DP_SEC_TIMESTAMP__DP_SEC_TIMESTAMP_MODE__AUTO_CALC 1 + +#include "include/audio_types.h" + +/** +* speakersToChannels +* +* @brief +* translate speakers to channels +* +* FL - Front Left +* FR - Front Right +* RL - Rear Left +* RR - Rear Right +* RC - Rear Center +* FC - Front Center +* FLC - Front Left Center +* FRC - Front Right Center +* RLC - Rear Left Center +* RRC - Rear Right Center +* LFE - Low Freq Effect +* +* FC +* FLC FRC +* FL FR +* +* LFE +* () +* +* +* RL RR +* RLC RRC +* RC +* +* ch 8 7 6 5 4 3 2 1 +* 0b00000011 - - - - - - FR FL +* 0b00000111 - - - - - LFE FR FL +* 0b00001011 - - - - FC - FR FL +* 0b00001111 - - - - FC LFE FR FL +* 0b00010011 - - - RC - - FR FL +* 0b00010111 - - - RC - LFE FR FL +* 0b00011011 - - - RC FC - FR FL +* 0b00011111 - - - RC FC LFE FR FL +* 0b00110011 - - RR RL - - FR FL +* 0b00110111 - - RR RL - LFE FR FL +* 0b00111011 - - RR RL FC - FR FL +* 0b00111111 - - RR RL FC LFE FR FL +* 0b01110011 - RC RR RL - - FR FL +* 0b01110111 - RC RR RL - LFE FR FL +* 0b01111011 - RC RR RL FC - FR FL +* 0b01111111 - RC RR RL FC LFE FR FL +* 0b11110011 RRC RLC RR RL - - FR FL +* 0b11110111 RRC RLC RR RL - LFE FR FL +* 0b11111011 RRC RLC RR RL FC - FR FL +* 0b11111111 RRC RLC RR RL FC LFE FR FL +* 0b11000011 FRC FLC - - - - FR FL +* 0b11000111 FRC FLC - - - LFE FR FL +* 0b11001011 FRC FLC - - FC - FR FL +* 0b11001111 FRC FLC - - FC LFE FR FL +* 0b11010011 FRC FLC - RC - - FR FL +* 0b11010111 FRC FLC - RC - LFE FR FL +* 0b11011011 FRC FLC - RC FC - FR FL +* 0b11011111 FRC FLC - RC FC LFE FR FL +* 0b11110011 FRC FLC RR RL - - FR FL +* 0b11110111 FRC FLC RR RL - LFE FR FL +* 0b11111011 FRC FLC RR RL FC - FR FL +* 0b11111111 FRC FLC RR RL FC LFE FR FL +* +* @param +* speakers - speaker information as it comes from CEA audio block +*/ +/* translate speakers to channels */ + +union audio_cea_channels { + uint8_t all; + struct audio_cea_channels_bits { + uint32_t FL:1; + uint32_t FR:1; + uint32_t LFE:1; + uint32_t FC:1; + uint32_t RL_RC:1; + uint32_t RR:1; + uint32_t RC_RLC_FLC:1; + uint32_t RRC_FRC:1; + } channels; +}; + +struct audio_clock_info { + /* pixel clock frequency*/ + uint32_t pixel_clock_in_10khz; + /* N - 32KHz audio */ + uint32_t n_32khz; + /* CTS - 32KHz audio*/ + uint32_t cts_32khz; + uint32_t n_44khz; + uint32_t cts_44khz; + uint32_t n_48khz; + uint32_t cts_48khz; +}; + +/* 25.2MHz/1.001*/ +/* 25.2MHz/1.001*/ +/* 25.2MHz*/ +/* 27MHz */ +/* 27MHz*1.001*/ +/* 27MHz*1.001*/ +/* 54MHz*/ +/* 54MHz*1.001*/ +/* 74.25MHz/1.001*/ +/* 74.25MHz*/ +/* 148.5MHz/1.001*/ +/* 148.5MHz*/ + +static const struct audio_clock_info audio_clock_info_table[16] = { + {2517, 4576, 28125, 7007, 31250, 6864, 28125}, + {2518, 4576, 28125, 7007, 31250, 6864, 28125}, + {2520, 4096, 25200, 6272, 28000, 6144, 25200}, + {2700, 4096, 27000, 6272, 30000, 6144, 27000}, + {2702, 4096, 27027, 6272, 30030, 6144, 27027}, + {2703, 4096, 27027, 6272, 30030, 6144, 27027}, + {5400, 4096, 54000, 6272, 60000, 6144, 54000}, + {5405, 4096, 54054, 6272, 60060, 6144, 54054}, + {7417, 11648, 210937, 17836, 234375, 11648, 140625}, + {7425, 4096, 74250, 6272, 82500, 6144, 74250}, + {14835, 11648, 421875, 8918, 234375, 5824, 140625}, + {14850, 4096, 148500, 6272, 165000, 6144, 148500}, + {29670, 5824, 421875, 4459, 234375, 5824, 281250}, + {29700, 3072, 222750, 4704, 247500, 5120, 247500}, + {59340, 5824, 843750, 8918, 937500, 5824, 562500}, + {59400, 3072, 445500, 9408, 990000, 6144, 594000} +}; + +static const struct audio_clock_info audio_clock_info_table_36bpc[14] = { + {2517, 9152, 84375, 7007, 48875, 9152, 56250}, + {2518, 9152, 84375, 7007, 48875, 9152, 56250}, + {2520, 4096, 37800, 6272, 42000, 6144, 37800}, + {2700, 4096, 40500, 6272, 45000, 6144, 40500}, + {2702, 8192, 81081, 6272, 45045, 8192, 54054}, + {2703, 8192, 81081, 6272, 45045, 8192, 54054}, + {5400, 4096, 81000, 6272, 90000, 6144, 81000}, + {5405, 4096, 81081, 6272, 90090, 6144, 81081}, + {7417, 11648, 316406, 17836, 351562, 11648, 210937}, + {7425, 4096, 111375, 6272, 123750, 6144, 111375}, + {14835, 11648, 632812, 17836, 703125, 11648, 421875}, + {14850, 4096, 222750, 6272, 247500, 6144, 222750}, + {29670, 5824, 632812, 8918, 703125, 5824, 421875}, + {29700, 4096, 445500, 4704, 371250, 5120, 371250} +}; + +static const struct audio_clock_info audio_clock_info_table_48bpc[14] = { + {2517, 4576, 56250, 7007, 62500, 6864, 56250}, + {2518, 4576, 56250, 7007, 62500, 6864, 56250}, + {2520, 4096, 50400, 6272, 56000, 6144, 50400}, + {2700, 4096, 54000, 6272, 60000, 6144, 54000}, + {2702, 4096, 54054, 6267, 60060, 8192, 54054}, + {2703, 4096, 54054, 6272, 60060, 8192, 54054}, + {5400, 4096, 108000, 6272, 120000, 6144, 108000}, + {5405, 4096, 108108, 6272, 120120, 6144, 108108}, + {7417, 11648, 421875, 17836, 468750, 11648, 281250}, + {7425, 4096, 148500, 6272, 165000, 6144, 148500}, + {14835, 11648, 843750, 8918, 468750, 11648, 281250}, + {14850, 4096, 297000, 6272, 330000, 6144, 297000}, + {29670, 5824, 843750, 4459, 468750, 5824, 562500}, + {29700, 3072, 445500, 4704, 495000, 5120, 495000} + + +}; + +static union audio_cea_channels speakers_to_channels( + struct audio_speaker_flags speaker_flags) +{ + union audio_cea_channels cea_channels = {0}; + + /* these are one to one */ + cea_channels.channels.FL = speaker_flags.FL_FR; + cea_channels.channels.FR = speaker_flags.FL_FR; + cea_channels.channels.LFE = speaker_flags.LFE; + cea_channels.channels.FC = speaker_flags.FC; + + /* if Rear Left and Right exist move RC speaker to channel 7 + * otherwise to channel 5 + */ + if (speaker_flags.RL_RR) { + cea_channels.channels.RL_RC = speaker_flags.RL_RR; + cea_channels.channels.RR = speaker_flags.RL_RR; + cea_channels.channels.RC_RLC_FLC = speaker_flags.RC; + } else { + cea_channels.channels.RL_RC = speaker_flags.RC; + } + + /* FRONT Left Right Center and REAR Left Right Center are exclusive */ + if (speaker_flags.FLC_FRC) { + cea_channels.channels.RC_RLC_FLC = speaker_flags.FLC_FRC; + cea_channels.channels.RRC_FRC = speaker_flags.FLC_FRC; + } else { + cea_channels.channels.RC_RLC_FLC = speaker_flags.RLC_RRC; + cea_channels.channels.RRC_FRC = speaker_flags.RLC_RRC; + } + + return cea_channels; +} + +static uint32_t calc_max_audio_packets_per_line( + const struct audio_crtc_info *crtc_info) +{ + uint32_t max_packets_per_line; + + max_packets_per_line = + crtc_info->h_total - crtc_info->h_active; + + if (crtc_info->pixel_repetition) + max_packets_per_line *= crtc_info->pixel_repetition; + + /* for other hdmi features */ + max_packets_per_line -= 58; + /* for Control Period */ + max_packets_per_line -= 16; + /* Number of Audio Packets per Line */ + max_packets_per_line /= 32; + + return max_packets_per_line; +} + +static void get_audio_clock_info( + enum dc_color_depth color_depth, + uint32_t crtc_pixel_clock_in_khz, + uint32_t actual_pixel_clock_in_khz, + struct audio_clock_info *audio_clock_info) +{ + const struct audio_clock_info *clock_info; + uint32_t index; + uint32_t crtc_pixel_clock_in_10khz = crtc_pixel_clock_in_khz / 10; + uint32_t audio_array_size; + + switch (color_depth) { + case COLOR_DEPTH_161616: + clock_info = audio_clock_info_table_48bpc; + audio_array_size = ARRAY_SIZE( + audio_clock_info_table_48bpc); + break; + case COLOR_DEPTH_121212: + clock_info = audio_clock_info_table_36bpc; + audio_array_size = ARRAY_SIZE( + audio_clock_info_table_36bpc); + break; + default: + clock_info = audio_clock_info_table; + audio_array_size = ARRAY_SIZE( + audio_clock_info_table); + break; + } + + if (clock_info != NULL) { + /* search for exact pixel clock in table */ + for (index = 0; index < audio_array_size; index++) { + if (clock_info[index].pixel_clock_in_10khz > + crtc_pixel_clock_in_10khz) + break; /* not match */ + else if (clock_info[index].pixel_clock_in_10khz == + crtc_pixel_clock_in_10khz) { + /* match found */ + *audio_clock_info = clock_info[index]; + return; + } + } + } + + /* not found */ + if (actual_pixel_clock_in_khz == 0) + actual_pixel_clock_in_khz = crtc_pixel_clock_in_khz; + + /* See HDMI spec the table entry under + * pixel clock of "Other". */ + audio_clock_info->pixel_clock_in_10khz = + actual_pixel_clock_in_khz / 10; + audio_clock_info->cts_32khz = actual_pixel_clock_in_khz; + audio_clock_info->cts_44khz = actual_pixel_clock_in_khz; + audio_clock_info->cts_48khz = actual_pixel_clock_in_khz; + + audio_clock_info->n_32khz = 4096; + audio_clock_info->n_44khz = 6272; + audio_clock_info->n_48khz = 6144; +} + +static void enc1_se_audio_setup( + struct stream_encoder *enc, + unsigned int az_inst, + struct audio_info *audio_info) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + uint32_t speakers = 0; + uint32_t channels = 0; + + ASSERT(audio_info); + if (audio_info == NULL) + /* This should not happen.it does so we don't get BSOD*/ + return; + + speakers = audio_info->flags.info.ALLSPEAKERS; + channels = speakers_to_channels(audio_info->flags.speaker_flags).all; + + /* setup the audio stream source select (audio -> dig mapping) */ + REG_SET(AFMT_AUDIO_SRC_CONTROL, 0, AFMT_AUDIO_SRC_SELECT, az_inst); + + /* Channel allocation */ + REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, channels); +} + +static void enc1_se_setup_hdmi_audio( + struct stream_encoder *enc, + const struct audio_crtc_info *crtc_info) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + struct audio_clock_info audio_clock_info = {0}; + uint32_t max_packets_per_line; + + /* For now still do calculation, although this field is ignored when + * above HDMI_PACKET_GEN_VERSION set to 1 + */ + max_packets_per_line = calc_max_audio_packets_per_line(crtc_info); + + /* HDMI_AUDIO_PACKET_CONTROL */ + REG_UPDATE_2(HDMI_AUDIO_PACKET_CONTROL, + HDMI_AUDIO_PACKETS_PER_LINE, max_packets_per_line, + HDMI_AUDIO_DELAY_EN, 1); + + /* AFMT_AUDIO_PACKET_CONTROL */ + REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1); + + /* AFMT_AUDIO_PACKET_CONTROL2 */ + REG_UPDATE_2(AFMT_AUDIO_PACKET_CONTROL2, + AFMT_AUDIO_LAYOUT_OVRD, 0, + AFMT_60958_OSF_OVRD, 0); + + /* HDMI_ACR_PACKET_CONTROL */ + REG_UPDATE_3(HDMI_ACR_PACKET_CONTROL, + HDMI_ACR_AUTO_SEND, 1, + HDMI_ACR_SOURCE, 0, + HDMI_ACR_AUDIO_PRIORITY, 0); + + /* Program audio clock sample/regeneration parameters */ + get_audio_clock_info(crtc_info->color_depth, + crtc_info->requested_pixel_clock, + crtc_info->calculated_pixel_clock, + &audio_clock_info); + DC_LOG_HW_AUDIO( + "\n%s:Input::requested_pixel_clock = %d" \ + "calculated_pixel_clock = %d \n", __func__, \ + crtc_info->requested_pixel_clock, \ + crtc_info->calculated_pixel_clock); + + /* HDMI_ACR_32_0__HDMI_ACR_CTS_32_MASK */ + REG_UPDATE(HDMI_ACR_32_0, HDMI_ACR_CTS_32, audio_clock_info.cts_32khz); + + /* HDMI_ACR_32_1__HDMI_ACR_N_32_MASK */ + REG_UPDATE(HDMI_ACR_32_1, HDMI_ACR_N_32, audio_clock_info.n_32khz); + + /* HDMI_ACR_44_0__HDMI_ACR_CTS_44_MASK */ + REG_UPDATE(HDMI_ACR_44_0, HDMI_ACR_CTS_44, audio_clock_info.cts_44khz); + + /* HDMI_ACR_44_1__HDMI_ACR_N_44_MASK */ + REG_UPDATE(HDMI_ACR_44_1, HDMI_ACR_N_44, audio_clock_info.n_44khz); + + /* HDMI_ACR_48_0__HDMI_ACR_CTS_48_MASK */ + REG_UPDATE(HDMI_ACR_48_0, HDMI_ACR_CTS_48, audio_clock_info.cts_48khz); + + /* HDMI_ACR_48_1__HDMI_ACR_N_48_MASK */ + REG_UPDATE(HDMI_ACR_48_1, HDMI_ACR_N_48, audio_clock_info.n_48khz); + + /* Video driver cannot know in advance which sample rate will + * be used by HD Audio driver + * HDMI_ACR_PACKET_CONTROL__HDMI_ACR_N_MULTIPLE field is + * programmed below in interruppt callback + */ + + /* AFMT_60958_0__AFMT_60958_CS_CHANNEL_NUMBER_L_MASK & + * AFMT_60958_0__AFMT_60958_CS_CLOCK_ACCURACY_MASK + */ + REG_UPDATE_2(AFMT_60958_0, + AFMT_60958_CS_CHANNEL_NUMBER_L, 1, + AFMT_60958_CS_CLOCK_ACCURACY, 0); + + /* AFMT_60958_1 AFMT_60958_CS_CHALNNEL_NUMBER_R */ + REG_UPDATE(AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2); + + /* AFMT_60958_2 now keep this settings until + * Programming guide comes out + */ + REG_UPDATE_6(AFMT_60958_2, + AFMT_60958_CS_CHANNEL_NUMBER_2, 3, + AFMT_60958_CS_CHANNEL_NUMBER_3, 4, + AFMT_60958_CS_CHANNEL_NUMBER_4, 5, + AFMT_60958_CS_CHANNEL_NUMBER_5, 6, + AFMT_60958_CS_CHANNEL_NUMBER_6, 7, + AFMT_60958_CS_CHANNEL_NUMBER_7, 8); +} + +static void enc1_se_setup_dp_audio( + struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + /* --- DP Audio packet configurations --- */ + + /* ATP Configuration */ + REG_SET(DP_SEC_AUD_N, 0, + DP_SEC_AUD_N, DP_SEC_AUD_N__DP_SEC_AUD_N__DEFAULT); + + /* Async/auto-calc timestamp mode */ + REG_SET(DP_SEC_TIMESTAMP, 0, DP_SEC_TIMESTAMP_MODE, + DP_SEC_TIMESTAMP__DP_SEC_TIMESTAMP_MODE__AUTO_CALC); + + /* --- The following are the registers + * copied from the SetupHDMI --- + */ + + /* AFMT_AUDIO_PACKET_CONTROL */ + REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1); + + /* AFMT_AUDIO_PACKET_CONTROL2 */ + /* Program the ATP and AIP next */ + REG_UPDATE_2(AFMT_AUDIO_PACKET_CONTROL2, + AFMT_AUDIO_LAYOUT_OVRD, 0, + AFMT_60958_OSF_OVRD, 0); + + /* AFMT_INFOFRAME_CONTROL0 */ + REG_UPDATE(AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1); + + /* AFMT_60958_0__AFMT_60958_CS_CLOCK_ACCURACY_MASK */ + REG_UPDATE(AFMT_60958_0, AFMT_60958_CS_CLOCK_ACCURACY, 0); +} + +static void enc1_se_enable_audio_clock( + struct stream_encoder *enc, + bool enable) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + if (REG(AFMT_CNTL) == 0) + return; /* DCE8/10 does not have this register */ + + REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, !!enable); + + /* wait for AFMT clock to turn on, + * expectation: this should complete in 1-2 reads + * + * REG_WAIT(AFMT_CNTL, AFMT_AUDIO_CLOCK_ON, !!enable, 1, 10); + * + * TODO: wait for clock_on does not work well. May need HW + * program sequence. But audio seems work normally even without wait + * for clock_on status change + */ +} + +static void enc1_se_enable_dp_audio( + struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + /* Enable Audio packets */ + REG_UPDATE(DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1); + + /* Program the ATP and AIP next */ + REG_UPDATE_2(DP_SEC_CNTL, + DP_SEC_ATP_ENABLE, 1, + DP_SEC_AIP_ENABLE, 1); + + /* Program STREAM_ENABLE after all the other enables. */ + REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); +} + +static void enc1_se_disable_dp_audio( + struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + uint32_t value = 0; + + /* Disable Audio packets */ + REG_UPDATE_5(DP_SEC_CNTL, + DP_SEC_ASP_ENABLE, 0, + DP_SEC_ATP_ENABLE, 0, + DP_SEC_AIP_ENABLE, 0, + DP_SEC_ACM_ENABLE, 0, + DP_SEC_STREAM_ENABLE, 0); + + /* This register shared with encoder info frame. Therefore we need to + * keep master enabled if at least on of the fields is not 0 + */ + value = REG_READ(DP_SEC_CNTL); + if (value != 0) + REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); + +} + +void enc1_se_audio_mute_control( + struct stream_encoder *enc, + bool mute) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, !mute); +} + +void enc1_se_dp_audio_setup( + struct stream_encoder *enc, + unsigned int az_inst, + struct audio_info *info) +{ + enc1_se_audio_setup(enc, az_inst, info); +} + +void enc1_se_dp_audio_enable( + struct stream_encoder *enc) +{ + enc1_se_enable_audio_clock(enc, true); + enc1_se_setup_dp_audio(enc); + enc1_se_enable_dp_audio(enc); +} + +void enc1_se_dp_audio_disable( + struct stream_encoder *enc) +{ + enc1_se_disable_dp_audio(enc); + enc1_se_enable_audio_clock(enc, false); +} + +void enc1_se_hdmi_audio_setup( + struct stream_encoder *enc, + unsigned int az_inst, + struct audio_info *info, + struct audio_crtc_info *audio_crtc_info) +{ + enc1_se_enable_audio_clock(enc, true); + enc1_se_setup_hdmi_audio(enc, audio_crtc_info); + enc1_se_audio_setup(enc, az_inst, info); +} + +void enc1_se_hdmi_audio_disable( + struct stream_encoder *enc) +{ + enc1_se_enable_audio_clock(enc, false); +} + + +void enc1_setup_stereo_sync( + struct stream_encoder *enc, + int tg_inst, bool enable) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + REG_UPDATE(DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, tg_inst); + REG_UPDATE(DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, !enable); +} + + +static const struct stream_encoder_funcs dcn10_str_enc_funcs = { + .dp_set_stream_attribute = + enc1_stream_encoder_dp_set_stream_attribute, + .hdmi_set_stream_attribute = + enc1_stream_encoder_hdmi_set_stream_attribute, + .dvi_set_stream_attribute = + enc1_stream_encoder_dvi_set_stream_attribute, + .set_mst_bandwidth = + enc1_stream_encoder_set_mst_bandwidth, + .update_hdmi_info_packets = + enc1_stream_encoder_update_hdmi_info_packets, + .stop_hdmi_info_packets = + enc1_stream_encoder_stop_hdmi_info_packets, + .update_dp_info_packets = + enc1_stream_encoder_update_dp_info_packets, + .stop_dp_info_packets = + enc1_stream_encoder_stop_dp_info_packets, + .dp_blank = + enc1_stream_encoder_dp_blank, + .dp_unblank = + enc1_stream_encoder_dp_unblank, + .audio_mute_control = enc1_se_audio_mute_control, + + .dp_audio_setup = enc1_se_dp_audio_setup, + .dp_audio_enable = enc1_se_dp_audio_enable, + .dp_audio_disable = enc1_se_dp_audio_disable, + + .hdmi_audio_setup = enc1_se_hdmi_audio_setup, + .hdmi_audio_disable = enc1_se_hdmi_audio_disable, + .setup_stereo_sync = enc1_setup_stereo_sync, + .set_avmute = enc1_stream_encoder_set_avmute, +}; + +void dcn10_stream_encoder_construct( + struct dcn10_stream_encoder *enc1, + struct dc_context *ctx, + struct dc_bios *bp, + enum engine_id eng_id, + const struct dcn10_stream_enc_registers *regs, + const struct dcn10_stream_encoder_shift *se_shift, + const struct dcn10_stream_encoder_mask *se_mask) +{ + enc1->base.funcs = &dcn10_str_enc_funcs; + enc1->base.ctx = ctx; + enc1->base.id = eng_id; + enc1->base.bp = bp; + enc1->regs = regs; + enc1->se_shift = se_shift; + enc1->se_mask = se_mask; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h new file mode 100644 index 000000000000..6b3e4ded155b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -0,0 +1,524 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DC_STREAM_ENCODER_DCN10_H__ +#define __DC_STREAM_ENCODER_DCN10_H__ + +#include "stream_encoder.h" + +#define DCN10STRENC_FROM_STRENC(stream_encoder)\ + container_of(stream_encoder, struct dcn10_stream_encoder, base) + +#define SE_COMMON_DCN_REG_LIST(id) \ + SRI(AFMT_CNTL, DIG, id), \ + SRI(AFMT_GENERIC_0, DIG, id), \ + SRI(AFMT_GENERIC_1, DIG, id), \ + SRI(AFMT_GENERIC_2, DIG, id), \ + SRI(AFMT_GENERIC_3, DIG, id), \ + SRI(AFMT_GENERIC_4, DIG, id), \ + SRI(AFMT_GENERIC_5, DIG, id), \ + SRI(AFMT_GENERIC_6, DIG, id), \ + SRI(AFMT_GENERIC_7, DIG, id), \ + SRI(AFMT_GENERIC_HDR, DIG, id), \ + SRI(AFMT_INFOFRAME_CONTROL0, DIG, id), \ + SRI(AFMT_VBI_PACKET_CONTROL, DIG, id), \ + SRI(AFMT_VBI_PACKET_CONTROL1, DIG, id), \ + SRI(AFMT_AUDIO_PACKET_CONTROL, DIG, id), \ + SRI(AFMT_AUDIO_PACKET_CONTROL2, DIG, id), \ + SRI(AFMT_AUDIO_SRC_CONTROL, DIG, id), \ + SRI(AFMT_60958_0, DIG, id), \ + SRI(AFMT_60958_1, DIG, id), \ + SRI(AFMT_60958_2, DIG, id), \ + SRI(DIG_FE_CNTL, DIG, id), \ + SRI(HDMI_CONTROL, DIG, id), \ + SRI(HDMI_DB_CONTROL, DIG, id), \ + SRI(HDMI_GC, DIG, id), \ + SRI(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \ + SRI(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \ + SRI(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \ + SRI(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \ + SRI(HDMI_INFOFRAME_CONTROL0, DIG, id), \ + SRI(HDMI_INFOFRAME_CONTROL1, DIG, id), \ + SRI(HDMI_VBI_PACKET_CONTROL, DIG, id), \ + SRI(HDMI_AUDIO_PACKET_CONTROL, DIG, id),\ + SRI(HDMI_ACR_PACKET_CONTROL, DIG, id),\ + SRI(HDMI_ACR_32_0, DIG, id),\ + SRI(HDMI_ACR_32_1, DIG, id),\ + SRI(HDMI_ACR_44_0, DIG, id),\ + SRI(HDMI_ACR_44_1, DIG, id),\ + SRI(HDMI_ACR_48_0, DIG, id),\ + SRI(HDMI_ACR_48_1, DIG, id),\ + SRI(DP_DB_CNTL, DP, id), \ + SRI(DP_MSA_MISC, DP, id), \ + SRI(DP_MSA_COLORIMETRY, DP, id), \ + SRI(DP_MSA_TIMING_PARAM1, DP, id), \ + SRI(DP_MSA_TIMING_PARAM2, DP, id), \ + SRI(DP_MSA_TIMING_PARAM3, DP, id), \ + SRI(DP_MSA_TIMING_PARAM4, DP, id), \ + SRI(DP_MSE_RATE_CNTL, DP, id), \ + SRI(DP_MSE_RATE_UPDATE, DP, id), \ + SRI(DP_PIXEL_FORMAT, DP, id), \ + SRI(DP_SEC_CNTL, DP, id), \ + SRI(DP_STEER_FIFO, DP, id), \ + SRI(DP_VID_M, DP, id), \ + SRI(DP_VID_N, DP, id), \ + SRI(DP_VID_STREAM_CNTL, DP, id), \ + SRI(DP_VID_TIMING, DP, id), \ + SRI(DP_SEC_AUD_N, DP, id), \ + SRI(DP_SEC_TIMESTAMP, DP, id) + +#define SE_DCN_REG_LIST(id)\ + SE_COMMON_DCN_REG_LIST(id) + + +struct dcn10_stream_enc_registers { + uint32_t AFMT_CNTL; + uint32_t AFMT_AVI_INFO0; + uint32_t AFMT_AVI_INFO1; + uint32_t AFMT_AVI_INFO2; + uint32_t AFMT_AVI_INFO3; + uint32_t AFMT_GENERIC_0; + uint32_t AFMT_GENERIC_1; + uint32_t AFMT_GENERIC_2; + uint32_t AFMT_GENERIC_3; + uint32_t AFMT_GENERIC_4; + uint32_t AFMT_GENERIC_5; + uint32_t AFMT_GENERIC_6; + uint32_t AFMT_GENERIC_7; + uint32_t AFMT_GENERIC_HDR; + uint32_t AFMT_INFOFRAME_CONTROL0; + uint32_t AFMT_VBI_PACKET_CONTROL; + uint32_t AFMT_VBI_PACKET_CONTROL1; + uint32_t AFMT_AUDIO_PACKET_CONTROL; + uint32_t AFMT_AUDIO_PACKET_CONTROL2; + uint32_t AFMT_AUDIO_SRC_CONTROL; + uint32_t AFMT_60958_0; + uint32_t AFMT_60958_1; + uint32_t AFMT_60958_2; + uint32_t DIG_FE_CNTL; + uint32_t DP_MSE_RATE_CNTL; + uint32_t DP_MSE_RATE_UPDATE; + uint32_t DP_PIXEL_FORMAT; + uint32_t DP_SEC_CNTL; + uint32_t DP_STEER_FIFO; + uint32_t DP_VID_M; + uint32_t DP_VID_N; + uint32_t DP_VID_STREAM_CNTL; + uint32_t DP_VID_TIMING; + uint32_t DP_SEC_AUD_N; + uint32_t DP_SEC_TIMESTAMP; + uint32_t HDMI_CONTROL; + uint32_t HDMI_GC; + uint32_t HDMI_GENERIC_PACKET_CONTROL0; + uint32_t HDMI_GENERIC_PACKET_CONTROL1; + uint32_t HDMI_GENERIC_PACKET_CONTROL2; + uint32_t HDMI_GENERIC_PACKET_CONTROL3; + uint32_t HDMI_GENERIC_PACKET_CONTROL4; + uint32_t HDMI_GENERIC_PACKET_CONTROL5; + uint32_t HDMI_INFOFRAME_CONTROL0; + uint32_t HDMI_INFOFRAME_CONTROL1; + uint32_t HDMI_VBI_PACKET_CONTROL; + uint32_t HDMI_AUDIO_PACKET_CONTROL; + uint32_t HDMI_ACR_PACKET_CONTROL; + uint32_t HDMI_ACR_32_0; + uint32_t HDMI_ACR_32_1; + uint32_t HDMI_ACR_44_0; + uint32_t HDMI_ACR_44_1; + uint32_t HDMI_ACR_48_0; + uint32_t HDMI_ACR_48_1; + uint32_t DP_DB_CNTL; + uint32_t DP_MSA_MISC; + uint32_t DP_MSA_COLORIMETRY; + uint32_t DP_MSA_TIMING_PARAM1; + uint32_t DP_MSA_TIMING_PARAM2; + uint32_t DP_MSA_TIMING_PARAM3; + uint32_t DP_MSA_TIMING_PARAM4; + uint32_t HDMI_DB_CONTROL; +}; + + +#define SE_SF(reg_name, field_name, post_fix)\ + .field_name = reg_name ## __ ## field_name ## post_fix + +#define SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, mask_sh),\ + SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB0, mask_sh),\ + SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB1, mask_sh),\ + SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB2, mask_sh),\ + SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB3, mask_sh),\ + SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, mask_sh),\ + SE_SF(DP0_DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\ + SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\ + SE_SF(DIG0_AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, mask_sh),\ + SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\ + SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\ + SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\ + SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\ + SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\ + SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\ + SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\ + SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\ + SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\ + SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\ + SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_START, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, mask_sh),\ + SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_LAYOUT_OVRD, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_60958_OSF_OVRD, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\ + SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\ + SE_SF(DIG0_AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, mask_sh),\ + SE_SF(DIG0_AFMT_60958_0, AFMT_60958_CS_CLOCK_ACCURACY, mask_sh),\ + SE_SF(DIG0_AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, mask_sh),\ + SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, mask_sh),\ + SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\ + SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\ + SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, mask_sh),\ + SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\ + SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, TMDS_PIXEL_ENCODING, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, TMDS_COLOR_FORMAT, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\ + SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_LOCK_STATUS, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC0_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC1_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC0_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC1_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\ + SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\ + SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\ + SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\ + SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\ + SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\ + SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\ + SE_SF(DP0_DP_VID_TIMING, DP_VID_N_MUL, mask_sh) + +#define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\ + SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh) + +#define SE_COMMON_MASK_SH_LIST_DCN10(mask_sh)\ + SE_COMMON_MASK_SH_LIST_SOC(mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_LINE, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\ + SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_LINE, mask_sh) + + +#define SE_REG_FIELD_LIST_DCN1_0(type) \ + type AFMT_GENERIC_INDEX;\ + type AFMT_GENERIC_HB0;\ + type AFMT_GENERIC_HB1;\ + type AFMT_GENERIC_HB2;\ + type AFMT_GENERIC_HB3;\ + type AFMT_GENERIC_LOCK_STATUS;\ + type AFMT_GENERIC_CONFLICT;\ + type AFMT_GENERIC_CONFLICT_CLR;\ + type AFMT_GENERIC0_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC1_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\ + type AFMT_GENERIC0_FRAME_UPDATE;\ + type AFMT_GENERIC1_FRAME_UPDATE;\ + type AFMT_GENERIC2_FRAME_UPDATE;\ + type AFMT_GENERIC3_FRAME_UPDATE;\ + type AFMT_GENERIC4_FRAME_UPDATE;\ + type AFMT_GENERIC5_FRAME_UPDATE;\ + type AFMT_GENERIC6_FRAME_UPDATE;\ + type AFMT_GENERIC7_FRAME_UPDATE;\ + type HDMI_GENERIC0_CONT;\ + type HDMI_GENERIC0_SEND;\ + type HDMI_GENERIC0_LINE;\ + type HDMI_GENERIC1_CONT;\ + type HDMI_GENERIC1_SEND;\ + type HDMI_GENERIC1_LINE;\ + type HDMI_GENERIC2_CONT;\ + type HDMI_GENERIC2_SEND;\ + type HDMI_GENERIC2_LINE;\ + type HDMI_GENERIC3_CONT;\ + type HDMI_GENERIC3_SEND;\ + type HDMI_GENERIC3_LINE;\ + type HDMI_GENERIC4_CONT;\ + type HDMI_GENERIC4_SEND;\ + type HDMI_GENERIC4_LINE;\ + type HDMI_GENERIC5_CONT;\ + type HDMI_GENERIC5_SEND;\ + type HDMI_GENERIC5_LINE;\ + type HDMI_GENERIC6_CONT;\ + type HDMI_GENERIC6_SEND;\ + type HDMI_GENERIC6_LINE;\ + type HDMI_GENERIC7_CONT;\ + type HDMI_GENERIC7_SEND;\ + type HDMI_GENERIC7_LINE;\ + type DP_PIXEL_ENCODING;\ + type DP_COMPONENT_DEPTH;\ + type HDMI_PACKET_GEN_VERSION;\ + type HDMI_KEEPOUT_MODE;\ + type HDMI_DEEP_COLOR_ENABLE;\ + type HDMI_CLOCK_CHANNEL_RATE;\ + type HDMI_DEEP_COLOR_DEPTH;\ + type HDMI_GC_CONT;\ + type HDMI_GC_SEND;\ + type HDMI_NULL_SEND;\ + type HDMI_DATA_SCRAMBLE_EN;\ + type HDMI_AUDIO_INFO_SEND;\ + type AFMT_AUDIO_INFO_UPDATE;\ + type HDMI_AUDIO_INFO_LINE;\ + type HDMI_GC_AVMUTE;\ + type DP_MSE_RATE_X;\ + type DP_MSE_RATE_Y;\ + type DP_MSE_RATE_UPDATE_PENDING;\ + type DP_SEC_GSP0_ENABLE;\ + type DP_SEC_STREAM_ENABLE;\ + type DP_SEC_GSP1_ENABLE;\ + type DP_SEC_GSP2_ENABLE;\ + type DP_SEC_GSP3_ENABLE;\ + type DP_SEC_GSP4_ENABLE;\ + type DP_SEC_GSP5_ENABLE;\ + type DP_SEC_GSP6_ENABLE;\ + type DP_SEC_GSP7_ENABLE;\ + type DP_SEC_MPG_ENABLE;\ + type DP_VID_STREAM_DIS_DEFER;\ + type DP_VID_STREAM_ENABLE;\ + type DP_VID_STREAM_STATUS;\ + type DP_STEER_FIFO_RESET;\ + type DP_VID_M_N_GEN_EN;\ + type DP_VID_N;\ + type DP_VID_M;\ + type DIG_START;\ + type AFMT_AUDIO_SRC_SELECT;\ + type AFMT_AUDIO_CHANNEL_ENABLE;\ + type HDMI_AUDIO_PACKETS_PER_LINE;\ + type HDMI_AUDIO_DELAY_EN;\ + type AFMT_60958_CS_UPDATE;\ + type AFMT_AUDIO_LAYOUT_OVRD;\ + type AFMT_60958_OSF_OVRD;\ + type HDMI_ACR_AUTO_SEND;\ + type HDMI_ACR_SOURCE;\ + type HDMI_ACR_AUDIO_PRIORITY;\ + type HDMI_ACR_CTS_32;\ + type HDMI_ACR_N_32;\ + type HDMI_ACR_CTS_44;\ + type HDMI_ACR_N_44;\ + type HDMI_ACR_CTS_48;\ + type HDMI_ACR_N_48;\ + type AFMT_60958_CS_CHANNEL_NUMBER_L;\ + type AFMT_60958_CS_CLOCK_ACCURACY;\ + type AFMT_60958_CS_CHANNEL_NUMBER_R;\ + type AFMT_60958_CS_CHANNEL_NUMBER_2;\ + type AFMT_60958_CS_CHANNEL_NUMBER_3;\ + type AFMT_60958_CS_CHANNEL_NUMBER_4;\ + type AFMT_60958_CS_CHANNEL_NUMBER_5;\ + type AFMT_60958_CS_CHANNEL_NUMBER_6;\ + type AFMT_60958_CS_CHANNEL_NUMBER_7;\ + type DP_SEC_AUD_N;\ + type DP_SEC_TIMESTAMP_MODE;\ + type DP_SEC_ASP_ENABLE;\ + type DP_SEC_ATP_ENABLE;\ + type DP_SEC_AIP_ENABLE;\ + type DP_SEC_ACM_ENABLE;\ + type AFMT_AUDIO_SAMPLE_SEND;\ + type AFMT_AUDIO_CLOCK_EN;\ + type TMDS_PIXEL_ENCODING;\ + type TMDS_COLOR_FORMAT;\ + type DIG_STEREOSYNC_SELECT;\ + type DIG_STEREOSYNC_GATE_EN;\ + type DP_DB_DISABLE;\ + type DP_MSA_MISC0;\ + type DP_MSA_HTOTAL;\ + type DP_MSA_VTOTAL;\ + type DP_MSA_HSTART;\ + type DP_MSA_VSTART;\ + type DP_MSA_HSYNCWIDTH;\ + type DP_MSA_HSYNCPOLARITY;\ + type DP_MSA_VSYNCWIDTH;\ + type DP_MSA_VSYNCPOLARITY;\ + type DP_MSA_HWIDTH;\ + type DP_MSA_VHEIGHT;\ + type HDMI_DB_DISABLE;\ + type DP_VID_N_MUL;\ + type DP_VID_M_DOUBLE_VALUE_EN + +struct dcn10_stream_encoder_shift { + SE_REG_FIELD_LIST_DCN1_0(uint8_t); +}; + +struct dcn10_stream_encoder_mask { + SE_REG_FIELD_LIST_DCN1_0(uint32_t); +}; + +struct dcn10_stream_encoder { + struct stream_encoder base; + const struct dcn10_stream_enc_registers *regs; + const struct dcn10_stream_encoder_shift *se_shift; + const struct dcn10_stream_encoder_mask *se_mask; +}; + +void dcn10_stream_encoder_construct( + struct dcn10_stream_encoder *enc1, + struct dc_context *ctx, + struct dc_bios *bp, + enum engine_id eng_id, + const struct dcn10_stream_enc_registers *regs, + const struct dcn10_stream_encoder_shift *se_shift, + const struct dcn10_stream_encoder_mask *se_mask); + +void enc1_update_generic_info_packet( + struct dcn10_stream_encoder *enc1, + uint32_t packet_index, + const struct dc_info_packet *info_packet); + +void enc1_stream_encoder_dp_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + enum dc_color_space output_color_space); + +void enc1_stream_encoder_hdmi_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + int actual_pix_clk_khz, + bool enable_audio); + +void enc1_stream_encoder_dvi_set_stream_attribute( + struct stream_encoder *enc, + struct dc_crtc_timing *crtc_timing, + bool is_dual_link); + +void enc1_stream_encoder_set_mst_bandwidth( + struct stream_encoder *enc, + struct fixed31_32 avg_time_slots_per_mtp); + +void enc1_stream_encoder_update_dp_info_packets( + struct stream_encoder *enc, + const struct encoder_info_frame *info_frame); + +void enc1_stream_encoder_stop_dp_info_packets( + struct stream_encoder *enc); + +void enc1_stream_encoder_dp_blank( + struct stream_encoder *enc); + +void enc1_stream_encoder_dp_unblank( + struct stream_encoder *enc, + const struct encoder_unblank_param *param); + +void enc1_setup_stereo_sync( + struct stream_encoder *enc, + int tg_inst, bool enable); + +void enc1_stream_encoder_set_avmute( + struct stream_encoder *enc, + bool enable); + +void enc1_se_audio_mute_control( + struct stream_encoder *enc, + bool mute); + +void enc1_se_dp_audio_setup( + struct stream_encoder *enc, + unsigned int az_inst, + struct audio_info *info); + +void enc1_se_dp_audio_enable( + struct stream_encoder *enc); + +void enc1_se_dp_audio_disable( + struct stream_encoder *enc); + +void enc1_se_hdmi_audio_setup( + struct stream_encoder *enc, + unsigned int az_inst, + struct audio_info *info, + struct audio_crtc_info *audio_crtc_info); + +void enc1_se_hdmi_audio_disable( + struct stream_encoder *enc); + +#endif /* __DC_STREAM_ENCODER_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index 22e7ee7dcd26..4ff9b2bba178 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -341,6 +341,10 @@ bool dm_dmcu_set_pipe(struct dc_context *ctx, unsigned int controller_id); unsigned long long dm_get_timestamp(struct dc_context *ctx); +unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx, + unsigned long long current_time_stamp, + unsigned long long last_time_stamp); + /* * performance tracing */ @@ -351,10 +355,6 @@ void dm_perf_trace_timestamp(const char *func_name, unsigned int line); /* * Debug and verification hooks */ -bool dm_helpers_dc_conn_log( - struct dc_context *ctx, - struct log_entry *entry, - enum dc_log_type event); void dm_dtn_log_begin(struct dc_context *ctx); void dm_dtn_log_append_v(struct dc_context *ctx, const char *msg, ...); diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h index b1ad3553f900..47c19f8fe7d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h @@ -108,4 +108,17 @@ enum output_standard { dm_std_uninitialized = 0, dm_std_cvtr2, dm_std_cvt }; +enum mpc_combine_affinity { + dm_mpc_always_when_possible, + dm_mpc_reduce_voltage, + dm_mpc_reduce_voltage_and_clocks +}; + +enum self_refresh_affinity { + dm_try_to_allow_self_refresh_and_mclk_switch, + dm_allow_self_refresh_and_mclk_switch, + dm_allow_self_refresh, + dm_neither_self_refresh_nor_mclk_switch +}; + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index c109b2c34c8f..fd9d97aab071 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -26,75 +26,89 @@ #include "display_mode_lib.h" #include "dc_features.h" +static const struct _vcs_dpi_ip_params_st dcn1_0_ip = { + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = 164, + .dpte_buffer_size_in_pte_reqs = 42, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .pte_enable = 1, + .pte_chunk_size_kbytes = 2, + .meta_chunk_size_kbytes = 2, + .writeback_chunk_size_kbytes = 2, + .line_buffer_size_bits = 589824, + .max_line_buffer_lines = 12, + .IsLineBufferBppFixed = 0, + .LineBufferFixedBpp = -1, + .writeback_luma_buffer_size_kbytes = 12, + .writeback_chroma_buffer_size_kbytes = 8, + .max_num_dpp = 4, + .max_num_wb = 2, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 4, + .max_vscl_ratio = 4, + .hscl_mults = 4, + .vscl_mults = 4, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dispclk_ramp_margin_percent = 1, + .underscan_factor = 1.10, + .min_vblank_lines = 14, + .dppclk_delay_subtotal = 90, + .dispclk_delay_subtotal = 42, + .dcfclk_cstate_latency = 10, + .max_inter_dcn_tile_repeaters = 8, + .can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = 0, + .bug_forcing_LC_req_same_size_fixed = 0, +}; + +static const struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = { + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .urgent_latency_us = 4.0, + .writeback_latency_us = 12.0, + .ideal_dram_bw_after_urgent_percent = 80.0, + .max_request_size_bytes = 256, + .downspread_percent = 0.5, + .dram_page_open_time_ns = 50.0, + .dram_rw_turnaround_time_ns = 17.5, + .dram_return_buffer_per_channel_bytes = 8192, + .round_trip_ping_latency_dcfclk_cycles = 128, + .urgent_out_of_order_return_per_channel_bytes = 256, + .channel_interleave_bytes = 256, + .num_banks = 8, + .num_chans = 2, + .vmm_page_size_bytes = 4096, + .dram_clock_change_latency_us = 17.0, + .writeback_dram_clock_change_latency_us = 23.0, + .return_bus_width_bytes = 64, +}; + static void set_soc_bounding_box(struct _vcs_dpi_soc_bounding_box_st *soc, enum dml_project project) { - if (project == DML_PROJECT_RAVEN1) { - soc->sr_exit_time_us = 9.0; - soc->sr_enter_plus_exit_time_us = 11.0; - soc->urgent_latency_us = 4.0; - soc->writeback_latency_us = 12.0; - soc->ideal_dram_bw_after_urgent_percent = 80.0; - soc->max_request_size_bytes = 256; - soc->downspread_percent = 0.5; - soc->dram_page_open_time_ns = 50.0; - soc->dram_rw_turnaround_time_ns = 17.5; - soc->dram_return_buffer_per_channel_bytes = 8192; - soc->round_trip_ping_latency_dcfclk_cycles = 128; - soc->urgent_out_of_order_return_per_channel_bytes = 256; - soc->channel_interleave_bytes = 256; - soc->num_banks = 8; - soc->num_chans = 2; - soc->vmm_page_size_bytes = 4096; - soc->dram_clock_change_latency_us = 17.0; - soc->writeback_dram_clock_change_latency_us = 23.0; - soc->return_bus_width_bytes = 64; - } else { - BREAK_TO_DEBUGGER(); /* Invalid Project Specified */ + switch (project) { + case DML_PROJECT_RAVEN1: + *soc = dcn1_0_soc; + break; + default: + ASSERT(0); + break; } } static void set_ip_params(struct _vcs_dpi_ip_params_st *ip, enum dml_project project) { - if (project == DML_PROJECT_RAVEN1) { - ip->rob_buffer_size_kbytes = 64; - ip->det_buffer_size_kbytes = 164; - ip->dpte_buffer_size_in_pte_reqs = 42; - ip->dpp_output_buffer_pixels = 2560; - ip->opp_output_buffer_lines = 1; - ip->pixel_chunk_size_kbytes = 8; - ip->pte_enable = 1; - ip->pte_chunk_size_kbytes = 2; - ip->meta_chunk_size_kbytes = 2; - ip->writeback_chunk_size_kbytes = 2; - ip->line_buffer_size_bits = 589824; - ip->max_line_buffer_lines = 12; - ip->IsLineBufferBppFixed = 0; - ip->LineBufferFixedBpp = -1; - ip->writeback_luma_buffer_size_kbytes = 12; - ip->writeback_chroma_buffer_size_kbytes = 8; - ip->max_num_dpp = 4; - ip->max_num_wb = 2; - ip->max_dchub_pscl_bw_pix_per_clk = 4; - ip->max_pscl_lb_bw_pix_per_clk = 2; - ip->max_lb_vscl_bw_pix_per_clk = 4; - ip->max_vscl_hscl_bw_pix_per_clk = 4; - ip->max_hscl_ratio = 4; - ip->max_vscl_ratio = 4; - ip->hscl_mults = 4; - ip->vscl_mults = 4; - ip->max_hscl_taps = 8; - ip->max_vscl_taps = 8; - ip->dispclk_ramp_margin_percent = 1; - ip->underscan_factor = 1.10; - ip->min_vblank_lines = 14; - ip->dppclk_delay_subtotal = 90; - ip->dispclk_delay_subtotal = 42; - ip->dcfclk_cstate_latency = 10; - ip->max_inter_dcn_tile_repeaters = 8; - ip->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = 0; - ip->bug_forcing_LC_req_same_size_fixed = 0; - } else { - BREAK_TO_DEBUGGER(); /* Invalid Project Specified */ + switch (project) { + case DML_PROJECT_RAVEN1: + *ip = dcn1_0_ip; + break; + default: + ASSERT(0); + break; } } diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 09affa16cc43..7fa0375939ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -25,39 +25,39 @@ #ifndef __DISPLAY_MODE_STRUCTS_H__ #define __DISPLAY_MODE_STRUCTS_H__ -typedef struct _vcs_dpi_voltage_scaling_st voltage_scaling_st; -typedef struct _vcs_dpi_soc_bounding_box_st soc_bounding_box_st; -typedef struct _vcs_dpi_ip_params_st ip_params_st; -typedef struct _vcs_dpi_display_pipe_source_params_st display_pipe_source_params_st; -typedef struct _vcs_dpi_display_output_params_st display_output_params_st; -typedef struct _vcs_dpi_display_bandwidth_st display_bandwidth_st; -typedef struct _vcs_dpi_scaler_ratio_depth_st scaler_ratio_depth_st; -typedef struct _vcs_dpi_scaler_taps_st scaler_taps_st; -typedef struct _vcs_dpi_display_pipe_dest_params_st display_pipe_dest_params_st; -typedef struct _vcs_dpi_display_pipe_params_st display_pipe_params_st; -typedef struct _vcs_dpi_display_clocks_and_cfg_st display_clocks_and_cfg_st; -typedef struct _vcs_dpi_display_e2e_pipe_params_st display_e2e_pipe_params_st; -typedef struct _vcs_dpi_dchub_buffer_sizing_st dchub_buffer_sizing_st; -typedef struct _vcs_dpi_watermarks_perf_st watermarks_perf_st; -typedef struct _vcs_dpi_cstate_pstate_watermarks_st cstate_pstate_watermarks_st; -typedef struct _vcs_dpi_wm_calc_pipe_params_st wm_calc_pipe_params_st; -typedef struct _vcs_dpi_vratio_pre_st vratio_pre_st; -typedef struct _vcs_dpi_display_data_rq_misc_params_st display_data_rq_misc_params_st; -typedef struct _vcs_dpi_display_data_rq_sizing_params_st display_data_rq_sizing_params_st; -typedef struct _vcs_dpi_display_data_rq_dlg_params_st display_data_rq_dlg_params_st; -typedef struct _vcs_dpi_display_cur_rq_dlg_params_st display_cur_rq_dlg_params_st; -typedef struct _vcs_dpi_display_rq_dlg_params_st display_rq_dlg_params_st; -typedef struct _vcs_dpi_display_rq_sizing_params_st display_rq_sizing_params_st; -typedef struct _vcs_dpi_display_rq_misc_params_st display_rq_misc_params_st; -typedef struct _vcs_dpi_display_rq_params_st display_rq_params_st; -typedef struct _vcs_dpi_display_dlg_regs_st display_dlg_regs_st; -typedef struct _vcs_dpi_display_ttu_regs_st display_ttu_regs_st; -typedef struct _vcs_dpi_display_data_rq_regs_st display_data_rq_regs_st; -typedef struct _vcs_dpi_display_rq_regs_st display_rq_regs_st; -typedef struct _vcs_dpi_display_dlg_sys_params_st display_dlg_sys_params_st; -typedef struct _vcs_dpi_display_dlg_prefetch_param_st display_dlg_prefetch_param_st; -typedef struct _vcs_dpi_display_pipe_clock_st display_pipe_clock_st; -typedef struct _vcs_dpi_display_arb_params_st display_arb_params_st; +typedef struct _vcs_dpi_voltage_scaling_st voltage_scaling_st; +typedef struct _vcs_dpi_soc_bounding_box_st soc_bounding_box_st; +typedef struct _vcs_dpi_ip_params_st ip_params_st; +typedef struct _vcs_dpi_display_pipe_source_params_st display_pipe_source_params_st; +typedef struct _vcs_dpi_display_output_params_st display_output_params_st; +typedef struct _vcs_dpi_display_bandwidth_st display_bandwidth_st; +typedef struct _vcs_dpi_scaler_ratio_depth_st scaler_ratio_depth_st; +typedef struct _vcs_dpi_scaler_taps_st scaler_taps_st; +typedef struct _vcs_dpi_display_pipe_dest_params_st display_pipe_dest_params_st; +typedef struct _vcs_dpi_display_pipe_params_st display_pipe_params_st; +typedef struct _vcs_dpi_display_clocks_and_cfg_st display_clocks_and_cfg_st; +typedef struct _vcs_dpi_display_e2e_pipe_params_st display_e2e_pipe_params_st; +typedef struct _vcs_dpi_dchub_buffer_sizing_st dchub_buffer_sizing_st; +typedef struct _vcs_dpi_watermarks_perf_st watermarks_perf_st; +typedef struct _vcs_dpi_cstate_pstate_watermarks_st cstate_pstate_watermarks_st; +typedef struct _vcs_dpi_wm_calc_pipe_params_st wm_calc_pipe_params_st; +typedef struct _vcs_dpi_vratio_pre_st vratio_pre_st; +typedef struct _vcs_dpi_display_data_rq_misc_params_st display_data_rq_misc_params_st; +typedef struct _vcs_dpi_display_data_rq_sizing_params_st display_data_rq_sizing_params_st; +typedef struct _vcs_dpi_display_data_rq_dlg_params_st display_data_rq_dlg_params_st; +typedef struct _vcs_dpi_display_cur_rq_dlg_params_st display_cur_rq_dlg_params_st; +typedef struct _vcs_dpi_display_rq_dlg_params_st display_rq_dlg_params_st; +typedef struct _vcs_dpi_display_rq_sizing_params_st display_rq_sizing_params_st; +typedef struct _vcs_dpi_display_rq_misc_params_st display_rq_misc_params_st; +typedef struct _vcs_dpi_display_rq_params_st display_rq_params_st; +typedef struct _vcs_dpi_display_dlg_regs_st display_dlg_regs_st; +typedef struct _vcs_dpi_display_ttu_regs_st display_ttu_regs_st; +typedef struct _vcs_dpi_display_data_rq_regs_st display_data_rq_regs_st; +typedef struct _vcs_dpi_display_rq_regs_st display_rq_regs_st; +typedef struct _vcs_dpi_display_dlg_sys_params_st display_dlg_sys_params_st; +typedef struct _vcs_dpi_display_dlg_prefetch_param_st display_dlg_prefetch_param_st; +typedef struct _vcs_dpi_display_pipe_clock_st display_pipe_clock_st; +typedef struct _vcs_dpi_display_arb_params_st display_arb_params_st; struct _vcs_dpi_voltage_scaling_st { int state; @@ -72,89 +72,107 @@ struct _vcs_dpi_voltage_scaling_st { double dppclk_mhz; }; -struct _vcs_dpi_soc_bounding_box_st { - double sr_exit_time_us; - double sr_enter_plus_exit_time_us; - double urgent_latency_us; - double writeback_latency_us; - double ideal_dram_bw_after_urgent_percent; - unsigned int max_request_size_bytes; - double downspread_percent; - double dram_page_open_time_ns; - double dram_rw_turnaround_time_ns; - double dram_return_buffer_per_channel_bytes; - double dram_channel_width_bytes; +struct _vcs_dpi_soc_bounding_box_st { + double sr_exit_time_us; + double sr_enter_plus_exit_time_us; + double urgent_latency_us; + double urgent_latency_pixel_data_only_us; + double urgent_latency_pixel_mixed_with_vm_data_us; + double urgent_latency_vm_data_only_us; + double writeback_latency_us; + double ideal_dram_bw_after_urgent_percent; + double pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly + double pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm; + double pct_ideal_dram_sdp_bw_after_urgent_vm_only; + double max_avg_sdp_bw_use_normal_percent; + double max_avg_dram_bw_use_normal_percent; + unsigned int max_request_size_bytes; + double downspread_percent; + double dram_page_open_time_ns; + double dram_rw_turnaround_time_ns; + double dram_return_buffer_per_channel_bytes; + double dram_channel_width_bytes; double fabric_datapath_to_dcn_data_return_bytes; double dcn_downspread_percent; double dispclk_dppclk_vco_speed_mhz; double dfs_vco_period_ps; - unsigned int round_trip_ping_latency_dcfclk_cycles; - unsigned int urgent_out_of_order_return_per_channel_bytes; - unsigned int channel_interleave_bytes; - unsigned int num_banks; - unsigned int num_chans; - unsigned int vmm_page_size_bytes; - double dram_clock_change_latency_us; - double writeback_dram_clock_change_latency_us; - unsigned int return_bus_width_bytes; - unsigned int voltage_override; - double xfc_bus_transport_time_us; - double xfc_xbuf_latency_tolerance_us; + unsigned int urgent_out_of_order_return_per_channel_pixel_only_bytes; + unsigned int urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; + unsigned int urgent_out_of_order_return_per_channel_vm_only_bytes; + unsigned int round_trip_ping_latency_dcfclk_cycles; + unsigned int urgent_out_of_order_return_per_channel_bytes; + unsigned int channel_interleave_bytes; + unsigned int num_banks; + unsigned int num_chans; + unsigned int vmm_page_size_bytes; + double dram_clock_change_latency_us; + double writeback_dram_clock_change_latency_us; + unsigned int return_bus_width_bytes; + unsigned int voltage_override; + double xfc_bus_transport_time_us; + double xfc_xbuf_latency_tolerance_us; + int use_urgent_burst_bw; struct _vcs_dpi_voltage_scaling_st clock_limits[7]; }; -struct _vcs_dpi_ip_params_st { - unsigned int max_inter_dcn_tile_repeaters; - unsigned int num_dsc; - unsigned int odm_capable; - unsigned int rob_buffer_size_kbytes; - unsigned int det_buffer_size_kbytes; - unsigned int dpte_buffer_size_in_pte_reqs; - unsigned int pde_proc_buffer_size_64k_reqs; - unsigned int dpp_output_buffer_pixels; - unsigned int opp_output_buffer_lines; - unsigned int pixel_chunk_size_kbytes; - unsigned char pte_enable; - unsigned int pte_chunk_size_kbytes; - unsigned int meta_chunk_size_kbytes; - unsigned int writeback_chunk_size_kbytes; - unsigned int line_buffer_size_bits; - unsigned int max_line_buffer_lines; - unsigned int writeback_luma_buffer_size_kbytes; - unsigned int writeback_chroma_buffer_size_kbytes; - unsigned int writeback_chroma_line_buffer_width_pixels; - unsigned int max_page_table_levels; - unsigned int max_num_dpp; - unsigned int max_num_otg; - unsigned int cursor_chunk_size; - unsigned int cursor_buffer_size; - unsigned int max_num_wb; - unsigned int max_dchub_pscl_bw_pix_per_clk; - unsigned int max_pscl_lb_bw_pix_per_clk; - unsigned int max_lb_vscl_bw_pix_per_clk; - unsigned int max_vscl_hscl_bw_pix_per_clk; - double max_hscl_ratio; - double max_vscl_ratio; - unsigned int hscl_mults; - unsigned int vscl_mults; - unsigned int max_hscl_taps; - unsigned int max_vscl_taps; - unsigned int xfc_supported; - unsigned int xfc_fill_constant_bytes; - double dispclk_ramp_margin_percent; - double xfc_fill_bw_overhead_percent; - double underscan_factor; - unsigned int min_vblank_lines; - unsigned int dppclk_delay_subtotal; - unsigned int dispclk_delay_subtotal; - unsigned int dcfclk_cstate_latency; - unsigned int dppclk_delay_scl; - unsigned int dppclk_delay_scl_lb_only; - unsigned int dppclk_delay_cnvc_formatter; - unsigned int dppclk_delay_cnvc_cursor; - unsigned int is_line_buffer_bpp_fixed; - unsigned int line_buffer_fixed_bpp; - unsigned int dcc_supported; +struct _vcs_dpi_ip_params_st { + bool gpuvm_enable; + bool hostvm_enable; + unsigned int gpuvm_max_page_table_levels; + unsigned int hostvm_max_page_table_levels; + unsigned int hostvm_cached_page_table_levels; + unsigned int pte_group_size_bytes; + unsigned int max_inter_dcn_tile_repeaters; + unsigned int num_dsc; + unsigned int odm_capable; + unsigned int rob_buffer_size_kbytes; + unsigned int det_buffer_size_kbytes; + unsigned int dpte_buffer_size_in_pte_reqs; + unsigned int pde_proc_buffer_size_64k_reqs; + unsigned int dpp_output_buffer_pixels; + unsigned int opp_output_buffer_lines; + unsigned int pixel_chunk_size_kbytes; + unsigned char pte_enable; + unsigned int pte_chunk_size_kbytes; + unsigned int meta_chunk_size_kbytes; + unsigned int writeback_chunk_size_kbytes; + unsigned int line_buffer_size_bits; + unsigned int max_line_buffer_lines; + unsigned int writeback_luma_buffer_size_kbytes; + unsigned int writeback_chroma_buffer_size_kbytes; + unsigned int writeback_chroma_line_buffer_width_pixels; + unsigned int max_page_table_levels; + unsigned int max_num_dpp; + unsigned int max_num_otg; + unsigned int cursor_chunk_size; + unsigned int cursor_buffer_size; + unsigned int max_num_wb; + unsigned int max_dchub_pscl_bw_pix_per_clk; + unsigned int max_pscl_lb_bw_pix_per_clk; + unsigned int max_lb_vscl_bw_pix_per_clk; + unsigned int max_vscl_hscl_bw_pix_per_clk; + double max_hscl_ratio; + double max_vscl_ratio; + unsigned int hscl_mults; + unsigned int vscl_mults; + unsigned int max_hscl_taps; + unsigned int max_vscl_taps; + unsigned int xfc_supported; + unsigned int xfc_fill_constant_bytes; + double dispclk_ramp_margin_percent; + double xfc_fill_bw_overhead_percent; + double underscan_factor; + unsigned int min_vblank_lines; + unsigned int dppclk_delay_subtotal; + unsigned int dispclk_delay_subtotal; + unsigned int dcfclk_cstate_latency; + unsigned int dppclk_delay_scl; + unsigned int dppclk_delay_scl_lb_only; + unsigned int dppclk_delay_cnvc_formatter; + unsigned int dppclk_delay_cnvc_cursor; + unsigned int is_line_buffer_bpp_fixed; + unsigned int line_buffer_fixed_bpp; + unsigned int dcc_supported; unsigned int IsLineBufferBppFixed; unsigned int LineBufferFixedBpp; @@ -169,41 +187,45 @@ struct _vcs_dpi_display_xfc_params_st { int xfc_slv_chunk_size_bytes; }; -struct _vcs_dpi_display_pipe_source_params_st { - int source_format; - unsigned char dcc; - unsigned int dcc_override; - unsigned int dcc_rate; - unsigned char dcc_use_global; - unsigned char vm; - unsigned char vm_levels_force_en; - unsigned int vm_levels_force; - int source_scan; - int sw_mode; - int macro_tile_size; - unsigned char is_display_sw; - unsigned int viewport_width; - unsigned int viewport_height; - unsigned int viewport_y_y; - unsigned int viewport_y_c; - unsigned int viewport_width_c; - unsigned int viewport_height_c; - unsigned int data_pitch; - unsigned int data_pitch_c; - unsigned int meta_pitch; - unsigned int meta_pitch_c; - unsigned int cur0_src_width; - int cur0_bpp; - unsigned int cur1_src_width; - int cur1_bpp; - int num_cursors; - unsigned char is_hsplit; - unsigned char dynamic_metadata_enable; - unsigned int dynamic_metadata_lines_before_active; - unsigned int dynamic_metadata_xmit_bytes; - unsigned int hsplit_grp; - unsigned char xfc_enable; - unsigned char xfc_slave; +struct _vcs_dpi_display_pipe_source_params_st { + int source_format; + unsigned char dcc; + unsigned int dcc_override; + unsigned int dcc_rate; + unsigned char dcc_use_global; + unsigned char vm; + bool gpuvm; // gpuvm enabled + bool hostvm; // hostvm enabled + bool gpuvm_levels_force_en; + unsigned int gpuvm_levels_force; + bool hostvm_levels_force_en; + unsigned int hostvm_levels_force; + int source_scan; + int sw_mode; + int macro_tile_size; + unsigned char is_display_sw; + unsigned int viewport_width; + unsigned int viewport_height; + unsigned int viewport_y_y; + unsigned int viewport_y_c; + unsigned int viewport_width_c; + unsigned int viewport_height_c; + unsigned int data_pitch; + unsigned int data_pitch_c; + unsigned int meta_pitch; + unsigned int meta_pitch_c; + unsigned int cur0_src_width; + int cur0_bpp; + unsigned int cur1_src_width; + int cur1_bpp; + int num_cursors; + unsigned char is_hsplit; + unsigned char dynamic_metadata_enable; + unsigned int dynamic_metadata_lines_before_active; + unsigned int dynamic_metadata_xmit_bytes; + unsigned int hsplit_grp; + unsigned char xfc_enable; + unsigned char xfc_slave; struct _vcs_dpi_display_xfc_params_st xfc_params; }; struct writeback_st { @@ -215,338 +237,339 @@ struct writeback_st { int wb_vtaps_luma; int wb_htaps_chroma; int wb_vtaps_chroma; - int wb_hratio; - int wb_vratio; -}; - -struct _vcs_dpi_display_output_params_st { - int dp_lanes; - int output_bpp; - int dsc_enable; - int wb_enable; - int opp_input_bpc; - int output_type; - int output_format; - int output_standard; - int dsc_slices; + double wb_hratio; + double wb_vratio; +}; + +struct _vcs_dpi_display_output_params_st { + int dp_lanes; + int output_bpp; + int dsc_enable; + int wb_enable; + int num_active_wb; + int output_bpc; + int output_type; + int output_format; + int output_standard; + int dsc_slices; struct writeback_st wb; }; -struct _vcs_dpi_display_bandwidth_st { - double total_bw_consumed_gbps; - double guaranteed_urgent_return_bw_gbps; -}; - -struct _vcs_dpi_scaler_ratio_depth_st { - double hscl_ratio; - double vscl_ratio; - double hscl_ratio_c; - double vscl_ratio_c; - double vinit; - double vinit_c; - double vinit_bot; - double vinit_bot_c; - int lb_depth; - int scl_enable; -}; - -struct _vcs_dpi_scaler_taps_st { - unsigned int htaps; - unsigned int vtaps; - unsigned int htaps_c; - unsigned int vtaps_c; -}; - -struct _vcs_dpi_display_pipe_dest_params_st { - unsigned int recout_width; - unsigned int recout_height; - unsigned int full_recout_width; - unsigned int full_recout_height; - unsigned int hblank_start; - unsigned int hblank_end; - unsigned int vblank_start; - unsigned int vblank_end; - unsigned int htotal; - unsigned int vtotal; - unsigned int vactive; - unsigned int hactive; - unsigned int vstartup_start; - unsigned int vupdate_offset; - unsigned int vupdate_width; - unsigned int vready_offset; - unsigned char interlaced; - unsigned char underscan; - double pixel_rate_mhz; - unsigned char synchronized_vblank_all_planes; - unsigned char otg_inst; - unsigned char odm_split_cnt; - unsigned char odm_combine; -}; - -struct _vcs_dpi_display_pipe_params_st { - display_pipe_source_params_st src; - display_pipe_dest_params_st dest; - scaler_ratio_depth_st scale_ratio_depth; - scaler_taps_st scale_taps; -}; - -struct _vcs_dpi_display_clocks_and_cfg_st { - int voltage; - double dppclk_mhz; - double refclk_mhz; - double dispclk_mhz; - double dcfclk_mhz; - double socclk_mhz; -}; - -struct _vcs_dpi_display_e2e_pipe_params_st { - display_pipe_params_st pipe; - display_output_params_st dout; - display_clocks_and_cfg_st clks_cfg; -}; - -struct _vcs_dpi_dchub_buffer_sizing_st { - unsigned int swath_width_y; - unsigned int swath_height_y; - unsigned int swath_height_c; - unsigned int detail_buffer_size_y; -}; - -struct _vcs_dpi_watermarks_perf_st { - double stutter_eff_in_active_region_percent; - double urgent_latency_supported_us; - double non_urgent_latency_supported_us; - double dram_clock_change_margin_us; - double dram_access_eff_percent; -}; - -struct _vcs_dpi_cstate_pstate_watermarks_st { - double cstate_exit_us; - double cstate_enter_plus_exit_us; - double pstate_change_us; -}; - -struct _vcs_dpi_wm_calc_pipe_params_st { - unsigned int num_dpp; - int voltage; - int output_type; - double dcfclk_mhz; - double socclk_mhz; - double dppclk_mhz; - double pixclk_mhz; - unsigned char interlace_en; - unsigned char pte_enable; - unsigned char dcc_enable; - double dcc_rate; - double bytes_per_pixel_c; - double bytes_per_pixel_y; - unsigned int swath_width_y; - unsigned int swath_height_y; - unsigned int swath_height_c; - unsigned int det_buffer_size_y; - double h_ratio; - double v_ratio; - unsigned int h_taps; - unsigned int h_total; - unsigned int v_total; - unsigned int v_active; - unsigned int e2e_index; - double display_pipe_line_delivery_time; - double read_bw; - unsigned int lines_in_det_y; - unsigned int lines_in_det_y_rounded_down_to_swath; - double full_det_buffering_time; - double dcfclk_deepsleep_mhz_per_plane; -}; - -struct _vcs_dpi_vratio_pre_st { - double vratio_pre_l; - double vratio_pre_c; -}; - -struct _vcs_dpi_display_data_rq_misc_params_st { - unsigned int full_swath_bytes; - unsigned int stored_swath_bytes; - unsigned int blk256_height; - unsigned int blk256_width; - unsigned int req_height; - unsigned int req_width; -}; - -struct _vcs_dpi_display_data_rq_sizing_params_st { - unsigned int chunk_bytes; - unsigned int min_chunk_bytes; - unsigned int meta_chunk_bytes; - unsigned int min_meta_chunk_bytes; - unsigned int mpte_group_bytes; - unsigned int dpte_group_bytes; -}; - -struct _vcs_dpi_display_data_rq_dlg_params_st { - unsigned int swath_width_ub; - unsigned int swath_height; - unsigned int req_per_swath_ub; - unsigned int meta_pte_bytes_per_frame_ub; - unsigned int dpte_req_per_row_ub; - unsigned int dpte_groups_per_row_ub; - unsigned int dpte_row_height; - unsigned int dpte_bytes_per_row_ub; - unsigned int meta_chunks_per_row_ub; - unsigned int meta_req_per_row_ub; - unsigned int meta_row_height; - unsigned int meta_bytes_per_row_ub; -}; - -struct _vcs_dpi_display_cur_rq_dlg_params_st { - unsigned char enable; - unsigned int swath_height; - unsigned int req_per_line; -}; - -struct _vcs_dpi_display_rq_dlg_params_st { - display_data_rq_dlg_params_st rq_l; - display_data_rq_dlg_params_st rq_c; - display_cur_rq_dlg_params_st rq_cur0; -}; - -struct _vcs_dpi_display_rq_sizing_params_st { - display_data_rq_sizing_params_st rq_l; - display_data_rq_sizing_params_st rq_c; -}; - -struct _vcs_dpi_display_rq_misc_params_st { - display_data_rq_misc_params_st rq_l; - display_data_rq_misc_params_st rq_c; -}; - -struct _vcs_dpi_display_rq_params_st { - unsigned char yuv420; - unsigned char yuv420_10bpc; - display_rq_misc_params_st misc; - display_rq_sizing_params_st sizing; - display_rq_dlg_params_st dlg; -}; - -struct _vcs_dpi_display_dlg_regs_st { - unsigned int refcyc_h_blank_end; - unsigned int dlg_vblank_end; - unsigned int min_dst_y_next_start; - unsigned int refcyc_per_htotal; - unsigned int refcyc_x_after_scaler; - unsigned int dst_y_after_scaler; - unsigned int dst_y_prefetch; - unsigned int dst_y_per_vm_vblank; - unsigned int dst_y_per_row_vblank; - unsigned int dst_y_per_vm_flip; - unsigned int dst_y_per_row_flip; - unsigned int ref_freq_to_pix_freq; - unsigned int vratio_prefetch; - unsigned int vratio_prefetch_c; - unsigned int refcyc_per_pte_group_vblank_l; - unsigned int refcyc_per_pte_group_vblank_c; - unsigned int refcyc_per_meta_chunk_vblank_l; - unsigned int refcyc_per_meta_chunk_vblank_c; - unsigned int refcyc_per_pte_group_flip_l; - unsigned int refcyc_per_pte_group_flip_c; - unsigned int refcyc_per_meta_chunk_flip_l; - unsigned int refcyc_per_meta_chunk_flip_c; - unsigned int dst_y_per_pte_row_nom_l; - unsigned int dst_y_per_pte_row_nom_c; - unsigned int refcyc_per_pte_group_nom_l; - unsigned int refcyc_per_pte_group_nom_c; - unsigned int dst_y_per_meta_row_nom_l; - unsigned int dst_y_per_meta_row_nom_c; - unsigned int refcyc_per_meta_chunk_nom_l; - unsigned int refcyc_per_meta_chunk_nom_c; - unsigned int refcyc_per_line_delivery_pre_l; - unsigned int refcyc_per_line_delivery_pre_c; - unsigned int refcyc_per_line_delivery_l; - unsigned int refcyc_per_line_delivery_c; - unsigned int chunk_hdl_adjust_cur0; - unsigned int chunk_hdl_adjust_cur1; - unsigned int vready_after_vcount0; - unsigned int dst_y_offset_cur0; - unsigned int dst_y_offset_cur1; - unsigned int xfc_reg_transfer_delay; - unsigned int xfc_reg_precharge_delay; - unsigned int xfc_reg_remote_surface_flip_latency; - unsigned int xfc_reg_prefetch_margin; - unsigned int dst_y_delta_drq_limit; -}; - -struct _vcs_dpi_display_ttu_regs_st { - unsigned int qos_level_low_wm; - unsigned int qos_level_high_wm; - unsigned int min_ttu_vblank; - unsigned int qos_level_flip; - unsigned int refcyc_per_req_delivery_l; - unsigned int refcyc_per_req_delivery_c; - unsigned int refcyc_per_req_delivery_cur0; - unsigned int refcyc_per_req_delivery_cur1; - unsigned int refcyc_per_req_delivery_pre_l; - unsigned int refcyc_per_req_delivery_pre_c; - unsigned int refcyc_per_req_delivery_pre_cur0; - unsigned int refcyc_per_req_delivery_pre_cur1; - unsigned int qos_level_fixed_l; - unsigned int qos_level_fixed_c; - unsigned int qos_level_fixed_cur0; - unsigned int qos_level_fixed_cur1; - unsigned int qos_ramp_disable_l; - unsigned int qos_ramp_disable_c; - unsigned int qos_ramp_disable_cur0; - unsigned int qos_ramp_disable_cur1; -}; - -struct _vcs_dpi_display_data_rq_regs_st { - unsigned int chunk_size; - unsigned int min_chunk_size; - unsigned int meta_chunk_size; - unsigned int min_meta_chunk_size; - unsigned int dpte_group_size; - unsigned int mpte_group_size; - unsigned int swath_height; - unsigned int pte_row_height_linear; -}; - -struct _vcs_dpi_display_rq_regs_st { - display_data_rq_regs_st rq_regs_l; - display_data_rq_regs_st rq_regs_c; - unsigned int drq_expansion_mode; - unsigned int prq_expansion_mode; - unsigned int mrq_expansion_mode; - unsigned int crq_expansion_mode; - unsigned int plane1_base_address; -}; - -struct _vcs_dpi_display_dlg_sys_params_st { - double t_mclk_wm_us; - double t_urg_wm_us; - double t_sr_wm_us; - double t_extra_us; - double mem_trip_us; - double t_srx_delay_us; - double deepsleep_dcfclk_mhz; - double total_flip_bw; - unsigned int total_flip_bytes; -}; - -struct _vcs_dpi_display_dlg_prefetch_param_st { - double prefetch_bw; - unsigned int flip_bytes; -}; - -struct _vcs_dpi_display_pipe_clock_st { - double dcfclk_mhz; - double dispclk_mhz; - double socclk_mhz; - double dscclk_mhz[6]; - double dppclk_mhz[6]; -}; - -struct _vcs_dpi_display_arb_params_st { - int max_req_outstanding; - int min_req_outstanding; - int sat_level_us; +struct _vcs_dpi_display_bandwidth_st { + double total_bw_consumed_gbps; + double guaranteed_urgent_return_bw_gbps; +}; + +struct _vcs_dpi_scaler_ratio_depth_st { + double hscl_ratio; + double vscl_ratio; + double hscl_ratio_c; + double vscl_ratio_c; + double vinit; + double vinit_c; + double vinit_bot; + double vinit_bot_c; + int lb_depth; + int scl_enable; +}; + +struct _vcs_dpi_scaler_taps_st { + unsigned int htaps; + unsigned int vtaps; + unsigned int htaps_c; + unsigned int vtaps_c; +}; + +struct _vcs_dpi_display_pipe_dest_params_st { + unsigned int recout_width; + unsigned int recout_height; + unsigned int full_recout_width; + unsigned int full_recout_height; + unsigned int hblank_start; + unsigned int hblank_end; + unsigned int vblank_start; + unsigned int vblank_end; + unsigned int htotal; + unsigned int vtotal; + unsigned int vactive; + unsigned int hactive; + unsigned int vstartup_start; + unsigned int vupdate_offset; + unsigned int vupdate_width; + unsigned int vready_offset; + unsigned char interlaced; + unsigned char underscan; + double pixel_rate_mhz; + unsigned char synchronized_vblank_all_planes; + unsigned char otg_inst; + unsigned char odm_split_cnt; + unsigned char odm_combine; +}; + +struct _vcs_dpi_display_pipe_params_st { + display_pipe_source_params_st src; + display_pipe_dest_params_st dest; + scaler_ratio_depth_st scale_ratio_depth; + scaler_taps_st scale_taps; +}; + +struct _vcs_dpi_display_clocks_and_cfg_st { + int voltage; + double dppclk_mhz; + double refclk_mhz; + double dispclk_mhz; + double dcfclk_mhz; + double socclk_mhz; +}; + +struct _vcs_dpi_display_e2e_pipe_params_st { + display_pipe_params_st pipe; + display_output_params_st dout; + display_clocks_and_cfg_st clks_cfg; +}; + +struct _vcs_dpi_dchub_buffer_sizing_st { + unsigned int swath_width_y; + unsigned int swath_height_y; + unsigned int swath_height_c; + unsigned int detail_buffer_size_y; +}; + +struct _vcs_dpi_watermarks_perf_st { + double stutter_eff_in_active_region_percent; + double urgent_latency_supported_us; + double non_urgent_latency_supported_us; + double dram_clock_change_margin_us; + double dram_access_eff_percent; +}; + +struct _vcs_dpi_cstate_pstate_watermarks_st { + double cstate_exit_us; + double cstate_enter_plus_exit_us; + double pstate_change_us; +}; + +struct _vcs_dpi_wm_calc_pipe_params_st { + unsigned int num_dpp; + int voltage; + int output_type; + double dcfclk_mhz; + double socclk_mhz; + double dppclk_mhz; + double pixclk_mhz; + unsigned char interlace_en; + unsigned char pte_enable; + unsigned char dcc_enable; + double dcc_rate; + double bytes_per_pixel_c; + double bytes_per_pixel_y; + unsigned int swath_width_y; + unsigned int swath_height_y; + unsigned int swath_height_c; + unsigned int det_buffer_size_y; + double h_ratio; + double v_ratio; + unsigned int h_taps; + unsigned int h_total; + unsigned int v_total; + unsigned int v_active; + unsigned int e2e_index; + double display_pipe_line_delivery_time; + double read_bw; + unsigned int lines_in_det_y; + unsigned int lines_in_det_y_rounded_down_to_swath; + double full_det_buffering_time; + double dcfclk_deepsleep_mhz_per_plane; +}; + +struct _vcs_dpi_vratio_pre_st { + double vratio_pre_l; + double vratio_pre_c; +}; + +struct _vcs_dpi_display_data_rq_misc_params_st { + unsigned int full_swath_bytes; + unsigned int stored_swath_bytes; + unsigned int blk256_height; + unsigned int blk256_width; + unsigned int req_height; + unsigned int req_width; +}; + +struct _vcs_dpi_display_data_rq_sizing_params_st { + unsigned int chunk_bytes; + unsigned int min_chunk_bytes; + unsigned int meta_chunk_bytes; + unsigned int min_meta_chunk_bytes; + unsigned int mpte_group_bytes; + unsigned int dpte_group_bytes; +}; + +struct _vcs_dpi_display_data_rq_dlg_params_st { + unsigned int swath_width_ub; + unsigned int swath_height; + unsigned int req_per_swath_ub; + unsigned int meta_pte_bytes_per_frame_ub; + unsigned int dpte_req_per_row_ub; + unsigned int dpte_groups_per_row_ub; + unsigned int dpte_row_height; + unsigned int dpte_bytes_per_row_ub; + unsigned int meta_chunks_per_row_ub; + unsigned int meta_req_per_row_ub; + unsigned int meta_row_height; + unsigned int meta_bytes_per_row_ub; +}; + +struct _vcs_dpi_display_cur_rq_dlg_params_st { + unsigned char enable; + unsigned int swath_height; + unsigned int req_per_line; +}; + +struct _vcs_dpi_display_rq_dlg_params_st { + display_data_rq_dlg_params_st rq_l; + display_data_rq_dlg_params_st rq_c; + display_cur_rq_dlg_params_st rq_cur0; +}; + +struct _vcs_dpi_display_rq_sizing_params_st { + display_data_rq_sizing_params_st rq_l; + display_data_rq_sizing_params_st rq_c; +}; + +struct _vcs_dpi_display_rq_misc_params_st { + display_data_rq_misc_params_st rq_l; + display_data_rq_misc_params_st rq_c; +}; + +struct _vcs_dpi_display_rq_params_st { + unsigned char yuv420; + unsigned char yuv420_10bpc; + display_rq_misc_params_st misc; + display_rq_sizing_params_st sizing; + display_rq_dlg_params_st dlg; +}; + +struct _vcs_dpi_display_dlg_regs_st { + unsigned int refcyc_h_blank_end; + unsigned int dlg_vblank_end; + unsigned int min_dst_y_next_start; + unsigned int refcyc_per_htotal; + unsigned int refcyc_x_after_scaler; + unsigned int dst_y_after_scaler; + unsigned int dst_y_prefetch; + unsigned int dst_y_per_vm_vblank; + unsigned int dst_y_per_row_vblank; + unsigned int dst_y_per_vm_flip; + unsigned int dst_y_per_row_flip; + unsigned int ref_freq_to_pix_freq; + unsigned int vratio_prefetch; + unsigned int vratio_prefetch_c; + unsigned int refcyc_per_pte_group_vblank_l; + unsigned int refcyc_per_pte_group_vblank_c; + unsigned int refcyc_per_meta_chunk_vblank_l; + unsigned int refcyc_per_meta_chunk_vblank_c; + unsigned int refcyc_per_pte_group_flip_l; + unsigned int refcyc_per_pte_group_flip_c; + unsigned int refcyc_per_meta_chunk_flip_l; + unsigned int refcyc_per_meta_chunk_flip_c; + unsigned int dst_y_per_pte_row_nom_l; + unsigned int dst_y_per_pte_row_nom_c; + unsigned int refcyc_per_pte_group_nom_l; + unsigned int refcyc_per_pte_group_nom_c; + unsigned int dst_y_per_meta_row_nom_l; + unsigned int dst_y_per_meta_row_nom_c; + unsigned int refcyc_per_meta_chunk_nom_l; + unsigned int refcyc_per_meta_chunk_nom_c; + unsigned int refcyc_per_line_delivery_pre_l; + unsigned int refcyc_per_line_delivery_pre_c; + unsigned int refcyc_per_line_delivery_l; + unsigned int refcyc_per_line_delivery_c; + unsigned int chunk_hdl_adjust_cur0; + unsigned int chunk_hdl_adjust_cur1; + unsigned int vready_after_vcount0; + unsigned int dst_y_offset_cur0; + unsigned int dst_y_offset_cur1; + unsigned int xfc_reg_transfer_delay; + unsigned int xfc_reg_precharge_delay; + unsigned int xfc_reg_remote_surface_flip_latency; + unsigned int xfc_reg_prefetch_margin; + unsigned int dst_y_delta_drq_limit; +}; + +struct _vcs_dpi_display_ttu_regs_st { + unsigned int qos_level_low_wm; + unsigned int qos_level_high_wm; + unsigned int min_ttu_vblank; + unsigned int qos_level_flip; + unsigned int refcyc_per_req_delivery_l; + unsigned int refcyc_per_req_delivery_c; + unsigned int refcyc_per_req_delivery_cur0; + unsigned int refcyc_per_req_delivery_cur1; + unsigned int refcyc_per_req_delivery_pre_l; + unsigned int refcyc_per_req_delivery_pre_c; + unsigned int refcyc_per_req_delivery_pre_cur0; + unsigned int refcyc_per_req_delivery_pre_cur1; + unsigned int qos_level_fixed_l; + unsigned int qos_level_fixed_c; + unsigned int qos_level_fixed_cur0; + unsigned int qos_level_fixed_cur1; + unsigned int qos_ramp_disable_l; + unsigned int qos_ramp_disable_c; + unsigned int qos_ramp_disable_cur0; + unsigned int qos_ramp_disable_cur1; +}; + +struct _vcs_dpi_display_data_rq_regs_st { + unsigned int chunk_size; + unsigned int min_chunk_size; + unsigned int meta_chunk_size; + unsigned int min_meta_chunk_size; + unsigned int dpte_group_size; + unsigned int mpte_group_size; + unsigned int swath_height; + unsigned int pte_row_height_linear; +}; + +struct _vcs_dpi_display_rq_regs_st { + display_data_rq_regs_st rq_regs_l; + display_data_rq_regs_st rq_regs_c; + unsigned int drq_expansion_mode; + unsigned int prq_expansion_mode; + unsigned int mrq_expansion_mode; + unsigned int crq_expansion_mode; + unsigned int plane1_base_address; +}; + +struct _vcs_dpi_display_dlg_sys_params_st { + double t_mclk_wm_us; + double t_urg_wm_us; + double t_sr_wm_us; + double t_extra_us; + double mem_trip_us; + double t_srx_delay_us; + double deepsleep_dcfclk_mhz; + double total_flip_bw; + unsigned int total_flip_bytes; +}; + +struct _vcs_dpi_display_dlg_prefetch_param_st { + double prefetch_bw; + unsigned int flip_bytes; +}; + +struct _vcs_dpi_display_pipe_clock_st { + double dcfclk_mhz; + double dispclk_mhz; + double socclk_mhz; + double dscclk_mhz[6]; + double dppclk_mhz[6]; +}; + +struct _vcs_dpi_display_arb_params_st { + int max_req_outstanding; + int min_req_outstanding; + int sat_level_us; }; #endif /*__DISPLAY_MODE_STRUCTS_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h index f9cf08357989..e8ce08567cd8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h @@ -35,6 +35,16 @@ static inline double dml_min(double a, double b) return (double) dcn_bw_min2(a, b); } +static inline double dml_min3(double a, double b, double c) +{ + return dml_min(dml_min(a, b), c); +} + +static inline double dml_min4(double a, double b, double c, double d) +{ + return dml_min(dml_min(a, b), dml_min(c, d)); +} + static inline double dml_max(double a, double b) { return (double) dcn_bw_max2(a, b); diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c index 87b580fa4bc9..0caee3523017 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c @@ -75,6 +75,7 @@ bool dal_hw_factory_init( return true; case DCE_VERSION_11_0: case DCE_VERSION_11_2: + case DCE_VERSION_11_22: dal_hw_factory_dce110_init(factory); return true; case DCE_VERSION_12_0: diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c index 0ae8ace25739..55c707488541 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c @@ -72,6 +72,7 @@ bool dal_hw_translate_init( case DCE_VERSION_10_0: case DCE_VERSION_11_0: case DCE_VERSION_11_2: + case DCE_VERSION_11_22: dal_hw_translate_dce110_init(translate); return true; case DCE_VERSION_12_0: diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c index abd0095ced30..b7256f595052 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c @@ -527,7 +527,7 @@ static void construct( REG_GET(MICROSECOND_TIME_BASE_DIV, XTAL_REF_DIV, &xtal_ref_div); if (xtal_ref_div == 0) { - DC_LOG_WARNING("Invalid base timer divider\n", + DC_LOG_WARNING("Invalid base timer divider [%s]\n", __func__); xtal_ref_div = 2; } diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c index 5cbf6626b8d4..14dc8c94d862 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c @@ -83,6 +83,7 @@ struct i2caux *dal_i2caux_create( case DCE_VERSION_8_3: return dal_i2caux_dce80_create(ctx); case DCE_VERSION_11_2: + case DCE_VERSION_11_22: return dal_i2caux_dce112_create(ctx); case DCE_VERSION_11_0: return dal_i2caux_dce110_create(ctx); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 8c51ad70cace..a94942d4e66b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -95,11 +95,6 @@ struct resource_funcs { struct link_encoder *(*link_enc_create)( const struct encoder_init_data *init); - enum dc_status (*validate_guaranteed)( - struct dc *dc, - struct dc_stream_state *stream, - struct dc_state *context); - bool (*validate_bandwidth)( struct dc *dc, struct dc_state *context); @@ -250,6 +245,7 @@ struct dce_bw_output { bool all_displays_in_sync; struct dce_watermarks urgent_wm_ns[MAX_PIPES]; struct dce_watermarks stutter_exit_wm_ns[MAX_PIPES]; + struct dce_watermarks stutter_entry_wm_ns[MAX_PIPES]; struct dce_watermarks nbp_state_change_wm_ns[MAX_PIPES]; int sclk_khz; int sclk_deep_sleep_khz; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h index 090b7a8dd67b..30b3a08b91be 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h @@ -102,13 +102,14 @@ bool dal_ddc_service_query_ddc_data( uint8_t *read_buf, uint32_t read_size); -ssize_t dal_ddc_service_read_dpcd_data( +enum ddc_result dal_ddc_service_read_dpcd_data( struct ddc_service *ddc, bool i2c, enum i2c_mot_mode mot, uint32_t address, uint8_t *data, - uint32_t len); + uint32_t len, + uint32_t *read); enum ddc_result dal_ddc_service_write_dpcd_data( struct ddc_service *ddc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h index a9bfe9ff8ce6..eece165206f9 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h @@ -42,6 +42,8 @@ enum bw_calcs_version { BW_CALCS_VERSION_CARRIZO, BW_CALCS_VERSION_POLARIS10, BW_CALCS_VERSION_POLARIS11, + BW_CALCS_VERSION_POLARIS12, + BW_CALCS_VERSION_VEGAM, BW_CALCS_VERSION_STONEY, BW_CALCS_VERSION_VEGA10 }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h new file mode 100644 index 000000000000..02f757dd70d4 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -0,0 +1,64 @@ +/* + * Copyright 2012-15 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DAL_DCHUBBUB_H__ +#define __DAL_DCHUBBUB_H__ + + +enum dcc_control { + dcc_control__256_256_xxx, + dcc_control__128_128_xxx, + dcc_control__256_64_64, +}; + +enum segment_order { + segment_order__na, + segment_order__contiguous, + segment_order__non_contiguous, +}; + + +struct hubbub_funcs { + void (*update_dchub)( + struct hubbub *hubbub, + struct dchub_init_data *dh_data); + + bool (*get_dcc_compression_cap)(struct hubbub *hubbub, + const struct dc_dcc_surface_param *input, + struct dc_surface_dcc_cap *output); + + bool (*dcc_support_swizzle)( + enum swizzle_mode_values swizzle, + unsigned int bytes_per_element, + enum segment_order *segment_order_horz, + enum segment_order *segment_order_vert); + + bool (*dcc_support_pixel_format)( + enum surface_pixel_format format, + unsigned int *bytes_per_element); +}; + + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index 99995608b620..582458f028f8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -44,7 +44,23 @@ struct dpp_grph_csc_adjustment { enum graphics_gamut_adjust_type gamut_adjust_type; }; +struct dcn_dpp_state { + uint32_t igam_lut_mode; + uint32_t igam_input_format; + uint32_t dgam_lut_mode; + uint32_t rgam_lut_mode; + uint32_t gamut_remap_mode; + uint32_t gamut_remap_c11_c12; + uint32_t gamut_remap_c13_c14; + uint32_t gamut_remap_c21_c22; + uint32_t gamut_remap_c23_c24; + uint32_t gamut_remap_c31_c32; + uint32_t gamut_remap_c33_c34; +}; + struct dpp_funcs { + void (*dpp_read_state)(struct dpp *dpp, struct dcn_dpp_state *s); + void (*dpp_reset)(struct dpp *dpp); void (*dpp_set_scaler)(struct dpp *dpp, @@ -117,7 +133,7 @@ struct dpp_funcs { struct dpp *dpp_base, enum surface_pixel_format format, enum expansion_mode mode, - struct csc_transform input_csc_color_matrix, + struct dc_csc_transform input_csc_color_matrix, enum dc_color_space input_color_space); void (*dpp_full_bypass)(struct dpp *dpp_base); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 9ced254e652c..97df82cddf82 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -56,7 +56,6 @@ struct hubp { bool power_gated; }; - struct hubp_funcs { void (*hubp_setup)( struct hubp *hubp, @@ -121,6 +120,9 @@ struct hubp_funcs { void (*hubp_clk_cntl)(struct hubp *hubp, bool enable); void (*hubp_vtg_sel)(struct hubp *hubp, uint32_t otg_inst); + void (*hubp_read_state)(struct hubp *hubp); + void (*hubp_disable_control)(struct hubp *hubp, bool disable_hubp); + unsigned int (*hubp_get_underflow_status)(struct hubp *hubp); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index b22158190262..cf7433ebf91a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -140,11 +140,6 @@ enum opp_regamma { OPP_REGAMMA_USER }; -struct csc_transform { - uint16_t matrix[12]; - bool enable_adjustment; -}; - struct dc_bias_and_scale { uint16_t scale_red; uint16_t bias_red; @@ -191,4 +186,9 @@ enum controller_dp_test_pattern { CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA }; +enum dc_lut_mode { + LUT_BYPASS, + LUT_RAM_A, + LUT_RAM_B +}; #endif /* __DAL_HW_SHARED_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h index 2109eac20a3d..b2fa4c4cd920 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h @@ -87,7 +87,7 @@ struct ipp_funcs { struct input_pixel_processor *ipp, enum surface_pixel_format format, enum expansion_mode mode, - struct csc_transform input_csc_color_matrix, + struct dc_csc_transform input_csc_color_matrix, enum dc_color_space input_color_space); /* DCE function to setup IPP. TODO: see if we can consolidate to setup */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index 54d8a1386142..cf6df2e7beb2 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -149,6 +149,7 @@ struct link_encoder_funcs { bool connect); void (*enable_hpd)(struct link_encoder *enc); void (*disable_hpd)(struct link_encoder *enc); + bool (*is_dig_enabled)(struct link_encoder *enc); void (*destroy)(struct link_encoder **enc); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index 3e1e7e6a8792..47f1dc5a43b7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -104,6 +104,7 @@ struct mem_input_funcs { struct mem_input *mem_input, struct dce_watermarks nbp, struct dce_watermarks stutter, + struct dce_watermarks stutter_enter, struct dce_watermarks urgent, uint32_t total_dest_line_time_ns); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 23a8d5e53a89..caf74e3c836f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -105,7 +105,24 @@ struct mpc { struct mpcc mpcc_array[MAX_MPCC]; }; +struct mpcc_state { + uint32_t opp_id; + uint32_t dpp_id; + uint32_t bot_mpcc_id; + uint32_t mode; + uint32_t alpha_mode; + uint32_t pre_multiplied_alpha; + uint32_t overlap_only; + uint32_t idle; + uint32_t busy; +}; + struct mpc_funcs { + void (*read_mpcc_state)( + struct mpc *mpc, + int mpcc_inst, + struct mpcc_state *s); + /* * Insert DPP into MPC tree based on specified blending position. * Only used for planes that are part of blending chain for OPP output diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index b5db1692393c..cfa7ec9517ae 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -29,31 +29,40 @@ #define STREAM_ENCODER_H_ #include "audio_types.h" +#include "hw_shared.h" struct dc_bios; struct dc_context; struct dc_crtc_timing; -struct encoder_info_packet { - bool valid; - uint8_t hb0; - uint8_t hb1; - uint8_t hb2; - uint8_t hb3; - uint8_t sb[32]; +enum dp_pixel_encoding_type { + DP_PIXEL_ENCODING_TYPE_RGB444 = 0x00000000, + DP_PIXEL_ENCODING_TYPE_YCBCR422 = 0x00000001, + DP_PIXEL_ENCODING_TYPE_YCBCR444 = 0x00000002, + DP_PIXEL_ENCODING_TYPE_RGB_WIDE_GAMUT = 0x00000003, + DP_PIXEL_ENCODING_TYPE_Y_ONLY = 0x00000004, + DP_PIXEL_ENCODING_TYPE_YCBCR420 = 0x00000005 +}; + +enum dp_component_depth { + DP_COMPONENT_PIXEL_DEPTH_6BPC = 0x00000000, + DP_COMPONENT_PIXEL_DEPTH_8BPC = 0x00000001, + DP_COMPONENT_PIXEL_DEPTH_10BPC = 0x00000002, + DP_COMPONENT_PIXEL_DEPTH_12BPC = 0x00000003, + DP_COMPONENT_PIXEL_DEPTH_16BPC = 0x00000004 }; struct encoder_info_frame { /* auxiliary video information */ - struct encoder_info_packet avi; - struct encoder_info_packet gamut; - struct encoder_info_packet vendor; + struct dc_info_packet avi; + struct dc_info_packet gamut; + struct dc_info_packet vendor; /* source product description */ - struct encoder_info_packet spd; + struct dc_info_packet spd; /* video stream configuration */ - struct encoder_info_packet vsc; + struct dc_info_packet vsc; /* HDR Static MetaData */ - struct encoder_info_packet hdrsmd; + struct dc_info_packet hdrsmd; }; struct encoder_unblank_param { @@ -147,6 +156,7 @@ struct stream_encoder_funcs { void (*set_avmute)( struct stream_encoder *enc, bool enable); + }; #endif /* STREAM_ENCODER_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 3217b5bf6c7a..69cb0a105300 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -140,6 +140,9 @@ struct timing_generator_funcs { void (*program_timing)(struct timing_generator *tg, const struct dc_crtc_timing *timing, bool use_vbios); + void (*program_vline_interrupt)(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + unsigned long long vsync_delta); bool (*enable_crtc)(struct timing_generator *tg); bool (*disable_crtc)(struct timing_generator *tg); bool (*is_counter_moving)(struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index c5b3623bcbd9..fecc80c47c26 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -252,7 +252,7 @@ struct transform_funcs { struct transform *xfm_base, enum surface_pixel_format format, enum expansion_mode mode, - struct csc_transform input_csc_color_matrix, + struct dc_csc_transform input_csc_color_matrix, enum dc_color_space input_color_space); void (*ipp_full_bypass)(struct transform *xfm_base); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index e764cbad881b..63fc6c499789 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -32,6 +32,8 @@ #include "inc/hw/link_encoder.h" #include "core_status.h" +#define EDP_BACKLIGHT_RAMP_DISABLE_LEVEL 0xFFFFFFFF + enum pipe_gating_control { PIPE_GATING_CONTROL_DISABLE = 0, PIPE_GATING_CONTROL_ENABLE, @@ -63,6 +65,7 @@ struct dchub_init_data; struct dc_static_screen_events; struct resource_pool; struct resource_context; +struct stream_resource; struct hw_sequencer_funcs { @@ -80,11 +83,6 @@ struct hw_sequencer_funcs { int num_planes, struct dc_state *context); - void (*set_plane_config)( - const struct dc *dc, - struct pipe_ctx *pipe_ctx, - struct resource_context *res_ctx); - void (*program_gamut_remap)( struct pipe_ctx *pipe_ctx); @@ -93,6 +91,12 @@ struct hw_sequencer_funcs { enum dc_color_space colorspace, uint16_t *matrix); + void (*program_output_csc)(struct dc *dc, + struct pipe_ctx *pipe_ctx, + enum dc_color_space colorspace, + uint16_t *matrix, + int opp_id); + void (*update_plane_addr)( const struct dc *dc, struct pipe_ctx *pipe_ctx); @@ -154,6 +158,11 @@ struct hw_sequencer_funcs { struct dc *dc, struct pipe_ctx *pipe, bool lock); + void (*blank_pixel_data)( + struct dc *dc, + struct stream_resource *stream_res, + struct dc_stream_state *stream, + bool blank); void (*set_bandwidth)( struct dc *dc, @@ -169,7 +178,7 @@ struct hw_sequencer_funcs { void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx, int num_pipes, const struct dc_static_screen_events *events); - enum dc_status (*prog_pixclk_crtc_otg)( + enum dc_status (*enable_stream_timing)( struct pipe_ctx *pipe_ctx, struct dc_state *context, struct dc *dc); @@ -201,6 +210,7 @@ struct hw_sequencer_funcs { void (*set_cursor_position)(struct pipe_ctx *pipe); void (*set_cursor_attribute)(struct pipe_ctx *pipe); + }; void color_space_to_black_color( diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h index 77eb72874e90..3306e7b0b3e3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h +++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h @@ -183,6 +183,36 @@ FN(reg_name, f4), v4, \ FN(reg_name, f5), v5) +#define REG_GET_6(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6) \ + generic_reg_get6(CTX, REG(reg_name), \ + FN(reg_name, f1), v1, \ + FN(reg_name, f2), v2, \ + FN(reg_name, f3), v3, \ + FN(reg_name, f4), v4, \ + FN(reg_name, f5), v5, \ + FN(reg_name, f6), v6) + +#define REG_GET_7(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6, f7, v7) \ + generic_reg_get7(CTX, REG(reg_name), \ + FN(reg_name, f1), v1, \ + FN(reg_name, f2), v2, \ + FN(reg_name, f3), v3, \ + FN(reg_name, f4), v4, \ + FN(reg_name, f5), v5, \ + FN(reg_name, f6), v6, \ + FN(reg_name, f7), v7) + +#define REG_GET_8(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6, f7, v7, f8, v8) \ + generic_reg_get8(CTX, REG(reg_name), \ + FN(reg_name, f1), v1, \ + FN(reg_name, f2), v2, \ + FN(reg_name, f3), v3, \ + FN(reg_name, f4), v4, \ + FN(reg_name, f5), v5, \ + FN(reg_name, f6), v6, \ + FN(reg_name, f7), v7, \ + FN(reg_name, f8), v8) + /* macro to poll and wait for a register field to read back given value */ #define REG_WAIT(reg_name, field, val, delay_between_poll_us, max_try) \ @@ -389,4 +419,30 @@ uint32_t generic_reg_get5(const struct dc_context *ctx, uint32_t addr, uint8_t shift4, uint32_t mask4, uint32_t *field_value4, uint8_t shift5, uint32_t mask5, uint32_t *field_value5); +uint32_t generic_reg_get6(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6); + +uint32_t generic_reg_get7(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6, + uint8_t shift7, uint32_t mask7, uint32_t *field_value7); + +uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr, + uint8_t shift1, uint32_t mask1, uint32_t *field_value1, + uint8_t shift2, uint32_t mask2, uint32_t *field_value2, + uint8_t shift3, uint32_t mask3, uint32_t *field_value3, + uint8_t shift4, uint32_t mask4, uint32_t *field_value4, + uint8_t shift5, uint32_t mask5, uint32_t *field_value5, + uint8_t shift6, uint32_t mask6, uint32_t *field_value6, + uint8_t shift7, uint32_t mask7, uint32_t *field_value7, + uint8_t shift8, uint32_t mask8, uint32_t *field_value8); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_REG_HELPER_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 5467332faf7b..640a647f4611 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -139,10 +139,6 @@ bool resource_validate_attach_surfaces( struct dc_state *context, const struct resource_pool *pool); -void validate_guaranteed_copy_streams( - struct dc_state *context, - int max_streams); - void resource_validate_ctx_update_pointer_after_copy( const struct dc_state *src_ctx, struct dc_state *dst_ctx); diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h index a506c2e939f5..0b5f3a278c22 100644 --- a/drivers/gpu/drm/amd/display/dc/irq_types.h +++ b/drivers/gpu/drm/amd/display/dc/irq_types.h @@ -26,6 +26,8 @@ #ifndef __DAL_IRQ_TYPES_H__ #define __DAL_IRQ_TYPES_H__ +#include "os_types.h" + struct dc_context; typedef void (*interrupt_handler)(void *); @@ -135,6 +137,13 @@ enum dc_irq_source { DC_IRQ_SOURCE_VBLANK5, DC_IRQ_SOURCE_VBLANK6, + DC_IRQ_SOURCE_DC1_VLINE0, + DC_IRQ_SOURCE_DC2_VLINE0, + DC_IRQ_SOURCE_DC3_VLINE0, + DC_IRQ_SOURCE_DC4_VLINE0, + DC_IRQ_SOURCE_DC5_VLINE0, + DC_IRQ_SOURCE_DC6_VLINE0, + DAL_IRQ_SOURCES_NUMBER }; diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 9831cb5eaa7c..25029ed42d89 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -86,6 +86,7 @@ #define VI_POLARIS10_P_A0 80 #define VI_POLARIS11_M_A0 90 #define VI_POLARIS12_V_A0 100 +#define VI_VEGAM_A0 110 #define VI_UNKNOWN 0xFF @@ -98,7 +99,9 @@ (eChipRev < VI_POLARIS11_M_A0)) #define ASIC_REV_IS_POLARIS11_M(eChipRev) ((eChipRev >= VI_POLARIS11_M_A0) && \ (eChipRev < VI_POLARIS12_V_A0)) -#define ASIC_REV_IS_POLARIS12_V(eChipRev) (eChipRev >= VI_POLARIS12_V_A0) +#define ASIC_REV_IS_POLARIS12_V(eChipRev) ((eChipRev >= VI_POLARIS12_V_A0) && \ + (eChipRev < VI_VEGAM_A0)) +#define ASIC_REV_IS_VEGAM(eChipRev) (eChipRev >= VI_VEGAM_A0) /* DCE11 */ #define CZ_CARRIZO_A0 0x01 @@ -110,12 +113,19 @@ ((rev >= STONEY_A0) && (rev < CZ_UNKNOWN)) /* DCE12 */ +#define AI_UNKNOWN 0xFF #define AI_GREENLAND_P_A0 1 #define AI_GREENLAND_P_A1 2 +#define AI_UNKNOWN 0xFF -#define ASICREV_IS_GREENLAND_M(eChipRev) (eChipRev < AI_UNKNOWN) -#define ASICREV_IS_GREENLAND_P(eChipRev) (eChipRev < AI_UNKNOWN) +#define AI_VEGA12_P_A0 20 +#define AI_VEGA20_P_A0 40 +#define ASICREV_IS_GREENLAND_M(eChipRev) (eChipRev < AI_VEGA12_P_A0) +#define ASICREV_IS_GREENLAND_P(eChipRev) (eChipRev < AI_VEGA12_P_A0) + +#define ASICREV_IS_VEGA12_P(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_VEGA20_P_A0)) +#define ASICREV_IS_VEGA20_P(eChipRev) ((eChipRev >= AI_VEGA20_P_A0) && (eChipRev < AI_UNKNOWN)) /* DCN1_0 */ #define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */ diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h index fa543965feb5..840142b65f8b 100644 --- a/drivers/gpu/drm/amd/display/include/dal_types.h +++ b/drivers/gpu/drm/amd/display/include/dal_types.h @@ -40,6 +40,7 @@ enum dce_version { DCE_VERSION_10_0, DCE_VERSION_11_0, DCE_VERSION_11_2, + DCE_VERSION_11_22, DCE_VERSION_12_0, DCE_VERSION_MAX, DCN_VERSION_1_0, diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index 0de258622c12..a981b3e99ab3 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -26,9 +26,13 @@ #ifndef __DAL_FIXED31_32_H__ #define __DAL_FIXED31_32_H__ -#include "os_types.h" - #define FIXED31_32_BITS_PER_FRACTIONAL_PART 32 +#ifndef LLONG_MIN +#define LLONG_MIN (1LL<<63) +#endif +#ifndef LLONG_MAX +#define LLONG_MAX (-1LL>>1) +#endif /* * @brief @@ -44,24 +48,25 @@ */ struct fixed31_32 { - int64_t value; + long long value; }; + /* * @brief * Useful constants */ -static const struct fixed31_32 dal_fixed31_32_zero = { 0 }; -static const struct fixed31_32 dal_fixed31_32_epsilon = { 1LL }; -static const struct fixed31_32 dal_fixed31_32_half = { 0x80000000LL }; -static const struct fixed31_32 dal_fixed31_32_one = { 0x100000000LL }; +static const struct fixed31_32 dc_fixpt_zero = { 0 }; +static const struct fixed31_32 dc_fixpt_epsilon = { 1LL }; +static const struct fixed31_32 dc_fixpt_half = { 0x80000000LL }; +static const struct fixed31_32 dc_fixpt_one = { 0x100000000LL }; -static const struct fixed31_32 dal_fixed31_32_pi = { 13493037705LL }; -static const struct fixed31_32 dal_fixed31_32_two_pi = { 26986075409LL }; -static const struct fixed31_32 dal_fixed31_32_e = { 11674931555LL }; -static const struct fixed31_32 dal_fixed31_32_ln2 = { 2977044471LL }; -static const struct fixed31_32 dal_fixed31_32_ln2_div_2 = { 1488522236LL }; +static const struct fixed31_32 dc_fixpt_pi = { 13493037705LL }; +static const struct fixed31_32 dc_fixpt_two_pi = { 26986075409LL }; +static const struct fixed31_32 dc_fixpt_e = { 11674931555LL }; +static const struct fixed31_32 dc_fixpt_ln2 = { 2977044471LL }; +static const struct fixed31_32 dc_fixpt_ln2_div_2 = { 1488522236LL }; /* * @brief @@ -72,24 +77,19 @@ static const struct fixed31_32 dal_fixed31_32_ln2_div_2 = { 1488522236LL }; * @brief * result = numerator / denominator */ -struct fixed31_32 dal_fixed31_32_from_fraction( - int64_t numerator, - int64_t denominator); +struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator); /* * @brief * result = arg */ -struct fixed31_32 dal_fixed31_32_from_int_nonconst(int64_t arg); -static inline struct fixed31_32 dal_fixed31_32_from_int(int64_t arg) +static inline struct fixed31_32 dc_fixpt_from_int(int arg) { - if (__builtin_constant_p(arg)) { - struct fixed31_32 res; - BUILD_BUG_ON((LONG_MIN > arg) || (arg > LONG_MAX)); - res.value = arg << FIXED31_32_BITS_PER_FRACTIONAL_PART; - return res; - } else - return dal_fixed31_32_from_int_nonconst(arg); + struct fixed31_32 res; + + res.value = (long long) arg << FIXED31_32_BITS_PER_FRACTIONAL_PART; + + return res; } /* @@ -101,7 +101,7 @@ static inline struct fixed31_32 dal_fixed31_32_from_int(int64_t arg) * @brief * result = -arg */ -static inline struct fixed31_32 dal_fixed31_32_neg(struct fixed31_32 arg) +static inline struct fixed31_32 dc_fixpt_neg(struct fixed31_32 arg) { struct fixed31_32 res; @@ -114,10 +114,10 @@ static inline struct fixed31_32 dal_fixed31_32_neg(struct fixed31_32 arg) * @brief * result = abs(arg) := (arg >= 0) ? arg : -arg */ -static inline struct fixed31_32 dal_fixed31_32_abs(struct fixed31_32 arg) +static inline struct fixed31_32 dc_fixpt_abs(struct fixed31_32 arg) { if (arg.value < 0) - return dal_fixed31_32_neg(arg); + return dc_fixpt_neg(arg); else return arg; } @@ -131,8 +131,7 @@ static inline struct fixed31_32 dal_fixed31_32_abs(struct fixed31_32 arg) * @brief * result = arg1 < arg2 */ -static inline bool dal_fixed31_32_lt(struct fixed31_32 arg1, - struct fixed31_32 arg2) +static inline bool dc_fixpt_lt(struct fixed31_32 arg1, struct fixed31_32 arg2) { return arg1.value < arg2.value; } @@ -141,8 +140,7 @@ static inline bool dal_fixed31_32_lt(struct fixed31_32 arg1, * @brief * result = arg1 <= arg2 */ -static inline bool dal_fixed31_32_le(struct fixed31_32 arg1, - struct fixed31_32 arg2) +static inline bool dc_fixpt_le(struct fixed31_32 arg1, struct fixed31_32 arg2) { return arg1.value <= arg2.value; } @@ -151,8 +149,7 @@ static inline bool dal_fixed31_32_le(struct fixed31_32 arg1, * @brief * result = arg1 == arg2 */ -static inline bool dal_fixed31_32_eq(struct fixed31_32 arg1, - struct fixed31_32 arg2) +static inline bool dc_fixpt_eq(struct fixed31_32 arg1, struct fixed31_32 arg2) { return arg1.value == arg2.value; } @@ -161,8 +158,7 @@ static inline bool dal_fixed31_32_eq(struct fixed31_32 arg1, * @brief * result = min(arg1, arg2) := (arg1 <= arg2) ? arg1 : arg2 */ -static inline struct fixed31_32 dal_fixed31_32_min(struct fixed31_32 arg1, - struct fixed31_32 arg2) +static inline struct fixed31_32 dc_fixpt_min(struct fixed31_32 arg1, struct fixed31_32 arg2) { if (arg1.value <= arg2.value) return arg1; @@ -174,8 +170,7 @@ static inline struct fixed31_32 dal_fixed31_32_min(struct fixed31_32 arg1, * @brief * result = max(arg1, arg2) := (arg1 <= arg2) ? arg2 : arg1 */ -static inline struct fixed31_32 dal_fixed31_32_max(struct fixed31_32 arg1, - struct fixed31_32 arg2) +static inline struct fixed31_32 dc_fixpt_max(struct fixed31_32 arg1, struct fixed31_32 arg2) { if (arg1.value <= arg2.value) return arg2; @@ -189,14 +184,14 @@ static inline struct fixed31_32 dal_fixed31_32_max(struct fixed31_32 arg1, * result = | arg, when min_value < arg < max_value * | max_value, when arg >= max_value */ -static inline struct fixed31_32 dal_fixed31_32_clamp( +static inline struct fixed31_32 dc_fixpt_clamp( struct fixed31_32 arg, struct fixed31_32 min_value, struct fixed31_32 max_value) { - if (dal_fixed31_32_le(arg, min_value)) + if (dc_fixpt_le(arg, min_value)) return min_value; - else if (dal_fixed31_32_le(max_value, arg)) + else if (dc_fixpt_le(max_value, arg)) return max_value; else return arg; @@ -211,21 +206,30 @@ static inline struct fixed31_32 dal_fixed31_32_clamp( * @brief * result = arg << shift */ -struct fixed31_32 dal_fixed31_32_shl( - struct fixed31_32 arg, - uint8_t shift); +static inline struct fixed31_32 dc_fixpt_shl(struct fixed31_32 arg, unsigned char shift) +{ + ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) || + ((arg.value < 0) && (arg.value >= ~(LLONG_MAX >> shift)))); + + arg.value = arg.value << shift; + + return arg; +} /* * @brief * result = arg >> shift */ -static inline struct fixed31_32 dal_fixed31_32_shr( - struct fixed31_32 arg, - uint8_t shift) +static inline struct fixed31_32 dc_fixpt_shr(struct fixed31_32 arg, unsigned char shift) { - struct fixed31_32 res; - res.value = arg.value >> shift; - return res; + bool negative = arg.value < 0; + + if (negative) + arg.value = -arg.value; + arg.value = arg.value >> shift; + if (negative) + arg.value = -arg.value; + return arg; } /* @@ -237,38 +241,50 @@ static inline struct fixed31_32 dal_fixed31_32_shr( * @brief * result = arg1 + arg2 */ -struct fixed31_32 dal_fixed31_32_add( - struct fixed31_32 arg1, - struct fixed31_32 arg2); +static inline struct fixed31_32 dc_fixpt_add(struct fixed31_32 arg1, struct fixed31_32 arg2) +{ + struct fixed31_32 res; + + ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) || + ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value))); + + res.value = arg1.value + arg2.value; + + return res; +} /* * @brief * result = arg1 + arg2 */ -static inline struct fixed31_32 dal_fixed31_32_add_int(struct fixed31_32 arg1, - int32_t arg2) +static inline struct fixed31_32 dc_fixpt_add_int(struct fixed31_32 arg1, int arg2) { - return dal_fixed31_32_add(arg1, - dal_fixed31_32_from_int(arg2)); + return dc_fixpt_add(arg1, dc_fixpt_from_int(arg2)); } /* * @brief * result = arg1 - arg2 */ -struct fixed31_32 dal_fixed31_32_sub( - struct fixed31_32 arg1, - struct fixed31_32 arg2); +static inline struct fixed31_32 dc_fixpt_sub(struct fixed31_32 arg1, struct fixed31_32 arg2) +{ + struct fixed31_32 res; + + ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) || + ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value))); + + res.value = arg1.value - arg2.value; + + return res; +} /* * @brief * result = arg1 - arg2 */ -static inline struct fixed31_32 dal_fixed31_32_sub_int(struct fixed31_32 arg1, - int32_t arg2) +static inline struct fixed31_32 dc_fixpt_sub_int(struct fixed31_32 arg1, int arg2) { - return dal_fixed31_32_sub(arg1, - dal_fixed31_32_from_int(arg2)); + return dc_fixpt_sub(arg1, dc_fixpt_from_int(arg2)); } @@ -281,49 +297,40 @@ static inline struct fixed31_32 dal_fixed31_32_sub_int(struct fixed31_32 arg1, * @brief * result = arg1 * arg2 */ -struct fixed31_32 dal_fixed31_32_mul( - struct fixed31_32 arg1, - struct fixed31_32 arg2); +struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2); /* * @brief * result = arg1 * arg2 */ -static inline struct fixed31_32 dal_fixed31_32_mul_int(struct fixed31_32 arg1, - int32_t arg2) +static inline struct fixed31_32 dc_fixpt_mul_int(struct fixed31_32 arg1, int arg2) { - return dal_fixed31_32_mul(arg1, - dal_fixed31_32_from_int(arg2)); + return dc_fixpt_mul(arg1, dc_fixpt_from_int(arg2)); } /* * @brief * result = square(arg) := arg * arg */ -struct fixed31_32 dal_fixed31_32_sqr( - struct fixed31_32 arg); +struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg); /* * @brief * result = arg1 / arg2 */ -static inline struct fixed31_32 dal_fixed31_32_div_int(struct fixed31_32 arg1, - int64_t arg2) +static inline struct fixed31_32 dc_fixpt_div_int(struct fixed31_32 arg1, long long arg2) { - return dal_fixed31_32_from_fraction(arg1.value, - dal_fixed31_32_from_int(arg2).value); + return dc_fixpt_from_fraction(arg1.value, dc_fixpt_from_int(arg2).value); } /* * @brief * result = arg1 / arg2 */ -static inline struct fixed31_32 dal_fixed31_32_div(struct fixed31_32 arg1, - struct fixed31_32 arg2) +static inline struct fixed31_32 dc_fixpt_div(struct fixed31_32 arg1, struct fixed31_32 arg2) { - return dal_fixed31_32_from_fraction(arg1.value, - arg2.value); + return dc_fixpt_from_fraction(arg1.value, arg2.value); } /* @@ -338,8 +345,7 @@ static inline struct fixed31_32 dal_fixed31_32_div(struct fixed31_32 arg1, * @note * No special actions taken in case argument is zero. */ -struct fixed31_32 dal_fixed31_32_recip( - struct fixed31_32 arg); +struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg); /* * @brief @@ -354,8 +360,7 @@ struct fixed31_32 dal_fixed31_32_recip( * Argument specified in radians, * internally it's normalized to [-2pi...2pi] range. */ -struct fixed31_32 dal_fixed31_32_sinc( - struct fixed31_32 arg); +struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg); /* * @brief @@ -365,8 +370,7 @@ struct fixed31_32 dal_fixed31_32_sinc( * Argument specified in radians, * internally it's normalized to [-2pi...2pi] range. */ -struct fixed31_32 dal_fixed31_32_sin( - struct fixed31_32 arg); +struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg); /* * @brief @@ -378,8 +382,7 @@ struct fixed31_32 dal_fixed31_32_sin( * passing arguments outside that range * will cause incorrect result! */ -struct fixed31_32 dal_fixed31_32_cos( - struct fixed31_32 arg); +struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg); /* * @brief @@ -393,8 +396,7 @@ struct fixed31_32 dal_fixed31_32_cos( * @note * Currently, function is verified for abs(arg) <= 1. */ -struct fixed31_32 dal_fixed31_32_exp( - struct fixed31_32 arg); +struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg); /* * @brief @@ -406,8 +408,7 @@ struct fixed31_32 dal_fixed31_32_exp( * Currently, no special actions taken * in case of invalid argument(s). Take care! */ -struct fixed31_32 dal_fixed31_32_log( - struct fixed31_32 arg); +struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg); /* * @brief @@ -421,9 +422,13 @@ struct fixed31_32 dal_fixed31_32_log( * @note * Currently, abs(arg1) should be less than 1. Take care! */ -struct fixed31_32 dal_fixed31_32_pow( - struct fixed31_32 arg1, - struct fixed31_32 arg2); +static inline struct fixed31_32 dc_fixpt_pow(struct fixed31_32 arg1, struct fixed31_32 arg2) +{ + return dc_fixpt_exp( + dc_fixpt_mul( + dc_fixpt_log(arg1), + arg2)); +} /* * @brief @@ -434,22 +439,56 @@ struct fixed31_32 dal_fixed31_32_pow( * @brief * result = floor(arg) := greatest integer lower than or equal to arg */ -int32_t dal_fixed31_32_floor( - struct fixed31_32 arg); +static inline int dc_fixpt_floor(struct fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} /* * @brief * result = round(arg) := integer nearest to arg */ -int32_t dal_fixed31_32_round( - struct fixed31_32 arg); +static inline int dc_fixpt_round(struct fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + const long long summand = dc_fixpt_half.value; + + ASSERT(LLONG_MAX - (long long)arg_value >= summand); + + arg_value += summand; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} /* * @brief * result = ceil(arg) := lowest integer greater than or equal to arg */ -int32_t dal_fixed31_32_ceil( - struct fixed31_32 arg); +static inline int dc_fixpt_ceil(struct fixed31_32 arg) +{ + unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value; + + const long long summand = dc_fixpt_one.value - + dc_fixpt_epsilon.value; + + ASSERT(LLONG_MAX - (long long)arg_value >= summand); + + arg_value += summand; + + if (arg.value >= 0) + return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); + else + return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART); +} /* the following two function are used in scaler hw programming to convert fixed * point value to format 2 bits from integer part and 19 bits from fractional @@ -457,20 +496,33 @@ int32_t dal_fixed31_32_ceil( * fractional */ -uint32_t dal_fixed31_32_u2d19( - struct fixed31_32 arg); +unsigned int dc_fixpt_u3d19(struct fixed31_32 arg); + +unsigned int dc_fixpt_u2d19(struct fixed31_32 arg); + +unsigned int dc_fixpt_u0d19(struct fixed31_32 arg); -uint32_t dal_fixed31_32_u0d19( - struct fixed31_32 arg); +unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg); +unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg); -uint32_t dal_fixed31_32_clamp_u0d14( - struct fixed31_32 arg); +int dc_fixpt_s4d19(struct fixed31_32 arg); -uint32_t dal_fixed31_32_clamp_u0d10( - struct fixed31_32 arg); +static inline struct fixed31_32 dc_fixpt_truncate(struct fixed31_32 arg, unsigned int frac_bits) +{ + bool negative = arg.value < 0; -int32_t dal_fixed31_32_s4d19( - struct fixed31_32 arg); + if (frac_bits >= FIXED31_32_BITS_PER_FRACTIONAL_PART) { + ASSERT(frac_bits == FIXED31_32_BITS_PER_FRACTIONAL_PART); + return arg; + } + + if (negative) + arg.value = -arg.value; + arg.value &= (~0LL) << (FIXED31_32_BITS_PER_FRACTIONAL_PART - frac_bits); + if (negative) + arg.value = -arg.value; + return arg; +} #endif diff --git a/drivers/gpu/drm/amd/display/include/fixed32_32.h b/drivers/gpu/drm/amd/display/include/fixed32_32.h deleted file mode 100644 index 9c70341fe026..000000000000 --- a/drivers/gpu/drm/amd/display/include/fixed32_32.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright 2012-15 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - - -#ifndef __DAL_FIXED32_32_H__ -#define __DAL_FIXED32_32_H__ - -#include "os_types.h" - -struct fixed32_32 { - uint64_t value; -}; - -static const struct fixed32_32 dal_fixed32_32_zero = { 0 }; -static const struct fixed32_32 dal_fixed32_32_one = { 0x100000000LL }; -static const struct fixed32_32 dal_fixed32_32_half = { 0x80000000LL }; - -struct fixed32_32 dal_fixed32_32_from_fraction(uint32_t n, uint32_t d); -static inline struct fixed32_32 dal_fixed32_32_from_int(uint32_t value) -{ - struct fixed32_32 fx; - - fx.value = (uint64_t)value<<32; - return fx; -} - -struct fixed32_32 dal_fixed32_32_add( - struct fixed32_32 lhs, - struct fixed32_32 rhs); -struct fixed32_32 dal_fixed32_32_add_int( - struct fixed32_32 lhs, - uint32_t rhs); -struct fixed32_32 dal_fixed32_32_sub( - struct fixed32_32 lhs, - struct fixed32_32 rhs); -struct fixed32_32 dal_fixed32_32_sub_int( - struct fixed32_32 lhs, - uint32_t rhs); -struct fixed32_32 dal_fixed32_32_mul( - struct fixed32_32 lhs, - struct fixed32_32 rhs); -struct fixed32_32 dal_fixed32_32_mul_int( - struct fixed32_32 lhs, - uint32_t rhs); -struct fixed32_32 dal_fixed32_32_div( - struct fixed32_32 lhs, - struct fixed32_32 rhs); -struct fixed32_32 dal_fixed32_32_div_int( - struct fixed32_32 lhs, - uint32_t rhs); - -static inline struct fixed32_32 dal_fixed32_32_min(struct fixed32_32 lhs, - struct fixed32_32 rhs) -{ - return (lhs.value < rhs.value) ? lhs : rhs; -} - -static inline struct fixed32_32 dal_fixed32_32_max(struct fixed32_32 lhs, - struct fixed32_32 rhs) -{ - return (lhs.value > rhs.value) ? lhs : rhs; -} - -static inline bool dal_fixed32_32_gt(struct fixed32_32 lhs, struct fixed32_32 rhs) -{ - return lhs.value > rhs.value; -} - -static inline bool dal_fixed32_32_gt_int(struct fixed32_32 lhs, uint32_t rhs) -{ - return lhs.value > ((uint64_t)rhs<<32); -} - -static inline bool dal_fixed32_32_lt(struct fixed32_32 lhs, struct fixed32_32 rhs) -{ - return lhs.value < rhs.value; -} - -static inline bool dal_fixed32_32_lt_int(struct fixed32_32 lhs, uint32_t rhs) -{ - return lhs.value < ((uint64_t)rhs<<32); -} - -static inline bool dal_fixed32_32_le(struct fixed32_32 lhs, struct fixed32_32 rhs) -{ - return lhs.value <= rhs.value; -} - -static inline bool dal_fixed32_32_le_int(struct fixed32_32 lhs, uint32_t rhs) -{ - return lhs.value <= ((uint64_t)rhs<<32); -} - -static inline bool dal_fixed32_32_eq(struct fixed32_32 lhs, struct fixed32_32 rhs) -{ - return lhs.value == rhs.value; -} - -uint32_t dal_fixed32_32_ceil(struct fixed32_32 value); -static inline uint32_t dal_fixed32_32_floor(struct fixed32_32 value) -{ - return value.value>>32; -} - -uint32_t dal_fixed32_32_round(struct fixed32_32 value); - -#endif diff --git a/drivers/gpu/drm/amd/display/include/logger_interface.h b/drivers/gpu/drm/amd/display/include/logger_interface.h index 28dee960d509..dc98d6d4b2bd 100644 --- a/drivers/gpu/drm/amd/display/include/logger_interface.h +++ b/drivers/gpu/drm/amd/display/include/logger_interface.h @@ -190,4 +190,13 @@ void context_clock_trace( } \ } while (0) +#define DISPLAY_STATS_BEGIN(entry) \ + dm_logger_open(dc->ctx->logger, &entry, LOG_DISPLAYSTATS) + +#define DISPLAY_STATS(msg, ...) \ + dm_logger_append(&log_entry, msg, ##__VA_ARGS__) + +#define DISPLAY_STATS_END(entry) \ + dm_logger_close(&entry) + #endif /* __DAL_LOGGER_INTERFACE_H__ */ diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 427796bdc14a..0a540b9897a6 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -29,39 +29,39 @@ #include "os_types.h" #define MAX_NAME_LEN 32 -#define DC_LOG_ERROR(a, ...) dm_logger_write(DC_LOGGER, LOG_ERROR, a, ## __VA_ARGS__) -#define DC_LOG_WARNING(a, ...) dm_logger_write(DC_LOGGER, LOG_WARNING, a, ## __VA_ARGS__) -#define DC_LOG_DEBUG(a, ...) dm_logger_write(DC_LOGGER, LOG_DEBUG, a, ## __VA_ARGS__) -#define DC_LOG_DC(a, ...) dm_logger_write(DC_LOGGER, LOG_DC, a, ## __VA_ARGS__) -#define DC_LOG_DTN(a, ...) dm_logger_write(DC_LOGGER, LOG_DTN, a, ## __VA_ARGS__) -#define DC_LOG_SURFACE(a, ...) dm_logger_write(DC_LOGGER, LOG_SURFACE, a, ## __VA_ARGS__) -#define DC_LOG_HW_HOTPLUG(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_HOTPLUG, a, ## __VA_ARGS__) -#define DC_LOG_HW_LINK_TRAINING(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_LINK_TRAINING, a, ## __VA_ARGS__) -#define DC_LOG_HW_SET_MODE(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_SET_MODE, a, ## __VA_ARGS__) -#define DC_LOG_HW_RESUME_S3(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_RESUME_S3, a, ## __VA_ARGS__) -#define DC_LOG_HW_AUDIO(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_AUDIO, a, ## __VA_ARGS__) -#define DC_LOG_HW_HPD_IRQ(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_HPD_IRQ, a, ## __VA_ARGS__) -#define DC_LOG_MST(a, ...) dm_logger_write(DC_LOGGER, LOG_MST, a, ## __VA_ARGS__) -#define DC_LOG_SCALER(a, ...) dm_logger_write(DC_LOGGER, LOG_SCALER, a, ## __VA_ARGS__) -#define DC_LOG_BIOS(a, ...) dm_logger_write(DC_LOGGER, LOG_BIOS, a, ## __VA_ARGS__) -#define DC_LOG_BANDWIDTH_CALCS(a, ...) dm_logger_write(DC_LOGGER, LOG_BANDWIDTH_CALCS, a, ## __VA_ARGS__) -#define DC_LOG_BANDWIDTH_VALIDATION(a, ...) dm_logger_write(DC_LOGGER, LOG_BANDWIDTH_VALIDATION, a, ## __VA_ARGS__) -#define DC_LOG_I2C_AUX(a, ...) dm_logger_write(DC_LOGGER, LOG_I2C_AUX, a, ## __VA_ARGS__) -#define DC_LOG_SYNC(a, ...) dm_logger_write(DC_LOGGER, LOG_SYNC, a, ## __VA_ARGS__) -#define DC_LOG_BACKLIGHT(a, ...) dm_logger_write(DC_LOGGER, LOG_BACKLIGHT, a, ## __VA_ARGS__) -#define DC_LOG_FEATURE_OVERRIDE(a, ...) dm_logger_write(DC_LOGGER, LOG_FEATURE_OVERRIDE, a, ## __VA_ARGS__) -#define DC_LOG_DETECTION_EDID_PARSER(a, ...) dm_logger_write(DC_LOGGER, LOG_DETECTION_EDID_PARSER, a, ## __VA_ARGS__) -#define DC_LOG_DETECTION_DP_CAPS(a, ...) dm_logger_write(DC_LOGGER, LOG_DETECTION_DP_CAPS, a, ## __VA_ARGS__) -#define DC_LOG_RESOURCE(a, ...) dm_logger_write(DC_LOGGER, LOG_RESOURCE, a, ## __VA_ARGS__) -#define DC_LOG_DML(a, ...) dm_logger_write(DC_LOGGER, LOG_DML, a, ## __VA_ARGS__) -#define DC_LOG_EVENT_MODE_SET(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_MODE_SET, a, ## __VA_ARGS__) -#define DC_LOG_EVENT_DETECTION(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_DETECTION, a, ## __VA_ARGS__) -#define DC_LOG_EVENT_LINK_TRAINING(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_LINK_TRAINING, a, ## __VA_ARGS__) -#define DC_LOG_EVENT_LINK_LOSS(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_LINK_LOSS, a, ## __VA_ARGS__) -#define DC_LOG_EVENT_UNDERFLOW(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_UNDERFLOW, a, ## __VA_ARGS__) -#define DC_LOG_IF_TRACE(a, ...) dm_logger_write(DC_LOGGER, LOG_IF_TRACE, a, ## __VA_ARGS__) -#define DC_LOG_PERF_TRACE(a, ...) dm_logger_write(DC_LOGGER, LOG_PERF_TRACE, a, ## __VA_ARGS__) +#define DC_LOG_ERROR(...) DRM_ERROR(__VA_ARGS__) +#define DC_LOG_WARNING(...) DRM_WARN(__VA_ARGS__) +#define DC_LOG_DEBUG(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_DC(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_DTN(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_SURFACE(...) pr_debug("[SURFACE]:"__VA_ARGS__) +#define DC_LOG_HW_HOTPLUG(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_HW_LINK_TRAINING(...) pr_debug("[HW_LINK_TRAINING]:"__VA_ARGS__) +#define DC_LOG_HW_SET_MODE(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_HW_RESUME_S3(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_HW_AUDIO(...) pr_debug("[HW_AUDIO]:"__VA_ARGS__) +#define DC_LOG_HW_HPD_IRQ(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_MST(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_SCALER(...) pr_debug("[SCALER]:"__VA_ARGS__) +#define DC_LOG_BIOS(...) pr_debug("[BIOS]:"__VA_ARGS__) +#define DC_LOG_BANDWIDTH_CALCS(...) pr_debug("[BANDWIDTH_CALCS]:"__VA_ARGS__) +#define DC_LOG_BANDWIDTH_VALIDATION(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_I2C_AUX(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_SYNC(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_BACKLIGHT(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_FEATURE_OVERRIDE(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_DETECTION_EDID_PARSER(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_DETECTION_DP_CAPS(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_RESOURCE(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_DML(...) pr_debug("[DML]:"__VA_ARGS__) +#define DC_LOG_EVENT_MODE_SET(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_EVENT_DETECTION(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_EVENT_LINK_TRAINING(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_EVENT_LINK_LOSS(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_EVENT_UNDERFLOW(...) DRM_DEBUG_KMS(__VA_ARGS__) +#define DC_LOG_IF_TRACE(...) pr_debug("[IF_TRACE]:"__VA_ARGS__) +#define DC_LOG_PERF_TRACE(...) DRM_DEBUG_KMS(__VA_ARGS__) struct dal_logger; @@ -98,7 +98,7 @@ enum dc_log_type { LOG_EVENT_UNDERFLOW, LOG_IF_TRACE, LOG_PERF_TRACE, - LOG_PROFILING, + LOG_DISPLAYSTATS, LOG_SECTION_TOTAL_COUNT }; diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index e7e374f56864..0cd111d59018 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -43,7 +43,7 @@ static bool de_pq_initialized; /* = false; */ /* one-time setup of X points */ void setup_x_points_distribution(void) { - struct fixed31_32 region_size = dal_fixed31_32_from_int(128); + struct fixed31_32 region_size = dc_fixpt_from_int(128); int32_t segment; uint32_t seg_offset; uint32_t index; @@ -53,8 +53,8 @@ void setup_x_points_distribution(void) coordinates_x[MAX_HW_POINTS + 1].x = region_size; for (segment = 6; segment > (6 - NUM_REGIONS); segment--) { - region_size = dal_fixed31_32_div_int(region_size, 2); - increment = dal_fixed31_32_div_int(region_size, + region_size = dc_fixpt_div_int(region_size, 2); + increment = dc_fixpt_div_int(region_size, NUM_PTS_IN_REGION); seg_offset = (segment + (NUM_REGIONS - 7)) * NUM_PTS_IN_REGION; coordinates_x[seg_offset].x = region_size; @@ -62,7 +62,7 @@ void setup_x_points_distribution(void) for (index = seg_offset + 1; index < seg_offset + NUM_PTS_IN_REGION; index++) { - coordinates_x[index].x = dal_fixed31_32_add + coordinates_x[index].x = dc_fixpt_add (coordinates_x[index-1].x, increment); } } @@ -72,63 +72,63 @@ static void compute_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y) { /* consts for PQ gamma formula. */ const struct fixed31_32 m1 = - dal_fixed31_32_from_fraction(159301758, 1000000000); + dc_fixpt_from_fraction(159301758, 1000000000); const struct fixed31_32 m2 = - dal_fixed31_32_from_fraction(7884375, 100000); + dc_fixpt_from_fraction(7884375, 100000); const struct fixed31_32 c1 = - dal_fixed31_32_from_fraction(8359375, 10000000); + dc_fixpt_from_fraction(8359375, 10000000); const struct fixed31_32 c2 = - dal_fixed31_32_from_fraction(188515625, 10000000); + dc_fixpt_from_fraction(188515625, 10000000); const struct fixed31_32 c3 = - dal_fixed31_32_from_fraction(186875, 10000); + dc_fixpt_from_fraction(186875, 10000); struct fixed31_32 l_pow_m1; struct fixed31_32 base; - if (dal_fixed31_32_lt(in_x, dal_fixed31_32_zero)) - in_x = dal_fixed31_32_zero; + if (dc_fixpt_lt(in_x, dc_fixpt_zero)) + in_x = dc_fixpt_zero; - l_pow_m1 = dal_fixed31_32_pow(in_x, m1); - base = dal_fixed31_32_div( - dal_fixed31_32_add(c1, - (dal_fixed31_32_mul(c2, l_pow_m1))), - dal_fixed31_32_add(dal_fixed31_32_one, - (dal_fixed31_32_mul(c3, l_pow_m1)))); - *out_y = dal_fixed31_32_pow(base, m2); + l_pow_m1 = dc_fixpt_pow(in_x, m1); + base = dc_fixpt_div( + dc_fixpt_add(c1, + (dc_fixpt_mul(c2, l_pow_m1))), + dc_fixpt_add(dc_fixpt_one, + (dc_fixpt_mul(c3, l_pow_m1)))); + *out_y = dc_fixpt_pow(base, m2); } static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y) { /* consts for dePQ gamma formula. */ const struct fixed31_32 m1 = - dal_fixed31_32_from_fraction(159301758, 1000000000); + dc_fixpt_from_fraction(159301758, 1000000000); const struct fixed31_32 m2 = - dal_fixed31_32_from_fraction(7884375, 100000); + dc_fixpt_from_fraction(7884375, 100000); const struct fixed31_32 c1 = - dal_fixed31_32_from_fraction(8359375, 10000000); + dc_fixpt_from_fraction(8359375, 10000000); const struct fixed31_32 c2 = - dal_fixed31_32_from_fraction(188515625, 10000000); + dc_fixpt_from_fraction(188515625, 10000000); const struct fixed31_32 c3 = - dal_fixed31_32_from_fraction(186875, 10000); + dc_fixpt_from_fraction(186875, 10000); struct fixed31_32 l_pow_m1; struct fixed31_32 base, div; - if (dal_fixed31_32_lt(in_x, dal_fixed31_32_zero)) - in_x = dal_fixed31_32_zero; + if (dc_fixpt_lt(in_x, dc_fixpt_zero)) + in_x = dc_fixpt_zero; - l_pow_m1 = dal_fixed31_32_pow(in_x, - dal_fixed31_32_div(dal_fixed31_32_one, m2)); - base = dal_fixed31_32_sub(l_pow_m1, c1); + l_pow_m1 = dc_fixpt_pow(in_x, + dc_fixpt_div(dc_fixpt_one, m2)); + base = dc_fixpt_sub(l_pow_m1, c1); - if (dal_fixed31_32_lt(base, dal_fixed31_32_zero)) - base = dal_fixed31_32_zero; + if (dc_fixpt_lt(base, dc_fixpt_zero)) + base = dc_fixpt_zero; - div = dal_fixed31_32_sub(c2, dal_fixed31_32_mul(c3, l_pow_m1)); + div = dc_fixpt_sub(c2, dc_fixpt_mul(c3, l_pow_m1)); - *out_y = dal_fixed31_32_pow(dal_fixed31_32_div(base, div), - dal_fixed31_32_div(dal_fixed31_32_one, m1)); + *out_y = dc_fixpt_pow(dc_fixpt_div(base, div), + dc_fixpt_div(dc_fixpt_one, m1)); } /* one-time pre-compute PQ values - only for sdr_white_level 80 */ @@ -138,14 +138,14 @@ void precompute_pq(void) struct fixed31_32 x; const struct hw_x_point *coord_x = coordinates_x + 32; struct fixed31_32 scaling_factor = - dal_fixed31_32_from_fraction(80, 10000); + dc_fixpt_from_fraction(80, 10000); /* pow function has problems with arguments too small */ for (i = 0; i < 32; i++) - pq_table[i] = dal_fixed31_32_zero; + pq_table[i] = dc_fixpt_zero; for (i = 32; i <= MAX_HW_POINTS; i++) { - x = dal_fixed31_32_mul(coord_x->x, scaling_factor); + x = dc_fixpt_mul(coord_x->x, scaling_factor); compute_pq(x, &pq_table[i]); ++coord_x; } @@ -158,7 +158,7 @@ void precompute_de_pq(void) struct fixed31_32 y; uint32_t begin_index, end_index; - struct fixed31_32 scaling_factor = dal_fixed31_32_from_int(125); + struct fixed31_32 scaling_factor = dc_fixpt_from_int(125); /* X points is 2^-25 to 2^7 * De-gamma X is 2^-12 to 2^0 – we are skipping first -12-(-25) = 13 regions @@ -167,11 +167,11 @@ void precompute_de_pq(void) end_index = begin_index + 12 * NUM_PTS_IN_REGION; for (i = 0; i <= begin_index; i++) - de_pq_table[i] = dal_fixed31_32_zero; + de_pq_table[i] = dc_fixpt_zero; for (; i <= end_index; i++) { compute_de_pq(coordinates_x[i].x, &y); - de_pq_table[i] = dal_fixed31_32_mul(y, scaling_factor); + de_pq_table[i] = dc_fixpt_mul(y, scaling_factor); } for (; i <= MAX_HW_POINTS; i++) @@ -185,25 +185,25 @@ struct dividers { static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4) { - static const int32_t numerator01[] = { 31308, 180000}; - static const int32_t numerator02[] = { 12920, 4500}; - static const int32_t numerator03[] = { 55, 99}; - static const int32_t numerator04[] = { 55, 99}; - static const int32_t numerator05[] = { 2400, 2200}; + static const int32_t numerator01[] = { 31308, 180000}; + static const int32_t numerator02[] = { 12920, 4500}; + static const int32_t numerator03[] = { 55, 99}; + static const int32_t numerator04[] = { 55, 99}; + static const int32_t numerator05[] = { 2400, 2200}; - uint32_t i = 0; - uint32_t index = is_2_4 == true ? 0:1; + uint32_t i = 0; + uint32_t index = is_2_4 == true ? 0:1; do { - coefficients->a0[i] = dal_fixed31_32_from_fraction( + coefficients->a0[i] = dc_fixpt_from_fraction( numerator01[index], 10000000); - coefficients->a1[i] = dal_fixed31_32_from_fraction( + coefficients->a1[i] = dc_fixpt_from_fraction( numerator02[index], 1000); - coefficients->a2[i] = dal_fixed31_32_from_fraction( + coefficients->a2[i] = dc_fixpt_from_fraction( numerator03[index], 1000); - coefficients->a3[i] = dal_fixed31_32_from_fraction( + coefficients->a3[i] = dc_fixpt_from_fraction( numerator04[index], 1000); - coefficients->user_gamma[i] = dal_fixed31_32_from_fraction( + coefficients->user_gamma[i] = dc_fixpt_from_fraction( numerator05[index], 1000); ++i; @@ -218,33 +218,33 @@ static struct fixed31_32 translate_from_linear_space( struct fixed31_32 a3, struct fixed31_32 gamma) { - const struct fixed31_32 one = dal_fixed31_32_from_int(1); + const struct fixed31_32 one = dc_fixpt_from_int(1); - if (dal_fixed31_32_lt(one, arg)) + if (dc_fixpt_lt(one, arg)) return one; - if (dal_fixed31_32_le(arg, dal_fixed31_32_neg(a0))) - return dal_fixed31_32_sub( + if (dc_fixpt_le(arg, dc_fixpt_neg(a0))) + return dc_fixpt_sub( a2, - dal_fixed31_32_mul( - dal_fixed31_32_add( + dc_fixpt_mul( + dc_fixpt_add( one, a3), - dal_fixed31_32_pow( - dal_fixed31_32_neg(arg), - dal_fixed31_32_recip(gamma)))); - else if (dal_fixed31_32_le(a0, arg)) - return dal_fixed31_32_sub( - dal_fixed31_32_mul( - dal_fixed31_32_add( + dc_fixpt_pow( + dc_fixpt_neg(arg), + dc_fixpt_recip(gamma)))); + else if (dc_fixpt_le(a0, arg)) + return dc_fixpt_sub( + dc_fixpt_mul( + dc_fixpt_add( one, a3), - dal_fixed31_32_pow( + dc_fixpt_pow( arg, - dal_fixed31_32_recip(gamma))), + dc_fixpt_recip(gamma))), a2); else - return dal_fixed31_32_mul( + return dc_fixpt_mul( arg, a1); } @@ -259,25 +259,25 @@ static struct fixed31_32 translate_to_linear_space( { struct fixed31_32 linear; - a0 = dal_fixed31_32_mul(a0, a1); - if (dal_fixed31_32_le(arg, dal_fixed31_32_neg(a0))) + a0 = dc_fixpt_mul(a0, a1); + if (dc_fixpt_le(arg, dc_fixpt_neg(a0))) - linear = dal_fixed31_32_neg( - dal_fixed31_32_pow( - dal_fixed31_32_div( - dal_fixed31_32_sub(a2, arg), - dal_fixed31_32_add( - dal_fixed31_32_one, a3)), gamma)); + linear = dc_fixpt_neg( + dc_fixpt_pow( + dc_fixpt_div( + dc_fixpt_sub(a2, arg), + dc_fixpt_add( + dc_fixpt_one, a3)), gamma)); - else if (dal_fixed31_32_le(dal_fixed31_32_neg(a0), arg) && - dal_fixed31_32_le(arg, a0)) - linear = dal_fixed31_32_div(arg, a1); + else if (dc_fixpt_le(dc_fixpt_neg(a0), arg) && + dc_fixpt_le(arg, a0)) + linear = dc_fixpt_div(arg, a1); else - linear = dal_fixed31_32_pow( - dal_fixed31_32_div( - dal_fixed31_32_add(a2, arg), - dal_fixed31_32_add( - dal_fixed31_32_one, a3)), gamma); + linear = dc_fixpt_pow( + dc_fixpt_div( + dc_fixpt_add(a2, arg), + dc_fixpt_add( + dc_fixpt_one, a3)), gamma); return linear; } @@ -352,8 +352,8 @@ static bool find_software_points( right = axis_x[max_number - 1].b; } - if (dal_fixed31_32_le(left, hw_point) && - dal_fixed31_32_le(hw_point, right)) { + if (dc_fixpt_le(left, hw_point) && + dc_fixpt_le(hw_point, right)) { *index_to_start = i; *index_left = i; @@ -366,7 +366,7 @@ static bool find_software_points( return true; } else if ((i == *index_to_start) && - dal_fixed31_32_le(hw_point, left)) { + dc_fixpt_le(hw_point, left)) { *index_to_start = i; *index_left = i; *index_right = i; @@ -375,7 +375,7 @@ static bool find_software_points( return true; } else if ((i == max_number - 1) && - dal_fixed31_32_le(right, hw_point)) { + dc_fixpt_le(right, hw_point)) { *index_to_start = i; *index_left = i; *index_right = i; @@ -457,17 +457,17 @@ static bool build_custom_gamma_mapping_coefficients_worker( } if (hw_pos == HW_POINT_POSITION_MIDDLE) - point->coeff = dal_fixed31_32_div( - dal_fixed31_32_sub( + point->coeff = dc_fixpt_div( + dc_fixpt_sub( coord_x, left_pos), - dal_fixed31_32_sub( + dc_fixpt_sub( right_pos, left_pos)); else if (hw_pos == HW_POINT_POSITION_LEFT) - point->coeff = dal_fixed31_32_zero; + point->coeff = dc_fixpt_zero; else if (hw_pos == HW_POINT_POSITION_RIGHT) - point->coeff = dal_fixed31_32_from_int(2); + point->coeff = dc_fixpt_from_int(2); else { BREAK_TO_DEBUGGER(); return false; @@ -502,45 +502,45 @@ static struct fixed31_32 calculate_mapped_value( if ((point->left_index < 0) || (point->left_index > max_index)) { BREAK_TO_DEBUGGER(); - return dal_fixed31_32_zero; + return dc_fixpt_zero; } if ((point->right_index < 0) || (point->right_index > max_index)) { BREAK_TO_DEBUGGER(); - return dal_fixed31_32_zero; + return dc_fixpt_zero; } if (point->pos == HW_POINT_POSITION_MIDDLE) if (channel == CHANNEL_NAME_RED) - result = dal_fixed31_32_add( - dal_fixed31_32_mul( + result = dc_fixpt_add( + dc_fixpt_mul( point->coeff, - dal_fixed31_32_sub( + dc_fixpt_sub( rgb[point->right_index].r, rgb[point->left_index].r)), rgb[point->left_index].r); else if (channel == CHANNEL_NAME_GREEN) - result = dal_fixed31_32_add( - dal_fixed31_32_mul( + result = dc_fixpt_add( + dc_fixpt_mul( point->coeff, - dal_fixed31_32_sub( + dc_fixpt_sub( rgb[point->right_index].g, rgb[point->left_index].g)), rgb[point->left_index].g); else - result = dal_fixed31_32_add( - dal_fixed31_32_mul( + result = dc_fixpt_add( + dc_fixpt_mul( point->coeff, - dal_fixed31_32_sub( + dc_fixpt_sub( rgb[point->right_index].b, rgb[point->left_index].b)), rgb[point->left_index].b); else if (point->pos == HW_POINT_POSITION_LEFT) { BREAK_TO_DEBUGGER(); - result = dal_fixed31_32_zero; + result = dc_fixpt_zero; } else { BREAK_TO_DEBUGGER(); - result = dal_fixed31_32_one; + result = dc_fixpt_one; } return result; @@ -558,7 +558,7 @@ static void build_pq(struct pwl_float_data_ex *rgb_regamma, struct fixed31_32 x; struct fixed31_32 output; struct fixed31_32 scaling_factor = - dal_fixed31_32_from_fraction(sdr_white_level, 10000); + dc_fixpt_from_fraction(sdr_white_level, 10000); if (!pq_initialized && sdr_white_level == 80) { precompute_pq(); @@ -579,15 +579,15 @@ static void build_pq(struct pwl_float_data_ex *rgb_regamma, if (sdr_white_level == 80) { output = pq_table[i]; } else { - x = dal_fixed31_32_mul(coord_x->x, scaling_factor); + x = dc_fixpt_mul(coord_x->x, scaling_factor); compute_pq(x, &output); } /* should really not happen? */ - if (dal_fixed31_32_lt(output, dal_fixed31_32_zero)) - output = dal_fixed31_32_zero; - else if (dal_fixed31_32_lt(dal_fixed31_32_one, output)) - output = dal_fixed31_32_one; + if (dc_fixpt_lt(output, dc_fixpt_zero)) + output = dc_fixpt_zero; + else if (dc_fixpt_lt(dc_fixpt_one, output)) + output = dc_fixpt_one; rgb->r = output; rgb->g = output; @@ -605,7 +605,7 @@ static void build_de_pq(struct pwl_float_data_ex *de_pq, uint32_t i; struct fixed31_32 output; - struct fixed31_32 scaling_factor = dal_fixed31_32_from_int(125); + struct fixed31_32 scaling_factor = dc_fixpt_from_int(125); if (!de_pq_initialized) { precompute_de_pq(); @@ -616,9 +616,9 @@ static void build_de_pq(struct pwl_float_data_ex *de_pq, for (i = 0; i <= hw_points_num; i++) { output = de_pq_table[i]; /* should really not happen? */ - if (dal_fixed31_32_lt(output, dal_fixed31_32_zero)) - output = dal_fixed31_32_zero; - else if (dal_fixed31_32_lt(scaling_factor, output)) + if (dc_fixpt_lt(output, dc_fixpt_zero)) + output = dc_fixpt_zero; + else if (dc_fixpt_lt(scaling_factor, output)) output = scaling_factor; de_pq[i].r = output; de_pq[i].g = output; @@ -670,9 +670,9 @@ static void build_degamma(struct pwl_float_data_ex *curve, end_index = begin_index + 12 * NUM_PTS_IN_REGION; while (i != begin_index) { - curve[i].r = dal_fixed31_32_zero; - curve[i].g = dal_fixed31_32_zero; - curve[i].b = dal_fixed31_32_zero; + curve[i].r = dc_fixpt_zero; + curve[i].g = dc_fixpt_zero; + curve[i].b = dc_fixpt_zero; i++; } @@ -684,19 +684,19 @@ static void build_degamma(struct pwl_float_data_ex *curve, i++; } while (i != hw_points_num + 1) { - curve[i].r = dal_fixed31_32_one; - curve[i].g = dal_fixed31_32_one; - curve[i].b = dal_fixed31_32_one; + curve[i].r = dc_fixpt_one; + curve[i].g = dc_fixpt_one; + curve[i].b = dc_fixpt_one; i++; } } -static bool scale_gamma(struct pwl_float_data *pwl_rgb, +static void scale_gamma(struct pwl_float_data *pwl_rgb, const struct dc_gamma *ramp, struct dividers dividers) { - const struct fixed31_32 max_driver = dal_fixed31_32_from_int(0xFFFF); - const struct fixed31_32 max_os = dal_fixed31_32_from_int(0xFF00); + const struct fixed31_32 max_driver = dc_fixpt_from_int(0xFFFF); + const struct fixed31_32 max_os = dc_fixpt_from_int(0xFF00); struct fixed31_32 scaler = max_os; uint32_t i; struct pwl_float_data *rgb = pwl_rgb; @@ -705,9 +705,9 @@ static bool scale_gamma(struct pwl_float_data *pwl_rgb, i = 0; do { - if (dal_fixed31_32_lt(max_os, ramp->entries.red[i]) || - dal_fixed31_32_lt(max_os, ramp->entries.green[i]) || - dal_fixed31_32_lt(max_os, ramp->entries.blue[i])) { + if (dc_fixpt_lt(max_os, ramp->entries.red[i]) || + dc_fixpt_lt(max_os, ramp->entries.green[i]) || + dc_fixpt_lt(max_os, ramp->entries.blue[i])) { scaler = max_driver; break; } @@ -717,109 +717,170 @@ static bool scale_gamma(struct pwl_float_data *pwl_rgb, i = 0; do { - rgb->r = dal_fixed31_32_div( + rgb->r = dc_fixpt_div( ramp->entries.red[i], scaler); - rgb->g = dal_fixed31_32_div( + rgb->g = dc_fixpt_div( ramp->entries.green[i], scaler); - rgb->b = dal_fixed31_32_div( + rgb->b = dc_fixpt_div( ramp->entries.blue[i], scaler); ++rgb; ++i; } while (i != ramp->num_entries); - rgb->r = dal_fixed31_32_mul(rgb_last->r, + rgb->r = dc_fixpt_mul(rgb_last->r, dividers.divider1); - rgb->g = dal_fixed31_32_mul(rgb_last->g, + rgb->g = dc_fixpt_mul(rgb_last->g, dividers.divider1); - rgb->b = dal_fixed31_32_mul(rgb_last->b, + rgb->b = dc_fixpt_mul(rgb_last->b, dividers.divider1); ++rgb; - rgb->r = dal_fixed31_32_mul(rgb_last->r, + rgb->r = dc_fixpt_mul(rgb_last->r, dividers.divider2); - rgb->g = dal_fixed31_32_mul(rgb_last->g, + rgb->g = dc_fixpt_mul(rgb_last->g, dividers.divider2); - rgb->b = dal_fixed31_32_mul(rgb_last->b, + rgb->b = dc_fixpt_mul(rgb_last->b, dividers.divider2); ++rgb; - rgb->r = dal_fixed31_32_mul(rgb_last->r, + rgb->r = dc_fixpt_mul(rgb_last->r, dividers.divider3); - rgb->g = dal_fixed31_32_mul(rgb_last->g, + rgb->g = dc_fixpt_mul(rgb_last->g, dividers.divider3); - rgb->b = dal_fixed31_32_mul(rgb_last->b, + rgb->b = dc_fixpt_mul(rgb_last->b, dividers.divider3); - - return true; } -static bool scale_gamma_dx(struct pwl_float_data *pwl_rgb, +static void scale_gamma_dx(struct pwl_float_data *pwl_rgb, const struct dc_gamma *ramp, struct dividers dividers) { uint32_t i; - struct fixed31_32 min = dal_fixed31_32_zero; - struct fixed31_32 max = dal_fixed31_32_one; + struct fixed31_32 min = dc_fixpt_zero; + struct fixed31_32 max = dc_fixpt_one; - struct fixed31_32 delta = dal_fixed31_32_zero; - struct fixed31_32 offset = dal_fixed31_32_zero; + struct fixed31_32 delta = dc_fixpt_zero; + struct fixed31_32 offset = dc_fixpt_zero; for (i = 0 ; i < ramp->num_entries; i++) { - if (dal_fixed31_32_lt(ramp->entries.red[i], min)) + if (dc_fixpt_lt(ramp->entries.red[i], min)) min = ramp->entries.red[i]; - if (dal_fixed31_32_lt(ramp->entries.green[i], min)) + if (dc_fixpt_lt(ramp->entries.green[i], min)) min = ramp->entries.green[i]; - if (dal_fixed31_32_lt(ramp->entries.blue[i], min)) + if (dc_fixpt_lt(ramp->entries.blue[i], min)) min = ramp->entries.blue[i]; - if (dal_fixed31_32_lt(max, ramp->entries.red[i])) + if (dc_fixpt_lt(max, ramp->entries.red[i])) max = ramp->entries.red[i]; - if (dal_fixed31_32_lt(max, ramp->entries.green[i])) + if (dc_fixpt_lt(max, ramp->entries.green[i])) max = ramp->entries.green[i]; - if (dal_fixed31_32_lt(max, ramp->entries.blue[i])) + if (dc_fixpt_lt(max, ramp->entries.blue[i])) max = ramp->entries.blue[i]; } - if (dal_fixed31_32_lt(min, dal_fixed31_32_zero)) - delta = dal_fixed31_32_neg(min); + if (dc_fixpt_lt(min, dc_fixpt_zero)) + delta = dc_fixpt_neg(min); - offset = dal_fixed31_32_add(min, max); + offset = dc_fixpt_add(min, max); for (i = 0 ; i < ramp->num_entries; i++) { - pwl_rgb[i].r = dal_fixed31_32_div( - dal_fixed31_32_add( + pwl_rgb[i].r = dc_fixpt_div( + dc_fixpt_add( ramp->entries.red[i], delta), offset); - pwl_rgb[i].g = dal_fixed31_32_div( - dal_fixed31_32_add( + pwl_rgb[i].g = dc_fixpt_div( + dc_fixpt_add( ramp->entries.green[i], delta), offset); - pwl_rgb[i].b = dal_fixed31_32_div( - dal_fixed31_32_add( + pwl_rgb[i].b = dc_fixpt_div( + dc_fixpt_add( ramp->entries.blue[i], delta), offset); } - pwl_rgb[i].r = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i].r = dc_fixpt_sub(dc_fixpt_mul_int( pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r); - pwl_rgb[i].g = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i].g = dc_fixpt_sub(dc_fixpt_mul_int( pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g); - pwl_rgb[i].b = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i].b = dc_fixpt_sub(dc_fixpt_mul_int( pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b); ++i; - pwl_rgb[i].r = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i].r = dc_fixpt_sub(dc_fixpt_mul_int( pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r); - pwl_rgb[i].g = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i].g = dc_fixpt_sub(dc_fixpt_mul_int( pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g); - pwl_rgb[i].b = dal_fixed31_32_sub(dal_fixed31_32_mul_int( + pwl_rgb[i].b = dc_fixpt_sub(dc_fixpt_mul_int( pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b); +} - return true; +/* todo: all these scale_gamma functions are inherently the same but + * take different structures as params or different format for ramp + * values. We could probably implement it in a more generic fashion + */ +static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb, + const struct regamma_ramp *ramp, + struct dividers dividers) +{ + unsigned short max_driver = 0xFFFF; + unsigned short max_os = 0xFF00; + unsigned short scaler = max_os; + uint32_t i; + struct pwl_float_data *rgb = pwl_rgb; + struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1; + + i = 0; + do { + if (ramp->gamma[i] > max_os || + ramp->gamma[i + 256] > max_os || + ramp->gamma[i + 512] > max_os) { + scaler = max_driver; + break; + } + i++; + } while (i != GAMMA_RGB_256_ENTRIES); + + i = 0; + do { + rgb->r = dc_fixpt_from_fraction( + ramp->gamma[i], scaler); + rgb->g = dc_fixpt_from_fraction( + ramp->gamma[i + 256], scaler); + rgb->b = dc_fixpt_from_fraction( + ramp->gamma[i + 512], scaler); + + ++rgb; + ++i; + } while (i != GAMMA_RGB_256_ENTRIES); + + rgb->r = dc_fixpt_mul(rgb_last->r, + dividers.divider1); + rgb->g = dc_fixpt_mul(rgb_last->g, + dividers.divider1); + rgb->b = dc_fixpt_mul(rgb_last->b, + dividers.divider1); + + ++rgb; + + rgb->r = dc_fixpt_mul(rgb_last->r, + dividers.divider2); + rgb->g = dc_fixpt_mul(rgb_last->g, + dividers.divider2); + rgb->b = dc_fixpt_mul(rgb_last->b, + dividers.divider2); + + ++rgb; + + rgb->r = dc_fixpt_mul(rgb_last->r, + dividers.divider3); + rgb->g = dc_fixpt_mul(rgb_last->g, + dividers.divider3); + rgb->b = dc_fixpt_mul(rgb_last->b, + dividers.divider3); } /* @@ -852,7 +913,7 @@ static void apply_lut_1d( struct fixed31_32 lut2; const int max_lut_index = 4095; const struct fixed31_32 max_lut_index_f = - dal_fixed31_32_from_int_nonconst(max_lut_index); + dc_fixpt_from_int(max_lut_index); int32_t index = 0, index_next = 0; struct fixed31_32 index_f; struct fixed31_32 delta_lut; @@ -870,10 +931,10 @@ static void apply_lut_1d( else regamma_y = &tf_pts->blue[i]; - norm_y = dal_fixed31_32_mul(max_lut_index_f, + norm_y = dc_fixpt_mul(max_lut_index_f, *regamma_y); - index = dal_fixed31_32_floor(norm_y); - index_f = dal_fixed31_32_from_int_nonconst(index); + index = dc_fixpt_floor(norm_y); + index_f = dc_fixpt_from_int(index); if (index < 0 || index > max_lut_index) continue; @@ -892,11 +953,11 @@ static void apply_lut_1d( } // we have everything now, so interpolate - delta_lut = dal_fixed31_32_sub(lut2, lut1); - delta_index = dal_fixed31_32_sub(norm_y, index_f); + delta_lut = dc_fixpt_sub(lut2, lut1); + delta_index = dc_fixpt_sub(norm_y, index_f); - *regamma_y = dal_fixed31_32_add(lut1, - dal_fixed31_32_mul(delta_index, delta_lut)); + *regamma_y = dc_fixpt_add(lut1, + dc_fixpt_mul(delta_index, delta_lut)); } } } @@ -912,7 +973,7 @@ static void build_evenly_distributed_points( uint32_t i = 0; do { - struct fixed31_32 value = dal_fixed31_32_from_fraction(i, + struct fixed31_32 value = dc_fixpt_from_fraction(i, numberof_points - 1); p->r = value; @@ -923,21 +984,21 @@ static void build_evenly_distributed_points( ++i; } while (i != numberof_points); - p->r = dal_fixed31_32_div(p_last->r, dividers.divider1); - p->g = dal_fixed31_32_div(p_last->g, dividers.divider1); - p->b = dal_fixed31_32_div(p_last->b, dividers.divider1); + p->r = dc_fixpt_div(p_last->r, dividers.divider1); + p->g = dc_fixpt_div(p_last->g, dividers.divider1); + p->b = dc_fixpt_div(p_last->b, dividers.divider1); ++p; - p->r = dal_fixed31_32_div(p_last->r, dividers.divider2); - p->g = dal_fixed31_32_div(p_last->g, dividers.divider2); - p->b = dal_fixed31_32_div(p_last->b, dividers.divider2); + p->r = dc_fixpt_div(p_last->r, dividers.divider2); + p->g = dc_fixpt_div(p_last->g, dividers.divider2); + p->b = dc_fixpt_div(p_last->b, dividers.divider2); ++p; - p->r = dal_fixed31_32_div(p_last->r, dividers.divider3); - p->g = dal_fixed31_32_div(p_last->g, dividers.divider3); - p->b = dal_fixed31_32_div(p_last->b, dividers.divider3); + p->r = dc_fixpt_div(p_last->r, dividers.divider3); + p->g = dc_fixpt_div(p_last->g, dividers.divider3); + p->b = dc_fixpt_div(p_last->b, dividers.divider3); } static inline void copy_rgb_regamma_to_coordinates_x( @@ -949,7 +1010,7 @@ static inline void copy_rgb_regamma_to_coordinates_x( uint32_t i = 0; const struct pwl_float_data_ex *rgb_regamma = rgb_ex; - while (i <= hw_points_num) { + while (i <= hw_points_num + 1) { coords->regamma_y_red = rgb_regamma->r; coords->regamma_y_green = rgb_regamma->g; coords->regamma_y_blue = rgb_regamma->b; @@ -1002,6 +1063,102 @@ static bool calculate_interpolated_hardware_curve( return true; } +/* The "old" interpolation uses a complicated scheme to build an array of + * coefficients while also using an array of 0-255 normalized to 0-1 + * Then there's another loop using both of the above + new scaled user ramp + * and we concatenate them. It also searches for points of interpolation and + * uses enums for positions. + * + * This function uses a different approach: + * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255 + * To find index for hwX , we notice the following: + * i/255 <= hwX < (i+1)/255 <=> i <= 255*hwX < i+1 + * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT + * + * Once the index is known, combined Y is simply: + * user_ramp(index) + (hwX-index/255)*(user_ramp(index+1) - user_ramp(index) + * + * We should switch to this method in all cases, it's simpler and faster + * ToDo one day - for now this only applies to ADL regamma to avoid regression + * for regular use cases (sRGB and PQ) + */ +static void interpolate_user_regamma(uint32_t hw_points_num, + struct pwl_float_data *rgb_user, + bool apply_degamma, + struct dc_transfer_func_distributed_points *tf_pts) +{ + uint32_t i; + uint32_t color = 0; + int32_t index; + int32_t index_next; + struct fixed31_32 *tf_point; + struct fixed31_32 hw_x; + struct fixed31_32 norm_factor = + dc_fixpt_from_int(255); + struct fixed31_32 norm_x; + struct fixed31_32 index_f; + struct fixed31_32 lut1; + struct fixed31_32 lut2; + struct fixed31_32 delta_lut; + struct fixed31_32 delta_index; + + i = 0; + /* fixed_pt library has problems handling too small values */ + while (i != 32) { + tf_pts->red[i] = dc_fixpt_zero; + tf_pts->green[i] = dc_fixpt_zero; + tf_pts->blue[i] = dc_fixpt_zero; + ++i; + } + while (i <= hw_points_num + 1) { + for (color = 0; color < 3; color++) { + if (color == 0) + tf_point = &tf_pts->red[i]; + else if (color == 1) + tf_point = &tf_pts->green[i]; + else + tf_point = &tf_pts->blue[i]; + + if (apply_degamma) { + if (color == 0) + hw_x = coordinates_x[i].regamma_y_red; + else if (color == 1) + hw_x = coordinates_x[i].regamma_y_green; + else + hw_x = coordinates_x[i].regamma_y_blue; + } else + hw_x = coordinates_x[i].x; + + norm_x = dc_fixpt_mul(norm_factor, hw_x); + index = dc_fixpt_floor(norm_x); + if (index < 0 || index > 255) + continue; + + index_f = dc_fixpt_from_int(index); + index_next = (index == 255) ? index : index + 1; + + if (color == 0) { + lut1 = rgb_user[index].r; + lut2 = rgb_user[index_next].r; + } else if (color == 1) { + lut1 = rgb_user[index].g; + lut2 = rgb_user[index_next].g; + } else { + lut1 = rgb_user[index].b; + lut2 = rgb_user[index_next].b; + } + + // we have everything now, so interpolate + delta_lut = dc_fixpt_sub(lut2, lut1); + delta_index = dc_fixpt_sub(norm_x, index_f); + + *tf_point = dc_fixpt_add(lut1, + dc_fixpt_mul(delta_index, delta_lut)); + } + ++i; + } +} + static void build_new_custom_resulted_curve( uint32_t hw_points_num, struct dc_transfer_func_distributed_points *tf_pts) @@ -1011,16 +1168,39 @@ static void build_new_custom_resulted_curve( i = 0; while (i != hw_points_num + 1) { - tf_pts->red[i] = dal_fixed31_32_clamp( - tf_pts->red[i], dal_fixed31_32_zero, - dal_fixed31_32_one); - tf_pts->green[i] = dal_fixed31_32_clamp( - tf_pts->green[i], dal_fixed31_32_zero, - dal_fixed31_32_one); - tf_pts->blue[i] = dal_fixed31_32_clamp( - tf_pts->blue[i], dal_fixed31_32_zero, - dal_fixed31_32_one); + tf_pts->red[i] = dc_fixpt_clamp( + tf_pts->red[i], dc_fixpt_zero, + dc_fixpt_one); + tf_pts->green[i] = dc_fixpt_clamp( + tf_pts->green[i], dc_fixpt_zero, + dc_fixpt_one); + tf_pts->blue[i] = dc_fixpt_clamp( + tf_pts->blue[i], dc_fixpt_zero, + dc_fixpt_one); + + ++i; + } +} + +static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma, + uint32_t hw_points_num) +{ + uint32_t i; + + struct gamma_coefficients coeff; + struct pwl_float_data_ex *rgb = rgb_regamma; + const struct hw_x_point *coord_x = coordinates_x; + build_coefficients(&coeff, true); + + i = 0; + while (i != hw_points_num + 1) { + rgb->r = translate_from_linear_space_ex( + coord_x->x, &coeff, 0); + rgb->g = rgb->r; + rgb->b = rgb->r; + ++coord_x; + ++rgb; ++i; } } @@ -1062,6 +1242,7 @@ static bool map_regamma_hw_to_x_user( } } + /* this should be named differently, all it does is clamp to 0-1 */ build_new_custom_resulted_curve(hw_points_num, tf_pts); return true; @@ -1093,25 +1274,25 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - rgb_user = kzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS), - GFP_KERNEL); + rgb_user = kvzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_user) goto rgb_user_alloc_fail; - rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS), - GFP_KERNEL); + rgb_regamma = kvzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_regamma) goto rgb_regamma_alloc_fail; - axix_x = kzalloc(sizeof(*axix_x) * (ramp->num_entries + 3), - GFP_KERNEL); + axix_x = kvzalloc(sizeof(*axix_x) * (ramp->num_entries + 3), + GFP_KERNEL); if (!axix_x) goto axix_x_alloc_fail; - coeff = kzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL); + coeff = kvzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL); if (!coeff) goto coeff_alloc_fail; - dividers.divider1 = dal_fixed31_32_from_fraction(3, 2); - dividers.divider2 = dal_fixed31_32_from_int(2); - dividers.divider3 = dal_fixed31_32_from_fraction(5, 2); + dividers.divider1 = dc_fixpt_from_fraction(3, 2); + dividers.divider2 = dc_fixpt_from_int(2); + dividers.divider3 = dc_fixpt_from_fraction(5, 2); tf = output_tf->tf; @@ -1157,13 +1338,120 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, ret = true; - kfree(coeff); + kvfree(coeff); coeff_alloc_fail: - kfree(axix_x); + kvfree(axix_x); axix_x_alloc_fail: + kvfree(rgb_regamma); +rgb_regamma_alloc_fail: + kvfree(rgb_user); +rgb_user_alloc_fail: + return ret; +} + +bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf, + const struct regamma_lut *regamma) +{ + struct gamma_coefficients coeff; + const struct hw_x_point *coord_x = coordinates_x; + uint32_t i = 0; + + do { + coeff.a0[i] = dc_fixpt_from_fraction( + regamma->coeff.A0[i], 10000000); + coeff.a1[i] = dc_fixpt_from_fraction( + regamma->coeff.A1[i], 1000); + coeff.a2[i] = dc_fixpt_from_fraction( + regamma->coeff.A2[i], 1000); + coeff.a3[i] = dc_fixpt_from_fraction( + regamma->coeff.A3[i], 1000); + coeff.user_gamma[i] = dc_fixpt_from_fraction( + regamma->coeff.gamma[i], 1000); + + ++i; + } while (i != 3); + + i = 0; + /* fixed_pt library has problems handling too small values */ + while (i != 32) { + output_tf->tf_pts.red[i] = dc_fixpt_zero; + output_tf->tf_pts.green[i] = dc_fixpt_zero; + output_tf->tf_pts.blue[i] = dc_fixpt_zero; + ++coord_x; + ++i; + } + while (i != MAX_HW_POINTS + 1) { + output_tf->tf_pts.red[i] = translate_from_linear_space_ex( + coord_x->x, &coeff, 0); + output_tf->tf_pts.green[i] = translate_from_linear_space_ex( + coord_x->x, &coeff, 1); + output_tf->tf_pts.blue[i] = translate_from_linear_space_ex( + coord_x->x, &coeff, 2); + ++coord_x; + ++i; + } + + // this function just clamps output to 0-1 + build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts); + output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + + return true; +} + +bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, + const struct regamma_lut *regamma) +{ + struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts; + struct dividers dividers; + + struct pwl_float_data *rgb_user = NULL; + struct pwl_float_data_ex *rgb_regamma = NULL; + bool ret = false; + + if (regamma == NULL) + return false; + + output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; + + rgb_user = kzalloc(sizeof(*rgb_user) * (GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS), + GFP_KERNEL); + if (!rgb_user) + goto rgb_user_alloc_fail; + + rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); + if (!rgb_regamma) + goto rgb_regamma_alloc_fail; + + dividers.divider1 = dc_fixpt_from_fraction(3, 2); + dividers.divider2 = dc_fixpt_from_int(2); + dividers.divider3 = dc_fixpt_from_fraction(5, 2); + + scale_user_regamma_ramp(rgb_user, ®amma->ramp, dividers); + + if (regamma->flags.bits.applyDegamma == 1) { + apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS); + copy_rgb_regamma_to_coordinates_x(coordinates_x, + MAX_HW_POINTS, rgb_regamma); + } + + interpolate_user_regamma(MAX_HW_POINTS, rgb_user, + regamma->flags.bits.applyDegamma, tf_pts); + + // no custom HDR curves! + tf_pts->end_exponent = 0; + tf_pts->x_point_at_y1_red = 1; + tf_pts->x_point_at_y1_green = 1; + tf_pts->x_point_at_y1_blue = 1; + + // this function just clamps output to 0-1 + build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts); + + ret = true; + kfree(rgb_regamma); rgb_regamma_alloc_fail: - kfree(rgb_user); + kvfree(rgb_user); rgb_user_alloc_fail: return ret; } @@ -1192,25 +1480,25 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, input_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - rgb_user = kzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS), - GFP_KERNEL); + rgb_user = kvzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_user) goto rgb_user_alloc_fail; - curve = kzalloc(sizeof(*curve) * (MAX_HW_POINTS + _EXTRA_POINTS), - GFP_KERNEL); + curve = kvzalloc(sizeof(*curve) * (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!curve) goto curve_alloc_fail; - axix_x = kzalloc(sizeof(*axix_x) * (ramp->num_entries + _EXTRA_POINTS), - GFP_KERNEL); + axix_x = kvzalloc(sizeof(*axix_x) * (ramp->num_entries + _EXTRA_POINTS), + GFP_KERNEL); if (!axix_x) goto axix_x_alloc_fail; - coeff = kzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL); + coeff = kvzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL); if (!coeff) goto coeff_alloc_fail; - dividers.divider1 = dal_fixed31_32_from_fraction(3, 2); - dividers.divider2 = dal_fixed31_32_from_int(2); - dividers.divider3 = dal_fixed31_32_from_fraction(5, 2); + dividers.divider1 = dc_fixpt_from_fraction(3, 2); + dividers.divider2 = dc_fixpt_from_int(2); + dividers.divider3 = dc_fixpt_from_fraction(5, 2); tf = input_tf->tf; @@ -1246,13 +1534,13 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, ret = true; - kfree(coeff); + kvfree(coeff); coeff_alloc_fail: - kfree(axix_x); + kvfree(axix_x); axix_x_alloc_fail: - kfree(curve); + kvfree(curve); curve_alloc_fail: - kfree(rgb_user); + kvfree(rgb_user); rgb_user_alloc_fail: return ret; @@ -1281,8 +1569,9 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, } ret = true; } else if (trans == TRANSFER_FUNCTION_PQ) { - rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + - _EXTRA_POINTS), GFP_KERNEL); + rgb_regamma = kvzalloc(sizeof(*rgb_regamma) * + (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_regamma) goto rgb_regamma_alloc_fail; points->end_exponent = 7; @@ -1302,11 +1591,12 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, } ret = true; - kfree(rgb_regamma); + kvfree(rgb_regamma); } else if (trans == TRANSFER_FUNCTION_SRGB || trans == TRANSFER_FUNCTION_BT709) { - rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + - _EXTRA_POINTS), GFP_KERNEL); + rgb_regamma = kvzalloc(sizeof(*rgb_regamma) * + (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_regamma) goto rgb_regamma_alloc_fail; points->end_exponent = 0; @@ -1324,7 +1614,7 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, } ret = true; - kfree(rgb_regamma); + kvfree(rgb_regamma); } rgb_regamma_alloc_fail: return ret; @@ -1348,8 +1638,9 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, } ret = true; } else if (trans == TRANSFER_FUNCTION_PQ) { - rgb_degamma = kzalloc(sizeof(*rgb_degamma) * (MAX_HW_POINTS + - _EXTRA_POINTS), GFP_KERNEL); + rgb_degamma = kvzalloc(sizeof(*rgb_degamma) * + (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_degamma) goto rgb_degamma_alloc_fail; @@ -1364,11 +1655,12 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, } ret = true; - kfree(rgb_degamma); + kvfree(rgb_degamma); } else if (trans == TRANSFER_FUNCTION_SRGB || trans == TRANSFER_FUNCTION_BT709) { - rgb_degamma = kzalloc(sizeof(*rgb_degamma) * (MAX_HW_POINTS + - _EXTRA_POINTS), GFP_KERNEL); + rgb_degamma = kvzalloc(sizeof(*rgb_degamma) * + (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_degamma) goto rgb_degamma_alloc_fail; @@ -1382,7 +1674,7 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, } ret = true; - kfree(rgb_degamma); + kvfree(rgb_degamma); } points->end_exponent = 0; points->x_point_at_y1_red = 1; diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h index b7f9bc27d101..b64048991a95 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -32,6 +32,47 @@ struct dc_transfer_func_distributed_points; struct dc_rgb_fixed; enum dc_transfer_func_predefined; +/* For SetRegamma ADL interface support + * Must match escape type + */ +union regamma_flags { + unsigned int raw; + struct { + unsigned int gammaRampArray :1; // RegammaRamp is in use + unsigned int gammaFromEdid :1; //gamma from edid is in use + unsigned int gammaFromEdidEx :1; //gamma from edid is in use , but only for Display Id 1.2 + unsigned int gammaFromUser :1; //user custom gamma is used + unsigned int coeffFromUser :1; //coeff. A0-A3 from user is in use + unsigned int coeffFromEdid :1; //coeff. A0-A3 from edid is in use + unsigned int applyDegamma :1; //flag for additional degamma correction in driver + unsigned int gammaPredefinedSRGB :1; //flag for SRGB gamma + unsigned int gammaPredefinedPQ :1; //flag for PQ gamma + unsigned int gammaPredefinedPQ2084Interim :1; //flag for PQ gamma, lower max nits + unsigned int gammaPredefined36 :1; //flag for 3.6 gamma + unsigned int gammaPredefinedReset :1; //flag to return to previous gamma + } bits; +}; + +struct regamma_ramp { + unsigned short gamma[256*3]; // gamma ramp packed in same way as OS windows ,r , g & b +}; + +struct regamma_coeff { + int gamma[3]; + int A0[3]; + int A1[3]; + int A2[3]; + int A3[3]; +}; + +struct regamma_lut { + union regamma_flags flags; + union { + struct regamma_ramp ramp; + struct regamma_coeff coeff; + }; +}; + void setup_x_points_distribution(void); void precompute_pq(void); void precompute_de_pq(void); @@ -45,9 +86,14 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *output_tf, bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, struct dc_transfer_func_distributed_points *points); -bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, +bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, struct dc_transfer_func_distributed_points *points); +bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf, + const struct regamma_lut *regamma); + +bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, + const struct regamma_lut *regamma); #endif /* COLOR_MOD_COLOR_GAMMA_H_ */ diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h index 3230e2adb870..3812094b52e8 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h @@ -46,6 +46,10 @@ void mod_stats_dump(struct mod_stats *mod_stats); void mod_stats_reset_data(struct mod_stats *mod_stats); +void mod_stats_update_event(struct mod_stats *mod_stats, + char *event_string, + unsigned int length); + void mod_stats_update_flip(struct mod_stats *mod_stats, unsigned long timestamp_in_ns); diff --git a/drivers/gpu/drm/amd/display/modules/stats/stats.c b/drivers/gpu/drm/amd/display/modules/stats/stats.c index 041f87b73d5f..3f7d47fdc367 100644 --- a/drivers/gpu/drm/amd/display/modules/stats/stats.c +++ b/drivers/gpu/drm/amd/display/modules/stats/stats.c @@ -36,9 +36,14 @@ #define DAL_STATS_ENTRIES_REGKEY_DEFAULT 0x00350000 #define DAL_STATS_ENTRIES_REGKEY_MAX 0x01000000 +#define DAL_STATS_EVENT_ENTRIES_DEFAULT 0x00000100 + #define MOD_STATS_NUM_VSYNCS 5 +#define MOD_STATS_EVENT_STRING_MAX 512 struct stats_time_cache { + unsigned int entry_id; + unsigned long flip_timestamp_in_ns; unsigned long vupdate_timestamp_in_ns; @@ -63,15 +68,26 @@ struct stats_time_cache { unsigned int flags; }; +struct stats_event_cache { + unsigned int entry_id; + char event_string[MOD_STATS_EVENT_STRING_MAX]; +}; + struct core_stats { struct mod_stats public; struct dc *dc; + bool enabled; + unsigned int entries; + unsigned int event_entries; + unsigned int entry_id; + struct stats_time_cache *time; unsigned int index; - bool enabled; - unsigned int entries; + struct stats_event_cache *events; + unsigned int event_index; + }; #define MOD_STATS_TO_CORE(mod_stats)\ @@ -99,12 +115,12 @@ struct mod_stats *mod_stats_create(struct dc *dc) unsigned int reg_data; int i = 0; + if (dc == NULL) + goto fail_construct; + core_stats = kzalloc(sizeof(struct core_stats), GFP_KERNEL); if (core_stats == NULL) - goto fail_alloc_context; - - if (dc == NULL) goto fail_construct; core_stats->dc = dc; @@ -115,33 +131,55 @@ struct mod_stats *mod_stats_create(struct dc *dc) ®_data, sizeof(unsigned int), &flag)) core_stats->enabled = reg_data; - core_stats->entries = DAL_STATS_ENTRIES_REGKEY_DEFAULT; - if (dm_read_persistent_data(dc->ctx, NULL, NULL, - DAL_STATS_ENTRIES_REGKEY, - ®_data, sizeof(unsigned int), &flag)) { - if (reg_data > DAL_STATS_ENTRIES_REGKEY_MAX) - core_stats->entries = DAL_STATS_ENTRIES_REGKEY_MAX; - else - core_stats->entries = reg_data; - } + if (core_stats->enabled) { + core_stats->entries = DAL_STATS_ENTRIES_REGKEY_DEFAULT; + if (dm_read_persistent_data(dc->ctx, NULL, NULL, + DAL_STATS_ENTRIES_REGKEY, + ®_data, sizeof(unsigned int), &flag)) { + if (reg_data > DAL_STATS_ENTRIES_REGKEY_MAX) + core_stats->entries = DAL_STATS_ENTRIES_REGKEY_MAX; + else + core_stats->entries = reg_data; + } + core_stats->time = kzalloc( + sizeof(struct stats_time_cache) * + core_stats->entries, + GFP_KERNEL); - core_stats->time = kzalloc(sizeof(struct stats_time_cache) * core_stats->entries, - GFP_KERNEL); + if (core_stats->time == NULL) + goto fail_construct_time; - if (core_stats->time == NULL) - goto fail_construct; + core_stats->event_entries = DAL_STATS_EVENT_ENTRIES_DEFAULT; + core_stats->events = kzalloc( + sizeof(struct stats_event_cache) * + core_stats->event_entries, + GFP_KERNEL); + + if (core_stats->events == NULL) + goto fail_construct_events; + + } else { + core_stats->entries = 0; + } /* Purposely leave index 0 unused so we don't need special logic to * handle calculation cases that depend on previous flip data. */ core_stats->index = 1; + core_stats->event_index = 0; + + // Keeps track of ordering within the different stats structures + core_stats->entry_id = 0; return &core_stats->public; -fail_construct: +fail_construct_events: + kfree(core_stats->time); + +fail_construct_time: kfree(core_stats); -fail_alloc_context: +fail_construct: return NULL; } @@ -153,6 +191,9 @@ void mod_stats_destroy(struct mod_stats *mod_stats) if (core_stats->time != NULL) kfree(core_stats->time); + if (core_stats->events != NULL) + kfree(core_stats->events); + kfree(core_stats); } } @@ -163,7 +204,11 @@ void mod_stats_dump(struct mod_stats *mod_stats) struct dal_logger *logger = NULL; struct core_stats *core_stats = NULL; struct stats_time_cache *time = NULL; + struct stats_event_cache *events = NULL; + unsigned int time_index = 1; + unsigned int event_index = 0; unsigned int index = 0; + struct log_entry log_entry; if (mod_stats == NULL) return; @@ -172,45 +217,62 @@ void mod_stats_dump(struct mod_stats *mod_stats) dc = core_stats->dc; logger = dc->ctx->logger; time = core_stats->time; - - //LogEntry* pLog = GetLog()->Open(LogMajor_ISR, LogMinor_ISR_FreeSyncSW); - - //if (!pLog->IsDummyEntry()) - { - dm_logger_write(logger, LOG_PROFILING, "==Display Caps==\n"); - dm_logger_write(logger, LOG_PROFILING, "\n"); - dm_logger_write(logger, LOG_PROFILING, "\n"); - - dm_logger_write(logger, LOG_PROFILING, "==Stats==\n"); - dm_logger_write(logger, LOG_PROFILING, - "render avgRender minWindow midPoint maxWindow vsyncToFlip flipToVsync #vsyncBetweenFlip #frame insertDuration vTotalMin vTotalMax eventTrigs vSyncTime1 vSyncTime2 vSyncTime3 vSyncTime4 vSyncTime5 flags\n"); - - for (int i = 0; i < core_stats->index && i < core_stats->entries; i++) { - dm_logger_write(logger, LOG_PROFILING, - "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u\n", - time[i].render_time_in_us, - time[i].avg_render_time_in_us_last_ten, - time[i].min_window, - time[i].lfc_mid_point_in_us, - time[i].max_window, - time[i].vsync_to_flip_time_in_us, - time[i].flip_to_vsync_time_in_us, - time[i].num_vsync_between_flips, - time[i].num_frames_inserted, - time[i].inserted_duration_in_us, - time[i].v_total_min, - time[i].v_total_max, - time[i].event_triggers, - time[i].v_sync_time_in_us[0], - time[i].v_sync_time_in_us[1], - time[i].v_sync_time_in_us[2], - time[i].v_sync_time_in_us[3], - time[i].v_sync_time_in_us[4], - time[i].flags); + events = core_stats->events; + + DISPLAY_STATS_BEGIN(log_entry); + + DISPLAY_STATS("==Display Caps==\n"); + + DISPLAY_STATS("==Display Stats==\n"); + + DISPLAY_STATS("%10s %10s %10s %10s %10s" + " %11s %11s %17s %10s %14s" + " %10s %10s %10s %10s %10s" + " %10s %10s %10s %10s\n", + "render", "avgRender", + "minWindow", "midPoint", "maxWindow", + "vsyncToFlip", "flipToVsync", "vsyncsBetweenFlip", + "numFrame", "insertDuration", + "vTotalMin", "vTotalMax", "eventTrigs", + "vSyncTime1", "vSyncTime2", "vSyncTime3", + "vSyncTime4", "vSyncTime5", "flags"); + + for (int i = 0; i < core_stats->entry_id; i++) { + if (event_index < core_stats->event_index && + i == events[event_index].entry_id) { + DISPLAY_STATS("%s\n", events[event_index].event_string); + event_index++; + } else if (time_index < core_stats->index && + i == time[time_index].entry_id) { + DISPLAY_STATS("%10u %10u %10u %10u %10u" + " %11u %11u %17u %10u %14u" + " %10u %10u %10u %10u %10u" + " %10u %10u %10u %10u\n", + time[time_index].render_time_in_us, + time[time_index].avg_render_time_in_us_last_ten, + time[time_index].min_window, + time[time_index].lfc_mid_point_in_us, + time[time_index].max_window, + time[time_index].vsync_to_flip_time_in_us, + time[time_index].flip_to_vsync_time_in_us, + time[time_index].num_vsync_between_flips, + time[time_index].num_frames_inserted, + time[time_index].inserted_duration_in_us, + time[time_index].v_total_min, + time[time_index].v_total_max, + time[time_index].event_triggers, + time[time_index].v_sync_time_in_us[0], + time[time_index].v_sync_time_in_us[1], + time[time_index].v_sync_time_in_us[2], + time[time_index].v_sync_time_in_us[3], + time[time_index].v_sync_time_in_us[4], + time[time_index].flags); + + time_index++; } } - //GetLog()->Close(pLog); - //GetLog()->UnSetLogMask(LogMajor_ISR, LogMinor_ISR_FreeSyncSW); + + DISPLAY_STATS_END(log_entry); } void mod_stats_reset_data(struct mod_stats *mod_stats) @@ -227,7 +289,46 @@ void mod_stats_reset_data(struct mod_stats *mod_stats) memset(core_stats->time, 0, sizeof(struct stats_time_cache) * core_stats->entries); - core_stats->index = 0; + memset(core_stats->events, 0, + sizeof(struct stats_event_cache) * core_stats->event_entries); + + core_stats->index = 1; + core_stats->event_index = 0; + + // Keeps track of ordering within the different stats structures + core_stats->entry_id = 0; +} + +void mod_stats_update_event(struct mod_stats *mod_stats, + char *event_string, + unsigned int length) +{ + struct core_stats *core_stats = NULL; + struct stats_event_cache *events = NULL; + unsigned int index = 0; + unsigned int copy_length = 0; + + if (mod_stats == NULL) + return; + + core_stats = MOD_STATS_TO_CORE(mod_stats); + + if (core_stats->event_index >= core_stats->event_entries) + return; + + events = core_stats->events; + index = core_stats->event_index; + + copy_length = length; + if (length > MOD_STATS_EVENT_STRING_MAX) + copy_length = MOD_STATS_EVENT_STRING_MAX; + + memcpy(&events[index].event_string, event_string, copy_length); + events[index].event_string[copy_length - 1] = '\0'; + + events[index].entry_id = core_stats->entry_id; + core_stats->event_index++; + core_stats->entry_id++; } void mod_stats_update_flip(struct mod_stats *mod_stats, @@ -250,7 +351,7 @@ void mod_stats_update_flip(struct mod_stats *mod_stats, time[index].flip_timestamp_in_ns = timestamp_in_ns; time[index].render_time_in_us = - timestamp_in_ns - time[index - 1].flip_timestamp_in_ns; + (timestamp_in_ns - time[index - 1].flip_timestamp_in_ns) / 1000; if (index >= 10) { for (unsigned int i = 0; i < 10; i++) @@ -261,12 +362,16 @@ void mod_stats_update_flip(struct mod_stats *mod_stats, if (time[index].num_vsync_between_flips > 0) time[index].vsync_to_flip_time_in_us = - timestamp_in_ns - time[index].vupdate_timestamp_in_ns; + (timestamp_in_ns - + time[index].vupdate_timestamp_in_ns) / 1000; else time[index].vsync_to_flip_time_in_us = - timestamp_in_ns - time[index - 1].vupdate_timestamp_in_ns; + (timestamp_in_ns - + time[index - 1].vupdate_timestamp_in_ns) / 1000; + time[index].entry_id = core_stats->entry_id; core_stats->index++; + core_stats->entry_id++; } void mod_stats_update_vupdate(struct mod_stats *mod_stats, @@ -275,6 +380,8 @@ void mod_stats_update_vupdate(struct mod_stats *mod_stats, struct core_stats *core_stats = NULL; struct stats_time_cache *time = NULL; unsigned int index = 0; + unsigned int num_vsyncs = 0; + unsigned int prev_vsync_in_ns = 0; if (mod_stats == NULL) return; @@ -286,14 +393,27 @@ void mod_stats_update_vupdate(struct mod_stats *mod_stats, time = core_stats->time; index = core_stats->index; + num_vsyncs = time[index].num_vsync_between_flips; + + if (num_vsyncs < MOD_STATS_NUM_VSYNCS) { + if (num_vsyncs == 0) { + prev_vsync_in_ns = + time[index - 1].vupdate_timestamp_in_ns; + + time[index].flip_to_vsync_time_in_us = + (timestamp_in_ns - + time[index - 1].flip_timestamp_in_ns) / + 1000; + } else { + prev_vsync_in_ns = + time[index].vupdate_timestamp_in_ns; + } - time[index].vupdate_timestamp_in_ns = timestamp_in_ns; - if (time[index].num_vsync_between_flips < MOD_STATS_NUM_VSYNCS) - time[index].v_sync_time_in_us[time[index].num_vsync_between_flips] = - timestamp_in_ns - time[index - 1].vupdate_timestamp_in_ns; - time[index].flip_to_vsync_time_in_us = - timestamp_in_ns - time[index - 1].flip_timestamp_in_ns; + time[index].v_sync_time_in_us[num_vsyncs] = + (timestamp_in_ns - prev_vsync_in_ns) / 1000; + } + time[index].vupdate_timestamp_in_ns = timestamp_in_ns; time[index].num_vsync_between_flips++; } diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 9fa3aaef3f33..b178176b72ac 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -92,7 +92,7 @@ enum amd_powergating_state { #define AMD_CG_SUPPORT_GFX_3D_CGLS (1 << 21) #define AMD_CG_SUPPORT_DRM_MGCG (1 << 22) #define AMD_CG_SUPPORT_DF_MGCG (1 << 23) - +#define AMD_CG_SUPPORT_VCN_MGCG (1 << 24) /* PG flags */ #define AMD_PG_SUPPORT_GFX_PG (1 << 0) #define AMD_PG_SUPPORT_GFX_SMG (1 << 1) @@ -108,6 +108,27 @@ enum amd_powergating_state { #define AMD_PG_SUPPORT_GFX_QUICK_MG (1 << 11) #define AMD_PG_SUPPORT_GFX_PIPELINE (1 << 12) #define AMD_PG_SUPPORT_MMHUB (1 << 13) +#define AMD_PG_SUPPORT_VCN (1 << 14) + +enum PP_FEATURE_MASK { + PP_SCLK_DPM_MASK = 0x1, + PP_MCLK_DPM_MASK = 0x2, + PP_PCIE_DPM_MASK = 0x4, + PP_SCLK_DEEP_SLEEP_MASK = 0x8, + PP_POWER_CONTAINMENT_MASK = 0x10, + PP_UVD_HANDSHAKE_MASK = 0x20, + PP_SMC_VOLTAGE_CONTROL_MASK = 0x40, + PP_VBI_TIME_SUPPORT_MASK = 0x80, + PP_ULV_MASK = 0x100, + PP_ENABLE_GFX_CG_THRU_SMU = 0x200, + PP_CLOCK_STRETCH_MASK = 0x400, + PP_OD_FUZZY_FAN_CONTROL_MASK = 0x800, + PP_SOCCLK_DPM_MASK = 0x1000, + PP_DCEFCLK_DPM_MASK = 0x2000, + PP_OVERDRIVE_MASK = 0x4000, + PP_GFXOFF_MASK = 0x8000, + PP_ACG_MASK = 0x10000, +}; struct amd_ip_funcs { /* Name of IP block */ diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h index f730d0629020..b6f74bf4af02 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h @@ -2095,6 +2095,18 @@ #define mmDC_GPIO_AUX_CTRL_2_BASE_IDX 2 #define mmDC_GPIO_RXEN 0x212f #define mmDC_GPIO_RXEN_BASE_IDX 2 +#define mmDC_GPIO_AUX_CTRL_3 0x2130 +#define mmDC_GPIO_AUX_CTRL_3_BASE_IDX 2 +#define mmDC_GPIO_AUX_CTRL_4 0x2131 +#define mmDC_GPIO_AUX_CTRL_4_BASE_IDX 2 +#define mmDC_GPIO_AUX_CTRL_5 0x2132 +#define mmDC_GPIO_AUX_CTRL_5_BASE_IDX 2 +#define mmAUXI2C_PAD_ALL_PWR_OK 0x2133 +#define mmAUXI2C_PAD_ALL_PWR_OK_BASE_IDX 2 +#define mmDC_GPIO_PULLUPEN 0x2134 +#define mmDC_GPIO_PULLUPEN_BASE_IDX 2 +#define mmDC_GPIO_AUX_CTRL_6 0x2135 +#define mmDC_GPIO_AUX_CTRL_6_BASE_IDX 2 #define mmBPHYC_DAC_MACRO_CNTL 0x2136 #define mmBPHYC_DAC_MACRO_CNTL_BASE_IDX 2 #define mmDAC_MACRO_CNTL_RESERVED0 0x2136 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h index 6d3162c42957..bcd190a3fcdd 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h @@ -10971,6 +10971,158 @@ #define DC_GPIO_RXEN__DC_GPIO_BLON_RXEN_MASK 0x00100000L #define DC_GPIO_RXEN__DC_GPIO_DIGON_RXEN_MASK 0x00200000L #define DC_GPIO_RXEN__DC_GPIO_ENA_BL_RXEN_MASK 0x00400000L +//DC_GPIO_AUX_CTRL_3 +#define DC_GPIO_AUX_CTRL_3__AUX1_NEN_RTERM__SHIFT 0x0 +#define DC_GPIO_AUX_CTRL_3__AUX2_NEN_RTERM__SHIFT 0x1 +#define DC_GPIO_AUX_CTRL_3__AUX3_NEN_RTERM__SHIFT 0x2 +#define DC_GPIO_AUX_CTRL_3__AUX4_NEN_RTERM__SHIFT 0x3 +#define DC_GPIO_AUX_CTRL_3__AUX5_NEN_RTERM__SHIFT 0x4 +#define DC_GPIO_AUX_CTRL_3__AUX6_NEN_RTERM__SHIFT 0x5 +#define DC_GPIO_AUX_CTRL_3__AUX1_DP_DN_SWAP__SHIFT 0x8 +#define DC_GPIO_AUX_CTRL_3__AUX2_DP_DN_SWAP__SHIFT 0x9 +#define DC_GPIO_AUX_CTRL_3__AUX3_DP_DN_SWAP__SHIFT 0xa +#define DC_GPIO_AUX_CTRL_3__AUX4_DP_DN_SWAP__SHIFT 0xb +#define DC_GPIO_AUX_CTRL_3__AUX5_DP_DN_SWAP__SHIFT 0xc +#define DC_GPIO_AUX_CTRL_3__AUX6_DP_DN_SWAP__SHIFT 0xd +#define DC_GPIO_AUX_CTRL_3__AUX1_HYS_TUNE__SHIFT 0x10 +#define DC_GPIO_AUX_CTRL_3__AUX2_HYS_TUNE__SHIFT 0x12 +#define DC_GPIO_AUX_CTRL_3__AUX3_HYS_TUNE__SHIFT 0x14 +#define DC_GPIO_AUX_CTRL_3__AUX4_HYS_TUNE__SHIFT 0x16 +#define DC_GPIO_AUX_CTRL_3__AUX5_HYS_TUNE__SHIFT 0x18 +#define DC_GPIO_AUX_CTRL_3__AUX6_HYS_TUNE__SHIFT 0x1a +#define DC_GPIO_AUX_CTRL_3__AUX1_NEN_RTERM_MASK 0x00000001L +#define DC_GPIO_AUX_CTRL_3__AUX2_NEN_RTERM_MASK 0x00000002L +#define DC_GPIO_AUX_CTRL_3__AUX3_NEN_RTERM_MASK 0x00000004L +#define DC_GPIO_AUX_CTRL_3__AUX4_NEN_RTERM_MASK 0x00000008L +#define DC_GPIO_AUX_CTRL_3__AUX5_NEN_RTERM_MASK 0x00000010L +#define DC_GPIO_AUX_CTRL_3__AUX6_NEN_RTERM_MASK 0x00000020L +#define DC_GPIO_AUX_CTRL_3__AUX1_DP_DN_SWAP_MASK 0x00000100L +#define DC_GPIO_AUX_CTRL_3__AUX2_DP_DN_SWAP_MASK 0x00000200L +#define DC_GPIO_AUX_CTRL_3__AUX3_DP_DN_SWAP_MASK 0x00000400L +#define DC_GPIO_AUX_CTRL_3__AUX4_DP_DN_SWAP_MASK 0x00000800L +#define DC_GPIO_AUX_CTRL_3__AUX5_DP_DN_SWAP_MASK 0x00001000L +#define DC_GPIO_AUX_CTRL_3__AUX6_DP_DN_SWAP_MASK 0x00002000L +#define DC_GPIO_AUX_CTRL_3__AUX1_HYS_TUNE_MASK 0x00030000L +#define DC_GPIO_AUX_CTRL_3__AUX2_HYS_TUNE_MASK 0x000C0000L +#define DC_GPIO_AUX_CTRL_3__AUX3_HYS_TUNE_MASK 0x00300000L +#define DC_GPIO_AUX_CTRL_3__AUX4_HYS_TUNE_MASK 0x00C00000L +#define DC_GPIO_AUX_CTRL_3__AUX5_HYS_TUNE_MASK 0x03000000L +#define DC_GPIO_AUX_CTRL_3__AUX6_HYS_TUNE_MASK 0x0C000000L +//DC_GPIO_AUX_CTRL_4 +#define DC_GPIO_AUX_CTRL_4__AUX1_AUX_CTRL__SHIFT 0x0 +#define DC_GPIO_AUX_CTRL_4__AUX2_AUX_CTRL__SHIFT 0x4 +#define DC_GPIO_AUX_CTRL_4__AUX3_AUX_CTRL__SHIFT 0x8 +#define DC_GPIO_AUX_CTRL_4__AUX4_AUX_CTRL__SHIFT 0xc +#define DC_GPIO_AUX_CTRL_4__AUX5_AUX_CTRL__SHIFT 0x10 +#define DC_GPIO_AUX_CTRL_4__AUX6_AUX_CTRL__SHIFT 0x14 +#define DC_GPIO_AUX_CTRL_4__AUX1_AUX_CTRL_MASK 0x0000000FL +#define DC_GPIO_AUX_CTRL_4__AUX2_AUX_CTRL_MASK 0x000000F0L +#define DC_GPIO_AUX_CTRL_4__AUX3_AUX_CTRL_MASK 0x00000F00L +#define DC_GPIO_AUX_CTRL_4__AUX4_AUX_CTRL_MASK 0x0000F000L +#define DC_GPIO_AUX_CTRL_4__AUX5_AUX_CTRL_MASK 0x000F0000L +#define DC_GPIO_AUX_CTRL_4__AUX6_AUX_CTRL_MASK 0x00F00000L +//DC_GPIO_AUX_CTRL_5 +#define DC_GPIO_AUX_CTRL_5__AUX1_VOD_TUNE__SHIFT 0x0 +#define DC_GPIO_AUX_CTRL_5__AUX2_VOD_TUNE__SHIFT 0x2 +#define DC_GPIO_AUX_CTRL_5__AUX3_VOD_TUNE__SHIFT 0x4 +#define DC_GPIO_AUX_CTRL_5__AUX4_VOD_TUNE__SHIFT 0x6 +#define DC_GPIO_AUX_CTRL_5__AUX5_VOD_TUNE__SHIFT 0x8 +#define DC_GPIO_AUX_CTRL_5__AUX6_VOD_TUNE__SHIFT 0xa +#define DC_GPIO_AUX_CTRL_5__DDC_PAD1_I2CMODE__SHIFT 0xc +#define DC_GPIO_AUX_CTRL_5__DDC_PAD2_I2CMODE__SHIFT 0xd +#define DC_GPIO_AUX_CTRL_5__DDC_PAD3_I2CMODE__SHIFT 0xe +#define DC_GPIO_AUX_CTRL_5__DDC_PAD4_I2CMODE__SHIFT 0xf +#define DC_GPIO_AUX_CTRL_5__DDC_PAD5_I2CMODE__SHIFT 0x10 +#define DC_GPIO_AUX_CTRL_5__DDC_PAD6_I2CMODE__SHIFT 0x11 +#define DC_GPIO_AUX_CTRL_5__DDC1_I2C_VPH_1V2_EN__SHIFT 0x12 +#define DC_GPIO_AUX_CTRL_5__DDC2_I2C_VPH_1V2_EN__SHIFT 0x13 +#define DC_GPIO_AUX_CTRL_5__DDC3_I2C_VPH_1V2_EN__SHIFT 0x14 +#define DC_GPIO_AUX_CTRL_5__DDC4_I2C_VPH_1V2_EN__SHIFT 0x15 +#define DC_GPIO_AUX_CTRL_5__DDC5_I2C_VPH_1V2_EN__SHIFT 0x16 +#define DC_GPIO_AUX_CTRL_5__DDC6_I2C_VPH_1V2_EN__SHIFT 0x17 +#define DC_GPIO_AUX_CTRL_5__DDC1_PAD_I2C_CTRL__SHIFT 0x18 +#define DC_GPIO_AUX_CTRL_5__DDC2_PAD_I2C_CTRL__SHIFT 0x19 +#define DC_GPIO_AUX_CTRL_5__DDC3_PAD_I2C_CTRL__SHIFT 0x1a +#define DC_GPIO_AUX_CTRL_5__DDC4_PAD_I2C_CTRL__SHIFT 0x1b +#define DC_GPIO_AUX_CTRL_5__DDC5_PAD_I2C_CTRL__SHIFT 0x1c +#define DC_GPIO_AUX_CTRL_5__DDC6_PAD_I2C_CTRL__SHIFT 0x1d +#define DC_GPIO_AUX_CTRL_5__AUX1_VOD_TUNE_MASK 0x00000003L +#define DC_GPIO_AUX_CTRL_5__AUX2_VOD_TUNE_MASK 0x0000000CL +#define DC_GPIO_AUX_CTRL_5__AUX3_VOD_TUNE_MASK 0x00000030L +#define DC_GPIO_AUX_CTRL_5__AUX4_VOD_TUNE_MASK 0x000000C0L +#define DC_GPIO_AUX_CTRL_5__AUX5_VOD_TUNE_MASK 0x00000300L +#define DC_GPIO_AUX_CTRL_5__AUX6_VOD_TUNE_MASK 0x00000C00L +#define DC_GPIO_AUX_CTRL_5__DDC_PAD1_I2CMODE_MASK 0x00001000L +#define DC_GPIO_AUX_CTRL_5__DDC_PAD2_I2CMODE_MASK 0x00002000L +#define DC_GPIO_AUX_CTRL_5__DDC_PAD3_I2CMODE_MASK 0x00004000L +#define DC_GPIO_AUX_CTRL_5__DDC_PAD4_I2CMODE_MASK 0x00008000L +#define DC_GPIO_AUX_CTRL_5__DDC_PAD5_I2CMODE_MASK 0x00010000L +#define DC_GPIO_AUX_CTRL_5__DDC_PAD6_I2CMODE_MASK 0x00020000L +#define DC_GPIO_AUX_CTRL_5__DDC1_I2C_VPH_1V2_EN_MASK 0x00040000L +#define DC_GPIO_AUX_CTRL_5__DDC2_I2C_VPH_1V2_EN_MASK 0x00080000L +#define DC_GPIO_AUX_CTRL_5__DDC3_I2C_VPH_1V2_EN_MASK 0x00100000L +#define DC_GPIO_AUX_CTRL_5__DDC4_I2C_VPH_1V2_EN_MASK 0x00200000L +#define DC_GPIO_AUX_CTRL_5__DDC5_I2C_VPH_1V2_EN_MASK 0x00400000L +#define DC_GPIO_AUX_CTRL_5__DDC6_I2C_VPH_1V2_EN_MASK 0x00800000L +#define DC_GPIO_AUX_CTRL_5__DDC1_PAD_I2C_CTRL_MASK 0x01000000L +#define DC_GPIO_AUX_CTRL_5__DDC2_PAD_I2C_CTRL_MASK 0x02000000L +#define DC_GPIO_AUX_CTRL_5__DDC3_PAD_I2C_CTRL_MASK 0x04000000L +#define DC_GPIO_AUX_CTRL_5__DDC4_PAD_I2C_CTRL_MASK 0x08000000L +#define DC_GPIO_AUX_CTRL_5__DDC5_PAD_I2C_CTRL_MASK 0x10000000L +#define DC_GPIO_AUX_CTRL_5__DDC6_PAD_I2C_CTRL_MASK 0x20000000L +//AUXI2C_PAD_ALL_PWR_OK +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY1_ALL_PWR_OK__SHIFT 0x0 +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY2_ALL_PWR_OK__SHIFT 0x1 +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY3_ALL_PWR_OK__SHIFT 0x2 +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY4_ALL_PWR_OK__SHIFT 0x3 +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY5_ALL_PWR_OK__SHIFT 0x4 +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY6_ALL_PWR_OK__SHIFT 0x5 +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY1_ALL_PWR_OK_MASK 0x00000001L +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY2_ALL_PWR_OK_MASK 0x00000002L +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY3_ALL_PWR_OK_MASK 0x00000004L +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY4_ALL_PWR_OK_MASK 0x00000008L +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY5_ALL_PWR_OK_MASK 0x00000010L +#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY6_ALL_PWR_OK_MASK 0x00000020L +//DC_GPIO_PULLUPEN +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICA_PU_EN__SHIFT 0x0 +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICB_PU_EN__SHIFT 0x1 +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICC_PU_EN__SHIFT 0x2 +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICD_PU_EN__SHIFT 0x3 +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICE_PU_EN__SHIFT 0x4 +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICF_PU_EN__SHIFT 0x5 +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICG_PU_EN__SHIFT 0x6 +#define DC_GPIO_PULLUPEN__DC_GPIO_HSYNCA_PU_EN__SHIFT 0x8 +#define DC_GPIO_PULLUPEN__DC_GPIO_VSYNCA_PU_EN__SHIFT 0x9 +#define DC_GPIO_PULLUPEN__DC_GPIO_HPD1_PU_EN__SHIFT 0xe +#define DC_GPIO_PULLUPEN__DC_GPIO_BLON_PU_EN__SHIFT 0x14 +#define DC_GPIO_PULLUPEN__DC_GPIO_DIGON_PU_EN__SHIFT 0x15 +#define DC_GPIO_PULLUPEN__DC_GPIO_ENA_BL_PU_EN__SHIFT 0x16 +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICA_PU_EN_MASK 0x00000001L +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICB_PU_EN_MASK 0x00000002L +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICC_PU_EN_MASK 0x00000004L +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICD_PU_EN_MASK 0x00000008L +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICE_PU_EN_MASK 0x00000010L +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICF_PU_EN_MASK 0x00000020L +#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICG_PU_EN_MASK 0x00000040L +#define DC_GPIO_PULLUPEN__DC_GPIO_HSYNCA_PU_EN_MASK 0x00000100L +#define DC_GPIO_PULLUPEN__DC_GPIO_VSYNCA_PU_EN_MASK 0x00000200L +#define DC_GPIO_PULLUPEN__DC_GPIO_HPD1_PU_EN_MASK 0x00004000L +#define DC_GPIO_PULLUPEN__DC_GPIO_BLON_PU_EN_MASK 0x00100000L +#define DC_GPIO_PULLUPEN__DC_GPIO_DIGON_PU_EN_MASK 0x00200000L +#define DC_GPIO_PULLUPEN__DC_GPIO_ENA_BL_PU_EN_MASK 0x00400000L +//DC_GPIO_AUX_CTRL_6 +#define DC_GPIO_AUX_CTRL_6__AUX1_PAD_RXSEL__SHIFT 0x0 +#define DC_GPIO_AUX_CTRL_6__AUX2_PAD_RXSEL__SHIFT 0x2 +#define DC_GPIO_AUX_CTRL_6__AUX3_PAD_RXSEL__SHIFT 0x4 +#define DC_GPIO_AUX_CTRL_6__AUX4_PAD_RXSEL__SHIFT 0x6 +#define DC_GPIO_AUX_CTRL_6__AUX5_PAD_RXSEL__SHIFT 0x8 +#define DC_GPIO_AUX_CTRL_6__AUX6_PAD_RXSEL__SHIFT 0xa +#define DC_GPIO_AUX_CTRL_6__AUX1_PAD_RXSEL_MASK 0x00000003L +#define DC_GPIO_AUX_CTRL_6__AUX2_PAD_RXSEL_MASK 0x0000000CL +#define DC_GPIO_AUX_CTRL_6__AUX3_PAD_RXSEL_MASK 0x00000030L +#define DC_GPIO_AUX_CTRL_6__AUX4_PAD_RXSEL_MASK 0x000000C0L +#define DC_GPIO_AUX_CTRL_6__AUX5_PAD_RXSEL_MASK 0x00000300L +#define DC_GPIO_AUX_CTRL_6__AUX6_PAD_RXSEL_MASK 0x00000C00L //BPHYC_DAC_MACRO_CNTL #define BPHYC_DAC_MACRO_CNTL__BPHYC_DAC_WHITE_LEVEL__SHIFT 0x0 #define BPHYC_DAC_MACRO_CNTL__BPHYC_DAC_WHITE_FINE_CONTROL__SHIFT 0x8 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h index 4ccf9681c45d..721c61171045 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h @@ -3895,6 +3895,10 @@ #define mmCM0_CM_MEM_PWR_CTRL_BASE_IDX 2 #define mmCM0_CM_MEM_PWR_STATUS 0x0d33 #define mmCM0_CM_MEM_PWR_STATUS_BASE_IDX 2 +#define mmCM0_CM_TEST_DEBUG_INDEX 0x0d35 +#define mmCM0_CM_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmCM0_CM_TEST_DEBUG_DATA 0x0d36 +#define mmCM0_CM_TEST_DEBUG_DATA_BASE_IDX 2 // addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec @@ -4367,7 +4371,10 @@ #define mmCM1_CM_MEM_PWR_CTRL_BASE_IDX 2 #define mmCM1_CM_MEM_PWR_STATUS 0x0e4e #define mmCM1_CM_MEM_PWR_STATUS_BASE_IDX 2 - +#define mmCM1_CM_TEST_DEBUG_INDEX 0x0e50 +#define mmCM1_CM_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmCM1_CM_TEST_DEBUG_DATA 0x0e51 +#define mmCM1_CM_TEST_DEBUG_DATA_BASE_IDX 2 // addressBlock: dce_dc_dpp1_dispdec_dpp_dcperfmon_dc_perfmon_dispdec // base address: 0x399c @@ -4839,7 +4846,10 @@ #define mmCM2_CM_MEM_PWR_CTRL_BASE_IDX 2 #define mmCM2_CM_MEM_PWR_STATUS 0x0f69 #define mmCM2_CM_MEM_PWR_STATUS_BASE_IDX 2 - +#define mmCM2_CM_TEST_DEBUG_INDEX 0x0f6b +#define mmCM2_CM_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmCM2_CM_TEST_DEBUG_DATA 0x0f6c +#define mmCM2_CM_TEST_DEBUG_DATA_BASE_IDX 2 // addressBlock: dce_dc_dpp2_dispdec_dpp_dcperfmon_dc_perfmon_dispdec // base address: 0x3e08 @@ -5311,7 +5321,10 @@ #define mmCM3_CM_MEM_PWR_CTRL_BASE_IDX 2 #define mmCM3_CM_MEM_PWR_STATUS 0x1084 #define mmCM3_CM_MEM_PWR_STATUS_BASE_IDX 2 - +#define mmCM3_CM_TEST_DEBUG_INDEX 0x1086 +#define mmCM3_CM_TEST_DEBUG_INDEX_BASE_IDX 2 +#define mmCM3_CM_TEST_DEBUG_DATA 0x1087 +#define mmCM3_CM_TEST_DEBUG_DATA_BASE_IDX 2 // addressBlock: dce_dc_dpp3_dispdec_dpp_dcperfmon_dc_perfmon_dispdec // base address: 0x4274 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h index e2a2f114bd8e..e7c0cad41081 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h @@ -14049,6 +14049,14 @@ #define CM0_CM_MEM_PWR_STATUS__RGAM_MEM_PWR_STATE__SHIFT 0x2 #define CM0_CM_MEM_PWR_STATUS__SHARED_MEM_PWR_STATE_MASK 0x00000003L #define CM0_CM_MEM_PWR_STATUS__RGAM_MEM_PWR_STATE_MASK 0x0000000CL +//CM0_CM_TEST_DEBUG_INDEX +#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT 0x0 +#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT 0x8 +#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK 0x000000FFL +#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK 0x00000100L +//CM0_CM_TEST_DEBUG_DATA +#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT 0x0 +#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK 0xFFFFFFFFL // addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h new file mode 100644 index 000000000000..9e19e723081b --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _df_1_7_DEFAULT_HEADER +#define _df_1_7_DEFAULT_HEADER + +#define mmFabricConfigAccessControl_DEFAULT 0x00000000 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h new file mode 100644 index 000000000000..e6044e27a913 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _df_1_7_OFFSET_HEADER +#define _df_1_7_OFFSET_HEADER + +#define mmFabricConfigAccessControl 0x0410 +#define mmFabricConfigAccessControl_BASE_IDX 0 + +#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc +#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 + +#define mmDF_CS_AON0_DramBaseAddress0 0x0044 +#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0 + +#define mmDF_CS_AON0_CoherentSlaveModeCtrlA0 0x0214 +#define mmDF_CS_AON0_CoherentSlaveModeCtrlA0_BASE_IDX 0 + + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h new file mode 100644 index 000000000000..a78c99480e2d --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _df_1_7_SH_MASK_HEADER +#define _df_1_7_SH_MASK_HEADER + +/* FabricConfigAccessControl */ +#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0 +#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1 +#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10 +#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L +#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L +#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L + +/* DF_PIE_AON0_DfGlobalClkGater */ +#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0 +#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL + +/* DF_CS_AON0_DramBaseAddress0 */ +#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 +#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 +#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4 +#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8 +#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc +#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L +#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L +#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L +#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L +#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L + +//DF_CS_AON0_CoherentSlaveModeCtrlA0 +#define DF_CS_AON0_CoherentSlaveModeCtrlA0__ForceParWrRMW__SHIFT 0x3 +#define DF_CS_AON0_CoherentSlaveModeCtrlA0__ForceParWrRMW_MASK 0x00000008L + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h new file mode 100644 index 000000000000..e58c207ac980 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _df_3_6_DEFAULT_HEADER +#define _df_3_6_DEFAULT_HEADER + +#define mmFabricConfigAccessControl_DEFAULT 0x00000000 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h new file mode 100644 index 000000000000..a9575db8d7aa --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _df_3_6_OFFSET_HEADER +#define _df_3_6_OFFSET_HEADER + +#define mmFabricConfigAccessControl 0x0410 +#define mmFabricConfigAccessControl_BASE_IDX 0 + +#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc +#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 + +#define mmDF_CS_UMC_AON0_DramBaseAddress0 0x0044 +#define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX 0 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h new file mode 100644 index 000000000000..06fac509e987 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _df_3_6_SH_MASK_HEADER +#define _df_3_6_SH_MASK_HEADER + +/* FabricConfigAccessControl */ +#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0 +#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1 +#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10 +#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L +#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L +#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L + +/* DF_PIE_AON0_DfGlobalClkGater */ +#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0 +#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL + +/* DF_CS_AON0_DramBaseAddress0 */ +#define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 +#define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 +#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x2 +#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x9 +#define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc +#define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L +#define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L +#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x0000003CL +#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000E00L +#define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L + +#endif diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index f696bbb643ef..7931502fa54f 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -632,6 +632,13 @@ typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2 ULONG ulReserved; }COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2; +typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3 +{ + COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 ulClock; + USHORT usMclk_fcw_frac; //fractional divider of fcw = usSclk_fcw_frac/65536 + USHORT usMclk_fcw_int; //integer divider of fcwc +}COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3; + //Input parameter of DynamicMemorySettingsTable //when ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag = COMPUTE_MEMORY_PLL_PARAM typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 0f5ad54d3fd3..092d800b703a 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -501,6 +501,32 @@ enum atom_cooling_solution_id{ LIQUID_COOLING = 0x01 }; +struct atom_firmware_info_v3_2 { + struct atom_common_table_header table_header; + uint32_t firmware_revision; + uint32_t bootup_sclk_in10khz; + uint32_t bootup_mclk_in10khz; + uint32_t firmware_capability; // enum atombios_firmware_capability + uint32_t main_call_parser_entry; /* direct address of main parser call in VBIOS binary. */ + uint32_t bios_scratch_reg_startaddr; // 1st bios scratch register dword address + uint16_t bootup_vddc_mv; + uint16_t bootup_vddci_mv; + uint16_t bootup_mvddc_mv; + uint16_t bootup_vddgfx_mv; + uint8_t mem_module_id; + uint8_t coolingsolution_id; /*0: Air cooling; 1: Liquid cooling ... */ + uint8_t reserved1[2]; + uint32_t mc_baseaddr_high; + uint32_t mc_baseaddr_low; + uint8_t board_i2c_feature_id; // enum of atom_board_i2c_feature_id_def + uint8_t board_i2c_feature_gpio_id; // i2c id find in gpio_lut data table gpio_id + uint8_t board_i2c_feature_slave_addr; + uint8_t reserved3; + uint16_t bootup_mvddq_mv; + uint16_t bootup_mvpp_mv; + uint32_t zfbstartaddrin16mb; + uint32_t reserved2[3]; +}; /* *************************************************************************** @@ -1169,7 +1195,64 @@ struct atom_gfx_info_v2_2 uint32_t rlc_gpu_timer_refclk; }; +struct atom_gfx_info_v2_3 { + struct atom_common_table_header table_header; + uint8_t gfxip_min_ver; + uint8_t gfxip_max_ver; + uint8_t max_shader_engines; + uint8_t max_tile_pipes; + uint8_t max_cu_per_sh; + uint8_t max_sh_per_se; + uint8_t max_backends_per_se; + uint8_t max_texture_channel_caches; + uint32_t regaddr_cp_dma_src_addr; + uint32_t regaddr_cp_dma_src_addr_hi; + uint32_t regaddr_cp_dma_dst_addr; + uint32_t regaddr_cp_dma_dst_addr_hi; + uint32_t regaddr_cp_dma_command; + uint32_t regaddr_cp_status; + uint32_t regaddr_rlc_gpu_clock_32; + uint32_t rlc_gpu_timer_refclk; + uint8_t active_cu_per_sh; + uint8_t active_rb_per_se; + uint16_t gcgoldenoffset; + uint32_t rm21_sram_vmin_value; +}; +struct atom_gfx_info_v2_4 { + struct atom_common_table_header table_header; + uint8_t gfxip_min_ver; + uint8_t gfxip_max_ver; + uint8_t gc_num_se; + uint8_t max_tile_pipes; + uint8_t gc_num_cu_per_sh; + uint8_t gc_num_sh_per_se; + uint8_t gc_num_rb_per_se; + uint8_t gc_num_tccs; + uint32_t regaddr_cp_dma_src_addr; + uint32_t regaddr_cp_dma_src_addr_hi; + uint32_t regaddr_cp_dma_dst_addr; + uint32_t regaddr_cp_dma_dst_addr_hi; + uint32_t regaddr_cp_dma_command; + uint32_t regaddr_cp_status; + uint32_t regaddr_rlc_gpu_clock_32; + uint32_t rlc_gpu_timer_refclk; + uint8_t active_cu_per_sh; + uint8_t active_rb_per_se; + uint16_t gcgoldenoffset; + uint16_t gc_num_gprs; + uint16_t gc_gsprim_buff_depth; + uint16_t gc_parameter_cache_depth; + uint16_t gc_wave_size; + uint16_t gc_max_waves_per_simd; + uint16_t gc_lds_size; + uint8_t gc_num_max_gs_thds; + uint8_t gc_gs_table_depth; + uint8_t gc_double_offchip_lds_buffer; + uint8_t gc_max_scratch_slots_per_cu; + uint32_t sram_rm_fuses_val; + uint32_t sram_custom_rm_fuses_val; +}; /* *************************************************************************** @@ -1198,6 +1281,76 @@ struct atom_smu_info_v3_1 uint8_t fw_ctf_polarity; // GPIO polarity for CTF }; +struct atom_smu_info_v3_2 { + struct atom_common_table_header table_header; + uint8_t smuip_min_ver; + uint8_t smuip_max_ver; + uint8_t smu_rsd1; + uint8_t gpuclk_ss_mode; + uint16_t sclk_ss_percentage; + uint16_t sclk_ss_rate_10hz; + uint16_t gpuclk_ss_percentage; // in unit of 0.001% + uint16_t gpuclk_ss_rate_10hz; + uint32_t core_refclk_10khz; + uint8_t ac_dc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for AC/DC switching, =0xff means invalid + uint8_t ac_dc_polarity; // GPIO polarity for AC/DC switching + uint8_t vr0hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR0 HOT event, =0xff means invalid + uint8_t vr0hot_polarity; // GPIO polarity for VR0 HOT event + uint8_t vr1hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR1 HOT event , =0xff means invalid + uint8_t vr1hot_polarity; // GPIO polarity for VR1 HOT event + uint8_t fw_ctf_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for CTF, =0xff means invalid + uint8_t fw_ctf_polarity; // GPIO polarity for CTF + uint8_t pcc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for PCC, =0xff means invalid + uint8_t pcc_gpio_polarity; // GPIO polarity for CTF + uint16_t smugoldenoffset; + uint32_t gpupll_vco_freq_10khz; + uint32_t bootup_smnclk_10khz; + uint32_t bootup_socclk_10khz; + uint32_t bootup_mp0clk_10khz; + uint32_t bootup_mp1clk_10khz; + uint32_t bootup_lclk_10khz; + uint32_t bootup_dcefclk_10khz; + uint32_t ctf_threshold_override_value; + uint32_t reserved[5]; +}; + +struct atom_smu_info_v3_3 { + struct atom_common_table_header table_header; + uint8_t smuip_min_ver; + uint8_t smuip_max_ver; + uint8_t smu_rsd1; + uint8_t gpuclk_ss_mode; + uint16_t sclk_ss_percentage; + uint16_t sclk_ss_rate_10hz; + uint16_t gpuclk_ss_percentage; // in unit of 0.001% + uint16_t gpuclk_ss_rate_10hz; + uint32_t core_refclk_10khz; + uint8_t ac_dc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for AC/DC switching, =0xff means invalid + uint8_t ac_dc_polarity; // GPIO polarity for AC/DC switching + uint8_t vr0hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR0 HOT event, =0xff means invalid + uint8_t vr0hot_polarity; // GPIO polarity for VR0 HOT event + uint8_t vr1hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR1 HOT event , =0xff means invalid + uint8_t vr1hot_polarity; // GPIO polarity for VR1 HOT event + uint8_t fw_ctf_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for CTF, =0xff means invalid + uint8_t fw_ctf_polarity; // GPIO polarity for CTF + uint8_t pcc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for PCC, =0xff means invalid + uint8_t pcc_gpio_polarity; // GPIO polarity for CTF + uint16_t smugoldenoffset; + uint32_t gpupll_vco_freq_10khz; + uint32_t bootup_smnclk_10khz; + uint32_t bootup_socclk_10khz; + uint32_t bootup_mp0clk_10khz; + uint32_t bootup_mp1clk_10khz; + uint32_t bootup_lclk_10khz; + uint32_t bootup_dcefclk_10khz; + uint32_t ctf_threshold_override_value; + uint32_t syspll3_0_vco_freq_10khz; + uint32_t syspll3_1_vco_freq_10khz; + uint32_t bootup_fclk_10khz; + uint32_t bootup_waflclk_10khz; + uint32_t reserved[3]; +}; + /* *************************************************************************** Data Table smc_dpm_info structure @@ -1283,7 +1436,6 @@ struct atom_smc_dpm_info_v4_1 uint32_t boardreserved[10]; }; - /* *************************************************************************** Data Table asic_profiling_info structure @@ -1864,6 +2016,53 @@ enum atom_smu9_syspll0_clock_id SMU9_SYSPLL0_DISPCLK_ID = 11, // DISPCLK }; +enum atom_smu11_syspll_id { + SMU11_SYSPLL0_ID = 0, + SMU11_SYSPLL1_0_ID = 1, + SMU11_SYSPLL1_1_ID = 2, + SMU11_SYSPLL1_2_ID = 3, + SMU11_SYSPLL2_ID = 4, + SMU11_SYSPLL3_0_ID = 5, + SMU11_SYSPLL3_1_ID = 6, +}; + +enum atom_smu11_syspll0_clock_id { + SMU11_SYSPLL0_ECLK_ID = 0, // ECLK + SMU11_SYSPLL0_SOCCLK_ID = 1, // SOCCLK + SMU11_SYSPLL0_MP0CLK_ID = 2, // MP0CLK + SMU11_SYSPLL0_DCLK_ID = 3, // DCLK + SMU11_SYSPLL0_VCLK_ID = 4, // VCLK + SMU11_SYSPLL0_DCEFCLK_ID = 5, // DCEFCLK +}; + +enum atom_smu11_syspll1_0_clock_id { + SMU11_SYSPLL1_0_UCLKA_ID = 0, // UCLK_a +}; + +enum atom_smu11_syspll1_1_clock_id { + SMU11_SYSPLL1_0_UCLKB_ID = 0, // UCLK_b +}; + +enum atom_smu11_syspll1_2_clock_id { + SMU11_SYSPLL1_0_FCLK_ID = 0, // FCLK +}; + +enum atom_smu11_syspll2_clock_id { + SMU11_SYSPLL2_GFXCLK_ID = 0, // GFXCLK +}; + +enum atom_smu11_syspll3_0_clock_id { + SMU11_SYSPLL3_0_WAFCLK_ID = 0, // WAFCLK + SMU11_SYSPLL3_0_DISPCLK_ID = 1, // DISPCLK + SMU11_SYSPLL3_0_DPREFCLK_ID = 2, // DPREFCLK +}; + +enum atom_smu11_syspll3_1_clock_id { + SMU11_SYSPLL3_1_MP1CLK_ID = 0, // MP1CLK + SMU11_SYSPLL3_1_SMNCLK_ID = 1, // SMNCLK + SMU11_SYSPLL3_1_LCLK_ID = 2, // LCLK +}; + struct atom_get_smu_clock_info_output_parameters_v3_1 { union { diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h index f2814ae7ecdd..a69deb3a2ac0 100644 --- a/drivers/gpu/drm/amd/include/cgs_common.h +++ b/drivers/gpu/drm/amd/include/cgs_common.h @@ -42,20 +42,6 @@ enum cgs_ind_reg { CGS_IND_REG__AUDIO_ENDPT }; -/** - * enum cgs_engine - Engines that can be statically power-gated - */ -enum cgs_engine { - CGS_ENGINE__UVD, - CGS_ENGINE__VCE, - CGS_ENGINE__VP8, - CGS_ENGINE__ACP_DMA, - CGS_ENGINE__ACP_DSP0, - CGS_ENGINE__ACP_DSP1, - CGS_ENGINE__ISP, - /* ... */ -}; - /* * enum cgs_ucode_id - Firmware types for different IPs */ @@ -76,17 +62,6 @@ enum cgs_ucode_id { CGS_UCODE_ID_MAXIMUM, }; -/* - * enum cgs_resource_type - GPU resource type - */ -enum cgs_resource_type { - CGS_RESOURCE_TYPE_MMIO = 0, - CGS_RESOURCE_TYPE_FB, - CGS_RESOURCE_TYPE_IO, - CGS_RESOURCE_TYPE_DOORBELL, - CGS_RESOURCE_TYPE_ROM, -}; - /** * struct cgs_firmware_info - Firmware information */ @@ -104,17 +79,6 @@ struct cgs_firmware_info { bool is_kicker; }; -struct cgs_mode_info { - uint32_t refresh_rate; - uint32_t vblank_time_us; -}; - -struct cgs_display_info { - uint32_t display_count; - uint32_t active_display_mask; - struct cgs_mode_info *mode_info; -}; - typedef unsigned long cgs_handle_t; /** @@ -170,119 +134,18 @@ typedef void (*cgs_write_ind_register_t)(struct cgs_device *cgs_device, enum cgs #define CGS_WREG32_FIELD_IND(device, space, reg, field, val) \ cgs_write_ind_register(device, space, ix##reg, (cgs_read_ind_register(device, space, ix##reg) & ~CGS_REG_FIELD_MASK(reg, field)) | (val) << CGS_REG_FIELD_SHIFT(reg, field)) -/** - * cgs_get_pci_resource() - provide access to a device resource (PCI BAR) - * @cgs_device: opaque device handle - * @resource_type: Type of Resource (MMIO, IO, ROM, FB, DOORBELL) - * @size: size of the region - * @offset: offset from the start of the region - * @resource_base: base address (not including offset) returned - * - * Return: 0 on success, -errno otherwise - */ -typedef int (*cgs_get_pci_resource_t)(struct cgs_device *cgs_device, - enum cgs_resource_type resource_type, - uint64_t size, - uint64_t offset, - uint64_t *resource_base); - -/** - * cgs_atom_get_data_table() - Get a pointer to an ATOM BIOS data table - * @cgs_device: opaque device handle - * @table: data table index - * @size: size of the table (output, may be NULL) - * @frev: table format revision (output, may be NULL) - * @crev: table content revision (output, may be NULL) - * - * Return: Pointer to start of the table, or NULL on failure - */ -typedef const void *(*cgs_atom_get_data_table_t)( - struct cgs_device *cgs_device, unsigned table, - uint16_t *size, uint8_t *frev, uint8_t *crev); - -/** - * cgs_atom_get_cmd_table_revs() - Get ATOM BIOS command table revisions - * @cgs_device: opaque device handle - * @table: data table index - * @frev: table format revision (output, may be NULL) - * @crev: table content revision (output, may be NULL) - * - * Return: 0 on success, -errno otherwise - */ -typedef int (*cgs_atom_get_cmd_table_revs_t)(struct cgs_device *cgs_device, unsigned table, - uint8_t *frev, uint8_t *crev); - -/** - * cgs_atom_exec_cmd_table() - Execute an ATOM BIOS command table - * @cgs_device: opaque device handle - * @table: command table index - * @args: arguments - * - * Return: 0 on success, -errno otherwise - */ -typedef int (*cgs_atom_exec_cmd_table_t)(struct cgs_device *cgs_device, - unsigned table, void *args); - -/** - * cgs_get_firmware_info - Get the firmware information from core driver - * @cgs_device: opaque device handle - * @type: the firmware type - * @info: returend firmware information - * - * Return: 0 on success, -errno otherwise - */ typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info); -typedef int (*cgs_rel_firmware)(struct cgs_device *cgs_device, - enum cgs_ucode_id type); - -typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device, - enum amd_ip_block_type block_type, - enum amd_powergating_state state); - -typedef int(*cgs_set_clockgating_state)(struct cgs_device *cgs_device, - enum amd_ip_block_type block_type, - enum amd_clockgating_state state); - -typedef int(*cgs_get_active_displays_info)( - struct cgs_device *cgs_device, - struct cgs_display_info *info); - -typedef int (*cgs_notify_dpm_enabled)(struct cgs_device *cgs_device, bool enabled); - -typedef int (*cgs_is_virtualization_enabled_t)(void *cgs_device); - -typedef int (*cgs_enter_safe_mode)(struct cgs_device *cgs_device, bool en); - -typedef void (*cgs_lock_grbm_idx)(struct cgs_device *cgs_device, bool lock); - struct cgs_ops { /* MMIO access */ cgs_read_register_t read_register; cgs_write_register_t write_register; cgs_read_ind_register_t read_ind_register; cgs_write_ind_register_t write_ind_register; - /* PCI resources */ - cgs_get_pci_resource_t get_pci_resource; - /* ATOM BIOS */ - cgs_atom_get_data_table_t atom_get_data_table; - cgs_atom_get_cmd_table_revs_t atom_get_cmd_table_revs; - cgs_atom_exec_cmd_table_t atom_exec_cmd_table; /* Firmware Info */ cgs_get_firmware_info get_firmware_info; - cgs_rel_firmware rel_firmware; - /* cg pg interface*/ - cgs_set_powergating_state set_powergating_state; - cgs_set_clockgating_state set_clockgating_state; - /* display manager */ - cgs_get_active_displays_info get_active_displays_info; - /* notify dpm enabled */ - cgs_notify_dpm_enabled notify_dpm_enabled; - cgs_is_virtualization_enabled_t is_virtualization_enabled; - cgs_enter_safe_mode enter_safe_mode; - cgs_lock_grbm_idx lock_grbm_idx; }; struct cgs_os_ops; /* To be define in OS-specific CGS header */ @@ -309,40 +172,7 @@ struct cgs_device #define cgs_write_ind_register(dev,space,index,value) \ CGS_CALL(write_ind_register,dev,space,index,value) -#define cgs_atom_get_data_table(dev,table,size,frev,crev) \ - CGS_CALL(atom_get_data_table,dev,table,size,frev,crev) -#define cgs_atom_get_cmd_table_revs(dev,table,frev,crev) \ - CGS_CALL(atom_get_cmd_table_revs,dev,table,frev,crev) -#define cgs_atom_exec_cmd_table(dev,table,args) \ - CGS_CALL(atom_exec_cmd_table,dev,table,args) - #define cgs_get_firmware_info(dev, type, info) \ CGS_CALL(get_firmware_info, dev, type, info) -#define cgs_rel_firmware(dev, type) \ - CGS_CALL(rel_firmware, dev, type) -#define cgs_set_powergating_state(dev, block_type, state) \ - CGS_CALL(set_powergating_state, dev, block_type, state) -#define cgs_set_clockgating_state(dev, block_type, state) \ - CGS_CALL(set_clockgating_state, dev, block_type, state) -#define cgs_notify_dpm_enabled(dev, enabled) \ - CGS_CALL(notify_dpm_enabled, dev, enabled) - -#define cgs_get_active_displays_info(dev, info) \ - CGS_CALL(get_active_displays_info, dev, info) - -#define cgs_get_pci_resource(cgs_device, resource_type, size, offset, \ - resource_base) \ - CGS_CALL(get_pci_resource, cgs_device, resource_type, size, offset, \ - resource_base) - -#define cgs_is_virtualization_enabled(cgs_device) \ - CGS_CALL(is_virtualization_enabled, cgs_device) - -#define cgs_enter_safe_mode(cgs_device, en) \ - CGS_CALL(enter_safe_mode, cgs_device, en) - -#define cgs_lock_grbm_idx(cgs_device, lock) \ - CGS_CALL(lock_grbm_idx, cgs_device, lock) - #endif /* _CGS_COMMON_H */ diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 237289a72bb7..5733fbee07f7 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -100,6 +100,21 @@ struct kgd2kfd_shared_resources { /* Bit n == 1 means Queue n is available for KFD */ DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES); + /* Doorbell assignments (SOC15 and later chips only). Only + * specific doorbells are routed to each SDMA engine. Others + * are routed to IH and VCN. They are not usable by the CP. + * + * Any doorbell number D that satisfies the following condition + * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val + * + * KFD currently uses 1024 (= 0x3ff) doorbells per process. If + * doorbells 0x0f0-0x0f7 and 0x2f-0x2f7 are reserved, that means + * mask would be set to 0x1f8 and val set to 0x0f0. + */ + unsigned int sdma_doorbell[2][2]; + unsigned int reserved_doorbell_mask; + unsigned int reserved_doorbell_val; + /* Base address of doorbell aperture. */ phys_addr_t doorbell_physical_address; @@ -173,8 +188,6 @@ struct tile_config { * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp * scheduling mode. Only used for no cp scheduling mode. * - * @init_pipeline: Initialized the compute pipelines. - * * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp * sceduling mode. * @@ -274,9 +287,6 @@ struct kfd2kgd_calls { int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); - int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); - int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, @@ -382,6 +392,10 @@ struct kfd2kgd_calls { * * @resume: Notifies amdkfd about a resume action done to a kgd device * + * @quiesce_mm: Quiesce all user queue access to specified MM address space + * + * @resume_mm: Resume user queue access to specified MM address space + * * @schedule_evict_and_restore_process: Schedules work queue that will prepare * for safe eviction of KFD BOs that belong to the specified process. * @@ -399,6 +413,8 @@ struct kgd2kfd_calls { void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); void (*suspend)(struct kfd_dev *kfd); int (*resume)(struct kfd_dev *kfd); + int (*quiesce_mm)(struct mm_struct *mm); + int (*resume_mm)(struct mm_struct *mm); int (*schedule_evict_and_restore_process)(struct mm_struct *mm, struct dma_fence *fence); }; diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 5c840c022b52..06f08f34a110 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -94,6 +94,7 @@ enum pp_clock_type { PP_PCIE, OD_SCLK, OD_MCLK, + OD_RANGE, }; enum amd_pp_sensors { @@ -149,13 +150,6 @@ struct pp_states_info { uint32_t states[16]; }; -struct pp_gpu_power { - uint32_t vddc_power; - uint32_t vddci_power; - uint32_t max_gpu_power; - uint32_t average_gpu_power; -}; - #define PP_GROUP_MASK 0xF0000000 #define PP_GROUP_SHIFT 28 @@ -246,11 +240,6 @@ struct amd_pm_funcs { int (*load_firmware)(void *handle); int (*wait_for_fw_loading_complete)(void *handle); int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id); - int (*notify_smu_memory_info)(void *handle, uint32_t virtual_addr_low, - uint32_t virtual_addr_hi, - uint32_t mc_addr_low, - uint32_t mc_addr_hi, - uint32_t size); int (*set_power_limit)(void *handle, uint32_t n); int (*get_power_limit)(void *handle, uint32_t *limit, bool default_limit); /* export to DC */ diff --git a/drivers/gpu/drm/amd/include/soc15_ih_clientid.h b/drivers/gpu/drm/amd/include/soc15_ih_clientid.h index a12d4f27cfa4..12e196c15bbe 100644 --- a/drivers/gpu/drm/amd/include/soc15_ih_clientid.h +++ b/drivers/gpu/drm/amd/include/soc15_ih_clientid.h @@ -43,6 +43,7 @@ enum soc15_ih_clientid { SOC15_IH_CLIENTID_SE2SH = 0x0c, SOC15_IH_CLIENTID_SE3SH = 0x0d, SOC15_IH_CLIENTID_SYSHUB = 0x0e, + SOC15_IH_CLIENTID_UVD1 = 0x0e, SOC15_IH_CLIENTID_THM = 0x0f, SOC15_IH_CLIENTID_UVD = 0x10, SOC15_IH_CLIENTID_VCE0 = 0x11, diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h index 2fb25abaf7c8..ceaf4932258d 100644 --- a/drivers/gpu/drm/amd/include/v9_structs.h +++ b/drivers/gpu/drm/amd/include/v9_structs.h @@ -29,10 +29,10 @@ struct v9_sdma_mqd { uint32_t sdmax_rlcx_rb_base; uint32_t sdmax_rlcx_rb_base_hi; uint32_t sdmax_rlcx_rb_rptr; + uint32_t sdmax_rlcx_rb_rptr_hi; uint32_t sdmax_rlcx_rb_wptr; + uint32_t sdmax_rlcx_rb_wptr_hi; uint32_t sdmax_rlcx_rb_wptr_poll_cntl; - uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi; - uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo; uint32_t sdmax_rlcx_rb_rptr_addr_hi; uint32_t sdmax_rlcx_rb_rptr_addr_lo; uint32_t sdmax_rlcx_ib_cntl; @@ -44,29 +44,29 @@ struct v9_sdma_mqd { uint32_t sdmax_rlcx_skip_cntl; uint32_t sdmax_rlcx_context_status; uint32_t sdmax_rlcx_doorbell; - uint32_t sdmax_rlcx_virtual_addr; - uint32_t sdmax_rlcx_ape1_cntl; + uint32_t sdmax_rlcx_status; uint32_t sdmax_rlcx_doorbell_log; - uint32_t reserved_22; - uint32_t reserved_23; - uint32_t reserved_24; - uint32_t reserved_25; - uint32_t reserved_26; - uint32_t reserved_27; - uint32_t reserved_28; - uint32_t reserved_29; - uint32_t reserved_30; - uint32_t reserved_31; - uint32_t reserved_32; - uint32_t reserved_33; - uint32_t reserved_34; - uint32_t reserved_35; - uint32_t reserved_36; - uint32_t reserved_37; - uint32_t reserved_38; - uint32_t reserved_39; - uint32_t reserved_40; - uint32_t reserved_41; + uint32_t sdmax_rlcx_watermark; + uint32_t sdmax_rlcx_doorbell_offset; + uint32_t sdmax_rlcx_csa_addr_lo; + uint32_t sdmax_rlcx_csa_addr_hi; + uint32_t sdmax_rlcx_ib_sub_remain; + uint32_t sdmax_rlcx_preempt; + uint32_t sdmax_rlcx_dummy_reg; + uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi; + uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo; + uint32_t sdmax_rlcx_rb_aql_cntl; + uint32_t sdmax_rlcx_minor_ptr_update; + uint32_t sdmax_rlcx_midcmd_data0; + uint32_t sdmax_rlcx_midcmd_data1; + uint32_t sdmax_rlcx_midcmd_data2; + uint32_t sdmax_rlcx_midcmd_data3; + uint32_t sdmax_rlcx_midcmd_data4; + uint32_t sdmax_rlcx_midcmd_data5; + uint32_t sdmax_rlcx_midcmd_data6; + uint32_t sdmax_rlcx_midcmd_data7; + uint32_t sdmax_rlcx_midcmd_data8; + uint32_t sdmax_rlcx_midcmd_cntl; uint32_t reserved_42; uint32_t reserved_43; uint32_t reserved_44; diff --git a/drivers/gpu/drm/amd/include/vega20_ip_offset.h b/drivers/gpu/drm/amd/include/vega20_ip_offset.h new file mode 100644 index 000000000000..2a2a9cc8bedb --- /dev/null +++ b/drivers/gpu/drm/amd/include/vega20_ip_offset.h @@ -0,0 +1,1051 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _vega20_ip_offset_HEADER +#define _vega20_ip_offset_HEADER + +#define MAX_INSTANCE 6 +#define MAX_SEGMENT 6 + + +struct IP_BASE_INSTANCE +{ + unsigned int segment[MAX_SEGMENT]; +}; + +struct IP_BASE +{ + struct IP_BASE_INSTANCE instance[MAX_INSTANCE]; +}; + + +static const struct IP_BASE ATHUB_BASE ={ { { { 0x00000C20, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE CLK_BASE ={ { { { 0x00016C00, 0x00016E00, 0x00017000, 0x00017200, 0x0001B000, 0x0001B200 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE DCE_BASE ={ { { { 0x00000012, 0x000000C0, 0x000034C0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE DF_BASE ={ { { { 0x00007000, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE FUSE_BASE ={ { { { 0x00017400, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE GC_BASE ={ { { { 0x00002000, 0x0000A000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE HDP_BASE ={ { { { 0x00000F20, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE MMHUB_BASE ={ { { { 0x0001A000, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE MP0_BASE ={ { { { 0x00016000, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE MP1_BASE ={ { { { 0x00016000, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE NBIO_BASE ={ { { { 0x00000000, 0x00000014, 0x00000D20, 0x00010400, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE OSSSYS_BASE ={ { { { 0x000010A0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE SDMA0_BASE ={ { { { 0x00001260, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE SDMA1_BASE ={ { { { 0x00001860, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE SMUIO_BASE ={ { { { 0x00016800, 0x00016A00, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE THM_BASE ={ { { { 0x00016600, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE UMC_BASE ={ { { { 0x00014000, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE UVD_BASE ={ { { { 0x00007800, 0x00007E00, 0, 0, 0, 0 } }, + { { 0, 0x00009000, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +/* Adjust VCE_BASE to make vce_4_1 use vce_4_0 offset header files*/ +static const struct IP_BASE VCE_BASE ={ { { { 0x00007E00/* 0x00008800 */, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE XDMA_BASE ={ { { { 0x00003400, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; +static const struct IP_BASE RSMU_BASE ={ { { { 0x00012000, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } }, + { { 0, 0, 0, 0, 0, 0 } } } }; + + +#define ATHUB_BASE__INST0_SEG0 0x00000C20 +#define ATHUB_BASE__INST0_SEG1 0 +#define ATHUB_BASE__INST0_SEG2 0 +#define ATHUB_BASE__INST0_SEG3 0 +#define ATHUB_BASE__INST0_SEG4 0 +#define ATHUB_BASE__INST0_SEG5 0 + +#define ATHUB_BASE__INST1_SEG0 0 +#define ATHUB_BASE__INST1_SEG1 0 +#define ATHUB_BASE__INST1_SEG2 0 +#define ATHUB_BASE__INST1_SEG3 0 +#define ATHUB_BASE__INST1_SEG4 0 +#define ATHUB_BASE__INST1_SEG5 0 + +#define ATHUB_BASE__INST2_SEG0 0 +#define ATHUB_BASE__INST2_SEG1 0 +#define ATHUB_BASE__INST2_SEG2 0 +#define ATHUB_BASE__INST2_SEG3 0 +#define ATHUB_BASE__INST2_SEG4 0 +#define ATHUB_BASE__INST2_SEG5 0 + +#define ATHUB_BASE__INST3_SEG0 0 +#define ATHUB_BASE__INST3_SEG1 0 +#define ATHUB_BASE__INST3_SEG2 0 +#define ATHUB_BASE__INST3_SEG3 0 +#define ATHUB_BASE__INST3_SEG4 0 +#define ATHUB_BASE__INST3_SEG5 0 + +#define ATHUB_BASE__INST4_SEG0 0 +#define ATHUB_BASE__INST4_SEG1 0 +#define ATHUB_BASE__INST4_SEG2 0 +#define ATHUB_BASE__INST4_SEG3 0 +#define ATHUB_BASE__INST4_SEG4 0 +#define ATHUB_BASE__INST4_SEG5 0 + +#define ATHUB_BASE__INST5_SEG0 0 +#define ATHUB_BASE__INST5_SEG1 0 +#define ATHUB_BASE__INST5_SEG2 0 +#define ATHUB_BASE__INST5_SEG3 0 +#define ATHUB_BASE__INST5_SEG4 0 +#define ATHUB_BASE__INST5_SEG5 0 + +#define CLK_BASE__INST0_SEG0 0x00016C00 +#define CLK_BASE__INST0_SEG1 0x00016E00 +#define CLK_BASE__INST0_SEG2 0x00017000 +#define CLK_BASE__INST0_SEG3 0x00017200 +#define CLK_BASE__INST0_SEG4 0x0001B000 +#define CLK_BASE__INST0_SEG5 0x0001B200 + +#define CLK_BASE__INST1_SEG0 0 +#define CLK_BASE__INST1_SEG1 0 +#define CLK_BASE__INST1_SEG2 0 +#define CLK_BASE__INST1_SEG3 0 +#define CLK_BASE__INST1_SEG4 0 +#define CLK_BASE__INST1_SEG5 0 + +#define CLK_BASE__INST2_SEG0 0 +#define CLK_BASE__INST2_SEG1 0 +#define CLK_BASE__INST2_SEG2 0 +#define CLK_BASE__INST2_SEG3 0 +#define CLK_BASE__INST2_SEG4 0 +#define CLK_BASE__INST2_SEG5 0 + +#define CLK_BASE__INST3_SEG0 0 +#define CLK_BASE__INST3_SEG1 0 +#define CLK_BASE__INST3_SEG2 0 +#define CLK_BASE__INST3_SEG3 0 +#define CLK_BASE__INST3_SEG4 0 +#define CLK_BASE__INST3_SEG5 0 + +#define CLK_BASE__INST4_SEG0 0 +#define CLK_BASE__INST4_SEG1 0 +#define CLK_BASE__INST4_SEG2 0 +#define CLK_BASE__INST4_SEG3 0 +#define CLK_BASE__INST4_SEG4 0 +#define CLK_BASE__INST4_SEG5 0 + +#define CLK_BASE__INST5_SEG0 0 +#define CLK_BASE__INST5_SEG1 0 +#define CLK_BASE__INST5_SEG2 0 +#define CLK_BASE__INST5_SEG3 0 +#define CLK_BASE__INST5_SEG4 0 +#define CLK_BASE__INST5_SEG5 0 + +#define DCE_BASE__INST0_SEG0 0x00000012 +#define DCE_BASE__INST0_SEG1 0x000000C0 +#define DCE_BASE__INST0_SEG2 0x000034C0 +#define DCE_BASE__INST0_SEG3 0 +#define DCE_BASE__INST0_SEG4 0 +#define DCE_BASE__INST0_SEG5 0 + +#define DCE_BASE__INST1_SEG0 0 +#define DCE_BASE__INST1_SEG1 0 +#define DCE_BASE__INST1_SEG2 0 +#define DCE_BASE__INST1_SEG3 0 +#define DCE_BASE__INST1_SEG4 0 +#define DCE_BASE__INST1_SEG5 0 + +#define DCE_BASE__INST2_SEG0 0 +#define DCE_BASE__INST2_SEG1 0 +#define DCE_BASE__INST2_SEG2 0 +#define DCE_BASE__INST2_SEG3 0 +#define DCE_BASE__INST2_SEG4 0 +#define DCE_BASE__INST2_SEG5 0 + +#define DCE_BASE__INST3_SEG0 0 +#define DCE_BASE__INST3_SEG1 0 +#define DCE_BASE__INST3_SEG2 0 +#define DCE_BASE__INST3_SEG3 0 +#define DCE_BASE__INST3_SEG4 0 +#define DCE_BASE__INST3_SEG5 0 + +#define DCE_BASE__INST4_SEG0 0 +#define DCE_BASE__INST4_SEG1 0 +#define DCE_BASE__INST4_SEG2 0 +#define DCE_BASE__INST4_SEG3 0 +#define DCE_BASE__INST4_SEG4 0 +#define DCE_BASE__INST4_SEG5 0 + +#define DCE_BASE__INST5_SEG0 0 +#define DCE_BASE__INST5_SEG1 0 +#define DCE_BASE__INST5_SEG2 0 +#define DCE_BASE__INST5_SEG3 0 +#define DCE_BASE__INST5_SEG4 0 +#define DCE_BASE__INST5_SEG5 0 + +#define DF_BASE__INST0_SEG0 0x00007000 +#define DF_BASE__INST0_SEG1 0 +#define DF_BASE__INST0_SEG2 0 +#define DF_BASE__INST0_SEG3 0 +#define DF_BASE__INST0_SEG4 0 +#define DF_BASE__INST0_SEG5 0 + +#define DF_BASE__INST1_SEG0 0 +#define DF_BASE__INST1_SEG1 0 +#define DF_BASE__INST1_SEG2 0 +#define DF_BASE__INST1_SEG3 0 +#define DF_BASE__INST1_SEG4 0 +#define DF_BASE__INST1_SEG5 0 + +#define DF_BASE__INST2_SEG0 0 +#define DF_BASE__INST2_SEG1 0 +#define DF_BASE__INST2_SEG2 0 +#define DF_BASE__INST2_SEG3 0 +#define DF_BASE__INST2_SEG4 0 +#define DF_BASE__INST2_SEG5 0 + +#define DF_BASE__INST3_SEG0 0 +#define DF_BASE__INST3_SEG1 0 +#define DF_BASE__INST3_SEG2 0 +#define DF_BASE__INST3_SEG3 0 +#define DF_BASE__INST3_SEG4 0 +#define DF_BASE__INST3_SEG5 0 + +#define DF_BASE__INST4_SEG0 0 +#define DF_BASE__INST4_SEG1 0 +#define DF_BASE__INST4_SEG2 0 +#define DF_BASE__INST4_SEG3 0 +#define DF_BASE__INST4_SEG4 0 +#define DF_BASE__INST4_SEG5 0 + +#define DF_BASE__INST5_SEG0 0 +#define DF_BASE__INST5_SEG1 0 +#define DF_BASE__INST5_SEG2 0 +#define DF_BASE__INST5_SEG3 0 +#define DF_BASE__INST5_SEG4 0 +#define DF_BASE__INST5_SEG5 0 + +#define FUSE_BASE__INST0_SEG0 0x00017400 +#define FUSE_BASE__INST0_SEG1 0 +#define FUSE_BASE__INST0_SEG2 0 +#define FUSE_BASE__INST0_SEG3 0 +#define FUSE_BASE__INST0_SEG4 0 +#define FUSE_BASE__INST0_SEG5 0 + +#define FUSE_BASE__INST1_SEG0 0 +#define FUSE_BASE__INST1_SEG1 0 +#define FUSE_BASE__INST1_SEG2 0 +#define FUSE_BASE__INST1_SEG3 0 +#define FUSE_BASE__INST1_SEG4 0 +#define FUSE_BASE__INST1_SEG5 0 + +#define FUSE_BASE__INST2_SEG0 0 +#define FUSE_BASE__INST2_SEG1 0 +#define FUSE_BASE__INST2_SEG2 0 +#define FUSE_BASE__INST2_SEG3 0 +#define FUSE_BASE__INST2_SEG4 0 +#define FUSE_BASE__INST2_SEG5 0 + +#define FUSE_BASE__INST3_SEG0 0 +#define FUSE_BASE__INST3_SEG1 0 +#define FUSE_BASE__INST3_SEG2 0 +#define FUSE_BASE__INST3_SEG3 0 +#define FUSE_BASE__INST3_SEG4 0 +#define FUSE_BASE__INST3_SEG5 0 + +#define FUSE_BASE__INST4_SEG0 0 +#define FUSE_BASE__INST4_SEG1 0 +#define FUSE_BASE__INST4_SEG2 0 +#define FUSE_BASE__INST4_SEG3 0 +#define FUSE_BASE__INST4_SEG4 0 +#define FUSE_BASE__INST4_SEG5 0 + +#define FUSE_BASE__INST5_SEG0 0 +#define FUSE_BASE__INST5_SEG1 0 +#define FUSE_BASE__INST5_SEG2 0 +#define FUSE_BASE__INST5_SEG3 0 +#define FUSE_BASE__INST5_SEG4 0 +#define FUSE_BASE__INST5_SEG5 0 + +#define GC_BASE__INST0_SEG0 0x00002000 +#define GC_BASE__INST0_SEG1 0x0000A000 +#define GC_BASE__INST0_SEG2 0 +#define GC_BASE__INST0_SEG3 0 +#define GC_BASE__INST0_SEG4 0 +#define GC_BASE__INST0_SEG5 0 + +#define GC_BASE__INST1_SEG0 0 +#define GC_BASE__INST1_SEG1 0 +#define GC_BASE__INST1_SEG2 0 +#define GC_BASE__INST1_SEG3 0 +#define GC_BASE__INST1_SEG4 0 +#define GC_BASE__INST1_SEG5 0 + +#define GC_BASE__INST2_SEG0 0 +#define GC_BASE__INST2_SEG1 0 +#define GC_BASE__INST2_SEG2 0 +#define GC_BASE__INST2_SEG3 0 +#define GC_BASE__INST2_SEG4 0 +#define GC_BASE__INST2_SEG5 0 + +#define GC_BASE__INST3_SEG0 0 +#define GC_BASE__INST3_SEG1 0 +#define GC_BASE__INST3_SEG2 0 +#define GC_BASE__INST3_SEG3 0 +#define GC_BASE__INST3_SEG4 0 +#define GC_BASE__INST3_SEG5 0 + +#define GC_BASE__INST4_SEG0 0 +#define GC_BASE__INST4_SEG1 0 +#define GC_BASE__INST4_SEG2 0 +#define GC_BASE__INST4_SEG3 0 +#define GC_BASE__INST4_SEG4 0 +#define GC_BASE__INST4_SEG5 0 + +#define GC_BASE__INST5_SEG0 0 +#define GC_BASE__INST5_SEG1 0 +#define GC_BASE__INST5_SEG2 0 +#define GC_BASE__INST5_SEG3 0 +#define GC_BASE__INST5_SEG4 0 +#define GC_BASE__INST5_SEG5 0 + +#define HDP_BASE__INST0_SEG0 0x00000F20 +#define HDP_BASE__INST0_SEG1 0 +#define HDP_BASE__INST0_SEG2 0 +#define HDP_BASE__INST0_SEG3 0 +#define HDP_BASE__INST0_SEG4 0 +#define HDP_BASE__INST0_SEG5 0 + +#define HDP_BASE__INST1_SEG0 0 +#define HDP_BASE__INST1_SEG1 0 +#define HDP_BASE__INST1_SEG2 0 +#define HDP_BASE__INST1_SEG3 0 +#define HDP_BASE__INST1_SEG4 0 +#define HDP_BASE__INST1_SEG5 0 + +#define HDP_BASE__INST2_SEG0 0 +#define HDP_BASE__INST2_SEG1 0 +#define HDP_BASE__INST2_SEG2 0 +#define HDP_BASE__INST2_SEG3 0 +#define HDP_BASE__INST2_SEG4 0 +#define HDP_BASE__INST2_SEG5 0 + +#define HDP_BASE__INST3_SEG0 0 +#define HDP_BASE__INST3_SEG1 0 +#define HDP_BASE__INST3_SEG2 0 +#define HDP_BASE__INST3_SEG3 0 +#define HDP_BASE__INST3_SEG4 0 +#define HDP_BASE__INST3_SEG5 0 + +#define HDP_BASE__INST4_SEG0 0 +#define HDP_BASE__INST4_SEG1 0 +#define HDP_BASE__INST4_SEG2 0 +#define HDP_BASE__INST4_SEG3 0 +#define HDP_BASE__INST4_SEG4 0 +#define HDP_BASE__INST4_SEG5 0 + +#define HDP_BASE__INST5_SEG0 0 +#define HDP_BASE__INST5_SEG1 0 +#define HDP_BASE__INST5_SEG2 0 +#define HDP_BASE__INST5_SEG3 0 +#define HDP_BASE__INST5_SEG4 0 +#define HDP_BASE__INST5_SEG5 0 + +#define MMHUB_BASE__INST0_SEG0 0x0001A000 +#define MMHUB_BASE__INST0_SEG1 0 +#define MMHUB_BASE__INST0_SEG2 0 +#define MMHUB_BASE__INST0_SEG3 0 +#define MMHUB_BASE__INST0_SEG4 0 +#define MMHUB_BASE__INST0_SEG5 0 + +#define MMHUB_BASE__INST1_SEG0 0 +#define MMHUB_BASE__INST1_SEG1 0 +#define MMHUB_BASE__INST1_SEG2 0 +#define MMHUB_BASE__INST1_SEG3 0 +#define MMHUB_BASE__INST1_SEG4 0 +#define MMHUB_BASE__INST1_SEG5 0 + +#define MMHUB_BASE__INST2_SEG0 0 +#define MMHUB_BASE__INST2_SEG1 0 +#define MMHUB_BASE__INST2_SEG2 0 +#define MMHUB_BASE__INST2_SEG3 0 +#define MMHUB_BASE__INST2_SEG4 0 +#define MMHUB_BASE__INST2_SEG5 0 + +#define MMHUB_BASE__INST3_SEG0 0 +#define MMHUB_BASE__INST3_SEG1 0 +#define MMHUB_BASE__INST3_SEG2 0 +#define MMHUB_BASE__INST3_SEG3 0 +#define MMHUB_BASE__INST3_SEG4 0 +#define MMHUB_BASE__INST3_SEG5 0 + +#define MMHUB_BASE__INST4_SEG0 0 +#define MMHUB_BASE__INST4_SEG1 0 +#define MMHUB_BASE__INST4_SEG2 0 +#define MMHUB_BASE__INST4_SEG3 0 +#define MMHUB_BASE__INST4_SEG4 0 +#define MMHUB_BASE__INST4_SEG5 0 + +#define MMHUB_BASE__INST5_SEG0 0 +#define MMHUB_BASE__INST5_SEG1 0 +#define MMHUB_BASE__INST5_SEG2 0 +#define MMHUB_BASE__INST5_SEG3 0 +#define MMHUB_BASE__INST5_SEG4 0 +#define MMHUB_BASE__INST5_SEG5 0 + +#define MP0_BASE__INST0_SEG0 0x00016000 +#define MP0_BASE__INST0_SEG1 0 +#define MP0_BASE__INST0_SEG2 0 +#define MP0_BASE__INST0_SEG3 0 +#define MP0_BASE__INST0_SEG4 0 +#define MP0_BASE__INST0_SEG5 0 + +#define MP0_BASE__INST1_SEG0 0 +#define MP0_BASE__INST1_SEG1 0 +#define MP0_BASE__INST1_SEG2 0 +#define MP0_BASE__INST1_SEG3 0 +#define MP0_BASE__INST1_SEG4 0 +#define MP0_BASE__INST1_SEG5 0 + +#define MP0_BASE__INST2_SEG0 0 +#define MP0_BASE__INST2_SEG1 0 +#define MP0_BASE__INST2_SEG2 0 +#define MP0_BASE__INST2_SEG3 0 +#define MP0_BASE__INST2_SEG4 0 +#define MP0_BASE__INST2_SEG5 0 + +#define MP0_BASE__INST3_SEG0 0 +#define MP0_BASE__INST3_SEG1 0 +#define MP0_BASE__INST3_SEG2 0 +#define MP0_BASE__INST3_SEG3 0 +#define MP0_BASE__INST3_SEG4 0 +#define MP0_BASE__INST3_SEG5 0 + +#define MP0_BASE__INST4_SEG0 0 +#define MP0_BASE__INST4_SEG1 0 +#define MP0_BASE__INST4_SEG2 0 +#define MP0_BASE__INST4_SEG3 0 +#define MP0_BASE__INST4_SEG4 0 +#define MP0_BASE__INST4_SEG5 0 + +#define MP0_BASE__INST5_SEG0 0 +#define MP0_BASE__INST5_SEG1 0 +#define MP0_BASE__INST5_SEG2 0 +#define MP0_BASE__INST5_SEG3 0 +#define MP0_BASE__INST5_SEG4 0 +#define MP0_BASE__INST5_SEG5 0 + +#define MP1_BASE__INST0_SEG0 0x00016000 +#define MP1_BASE__INST0_SEG1 0 +#define MP1_BASE__INST0_SEG2 0 +#define MP1_BASE__INST0_SEG3 0 +#define MP1_BASE__INST0_SEG4 0 +#define MP1_BASE__INST0_SEG5 0 + +#define MP1_BASE__INST1_SEG0 0 +#define MP1_BASE__INST1_SEG1 0 +#define MP1_BASE__INST1_SEG2 0 +#define MP1_BASE__INST1_SEG3 0 +#define MP1_BASE__INST1_SEG4 0 +#define MP1_BASE__INST1_SEG5 0 + +#define MP1_BASE__INST2_SEG0 0 +#define MP1_BASE__INST2_SEG1 0 +#define MP1_BASE__INST2_SEG2 0 +#define MP1_BASE__INST2_SEG3 0 +#define MP1_BASE__INST2_SEG4 0 +#define MP1_BASE__INST2_SEG5 0 + +#define MP1_BASE__INST3_SEG0 0 +#define MP1_BASE__INST3_SEG1 0 +#define MP1_BASE__INST3_SEG2 0 +#define MP1_BASE__INST3_SEG3 0 +#define MP1_BASE__INST3_SEG4 0 +#define MP1_BASE__INST3_SEG5 0 + +#define MP1_BASE__INST4_SEG0 0 +#define MP1_BASE__INST4_SEG1 0 +#define MP1_BASE__INST4_SEG2 0 +#define MP1_BASE__INST4_SEG3 0 +#define MP1_BASE__INST4_SEG4 0 +#define MP1_BASE__INST4_SEG5 0 + +#define MP1_BASE__INST5_SEG0 0 +#define MP1_BASE__INST5_SEG1 0 +#define MP1_BASE__INST5_SEG2 0 +#define MP1_BASE__INST5_SEG3 0 +#define MP1_BASE__INST5_SEG4 0 +#define MP1_BASE__INST5_SEG5 0 + +#define NBIO_BASE__INST0_SEG0 0x00000000 +#define NBIO_BASE__INST0_SEG1 0x00000014 +#define NBIO_BASE__INST0_SEG2 0x00000D20 +#define NBIO_BASE__INST0_SEG3 0x00010400 +#define NBIO_BASE__INST0_SEG4 0 +#define NBIO_BASE__INST0_SEG5 0 + +#define NBIO_BASE__INST1_SEG0 0 +#define NBIO_BASE__INST1_SEG1 0 +#define NBIO_BASE__INST1_SEG2 0 +#define NBIO_BASE__INST1_SEG3 0 +#define NBIO_BASE__INST1_SEG4 0 +#define NBIO_BASE__INST1_SEG5 0 + +#define NBIO_BASE__INST2_SEG0 0 +#define NBIO_BASE__INST2_SEG1 0 +#define NBIO_BASE__INST2_SEG2 0 +#define NBIO_BASE__INST2_SEG3 0 +#define NBIO_BASE__INST2_SEG4 0 +#define NBIO_BASE__INST2_SEG5 0 + +#define NBIO_BASE__INST3_SEG0 0 +#define NBIO_BASE__INST3_SEG1 0 +#define NBIO_BASE__INST3_SEG2 0 +#define NBIO_BASE__INST3_SEG3 0 +#define NBIO_BASE__INST3_SEG4 0 +#define NBIO_BASE__INST3_SEG5 0 + +#define NBIO_BASE__INST4_SEG0 0 +#define NBIO_BASE__INST4_SEG1 0 +#define NBIO_BASE__INST4_SEG2 0 +#define NBIO_BASE__INST4_SEG3 0 +#define NBIO_BASE__INST4_SEG4 0 +#define NBIO_BASE__INST4_SEG5 0 + +#define NBIO_BASE__INST5_SEG0 0 +#define NBIO_BASE__INST5_SEG1 0 +#define NBIO_BASE__INST5_SEG2 0 +#define NBIO_BASE__INST5_SEG3 0 +#define NBIO_BASE__INST5_SEG4 0 +#define NBIO_BASE__INST5_SEG5 0 + +#define OSSSYS_BASE__INST0_SEG0 0x000010A0 +#define OSSSYS_BASE__INST0_SEG1 0 +#define OSSSYS_BASE__INST0_SEG2 0 +#define OSSSYS_BASE__INST0_SEG3 0 +#define OSSSYS_BASE__INST0_SEG4 0 +#define OSSSYS_BASE__INST0_SEG5 0 + +#define OSSSYS_BASE__INST1_SEG0 0 +#define OSSSYS_BASE__INST1_SEG1 0 +#define OSSSYS_BASE__INST1_SEG2 0 +#define OSSSYS_BASE__INST1_SEG3 0 +#define OSSSYS_BASE__INST1_SEG4 0 +#define OSSSYS_BASE__INST1_SEG5 0 + +#define OSSSYS_BASE__INST2_SEG0 0 +#define OSSSYS_BASE__INST2_SEG1 0 +#define OSSSYS_BASE__INST2_SEG2 0 +#define OSSSYS_BASE__INST2_SEG3 0 +#define OSSSYS_BASE__INST2_SEG4 0 +#define OSSSYS_BASE__INST2_SEG5 0 + +#define OSSSYS_BASE__INST3_SEG0 0 +#define OSSSYS_BASE__INST3_SEG1 0 +#define OSSSYS_BASE__INST3_SEG2 0 +#define OSSSYS_BASE__INST3_SEG3 0 +#define OSSSYS_BASE__INST3_SEG4 0 +#define OSSSYS_BASE__INST3_SEG5 0 + +#define OSSSYS_BASE__INST4_SEG0 0 +#define OSSSYS_BASE__INST4_SEG1 0 +#define OSSSYS_BASE__INST4_SEG2 0 +#define OSSSYS_BASE__INST4_SEG3 0 +#define OSSSYS_BASE__INST4_SEG4 0 +#define OSSSYS_BASE__INST4_SEG5 0 + +#define OSSSYS_BASE__INST5_SEG0 0 +#define OSSSYS_BASE__INST5_SEG1 0 +#define OSSSYS_BASE__INST5_SEG2 0 +#define OSSSYS_BASE__INST5_SEG3 0 +#define OSSSYS_BASE__INST5_SEG4 0 +#define OSSSYS_BASE__INST5_SEG5 0 + +#define SDMA0_BASE__INST0_SEG0 0x00001260 +#define SDMA0_BASE__INST0_SEG1 0 +#define SDMA0_BASE__INST0_SEG2 0 +#define SDMA0_BASE__INST0_SEG3 0 +#define SDMA0_BASE__INST0_SEG4 0 +#define SDMA0_BASE__INST0_SEG5 0 + +#define SDMA0_BASE__INST1_SEG0 0 +#define SDMA0_BASE__INST1_SEG1 0 +#define SDMA0_BASE__INST1_SEG2 0 +#define SDMA0_BASE__INST1_SEG3 0 +#define SDMA0_BASE__INST1_SEG4 0 +#define SDMA0_BASE__INST1_SEG5 0 + +#define SDMA0_BASE__INST2_SEG0 0 +#define SDMA0_BASE__INST2_SEG1 0 +#define SDMA0_BASE__INST2_SEG2 0 +#define SDMA0_BASE__INST2_SEG3 0 +#define SDMA0_BASE__INST2_SEG4 0 +#define SDMA0_BASE__INST2_SEG5 0 + +#define SDMA0_BASE__INST3_SEG0 0 +#define SDMA0_BASE__INST3_SEG1 0 +#define SDMA0_BASE__INST3_SEG2 0 +#define SDMA0_BASE__INST3_SEG3 0 +#define SDMA0_BASE__INST3_SEG4 0 +#define SDMA0_BASE__INST3_SEG5 0 + +#define SDMA0_BASE__INST4_SEG0 0 +#define SDMA0_BASE__INST4_SEG1 0 +#define SDMA0_BASE__INST4_SEG2 0 +#define SDMA0_BASE__INST4_SEG3 0 +#define SDMA0_BASE__INST4_SEG4 0 +#define SDMA0_BASE__INST4_SEG5 0 + +#define SDMA0_BASE__INST5_SEG0 0 +#define SDMA0_BASE__INST5_SEG1 0 +#define SDMA0_BASE__INST5_SEG2 0 +#define SDMA0_BASE__INST5_SEG3 0 +#define SDMA0_BASE__INST5_SEG4 0 +#define SDMA0_BASE__INST5_SEG5 0 + +#define SDMA1_BASE__INST0_SEG0 0x00001860 +#define SDMA1_BASE__INST0_SEG1 0 +#define SDMA1_BASE__INST0_SEG2 0 +#define SDMA1_BASE__INST0_SEG3 0 +#define SDMA1_BASE__INST0_SEG4 0 +#define SDMA1_BASE__INST0_SEG5 0 + +#define SDMA1_BASE__INST1_SEG0 0 +#define SDMA1_BASE__INST1_SEG1 0 +#define SDMA1_BASE__INST1_SEG2 0 +#define SDMA1_BASE__INST1_SEG3 0 +#define SDMA1_BASE__INST1_SEG4 0 +#define SDMA1_BASE__INST1_SEG5 0 + +#define SDMA1_BASE__INST2_SEG0 0 +#define SDMA1_BASE__INST2_SEG1 0 +#define SDMA1_BASE__INST2_SEG2 0 +#define SDMA1_BASE__INST2_SEG3 0 +#define SDMA1_BASE__INST2_SEG4 0 +#define SDMA1_BASE__INST2_SEG5 0 + +#define SDMA1_BASE__INST3_SEG0 0 +#define SDMA1_BASE__INST3_SEG1 0 +#define SDMA1_BASE__INST3_SEG2 0 +#define SDMA1_BASE__INST3_SEG3 0 +#define SDMA1_BASE__INST3_SEG4 0 +#define SDMA1_BASE__INST3_SEG5 0 + +#define SDMA1_BASE__INST4_SEG0 0 +#define SDMA1_BASE__INST4_SEG1 0 +#define SDMA1_BASE__INST4_SEG2 0 +#define SDMA1_BASE__INST4_SEG3 0 +#define SDMA1_BASE__INST4_SEG4 0 +#define SDMA1_BASE__INST4_SEG5 0 + +#define SDMA1_BASE__INST5_SEG0 0 +#define SDMA1_BASE__INST5_SEG1 0 +#define SDMA1_BASE__INST5_SEG2 0 +#define SDMA1_BASE__INST5_SEG3 0 +#define SDMA1_BASE__INST5_SEG4 0 +#define SDMA1_BASE__INST5_SEG5 0 + +#define SMUIO_BASE__INST0_SEG0 0x00016800 +#define SMUIO_BASE__INST0_SEG1 0x00016A00 +#define SMUIO_BASE__INST0_SEG2 0 +#define SMUIO_BASE__INST0_SEG3 0 +#define SMUIO_BASE__INST0_SEG4 0 +#define SMUIO_BASE__INST0_SEG5 0 + +#define SMUIO_BASE__INST1_SEG0 0 +#define SMUIO_BASE__INST1_SEG1 0 +#define SMUIO_BASE__INST1_SEG2 0 +#define SMUIO_BASE__INST1_SEG3 0 +#define SMUIO_BASE__INST1_SEG4 0 +#define SMUIO_BASE__INST1_SEG5 0 + +#define SMUIO_BASE__INST2_SEG0 0 +#define SMUIO_BASE__INST2_SEG1 0 +#define SMUIO_BASE__INST2_SEG2 0 +#define SMUIO_BASE__INST2_SEG3 0 +#define SMUIO_BASE__INST2_SEG4 0 +#define SMUIO_BASE__INST2_SEG5 0 + +#define SMUIO_BASE__INST3_SEG0 0 +#define SMUIO_BASE__INST3_SEG1 0 +#define SMUIO_BASE__INST3_SEG2 0 +#define SMUIO_BASE__INST3_SEG3 0 +#define SMUIO_BASE__INST3_SEG4 0 +#define SMUIO_BASE__INST3_SEG5 0 + +#define SMUIO_BASE__INST4_SEG0 0 +#define SMUIO_BASE__INST4_SEG1 0 +#define SMUIO_BASE__INST4_SEG2 0 +#define SMUIO_BASE__INST4_SEG3 0 +#define SMUIO_BASE__INST4_SEG4 0 +#define SMUIO_BASE__INST4_SEG5 0 + +#define SMUIO_BASE__INST5_SEG0 0 +#define SMUIO_BASE__INST5_SEG1 0 +#define SMUIO_BASE__INST5_SEG2 0 +#define SMUIO_BASE__INST5_SEG3 0 +#define SMUIO_BASE__INST5_SEG4 0 +#define SMUIO_BASE__INST5_SEG5 0 + +#define THM_BASE__INST0_SEG0 0x00016600 +#define THM_BASE__INST0_SEG1 0 +#define THM_BASE__INST0_SEG2 0 +#define THM_BASE__INST0_SEG3 0 +#define THM_BASE__INST0_SEG4 0 +#define THM_BASE__INST0_SEG5 0 + +#define THM_BASE__INST1_SEG0 0 +#define THM_BASE__INST1_SEG1 0 +#define THM_BASE__INST1_SEG2 0 +#define THM_BASE__INST1_SEG3 0 +#define THM_BASE__INST1_SEG4 0 +#define THM_BASE__INST1_SEG5 0 + +#define THM_BASE__INST2_SEG0 0 +#define THM_BASE__INST2_SEG1 0 +#define THM_BASE__INST2_SEG2 0 +#define THM_BASE__INST2_SEG3 0 +#define THM_BASE__INST2_SEG4 0 +#define THM_BASE__INST2_SEG5 0 + +#define THM_BASE__INST3_SEG0 0 +#define THM_BASE__INST3_SEG1 0 +#define THM_BASE__INST3_SEG2 0 +#define THM_BASE__INST3_SEG3 0 +#define THM_BASE__INST3_SEG4 0 +#define THM_BASE__INST3_SEG5 0 + +#define THM_BASE__INST4_SEG0 0 +#define THM_BASE__INST4_SEG1 0 +#define THM_BASE__INST4_SEG2 0 +#define THM_BASE__INST4_SEG3 0 +#define THM_BASE__INST4_SEG4 0 +#define THM_BASE__INST4_SEG5 0 + +#define THM_BASE__INST5_SEG0 0 +#define THM_BASE__INST5_SEG1 0 +#define THM_BASE__INST5_SEG2 0 +#define THM_BASE__INST5_SEG3 0 +#define THM_BASE__INST5_SEG4 0 +#define THM_BASE__INST5_SEG5 0 + +#define UMC_BASE__INST0_SEG0 0x00014000 +#define UMC_BASE__INST0_SEG1 0 +#define UMC_BASE__INST0_SEG2 0 +#define UMC_BASE__INST0_SEG3 0 +#define UMC_BASE__INST0_SEG4 0 +#define UMC_BASE__INST0_SEG5 0 + +#define UMC_BASE__INST1_SEG0 0 +#define UMC_BASE__INST1_SEG1 0 +#define UMC_BASE__INST1_SEG2 0 +#define UMC_BASE__INST1_SEG3 0 +#define UMC_BASE__INST1_SEG4 0 +#define UMC_BASE__INST1_SEG5 0 + +#define UMC_BASE__INST2_SEG0 0 +#define UMC_BASE__INST2_SEG1 0 +#define UMC_BASE__INST2_SEG2 0 +#define UMC_BASE__INST2_SEG3 0 +#define UMC_BASE__INST2_SEG4 0 +#define UMC_BASE__INST2_SEG5 0 + +#define UMC_BASE__INST3_SEG0 0 +#define UMC_BASE__INST3_SEG1 0 +#define UMC_BASE__INST3_SEG2 0 +#define UMC_BASE__INST3_SEG3 0 +#define UMC_BASE__INST3_SEG4 0 +#define UMC_BASE__INST3_SEG5 0 + +#define UMC_BASE__INST4_SEG0 0 +#define UMC_BASE__INST4_SEG1 0 +#define UMC_BASE__INST4_SEG2 0 +#define UMC_BASE__INST4_SEG3 0 +#define UMC_BASE__INST4_SEG4 0 +#define UMC_BASE__INST4_SEG5 0 + +#define UMC_BASE__INST5_SEG0 0 +#define UMC_BASE__INST5_SEG1 0 +#define UMC_BASE__INST5_SEG2 0 +#define UMC_BASE__INST5_SEG3 0 +#define UMC_BASE__INST5_SEG4 0 +#define UMC_BASE__INST5_SEG5 0 + +#define UVD_BASE__INST0_SEG0 0x00007800 +#define UVD_BASE__INST0_SEG1 0x00007E00 +#define UVD_BASE__INST0_SEG2 0 +#define UVD_BASE__INST0_SEG3 0 +#define UVD_BASE__INST0_SEG4 0 +#define UVD_BASE__INST0_SEG5 0 + +#define UVD_BASE__INST1_SEG0 0 +#define UVD_BASE__INST1_SEG1 0x00009000 +#define UVD_BASE__INST1_SEG2 0 +#define UVD_BASE__INST1_SEG3 0 +#define UVD_BASE__INST1_SEG4 0 +#define UVD_BASE__INST1_SEG5 0 + +#define UVD_BASE__INST2_SEG0 0 +#define UVD_BASE__INST2_SEG1 0 +#define UVD_BASE__INST2_SEG2 0 +#define UVD_BASE__INST2_SEG3 0 +#define UVD_BASE__INST2_SEG4 0 +#define UVD_BASE__INST2_SEG5 0 + +#define UVD_BASE__INST3_SEG0 0 +#define UVD_BASE__INST3_SEG1 0 +#define UVD_BASE__INST3_SEG2 0 +#define UVD_BASE__INST3_SEG3 0 +#define UVD_BASE__INST3_SEG4 0 +#define UVD_BASE__INST3_SEG5 0 + +#define UVD_BASE__INST4_SEG0 0 +#define UVD_BASE__INST4_SEG1 0 +#define UVD_BASE__INST4_SEG2 0 +#define UVD_BASE__INST4_SEG3 0 +#define UVD_BASE__INST4_SEG4 0 +#define UVD_BASE__INST4_SEG5 0 + +#define UVD_BASE__INST5_SEG0 0 +#define UVD_BASE__INST5_SEG1 0 +#define UVD_BASE__INST5_SEG2 0 +#define UVD_BASE__INST5_SEG3 0 +#define UVD_BASE__INST5_SEG4 0 +#define UVD_BASE__INST5_SEG5 0 + +#define VCE_BASE__INST0_SEG0 0x00008800 +#define VCE_BASE__INST0_SEG1 0 +#define VCE_BASE__INST0_SEG2 0 +#define VCE_BASE__INST0_SEG3 0 +#define VCE_BASE__INST0_SEG4 0 +#define VCE_BASE__INST0_SEG5 0 + +#define VCE_BASE__INST1_SEG0 0 +#define VCE_BASE__INST1_SEG1 0 +#define VCE_BASE__INST1_SEG2 0 +#define VCE_BASE__INST1_SEG3 0 +#define VCE_BASE__INST1_SEG4 0 +#define VCE_BASE__INST1_SEG5 0 + +#define VCE_BASE__INST2_SEG0 0 +#define VCE_BASE__INST2_SEG1 0 +#define VCE_BASE__INST2_SEG2 0 +#define VCE_BASE__INST2_SEG3 0 +#define VCE_BASE__INST2_SEG4 0 +#define VCE_BASE__INST2_SEG5 0 + +#define VCE_BASE__INST3_SEG0 0 +#define VCE_BASE__INST3_SEG1 0 +#define VCE_BASE__INST3_SEG2 0 +#define VCE_BASE__INST3_SEG3 0 +#define VCE_BASE__INST3_SEG4 0 +#define VCE_BASE__INST3_SEG5 0 + +#define VCE_BASE__INST4_SEG0 0 +#define VCE_BASE__INST4_SEG1 0 +#define VCE_BASE__INST4_SEG2 0 +#define VCE_BASE__INST4_SEG3 0 +#define VCE_BASE__INST4_SEG4 0 +#define VCE_BASE__INST4_SEG5 0 + +#define VCE_BASE__INST5_SEG0 0 +#define VCE_BASE__INST5_SEG1 0 +#define VCE_BASE__INST5_SEG2 0 +#define VCE_BASE__INST5_SEG3 0 +#define VCE_BASE__INST5_SEG4 0 +#define VCE_BASE__INST5_SEG5 0 + +#define XDMA_BASE__INST0_SEG0 0x00003400 +#define XDMA_BASE__INST0_SEG1 0 +#define XDMA_BASE__INST0_SEG2 0 +#define XDMA_BASE__INST0_SEG3 0 +#define XDMA_BASE__INST0_SEG4 0 +#define XDMA_BASE__INST0_SEG5 0 + +#define XDMA_BASE__INST1_SEG0 0 +#define XDMA_BASE__INST1_SEG1 0 +#define XDMA_BASE__INST1_SEG2 0 +#define XDMA_BASE__INST1_SEG3 0 +#define XDMA_BASE__INST1_SEG4 0 +#define XDMA_BASE__INST1_SEG5 0 + +#define XDMA_BASE__INST2_SEG0 0 +#define XDMA_BASE__INST2_SEG1 0 +#define XDMA_BASE__INST2_SEG2 0 +#define XDMA_BASE__INST2_SEG3 0 +#define XDMA_BASE__INST2_SEG4 0 +#define XDMA_BASE__INST2_SEG5 0 + +#define XDMA_BASE__INST3_SEG0 0 +#define XDMA_BASE__INST3_SEG1 0 +#define XDMA_BASE__INST3_SEG2 0 +#define XDMA_BASE__INST3_SEG3 0 +#define XDMA_BASE__INST3_SEG4 0 +#define XDMA_BASE__INST3_SEG5 0 + +#define XDMA_BASE__INST4_SEG0 0 +#define XDMA_BASE__INST4_SEG1 0 +#define XDMA_BASE__INST4_SEG2 0 +#define XDMA_BASE__INST4_SEG3 0 +#define XDMA_BASE__INST4_SEG4 0 +#define XDMA_BASE__INST4_SEG5 0 + +#define XDMA_BASE__INST5_SEG0 0 +#define XDMA_BASE__INST5_SEG1 0 +#define XDMA_BASE__INST5_SEG2 0 +#define XDMA_BASE__INST5_SEG3 0 +#define XDMA_BASE__INST5_SEG4 0 +#define XDMA_BASE__INST5_SEG5 0 + +#define RSMU_BASE__INST0_SEG0 0x00012000 +#define RSMU_BASE__INST0_SEG1 0 +#define RSMU_BASE__INST0_SEG2 0 +#define RSMU_BASE__INST0_SEG3 0 +#define RSMU_BASE__INST0_SEG4 0 +#define RSMU_BASE__INST0_SEG5 0 + +#define RSMU_BASE__INST1_SEG0 0 +#define RSMU_BASE__INST1_SEG1 0 +#define RSMU_BASE__INST1_SEG2 0 +#define RSMU_BASE__INST1_SEG3 0 +#define RSMU_BASE__INST1_SEG4 0 +#define RSMU_BASE__INST1_SEG5 0 + +#define RSMU_BASE__INST2_SEG0 0 +#define RSMU_BASE__INST2_SEG1 0 +#define RSMU_BASE__INST2_SEG2 0 +#define RSMU_BASE__INST2_SEG3 0 +#define RSMU_BASE__INST2_SEG4 0 +#define RSMU_BASE__INST2_SEG5 0 + +#define RSMU_BASE__INST3_SEG0 0 +#define RSMU_BASE__INST3_SEG1 0 +#define RSMU_BASE__INST3_SEG2 0 +#define RSMU_BASE__INST3_SEG3 0 +#define RSMU_BASE__INST3_SEG4 0 +#define RSMU_BASE__INST3_SEG5 0 + +#define RSMU_BASE__INST4_SEG0 0 +#define RSMU_BASE__INST4_SEG1 0 +#define RSMU_BASE__INST4_SEG2 0 +#define RSMU_BASE__INST4_SEG3 0 +#define RSMU_BASE__INST4_SEG4 0 +#define RSMU_BASE__INST4_SEG5 0 + +#define RSMU_BASE__INST5_SEG0 0 +#define RSMU_BASE__INST5_SEG1 0 +#define RSMU_BASE__INST5_SEG2 0 +#define RSMU_BASE__INST5_SEG3 0 +#define RSMU_BASE__INST5_SEG4 0 +#define RSMU_BASE__INST5_SEG5 0 + +#endif + diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 7e8ad30d98e2..d567be49c31b 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -25,30 +25,16 @@ #include <linux/kernel.h> #include <linux/gfp.h> #include <linux/slab.h> +#include <linux/firmware.h> #include "amd_shared.h" #include "amd_powerplay.h" #include "power_state.h" #include "amdgpu.h" #include "hwmgr.h" -#define PP_DPM_DISABLED 0xCCCC - -static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_task task_id, - enum amd_pm_state_type *user_state); static const struct amd_pm_funcs pp_dpm_funcs; -static inline int pp_check(struct pp_hwmgr *hwmgr) -{ - if (hwmgr == NULL || hwmgr->smumgr_funcs == NULL) - return -EINVAL; - - if (hwmgr->pm_en == 0 || hwmgr->hwmgr_func == NULL) - return PP_DPM_DISABLED; - - return 0; -} - static int amd_powerplay_create(struct amdgpu_device *adev) { struct pp_hwmgr *hwmgr; @@ -61,19 +47,21 @@ static int amd_powerplay_create(struct amdgpu_device *adev) return -ENOMEM; hwmgr->adev = adev; - hwmgr->pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false; + hwmgr->not_vf = !amdgpu_sriov_vf(adev); + hwmgr->pm_en = (amdgpu_dpm && hwmgr->not_vf) ? true : false; hwmgr->device = amdgpu_cgs_create_device(adev); mutex_init(&hwmgr->smu_lock); hwmgr->chip_family = adev->family; hwmgr->chip_id = adev->asic_type; - hwmgr->feature_mask = amdgpu_pp_feature_mask; + hwmgr->feature_mask = adev->powerplay.pp_feature; + hwmgr->display_config = &adev->pm.pm_display_cfg; adev->powerplay.pp_handle = hwmgr; adev->powerplay.pp_funcs = &pp_dpm_funcs; return 0; } -static int amd_powerplay_destroy(struct amdgpu_device *adev) +static void amd_powerplay_destroy(struct amdgpu_device *adev) { struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; @@ -82,8 +70,6 @@ static int amd_powerplay_destroy(struct amdgpu_device *adev) kfree(hwmgr); hwmgr = NULL; - - return 0; } static int pp_early_init(void *handle) @@ -109,18 +95,9 @@ static int pp_sw_init(void *handle) struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret >= 0) { - if (hwmgr->smumgr_funcs->smu_init == NULL) - return -EINVAL; + ret = hwmgr_sw_init(hwmgr); - ret = hwmgr->smumgr_funcs->smu_init(hwmgr); - - phm_register_irq_handlers(hwmgr); - - pr_debug("amdgpu: powerplay sw initialized\n"); - } + pr_debug("powerplay sw init %s\n", ret ? "failed" : "successfully"); return ret; } @@ -129,16 +106,14 @@ static int pp_sw_fini(void *handle) { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret = 0; - ret = pp_check(hwmgr); - if (ret >= 0) { - if (hwmgr->smumgr_funcs->smu_fini != NULL) - hwmgr->smumgr_funcs->smu_fini(hwmgr); - } + hwmgr_sw_fini(hwmgr); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; amdgpu_ucode_fini_bo(adev); + } return 0; } @@ -152,55 +127,68 @@ static int pp_hw_init(void *handle) if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) amdgpu_ucode_init_bo(adev); - ret = pp_check(hwmgr); + ret = hwmgr_hw_init(hwmgr); - if (ret >= 0) { - if (hwmgr->smumgr_funcs->start_smu == NULL) - return -EINVAL; + if (ret) + pr_err("powerplay hw init failed\n"); - if (hwmgr->smumgr_funcs->start_smu(hwmgr)) { - pr_err("smc start failed\n"); - hwmgr->smumgr_funcs->smu_fini(hwmgr); - return -EINVAL; - } - if (ret == PP_DPM_DISABLED) - goto exit; - ret = hwmgr_hw_init(hwmgr); - if (ret) - goto exit; - } return ret; -exit: - hwmgr->pm_en = 0; - cgs_notify_dpm_enabled(hwmgr->device, false); - return 0; - } static int pp_hw_fini(void *handle) { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret = 0; - ret = pp_check(hwmgr); - if (ret == 0) - hwmgr_hw_fini(hwmgr); + hwmgr_hw_fini(hwmgr); return 0; } +static void pp_reserve_vram_for_smu(struct amdgpu_device *adev) +{ + int r = -EINVAL; + void *cpu_ptr = NULL; + uint64_t gpu_addr; + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + + if (amdgpu_bo_create_kernel(adev, adev->pm.smu_prv_buffer_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->pm.smu_prv_buffer, + &gpu_addr, + &cpu_ptr)) { + DRM_ERROR("amdgpu: failed to create smu prv buffer\n"); + return; + } + + if (hwmgr->hwmgr_func->notify_cac_buffer_info) + r = hwmgr->hwmgr_func->notify_cac_buffer_info(hwmgr, + lower_32_bits((unsigned long)cpu_ptr), + upper_32_bits((unsigned long)cpu_ptr), + lower_32_bits(gpu_addr), + upper_32_bits(gpu_addr), + adev->pm.smu_prv_buffer_size); + + if (r) { + amdgpu_bo_free_kernel(&adev->pm.smu_prv_buffer, NULL, NULL); + adev->pm.smu_prv_buffer = NULL; + DRM_ERROR("amdgpu: failed to notify SMU buffer address\n"); + } +} + static int pp_late_init(void *handle) { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret == 0) - pp_dpm_dispatch_tasks(hwmgr, + if (hwmgr && hwmgr->pm_en) { + mutex_lock(&hwmgr->smu_lock); + hwmgr_handle_task(hwmgr, AMD_PP_TASK_COMPLETE_INIT, NULL); + mutex_unlock(&hwmgr->smu_lock); + } + if (adev->pm.smu_prv_buffer_size != 0) + pp_reserve_vram_for_smu(adev); return 0; } @@ -209,6 +197,8 @@ static void pp_late_fini(void *handle) { struct amdgpu_device *adev = handle; + if (adev->pm.smu_prv_buffer) + amdgpu_bo_free_kernel(&adev->pm.smu_prv_buffer, NULL, NULL); amd_powerplay_destroy(adev); } @@ -233,15 +223,21 @@ static int pp_set_powergating_state(void *handle, { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret = 0; + int ret; - ret = pp_check(hwmgr); + if (!hwmgr || !hwmgr->pm_en) + return 0; - if (ret) - return ret; + if (hwmgr->hwmgr_func->gfx_off_control) { + /* Enable/disable GFX off through SMU */ + ret = hwmgr->hwmgr_func->gfx_off_control(hwmgr, + state == AMD_PG_STATE_GATE); + if (ret) + pr_err("gfx off control failed!\n"); + } if (hwmgr->hwmgr_func->enable_per_cu_power_gating == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_debug("%s was not implemented.\n", __func__); return 0; } @@ -254,38 +250,16 @@ static int pp_suspend(void *handle) { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret = 0; - ret = pp_check(hwmgr); - if (ret == 0) - hwmgr_hw_suspend(hwmgr); - return 0; + return hwmgr_suspend(hwmgr); } static int pp_resume(void *handle) { struct amdgpu_device *adev = handle; struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; - int ret; - - ret = pp_check(hwmgr); - - if (ret < 0) - return ret; - - if (hwmgr->smumgr_funcs->start_smu == NULL) - return -EINVAL; - - if (hwmgr->smumgr_funcs->start_smu(hwmgr)) { - pr_err("smc start failed\n"); - hwmgr->smumgr_funcs->smu_fini(hwmgr); - return -EINVAL; - } - if (ret == PP_DPM_DISABLED) - return 0; - - return hwmgr_hw_resume(hwmgr); + return hwmgr_resume(hwmgr); } static int pp_set_clockgating_state(void *handle, @@ -334,12 +308,9 @@ static int pp_dpm_fw_loading_complete(void *handle) static int pp_set_clockgating_by_smu(void *handle, uint32_t msg_id) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->update_clock_gatings == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -362,10 +333,10 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr, if (*level & profile_mode_mask) { hwmgr->saved_dpm_level = hwmgr->dpm_level; hwmgr->en_umd_pstate = true; - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_CG_STATE_UNGATE); - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); } @@ -375,10 +346,10 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr *hwmgr, if (*level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT) *level = hwmgr->saved_dpm_level; hwmgr->en_umd_pstate = false; - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_CG_STATE_GATE); - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); } @@ -389,12 +360,9 @@ static int pp_dpm_force_performance_level(void *handle, enum amd_dpm_forced_level level) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (level == hwmgr->dpm_level) return 0; @@ -412,13 +380,10 @@ static enum amd_dpm_forced_level pp_dpm_get_performance_level( void *handle) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; enum amd_dpm_forced_level level; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; mutex_lock(&hwmgr->smu_lock); level = hwmgr->dpm_level; @@ -429,13 +394,10 @@ static enum amd_dpm_forced_level pp_dpm_get_performance_level( static uint32_t pp_dpm_get_sclk(void *handle, bool low) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; uint32_t clk = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return 0; if (hwmgr->hwmgr_func->get_sclk == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -450,13 +412,10 @@ static uint32_t pp_dpm_get_sclk(void *handle, bool low) static uint32_t pp_dpm_get_mclk(void *handle, bool low) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; uint32_t clk = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return 0; if (hwmgr->hwmgr_func->get_mclk == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -471,11 +430,8 @@ static uint32_t pp_dpm_get_mclk(void *handle, bool low) static void pp_dpm_powergate_vce(void *handle, bool gate) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - ret = pp_check(hwmgr); - - if (ret) + if (!hwmgr || !hwmgr->pm_en) return; if (hwmgr->hwmgr_func->powergate_vce == NULL) { @@ -490,11 +446,8 @@ static void pp_dpm_powergate_vce(void *handle, bool gate) static void pp_dpm_powergate_uvd(void *handle, bool gate) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) + if (!hwmgr || !hwmgr->pm_en) return; if (hwmgr->hwmgr_func->powergate_uvd == NULL) { @@ -512,10 +465,8 @@ static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_task task_id, int ret = 0; struct pp_hwmgr *hwmgr = handle; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; mutex_lock(&hwmgr->smu_lock); ret = hwmgr_handle_task(hwmgr, task_id, user_state); @@ -528,15 +479,9 @@ static enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle) { struct pp_hwmgr *hwmgr = handle; struct pp_power_state *state; - int ret = 0; enum amd_pm_state_type pm_type; - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (hwmgr->current_ps == NULL) + if (!hwmgr || !hwmgr->pm_en || !hwmgr->current_ps) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -568,11 +513,8 @@ static enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle) static void pp_dpm_set_fan_control_mode(void *handle, uint32_t mode) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) + if (!hwmgr || !hwmgr->pm_en) return; if (hwmgr->hwmgr_func->set_fan_control_mode == NULL) { @@ -587,13 +529,10 @@ static void pp_dpm_set_fan_control_mode(void *handle, uint32_t mode) static uint32_t pp_dpm_get_fan_control_mode(void *handle) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; uint32_t mode = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return 0; if (hwmgr->hwmgr_func->get_fan_control_mode == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -610,10 +549,8 @@ static int pp_dpm_set_fan_speed_percent(void *handle, uint32_t percent) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->set_fan_speed_percent == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -630,10 +567,8 @@ static int pp_dpm_get_fan_speed_percent(void *handle, uint32_t *speed) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->get_fan_speed_percent == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -651,10 +586,8 @@ static int pp_dpm_get_fan_speed_rpm(void *handle, uint32_t *rpm) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->get_fan_speed_rpm == NULL) return -EINVAL; @@ -670,16 +603,10 @@ static int pp_dpm_get_pp_num_states(void *handle, { struct pp_hwmgr *hwmgr = handle; int i; - int ret = 0; memset(data, 0, sizeof(*data)); - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (hwmgr->ps == NULL) + if (!hwmgr || !hwmgr->pm_en ||!hwmgr->ps) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -713,15 +640,9 @@ static int pp_dpm_get_pp_num_states(void *handle, static int pp_dpm_get_pp_table(void *handle, char **table) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; int size = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (!hwmgr->soft_pp_table) + if (!hwmgr || !hwmgr->pm_en ||!hwmgr->soft_pp_table) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -736,10 +657,6 @@ static int amd_powerplay_reset(void *handle) struct pp_hwmgr *hwmgr = handle; int ret; - ret = pp_check(hwmgr); - if (ret) - return ret; - ret = hwmgr_hw_fini(hwmgr); if (ret) return ret; @@ -754,40 +671,38 @@ static int amd_powerplay_reset(void *handle) static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); + int ret = -ENOMEM; - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; mutex_lock(&hwmgr->smu_lock); if (!hwmgr->hardcode_pp_table) { hwmgr->hardcode_pp_table = kmemdup(hwmgr->soft_pp_table, hwmgr->soft_pp_table_size, GFP_KERNEL); - if (!hwmgr->hardcode_pp_table) { - mutex_unlock(&hwmgr->smu_lock); - return -ENOMEM; - } + if (!hwmgr->hardcode_pp_table) + goto err; } memcpy(hwmgr->hardcode_pp_table, buf, size); hwmgr->soft_pp_table = hwmgr->hardcode_pp_table; - mutex_unlock(&hwmgr->smu_lock); ret = amd_powerplay_reset(handle); if (ret) - return ret; + goto err; if (hwmgr->hwmgr_func->avfs_control) { ret = hwmgr->hwmgr_func->avfs_control(hwmgr, false); if (ret) - return ret; + goto err; } - + mutex_unlock(&hwmgr->smu_lock); return 0; +err: + mutex_unlock(&hwmgr->smu_lock); + return ret; } static int pp_dpm_force_clock_level(void *handle, @@ -796,10 +711,8 @@ static int pp_dpm_force_clock_level(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->force_clock_level == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -820,10 +733,8 @@ static int pp_dpm_print_clock_levels(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->print_clock_levels == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -840,10 +751,8 @@ static int pp_dpm_get_sclk_od(void *handle) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->get_sclk_od == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -860,10 +769,8 @@ static int pp_dpm_set_sclk_od(void *handle, uint32_t value) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->set_sclk_od == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -881,10 +788,8 @@ static int pp_dpm_get_mclk_od(void *handle) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->get_mclk_od == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -901,10 +806,8 @@ static int pp_dpm_set_mclk_od(void *handle, uint32_t value) struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->set_mclk_od == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -922,11 +825,7 @@ static int pp_dpm_read_sensor(void *handle, int idx, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - if (ret) - return ret; - - if (value == NULL) + if (!hwmgr || !hwmgr->pm_en || !value) return -EINVAL; switch (idx) { @@ -948,14 +847,11 @@ static struct amd_vce_state* pp_dpm_get_vce_clock_state(void *handle, unsigned idx) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - ret = pp_check(hwmgr); - - if (ret) + if (!hwmgr || !hwmgr->pm_en) return NULL; - if (hwmgr && idx < hwmgr->num_vce_state_tables) + if (idx < hwmgr->num_vce_state_tables) return &hwmgr->vce_states[idx]; return NULL; } @@ -964,7 +860,7 @@ static int pp_get_power_profile_mode(void *handle, char *buf) { struct pp_hwmgr *hwmgr = handle; - if (!buf || pp_check(hwmgr)) + if (!hwmgr || !hwmgr->pm_en || !buf) return -EINVAL; if (hwmgr->hwmgr_func->get_power_profile_mode == NULL) { @@ -980,12 +876,12 @@ static int pp_set_power_profile_mode(void *handle, long *input, uint32_t size) struct pp_hwmgr *hwmgr = handle; int ret = -EINVAL; - if (pp_check(hwmgr)) - return -EINVAL; + if (!hwmgr || !hwmgr->pm_en) + return ret; if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) { pr_info("%s was not implemented.\n", __func__); - return -EINVAL; + return ret; } mutex_lock(&hwmgr->smu_lock); if (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) @@ -998,7 +894,7 @@ static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint3 { struct pp_hwmgr *hwmgr = handle; - if (pp_check(hwmgr)) + if (!hwmgr || !hwmgr->pm_en) return -EINVAL; if (hwmgr->hwmgr_func->odn_edit_dpm_table == NULL) { @@ -1016,7 +912,7 @@ static int pp_dpm_switch_power_profile(void *handle, long workload; uint32_t index; - if (pp_check(hwmgr)) + if (!hwmgr || !hwmgr->pm_en) return -EINVAL; if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) { @@ -1048,46 +944,12 @@ static int pp_dpm_switch_power_profile(void *handle, return 0; } -static int pp_dpm_notify_smu_memory_info(void *handle, - uint32_t virtual_addr_low, - uint32_t virtual_addr_hi, - uint32_t mc_addr_low, - uint32_t mc_addr_hi, - uint32_t size) -{ - struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (hwmgr->hwmgr_func->notify_cac_buffer_info == NULL) { - pr_info("%s was not implemented.\n", __func__); - return -EINVAL; - } - - mutex_lock(&hwmgr->smu_lock); - - ret = hwmgr->hwmgr_func->notify_cac_buffer_info(hwmgr, virtual_addr_low, - virtual_addr_hi, mc_addr_low, mc_addr_hi, - size); - - mutex_unlock(&hwmgr->smu_lock); - - return ret; -} - static int pp_set_power_limit(void *handle, uint32_t limit) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->set_power_limit == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -1104,20 +966,14 @@ static int pp_set_power_limit(void *handle, uint32_t limit) hwmgr->hwmgr_func->set_power_limit(hwmgr, limit); hwmgr->power_limit = limit; mutex_unlock(&hwmgr->smu_lock); - return ret; + return 0; } static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - - if (ret) - return ret; - if (limit == NULL) + if (!hwmgr || !hwmgr->pm_en ||!limit) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1129,19 +985,16 @@ static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit) mutex_unlock(&hwmgr->smu_lock); - return ret; + return 0; } static int pp_display_configuration_change(void *handle, const struct amd_pp_display_configuration *display_config) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; mutex_lock(&hwmgr->smu_lock); phm_store_dal_configuration_data(hwmgr, display_config); @@ -1155,12 +1008,7 @@ static int pp_get_display_power_level(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (output == NULL) + if (!hwmgr || !hwmgr->pm_en ||!output) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1177,10 +1025,8 @@ static int pp_get_current_clocks(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1225,10 +1071,8 @@ static int pp_get_clock_by_type(void *handle, enum amd_pp_clock_type type, struc struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (clocks == NULL) return -EINVAL; @@ -1246,11 +1090,7 @@ static int pp_get_clock_by_type_with_latency(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - if (ret) - return ret; - - if (!clocks) + if (!hwmgr || !hwmgr->pm_en ||!clocks) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1266,11 +1106,7 @@ static int pp_get_clock_by_type_with_voltage(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - if (ret) - return ret; - - if (!clocks) + if (!hwmgr || !hwmgr->pm_en ||!clocks) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1287,11 +1123,7 @@ static int pp_set_watermarks_for_clocks_ranges(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - if (ret) - return ret; - - if (!wm_with_clock_ranges) + if (!hwmgr || !hwmgr->pm_en ||!wm_with_clock_ranges) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1308,11 +1140,7 @@ static int pp_display_clock_voltage_request(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - if (ret) - return ret; - - if (!clock) + if (!hwmgr || !hwmgr->pm_en ||!clock) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1328,12 +1156,7 @@ static int pp_get_display_mode_validation_clocks(void *handle, struct pp_hwmgr *hwmgr = handle; int ret = 0; - ret = pp_check(hwmgr); - - if (ret) - return ret; - - if (clocks == NULL) + if (!hwmgr || !hwmgr->pm_en ||!clocks) return -EINVAL; mutex_lock(&hwmgr->smu_lock); @@ -1348,12 +1171,9 @@ static int pp_get_display_mode_validation_clocks(void *handle, static int pp_set_mmhub_powergating_by_smu(void *handle) { struct pp_hwmgr *hwmgr = handle; - int ret = 0; - - ret = pp_check(hwmgr); - if (ret) - return ret; + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; if (hwmgr->hwmgr_func->set_mmhub_powergating_by_smu == NULL) { pr_info("%s was not implemented.\n", __func__); @@ -1390,7 +1210,6 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .get_vce_clock_state = pp_dpm_get_vce_clock_state, .switch_power_profile = pp_dpm_switch_power_profile, .set_clockgating_by_smu = pp_set_clockgating_by_smu, - .notify_smu_memory_info = pp_dpm_notify_smu_memory_info, .get_power_profile_mode = pp_get_power_profile_mode, .set_power_profile_mode = pp_set_power_profile_mode, .odn_edit_dpm_table = pp_odn_edit_dpm_table, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index ae2e9339dd6b..a0bb921fac22 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -75,8 +75,7 @@ int phm_set_power_state(struct pp_hwmgr *hwmgr, int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr) { - int ret = 1; - bool enabled; + int ret = -EINVAL;; PHM_FUNC_CHECK(hwmgr); if (smum_is_dpm_running(hwmgr)) { @@ -87,17 +86,12 @@ int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr) if (NULL != hwmgr->hwmgr_func->dynamic_state_management_enable) ret = hwmgr->hwmgr_func->dynamic_state_management_enable(hwmgr); - enabled = ret == 0; - - cgs_notify_dpm_enabled(hwmgr->device, enabled); - return ret; } int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr) { - int ret = -1; - bool enabled; + int ret = -EINVAL; PHM_FUNC_CHECK(hwmgr); @@ -109,10 +103,6 @@ int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr) if (hwmgr->hwmgr_func->dynamic_state_management_disable) ret = hwmgr->hwmgr_func->dynamic_state_management_disable(hwmgr); - enabled = ret == 0 ? false : true; - - cgs_notify_dpm_enabled(hwmgr->device, enabled); - return ret; } @@ -142,6 +132,15 @@ int phm_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, return 0; } +int phm_apply_clock_adjust_rules(struct pp_hwmgr *hwmgr) +{ + PHM_FUNC_CHECK(hwmgr); + + if (hwmgr->hwmgr_func->apply_clocks_adjust_rules != NULL) + return hwmgr->hwmgr_func->apply_clocks_adjust_rules(hwmgr); + return 0; +} + int phm_powerdown_uvd(struct pp_hwmgr *hwmgr) { PHM_FUNC_CHECK(hwmgr); @@ -171,6 +170,16 @@ int phm_disable_clock_power_gatings(struct pp_hwmgr *hwmgr) return 0; } +int phm_pre_display_configuration_changed(struct pp_hwmgr *hwmgr) +{ + PHM_FUNC_CHECK(hwmgr); + + if (NULL != hwmgr->hwmgr_func->pre_display_config_changed) + hwmgr->hwmgr_func->pre_display_config_changed(hwmgr); + + return 0; + +} int phm_display_configuration_changed(struct pp_hwmgr *hwmgr) { @@ -275,13 +284,11 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr, if (display_config == NULL) return -EINVAL; - hwmgr->display_config = *display_config; - if (NULL != hwmgr->hwmgr_func->set_deep_sleep_dcefclk) - hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, hwmgr->display_config.min_dcef_deep_sleep_set_clk); + hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, display_config->min_dcef_deep_sleep_set_clk); - for (index = 0; index < hwmgr->display_config.num_path_including_non_display; index++) { - if (hwmgr->display_config.displays[index].controller_id != 0) + for (index = 0; index < display_config->num_path_including_non_display; index++) { + if (display_config->displays[index].controller_id != 0) number_of_active_display++; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 42982055b161..e63bc47dc715 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -40,6 +40,7 @@ extern const struct pp_smumgr_func iceland_smu_funcs; extern const struct pp_smumgr_func tonga_smu_funcs; extern const struct pp_smumgr_func fiji_smu_funcs; extern const struct pp_smumgr_func polaris10_smu_funcs; +extern const struct pp_smumgr_func vegam_smu_funcs; extern const struct pp_smumgr_func vega10_smu_funcs; extern const struct pp_smumgr_func vega12_smu_funcs; extern const struct pp_smumgr_func smu10_smu_funcs; @@ -76,7 +77,7 @@ static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr) int hwmgr_early_init(struct pp_hwmgr *hwmgr) { - if (hwmgr == NULL) + if (!hwmgr) return -EINVAL; hwmgr->usec_timeout = AMD_MAX_USEC_TIMEOUT; @@ -95,7 +96,8 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) hwmgr->smumgr_funcs = &ci_smu_funcs; ci_set_asic_special_caps(hwmgr); hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK | - PP_ENABLE_GFX_CG_THRU_SMU); + PP_ENABLE_GFX_CG_THRU_SMU | + PP_GFXOFF_MASK); hwmgr->pp_table_version = PP_TABLE_V0; hwmgr->od_enabled = false; smu7_init_function_pointers(hwmgr); @@ -103,9 +105,11 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) case AMDGPU_FAMILY_CZ: hwmgr->od_enabled = false; hwmgr->smumgr_funcs = &smu8_smu_funcs; + hwmgr->feature_mask &= ~PP_GFXOFF_MASK; smu8_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_VI: + hwmgr->feature_mask &= ~PP_GFXOFF_MASK; switch (hwmgr->chip_id) { case CHIP_TOPAZ: hwmgr->smumgr_funcs = &iceland_smu_funcs; @@ -133,14 +137,21 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) polaris_set_asic_special_caps(hwmgr); hwmgr->feature_mask &= ~(PP_UVD_HANDSHAKE_MASK); break; + case CHIP_VEGAM: + hwmgr->smumgr_funcs = &vegam_smu_funcs; + polaris_set_asic_special_caps(hwmgr); + hwmgr->feature_mask &= ~(PP_UVD_HANDSHAKE_MASK); + break; default: return -EINVAL; } smu7_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_AI: + hwmgr->feature_mask &= ~PP_GFXOFF_MASK; switch (hwmgr->chip_id) { case CHIP_VEGA10: + case CHIP_VEGA20: hwmgr->smumgr_funcs = &vega10_smu_funcs; vega10_hwmgr_init(hwmgr); break; @@ -170,22 +181,58 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) return 0; } +int hwmgr_sw_init(struct pp_hwmgr *hwmgr) +{ + if (!hwmgr|| !hwmgr->smumgr_funcs || !hwmgr->smumgr_funcs->smu_init) + return -EINVAL; + + phm_register_irq_handlers(hwmgr); + + return hwmgr->smumgr_funcs->smu_init(hwmgr); +} + + +int hwmgr_sw_fini(struct pp_hwmgr *hwmgr) +{ + if (hwmgr && hwmgr->smumgr_funcs && hwmgr->smumgr_funcs->smu_fini) + hwmgr->smumgr_funcs->smu_fini(hwmgr); + + return 0; +} + int hwmgr_hw_init(struct pp_hwmgr *hwmgr) { int ret = 0; - if (hwmgr == NULL) + if (!hwmgr || !hwmgr->smumgr_funcs) return -EINVAL; - if (hwmgr->pptable_func == NULL || - hwmgr->pptable_func->pptable_init == NULL || - hwmgr->hwmgr_func->backend_init == NULL) - return -EINVAL; + if (hwmgr->smumgr_funcs->start_smu) { + ret = hwmgr->smumgr_funcs->start_smu(hwmgr); + if (ret) { + pr_err("smc start failed\n"); + return -EINVAL; + } + } + + if (!hwmgr->pm_en) + return 0; + + if (!hwmgr->pptable_func || + !hwmgr->pptable_func->pptable_init || + !hwmgr->hwmgr_func->backend_init) { + hwmgr->pm_en = false; + pr_info("dpm not supported \n"); + return 0; + } ret = hwmgr->pptable_func->pptable_init(hwmgr); if (ret) goto err; + ((struct amdgpu_device *)hwmgr->adev)->pm.no_fan = + hwmgr->thermal_controller.fanInfo.bNoFan; + ret = hwmgr->hwmgr_func->backend_init(hwmgr); if (ret) goto err1; @@ -206,6 +253,8 @@ int hwmgr_hw_init(struct pp_hwmgr *hwmgr) if (ret) goto err2; + ((struct amdgpu_device *)hwmgr->adev)->pm.dpm_enabled = true; + return 0; err2: if (hwmgr->hwmgr_func->backend_fini) @@ -214,14 +263,13 @@ err1: if (hwmgr->pptable_func->pptable_fini) hwmgr->pptable_func->pptable_fini(hwmgr); err: - pr_err("amdgpu: powerplay initialization failed\n"); return ret; } int hwmgr_hw_fini(struct pp_hwmgr *hwmgr) { - if (hwmgr == NULL) - return -EINVAL; + if (!hwmgr || !hwmgr->pm_en) + return 0; phm_stop_thermal_controller(hwmgr); psm_set_boot_states(hwmgr); @@ -236,12 +284,12 @@ int hwmgr_hw_fini(struct pp_hwmgr *hwmgr) return psm_fini_power_state_table(hwmgr); } -int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr) +int hwmgr_suspend(struct pp_hwmgr *hwmgr) { int ret = 0; - if (hwmgr == NULL) - return -EINVAL; + if (!hwmgr || !hwmgr->pm_en) + return 0; phm_disable_smc_firmware_ctf(hwmgr); ret = psm_set_boot_states(hwmgr); @@ -255,13 +303,23 @@ int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr) return ret; } -int hwmgr_hw_resume(struct pp_hwmgr *hwmgr) +int hwmgr_resume(struct pp_hwmgr *hwmgr) { int ret = 0; - if (hwmgr == NULL) + if (!hwmgr) return -EINVAL; + if (hwmgr->smumgr_funcs && hwmgr->smumgr_funcs->start_smu) { + if (hwmgr->smumgr_funcs->start_smu(hwmgr)) { + pr_err("smc start failed\n"); + return -EINVAL; + } + } + + if (!hwmgr->pm_en) + return 0; + ret = phm_setup_asic(hwmgr); if (ret) return ret; @@ -270,9 +328,6 @@ int hwmgr_hw_resume(struct pp_hwmgr *hwmgr) if (ret) return ret; ret = phm_start_thermal_controller(hwmgr); - if (ret) - return ret; - ret |= psm_set_performance_states(hwmgr); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c index 0f2851b5b368..323990b77ead 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c @@ -46,7 +46,7 @@ int psm_init_power_state_table(struct pp_hwmgr *hwmgr) sizeof(struct pp_power_state); if (table_entries == 0 || size == 0) { - pr_warn("Please check whether power state management is suppported on this asic\n"); + pr_warn("Please check whether power state management is supported on this asic\n"); return 0; } @@ -265,10 +265,18 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip, if (skip) return 0; + phm_pre_display_configuration_changed(hwmgr); + phm_display_configuration_changed(hwmgr); if (hwmgr->ps) power_state_management(hwmgr, new_ps); + else + /* + * for vega12/vega20 which does not support power state manager + * DAL clock limits should also be honoured + */ + phm_apply_clock_adjust_rules(hwmgr); phm_notify_smc_display_config_after_ps_adjustment(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c index c6febbf0bf69..7047e29755c3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c @@ -23,7 +23,8 @@ #include "pp_debug.h" #include <linux/module.h> #include <linux/slab.h> - +#include <linux/delay.h> +#include "atom.h" #include "ppatomctrl.h" #include "atombios.h" #include "cgs_common.h" @@ -128,7 +129,6 @@ static int atomctrl_set_mc_reg_address_table( return 0; } - int atomctrl_initialize_mc_reg_table( struct pp_hwmgr *hwmgr, uint8_t module_index, @@ -141,7 +141,7 @@ int atomctrl_initialize_mc_reg_table( u16 size; vram_info = (ATOM_VRAM_INFO_HEADER_V2_1 *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev); if (module_index >= vram_info->ucNumOfVRAMModule) { @@ -174,6 +174,8 @@ int atomctrl_set_engine_dram_timings_rv770( uint32_t engine_clock, uint32_t memory_clock) { + struct amdgpu_device *adev = hwmgr->adev; + SET_ENGINE_CLOCK_PS_ALLOCATION engine_clock_parameters; /* They are both in 10KHz Units. */ @@ -184,9 +186,10 @@ int atomctrl_set_engine_dram_timings_rv770( /* in 10 khz units.*/ engine_clock_parameters.sReserved.ulClock = cpu_to_le32(memory_clock & SET_CLOCK_FREQ_MASK); - return cgs_atom_exec_cmd_table(hwmgr->device, + + return amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings), - &engine_clock_parameters); + (uint32_t *)&engine_clock_parameters); } /** @@ -203,7 +206,7 @@ static ATOM_VOLTAGE_OBJECT_INFO *get_voltage_info_table(void *device) union voltage_object_info *voltage_info; voltage_info = (union voltage_object_info *) - cgs_atom_get_data_table(device, index, + smu_atom_get_data_table(device, index, &size, &frev, &crev); if (voltage_info != NULL) @@ -247,16 +250,16 @@ int atomctrl_get_memory_pll_dividers_si( pp_atomctrl_memory_clock_param *mpll_param, bool strobe_mode) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1 mpll_parameters; int result; mpll_parameters.ulClock = cpu_to_le32(clock_value); mpll_parameters.ucInputFlag = (uint8_t)((strobe_mode) ? 1 : 0); - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam), - &mpll_parameters); + (uint32_t *)&mpll_parameters); if (0 == result) { mpll_param->mpll_fb_divider.clk_frac = @@ -295,14 +298,15 @@ int atomctrl_get_memory_pll_dividers_si( int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr, uint32_t clock_value, pp_atomctrl_memory_clock_param *mpll_param) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2 mpll_parameters; int result; mpll_parameters.ulClock.ulClock = cpu_to_le32(clock_value); - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam), - &mpll_parameters); + (uint32_t *)&mpll_parameters); if (!result) mpll_param->mpll_post_divider = @@ -311,19 +315,49 @@ int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr, return result; } +int atomctrl_get_memory_pll_dividers_ai(struct pp_hwmgr *hwmgr, + uint32_t clock_value, + pp_atomctrl_memory_clock_param_ai *mpll_param) +{ + struct amdgpu_device *adev = hwmgr->adev; + COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3 mpll_parameters = {{0}, 0, 0}; + int result; + + mpll_parameters.ulClock.ulClock = cpu_to_le32(clock_value); + + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, + GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam), + (uint32_t *)&mpll_parameters); + + /* VEGAM's mpll takes sometime to finish computing */ + udelay(10); + + if (!result) { + mpll_param->ulMclk_fcw_int = + le16_to_cpu(mpll_parameters.usMclk_fcw_int); + mpll_param->ulMclk_fcw_frac = + le16_to_cpu(mpll_parameters.usMclk_fcw_frac); + mpll_param->ulClock = + le32_to_cpu(mpll_parameters.ulClock.ulClock); + mpll_param->ulPostDiv = mpll_parameters.ulClock.ucPostDiv; + } + + return result; +} + int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr, uint32_t clock_value, pp_atomctrl_clock_dividers_kong *dividers) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 pll_parameters; int result; pll_parameters.ulClock = cpu_to_le32(clock_value); - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL), - &pll_parameters); + (uint32_t *)&pll_parameters); if (0 == result) { dividers->pll_post_divider = pll_parameters.ucPostDiv; @@ -338,16 +372,16 @@ int atomctrl_get_engine_pll_dividers_vi( uint32_t clock_value, pp_atomctrl_clock_dividers_vi *dividers) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters; int result; pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value); pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK; - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL), - &pll_patameters); + (uint32_t *)&pll_patameters); if (0 == result) { dividers->pll_post_divider = @@ -375,16 +409,16 @@ int atomctrl_get_engine_pll_dividers_ai(struct pp_hwmgr *hwmgr, uint32_t clock_value, pp_atomctrl_clock_dividers_ai *dividers) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_7 pll_patameters; int result; pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value); pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK; - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL), - &pll_patameters); + (uint32_t *)&pll_patameters); if (0 == result) { dividers->usSclk_fcw_frac = le16_to_cpu(pll_patameters.usSclk_fcw_frac); @@ -407,6 +441,7 @@ int atomctrl_get_dfs_pll_dividers_vi( uint32_t clock_value, pp_atomctrl_clock_dividers_vi *dividers) { + struct amdgpu_device *adev = hwmgr->adev; COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters; int result; @@ -414,10 +449,9 @@ int atomctrl_get_dfs_pll_dividers_vi( pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK; - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL), - &pll_patameters); + (uint32_t *)&pll_patameters); if (0 == result) { dividers->pll_post_divider = @@ -452,7 +486,7 @@ uint32_t atomctrl_get_reference_clock(struct pp_hwmgr *hwmgr) uint32_t clock; fw_info = (ATOM_FIRMWARE_INFO *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, FirmwareInfo), &size, &frev, &crev); @@ -476,7 +510,7 @@ bool atomctrl_is_voltage_controlled_by_gpio_v3( uint8_t voltage_mode) { ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info = - (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device); + (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev); bool ret; PP_ASSERT_WITH_CODE((NULL != voltage_info), @@ -495,7 +529,7 @@ int atomctrl_get_voltage_table_v3( pp_atomctrl_voltage_table *voltage_table) { ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info = - (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device); + (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev); const ATOM_VOLTAGE_OBJECT_V3 *voltage_object; unsigned int i; @@ -572,7 +606,7 @@ static ATOM_GPIO_PIN_LUT *get_gpio_lookup_table(void *device) void *table_address; table_address = (ATOM_GPIO_PIN_LUT *) - cgs_atom_get_data_table(device, + smu_atom_get_data_table(device, GetIndexIntoMasterTable(DATA, GPIO_Pin_LUT), &size, &frev, &crev); @@ -592,7 +626,7 @@ bool atomctrl_get_pp_assign_pin( { bool bRet = false; ATOM_GPIO_PIN_LUT *gpio_lookup_table = - get_gpio_lookup_table(hwmgr->device); + get_gpio_lookup_table(hwmgr->adev); PP_ASSERT_WITH_CODE((NULL != gpio_lookup_table), "Could not find GPIO lookup Table in BIOS.", return false); @@ -613,7 +647,7 @@ int atomctrl_calculate_voltage_evv_on_sclk( bool debug) { ATOM_ASIC_PROFILING_INFO_V3_4 *getASICProfilingInfo; - + struct amdgpu_device *adev = hwmgr->adev; EFUSE_LINEAR_FUNC_PARAM sRO_fuse; EFUSE_LINEAR_FUNC_PARAM sCACm_fuse; EFUSE_LINEAR_FUNC_PARAM sCACb_fuse; @@ -640,7 +674,7 @@ int atomctrl_calculate_voltage_evv_on_sclk( int result; getASICProfilingInfo = (ATOM_ASIC_PROFILING_INFO_V3_4 *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo), NULL, NULL, NULL); @@ -706,9 +740,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -727,9 +761,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -747,9 +781,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sInput_FuseValues.ucBitLength = sCACb_fuse.ucEfuseLength; sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -768,9 +802,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -790,9 +824,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -811,9 +845,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sInput_FuseValues.ucBitLength = sKv_b_fuse.ucEfuseLength; sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -842,9 +876,9 @@ int atomctrl_calculate_voltage_evv_on_sclk( sOutput_FuseValues.sEfuse = sInput_FuseValues; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &sOutput_FuseValues); + (uint32_t *)&sOutput_FuseValues); if (result) return result; @@ -1053,8 +1087,9 @@ int atomctrl_get_voltage_evv_on_sclk( uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage) { - int result; + struct amdgpu_device *adev = hwmgr->adev; GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space; + int result; get_voltage_info_param_space.ucVoltageType = voltage_type; @@ -1065,14 +1100,12 @@ int atomctrl_get_voltage_evv_on_sclk( get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(sclk); - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, GetVoltageInfo), - &get_voltage_info_param_space); - - if (0 != result) - return result; + (uint32_t *)&get_voltage_info_param_space); - *voltage = le16_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *) + *voltage = result ? 0 : + le16_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *) (&get_voltage_info_param_space))->usVoltageLevel); return result; @@ -1088,9 +1121,10 @@ int atomctrl_get_voltage_evv(struct pp_hwmgr *hwmgr, uint16_t virtual_voltage_id, uint16_t *voltage) { + struct amdgpu_device *adev = hwmgr->adev; + GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space; int result; int entry_id; - GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space; /* search for leakage voltage ID 0xff01 ~ 0xff08 and sckl */ for (entry_id = 0; entry_id < hwmgr->dyn_state.vddc_dependency_on_sclk->count; entry_id++) { @@ -1111,9 +1145,9 @@ int atomctrl_get_voltage_evv(struct pp_hwmgr *hwmgr, get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(hwmgr->dyn_state.vddc_dependency_on_sclk->entries[entry_id].clk); - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, GetVoltageInfo), - &get_voltage_info_param_space); + (uint32_t *)&get_voltage_info_param_space); if (0 != result) return result; @@ -1135,7 +1169,7 @@ uint32_t atomctrl_get_mpll_reference_clock(struct pp_hwmgr *hwmgr) u16 size; fw_info = (ATOM_COMMON_TABLE_HEADER *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, FirmwareInfo), &size, &frev, &crev); @@ -1167,7 +1201,7 @@ static ATOM_ASIC_INTERNAL_SS_INFO *asic_internal_ss_get_ss_table(void *device) u16 size; table = (ATOM_ASIC_INTERNAL_SS_INFO *) - cgs_atom_get_data_table(device, + smu_atom_get_data_table(device, GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info), &size, &frev, &crev); @@ -1188,7 +1222,7 @@ static int asic_internal_ss_get_ss_asignment(struct pp_hwmgr *hwmgr, memset(ssEntry, 0x00, sizeof(pp_atomctrl_internal_ss_info)); - table = asic_internal_ss_get_ss_table(hwmgr->device); + table = asic_internal_ss_get_ss_table(hwmgr->adev); if (NULL == table) return -1; @@ -1260,9 +1294,10 @@ int atomctrl_get_engine_clock_spread_spectrum( ASIC_INTERNAL_ENGINE_SS, engine_clock, ssInfo); } -int atomctrl_read_efuse(void *device, uint16_t start_index, +int atomctrl_read_efuse(struct pp_hwmgr *hwmgr, uint16_t start_index, uint16_t end_index, uint32_t mask, uint32_t *efuse) { + struct amdgpu_device *adev = hwmgr->adev; int result; READ_EFUSE_VALUE_PARAMETER efuse_param; @@ -1272,11 +1307,10 @@ int atomctrl_read_efuse(void *device, uint16_t start_index, efuse_param.sEfuse.ucBitLength = (uint8_t) ((end_index - start_index) + 1); - result = cgs_atom_exec_cmd_table(device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, ReadEfuseValue), - &efuse_param); - if (!result) - *efuse = le32_to_cpu(efuse_param.ulEfuseValue) & mask; + (uint32_t *)&efuse_param); + *efuse = result ? 0 : le32_to_cpu(efuse_param.ulEfuseValue) & mask; return result; } @@ -1284,6 +1318,7 @@ int atomctrl_read_efuse(void *device, uint16_t start_index, int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock, uint8_t level) { + struct amdgpu_device *adev = hwmgr->adev; DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1 memory_clock_parameters; int result; @@ -1293,10 +1328,9 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock, ADJUST_MC_SETTING_PARAM; memory_clock_parameters.asDPMMCReg.ucMclkDPMState = level; - result = cgs_atom_exec_cmd_table - (hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings), - &memory_clock_parameters); + (uint32_t *)&memory_clock_parameters); return result; } @@ -1304,7 +1338,7 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock, int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage) { - + struct amdgpu_device *adev = hwmgr->adev; int result; GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_3 get_voltage_info_param_space; @@ -1313,15 +1347,12 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_ get_voltage_info_param_space.usVoltageLevel = cpu_to_le16(virtual_voltage_Id); get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(sclk); - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, GetVoltageInfo), - &get_voltage_info_param_space); + (uint32_t *)&get_voltage_info_param_space); - if (0 != result) - return result; - - *voltage = le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *) - (&get_voltage_info_param_space))->ulVoltageLevel); + *voltage = result ? 0 : + le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)(&get_voltage_info_param_space))->ulVoltageLevel); return result; } @@ -1334,7 +1365,7 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr u16 size; ATOM_SMU_INFO_V2_1 *psmu_info = - (ATOM_SMU_INFO_V2_1 *)cgs_atom_get_data_table(hwmgr->device, + (ATOM_SMU_INFO_V2_1 *)smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, SMU_Info), &size, &frev, &crev); @@ -1362,7 +1393,7 @@ int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, return -EINVAL; profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo), NULL, NULL, NULL); if (!profile) @@ -1402,7 +1433,7 @@ int atomctrl_get_svi2_info(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint16_t *load_line) { ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info = - (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device); + (ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev); const ATOM_VOLTAGE_OBJECT_V3 *voltage_object; @@ -1421,16 +1452,17 @@ int atomctrl_get_svi2_info(struct pp_hwmgr *hwmgr, uint8_t voltage_type, int atomctrl_get_leakage_id_from_efuse(struct pp_hwmgr *hwmgr, uint16_t *virtual_voltage_id) { - int result; + struct amdgpu_device *adev = hwmgr->adev; SET_VOLTAGE_PS_ALLOCATION allocation; SET_VOLTAGE_PARAMETERS_V1_3 *voltage_parameters = (SET_VOLTAGE_PARAMETERS_V1_3 *)&allocation.sASICSetVoltage; + int result; voltage_parameters->ucVoltageMode = ATOM_GET_LEAKAGE_ID; - result = cgs_atom_exec_cmd_table(hwmgr->device, + result = amdgpu_atom_execute_table(adev->mode_info.atom_context, GetIndexIntoMasterTable(COMMAND, SetVoltage), - voltage_parameters); + (uint32_t *)voltage_parameters); *virtual_voltage_id = voltage_parameters->usVoltageLevel; @@ -1453,7 +1485,7 @@ int atomctrl_get_leakage_vddc_base_on_leakage(struct pp_hwmgr *hwmgr, ix = GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo); profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, ix, NULL, NULL, NULL); if (!profile) @@ -1498,3 +1530,33 @@ int atomctrl_get_leakage_vddc_base_on_leakage(struct pp_hwmgr *hwmgr, return 0; } + +void atomctrl_get_voltage_range(struct pp_hwmgr *hwmgr, uint32_t *max_vddc, + uint32_t *min_vddc) +{ + void *profile; + + profile = smu_atom_get_data_table(hwmgr->adev, + GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo), + NULL, NULL, NULL); + + if (profile) { + switch (hwmgr->chip_id) { + case CHIP_TONGA: + case CHIP_FIJI: + *max_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_3 *)profile)->ulMaxVddc/4); + *min_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_3 *)profile)->ulMinVddc/4); + return; + case CHIP_POLARIS11: + case CHIP_POLARIS10: + case CHIP_POLARIS12: + *max_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_6 *)profile)->ulMaxVddc/100); + *min_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_6 *)profile)->ulMinVddc/100); + return; + default: + break; + } + } + *max_vddc = 0; + *min_vddc = 0; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h index c44a92064cf1..3ee54f182943 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h @@ -146,6 +146,14 @@ struct pp_atomctrl_memory_clock_param { }; typedef struct pp_atomctrl_memory_clock_param pp_atomctrl_memory_clock_param; +struct pp_atomctrl_memory_clock_param_ai { + uint32_t ulClock; + uint32_t ulPostDiv; + uint16_t ulMclk_fcw_frac; + uint16_t ulMclk_fcw_int; +}; +typedef struct pp_atomctrl_memory_clock_param_ai pp_atomctrl_memory_clock_param_ai; + struct pp_atomctrl_internal_ss_info { uint32_t speed_spectrum_percentage; /* in 1/100 percentage */ uint32_t speed_spectrum_rate; /* in KHz */ @@ -295,10 +303,12 @@ extern bool atomctrl_is_voltage_controlled_by_gpio_v3(struct pp_hwmgr *hwmgr, ui extern int atomctrl_get_voltage_table_v3(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint8_t voltage_mode, pp_atomctrl_voltage_table *voltage_table); extern int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr, uint32_t clock_value, pp_atomctrl_memory_clock_param *mpll_param); +extern int atomctrl_get_memory_pll_dividers_ai(struct pp_hwmgr *hwmgr, + uint32_t clock_value, pp_atomctrl_memory_clock_param_ai *mpll_param); extern int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr, uint32_t clock_value, pp_atomctrl_clock_dividers_kong *dividers); -extern int atomctrl_read_efuse(void *device, uint16_t start_index, +extern int atomctrl_read_efuse(struct pp_hwmgr *hwmgr, uint16_t start_index, uint16_t end_index, uint32_t mask, uint32_t *efuse); extern int atomctrl_calculate_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage, uint16_t dpm_level, bool debug); @@ -320,5 +330,8 @@ extern int atomctrl_get_leakage_vddc_base_on_leakage(struct pp_hwmgr *hwmgr, uint16_t virtual_voltage_id, uint16_t efuse_voltage_id); extern int atomctrl_get_leakage_id_from_efuse(struct pp_hwmgr *hwmgr, uint16_t *virtual_voltage_id); + +extern void atomctrl_get_voltage_range(struct pp_hwmgr *hwmgr, uint32_t *max_vddc, + uint32_t *min_vddc); #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c index ad42caac033e..5325661fedff 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c @@ -23,9 +23,9 @@ #include "ppatomfwctrl.h" #include "atomfirmware.h" +#include "atom.h" #include "pp_debug.h" - static const union atom_voltage_object_v4 *pp_atomfwctrl_lookup_voltage_type_v4( const struct atom_voltage_objects_info_v4_1 *voltage_object_info_table, uint8_t voltage_type, uint8_t voltage_mode) @@ -38,35 +38,34 @@ static const union atom_voltage_object_v4 *pp_atomfwctrl_lookup_voltage_type_v4( while (offset < size) { const union atom_voltage_object_v4 *voltage_object = - (const union atom_voltage_object_v4 *)(start + offset); + (const union atom_voltage_object_v4 *)(start + offset); - if (voltage_type == voltage_object->gpio_voltage_obj.header.voltage_type && - voltage_mode == voltage_object->gpio_voltage_obj.header.voltage_mode) - return voltage_object; + if (voltage_type == voltage_object->gpio_voltage_obj.header.voltage_type && + voltage_mode == voltage_object->gpio_voltage_obj.header.voltage_mode) + return voltage_object; - offset += le16_to_cpu(voltage_object->gpio_voltage_obj.header.object_size); + offset += le16_to_cpu(voltage_object->gpio_voltage_obj.header.object_size); - } + } - return NULL; + return NULL; } static struct atom_voltage_objects_info_v4_1 *pp_atomfwctrl_get_voltage_info_table( struct pp_hwmgr *hwmgr) { - const void *table_address; - uint16_t idx; + const void *table_address; + uint16_t idx; - idx = GetIndexIntoMasterDataTable(voltageobject_info); - table_address = cgs_atom_get_data_table(hwmgr->device, - idx, NULL, NULL, NULL); + idx = GetIndexIntoMasterDataTable(voltageobject_info); + table_address = smu_atom_get_data_table(hwmgr->adev, + idx, NULL, NULL, NULL); - PP_ASSERT_WITH_CODE( - table_address, - "Error retrieving BIOS Table Address!", - return NULL); + PP_ASSERT_WITH_CODE(table_address, + "Error retrieving BIOS Table Address!", + return NULL); - return (struct atom_voltage_objects_info_v4_1 *)table_address; + return (struct atom_voltage_objects_info_v4_1 *)table_address; } /** @@ -167,7 +166,7 @@ static struct atom_gpio_pin_lut_v2_1 *pp_atomfwctrl_get_gpio_lookup_table( uint16_t idx; idx = GetIndexIntoMasterDataTable(gpio_pin_lut); - table_address = cgs_atom_get_data_table(hwmgr->device, + table_address = smu_atom_get_data_table(hwmgr->adev, idx, NULL, NULL, NULL); PP_ASSERT_WITH_CODE(table_address, "Error retrieving BIOS Table Address!", @@ -248,28 +247,30 @@ int pp_atomfwctrl_get_gpu_pll_dividers_vega10(struct pp_hwmgr *hwmgr, uint32_t clock_type, uint32_t clock_value, struct pp_atomfwctrl_clock_dividers_soc15 *dividers) { + struct amdgpu_device *adev = hwmgr->adev; struct compute_gpu_clock_input_parameter_v1_8 pll_parameters; struct compute_gpu_clock_output_parameter_v1_8 *pll_output; - int result; uint32_t idx; pll_parameters.gpuclock_10khz = (uint32_t)clock_value; pll_parameters.gpu_clock_type = clock_type; idx = GetIndexIntoMasterCmdTable(computegpuclockparam); - result = cgs_atom_exec_cmd_table(hwmgr->device, idx, &pll_parameters); - - if (!result) { - pll_output = (struct compute_gpu_clock_output_parameter_v1_8 *) - &pll_parameters; - dividers->ulClock = le32_to_cpu(pll_output->gpuclock_10khz); - dividers->ulDid = le32_to_cpu(pll_output->dfs_did); - dividers->ulPll_fb_mult = le32_to_cpu(pll_output->pll_fb_mult); - dividers->ulPll_ss_fbsmult = le32_to_cpu(pll_output->pll_ss_fbsmult); - dividers->usPll_ss_slew_frac = le16_to_cpu(pll_output->pll_ss_slew_frac); - dividers->ucPll_ss_enable = pll_output->pll_ss_enable; - } - return result; + + if (amdgpu_atom_execute_table( + adev->mode_info.atom_context, idx, (uint32_t *)&pll_parameters)) + return -EINVAL; + + pll_output = (struct compute_gpu_clock_output_parameter_v1_8 *) + &pll_parameters; + dividers->ulClock = le32_to_cpu(pll_output->gpuclock_10khz); + dividers->ulDid = le32_to_cpu(pll_output->dfs_did); + dividers->ulPll_fb_mult = le32_to_cpu(pll_output->pll_fb_mult); + dividers->ulPll_ss_fbsmult = le32_to_cpu(pll_output->pll_ss_fbsmult); + dividers->usPll_ss_slew_frac = le16_to_cpu(pll_output->pll_ss_slew_frac); + dividers->ucPll_ss_enable = pll_output->pll_ss_enable; + + return 0; } int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr, @@ -283,7 +284,7 @@ int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr, idx = GetIndexIntoMasterDataTable(asic_profiling_info); profile = (struct atom_asic_profiling_info_v4_1 *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, idx, NULL, NULL, NULL); if (!profile) @@ -467,7 +468,7 @@ int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr, idx = GetIndexIntoMasterDataTable(smu_info); info = (struct atom_smu_info_v3_1 *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, idx, NULL, NULL, NULL); if (!info) { @@ -487,23 +488,26 @@ int pp_atomfwctrl_get_gpio_information(struct pp_hwmgr *hwmgr, return 0; } -int pp_atomfwctrl__get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKID id, uint32_t *frequency) +int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKID id, uint32_t *frequency) { + struct amdgpu_device *adev = hwmgr->adev; struct atom_get_smu_clock_info_parameters_v3_1 parameters; struct atom_get_smu_clock_info_output_parameters_v3_1 *output; uint32_t ix; parameters.clk_id = id; + parameters.syspll_id = 0; parameters.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ; + parameters.dfsdid = 0; ix = GetIndexIntoMasterCmdTable(getsmuclockinfo); - if (!cgs_atom_exec_cmd_table(hwmgr->device, ix, ¶meters)) { - output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)¶meters; - *frequency = output->atom_smu_outputclkfreq.smu_clock_freq_hz / 10000; - } else { - pr_info("Error execute_table getsmuclockinfo!"); - return -1; - } + + if (amdgpu_atom_execute_table( + adev->mode_info.atom_context, ix, (uint32_t *)¶meters)) + return -EINVAL; + + output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)¶meters; + *frequency = le32_to_cpu(output->atom_smu_outputclkfreq.smu_clock_freq_hz) / 10000; return 0; } @@ -513,11 +517,10 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr, { struct atom_firmware_info_v3_1 *info = NULL; uint16_t ix; - uint32_t frequency = 0; ix = GetIndexIntoMasterDataTable(firmwareinfo); info = (struct atom_firmware_info_v3_1 *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, ix, NULL, NULL, NULL); if (!info) { @@ -536,12 +539,6 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr, boot_values->ulSocClk = 0; boot_values->ulDCEFClk = 0; - if (!pp_atomfwctrl__get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_SOCCLK_ID, &frequency)) - boot_values->ulSocClk = frequency; - - if (!pp_atomfwctrl__get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCEFCLK_ID, &frequency)) - boot_values->ulDCEFClk = frequency; - return 0; } @@ -553,7 +550,7 @@ int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr, ix = GetIndexIntoMasterDataTable(smc_dpm_info); info = (struct atom_smc_dpm_info_v4_1 *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, ix, NULL, NULL, NULL); if (!info) { pr_info("Error retrieving BIOS Table Address!"); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h index 8df1e84f27c9..fe10aa4db5e6 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h @@ -230,6 +230,8 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr, struct pp_atomfwctrl_bios_boot_up_values *boot_values); int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr, struct pp_atomfwctrl_smc_dpm_parameters *param); +int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, + BIOS_CLKID id, uint32_t *frequency); #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c index c9eecce5683f..35bd9870ab10 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c @@ -141,7 +141,7 @@ static const void *get_powerplay_table(struct pp_hwmgr *hwmgr) if (!table_address) { table_address = (ATOM_Tonga_POWERPLAYTABLE *) - cgs_atom_get_data_table(hwmgr->device, + smu_atom_get_data_table(hwmgr->adev, index, &size, &frev, &crev); hwmgr->soft_pp_table = table_address; /*Cache the result in RAM.*/ hwmgr->soft_pp_table_size = size; @@ -728,6 +728,32 @@ static int get_mm_clock_voltage_table( return 0; } +static int get_gpio_table(struct pp_hwmgr *hwmgr, + struct phm_ppt_v1_gpio_table **pp_tonga_gpio_table, + const ATOM_Tonga_GPIO_Table *atom_gpio_table) +{ + uint32_t table_size; + struct phm_ppt_v1_gpio_table *pp_gpio_table; + struct phm_ppt_v1_information *pp_table_information = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + table_size = sizeof(struct phm_ppt_v1_gpio_table); + pp_gpio_table = kzalloc(table_size, GFP_KERNEL); + if (!pp_gpio_table) + return -ENOMEM; + + if (pp_table_information->vdd_dep_on_sclk->count < + atom_gpio_table->ucVRHotTriggeredSclkDpmIndex) + PP_ASSERT_WITH_CODE(false, + "SCLK DPM index for VRHot cannot exceed the total sclk level count!",); + else + pp_gpio_table->vrhot_triggered_sclk_dpm_index = + atom_gpio_table->ucVRHotTriggeredSclkDpmIndex; + + *pp_tonga_gpio_table = pp_gpio_table; + + return 0; +} /** * Private Function used during initialization. * Initialize clock voltage dependency @@ -761,11 +787,15 @@ static int init_clock_voltage_dependency( const PPTable_Generic_SubTable_Header *pcie_table = (const PPTable_Generic_SubTable_Header *)(((unsigned long) powerplay_table) + le16_to_cpu(powerplay_table->usPCIETableOffset)); + const ATOM_Tonga_GPIO_Table *gpio_table = + (const ATOM_Tonga_GPIO_Table *)(((unsigned long) powerplay_table) + + le16_to_cpu(powerplay_table->usGPIOTableOffset)); pp_table_information->vdd_dep_on_sclk = NULL; pp_table_information->vdd_dep_on_mclk = NULL; pp_table_information->mm_dep_table = NULL; pp_table_information->pcie_table = NULL; + pp_table_information->gpio_table = NULL; if (powerplay_table->usMMDependencyTableOffset != 0) result = get_mm_clock_voltage_table(hwmgr, @@ -810,6 +840,10 @@ static int init_clock_voltage_dependency( result = get_valid_clk(hwmgr, &pp_table_information->valid_sclk_values, pp_table_information->vdd_dep_on_sclk); + if (!result && gpio_table) + result = get_gpio_table(hwmgr, &pp_table_information->gpio_table, + gpio_table); + return result; } @@ -836,12 +870,6 @@ static int init_over_drive_limits( hwmgr->platform_descriptor.maxOverdriveVDDC = 0; hwmgr->platform_descriptor.overdriveVDDCStep = 0; - if (hwmgr->platform_descriptor.overdriveLimit.engineClock == 0 \ - || hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0) { - hwmgr->od_enabled = false; - pr_debug("OverDrive feature not support by VBIOS\n"); - } - return 0; } @@ -1116,6 +1144,9 @@ static int pp_tables_v1_0_uninitialize(struct pp_hwmgr *hwmgr) kfree(pp_table_information->pcie_table); pp_table_information->pcie_table = NULL; + kfree(pp_table_information->gpio_table); + pp_table_information->gpio_table = NULL; + kfree(hwmgr->pptable); hwmgr->pptable = NULL; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c index 36ca7c419c90..925e17104f90 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c @@ -837,7 +837,7 @@ static const ATOM_PPLIB_POWERPLAYTABLE *get_powerplay_table( hwmgr->soft_pp_table = &soft_dummy_pp_table[0]; hwmgr->soft_pp_table_size = sizeof(soft_dummy_pp_table); } else { - table_addr = cgs_atom_get_data_table(hwmgr->device, + table_addr = smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, PowerPlayInfo), &size, &frev, &crev); hwmgr->soft_pp_table = table_addr; @@ -1058,7 +1058,7 @@ static int init_overdrive_limits(struct pp_hwmgr *hwmgr, return 0; /* We assume here that fw_info is unchanged if this call fails.*/ - fw_info = cgs_atom_get_data_table(hwmgr->device, + fw_info = smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, FirmwareInfo), &size, &frev, &crev); @@ -1074,12 +1074,6 @@ static int init_overdrive_limits(struct pp_hwmgr *hwmgr, powerplay_table, (const ATOM_FIRMWARE_INFO_V2_1 *)fw_info); - if (hwmgr->platform_descriptor.overdriveLimit.engineClock == 0 - && hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0) { - hwmgr->od_enabled = false; - pr_debug("OverDrive feature not support by VBIOS\n"); - } - return result; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 10253b89b3d8..d4bc83e81389 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -34,7 +34,7 @@ #include "rv_ppsmc.h" #include "smu10_hwmgr.h" #include "power_state.h" -#include "pp_soc15.h" +#include "soc15_common.h" #define SMU10_MAX_DEEPSLEEP_DIVIDER_ID 5 #define SMU10_MINIMUM_ENGINE_CLOCK 800 /* 8Mhz, the low boundary of engine clock allowed on this chip */ @@ -42,12 +42,48 @@ #define SMU10_DISPCLK_BYPASS_THRESHOLD 10000 /* 100Mhz */ #define SMC_RAM_END 0x40000 +#define mmPWR_MISC_CNTL_STATUS 0x0183 +#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 +#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0 +#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1 +#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L +#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L + static const unsigned long SMU10_Magic = (unsigned long) PHM_Rv_Magic; static int smu10_display_clock_voltage_request(struct pp_hwmgr *hwmgr, - struct pp_display_clock_request *clock_req); + struct pp_display_clock_request *clock_req) +{ + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + enum amd_pp_clock_type clk_type = clock_req->clock_type; + uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; + PPSMC_Msg msg; + switch (clk_type) { + case amd_pp_dcf_clock: + if (clk_freq == smu10_data->dcf_actual_hard_min_freq) + return 0; + msg = PPSMC_MSG_SetHardMinDcefclkByFreq; + smu10_data->dcf_actual_hard_min_freq = clk_freq; + break; + case amd_pp_soc_clock: + msg = PPSMC_MSG_SetHardMinSocclkByFreq; + break; + case amd_pp_f_clock: + if (clk_freq == smu10_data->f_actual_hard_min_freq) + return 0; + smu10_data->f_actual_hard_min_freq = clk_freq; + msg = PPSMC_MSG_SetHardMinFclkByFreq; + break; + default: + pr_info("[DisplayClockVoltageRequest]Invalid Clock Type!"); + return -EINVAL; + } + smum_send_msg_to_smc_with_parameter(hwmgr, msg, clk_freq); + + return 0; +} static struct smu10_power_state *cast_smu10_ps(struct pp_hw_power_state *hw_ps) { @@ -74,11 +110,15 @@ static int smu10_initialize_dpm_defaults(struct pp_hwmgr *hwmgr) smu10_data->thermal_auto_throttling_treshold = 0; smu10_data->is_nb_dpm_enabled = 1; smu10_data->dpm_flags = 1; - smu10_data->gfx_off_controled_by_driver = false; smu10_data->need_min_deep_sleep_dcefclk = true; smu10_data->num_active_display = 0; smu10_data->deep_sleep_dcefclk = 0; + if (hwmgr->feature_mask & PP_GFXOFF_MASK) + smu10_data->gfx_off_controled_by_driver = true; + else + smu10_data->gfx_off_controled_by_driver = false; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep); @@ -161,7 +201,7 @@ static int smu10_set_clock_limit(struct pp_hwmgr *hwmgr, const void *input) struct PP_Clocks clocks = {0}; struct pp_display_clock_request clock_req; - clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk; + clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk; clock_req.clock_type = amd_pp_dcf_clock; clock_req.clock_freq_in_khz = clocks.dcefClock * 10; @@ -206,12 +246,18 @@ static int smu10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input static int smu10_init_power_gate_state(struct pp_hwmgr *hwmgr) { struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + struct amdgpu_device *adev = hwmgr->adev; smu10_data->vcn_power_gated = true; smu10_data->isp_tileA_power_gated = true; smu10_data->isp_tileB_power_gated = true; - return 0; + if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) + return smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetGfxCGPG, + true); + else + return 0; } @@ -237,19 +283,37 @@ static int smu10_power_off_asic(struct pp_hwmgr *hwmgr) return smu10_reset_cc6_data(hwmgr); } +static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr) +{ + uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; + + reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS); + if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) == + (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT)) + return true; + + return false; +} + static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr) { struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); - if (smu10_data->gfx_off_controled_by_driver) + if (smu10_data->gfx_off_controled_by_driver) { smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff); + /* confirm gfx is back to "on" state */ + while (!smu10_is_gfx_on(hwmgr)) + msleep(1); + } + return 0; } static int smu10_disable_dpm_tasks(struct pp_hwmgr *hwmgr) { - return smu10_disable_gfx_off(hwmgr); + return 0; } static int smu10_enable_gfx_off(struct pp_hwmgr *hwmgr) @@ -264,7 +328,15 @@ static int smu10_enable_gfx_off(struct pp_hwmgr *hwmgr) static int smu10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) { - return smu10_enable_gfx_off(hwmgr); + return 0; +} + +static int smu10_gfx_off_control(struct pp_hwmgr *hwmgr, bool enable) +{ + if (enable) + return smu10_enable_gfx_off(hwmgr); + else + return smu10_disable_gfx_off(hwmgr); } static int smu10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, @@ -340,7 +412,7 @@ static int smu10_get_clock_voltage_dependency_table(struct pp_hwmgr *hwmgr, static int smu10_populate_clock_table(struct pp_hwmgr *hwmgr) { - int result; + uint32_t result; struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); DpmClocks_t *table = &(smu10_data->clock_table); @@ -386,11 +458,11 @@ static int smu10_populate_clock_table(struct pp_hwmgr *hwmgr) smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency); result = smum_get_argument(hwmgr); - smu10_data->gfx_min_freq_limit = result * 100; + smu10_data->gfx_min_freq_limit = result / 10 * 1000; smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency); result = smum_get_argument(hwmgr); - smu10_data->gfx_max_freq_limit = result * 100; + smu10_data->gfx_max_freq_limit = result / 10 * 1000; return 0; } @@ -436,8 +508,8 @@ static int smu10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) hwmgr->platform_descriptor.minimumClocksReductionPercentage = 50; - hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK; - hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK; + hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK * 100; + hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK * 100; return result; } @@ -472,6 +544,8 @@ static int smu10_hwmgr_backend_fini(struct pp_hwmgr *hwmgr) static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level) { + struct smu10_hwmgr *data = hwmgr->backend; + if (hwmgr->smu_version < 0x1E3700) { pr_info("smu firmware version too old, can not set dpm level\n"); return 0; @@ -482,7 +556,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, - SMU10_UMD_PSTATE_PEAK_GFXCLK); + data->gfx_max_freq_limit/100); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, SMU10_UMD_PSTATE_PEAK_FCLK); @@ -495,7 +569,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxGfxClk, - SMU10_UMD_PSTATE_PEAK_GFXCLK); + data->gfx_max_freq_limit/100); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, SMU10_UMD_PSTATE_PEAK_FCLK); @@ -509,10 +583,10 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, - SMU10_UMD_PSTATE_MIN_GFXCLK); + data->gfx_min_freq_limit/100); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxGfxClk, - SMU10_UMD_PSTATE_MIN_GFXCLK); + data->gfx_min_freq_limit/100); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: smum_send_msg_to_smc_with_parameter(hwmgr, @@ -552,10 +626,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, case AMD_DPM_FORCED_LEVEL_AUTO: smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, - SMU10_UMD_PSTATE_MIN_GFXCLK); + data->gfx_min_freq_limit/100); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, + hwmgr->display_config->num_display > 3 ? + SMU10_UMD_PSTATE_PEAK_FCLK : SMU10_UMD_PSTATE_MIN_FCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, SMU10_UMD_PSTATE_MIN_SOCCLK); @@ -565,7 +642,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxGfxClk, - SMU10_UMD_PSTATE_PEAK_GFXCLK); + data->gfx_max_freq_limit/100); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, SMU10_UMD_PSTATE_PEAK_FCLK); @@ -579,10 +656,10 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, case AMD_DPM_FORCED_LEVEL_LOW: smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, - SMU10_UMD_PSTATE_MIN_GFXCLK); + data->gfx_min_freq_limit/100); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxGfxClk, - SMU10_UMD_PSTATE_MIN_GFXCLK); + data->gfx_min_freq_limit/100); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, SMU10_UMD_PSTATE_MIN_FCLK); @@ -699,6 +776,16 @@ static int smu10_set_cpu_power_state(struct pp_hwmgr *hwmgr) static int smu10_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time, bool cc6_disable, bool pstate_disable, bool pstate_switch_disable) { + struct smu10_hwmgr *data = (struct smu10_hwmgr *)(hwmgr->backend); + + if (separation_time != data->separation_time || + cc6_disable != data->cc6_disable || + pstate_disable != data->pstate_disable) { + data->separation_time = separation_time; + data->cc6_disable = cc6_disable; + data->pstate_disable = pstate_disable; + data->cc6_setting_changed = true; + } return 0; } @@ -711,6 +798,51 @@ static int smu10_get_dal_power_level(struct pp_hwmgr *hwmgr, static int smu10_force_clock_level(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask) { + struct smu10_hwmgr *data = hwmgr->backend; + struct smu10_voltage_dependency_table *mclk_table = + data->clock_vol_info.vdd_dep_on_fclk; + uint32_t low, high; + + low = mask ? (ffs(mask) - 1) : 0; + high = mask ? (fls(mask) - 1) : 0; + + switch (type) { + case PP_SCLK: + if (low > 2 || high > 2) { + pr_info("Currently sclk only support 3 levels on RV\n"); + return -EINVAL; + } + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + low == 2 ? data->gfx_max_freq_limit/100 : + low == 1 ? SMU10_UMD_PSTATE_GFXCLK : + data->gfx_min_freq_limit/100); + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + high == 0 ? data->gfx_min_freq_limit/100 : + high == 1 ? SMU10_UMD_PSTATE_GFXCLK : + data->gfx_max_freq_limit/100); + break; + + case PP_MCLK: + if (low > mclk_table->count - 1 || high > mclk_table->count - 1) + return -EINVAL; + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinFclkByFreq, + mclk_table->entries[low].clk/100); + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxFclkByFreq, + mclk_table->entries[high].clk/100); + break; + + case PP_PCIE: + default: + break; + } return 0; } @@ -720,21 +852,30 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, struct smu10_hwmgr *data = (struct smu10_hwmgr *)(hwmgr->backend); struct smu10_voltage_dependency_table *mclk_table = data->clock_vol_info.vdd_dep_on_fclk; - int i, now, size = 0; + uint32_t i, now, size = 0; switch (type) { case PP_SCLK: smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetGfxclkFrequency); now = smum_get_argument(hwmgr); + /* driver only know min/max gfx_clk, Add level 1 for all other gfx clks */ + if (now == data->gfx_max_freq_limit/100) + i = 2; + else if (now == data->gfx_min_freq_limit/100) + i = 0; + else + i = 1; + size += sprintf(buf + size, "0: %uMhz %s\n", - data->gfx_min_freq_limit / 100, - ((data->gfx_min_freq_limit / 100) - == now) ? "*" : ""); + data->gfx_min_freq_limit/100, + i == 0 ? "*" : ""); size += sprintf(buf + size, "1: %uMhz %s\n", - data->gfx_max_freq_limit / 100, - ((data->gfx_max_freq_limit / 100) - == now) ? "*" : ""); + i == 1 ? now : SMU10_UMD_PSTATE_GFXCLK, + i == 1 ? "*" : ""); + size += sprintf(buf + size, "2: %uMhz %s\n", + data->gfx_max_freq_limit/100, + i == 2 ? "*" : ""); break; case PP_MCLK: smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency); @@ -888,6 +1029,12 @@ static int smu10_get_clock_by_type_with_voltage(struct pp_hwmgr *hwmgr, case amd_pp_soc_clock: pclk_vol_table = pinfo->vdd_dep_on_socclk; break; + case amd_pp_disp_clock: + pclk_vol_table = pinfo->vdd_dep_on_dispclk; + break; + case amd_pp_phy_clock: + pclk_vol_table = pinfo->vdd_dep_on_phyclk; + break; default: return -EINVAL; } @@ -905,39 +1052,7 @@ static int smu10_get_clock_by_type_with_voltage(struct pp_hwmgr *hwmgr, return 0; } -static int smu10_display_clock_voltage_request(struct pp_hwmgr *hwmgr, - struct pp_display_clock_request *clock_req) -{ - struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); - enum amd_pp_clock_type clk_type = clock_req->clock_type; - uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000; - PPSMC_Msg msg; - - switch (clk_type) { - case amd_pp_dcf_clock: - if (clk_freq == smu10_data->dcf_actual_hard_min_freq) - return 0; - msg = PPSMC_MSG_SetHardMinDcefclkByFreq; - smu10_data->dcf_actual_hard_min_freq = clk_freq; - break; - case amd_pp_soc_clock: - msg = PPSMC_MSG_SetHardMinSocclkByFreq; - break; - case amd_pp_f_clock: - if (clk_freq == smu10_data->f_actual_hard_min_freq) - return 0; - smu10_data->f_actual_hard_min_freq = clk_freq; - msg = PPSMC_MSG_SetHardMinFclkByFreq; - break; - default: - pr_info("[DisplayClockVoltageRequest]Invalid Clock Type!"); - return -EINVAL; - } - - smum_send_msg_to_smc_with_parameter(hwmgr, msg, clk_freq); - return 0; -} static int smu10_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks) { @@ -947,9 +1062,8 @@ static int smu10_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simpl static int smu10_thermal_get_temperature(struct pp_hwmgr *hwmgr) { - uint32_t reg_offset = soc15_get_register_offset(THM_HWID, 0, - mmTHM_TCON_CUR_TMP_BASE_IDX, mmTHM_TCON_CUR_TMP); - uint32_t reg_value = cgs_read_register(hwmgr->device, reg_offset); + struct amdgpu_device *adev = hwmgr->adev; + uint32_t reg_value = RREG32_SOC15(THM, 0, mmTHM_TCON_CUR_TMP); int cur_temp = (reg_value & THM_TCON_CUR_TMP__CUR_TEMP_MASK) >> THM_TCON_CUR_TMP__CUR_TEMP__SHIFT; @@ -993,11 +1107,47 @@ static int smu10_read_sensor(struct pp_hwmgr *hwmgr, int idx, return ret; } +static int smu10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr, + struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges) +{ + struct smu10_hwmgr *data = hwmgr->backend; + Watermarks_t *table = &(data->water_marks_table); + int result = 0; + + smu_set_watermarks_for_clocks_ranges(table,wm_with_clock_ranges); + smum_smc_table_manager(hwmgr, (uint8_t *)table, (uint16_t)SMU10_WMTABLE, false); + data->water_marks_exist = true; + return result; +} + +static int smu10_smus_notify_pwe(struct pp_hwmgr *hwmgr) +{ + + return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SetRccPfcPmeRestoreRegister); +} + static int smu10_set_mmhub_powergating_by_smu(struct pp_hwmgr *hwmgr) { return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerGateMmHub); } +static void smu10_powergate_vcn(struct pp_hwmgr *hwmgr, bool bgate) +{ + if (bgate) { + amdgpu_device_ip_set_powergating_state(hwmgr->adev, + AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_GATE); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_PowerDownVcn, 0); + } else { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_PowerUpVcn, 0); + amdgpu_device_ip_set_powergating_state(hwmgr->adev, + AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); + } +} + static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .backend_init = smu10_hwmgr_backend_init, .backend_fini = smu10_hwmgr_backend_fini, @@ -1006,7 +1156,7 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .force_dpm_level = smu10_dpm_force_dpm_level, .get_power_state_size = smu10_get_power_state_size, .powerdown_uvd = NULL, - .powergate_uvd = NULL, + .powergate_uvd = smu10_powergate_vcn, .powergate_vce = NULL, .get_mclk = smu10_dpm_get_mclk, .get_sclk = smu10_dpm_get_sclk, @@ -1022,6 +1172,7 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .get_current_shallow_sleep_clocks = smu10_get_current_shallow_sleep_clocks, .get_clock_by_type_with_latency = smu10_get_clock_by_type_with_latency, .get_clock_by_type_with_voltage = smu10_get_clock_by_type_with_voltage, + .set_watermarks_for_clocks_ranges = smu10_set_watermarks_for_clocks_ranges, .get_max_high_clocks = smu10_get_max_high_clocks, .read_sensor = smu10_read_sensor, .set_active_display_count = smu10_set_active_display_count, @@ -1032,6 +1183,9 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .power_state_set = smu10_set_power_state_tasks, .dynamic_state_management_disable = smu10_disable_dpm_tasks, .set_mmhub_powergating_by_smu = smu10_set_mmhub_powergating_by_smu, + .smus_notify_pwe = smu10_smus_notify_pwe, + .gfx_off_control = smu10_gfx_off_control, + .display_clock_voltage_request = smu10_display_clock_voltage_request, }; int smu10_init_function_pointers(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h index 175c3a592b6c..1fb296a996f3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h @@ -290,6 +290,7 @@ struct smu10_hwmgr { bool vcn_dpg_mode; bool gfx_off_controled_by_driver; + bool water_marks_exist; Watermarks_t water_marks_table; struct smu10_clock_voltage_information clock_vol_info; DpmClocks_t clock_table; @@ -310,11 +311,9 @@ int smu10_init_function_pointers(struct pp_hwmgr *hwmgr); #define SMU10_UMD_PSTATE_FCLK 933 #define SMU10_UMD_PSTATE_VCE 0x03C00320 -#define SMU10_UMD_PSTATE_PEAK_GFXCLK 1100 #define SMU10_UMD_PSTATE_PEAK_SOCCLK 757 #define SMU10_UMD_PSTATE_PEAK_FCLK 1200 -#define SMU10_UMD_PSTATE_MIN_GFXCLK 200 #define SMU10_UMD_PSTATE_MIN_FCLK 400 #define SMU10_UMD_PSTATE_MIN_SOCCLK 200 #define SMU10_UMD_PSTATE_MIN_VCE 0x0190012C diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c index f4cbaee4e2ca..6d72a5600917 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c @@ -147,20 +147,20 @@ void smu7_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) data->uvd_power_gated = bgate; if (bgate) { - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_GATE); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_GATE); smu7_update_uvd_dpm(hwmgr, true); smu7_powerdown_uvd(hwmgr); } else { smu7_powerup_uvd(hwmgr); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_UNGATE); - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_UNGATE); smu7_update_uvd_dpm(hwmgr, false); @@ -175,20 +175,20 @@ void smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) data->vce_power_gated = bgate; if (bgate) { - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_GATE); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_GATE); smu7_update_vce_dpm(hwmgr, true); smu7_powerdown_vce(hwmgr); } else { smu7_powerup_vce(hwmgr); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_UNGATE); - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_UNGATE); smu7_update_vce_dpm(hwmgr, false); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 26fbeafc3c96..f8e866ceda02 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -61,10 +61,6 @@ #define SMC_CG_IND_START 0xc0030000 #define SMC_CG_IND_END 0xc0040000 -#define VOLTAGE_SCALE 4 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 - #define MEM_FREQ_LOW_LATENCY 25000 #define MEM_FREQ_HIGH_LATENCY 80000 @@ -79,14 +75,23 @@ #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) -static const struct profile_mode_setting smu7_profiling[5] = +static const struct profile_mode_setting smu7_profiling[6] = {{1, 0, 100, 30, 1, 0, 100, 10}, {1, 10, 0, 30, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 10, 16, 31}, {1, 0, 11, 50, 1, 0, 100, 10}, {1, 0, 5, 30, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, }; +#define PPSMC_MSG_SetVBITimeout_VEGAM ((uint16_t) 0x310) + +#define ixPWR_SVI2_PLANE1_LOAD 0xC0200280 +#define PWR_SVI2_PLANE1_LOAD__PSI1_MASK 0x00000020L +#define PWR_SVI2_PLANE1_LOAD__PSI0_EN_MASK 0x00000040L +#define PWR_SVI2_PLANE1_LOAD__PSI1__SHIFT 0x00000005 +#define PWR_SVI2_PLANE1_LOAD__PSI0_EN__SHIFT 0x00000006 + /** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. */ enum DPM_EVENT_SRC { DPM_EVENT_SRC_ANALOG = 0, @@ -168,6 +173,13 @@ static int smu7_get_current_pcie_lane_number(struct pp_hwmgr *hwmgr) */ static int smu7_enable_smc_voltage_controller(struct pp_hwmgr *hwmgr) { + if (hwmgr->chip_id == CHIP_VEGAM) { + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, PWR_SVI2_PLANE1_LOAD, PSI1, 0); + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, PWR_SVI2_PLANE1_LOAD, PSI0_EN, 0); + } + if (hwmgr->feature_mask & PP_SMC_VOLTAGE_CONTROL_MASK) smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Voltage_Cntl_Enable); @@ -779,7 +791,8 @@ static int smu7_setup_dpm_tables_v1(struct pp_hwmgr *hwmgr) data->dpm_table.sclk_table.count++; } } - + if (hwmgr->platform_descriptor.overdriveLimit.engineClock == 0) + hwmgr->platform_descriptor.overdriveLimit.engineClock = dep_sclk_table->entries[i-1].clk; /* Initialize Mclk DPM table based on allow Mclk values */ data->dpm_table.mclk_table.count = 0; for (i = 0; i < dep_mclk_table->count; i++) { @@ -794,32 +807,8 @@ static int smu7_setup_dpm_tables_v1(struct pp_hwmgr *hwmgr) } } - return 0; -} - -static int smu7_get_voltage_dependency_table( - const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table, - struct phm_ppt_v1_clock_voltage_dependency_table *dep_table) -{ - uint8_t i = 0; - PP_ASSERT_WITH_CODE((0 != allowed_dep_table->count), - "Voltage Lookup Table empty", - return -EINVAL); - - dep_table->count = allowed_dep_table->count; - for (i=0; i<dep_table->count; i++) { - dep_table->entries[i].clk = allowed_dep_table->entries[i].clk; - dep_table->entries[i].vddInd = allowed_dep_table->entries[i].vddInd; - dep_table->entries[i].vdd_offset = allowed_dep_table->entries[i].vdd_offset; - dep_table->entries[i].vddc = allowed_dep_table->entries[i].vddc; - dep_table->entries[i].vddgfx = allowed_dep_table->entries[i].vddgfx; - dep_table->entries[i].vddci = allowed_dep_table->entries[i].vddci; - dep_table->entries[i].mvdd = allowed_dep_table->entries[i].mvdd; - dep_table->entries[i].phases = allowed_dep_table->entries[i].phases; - dep_table->entries[i].cks_enable = allowed_dep_table->entries[i].cks_enable; - dep_table->entries[i].cks_voffset = allowed_dep_table->entries[i].cks_voffset; - } - + if (hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0) + hwmgr->platform_descriptor.overdriveLimit.memoryClock = dep_mclk_table->entries[i-1].clk; return 0; } @@ -850,7 +839,7 @@ static int smu7_odn_initial_default_setting(struct pp_hwmgr *hwmgr) entries[i].vddc = dep_sclk_table->entries[i].vddc; } - smu7_get_voltage_dependency_table(dep_sclk_table, + smu_get_voltage_dependency_table_ppt_v1(dep_sclk_table, (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk)); odn_table->odn_memory_clock_dpm_levels.num_of_pl = @@ -862,12 +851,40 @@ static int smu7_odn_initial_default_setting(struct pp_hwmgr *hwmgr) entries[i].vddc = dep_mclk_table->entries[i].vddc; } - smu7_get_voltage_dependency_table(dep_mclk_table, + smu_get_voltage_dependency_table_ppt_v1(dep_mclk_table, (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_mclk)); return 0; } +static void smu7_setup_voltage_range_from_vbios(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t min_vddc = 0; + uint32_t max_vddc = 0; + + if (!table_info) + return; + + dep_sclk_table = table_info->vdd_dep_on_sclk; + + atomctrl_get_voltage_range(hwmgr, &max_vddc, &min_vddc); + + if (min_vddc == 0 || min_vddc > 2000 + || min_vddc > dep_sclk_table->entries[0].vddc) + min_vddc = dep_sclk_table->entries[0].vddc; + + if (max_vddc == 0 || max_vddc > 2000 + || max_vddc < dep_sclk_table->entries[dep_sclk_table->count-1].vddc) + max_vddc = dep_sclk_table->entries[dep_sclk_table->count-1].vddc; + + data->odn_dpm_table.min_vddc = min_vddc; + data->odn_dpm_table.max_vddc = max_vddc; +} + static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -886,8 +903,10 @@ static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) sizeof(struct smu7_dpm_table)); /* initialize ODN table */ - if (hwmgr->od_enabled) + if (hwmgr->od_enabled) { + smu7_setup_voltage_range_from_vbios(hwmgr); smu7_odn_initial_default_setting(hwmgr); + } return 0; } @@ -965,6 +984,22 @@ static int smu7_disable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr) return 0; } +static int smu7_disable_sclk_vce_handshake(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t soft_register_value = 0; + uint32_t handshake_disables_offset = data->soft_regs_start + + smum_get_offsetof(hwmgr, + SMU_SoftRegisters, HandshakeDisables); + + soft_register_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, handshake_disables_offset); + soft_register_value |= SMU7_VCE_SCLK_HANDSHAKE_DISABLE; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + handshake_disables_offset, soft_register_value); + return 0; +} + static int smu7_disable_handshake_uvd(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -987,23 +1022,29 @@ static int smu7_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); /* enable SCLK dpm */ - if (!data->sclk_dpm_key_disabled) + if (!data->sclk_dpm_key_disabled) { + if (hwmgr->chip_id == CHIP_VEGAM) + smu7_disable_sclk_vce_handshake(hwmgr); + PP_ASSERT_WITH_CODE( (0 == smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DPM_Enable)), "Failed to enable SCLK DPM during DPM Start Function!", return -EINVAL); + } /* enable MCLK dpm */ if (0 == data->mclk_dpm_key_disabled) { if (!(hwmgr->feature_mask & PP_UVD_HANDSHAKE_MASK)) smu7_disable_handshake_uvd(hwmgr); + PP_ASSERT_WITH_CODE( (0 == smum_send_msg_to_smc(hwmgr, PPSMC_MSG_MCLKDPM_Enable)), "Failed to enable MCLK DPM during DPM Start Function!", return -EINVAL); - PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1); + if (hwmgr->chip_family != CHIP_VEGAM) + PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1); if (hwmgr->chip_family == AMDGPU_FAMILY_CI) { @@ -1019,8 +1060,13 @@ static int smu7_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr) cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x5); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_CPL_CNTL, 0x100005); udelay(10); - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400005); - cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400005); + if (hwmgr->chip_id == CHIP_VEGAM) { + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400009); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400009); + } else { + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400005); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400005); + } cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_CPL_CNTL, 0x500005); } } @@ -1229,7 +1275,7 @@ static int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr) tmp_result = smu7_construct_voltage_tables(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), - "Failed to contruct voltage tables!", + "Failed to construct voltage tables!", result = tmp_result); } smum_initialize_mc_reg_table(hwmgr); @@ -1261,10 +1307,12 @@ static int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to process firmware header!", result = tmp_result); - tmp_result = smu7_initial_switch_from_arbf0_to_f1(hwmgr); - PP_ASSERT_WITH_CODE((0 == tmp_result), - "Failed to initialize switch from ArbF0 to F1!", - result = tmp_result); + if (hwmgr->chip_id != CHIP_VEGAM) { + tmp_result = smu7_initial_switch_from_arbf0_to_f1(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to initialize switch from ArbF0 to F1!", + result = tmp_result); + } result = smu7_setup_default_dpm_tables(hwmgr); PP_ASSERT_WITH_CODE(0 == result, @@ -2754,6 +2802,9 @@ static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr, case CHIP_POLARIS12: switch_limit_us = data->is_memory_gddr5 ? 190 : 150; break; + case CHIP_VEGAM: + switch_limit_us = 30; + break; default: switch_limit_us = data->is_memory_gddr5 ? 450 : 150; break; @@ -2777,8 +2828,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct PP_Clocks minimum_clocks = {0}; bool disable_mclk_switching; bool disable_mclk_switching_for_frame_lock; - struct cgs_display_info info = {0}; - struct cgs_mode_info mode_info = {0}; const struct phm_clock_and_voltage_limits *max_limits; uint32_t i; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -2787,7 +2836,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, int32_t count; int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0; - info.mode_info = &mode_info; data->battery_state = (PP_StateUILabel_Battery == request_ps->classification.ui_label); @@ -2809,10 +2857,8 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, } } - cgs_get_active_displays_info(hwmgr->device, &info); - - minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock; - minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; + minimum_clocks.engineClock = hwmgr->display_config->min_core_set_clock; + minimum_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock; if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) { @@ -2843,12 +2889,12 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); - if (info.display_count == 0) + if (hwmgr->display_config->num_display == 0) disable_mclk_switching = false; else - disable_mclk_switching = ((1 < info.display_count) || + disable_mclk_switching = ((1 < hwmgr->display_config->num_display) || disable_mclk_switching_for_frame_lock || - smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us)); + smu7_vblank_too_short(hwmgr, hwmgr->display_config->min_vblank_time)); sclk = smu7_ps->performance_levels[0].engine_clock; mclk = smu7_ps->performance_levels[0].memory_clock; @@ -2957,8 +3003,7 @@ static int smu7_dpm_patch_boot_state(struct pp_hwmgr *hwmgr, /* First retrieve the Boot clocks and VDDC from the firmware info table. * We assume here that fw_info is unchanged if this call fails. */ - fw_info = (ATOM_FIRMWARE_INFO_V2_2 *)cgs_atom_get_data_table( - hwmgr->device, index, + fw_info = (ATOM_FIRMWARE_INFO_V2_2 *)smu_atom_get_data_table(hwmgr->adev, index, &size, &frev, &crev); if (!fw_info) /* During a test, there is no firmware info table. */ @@ -3366,34 +3411,35 @@ static int smu7_get_pp_table_entry(struct pp_hwmgr *hwmgr, return 0; } -static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, - struct pp_gpu_power *query) +static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query) { - PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr, - PPSMC_MSG_PmStatusLogStart), - "Failed to start pm status log!", - return -1); + int i; + u32 tmp = 0; + + if (!query) + return -EINVAL; - /* Sampling period from 50ms to 4sec */ - msleep_interruptible(200); + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0); + tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + *query = tmp; - PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr, - PPSMC_MSG_PmStatusLogSample), - "Failed to sample pm status log!", - return -1); + if (tmp != 0) + return 0; - query->vddc_power = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, - ixSMU_PM_STATUS_40); - query->vddci_power = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, - ixSMU_PM_STATUS_49); - query->max_gpu_power = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, - ixSMU_PM_STATUS_94); - query->average_gpu_power = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, - ixSMU_PM_STATUS_95); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixSMU_PM_STATUS_94, 0); + + for (i = 0; i < 10; i++) { + mdelay(1); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogSample); + tmp = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, + ixSMU_PM_STATUS_94); + if (tmp != 0) + break; + } + *query = tmp; return 0; } @@ -3446,10 +3492,7 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; return 0; case AMDGPU_PP_SENSOR_GPU_POWER: - if (*size < sizeof(struct pp_gpu_power)) - return -EINVAL; - *size = sizeof(struct pp_gpu_power); - return smu7_get_gpu_power(hwmgr, (struct pp_gpu_power *)value); + return smu7_get_gpu_power(hwmgr, (uint32_t *)value); case AMDGPU_PP_SENSOR_VDDGFX: if ((data->vr_config & 0xff) == 0x2) val_vid = PHM_READ_INDIRECT_FIELD(hwmgr->device, @@ -3480,7 +3523,6 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons [smu7_ps->performance_level_count - 1].memory_clock; struct PP_Clocks min_clocks = {0}; uint32_t i; - struct cgs_display_info info = {0}; for (i = 0; i < sclk_table->count; i++) { if (sclk == sclk_table->dpm_levels[i].value) @@ -3507,9 +3549,8 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons if (i >= mclk_table->count) data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; - cgs_get_active_displays_info(hwmgr->device, &info); - if (data->display_timing.num_existing_displays != info.display_count) + if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) data->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK; return 0; @@ -3714,14 +3755,17 @@ static int smu7_trim_dpm_states(struct pp_hwmgr *hwmgr, static int smu7_generate_dpm_level_enable_mask( struct pp_hwmgr *hwmgr, const void *input) { - int result; + int result = 0; const struct phm_set_power_state_input *states = (const struct phm_set_power_state_input *)input; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); const struct smu7_power_state *smu7_ps = cast_const_phw_smu7_power_state(states->pnew_state); - result = smu7_trim_dpm_states(hwmgr, smu7_ps); + /*skip the trim if od is enabled*/ + if (!hwmgr->od_enabled) + result = smu7_trim_dpm_states(hwmgr, smu7_ps); + if (result) return result; @@ -3812,9 +3856,14 @@ static int smu7_notify_smc_display(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - if (hwmgr->feature_mask & PP_VBI_TIME_SUPPORT_MASK) - smum_send_msg_to_smc_with_parameter(hwmgr, - (PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2); + if (hwmgr->feature_mask & PP_VBI_TIME_SUPPORT_MASK) { + if (hwmgr->chip_id == CHIP_VEGAM) + smum_send_msg_to_smc_with_parameter(hwmgr, + (PPSMC_Msg)PPSMC_MSG_SetVBITimeout_VEGAM, data->frame_time_x2); + else + smum_send_msg_to_smc_with_parameter(hwmgr, + (PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2); + } return (smum_send_msg_to_smc(hwmgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ? 0 : -EINVAL; } @@ -3908,15 +3957,8 @@ smu7_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display) static int smu7_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr) { - uint32_t num_active_displays = 0; - struct cgs_display_info info = {0}; - - info.mode_info = NULL; - cgs_get_active_displays_info(hwmgr->device, &info); - - num_active_displays = info.display_count; - - if (num_active_displays > 1 && hwmgr->display_config.multi_monitor_in_sync != true) + if (hwmgr->display_config->num_display > 1 && + !hwmgr->display_config->multi_monitor_in_sync) smu7_notify_smc_display_change(hwmgr, false); return 0; @@ -3931,33 +3973,24 @@ smu7_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr) static int smu7_program_display_gap(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t num_active_displays = 0; uint32_t display_gap = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL); uint32_t display_gap2; uint32_t pre_vbi_time_in_us; uint32_t frame_time_in_us; - uint32_t ref_clock; - uint32_t refresh_rate = 0; - struct cgs_display_info info = {0}; - struct cgs_mode_info mode_info = {0}; - - info.mode_info = &mode_info; - cgs_get_active_displays_info(hwmgr->device, &info); - num_active_displays = info.display_count; + uint32_t ref_clock, refresh_rate; - display_gap = PHM_SET_FIELD(display_gap, CG_DISPLAY_GAP_CNTL, DISP_GAP, (num_active_displays > 0) ? DISPLAY_GAP_VBLANK_OR_WM : DISPLAY_GAP_IGNORE); + display_gap = PHM_SET_FIELD(display_gap, CG_DISPLAY_GAP_CNTL, DISP_GAP, (hwmgr->display_config->num_display > 0) ? DISPLAY_GAP_VBLANK_OR_WM : DISPLAY_GAP_IGNORE); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL, display_gap); ref_clock = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev); - - refresh_rate = mode_info.refresh_rate; + refresh_rate = hwmgr->display_config->vrefresh; if (0 == refresh_rate) refresh_rate = 60; frame_time_in_us = 1000000 / refresh_rate; - pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us; + pre_vbi_time_in_us = frame_time_in_us - 200 - hwmgr->display_config->min_vblank_time; data->frame_time_x2 = frame_time_in_us * 2 / 100; @@ -4037,17 +4070,14 @@ smu7_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); bool is_update_required = false; - struct cgs_display_info info = {0, 0, NULL}; - - cgs_get_active_displays_info(hwmgr->device, &info); - if (data->display_timing.num_existing_displays != info.display_count) + if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) is_update_required = true; if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) { - if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr && + if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr && (data->display_timing.min_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK || - hwmgr->display_config.min_core_set_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK)) + hwmgr->display_config->min_core_set_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK)) is_update_required = true; } return is_update_required; @@ -4102,7 +4132,7 @@ static int smu7_check_states_equal(struct pp_hwmgr *hwmgr, return 0; } -static int smu7_upload_mc_firmware(struct pp_hwmgr *hwmgr) +static int smu7_check_mc_firmware(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -4181,13 +4211,9 @@ static int smu7_read_clock_registers(struct pp_hwmgr *hwmgr) static int smu7_get_memory_type(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t temp; - - temp = cgs_read_register(hwmgr->device, mmMC_SEQ_MISC0); + struct amdgpu_device *adev = hwmgr->adev; - data->is_memory_gddr5 = (MC_SEQ_MISC0_GDDR5_VALUE == - ((temp & MC_SEQ_MISC0_GDDR5_MASK) >> - MC_SEQ_MISC0_GDDR5_SHIFT)); + data->is_memory_gddr5 = (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5); return 0; } @@ -4235,7 +4261,7 @@ static int smu7_setup_asic_task(struct pp_hwmgr *hwmgr) { int tmp_result, result = 0; - smu7_upload_mc_firmware(hwmgr); + smu7_check_mc_firmware(hwmgr); tmp_result = smu7_read_clock_registers(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), @@ -4370,22 +4396,36 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr, break; case OD_SCLK: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s: \n", "OD_SCLK"); + size = sprintf(buf, "%s:\n", "OD_SCLK"); for (i = 0; i < odn_sclk_table->num_of_pl; i++) - size += sprintf(buf + size, "%d: %10uMhz %10u mV\n", - i, odn_sclk_table->entries[i].clock / 100, + size += sprintf(buf + size, "%d: %10uMHz %10umV\n", + i, odn_sclk_table->entries[i].clock/100, odn_sclk_table->entries[i].vddc); } break; case OD_MCLK: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s: \n", "OD_MCLK"); + size = sprintf(buf, "%s:\n", "OD_MCLK"); for (i = 0; i < odn_mclk_table->num_of_pl; i++) - size += sprintf(buf + size, "%d: %10uMhz %10u mV\n", - i, odn_mclk_table->entries[i].clock / 100, + size += sprintf(buf + size, "%d: %10uMHz %10umV\n", + i, odn_mclk_table->entries[i].clock/100, odn_mclk_table->entries[i].vddc); } break; + case OD_RANGE: + if (hwmgr->od_enabled) { + size = sprintf(buf, "%s:\n", "OD_RANGE"); + size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", + data->golden_dpm_table.sclk_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.engineClock/100); + size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n", + data->golden_dpm_table.mclk_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); + size += sprintf(buf + size, "VDDC: %7umV %11umV\n", + data->odn_dpm_table.min_vddc, + data->odn_dpm_table.max_vddc); + } + break; default: break; } @@ -4669,36 +4709,27 @@ static bool smu7_check_clk_voltage_valid(struct pp_hwmgr *hwmgr, { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - uint32_t min_vddc; - struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table; - - if (table_info == NULL) - return false; - - dep_sclk_table = table_info->vdd_dep_on_sclk; - min_vddc = dep_sclk_table->entries[0].vddc; - - if (voltage < min_vddc || voltage > 2000) { - pr_info("OD voltage is out of range [%d - 2000] mV\n", min_vddc); + if (voltage < data->odn_dpm_table.min_vddc || voltage > data->odn_dpm_table.max_vddc) { + pr_info("OD voltage is out of range [%d - %d] mV\n", + data->odn_dpm_table.min_vddc, + data->odn_dpm_table.max_vddc); return false; } if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { - if (data->vbios_boot_state.sclk_bootup_value > clk || + if (data->golden_dpm_table.sclk_table.dpm_levels[0].value > clk || hwmgr->platform_descriptor.overdriveLimit.engineClock < clk) { pr_info("OD engine clock is out of range [%d - %d] MHz\n", - data->vbios_boot_state.sclk_bootup_value, - hwmgr->platform_descriptor.overdriveLimit.engineClock / 100); + data->golden_dpm_table.sclk_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.engineClock/100); return false; } } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) { - if (data->vbios_boot_state.mclk_bootup_value > clk || + if (data->golden_dpm_table.mclk_table.dpm_levels[0].value > clk || hwmgr->platform_descriptor.overdriveLimit.memoryClock < clk) { pr_info("OD memory clock is out of range [%d - %d] MHz\n", - data->vbios_boot_state.mclk_bootup_value/100, - hwmgr->platform_descriptor.overdriveLimit.memoryClock / 100); + data->golden_dpm_table.mclk_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); return false; } } else { @@ -4747,10 +4778,6 @@ static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr) return; } } - if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { - data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; - } dep_table = table_info->vdd_dep_on_sclk; odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk); @@ -4760,9 +4787,9 @@ static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr) return; } } - if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { + if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; } } @@ -4864,6 +4891,17 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) len = sizeof(smu7_profiling) / sizeof(struct profile_mode_setting); for (i = 0; i < len; i++) { + if (i == hwmgr->power_profile_mode) { + size += sprintf(buf + size, "%3d %14s %s: %8d %16d %16d %16d %16d %16d\n", + i, profile_name[i], "*", + data->current_profile_setting.sclk_up_hyst, + data->current_profile_setting.sclk_down_hyst, + data->current_profile_setting.sclk_activity, + data->current_profile_setting.mclk_up_hyst, + data->current_profile_setting.mclk_down_hyst, + data->current_profile_setting.mclk_activity); + continue; + } if (smu7_profiling[i].bupdate_sclk) size += sprintf(buf + size, "%3d %16s: %8d %16d %16d ", i, profile_name[i], smu7_profiling[i].sclk_up_hyst, @@ -4883,24 +4921,6 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) "-", "-", "-"); } - size += sprintf(buf + size, "%3d %16s: %8d %16d %16d %16d %16d %16d\n", - i, profile_name[i], - data->custom_profile_setting.sclk_up_hyst, - data->custom_profile_setting.sclk_down_hyst, - data->custom_profile_setting.sclk_activity, - data->custom_profile_setting.mclk_up_hyst, - data->custom_profile_setting.mclk_down_hyst, - data->custom_profile_setting.mclk_activity); - - size += sprintf(buf + size, "%3s %16s: %8d %16d %16d %16d %16d %16d\n", - "*", "CURRENT", - data->current_profile_setting.sclk_up_hyst, - data->current_profile_setting.sclk_down_hyst, - data->current_profile_setting.sclk_activity, - data->current_profile_setting.mclk_up_hyst, - data->current_profile_setting.mclk_down_hyst, - data->current_profile_setting.mclk_activity); - return size; } @@ -4939,16 +4959,16 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint if (size < 8) return -EINVAL; - data->custom_profile_setting.bupdate_sclk = input[0]; - data->custom_profile_setting.sclk_up_hyst = input[1]; - data->custom_profile_setting.sclk_down_hyst = input[2]; - data->custom_profile_setting.sclk_activity = input[3]; - data->custom_profile_setting.bupdate_mclk = input[4]; - data->custom_profile_setting.mclk_up_hyst = input[5]; - data->custom_profile_setting.mclk_down_hyst = input[6]; - data->custom_profile_setting.mclk_activity = input[7]; - if (!smum_update_dpm_settings(hwmgr, &data->custom_profile_setting)) { - memcpy(&data->current_profile_setting, &data->custom_profile_setting, sizeof(struct profile_mode_setting)); + tmp.bupdate_sclk = input[0]; + tmp.sclk_up_hyst = input[1]; + tmp.sclk_down_hyst = input[2]; + tmp.sclk_activity = input[3]; + tmp.bupdate_mclk = input[4]; + tmp.mclk_up_hyst = input[5]; + tmp.mclk_down_hyst = input[6]; + tmp.mclk_activity = input[7]; + if (!smum_update_dpm_settings(hwmgr, &tmp)) { + memcpy(&data->current_profile_setting, &tmp, sizeof(struct profile_mode_setting)); hwmgr->power_profile_mode = mode; } break; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h index f40179c9ca97..c91e75db6a8e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h @@ -184,6 +184,8 @@ struct smu7_odn_dpm_table { struct smu7_odn_clock_voltage_dependency_table vdd_dependency_on_sclk; struct smu7_odn_clock_voltage_dependency_table vdd_dependency_on_mclk; uint32_t odn_mclk_min_limit; + uint32_t min_vddc; + uint32_t max_vddc; }; struct profile_mode_setting { @@ -325,7 +327,6 @@ struct smu7_hwmgr { uint16_t mem_latency_high; uint16_t mem_latency_low; uint32_t vr_config; - struct profile_mode_setting custom_profile_setting; struct profile_mode_setting current_profile_setting; }; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c index 03bc7453f3b1..c952845833d7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c @@ -623,6 +623,190 @@ static const struct gpu_pt_config_reg DIDTConfig_Polaris11_Kicker[] = { 0xFFFFFFFF } /* End of list */ }; +static const struct gpu_pt_config_reg GCCACConfig_VegaM[] = +{ +// --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +// Offset Mask Shift Value Type +// --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + // DIDT_SQ + // + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00060013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00860013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01060013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01860013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02060013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02860013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x03060013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x03860013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x04060013, GPU_CONFIGREG_GC_CAC_IND }, + + // DIDT_TD + // + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x000E0013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x008E0013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x010E0013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x018E0013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x020E0013, GPU_CONFIGREG_GC_CAC_IND }, + + // DIDT_TCP + // + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00100013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x00900013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01100013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x01900013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02100013, GPU_CONFIGREG_GC_CAC_IND }, + { ixGC_CAC_CNTL, 0xFFFFFFFF, 0, 0x02900013, GPU_CONFIGREG_GC_CAC_IND }, + + { 0xFFFFFFFF } // End of list +}; + +static const struct gpu_pt_config_reg DIDTConfig_VegaM[] = +{ +// --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +// Offset Mask Shift Value Type +// --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + // DIDT_SQ + // + { ixDIDT_SQ_WEIGHT0_3, DIDT_SQ_WEIGHT0_3__WEIGHT0_MASK, DIDT_SQ_WEIGHT0_3__WEIGHT0__SHIFT, 0x0073, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_WEIGHT0_3, DIDT_SQ_WEIGHT0_3__WEIGHT1_MASK, DIDT_SQ_WEIGHT0_3__WEIGHT1__SHIFT, 0x00ab, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_WEIGHT0_3, DIDT_SQ_WEIGHT0_3__WEIGHT2_MASK, DIDT_SQ_WEIGHT0_3__WEIGHT2__SHIFT, 0x0084, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_WEIGHT0_3, DIDT_SQ_WEIGHT0_3__WEIGHT3_MASK, DIDT_SQ_WEIGHT0_3__WEIGHT3__SHIFT, 0x005a, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_WEIGHT4_7, DIDT_SQ_WEIGHT4_7__WEIGHT4_MASK, DIDT_SQ_WEIGHT4_7__WEIGHT4__SHIFT, 0x0067, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_WEIGHT4_7, DIDT_SQ_WEIGHT4_7__WEIGHT5_MASK, DIDT_SQ_WEIGHT4_7__WEIGHT5__SHIFT, 0x0084, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_WEIGHT4_7, DIDT_SQ_WEIGHT4_7__WEIGHT6_MASK, DIDT_SQ_WEIGHT4_7__WEIGHT6__SHIFT, 0x0027, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_WEIGHT4_7, DIDT_SQ_WEIGHT4_7__WEIGHT7_MASK, DIDT_SQ_WEIGHT4_7__WEIGHT7__SHIFT, 0x0046, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_WEIGHT8_11, DIDT_SQ_WEIGHT8_11__WEIGHT8_MASK, DIDT_SQ_WEIGHT8_11__WEIGHT8__SHIFT, 0x00aa, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_WEIGHT8_11, DIDT_SQ_WEIGHT8_11__WEIGHT9_MASK, DIDT_SQ_WEIGHT8_11__WEIGHT9__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_WEIGHT8_11, DIDT_SQ_WEIGHT8_11__WEIGHT10_MASK, DIDT_SQ_WEIGHT8_11__WEIGHT10__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_WEIGHT8_11, DIDT_SQ_WEIGHT8_11__WEIGHT11_MASK, DIDT_SQ_WEIGHT8_11__WEIGHT11__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_CTRL1, DIDT_SQ_CTRL1__MIN_POWER_MASK, DIDT_SQ_CTRL1__MIN_POWER__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL1, DIDT_SQ_CTRL1__MAX_POWER_MASK, DIDT_SQ_CTRL1__MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_CTRL_OCP, DIDT_SQ_CTRL_OCP__UNUSED_0_MASK, DIDT_SQ_CTRL_OCP__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL_OCP, DIDT_SQ_CTRL_OCP__OCP_MAX_POWER_MASK, DIDT_SQ_CTRL_OCP__OCP_MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK, DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3853, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__UNUSED_0_MASK, DIDT_SQ_CTRL2__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x005a, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__UNUSED_1_MASK, DIDT_SQ_CTRL2__UNUSED_1__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__UNUSED_2_MASK, DIDT_SQ_CTRL2__UNUSED_2__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x0ebb, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__UNUSED_0_MASK, DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK, DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x3853, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x3153, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK, DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK, DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK, DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__PHASE_OFFSET_MASK, DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK, DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__UNUSED_0_MASK, DIDT_SQ_CTRL0__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + // DIDT_TD + // + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT0_MASK, DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT, 0x000a, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT1_MASK, DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT2_MASK, DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT, 0x0017, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT0_3, DIDT_TD_WEIGHT0_3__WEIGHT3_MASK, DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT, 0x002f, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT4_MASK, DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT, 0x0046, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT5_MASK, DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT, 0x005d, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT6_MASK, DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_WEIGHT4_7, DIDT_TD_WEIGHT4_7__WEIGHT7_MASK, DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_CTRL1, DIDT_TD_CTRL1__MIN_POWER_MASK, DIDT_TD_CTRL1__MIN_POWER__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL1, DIDT_TD_CTRL1__MAX_POWER_MASK, DIDT_TD_CTRL1__MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_CTRL_OCP, DIDT_TD_CTRL_OCP__UNUSED_0_MASK, DIDT_TD_CTRL_OCP__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL_OCP, DIDT_TD_CTRL_OCP__OCP_MAX_POWER_MASK, DIDT_TD_CTRL_OCP__OCP_MAX_POWER__SHIFT, 0x00ff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK, DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3fff, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__UNUSED_0_MASK, DIDT_TD_CTRL2__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x000f, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__UNUSED_1_MASK, DIDT_TD_CTRL2__UNUSED_1__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__UNUSED_2_MASK, DIDT_TD_CTRL2__UNUSED_2__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__UNUSED_0_MASK, DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK, DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x0dde, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x0dde, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__UNUSED_0_MASK, DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK, DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__USE_REF_CLOCK_MASK, DIDT_TD_CTRL0__USE_REF_CLOCK__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__PHASE_OFFSET_MASK, DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK, DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x0009, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x0009, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__UNUSED_0_MASK, DIDT_TD_CTRL0__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + // DIDT_TCP + // + { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT0_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT0__SHIFT, 0x0004, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT1_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT1__SHIFT, 0x0037, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT2_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT2__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT0_3, DIDT_TCP_WEIGHT0_3__WEIGHT3_MASK, DIDT_TCP_WEIGHT0_3__WEIGHT3__SHIFT, 0x00ff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT4_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT4__SHIFT, 0x0054, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT5_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT5__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT6_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT6__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_WEIGHT4_7, DIDT_TCP_WEIGHT4_7__WEIGHT7_MASK, DIDT_TCP_WEIGHT4_7__WEIGHT7__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_CTRL1, DIDT_TCP_CTRL1__MIN_POWER_MASK, DIDT_TCP_CTRL1__MIN_POWER__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL1, DIDT_TCP_CTRL1__MAX_POWER_MASK, DIDT_TCP_CTRL1__MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_CTRL_OCP, DIDT_TCP_CTRL_OCP__UNUSED_0_MASK, DIDT_TCP_CTRL_OCP__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL_OCP, DIDT_TCP_CTRL_OCP__OCP_MAX_POWER_MASK, DIDT_TCP_CTRL_OCP__OCP_MAX_POWER__SHIFT, 0xffff, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK, DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3dde, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__UNUSED_0_MASK, DIDT_TCP_CTRL2__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x0032, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__UNUSED_1_MASK, DIDT_TCP_CTRL2__UNUSED_1__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__UNUSED_2_MASK, DIDT_TCP_CTRL2__UNUSED_2__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK, DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT,0x01aa, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__UNUSED_0_MASK, DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK, DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x3dde, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x3dde, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK, DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK, DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0001, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__USE_REF_CLOCK_MASK, DIDT_TCP_CTRL0__USE_REF_CLOCK__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__PHASE_OFFSET_MASK, DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK, DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x0010, GPU_CONFIGREG_DIDT_IND }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__UNUSED_0_MASK, DIDT_TCP_CTRL0__UNUSED_0__SHIFT, 0x0000, GPU_CONFIGREG_DIDT_IND }, + + { 0xFFFFFFFF } // End of list +}; static int smu7_enable_didt(struct pp_hwmgr *hwmgr, const bool enable) { uint32_t en = enable ? 1 : 0; @@ -740,8 +924,8 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr) PP_CAP(PHM_PlatformCaps_TDRamping) || PP_CAP(PHM_PlatformCaps_TCPRamping)) { - cgs_enter_safe_mode(hwmgr->device, true); - cgs_lock_grbm_idx(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); + mutex_lock(&adev->grbm_idx_mutex); value = 0; value2 = cgs_read_register(hwmgr->device, mmGRBM_GFX_INDEX); for (count = 0; count < num_se; count++) { @@ -752,67 +936,80 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr) if (hwmgr->chip_id == CHIP_POLARIS10) { result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris10); - PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result); + PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error); result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris10); - PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result); + PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error); } else if (hwmgr->chip_id == CHIP_POLARIS11) { result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11); - PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result); + PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error); if (hwmgr->is_kicker) result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11_Kicker); else result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11); - PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result); + PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error); } else if (hwmgr->chip_id == CHIP_POLARIS12) { result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11); - PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result); + PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error); result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris12); - PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result); + PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error); + } else if (hwmgr->chip_id == CHIP_VEGAM) { + result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_VegaM); + PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error); + result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_VegaM); + PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error); } } cgs_write_register(hwmgr->device, mmGRBM_GFX_INDEX, value2); result = smu7_enable_didt(hwmgr, true); - PP_ASSERT_WITH_CODE((result == 0), "EnableDiDt failed.", return result); + PP_ASSERT_WITH_CODE((result == 0), "EnableDiDt failed.", goto error); if (hwmgr->chip_id == CHIP_POLARIS11) { result = smum_send_msg_to_smc(hwmgr, (uint16_t)(PPSMC_MSG_EnableDpmDidt)); PP_ASSERT_WITH_CODE((0 == result), - "Failed to enable DPM DIDT.", return result); + "Failed to enable DPM DIDT.", goto error); } - cgs_lock_grbm_idx(hwmgr->device, false); - cgs_enter_safe_mode(hwmgr->device, false); + mutex_unlock(&adev->grbm_idx_mutex); + adev->gfx.rlc.funcs->exit_safe_mode(adev); } return 0; +error: + mutex_unlock(&adev->grbm_idx_mutex); + adev->gfx.rlc.funcs->exit_safe_mode(adev); + return result; } int smu7_disable_didt_config(struct pp_hwmgr *hwmgr) { int result; + struct amdgpu_device *adev = hwmgr->adev; if (PP_CAP(PHM_PlatformCaps_SQRamping) || PP_CAP(PHM_PlatformCaps_DBRamping) || PP_CAP(PHM_PlatformCaps_TDRamping) || PP_CAP(PHM_PlatformCaps_TCPRamping)) { - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); result = smu7_enable_didt(hwmgr, false); PP_ASSERT_WITH_CODE((result == 0), "Post DIDT enable clock gating failed.", - return result); + goto error); if (hwmgr->chip_id == CHIP_POLARIS11) { result = smum_send_msg_to_smc(hwmgr, (uint16_t)(PPSMC_MSG_DisableDpmDidt)); PP_ASSERT_WITH_CODE((0 == result), - "Failed to disable DPM DIDT.", return result); + "Failed to disable DPM DIDT.", goto error); } - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); } return 0; +error: + adev->gfx.rlc.funcs->exit_safe_mode(adev); + return result; } int smu7_enable_smc_cac(struct pp_hwmgr *hwmgr) @@ -852,12 +1049,10 @@ int smu7_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - n = (n & 0xff) << 8; - if (data->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) return smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_PkgPwrSetLimit, n); + PPSMC_MSG_PkgPwrSetLimit, n<<8); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c index 7b26607c646a..50690c72b2ea 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c @@ -314,8 +314,7 @@ static int smu8_get_system_info_data(struct pp_hwmgr *hwmgr) uint8_t frev, crev; uint16_t size; - info = (ATOM_INTEGRATED_SYSTEM_INFO_V1_9 *) cgs_atom_get_data_table( - hwmgr->device, + info = (ATOM_INTEGRATED_SYSTEM_INFO_V1_9 *)smu_atom_get_data_table(hwmgr->adev, GetIndexIntoMasterTable(DATA, IntegratedSystemInfo), &size, &frev, &crev); @@ -694,7 +693,7 @@ static int smu8_update_sclk_limit(struct pp_hwmgr *hwmgr) else data->sclk_dpm.soft_max_clk = table->entries[table->count - 1].clk; - clock = hwmgr->display_config.min_core_set_clock; + clock = hwmgr->display_config->min_core_set_clock; if (clock == 0) pr_debug("min_core_set_clock not set\n"); @@ -749,7 +748,7 @@ static int smu8_set_deep_sleep_sclk_threshold(struct pp_hwmgr *hwmgr) { if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) { - uint32_t clks = hwmgr->display_config.min_core_set_clock_in_sr; + uint32_t clks = hwmgr->display_config->min_core_set_clock_in_sr; if (clks == 0) clks = SMU8_MIN_DEEP_SLEEP_SCLK; @@ -1041,25 +1040,21 @@ static int smu8_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct smu8_hwmgr *data = hwmgr->backend; struct PP_Clocks clocks = {0, 0, 0, 0}; bool force_high; - uint32_t num_of_active_displays = 0; - struct cgs_display_info info = {0}; smu8_ps->need_dfs_bypass = true; data->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label); - clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ? - hwmgr->display_config.min_mem_set_clock : + clocks.memoryClock = hwmgr->display_config->min_mem_set_clock != 0 ? + hwmgr->display_config->min_mem_set_clock : data->sys_info.nbp_memory_clock[1]; - cgs_get_active_displays_info(hwmgr->device, &info); - num_of_active_displays = info.display_count; if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk; force_high = (clocks.memoryClock > data->sys_info.nbp_memory_clock[SMU8_NUM_NBPMEMORYCLOCK - 1]) - || (num_of_active_displays >= 3); + || (hwmgr->display_config->num_display >= 3); smu8_ps->action = smu8_current_ps->action; @@ -1897,20 +1892,20 @@ static void smu8_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) data->uvd_power_gated = bgate; if (bgate) { - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_GATE); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_GATE); smu8_dpm_update_uvd_dpm(hwmgr, true); smu8_dpm_powerdown_uvd(hwmgr); } else { smu8_dpm_powerup_uvd(hwmgr); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_CG_STATE_UNGATE); - cgs_set_powergating_state(hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_UVD, AMD_PG_STATE_UNGATE); smu8_dpm_update_uvd_dpm(hwmgr, false); @@ -1923,12 +1918,10 @@ static void smu8_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) struct smu8_hwmgr *data = hwmgr->backend; if (bgate) { - cgs_set_powergating_state( - hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_GATE); - cgs_set_clockgating_state( - hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_GATE); smu8_enable_disable_vce_dpm(hwmgr, false); @@ -1937,12 +1930,10 @@ static void smu8_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) } else { smu8_dpm_powerup_vce(hwmgr); data->vce_power_gated = false; - cgs_set_clockgating_state( - hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_UNGATE); - cgs_set_powergating_state( - hwmgr->device, + amdgpu_device_ip_set_powergating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_UNGATE); smu8_dpm_update_vce_dpm(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c index 598122854ab5..93a3d022ba47 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c @@ -24,6 +24,7 @@ #include "pp_debug.h" #include "ppatomctrl.h" #include "ppsmc.h" +#include "atom.h" uint8_t convert_to_vid(uint16_t vddc) { @@ -608,3 +609,100 @@ int smu9_register_irq_handlers(struct pp_hwmgr *hwmgr) return 0; } + +void *smu_atom_get_data_table(void *dev, uint32_t table, uint16_t *size, + uint8_t *frev, uint8_t *crev) +{ + struct amdgpu_device *adev = dev; + uint16_t data_start; + + if (amdgpu_atom_parse_data_header( + adev->mode_info.atom_context, table, size, + frev, crev, &data_start)) + return (uint8_t *)adev->mode_info.atom_context->bios + + data_start; + + return NULL; +} + +int smu_get_voltage_dependency_table_ppt_v1( + const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table, + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table) +{ + uint8_t i = 0; + PP_ASSERT_WITH_CODE((0 != allowed_dep_table->count), + "Voltage Lookup Table empty", + return -EINVAL); + + dep_table->count = allowed_dep_table->count; + for (i=0; i<dep_table->count; i++) { + dep_table->entries[i].clk = allowed_dep_table->entries[i].clk; + dep_table->entries[i].vddInd = allowed_dep_table->entries[i].vddInd; + dep_table->entries[i].vdd_offset = allowed_dep_table->entries[i].vdd_offset; + dep_table->entries[i].vddc = allowed_dep_table->entries[i].vddc; + dep_table->entries[i].vddgfx = allowed_dep_table->entries[i].vddgfx; + dep_table->entries[i].vddci = allowed_dep_table->entries[i].vddci; + dep_table->entries[i].mvdd = allowed_dep_table->entries[i].mvdd; + dep_table->entries[i].phases = allowed_dep_table->entries[i].phases; + dep_table->entries[i].cks_enable = allowed_dep_table->entries[i].cks_enable; + dep_table->entries[i].cks_voffset = allowed_dep_table->entries[i].cks_voffset; + } + + return 0; +} + +int smu_set_watermarks_for_clocks_ranges(void *wt_table, + struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges) +{ + uint32_t i; + struct watermarks *table = wt_table; + + if (!table || !wm_with_clock_ranges) + return -EINVAL; + + if (wm_with_clock_ranges->num_wm_sets_dmif > 4 || wm_with_clock_ranges->num_wm_sets_mcif > 4) + return -EINVAL; + + for (i = 0; i < wm_with_clock_ranges->num_wm_sets_dmif; i++) { + table->WatermarkRow[1][i].MinClock = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_dcefclk_in_khz) / + 100); + table->WatermarkRow[1][i].MaxClock = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_dcefclk_in_khz) / + 100); + table->WatermarkRow[1][i].MinUclk = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_memclk_in_khz) / + 100); + table->WatermarkRow[1][i].MaxUclk = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_memclk_in_khz) / + 100); + table->WatermarkRow[1][i].WmSetting = (uint8_t) + wm_with_clock_ranges->wm_sets_dmif[i].wm_set_id; + } + + for (i = 0; i < wm_with_clock_ranges->num_wm_sets_mcif; i++) { + table->WatermarkRow[0][i].MinClock = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_socclk_in_khz) / + 100); + table->WatermarkRow[0][i].MaxClock = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_socclk_in_khz) / + 100); + table->WatermarkRow[0][i].MinUclk = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_memclk_in_khz) / + 100); + table->WatermarkRow[0][i].MaxUclk = + cpu_to_le16((uint16_t) + (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_memclk_in_khz) / + 100); + table->WatermarkRow[0][i].WmSetting = (uint8_t) + wm_with_clock_ranges->wm_sets_mcif[i].wm_set_id; + } + return 0; +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h index d37d16e4b613..916cc01e7652 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h @@ -26,10 +26,27 @@ struct pp_atomctrl_voltage_table; struct pp_hwmgr; struct phm_ppt_v1_voltage_lookup_table; +struct Watermarks_t; +struct pp_wm_sets_with_clock_ranges_soc15; uint8_t convert_to_vid(uint16_t vddc); uint16_t convert_to_vddc(uint8_t vid); +struct watermark_row_generic_t { + uint16_t MinClock; + uint16_t MaxClock; + uint16_t MinUclk; + uint16_t MaxUclk; + + uint8_t WmSetting; + uint8_t Padding[3]; +}; + +struct watermarks { + struct watermark_row_generic_t WatermarkRow[2][4]; + uint32_t padding[7]; +}; + extern int phm_wait_for_register_unequal(struct pp_hwmgr *hwmgr, uint32_t index, uint32_t value, uint32_t mask); @@ -82,6 +99,16 @@ int phm_irq_process(struct amdgpu_device *adev, int smu9_register_irq_handlers(struct pp_hwmgr *hwmgr); +void *smu_atom_get_data_table(void *dev, uint32_t table, uint16_t *size, + uint8_t *frev, uint8_t *crev); + +int smu_get_voltage_dependency_table_ppt_v1( + const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table, + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table); + +int smu_set_watermarks_for_clocks_ranges(void *wt_table, + struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges); + #define PHM_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT #define PHM_FIELD_MASK(reg, field) reg##__##field##_MASK diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 7cbb56ba6fab..05e680d55dbb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -36,7 +36,7 @@ #include "smu9.h" #include "smu9_driver_if.h" #include "vega10_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "pppcielanes.h" #include "vega10_hwmgr.h" #include "vega10_processpptables.h" @@ -51,10 +51,6 @@ #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" -#define VOLTAGE_SCALE 4 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 - #define HBM_MEMORY_CHANNEL_WIDTH 128 static const uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; @@ -79,8 +75,6 @@ static const uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; #define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L #define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L #define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L -static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, - enum pp_clock_type type, uint32_t mask); static const ULONG PhwVega10_Magic = (ULONG)(PHM_VIslands_Magic); @@ -291,6 +285,52 @@ static int vega10_set_features_platform_caps(struct pp_hwmgr *hwmgr) return 0; } +static int vega10_odn_initial_default_setting(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct phm_ppt_v2_information *table_info = + (struct phm_ppt_v2_information *)(hwmgr->pptable); + struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct vega10_odn_vddc_lookup_table *od_lookup_table; + struct phm_ppt_v1_voltage_lookup_table *vddc_lookup_table; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table[3]; + struct phm_ppt_v1_clock_voltage_dependency_table *od_table[3]; + uint32_t i; + + od_lookup_table = &odn_table->vddc_lookup_table; + vddc_lookup_table = table_info->vddc_lookup_table; + + for (i = 0; i < vddc_lookup_table->count; i++) + od_lookup_table->entries[i].us_vdd = vddc_lookup_table->entries[i].us_vdd; + + od_lookup_table->count = vddc_lookup_table->count; + + dep_table[0] = table_info->vdd_dep_on_sclk; + dep_table[1] = table_info->vdd_dep_on_mclk; + dep_table[2] = table_info->vdd_dep_on_socclk; + od_table[0] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_sclk; + od_table[1] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_mclk; + od_table[2] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_socclk; + + for (i = 0; i < 3; i++) + smu_get_voltage_dependency_table_ppt_v1(dep_table[i], od_table[i]); + + if (odn_table->max_vddc == 0 || odn_table->max_vddc > 2000) + odn_table->max_vddc = dep_table[0]->entries[dep_table[0]->count - 1].vddc; + if (odn_table->min_vddc == 0 || odn_table->min_vddc > 2000) + odn_table->min_vddc = dep_table[0]->entries[0].vddc; + + i = od_table[2]->count - 1; + od_table[2]->entries[i].clk = hwmgr->platform_descriptor.overdriveLimit.memoryClock > od_table[2]->entries[i].clk ? + hwmgr->platform_descriptor.overdriveLimit.memoryClock : + od_table[2]->entries[i].clk; + od_table[2]->entries[i].vddc = odn_table->max_vddc > od_table[2]->entries[i].vddc ? + odn_table->max_vddc : + od_table[2]->entries[i].vddc; + + return 0; +} + static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) { struct vega10_hwmgr *data = hwmgr->backend; @@ -427,7 +467,6 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) /* ACG firmware has major version 5 */ if ((hwmgr->smu_version & 0xff000000) == 0x5000000) data->smu_features[GNLD_ACG].supported = true; - if (data->registry_data.didt_support) data->smu_features[GNLD_DIDT].supported = true; @@ -754,7 +793,6 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) uint32_t config_telemetry = 0; struct pp_atomfwctrl_voltage_table vol_table; struct amdgpu_device *adev = hwmgr->adev; - uint32_t reg; data = kzalloc(sizeof(struct vega10_hwmgr), GFP_KERNEL); if (data == NULL) @@ -860,10 +898,7 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) advanceFanControlParameters.usFanPWMMinLimit * hwmgr->thermal_controller.fanInfo.ulMaxRPM / 100; - reg = soc15_get_register_offset(DF_HWID, 0, - mmDF_CS_AON0_DramBaseAddress0_BASE_IDX, - mmDF_CS_AON0_DramBaseAddress0); - data->mem_channels = (cgs_read_register(hwmgr->device, reg) & + data->mem_channels = (RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0) & DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK) >> DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; PP_ASSERT_WITH_CODE(data->mem_channels < ARRAY_SIZE(channel_number), @@ -1280,6 +1315,9 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) vega10_setup_default_single_dpm_table(hwmgr, dpm_table, dep_gfx_table); + if (hwmgr->platform_descriptor.overdriveLimit.engineClock == 0) + hwmgr->platform_descriptor.overdriveLimit.engineClock = + dpm_table->dpm_levels[dpm_table->count-1].value; vega10_init_dpm_state(&(dpm_table->dpm_state)); /* Initialize Mclk DPM table based on allow Mclk values */ @@ -1288,6 +1326,10 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) vega10_setup_default_single_dpm_table(hwmgr, dpm_table, dep_mclk_table); + if (hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0) + hwmgr->platform_descriptor.overdriveLimit.memoryClock = + dpm_table->dpm_levels[dpm_table->count-1].value; + vega10_init_dpm_state(&(dpm_table->dpm_state)); data->dpm_table.eclk_table.count = 0; @@ -1370,48 +1412,6 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) memcpy(&(data->golden_dpm_table), &(data->dpm_table), sizeof(struct vega10_dpm_table)); - if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) || - PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) { - data->odn_dpm_table.odn_core_clock_dpm_levels.num_of_pl = - data->dpm_table.gfx_table.count; - for (i = 0; i < data->dpm_table.gfx_table.count; i++) { - data->odn_dpm_table.odn_core_clock_dpm_levels.entries[i].clock = - data->dpm_table.gfx_table.dpm_levels[i].value; - data->odn_dpm_table.odn_core_clock_dpm_levels.entries[i].enabled = true; - } - - data->odn_dpm_table.vdd_dependency_on_sclk.count = - dep_gfx_table->count; - for (i = 0; i < dep_gfx_table->count; i++) { - data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].clk = - dep_gfx_table->entries[i].clk; - data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].vddInd = - dep_gfx_table->entries[i].vddInd; - data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].cks_enable = - dep_gfx_table->entries[i].cks_enable; - data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].cks_voffset = - dep_gfx_table->entries[i].cks_voffset; - } - - data->odn_dpm_table.odn_memory_clock_dpm_levels.num_of_pl = - data->dpm_table.mem_table.count; - for (i = 0; i < data->dpm_table.mem_table.count; i++) { - data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[i].clock = - data->dpm_table.mem_table.dpm_levels[i].value; - data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[i].enabled = true; - } - - data->odn_dpm_table.vdd_dependency_on_mclk.count = dep_mclk_table->count; - for (i = 0; i < dep_mclk_table->count; i++) { - data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].clk = - dep_mclk_table->entries[i].clk; - data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].vddInd = - dep_mclk_table->entries[i].vddInd; - data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].vddci = - dep_mclk_table->entries[i].vddci; - } - } - return 0; } @@ -1514,18 +1514,18 @@ static int vega10_populate_single_gfx_level(struct pp_hwmgr *hwmgr, { struct phm_ppt_v2_information *table_info = (struct phm_ppt_v2_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_sclk = - table_info->vdd_dep_on_sclk; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_sclk; struct vega10_hwmgr *data = hwmgr->backend; struct pp_atomfwctrl_clock_dividers_soc15 dividers; uint32_t gfx_max_clock = hwmgr->platform_descriptor.overdriveLimit.engineClock; uint32_t i = 0; - if (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_VDDC) + if (hwmgr->od_enabled) dep_on_sclk = (struct phm_ppt_v1_clock_voltage_dependency_table *) - &(data->odn_dpm_table.vdd_dependency_on_sclk); + &(data->odn_dpm_table.vdd_dep_on_sclk); + else + dep_on_sclk = table_info->vdd_dep_on_sclk; PP_ASSERT_WITH_CODE(dep_on_sclk, "Invalid SOC_VDD-GFX_CLK Dependency Table!", @@ -1577,23 +1577,32 @@ static int vega10_populate_single_soc_level(struct pp_hwmgr *hwmgr, uint32_t soc_clock, uint8_t *current_soc_did, uint8_t *current_vol_index) { + struct vega10_hwmgr *data = hwmgr->backend; struct phm_ppt_v2_information *table_info = (struct phm_ppt_v2_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_soc = - table_info->vdd_dep_on_socclk; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_soc; struct pp_atomfwctrl_clock_dividers_soc15 dividers; uint32_t i; - PP_ASSERT_WITH_CODE(dep_on_soc, - "Invalid SOC_VDD-SOC_CLK Dependency Table!", - return -EINVAL); - for (i = 0; i < dep_on_soc->count; i++) { - if (dep_on_soc->entries[i].clk == soc_clock) - break; + if (hwmgr->od_enabled) { + dep_on_soc = (struct phm_ppt_v1_clock_voltage_dependency_table *) + &data->odn_dpm_table.vdd_dep_on_socclk; + for (i = 0; i < dep_on_soc->count; i++) { + if (dep_on_soc->entries[i].clk >= soc_clock) + break; + } + } else { + dep_on_soc = table_info->vdd_dep_on_socclk; + for (i = 0; i < dep_on_soc->count; i++) { + if (dep_on_soc->entries[i].clk == soc_clock) + break; + } } + PP_ASSERT_WITH_CODE(dep_on_soc->count > i, "Cannot find SOC_CLK in SOC_VDD-SOC_CLK Dependency Table", return -EINVAL); + PP_ASSERT_WITH_CODE(!pp_atomfwctrl_get_gpu_pll_dividers_vega10(hwmgr, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, soc_clock, ÷rs), @@ -1602,22 +1611,6 @@ static int vega10_populate_single_soc_level(struct pp_hwmgr *hwmgr, *current_soc_did = (uint8_t)dividers.ulDid; *current_vol_index = (uint8_t)(dep_on_soc->entries[i].vddInd); - - return 0; -} - -uint16_t vega10_locate_vddc_given_clock(struct pp_hwmgr *hwmgr, - uint32_t clk, - struct phm_ppt_v1_clock_voltage_dependency_table *dep_table) -{ - uint16_t i; - - for (i = 0; i < dep_table->count; i++) { - if (dep_table->entries[i].clk == clk) - return dep_table->entries[i].vddc; - } - - pr_info("[LocateVddcGivenClock] Cannot locate SOC Vddc for this clock!"); return 0; } @@ -1631,8 +1624,6 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) struct vega10_hwmgr *data = hwmgr->backend; struct phm_ppt_v2_information *table_info = (struct phm_ppt_v2_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = - table_info->vdd_dep_on_socclk; PPTable_t *pp_table = &(data->smc_state_table.pp_table); struct vega10_single_dpm_table *dpm_table = &(data->dpm_table.gfx_table); int result = 0; @@ -1663,11 +1654,6 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) dpm_table = &(data->dpm_table.soc_table); for (i = 0; i < dpm_table->count; i++) { - pp_table->SocVid[i] = - (uint8_t)convert_to_vid( - vega10_locate_vddc_given_clock(hwmgr, - dpm_table->dpm_levels[i].value, - dep_table)); result = vega10_populate_single_soc_level(hwmgr, dpm_table->dpm_levels[i].value, &(pp_table->SocclkDid[i]), @@ -1678,7 +1664,6 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) j = i - 1; while (i < NUM_SOCCLK_DPM_LEVELS) { - pp_table->SocVid[i] = pp_table->SocVid[j]; result = vega10_populate_single_soc_level(hwmgr, dpm_table->dpm_levels[j].value, &(pp_table->SocclkDid[i]), @@ -1691,6 +1676,32 @@ static int vega10_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) return result; } +static void vega10_populate_vddc_soc_levels(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + PPTable_t *pp_table = &(data->smc_state_table.pp_table); + struct phm_ppt_v2_information *table_info = hwmgr->pptable; + struct phm_ppt_v1_voltage_lookup_table *vddc_lookup_table; + + uint8_t soc_vid = 0; + uint32_t i, max_vddc_level; + + if (hwmgr->od_enabled) + vddc_lookup_table = (struct phm_ppt_v1_voltage_lookup_table *)&data->odn_dpm_table.vddc_lookup_table; + else + vddc_lookup_table = table_info->vddc_lookup_table; + + max_vddc_level = vddc_lookup_table->count; + for (i = 0; i < max_vddc_level; i++) { + soc_vid = (uint8_t)convert_to_vid(vddc_lookup_table->entries[i].us_vdd); + pp_table->SocVid[i] = soc_vid; + } + while (i < MAX_REGULAR_DPM_NUMBER) { + pp_table->SocVid[i] = soc_vid; + i++; + } +} + /** * @brief Populates single SMC GFXCLK structure using the provided clock. * @@ -1705,25 +1716,25 @@ static int vega10_populate_single_memory_level(struct pp_hwmgr *hwmgr, struct vega10_hwmgr *data = hwmgr->backend; struct phm_ppt_v2_information *table_info = (struct phm_ppt_v2_information *)(hwmgr->pptable); - struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_mclk = - table_info->vdd_dep_on_mclk; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_mclk; struct pp_atomfwctrl_clock_dividers_soc15 dividers; uint32_t mem_max_clock = hwmgr->platform_descriptor.overdriveLimit.memoryClock; uint32_t i = 0; - if (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_VDDC) + if (hwmgr->od_enabled) dep_on_mclk = (struct phm_ppt_v1_clock_voltage_dependency_table *) - &data->odn_dpm_table.vdd_dependency_on_mclk; + &data->odn_dpm_table.vdd_dep_on_mclk; + else + dep_on_mclk = table_info->vdd_dep_on_mclk; PP_ASSERT_WITH_CODE(dep_on_mclk, "Invalid SOC_VDD-UCLK Dependency Table!", return -EINVAL); - if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK) + if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK) { mem_clock = mem_clock > mem_max_clock ? mem_max_clock : mem_clock; - else { + } else { for (i = 0; i < dep_on_mclk->count; i++) { if (dep_on_mclk->entries[i].clk == mem_clock) break; @@ -2067,6 +2078,9 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) if (data->smu_features[GNLD_AVFS].supported) { result = pp_atomfwctrl_get_avfs_information(hwmgr, &avfs_params); if (!result) { + data->odn_dpm_table.max_vddc = avfs_params.ulMaxVddc; + data->odn_dpm_table.min_vddc = avfs_params.ulMinVddc; + pp_table->MinVoltageVid = (uint8_t) convert_to_vid((uint16_t)(avfs_params.ulMinVddc)); pp_table->MaxVoltageVid = (uint8_t) @@ -2345,6 +2359,22 @@ static int vega10_avfs_enable(struct pp_hwmgr *hwmgr, bool enable) return 0; } +static int vega10_update_avfs(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + + if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { + vega10_avfs_enable(hwmgr, false); + } else if (data->need_update_dpm_table) { + vega10_avfs_enable(hwmgr, false); + vega10_avfs_enable(hwmgr, true); + } else { + vega10_avfs_enable(hwmgr, true); + } + + return 0; +} + static int vega10_populate_and_upload_avfs_fuse_override(struct pp_hwmgr *hwmgr) { int result = 0; @@ -2406,6 +2436,10 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) "Failed to setup default DPM tables!", return result); + /* initialize ODN table */ + if (hwmgr->od_enabled) + vega10_odn_initial_default_setting(hwmgr); + pp_atomfwctrl_get_voltage_table_v4(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2, &voltage_table); pp_table->MaxVidStep = voltage_table.max_vid_step; @@ -2452,6 +2486,8 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) "Failed to initialize Memory Level!", return result); + vega10_populate_vddc_soc_levels(hwmgr); + result = vega10_populate_all_display_clock_levels(hwmgr); PP_ASSERT_WITH_CODE(!result, "Failed to initialize Display Level!", @@ -2481,6 +2517,12 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) data->vbios_boot_state.mvddc = boot_up_values.usMvddc; data->vbios_boot_state.gfx_clock = boot_up_values.ulGfxClk; data->vbios_boot_state.mem_clock = boot_up_values.ulUClk; + pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, + SMU9_SYSPLL0_SOCCLK_ID, &boot_up_values.ulSocClk); + + pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, + SMU9_SYSPLL0_DCEFCLK_ID, &boot_up_values.ulDCEFClk); + data->vbios_boot_state.soc_clock = boot_up_values.ulSocClk; data->vbios_boot_state.dcef_clock = boot_up_values.ulDCEFClk; if (0 != boot_up_values.usVddc) { @@ -2829,7 +2871,7 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) tmp_result = vega10_construct_voltage_tables(hwmgr); PP_ASSERT_WITH_CODE(!tmp_result, - "Failed to contruct voltage tables!", + "Failed to construct voltage tables!", result = tmp_result); tmp_result = vega10_init_smc_table(hwmgr); @@ -3028,7 +3070,6 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, bool disable_mclk_switching_for_frame_lock; bool disable_mclk_switching_for_vr; bool force_mclk_high; - struct cgs_display_info info = {0}; const struct phm_clock_and_voltage_limits *max_limits; uint32_t i; struct vega10_hwmgr *data = hwmgr->backend; @@ -3063,11 +3104,9 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, } } - cgs_get_active_displays_info(hwmgr->device, &info); - /* result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/ - minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock; - minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; + minimum_clocks.engineClock = hwmgr->display_config->min_core_set_clock; + minimum_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock; if (PP_CAP(PHM_PlatformCaps_StablePState)) { stable_pstate_sclk_dpm_percentage = @@ -3107,10 +3146,10 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR); force_mclk_high = PP_CAP(PHM_PlatformCaps_ForceMclkHigh); - if (info.display_count == 0) + if (hwmgr->display_config->num_display == 0) disable_mclk_switching = false; else - disable_mclk_switching = (info.display_count > 1) || + disable_mclk_switching = (hwmgr->display_config->num_display > 1) || disable_mclk_switching_for_frame_lock || disable_mclk_switching_for_vr || force_mclk_high; @@ -3171,87 +3210,11 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input) { - const struct phm_set_power_state_input *states = - (const struct phm_set_power_state_input *)input; - const struct vega10_power_state *vega10_ps = - cast_const_phw_vega10_power_state(states->pnew_state); struct vega10_hwmgr *data = hwmgr->backend; - struct vega10_single_dpm_table *sclk_table = - &(data->dpm_table.gfx_table); - uint32_t sclk = vega10_ps->performance_levels - [vega10_ps->performance_level_count - 1].gfx_clock; - struct vega10_single_dpm_table *mclk_table = - &(data->dpm_table.mem_table); - uint32_t mclk = vega10_ps->performance_levels - [vega10_ps->performance_level_count - 1].mem_clock; - struct PP_Clocks min_clocks = {0}; - uint32_t i; - struct cgs_display_info info = {0}; - - data->need_update_dpm_table = 0; - - if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) || - PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) { - for (i = 0; i < sclk_table->count; i++) { - if (sclk == sclk_table->dpm_levels[i].value) - break; - } - - if (!(data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_SCLK) && i >= sclk_table->count) { - /* Check SCLK in DAL's minimum clocks - * in case DeepSleep divider update is required. - */ - if (data->display_timing.min_clock_in_sr != - min_clocks.engineClockInSR && - (min_clocks.engineClockInSR >= - VEGA10_MINIMUM_ENGINE_CLOCK || - data->display_timing.min_clock_in_sr >= - VEGA10_MINIMUM_ENGINE_CLOCK)) - data->need_update_dpm_table |= DPMTABLE_UPDATE_SCLK; - } - - cgs_get_active_displays_info(hwmgr->device, &info); - - if (data->display_timing.num_existing_displays != - info.display_count) - data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK; - } else { - for (i = 0; i < sclk_table->count; i++) { - if (sclk == sclk_table->dpm_levels[i].value) - break; - } - - if (i >= sclk_table->count) - data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; - else { - /* Check SCLK in DAL's minimum clocks - * in case DeepSleep divider update is required. - */ - if (data->display_timing.min_clock_in_sr != - min_clocks.engineClockInSR && - (min_clocks.engineClockInSR >= - VEGA10_MINIMUM_ENGINE_CLOCK || - data->display_timing.min_clock_in_sr >= - VEGA10_MINIMUM_ENGINE_CLOCK)) - data->need_update_dpm_table |= DPMTABLE_UPDATE_SCLK; - } - - for (i = 0; i < mclk_table->count; i++) { - if (mclk == mclk_table->dpm_levels[i].value) - break; - } - - cgs_get_active_displays_info(hwmgr->device, &info); - if (i >= mclk_table->count) - data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) + data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK; - if (data->display_timing.num_existing_displays != - info.display_count || - i >= mclk_table->count) - data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK; - } return 0; } @@ -3259,194 +3222,29 @@ static int vega10_populate_and_upload_sclk_mclk_dpm_levels( struct pp_hwmgr *hwmgr, const void *input) { int result = 0; - const struct phm_set_power_state_input *states = - (const struct phm_set_power_state_input *)input; - const struct vega10_power_state *vega10_ps = - cast_const_phw_vega10_power_state(states->pnew_state); struct vega10_hwmgr *data = hwmgr->backend; - uint32_t sclk = vega10_ps->performance_levels - [vega10_ps->performance_level_count - 1].gfx_clock; - uint32_t mclk = vega10_ps->performance_levels - [vega10_ps->performance_level_count - 1].mem_clock; - struct vega10_dpm_table *dpm_table = &data->dpm_table; - struct vega10_dpm_table *golden_dpm_table = - &data->golden_dpm_table; - uint32_t dpm_count, clock_percent; - uint32_t i; - - if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) || - PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) { - - if (!data->need_update_dpm_table && - !data->apply_optimized_settings && - !data->apply_overdrive_next_settings_mask) - return 0; - - if (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_SCLK) { - for (dpm_count = 0; - dpm_count < dpm_table->gfx_table.count; - dpm_count++) { - dpm_table->gfx_table.dpm_levels[dpm_count].enabled = - data->odn_dpm_table.odn_core_clock_dpm_levels.entries[dpm_count].enabled; - dpm_table->gfx_table.dpm_levels[dpm_count].value = - data->odn_dpm_table.odn_core_clock_dpm_levels.entries[dpm_count].clock; - } - } - - if (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_MCLK) { - for (dpm_count = 0; - dpm_count < dpm_table->mem_table.count; - dpm_count++) { - dpm_table->mem_table.dpm_levels[dpm_count].enabled = - data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[dpm_count].enabled; - dpm_table->mem_table.dpm_levels[dpm_count].value = - data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[dpm_count].clock; - } - } - if ((data->need_update_dpm_table & DPMTABLE_UPDATE_SCLK) || - data->apply_optimized_settings || - (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_SCLK)) { - result = vega10_populate_all_graphic_levels(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate SCLK during PopulateNewDPMClocksStates Function!", - return result); - } - - if ((data->need_update_dpm_table & DPMTABLE_UPDATE_MCLK) || - (data->apply_overdrive_next_settings_mask & - DPMTABLE_OD_UPDATE_MCLK)){ - result = vega10_populate_all_memory_levels(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate MCLK during PopulateNewDPMClocksStates Function!", - return result); - } - } else { - if (!data->need_update_dpm_table && - !data->apply_optimized_settings) - return 0; - - if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_SCLK && - data->smu_features[GNLD_DPM_GFXCLK].supported) { - dpm_table-> - gfx_table.dpm_levels[dpm_table->gfx_table.count - 1]. - value = sclk; - if (hwmgr->od_enabled) { - /* Need to do calculation based on the golden DPM table - * as the Heatmap GPU Clock axis is also based on - * the default values - */ - PP_ASSERT_WITH_CODE( - golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count - 1].value, - "Divide by 0!", - return -1); - - dpm_count = dpm_table->gfx_table.count < 2 ? - 0 : dpm_table->gfx_table.count - 2; - for (i = dpm_count; i > 1; i--) { - if (sclk > golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count - 1].value) { - clock_percent = - ((sclk - golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count - 1].value) * - 100) / - golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count - 1].value; - - dpm_table->gfx_table.dpm_levels[i].value = - golden_dpm_table->gfx_table.dpm_levels[i].value + - (golden_dpm_table->gfx_table.dpm_levels[i].value * - clock_percent) / 100; - } else if (golden_dpm_table-> - gfx_table.dpm_levels[dpm_table->gfx_table.count-1].value > - sclk) { - clock_percent = - ((golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count - 1].value - - sclk) * 100) / - golden_dpm_table->gfx_table.dpm_levels - [golden_dpm_table->gfx_table.count-1].value; - - dpm_table->gfx_table.dpm_levels[i].value = - golden_dpm_table->gfx_table.dpm_levels[i].value - - (golden_dpm_table->gfx_table.dpm_levels[i].value * - clock_percent) / 100; - } else - dpm_table->gfx_table.dpm_levels[i].value = - golden_dpm_table->gfx_table.dpm_levels[i].value; - } - } - } - - if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK && - data->smu_features[GNLD_DPM_UCLK].supported) { - dpm_table-> - mem_table.dpm_levels[dpm_table->mem_table.count - 1]. - value = mclk; + if (!data->need_update_dpm_table) + return 0; - if (hwmgr->od_enabled) { - PP_ASSERT_WITH_CODE( - golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count - 1].value, - "Divide by 0!", - return -1); + if (data->need_update_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK + DPMTABLE_UPDATE_SOCCLK)) { + result = vega10_populate_all_graphic_levels(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to populate SCLK during PopulateNewDPMClocksStates Function!", + return result); + } - dpm_count = dpm_table->mem_table.count < 2 ? - 0 : dpm_table->mem_table.count - 2; - for (i = dpm_count; i > 1; i--) { - if (mclk > golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count-1].value) { - clock_percent = ((mclk - - golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count-1].value) * - 100) / - golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count-1].value; - - dpm_table->mem_table.dpm_levels[i].value = - golden_dpm_table->mem_table.dpm_levels[i].value + - (golden_dpm_table->mem_table.dpm_levels[i].value * - clock_percent) / 100; - } else if (golden_dpm_table->mem_table.dpm_levels - [dpm_table->mem_table.count-1].value > mclk) { - clock_percent = ((golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count-1].value - mclk) * - 100) / - golden_dpm_table->mem_table.dpm_levels - [golden_dpm_table->mem_table.count-1].value; - - dpm_table->mem_table.dpm_levels[i].value = - golden_dpm_table->mem_table.dpm_levels[i].value - - (golden_dpm_table->mem_table.dpm_levels[i].value * - clock_percent) / 100; - } else - dpm_table->mem_table.dpm_levels[i].value = - golden_dpm_table->mem_table.dpm_levels[i].value; - } - } - } + if (data->need_update_dpm_table & + (DPMTABLE_OD_UPDATE_MCLK + DPMTABLE_UPDATE_MCLK)) { + result = vega10_populate_all_memory_levels(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), + "Failed to populate MCLK during PopulateNewDPMClocksStates Function!", + return result); + } - if ((data->need_update_dpm_table & - (DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK)) || - data->apply_optimized_settings) { - result = vega10_populate_all_graphic_levels(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate SCLK during PopulateNewDPMClocksStates Function!", - return result); - } + vega10_populate_vddc_soc_levels(hwmgr); - if (data->need_update_dpm_table & - (DPMTABLE_OD_UPDATE_MCLK + DPMTABLE_UPDATE_MCLK)) { - result = vega10_populate_all_memory_levels(hwmgr); - PP_ASSERT_WITH_CODE(!result, - "Failed to populate MCLK during PopulateNewDPMClocksStates Function!", - return result); - } - } return result; } @@ -3742,8 +3540,9 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr, PP_ASSERT_WITH_CODE(!result, "Failed to upload PPtable!", return result); - data->apply_optimized_settings = false; - data->apply_overdrive_next_settings_mask = 0; + vega10_update_avfs(hwmgr); + + data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC; return 0; } @@ -3793,16 +3592,18 @@ static uint32_t vega10_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low) } static int vega10_get_gpu_power(struct pp_hwmgr *hwmgr, - struct pp_gpu_power *query) + uint32_t *query) { uint32_t value; + if (!query) + return -EINVAL; + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrPkgPwr); value = smum_get_argument(hwmgr); - /* power value is an integer */ - memset(query, 0, sizeof *query); - query->average_gpu_power = value << 8; + /* SMC returning actual watts, keep consistent with legacy asics, low 8 bit as 8 fractional bits */ + *query = value << 8; return 0; } @@ -3810,22 +3611,18 @@ static int vega10_get_gpu_power(struct pp_hwmgr *hwmgr, static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, void *value, int *size) { - uint32_t sclk_idx, mclk_idx, activity_percent = 0; + struct amdgpu_device *adev = hwmgr->adev; + uint32_t sclk_mhz, mclk_idx, activity_percent = 0; struct vega10_hwmgr *data = hwmgr->backend; struct vega10_dpm_table *dpm_table = &data->dpm_table; int ret = 0; - uint32_t reg, val_vid; + uint32_t val_vid; switch (idx) { case AMDGPU_PP_SENSOR_GFX_SCLK: - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentGfxclkIndex); - sclk_idx = smum_get_argument(hwmgr); - if (sclk_idx < dpm_table->gfx_table.count) { - *((uint32_t *)value) = dpm_table->gfx_table.dpm_levels[sclk_idx].value; - *size = 4; - } else { - ret = -EINVAL; - } + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetAverageGfxclkActualFrequency); + sclk_mhz = smum_get_argument(hwmgr); + *((uint32_t *)value) = sclk_mhz * 100; break; case AMDGPU_PP_SENSOR_GFX_MCLK: smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex); @@ -3856,18 +3653,10 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; break; case AMDGPU_PP_SENSOR_GPU_POWER: - if (*size < sizeof(struct pp_gpu_power)) - ret = -EINVAL; - else { - *size = sizeof(struct pp_gpu_power); - ret = vega10_get_gpu_power(hwmgr, (struct pp_gpu_power *)value); - } + ret = vega10_get_gpu_power(hwmgr, (uint32_t *)value); break; case AMDGPU_PP_SENSOR_VDDGFX: - reg = soc15_get_register_offset(SMUIO_HWID, 0, - mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX, - mmSMUSVI0_PLANE0_CURRENTVID); - val_vid = (cgs_read_register(hwmgr->device, reg) & + val_vid = (RREG32_SOC15(SMUIO, 0, mmSMUSVI0_PLANE0_CURRENTVID) & SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK) >> SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT; *((uint32_t *)value) = (uint32_t)convert_to_vddc((uint8_t)val_vid); @@ -3956,26 +3745,18 @@ static int vega10_notify_smc_display_config_after_ps_adjustment( (struct phm_ppt_v2_information *)hwmgr->pptable; struct phm_ppt_v1_clock_voltage_dependency_table *mclk_table = table_info->vdd_dep_on_mclk; uint32_t idx; - uint32_t num_active_disps = 0; - struct cgs_display_info info = {0}; struct PP_Clocks min_clocks = {0}; uint32_t i; struct pp_display_clock_request clock_req; - info.mode_info = NULL; - - cgs_get_active_displays_info(hwmgr->device, &info); - - num_active_disps = info.display_count; - - if (num_active_disps > 1) + if (hwmgr->display_config->num_display > 1) vega10_notify_smc_display_change(hwmgr, false); else vega10_notify_smc_display_change(hwmgr, true); - min_clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk; - min_clocks.dcefClockInSR = hwmgr->display_config.min_dcef_deep_sleep_set_clk; - min_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; + min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk; + min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk; + min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock; for (i = 0; i < dpm_table->count; i++) { if (dpm_table->dpm_levels[i].value == min_clocks.dcefClock) @@ -4120,6 +3901,47 @@ static void vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) } } +static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, uint32_t mask) +{ + struct vega10_hwmgr *data = hwmgr->backend; + + switch (type) { + case PP_SCLK: + data->smc_state_table.gfx_boot_level = mask ? (ffs(mask) - 1) : 0; + data->smc_state_table.gfx_max_level = mask ? (fls(mask) - 1) : 0; + + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), + "Failed to upload boot level to lowest!", + return -EINVAL); + + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), + "Failed to upload dpm max level to highest!", + return -EINVAL); + break; + + case PP_MCLK: + data->smc_state_table.mem_boot_level = mask ? (ffs(mask) - 1) : 0; + data->smc_state_table.mem_max_level = mask ? (fls(mask) - 1) : 0; + + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), + "Failed to upload boot level to lowest!", + return -EINVAL); + + PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), + "Failed to upload dpm max level to highest!", + return -EINVAL); + + break; + + case PP_PCIE: + default: + break; + } + + return 0; +} + static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level) { @@ -4356,97 +4178,15 @@ static int vega10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr, struct vega10_hwmgr *data = hwmgr->backend; Watermarks_t *table = &(data->smc_state_table.water_marks_table); int result = 0; - uint32_t i; if (!data->registry_data.disable_water_mark) { - for (i = 0; i < wm_with_clock_ranges->num_wm_sets_dmif; i++) { - table->WatermarkRow[WM_DCEFCLK][i].MinClock = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_dcefclk_in_khz) / - 100); - table->WatermarkRow[WM_DCEFCLK][i].MaxClock = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_dcefclk_in_khz) / - 100); - table->WatermarkRow[WM_DCEFCLK][i].MinUclk = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_dmif[i].wm_min_memclk_in_khz) / - 100); - table->WatermarkRow[WM_DCEFCLK][i].MaxUclk = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_dmif[i].wm_max_memclk_in_khz) / - 100); - table->WatermarkRow[WM_DCEFCLK][i].WmSetting = (uint8_t) - wm_with_clock_ranges->wm_sets_dmif[i].wm_set_id; - } - - for (i = 0; i < wm_with_clock_ranges->num_wm_sets_mcif; i++) { - table->WatermarkRow[WM_SOCCLK][i].MinClock = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_socclk_in_khz) / - 100); - table->WatermarkRow[WM_SOCCLK][i].MaxClock = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_socclk_in_khz) / - 100); - table->WatermarkRow[WM_SOCCLK][i].MinUclk = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_mcif[i].wm_min_memclk_in_khz) / - 100); - table->WatermarkRow[WM_SOCCLK][i].MaxUclk = - cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_sets_mcif[i].wm_max_memclk_in_khz) / - 100); - table->WatermarkRow[WM_SOCCLK][i].WmSetting = (uint8_t) - wm_with_clock_ranges->wm_sets_mcif[i].wm_set_id; - } + smu_set_watermarks_for_clocks_ranges(table, wm_with_clock_ranges); data->water_marks_bitmap = WaterMarksExist; } return result; } -static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, - enum pp_clock_type type, uint32_t mask) -{ - struct vega10_hwmgr *data = hwmgr->backend; - - switch (type) { - case PP_SCLK: - data->smc_state_table.gfx_boot_level = mask ? (ffs(mask) - 1) : 0; - data->smc_state_table.gfx_max_level = mask ? (fls(mask) - 1) : 0; - - PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), - "Failed to upload boot level to lowest!", - return -EINVAL); - - PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), - "Failed to upload dpm max level to highest!", - return -EINVAL); - break; - - case PP_MCLK: - data->smc_state_table.mem_boot_level = mask ? (ffs(mask) - 1) : 0; - data->smc_state_table.mem_max_level = mask ? (fls(mask) - 1) : 0; - - PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr), - "Failed to upload boot level to lowest!", - return -EINVAL); - - PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr), - "Failed to upload dpm max level to highest!", - return -EINVAL); - - break; - - case PP_PCIE: - default: - break; - } - - return 0; -} - static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf) { @@ -4454,6 +4194,8 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table); struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table); struct vega10_pcie_table *pcie_table = &(data->dpm_table.pcie_table); + struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep = NULL; + int i, now, size = 0; switch (type) { @@ -4492,6 +4234,40 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, (pcie_table->pcie_gen[i] == 2) ? "8.0GT/s, x16" : "", (i == now) ? "*" : ""); break; + case OD_SCLK: + if (hwmgr->od_enabled) { + size = sprintf(buf, "%s:\n", "OD_SCLK"); + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk; + for (i = 0; i < podn_vdd_dep->count; i++) + size += sprintf(buf + size, "%d: %10uMhz %10umV\n", + i, podn_vdd_dep->entries[i].clk / 100, + podn_vdd_dep->entries[i].vddc); + } + break; + case OD_MCLK: + if (hwmgr->od_enabled) { + size = sprintf(buf, "%s:\n", "OD_MCLK"); + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk; + for (i = 0; i < podn_vdd_dep->count; i++) + size += sprintf(buf + size, "%d: %10uMhz %10umV\n", + i, podn_vdd_dep->entries[i].clk/100, + podn_vdd_dep->entries[i].vddc); + } + break; + case OD_RANGE: + if (hwmgr->od_enabled) { + size = sprintf(buf, "%s:\n", "OD_RANGE"); + size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", + data->golden_dpm_table.gfx_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.engineClock/100); + size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n", + data->golden_dpm_table.mem_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); + size += sprintf(buf + size, "VDDC: %7umV %11umV\n", + data->odn_dpm_table.min_vddc, + data->odn_dpm_table.max_vddc); + } + break; default: break; } @@ -4501,10 +4277,8 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, static int vega10_display_configuration_changed_task(struct pp_hwmgr *hwmgr) { struct vega10_hwmgr *data = hwmgr->backend; - int result = 0; - uint32_t num_turned_on_displays = 1; Watermarks_t *wm_table = &(data->smc_state_table.water_marks_table); - struct cgs_display_info info = {0}; + int result = 0; if ((data->water_marks_bitmap & WaterMarksExist) && !(data->water_marks_bitmap & WaterMarksLoaded)) { @@ -4514,10 +4288,8 @@ static int vega10_display_configuration_changed_task(struct pp_hwmgr *hwmgr) } if (data->water_marks_bitmap & WaterMarksLoaded) { - cgs_get_active_displays_info(hwmgr->device, &info); - num_turned_on_displays = info.display_count; smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_NumOfDisplays, num_turned_on_displays); + PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display); } return result; @@ -4603,15 +4375,12 @@ vega10_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmg { struct vega10_hwmgr *data = hwmgr->backend; bool is_update_required = false; - struct cgs_display_info info = {0, 0, NULL}; - - cgs_get_active_displays_info(hwmgr->device, &info); - if (data->display_timing.num_existing_displays != info.display_count) + if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) is_update_required = true; if (PP_CAP(PHM_PlatformCaps_SclkDeepSleep)) { - if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr) + if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr) is_update_required = true; } @@ -4886,6 +4655,200 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui return 0; } + +static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type, + uint32_t clk, + uint32_t voltage) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct vega10_single_dpm_table *golden_table; + + if (voltage < odn_table->min_vddc || voltage > odn_table->max_vddc) { + pr_info("OD voltage is out of range [%d - %d] mV\n", odn_table->min_vddc, odn_table->max_vddc); + return false; + } + + if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { + golden_table = &(data->golden_dpm_table.gfx_table); + if (golden_table->dpm_levels[0].value > clk || + hwmgr->platform_descriptor.overdriveLimit.engineClock < clk) { + pr_info("OD engine clock is out of range [%d - %d] MHz\n", + golden_table->dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.engineClock/100); + return false; + } + } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) { + golden_table = &(data->golden_dpm_table.mem_table); + if (golden_table->dpm_levels[0].value > clk || + hwmgr->platform_descriptor.overdriveLimit.memoryClock < clk) { + pr_info("OD memory clock is out of range [%d - %d] MHz\n", + golden_table->dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); + return false; + } + } else { + return false; + } + + return true; +} + +static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct phm_ppt_v2_information *table_info = hwmgr->pptable; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table; + struct phm_ppt_v1_clock_voltage_dependency_table *odn_dep_table; + uint32_t i; + + dep_table = table_info->vdd_dep_on_mclk; + odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_mclk); + + for (i = 0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK; + return; + } + } + + dep_table = table_info->vdd_dep_on_sclk; + odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_sclk); + for (i = 0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK; + return; + } + } + + if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { + data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; + } +} + +static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct phm_ppt_v2_information *table_info = hwmgr->pptable; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk; + struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.soc_table; + + struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk = + &data->odn_dpm_table.vdd_dep_on_socclk; + struct vega10_odn_vddc_lookup_table *od_vddc_lookup_table = &data->odn_dpm_table.vddc_lookup_table; + + struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep; + uint8_t i, j; + + if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk; + for (i = 0; i < podn_vdd_dep->count - 1; i++) + od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc; + if (od_vddc_lookup_table->entries[i].us_vdd < podn_vdd_dep->entries[i].vddc) + od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc; + } else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) { + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk; + for (i = 0; i < dpm_table->count; i++) { + for (j = 0; j < od_vddc_lookup_table->count; j++) { + if (od_vddc_lookup_table->entries[j].us_vdd > + podn_vdd_dep->entries[i].vddc) + break; + } + if (j == od_vddc_lookup_table->count) { + od_vddc_lookup_table->entries[j-1].us_vdd = + podn_vdd_dep->entries[i].vddc; + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + } + podn_vdd_dep->entries[i].vddInd = j; + } + dpm_table = &data->dpm_table.soc_table; + for (i = 0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[dep_table->count-1].vddInd && + dep_table->entries[i].clk < podn_vdd_dep->entries[dep_table->count-1].clk) { + data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; + podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[dep_table->count-1].clk; + dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk; + } + } + if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk < + podn_vdd_dep->entries[dep_table->count-1].clk) { + data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; + podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = podn_vdd_dep->entries[dep_table->count-1].clk; + dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = podn_vdd_dep->entries[dep_table->count-1].clk; + } + if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd < + podn_vdd_dep->entries[dep_table->count-1].vddInd) { + data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK; + podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = podn_vdd_dep->entries[dep_table->count-1].vddInd; + } + } +} + +static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, + enum PP_OD_DPM_TABLE_COMMAND type, + long *input, uint32_t size) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_table; + struct vega10_single_dpm_table *dpm_table; + + uint32_t input_clk; + uint32_t input_vol; + uint32_t input_level; + uint32_t i; + + PP_ASSERT_WITH_CODE(input, "NULL user input for clock and voltage", + return -EINVAL); + + if (!hwmgr->od_enabled) { + pr_info("OverDrive feature not enabled\n"); + return -EINVAL; + } + + if (PP_OD_EDIT_SCLK_VDDC_TABLE == type) { + dpm_table = &data->dpm_table.gfx_table; + podn_vdd_dep_table = &data->odn_dpm_table.vdd_dep_on_sclk; + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + } else if (PP_OD_EDIT_MCLK_VDDC_TABLE == type) { + dpm_table = &data->dpm_table.mem_table; + podn_vdd_dep_table = &data->odn_dpm_table.vdd_dep_on_mclk; + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + } else if (PP_OD_RESTORE_DEFAULT_TABLE == type) { + memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table)); + vega10_odn_initial_default_setting(hwmgr); + return 0; + } else if (PP_OD_COMMIT_DPM_TABLE == type) { + vega10_check_dpm_table_updated(hwmgr); + return 0; + } else { + return -EINVAL; + } + + for (i = 0; i < size; i += 3) { + if (i + 3 > size || input[i] >= podn_vdd_dep_table->count) { + pr_info("invalid clock voltage input\n"); + return 0; + } + input_level = input[i]; + input_clk = input[i+1] * 100; + input_vol = input[i+2]; + + if (vega10_check_clk_voltage_valid(hwmgr, type, input_clk, input_vol)) { + dpm_table->dpm_levels[input_level].value = input_clk; + podn_vdd_dep_table->entries[input_level].clk = input_clk; + podn_vdd_dep_table->entries[input_level].vddc = input_vol; + } else { + return -EINVAL; + } + } + vega10_odn_update_soc_table(hwmgr, type); + return 0; +} + static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .backend_init = vega10_hwmgr_backend_init, .backend_fini = vega10_hwmgr_backend_fini, @@ -4944,6 +4907,7 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .get_power_profile_mode = vega10_get_power_profile_mode, .set_power_profile_mode = vega10_set_power_profile_mode, .set_power_limit = vega10_set_power_limit, + .odn_edit_dpm_table = vega10_odn_edit_dpm_table, }; int vega10_enable_smc_features(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h index 5339ea1f3dce..aadd6cbc7e85 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h @@ -282,15 +282,21 @@ struct vega10_registry_data { struct vega10_odn_clock_voltage_dependency_table { uint32_t count; - struct phm_ppt_v1_clock_voltage_dependency_record - entries[MAX_REGULAR_DPM_NUMBER]; + struct phm_ppt_v1_clock_voltage_dependency_record entries[MAX_REGULAR_DPM_NUMBER]; +}; + +struct vega10_odn_vddc_lookup_table { + uint32_t count; + struct phm_ppt_v1_voltage_lookup_record entries[MAX_REGULAR_DPM_NUMBER]; }; struct vega10_odn_dpm_table { - struct phm_odn_clock_levels odn_core_clock_dpm_levels; - struct phm_odn_clock_levels odn_memory_clock_dpm_levels; - struct vega10_odn_clock_voltage_dependency_table vdd_dependency_on_sclk; - struct vega10_odn_clock_voltage_dependency_table vdd_dependency_on_mclk; + struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_sclk; + struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_mclk; + struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_socclk; + struct vega10_odn_vddc_lookup_table vddc_lookup_table; + uint32_t max_vddc; + uint32_t min_vddc; }; struct vega10_odn_fan_table { @@ -301,8 +307,8 @@ struct vega10_odn_fan_table { }; struct vega10_hwmgr { - struct vega10_dpm_table dpm_table; - struct vega10_dpm_table golden_dpm_table; + struct vega10_dpm_table dpm_table; + struct vega10_dpm_table golden_dpm_table; struct vega10_registry_data registry_data; struct vega10_vbios_boot_state vbios_boot_state; struct vega10_mclk_latency_table mclk_latency_table; @@ -368,12 +374,8 @@ struct vega10_hwmgr { bool need_long_memory_training; /* Internal settings to apply the application power optimization parameters */ - bool apply_optimized_settings; uint32_t disable_dpm_mask; - /* ---- Overdrive next setting ---- */ - uint32_t apply_overdrive_next_settings_mask; - /* ---- SMU9 ---- */ struct smu_features smu_features[GNLD_FEATURES_MAX]; struct vega10_smc_state_table smc_state_table; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c index ba63faefc61f..dbe4b1f66784 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c @@ -27,7 +27,7 @@ #include "vega10_ppsmc.h" #include "vega10_inc.h" #include "pp_debug.h" -#include "pp_soc15.h" +#include "soc15_common.h" static const struct vega10_didt_config_reg SEDiDtTuningCtrlConfig_Vega10[] = { @@ -888,36 +888,36 @@ static void vega10_didt_set_mask(struct pp_hwmgr *hwmgr, const bool enable) if (PP_CAP(PHM_PlatformCaps_DiDtEDCEnable)) { if (PP_CAP(PHM_PlatformCaps_SQRamping)) { data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL); - data = CGS_REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_EN, en); - data = CGS_REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_SW_RST, ~en); + data = REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_EN, en); + data = REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_SW_RST, ~en); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL, data); } if (PP_CAP(PHM_PlatformCaps_DBRamping)) { data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL); - data = CGS_REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_EN, en); - data = CGS_REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_SW_RST, ~en); + data = REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_EN, en); + data = REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_SW_RST, ~en); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL, data); } if (PP_CAP(PHM_PlatformCaps_TDRamping)) { data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL); - data = CGS_REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_EN, en); - data = CGS_REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_SW_RST, ~en); + data = REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_EN, en); + data = REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_SW_RST, ~en); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL, data); } if (PP_CAP(PHM_PlatformCaps_TCPRamping)) { data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL); - data = CGS_REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_EN, en); - data = CGS_REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_SW_RST, ~en); + data = REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_EN, en); + data = REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_SW_RST, ~en); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL, data); } if (PP_CAP(PHM_PlatformCaps_DBRRamping)) { data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL); - data = CGS_REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_EN, en); - data = CGS_REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_SW_RST, ~en); + data = REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_EN, en); + data = REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_SW_RST, ~en); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL, data); } } @@ -930,20 +930,18 @@ static void vega10_didt_set_mask(struct pp_hwmgr *hwmgr, const bool enable) static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int result; uint32_t num_se = 0, count, data; - struct amdgpu_device *adev = hwmgr->adev; - uint32_t reg; num_se = adev->gfx.config.max_shader_engines; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); - cgs_lock_grbm_idx(hwmgr->device, true); - reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); - cgs_write_register(hwmgr->device, reg, data); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); result = vega10_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega10, VEGA10_CONFIGREG_DIDT); @@ -958,43 +956,43 @@ static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) if (0 != result) break; } - cgs_write_register(hwmgr->device, reg, 0xE0000000); - cgs_lock_grbm_idx(hwmgr->device, false); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); + mutex_unlock(&adev->grbm_idx_mutex); vega10_didt_set_mask(hwmgr, true); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } static int vega10_disable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) { - cgs_enter_safe_mode(hwmgr->device, true); + struct amdgpu_device *adev = hwmgr->adev; + + adev->gfx.rlc.funcs->enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int result; uint32_t num_se = 0, count, data; - struct amdgpu_device *adev = hwmgr->adev; - uint32_t reg; num_se = adev->gfx.config.max_shader_engines; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); - cgs_lock_grbm_idx(hwmgr->device, true); - reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); - cgs_write_register(hwmgr->device, reg, data); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); result = vega10_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega10, VEGA10_CONFIGREG_DIDT); @@ -1003,12 +1001,12 @@ static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) if (0 != result) break; } - cgs_write_register(hwmgr->device, reg, 0xE0000000); - cgs_lock_grbm_idx(hwmgr->device, false); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); + mutex_unlock(&adev->grbm_idx_mutex); vega10_didt_set_mask(hwmgr, true); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); vega10_program_gc_didt_config_registers(hwmgr, GCDiDtDroopCtrlConfig_vega10); if (PP_CAP(PHM_PlatformCaps_GCEDC)) @@ -1022,13 +1020,14 @@ static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) static int vega10_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t data; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); if (PP_CAP(PHM_PlatformCaps_GCEDC)) { data = 0x00000000; @@ -1043,20 +1042,18 @@ static int vega10_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int result; uint32_t num_se = 0, count, data; - struct amdgpu_device *adev = hwmgr->adev; - uint32_t reg; num_se = adev->gfx.config.max_shader_engines; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); - cgs_lock_grbm_idx(hwmgr->device, true); - reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); - cgs_write_register(hwmgr->device, reg, data); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); result = vega10_program_didt_config_registers(hwmgr, SEDiDtWeightConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT); @@ -1067,47 +1064,47 @@ static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr) if (0 != result) break; } - cgs_write_register(hwmgr->device, reg, 0xE0000000); - cgs_lock_grbm_idx(hwmgr->device, false); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); + mutex_unlock(&adev->grbm_idx_mutex); vega10_didt_set_mask(hwmgr, true); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } static int vega10_disable_se_edc_config(struct pp_hwmgr *hwmgr) { - cgs_enter_safe_mode(hwmgr->device, true); + struct amdgpu_device *adev = hwmgr->adev; + + adev->gfx.rlc.funcs->enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int result; uint32_t num_se = 0; uint32_t count, data; - struct amdgpu_device *adev = hwmgr->adev; - uint32_t reg; num_se = adev->gfx.config.max_shader_engines; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMResetConfig_vega10); - cgs_lock_grbm_idx(hwmgr->device, true); - reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); - cgs_write_register(hwmgr->device, reg, data); - result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); + result = vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCCtrlResetConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCCtrlConfig_Vega10, VEGA10_CONFIGREG_DIDT); @@ -1115,12 +1112,12 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) if (0 != result) break; } - cgs_write_register(hwmgr->device, reg, 0xE0000000); - cgs_lock_grbm_idx(hwmgr->device, false); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); + mutex_unlock(&adev->grbm_idx_mutex); vega10_didt_set_mask(hwmgr, true); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); vega10_program_gc_didt_config_registers(hwmgr, PSMGCEDCDroopCtrlConfig_vega10); @@ -1137,13 +1134,14 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) static int vega10_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t data; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); if (PP_CAP(PHM_PlatformCaps_GCEDC)) { data = 0x00000000; @@ -1158,15 +1156,14 @@ static int vega10_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; int result; - cgs_enter_safe_mode(hwmgr->device, true); + adev->gfx.rlc.funcs->enter_safe_mode(adev); - cgs_lock_grbm_idx(hwmgr->device, true); - reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); - cgs_write_register(hwmgr->device, reg, 0xE0000000); - cgs_lock_grbm_idx(hwmgr->device, false); + mutex_lock(&adev->grbm_idx_mutex); + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); + mutex_unlock(&adev->grbm_idx_mutex); result = vega10_program_didt_config_registers(hwmgr, SEEDCForceStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlForceStallConfig_Vega10, VEGA10_CONFIGREG_DIDT); @@ -1175,7 +1172,7 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, false); - cgs_enter_safe_mode(hwmgr->device, false); + adev->gfx.rlc.funcs->exit_safe_mode(adev); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index c61d0744860d..16b1a9cf6cf0 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -52,7 +52,7 @@ static const void *get_powerplay_table(struct pp_hwmgr *hwmgr) if (!table_address) { table_address = (ATOM_Vega10_POWERPLAYTABLE *) - cgs_atom_get_data_table(hwmgr->device, index, + smu_atom_get_data_table(hwmgr->adev, index, &size, &frev, &crev); hwmgr->soft_pp_table = table_address; /*Cache the result in RAM.*/ @@ -267,12 +267,6 @@ static int init_over_drive_limits( hwmgr->platform_descriptor.maxOverdriveVDDC = 0; hwmgr->platform_descriptor.overdriveVDDCStep = 0; - if (hwmgr->platform_descriptor.overdriveLimit.engineClock == 0 || - hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0) { - hwmgr->od_enabled = false; - pr_debug("OverDrive feature not support by VBIOS\n"); - } - return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c index 9f18226a56ea..aa044c1955fe 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c @@ -25,7 +25,7 @@ #include "vega10_hwmgr.h" #include "vega10_ppsmc.h" #include "vega10_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "pp_debug.h" static int vega10_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm) @@ -89,6 +89,7 @@ int vega10_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr, int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed) { + struct amdgpu_device *adev = hwmgr->adev; struct vega10_hwmgr *data = hwmgr->backend; uint32_t tach_period; uint32_t crystal_clock_freq; @@ -100,10 +101,8 @@ int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed) if (data->smu_features[GNLD_FAN_CONTROL].supported) { result = vega10_get_current_rpm(hwmgr, speed); } else { - uint32_t reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_TACH_STATUS_BASE_IDX, mmCG_TACH_STATUS); tach_period = - CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg), + REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_STATUS), CG_TACH_STATUS, TACH_PERIOD); @@ -127,26 +126,23 @@ int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed) */ int vega10_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode) { - uint32_t reg; - - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2); + struct amdgpu_device *adev = hwmgr->adev; if (hwmgr->fan_ctrl_is_in_default_mode) { hwmgr->fan_ctrl_default_mode = - CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg), + REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, FDO_PWM_MODE); hwmgr->tmin = - CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg), + REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, TMIN); hwmgr->fan_ctrl_is_in_default_mode = false; } - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, TMIN, 0)); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, FDO_PWM_MODE, mode)); return 0; @@ -159,18 +155,15 @@ int vega10_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode) */ int vega10_fan_ctrl_set_default_mode(struct pp_hwmgr *hwmgr) { - uint32_t reg; - - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2); + struct amdgpu_device *adev = hwmgr->adev; if (!hwmgr->fan_ctrl_is_in_default_mode) { - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, FDO_PWM_MODE, hwmgr->fan_ctrl_default_mode)); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, TMIN, hwmgr->tmin << CG_FDO_CTRL2__TMIN__SHIFT)); hwmgr->fan_ctrl_is_in_default_mode = true; @@ -257,10 +250,10 @@ int vega10_fan_ctrl_stop_smc_fan_control(struct pp_hwmgr *hwmgr) int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, uint32_t speed) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t duty100; uint32_t duty; uint64_t tmp64; - uint32_t reg; if (hwmgr->thermal_controller.fanInfo.bNoFan) return 0; @@ -271,10 +264,7 @@ int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl)) vega10_fan_ctrl_stop_smc_fan_control(hwmgr); - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_FDO_CTRL1_BASE_IDX, mmCG_FDO_CTRL1); - - duty100 = CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg), + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), CG_FDO_CTRL1, FMAX_DUTY100); if (duty100 == 0) @@ -284,10 +274,8 @@ int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr, do_div(tmp64, 100); duty = (uint32_t)tmp64; - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_FDO_CTRL0_BASE_IDX, mmCG_FDO_CTRL0); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL0), CG_FDO_CTRL0, FDO_STATIC_DUTY, duty)); return vega10_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC); @@ -317,10 +305,10 @@ int vega10_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr) */ int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t tach_period; uint32_t crystal_clock_freq; int result = 0; - uint32_t reg; if (hwmgr->thermal_controller.fanInfo.bNoFan || (speed < hwmgr->thermal_controller.fanInfo.ulMinRPM) || @@ -333,10 +321,8 @@ int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) if (!result) { crystal_clock_freq = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev); tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_TACH_STATUS_BASE_IDX, mmCG_TACH_STATUS); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_TACH_STATUS, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_STATUS), CG_TACH_STATUS, TACH_PERIOD, tach_period)); } @@ -350,13 +336,10 @@ int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed) */ int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int temp; - uint32_t reg; - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_MULT_THERMAL_STATUS_BASE_IDX, mmCG_MULT_THERMAL_STATUS); - - temp = cgs_read_register(hwmgr->device, reg); + temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; @@ -379,11 +362,12 @@ int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr) static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range) { + struct amdgpu_device *adev = hwmgr->adev; int low = VEGA10_THERMAL_MINIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; int high = VEGA10_THERMAL_MAXIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; - uint32_t val, reg; + uint32_t val; if (low < range->min) low = range->min; @@ -393,20 +377,17 @@ static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, if (low > high) return -EINVAL; - reg = soc15_get_register_offset(THM_HWID, 0, - mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL); - - val = cgs_read_register(hwmgr->device, reg); + val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); val &= (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK) & (~THM_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK) & (~THM_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK); - cgs_write_register(hwmgr->device, reg, val); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); return 0; } @@ -418,21 +399,17 @@ static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, */ static int vega10_thermal_initialize(struct pp_hwmgr *hwmgr) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; if (hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution) { - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_TACH_CTRL_BASE_IDX, mmCG_TACH_CTRL); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_TACH_CTRL, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL), CG_TACH_CTRL, EDGE_PER_REV, hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution - 1)); } - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2); - cgs_write_register(hwmgr->device, reg, - CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg), + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2), CG_FDO_CTRL2, TACH_PWM_RESP_RATE, 0x28)); return 0; @@ -445,9 +422,9 @@ static int vega10_thermal_initialize(struct pp_hwmgr *hwmgr) */ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; struct vega10_hwmgr *data = hwmgr->backend; uint32_t val = 0; - uint32_t reg; if (data->smu_features[GNLD_FW_CTF].supported) { if (data->smu_features[GNLD_FW_CTF].enabled) @@ -465,8 +442,7 @@ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr) val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT); val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT); - reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA); - cgs_write_register(hwmgr->device, reg, val); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, val); return 0; } @@ -477,8 +453,8 @@ static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr) */ int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; struct vega10_hwmgr *data = hwmgr->backend; - uint32_t reg; if (data->smu_features[GNLD_FW_CTF].supported) { if (!data->smu_features[GNLD_FW_CTF].enabled) @@ -493,8 +469,7 @@ int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr) data->smu_features[GNLD_FW_CTF].enabled = false; } - reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 200de46bd06b..782e2098824d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -34,7 +34,6 @@ #include "atomfirmware.h" #include "cgs_common.h" #include "vega12_inc.h" -#include "pp_soc15.h" #include "pppcielanes.h" #include "vega12_hwmgr.h" #include "vega12_processpptables.h" @@ -546,6 +545,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -565,6 +565,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -585,6 +586,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -605,6 +607,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -625,6 +628,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -645,6 +649,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -666,6 +671,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -686,6 +692,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -706,6 +713,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -726,6 +734,7 @@ static int vega12_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) return -EINVAL); dpm_table->dpm_levels[i].value = clock; + dpm_table->dpm_levels[i].enabled = true; } vega12_init_dpm_state(&(dpm_table->dpm_state)); @@ -992,15 +1001,55 @@ static uint32_t vega12_find_highest_dpm_level( static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr) { + struct vega12_hwmgr *data = hwmgr->backend; + if (data->smc_state_table.gfx_boot_level != + data->dpm_table.gfx_table.dpm_state.soft_min_level) { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMinByFreq, + PPCLK_GFXCLK<<16 | data->dpm_table.gfx_table.dpm_levels[data->smc_state_table.gfx_boot_level].value); + data->dpm_table.gfx_table.dpm_state.soft_min_level = + data->smc_state_table.gfx_boot_level; + } + + if (data->smc_state_table.mem_boot_level != + data->dpm_table.mem_table.dpm_state.soft_min_level) { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMinByFreq, + PPCLK_UCLK<<16 | data->dpm_table.mem_table.dpm_levels[data->smc_state_table.mem_boot_level].value); + data->dpm_table.mem_table.dpm_state.soft_min_level = + data->smc_state_table.mem_boot_level; + } + return 0; + } static int vega12_upload_dpm_max_level(struct pp_hwmgr *hwmgr) { + struct vega12_hwmgr *data = hwmgr->backend; + if (data->smc_state_table.gfx_max_level != + data->dpm_table.gfx_table.dpm_state.soft_max_level) { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxByFreq, + /* plus the vale by 1 to align the resolution */ + PPCLK_GFXCLK<<16 | (data->dpm_table.gfx_table.dpm_levels[data->smc_state_table.gfx_max_level].value + 1)); + data->dpm_table.gfx_table.dpm_state.soft_max_level = + data->smc_state_table.gfx_max_level; + } + + if (data->smc_state_table.mem_max_level != + data->dpm_table.mem_table.dpm_state.soft_max_level) { + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxByFreq, + /* plus the vale by 1 to align the resolution */ + PPCLK_UCLK<<16 | (data->dpm_table.mem_table.dpm_levels[data->smc_state_table.mem_max_level].value + 1)); + data->dpm_table.mem_table.dpm_state.soft_max_level = + data->smc_state_table.mem_max_level; + } + return 0; } - int vega12_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable) { struct vega12_hwmgr *data = @@ -1064,8 +1113,7 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low) return (mem_clk * 100); } -static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, - struct pp_gpu_power *query) +static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query) { #if 0 uint32_t value; @@ -1077,7 +1125,7 @@ static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, vega12_read_arg_from_smc(hwmgr, &value); /* power value is an integer */ - query->average_gpu_power = value << 8; + *query = value << 8; #endif return 0; } @@ -1186,12 +1234,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, *size = 4; break; case AMDGPU_PP_SENSOR_GPU_POWER: - if (*size < sizeof(struct pp_gpu_power)) - ret = -EINVAL; - else { - *size = sizeof(struct pp_gpu_power); - ret = vega12_get_gpu_power(hwmgr, (struct pp_gpu_power *)value); - } + ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value); + break; default: ret = -EINVAL; @@ -1260,23 +1304,18 @@ static int vega12_notify_smc_display_config_after_ps_adjustment( { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); - uint32_t num_active_disps = 0; - struct cgs_display_info info = {0}; struct PP_Clocks min_clocks = {0}; struct pp_display_clock_request clock_req; uint32_t clk_request; - info.mode_info = NULL; - cgs_get_active_displays_info(hwmgr->device, &info); - num_active_disps = info.display_count; - if (num_active_disps > 1) + if (hwmgr->display_config->num_display > 1) vega12_notify_smc_display_change(hwmgr, false); else vega12_notify_smc_display_change(hwmgr, true); - min_clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk; - min_clocks.dcefClockInSR = hwmgr->display_config.min_dcef_deep_sleep_set_clk; - min_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock; + min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk; + min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk; + min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock; if (data->smu_features[GNLD_DPM_DCEFCLK].supported) { clock_req.clock_type = amd_pp_dcef_clock; @@ -1832,9 +1871,7 @@ static int vega12_display_configuration_changed_task(struct pp_hwmgr *hwmgr) { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); int result = 0; - uint32_t num_turned_on_displays = 1; Watermarks_t *wm_table = &(data->smc_state_table.water_marks_table); - struct cgs_display_info info = {0}; if ((data->water_marks_bitmap & WaterMarksExist) && !(data->water_marks_bitmap & WaterMarksLoaded)) { @@ -1846,12 +1883,9 @@ static int vega12_display_configuration_changed_task(struct pp_hwmgr *hwmgr) if ((data->water_marks_bitmap & WaterMarksExist) && data->smu_features[GNLD_DPM_DCEFCLK].supported && - data->smu_features[GNLD_DPM_SOCCLK].supported) { - cgs_get_active_displays_info(hwmgr->device, &info); - num_turned_on_displays = info.display_count; + data->smu_features[GNLD_DPM_SOCCLK].supported) smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_NumOfDisplays, num_turned_on_displays); - } + PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display); return result; } @@ -1894,15 +1928,12 @@ vega12_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hwmg { struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); bool is_update_required = false; - struct cgs_display_info info = {0, 0, NULL}; - - cgs_get_active_displays_info(hwmgr->device, &info); - if (data->display_timing.num_existing_displays != info.display_count) + if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display) is_update_required = true; if (data->registry_data.gfx_clk_deep_sleep_support) { - if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr) + if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr) is_update_required = true; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h index bc98b1df3b65..e81ded1ec198 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h @@ -33,7 +33,7 @@ #define WaterMarksExist 1 #define WaterMarksLoaded 2 -#define VG12_PSUEDO_NUM_GFXCLK_DPM_LEVELS 8 +#define VG12_PSUEDO_NUM_GFXCLK_DPM_LEVELS 16 #define VG12_PSUEDO_NUM_SOCCLK_DPM_LEVELS 8 #define VG12_PSUEDO_NUM_DCEFCLK_DPM_LEVELS 8 #define VG12_PSUEDO_NUM_UCLK_DPM_LEVELS 4 diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c index b34113f45904..888ddca902d8 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c @@ -51,7 +51,7 @@ static const void *get_powerplay_table(struct pp_hwmgr *hwmgr) if (!table_address) { table_address = (ATOM_Vega12_POWERPLAYTABLE *) - cgs_atom_get_data_table(hwmgr->device, index, + smu_atom_get_data_table(hwmgr->adev, index, &size, &frev, &crev); hwmgr->soft_pp_table = table_address; /*Cache the result in RAM.*/ @@ -224,6 +224,11 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable ppsmc_pptable->AcgGfxclkSpreadPercent = smc_dpm_table.acggfxclkspreadpercent; ppsmc_pptable->AcgGfxclkSpreadFreq = smc_dpm_table.acggfxclkspreadfreq; + /* 0xFFFF will disable the ACG feature */ + if (!(hwmgr->feature_mask & PP_ACG_MASK)) { + ppsmc_pptable->AcgThresholdFreqHigh = 0xFFFF; + ppsmc_pptable->AcgThresholdFreqLow = 0xFFFF; + } return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c index df0fa815cd6e..cfd9e6ccb790 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c @@ -26,7 +26,7 @@ #include "vega12_smumgr.h" #include "vega12_ppsmc.h" #include "vega12_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "pp_debug.h" static int vega12_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm) @@ -147,13 +147,10 @@ int vega12_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr) */ int vega12_thermal_get_temperature(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; int temp = 0; - uint32_t reg; - reg = soc15_get_register_offset(THM_HWID, 0, - mmCG_MULT_THERMAL_STATUS_BASE_IDX, mmCG_MULT_THERMAL_STATUS); - - temp = cgs_read_register(hwmgr->device, reg); + temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS); temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >> CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT; @@ -175,11 +172,12 @@ int vega12_thermal_get_temperature(struct pp_hwmgr *hwmgr) static int vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range) { + struct amdgpu_device *adev = hwmgr->adev; int low = VEGA12_THERMAL_MINIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; int high = VEGA12_THERMAL_MAXIMUM_ALERT_TEMP * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; - uint32_t val, reg; + uint32_t val; if (low < range->min) low = range->min; @@ -189,18 +187,15 @@ static int vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, if (low > high) return -EINVAL; - reg = soc15_get_register_offset(THM_HWID, 0, - mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL); - - val = cgs_read_register(hwmgr->device, reg); + val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); - val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); + val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES)); val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); - cgs_write_register(hwmgr->device, reg, val); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val); return 0; } @@ -212,15 +207,14 @@ static int vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr, */ static int vega12_thermal_enable_alert(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t val = 0; - uint32_t reg; val |= (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT); val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT); val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT); - reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA); - cgs_write_register(hwmgr->device, reg, val); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, val); return 0; } @@ -231,10 +225,9 @@ static int vega12_thermal_enable_alert(struct pp_hwmgr *hwmgr) */ int vega12_thermal_disable_alert(struct pp_hwmgr *hwmgr) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; - reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index 8b78bbecd1bc..a202247c9894 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h @@ -377,11 +377,7 @@ struct phm_clocks { #define DPMTABLE_UPDATE_SCLK 0x00000004 #define DPMTABLE_UPDATE_MCLK 0x00000008 #define DPMTABLE_OD_UPDATE_VDDC 0x00000010 - -/* To determine if sclk and mclk are in overdrive state */ -#define SCLK_OVERDRIVE_ENABLED 0x00000001 -#define MCLK_OVERDRIVE_ENABLED 0x00000002 -#define VDDC_OVERDRIVE_ENABLED 0x00000010 +#define DPMTABLE_UPDATE_SOCCLK 0x00000020 struct phm_odn_performance_level { uint32_t clock; @@ -414,7 +410,10 @@ extern int phm_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct pp_power_state *adjusted_ps, const struct pp_power_state *current_ps); +extern int phm_apply_clock_adjust_rules(struct pp_hwmgr *hwmgr); + extern int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level); +extern int phm_pre_display_configuration_changed(struct pp_hwmgr *hwmgr); extern int phm_display_configuration_changed(struct pp_hwmgr *hwmgr); extern int phm_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr); extern int phm_register_irq_handlers(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 17f811d181c8..b99fb8ac822c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -38,6 +38,8 @@ struct phm_fan_speed_info; struct pp_atomctrl_voltage_table; #define VOLTAGE_SCALE 4 +#define VOLTAGE_VID_OFFSET_SCALE1 625 +#define VOLTAGE_VID_OFFSET_SCALE2 100 enum DISPLAY_GAP { DISPLAY_GAP_VBLANK_OR_WM = 0, /* Wait for vblank or MCHG watermark. */ @@ -64,24 +66,6 @@ struct vi_dpm_table { #define PCIE_PERF_REQ_GEN2 3 #define PCIE_PERF_REQ_GEN3 4 -enum PP_FEATURE_MASK { - PP_SCLK_DPM_MASK = 0x1, - PP_MCLK_DPM_MASK = 0x2, - PP_PCIE_DPM_MASK = 0x4, - PP_SCLK_DEEP_SLEEP_MASK = 0x8, - PP_POWER_CONTAINMENT_MASK = 0x10, - PP_UVD_HANDSHAKE_MASK = 0x20, - PP_SMC_VOLTAGE_CONTROL_MASK = 0x40, - PP_VBI_TIME_SUPPORT_MASK = 0x80, - PP_ULV_MASK = 0x100, - PP_ENABLE_GFX_CG_THRU_SMU = 0x200, - PP_CLOCK_STRETCH_MASK = 0x400, - PP_OD_FUZZY_FAN_CONTROL_MASK = 0x800, - PP_SOCCLK_DPM_MASK = 0x1000, - PP_DCEFCLK_DPM_MASK = 0x2000, - PP_OVERDRIVE_MASK = 0x4000, -}; - enum PHM_BackEnd_Magic { PHM_Dummy_Magic = 0xAA5555AA, PHM_RV770_Magic = 0xDCBAABCD, @@ -245,6 +229,8 @@ struct pp_hwmgr_func { struct pp_power_state *prequest_ps, const struct pp_power_state *pcurrent_ps); + int (*apply_clocks_adjust_rules)(struct pp_hwmgr *hwmgr); + int (*force_dpm_level)(struct pp_hwmgr *hw_mgr, enum amd_dpm_forced_level level); @@ -268,6 +254,7 @@ struct pp_hwmgr_func { const void *state); int (*enable_clock_power_gating)(struct pp_hwmgr *hwmgr); int (*notify_smc_display_config_after_ps_adjustment)(struct pp_hwmgr *hwmgr); + int (*pre_display_config_changed)(struct pp_hwmgr *hwmgr); int (*display_config_changed)(struct pp_hwmgr *hwmgr); int (*disable_clock_power_gating)(struct pp_hwmgr *hwmgr); int (*update_clock_gatings)(struct pp_hwmgr *hwmgr, @@ -312,6 +299,7 @@ struct pp_hwmgr_func { int (*display_clock_voltage_request)(struct pp_hwmgr *hwmgr, struct pp_display_clock_request *clock); int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); + int (*gfx_off_control)(struct pp_hwmgr *hwmgr, bool enable); int (*power_off_asic)(struct pp_hwmgr *hwmgr); int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask); int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf); @@ -341,6 +329,7 @@ struct pp_hwmgr_func { long *input, uint32_t size); int (*set_power_limit)(struct pp_hwmgr *hwmgr, uint32_t n); int (*set_mmhub_powergating_by_smu)(struct pp_hwmgr *hwmgr); + int (*smus_notify_pwe)(struct pp_hwmgr *hwmgr); }; struct pp_table_func { @@ -718,6 +707,7 @@ struct pp_hwmgr { uint32_t chip_family; uint32_t chip_id; uint32_t smu_version; + bool not_vf; bool pm_en; struct mutex smu_lock; @@ -764,7 +754,7 @@ struct pp_hwmgr { struct pp_power_state *request_ps; struct pp_power_state *boot_ps; struct pp_power_state *uvd_ps; - struct amd_pp_display_configuration display_config; + const struct amd_pp_display_configuration *display_config; uint32_t feature_mask; bool avfs_supported; /* UMD Pstate */ @@ -782,10 +772,13 @@ struct pp_hwmgr { }; int hwmgr_early_init(struct pp_hwmgr *hwmgr); +int hwmgr_sw_init(struct pp_hwmgr *hwmgr); +int hwmgr_sw_fini(struct pp_hwmgr *hwmgr); int hwmgr_hw_init(struct pp_hwmgr *hwmgr); int hwmgr_hw_fini(struct pp_hwmgr *hwmgr); -int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr); -int hwmgr_hw_resume(struct pp_hwmgr *hwmgr); +int hwmgr_suspend(struct pp_hwmgr *hwmgr); +int hwmgr_resume(struct pp_hwmgr *hwmgr); + int hwmgr_handle_task(struct pp_hwmgr *hwmgr, enum amd_pp_task task_id, enum amd_pm_state_type *user_state); diff --git a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h index 426bff2aad2b..a2991fa2e6f8 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h @@ -75,13 +75,15 @@ #define PPSMC_MSG_GetMinGfxclkFrequency 0x2C #define PPSMC_MSG_GetMaxGfxclkFrequency 0x2D #define PPSMC_MSG_SoftReset 0x2E +#define PPSMC_MSG_SetGfxCGPG 0x2F #define PPSMC_MSG_SetSoftMaxGfxClk 0x30 #define PPSMC_MSG_SetHardMinGfxClk 0x31 #define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x32 #define PPSMC_MSG_SetSoftMaxFclkByFreq 0x33 #define PPSMC_MSG_SetSoftMaxVcn 0x34 #define PPSMC_MSG_PowerGateMmHub 0x35 -#define PPSMC_Message_Count 0x36 +#define PPSMC_MSG_SetRccPfcPmeRestoreRegister 0x36 +#define PPSMC_Message_Count 0x37 typedef uint16_t PPSMC_Result; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu75.h b/drivers/gpu/drm/amd/powerplay/inc/smu75.h new file mode 100644 index 000000000000..771523001533 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/inc/smu75.h @@ -0,0 +1,760 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef SMU75_H +#define SMU75_H + +#pragma pack(push, 1) + +typedef struct { + uint32_t high; + uint32_t low; +} data_64_t; + +typedef struct { + data_64_t high; + data_64_t low; +} data_128_t; + +#define SMU__DGPU_ONLY + +#define SMU__NUM_SCLK_DPM_STATE 8 +#define SMU__NUM_MCLK_DPM_LEVELS 4 +#define SMU__NUM_LCLK_DPM_LEVELS 8 +#define SMU__NUM_PCIE_DPM_LEVELS 8 + +#define SMU7_CONTEXT_ID_SMC 1 +#define SMU7_CONTEXT_ID_VBIOS 2 + +#define SMU75_MAX_LEVELS_VDDC 16 +#define SMU75_MAX_LEVELS_VDDGFX 16 +#define SMU75_MAX_LEVELS_VDDCI 8 +#define SMU75_MAX_LEVELS_MVDD 4 + +#define SMU_MAX_SMIO_LEVELS 4 + +#define SMU75_MAX_LEVELS_GRAPHICS SMU__NUM_SCLK_DPM_STATE +#define SMU75_MAX_LEVELS_MEMORY SMU__NUM_MCLK_DPM_LEVELS +#define SMU75_MAX_LEVELS_GIO SMU__NUM_LCLK_DPM_LEVELS +#define SMU75_MAX_LEVELS_LINK SMU__NUM_PCIE_DPM_LEVELS +#define SMU75_MAX_LEVELS_UVD 8 +#define SMU75_MAX_LEVELS_VCE 8 +#define SMU75_MAX_LEVELS_ACP 8 +#define SMU75_MAX_LEVELS_SAMU 8 +#define SMU75_MAX_ENTRIES_SMIO 32 + +#define DPM_NO_LIMIT 0 +#define DPM_NO_UP 1 +#define DPM_GO_DOWN 2 +#define DPM_GO_UP 3 + +#define SMU7_FIRST_DPM_GRAPHICS_LEVEL 0 +#define SMU7_FIRST_DPM_MEMORY_LEVEL 0 + +#define GPIO_CLAMP_MODE_VRHOT 1 +#define GPIO_CLAMP_MODE_THERM 2 +#define GPIO_CLAMP_MODE_DC 4 + +#define SCRATCH_B_TARG_PCIE_INDEX_SHIFT 0 +#define SCRATCH_B_TARG_PCIE_INDEX_MASK (0x7<<SCRATCH_B_TARG_PCIE_INDEX_SHIFT) +#define SCRATCH_B_CURR_PCIE_INDEX_SHIFT 3 +#define SCRATCH_B_CURR_PCIE_INDEX_MASK (0x7<<SCRATCH_B_CURR_PCIE_INDEX_SHIFT) +#define SCRATCH_B_TARG_UVD_INDEX_SHIFT 6 +#define SCRATCH_B_TARG_UVD_INDEX_MASK (0x7<<SCRATCH_B_TARG_UVD_INDEX_SHIFT) +#define SCRATCH_B_CURR_UVD_INDEX_SHIFT 9 +#define SCRATCH_B_CURR_UVD_INDEX_MASK (0x7<<SCRATCH_B_CURR_UVD_INDEX_SHIFT) +#define SCRATCH_B_TARG_VCE_INDEX_SHIFT 12 +#define SCRATCH_B_TARG_VCE_INDEX_MASK (0x7<<SCRATCH_B_TARG_VCE_INDEX_SHIFT) +#define SCRATCH_B_CURR_VCE_INDEX_SHIFT 15 +#define SCRATCH_B_CURR_VCE_INDEX_MASK (0x7<<SCRATCH_B_CURR_VCE_INDEX_SHIFT) +#define SCRATCH_B_TARG_ACP_INDEX_SHIFT 18 +#define SCRATCH_B_TARG_ACP_INDEX_MASK (0x7<<SCRATCH_B_TARG_ACP_INDEX_SHIFT) +#define SCRATCH_B_CURR_ACP_INDEX_SHIFT 21 +#define SCRATCH_B_CURR_ACP_INDEX_MASK (0x7<<SCRATCH_B_CURR_ACP_INDEX_SHIFT) +#define SCRATCH_B_TARG_SAMU_INDEX_SHIFT 24 +#define SCRATCH_B_TARG_SAMU_INDEX_MASK (0x7<<SCRATCH_B_TARG_SAMU_INDEX_SHIFT) +#define SCRATCH_B_CURR_SAMU_INDEX_SHIFT 27 +#define SCRATCH_B_CURR_SAMU_INDEX_MASK (0x7<<SCRATCH_B_CURR_SAMU_INDEX_SHIFT) + +/* Virtualization Defines */ +#define CG_XDMA_MASK 0x1 +#define CG_XDMA_SHIFT 0 +#define CG_UVD_MASK 0x2 +#define CG_UVD_SHIFT 1 +#define CG_VCE_MASK 0x4 +#define CG_VCE_SHIFT 2 +#define CG_SAMU_MASK 0x8 +#define CG_SAMU_SHIFT 3 +#define CG_GFX_MASK 0x10 +#define CG_GFX_SHIFT 4 +#define CG_SDMA_MASK 0x20 +#define CG_SDMA_SHIFT 5 +#define CG_HDP_MASK 0x40 +#define CG_HDP_SHIFT 6 +#define CG_MC_MASK 0x80 +#define CG_MC_SHIFT 7 +#define CG_DRM_MASK 0x100 +#define CG_DRM_SHIFT 8 +#define CG_ROM_MASK 0x200 +#define CG_ROM_SHIFT 9 +#define CG_BIF_MASK 0x400 +#define CG_BIF_SHIFT 10 + +#if defined SMU__DGPU_ONLY +#define SMU75_DTE_ITERATIONS 5 +#define SMU75_DTE_SOURCES 3 +#define SMU75_DTE_SINKS 1 +#define SMU75_NUM_CPU_TES 0 +#define SMU75_NUM_GPU_TES 1 +#define SMU75_NUM_NON_TES 2 +#define SMU75_DTE_FAN_SCALAR_MIN 0x100 +#define SMU75_DTE_FAN_SCALAR_MAX 0x166 +#define SMU75_DTE_FAN_TEMP_MAX 93 +#define SMU75_DTE_FAN_TEMP_MIN 83 +#endif +#define SMU75_THERMAL_INPUT_LOOP_COUNT 2 +#define SMU75_THERMAL_CLAMP_MODE_COUNT 2 + +#define EXP_M1_1 93 +#define EXP_M2_1 195759 +#define EXP_B_1 111176531 + +#define EXP_M1_2 67 +#define EXP_M2_2 153720 +#define EXP_B_2 94415767 + +#define EXP_M1_3 48 +#define EXP_M2_3 119796 +#define EXP_B_3 79195279 + +#define EXP_M1_4 550 +#define EXP_M2_4 1484190 +#define EXP_B_4 1051432828 + +#define EXP_M1_5 394 +#define EXP_M2_5 1143049 +#define EXP_B_5 864288432 + +struct SMU7_HystController_Data { + uint16_t waterfall_up; + uint16_t waterfall_down; + uint16_t waterfall_limit; + uint16_t release_cnt; + uint16_t release_limit; + uint16_t spare; +}; + +typedef struct SMU7_HystController_Data SMU7_HystController_Data; + +struct SMU75_PIDController { + uint32_t Ki; + int32_t LFWindupUpperLim; + int32_t LFWindupLowerLim; + uint32_t StatePrecision; + uint32_t LfPrecision; + uint32_t LfOffset; + uint32_t MaxState; + uint32_t MaxLfFraction; + uint32_t StateShift; +}; + +typedef struct SMU75_PIDController SMU75_PIDController; + +struct SMU7_LocalDpmScoreboard { + uint32_t PercentageBusy; + + int32_t PIDError; + int32_t PIDIntegral; + int32_t PIDOutput; + + uint32_t SigmaDeltaAccum; + uint32_t SigmaDeltaOutput; + uint32_t SigmaDeltaLevel; + + uint32_t UtilizationSetpoint; + + uint8_t TdpClampMode; + uint8_t TdcClampMode; + uint8_t ThermClampMode; + uint8_t VoltageBusy; + + int8_t CurrLevel; + int8_t TargLevel; + uint8_t LevelChangeInProgress; + uint8_t UpHyst; + + uint8_t DownHyst; + uint8_t VoltageDownHyst; + uint8_t DpmEnable; + uint8_t DpmRunning; + + uint8_t DpmForce; + uint8_t DpmForceLevel; + uint8_t DisplayWatermark; + uint8_t McArbIndex; + + uint32_t MinimumPerfSclk; + + uint8_t AcpiReq; + uint8_t AcpiAck; + uint8_t GfxClkSlow; + uint8_t GpioClampMode; + + uint8_t EnableModeSwitchRLCNotification; + uint8_t EnabledLevelsChange; + uint8_t DteClampMode; + uint8_t FpsClampMode; + + uint16_t LevelResidencyCounters [SMU75_MAX_LEVELS_GRAPHICS]; + uint16_t LevelSwitchCounters [SMU75_MAX_LEVELS_GRAPHICS]; + + void (*TargetStateCalculator)(uint8_t); + void (*SavedTargetStateCalculator)(uint8_t); + + uint16_t AutoDpmInterval; + uint16_t AutoDpmRange; + + uint8_t FpsEnabled; + uint8_t MaxPerfLevel; + uint8_t AllowLowClkInterruptToHost; + uint8_t FpsRunning; + + uint32_t MaxAllowedFrequency; + + uint32_t FilteredSclkFrequency; + uint32_t LastSclkFrequency; + uint32_t FilteredSclkFrequencyCnt; + + uint8_t MinPerfLevel; +#ifdef SMU__FIRMWARE_SCKS_PRESENT__1 + uint8_t ScksClampMode; + uint8_t padding[2]; +#else + uint8_t padding[3]; +#endif + + uint16_t FpsAlpha; + uint16_t DeltaTime; + uint32_t CurrentFps; + uint32_t FilteredFps; + uint32_t FrameCount; + uint32_t FrameCountLast; + uint16_t FpsTargetScalar; + uint16_t FpsWaterfallLimitScalar; + uint16_t FpsAlphaScalar; + uint16_t spare8; + SMU7_HystController_Data HystControllerData; +}; + +typedef struct SMU7_LocalDpmScoreboard SMU7_LocalDpmScoreboard; + +#define SMU7_MAX_VOLTAGE_CLIENTS 12 + +typedef uint8_t (*VoltageChangeHandler_t)(uint16_t, uint8_t); + +#define VDDC_MASK 0x00007FFF +#define VDDC_SHIFT 0 +#define VDDCI_MASK 0x3FFF8000 +#define VDDCI_SHIFT 15 +#define PHASES_MASK 0xC0000000 +#define PHASES_SHIFT 30 + +typedef uint32_t SMU_VoltageLevel; + +struct SMU7_VoltageScoreboard { + SMU_VoltageLevel TargetVoltage; + uint16_t MaxVid; + uint8_t HighestVidOffset; + uint8_t CurrentVidOffset; + + uint16_t CurrentVddc; + uint16_t CurrentVddci; + + uint8_t ControllerBusy; + uint8_t CurrentVid; + uint8_t CurrentVddciVid; + uint8_t padding; + + SMU_VoltageLevel RequestedVoltage[SMU7_MAX_VOLTAGE_CLIENTS]; + SMU_VoltageLevel TargetVoltageState; + uint8_t EnabledRequest[SMU7_MAX_VOLTAGE_CLIENTS]; + + uint8_t padding2; + uint8_t padding3; + uint8_t ControllerEnable; + uint8_t ControllerRunning; + uint16_t CurrentStdVoltageHiSidd; + uint16_t CurrentStdVoltageLoSidd; + uint8_t OverrideVoltage; + uint8_t padding4; + uint8_t padding5; + uint8_t CurrentPhases; + + VoltageChangeHandler_t ChangeVddc; + VoltageChangeHandler_t ChangeVddci; + VoltageChangeHandler_t ChangePhase; + VoltageChangeHandler_t ChangeMvdd; + + VoltageChangeHandler_t functionLinks[6]; + + uint16_t * VddcFollower1; + int16_t Driver_OD_RequestedVidOffset1; + int16_t Driver_OD_RequestedVidOffset2; +}; + +typedef struct SMU7_VoltageScoreboard SMU7_VoltageScoreboard; + +#define SMU7_MAX_PCIE_LINK_SPEEDS 3 + +struct SMU7_PCIeLinkSpeedScoreboard { + uint8_t DpmEnable; + uint8_t DpmRunning; + uint8_t DpmForce; + uint8_t DpmForceLevel; + + uint8_t CurrentLinkSpeed; + uint8_t EnabledLevelsChange; + uint16_t AutoDpmInterval; + + uint16_t AutoDpmRange; + uint16_t AutoDpmCount; + + uint8_t DpmMode; + uint8_t AcpiReq; + uint8_t AcpiAck; + uint8_t CurrentLinkLevel; +}; + +typedef struct SMU7_PCIeLinkSpeedScoreboard SMU7_PCIeLinkSpeedScoreboard; + +#define SMU7_LKGE_LUT_NUM_OF_TEMP_ENTRIES 16 +#define SMU7_LKGE_LUT_NUM_OF_VOLT_ENTRIES 16 + +#define SMU7_SCALE_I 7 +#define SMU7_SCALE_R 12 + +struct SMU7_PowerScoreboard { + uint32_t GpuPower; + + uint32_t VddcPower; + uint32_t VddcVoltage; + uint32_t VddcCurrent; + + uint32_t VddciPower; + uint32_t VddciVoltage; + uint32_t VddciCurrent; + + uint32_t RocPower; + + uint16_t Telemetry_1_slope; + uint16_t Telemetry_2_slope; + int32_t Telemetry_1_offset; + int32_t Telemetry_2_offset; + + uint8_t MCLK_patch_flag; + uint8_t reserved[3]; +}; + +typedef struct SMU7_PowerScoreboard SMU7_PowerScoreboard; + +#define SMU7_SCLK_DPM_CONFIG_MASK 0x01 +#define SMU7_VOLTAGE_CONTROLLER_CONFIG_MASK 0x02 +#define SMU7_THERMAL_CONTROLLER_CONFIG_MASK 0x04 +#define SMU7_MCLK_DPM_CONFIG_MASK 0x08 +#define SMU7_UVD_DPM_CONFIG_MASK 0x10 +#define SMU7_VCE_DPM_CONFIG_MASK 0x20 +#define SMU7_ACP_DPM_CONFIG_MASK 0x40 +#define SMU7_SAMU_DPM_CONFIG_MASK 0x80 +#define SMU7_PCIEGEN_DPM_CONFIG_MASK 0x100 + +#define SMU7_ACP_MCLK_HANDSHAKE_DISABLE 0x00000001 +#define SMU7_ACP_SCLK_HANDSHAKE_DISABLE 0x00000002 +#define SMU7_UVD_MCLK_HANDSHAKE_DISABLE 0x00000100 +#define SMU7_UVD_SCLK_HANDSHAKE_DISABLE 0x00000200 +#define SMU7_VCE_MCLK_HANDSHAKE_DISABLE 0x00010000 +#define SMU7_VCE_SCLK_HANDSHAKE_DISABLE 0x00020000 + +struct SMU75_SoftRegisters { + uint32_t RefClockFrequency; + uint32_t PmTimerPeriod; + uint32_t FeatureEnables; +#if defined (SMU__DGPU_ONLY) + uint32_t PreVBlankGap; + uint32_t VBlankTimeout; + uint32_t TrainTimeGap; + uint32_t MvddSwitchTime; + uint32_t LongestAcpiTrainTime; + uint32_t AcpiDelay; + uint32_t G5TrainTime; + uint32_t DelayMpllPwron; + uint32_t VoltageChangeTimeout; +#endif + uint32_t HandshakeDisables; + + uint8_t DisplayPhy1Config; + uint8_t DisplayPhy2Config; + uint8_t DisplayPhy3Config; + uint8_t DisplayPhy4Config; + + uint8_t DisplayPhy5Config; + uint8_t DisplayPhy6Config; + uint8_t DisplayPhy7Config; + uint8_t DisplayPhy8Config; + + uint32_t AverageGraphicsActivity; + uint32_t AverageMemoryActivity; + uint32_t AverageGioActivity; + + uint8_t SClkDpmEnabledLevels; + uint8_t MClkDpmEnabledLevels; + uint8_t LClkDpmEnabledLevels; + uint8_t PCIeDpmEnabledLevels; + + uint8_t UVDDpmEnabledLevels; + uint8_t SAMUDpmEnabledLevels; + uint8_t ACPDpmEnabledLevels; + uint8_t VCEDpmEnabledLevels; + + uint32_t DRAM_LOG_ADDR_H; + uint32_t DRAM_LOG_ADDR_L; + uint32_t DRAM_LOG_PHY_ADDR_H; + uint32_t DRAM_LOG_PHY_ADDR_L; + uint32_t DRAM_LOG_BUFF_SIZE; + uint32_t UlvEnterCount; + uint32_t UlvTime; + uint32_t UcodeLoadStatus; + uint32_t AllowMvddSwitch; + uint8_t Activity_Weight; + uint8_t Reserved8[3]; +}; + +typedef struct SMU75_SoftRegisters SMU75_SoftRegisters; + +struct SMU75_Firmware_Header { + uint32_t Digest[5]; + uint32_t Version; + uint32_t HeaderSize; + uint32_t Flags; + uint32_t EntryPoint; + uint32_t CodeSize; + uint32_t ImageSize; + + uint32_t Rtos; + uint32_t SoftRegisters; + uint32_t DpmTable; + uint32_t FanTable; + uint32_t CacConfigTable; + uint32_t CacStatusTable; + uint32_t mcRegisterTable; + uint32_t mcArbDramTimingTable; + uint32_t PmFuseTable; + uint32_t Globals; + uint32_t ClockStretcherTable; + uint32_t VftTable; + uint32_t Reserved1; + uint32_t AvfsCksOff_AvfsGbvTable; + uint32_t AvfsCksOff_BtcGbvTable; + uint32_t MM_AvfsTable; + uint32_t PowerSharingTable; + uint32_t AvfsTable; + uint32_t AvfsCksOffGbvTable; + uint32_t AvfsMeanNSigma; + uint32_t AvfsSclkOffsetTable; + uint32_t Reserved[12]; + uint32_t Signature; +}; + +typedef struct SMU75_Firmware_Header SMU75_Firmware_Header; + +#define SMU7_FIRMWARE_HEADER_LOCATION 0x20000 + +enum DisplayConfig { + PowerDown = 1, + DP54x4, + DP54x2, + DP54x1, + DP27x4, + DP27x2, + DP27x1, + HDMI297, + HDMI162, + LVDS, + DP324x4, + DP324x2, + DP324x1 +}; + +#define MC_BLOCK_COUNT 1 +#define CPL_BLOCK_COUNT 5 +#define SE_BLOCK_COUNT 15 +#define GC_BLOCK_COUNT 24 + +struct SMU7_Local_Cac { + uint8_t BlockId; + uint8_t SignalId; + uint8_t Threshold; + uint8_t Padding; +}; + +typedef struct SMU7_Local_Cac SMU7_Local_Cac; + +struct SMU7_Local_Cac_Table { + SMU7_Local_Cac CplLocalCac[CPL_BLOCK_COUNT]; + SMU7_Local_Cac McLocalCac[MC_BLOCK_COUNT]; + SMU7_Local_Cac SeLocalCac[SE_BLOCK_COUNT]; + SMU7_Local_Cac GcLocalCac[GC_BLOCK_COUNT]; +}; + +typedef struct SMU7_Local_Cac_Table SMU7_Local_Cac_Table; + +#pragma pack(pop) + +#define CG_SYS_BITMASK_FIRST_BIT 0 +#define CG_SYS_BITMASK_LAST_BIT 10 +#define CG_SYS_BIF_MGLS_SHIFT 0 +#define CG_SYS_ROM_SHIFT 1 +#define CG_SYS_MC_MGCG_SHIFT 2 +#define CG_SYS_MC_MGLS_SHIFT 3 +#define CG_SYS_SDMA_MGCG_SHIFT 4 +#define CG_SYS_SDMA_MGLS_SHIFT 5 +#define CG_SYS_DRM_MGCG_SHIFT 6 +#define CG_SYS_HDP_MGCG_SHIFT 7 +#define CG_SYS_HDP_MGLS_SHIFT 8 +#define CG_SYS_DRM_MGLS_SHIFT 9 +#define CG_SYS_BIF_MGCG_SHIFT 10 + +#define CG_SYS_BIF_MGLS_MASK 0x1 +#define CG_SYS_ROM_MASK 0x2 +#define CG_SYS_MC_MGCG_MASK 0x4 +#define CG_SYS_MC_MGLS_MASK 0x8 +#define CG_SYS_SDMA_MGCG_MASK 0x10 +#define CG_SYS_SDMA_MGLS_MASK 0x20 +#define CG_SYS_DRM_MGCG_MASK 0x40 +#define CG_SYS_HDP_MGCG_MASK 0x80 +#define CG_SYS_HDP_MGLS_MASK 0x100 +#define CG_SYS_DRM_MGLS_MASK 0x200 +#define CG_SYS_BIF_MGCG_MASK 0x400 + +#define CG_GFX_BITMASK_FIRST_BIT 16 +#define CG_GFX_BITMASK_LAST_BIT 24 + +#define CG_GFX_CGCG_SHIFT 16 +#define CG_GFX_CGLS_SHIFT 17 +#define CG_CPF_MGCG_SHIFT 18 +#define CG_RLC_MGCG_SHIFT 19 +#define CG_GFX_OTHERS_MGCG_SHIFT 20 +#define CG_GFX_3DCG_SHIFT 21 +#define CG_GFX_3DLS_SHIFT 22 +#define CG_GFX_RLC_LS_SHIFT 23 +#define CG_GFX_CP_LS_SHIFT 24 + +#define CG_GFX_CGCG_MASK 0x00010000 +#define CG_GFX_CGLS_MASK 0x00020000 +#define CG_CPF_MGCG_MASK 0x00040000 +#define CG_RLC_MGCG_MASK 0x00080000 +#define CG_GFX_OTHERS_MGCG_MASK 0x00100000 +#define CG_GFX_3DCG_MASK 0x00200000 +#define CG_GFX_3DLS_MASK 0x00400000 +#define CG_GFX_RLC_LS_MASK 0x00800000 +#define CG_GFX_CP_LS_MASK 0x01000000 + + +#define VRCONF_VDDC_MASK 0x000000FF +#define VRCONF_VDDC_SHIFT 0 +#define VRCONF_VDDGFX_MASK 0x0000FF00 +#define VRCONF_VDDGFX_SHIFT 8 +#define VRCONF_VDDCI_MASK 0x00FF0000 +#define VRCONF_VDDCI_SHIFT 16 +#define VRCONF_MVDD_MASK 0xFF000000 +#define VRCONF_MVDD_SHIFT 24 + +#define VR_MERGED_WITH_VDDC 0 +#define VR_SVI2_PLANE_1 1 +#define VR_SVI2_PLANE_2 2 +#define VR_SMIO_PATTERN_1 3 +#define VR_SMIO_PATTERN_2 4 +#define VR_STATIC_VOLTAGE 5 + +#define CLOCK_STRETCHER_MAX_ENTRIES 0x4 +#define CKS_LOOKUPTable_MAX_ENTRIES 0x4 + +#define CLOCK_STRETCHER_SETTING_DDT_MASK 0x01 +#define CLOCK_STRETCHER_SETTING_DDT_SHIFT 0x0 +#define CLOCK_STRETCHER_SETTING_STRETCH_AMOUNT_MASK 0x1E +#define CLOCK_STRETCHER_SETTING_STRETCH_AMOUNT_SHIFT 0x1 +#define CLOCK_STRETCHER_SETTING_ENABLE_MASK 0x80 +#define CLOCK_STRETCHER_SETTING_ENABLE_SHIFT 0x7 + +struct SMU_ClockStretcherDataTableEntry { + uint8_t minVID; + uint8_t maxVID; + + uint16_t setting; +}; +typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry; + +struct SMU_ClockStretcherDataTable { + SMU_ClockStretcherDataTableEntry ClockStretcherDataTableEntry[CLOCK_STRETCHER_MAX_ENTRIES]; +}; +typedef struct SMU_ClockStretcherDataTable SMU_ClockStretcherDataTable; + +struct SMU_CKS_LOOKUPTableEntry { + uint16_t minFreq; + uint16_t maxFreq; + + uint8_t setting; + uint8_t padding[3]; +}; +typedef struct SMU_CKS_LOOKUPTableEntry SMU_CKS_LOOKUPTableEntry; + +struct SMU_CKS_LOOKUPTable { + SMU_CKS_LOOKUPTableEntry CKS_LOOKUPTableEntry[CKS_LOOKUPTable_MAX_ENTRIES]; +}; +typedef struct SMU_CKS_LOOKUPTable SMU_CKS_LOOKUPTable; + +struct AgmAvfsData_t { + uint16_t avgPsmCount[28]; + uint16_t minPsmCount[28]; +}; +typedef struct AgmAvfsData_t AgmAvfsData_t; + +enum VFT_COLUMNS { + SCLK0, + SCLK1, + SCLK2, + SCLK3, + SCLK4, + SCLK5, + SCLK6, + SCLK7, + + NUM_VFT_COLUMNS +}; +enum { + SCS_FUSE_T0, + SCS_FUSE_T1, + NUM_SCS_FUSE_TEMPERATURE +}; +enum { + SCKS_ON, + SCKS_OFF, + NUM_SCKS_STATE_TYPES +}; + +#define VFT_TABLE_DEFINED + +#define TEMP_RANGE_MAXSTEPS 12 +struct VFT_CELL_t { + uint16_t Voltage; +}; + +typedef struct VFT_CELL_t VFT_CELL_t; +#ifdef SMU__FIRMWARE_SCKS_PRESENT__1 +struct SCS_CELL_t { + uint16_t PsmCnt[NUM_SCKS_STATE_TYPES]; +}; +typedef struct SCS_CELL_t SCS_CELL_t; +#endif + +struct VFT_TABLE_t { + VFT_CELL_t Cell[TEMP_RANGE_MAXSTEPS][NUM_VFT_COLUMNS]; + uint16_t AvfsGbv [NUM_VFT_COLUMNS]; + uint16_t BtcGbv [NUM_VFT_COLUMNS]; + int16_t Temperature [TEMP_RANGE_MAXSTEPS]; + +#ifdef SMU__FIRMWARE_SCKS_PRESENT__1 + SCS_CELL_t ScksCell[TEMP_RANGE_MAXSTEPS][NUM_VFT_COLUMNS]; +#endif + + uint8_t NumTemperatureSteps; + uint8_t padding[3]; +}; +typedef struct VFT_TABLE_t VFT_TABLE_t; + +#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2 +#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2 + +struct GB_VDROOP_TABLE_t { + int32_t a0; + int32_t a1; + int32_t a2; + uint32_t spare; +}; +typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t; + +struct SMU_QuadraticCoeffs { + int32_t m1; + int32_t b; + + int16_t m2; + uint8_t m1_shift; + uint8_t m2_shift; +}; +typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs; + +struct AVFS_Margin_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_Margin_t AVFS_Margin_t; + +struct AVFS_CksOff_Gbv_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t; + +struct AVFS_CksOff_AvfsGbv_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_CksOff_AvfsGbv_t AVFS_CksOff_AvfsGbv_t; + +struct AVFS_CksOff_BtcGbv_t { + VFT_CELL_t Cell[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_CksOff_BtcGbv_t AVFS_CksOff_BtcGbv_t; + +struct AVFS_meanNsigma_t { + uint32_t Aconstant[3]; + uint16_t DC_tol_sigma; + uint16_t Platform_mean; + uint16_t Platform_sigma; + uint16_t PSM_Age_CompFactor; + uint8_t Static_Voltage_Offset[NUM_VFT_COLUMNS]; +}; +typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t; + +struct AVFS_Sclk_Offset_t { + uint16_t Sclk_Offset[8]; +}; +typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t; + +struct Power_Sharing_t { + uint32_t EnergyCounter; + uint32_t EngeryThreshold; + uint64_t AM_SCLK_CNT; + uint64_t AM_0_BUSY_CNT; +}; +typedef struct Power_Sharing_t Power_Sharing_t; + + +#endif + + diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h new file mode 100644 index 000000000000..b64e58a22ddf --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h @@ -0,0 +1,886 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SMU75_DISCRETE_H +#define SMU75_DISCRETE_H + +#include "smu75.h" + +#pragma pack(push, 1) + +#define NUM_SCLK_RANGE 8 + +#define VCO_3_6 1 +#define VCO_2_4 3 + +#define POSTDIV_DIV_BY_1 0 +#define POSTDIV_DIV_BY_2 1 +#define POSTDIV_DIV_BY_4 2 +#define POSTDIV_DIV_BY_8 3 +#define POSTDIV_DIV_BY_16 4 + +struct sclkFcwRange_t { + uint8_t vco_setting; /* 1: 3-6GHz, 3: 2-4GHz */ + uint8_t postdiv; /* divide by 2^n */ + uint16_t fcw_pcc; + uint16_t fcw_trans_upper; + uint16_t fcw_trans_lower; +}; +typedef struct sclkFcwRange_t sclkFcwRange_t; + +struct SMIO_Pattern { + uint16_t Voltage; + uint8_t Smio; + uint8_t padding; +}; + +typedef struct SMIO_Pattern SMIO_Pattern; + +struct SMIO_Table { + SMIO_Pattern Pattern[SMU_MAX_SMIO_LEVELS]; +}; + +typedef struct SMIO_Table SMIO_Table; + +struct SMU_SclkSetting { + uint32_t SclkFrequency; + uint16_t Fcw_int; + uint16_t Fcw_frac; + uint16_t Pcc_fcw_int; + uint8_t PllRange; + uint8_t SSc_En; + uint16_t Sclk_slew_rate; + uint16_t Pcc_up_slew_rate; + uint16_t Pcc_down_slew_rate; + uint16_t Fcw1_int; + uint16_t Fcw1_frac; + uint16_t Sclk_ss_slew_rate; +}; +typedef struct SMU_SclkSetting SMU_SclkSetting; + +struct SMU75_Discrete_GraphicsLevel { + SMU_VoltageLevel MinVoltage; + + uint8_t pcieDpmLevel; + uint8_t DeepSleepDivId; + uint16_t ActivityLevel; + + uint32_t CgSpllFuncCntl3; + uint32_t CgSpllFuncCntl4; + uint32_t CcPwrDynRm; + uint32_t CcPwrDynRm1; + + uint8_t SclkDid; + uint8_t padding; + uint8_t EnabledForActivity; + uint8_t EnabledForThrottle; + uint8_t UpHyst; + uint8_t DownHyst; + uint8_t VoltageDownHyst; + uint8_t PowerThrottle; + + SMU_SclkSetting SclkSetting; + + uint8_t ScksStretchThreshVid[NUM_SCKS_STATE_TYPES]; + uint16_t Padding; +}; + +typedef struct SMU75_Discrete_GraphicsLevel SMU75_Discrete_GraphicsLevel; + +struct SMU75_Discrete_ACPILevel { + uint32_t Flags; + SMU_VoltageLevel MinVoltage; + uint32_t SclkFrequency; + uint8_t SclkDid; + uint8_t DisplayWatermark; + uint8_t DeepSleepDivId; + uint8_t padding; + uint32_t CcPwrDynRm; + uint32_t CcPwrDynRm1; + + SMU_SclkSetting SclkSetting; +}; + +typedef struct SMU75_Discrete_ACPILevel SMU75_Discrete_ACPILevel; + +struct SMU75_Discrete_Ulv { + uint32_t CcPwrDynRm; + uint32_t CcPwrDynRm1; + uint16_t VddcOffset; + uint8_t VddcOffsetVid; + uint8_t VddcPhase; + uint16_t BifSclkDfs; + uint16_t Reserved; +}; + +typedef struct SMU75_Discrete_Ulv SMU75_Discrete_Ulv; + +struct SMU75_Discrete_MemoryLevel { + SMU_VoltageLevel MinVoltage; + uint32_t MinMvdd; + + uint32_t MclkFrequency; + + uint8_t StutterEnable; + uint8_t EnabledForThrottle; + uint8_t EnabledForActivity; + uint8_t padding_0; + + uint8_t UpHyst; + uint8_t DownHyst; + uint8_t VoltageDownHyst; + uint8_t padding_1; + + uint16_t ActivityLevel; + uint8_t DisplayWatermark; + uint8_t padding_2; + + uint16_t Fcw_int; + uint16_t Fcw_frac; + uint8_t Postdiv; + uint8_t padding_3[3]; +}; + +typedef struct SMU75_Discrete_MemoryLevel SMU75_Discrete_MemoryLevel; + +struct SMU75_Discrete_LinkLevel { + uint8_t PcieGenSpeed; + uint8_t PcieLaneCount; + uint8_t EnabledForActivity; + uint8_t SPC; + uint32_t DownThreshold; + uint32_t UpThreshold; + uint16_t BifSclkDfs; + uint16_t Reserved; +}; + +typedef struct SMU75_Discrete_LinkLevel SMU75_Discrete_LinkLevel; + + +/* MC ARB DRAM Timing registers. */ +struct SMU75_Discrete_MCArbDramTimingTableEntry { + uint32_t McArbDramTiming; + uint32_t McArbDramTiming2; + uint32_t McArbBurstTime; + uint32_t McArbRfshRate; + uint32_t McArbMisc3; +}; + +typedef struct SMU75_Discrete_MCArbDramTimingTableEntry SMU75_Discrete_MCArbDramTimingTableEntry; + +struct SMU75_Discrete_MCArbDramTimingTable { + SMU75_Discrete_MCArbDramTimingTableEntry entries[SMU__NUM_SCLK_DPM_STATE][SMU__NUM_MCLK_DPM_LEVELS]; +}; + +typedef struct SMU75_Discrete_MCArbDramTimingTable SMU75_Discrete_MCArbDramTimingTable; + +/* UVD VCLK/DCLK state (level) definition. */ +struct SMU75_Discrete_UvdLevel { + uint32_t VclkFrequency; + uint32_t DclkFrequency; + SMU_VoltageLevel MinVoltage; + uint8_t VclkDivider; + uint8_t DclkDivider; + uint8_t padding[2]; +}; + +typedef struct SMU75_Discrete_UvdLevel SMU75_Discrete_UvdLevel; + +/* Clocks for other external blocks (VCE, ACP, SAMU). */ +struct SMU75_Discrete_ExtClkLevel { + uint32_t Frequency; + SMU_VoltageLevel MinVoltage; + uint8_t Divider; + uint8_t padding[3]; +}; + +typedef struct SMU75_Discrete_ExtClkLevel SMU75_Discrete_ExtClkLevel; + +struct SMU75_Discrete_StateInfo { + uint32_t SclkFrequency; + uint32_t MclkFrequency; + uint32_t VclkFrequency; + uint32_t DclkFrequency; + uint32_t SamclkFrequency; + uint32_t AclkFrequency; + uint32_t EclkFrequency; + uint16_t MvddVoltage; + uint16_t padding16; + uint8_t DisplayWatermark; + uint8_t McArbIndex; + uint8_t McRegIndex; + uint8_t SeqIndex; + uint8_t SclkDid; + int8_t SclkIndex; + int8_t MclkIndex; + uint8_t PCIeGen; +}; + +typedef struct SMU75_Discrete_StateInfo SMU75_Discrete_StateInfo; + +struct SMU75_Discrete_DpmTable { + SMU75_PIDController GraphicsPIDController; + SMU75_PIDController MemoryPIDController; + SMU75_PIDController LinkPIDController; + + uint32_t SystemFlags; + + uint32_t VRConfig; + uint32_t SmioMask1; + uint32_t SmioMask2; + SMIO_Table SmioTable1; + SMIO_Table SmioTable2; + + uint32_t MvddLevelCount; + + uint8_t BapmVddcVidHiSidd [SMU75_MAX_LEVELS_VDDC]; + uint8_t BapmVddcVidLoSidd [SMU75_MAX_LEVELS_VDDC]; + uint8_t BapmVddcVidHiSidd2 [SMU75_MAX_LEVELS_VDDC]; + + uint8_t GraphicsDpmLevelCount; + uint8_t MemoryDpmLevelCount; + uint8_t LinkLevelCount; + uint8_t MasterDeepSleepControl; + + uint8_t UvdLevelCount; + uint8_t VceLevelCount; + uint8_t AcpLevelCount; + uint8_t SamuLevelCount; + + uint8_t ThermOutGpio; + uint8_t ThermOutPolarity; + uint8_t ThermOutMode; + uint8_t BootPhases; + + uint8_t VRHotLevel; + uint8_t LdoRefSel; + + uint8_t Reserved1[2]; + + uint16_t FanStartTemperature; + uint16_t FanStopTemperature; + + uint16_t MaxVoltage; + uint16_t Reserved2; + uint32_t Reserved; + + SMU75_Discrete_GraphicsLevel GraphicsLevel [SMU75_MAX_LEVELS_GRAPHICS]; + SMU75_Discrete_MemoryLevel MemoryACPILevel; + SMU75_Discrete_MemoryLevel MemoryLevel [SMU75_MAX_LEVELS_MEMORY]; + SMU75_Discrete_LinkLevel LinkLevel [SMU75_MAX_LEVELS_LINK]; + SMU75_Discrete_ACPILevel ACPILevel; + SMU75_Discrete_UvdLevel UvdLevel [SMU75_MAX_LEVELS_UVD]; + SMU75_Discrete_ExtClkLevel VceLevel [SMU75_MAX_LEVELS_VCE]; + SMU75_Discrete_ExtClkLevel AcpLevel [SMU75_MAX_LEVELS_ACP]; + SMU75_Discrete_ExtClkLevel SamuLevel [SMU75_MAX_LEVELS_SAMU]; + SMU75_Discrete_Ulv Ulv; + + uint8_t DisplayWatermark [SMU75_MAX_LEVELS_MEMORY][SMU75_MAX_LEVELS_GRAPHICS]; + + uint32_t SclkStepSize; + uint32_t Smio [SMU75_MAX_ENTRIES_SMIO]; + + uint8_t UvdBootLevel; + uint8_t VceBootLevel; + uint8_t AcpBootLevel; + uint8_t SamuBootLevel; + + uint8_t GraphicsBootLevel; + uint8_t GraphicsVoltageChangeEnable; + uint8_t GraphicsThermThrottleEnable; + uint8_t GraphicsInterval; + + uint8_t VoltageInterval; + uint8_t ThermalInterval; + uint16_t TemperatureLimitHigh; + + uint16_t TemperatureLimitLow; + uint8_t MemoryBootLevel; + uint8_t MemoryVoltageChangeEnable; + + uint16_t BootMVdd; + uint8_t MemoryInterval; + uint8_t MemoryThermThrottleEnable; + + uint16_t VoltageResponseTime; + uint16_t PhaseResponseTime; + + uint8_t PCIeBootLinkLevel; + uint8_t PCIeGenInterval; + uint8_t DTEInterval; + uint8_t DTEMode; + + uint8_t SVI2Enable; + uint8_t VRHotGpio; + uint8_t AcDcGpio; + uint8_t ThermGpio; + + uint16_t PPM_PkgPwrLimit; + uint16_t PPM_TemperatureLimit; + + uint16_t DefaultTdp; + uint16_t TargetTdp; + + uint16_t FpsHighThreshold; + uint16_t FpsLowThreshold; + + uint16_t BAPMTI_R [SMU75_DTE_ITERATIONS][SMU75_DTE_SOURCES][SMU75_DTE_SINKS]; + uint16_t BAPMTI_RC [SMU75_DTE_ITERATIONS][SMU75_DTE_SOURCES][SMU75_DTE_SINKS]; + + uint16_t TemperatureLimitEdge; + uint16_t TemperatureLimitHotspot; + + uint16_t BootVddc; + uint16_t BootVddci; + + uint16_t FanGainEdge; + uint16_t FanGainHotspot; + + uint32_t LowSclkInterruptThreshold; + uint32_t VddGfxReChkWait; + + uint8_t ClockStretcherAmount; + uint8_t Sclk_CKS_masterEn0_7; + uint8_t Sclk_CKS_masterEn8_15; + uint8_t DPMFreezeAndForced; + + uint8_t Sclk_voltageOffset[8]; + + SMU_ClockStretcherDataTable ClockStretcherDataTable; + SMU_CKS_LOOKUPTable CKS_LOOKUPTable; + + uint32_t CurrSclkPllRange; + sclkFcwRange_t SclkFcwRangeTable[NUM_SCLK_RANGE]; + + GB_VDROOP_TABLE_t BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES]; + SMU_QuadraticCoeffs AVFSGB_FUSE_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES]; +}; + +typedef struct SMU75_Discrete_DpmTable SMU75_Discrete_DpmTable; + +struct SMU75_Discrete_FanTable { + uint16_t FdoMode; + int16_t TempMin; + int16_t TempMed; + int16_t TempMax; + int16_t Slope1; + int16_t Slope2; + int16_t FdoMin; + int16_t HystUp; + int16_t HystDown; + int16_t HystSlope; + int16_t TempRespLim; + int16_t TempCurr; + int16_t SlopeCurr; + int16_t PwmCurr; + uint32_t RefreshPeriod; + int16_t FdoMax; + uint8_t TempSrc; + int8_t Padding; +}; + +typedef struct SMU75_Discrete_FanTable SMU75_Discrete_FanTable; + +#define SMU7_DISCRETE_GPIO_SCLK_DEBUG 4 +#define SMU7_DISCRETE_GPIO_SCLK_DEBUG_BIT (0x1 << SMU7_DISCRETE_GPIO_SCLK_DEBUG) + + + +struct SMU7_MclkDpmScoreboard { + uint32_t PercentageBusy; + + int32_t PIDError; + int32_t PIDIntegral; + int32_t PIDOutput; + + uint32_t SigmaDeltaAccum; + uint32_t SigmaDeltaOutput; + uint32_t SigmaDeltaLevel; + + uint32_t UtilizationSetpoint; + + uint8_t TdpClampMode; + uint8_t TdcClampMode; + uint8_t ThermClampMode; + uint8_t VoltageBusy; + + int8_t CurrLevel; + int8_t TargLevel; + uint8_t LevelChangeInProgress; + uint8_t UpHyst; + + uint8_t DownHyst; + uint8_t VoltageDownHyst; + uint8_t DpmEnable; + uint8_t DpmRunning; + + uint8_t DpmForce; + uint8_t DpmForceLevel; + uint8_t padding2; + uint8_t McArbIndex; + + uint32_t MinimumPerfMclk; + + uint8_t AcpiReq; + uint8_t AcpiAck; + uint8_t MclkSwitchInProgress; + uint8_t MclkSwitchCritical; + + uint8_t IgnoreVBlank; + uint8_t TargetMclkIndex; + uint8_t TargetMvddIndex; + uint8_t MclkSwitchResult; + + uint16_t VbiFailureCount; + uint8_t VbiWaitCounter; + uint8_t EnabledLevelsChange; + + uint16_t LevelResidencyCounters [SMU75_MAX_LEVELS_MEMORY]; + uint16_t LevelSwitchCounters [SMU75_MAX_LEVELS_MEMORY]; + + void (*TargetStateCalculator)(uint8_t); + void (*SavedTargetStateCalculator)(uint8_t); + + uint16_t AutoDpmInterval; + uint16_t AutoDpmRange; + + uint16_t VbiTimeoutCount; + uint16_t MclkSwitchingTime; + + uint8_t fastSwitch; + uint8_t Save_PIC_VDDGFX_EXIT; + uint8_t Save_PIC_VDDGFX_ENTER; + uint8_t VbiTimeout; + + uint32_t HbmTempRegBackup; +}; + +typedef struct SMU7_MclkDpmScoreboard SMU7_MclkDpmScoreboard; + +struct SMU7_UlvScoreboard { + uint8_t EnterUlv; + uint8_t ExitUlv; + uint8_t UlvActive; + uint8_t WaitingForUlv; + uint8_t UlvEnable; + uint8_t UlvRunning; + uint8_t UlvMasterEnable; + uint8_t padding; + uint32_t UlvAbortedCount; + uint32_t UlvTimeStamp; +}; + +typedef struct SMU7_UlvScoreboard SMU7_UlvScoreboard; + +struct VddgfxSavedRegisters { + uint32_t GPU_DBG[3]; + uint32_t MEC_BaseAddress_Hi; + uint32_t MEC_BaseAddress_Lo; + uint32_t THM_TMON0_CTRL2__RDIR_PRESENT; + uint32_t THM_TMON1_CTRL2__RDIR_PRESENT; + uint32_t CP_INT_CNTL; +}; + +typedef struct VddgfxSavedRegisters VddgfxSavedRegisters; + +struct SMU7_VddGfxScoreboard { + uint8_t VddGfxEnable; + uint8_t VddGfxActive; + uint8_t VPUResetOccured; + uint8_t padding; + + uint32_t VddGfxEnteredCount; + uint32_t VddGfxAbortedCount; + + uint32_t VddGfxVid; + + VddgfxSavedRegisters SavedRegisters; +}; + +typedef struct SMU7_VddGfxScoreboard SMU7_VddGfxScoreboard; + +struct SMU7_TdcLimitScoreboard { + uint8_t Enable; + uint8_t Running; + uint16_t Alpha; + uint32_t FilteredIddc; + uint32_t IddcLimit; + uint32_t IddcHyst; + SMU7_HystController_Data HystControllerData; +}; + +typedef struct SMU7_TdcLimitScoreboard SMU7_TdcLimitScoreboard; + +struct SMU7_PkgPwrLimitScoreboard { + uint8_t Enable; + uint8_t Running; + uint16_t Alpha; + uint32_t FilteredPkgPwr; + uint32_t Limit; + uint32_t Hyst; + uint32_t LimitFromDriver; + uint8_t PowerSharingEnabled; + uint8_t PowerSharingCounter; + uint8_t PowerSharingINTEnabled; + uint8_t GFXActivityCounterEnabled; + uint32_t EnergyCount; + uint32_t PSACTCount; + uint8_t RollOverRequired; + uint8_t RollOverCount; + uint8_t padding[2]; + SMU7_HystController_Data HystControllerData; +}; + +typedef struct SMU7_PkgPwrLimitScoreboard SMU7_PkgPwrLimitScoreboard; + +struct SMU7_BapmScoreboard { + uint32_t source_powers[SMU75_DTE_SOURCES]; + uint32_t source_powers_last[SMU75_DTE_SOURCES]; + int32_t entity_temperatures[SMU75_NUM_GPU_TES]; + int32_t initial_entity_temperatures[SMU75_NUM_GPU_TES]; + int32_t Limit; + int32_t Hyst; + int32_t therm_influence_coeff_table[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS * 2]; + int32_t therm_node_table[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS]; + uint16_t ConfigTDPPowerScalar; + uint16_t FanSpeedPowerScalar; + uint16_t OverDrivePowerScalar; + uint16_t OverDriveLimitScalar; + uint16_t FinalPowerScalar; + uint8_t VariantID; + uint8_t spare997; + + SMU7_HystController_Data HystControllerData; + + int32_t temperature_gradient_slope; + int32_t temperature_gradient; + uint32_t measured_temperature; +}; + + +typedef struct SMU7_BapmScoreboard SMU7_BapmScoreboard; + +struct SMU7_AcpiScoreboard { + uint32_t SavedInterruptMask[2]; + uint8_t LastACPIRequest; + uint8_t CgBifResp; + uint8_t RequestType; + uint8_t Padding; + SMU75_Discrete_ACPILevel D0Level; +}; + +typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard; + +struct SMU75_Discrete_PmFuses { + uint8_t BapmVddCVidHiSidd[8]; + + uint8_t BapmVddCVidLoSidd[8]; + + uint8_t VddCVid[8]; + + uint8_t SviLoadLineEn; + uint8_t SviLoadLineVddC; + uint8_t SviLoadLineTrimVddC; + uint8_t SviLoadLineOffsetVddC; + + uint16_t TDC_VDDC_PkgLimit; + uint8_t TDC_VDDC_ThrottleReleaseLimitPerc; + uint8_t TDC_MAWt; + + uint8_t TdcWaterfallCtl; + uint8_t LPMLTemperatureMin; + uint8_t LPMLTemperatureMax; + uint8_t Reserved; + + uint8_t LPMLTemperatureScaler[16]; + + int16_t FuzzyFan_ErrorSetDelta; + int16_t FuzzyFan_ErrorRateSetDelta; + int16_t FuzzyFan_PwmSetDelta; + uint16_t Reserved6; + + uint8_t GnbLPML[16]; + + uint8_t GnbLPMLMaxVid; + uint8_t GnbLPMLMinVid; + uint8_t Reserved1[2]; + + uint16_t BapmVddCBaseLeakageHiSidd; + uint16_t BapmVddCBaseLeakageLoSidd; + + uint16_t VFT_Temp[3]; + uint8_t Version; + uint8_t padding; + + SMU_QuadraticCoeffs VFT_ATE[3]; + + SMU_QuadraticCoeffs AVFS_GB; + SMU_QuadraticCoeffs ATE_ACBTC_GB; + + SMU_QuadraticCoeffs P2V; + + uint32_t PsmCharzFreq; + + uint16_t InversionVoltage; + uint16_t PsmCharzTemp; + + uint32_t EnabledAvfsModules; + + SMU_QuadraticCoeffs BtcGbv_CksOff; +}; + +typedef struct SMU75_Discrete_PmFuses SMU75_Discrete_PmFuses; + +struct SMU7_Discrete_Log_Header_Table { + uint32_t version; + uint32_t asic_id; + uint16_t flags; + uint16_t entry_size; + uint32_t total_size; + uint32_t num_of_entries; + uint8_t type; + uint8_t mode; + uint8_t filler_0[2]; + uint32_t filler_1[2]; +}; + +typedef struct SMU7_Discrete_Log_Header_Table SMU7_Discrete_Log_Header_Table; + +struct SMU7_Discrete_Log_Cntl { + uint8_t Enabled; + uint8_t Type; + uint8_t padding[2]; + uint32_t BufferSize; + uint32_t SamplesLogged; + uint32_t SampleSize; + uint32_t AddrL; + uint32_t AddrH; +}; + +typedef struct SMU7_Discrete_Log_Cntl SMU7_Discrete_Log_Cntl; + +#if defined SMU__DGPU_ONLY +#define CAC_ACC_NW_NUM_OF_SIGNALS 87 +#endif + + +struct SMU7_Discrete_Cac_Collection_Table { + uint32_t temperature; + uint32_t cac_acc_nw[CAC_ACC_NW_NUM_OF_SIGNALS]; +}; + +typedef struct SMU7_Discrete_Cac_Collection_Table SMU7_Discrete_Cac_Collection_Table; + +struct SMU7_Discrete_Cac_Verification_Table { + uint32_t VddcTotalPower; + uint32_t VddcLeakagePower; + uint32_t VddcConstantPower; + uint32_t VddcGfxDynamicPower; + uint32_t VddcUvdDynamicPower; + uint32_t VddcVceDynamicPower; + uint32_t VddcAcpDynamicPower; + uint32_t VddcPcieDynamicPower; + uint32_t VddcDceDynamicPower; + uint32_t VddcCurrent; + uint32_t VddcVoltage; + uint32_t VddciTotalPower; + uint32_t VddciLeakagePower; + uint32_t VddciConstantPower; + uint32_t VddciDynamicPower; + uint32_t Vddr1TotalPower; + uint32_t Vddr1LeakagePower; + uint32_t Vddr1ConstantPower; + uint32_t Vddr1DynamicPower; + uint32_t spare[4]; + uint32_t temperature; +}; + +typedef struct SMU7_Discrete_Cac_Verification_Table SMU7_Discrete_Cac_Verification_Table; + +struct SMU7_Discrete_Pm_Status_Table { + int32_t T_meas_max[SMU75_THERMAL_INPUT_LOOP_COUNT]; + int32_t T_meas_acc[SMU75_THERMAL_INPUT_LOOP_COUNT]; + + uint32_t I_calc_max; + uint32_t I_calc_acc; + uint32_t P_meas_acc; + uint32_t V_meas_load_acc; + uint32_t I_meas_acc; + uint32_t P_meas_acc_vddci; + uint32_t V_meas_load_acc_vddci; + uint32_t I_meas_acc_vddci; + + uint16_t Sclk_dpm_residency[8]; + uint16_t Uvd_dpm_residency[8]; + uint16_t Vce_dpm_residency[8]; + uint16_t Mclk_dpm_residency[4]; + + uint32_t P_roc_acc; + uint32_t PkgPwr_max; + uint32_t PkgPwr_acc; + uint32_t MclkSwitchingTime_max; + uint32_t MclkSwitchingTime_acc; + uint32_t FanPwm_acc; + uint32_t FanRpm_acc; + uint32_t Gfx_busy_acc; + uint32_t Mc_busy_acc; + uint32_t Fps_acc; + + uint32_t AccCnt; +}; + +typedef struct SMU7_Discrete_Pm_Status_Table SMU7_Discrete_Pm_Status_Table; + +struct SMU7_Discrete_AutoWattMan_Status_Table { + int32_t T_meas_acc[SMU75_THERMAL_INPUT_LOOP_COUNT]; + uint16_t Sclk_dpm_residency[8]; + uint16_t Mclk_dpm_residency[4]; + uint32_t TgpPwr_acc; + uint32_t Gfx_busy_acc; + uint32_t Mc_busy_acc; + uint32_t AccCnt; +}; + +typedef struct SMU7_Discrete_AutoWattMan_Status_Table SMU7_Discrete_AutoWattMan_Status_Table; + +#define SMU7_MAX_GFX_CU_COUNT 24 +#define SMU7_MIN_GFX_CU_COUNT 8 +#define SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_SHIFT 0 +#define SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_MASK (0xFFFF << SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_SHIFT) +#define SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_SHIFT 16 +#define SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_MASK (0xFFFF << SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_SHIFT) + +struct SMU7_GfxCuPgScoreboard { + uint8_t Enabled; + uint8_t WaterfallUp; + uint8_t WaterfallDown; + uint8_t WaterfallLimit; + uint8_t CurrMaxCu; + uint8_t TargMaxCu; + uint8_t ClampMode; + uint8_t Active; + uint8_t MaxSupportedCu; + uint8_t MinSupportedCu; + uint8_t PendingGfxCuHostInterrupt; + uint8_t LastFilteredMaxCuInteger; + uint16_t FilteredMaxCu; + uint16_t FilteredMaxCuAlpha; + uint16_t FilterResetCount; + uint16_t FilterResetCountLimit; + uint8_t ForceCu; + uint8_t ForceCuCount; + uint8_t AcModeMaxCu; + uint8_t DcModeMaxCu; +}; + +typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard; + +#define SMU7_SCLK_CAC 0x561 +#define SMU7_MCLK_CAC 0xF9 +#define SMU7_VCLK_CAC 0x2DE +#define SMU7_DCLK_CAC 0x2DE +#define SMU7_ECLK_CAC 0x25E +#define SMU7_ACLK_CAC 0x25E +#define SMU7_SAMCLK_CAC 0x25E +#define SMU7_DISPCLK_CAC 0x100 +#define SMU7_CAC_CONSTANT 0x2EE3430 +#define SMU7_CAC_CONSTANT_SHIFT 18 + +#define SMU7_VDDCI_MCLK_CONST 1765 +#define SMU7_VDDCI_MCLK_CONST_SHIFT 16 +#define SMU7_VDDCI_VDDCI_CONST 50958 +#define SMU7_VDDCI_VDDCI_CONST_SHIFT 14 +#define SMU7_VDDCI_CONST 11781 +#define SMU7_VDDCI_STROBE_PWR 1331 + +#define SMU7_VDDR1_CONST 693 +#define SMU7_VDDR1_CAC_WEIGHT 20 +#define SMU7_VDDR1_CAC_WEIGHT_SHIFT 19 +#define SMU7_VDDR1_STROBE_PWR 512 + +#define SMU7_AREA_COEFF_UVD 0xA78 +#define SMU7_AREA_COEFF_VCE 0x190A +#define SMU7_AREA_COEFF_ACP 0x22D1 +#define SMU7_AREA_COEFF_SAMU 0x534 + +#define SMU7_THERM_OUT_MODE_DISABLE 0x0 +#define SMU7_THERM_OUT_MODE_THERM_ONLY 0x1 +#define SMU7_THERM_OUT_MODE_THERM_VRHOT 0x2 + +#define SQ_Enable_MASK 0x1 +#define SQ_IR_MASK 0x2 +#define SQ_PCC_MASK 0x4 +#define SQ_EDC_MASK 0x8 + +#define TCP_Enable_MASK 0x100 +#define TCP_IR_MASK 0x200 +#define TCP_PCC_MASK 0x400 +#define TCP_EDC_MASK 0x800 + +#define TD_Enable_MASK 0x10000 +#define TD_IR_MASK 0x20000 +#define TD_PCC_MASK 0x40000 +#define TD_EDC_MASK 0x80000 + +#define DB_Enable_MASK 0x1000000 +#define DB_IR_MASK 0x2000000 +#define DB_PCC_MASK 0x4000000 +#define DB_EDC_MASK 0x8000000 + +#define SQ_Enable_SHIFT 0 +#define SQ_IR_SHIFT 1 +#define SQ_PCC_SHIFT 2 +#define SQ_EDC_SHIFT 3 + +#define TCP_Enable_SHIFT 8 +#define TCP_IR_SHIFT 9 +#define TCP_PCC_SHIFT 10 +#define TCP_EDC_SHIFT 11 + +#define TD_Enable_SHIFT 16 +#define TD_IR_SHIFT 17 +#define TD_PCC_SHIFT 18 +#define TD_EDC_SHIFT 19 + +#define DB_Enable_SHIFT 24 +#define DB_IR_SHIFT 25 +#define DB_PCC_SHIFT 26 +#define DB_EDC_SHIFT 27 + +#define PMFUSES_AVFSSIZE 104 + +#define BTCGB0_Vdroop_Enable_MASK 0x1 +#define BTCGB1_Vdroop_Enable_MASK 0x2 +#define AVFSGB0_Vdroop_Enable_MASK 0x4 +#define AVFSGB1_Vdroop_Enable_MASK 0x8 + +#define BTCGB0_Vdroop_Enable_SHIFT 0 +#define BTCGB1_Vdroop_Enable_SHIFT 1 +#define AVFSGB0_Vdroop_Enable_SHIFT 2 +#define AVFSGB1_Vdroop_Enable_SHIFT 3 + +#pragma pack(pop) + + +#endif + diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h index c3ed737ab951..715b5a168831 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h @@ -131,6 +131,7 @@ typedef uint16_t PPSMC_Result; #define PPSMC_MSG_RunAcgInOpenLoop 0x5E #define PPSMC_MSG_InitializeAcg 0x5F #define PPSMC_MSG_GetCurrPkgPwr 0x61 +#define PPSMC_MSG_GetAverageGfxclkActualFrequency 0x63 #define PPSMC_MSG_SetPccThrottleLevel 0x67 #define PPSMC_MSG_UpdatePkgPwrPidAlpha 0x68 #define PPSMC_Message_Count 0x69 diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile index 958755075421..0a200406a1ec 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile @@ -26,7 +26,7 @@ SMU_MGR = smumgr.o smu8_smumgr.o tonga_smumgr.o fiji_smumgr.o \ polaris10_smumgr.o iceland_smumgr.o \ smu7_smumgr.o vega10_smumgr.o smu10_smumgr.o ci_smumgr.o \ - vega12_smumgr.o + vega12_smumgr.o vegam_smumgr.o AMD_PP_SMUMGR = $(addprefix $(AMD_PP_PATH)/smumgr/,$(SMU_MGR)) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 08d000140eca..2d4ec8ac3a08 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -61,9 +61,6 @@ #define SMC_RAM_END 0x40000 -#define VOLTAGE_SCALE 4 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 #define CISLAND_MINIMUM_ENGINE_CLOCK 800 #define CISLAND_MAX_DEEPSLEEP_DIVIDER_ID 5 @@ -211,9 +208,7 @@ static int ci_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) { int ret; - if (!ci_is_smc_ram_running(hwmgr)) - return -EINVAL; - + cgs_write_register(hwmgr->device, mmSMC_RESP_0, 0); cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, msg); PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0); @@ -1182,7 +1177,6 @@ static int ci_populate_single_memory_level( struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); int result = 0; bool dll_state_on; - struct cgs_display_info info = {0}; uint32_t mclk_edc_wr_enable_threshold = 40000; uint32_t mclk_edc_enable_threshold = 40000; uint32_t mclk_strobe_mode_threshold = 40000; @@ -1236,8 +1230,7 @@ static int ci_populate_single_memory_level( /* default set to low watermark. Highest level will be set to high later.*/ memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - cgs_get_active_displays_info(hwmgr->device, &info); - data->display_timing.num_existing_displays = info.display_count; + data->display_timing.num_existing_displays = hwmgr->display_config->num_display; /* stutter mode not support on ci */ @@ -2784,7 +2777,6 @@ static int ci_smu_fini(struct pp_hwmgr *hwmgr) { kfree(hwmgr->smu_backend); hwmgr->smu_backend = NULL; - cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index faef78321446..53df9405f43a 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -53,10 +53,7 @@ #define FIJI_SMC_SIZE 0x20000 -#define VOLTAGE_SCALE 4 #define POWERTUNE_DEFAULT_SET_MAX 1 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 #define VDDC_VDDCI_DELTA 300 #define MC_CG_ARB_FREQ_F1 0x0b @@ -288,8 +285,7 @@ static int fiji_start_smu(struct pp_hwmgr *hwmgr) struct fiji_smumgr *priv = (struct fiji_smumgr *)(hwmgr->smu_backend); /* Only start SMC if SMC RAM is not running */ - if (!(smu7_is_smc_ram_running(hwmgr) - || cgs_is_virtualization_enabled(hwmgr->device))) { + if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) { /* Check if SMU is running in protected mode */ if (0 == PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, @@ -307,13 +303,13 @@ static int fiji_start_smu(struct pp_hwmgr *hwmgr) } /* To initialize all clock gating before RLC loaded and running.*/ - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GFX, AMD_CG_STATE_GATE); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_GMC, AMD_CG_STATE_GATE); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_SDMA, AMD_CG_STATE_GATE); - cgs_set_clockgating_state(hwmgr->device, + amdgpu_device_ip_set_clockgating_state(hwmgr->adev, AMD_IP_BLOCK_TYPE_COMMON, AMD_CG_STATE_GATE); /* Setup SoftRegsStart here for register lookup in case @@ -335,10 +331,10 @@ static bool fiji_is_hw_avfs_present(struct pp_hwmgr *hwmgr) uint32_t efuse = 0; uint32_t mask = (1 << ((AVFS_EN_MSB - AVFS_EN_LSB) + 1)) - 1; - if (cgs_is_virtualization_enabled(hwmgr->device)) - return 0; + if (!hwmgr->not_vf) + return false; - if (!atomctrl_read_efuse(hwmgr->device, AVFS_EN_LSB, AVFS_EN_MSB, + if (!atomctrl_read_efuse(hwmgr, AVFS_EN_LSB, AVFS_EN_MSB, mask, &efuse)) { if (efuse) return true; @@ -989,11 +985,11 @@ static int fiji_populate_single_graphic_level(struct pp_hwmgr *hwmgr, threshold = clock * data->fast_watermark_threshold / 100; - data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr; + data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr; if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock, - hwmgr->display_config.min_core_set_clock_in_sr); + hwmgr->display_config->min_core_set_clock_in_sr); /* Default to slow, highest DPM level will be diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c index d4bb934e7334..415f691c3fa9 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c @@ -60,10 +60,7 @@ #define ICELAND_SMC_SIZE 0x20000 -#define VOLTAGE_SCALE 4 #define POWERTUNE_DEFAULT_SET_MAX 1 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 #define MC_CG_ARB_FREQ_F1 0x0b #define VDDC_VDDCI_DELTA 200 @@ -932,7 +929,7 @@ static int iceland_populate_single_graphic_level(struct pp_hwmgr *hwmgr, graphic_level->PowerThrottle = 0; data->display_timing.min_clock_in_sr = - hwmgr->display_config.min_core_set_clock_in_sr; + hwmgr->display_config->min_core_set_clock_in_sr; if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) @@ -1236,7 +1233,6 @@ static int iceland_populate_single_memory_level( struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); int result = 0; bool dll_state_on; - struct cgs_display_info info = {0}; uint32_t mclk_edc_wr_enable_threshold = 40000; uint32_t mclk_edc_enable_threshold = 40000; uint32_t mclk_strobe_mode_threshold = 40000; @@ -1283,8 +1279,7 @@ static int iceland_populate_single_memory_level( /* default set to low watermark. Highest level will be set to high later.*/ memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - cgs_get_active_displays_info(hwmgr->device, &info); - data->display_timing.num_existing_displays = info.display_count; + data->display_timing.num_existing_displays = hwmgr->display_config->num_display; /* stutter mode not support on iceland */ diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 997a777dd35b..a8c6524f07e4 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -52,8 +52,6 @@ #include "dce/dce_10_0_sh_mask.h" #define POLARIS10_SMC_SIZE 0x20000 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 #define POWERTUNE_DEFAULT_SET_MAX 1 #define VDDC_VDDCI_DELTA 200 #define MC_CG_ARB_FREQ_F1 0x0b @@ -295,25 +293,16 @@ static int polaris10_start_smu(struct pp_hwmgr *hwmgr) struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); /* Only start SMC if SMC RAM is not running */ - if (!(smu7_is_smc_ram_running(hwmgr) - || cgs_is_virtualization_enabled(hwmgr->device))) { + if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) { smu_data->protected_mode = (uint8_t) (PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_MODE)); smu_data->smu7_data.security_hard_key = (uint8_t) (PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_SEL)); /* Check if SMU is running in protected mode */ - if (smu_data->protected_mode == 0) { + if (smu_data->protected_mode == 0) result = polaris10_start_smu_in_non_protection_mode(hwmgr); - } else { + else result = polaris10_start_smu_in_protection_mode(hwmgr); - /* If failed, try with different security Key. */ - if (result != 0) { - smu_data->smu7_data.security_hard_key ^= 1; - cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU); - result = polaris10_start_smu_in_protection_mode(hwmgr); - } - } - if (result != 0) PP_ASSERT_WITH_CODE(0, "Failed to load SMU ucode.", return result); @@ -951,11 +940,11 @@ static int polaris10_populate_single_graphic_level(struct pp_hwmgr *hwmgr, level->DownHyst = data->current_profile_setting.sclk_down_hyst; level->VoltageDownHyst = 0; level->PowerThrottle = 0; - data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr; + data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr; if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock, - hwmgr->display_config.min_core_set_clock_in_sr); + hwmgr->display_config->min_core_set_clock_in_sr); /* Default to slow, highest DPM level will be * set to PPSMC_DISPLAY_WATERMARK_LOW later. @@ -1085,11 +1074,9 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_information *table_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); int result = 0; - struct cgs_display_info info = {0, 0, NULL}; uint32_t mclk_stutter_mode_threshold = 40000; phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL; - cgs_get_active_displays_info(hwmgr->device, &info); if (hwmgr->od_enabled) vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_mclk; @@ -1115,7 +1102,7 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr, mem_level->StutterEnable = false; mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - data->display_timing.num_existing_displays = info.display_count; + data->display_timing.num_existing_displays = hwmgr->display_config->num_display; if (mclk_stutter_mode_threshold && (clock <= mclk_stutter_mode_threshold) && diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c index bc53f2beda30..0a563f6fe9ea 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c @@ -23,7 +23,7 @@ #include "smumgr.h" #include "smu10_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "smu10_smumgr.h" #include "ppatomctrl.h" #include "rv_ppsmc.h" @@ -33,8 +33,6 @@ #include "pp_debug.h" -#define VOLTAGE_SCALE 4 - #define BUFFER_SIZE 80000 #define MAX_STRING_SIZE 15 #define BUFFER_SIZETWO 131072 @@ -49,48 +47,41 @@ static uint32_t smu10_wait_for_response(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t reg; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); + reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); phm_wait_for_register_unequal(hwmgr, reg, 0, MP1_C2PMSG_90__CONTENT_MASK); - return cgs_read_register(hwmgr->device, reg); + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); } static int smu10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66); - cgs_write_register(hwmgr->device, reg, msg); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg); return 0; } static int smu10_read_arg_from_smc(struct pp_hwmgr *hwmgr) { - uint32_t reg; - - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); + struct amdgpu_device *adev = hwmgr->adev; - return cgs_read_register(hwmgr->device, reg); + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); } static int smu10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; smu10_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); smu10_send_msg_to_smc_without_waiting(hwmgr, msg); @@ -104,17 +95,13 @@ static int smu10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) static int smu10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; smu10_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); - cgs_write_register(hwmgr->device, reg, parameter); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter); smu10_send_msg_to_smc_without_waiting(hwmgr, msg); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c index 0399c10d2be0..d644a9bb9078 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c @@ -167,24 +167,25 @@ int smu7_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) { int ret; - if (!smu7_is_smc_ram_running(hwmgr)) - return -EINVAL; - - PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0); ret = PHM_READ_FIELD(hwmgr->device, SMC_RESP_0, SMC_RESP); - if (ret != 1) - pr_info("\n failed to send pre message %x ret is %d \n", msg, ret); + if (ret == 0xFE) + pr_debug("last message was not supported\n"); + else if (ret != 1) + pr_info("\n last message was failed ret is %d\n", ret); + cgs_write_register(hwmgr->device, mmSMC_RESP_0, 0); cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, msg); PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0); ret = PHM_READ_FIELD(hwmgr->device, SMC_RESP_0, SMC_RESP); - if (ret != 1) + if (ret == 0xFE) + pr_debug("message %x was not supported\n", msg); + else if (ret != 1) pr_info("\n failed to send message %x ret is %d \n", msg, ret); return 0; @@ -199,10 +200,6 @@ int smu7_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg) int smu7_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter) { - if (!smu7_is_smc_ram_running(hwmgr)) { - return -EINVAL; - } - PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0); cgs_write_register(hwmgr->device, mmSMC_MSG_ARG_0, parameter); @@ -231,16 +228,6 @@ int smu7_send_msg_to_smc_offset(struct pp_hwmgr *hwmgr) return 0; } -int smu7_wait_for_smc_inactive(struct pp_hwmgr *hwmgr) -{ - if (!smu7_is_smc_ram_running(hwmgr)) - return -EINVAL; - - PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, SMC_SYSCON_CLOCK_CNTL_0, cken, 0); - return 0; -} - - enum cgs_ucode_id smu7_convert_fw_type_to_cgs(uint32_t fw_type) { enum cgs_ucode_id result = CGS_UCODE_ID_MAXIMUM; @@ -296,11 +283,9 @@ int smu7_read_smc_sram_dword(struct pp_hwmgr *hwmgr, uint32_t smc_addr, uint32_t result = smu7_set_smc_sram_address(hwmgr, smc_addr, limit); - if (result) - return result; + *value = result ? 0 : cgs_read_register(hwmgr->device, mmSMC_IND_DATA_11); - *value = cgs_read_register(hwmgr->device, mmSMC_IND_DATA_11); - return 0; + return result; } int smu7_write_smc_sram_dword(struct pp_hwmgr *hwmgr, uint32_t smc_addr, uint32_t value, uint32_t limit) @@ -375,7 +360,7 @@ static int smu7_populate_single_firmware_entry(struct pp_hwmgr *hwmgr, entry->meta_data_addr_low = 0; /* digest need be excluded out */ - if (cgs_is_virtualization_enabled(hwmgr->device)) + if (!hwmgr->not_vf) info.image_size -= 20; entry->data_size_byte = info.image_size; entry->num_register_entries = 0; @@ -409,7 +394,7 @@ int smu7_request_smu_load_fw(struct pp_hwmgr *hwmgr) 0x0); if (hwmgr->chip_id > CHIP_TOPAZ) { /* add support for Topaz */ - if (!cgs_is_virtualization_enabled(hwmgr->device)) { + if (hwmgr->not_vf) { smu7_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SMU_DRAM_ADDR_HI, upper_32_bits(smu_data->smu_buffer.mc_addr)); @@ -467,7 +452,7 @@ int smu7_request_smu_load_fw(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE(0 == smu7_populate_single_firmware_entry(hwmgr, UCODE_ID_SDMA1, &toc->entry[toc->num_entries++]), "Failed to Get Firmware Entry.", return -EINVAL); - if (cgs_is_virtualization_enabled(hwmgr->device)) + if (!hwmgr->not_vf) PP_ASSERT_WITH_CODE(0 == smu7_populate_single_firmware_entry(hwmgr, UCODE_ID_MEC_STORAGE, &toc->entry[toc->num_entries++]), "Failed to Get Firmware Entry.", return -EINVAL); @@ -608,7 +593,7 @@ int smu7_init(struct pp_hwmgr *hwmgr) smu_data->header = smu_data->header_buffer.kaddr; smu_data->header_buffer.mc_addr = mc_addr; - if (cgs_is_virtualization_enabled(hwmgr->device)) + if (!hwmgr->not_vf) return 0; smu_data->smu_buffer.data_size = 200*4096; @@ -643,13 +628,12 @@ int smu7_smu_fini(struct pp_hwmgr *hwmgr) &smu_data->header_buffer.mc_addr, &smu_data->header_buffer.kaddr); - if (!cgs_is_virtualization_enabled(hwmgr->device)) + if (hwmgr->not_vf) amdgpu_bo_free_kernel(&smu_data->smu_buffer.handle, &smu_data->smu_buffer.mc_addr, &smu_data->smu_buffer.kaddr); kfree(hwmgr->smu_backend); hwmgr->smu_backend = NULL; - cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h index 126d300259ba..39c9bfda0ab4 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h @@ -67,7 +67,6 @@ int smu7_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, int smu7_send_msg_to_smc_with_parameter_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter); int smu7_send_msg_to_smc_offset(struct pp_hwmgr *hwmgr); -int smu7_wait_for_smc_inactive(struct pp_hwmgr *hwmgr); enum cgs_ucode_id smu7_convert_fw_type_to_cgs(uint32_t fw_type); int smu7_read_smc_sram_dword(struct pp_hwmgr *hwmgr, uint32_t smc_addr, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index c28b60aae5f8..c9837935f0f5 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -41,9 +41,11 @@ MODULE_FIRMWARE("amdgpu/polaris11_smc.bin"); MODULE_FIRMWARE("amdgpu/polaris11_smc_sk.bin"); MODULE_FIRMWARE("amdgpu/polaris11_k_smc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_smc.bin"); +MODULE_FIRMWARE("amdgpu/vegam_smc.bin"); MODULE_FIRMWARE("amdgpu/vega10_smc.bin"); MODULE_FIRMWARE("amdgpu/vega10_acg_smc.bin"); MODULE_FIRMWARE("amdgpu/vega12_smc.bin"); +MODULE_FIRMWARE("amdgpu/vega20_smc.bin"); int smum_thermal_avfs_enable(struct pp_hwmgr *hwmgr) { diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index b51d7468c3e7..782b19fc2e70 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -55,11 +55,7 @@ #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" - -#define VOLTAGE_SCALE 4 #define POWERTUNE_DEFAULT_SET_MAX 1 -#define VOLTAGE_VID_OFFSET_SCALE1 625 -#define VOLTAGE_VID_OFFSET_SCALE2 100 #define MC_CG_ARB_FREQ_F1 0x0b #define VDDC_VDDCI_DELTA 200 @@ -199,8 +195,7 @@ static int tonga_start_smu(struct pp_hwmgr *hwmgr) int result; /* Only start SMC if SMC RAM is not running */ - if (!(smu7_is_smc_ram_running(hwmgr) || - cgs_is_virtualization_enabled(hwmgr->device))) { + if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) { /*Check if SMU is running in protected mode*/ if (0 == PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_MODE)) { @@ -651,7 +646,7 @@ static int tonga_populate_single_graphic_level(struct pp_hwmgr *hwmgr, graphic_level->PowerThrottle = 0; data->display_timing.min_clock_in_sr = - hwmgr->display_config.min_core_set_clock_in_sr; + hwmgr->display_config->min_core_set_clock_in_sr; if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) @@ -957,18 +952,17 @@ static int tonga_populate_single_memory_level( SMU72_Discrete_MemoryLevel *memory_level ) { - uint32_t mvdd = 0; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); - int result = 0; - bool dll_state_on; - struct cgs_display_info info = {0}; uint32_t mclk_edc_wr_enable_threshold = 40000; uint32_t mclk_stutter_mode_threshold = 30000; uint32_t mclk_edc_enable_threshold = 40000; uint32_t mclk_strobe_mode_threshold = 40000; phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL; + int result = 0; + bool dll_state_on; + uint32_t mvdd = 0; if (hwmgr->od_enabled) vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_mclk; @@ -1009,8 +1003,7 @@ static int tonga_populate_single_memory_level( /* default set to low watermark. Highest level will be set to high later.*/ memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; - cgs_get_active_displays_info(hwmgr->device, &info); - data->display_timing.num_existing_displays = info.display_count; + data->display_timing.num_existing_displays = hwmgr->display_config->num_display; if ((mclk_stutter_mode_threshold != 0) && (memory_clock <= mclk_stutter_mode_threshold) && diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c index 4aafb043bcb0..e84669c448a3 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c @@ -23,7 +23,7 @@ #include "smumgr.h" #include "vega10_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "vega10_smumgr.h" #include "vega10_hwmgr.h" #include "vega10_ppsmc.h" @@ -35,8 +35,6 @@ #define AVFS_EN_MSB 1568 #define AVFS_EN_LSB 1568 -#define VOLTAGE_SCALE 4 - /* Microcode file is stored in this buffer */ #define BUFFER_SIZE 80000 #define MAX_STRING_SIZE 15 @@ -54,18 +52,13 @@ static bool vega10_is_smc_ram_running(struct pp_hwmgr *hwmgr) { - uint32_t mp1_fw_flags, reg; - - reg = soc15_get_register_offset(NBIF_HWID, 0, - mmPCIE_INDEX2_BASE_IDX, mmPCIE_INDEX2); + struct amdgpu_device *adev = hwmgr->adev; + uint32_t mp1_fw_flags; - cgs_write_register(hwmgr->device, reg, + WREG32_SOC15(NBIF, 0, mmPCIE_INDEX2, (MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff))); - reg = soc15_get_register_offset(NBIF_HWID, 0, - mmPCIE_DATA2_BASE_IDX, mmPCIE_DATA2); - - mp1_fw_flags = cgs_read_register(hwmgr->device, reg); + mp1_fw_flags = RREG32_SOC15(NBIF, 0, mmPCIE_DATA2); if (mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) return true; @@ -81,11 +74,11 @@ static bool vega10_is_smc_ram_running(struct pp_hwmgr *hwmgr) */ static uint32_t vega10_wait_for_response(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t reg; uint32_t ret; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); + reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); ret = phm_wait_for_register_unequal(hwmgr, reg, 0, MP1_C2PMSG_90__CONTENT_MASK); @@ -93,7 +86,7 @@ static uint32_t vega10_wait_for_response(struct pp_hwmgr *hwmgr) if (ret) pr_err("No response from smu\n"); - return cgs_read_register(hwmgr->device, reg); + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); } /* @@ -105,11 +98,9 @@ static uint32_t vega10_wait_for_response(struct pp_hwmgr *hwmgr) static int vega10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66); - cgs_write_register(hwmgr->device, reg, msg); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg); return 0; } @@ -122,14 +113,12 @@ static int vega10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, */ static int vega10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; uint32_t ret; vega10_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); vega10_send_msg_to_smc_without_waiting(hwmgr, msg); @@ -150,18 +139,14 @@ static int vega10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) static int vega10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; uint32_t ret; vega10_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); - cgs_write_register(hwmgr->device, reg, parameter); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter); vega10_send_msg_to_smc_without_waiting(hwmgr, msg); @@ -174,12 +159,9 @@ static int vega10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, static int vega10_get_argument(struct pp_hwmgr *hwmgr) { - uint32_t reg; - - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); + struct amdgpu_device *adev = hwmgr->adev; - return cgs_read_register(hwmgr->device, reg); + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); } static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c index 651a3f28734b..7d9b40e8b1bf 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c @@ -23,7 +23,7 @@ #include "smumgr.h" #include "vega12_inc.h" -#include "pp_soc15.h" +#include "soc15_common.h" #include "vega12_smumgr.h" #include "vega12_ppsmc.h" #include "vega12/smu9_driver_if.h" @@ -44,18 +44,13 @@ static bool vega12_is_smc_ram_running(struct pp_hwmgr *hwmgr) { - uint32_t mp1_fw_flags, reg; + struct amdgpu_device *adev = hwmgr->adev; + uint32_t mp1_fw_flags; - reg = soc15_get_register_offset(NBIF_HWID, 0, - mmPCIE_INDEX2_BASE_IDX, mmPCIE_INDEX2); - - cgs_write_register(hwmgr->device, reg, + WREG32_SOC15(NBIF, 0, mmPCIE_INDEX2, (MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff))); - reg = soc15_get_register_offset(NBIF_HWID, 0, - mmPCIE_DATA2_BASE_IDX, mmPCIE_DATA2); - - mp1_fw_flags = cgs_read_register(hwmgr->device, reg); + mp1_fw_flags = RREG32_SOC15(NBIF, 0, mmPCIE_DATA2); if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >> MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT) @@ -72,15 +67,15 @@ static bool vega12_is_smc_ram_running(struct pp_hwmgr *hwmgr) */ static uint32_t vega12_wait_for_response(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev = hwmgr->adev; uint32_t reg; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); + reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); phm_wait_for_register_unequal(hwmgr, reg, 0, MP1_C2PMSG_90__CONTENT_MASK); - return cgs_read_register(hwmgr->device, reg); + return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); } /* @@ -92,11 +87,9 @@ static uint32_t vega12_wait_for_response(struct pp_hwmgr *hwmgr) int vega12_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66); - cgs_write_register(hwmgr->device, reg, msg); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg); return 0; } @@ -109,13 +102,11 @@ int vega12_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr, */ int vega12_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; vega12_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); vega12_send_msg_to_smc_without_waiting(hwmgr, msg); @@ -135,17 +126,13 @@ int vega12_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) int vega12_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; vega12_wait_for_response(hwmgr); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90); - cgs_write_register(hwmgr->device, reg, 0); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); - cgs_write_register(hwmgr->device, reg, parameter); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter); vega12_send_msg_to_smc_without_waiting(hwmgr, msg); @@ -166,11 +153,9 @@ int vega12_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, int vega12_send_msg_to_smc_with_parameter_without_waiting( struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter) { - uint32_t reg; + struct amdgpu_device *adev = hwmgr->adev; - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66); - cgs_write_register(hwmgr->device, reg, parameter); + WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, parameter); return vega12_send_msg_to_smc_without_waiting(hwmgr, msg); } @@ -183,12 +168,9 @@ int vega12_send_msg_to_smc_with_parameter_without_waiting( */ int vega12_read_arg_from_smc(struct pp_hwmgr *hwmgr, uint32_t *arg) { - uint32_t reg; - - reg = soc15_get_register_offset(MP1_HWID, 0, - mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82); + struct amdgpu_device *adev = hwmgr->adev; - *arg = cgs_read_register(hwmgr->device, reg); + *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c new file mode 100644 index 000000000000..2de48959ac93 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c @@ -0,0 +1,2383 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "pp_debug.h" +#include "smumgr.h" +#include "smu_ucode_xfer_vi.h" +#include "vegam_smumgr.h" +#include "smu/smu_7_1_3_d.h" +#include "smu/smu_7_1_3_sh_mask.h" +#include "gmc/gmc_8_1_d.h" +#include "gmc/gmc_8_1_sh_mask.h" +#include "oss/oss_3_0_d.h" +#include "gca/gfx_8_0_d.h" +#include "bif/bif_5_0_d.h" +#include "bif/bif_5_0_sh_mask.h" +#include "ppatomctrl.h" +#include "cgs_common.h" +#include "smu7_ppsmc.h" + +#include "smu7_dyn_defaults.h" + +#include "smu7_hwmgr.h" +#include "hardwaremanager.h" +#include "ppatomctrl.h" +#include "atombios.h" +#include "pppcielanes.h" + +#include "dce/dce_11_2_d.h" +#include "dce/dce_11_2_sh_mask.h" + +#define PPVEGAM_TARGETACTIVITY_DFLT 50 + +#define VOLTAGE_VID_OFFSET_SCALE1 625 +#define VOLTAGE_VID_OFFSET_SCALE2 100 +#define POWERTUNE_DEFAULT_SET_MAX 1 +#define VDDC_VDDCI_DELTA 200 +#define MC_CG_ARB_FREQ_F1 0x0b + +#define STRAP_ASIC_RO_LSB 2168 +#define STRAP_ASIC_RO_MSB 2175 + +#define PPSMC_MSG_ApplyAvfsCksOffVoltage ((uint16_t) 0x415) +#define PPSMC_MSG_EnableModeSwitchRLCNotification ((uint16_t) 0x305) + +static const struct vegam_pt_defaults +vegam_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = { + /* sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt, + * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT */ + { 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000, + { 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61}, + { 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } }, +}; + +static const sclkFcwRange_t Range_Table[NUM_SCLK_RANGE] = { + {VCO_2_4, POSTDIV_DIV_BY_16, 75, 160, 112}, + {VCO_3_6, POSTDIV_DIV_BY_16, 112, 224, 160}, + {VCO_2_4, POSTDIV_DIV_BY_8, 75, 160, 112}, + {VCO_3_6, POSTDIV_DIV_BY_8, 112, 224, 160}, + {VCO_2_4, POSTDIV_DIV_BY_4, 75, 160, 112}, + {VCO_3_6, POSTDIV_DIV_BY_4, 112, 216, 160}, + {VCO_2_4, POSTDIV_DIV_BY_2, 75, 160, 108}, + {VCO_3_6, POSTDIV_DIV_BY_2, 112, 216, 160} }; + +static int vegam_smu_init(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data; + + smu_data = kzalloc(sizeof(struct vegam_smumgr), GFP_KERNEL); + if (smu_data == NULL) + return -ENOMEM; + + hwmgr->smu_backend = smu_data; + + if (smu7_init(hwmgr)) { + kfree(smu_data); + return -EINVAL; + } + + return 0; +} + +static int vegam_start_smu_in_protection_mode(struct pp_hwmgr *hwmgr) +{ + int result = 0; + + /* Wait for smc boot up */ + /* PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(smumgr, SMC_IND, RCU_UC_EVENTS, boot_seq_done, 0) */ + + /* Assert reset */ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 1); + + result = smu7_upload_smu_firmware_image(hwmgr); + if (result != 0) + return result; + + /* Clear status */ + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixSMU_STATUS, 0); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_CLOCK_CNTL_0, ck_disable, 0); + + /* De-assert reset */ + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 0); + + + PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, RCU_UC_EVENTS, INTERRUPTS_ENABLED, 1); + + + /* Call Test SMU message with 0x20000 offset to trigger SMU start */ + smu7_send_msg_to_smc_offset(hwmgr); + + /* Wait done bit to be set */ + /* Check pass/failed indicator */ + + PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND, SMU_STATUS, SMU_DONE, 0); + + if (1 != PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMU_STATUS, SMU_PASS)) + PP_ASSERT_WITH_CODE(false, "SMU Firmware start failed!", return -1); + + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixFIRMWARE_FLAGS, 0); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 1); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 0); + + /* Wait for firmware to initialize */ + PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, FIRMWARE_FLAGS, INTERRUPTS_ENABLED, 1); + + return result; +} + +static int vegam_start_smu_in_non_protection_mode(struct pp_hwmgr *hwmgr) +{ + int result = 0; + + /* wait for smc boot up */ + PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND, RCU_UC_EVENTS, boot_seq_done, 0); + + /* Clear firmware interrupt enable flag */ + /* PHM_WRITE_VFPF_INDIRECT_FIELD(pSmuMgr, SMC_IND, SMC_SYSCON_MISC_CNTL, pre_fetcher_en, 1); */ + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixFIRMWARE_FLAGS, 0); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, + rst_reg, 1); + + result = smu7_upload_smu_firmware_image(hwmgr); + if (result != 0) + return result; + + /* Set smc instruct start point at 0x0 */ + smu7_program_jump_on_start(hwmgr); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_CLOCK_CNTL_0, ck_disable, 0); + + PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, rst_reg, 0); + + /* Wait for firmware to initialize */ + + PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, + FIRMWARE_FLAGS, INTERRUPTS_ENABLED, 1); + + return result; +} + +static int vegam_start_smu(struct pp_hwmgr *hwmgr) +{ + int result = 0; + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + /* Only start SMC if SMC RAM is not running */ + if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) { + smu_data->protected_mode = (uint8_t)(PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_MODE)); + smu_data->smu7_data.security_hard_key = (uint8_t)(PHM_READ_VFPF_INDIRECT_FIELD( + hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_SEL)); + + /* Check if SMU is running in protected mode */ + if (smu_data->protected_mode == 0) + result = vegam_start_smu_in_non_protection_mode(hwmgr); + else + result = vegam_start_smu_in_protection_mode(hwmgr); + + if (result != 0) + PP_ASSERT_WITH_CODE(0, "Failed to load SMU ucode.", return result); + } + + /* Setup SoftRegsStart here for register lookup in case DummyBackEnd is used and ProcessFirmwareHeader is not executed */ + smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU75_Firmware_Header, SoftRegisters), + &(smu_data->smu7_data.soft_regs_start), + 0x40000); + + result = smu7_request_smu_load_fw(hwmgr); + + return result; +} + +static int vegam_process_firmware_header(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t tmp; + int result; + bool error = false; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, DpmTable), + &tmp, SMC_RAM_END); + + if (0 == result) + smu_data->smu7_data.dpm_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, SoftRegisters), + &tmp, SMC_RAM_END); + + if (!result) { + data->soft_regs_start = tmp; + smu_data->smu7_data.soft_regs_start = tmp; + } + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, mcRegisterTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.mc_reg_table_start = tmp; + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, FanTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.fan_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, mcArbDramTimingTable), + &tmp, SMC_RAM_END); + + if (!result) + smu_data->smu7_data.arb_table_start = tmp; + + error |= (0 != result); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, Version), + &tmp, SMC_RAM_END); + + if (!result) + hwmgr->microcode_version_info.SMC = tmp; + + error |= (0 != result); + + return error ? -1 : 0; +} + +static bool vegam_is_dpm_running(struct pp_hwmgr *hwmgr) +{ + return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, + CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON)) + ? true : false; +} + +static uint32_t vegam_get_mac_definition(uint32_t value) +{ + switch (value) { + case SMU_MAX_LEVELS_GRAPHICS: + return SMU75_MAX_LEVELS_GRAPHICS; + case SMU_MAX_LEVELS_MEMORY: + return SMU75_MAX_LEVELS_MEMORY; + case SMU_MAX_LEVELS_LINK: + return SMU75_MAX_LEVELS_LINK; + case SMU_MAX_ENTRIES_SMIO: + return SMU75_MAX_ENTRIES_SMIO; + case SMU_MAX_LEVELS_VDDC: + return SMU75_MAX_LEVELS_VDDC; + case SMU_MAX_LEVELS_VDDGFX: + return SMU75_MAX_LEVELS_VDDGFX; + case SMU_MAX_LEVELS_VDDCI: + return SMU75_MAX_LEVELS_VDDCI; + case SMU_MAX_LEVELS_MVDD: + return SMU75_MAX_LEVELS_MVDD; + case SMU_UVD_MCLK_HANDSHAKE_DISABLE: + return SMU7_UVD_MCLK_HANDSHAKE_DISABLE | + SMU7_VCE_MCLK_HANDSHAKE_DISABLE; + } + + pr_warn("can't get the mac of %x\n", value); + return 0; +} + +static int vegam_update_uvd_smc_table(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + smu_data->smc_state_table.UvdBootLevel = 0; + if (table_info->mm_dep_table->count > 0) + smu_data->smc_state_table.UvdBootLevel = + (uint8_t) (table_info->mm_dep_table->count - 1); + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + offsetof(SMU75_Discrete_DpmTable, + UvdBootLevel); + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0x00FFFFFF; + mm_boot_level_value |= smu_data->smc_state_table.UvdBootLevel << 24; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_UVDDPM) || + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_UVDDPM_SetEnabledMask, + (uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel)); + return 0; +} + +static int vegam_update_vce_smc_table(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smu_data->smc_state_table.VceBootLevel = + (uint8_t) (table_info->mm_dep_table->count - 1); + else + smu_data->smc_state_table.VceBootLevel = 0; + + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, VceBootLevel); + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0xFF00FFFF; + mm_boot_level_value |= smu_data->smc_state_table.VceBootLevel << 16; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_VCEDPM_SetEnabledMask, + (uint32_t)1 << smu_data->smc_state_table.VceBootLevel); + return 0; +} + +static int vegam_update_samu_smc_table(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + uint32_t mm_boot_level_offset, mm_boot_level_value; + + + smu_data->smc_state_table.SamuBootLevel = 0; + mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, SamuBootLevel); + + mm_boot_level_offset /= 4; + mm_boot_level_offset *= 4; + mm_boot_level_value = cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset); + mm_boot_level_value &= 0xFFFFFF00; + mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StablePState)) + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SAMUDPM_SetEnabledMask, + (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); + return 0; +} + + +static int vegam_update_bif_smc_table(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table; + int max_entry, i; + + max_entry = (SMU75_MAX_LEVELS_LINK < pcie_table->count) ? + SMU75_MAX_LEVELS_LINK : + pcie_table->count; + /* Setup BIF_SCLK levels */ + for (i = 0; i < max_entry; i++) + smu_data->bif_sclk_table[i] = pcie_table->entries[i].pcie_sclk; + return 0; +} + +static int vegam_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) +{ + switch (type) { + case SMU_UVD_TABLE: + vegam_update_uvd_smc_table(hwmgr); + break; + case SMU_VCE_TABLE: + vegam_update_vce_smc_table(hwmgr); + break; + case SMU_SAMU_TABLE: + vegam_update_samu_smc_table(hwmgr); + break; + case SMU_BIF_TABLE: + vegam_update_bif_smc_table(hwmgr); + break; + default: + break; + } + return 0; +} + +static void vegam_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + if (table_info && + table_info->cac_dtp_table->usPowerTuneDataSetID <= POWERTUNE_DEFAULT_SET_MAX && + table_info->cac_dtp_table->usPowerTuneDataSetID) + smu_data->power_tune_defaults = + &vegam_power_tune_data_set_array + [table_info->cac_dtp_table->usPowerTuneDataSetID - 1]; + else + smu_data->power_tune_defaults = &vegam_power_tune_data_set_array[0]; + +} + +static int vegam_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr, + SMU75_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t count, level; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + count = data->mvdd_voltage_table.count; + if (count > SMU_MAX_SMIO_LEVELS) + count = SMU_MAX_SMIO_LEVELS; + for (level = 0; level < count; level++) { + table->SmioTable2.Pattern[level].Voltage = PP_HOST_TO_SMC_US( + data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE); + /* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/ + table->SmioTable2.Pattern[level].Smio = + (uint8_t) level; + table->Smio[level] |= + data->mvdd_voltage_table.entries[level].smio_low; + } + table->SmioMask2 = data->mvdd_voltage_table.mask_low; + + table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count); + } + + return 0; +} + +static int vegam_populate_smc_vddci_table(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + uint32_t count, level; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + count = data->vddci_voltage_table.count; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + if (count > SMU_MAX_SMIO_LEVELS) + count = SMU_MAX_SMIO_LEVELS; + for (level = 0; level < count; ++level) { + table->SmioTable1.Pattern[level].Voltage = PP_HOST_TO_SMC_US( + data->vddci_voltage_table.entries[level].value * VOLTAGE_SCALE); + table->SmioTable1.Pattern[level].Smio = (uint8_t) level; + + table->Smio[level] |= data->vddci_voltage_table.entries[level].smio_low; + } + } + + table->SmioMask1 = data->vddci_voltage_table.mask_low; + + return 0; +} + +static int vegam_populate_cac_table(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + uint32_t count; + uint8_t index; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_voltage_lookup_table *lookup_table = + table_info->vddc_lookup_table; + /* tables is already swapped, so in order to use the value from it, + * we need to swap it back. + * We are populating vddc CAC data to BapmVddc table + * in split and merged mode + */ + for (count = 0; count < lookup_table->count; count++) { + index = phm_get_voltage_index(lookup_table, + data->vddc_voltage_table.entries[count].value); + table->BapmVddcVidLoSidd[count] = + convert_to_vid(lookup_table->entries[index].us_cac_low); + table->BapmVddcVidHiSidd[count] = + convert_to_vid(lookup_table->entries[index].us_cac_mid); + table->BapmVddcVidHiSidd2[count] = + convert_to_vid(lookup_table->entries[index].us_cac_high); + } + + return 0; +} + +static int vegam_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + vegam_populate_smc_vddci_table(hwmgr, table); + vegam_populate_smc_mvdd_table(hwmgr, table); + vegam_populate_cac_table(hwmgr, table); + + return 0; +} + +static int vegam_populate_ulv_level(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_Ulv *state) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + + state->CcPwrDynRm = 0; + state->CcPwrDynRm1 = 0; + + state->VddcOffset = (uint16_t) table_info->us_ulv_voltage_offset; + state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset * + VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1); + + state->VddcPhase = data->vddc_phase_shed_control ^ 0x3; + + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1); + CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset); + + return 0; +} + +static int vegam_populate_ulv_state(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + return vegam_populate_ulv_level(hwmgr, &table->Ulv); +} + +static int vegam_populate_smc_link_level(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = + (struct vegam_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &data->dpm_table; + int i; + + /* Index (dpm_table->pcie_speed_table.count) + * is reserved for PCIE boot level. */ + for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) { + table->LinkLevel[i].PcieGenSpeed = + (uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value; + table->LinkLevel[i].PcieLaneCount = (uint8_t)encode_pcie_lane_width( + dpm_table->pcie_speed_table.dpm_levels[i].param1); + table->LinkLevel[i].EnabledForActivity = 1; + table->LinkLevel[i].SPC = (uint8_t)(data->pcie_spc_cap & 0xff); + table->LinkLevel[i].DownThreshold = PP_HOST_TO_SMC_UL(5); + table->LinkLevel[i].UpThreshold = PP_HOST_TO_SMC_UL(30); + } + + smu_data->smc_state_table.LinkLevelCount = + (uint8_t)dpm_table->pcie_speed_table.count; + +/* To Do move to hwmgr */ + data->dpm_level_enable_mask.pcie_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table); + + return 0; +} + +static int vegam_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr, + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table, + uint32_t clock, SMU_VoltageLevel *voltage, uint32_t *mvdd) +{ + uint32_t i; + uint16_t vddci; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + *voltage = *mvdd = 0; + + /* clock - voltage dependency table is empty table */ + if (dep_table->count == 0) + return -EINVAL; + + for (i = 0; i < dep_table->count; i++) { + /* find first sclk bigger than request */ + if (dep_table->entries[i].clk >= clock) { + *voltage |= (dep_table->entries[i].vddc * + VOLTAGE_SCALE) << VDDC_SHIFT; + if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) + *voltage |= (data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else if (dep_table->entries[i].vddci) + *voltage |= (dep_table->entries[i].vddci * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else { + vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), + (dep_table->entries[i].vddc - + (uint16_t)VDDC_VDDCI_DELTA)); + *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + } + + if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) + *mvdd = data->vbios_boot_state.mvdd_bootup_value * + VOLTAGE_SCALE; + else if (dep_table->entries[i].mvdd) + *mvdd = (uint32_t) dep_table->entries[i].mvdd * + VOLTAGE_SCALE; + + *voltage |= 1 << PHASES_SHIFT; + return 0; + } + } + + /* sclk is bigger than max sclk in the dependence table */ + *voltage |= (dep_table->entries[i - 1].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + vddci = phm_find_closest_vddci(&(data->vddci_voltage_table), + (dep_table->entries[i - 1].vddc - + (uint16_t)VDDC_VDDCI_DELTA)); + + if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control) + *voltage |= (data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE) << VDDCI_SHIFT; + else if (dep_table->entries[i - 1].vddci) + *voltage |= (dep_table->entries[i - 1].vddci * + VOLTAGE_SCALE) << VDDC_SHIFT; + else + *voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) + *mvdd = data->vbios_boot_state.mvdd_bootup_value * VOLTAGE_SCALE; + else if (dep_table->entries[i].mvdd) + *mvdd = (uint32_t) dep_table->entries[i - 1].mvdd * VOLTAGE_SCALE; + + return 0; +} + +static void vegam_get_sclk_range_table(struct pp_hwmgr *hwmgr, + SMU75_Discrete_DpmTable *table) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + uint32_t i, ref_clk; + + struct pp_atom_ctrl_sclk_range_table range_table_from_vbios = { { {0} } }; + + ref_clk = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev); + + if (0 == atomctrl_get_smc_sclk_range_table(hwmgr, &range_table_from_vbios)) { + for (i = 0; i < NUM_SCLK_RANGE; i++) { + table->SclkFcwRangeTable[i].vco_setting = + range_table_from_vbios.entry[i].ucVco_setting; + table->SclkFcwRangeTable[i].postdiv = + range_table_from_vbios.entry[i].ucPostdiv; + table->SclkFcwRangeTable[i].fcw_pcc = + range_table_from_vbios.entry[i].usFcw_pcc; + + table->SclkFcwRangeTable[i].fcw_trans_upper = + range_table_from_vbios.entry[i].usFcw_trans_upper; + table->SclkFcwRangeTable[i].fcw_trans_lower = + range_table_from_vbios.entry[i].usRcw_trans_lower; + + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower); + } + return; + } + + for (i = 0; i < NUM_SCLK_RANGE; i++) { + smu_data->range_table[i].trans_lower_frequency = + (ref_clk * Range_Table[i].fcw_trans_lower) >> Range_Table[i].postdiv; + smu_data->range_table[i].trans_upper_frequency = + (ref_clk * Range_Table[i].fcw_trans_upper) >> Range_Table[i].postdiv; + + table->SclkFcwRangeTable[i].vco_setting = Range_Table[i].vco_setting; + table->SclkFcwRangeTable[i].postdiv = Range_Table[i].postdiv; + table->SclkFcwRangeTable[i].fcw_pcc = Range_Table[i].fcw_pcc; + + table->SclkFcwRangeTable[i].fcw_trans_upper = Range_Table[i].fcw_trans_upper; + table->SclkFcwRangeTable[i].fcw_trans_lower = Range_Table[i].fcw_trans_lower; + + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper); + CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower); + } +} + +static int vegam_calculate_sclk_params(struct pp_hwmgr *hwmgr, + uint32_t clock, SMU_SclkSetting *sclk_setting) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + const SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table); + struct pp_atomctrl_clock_dividers_ai dividers; + uint32_t ref_clock; + uint32_t pcc_target_percent, pcc_target_freq, ss_target_percent, ss_target_freq; + uint8_t i; + int result; + uint64_t temp; + + sclk_setting->SclkFrequency = clock; + /* get the engine clock dividers for this clock value */ + result = atomctrl_get_engine_pll_dividers_ai(hwmgr, clock, ÷rs); + if (result == 0) { + sclk_setting->Fcw_int = dividers.usSclk_fcw_int; + sclk_setting->Fcw_frac = dividers.usSclk_fcw_frac; + sclk_setting->Pcc_fcw_int = dividers.usPcc_fcw_int; + sclk_setting->PllRange = dividers.ucSclkPllRange; + sclk_setting->Sclk_slew_rate = 0x400; + sclk_setting->Pcc_up_slew_rate = dividers.usPcc_fcw_slew_frac; + sclk_setting->Pcc_down_slew_rate = 0xffff; + sclk_setting->SSc_En = dividers.ucSscEnable; + sclk_setting->Fcw1_int = dividers.usSsc_fcw1_int; + sclk_setting->Fcw1_frac = dividers.usSsc_fcw1_frac; + sclk_setting->Sclk_ss_slew_rate = dividers.usSsc_fcw_slew_frac; + return result; + } + + ref_clock = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev); + + for (i = 0; i < NUM_SCLK_RANGE; i++) { + if (clock > smu_data->range_table[i].trans_lower_frequency + && clock <= smu_data->range_table[i].trans_upper_frequency) { + sclk_setting->PllRange = i; + break; + } + } + + sclk_setting->Fcw_int = (uint16_t) + ((clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / + ref_clock); + temp = clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv; + temp <<= 0x10; + do_div(temp, ref_clock); + sclk_setting->Fcw_frac = temp & 0xffff; + + pcc_target_percent = 10; /* Hardcode 10% for now. */ + pcc_target_freq = clock - (clock * pcc_target_percent / 100); + sclk_setting->Pcc_fcw_int = (uint16_t) + ((pcc_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / + ref_clock); + + ss_target_percent = 2; /* Hardcode 2% for now. */ + sclk_setting->SSc_En = 0; + if (ss_target_percent) { + sclk_setting->SSc_En = 1; + ss_target_freq = clock - (clock * ss_target_percent / 100); + sclk_setting->Fcw1_int = (uint16_t) + ((ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) / + ref_clock); + temp = ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv; + temp <<= 0x10; + do_div(temp, ref_clock); + sclk_setting->Fcw1_frac = temp & 0xffff; + } + + return 0; +} + +static uint8_t vegam_get_sleep_divider_id_from_clock(uint32_t clock, + uint32_t clock_insr) +{ + uint8_t i; + uint32_t temp; + uint32_t min = max(clock_insr, (uint32_t)SMU7_MINIMUM_ENGINE_CLOCK); + + PP_ASSERT_WITH_CODE((clock >= min), + "Engine clock can't satisfy stutter requirement!", + return 0); + for (i = 31; ; i--) { + temp = clock / (i + 1); + + if (temp >= min || i == 0) + break; + } + return i; +} + +static int vegam_populate_single_graphic_level(struct pp_hwmgr *hwmgr, + uint32_t clock, struct SMU75_Discrete_GraphicsLevel *level) +{ + int result; + /* PP_Clocks minClocks; */ + uint32_t mvdd; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + SMU_SclkSetting curr_sclk_setting = { 0 }; + + result = vegam_calculate_sclk_params(hwmgr, clock, &curr_sclk_setting); + + /* populate graphics levels */ + result = vegam_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, clock, + &level->MinVoltage, &mvdd); + + PP_ASSERT_WITH_CODE((0 == result), + "can not find VDDC voltage value for " + "VDDC engine clock dependency table", + return result); + level->ActivityLevel = (uint16_t)(SclkDPMTuning_VEGAM >> DPMTuning_Activity_Shift); + + level->CcPwrDynRm = 0; + level->CcPwrDynRm1 = 0; + level->EnabledForActivity = 0; + level->EnabledForThrottle = 1; + level->VoltageDownHyst = 0; + level->PowerThrottle = 0; + data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) + level->DeepSleepDivId = vegam_get_sleep_divider_id_from_clock(clock, + hwmgr->display_config->min_core_set_clock_in_sr); + + level->SclkSetting = curr_sclk_setting; + + CONVERT_FROM_HOST_TO_SMC_UL(level->MinVoltage); + CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm1); + CONVERT_FROM_HOST_TO_SMC_US(level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(level->SclkSetting.SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_frac); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_up_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_down_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_int); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_frac); + CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_ss_slew_rate); + return 0; +} + +static int vegam_populate_all_graphic_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &hw_data->dpm_table; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table; + uint8_t pcie_entry_cnt = (uint8_t) hw_data->dpm_table.pcie_speed_table.count; + int result = 0; + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, GraphicsLevel); + uint32_t array_size = sizeof(struct SMU75_Discrete_GraphicsLevel) * + SMU75_MAX_LEVELS_GRAPHICS; + struct SMU75_Discrete_GraphicsLevel *levels = + smu_data->smc_state_table.GraphicsLevel; + uint32_t i, max_entry; + uint8_t hightest_pcie_level_enabled = 0, + lowest_pcie_level_enabled = 0, + mid_pcie_level_enabled = 0, + count = 0; + + vegam_get_sclk_range_table(hwmgr, &(smu_data->smc_state_table)); + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + + result = vegam_populate_single_graphic_level(hwmgr, + dpm_table->sclk_table.dpm_levels[i].value, + &(smu_data->smc_state_table.GraphicsLevel[i])); + if (result) + return result; + + levels[i].UpHyst = (uint8_t) + (SclkDPMTuning_VEGAM >> DPMTuning_Uphyst_Shift); + levels[i].DownHyst = (uint8_t) + (SclkDPMTuning_VEGAM >> DPMTuning_Downhyst_Shift); + /* Making sure only DPM level 0-1 have Deep Sleep Div ID populated. */ + if (i > 1) + levels[i].DeepSleepDivId = 0; + } + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SPLLShutdownSupport)) + smu_data->smc_state_table.GraphicsLevel[0].SclkSetting.SSc_En = 0; + + smu_data->smc_state_table.GraphicsDpmLevelCount = + (uint8_t)dpm_table->sclk_table.count; + hw_data->dpm_level_enable_mask.sclk_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table); + + for (i = 0; i < dpm_table->sclk_table.count; i++) + levels[i].EnabledForActivity = + (hw_data->dpm_level_enable_mask.sclk_dpm_enable_mask >> i) & 0x1; + + if (pcie_table != NULL) { + PP_ASSERT_WITH_CODE((1 <= pcie_entry_cnt), + "There must be 1 or more PCIE levels defined in PPTable.", + return -EINVAL); + max_entry = pcie_entry_cnt - 1; + for (i = 0; i < dpm_table->sclk_table.count; i++) + levels[i].pcieDpmLevel = + (uint8_t) ((i < max_entry) ? i : max_entry); + } else { + while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask && + ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (hightest_pcie_level_enabled + 1))) != 0)) + hightest_pcie_level_enabled++; + + while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask && + ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << lowest_pcie_level_enabled)) == 0)) + lowest_pcie_level_enabled++; + + while ((count < hightest_pcie_level_enabled) && + ((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask & + (1 << (lowest_pcie_level_enabled + 1 + count))) == 0)) + count++; + + mid_pcie_level_enabled = (lowest_pcie_level_enabled + 1 + count) < + hightest_pcie_level_enabled ? + (lowest_pcie_level_enabled + 1 + count) : + hightest_pcie_level_enabled; + + /* set pcieDpmLevel to hightest_pcie_level_enabled */ + for (i = 2; i < dpm_table->sclk_table.count; i++) + levels[i].pcieDpmLevel = hightest_pcie_level_enabled; + + /* set pcieDpmLevel to lowest_pcie_level_enabled */ + levels[0].pcieDpmLevel = lowest_pcie_level_enabled; + + /* set pcieDpmLevel to mid_pcie_level_enabled */ + levels[1].pcieDpmLevel = mid_pcie_level_enabled; + } + /* level count will send to smc once at init smc table and never change */ + result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + (uint32_t)array_size, SMC_RAM_END); + + return result; +} + +static int vegam_calculate_mclk_params(struct pp_hwmgr *hwmgr, + uint32_t clock, struct SMU75_Discrete_MemoryLevel *mem_level) +{ + struct pp_atomctrl_memory_clock_param_ai mpll_param; + + PP_ASSERT_WITH_CODE(!atomctrl_get_memory_pll_dividers_ai(hwmgr, + clock, &mpll_param), + "Failed to retrieve memory pll parameter.", + return -EINVAL); + + mem_level->MclkFrequency = (uint32_t)mpll_param.ulClock; + mem_level->Fcw_int = (uint16_t)mpll_param.ulMclk_fcw_int; + mem_level->Fcw_frac = (uint16_t)mpll_param.ulMclk_fcw_frac; + mem_level->Postdiv = (uint8_t)mpll_param.ulPostDiv; + + return 0; +} + +static int vegam_populate_single_memory_level(struct pp_hwmgr *hwmgr, + uint32_t clock, struct SMU75_Discrete_MemoryLevel *mem_level) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + int result = 0; + uint32_t mclk_stutter_mode_threshold = 60000; + + + if (table_info->vdd_dep_on_mclk) { + result = vegam_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, clock, + &mem_level->MinVoltage, &mem_level->MinMvdd); + PP_ASSERT_WITH_CODE(!result, + "can not find MinVddc voltage value from memory " + "VDDC voltage dependency table", return result); + } + + result = vegam_calculate_mclk_params(hwmgr, clock, mem_level); + PP_ASSERT_WITH_CODE(!result, + "Failed to calculate mclk params.", + return -EINVAL); + + mem_level->EnabledForThrottle = 1; + mem_level->EnabledForActivity = 0; + mem_level->VoltageDownHyst = 0; + mem_level->ActivityLevel = (uint16_t) + (MemoryDPMTuning_VEGAM >> DPMTuning_Activity_Shift); + mem_level->StutterEnable = false; + mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; + + data->display_timing.num_existing_displays = hwmgr->display_config->num_display; + + if (mclk_stutter_mode_threshold && + (clock <= mclk_stutter_mode_threshold) && + (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL, + STUTTER_ENABLE) & 0x1)) + mem_level->StutterEnable = true; + + if (!result) { + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinMvdd); + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(mem_level->Fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(mem_level->Fcw_frac); + CONVERT_FROM_HOST_TO_SMC_US(mem_level->ActivityLevel); + CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinVoltage); + } + + return result; +} + +static int vegam_populate_all_memory_levels(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct smu7_dpm_table *dpm_table = &hw_data->dpm_table; + int result; + /* populate MCLK dpm table to SMU7 */ + uint32_t array = smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, MemoryLevel); + uint32_t array_size = sizeof(SMU75_Discrete_MemoryLevel) * + SMU75_MAX_LEVELS_MEMORY; + struct SMU75_Discrete_MemoryLevel *levels = + smu_data->smc_state_table.MemoryLevel; + uint32_t i; + + for (i = 0; i < dpm_table->mclk_table.count; i++) { + PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value), + "can not populate memory level as memory clock is zero", + return -EINVAL); + result = vegam_populate_single_memory_level(hwmgr, + dpm_table->mclk_table.dpm_levels[i].value, + &levels[i]); + + if (result) + return result; + + levels[i].UpHyst = (uint8_t) + (MemoryDPMTuning_VEGAM >> DPMTuning_Uphyst_Shift); + levels[i].DownHyst = (uint8_t) + (MemoryDPMTuning_VEGAM >> DPMTuning_Downhyst_Shift); + } + + smu_data->smc_state_table.MemoryDpmLevelCount = + (uint8_t)dpm_table->mclk_table.count; + hw_data->dpm_level_enable_mask.mclk_dpm_enable_mask = + phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table); + + for (i = 0; i < dpm_table->mclk_table.count; i++) + levels[i].EnabledForActivity = + (hw_data->dpm_level_enable_mask.mclk_dpm_enable_mask >> i) & 0x1; + + levels[dpm_table->mclk_table.count - 1].DisplayWatermark = + PPSMC_DISPLAY_WATERMARK_HIGH; + + /* level count will send to smc once at init smc table and never change */ + result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels, + (uint32_t)array_size, SMC_RAM_END); + + return result; +} + +static int vegam_populate_mvdd_value(struct pp_hwmgr *hwmgr, + uint32_t mclk, SMIO_Pattern *smio_pat) +{ + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t i = 0; + + if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) { + /* find mvdd value which clock is more than request */ + for (i = 0; i < table_info->vdd_dep_on_mclk->count; i++) { + if (mclk <= table_info->vdd_dep_on_mclk->entries[i].clk) { + smio_pat->Voltage = data->mvdd_voltage_table.entries[i].value; + break; + } + } + PP_ASSERT_WITH_CODE(i < table_info->vdd_dep_on_mclk->count, + "MVDD Voltage is outside the supported range.", + return -EINVAL); + } else + return -EINVAL; + + return 0; +} + +static int vegam_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, + SMU75_Discrete_DpmTable *table) +{ + int result = 0; + uint32_t sclk_frequency; + const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + SMIO_Pattern vol_level; + uint32_t mvdd; + uint16_t us_mvdd; + + table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; + + /* Get MinVoltage and Frequency from DPM0, + * already converted to SMC_UL */ + sclk_frequency = data->vbios_boot_state.sclk_bootup_value; + result = vegam_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, + sclk_frequency, + &table->ACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE(!result, + "Cannot find ACPI VDDC voltage value " + "in Clock Dependency Table", + ); + + result = vegam_calculate_sclk_params(hwmgr, sclk_frequency, + &(table->ACPILevel.SclkSetting)); + PP_ASSERT_WITH_CODE(!result, + "Error retrieving Engine Clock dividers from VBIOS.", + return result); + + table->ACPILevel.DeepSleepDivId = 0; + table->ACPILevel.CcPwrDynRm = 0; + table->ACPILevel.CcPwrDynRm1 = 0; + + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.MinVoltage); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm); + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1); + + CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkSetting.SclkFrequency); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_frac); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_fcw_int); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_up_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_down_slew_rate); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_int); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac); + CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate); + + + /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ + table->MemoryACPILevel.MclkFrequency = data->vbios_boot_state.mclk_bootup_value; + result = vegam_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, + table->MemoryACPILevel.MclkFrequency, + &table->MemoryACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDCI voltage value " + "in Clock Dependency Table", + ); + + us_mvdd = 0; + if ((SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) || + (data->mclk_dpm_key_disabled)) + us_mvdd = data->vbios_boot_state.mvdd_bootup_value; + else { + if (!vegam_populate_mvdd_value(hwmgr, + data->dpm_table.mclk_table.dpm_levels[0].value, + &vol_level)) + us_mvdd = vol_level.Voltage; + } + + if (!vegam_populate_mvdd_value(hwmgr, 0, &vol_level)) + table->MemoryACPILevel.MinMvdd = PP_HOST_TO_SMC_UL(vol_level.Voltage); + else + table->MemoryACPILevel.MinMvdd = 0; + + table->MemoryACPILevel.StutterEnable = false; + + table->MemoryACPILevel.EnabledForThrottle = 0; + table->MemoryACPILevel.EnabledForActivity = 0; + table->MemoryACPILevel.UpHyst = 0; + table->MemoryACPILevel.DownHyst = 100; + table->MemoryACPILevel.VoltageDownHyst = 0; + table->MemoryACPILevel.ActivityLevel = + PP_HOST_TO_SMC_US(data->current_profile_setting.mclk_activity); + + CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage); + + return result; +} + +static int vegam_populate_smc_vce_level(struct pp_hwmgr *hwmgr, + SMU75_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t vddci; + + table->VceLevelCount = (uint8_t)(mm_table->count); + table->VceBootLevel = 0; + + for (count = 0; count < table->VceLevelCount; count++) { + table->VceLevel[count].Frequency = mm_table->entries[count].eclk; + table->VceLevel[count].MinVoltage = 0; + table->VceLevel[count].MinVoltage |= + (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + + table->VceLevel[count].MinVoltage |= + (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT; + + /*retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->VceLevel[count].Frequency, ÷rs); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for VCE engine clock", + return result); + + table->VceLevel[count].Divider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].Frequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].MinVoltage); + } + return result; +} + +static int vegam_populate_smc_samu_level(struct pp_hwmgr *hwmgr, + SMU75_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t vddci; + + table->SamuBootLevel = 0; + table->SamuLevelCount = (uint8_t)(mm_table->count); + + for (count = 0; count < table->SamuLevelCount; count++) { + /* not sure whether we need evclk or not */ + table->SamuLevel[count].MinVoltage = 0; + table->SamuLevel[count].Frequency = mm_table->entries[count].samclock; + table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * + VOLTAGE_SCALE) << VDDC_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->SamuLevel[count].Frequency, ÷rs); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for samu clock", return result); + + table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage); + } + return result; +} + +static int vegam_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr, + int32_t eng_clock, int32_t mem_clock, + SMU75_Discrete_MCArbDramTimingTableEntry *arb_regs) +{ + uint32_t dram_timing; + uint32_t dram_timing2; + uint32_t burst_time; + uint32_t rfsh_rate; + uint32_t misc3; + + int result; + + result = atomctrl_set_engine_dram_timings_rv770(hwmgr, + eng_clock, mem_clock); + PP_ASSERT_WITH_CODE(result == 0, + "Error calling VBIOS to set DRAM_TIMING.", + return result); + + dram_timing = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING); + dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2); + burst_time = cgs_read_register(hwmgr->device, mmMC_ARB_BURST_TIME); + rfsh_rate = cgs_read_register(hwmgr->device, mmMC_ARB_RFSH_RATE); + misc3 = cgs_read_register(hwmgr->device, mmMC_ARB_MISC3); + + arb_regs->McArbDramTiming = PP_HOST_TO_SMC_UL(dram_timing); + arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dram_timing2); + arb_regs->McArbBurstTime = PP_HOST_TO_SMC_UL(burst_time); + arb_regs->McArbRfshRate = PP_HOST_TO_SMC_UL(rfsh_rate); + arb_regs->McArbMisc3 = PP_HOST_TO_SMC_UL(misc3); + + return 0; +} + +static int vegam_program_memory_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct SMU75_Discrete_MCArbDramTimingTable arb_regs; + uint32_t i, j; + int result = 0; + + memset(&arb_regs, 0, sizeof(SMU75_Discrete_MCArbDramTimingTable)); + + for (i = 0; i < hw_data->dpm_table.sclk_table.count; i++) { + for (j = 0; j < hw_data->dpm_table.mclk_table.count; j++) { + result = vegam_populate_memory_timing_parameters(hwmgr, + hw_data->dpm_table.sclk_table.dpm_levels[i].value, + hw_data->dpm_table.mclk_table.dpm_levels[j].value, + &arb_regs.entries[i][j]); + if (result) + return result; + } + } + + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.arb_table_start, + (uint8_t *)&arb_regs, + sizeof(SMU75_Discrete_MCArbDramTimingTable), + SMC_RAM_END); + return result; +} + +static int vegam_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + int result = -EINVAL; + uint8_t count; + struct pp_atomctrl_clock_dividers_vi dividers; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = + table_info->mm_dep_table; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + uint32_t vddci; + + table->UvdLevelCount = (uint8_t)(mm_table->count); + table->UvdBootLevel = 0; + + for (count = 0; count < table->UvdLevelCount; count++) { + table->UvdLevel[count].MinVoltage = 0; + table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk; + table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk; + table->UvdLevel[count].MinVoltage |= + (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + + if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; + table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT; + + /* retrieve divider value for VBIOS */ + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->UvdLevel[count].VclkFrequency, ÷rs); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for Vclk clock", return result); + + table->UvdLevel[count].VclkDivider = (uint8_t)dividers.pll_post_divider; + + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + table->UvdLevel[count].DclkFrequency, ÷rs); + PP_ASSERT_WITH_CODE((0 == result), + "can not find divide id for Dclk clock", return result); + + table->UvdLevel[count].DclkDivider = (uint8_t)dividers.pll_post_divider; + + CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency); + CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage); + } + + return result; +} + +static int vegam_populate_smc_boot_level(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + int result = 0; + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + table->GraphicsBootLevel = 0; + table->MemoryBootLevel = 0; + + /* find boot level from dpm table */ + result = phm_find_boot_level(&(data->dpm_table.sclk_table), + data->vbios_boot_state.sclk_bootup_value, + (uint32_t *)&(table->GraphicsBootLevel)); + + result = phm_find_boot_level(&(data->dpm_table.mclk_table), + data->vbios_boot_state.mclk_bootup_value, + (uint32_t *)&(table->MemoryBootLevel)); + + table->BootVddc = data->vbios_boot_state.vddc_bootup_value * + VOLTAGE_SCALE; + table->BootVddci = data->vbios_boot_state.vddci_bootup_value * + VOLTAGE_SCALE; + table->BootMVdd = data->vbios_boot_state.mvdd_bootup_value * + VOLTAGE_SCALE; + + CONVERT_FROM_HOST_TO_SMC_US(table->BootVddc); + CONVERT_FROM_HOST_TO_SMC_US(table->BootVddci); + CONVERT_FROM_HOST_TO_SMC_US(table->BootMVdd); + + return 0; +} + +static int vegam_populate_smc_initial_state(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint8_t count, level; + + count = (uint8_t)(table_info->vdd_dep_on_sclk->count); + + for (level = 0; level < count; level++) { + if (table_info->vdd_dep_on_sclk->entries[level].clk >= + hw_data->vbios_boot_state.sclk_bootup_value) { + smu_data->smc_state_table.GraphicsBootLevel = level; + break; + } + } + + count = (uint8_t)(table_info->vdd_dep_on_mclk->count); + for (level = 0; level < count; level++) { + if (table_info->vdd_dep_on_mclk->entries[level].clk >= + hw_data->vbios_boot_state.mclk_bootup_value) { + smu_data->smc_state_table.MemoryBootLevel = level; + break; + } + } + + return 0; +} + +static uint16_t scale_fan_gain_settings(uint16_t raw_setting) +{ + uint32_t tmp; + tmp = raw_setting * 4096 / 100; + return (uint16_t)tmp; +} + +static int vegam_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults; + SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_cac_tdp_table *cac_dtp_table = table_info->cac_dtp_table; + struct pp_advance_fan_control_parameters *fan_table = + &hwmgr->thermal_controller.advanceFanControlParameters; + int i, j, k; + const uint16_t *pdef1; + const uint16_t *pdef2; + + table->DefaultTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128)); + table->TargetTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128)); + + PP_ASSERT_WITH_CODE(cac_dtp_table->usTargetOperatingTemp <= 255, + "Target Operating Temp is out of Range!", + ); + + table->TemperatureLimitEdge = PP_HOST_TO_SMC_US( + cac_dtp_table->usTargetOperatingTemp * 256); + table->TemperatureLimitHotspot = PP_HOST_TO_SMC_US( + cac_dtp_table->usTemperatureLimitHotspot * 256); + table->FanGainEdge = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainEdge)); + table->FanGainHotspot = PP_HOST_TO_SMC_US( + scale_fan_gain_settings(fan_table->usFanGainHotspot)); + + pdef1 = defaults->BAPMTI_R; + pdef2 = defaults->BAPMTI_RC; + + for (i = 0; i < SMU75_DTE_ITERATIONS; i++) { + for (j = 0; j < SMU75_DTE_SOURCES; j++) { + for (k = 0; k < SMU75_DTE_SINKS; k++) { + table->BAPMTI_R[i][j][k] = PP_HOST_TO_SMC_US(*pdef1); + table->BAPMTI_RC[i][j][k] = PP_HOST_TO_SMC_US(*pdef2); + pdef1++; + pdef2++; + } + } + } + + return 0; +} + +static int vegam_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) +{ + uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min; + struct vegam_smumgr *smu_data = + (struct vegam_smumgr *)(hwmgr->smu_backend); + + uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0; + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = + table_info->vdd_dep_on_sclk; + uint32_t mask = (1 << ((STRAP_ASIC_RO_MSB - STRAP_ASIC_RO_LSB) + 1)) - 1; + + stretch_amount = (uint8_t)table_info->cac_dtp_table->usClockStretchAmount; + + atomctrl_read_efuse(hwmgr, STRAP_ASIC_RO_LSB, STRAP_ASIC_RO_MSB, + mask, &efuse); + + min = 1200; + max = 2500; + + ro = efuse * (max - min) / 255 + min; + + /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ + for (i = 0; i < sclk_table->count; i++) { + smu_data->smc_state_table.Sclk_CKS_masterEn0_7 |= + sclk_table->entries[i].cks_enable << i; + volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) * + 136418 - (ro - 70) * 1000000) / + (2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000)); + volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 * + 3232 - (ro - 65) * 1000000) / + (2522480 - sclk_table->entries[i].clk/100 * 115764/100)); + + if (volt_without_cks >= volt_with_cks) + volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + + sclk_table->entries[i].cks_voffset) * 100 + 624) / 625); + + smu_data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; + } + + smu_data->smc_state_table.LdoRefSel = + (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? + table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 5; + /* Populate CKS Lookup Table */ + if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) + stretch_amount2 = 0; + else if (stretch_amount == 3 || stretch_amount == 4) + stretch_amount2 = 1; + else { + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher); + PP_ASSERT_WITH_CODE(false, + "Stretch Amount in PPTable not supported\n", + return -EINVAL); + } + + value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL); + value &= 0xFFFFFFFE; + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value); + + return 0; +} + +static bool vegam_is_hw_avfs_present(struct pp_hwmgr *hwmgr) +{ + uint32_t efuse; + + efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixSMU_EFUSE_0 + (49 * 4)); + efuse &= 0x00000001; + + if (efuse) + return true; + + return false; +} + +static int vegam_populate_avfs_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table); + int result = 0; + struct pp_atom_ctrl__avfs_parameters avfs_params = {0}; + AVFS_meanNsigma_t AVFS_meanNsigma = { {0} }; + AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} }; + uint32_t tmp, i; + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)hwmgr->pptable; + struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = + table_info->vdd_dep_on_sclk; + + if (!hwmgr->avfs_supported) + return 0; + + result = atomctrl_get_avfs_information(hwmgr, &avfs_params); + + if (0 == result) { + table->BTCGB_VDROOP_TABLE[0].a0 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0); + table->BTCGB_VDROOP_TABLE[0].a1 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1); + table->BTCGB_VDROOP_TABLE[0].a2 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2); + table->BTCGB_VDROOP_TABLE[1].a0 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0); + table->BTCGB_VDROOP_TABLE[1].a1 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1); + table->BTCGB_VDROOP_TABLE[1].a2 = + PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2); + table->AVFSGB_FUSE_TABLE[0].m1 = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1); + table->AVFSGB_FUSE_TABLE[0].m2 = + PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2); + table->AVFSGB_FUSE_TABLE[0].b = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b); + table->AVFSGB_FUSE_TABLE[0].m1_shift = 24; + table->AVFSGB_FUSE_TABLE[0].m2_shift = 12; + table->AVFSGB_FUSE_TABLE[1].m1 = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1); + table->AVFSGB_FUSE_TABLE[1].m2 = + PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2); + table->AVFSGB_FUSE_TABLE[1].b = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b); + table->AVFSGB_FUSE_TABLE[1].m1_shift = 24; + table->AVFSGB_FUSE_TABLE[1].m2_shift = 12; + table->MaxVoltage = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv); + AVFS_meanNsigma.Aconstant[0] = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0); + AVFS_meanNsigma.Aconstant[1] = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1); + AVFS_meanNsigma.Aconstant[2] = + PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2); + AVFS_meanNsigma.DC_tol_sigma = + PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma); + AVFS_meanNsigma.Platform_mean = + PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean); + AVFS_meanNsigma.PSM_Age_CompFactor = + PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor); + AVFS_meanNsigma.Platform_sigma = + PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma); + + for (i = 0; i < sclk_table->count; i++) { + AVFS_meanNsigma.Static_Voltage_Offset[i] = + (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625); + AVFS_SclkOffset.Sclk_Offset[i] = + PP_HOST_TO_SMC_US((uint16_t) + (sclk_table->entries[i].sclk_offset) / 100); + } + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, AvfsMeanNSigma), + &tmp, SMC_RAM_END); + smu7_copy_bytes_to_smc(hwmgr, + tmp, + (uint8_t *)&AVFS_meanNsigma, + sizeof(AVFS_meanNsigma_t), + SMC_RAM_END); + + result = smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, AvfsSclkOffsetTable), + &tmp, SMC_RAM_END); + smu7_copy_bytes_to_smc(hwmgr, + tmp, + (uint8_t *)&AVFS_SclkOffset, + sizeof(AVFS_Sclk_Offset_t), + SMC_RAM_END); + + data->avfs_vdroop_override_setting = + (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) | + (avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT); + data->apply_avfs_cks_off_voltage = + (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false; + } + return result; +} + +static int vegam_populate_vr_config(struct pp_hwmgr *hwmgr, + struct SMU75_Discrete_DpmTable *table) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = + (struct vegam_smumgr *)(hwmgr->smu_backend); + uint16_t config; + + config = VR_MERGED_WITH_VDDC; + table->VRConfig |= (config << VRCONF_VDDGFX_SHIFT); + + /* Set Vddc Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) { + config = VR_SVI2_PLANE_1; + table->VRConfig |= config; + } else { + PP_ASSERT_WITH_CODE(false, + "VDDC should be on SVI2 control in merged mode!", + ); + } + /* Set Vddci Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) { + config = VR_SVI2_PLANE_2; /* only in merged mode */ + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) { + config = VR_SMIO_PATTERN_1; + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } else { + config = VR_STATIC_VOLTAGE; + table->VRConfig |= (config << VRCONF_VDDCI_SHIFT); + } + /* Set Mvdd Voltage Controller */ + if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) { + if (config != VR_SVI2_PLANE_2) { + config = VR_SVI2_PLANE_2; + table->VRConfig |= (config << VRCONF_MVDD_SHIFT); + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, + smu_data->smu7_data.soft_regs_start + + offsetof(SMU75_SoftRegisters, AllowMvddSwitch), + 0x1); + } else { + PP_ASSERT_WITH_CODE(false, + "SVI2 Plane 2 is already taken, set MVDD as Static",); + config = VR_STATIC_VOLTAGE; + table->VRConfig = (config << VRCONF_MVDD_SHIFT); + } + } else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) { + config = VR_SMIO_PATTERN_2; + table->VRConfig = (config << VRCONF_MVDD_SHIFT); + cgs_write_ind_register(hwmgr->device, + CGS_IND_REG__SMC, + smu_data->smu7_data.soft_regs_start + + offsetof(SMU75_SoftRegisters, AllowMvddSwitch), + 0x1); + } else { + config = VR_STATIC_VOLTAGE; + table->VRConfig |= (config << VRCONF_MVDD_SHIFT); + } + + return 0; +} + +static int vegam_populate_svi_load_line(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults; + + smu_data->power_tune_table.SviLoadLineEn = defaults->SviLoadLineEn; + smu_data->power_tune_table.SviLoadLineVddC = defaults->SviLoadLineVddC; + smu_data->power_tune_table.SviLoadLineTrimVddC = 3; + smu_data->power_tune_table.SviLoadLineOffsetVddC = 0; + + return 0; +} + +static int vegam_populate_tdc_limit(struct pp_hwmgr *hwmgr) +{ + uint16_t tdc_limit; + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults; + + tdc_limit = (uint16_t)(table_info->cac_dtp_table->usTDC * 128); + smu_data->power_tune_table.TDC_VDDC_PkgLimit = + CONVERT_FROM_HOST_TO_SMC_US(tdc_limit); + smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc = + defaults->TDC_VDDC_ThrottleReleaseLimitPerc; + smu_data->power_tune_table.TDC_MAWt = defaults->TDC_MAWt; + + return 0; +} + +static int vegam_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults; + uint32_t temp; + + if (smu7_read_smc_sram_dword(hwmgr, + fuse_table_offset + + offsetof(SMU75_Discrete_PmFuses, TdcWaterfallCtl), + (uint32_t *)&temp, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to read PmFuses.DW6 (SviLoadLineEn) from SMC Failed!", + return -EINVAL); + else { + smu_data->power_tune_table.TdcWaterfallCtl = defaults->TdcWaterfallCtl; + smu_data->power_tune_table.LPMLTemperatureMin = + (uint8_t)((temp >> 16) & 0xff); + smu_data->power_tune_table.LPMLTemperatureMax = + (uint8_t)((temp >> 8) & 0xff); + smu_data->power_tune_table.Reserved = (uint8_t)(temp & 0xff); + } + return 0; +} + +static int vegam_populate_temperature_scaler(struct pp_hwmgr *hwmgr) +{ + int i; + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. */ + for (i = 0; i < 16; i++) + smu_data->power_tune_table.LPMLTemperatureScaler[i] = 0; + + return 0; +} + +static int vegam_populate_fuzzy_fan(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + +/* TO DO move to hwmgr */ + if ((hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity & (1 << 15)) + || 0 == hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity) + hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity = + hwmgr->thermal_controller.advanceFanControlParameters.usDefaultFanOutputSensitivity; + + smu_data->power_tune_table.FuzzyFan_PwmSetDelta = PP_HOST_TO_SMC_US( + hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity); + return 0; +} + +static int vegam_populate_gnb_lpml(struct pp_hwmgr *hwmgr) +{ + int i; + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + /* Currently not used. Set all to zero. */ + for (i = 0; i < 16; i++) + smu_data->power_tune_table.GnbLPML[i] = 0; + + return 0; +} + +static int vegam_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint16_t hi_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd; + uint16_t lo_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd; + struct phm_cac_tdp_table *cac_table = table_info->cac_dtp_table; + + hi_sidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256); + lo_sidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256); + + smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd = + CONVERT_FROM_HOST_TO_SMC_US(hi_sidd); + smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd = + CONVERT_FROM_HOST_TO_SMC_US(lo_sidd); + + return 0; +} + +static int vegam_populate_pm_fuses(struct pp_hwmgr *hwmgr) +{ + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + uint32_t pm_fuse_table_offset; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PowerContainment)) { + if (smu7_read_smc_sram_dword(hwmgr, + SMU7_FIRMWARE_HEADER_LOCATION + + offsetof(SMU75_Firmware_Header, PmFuseTable), + &pm_fuse_table_offset, SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to get pm_fuse_table_offset Failed!", + return -EINVAL); + + if (vegam_populate_svi_load_line(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate SviLoadLine Failed!", + return -EINVAL); + + if (vegam_populate_tdc_limit(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TDCLimit Failed!", return -EINVAL); + + if (vegam_populate_dw8(hwmgr, pm_fuse_table_offset)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate TdcWaterfallCtl, " + "LPMLTemperature Min and Max Failed!", + return -EINVAL); + + if (0 != vegam_populate_temperature_scaler(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate LPMLTemperatureScaler Failed!", + return -EINVAL); + + if (vegam_populate_fuzzy_fan(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate Fuzzy Fan Control parameters Failed!", + return -EINVAL); + + if (vegam_populate_gnb_lpml(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate GnbLPML Failed!", + return -EINVAL); + + if (vegam_populate_bapm_vddc_base_leakage_sidd(hwmgr)) + PP_ASSERT_WITH_CODE(false, + "Attempt to populate BapmVddCBaseLeakage Hi and Lo " + "Sidd Failed!", return -EINVAL); + + if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset, + (uint8_t *)&smu_data->power_tune_table, + (sizeof(struct SMU75_Discrete_PmFuses) - PMFUSES_AVFSSIZE), + SMC_RAM_END)) + PP_ASSERT_WITH_CODE(false, + "Attempt to download PmFuseTable Failed!", + return -EINVAL); + } + return 0; +} + +static int vegam_enable_reconfig_cus(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = hwmgr->adev; + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_EnableModeSwitchRLCNotification, + adev->gfx.cu_info.number); + + return 0; +} + +static int vegam_init_smc_table(struct pp_hwmgr *hwmgr) +{ + int result; + struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); + + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table); + uint8_t i; + struct pp_atomctrl_gpio_pin_assignment gpio_pin; + struct phm_ppt_v1_gpio_table *gpio_table = + (struct phm_ppt_v1_gpio_table *)table_info->gpio_table; + pp_atomctrl_clock_dividers_vi dividers; + + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + + vegam_initialize_power_tune_defaults(hwmgr); + + if (SMU7_VOLTAGE_CONTROL_NONE != hw_data->voltage_control) + vegam_populate_smc_voltage_tables(hwmgr, table); + + table->SystemFlags = 0; + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_StepVddc)) + table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; + + if (hw_data->is_memory_gddr5) + table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; + + if (hw_data->ulv_supported && table_info->us_ulv_voltage_offset) { + result = vegam_populate_ulv_state(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize ULV state!", return result); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, + ixCG_ULV_PARAMETER, SMU7_CGULVPARAMETER_DFLT); + } + + result = vegam_populate_smc_link_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Link Level!", return result); + + result = vegam_populate_all_graphic_levels(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Graphics Level!", return result); + + result = vegam_populate_all_memory_levels(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Memory Level!", return result); + + result = vegam_populate_smc_acpi_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize ACPI Level!", return result); + + result = vegam_populate_smc_vce_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize VCE Level!", return result); + + result = vegam_populate_smc_samu_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize SAMU Level!", return result); + + /* Since only the initial state is completely set up at this point + * (the other states are just copies of the boot state) we only + * need to populate the ARB settings for the initial state. + */ + result = vegam_program_memory_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to Write ARB settings for the initial state.", return result); + + result = vegam_populate_smc_uvd_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize UVD Level!", return result); + + result = vegam_populate_smc_boot_level(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Boot Level!", return result); + + result = vegam_populate_smc_initial_state(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to initialize Boot State!", return result); + + result = vegam_populate_bapm_parameters_in_dpm_table(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate BAPM Parameters!", return result); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_ClockStretcher)) { + result = vegam_populate_clock_stretcher_data_table(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate Clock Stretcher Data Table!", + return result); + } + + result = vegam_populate_avfs_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate AVFS Parameters!", return result;); + + table->CurrSclkPllRange = 0xff; + table->GraphicsVoltageChangeEnable = 1; + table->GraphicsThermThrottleEnable = 1; + table->GraphicsInterval = 1; + table->VoltageInterval = 1; + table->ThermalInterval = 1; + table->TemperatureLimitHigh = + table_info->cac_dtp_table->usTargetOperatingTemp * + SMU7_Q88_FORMAT_CONVERSION_UNIT; + table->TemperatureLimitLow = + (table_info->cac_dtp_table->usTargetOperatingTemp - 1) * + SMU7_Q88_FORMAT_CONVERSION_UNIT; + table->MemoryVoltageChangeEnable = 1; + table->MemoryInterval = 1; + table->VoltageResponseTime = 0; + table->PhaseResponseTime = 0; + table->MemoryThermThrottleEnable = 1; + + PP_ASSERT_WITH_CODE(hw_data->dpm_table.pcie_speed_table.count >= 1, + "There must be 1 or more PCIE levels defined in PPTable.", + return -EINVAL); + table->PCIeBootLinkLevel = + hw_data->dpm_table.pcie_speed_table.count; + table->PCIeGenInterval = 1; + table->VRConfig = 0; + + result = vegam_populate_vr_config(hwmgr, table); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate VRConfig setting!", return result); + + table->ThermGpio = 17; + table->SclkStepSize = 0x4000; + + if (atomctrl_get_pp_assign_pin(hwmgr, + VDDC_VRHOT_GPIO_PINID, &gpio_pin)) { + table->VRHotGpio = gpio_pin.uc_gpio_pin_bit_shift; + if (gpio_table) + table->VRHotLevel = + table_info->gpio_table->vrhot_triggered_sclk_dpm_index; + } else { + table->VRHotGpio = SMU7_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot); + } + + if (atomctrl_get_pp_assign_pin(hwmgr, + PP_AC_DC_SWITCH_GPIO_PINID, &gpio_pin)) { + table->AcDcGpio = gpio_pin.uc_gpio_pin_bit_shift; + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition) && + !smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UseNewGPIOScheme)) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SMCtoPPLIBAcdcGpioScheme); + } else { + table->AcDcGpio = SMU7_UNUSED_GPIO_PIN; + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_AutomaticDCTransition); + } + + /* Thermal Output GPIO */ + if (atomctrl_get_pp_assign_pin(hwmgr, + THERMAL_INT_OUTPUT_GPIO_PINID, &gpio_pin)) { + table->ThermOutGpio = gpio_pin.uc_gpio_pin_bit_shift; + + /* For porlarity read GPIOPAD_A with assigned Gpio pin + * since VBIOS will program this register to set 'inactive state', + * driver can then determine 'active state' from this and + * program SMU with correct polarity + */ + table->ThermOutPolarity = + (0 == (cgs_read_register(hwmgr->device, mmGPIOPAD_A) & + (1 << gpio_pin.uc_gpio_pin_bit_shift))) ? 1:0; + table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_ONLY; + + /* if required, combine VRHot/PCC with thermal out GPIO */ + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_RegulatorHot) && + phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_CombinePCCWithThermalSignal)) + table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_VRHOT; + } else { + table->ThermOutGpio = 17; + table->ThermOutPolarity = 1; + table->ThermOutMode = SMU7_THERM_OUT_MODE_DISABLE; + } + + /* Populate BIF_SCLK levels into SMC DPM table */ + for (i = 0; i <= hw_data->dpm_table.pcie_speed_table.count; i++) { + result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, + smu_data->bif_sclk_table[i], ÷rs); + PP_ASSERT_WITH_CODE(!result, + "Can not find DFS divide id for Sclk", + return result); + + if (i == 0) + table->Ulv.BifSclkDfs = + PP_HOST_TO_SMC_US((uint16_t)(dividers.pll_post_divider)); + else + table->LinkLevel[i - 1].BifSclkDfs = + PP_HOST_TO_SMC_US((uint16_t)(dividers.pll_post_divider)); + } + + for (i = 0; i < SMU75_MAX_ENTRIES_SMIO; i++) + table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]); + + CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags); + CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1); + CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask2); + CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize); + CONVERT_FROM_HOST_TO_SMC_UL(table->CurrSclkPllRange); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh); + CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow); + CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime); + CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime); + + /* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */ + result = smu7_copy_bytes_to_smc(hwmgr, + smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, SystemFlags), + (uint8_t *)&(table->SystemFlags), + sizeof(SMU75_Discrete_DpmTable) - 3 * sizeof(SMU75_PIDController), + SMC_RAM_END); + PP_ASSERT_WITH_CODE(!result, + "Failed to upload dpm data to SMC memory!", return result); + + result = vegam_populate_pm_fuses(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to populate PM fuses to SMC memory!", return result); + + result = vegam_enable_reconfig_cus(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to enable reconfigurable CUs!", return result); + + return 0; +} + +static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member) +{ + switch (type) { + case SMU_SoftRegisters: + switch (member) { + case HandshakeDisables: + return offsetof(SMU75_SoftRegisters, HandshakeDisables); + case VoltageChangeTimeout: + return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout); + case AverageGraphicsActivity: + return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity); + case PreVBlankGap: + return offsetof(SMU75_SoftRegisters, PreVBlankGap); + case VBlankTimeout: + return offsetof(SMU75_SoftRegisters, VBlankTimeout); + case UcodeLoadStatus: + return offsetof(SMU75_SoftRegisters, UcodeLoadStatus); + case DRAM_LOG_ADDR_H: + return offsetof(SMU75_SoftRegisters, DRAM_LOG_ADDR_H); + case DRAM_LOG_ADDR_L: + return offsetof(SMU75_SoftRegisters, DRAM_LOG_ADDR_L); + case DRAM_LOG_PHY_ADDR_H: + return offsetof(SMU75_SoftRegisters, DRAM_LOG_PHY_ADDR_H); + case DRAM_LOG_PHY_ADDR_L: + return offsetof(SMU75_SoftRegisters, DRAM_LOG_PHY_ADDR_L); + case DRAM_LOG_BUFF_SIZE: + return offsetof(SMU75_SoftRegisters, DRAM_LOG_BUFF_SIZE); + } + case SMU_Discrete_DpmTable: + switch (member) { + case UvdBootLevel: + return offsetof(SMU75_Discrete_DpmTable, UvdBootLevel); + case VceBootLevel: + return offsetof(SMU75_Discrete_DpmTable, VceBootLevel); + case SamuBootLevel: + return offsetof(SMU75_Discrete_DpmTable, SamuBootLevel); + case LowSclkInterruptThreshold: + return offsetof(SMU75_Discrete_DpmTable, LowSclkInterruptThreshold); + } + } + pr_warn("can't get the offset of type %x member %x\n", type, member); + return 0; +} + +static int vegam_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + + if (data->need_update_smu7_dpm_table & + (DPMTABLE_OD_UPDATE_SCLK + + DPMTABLE_UPDATE_SCLK + + DPMTABLE_UPDATE_MCLK)) + return vegam_program_memory_timing_parameters(hwmgr); + + return 0; +} + +static int vegam_update_sclk_threshold(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct vegam_smumgr *smu_data = + (struct vegam_smumgr *)(hwmgr->smu_backend); + int result = 0; + uint32_t low_sclk_interrupt_threshold = 0; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_SclkThrottleLowNotification) + && (data->low_sclk_interrupt_threshold != 0)) { + low_sclk_interrupt_threshold = + data->low_sclk_interrupt_threshold; + + CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold); + + result = smu7_copy_bytes_to_smc( + hwmgr, + smu_data->smu7_data.dpm_table_start + + offsetof(SMU75_Discrete_DpmTable, + LowSclkInterruptThreshold), + (uint8_t *)&low_sclk_interrupt_threshold, + sizeof(uint32_t), + SMC_RAM_END); + } + PP_ASSERT_WITH_CODE((result == 0), + "Failed to update SCLK threshold!", return result); + + result = vegam_program_mem_timing_parameters(hwmgr); + PP_ASSERT_WITH_CODE((result == 0), + "Failed to program memory timing parameters!", + ); + + return result; +} + +int vegam_thermal_avfs_enable(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + int ret; + + if (!hwmgr->avfs_supported) + return 0; + + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs); + if (!ret) { + if (data->apply_avfs_cks_off_voltage) + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ApplyAvfsCksOffVoltage); + } + + return ret; +} + +static int vegam_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) +{ + PP_ASSERT_WITH_CODE(hwmgr->thermal_controller.fanInfo.bNoFan, + "VBIOS fan info is not correct!", + ); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_MicrocodeFanControl); + return 0; +} + +const struct pp_smumgr_func vegam_smu_funcs = { + .smu_init = vegam_smu_init, + .smu_fini = smu7_smu_fini, + .start_smu = vegam_start_smu, + .check_fw_load_finish = smu7_check_fw_load_finish, + .request_smu_load_fw = smu7_reload_firmware, + .request_smu_load_specific_fw = NULL, + .send_msg_to_smc = smu7_send_msg_to_smc, + .send_msg_to_smc_with_parameter = smu7_send_msg_to_smc_with_parameter, + .process_firmware_header = vegam_process_firmware_header, + .is_dpm_running = vegam_is_dpm_running, + .get_mac_definition = vegam_get_mac_definition, + .update_smc_table = vegam_update_smc_table, + .init_smc_table = vegam_init_smc_table, + .get_offsetof = vegam_get_offsetof, + .populate_all_graphic_levels = vegam_populate_all_graphic_levels, + .populate_all_memory_levels = vegam_populate_all_memory_levels, + .update_sclk_threshold = vegam_update_sclk_threshold, + .is_hw_avfs_present = vegam_is_hw_avfs_present, + .thermal_avfs_enable = vegam_thermal_avfs_enable, + .thermal_setup_fan_table = vegam_thermal_setup_fan_table, +}; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h new file mode 100644 index 000000000000..2b6558238500 --- /dev/null +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h @@ -0,0 +1,75 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _VEGAM_SMUMANAGER_H +#define _VEGAM_SMUMANAGER_H + + +#include <pp_endian.h> +#include "smu75_discrete.h" +#include "smu7_smumgr.h" + +#define SMC_RAM_END 0x40000 + +#define DPMTuning_Uphyst_Shift 0 +#define DPMTuning_Downhyst_Shift 8 +#define DPMTuning_Activity_Shift 16 + +#define GraphicsDPMTuning_VEGAM 0x001e6400 +#define MemoryDPMTuning_VEGAM 0x000f3c0a +#define SclkDPMTuning_VEGAM 0x002d000a +#define MclkDPMTuning_VEGAM 0x001f100a + + +struct vegam_pt_defaults { + uint8_t SviLoadLineEn; + uint8_t SviLoadLineVddC; + uint8_t TDC_VDDC_ThrottleReleaseLimitPerc; + uint8_t TDC_MAWt; + uint8_t TdcWaterfallCtl; + uint8_t DTEAmbientTempBase; + + uint32_t DisplayCac; + uint32_t BAPM_TEMP_GRADIENT; + uint16_t BAPMTI_R[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS]; + uint16_t BAPMTI_RC[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS]; +}; + +struct vegam_range_table { + uint32_t trans_lower_frequency; /* in 10khz */ + uint32_t trans_upper_frequency; +}; + +struct vegam_smumgr { + struct smu7_smumgr smu7_data; + uint8_t protected_mode; + SMU75_Discrete_DpmTable smc_state_table; + struct SMU75_Discrete_Ulv ulv_setting; + struct SMU75_Discrete_PmFuses power_tune_table; + struct vegam_range_table range_table[NUM_SCLK_RANGE]; + const struct vegam_pt_defaults *power_tune_defaults; + uint32_t bif_sclk_table[SMU75_MAX_LEVELS_LINK]; +}; + + +#endif diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 5cc4a51e1e92..bf6cad6c9178 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -84,6 +84,7 @@ config DRM_SIL_SII8620 tristate "Silicon Image SII8620 HDMI/MHL bridge" depends on OF select DRM_KMS_HELPER + imply EXTCON select INPUT select RC_CORE help diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c index 498d5948d1a8..9837c8d69e69 100644 --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c @@ -56,7 +56,9 @@ static int dumb_vga_get_modes(struct drm_connector *connector) } drm_mode_connector_update_edid_property(connector, edid); - return drm_add_edid_modes(connector, edid); + ret = drm_add_edid_modes(connector, edid); + kfree(edid); + return ret; fallback: /* diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 115719059434..178842380f75 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -156,6 +156,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->connectors[i].state); state->connectors[i].ptr = NULL; state->connectors[i].state = NULL; + state->connectors[i].old_state = NULL; + state->connectors[i].new_state = NULL; drm_connector_put(connector); } @@ -170,6 +172,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->crtcs[i].ptr = NULL; state->crtcs[i].state = NULL; + state->crtcs[i].old_state = NULL; + state->crtcs[i].new_state = NULL; } for (i = 0; i < config->num_total_plane; i++) { @@ -182,6 +186,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->planes[i].state); state->planes[i].ptr = NULL; state->planes[i].state = NULL; + state->planes[i].old_state = NULL; + state->planes[i].new_state = NULL; } for (i = 0; i < state->num_private_objs; i++) { @@ -191,6 +197,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->private_objs[i].state); state->private_objs[i].ptr = NULL; state->private_objs[i].state = NULL; + state->private_objs[i].old_state = NULL; + state->private_objs[i].new_state = NULL; } state->num_private_objs = 0; @@ -1940,11 +1948,15 @@ int drm_atomic_check_only(struct drm_atomic_state *state) } } - if (config->funcs->atomic_check) + if (config->funcs->atomic_check) { ret = config->funcs->atomic_check(state->dev, state); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_ATOMIC("atomic driver check for %p failed: %d\n", + state, ret); + return ret; + } + } if (!state->allow_modeset) { for_each_new_crtc_in_state(state, crtc, crtc_state, i) { diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index e394799979a6..6d9b9453707c 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -212,6 +212,7 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor) return -ENOMEM; filp->private_data = priv; + filp->f_mode |= FMODE_UNSIGNED_OFFSET; priv->filp = filp; priv->pid = get_pid(task_pid(current)); priv->minor = minor; diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig index e5bfeca361bd..041a77e400d4 100644 --- a/drivers/gpu/drm/etnaviv/Kconfig +++ b/drivers/gpu/drm/etnaviv/Kconfig @@ -22,11 +22,3 @@ config DRM_ETNAVIV_THERMAL help Compile in support for thermal throttling. Say Y unless you want to risk burning your SoC. - -config DRM_ETNAVIV_REGISTER_LOGGING - bool "enable ETNAVIV register logging" - depends on DRM_ETNAVIV - help - Compile in support for logging register reads/writes in a format - that can be parsed by envytools demsm tool. If enabled, register - logging can be switched on via etnaviv.reglog=y module param. diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index bfc6d4aa3b7c..7fea74861a87 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -1,18 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2014 Etnaviv Project - * Author: Christian Gmeiner <christian.gmeiner@gmail.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2014-2018 Etnaviv Project */ #include "etnaviv_cmdbuf.h" diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c index 68e6d3772ad8..b106e8b288ad 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2015-2018 Etnaviv Project */ #include <linux/kernel.h> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c index 3746827f45eb..a3c44f145c1d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2017 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2017-2018 Etnaviv Project */ #include <drm/drm_mm.h> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h index ddc3f7ea169c..acb68c698363 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2017 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __ETNAVIV_CMDBUF_H__ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index ab50090d066c..e5013a999147 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2015-2018 Etnaviv Project */ #include <linux/component.h> @@ -25,57 +14,6 @@ #include "etnaviv_mmu.h" #include "etnaviv_perfmon.h" -#ifdef CONFIG_DRM_ETNAVIV_REGISTER_LOGGING -static bool reglog; -MODULE_PARM_DESC(reglog, "Enable register read/write logging"); -module_param(reglog, bool, 0600); -#else -#define reglog 0 -#endif - -void __iomem *etnaviv_ioremap(struct platform_device *pdev, const char *name, - const char *dbgname) -{ - struct resource *res; - void __iomem *ptr; - - if (name) - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); - else - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - - ptr = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(ptr)) { - dev_err(&pdev->dev, "failed to ioremap %s: %ld\n", name, - PTR_ERR(ptr)); - return ptr; - } - - if (reglog) - dev_printk(KERN_DEBUG, &pdev->dev, "IO:region %s 0x%p %08zx\n", - dbgname, ptr, (size_t)resource_size(res)); - - return ptr; -} - -void etnaviv_writel(u32 data, void __iomem *addr) -{ - if (reglog) - printk(KERN_DEBUG "IO:W %p %08x\n", addr, data); - - writel(data, addr); -} - -u32 etnaviv_readl(const void __iomem *addr) -{ - u32 val = readl(addr); - - if (reglog) - printk(KERN_DEBUG "IO:R %p %08x\n", addr, val); - - return val; -} - /* * DRM operations: */ @@ -116,7 +54,7 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file) drm_sched_entity_init(&gpu->sched, &ctx->sched_entity[i], &gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL], - 32, NULL); + NULL); } } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h index ddb17ee565e9..d36c7bbe66db 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2015-2018 Etnaviv Project */ #ifndef __ETNAVIV_DRV_H__ @@ -26,6 +15,7 @@ #include <linux/pm_runtime.h> #include <linux/slab.h> #include <linux/list.h> +#include <linux/time64.h> #include <linux/types.h> #include <linux/sizes.h> @@ -101,11 +91,6 @@ void etnaviv_gem_describe_objects(struct etnaviv_drm_private *priv, struct seq_file *m); #endif -void __iomem *etnaviv_ioremap(struct platform_device *pdev, const char *name, - const char *dbgname); -void etnaviv_writel(u32 data, void __iomem *addr); -u32 etnaviv_readl(const void __iomem *addr); - #define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__) #define VERB(fmt, ...) if (0) DRM_DEBUG(fmt"\n", ##__VA_ARGS__) @@ -132,19 +117,27 @@ static inline bool fence_after_eq(u32 a, u32 b) return (s32)(a - b) >= 0; } +/* + * Etnaviv timeouts are specified wrt CLOCK_MONOTONIC, not jiffies. + * We need to calculate the timeout in terms of number of jiffies + * between the specified timeout and the current CLOCK_MONOTONIC time. + */ static inline unsigned long etnaviv_timeout_to_jiffies( const struct timespec *timeout) { - unsigned long timeout_jiffies = timespec_to_jiffies(timeout); - unsigned long start_jiffies = jiffies; - unsigned long remaining_jiffies; + struct timespec64 ts, to; + + to = timespec_to_timespec64(*timeout); + + ktime_get_ts64(&ts); + + /* timeouts before "now" have already expired */ + if (timespec64_compare(&to, &ts) <= 0) + return 0; - if (time_after(start_jiffies, timeout_jiffies)) - remaining_jiffies = 0; - else - remaining_jiffies = timeout_jiffies - start_jiffies; + ts = timespec64_sub(to, ts); - return remaining_jiffies; + return timespec64_to_jiffies(&ts); } #endif /* __ETNAVIV_DRV_H__ */ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 48aef6cf6a42..9146e30e24a6 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2015-2018 Etnaviv Project */ #include <linux/devcoredump.h> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.h b/drivers/gpu/drm/etnaviv/etnaviv_dump.h index 97f2f8db9133..2d916c2667ee 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.h @@ -1,20 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. - * - * Etnaviv devcoredump file definitions */ + #ifndef ETNAVIV_DUMP_H #define ETNAVIV_DUMP_H diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index fcc969fa0e69..209ef1274b80 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2015-2018 Etnaviv Project */ #include <linux/spinlock.h> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h index 93e696fcc14f..76079c2291f8 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2015-2018 Etnaviv Project */ #ifndef __ETNAVIV_GEM_H__ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index 5704305d41e6..0566171f8df2 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -1,18 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2013 Red Hat - * Author: Rob Clark <robdclark@gmail.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2014-2018 Etnaviv Project */ #include <linux/dma-buf.h> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 8a88799bf79b..686f6552db48 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2015-2018 Etnaviv Project */ #include <linux/component.h> @@ -1735,6 +1724,7 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct etnaviv_gpu *gpu; + struct resource *res; int err; gpu = devm_kzalloc(dev, sizeof(*gpu), GFP_KERNEL); @@ -1746,7 +1736,8 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev) mutex_init(&gpu->fence_idr_lock); /* Map registers: */ - gpu->mmio = etnaviv_ioremap(pdev, NULL, dev_name(gpu->dev)); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + gpu->mmio = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(gpu->mmio)) return PTR_ERR(gpu->mmio); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 3c3005501846..dd430f0f8ff5 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2015-2018 Etnaviv Project */ #ifndef __ETNAVIV_GPU_H__ @@ -161,12 +150,12 @@ struct etnaviv_gpu { static inline void gpu_write(struct etnaviv_gpu *gpu, u32 reg, u32 data) { - etnaviv_writel(data, gpu->mmio + reg); + writel(data, gpu->mmio + reg); } static inline u32 gpu_read(struct etnaviv_gpu *gpu, u32 reg) { - return etnaviv_readl(gpu->mmio + reg); + return readl(gpu->mmio + reg); } static inline bool fence_completed(struct etnaviv_gpu *gpu, u32 fence) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c index ea08bb38caaf..39b463db76c9 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2018 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #include "etnaviv_gpu.h" diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c index 4b9b11ca6f03..b163bdbcb880 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2014-2018 Etnaviv Project */ #include <linux/platform_device.h> @@ -47,11 +36,10 @@ static int __etnaviv_iommu_init(struct etnaviv_iommuv1_domain *etnaviv_domain) u32 *p; int i; - etnaviv_domain->base.bad_page_cpu = dma_alloc_coherent( - etnaviv_domain->base.dev, - SZ_4K, - &etnaviv_domain->base.bad_page_dma, - GFP_KERNEL); + etnaviv_domain->base.bad_page_cpu = + dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K, + &etnaviv_domain->base.bad_page_dma, + GFP_KERNEL); if (!etnaviv_domain->base.bad_page_cpu) return -ENOMEM; @@ -59,14 +47,14 @@ static int __etnaviv_iommu_init(struct etnaviv_iommuv1_domain *etnaviv_domain) for (i = 0; i < SZ_4K / 4; i++) *p++ = 0xdead55aa; - etnaviv_domain->pgtable_cpu = - dma_alloc_coherent(etnaviv_domain->base.dev, PT_SIZE, - &etnaviv_domain->pgtable_dma, - GFP_KERNEL); + etnaviv_domain->pgtable_cpu = dma_alloc_wc(etnaviv_domain->base.dev, + PT_SIZE, + &etnaviv_domain->pgtable_dma, + GFP_KERNEL); if (!etnaviv_domain->pgtable_cpu) { - dma_free_coherent(etnaviv_domain->base.dev, SZ_4K, - etnaviv_domain->base.bad_page_cpu, - etnaviv_domain->base.bad_page_dma); + dma_free_wc(etnaviv_domain->base.dev, SZ_4K, + etnaviv_domain->base.bad_page_cpu, + etnaviv_domain->base.bad_page_dma); return -ENOMEM; } @@ -81,13 +69,12 @@ static void etnaviv_iommuv1_domain_free(struct etnaviv_iommu_domain *domain) struct etnaviv_iommuv1_domain *etnaviv_domain = to_etnaviv_domain(domain); - dma_free_coherent(etnaviv_domain->base.dev, PT_SIZE, - etnaviv_domain->pgtable_cpu, - etnaviv_domain->pgtable_dma); + dma_free_wc(etnaviv_domain->base.dev, PT_SIZE, + etnaviv_domain->pgtable_cpu, etnaviv_domain->pgtable_dma); - dma_free_coherent(etnaviv_domain->base.dev, SZ_4K, - etnaviv_domain->base.bad_page_cpu, - etnaviv_domain->base.bad_page_dma); + dma_free_wc(etnaviv_domain->base.dev, SZ_4K, + etnaviv_domain->base.bad_page_cpu, + etnaviv_domain->base.bad_page_dma); kfree(etnaviv_domain); } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.h b/drivers/gpu/drm/etnaviv/etnaviv_iommu.h index 01d59bf70d78..b279404ce91a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2014-2018 Etnaviv Project */ #ifndef __ETNAVIV_IOMMU_H__ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c index 9752dbd5d28b..71fbc1f96cb6 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2016 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2016-2018 Etnaviv Project */ #include <linux/platform_device.h> @@ -47,8 +36,8 @@ struct etnaviv_iommuv2_domain { u32 *mtlb_cpu; dma_addr_t mtlb_dma; /* S(lave) TLB aka second level pagetable */ - u32 *stlb_cpu[1024]; - dma_addr_t stlb_dma[1024]; + u32 *stlb_cpu[MMUv2_MAX_STLB_ENTRIES]; + dma_addr_t stlb_dma[MMUv2_MAX_STLB_ENTRIES]; }; static struct etnaviv_iommuv2_domain * @@ -57,24 +46,54 @@ to_etnaviv_domain(struct etnaviv_iommu_domain *domain) return container_of(domain, struct etnaviv_iommuv2_domain, base); } +static int +etnaviv_iommuv2_ensure_stlb(struct etnaviv_iommuv2_domain *etnaviv_domain, + int stlb) +{ + if (etnaviv_domain->stlb_cpu[stlb]) + return 0; + + etnaviv_domain->stlb_cpu[stlb] = + dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K, + &etnaviv_domain->stlb_dma[stlb], + GFP_KERNEL); + + if (!etnaviv_domain->stlb_cpu[stlb]) + return -ENOMEM; + + memset32(etnaviv_domain->stlb_cpu[stlb], MMUv2_PTE_EXCEPTION, + SZ_4K / sizeof(u32)); + + etnaviv_domain->mtlb_cpu[stlb] = etnaviv_domain->stlb_dma[stlb] | + MMUv2_PTE_PRESENT; + return 0; +} + static int etnaviv_iommuv2_map(struct etnaviv_iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { struct etnaviv_iommuv2_domain *etnaviv_domain = to_etnaviv_domain(domain); - int mtlb_entry, stlb_entry; - u32 entry = (u32)paddr | MMUv2_PTE_PRESENT; + int mtlb_entry, stlb_entry, ret; + u32 entry = lower_32_bits(paddr) | MMUv2_PTE_PRESENT; if (size != SZ_4K) return -EINVAL; + if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) + entry |= (upper_32_bits(paddr) & 0xff) << 4; + if (prot & ETNAVIV_PROT_WRITE) entry |= MMUv2_PTE_WRITEABLE; mtlb_entry = (iova & MMUv2_MTLB_MASK) >> MMUv2_MTLB_SHIFT; stlb_entry = (iova & MMUv2_STLB_MASK) >> MMUv2_STLB_SHIFT; + ret = etnaviv_iommuv2_ensure_stlb(etnaviv_domain, mtlb_entry); + if (ret) + return ret; + etnaviv_domain->stlb_cpu[mtlb_entry][stlb_entry] = entry; return 0; @@ -101,14 +120,13 @@ static size_t etnaviv_iommuv2_unmap(struct etnaviv_iommu_domain *domain, static int etnaviv_iommuv2_init(struct etnaviv_iommuv2_domain *etnaviv_domain) { u32 *p; - int ret, i, j; + int ret, i; /* allocate scratch page */ - etnaviv_domain->base.bad_page_cpu = dma_alloc_coherent( - etnaviv_domain->base.dev, - SZ_4K, - &etnaviv_domain->base.bad_page_dma, - GFP_KERNEL); + etnaviv_domain->base.bad_page_cpu = + dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K, + &etnaviv_domain->base.bad_page_dma, + GFP_KERNEL); if (!etnaviv_domain->base.bad_page_cpu) { ret = -ENOMEM; goto fail_mem; @@ -117,67 +135,40 @@ static int etnaviv_iommuv2_init(struct etnaviv_iommuv2_domain *etnaviv_domain) for (i = 0; i < SZ_4K / 4; i++) *p++ = 0xdead55aa; - etnaviv_domain->pta_cpu = dma_alloc_coherent(etnaviv_domain->base.dev, - SZ_4K, - &etnaviv_domain->pta_dma, - GFP_KERNEL); + etnaviv_domain->pta_cpu = dma_alloc_wc(etnaviv_domain->base.dev, + SZ_4K, &etnaviv_domain->pta_dma, + GFP_KERNEL); if (!etnaviv_domain->pta_cpu) { ret = -ENOMEM; goto fail_mem; } - etnaviv_domain->mtlb_cpu = dma_alloc_coherent(etnaviv_domain->base.dev, - SZ_4K, - &etnaviv_domain->mtlb_dma, - GFP_KERNEL); + etnaviv_domain->mtlb_cpu = dma_alloc_wc(etnaviv_domain->base.dev, + SZ_4K, &etnaviv_domain->mtlb_dma, + GFP_KERNEL); if (!etnaviv_domain->mtlb_cpu) { ret = -ENOMEM; goto fail_mem; } - /* pre-populate STLB pages (may want to switch to on-demand later) */ - for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) { - etnaviv_domain->stlb_cpu[i] = - dma_alloc_coherent(etnaviv_domain->base.dev, - SZ_4K, - &etnaviv_domain->stlb_dma[i], - GFP_KERNEL); - if (!etnaviv_domain->stlb_cpu[i]) { - ret = -ENOMEM; - goto fail_mem; - } - p = etnaviv_domain->stlb_cpu[i]; - for (j = 0; j < SZ_4K / 4; j++) - *p++ = MMUv2_PTE_EXCEPTION; - - etnaviv_domain->mtlb_cpu[i] = etnaviv_domain->stlb_dma[i] | - MMUv2_PTE_PRESENT; - } + memset32(etnaviv_domain->mtlb_cpu, MMUv2_PTE_EXCEPTION, + MMUv2_MAX_STLB_ENTRIES); return 0; fail_mem: if (etnaviv_domain->base.bad_page_cpu) - dma_free_coherent(etnaviv_domain->base.dev, SZ_4K, - etnaviv_domain->base.bad_page_cpu, - etnaviv_domain->base.bad_page_dma); + dma_free_wc(etnaviv_domain->base.dev, SZ_4K, + etnaviv_domain->base.bad_page_cpu, + etnaviv_domain->base.bad_page_dma); if (etnaviv_domain->pta_cpu) - dma_free_coherent(etnaviv_domain->base.dev, SZ_4K, - etnaviv_domain->pta_cpu, - etnaviv_domain->pta_dma); + dma_free_wc(etnaviv_domain->base.dev, SZ_4K, + etnaviv_domain->pta_cpu, etnaviv_domain->pta_dma); if (etnaviv_domain->mtlb_cpu) - dma_free_coherent(etnaviv_domain->base.dev, SZ_4K, - etnaviv_domain->mtlb_cpu, - etnaviv_domain->mtlb_dma); - - for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) { - if (etnaviv_domain->stlb_cpu[i]) - dma_free_coherent(etnaviv_domain->base.dev, SZ_4K, - etnaviv_domain->stlb_cpu[i], - etnaviv_domain->stlb_dma[i]); - } + dma_free_wc(etnaviv_domain->base.dev, SZ_4K, + etnaviv_domain->mtlb_cpu, etnaviv_domain->mtlb_dma); return ret; } @@ -188,23 +179,21 @@ static void etnaviv_iommuv2_domain_free(struct etnaviv_iommu_domain *domain) to_etnaviv_domain(domain); int i; - dma_free_coherent(etnaviv_domain->base.dev, SZ_4K, - etnaviv_domain->base.bad_page_cpu, - etnaviv_domain->base.bad_page_dma); + dma_free_wc(etnaviv_domain->base.dev, SZ_4K, + etnaviv_domain->base.bad_page_cpu, + etnaviv_domain->base.bad_page_dma); - dma_free_coherent(etnaviv_domain->base.dev, SZ_4K, - etnaviv_domain->pta_cpu, - etnaviv_domain->pta_dma); + dma_free_wc(etnaviv_domain->base.dev, SZ_4K, + etnaviv_domain->pta_cpu, etnaviv_domain->pta_dma); - dma_free_coherent(etnaviv_domain->base.dev, SZ_4K, - etnaviv_domain->mtlb_cpu, - etnaviv_domain->mtlb_dma); + dma_free_wc(etnaviv_domain->base.dev, SZ_4K, + etnaviv_domain->mtlb_cpu, etnaviv_domain->mtlb_dma); for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) { if (etnaviv_domain->stlb_cpu[i]) - dma_free_coherent(etnaviv_domain->base.dev, SZ_4K, - etnaviv_domain->stlb_cpu[i], - etnaviv_domain->stlb_dma[i]); + dma_free_wc(etnaviv_domain->base.dev, SZ_4K, + etnaviv_domain->stlb_cpu[i], + etnaviv_domain->stlb_dma[i]); } vfree(etnaviv_domain); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 49e049713a52..8069f9f36a2e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2015-2018 Etnaviv Project */ #include "common.xml.h" @@ -162,22 +151,10 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu, bool found; ret = drm_mm_insert_node_in_range(&mmu->mm, node, - size, 0, 0, - mmu->last_iova, U64_MAX, - mode); + size, 0, 0, 0, U64_MAX, mode); if (ret != -ENOSPC) break; - /* - * If we did not search from the start of the MMU region, - * try again in case there are free slots. - */ - if (mmu->last_iova) { - mmu->last_iova = 0; - mmu->need_flush = true; - continue; - } - /* Try to retire some entries */ drm_mm_scan_init(&scan, &mmu->mm, size, 0, 0, mode); @@ -274,7 +251,6 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu, if (ret < 0) goto unlock; - mmu->last_iova = node->start + etnaviv_obj->base.size; mapping->iova = node->start; ret = etnaviv_iommu_map(mmu, node->start, sgt, etnaviv_obj->base.size, ETNAVIV_PROT_READ | ETNAVIV_PROT_WRITE); @@ -381,7 +357,6 @@ int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr, mutex_unlock(&mmu->lock); return ret; } - mmu->last_iova = vram_node->start + size; gpu->mmu->need_flush = true; mutex_unlock(&mmu->lock); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h index ab603f5166b1..a0db17ffb686 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2015 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2015-2018 Etnaviv Project */ #ifndef __ETNAVIV_MMU_H__ @@ -59,7 +48,6 @@ struct etnaviv_iommu { struct mutex lock; struct list_head mappings; struct drm_mm mm; - u32 last_iova; bool need_flush; }; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index 26dddfc41aac..9980d81a26e3 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2017 Etnaviv Project * Copyright (C) 2017 Zodiac Inflight Innovations - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #include "etnaviv_gpu.h" diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h index c1653c64ab6b..4a9d508f6e10 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2017 Etnaviv Project * Copyright (C) 2017 Zodiac Inflight Innovations - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __ETNAVIV_PERFMON_H__ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 6cf0775dbcd7..a74eb57af15b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2017 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/kthread.h> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.h b/drivers/gpu/drm/etnaviv/etnaviv_sched.h index 097635fa78ae..c0a6796e22c9 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2017 Etnaviv Project - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __ETNAVIV_SCHED_H__ diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 735ce47688f9..208bc27be3cc 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -1,6 +1,6 @@ config DRM_EXYNOS tristate "DRM Support for Samsung SoC EXYNOS Series" - depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM) + depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM) select DRM_KMS_HELPER select VIDEOMODE_HELPERS select SND_SOC_HDMI_CODEC if SND_SOC @@ -95,21 +95,31 @@ config DRM_EXYNOS_G2D help Choose this option if you want to use Exynos G2D for DRM. +config DRM_EXYNOS_IPP + bool + config DRM_EXYNOS_FIMC bool "FIMC" - depends on BROKEN && MFD_SYSCON + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos FIMC for DRM. config DRM_EXYNOS_ROTATOR bool "Rotator" - depends on BROKEN + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos Rotator for DRM. +config DRM_EXYNOS_SCALER + bool "Scaler" + select DRM_EXYNOS_IPP + help + Choose this option if you want to use Exynos Scaler for DRM. + config DRM_EXYNOS_GSC bool "GScaler" - depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n + depends on VIDEO_SAMSUNG_EXYNOS_GSC=n + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos GSC for DRM. diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile index a51c5459bb13..3b323f1e0475 100644 --- a/drivers/gpu/drm/exynos/Makefile +++ b/drivers/gpu/drm/exynos/Makefile @@ -18,8 +18,10 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER) += exynos_mixer.o exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI) += exynos_hdmi.o exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI) += exynos_drm_vidi.o exynosdrm-$(CONFIG_DRM_EXYNOS_G2D) += exynos_drm_g2d.o +exynosdrm-$(CONFIG_DRM_EXYNOS_IPP) += exynos_drm_ipp.o exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC) += exynos_drm_fimc.o exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR) += exynos_drm_rotator.o +exynosdrm-$(CONFIG_DRM_EXYNOS_SCALER) += exynos_drm_scaler.o exynosdrm-$(CONFIG_DRM_EXYNOS_GSC) += exynos_drm_gsc.o exynosdrm-$(CONFIG_DRM_EXYNOS_MIC) += exynos_drm_mic.o diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 1c330f2a7a5d..82c95c34447f 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -31,7 +31,10 @@ #define DSD_CFG_MUX 0x1004 #define DSD_CFG_MUX_TE_UNMASK_GLOBAL BIT(13) -#define WINDOWS_NR 3 +#define WINDOWS_NR 5 +#define PRIMARY_WIN 2 +#define CURSON_WIN 4 + #define MIN_FB_WIDTH_FOR_16WORD_BURST 128 #define I80_HW_TRG (1 << 0) @@ -43,6 +46,9 @@ static const char * const decon_clks_name[] = { "aclk_smmu_decon0x", "aclk_xiu_decon0x", "pclk_smmu_decon0x", + "aclk_smmu_decon1x", + "aclk_xiu_decon1x", + "pclk_smmu_decon1x", "sclk_decon_vclk", "sclk_decon_eclk", }; @@ -74,9 +80,8 @@ static const uint32_t decon_formats[] = { }; static const enum drm_plane_type decon_win_types[WINDOWS_NR] = { - DRM_PLANE_TYPE_PRIMARY, - DRM_PLANE_TYPE_OVERLAY, - DRM_PLANE_TYPE_CURSOR, + [PRIMARY_WIN] = DRM_PLANE_TYPE_PRIMARY, + [CURSON_WIN] = DRM_PLANE_TYPE_CURSOR, }; static inline void decon_set_bits(struct decon_context *ctx, u32 reg, u32 mask, @@ -552,12 +557,10 @@ static int decon_bind(struct device *dev, struct device *master, void *data) drm_dev->max_vblank_count = 0xffffffff; for (win = ctx->first_win; win < WINDOWS_NR; win++) { - int tmp = (win == ctx->first_win) ? 0 : win; - ctx->configs[win].pixel_formats = decon_formats; ctx->configs[win].num_pixel_formats = ARRAY_SIZE(decon_formats); - ctx->configs[win].zpos = win; - ctx->configs[win].type = decon_win_types[tmp]; + ctx->configs[win].zpos = win - ctx->first_win; + ctx->configs[win].type = decon_win_types[win]; ret = exynos_plane_init(drm_dev, &ctx->planes[win], win, &ctx->configs[win]); @@ -565,7 +568,7 @@ static int decon_bind(struct device *dev, struct device *master, void *data) return ret; } - exynos_plane = &ctx->planes[ctx->first_win]; + exynos_plane = &ctx->planes[PRIMARY_WIN]; out_type = (ctx->out_type & IFTYPE_HDMI) ? EXYNOS_DISPLAY_TYPE_HDMI : EXYNOS_DISPLAY_TYPE_LCD; ctx->crtc = exynos_drm_crtc_create(drm_dev, &exynos_plane->base, diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index dc01342e759a..eea90251808f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -228,7 +228,7 @@ struct exynos_drm_crtc *exynos_drm_crtc_get_by_type(struct drm_device *drm_dev, if (to_exynos_crtc(crtc)->type == out_type) return to_exynos_crtc(crtc); - return ERR_PTR(-EPERM); + return ERR_PTR(-ENODEV); } int exynos_drm_set_possible_crtcs(struct drm_encoder *encoder, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 39284bb7c2c2..a81b4a5e24a7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -27,15 +27,23 @@ #include "exynos_drm_fb.h" #include "exynos_drm_gem.h" #include "exynos_drm_plane.h" +#include "exynos_drm_ipp.h" #include "exynos_drm_vidi.h" #include "exynos_drm_g2d.h" #include "exynos_drm_iommu.h" #define DRIVER_NAME "exynos" #define DRIVER_DESC "Samsung SoC DRM" -#define DRIVER_DATE "20110530" +#define DRIVER_DATE "20180330" + +/* + * Interface history: + * + * 1.0 - Original version + * 1.1 - Upgrade IPP driver to version 2.0 + */ #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 0 +#define DRIVER_MINOR 1 static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) { @@ -88,6 +96,16 @@ static const struct drm_ioctl_desc exynos_ioctls[] = { DRM_AUTH | DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_RESOURCES, + exynos_drm_ipp_get_res_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_CAPS, exynos_drm_ipp_get_caps_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_LIMITS, + exynos_drm_ipp_get_limits_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_COMMIT, exynos_drm_ipp_commit_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), }; static const struct file_operations exynos_drm_driver_fops = { @@ -184,6 +202,7 @@ struct exynos_drm_driver_info { #define DRM_COMPONENT_DRIVER BIT(0) /* supports component framework */ #define DRM_VIRTUAL_DEVICE BIT(1) /* create virtual platform device */ #define DRM_DMA_DEVICE BIT(2) /* can be used for dma allocations */ +#define DRM_FIMC_DEVICE BIT(3) /* devices shared with V4L2 subsystem */ #define DRV_PTR(drv, cond) (IS_ENABLED(cond) ? &drv : NULL) @@ -223,10 +242,16 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = { DRV_PTR(g2d_driver, CONFIG_DRM_EXYNOS_G2D), }, { DRV_PTR(fimc_driver, CONFIG_DRM_EXYNOS_FIMC), + DRM_COMPONENT_DRIVER | DRM_FIMC_DEVICE, }, { DRV_PTR(rotator_driver, CONFIG_DRM_EXYNOS_ROTATOR), + DRM_COMPONENT_DRIVER + }, { + DRV_PTR(scaler_driver, CONFIG_DRM_EXYNOS_SCALER), + DRM_COMPONENT_DRIVER }, { DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC), + DRM_COMPONENT_DRIVER }, { &exynos_drm_platform_driver, DRM_VIRTUAL_DEVICE @@ -254,7 +279,11 @@ static struct component_match *exynos_drm_match_add(struct device *dev) &info->driver->driver, (void *)platform_bus_type.match))) { put_device(p); - component_match_add(dev, &match, compare_dev, d); + + if (!(info->flags & DRM_FIMC_DEVICE) || + exynos_drm_check_fimc_device(d) == 0) + component_match_add(dev, &match, + compare_dev, d); p = d; } put_device(p); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index 075957cb6ba1..0f6d079a55c9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -273,6 +273,15 @@ static inline int exynos_dpi_bind(struct drm_device *dev, } #endif +#ifdef CONFIG_DRM_EXYNOS_FIMC +int exynos_drm_check_fimc_device(struct device *dev); +#else +static inline int exynos_drm_check_fimc_device(struct device *dev) +{ + return 0; +} +#endif + int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool nonblock); @@ -288,6 +297,7 @@ extern struct platform_driver vidi_driver; extern struct platform_driver g2d_driver; extern struct platform_driver fimc_driver; extern struct platform_driver rotator_driver; +extern struct platform_driver scaler_driver; extern struct platform_driver gsc_driver; extern struct platform_driver mic_driver; #endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 7904ffa9abfb..7c3030b7e586 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -270,7 +270,6 @@ struct exynos_dsi { u32 lanes; u32 mode_flags; u32 format; - struct videomode vm; int state; struct drm_property *brightness; @@ -881,30 +880,30 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi) static void exynos_dsi_set_display_mode(struct exynos_dsi *dsi) { - struct videomode *vm = &dsi->vm; + struct drm_display_mode *m = &dsi->encoder.crtc->state->adjusted_mode; unsigned int num_bits_resol = dsi->driver_data->num_bits_resol; u32 reg; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { reg = DSIM_CMD_ALLOW(0xf) - | DSIM_STABLE_VFP(vm->vfront_porch) - | DSIM_MAIN_VBP(vm->vback_porch); + | DSIM_STABLE_VFP(m->vsync_start - m->vdisplay) + | DSIM_MAIN_VBP(m->vtotal - m->vsync_end); exynos_dsi_write(dsi, DSIM_MVPORCH_REG, reg); - reg = DSIM_MAIN_HFP(vm->hfront_porch) - | DSIM_MAIN_HBP(vm->hback_porch); + reg = DSIM_MAIN_HFP(m->hsync_start - m->hdisplay) + | DSIM_MAIN_HBP(m->htotal - m->hsync_end); exynos_dsi_write(dsi, DSIM_MHPORCH_REG, reg); - reg = DSIM_MAIN_VSA(vm->vsync_len) - | DSIM_MAIN_HSA(vm->hsync_len); + reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start) + | DSIM_MAIN_HSA(m->hsync_end - m->hsync_start); exynos_dsi_write(dsi, DSIM_MSYNC_REG, reg); } - reg = DSIM_MAIN_HRESOL(vm->hactive, num_bits_resol) | - DSIM_MAIN_VRESOL(vm->vactive, num_bits_resol); + reg = DSIM_MAIN_HRESOL(m->hdisplay, num_bits_resol) | + DSIM_MAIN_VRESOL(m->vdisplay, num_bits_resol); exynos_dsi_write(dsi, DSIM_MDRESOL_REG, reg); - dev_dbg(dsi->dev, "LCD size = %dx%d\n", vm->hactive, vm->vactive); + dev_dbg(dsi->dev, "LCD size = %dx%d\n", m->hdisplay, m->vdisplay); } static void exynos_dsi_set_display_enable(struct exynos_dsi *dsi, bool enable) @@ -1265,15 +1264,15 @@ static irqreturn_t exynos_dsi_irq(int irq, void *dev_id) if (status & DSIM_INT_SW_RST_RELEASE) { u32 mask = ~(DSIM_INT_RX_DONE | DSIM_INT_SFR_FIFO_EMPTY | - DSIM_INT_SFR_HDR_FIFO_EMPTY | DSIM_INT_FRAME_DONE | - DSIM_INT_RX_ECC_ERR | DSIM_INT_SW_RST_RELEASE); + DSIM_INT_SFR_HDR_FIFO_EMPTY | DSIM_INT_RX_ECC_ERR | + DSIM_INT_SW_RST_RELEASE); exynos_dsi_write(dsi, DSIM_INTMSK_REG, mask); complete(&dsi->completed); return IRQ_HANDLED; } if (!(status & (DSIM_INT_RX_DONE | DSIM_INT_SFR_FIFO_EMPTY | - DSIM_INT_FRAME_DONE | DSIM_INT_PLL_STABLE))) + DSIM_INT_PLL_STABLE))) return IRQ_HANDLED; if (exynos_dsi_transfer_finish(dsi)) @@ -1485,26 +1484,7 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder) return 0; } -static void exynos_dsi_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct exynos_dsi *dsi = encoder_to_dsi(encoder); - struct videomode *vm = &dsi->vm; - struct drm_display_mode *m = adjusted_mode; - - vm->hactive = m->hdisplay; - vm->vactive = m->vdisplay; - vm->vfront_porch = m->vsync_start - m->vdisplay; - vm->vback_porch = m->vtotal - m->vsync_end; - vm->vsync_len = m->vsync_end - m->vsync_start; - vm->hfront_porch = m->hsync_start - m->hdisplay; - vm->hback_porch = m->htotal - m->hsync_end; - vm->hsync_len = m->hsync_end - m->hsync_start; -} - static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = { - .mode_set = exynos_dsi_mode_set, .enable = exynos_dsi_enable, .disable = exynos_dsi_disable, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 5b18b5c5fdf2..5ce84025d1cb 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -12,6 +12,7 @@ * */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/platform_device.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> @@ -24,8 +25,8 @@ #include <drm/exynos_drm.h> #include "regs-fimc.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" -#include "exynos_drm_fimc.h" /* * FIMC stands for Fully Interactive Mobile Camera and @@ -33,23 +34,6 @@ * input DMA reads image data from the memory. * output DMA writes image data to memory. * FIMC supports image rotation and image effect functions. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> FIMC H/W ----> Memory. - * Writeback operation : supports cloned screen with FIMD. - * FIMD ----> FIMC H/W ----> Memory. - * Output operation : supports direct display using local path. - * Memory ----> FIMC H/W ----> FIMD. - */ - -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. added check_prepare api for right register. - * 5. need to add supported list in prop_list. - * 6. check prescaler/scaler optimization. */ #define FIMC_MAX_DEVS 4 @@ -59,29 +43,19 @@ #define FIMC_BUF_STOP 1 #define FIMC_BUF_START 2 #define FIMC_WIDTH_ITU_709 1280 -#define FIMC_REFRESH_MAX 60 -#define FIMC_REFRESH_MIN 12 -#define FIMC_CROP_MAX 8192 -#define FIMC_CROP_MIN 32 -#define FIMC_SCALE_MAX 4224 -#define FIMC_SCALE_MIN 32 +#define FIMC_AUTOSUSPEND_DELAY 2000 + +static unsigned int fimc_mask = 0xc; +module_param_named(fimc_devs, fimc_mask, uint, 0644); +MODULE_PARM_DESC(fimc_devs, "Alias mask for assigning FIMC devices to Exynos DRM"); #define get_fimc_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct fimc_context, ippdrv); -enum fimc_wb { - FIMC_WB_NONE, - FIMC_WB_A, - FIMC_WB_B, -}; enum { FIMC_CLK_LCLK, FIMC_CLK_GATE, FIMC_CLK_WB_A, FIMC_CLK_WB_B, - FIMC_CLK_MUX, - FIMC_CLK_PARENT, FIMC_CLKS_MAX }; @@ -90,12 +64,8 @@ static const char * const fimc_clock_names[] = { [FIMC_CLK_GATE] = "fimc", [FIMC_CLK_WB_A] = "pxl_async0", [FIMC_CLK_WB_B] = "pxl_async1", - [FIMC_CLK_MUX] = "mux", - [FIMC_CLK_PARENT] = "parent", }; -#define FIMC_DEFAULT_LCLK_FREQUENCY 133000000UL - /* * A structure of scaler. * @@ -107,7 +77,7 @@ static const char * const fimc_clock_names[] = { * @vratio: vertical ratio. */ struct fimc_scaler { - bool range; + bool range; bool bypass; bool up_h; bool up_v; @@ -116,56 +86,32 @@ struct fimc_scaler { }; /* - * A structure of scaler capability. - * - * find user manual table 43-1. - * @in_hori: scaler input horizontal size. - * @bypass: scaler bypass mode. - * @dst_h_wo_rot: target horizontal size without output rotation. - * @dst_h_rot: target horizontal size with output rotation. - * @rl_w_wo_rot: real width without input rotation. - * @rl_h_rot: real height without output rotation. - */ -struct fimc_capability { - /* scaler */ - u32 in_hori; - u32 bypass; - /* output rotator */ - u32 dst_h_wo_rot; - u32 dst_h_rot; - /* input rotator */ - u32 rl_w_wo_rot; - u32 rl_h_rot; -}; - -/* * A structure of fimc context. * - * @ippdrv: prepare initialization using ippdrv. * @regs_res: register resources. * @regs: memory mapped io registers. * @lock: locking of operations. * @clocks: fimc clocks. - * @clk_frequency: LCLK clock frequency. - * @sysreg: handle to SYSREG block regmap. * @sc: scaler infomations. * @pol: porarity of writeback. * @id: fimc id. * @irq: irq number. - * @suspended: qos operations. */ struct fimc_context { - struct exynos_drm_ippdrv ippdrv; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + struct exynos_drm_ipp_task *task; + struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct resource *regs_res; void __iomem *regs; spinlock_t lock; struct clk *clocks[FIMC_CLKS_MAX]; - u32 clk_frequency; - struct regmap *sysreg; struct fimc_scaler sc; int id; int irq; - bool suspended; }; static u32 fimc_read(struct fimc_context *ctx, u32 reg) @@ -217,19 +163,10 @@ static void fimc_sw_reset(struct fimc_context *ctx) fimc_write(ctx, 0x0, EXYNOS_CIFCNTSEQ); } -static int fimc_set_camblk_fimd0_wb(struct fimc_context *ctx) -{ - return regmap_update_bits(ctx->sysreg, SYSREG_CAMERA_BLK, - SYSREG_FIMD0WB_DEST_MASK, - ctx->id << SYSREG_FIMD0WB_DEST_SHIFT); -} - -static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb) +static void fimc_set_type_ctrl(struct fimc_context *ctx) { u32 cfg; - DRM_DEBUG_KMS("wb[%d]\n", wb); - cfg = fimc_read(ctx, EXYNOS_CIGCTRL); cfg &= ~(EXYNOS_CIGCTRL_TESTPATTERN_MASK | EXYNOS_CIGCTRL_SELCAM_ITU_MASK | @@ -238,23 +175,10 @@ static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb) EXYNOS_CIGCTRL_SELWB_CAMIF_MASK | EXYNOS_CIGCTRL_SELWRITEBACK_MASK); - switch (wb) { - case FIMC_WB_A: - cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_A | - EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK); - break; - case FIMC_WB_B: - cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_B | - EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK); - break; - case FIMC_WB_NONE: - default: - cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A | - EXYNOS_CIGCTRL_SELWRITEBACK_A | - EXYNOS_CIGCTRL_SELCAM_MIPI_A | - EXYNOS_CIGCTRL_SELCAM_FIMC_ITU); - break; - } + cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A | + EXYNOS_CIGCTRL_SELWRITEBACK_A | + EXYNOS_CIGCTRL_SELCAM_MIPI_A | + EXYNOS_CIGCTRL_SELCAM_FIMC_ITU); fimc_write(ctx, cfg, EXYNOS_CIGCTRL); } @@ -296,7 +220,6 @@ static void fimc_clear_irq(struct fimc_context *ctx) static bool fimc_check_ovf(struct fimc_context *ctx) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 status, flag; status = fimc_read(ctx, EXYNOS_CISTATUS); @@ -310,7 +233,7 @@ static bool fimc_check_ovf(struct fimc_context *ctx) EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB | EXYNOS_CIWDOFST_CLROVFICR); - dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n", + dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n", ctx->id, status); return true; } @@ -376,10 +299,8 @@ static void fimc_handle_lastend(struct fimc_context *ctx, bool enable) fimc_write(ctx, cfg, EXYNOS_CIOCTRL); } - -static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) +static void fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -392,12 +313,12 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) case DRM_FORMAT_RGB565: cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB565; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_RGB888: case DRM_FORMAT_XRGB8888: cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB888; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; default: /* bypass */ break; @@ -438,20 +359,13 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) cfg |= (EXYNOS_MSCTRL_ORDER2P_LSB_CBCR | EXYNOS_MSCTRL_C_INT_IN_2PLANE); break; - default: - dev_err(ippdrv->dev, "invalid source yuv order 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_MSCTRL); - - return 0; } -static int fimc_src_set_fmt(struct device *dev, u32 fmt) +static void fimc_src_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -485,9 +399,6 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_NV21: cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR420; break; - default: - dev_err(ippdrv->dev, "invalid source format 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_MSCTRL); @@ -495,22 +406,22 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt) cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM); cfg &= ~EXYNOS_CIDMAPARAM_R_MODE_MASK; - cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR; + if (tiled) + cfg |= EXYNOS_CIDMAPARAM_R_MODE_64X32; + else + cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR; fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM); - return fimc_src_set_fmt_order(ctx, fmt); + fimc_src_set_fmt_order(ctx, fmt); } -static int fimc_src_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void fimc_src_set_transf(struct fimc_context *ctx, unsigned int rotation) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg1, cfg2; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); + DRM_DEBUG_KMS("rotation[%x]\n", rotation); cfg1 = fimc_read(ctx, EXYNOS_MSCTRL); cfg1 &= ~(EXYNOS_MSCTRL_FLIP_X_MIRROR | @@ -520,61 +431,56 @@ static int fimc_src_set_transf(struct device *dev, cfg2 &= ~EXYNOS_CITRGFMT_INROT90_CLOCKWISE; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_X) cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_90: + case DRM_MODE_ROTATE_90: cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR | EXYNOS_MSCTRL_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR | EXYNOS_MSCTRL_FLIP_Y_MIRROR); cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } fimc_write(ctx, cfg1, EXYNOS_MSCTRL); fimc_write(ctx, cfg2, EXYNOS_CITRGFMT); - *swap = (cfg2 & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) ? 1 : 0; - - return 0; } -static int fimc_set_window(struct fimc_context *ctx, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_set_window(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { u32 cfg, h1, h2, v1, v2; /* cropped image */ - h1 = pos->x; - h2 = sz->hsize - pos->w - pos->x; - v1 = pos->y; - v2 = sz->vsize - pos->h - pos->y; + h1 = buf->rect.x; + h2 = buf->buf.width - buf->rect.w - buf->rect.x; + v1 = buf->rect.y; + v2 = buf->buf.height - buf->rect.h - buf->rect.y; DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]hsize[%d]vsize[%d]\n", - pos->x, pos->y, pos->w, pos->h, sz->hsize, sz->vsize); + buf->rect.x, buf->rect.y, buf->rect.w, buf->rect.h, + buf->buf.width, buf->buf.height); DRM_DEBUG_KMS("h1[%d]h2[%d]v1[%d]v2[%d]\n", h1, h2, v1, v2); /* @@ -592,42 +498,30 @@ static int fimc_set_window(struct fimc_context *ctx, cfg = (EXYNOS_CIWDOFST2_WINHOROFST2(h2) | EXYNOS_CIWDOFST2_WINVEROFST2(v2)); fimc_write(ctx, cfg, EXYNOS_CIWDOFST2); - - return 0; } -static int fimc_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_src_set_size(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_pos img_pos = *pos; - struct drm_exynos_sz img_sz = *sz; u32 cfg; - DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n", - swap, sz->hsize, sz->vsize); + DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height); /* original size */ - cfg = (EXYNOS_ORGISIZE_HORIZONTAL(img_sz.hsize) | - EXYNOS_ORGISIZE_VERTICAL(img_sz.vsize)); + cfg = (EXYNOS_ORGISIZE_HORIZONTAL(buf->buf.width) | + EXYNOS_ORGISIZE_VERTICAL(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_ORGISIZE); - DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - img_sz.hsize = sz->vsize; - img_sz.vsize = sz->hsize; - } + DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y, + buf->rect.w, buf->rect.h); /* set input DMA image size */ cfg = fimc_read(ctx, EXYNOS_CIREAL_ISIZE); cfg &= ~(EXYNOS_CIREAL_ISIZE_HEIGHT_MASK | EXYNOS_CIREAL_ISIZE_WIDTH_MASK); - cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(img_pos.w) | - EXYNOS_CIREAL_ISIZE_HEIGHT(img_pos.h)); + cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(buf->rect.w) | + EXYNOS_CIREAL_ISIZE_HEIGHT(buf->rect.h)); fimc_write(ctx, cfg, EXYNOS_CIREAL_ISIZE); /* @@ -635,91 +529,34 @@ static int fimc_src_set_size(struct device *dev, int swap, * for now, we support only ITU601 8 bit mode */ cfg = (EXYNOS_CISRCFMT_ITU601_8BIT | - EXYNOS_CISRCFMT_SOURCEHSIZE(img_sz.hsize) | - EXYNOS_CISRCFMT_SOURCEVSIZE(img_sz.vsize)); + EXYNOS_CISRCFMT_SOURCEHSIZE(buf->buf.width) | + EXYNOS_CISRCFMT_SOURCEVSIZE(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_CISRCFMT); /* offset Y(RGB), Cb, Cr */ - cfg = (EXYNOS_CIIYOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIIYOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIIYOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIIYOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIIYOFF); - cfg = (EXYNOS_CIICBOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIICBOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIICBOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIICBOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIICBOFF); - cfg = (EXYNOS_CIICROFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIICROFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIICROFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIICROFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIICROFF); - return fimc_set_window(ctx, &img_pos, &img_sz); + fimc_set_window(ctx, buf); } -static int fimc_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void fimc_src_set_addr(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > FIMC_MAX_SRC) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -ENOMEM; - } - - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - config = &property->config[EXYNOS_DRM_OPS_SRC]; - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y], - EXYNOS_CIIYSA0); - - if (config->fmt == DRM_FORMAT_YVU420) { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIICBSA0); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIICRSA0); - } else { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIICBSA0); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIICRSA0); - } - break; - case IPP_BUF_DEQUEUE: - fimc_write(ctx, 0x0, EXYNOS_CIIYSA0); - fimc_write(ctx, 0x0, EXYNOS_CIICBSA0); - fimc_write(ctx, 0x0, EXYNOS_CIICRSA0); - break; - default: - /* bypass */ - break; - } - - return 0; + fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIIYSA(0)); + fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIICBSA(0)); + fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIICRSA(0)); } -static struct exynos_drm_ipp_ops fimc_src_ops = { - .set_fmt = fimc_src_set_fmt, - .set_transf = fimc_src_set_transf, - .set_size = fimc_src_set_size, - .set_addr = fimc_src_set_addr, -}; - -static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) +static void fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -732,11 +569,11 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) case DRM_FORMAT_RGB565: cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB565; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_RGB888: cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_XRGB8888: cfg |= (EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888 | EXYNOS_CISCCTRL_EXTRGB_EXTENSION); @@ -784,20 +621,13 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) cfg |= EXYNOS_CIOCTRL_ORDER2P_LSB_CBCR; cfg |= EXYNOS_CIOCTRL_YCBCR_2PLANE; break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CIOCTRL); - - return 0; } -static int fimc_dst_set_fmt(struct device *dev, u32 fmt) +static void fimc_dst_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -837,10 +667,6 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_NV21: cfg |= EXYNOS_CITRGFMT_OUTFORMAT_YCBCR420; break; - default: - dev_err(ippdrv->dev, "invalid target format 0x%x.\n", - fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CITRGFMT); @@ -849,73 +675,67 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt) cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM); cfg &= ~EXYNOS_CIDMAPARAM_W_MODE_MASK; - cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR; + if (tiled) + cfg |= EXYNOS_CIDMAPARAM_W_MODE_64X32; + else + cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR; fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM); - return fimc_dst_set_fmt_order(ctx, fmt); + fimc_dst_set_fmt_order(ctx, fmt); } -static int fimc_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void fimc_dst_set_transf(struct fimc_context *ctx, unsigned int rotation) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); + DRM_DEBUG_KMS("rotation[0x%x]\n", rotation); cfg = fimc_read(ctx, EXYNOS_CITRGFMT); cfg &= ~EXYNOS_CITRGFMT_FLIP_MASK; cfg &= ~EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_X) cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_90: + case DRM_MODE_ROTATE_90: cfg |= EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg |= (EXYNOS_CITRGFMT_FLIP_X_MIRROR | EXYNOS_CITRGFMT_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg |= (EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE | EXYNOS_CITRGFMT_FLIP_X_MIRROR | EXYNOS_CITRGFMT_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CITRGFMT); - *swap = (cfg & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) ? 1 : 0; - - return 0; } static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc, - struct drm_exynos_pos *src, struct drm_exynos_pos *dst) + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg, cfg_ext, shfactor; u32 pre_dst_width, pre_dst_height; u32 hfactor, vfactor; @@ -942,13 +762,13 @@ static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc, /* fimc_ippdrv_check_property assures that dividers are not null */ hfactor = fls(src_w / dst_w / 2); if (hfactor > FIMC_SHFACTOR / 2) { - dev_err(ippdrv->dev, "failed to get ratio horizontal.\n"); + dev_err(ctx->dev, "failed to get ratio horizontal.\n"); return -EINVAL; } vfactor = fls(src_h / dst_h / 2); if (vfactor > FIMC_SHFACTOR / 2) { - dev_err(ippdrv->dev, "failed to get ratio vertical.\n"); + dev_err(ctx->dev, "failed to get ratio vertical.\n"); return -EINVAL; } @@ -1019,83 +839,77 @@ static void fimc_set_scaler(struct fimc_context *ctx, struct fimc_scaler *sc) fimc_write(ctx, cfg_ext, EXYNOS_CIEXTEN); } -static int fimc_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_dst_set_size(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_pos img_pos = *pos; - struct drm_exynos_sz img_sz = *sz; - u32 cfg; + u32 cfg, cfg_ext; - DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n", - swap, sz->hsize, sz->vsize); + DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height); /* original size */ - cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(img_sz.hsize) | - EXYNOS_ORGOSIZE_VERTICAL(img_sz.vsize)); + cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(buf->buf.width) | + EXYNOS_ORGOSIZE_VERTICAL(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_ORGOSIZE); - DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h); + DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y, + buf->rect.w, buf->rect.h); /* CSC ITU */ cfg = fimc_read(ctx, EXYNOS_CIGCTRL); cfg &= ~EXYNOS_CIGCTRL_CSC_MASK; - if (sz->hsize >= FIMC_WIDTH_ITU_709) + if (buf->buf.width >= FIMC_WIDTH_ITU_709) cfg |= EXYNOS_CIGCTRL_CSC_ITU709; else cfg |= EXYNOS_CIGCTRL_CSC_ITU601; fimc_write(ctx, cfg, EXYNOS_CIGCTRL); - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - img_sz.hsize = sz->vsize; - img_sz.vsize = sz->hsize; - } + cfg_ext = fimc_read(ctx, EXYNOS_CITRGFMT); /* target image size */ cfg = fimc_read(ctx, EXYNOS_CITRGFMT); cfg &= ~(EXYNOS_CITRGFMT_TARGETH_MASK | EXYNOS_CITRGFMT_TARGETV_MASK); - cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(img_pos.w) | - EXYNOS_CITRGFMT_TARGETVSIZE(img_pos.h)); + if (cfg_ext & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) + cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.h) | + EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.w)); + else + cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.w) | + EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.h)); fimc_write(ctx, cfg, EXYNOS_CITRGFMT); /* target area */ - cfg = EXYNOS_CITAREA_TARGET_AREA(img_pos.w * img_pos.h); + cfg = EXYNOS_CITAREA_TARGET_AREA(buf->rect.w * buf->rect.h); fimc_write(ctx, cfg, EXYNOS_CITAREA); /* offset Y(RGB), Cb, Cr */ - cfg = (EXYNOS_CIOYOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOYOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOYOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOYOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOYOFF); - cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOCBOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOCBOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOCBOFF); - cfg = (EXYNOS_CIOCROFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOCROFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOCROFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOCROFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOCROFF); - - return 0; } static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) + bool enqueue) { unsigned long flags; u32 buf_num; u32 cfg; - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); + DRM_DEBUG_KMS("buf_id[%d]enqueu[%d]\n", buf_id, enqueue); spin_lock_irqsave(&ctx->lock, flags); cfg = fimc_read(ctx, EXYNOS_CIFCNTSEQ); - if (buf_type == IPP_BUF_ENQUEUE) + if (enqueue) cfg |= (1 << buf_id); else cfg &= ~(1 << buf_id); @@ -1104,88 +918,29 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, buf_num = hweight32(cfg); - if (buf_type == IPP_BUF_ENQUEUE && buf_num >= FIMC_BUF_START) + if (enqueue && buf_num >= FIMC_BUF_START) fimc_mask_irq(ctx, true); - else if (buf_type == IPP_BUF_DEQUEUE && buf_num <= FIMC_BUF_STOP) + else if (!enqueue && buf_num <= FIMC_BUF_STOP) fimc_mask_irq(ctx, false); spin_unlock_irqrestore(&ctx->lock, flags); } -static int fimc_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void fimc_dst_set_addr(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); + fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIOYSA(0)); + fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIOCBSA(0)); + fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIOCRSA(0)); - if (buf_id > FIMC_MAX_DST) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -ENOMEM; - } - - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - config = &property->config[EXYNOS_DRM_OPS_DST]; - - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y], - EXYNOS_CIOYSA(buf_id)); - - if (config->fmt == DRM_FORMAT_YVU420) { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIOCRSA(buf_id)); - } else { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIOCRSA(buf_id)); - } - break; - case IPP_BUF_DEQUEUE: - fimc_write(ctx, 0x0, EXYNOS_CIOYSA(buf_id)); - fimc_write(ctx, 0x0, EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, 0x0, EXYNOS_CIOCRSA(buf_id)); - break; - default: - /* bypass */ - break; - } - - fimc_dst_set_buf_seq(ctx, buf_id, buf_type); - - return 0; + fimc_dst_set_buf_seq(ctx, 0, true); } -static struct exynos_drm_ipp_ops fimc_dst_ops = { - .set_fmt = fimc_dst_set_fmt, - .set_transf = fimc_dst_set_transf, - .set_size = fimc_dst_set_size, - .set_addr = fimc_dst_set_addr, -}; +static void fimc_stop(struct fimc_context *ctx); static irqreturn_t fimc_irq_handler(int irq, void *dev_id) { struct fimc_context *ctx = dev_id; - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = - c_node->event_work; int buf_id; DRM_DEBUG_KMS("fimc id[%d]\n", ctx->id); @@ -1203,170 +958,19 @@ static irqreturn_t fimc_irq_handler(int irq, void *dev_id) DRM_DEBUG_KMS("buf_id[%d]\n", buf_id); - fimc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = buf_id; - queue_work(ippdrv->event_workq, &event_work->work); - - return IRQ_HANDLED; -} - -static int fimc_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->writeback = 1; - prop_list->refresh_min = FIMC_REFRESH_MIN; - prop_list->refresh_max = FIMC_REFRESH_MAX; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_NONE) | - (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 1; - prop_list->crop = 1; - prop_list->crop_max.hsize = FIMC_CROP_MAX; - prop_list->crop_max.vsize = FIMC_CROP_MAX; - prop_list->crop_min.hsize = FIMC_CROP_MIN; - prop_list->crop_min.vsize = FIMC_CROP_MIN; - prop_list->scale = 1; - prop_list->scale_max.hsize = FIMC_SCALE_MAX; - prop_list->scale_max.vsize = FIMC_SCALE_MAX; - prop_list->scale_min.hsize = FIMC_SCALE_MIN; - prop_list->scale_min.vsize = FIMC_SCALE_MIN; - - return 0; -} - -static inline bool fimc_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} - -static int fimc_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) -{ - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos *pos; - struct drm_exynos_sz *sz; - bool swap; - int i; - - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; - - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - /* check for flip */ - if (!fimc_check_drm_flip(config->flip)) { - DRM_ERROR("invalid flip.\n"); - goto err_property; - } - - /* check for degree */ - switch (config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - break; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - swap = false; - break; - default: - DRM_ERROR("invalid degree.\n"); - goto err_property; - } - - /* check for buffer bound */ - if ((pos->x + pos->w > sz->hsize) || - (pos->y + pos->h > sz->vsize)) { - DRM_ERROR("out of buf bound.\n"); - goto err_property; - } + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - /* check for crop */ - if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) { - if (swap) { - if ((pos->h < pp->crop_min.hsize) || - (sz->vsize > pp->crop_max.hsize) || - (pos->w < pp->crop_min.vsize) || - (sz->hsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->crop_min.hsize) || - (sz->hsize > pp->crop_max.hsize) || - (pos->h < pp->crop_min.vsize) || - (sz->vsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } - } - - /* check for scale */ - if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) { - if (swap) { - if ((pos->h < pp->scale_min.hsize) || - (sz->vsize > pp->scale_max.hsize) || - (pos->w < pp->scale_min.vsize) || - (sz->hsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->scale_min.hsize) || - (sz->hsize > pp->scale_max.hsize) || - (pos->h < pp->scale_min.vsize) || - (sz->vsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } - } + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, 0); } - return 0; + fimc_dst_set_buf_seq(ctx, buf_id, false); + fimc_stop(ctx); -err_property: - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; - - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n", - i ? "dst" : "src", config->flip, config->degree, - pos->x, pos->y, pos->w, pos->h, - sz->hsize, sz->vsize); - } - - return -EINVAL; + return IRQ_HANDLED; } static void fimc_clear_addr(struct fimc_context *ctx) @@ -1386,10 +990,8 @@ static void fimc_clear_addr(struct fimc_context *ctx) } } -static int fimc_ippdrv_reset(struct device *dev) +static void fimc_reset(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - /* reset h/w block */ fimc_sw_reset(ctx); @@ -1397,82 +999,26 @@ static int fimc_ippdrv_reset(struct device *dev) memset(&ctx->sc, 0x0, sizeof(ctx->sc)); fimc_clear_addr(ctx); - - return 0; } -static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void fimc_start(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX]; - struct drm_exynos_ipp_set_wb set_wb; - int ret, i; u32 cfg0, cfg1; - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - fimc_mask_irq(ctx, true); - for_each_ipp_ops(i) { - config = &property->config[i]; - img_pos[i] = config->pos; - } - - ret = fimc_set_prescaler(ctx, &ctx->sc, - &img_pos[EXYNOS_DRM_OPS_SRC], - &img_pos[EXYNOS_DRM_OPS_DST]); - if (ret) { - dev_err(dev, "failed to set prescaler.\n"); - return ret; - } - - /* If set ture, we can save jpeg about screen */ + /* If set true, we can save jpeg about screen */ fimc_handle_jpeg(ctx, false); fimc_set_scaler(ctx, &ctx->sc); - switch (cmd) { - case IPP_CMD_M2M: - fimc_set_type_ctrl(ctx, FIMC_WB_NONE); - fimc_handle_lastend(ctx, false); - - /* setup dma */ - cfg0 = fimc_read(ctx, EXYNOS_MSCTRL); - cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK; - cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY; - fimc_write(ctx, cfg0, EXYNOS_MSCTRL); - break; - case IPP_CMD_WB: - fimc_set_type_ctrl(ctx, FIMC_WB_A); - fimc_handle_lastend(ctx, true); - - /* setup FIMD */ - ret = fimc_set_camblk_fimd0_wb(ctx); - if (ret < 0) { - dev_err(dev, "camblk setup failed.\n"); - return ret; - } + fimc_set_type_ctrl(ctx); + fimc_handle_lastend(ctx, false); - set_wb.enable = 1; - set_wb.refresh = property->refresh_rate; - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - ret = -EINVAL; - dev_err(dev, "invalid operations.\n"); - return ret; - } + /* setup dma */ + cfg0 = fimc_read(ctx, EXYNOS_MSCTRL); + cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK; + cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY; + fimc_write(ctx, cfg0, EXYNOS_MSCTRL); /* Reset status */ fimc_write(ctx, 0x0, EXYNOS_CISTATUS); @@ -1498,36 +1044,18 @@ static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) fimc_clear_bits(ctx, EXYNOS_CIOCTRL, EXYNOS_CIOCTRL_WEAVE_MASK); - if (cmd == IPP_CMD_M2M) - fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID); - - return 0; + fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID); } -static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void fimc_stop(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_ipp_set_wb set_wb = {0, 0}; u32 cfg; - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - switch (cmd) { - case IPP_CMD_M2M: - /* Source clear */ - cfg = fimc_read(ctx, EXYNOS_MSCTRL); - cfg &= ~EXYNOS_MSCTRL_INPUT_MASK; - cfg &= ~EXYNOS_MSCTRL_ENVID; - fimc_write(ctx, cfg, EXYNOS_MSCTRL); - break; - case IPP_CMD_WB: - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - dev_err(dev, "invalid operations.\n"); - break; - } + /* Source clear */ + cfg = fimc_read(ctx, EXYNOS_MSCTRL); + cfg &= ~EXYNOS_MSCTRL_INPUT_MASK; + cfg &= ~EXYNOS_MSCTRL_ENVID; + fimc_write(ctx, cfg, EXYNOS_MSCTRL); fimc_mask_irq(ctx, false); @@ -1545,6 +1073,87 @@ static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) fimc_set_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_IRQ_END_DISABLE); } +static int fimc_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct fimc_context *ctx = + container_of(ipp, struct fimc_context, ipp); + + pm_runtime_get_sync(ctx->dev); + ctx->task = task; + + fimc_src_set_fmt(ctx, task->src.buf.fourcc, task->src.buf.modifier); + fimc_src_set_size(ctx, &task->src); + fimc_src_set_transf(ctx, DRM_MODE_ROTATE_0); + fimc_src_set_addr(ctx, &task->src); + fimc_dst_set_fmt(ctx, task->dst.buf.fourcc, task->dst.buf.modifier); + fimc_dst_set_transf(ctx, task->transform.rotation); + fimc_dst_set_size(ctx, &task->dst); + fimc_dst_set_addr(ctx, &task->dst); + fimc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect); + fimc_start(ctx); + + return 0; +} + +static void fimc_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct fimc_context *ctx = + container_of(ipp, struct fimc_context, ipp); + + fimc_reset(ctx); + + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; + + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, -EIO); + } +} + +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = fimc_commit, + .abort = fimc_abort, +}; + +static int fimc_bind(struct device *dev, struct device *master, void *data) +{ + struct fimc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + ctx->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + ctx->formats, ctx->num_formats, "fimc"); + + dev_info(dev, "The exynos fimc has been probed successfully\n"); + + return 0; +} + +static void fimc_unbind(struct device *dev, struct device *master, + void *data) +{ + struct fimc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(drm_dev, dev); +} + +static const struct component_ops fimc_component_ops = { + .bind = fimc_bind, + .unbind = fimc_unbind, +}; + static void fimc_put_clocks(struct fimc_context *ctx) { int i; @@ -1559,7 +1168,7 @@ static void fimc_put_clocks(struct fimc_context *ctx) static int fimc_setup_clocks(struct fimc_context *ctx) { - struct device *fimc_dev = ctx->ippdrv.dev; + struct device *fimc_dev = ctx->dev; struct device *dev; int ret, i; @@ -1574,8 +1183,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx) ctx->clocks[i] = clk_get(dev, fimc_clock_names[i]); if (IS_ERR(ctx->clocks[i])) { - if (i >= FIMC_CLK_MUX) - break; ret = PTR_ERR(ctx->clocks[i]); dev_err(fimc_dev, "failed to get clock: %s\n", fimc_clock_names[i]); @@ -1583,20 +1190,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx) } } - /* Optional FIMC LCLK parent clock setting */ - if (!IS_ERR(ctx->clocks[FIMC_CLK_PARENT])) { - ret = clk_set_parent(ctx->clocks[FIMC_CLK_MUX], - ctx->clocks[FIMC_CLK_PARENT]); - if (ret < 0) { - dev_err(fimc_dev, "failed to set parent.\n"); - goto e_clk_free; - } - } - - ret = clk_set_rate(ctx->clocks[FIMC_CLK_LCLK], ctx->clk_frequency); - if (ret < 0) - goto e_clk_free; - ret = clk_prepare_enable(ctx->clocks[FIMC_CLK_LCLK]); if (!ret) return ret; @@ -1605,57 +1198,118 @@ e_clk_free: return ret; } -static int fimc_parse_dt(struct fimc_context *ctx) +int exynos_drm_check_fimc_device(struct device *dev) { - struct device_node *node = ctx->ippdrv.dev->of_node; + int id = of_alias_get_id(dev->of_node, "fimc"); - /* Handle only devices that support the LCD Writeback data path */ - if (!of_property_read_bool(node, "samsung,lcd-wb")) - return -ENODEV; + if (id >= 0 && (BIT(id) & fimc_mask)) + return 0; + return -ENODEV; +} - if (of_property_read_u32(node, "clock-frequency", - &ctx->clk_frequency)) - ctx->clk_frequency = FIMC_DEFAULT_LCLK_FREQUENCY; +static const unsigned int fimc_formats[] = { + DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, + DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61, + DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, + DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422, + DRM_FORMAT_YUV444, +}; - ctx->id = of_alias_get_id(node, "fimc"); +static const unsigned int fimc_tiled_formats[] = { + DRM_FORMAT_NV12, DRM_FORMAT_NV21, +}; - if (ctx->id < 0) { - dev_err(ctx->ippdrv.dev, "failed to get node alias id.\n"); - return -EINVAL; - } +static const struct drm_exynos_ipp_limit fimc_4210_limits_v1[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4224, 2 }, .v = { 16, 0, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1920 }, .v = { 128, 0 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; - return 0; -} +static const struct drm_exynos_ipp_limit fimc_4210_limits_v2[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 1920, 2 }, .v = { 16, 0, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1366 }, .v = { 128, 0 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; + +static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v1[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 128, 1920, 2 }, .v = { 128, 0, 2 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; + +static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v2[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 128, 1366, 2 }, .v = { 128, 0, 2 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; static int fimc_probe(struct platform_device *pdev) { + const struct drm_exynos_ipp_limit *limits; + struct exynos_drm_ipp_formats *formats; struct device *dev = &pdev->dev; struct fimc_context *ctx; struct resource *res; - struct exynos_drm_ippdrv *ippdrv; int ret; + int i, j, num_limits, num_formats; - if (!dev->of_node) { - dev_err(dev, "device tree node not found.\n"); + if (exynos_drm_check_fimc_device(dev) != 0) return -ENODEV; - } ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - ctx->ippdrv.dev = dev; + ctx->dev = dev; + ctx->id = of_alias_get_id(dev->of_node, "fimc"); - ret = fimc_parse_dt(ctx); - if (ret < 0) - return ret; + /* construct formats/limits array */ + num_formats = ARRAY_SIZE(fimc_formats) + ARRAY_SIZE(fimc_tiled_formats); + formats = devm_kzalloc(dev, sizeof(*formats) * num_formats, GFP_KERNEL); + if (!formats) + return -ENOMEM; + + /* linear formats */ + if (ctx->id < 3) { + limits = fimc_4210_limits_v1; + num_limits = ARRAY_SIZE(fimc_4210_limits_v1); + } else { + limits = fimc_4210_limits_v2; + num_limits = ARRAY_SIZE(fimc_4210_limits_v2); + } + for (i = 0; i < ARRAY_SIZE(fimc_formats); i++) { + formats[i].fourcc = fimc_formats[i]; + formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[i].limits = limits; + formats[i].num_limits = num_limits; + } - ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node, - "samsung,sysreg"); - if (IS_ERR(ctx->sysreg)) { - dev_err(dev, "syscon regmap lookup failed.\n"); - return PTR_ERR(ctx->sysreg); + /* tiled formats */ + if (ctx->id < 3) { + limits = fimc_4210_limits_tiled_v1; + num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v1); + } else { + limits = fimc_4210_limits_tiled_v2; + num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v2); } + for (j = i, i = 0; i < ARRAY_SIZE(fimc_tiled_formats); j++, i++) { + formats[j].fourcc = fimc_tiled_formats[i]; + formats[j].modifier = DRM_FORMAT_MOD_SAMSUNG_64_32_TILE; + formats[j].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[j].limits = limits; + formats[j].num_limits = num_limits; + } + + ctx->formats = formats; + ctx->num_formats = num_formats; /* resource memory */ ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1670,9 +1324,8 @@ static int fimc_probe(struct platform_device *pdev) return -ENOENT; } - ctx->irq = res->start; - ret = devm_request_threaded_irq(dev, ctx->irq, NULL, fimc_irq_handler, - IRQF_ONESHOT, "drm_fimc", ctx); + ret = devm_request_irq(dev, res->start, fimc_irq_handler, + 0, dev_name(dev), ctx); if (ret < 0) { dev_err(dev, "failed to request irq.\n"); return ret; @@ -1682,39 +1335,24 @@ static int fimc_probe(struct platform_device *pdev) if (ret < 0) return ret; - ippdrv = &ctx->ippdrv; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &fimc_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &fimc_dst_ops; - ippdrv->check_property = fimc_ippdrv_check_property; - ippdrv->reset = fimc_ippdrv_reset; - ippdrv->start = fimc_ippdrv_start; - ippdrv->stop = fimc_ippdrv_stop; - ret = fimc_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - goto err_put_clk; - } - - DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); - spin_lock_init(&ctx->lock); platform_set_drvdata(pdev, ctx); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, FIMC_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm fimc device.\n"); + ret = component_add(dev, &fimc_component_ops); + if (ret) goto err_pm_dis; - } dev_info(dev, "drm fimc registered successfully.\n"); return 0; err_pm_dis: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); -err_put_clk: fimc_put_clocks(ctx); return ret; @@ -1724,42 +1362,24 @@ static int fimc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - exynos_drm_ippdrv_unregister(ippdrv); + component_del(dev, &fimc_component_ops); + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); fimc_put_clocks(ctx); - pm_runtime_set_suspended(dev); - pm_runtime_disable(dev); return 0; } #ifdef CONFIG_PM -static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable) -{ - DRM_DEBUG_KMS("enable[%d]\n", enable); - - if (enable) { - clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]); - clk_prepare_enable(ctx->clocks[FIMC_CLK_WB_A]); - ctx->suspended = false; - } else { - clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]); - clk_disable_unprepare(ctx->clocks[FIMC_CLK_WB_A]); - ctx->suspended = true; - } - - return 0; -} - static int fimc_runtime_suspend(struct device *dev) { struct fimc_context *ctx = get_fimc_context(dev); DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - return fimc_clk_ctrl(ctx, false); + clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]); + return 0; } static int fimc_runtime_resume(struct device *dev) @@ -1767,8 +1387,7 @@ static int fimc_runtime_resume(struct device *dev) struct fimc_context *ctx = get_fimc_context(dev); DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - return fimc_clk_ctrl(ctx, true); + return clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]); } #endif @@ -1795,4 +1414,3 @@ struct platform_driver fimc_driver = { .pm = &fimc_pm_ops, }, }; - diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.h b/drivers/gpu/drm/exynos/exynos_drm_fimc.h deleted file mode 100644 index 127a424c5fdf..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * Eunchul Kim <chulspro.kim@samsung.com> - * Jinyoung Jeon <jy0.jeon@samsung.com> - * Sangmin Lee <lsmin.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_FIMC_H_ -#define _EXYNOS_DRM_FIMC_H_ - -/* - * TODO - * FIMD output interface notifier callback. - */ - -#endif /* _EXYNOS_DRM_FIMC_H_ */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index d42ae2bc3e56..01b1570d0c3a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -121,6 +121,12 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = { .has_limited_fmt = 1, }; +static struct fimd_driver_data s5pv210_fimd_driver_data = { + .timing_base = 0x0, + .has_shadowcon = 1, + .has_clksel = 1, +}; + static struct fimd_driver_data exynos3_fimd_driver_data = { .timing_base = 0x20000, .lcdblk_offset = 0x210, @@ -193,6 +199,8 @@ struct fimd_context { static const struct of_device_id fimd_driver_dt_match[] = { { .compatible = "samsung,s3c6400-fimd", .data = &s3c64xx_fimd_driver_data }, + { .compatible = "samsung,s5pv210-fimd", + .data = &s5pv210_fimd_driver_data }, { .compatible = "samsung,exynos3250-fimd", .data = &exynos3_fimd_driver_data }, { .compatible = "samsung,exynos4210-fimd", diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 11cc01b47bc0..6e1494fa71b4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -431,37 +431,24 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, return 0; } -int exynos_drm_gem_fault(struct vm_fault *vmf) +vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj); unsigned long pfn; pgoff_t page_offset; - int ret; page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; if (page_offset >= (exynos_gem->size >> PAGE_SHIFT)) { DRM_ERROR("invalid page offset\n"); - ret = -EINVAL; - goto out; + return VM_FAULT_SIGBUS; } pfn = page_to_pfn(exynos_gem->pages[page_offset]); - ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); - -out: - switch (ret) { - case 0: - case -ERESTARTSYS: - case -EINTR: - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - default: - return VM_FAULT_SIGBUS; - } + return vmf_insert_mixed(vma, vmf->address, + __pfn_to_pfn_t(pfn, PFN_DEV)); } static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 5a4c7de80f65..9057d7f1d6ed 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -13,6 +13,7 @@ #define _EXYNOS_DRM_GEM_H_ #include <drm/drm_gem.h> +#include <linux/mm_types.h> #define to_exynos_gem(x) container_of(x, struct exynos_drm_gem, base) @@ -111,7 +112,7 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, struct drm_mode_create_dumb *args); /* page fault handler and mmap fault address(virtual) to physical memory. */ -int exynos_drm_gem_fault(struct vm_fault *vmf); +vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf); /* set vm_flags and we can change the vm attribute to other one at here. */ int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 0506b2b17ac1..e99dd1e4ba65 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -12,18 +12,20 @@ * */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/platform_device.h> #include <linux/clk.h> #include <linux/pm_runtime.h> #include <linux/mfd/syscon.h> +#include <linux/of_device.h> #include <linux/regmap.h> #include <drm/drmP.h> #include <drm/exynos_drm.h> #include "regs-gsc.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" -#include "exynos_drm_gsc.h" /* * GSC stands for General SCaler and @@ -31,26 +33,10 @@ * input DMA reads image data from the memory. * output DMA writes image data to memory. * GSC supports image rotation and image effect functions. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> GSC H/W ----> Memory. - * Writeback operation : supports cloned screen with FIMD. - * FIMD ----> GSC H/W ----> Memory. - * Output operation : supports direct display using local path. - * Memory ----> GSC H/W ----> FIMD, Mixer. */ -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. added check_prepare api for right register. - * 5. need to add supported list in prop_list. - * 6. check prescaler/scaler optimization. - */ -#define GSC_MAX_DEVS 4 +#define GSC_MAX_CLOCKS 8 #define GSC_MAX_SRC 4 #define GSC_MAX_DST 16 #define GSC_RESET_TIMEOUT 50 @@ -65,8 +51,6 @@ #define GSC_SC_DOWN_RATIO_4_8 131072 #define GSC_SC_DOWN_RATIO_3_8 174762 #define GSC_SC_DOWN_RATIO_2_8 262144 -#define GSC_REFRESH_MIN 12 -#define GSC_REFRESH_MAX 60 #define GSC_CROP_MAX 8192 #define GSC_CROP_MIN 32 #define GSC_SCALE_MAX 4224 @@ -77,10 +61,9 @@ #define GSC_COEF_H_8T 8 #define GSC_COEF_V_4T 4 #define GSC_COEF_DEPTH 3 +#define GSC_AUTOSUSPEND_DELAY 2000 #define get_gsc_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct gsc_context, ippdrv); #define gsc_read(offset) readl(ctx->regs + (offset)) #define gsc_write(cfg, offset) writel(cfg, ctx->regs + (offset)) @@ -104,50 +87,47 @@ struct gsc_scaler { }; /* - * A structure of scaler capability. - * - * find user manual 49.2 features. - * @tile_w: tile mode or rotation width. - * @tile_h: tile mode or rotation height. - * @w: other cases width. - * @h: other cases height. - */ -struct gsc_capability { - /* tile or rotation */ - u32 tile_w; - u32 tile_h; - /* other cases */ - u32 w; - u32 h; -}; - -/* * A structure of gsc context. * - * @ippdrv: prepare initialization using ippdrv. * @regs_res: register resources. * @regs: memory mapped io registers. - * @sysreg: handle to SYSREG block regmap. - * @lock: locking of operations. * @gsc_clk: gsc gate clock. * @sc: scaler infomations. * @id: gsc id. * @irq: irq number. * @rotation: supports rotation of src. - * @suspended: qos operations. */ struct gsc_context { - struct exynos_drm_ippdrv ippdrv; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + struct exynos_drm_ipp_task *task; + struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct resource *regs_res; void __iomem *regs; - struct regmap *sysreg; - struct mutex lock; - struct clk *gsc_clk; + const char **clk_names; + struct clk *clocks[GSC_MAX_CLOCKS]; + int num_clocks; struct gsc_scaler sc; int id; int irq; bool rotation; - bool suspended; +}; + +/** + * struct gsc_driverdata - per device type driver data for init time. + * + * @limits: picture size limits array + * @clk_names: names of clocks needed by this variant + * @num_clocks: the number of clocks needed by this variant + */ +struct gsc_driverdata { + const struct drm_exynos_ipp_limit *limits; + int num_limits; + const char *clk_names[GSC_MAX_CLOCKS]; + int num_clocks; }; /* 8-tap Filter Coefficient */ @@ -438,25 +418,6 @@ static int gsc_sw_reset(struct gsc_context *ctx) return 0; } -static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable) -{ - unsigned int gscblk_cfg; - - if (!ctx->sysreg) - return; - - regmap_read(ctx->sysreg, SYSREG_GSCBLK_CFG1, &gscblk_cfg); - - if (enable) - gscblk_cfg |= GSC_BLK_DISP1WB_DEST(ctx->id) | - GSC_BLK_GSCL_WB_IN_SRC_SEL(ctx->id) | - GSC_BLK_SW_RESET_WB_DEST(ctx->id); - else - gscblk_cfg |= GSC_BLK_PXLASYNC_LO_MASK_WB(ctx->id); - - regmap_write(ctx->sysreg, SYSREG_GSCBLK_CFG1, gscblk_cfg); -} - static void gsc_handle_irq(struct gsc_context *ctx, bool enable, bool overflow, bool done) { @@ -487,10 +448,8 @@ static void gsc_handle_irq(struct gsc_context *ctx, bool enable, } -static int gsc_src_set_fmt(struct device *dev, u32 fmt) +static void gsc_src_set_fmt(struct gsc_context *ctx, u32 fmt) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -506,6 +465,7 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt) cfg |= GSC_IN_RGB565; break; case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: cfg |= GSC_IN_XRGB8888; break; case DRM_FORMAT_BGRX8888: @@ -548,115 +508,84 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt) cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_2P); break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } gsc_write(cfg, GSC_IN_CON); - - return 0; } -static int gsc_src_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void gsc_src_set_transf(struct gsc_context *ctx, unsigned int rotation) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); - cfg = gsc_read(GSC_IN_CON); cfg &= ~GSC_IN_ROT_MASK; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_Y) cfg |= GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg |= GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_90: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_90_XFLIP; - else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_90_YFLIP; - else - cfg |= GSC_IN_ROT_90; + case DRM_MODE_ROTATE_90: + cfg |= GSC_IN_ROT_90; + if (rotation & DRM_MODE_REFLECT_Y) + cfg |= GSC_IN_ROT_XFLIP; + if (rotation & DRM_MODE_REFLECT_X) + cfg |= GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg |= GSC_IN_ROT_180; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg |= GSC_IN_ROT_270; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~GSC_IN_ROT_YFLIP; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } gsc_write(cfg, GSC_IN_CON); ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0; - *swap = ctx->rotation; - - return 0; } -static int gsc_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void gsc_src_set_size(struct gsc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_pos img_pos = *pos; struct gsc_scaler *sc = &ctx->sc; u32 cfg; - DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n", - swap, pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - } - /* pixel offset */ - cfg = (GSC_SRCIMG_OFFSET_X(img_pos.x) | - GSC_SRCIMG_OFFSET_Y(img_pos.y)); + cfg = (GSC_SRCIMG_OFFSET_X(buf->rect.x) | + GSC_SRCIMG_OFFSET_Y(buf->rect.y)); gsc_write(cfg, GSC_SRCIMG_OFFSET); /* cropped size */ - cfg = (GSC_CROPPED_WIDTH(img_pos.w) | - GSC_CROPPED_HEIGHT(img_pos.h)); + cfg = (GSC_CROPPED_WIDTH(buf->rect.w) | + GSC_CROPPED_HEIGHT(buf->rect.h)); gsc_write(cfg, GSC_CROPPED_SIZE); - DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize); - /* original size */ cfg = gsc_read(GSC_SRCIMG_SIZE); cfg &= ~(GSC_SRCIMG_HEIGHT_MASK | GSC_SRCIMG_WIDTH_MASK); - cfg |= (GSC_SRCIMG_WIDTH(sz->hsize) | - GSC_SRCIMG_HEIGHT(sz->vsize)); + cfg |= (GSC_SRCIMG_WIDTH(buf->buf.width) | + GSC_SRCIMG_HEIGHT(buf->buf.height)); gsc_write(cfg, GSC_SRCIMG_SIZE); cfg = gsc_read(GSC_IN_CON); cfg &= ~GSC_IN_RGB_TYPE_MASK; - DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range); - - if (pos->w >= GSC_WIDTH_ITU_709) + if (buf->rect.w >= GSC_WIDTH_ITU_709) if (sc->range) cfg |= GSC_IN_RGB_HD_WIDE; else @@ -668,103 +597,39 @@ static int gsc_src_set_size(struct device *dev, int swap, cfg |= GSC_IN_RGB_SD_NARROW; gsc_write(cfg, GSC_IN_CON); - - return 0; } -static int gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id, + bool enqueue) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - bool masked; + bool masked = !enqueue; u32 cfg; u32 mask = 0x00000001 << buf_id; - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); - /* mask register set */ cfg = gsc_read(GSC_IN_BASE_ADDR_Y_MASK); - switch (buf_type) { - case IPP_BUF_ENQUEUE: - masked = false; - break; - case IPP_BUF_DEQUEUE: - masked = true; - break; - default: - dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n"); - return -EINVAL; - } - /* sequence id */ cfg &= ~mask; cfg |= masked << buf_id; gsc_write(cfg, GSC_IN_BASE_ADDR_Y_MASK); gsc_write(cfg, GSC_IN_BASE_ADDR_CB_MASK); gsc_write(cfg, GSC_IN_BASE_ADDR_CR_MASK); - - return 0; } -static int gsc_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_src_set_addr(struct gsc_context *ctx, u32 buf_id, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EFAULT; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > GSC_MAX_SRC) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -EINVAL; - } - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y], - GSC_IN_BASE_ADDR_Y(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB], - GSC_IN_BASE_ADDR_CB(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR], - GSC_IN_BASE_ADDR_CR(buf_id)); - break; - case IPP_BUF_DEQUEUE: - gsc_write(0x0, GSC_IN_BASE_ADDR_Y(buf_id)); - gsc_write(0x0, GSC_IN_BASE_ADDR_CB(buf_id)); - gsc_write(0x0, GSC_IN_BASE_ADDR_CR(buf_id)); - break; - default: - /* bypass */ - break; - } + gsc_write(buf->dma_addr[0], GSC_IN_BASE_ADDR_Y(buf_id)); + gsc_write(buf->dma_addr[1], GSC_IN_BASE_ADDR_CB(buf_id)); + gsc_write(buf->dma_addr[2], GSC_IN_BASE_ADDR_CR(buf_id)); - return gsc_src_set_buf_seq(ctx, buf_id, buf_type); + gsc_src_set_buf_seq(ctx, buf_id, true); } -static struct exynos_drm_ipp_ops gsc_src_ops = { - .set_fmt = gsc_src_set_fmt, - .set_transf = gsc_src_set_transf, - .set_size = gsc_src_set_size, - .set_addr = gsc_src_set_addr, -}; - -static int gsc_dst_set_fmt(struct device *dev, u32 fmt) +static void gsc_dst_set_fmt(struct gsc_context *ctx, u32 fmt) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -779,8 +644,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_RGB565: cfg |= GSC_OUT_RGB565; break; + case DRM_FORMAT_ARGB8888: case DRM_FORMAT_XRGB8888: - cfg |= GSC_OUT_XRGB8888; + cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_GLOBAL_ALPHA(0xff)); break; case DRM_FORMAT_BGRX8888: cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_RB_SWAP); @@ -819,69 +685,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt) cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_2P); break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } gsc_write(cfg, GSC_OUT_CON); - - return 0; -} - -static int gsc_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) -{ - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - u32 cfg; - - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); - - cfg = gsc_read(GSC_IN_CON); - cfg &= ~GSC_IN_ROT_MASK; - - switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_YFLIP; - break; - case EXYNOS_DRM_DEGREE_90: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_90_XFLIP; - else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_90_YFLIP; - else - cfg |= GSC_IN_ROT_90; - break; - case EXYNOS_DRM_DEGREE_180: - cfg |= GSC_IN_ROT_180; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg &= ~GSC_IN_ROT_YFLIP; - break; - case EXYNOS_DRM_DEGREE_270: - cfg |= GSC_IN_ROT_270; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg &= ~GSC_IN_ROT_YFLIP; - break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; - } - - gsc_write(cfg, GSC_IN_CON); - - ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0; - *swap = ctx->rotation; - - return 0; } static int gsc_get_ratio_shift(u32 src, u32 dst, u32 *ratio) @@ -919,9 +725,9 @@ static void gsc_get_prescaler_shfactor(u32 hratio, u32 vratio, u32 *shfactor) } static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc, - struct drm_exynos_pos *src, struct drm_exynos_pos *dst) + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; u32 src_w, src_h, dst_w, dst_h; int ret = 0; @@ -939,13 +745,13 @@ static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc, ret = gsc_get_ratio_shift(src_w, dst_w, &sc->pre_hratio); if (ret) { - dev_err(ippdrv->dev, "failed to get ratio horizontal.\n"); + dev_err(ctx->dev, "failed to get ratio horizontal.\n"); return ret; } ret = gsc_get_ratio_shift(src_h, dst_h, &sc->pre_vratio); if (ret) { - dev_err(ippdrv->dev, "failed to get ratio vertical.\n"); + dev_err(ctx->dev, "failed to get ratio vertical.\n"); return ret; } @@ -1039,47 +845,37 @@ static void gsc_set_scaler(struct gsc_context *ctx, struct gsc_scaler *sc) gsc_write(cfg, GSC_MAIN_V_RATIO); } -static int gsc_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void gsc_dst_set_size(struct gsc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_pos img_pos = *pos; struct gsc_scaler *sc = &ctx->sc; u32 cfg; - DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n", - swap, pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - } - /* pixel offset */ - cfg = (GSC_DSTIMG_OFFSET_X(pos->x) | - GSC_DSTIMG_OFFSET_Y(pos->y)); + cfg = (GSC_DSTIMG_OFFSET_X(buf->rect.x) | + GSC_DSTIMG_OFFSET_Y(buf->rect.y)); gsc_write(cfg, GSC_DSTIMG_OFFSET); /* scaled size */ - cfg = (GSC_SCALED_WIDTH(img_pos.w) | GSC_SCALED_HEIGHT(img_pos.h)); + if (ctx->rotation) + cfg = (GSC_SCALED_WIDTH(buf->rect.h) | + GSC_SCALED_HEIGHT(buf->rect.w)); + else + cfg = (GSC_SCALED_WIDTH(buf->rect.w) | + GSC_SCALED_HEIGHT(buf->rect.h)); gsc_write(cfg, GSC_SCALED_SIZE); - DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize); - /* original size */ cfg = gsc_read(GSC_DSTIMG_SIZE); - cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | - GSC_DSTIMG_WIDTH_MASK); - cfg |= (GSC_DSTIMG_WIDTH(sz->hsize) | - GSC_DSTIMG_HEIGHT(sz->vsize)); + cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | GSC_DSTIMG_WIDTH_MASK); + cfg |= GSC_DSTIMG_WIDTH(buf->buf.width) | + GSC_DSTIMG_HEIGHT(buf->buf.height); gsc_write(cfg, GSC_DSTIMG_SIZE); cfg = gsc_read(GSC_OUT_CON); cfg &= ~GSC_OUT_RGB_TYPE_MASK; - DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range); - - if (pos->w >= GSC_WIDTH_ITU_709) + if (buf->rect.w >= GSC_WIDTH_ITU_709) if (sc->range) cfg |= GSC_OUT_RGB_HD_WIDE; else @@ -1091,8 +887,6 @@ static int gsc_dst_set_size(struct device *dev, int swap, cfg |= GSC_OUT_RGB_SD_NARROW; gsc_write(cfg, GSC_OUT_CON); - - return 0; } static int gsc_dst_get_buf_seq(struct gsc_context *ctx) @@ -1111,35 +905,16 @@ static int gsc_dst_get_buf_seq(struct gsc_context *ctx) return buf_num; } -static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, + bool enqueue) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - bool masked; + bool masked = !enqueue; u32 cfg; u32 mask = 0x00000001 << buf_id; - int ret = 0; - - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); - - mutex_lock(&ctx->lock); /* mask register set */ cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK); - switch (buf_type) { - case IPP_BUF_ENQUEUE: - masked = false; - break; - case IPP_BUF_DEQUEUE: - masked = true; - break; - default: - dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n"); - ret = -EINVAL; - goto err_unlock; - } - /* sequence id */ cfg &= ~mask; cfg |= masked << buf_id; @@ -1148,94 +923,29 @@ static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, gsc_write(cfg, GSC_OUT_BASE_ADDR_CR_MASK); /* interrupt enable */ - if (buf_type == IPP_BUF_ENQUEUE && - gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START) + if (enqueue && gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START) gsc_handle_irq(ctx, true, false, true); /* interrupt disable */ - if (buf_type == IPP_BUF_DEQUEUE && - gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP) + if (!enqueue && gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP) gsc_handle_irq(ctx, false, false, true); - -err_unlock: - mutex_unlock(&ctx->lock); - return ret; } -static int gsc_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_dst_set_addr(struct gsc_context *ctx, + u32 buf_id, struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EFAULT; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > GSC_MAX_DST) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -EINVAL; - } - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y], - GSC_OUT_BASE_ADDR_Y(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB], - GSC_OUT_BASE_ADDR_CB(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR], - GSC_OUT_BASE_ADDR_CR(buf_id)); - break; - case IPP_BUF_DEQUEUE: - gsc_write(0x0, GSC_OUT_BASE_ADDR_Y(buf_id)); - gsc_write(0x0, GSC_OUT_BASE_ADDR_CB(buf_id)); - gsc_write(0x0, GSC_OUT_BASE_ADDR_CR(buf_id)); - break; - default: - /* bypass */ - break; - } + gsc_write(buf->dma_addr[0], GSC_OUT_BASE_ADDR_Y(buf_id)); + gsc_write(buf->dma_addr[1], GSC_OUT_BASE_ADDR_CB(buf_id)); + gsc_write(buf->dma_addr[2], GSC_OUT_BASE_ADDR_CR(buf_id)); - return gsc_dst_set_buf_seq(ctx, buf_id, buf_type); -} - -static struct exynos_drm_ipp_ops gsc_dst_ops = { - .set_fmt = gsc_dst_set_fmt, - .set_transf = gsc_dst_set_transf, - .set_size = gsc_dst_set_size, - .set_addr = gsc_dst_set_addr, -}; - -static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable) -{ - DRM_DEBUG_KMS("enable[%d]\n", enable); - - if (enable) { - clk_prepare_enable(ctx->gsc_clk); - ctx->suspended = false; - } else { - clk_disable_unprepare(ctx->gsc_clk); - ctx->suspended = true; - } - - return 0; + gsc_dst_set_buf_seq(ctx, buf_id, true); } static int gsc_get_src_buf_index(struct gsc_context *ctx) { u32 cfg, curr_index, i; u32 buf_id = GSC_MAX_SRC; - int ret; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); @@ -1249,19 +959,15 @@ static int gsc_get_src_buf_index(struct gsc_context *ctx) } } + DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, + curr_index, buf_id); + if (buf_id == GSC_MAX_SRC) { DRM_ERROR("failed to get in buffer index.\n"); return -EINVAL; } - ret = gsc_src_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - if (ret < 0) { - DRM_ERROR("failed to dequeue.\n"); - return ret; - } - - DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, - curr_index, buf_id); + gsc_src_set_buf_seq(ctx, buf_id, false); return buf_id; } @@ -1270,7 +976,6 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) { u32 cfg, curr_index, i; u32 buf_id = GSC_MAX_DST; - int ret; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); @@ -1289,11 +994,7 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) return -EINVAL; } - ret = gsc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - if (ret < 0) { - DRM_ERROR("failed to dequeue.\n"); - return ret; - } + gsc_dst_set_buf_seq(ctx, buf_id, false); DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, curr_index, buf_id); @@ -1304,215 +1005,55 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) static irqreturn_t gsc_irq_handler(int irq, void *dev_id) { struct gsc_context *ctx = dev_id; - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = - c_node->event_work; u32 status; - int buf_id[EXYNOS_DRM_OPS_MAX]; + int err = 0; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); status = gsc_read(GSC_IRQ); if (status & GSC_IRQ_STATUS_OR_IRQ) { - dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n", + dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n", ctx->id, status); - return IRQ_NONE; + err = -EINVAL; } if (status & GSC_IRQ_STATUS_OR_FRM_DONE) { - dev_dbg(ippdrv->dev, "occurred frame done at %d, status 0x%x.\n", - ctx->id, status); - - buf_id[EXYNOS_DRM_OPS_SRC] = gsc_get_src_buf_index(ctx); - if (buf_id[EXYNOS_DRM_OPS_SRC] < 0) - return IRQ_HANDLED; - - buf_id[EXYNOS_DRM_OPS_DST] = gsc_get_dst_buf_index(ctx); - if (buf_id[EXYNOS_DRM_OPS_DST] < 0) - return IRQ_HANDLED; - - DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n", - buf_id[EXYNOS_DRM_OPS_SRC], buf_id[EXYNOS_DRM_OPS_DST]); - - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_SRC] = - buf_id[EXYNOS_DRM_OPS_SRC]; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = - buf_id[EXYNOS_DRM_OPS_DST]; - queue_work(ippdrv->event_workq, &event_work->work); - } - - return IRQ_HANDLED; -} - -static int gsc_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->writeback = 1; - prop_list->refresh_min = GSC_REFRESH_MIN; - prop_list->refresh_max = GSC_REFRESH_MAX; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 1; - prop_list->crop = 1; - prop_list->crop_max.hsize = GSC_CROP_MAX; - prop_list->crop_max.vsize = GSC_CROP_MAX; - prop_list->crop_min.hsize = GSC_CROP_MIN; - prop_list->crop_min.vsize = GSC_CROP_MIN; - prop_list->scale = 1; - prop_list->scale_max.hsize = GSC_SCALE_MAX; - prop_list->scale_max.vsize = GSC_SCALE_MAX; - prop_list->scale_min.hsize = GSC_SCALE_MIN; - prop_list->scale_min.vsize = GSC_SCALE_MIN; - - return 0; -} - -static inline bool gsc_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} - -static int gsc_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) -{ - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos *pos; - struct drm_exynos_sz *sz; - bool swap; - int i; - - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; + int src_buf_id, dst_buf_id; - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - /* check for flip */ - if (!gsc_check_drm_flip(config->flip)) { - DRM_ERROR("invalid flip.\n"); - goto err_property; - } - - /* check for degree */ - switch (config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - break; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - swap = false; - break; - default: - DRM_ERROR("invalid degree.\n"); - goto err_property; - } + dev_dbg(ctx->dev, "occurred frame done at %d, status 0x%x.\n", + ctx->id, status); - /* check for buffer bound */ - if ((pos->x + pos->w > sz->hsize) || - (pos->y + pos->h > sz->vsize)) { - DRM_ERROR("out of buf bound.\n"); - goto err_property; - } + src_buf_id = gsc_get_src_buf_index(ctx); + dst_buf_id = gsc_get_dst_buf_index(ctx); - /* check for crop */ - if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) { - if (swap) { - if ((pos->h < pp->crop_min.hsize) || - (sz->vsize > pp->crop_max.hsize) || - (pos->w < pp->crop_min.vsize) || - (sz->hsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->crop_min.hsize) || - (sz->hsize > pp->crop_max.hsize) || - (pos->h < pp->crop_min.vsize) || - (sz->vsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } - } + DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n", src_buf_id, + dst_buf_id); - /* check for scale */ - if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) { - if (swap) { - if ((pos->h < pp->scale_min.hsize) || - (sz->vsize > pp->scale_max.hsize) || - (pos->w < pp->scale_min.vsize) || - (sz->hsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->scale_min.hsize) || - (sz->hsize > pp->scale_max.hsize) || - (pos->h < pp->scale_min.vsize) || - (sz->vsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } - } + if (src_buf_id < 0 || dst_buf_id < 0) + err = -EINVAL; } - return 0; - -err_property: - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n", - i ? "dst" : "src", config->flip, config->degree, - pos->x, pos->y, pos->w, pos->h, - sz->hsize, sz->vsize); + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, err); } - return -EINVAL; + return IRQ_HANDLED; } - -static int gsc_ippdrv_reset(struct device *dev) +static int gsc_reset(struct gsc_context *ctx) { - struct gsc_context *ctx = get_gsc_context(dev); struct gsc_scaler *sc = &ctx->sc; int ret; /* reset h/w block */ ret = gsc_sw_reset(ctx); if (ret < 0) { - dev_err(dev, "failed to reset hardware.\n"); + dev_err(ctx->dev, "failed to reset hardware.\n"); return ret; } @@ -1523,166 +1064,172 @@ static int gsc_ippdrv_reset(struct device *dev) return 0; } -static int gsc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void gsc_start(struct gsc_context *ctx) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX]; - struct drm_exynos_ipp_set_wb set_wb; u32 cfg; - int ret, i; - - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; gsc_handle_irq(ctx, true, false, true); - for_each_ipp_ops(i) { - config = &property->config[i]; - img_pos[i] = config->pos; - } + /* enable one shot */ + cfg = gsc_read(GSC_ENABLE); + cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK | + GSC_ENABLE_CLK_GATE_MODE_MASK); + cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT; + gsc_write(cfg, GSC_ENABLE); - switch (cmd) { - case IPP_CMD_M2M: - /* enable one shot */ - cfg = gsc_read(GSC_ENABLE); - cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK | - GSC_ENABLE_CLK_GATE_MODE_MASK); - cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT; - gsc_write(cfg, GSC_ENABLE); - - /* src dma memory */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= GSC_IN_PATH_MEMORY; - gsc_write(cfg, GSC_IN_CON); - - /* dst dma memory */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - case IPP_CMD_WB: - set_wb.enable = 1; - set_wb.refresh = property->refresh_rate; - gsc_set_gscblk_fimd_wb(ctx, set_wb.enable); - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - - /* src local path */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= (GSC_IN_PATH_LOCAL | GSC_IN_LOCAL_FIMD_WB); - gsc_write(cfg, GSC_IN_CON); - - /* dst dma memory */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - case IPP_CMD_OUTPUT: - /* src dma memory */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= GSC_IN_PATH_MEMORY; - gsc_write(cfg, GSC_IN_CON); - - /* dst local path */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - default: - ret = -EINVAL; - dev_err(dev, "invalid operations.\n"); - return ret; - } + /* src dma memory */ + cfg = gsc_read(GSC_IN_CON); + cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); + cfg |= GSC_IN_PATH_MEMORY; + gsc_write(cfg, GSC_IN_CON); - ret = gsc_set_prescaler(ctx, &ctx->sc, - &img_pos[EXYNOS_DRM_OPS_SRC], - &img_pos[EXYNOS_DRM_OPS_DST]); - if (ret) { - dev_err(dev, "failed to set prescaler.\n"); - return ret; - } + /* dst dma memory */ + cfg = gsc_read(GSC_OUT_CON); + cfg |= GSC_OUT_PATH_MEMORY; + gsc_write(cfg, GSC_OUT_CON); gsc_set_scaler(ctx, &ctx->sc); cfg = gsc_read(GSC_ENABLE); cfg |= GSC_ENABLE_ON; gsc_write(cfg, GSC_ENABLE); +} + +static int gsc_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct gsc_context *ctx = container_of(ipp, struct gsc_context, ipp); + int ret; + + pm_runtime_get_sync(ctx->dev); + ctx->task = task; + + ret = gsc_reset(ctx); + if (ret) { + pm_runtime_put_autosuspend(ctx->dev); + ctx->task = NULL; + return ret; + } + + gsc_src_set_fmt(ctx, task->src.buf.fourcc); + gsc_src_set_transf(ctx, task->transform.rotation); + gsc_src_set_size(ctx, &task->src); + gsc_src_set_addr(ctx, 0, &task->src); + gsc_dst_set_fmt(ctx, task->dst.buf.fourcc); + gsc_dst_set_size(ctx, &task->dst); + gsc_dst_set_addr(ctx, 0, &task->dst); + gsc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect); + gsc_start(ctx); return 0; } -static void gsc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void gsc_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_ipp_set_wb set_wb = {0, 0}; - u32 cfg; + struct gsc_context *ctx = + container_of(ipp, struct gsc_context, ipp); - DRM_DEBUG_KMS("cmd[%d]\n", cmd); + gsc_reset(ctx); + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - switch (cmd) { - case IPP_CMD_M2M: - /* bypass */ - break; - case IPP_CMD_WB: - gsc_set_gscblk_fimd_wb(ctx, set_wb.enable); - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - dev_err(dev, "invalid operations.\n"); - break; + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, -EIO); } +} - gsc_handle_irq(ctx, false, false, true); +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = gsc_commit, + .abort = gsc_abort, +}; - /* reset sequence */ - gsc_write(0xff, GSC_OUT_BASE_ADDR_Y_MASK); - gsc_write(0xff, GSC_OUT_BASE_ADDR_CB_MASK); - gsc_write(0xff, GSC_OUT_BASE_ADDR_CR_MASK); +static int gsc_bind(struct device *dev, struct device *master, void *data) +{ + struct gsc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; - cfg = gsc_read(GSC_ENABLE); - cfg &= ~GSC_ENABLE_ON; - gsc_write(cfg, GSC_ENABLE); + ctx->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + ctx->formats, ctx->num_formats, "gsc"); + + dev_info(dev, "The exynos gscaler has been probed successfully\n"); + + return 0; +} + +static void gsc_unbind(struct device *dev, struct device *master, + void *data) +{ + struct gsc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(drm_dev, dev); } +static const struct component_ops gsc_component_ops = { + .bind = gsc_bind, + .unbind = gsc_unbind, +}; + +static const unsigned int gsc_formats[] = { + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, DRM_FORMAT_BGRX8888, + DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61, + DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, + DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422, +}; + static int gsc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct gsc_driverdata *driver_data; + struct exynos_drm_ipp_formats *formats; struct gsc_context *ctx; struct resource *res; - struct exynos_drm_ippdrv *ippdrv; - int ret; + int ret, i; ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - if (dev->of_node) { - ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node, - "samsung,sysreg"); - if (IS_ERR(ctx->sysreg)) { - dev_warn(dev, "failed to get system register.\n"); - ctx->sysreg = NULL; - } + formats = devm_kzalloc(dev, sizeof(*formats) * + (ARRAY_SIZE(gsc_formats)), GFP_KERNEL); + if (!formats) + return -ENOMEM; + + driver_data = (struct gsc_driverdata *)of_device_get_match_data(dev); + ctx->dev = dev; + ctx->num_clocks = driver_data->num_clocks; + ctx->clk_names = driver_data->clk_names; + + for (i = 0; i < ARRAY_SIZE(gsc_formats); i++) { + formats[i].fourcc = gsc_formats[i]; + formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[i].limits = driver_data->limits; + formats[i].num_limits = driver_data->num_limits; } + ctx->formats = formats; + ctx->num_formats = ARRAY_SIZE(gsc_formats); /* clock control */ - ctx->gsc_clk = devm_clk_get(dev, "gscl"); - if (IS_ERR(ctx->gsc_clk)) { - dev_err(dev, "failed to get gsc clock.\n"); - return PTR_ERR(ctx->gsc_clk); + for (i = 0; i < ctx->num_clocks; i++) { + ctx->clocks[i] = devm_clk_get(dev, ctx->clk_names[i]); + if (IS_ERR(ctx->clocks[i])) { + dev_err(dev, "failed to get clock: %s\n", + ctx->clk_names[i]); + return PTR_ERR(ctx->clocks[i]); + } } /* resource memory */ @@ -1699,8 +1246,8 @@ static int gsc_probe(struct platform_device *pdev) } ctx->irq = res->start; - ret = devm_request_threaded_irq(dev, ctx->irq, NULL, gsc_irq_handler, - IRQF_ONESHOT, "drm_gsc", ctx); + ret = devm_request_irq(dev, ctx->irq, gsc_irq_handler, 0, + dev_name(dev), ctx); if (ret < 0) { dev_err(dev, "failed to request irq.\n"); return ret; @@ -1709,38 +1256,22 @@ static int gsc_probe(struct platform_device *pdev) /* context initailization */ ctx->id = pdev->id; - ippdrv = &ctx->ippdrv; - ippdrv->dev = dev; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &gsc_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &gsc_dst_ops; - ippdrv->check_property = gsc_ippdrv_check_property; - ippdrv->reset = gsc_ippdrv_reset; - ippdrv->start = gsc_ippdrv_start; - ippdrv->stop = gsc_ippdrv_stop; - ret = gsc_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - return ret; - } - - DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); - - mutex_init(&ctx->lock); platform_set_drvdata(pdev, ctx); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, GSC_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm gsc device.\n"); - goto err_ippdrv_register; - } + ret = component_add(dev, &gsc_component_ops); + if (ret) + goto err_pm_dis; dev_info(dev, "drm gsc registered successfully.\n"); return 0; -err_ippdrv_register: +err_pm_dis: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return ret; } @@ -1748,13 +1279,8 @@ err_ippdrv_register: static int gsc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - exynos_drm_ippdrv_unregister(ippdrv); - mutex_destroy(&ctx->lock); - - pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return 0; @@ -1763,19 +1289,32 @@ static int gsc_remove(struct platform_device *pdev) static int __maybe_unused gsc_runtime_suspend(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); + int i; DRM_DEBUG_KMS("id[%d]\n", ctx->id); - return gsc_clk_ctrl(ctx, false); + for (i = ctx->num_clocks - 1; i >= 0; i--) + clk_disable_unprepare(ctx->clocks[i]); + + return 0; } static int __maybe_unused gsc_runtime_resume(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); + int i, ret; DRM_DEBUG_KMS("id[%d]\n", ctx->id); - return gsc_clk_ctrl(ctx, true); + for (i = 0; i < ctx->num_clocks; i++) { + ret = clk_prepare_enable(ctx->clocks[i]); + if (ret) { + while (--i > 0) + clk_disable_unprepare(ctx->clocks[i]); + return ret; + } + } + return 0; } static const struct dev_pm_ops gsc_pm_ops = { @@ -1784,9 +1323,66 @@ static const struct dev_pm_ops gsc_pm_ops = { SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL) }; +static const struct drm_exynos_ipp_limit gsc_5250_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2048 }, .v = { 16, 2048 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 }, + .v = { (1 << 16) / 16, (1 << 16) * 8 }) }, +}; + +static const struct drm_exynos_ipp_limit gsc_5420_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 16, 2016 }, .v = { 8, 2016 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 }, + .v = { (1 << 16) / 16, (1 << 16) * 8 }) }, +}; + +static const struct drm_exynos_ipp_limit gsc_5433_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 2 }, .v = { 16, 8191, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 1 }, .v = { 8, 3344, 1 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2047 }, .v = { 8, 8191 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 }, + .v = { (1 << 16) / 16, (1 << 16) * 8 }) }, +}; + +static struct gsc_driverdata gsc_exynos5250_drvdata = { + .clk_names = {"gscl"}, + .num_clocks = 1, + .limits = gsc_5250_limits, + .num_limits = ARRAY_SIZE(gsc_5250_limits), +}; + +static struct gsc_driverdata gsc_exynos5420_drvdata = { + .clk_names = {"gscl"}, + .num_clocks = 1, + .limits = gsc_5420_limits, + .num_limits = ARRAY_SIZE(gsc_5420_limits), +}; + +static struct gsc_driverdata gsc_exynos5433_drvdata = { + .clk_names = {"pclk", "aclk", "aclk_xiu", "aclk_gsclbend"}, + .num_clocks = 4, + .limits = gsc_5433_limits, + .num_limits = ARRAY_SIZE(gsc_5433_limits), +}; + static const struct of_device_id exynos_drm_gsc_of_match[] = { - { .compatible = "samsung,exynos5-gsc" }, - { }, + { + .compatible = "samsung,exynos5-gsc", + .data = &gsc_exynos5250_drvdata, + }, { + .compatible = "samsung,exynos5250-gsc", + .data = &gsc_exynos5250_drvdata, + }, { + .compatible = "samsung,exynos5420-gsc", + .data = &gsc_exynos5420_drvdata, + }, { + .compatible = "samsung,exynos5433-gsc", + .data = &gsc_exynos5433_drvdata, + }, { + }, }; MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match); @@ -1800,4 +1396,3 @@ struct platform_driver gsc_driver = { .of_match_table = of_match_ptr(exynos_drm_gsc_of_match), }, }; - diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.h b/drivers/gpu/drm/exynos/exynos_drm_gsc.h deleted file mode 100644 index 29ec1c5efcf2..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * Eunchul Kim <chulspro.kim@samsung.com> - * Jinyoung Jeon <jy0.jeon@samsung.com> - * Sangmin Lee <lsmin.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_GSC_H_ -#define _EXYNOS_DRM_GSC_H_ - -/* - * TODO - * FIMD output interface notifier callback. - * Mixer output interface notifier callback. - */ - -#endif /* _EXYNOS_DRM_GSC_H_ */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c new file mode 100644 index 000000000000..26374e58c557 --- /dev/null +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -0,0 +1,916 @@ +/* + * Copyright (C) 2017 Samsung Electronics Co.Ltd + * Authors: + * Marek Szyprowski <m.szyprowski@samsung.com> + * + * Exynos DRM Image Post Processing (IPP) related functions + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + */ + + +#include <drm/drmP.h> +#include <drm/drm_mode.h> +#include <uapi/drm/exynos_drm.h> + +#include "exynos_drm_drv.h" +#include "exynos_drm_gem.h" +#include "exynos_drm_ipp.h" + +static int num_ipp; +static LIST_HEAD(ipp_list); + +/** + * exynos_drm_ipp_register - Register a new picture processor hardware module + * @dev: DRM device + * @ipp: ipp module to init + * @funcs: callbacks for the new ipp object + * @caps: bitmask of ipp capabilities (%DRM_EXYNOS_IPP_CAP_*) + * @formats: array of supported formats + * @num_formats: size of the supported formats array + * @name: name (for debugging purposes) + * + * Initializes a ipp module. + * + * Returns: + * Zero on success, error code on failure. + */ +int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp, + const struct exynos_drm_ipp_funcs *funcs, unsigned int caps, + const struct exynos_drm_ipp_formats *formats, + unsigned int num_formats, const char *name) +{ + WARN_ON(!ipp); + WARN_ON(!funcs); + WARN_ON(!formats); + WARN_ON(!num_formats); + + spin_lock_init(&ipp->lock); + INIT_LIST_HEAD(&ipp->todo_list); + init_waitqueue_head(&ipp->done_wq); + ipp->dev = dev; + ipp->funcs = funcs; + ipp->capabilities = caps; + ipp->name = name; + ipp->formats = formats; + ipp->num_formats = num_formats; + + /* ipp_list modification is serialized by component framework */ + list_add_tail(&ipp->head, &ipp_list); + ipp->id = num_ipp++; + + DRM_DEBUG_DRIVER("Registered ipp %d\n", ipp->id); + + return 0; +} + +/** + * exynos_drm_ipp_unregister - Unregister the picture processor module + * @dev: DRM device + * @ipp: ipp module + */ +void exynos_drm_ipp_unregister(struct drm_device *dev, + struct exynos_drm_ipp *ipp) +{ + WARN_ON(ipp->task); + WARN_ON(!list_empty(&ipp->todo_list)); + list_del(&ipp->head); +} + +/** + * exynos_drm_ipp_ioctl_get_res_ioctl - enumerate all ipp modules + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a list of ipp ids. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_res *resp = data; + struct exynos_drm_ipp *ipp; + uint32_t __user *ipp_ptr = (uint32_t __user *) + (unsigned long)resp->ipp_id_ptr; + unsigned int count = num_ipp, copied = 0; + + /* + * This ioctl is called twice, once to determine how much space is + * needed, and the 2nd time to fill it. + */ + if (count && resp->count_ipps >= count) { + list_for_each_entry(ipp, &ipp_list, head) { + if (put_user(ipp->id, ipp_ptr + copied)) + return -EFAULT; + copied++; + } + } + resp->count_ipps = count; + + return 0; +} + +static inline struct exynos_drm_ipp *__ipp_get(uint32_t id) +{ + struct exynos_drm_ipp *ipp; + + list_for_each_entry(ipp, &ipp_list, head) + if (ipp->id == id) + return ipp; + return NULL; +} + +/** + * exynos_drm_ipp_ioctl_get_caps - get ipp module capabilities and formats + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a structure describing ipp module capabilities. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_caps *resp = data; + void __user *ptr = (void __user *)(unsigned long)resp->formats_ptr; + struct exynos_drm_ipp *ipp; + int i; + + ipp = __ipp_get(resp->ipp_id); + if (!ipp) + return -ENOENT; + + resp->ipp_id = ipp->id; + resp->capabilities = ipp->capabilities; + + /* + * This ioctl is called twice, once to determine how much space is + * needed, and the 2nd time to fill it. + */ + if (resp->formats_count >= ipp->num_formats) { + for (i = 0; i < ipp->num_formats; i++) { + struct drm_exynos_ipp_format tmp = { + .fourcc = ipp->formats[i].fourcc, + .type = ipp->formats[i].type, + .modifier = ipp->formats[i].modifier, + }; + + if (copy_to_user(ptr, &tmp, sizeof(tmp))) + return -EFAULT; + ptr += sizeof(tmp); + } + } + resp->formats_count = ipp->num_formats; + + return 0; +} + +static inline const struct exynos_drm_ipp_formats *__ipp_format_get( + struct exynos_drm_ipp *ipp, uint32_t fourcc, + uint64_t mod, unsigned int type) +{ + int i; + + for (i = 0; i < ipp->num_formats; i++) { + if ((ipp->formats[i].type & type) && + ipp->formats[i].fourcc == fourcc && + ipp->formats[i].modifier == mod) + return &ipp->formats[i]; + } + return NULL; +} + +/** + * exynos_drm_ipp_get_limits_ioctl - get ipp module limits + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a structure describing ipp module limitations for provided + * picture format. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_limits *resp = data; + void __user *ptr = (void __user *)(unsigned long)resp->limits_ptr; + const struct exynos_drm_ipp_formats *format; + struct exynos_drm_ipp *ipp; + + if (resp->type != DRM_EXYNOS_IPP_FORMAT_SOURCE && + resp->type != DRM_EXYNOS_IPP_FORMAT_DESTINATION) + return -EINVAL; + + ipp = __ipp_get(resp->ipp_id); + if (!ipp) + return -ENOENT; + + format = __ipp_format_get(ipp, resp->fourcc, resp->modifier, + resp->type); + if (!format) + return -EINVAL; + + /* + * This ioctl is called twice, once to determine how much space is + * needed, and the 2nd time to fill it. + */ + if (format->num_limits && resp->limits_count >= format->num_limits) + if (copy_to_user((void __user *)ptr, format->limits, + sizeof(*format->limits) * format->num_limits)) + return -EFAULT; + resp->limits_count = format->num_limits; + + return 0; +} + +struct drm_pending_exynos_ipp_event { + struct drm_pending_event base; + struct drm_exynos_ipp_event event; +}; + +static inline struct exynos_drm_ipp_task * + exynos_drm_ipp_task_alloc(struct exynos_drm_ipp *ipp) +{ + struct exynos_drm_ipp_task *task; + + task = kzalloc(sizeof(*task), GFP_KERNEL); + if (!task) + return NULL; + + task->dev = ipp->dev; + task->ipp = ipp; + + /* some defaults */ + task->src.rect.w = task->dst.rect.w = UINT_MAX; + task->src.rect.h = task->dst.rect.h = UINT_MAX; + task->transform.rotation = DRM_MODE_ROTATE_0; + + DRM_DEBUG_DRIVER("Allocated task %pK\n", task); + + return task; +} + +static const struct exynos_drm_param_map { + unsigned int id; + unsigned int size; + unsigned int offset; +} exynos_drm_ipp_params_maps[] = { + { + DRM_EXYNOS_IPP_TASK_BUFFER | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE, + sizeof(struct drm_exynos_ipp_task_buffer), + offsetof(struct exynos_drm_ipp_task, src.buf), + }, { + DRM_EXYNOS_IPP_TASK_BUFFER | + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION, + sizeof(struct drm_exynos_ipp_task_buffer), + offsetof(struct exynos_drm_ipp_task, dst.buf), + }, { + DRM_EXYNOS_IPP_TASK_RECTANGLE | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE, + sizeof(struct drm_exynos_ipp_task_rect), + offsetof(struct exynos_drm_ipp_task, src.rect), + }, { + DRM_EXYNOS_IPP_TASK_RECTANGLE | + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION, + sizeof(struct drm_exynos_ipp_task_rect), + offsetof(struct exynos_drm_ipp_task, dst.rect), + }, { + DRM_EXYNOS_IPP_TASK_TRANSFORM, + sizeof(struct drm_exynos_ipp_task_transform), + offsetof(struct exynos_drm_ipp_task, transform), + }, { + DRM_EXYNOS_IPP_TASK_ALPHA, + sizeof(struct drm_exynos_ipp_task_alpha), + offsetof(struct exynos_drm_ipp_task, alpha), + }, +}; + +static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task, + struct drm_exynos_ioctl_ipp_commit *arg) +{ + const struct exynos_drm_param_map *map = exynos_drm_ipp_params_maps; + void __user *params = (void __user *)(unsigned long)arg->params_ptr; + unsigned int size = arg->params_size; + uint32_t id; + int i; + + while (size) { + if (get_user(id, (uint32_t __user *)params)) + return -EFAULT; + + for (i = 0; i < ARRAY_SIZE(exynos_drm_ipp_params_maps); i++) + if (map[i].id == id) + break; + if (i == ARRAY_SIZE(exynos_drm_ipp_params_maps) || + map[i].size > size) + return -EINVAL; + + if (copy_from_user((void *)task + map[i].offset, params, + map[i].size)) + return -EFAULT; + + params += map[i].size; + size -= map[i].size; + } + + DRM_DEBUG_DRIVER("Got task %pK configuration from userspace\n", task); + return 0; +} + +static int exynos_drm_ipp_task_setup_buffer(struct exynos_drm_ipp_buffer *buf, + struct drm_file *filp) +{ + int ret = 0; + int i; + + /* basic checks */ + if (buf->buf.width == 0 || buf->buf.height == 0) + return -EINVAL; + buf->format = drm_format_info(buf->buf.fourcc); + for (i = 0; i < buf->format->num_planes; i++) { + unsigned int width = (i == 0) ? buf->buf.width : + DIV_ROUND_UP(buf->buf.width, buf->format->hsub); + + if (buf->buf.pitch[i] == 0) + buf->buf.pitch[i] = width * buf->format->cpp[i]; + if (buf->buf.pitch[i] < width * buf->format->cpp[i]) + return -EINVAL; + if (!buf->buf.gem_id[i]) + return -ENOENT; + } + + /* pitch for additional planes must match */ + if (buf->format->num_planes > 2 && + buf->buf.pitch[1] != buf->buf.pitch[2]) + return -EINVAL; + + /* get GEM buffers and check their size */ + for (i = 0; i < buf->format->num_planes; i++) { + unsigned int height = (i == 0) ? buf->buf.height : + DIV_ROUND_UP(buf->buf.height, buf->format->vsub); + unsigned long size = height * buf->buf.pitch[i]; + struct drm_gem_object *obj = drm_gem_object_lookup(filp, + buf->buf.gem_id[i]); + if (!obj) { + ret = -ENOENT; + goto gem_free; + } + buf->exynos_gem[i] = to_exynos_gem(obj); + + if (size + buf->buf.offset[i] > buf->exynos_gem[i]->size) { + i++; + ret = -EINVAL; + goto gem_free; + } + buf->dma_addr[i] = buf->exynos_gem[i]->dma_addr + + buf->buf.offset[i]; + } + + return 0; +gem_free: + while (i--) { + drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base); + buf->exynos_gem[i] = NULL; + } + return ret; +} + +static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf) +{ + int i; + + if (!buf->exynos_gem[0]) + return; + for (i = 0; i < buf->format->num_planes; i++) + drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base); +} + +static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + DRM_DEBUG_DRIVER("Freeing task %pK\n", task); + + exynos_drm_ipp_task_release_buf(&task->src); + exynos_drm_ipp_task_release_buf(&task->dst); + if (task->event) + drm_event_cancel_free(ipp->dev, &task->event->base); + kfree(task); +} + +struct drm_ipp_limit { + struct drm_exynos_ipp_limit_val h; + struct drm_exynos_ipp_limit_val v; +}; + +enum drm_ipp_size_id { + IPP_LIMIT_BUFFER, IPP_LIMIT_AREA, IPP_LIMIT_ROTATED, IPP_LIMIT_MAX +}; + +static const enum drm_ipp_size_id limit_id_fallback[IPP_LIMIT_MAX][4] = { + [IPP_LIMIT_BUFFER] = { DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, + [IPP_LIMIT_AREA] = { DRM_EXYNOS_IPP_LIMIT_SIZE_AREA, + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, + [IPP_LIMIT_ROTATED] = { DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED, + DRM_EXYNOS_IPP_LIMIT_SIZE_AREA, + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, +}; + +static inline void __limit_set_val(unsigned int *ptr, unsigned int val) +{ + if (!*ptr) + *ptr = val; +} + +static void __get_size_limit(const struct drm_exynos_ipp_limit *limits, + unsigned int num_limits, enum drm_ipp_size_id id, + struct drm_ipp_limit *res) +{ + const struct drm_exynos_ipp_limit *l = limits; + int i = 0; + + memset(res, 0, sizeof(*res)); + for (i = 0; limit_id_fallback[id][i]; i++) + for (l = limits; l - limits < num_limits; l++) { + if (((l->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) != + DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE) || + ((l->type & DRM_EXYNOS_IPP_LIMIT_SIZE_MASK) != + limit_id_fallback[id][i])) + continue; + __limit_set_val(&res->h.min, l->h.min); + __limit_set_val(&res->h.max, l->h.max); + __limit_set_val(&res->h.align, l->h.align); + __limit_set_val(&res->v.min, l->v.min); + __limit_set_val(&res->v.max, l->v.max); + __limit_set_val(&res->v.align, l->v.align); + } +} + +static inline bool __align_check(unsigned int val, unsigned int align) +{ + if (align && (val & (align - 1))) { + DRM_DEBUG_DRIVER("Value %d exceeds HW limits (align %d)\n", + val, align); + return false; + } + return true; +} + +static inline bool __size_limit_check(unsigned int val, + struct drm_exynos_ipp_limit_val *l) +{ + if ((l->min && val < l->min) || (l->max && val > l->max)) { + DRM_DEBUG_DRIVER("Value %d exceeds HW limits (min %d, max %d)\n", + val, l->min, l->max); + return false; + } + return __align_check(val, l->align); +} + +static int exynos_drm_ipp_check_size_limits(struct exynos_drm_ipp_buffer *buf, + const struct drm_exynos_ipp_limit *limits, unsigned int num_limits, + bool rotate, bool swap) +{ + enum drm_ipp_size_id id = rotate ? IPP_LIMIT_ROTATED : IPP_LIMIT_AREA; + struct drm_ipp_limit l; + struct drm_exynos_ipp_limit_val *lh = &l.h, *lv = &l.v; + + if (!limits) + return 0; + + __get_size_limit(limits, num_limits, IPP_LIMIT_BUFFER, &l); + if (!__size_limit_check(buf->buf.width, &l.h) || + !__size_limit_check(buf->buf.height, &l.v)) + return -EINVAL; + + if (swap) { + lv = &l.h; + lh = &l.v; + } + __get_size_limit(limits, num_limits, id, &l); + if (!__size_limit_check(buf->rect.w, lh) || + !__align_check(buf->rect.x, lh->align) || + !__size_limit_check(buf->rect.h, lv) || + !__align_check(buf->rect.y, lv->align)) + return -EINVAL; + + return 0; +} + +static inline bool __scale_limit_check(unsigned int src, unsigned int dst, + unsigned int min, unsigned int max) +{ + if ((max && (dst << 16) > src * max) || + (min && (dst << 16) < src * min)) { + DRM_DEBUG_DRIVER("Scale from %d to %d exceeds HW limits (ratio min %d.%05d, max %d.%05d)\n", + src, dst, + min >> 16, 100000 * (min & 0xffff) / (1 << 16), + max >> 16, 100000 * (max & 0xffff) / (1 << 16)); + return false; + } + return true; +} + +static int exynos_drm_ipp_check_scale_limits( + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst, + const struct drm_exynos_ipp_limit *limits, + unsigned int num_limits, bool swap) +{ + const struct drm_exynos_ipp_limit_val *lh, *lv; + int dw, dh; + + for (; num_limits; limits++, num_limits--) + if ((limits->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) == + DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE) + break; + if (!num_limits) + return 0; + + lh = (!swap) ? &limits->h : &limits->v; + lv = (!swap) ? &limits->v : &limits->h; + dw = (!swap) ? dst->w : dst->h; + dh = (!swap) ? dst->h : dst->w; + + if (!__scale_limit_check(src->w, dw, lh->min, lh->max) || + !__scale_limit_check(src->h, dh, lv->min, lv->max)) + return -EINVAL; + + return 0; +} + +static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) +{ + struct exynos_drm_ipp *ipp = task->ipp; + const struct exynos_drm_ipp_formats *src_fmt, *dst_fmt; + struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; + unsigned int rotation = task->transform.rotation; + int ret = 0; + bool swap = drm_rotation_90_or_270(rotation); + bool rotate = (rotation != DRM_MODE_ROTATE_0); + bool scale = false; + + DRM_DEBUG_DRIVER("Checking task %pK\n", task); + + if (src->rect.w == UINT_MAX) + src->rect.w = src->buf.width; + if (src->rect.h == UINT_MAX) + src->rect.h = src->buf.height; + if (dst->rect.w == UINT_MAX) + dst->rect.w = dst->buf.width; + if (dst->rect.h == UINT_MAX) + dst->rect.h = dst->buf.height; + + if (src->rect.x + src->rect.w > (src->buf.width) || + src->rect.y + src->rect.h > (src->buf.height) || + dst->rect.x + dst->rect.w > (dst->buf.width) || + dst->rect.y + dst->rect.h > (dst->buf.height)) { + DRM_DEBUG_DRIVER("Task %pK: defined area is outside provided buffers\n", + task); + return -EINVAL; + } + + if ((!swap && (src->rect.w != dst->rect.w || + src->rect.h != dst->rect.h)) || + (swap && (src->rect.w != dst->rect.h || + src->rect.h != dst->rect.w))) + scale = true; + + if ((!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CROP) && + (src->rect.x || src->rect.y || dst->rect.x || dst->rect.y)) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_ROTATE) && rotate) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) && + src->buf.fourcc != dst->buf.fourcc)) { + DRM_DEBUG_DRIVER("Task %pK: hw capabilities exceeded\n", task); + return -EINVAL; + } + + src_fmt = __ipp_format_get(ipp, src->buf.fourcc, src->buf.modifier, + DRM_EXYNOS_IPP_FORMAT_SOURCE); + if (!src_fmt) { + DRM_DEBUG_DRIVER("Task %pK: src format not supported\n", task); + return -EINVAL; + } + ret = exynos_drm_ipp_check_size_limits(src, src_fmt->limits, + src_fmt->num_limits, + rotate, false); + if (ret) + return ret; + ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, + src_fmt->limits, + src_fmt->num_limits, swap); + if (ret) + return ret; + + dst_fmt = __ipp_format_get(ipp, dst->buf.fourcc, dst->buf.modifier, + DRM_EXYNOS_IPP_FORMAT_DESTINATION); + if (!dst_fmt) { + DRM_DEBUG_DRIVER("Task %pK: dst format not supported\n", task); + return -EINVAL; + } + ret = exynos_drm_ipp_check_size_limits(dst, dst_fmt->limits, + dst_fmt->num_limits, + false, swap); + if (ret) + return ret; + ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, + dst_fmt->limits, + dst_fmt->num_limits, swap); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("Task %pK: all checks done.\n", task); + + return ret; +} + +static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task, + struct drm_file *filp) +{ + struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; + int ret = 0; + + DRM_DEBUG_DRIVER("Setting buffer for task %pK\n", task); + + ret = exynos_drm_ipp_task_setup_buffer(src, filp); + if (ret) { + DRM_DEBUG_DRIVER("Task %pK: src buffer setup failed\n", task); + return ret; + } + ret = exynos_drm_ipp_task_setup_buffer(dst, filp); + if (ret) { + DRM_DEBUG_DRIVER("Task %pK: dst buffer setup failed\n", task); + return ret; + } + + DRM_DEBUG_DRIVER("Task %pK: buffers prepared.\n", task); + + return ret; +} + + +static int exynos_drm_ipp_event_create(struct exynos_drm_ipp_task *task, + struct drm_file *file_priv, uint64_t user_data) +{ + struct drm_pending_exynos_ipp_event *e = NULL; + int ret; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->event.base.type = DRM_EXYNOS_IPP_EVENT; + e->event.base.length = sizeof(e->event); + e->event.user_data = user_data; + + ret = drm_event_reserve_init(task->dev, file_priv, &e->base, + &e->event.base); + if (ret) + goto free; + + task->event = e; + return 0; +free: + kfree(e); + return ret; +} + +static void exynos_drm_ipp_event_send(struct exynos_drm_ipp_task *task) +{ + struct timespec64 now; + + ktime_get_ts64(&now); + task->event->event.tv_sec = now.tv_sec; + task->event->event.tv_usec = now.tv_nsec / NSEC_PER_USEC; + task->event->event.sequence = atomic_inc_return(&task->ipp->sequence); + + drm_send_event(task->dev, &task->event->base); +} + +static int exynos_drm_ipp_task_cleanup(struct exynos_drm_ipp_task *task) +{ + int ret = task->ret; + + if (ret == 0 && task->event) { + exynos_drm_ipp_event_send(task); + /* ensure event won't be canceled on task free */ + task->event = NULL; + } + + exynos_drm_ipp_task_free(task->ipp, task); + return ret; +} + +static void exynos_drm_ipp_cleanup_work(struct work_struct *work) +{ + struct exynos_drm_ipp_task *task = container_of(work, + struct exynos_drm_ipp_task, cleanup_work); + + exynos_drm_ipp_task_cleanup(task); +} + +static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp); + +/** + * exynos_drm_ipp_task_done - finish given task and set return code + * @task: ipp task to finish + * @ret: error code or 0 if operation has been performed successfully + */ +void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret) +{ + struct exynos_drm_ipp *ipp = task->ipp; + unsigned long flags; + + DRM_DEBUG_DRIVER("ipp: %d, task %pK done: %d\n", ipp->id, task, ret); + + spin_lock_irqsave(&ipp->lock, flags); + if (ipp->task == task) + ipp->task = NULL; + task->flags |= DRM_EXYNOS_IPP_TASK_DONE; + task->ret = ret; + spin_unlock_irqrestore(&ipp->lock, flags); + + exynos_drm_ipp_next_task(ipp); + wake_up(&ipp->done_wq); + + if (task->flags & DRM_EXYNOS_IPP_TASK_ASYNC) { + INIT_WORK(&task->cleanup_work, exynos_drm_ipp_cleanup_work); + schedule_work(&task->cleanup_work); + } +} + +static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp) +{ + struct exynos_drm_ipp_task *task; + unsigned long flags; + int ret; + + DRM_DEBUG_DRIVER("ipp: %d, try to run new task\n", ipp->id); + + spin_lock_irqsave(&ipp->lock, flags); + + if (ipp->task || list_empty(&ipp->todo_list)) { + spin_unlock_irqrestore(&ipp->lock, flags); + return; + } + + task = list_first_entry(&ipp->todo_list, struct exynos_drm_ipp_task, + head); + list_del_init(&task->head); + ipp->task = task; + + spin_unlock_irqrestore(&ipp->lock, flags); + + DRM_DEBUG_DRIVER("ipp: %d, selected task %pK to run\n", ipp->id, task); + + ret = ipp->funcs->commit(ipp, task); + if (ret) + exynos_drm_ipp_task_done(task, ret); +} + +static void exynos_drm_ipp_schedule_task(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + unsigned long flags; + + spin_lock_irqsave(&ipp->lock, flags); + list_add(&task->head, &ipp->todo_list); + spin_unlock_irqrestore(&ipp->lock, flags); + + exynos_drm_ipp_next_task(ipp); +} + +static void exynos_drm_ipp_task_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + unsigned long flags; + + spin_lock_irqsave(&ipp->lock, flags); + if (task->flags & DRM_EXYNOS_IPP_TASK_DONE) { + /* already completed task */ + exynos_drm_ipp_task_cleanup(task); + } else if (ipp->task != task) { + /* task has not been scheduled for execution yet */ + list_del_init(&task->head); + exynos_drm_ipp_task_cleanup(task); + } else { + /* + * currently processed task, call abort() and perform + * cleanup with async worker + */ + task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; + spin_unlock_irqrestore(&ipp->lock, flags); + if (ipp->funcs->abort) + ipp->funcs->abort(ipp, task); + return; + } + spin_unlock_irqrestore(&ipp->lock, flags); +} + +/** + * exynos_drm_ipp_commit_ioctl - perform image processing operation + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a ipp task from the set of properties provided from the user + * and try to schedule it to framebuffer processor hardware. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_commit *arg = data; + struct exynos_drm_ipp *ipp; + struct exynos_drm_ipp_task *task; + int ret = 0; + + if ((arg->flags & ~DRM_EXYNOS_IPP_FLAGS) || arg->reserved) + return -EINVAL; + + /* can't test and expect an event at the same time */ + if ((arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) && + (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT)) + return -EINVAL; + + ipp = __ipp_get(arg->ipp_id); + if (!ipp) + return -ENOENT; + + task = exynos_drm_ipp_task_alloc(ipp); + if (!task) + return -ENOMEM; + + ret = exynos_drm_ipp_task_set(task, arg); + if (ret) + goto free; + + ret = exynos_drm_ipp_task_check(task); + if (ret) + goto free; + + ret = exynos_drm_ipp_task_setup_buffers(task, file_priv); + if (ret || arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) + goto free; + + if (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT) { + ret = exynos_drm_ipp_event_create(task, file_priv, + arg->user_data); + if (ret) + goto free; + } + + /* + * Queue task for processing on the hardware. task object will be + * then freed after exynos_drm_ipp_task_done() + */ + if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) { + DRM_DEBUG_DRIVER("ipp: %d, nonblocking processing task %pK\n", + ipp->id, task); + + task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; + exynos_drm_ipp_schedule_task(task->ipp, task); + ret = 0; + } else { + DRM_DEBUG_DRIVER("ipp: %d, processing task %pK\n", ipp->id, + task); + exynos_drm_ipp_schedule_task(ipp, task); + ret = wait_event_interruptible(ipp->done_wq, + task->flags & DRM_EXYNOS_IPP_TASK_DONE); + if (ret) + exynos_drm_ipp_task_abort(ipp, task); + else + ret = exynos_drm_ipp_task_cleanup(task); + } + return ret; +free: + exynos_drm_ipp_task_free(ipp, task); + + return ret; +} diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h new file mode 100644 index 000000000000..0b27d4a9bf94 --- /dev/null +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2017 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef _EXYNOS_DRM_IPP_H_ +#define _EXYNOS_DRM_IPP_H_ + +#include <drm/drmP.h> + +struct exynos_drm_ipp; +struct exynos_drm_ipp_task; + +/** + * struct exynos_drm_ipp_funcs - exynos_drm_ipp control functions + */ +struct exynos_drm_ipp_funcs { + /** + * @commit: + * + * This is the main entry point to start framebuffer processing + * in the hardware. The exynos_drm_ipp_task has been already validated. + * This function must not wait until the device finishes processing. + * When the driver finishes processing, it has to call + * exynos_exynos_drm_ipp_task_done() function. + * + * RETURNS: + * + * 0 on success or negative error codes in case of failure. + */ + int (*commit)(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task); + + /** + * @abort: + * + * Informs the driver that it has to abort the currently running + * task as soon as possible (i.e. as soon as it can stop the device + * safely), even if the task would not have been finished by then. + * After the driver performs the necessary steps, it has to call + * exynos_drm_ipp_task_done() (as if the task ended normally). + * This function does not have to (and will usually not) wait + * until the device enters a state when it can be stopped. + */ + void (*abort)(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task); +}; + +/** + * struct exynos_drm_ipp - central picture processor module structure + */ +struct exynos_drm_ipp { + struct drm_device *dev; + struct list_head head; + unsigned int id; + + const char *name; + const struct exynos_drm_ipp_funcs *funcs; + unsigned int capabilities; + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + atomic_t sequence; + + spinlock_t lock; + struct exynos_drm_ipp_task *task; + struct list_head todo_list; + wait_queue_head_t done_wq; +}; + +struct exynos_drm_ipp_buffer { + struct drm_exynos_ipp_task_buffer buf; + struct drm_exynos_ipp_task_rect rect; + + struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; + const struct drm_format_info *format; + dma_addr_t dma_addr[MAX_FB_BUFFER]; +}; + +/** + * struct exynos_drm_ipp_task - a structure describing transformation that + * has to be performed by the picture processor hardware module + */ +struct exynos_drm_ipp_task { + struct drm_device *dev; + struct exynos_drm_ipp *ipp; + struct list_head head; + + struct exynos_drm_ipp_buffer src; + struct exynos_drm_ipp_buffer dst; + + struct drm_exynos_ipp_task_transform transform; + struct drm_exynos_ipp_task_alpha alpha; + + struct work_struct cleanup_work; + unsigned int flags; + int ret; + + struct drm_pending_exynos_ipp_event *event; +}; + +#define DRM_EXYNOS_IPP_TASK_DONE (1 << 0) +#define DRM_EXYNOS_IPP_TASK_ASYNC (1 << 1) + +struct exynos_drm_ipp_formats { + uint32_t fourcc; + uint32_t type; + uint64_t modifier; + const struct drm_exynos_ipp_limit *limits; + unsigned int num_limits; +}; + +/* helper macros to set exynos_drm_ipp_formats structure and limits*/ +#define IPP_SRCDST_MFORMAT(f, m, l) \ + .fourcc = DRM_FORMAT_##f, .modifier = m, .limits = l, \ + .num_limits = ARRAY_SIZE(l), \ + .type = (DRM_EXYNOS_IPP_FORMAT_SOURCE | \ + DRM_EXYNOS_IPP_FORMAT_DESTINATION) + +#define IPP_SRCDST_FORMAT(f, l) IPP_SRCDST_MFORMAT(f, 0, l) + +#define IPP_SIZE_LIMIT(l, val...) \ + .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE | \ + DRM_EXYNOS_IPP_LIMIT_SIZE_##l), val + +#define IPP_SCALE_LIMIT(val...) \ + .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE), val + +int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp, + const struct exynos_drm_ipp_funcs *funcs, unsigned int caps, + const struct exynos_drm_ipp_formats *formats, + unsigned int num_formats, const char *name); +void exynos_drm_ipp_unregister(struct drm_device *dev, + struct exynos_drm_ipp *ipp); + +void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret); + +#ifdef CONFIG_DRM_EXYNOS_IPP +int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv); +#else +static inline int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_res *resp = data; + + resp->count_ipps = 0; + return 0; +} +static inline int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +static inline int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +static inline int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +#endif +#endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index 1b1af359c303..eb9915da7dec 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -287,13 +287,12 @@ static const struct drm_plane_helper_funcs plane_helper_funcs = { }; static void exynos_plane_attach_zpos_property(struct drm_plane *plane, - bool immutable) + int zpos, bool immutable) { - /* FIXME */ if (immutable) - drm_plane_create_zpos_immutable_property(plane, 0); + drm_plane_create_zpos_immutable_property(plane, zpos); else - drm_plane_create_zpos_property(plane, 0, 0, MAX_PLANE - 1); + drm_plane_create_zpos_property(plane, zpos, 0, MAX_PLANE - 1); } int exynos_plane_init(struct drm_device *dev, @@ -318,7 +317,7 @@ int exynos_plane_init(struct drm_device *dev, exynos_plane->index = index; exynos_plane->config = config; - exynos_plane_attach_zpos_property(&exynos_plane->base, + exynos_plane_attach_zpos_property(&exynos_plane->base, config->zpos, !(config->capabilities & EXYNOS_DRM_PLANE_CAP_ZPOS)); return 0; diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 79282a820ecc..1a76dd3d52e1 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/err.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -22,29 +23,18 @@ #include <drm/exynos_drm.h> #include "regs-rotator.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" /* * Rotator supports image crop/rotator and input/output DMA operations. * input DMA reads image data from the memory. * output DMA writes image data to memory. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> Rotator H/W ----> Memory. */ -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. need to add supported list in prop_list. - */ +#define ROTATOR_AUTOSUSPEND_DELAY 2000 -#define get_rot_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct rot_context, ippdrv); -#define rot_read(offset) readl(rot->regs + (offset)) +#define rot_read(offset) readl(rot->regs + (offset)) #define rot_write(cfg, offset) writel(cfg, rot->regs + (offset)) enum rot_irq_status { @@ -52,54 +42,28 @@ enum rot_irq_status { ROT_IRQ_STATUS_ILLEGAL = 9, }; -/* - * A structure of limitation. - * - * @min_w: minimum width. - * @min_h: minimum height. - * @max_w: maximum width. - * @max_h: maximum height. - * @align: align size. - */ -struct rot_limit { - u32 min_w; - u32 min_h; - u32 max_w; - u32 max_h; - u32 align; -}; - -/* - * A structure of limitation table. - * - * @ycbcr420_2p: case of YUV. - * @rgb888: case of RGB. - */ -struct rot_limit_table { - struct rot_limit ycbcr420_2p; - struct rot_limit rgb888; +struct rot_variant { + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; }; /* * A structure of rotator context. * @ippdrv: prepare initialization using ippdrv. - * @regs_res: register resources. * @regs: memory mapped io registers. * @clock: rotator gate clock. * @limit_tbl: limitation of rotator. * @irq: irq number. - * @cur_buf_id: current operation buffer id. - * @suspended: suspended state. */ struct rot_context { - struct exynos_drm_ippdrv ippdrv; - struct resource *regs_res; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; void __iomem *regs; struct clk *clock; - struct rot_limit_table *limit_tbl; - int irq; - int cur_buf_id[EXYNOS_DRM_OPS_MAX]; - bool suspended; + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct exynos_drm_ipp_task *task; }; static void rotator_reg_set_irq(struct rot_context *rot, bool enable) @@ -114,15 +78,6 @@ static void rotator_reg_set_irq(struct rot_context *rot, bool enable) rot_write(val, ROT_CONFIG); } -static u32 rotator_reg_get_fmt(struct rot_context *rot) -{ - u32 val = rot_read(ROT_CONTROL); - - val &= ROT_CONTROL_FMT_MASK; - - return val; -} - static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot) { u32 val = rot_read(ROT_STATUS); @@ -138,9 +93,6 @@ static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot) static irqreturn_t rotator_irq_handler(int irq, void *arg) { struct rot_context *rot = arg; - struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = c_node->event_work; enum rot_irq_status irq_status; u32 val; @@ -152,56 +104,21 @@ static irqreturn_t rotator_irq_handler(int irq, void *arg) val |= ROT_STATUS_IRQ_PENDING((u32)irq_status); rot_write(val, ROT_STATUS); - if (irq_status == ROT_IRQ_STATUS_COMPLETE) { - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = - rot->cur_buf_id[EXYNOS_DRM_OPS_DST]; - queue_work(ippdrv->event_workq, &event_work->work); - } else { - DRM_ERROR("the SFR is set illegally\n"); + if (rot->task) { + struct exynos_drm_ipp_task *task = rot->task; + + rot->task = NULL; + pm_runtime_mark_last_busy(rot->dev); + pm_runtime_put_autosuspend(rot->dev); + exynos_drm_ipp_task_done(task, + irq_status == ROT_IRQ_STATUS_COMPLETE ? 0 : -EINVAL); } return IRQ_HANDLED; } -static void rotator_align_size(struct rot_context *rot, u32 fmt, u32 *hsize, - u32 *vsize) +static void rotator_src_set_fmt(struct rot_context *rot, u32 fmt) { - struct rot_limit_table *limit_tbl = rot->limit_tbl; - struct rot_limit *limit; - u32 mask, val; - - /* Get size limit */ - if (fmt == ROT_CONTROL_FMT_RGB888) - limit = &limit_tbl->rgb888; - else - limit = &limit_tbl->ycbcr420_2p; - - /* Get mask for rounding to nearest aligned val */ - mask = ~((1 << limit->align) - 1); - - /* Set aligned width */ - val = ROT_ALIGN(*hsize, limit->align, mask); - if (val < limit->min_w) - *hsize = ROT_MIN(limit->min_w, mask); - else if (val > limit->max_w) - *hsize = ROT_MAX(limit->max_w, mask); - else - *hsize = val; - - /* Set aligned height */ - val = ROT_ALIGN(*vsize, limit->align, mask); - if (val < limit->min_h) - *vsize = ROT_MIN(limit->min_h, mask); - else if (val > limit->max_h) - *vsize = ROT_MAX(limit->max_h, mask); - else - *vsize = val; -} - -static int rotator_src_set_fmt(struct device *dev, u32 fmt) -{ - struct rot_context *rot = dev_get_drvdata(dev); u32 val; val = rot_read(ROT_CONTROL); @@ -214,515 +131,176 @@ static int rotator_src_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_XRGB8888: val |= ROT_CONTROL_FMT_RGB888; break; - default: - DRM_ERROR("invalid image format\n"); - return -EINVAL; } rot_write(val, ROT_CONTROL); - - return 0; } -static inline bool rotator_check_reg_fmt(u32 fmt) +static void rotator_src_set_buf(struct rot_context *rot, + struct exynos_drm_ipp_buffer *buf) { - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) || - (fmt == ROT_CONTROL_FMT_RGB888)) - return true; - - return false; -} - -static int rotator_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, - struct drm_exynos_sz *sz) -{ - struct rot_context *rot = dev_get_drvdata(dev); - u32 fmt, hsize, vsize; u32 val; - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Align buffer size */ - hsize = sz->hsize; - vsize = sz->vsize; - rotator_align_size(rot, fmt, &hsize, &vsize); - /* Set buffer size configuration */ - val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize); + val = ROT_SET_BUF_SIZE_H(buf->buf.height) | + ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]); rot_write(val, ROT_SRC_BUF_SIZE); /* Set crop image position configuration */ - val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x); + val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x); rot_write(val, ROT_SRC_CROP_POS); - val = ROT_SRC_CROP_SIZE_H(pos->h) | ROT_SRC_CROP_SIZE_W(pos->w); + val = ROT_SRC_CROP_SIZE_H(buf->rect.h) | + ROT_SRC_CROP_SIZE_W(buf->rect.w); rot_write(val, ROT_SRC_CROP_SIZE); - return 0; + /* Set buffer DMA address */ + rot_write(buf->dma_addr[0], ROT_SRC_BUF_ADDR(0)); + rot_write(buf->dma_addr[1], ROT_SRC_BUF_ADDR(1)); } -static int rotator_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, - u32 buf_id, enum drm_exynos_ipp_buf_type buf_type) +static void rotator_dst_set_transf(struct rot_context *rot, + unsigned int rotation) { - struct rot_context *rot = dev_get_drvdata(dev); - dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX]; - u32 val, fmt, hsize, vsize; - int i; - - /* Set current buf_id */ - rot->cur_buf_id[EXYNOS_DRM_OPS_SRC] = buf_id; - - switch (buf_type) { - case IPP_BUF_ENQUEUE: - /* Set address configuration */ - for_each_ipp_planar(i) - addr[i] = buf_info->base[i]; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Re-set cb planar for NV12 format */ - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) && - !addr[EXYNOS_DRM_PLANAR_CB]) { - - val = rot_read(ROT_SRC_BUF_SIZE); - hsize = ROT_GET_BUF_SIZE_W(val); - vsize = ROT_GET_BUF_SIZE_H(val); - - /* Set cb planar */ - addr[EXYNOS_DRM_PLANAR_CB] = - addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize; - } - - for_each_ipp_planar(i) - rot_write(addr[i], ROT_SRC_BUF_ADDR(i)); - break; - case IPP_BUF_DEQUEUE: - for_each_ipp_planar(i) - rot_write(0x0, ROT_SRC_BUF_ADDR(i)); - break; - default: - /* Nothing to do */ - break; - } - - return 0; -} - -static int rotator_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) -{ - struct rot_context *rot = dev_get_drvdata(dev); u32 val; /* Set transform configuration */ val = rot_read(ROT_CONTROL); val &= ~ROT_CONTROL_FLIP_MASK; - switch (flip) { - case EXYNOS_DRM_FLIP_VERTICAL: - val |= ROT_CONTROL_FLIP_VERTICAL; - break; - case EXYNOS_DRM_FLIP_HORIZONTAL: + if (rotation & DRM_MODE_REFLECT_X) val |= ROT_CONTROL_FLIP_HORIZONTAL; - break; - default: - /* Flip None */ - break; - } + if (rotation & DRM_MODE_REFLECT_Y) + val |= ROT_CONTROL_FLIP_VERTICAL; val &= ~ROT_CONTROL_ROT_MASK; - switch (degree) { - case EXYNOS_DRM_DEGREE_90: + if (rotation & DRM_MODE_ROTATE_90) val |= ROT_CONTROL_ROT_90; - break; - case EXYNOS_DRM_DEGREE_180: + else if (rotation & DRM_MODE_ROTATE_180) val |= ROT_CONTROL_ROT_180; - break; - case EXYNOS_DRM_DEGREE_270: + else if (rotation & DRM_MODE_ROTATE_270) val |= ROT_CONTROL_ROT_270; - break; - default: - /* Rotation 0 Degree */ - break; - } rot_write(val, ROT_CONTROL); - - /* Check degree for setting buffer size swap */ - if ((degree == EXYNOS_DRM_DEGREE_90) || - (degree == EXYNOS_DRM_DEGREE_270)) - *swap = true; - else - *swap = false; - - return 0; } -static int rotator_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, - struct drm_exynos_sz *sz) +static void rotator_dst_set_buf(struct rot_context *rot, + struct exynos_drm_ipp_buffer *buf) { - struct rot_context *rot = dev_get_drvdata(dev); - u32 val, fmt, hsize, vsize; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Align buffer size */ - hsize = sz->hsize; - vsize = sz->vsize; - rotator_align_size(rot, fmt, &hsize, &vsize); + u32 val; /* Set buffer size configuration */ - val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize); + val = ROT_SET_BUF_SIZE_H(buf->buf.height) | + ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]); rot_write(val, ROT_DST_BUF_SIZE); /* Set crop image position configuration */ - val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x); + val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x); rot_write(val, ROT_DST_CROP_POS); - return 0; + /* Set buffer DMA address */ + rot_write(buf->dma_addr[0], ROT_DST_BUF_ADDR(0)); + rot_write(buf->dma_addr[1], ROT_DST_BUF_ADDR(1)); } -static int rotator_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, - u32 buf_id, enum drm_exynos_ipp_buf_type buf_type) +static void rotator_start(struct rot_context *rot) { - struct rot_context *rot = dev_get_drvdata(dev); - dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX]; - u32 val, fmt, hsize, vsize; - int i; - - /* Set current buf_id */ - rot->cur_buf_id[EXYNOS_DRM_OPS_DST] = buf_id; - - switch (buf_type) { - case IPP_BUF_ENQUEUE: - /* Set address configuration */ - for_each_ipp_planar(i) - addr[i] = buf_info->base[i]; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Re-set cb planar for NV12 format */ - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) && - !addr[EXYNOS_DRM_PLANAR_CB]) { - /* Get buf size */ - val = rot_read(ROT_DST_BUF_SIZE); - - hsize = ROT_GET_BUF_SIZE_W(val); - vsize = ROT_GET_BUF_SIZE_H(val); - - /* Set cb planar */ - addr[EXYNOS_DRM_PLANAR_CB] = - addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize; - } - - for_each_ipp_planar(i) - rot_write(addr[i], ROT_DST_BUF_ADDR(i)); - break; - case IPP_BUF_DEQUEUE: - for_each_ipp_planar(i) - rot_write(0x0, ROT_DST_BUF_ADDR(i)); - break; - default: - /* Nothing to do */ - break; - } + u32 val; - return 0; + /* Set interrupt enable */ + rotator_reg_set_irq(rot, true); + + val = rot_read(ROT_CONTROL); + val |= ROT_CONTROL_START; + rot_write(val, ROT_CONTROL); } -static struct exynos_drm_ipp_ops rot_src_ops = { - .set_fmt = rotator_src_set_fmt, - .set_size = rotator_src_set_size, - .set_addr = rotator_src_set_addr, -}; +static int rotator_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct rot_context *rot = + container_of(ipp, struct rot_context, ipp); -static struct exynos_drm_ipp_ops rot_dst_ops = { - .set_transf = rotator_dst_set_transf, - .set_size = rotator_dst_set_size, - .set_addr = rotator_dst_set_addr, -}; + pm_runtime_get_sync(rot->dev); + rot->task = task; -static int rotator_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 0; - prop_list->crop = 0; - prop_list->scale = 0; + rotator_src_set_fmt(rot, task->src.buf.fourcc); + rotator_src_set_buf(rot, &task->src); + rotator_dst_set_transf(rot, task->transform.rotation); + rotator_dst_set_buf(rot, &task->dst); + rotator_start(rot); return 0; } -static inline bool rotator_check_drm_fmt(u32 fmt) -{ - switch (fmt) { - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_NV12: - return true; - default: - DRM_DEBUG_KMS("not support format\n"); - return false; - } -} - -static inline bool rotator_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} +static const struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = rotator_commit, +}; -static int rotator_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) +static int rotator_bind(struct device *dev, struct device *master, void *data) { - struct drm_exynos_ipp_config *src_config = - &property->config[EXYNOS_DRM_OPS_SRC]; - struct drm_exynos_ipp_config *dst_config = - &property->config[EXYNOS_DRM_OPS_DST]; - struct drm_exynos_pos *src_pos = &src_config->pos; - struct drm_exynos_pos *dst_pos = &dst_config->pos; - struct drm_exynos_sz *src_sz = &src_config->sz; - struct drm_exynos_sz *dst_sz = &dst_config->sz; - bool swap = false; - - /* Check format configuration */ - if (src_config->fmt != dst_config->fmt) { - DRM_DEBUG_KMS("not support csc feature\n"); - return -EINVAL; - } - - if (!rotator_check_drm_fmt(dst_config->fmt)) { - DRM_DEBUG_KMS("invalid format\n"); - return -EINVAL; - } - - /* Check transform configuration */ - if (src_config->degree != EXYNOS_DRM_DEGREE_0) { - DRM_DEBUG_KMS("not support source-side rotation\n"); - return -EINVAL; - } - - switch (dst_config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - /* No problem */ - break; - default: - DRM_DEBUG_KMS("invalid degree\n"); - return -EINVAL; - } - - if (src_config->flip != EXYNOS_DRM_FLIP_NONE) { - DRM_DEBUG_KMS("not support source-side flip\n"); - return -EINVAL; - } + struct rot_context *rot = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &rot->ipp; - if (!rotator_check_drm_flip(dst_config->flip)) { - DRM_DEBUG_KMS("invalid flip\n"); - return -EINVAL; - } + rot->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); - /* Check size configuration */ - if ((src_pos->x + src_pos->w > src_sz->hsize) || - (src_pos->y + src_pos->h > src_sz->vsize)) { - DRM_DEBUG_KMS("out of source buffer bound\n"); - return -EINVAL; - } + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE, + rot->formats, rot->num_formats, "rotator"); - if (swap) { - if ((dst_pos->x + dst_pos->h > dst_sz->vsize) || - (dst_pos->y + dst_pos->w > dst_sz->hsize)) { - DRM_DEBUG_KMS("out of destination buffer bound\n"); - return -EINVAL; - } - - if ((src_pos->w != dst_pos->h) || (src_pos->h != dst_pos->w)) { - DRM_DEBUG_KMS("not support scale feature\n"); - return -EINVAL; - } - } else { - if ((dst_pos->x + dst_pos->w > dst_sz->hsize) || - (dst_pos->y + dst_pos->h > dst_sz->vsize)) { - DRM_DEBUG_KMS("out of destination buffer bound\n"); - return -EINVAL; - } - - if ((src_pos->w != dst_pos->w) || (src_pos->h != dst_pos->h)) { - DRM_DEBUG_KMS("not support scale feature\n"); - return -EINVAL; - } - } + dev_info(dev, "The exynos rotator has been probed successfully\n"); return 0; } -static int rotator_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void rotator_unbind(struct device *dev, struct device *master, + void *data) { struct rot_context *rot = dev_get_drvdata(dev); - u32 val; - - if (rot->suspended) { - DRM_ERROR("suspended state\n"); - return -EPERM; - } - - if (cmd != IPP_CMD_M2M) { - DRM_ERROR("not support cmd: %d\n", cmd); - return -EINVAL; - } - - /* Set interrupt enable */ - rotator_reg_set_irq(rot, true); - - val = rot_read(ROT_CONTROL); - val |= ROT_CONTROL_START; - - rot_write(val, ROT_CONTROL); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &rot->ipp; - return 0; + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(rot->drm_dev, rot->dev); } -static struct rot_limit_table rot_limit_tbl_4210 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_64K, - .max_h = SZ_64K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_16K, - .max_h = SZ_16K, - .align = 2, - }, -}; - -static struct rot_limit_table rot_limit_tbl_4x12 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_32K, - .max_h = SZ_32K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_8K, - .max_h = SZ_8K, - .align = 2, - }, +static const struct component_ops rotator_component_ops = { + .bind = rotator_bind, + .unbind = rotator_unbind, }; -static struct rot_limit_table rot_limit_tbl_5250 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_32K, - .max_h = SZ_32K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_8K, - .max_h = SZ_8K, - .align = 1, - }, -}; - -static const struct of_device_id exynos_rotator_match[] = { - { - .compatible = "samsung,exynos4210-rotator", - .data = &rot_limit_tbl_4210, - }, - { - .compatible = "samsung,exynos4212-rotator", - .data = &rot_limit_tbl_4x12, - }, - { - .compatible = "samsung,exynos5250-rotator", - .data = &rot_limit_tbl_5250, - }, - {}, -}; -MODULE_DEVICE_TABLE(of, exynos_rotator_match); - static int rotator_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct resource *regs_res; struct rot_context *rot; - struct exynos_drm_ippdrv *ippdrv; + const struct rot_variant *variant; + int irq; int ret; - if (!dev->of_node) { - dev_err(dev, "cannot find of_node.\n"); - return -ENODEV; - } - rot = devm_kzalloc(dev, sizeof(*rot), GFP_KERNEL); if (!rot) return -ENOMEM; - rot->limit_tbl = (struct rot_limit_table *) - of_device_get_match_data(dev); - rot->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - rot->regs = devm_ioremap_resource(dev, rot->regs_res); + variant = of_device_get_match_data(dev); + rot->formats = variant->formats; + rot->num_formats = variant->num_formats; + rot->dev = dev; + regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + rot->regs = devm_ioremap_resource(dev, regs_res); if (IS_ERR(rot->regs)) return PTR_ERR(rot->regs); - rot->irq = platform_get_irq(pdev, 0); - if (rot->irq < 0) { + irq = platform_get_irq(pdev, 0); + if (irq < 0) { dev_err(dev, "failed to get irq\n"); - return rot->irq; + return irq; } - ret = devm_request_threaded_irq(dev, rot->irq, NULL, - rotator_irq_handler, IRQF_ONESHOT, "drm_rotator", rot); + ret = devm_request_irq(dev, irq, rotator_irq_handler, 0, dev_name(dev), + rot); if (ret < 0) { dev_err(dev, "failed to request irq\n"); return ret; @@ -734,35 +312,19 @@ static int rotator_probe(struct platform_device *pdev) return PTR_ERR(rot->clock); } + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, ROTATOR_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - - ippdrv = &rot->ippdrv; - ippdrv->dev = dev; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &rot_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &rot_dst_ops; - ippdrv->check_property = rotator_ippdrv_check_property; - ippdrv->start = rotator_ippdrv_start; - ret = rotator_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - goto err_ippdrv_register; - } - - DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv); - platform_set_drvdata(pdev, rot); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm rotator device\n"); - goto err_ippdrv_register; - } - - dev_info(dev, "The exynos rotator is probed successfully\n"); + ret = component_add(dev, &rotator_component_ops); + if (ret) + goto err_component; return 0; -err_ippdrv_register: +err_component: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return ret; } @@ -770,45 +332,101 @@ err_ippdrv_register: static int rotator_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct rot_context *rot = dev_get_drvdata(dev); - struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv; - - exynos_drm_ippdrv_unregister(ippdrv); + component_del(dev, &rotator_component_ops); + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return 0; } #ifdef CONFIG_PM -static int rotator_clk_crtl(struct rot_context *rot, bool enable) -{ - if (enable) { - clk_prepare_enable(rot->clock); - rot->suspended = false; - } else { - clk_disable_unprepare(rot->clock); - rot->suspended = true; - } - - return 0; -} - static int rotator_runtime_suspend(struct device *dev) { struct rot_context *rot = dev_get_drvdata(dev); - return rotator_clk_crtl(rot, false); + clk_disable_unprepare(rot->clock); + return 0; } static int rotator_runtime_resume(struct device *dev) { struct rot_context *rot = dev_get_drvdata(dev); - return rotator_clk_crtl(rot, true); + return clk_prepare_enable(rot->clock); } #endif +static const struct drm_exynos_ipp_limit rotator_4210_rbg888_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_16K }, .v = { 8, SZ_16K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) }, +}; + +static const struct drm_exynos_ipp_limit rotator_4412_rbg888_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) }, +}; + +static const struct drm_exynos_ipp_limit rotator_5250_rbg888_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) }, +}; + +static const struct drm_exynos_ipp_limit rotator_4210_yuv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_64K }, .v = { 32, SZ_64K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) }, +}; + +static const struct drm_exynos_ipp_limit rotator_4412_yuv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_32K }, .v = { 32, SZ_32K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) }, +}; + +static const struct exynos_drm_ipp_formats rotator_4210_formats[] = { + { IPP_SRCDST_FORMAT(XRGB8888, rotator_4210_rbg888_limits) }, + { IPP_SRCDST_FORMAT(NV12, rotator_4210_yuv_limits) }, +}; + +static const struct exynos_drm_ipp_formats rotator_4412_formats[] = { + { IPP_SRCDST_FORMAT(XRGB8888, rotator_4412_rbg888_limits) }, + { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) }, +}; + +static const struct exynos_drm_ipp_formats rotator_5250_formats[] = { + { IPP_SRCDST_FORMAT(XRGB8888, rotator_5250_rbg888_limits) }, + { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) }, +}; + +static const struct rot_variant rotator_4210_data = { + .formats = rotator_4210_formats, + .num_formats = ARRAY_SIZE(rotator_4210_formats), +}; + +static const struct rot_variant rotator_4412_data = { + .formats = rotator_4412_formats, + .num_formats = ARRAY_SIZE(rotator_4412_formats), +}; + +static const struct rot_variant rotator_5250_data = { + .formats = rotator_5250_formats, + .num_formats = ARRAY_SIZE(rotator_5250_formats), +}; + +static const struct of_device_id exynos_rotator_match[] = { + { + .compatible = "samsung,exynos4210-rotator", + .data = &rotator_4210_data, + }, { + .compatible = "samsung,exynos4212-rotator", + .data = &rotator_4412_data, + }, { + .compatible = "samsung,exynos5250-rotator", + .data = &rotator_5250_data, + }, { + }, +}; +MODULE_DEVICE_TABLE(of, exynos_rotator_match); + static const struct dev_pm_ops rotator_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) @@ -820,7 +438,7 @@ struct platform_driver rotator_driver = { .probe = rotator_probe, .remove = rotator_remove, .driver = { - .name = "exynos-rot", + .name = "exynos-rotator", .owner = THIS_MODULE, .pm = &rotator_pm_ops, .of_match_table = exynos_rotator_match, diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c new file mode 100644 index 000000000000..91d4382343d0 --- /dev/null +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -0,0 +1,694 @@ +/* + * Copyright (C) 2017 Samsung Electronics Co.Ltd + * Author: + * Andrzej Pietrasiewicz <andrzej.p@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundationr + */ + +#include <linux/kernel.h> +#include <linux/component.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/clk.h> +#include <linux/of_device.h> +#include <linux/pm_runtime.h> + +#include <drm/drmP.h> +#include <drm/exynos_drm.h> +#include "regs-scaler.h" +#include "exynos_drm_fb.h" +#include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" +#include "exynos_drm_ipp.h" + +#define scaler_read(offset) readl(scaler->regs + (offset)) +#define scaler_write(cfg, offset) writel(cfg, scaler->regs + (offset)) +#define SCALER_MAX_CLK 4 +#define SCALER_AUTOSUSPEND_DELAY 2000 + +struct scaler_data { + const char *clk_name[SCALER_MAX_CLK]; + unsigned int num_clk; + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; +}; + +struct scaler_context { + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + void __iomem *regs; + struct clk *clock[SCALER_MAX_CLK]; + struct exynos_drm_ipp_task *task; + const struct scaler_data *scaler_data; +}; + +static u32 scaler_get_format(u32 drm_fmt) +{ + switch (drm_fmt) { + case DRM_FORMAT_NV21: + return SCALER_YUV420_2P_UV; + case DRM_FORMAT_NV12: + return SCALER_YUV420_2P_VU; + case DRM_FORMAT_YUV420: + return SCALER_YUV420_3P; + case DRM_FORMAT_YUYV: + return SCALER_YUV422_1P_YUYV; + case DRM_FORMAT_UYVY: + return SCALER_YUV422_1P_UYVY; + case DRM_FORMAT_YVYU: + return SCALER_YUV422_1P_YVYU; + case DRM_FORMAT_NV61: + return SCALER_YUV422_2P_UV; + case DRM_FORMAT_NV16: + return SCALER_YUV422_2P_VU; + case DRM_FORMAT_YUV422: + return SCALER_YUV422_3P; + case DRM_FORMAT_NV42: + return SCALER_YUV444_2P_UV; + case DRM_FORMAT_NV24: + return SCALER_YUV444_2P_VU; + case DRM_FORMAT_YUV444: + return SCALER_YUV444_3P; + case DRM_FORMAT_RGB565: + return SCALER_RGB_565; + case DRM_FORMAT_XRGB1555: + return SCALER_ARGB1555; + case DRM_FORMAT_ARGB1555: + return SCALER_ARGB1555; + case DRM_FORMAT_XRGB4444: + return SCALER_ARGB4444; + case DRM_FORMAT_ARGB4444: + return SCALER_ARGB4444; + case DRM_FORMAT_XRGB8888: + return SCALER_ARGB8888; + case DRM_FORMAT_ARGB8888: + return SCALER_ARGB8888; + case DRM_FORMAT_RGBX8888: + return SCALER_RGBA8888; + case DRM_FORMAT_RGBA8888: + return SCALER_RGBA8888; + default: + break; + } + + return 0; +} + +static inline void scaler_enable_int(struct scaler_context *scaler) +{ + u32 val; + + val = SCALER_INT_EN_TIMEOUT | + SCALER_INT_EN_ILLEGAL_BLEND | + SCALER_INT_EN_ILLEGAL_RATIO | + SCALER_INT_EN_ILLEGAL_DST_HEIGHT | + SCALER_INT_EN_ILLEGAL_DST_WIDTH | + SCALER_INT_EN_ILLEGAL_DST_V_POS | + SCALER_INT_EN_ILLEGAL_DST_H_POS | + SCALER_INT_EN_ILLEGAL_DST_C_SPAN | + SCALER_INT_EN_ILLEGAL_DST_Y_SPAN | + SCALER_INT_EN_ILLEGAL_DST_CR_BASE | + SCALER_INT_EN_ILLEGAL_DST_CB_BASE | + SCALER_INT_EN_ILLEGAL_DST_Y_BASE | + SCALER_INT_EN_ILLEGAL_DST_COLOR | + SCALER_INT_EN_ILLEGAL_SRC_HEIGHT | + SCALER_INT_EN_ILLEGAL_SRC_WIDTH | + SCALER_INT_EN_ILLEGAL_SRC_CV_POS | + SCALER_INT_EN_ILLEGAL_SRC_CH_POS | + SCALER_INT_EN_ILLEGAL_SRC_YV_POS | + SCALER_INT_EN_ILLEGAL_SRC_YH_POS | + SCALER_INT_EN_ILLEGAL_DST_SPAN | + SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN | + SCALER_INT_EN_ILLEGAL_SRC_CR_BASE | + SCALER_INT_EN_ILLEGAL_SRC_CB_BASE | + SCALER_INT_EN_ILLEGAL_SRC_Y_BASE | + SCALER_INT_EN_ILLEGAL_SRC_COLOR | + SCALER_INT_EN_FRAME_END; + scaler_write(val, SCALER_INT_EN); +} + +static inline void scaler_set_src_fmt(struct scaler_context *scaler, + u32 src_fmt) +{ + u32 val; + + val = SCALER_SRC_CFG_SET_COLOR_FORMAT(src_fmt); + scaler_write(val, SCALER_SRC_CFG); +} + +static inline void scaler_set_src_base(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *src_buf) +{ + static unsigned int bases[] = { + SCALER_SRC_Y_BASE, + SCALER_SRC_CB_BASE, + SCALER_SRC_CR_BASE, + }; + int i; + + for (i = 0; i < src_buf->format->num_planes; ++i) + scaler_write(src_buf->dma_addr[i], bases[i]); +} + +static inline void scaler_set_src_span(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *src_buf) +{ + u32 val; + + val = SCALER_SRC_SPAN_SET_Y_SPAN(src_buf->buf.pitch[0] / + src_buf->format->cpp[0]); + + if (src_buf->format->num_planes > 1) + val |= SCALER_SRC_SPAN_SET_C_SPAN(src_buf->buf.pitch[1]); + + scaler_write(val, SCALER_SRC_SPAN); +} + +static inline void scaler_set_src_luma_pos(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *src_pos) +{ + u32 val; + + val = SCALER_SRC_Y_POS_SET_YH_POS(src_pos->x << 2); + val |= SCALER_SRC_Y_POS_SET_YV_POS(src_pos->y << 2); + scaler_write(val, SCALER_SRC_Y_POS); + scaler_write(val, SCALER_SRC_C_POS); /* ATTENTION! */ +} + +static inline void scaler_set_src_wh(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *src_pos) +{ + u32 val; + + val = SCALER_SRC_WH_SET_WIDTH(src_pos->w); + val |= SCALER_SRC_WH_SET_HEIGHT(src_pos->h); + scaler_write(val, SCALER_SRC_WH); +} + +static inline void scaler_set_dst_fmt(struct scaler_context *scaler, + u32 dst_fmt) +{ + u32 val; + + val = SCALER_DST_CFG_SET_COLOR_FORMAT(dst_fmt); + scaler_write(val, SCALER_DST_CFG); +} + +static inline void scaler_set_dst_base(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *dst_buf) +{ + static unsigned int bases[] = { + SCALER_DST_Y_BASE, + SCALER_DST_CB_BASE, + SCALER_DST_CR_BASE, + }; + int i; + + for (i = 0; i < dst_buf->format->num_planes; ++i) + scaler_write(dst_buf->dma_addr[i], bases[i]); +} + +static inline void scaler_set_dst_span(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *dst_buf) +{ + u32 val; + + val = SCALER_DST_SPAN_SET_Y_SPAN(dst_buf->buf.pitch[0] / + dst_buf->format->cpp[0]); + + if (dst_buf->format->num_planes > 1) + val |= SCALER_DST_SPAN_SET_C_SPAN(dst_buf->buf.pitch[1]); + + scaler_write(val, SCALER_DST_SPAN); +} + +static inline void scaler_set_dst_luma_pos(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *dst_pos) +{ + u32 val; + + val = SCALER_DST_WH_SET_WIDTH(dst_pos->w); + val |= SCALER_DST_WH_SET_HEIGHT(dst_pos->h); + scaler_write(val, SCALER_DST_WH); +} + +static inline void scaler_set_dst_wh(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *dst_pos) +{ + u32 val; + + val = SCALER_DST_POS_SET_H_POS(dst_pos->x); + val |= SCALER_DST_POS_SET_V_POS(dst_pos->y); + scaler_write(val, SCALER_DST_POS); +} + +static inline void scaler_set_hv_ratio(struct scaler_context *scaler, + unsigned int rotation, + struct drm_exynos_ipp_task_rect *src_pos, + struct drm_exynos_ipp_task_rect *dst_pos) +{ + u32 val, h_ratio, v_ratio; + + if (drm_rotation_90_or_270(rotation)) { + h_ratio = (src_pos->h << 16) / dst_pos->w; + v_ratio = (src_pos->w << 16) / dst_pos->h; + } else { + h_ratio = (src_pos->w << 16) / dst_pos->w; + v_ratio = (src_pos->h << 16) / dst_pos->h; + } + + val = SCALER_H_RATIO_SET(h_ratio); + scaler_write(val, SCALER_H_RATIO); + + val = SCALER_V_RATIO_SET(v_ratio); + scaler_write(val, SCALER_V_RATIO); +} + +static inline void scaler_set_rotation(struct scaler_context *scaler, + unsigned int rotation) +{ + u32 val = 0; + + if (rotation & DRM_MODE_ROTATE_90) + val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_90); + else if (rotation & DRM_MODE_ROTATE_180) + val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_180); + else if (rotation & DRM_MODE_ROTATE_270) + val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_270); + if (rotation & DRM_MODE_REFLECT_X) + val |= SCALER_ROT_CFG_FLIP_X_EN; + if (rotation & DRM_MODE_REFLECT_Y) + val |= SCALER_ROT_CFG_FLIP_Y_EN; + scaler_write(val, SCALER_ROT_CFG); +} + +static inline void scaler_set_csc(struct scaler_context *scaler, + const struct drm_format_info *fmt) +{ + static const u32 csc_mtx[2][3][3] = { + { /* YCbCr to RGB */ + {0x254, 0x000, 0x331}, + {0x254, 0xf38, 0xe60}, + {0x254, 0x409, 0x000}, + }, + { /* RGB to YCbCr */ + {0x084, 0x102, 0x032}, + {0xfb4, 0xf6b, 0x0e1}, + {0x0e1, 0xf44, 0xfdc}, + }, + }; + int i, j, dir; + + switch (fmt->format) { + case DRM_FORMAT_RGB565: + case DRM_FORMAT_XRGB1555: + case DRM_FORMAT_ARGB1555: + case DRM_FORMAT_XRGB4444: + case DRM_FORMAT_ARGB4444: + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_RGBX8888: + case DRM_FORMAT_RGBA8888: + dir = 1; + break; + default: + dir = 0; + } + + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + scaler_write(csc_mtx[dir][i][j], SCALER_CSC_COEF(j, i)); +} + +static inline void scaler_set_timer(struct scaler_context *scaler, + unsigned int timer, unsigned int divider) +{ + u32 val; + + val = SCALER_TIMEOUT_CTRL_TIMER_ENABLE; + val |= SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(timer); + val |= SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(divider); + scaler_write(val, SCALER_TIMEOUT_CTRL); +} + +static inline void scaler_start_hw(struct scaler_context *scaler) +{ + scaler_write(SCALER_CFG_START_CMD, SCALER_CFG); +} + +static int scaler_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct scaler_context *scaler = + container_of(ipp, struct scaler_context, ipp); + + u32 src_fmt = scaler_get_format(task->src.buf.fourcc); + struct drm_exynos_ipp_task_rect *src_pos = &task->src.rect; + + u32 dst_fmt = scaler_get_format(task->dst.buf.fourcc); + struct drm_exynos_ipp_task_rect *dst_pos = &task->dst.rect; + + scaler->task = task; + + pm_runtime_get_sync(scaler->dev); + + scaler_set_src_fmt(scaler, src_fmt); + scaler_set_src_base(scaler, &task->src); + scaler_set_src_span(scaler, &task->src); + scaler_set_src_luma_pos(scaler, src_pos); + scaler_set_src_wh(scaler, src_pos); + + scaler_set_dst_fmt(scaler, dst_fmt); + scaler_set_dst_base(scaler, &task->dst); + scaler_set_dst_span(scaler, &task->dst); + scaler_set_dst_luma_pos(scaler, dst_pos); + scaler_set_dst_wh(scaler, dst_pos); + + scaler_set_hv_ratio(scaler, task->transform.rotation, src_pos, dst_pos); + scaler_set_rotation(scaler, task->transform.rotation); + + scaler_set_csc(scaler, task->src.format); + + scaler_set_timer(scaler, 0xffff, 0xf); + + scaler_enable_int(scaler); + scaler_start_hw(scaler); + + return 0; +} + +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = scaler_commit, +}; + +static inline void scaler_disable_int(struct scaler_context *scaler) +{ + scaler_write(0, SCALER_INT_EN); +} + +static inline u32 scaler_get_int_status(struct scaler_context *scaler) +{ + return scaler_read(SCALER_INT_STATUS); +} + +static inline int scaler_task_done(u32 val) +{ + return val & SCALER_INT_STATUS_FRAME_END ? 0 : -EINVAL; +} + +static irqreturn_t scaler_irq_handler(int irq, void *arg) +{ + struct scaler_context *scaler = arg; + + u32 val = scaler_get_int_status(scaler); + + scaler_disable_int(scaler); + + if (scaler->task) { + struct exynos_drm_ipp_task *task = scaler->task; + + scaler->task = NULL; + pm_runtime_mark_last_busy(scaler->dev); + pm_runtime_put_autosuspend(scaler->dev); + exynos_drm_ipp_task_done(task, scaler_task_done(val)); + } + + return IRQ_HANDLED; +} + +static int scaler_bind(struct device *dev, struct device *master, void *data) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &scaler->ipp; + + scaler->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + scaler->scaler_data->formats, + scaler->scaler_data->num_formats, "scaler"); + + dev_info(dev, "The exynos scaler has been probed successfully\n"); + + return 0; +} + +static void scaler_unbind(struct device *dev, struct device *master, + void *data) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &scaler->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(scaler->drm_dev, scaler->dev); +} + +static const struct component_ops scaler_component_ops = { + .bind = scaler_bind, + .unbind = scaler_unbind, +}; + +static int scaler_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *regs_res; + struct scaler_context *scaler; + int irq; + int ret, i; + + scaler = devm_kzalloc(dev, sizeof(*scaler), GFP_KERNEL); + if (!scaler) + return -ENOMEM; + + scaler->scaler_data = + (struct scaler_data *)of_device_get_match_data(dev); + + scaler->dev = dev; + regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + scaler->regs = devm_ioremap_resource(dev, regs_res); + if (IS_ERR(scaler->regs)) + return PTR_ERR(scaler->regs); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "failed to get irq\n"); + return irq; + } + + ret = devm_request_threaded_irq(dev, irq, NULL, scaler_irq_handler, + IRQF_ONESHOT, "drm_scaler", scaler); + if (ret < 0) { + dev_err(dev, "failed to request irq\n"); + return ret; + } + + for (i = 0; i < scaler->scaler_data->num_clk; ++i) { + scaler->clock[i] = devm_clk_get(dev, + scaler->scaler_data->clk_name[i]); + if (IS_ERR(scaler->clock[i])) { + dev_err(dev, "failed to get clock\n"); + return PTR_ERR(scaler->clock[i]); + } + } + + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, SCALER_AUTOSUSPEND_DELAY); + pm_runtime_enable(dev); + platform_set_drvdata(pdev, scaler); + + ret = component_add(dev, &scaler_component_ops); + if (ret) + goto err_ippdrv_register; + + return 0; + +err_ippdrv_register: + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); + return ret; +} + +static int scaler_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + component_del(dev, &scaler_component_ops); + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); + + return 0; +} + +#ifdef CONFIG_PM + +static int clk_disable_unprepare_wrapper(struct clk *clk) +{ + clk_disable_unprepare(clk); + + return 0; +} + +static int scaler_clk_ctrl(struct scaler_context *scaler, bool enable) +{ + int (*clk_fun)(struct clk *clk), i; + + clk_fun = enable ? clk_prepare_enable : clk_disable_unprepare_wrapper; + + for (i = 0; i < scaler->scaler_data->num_clk; ++i) + clk_fun(scaler->clock[i]); + + return 0; +} + +static int scaler_runtime_suspend(struct device *dev) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + + return scaler_clk_ctrl(scaler, false); +} + +static int scaler_runtime_resume(struct device *dev) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + + return scaler_clk_ctrl(scaler, true); +} +#endif + +static const struct dev_pm_ops scaler_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(scaler_runtime_suspend, scaler_runtime_resume, NULL) +}; + +static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_hv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_h_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 1) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct drm_exynos_ipp_limit scaler_5420_one_pixel_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct exynos_drm_ipp_formats exynos5420_formats[] = { + /* SCALER_YUV420_2P_UV */ + { IPP_SRCDST_FORMAT(NV21, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV420_2P_VU */ + { IPP_SRCDST_FORMAT(NV12, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV420_3P */ + { IPP_SRCDST_FORMAT(YUV420, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV422_1P_YUYV */ + { IPP_SRCDST_FORMAT(YUYV, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_1P_UYVY */ + { IPP_SRCDST_FORMAT(UYVY, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_1P_YVYU */ + { IPP_SRCDST_FORMAT(YVYU, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_2P_UV */ + { IPP_SRCDST_FORMAT(NV61, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_2P_VU */ + { IPP_SRCDST_FORMAT(NV16, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_3P */ + { IPP_SRCDST_FORMAT(YUV422, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV444_2P_UV */ + { IPP_SRCDST_FORMAT(NV42, scaler_5420_one_pixel_limits) }, + + /* SCALER_YUV444_2P_VU */ + { IPP_SRCDST_FORMAT(NV24, scaler_5420_one_pixel_limits) }, + + /* SCALER_YUV444_3P */ + { IPP_SRCDST_FORMAT(YUV444, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGB_565 */ + { IPP_SRCDST_FORMAT(RGB565, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB1555 */ + { IPP_SRCDST_FORMAT(XRGB1555, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB1555 */ + { IPP_SRCDST_FORMAT(ARGB1555, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB4444 */ + { IPP_SRCDST_FORMAT(XRGB4444, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB4444 */ + { IPP_SRCDST_FORMAT(ARGB4444, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB8888 */ + { IPP_SRCDST_FORMAT(XRGB8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB8888 */ + { IPP_SRCDST_FORMAT(ARGB8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGBA8888 */ + { IPP_SRCDST_FORMAT(RGBX8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGBA8888 */ + { IPP_SRCDST_FORMAT(RGBA8888, scaler_5420_one_pixel_limits) }, +}; + +static const struct scaler_data exynos5420_data = { + .clk_name = {"mscl"}, + .num_clk = 1, + .formats = exynos5420_formats, + .num_formats = ARRAY_SIZE(exynos5420_formats), +}; + +static const struct scaler_data exynos5433_data = { + .clk_name = {"pclk", "aclk", "aclk_xiu"}, + .num_clk = 3, + .formats = exynos5420_formats, /* intentional */ + .num_formats = ARRAY_SIZE(exynos5420_formats), +}; + +static const struct of_device_id exynos_scaler_match[] = { + { + .compatible = "samsung,exynos5420-scaler", + .data = &exynos5420_data, + }, { + .compatible = "samsung,exynos5433-scaler", + .data = &exynos5433_data, + }, { + }, +}; +MODULE_DEVICE_TABLE(of, exynos_scaler_match); + +struct platform_driver scaler_driver = { + .probe = scaler_probe, + .remove = scaler_remove, + .driver = { + .name = "exynos-scaler", + .owner = THIS_MODULE, + .pm = &scaler_pm_ops, + .of_match_table = exynos_scaler_match, + }, +}; diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index abd84cbcf1c2..09c4bc0b1859 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -954,8 +954,6 @@ static int hdmi_create_connector(struct drm_encoder *encoder) drm_mode_connector_attach_encoder(connector, encoder); if (hdata->bridge) { - encoder->bridge = hdata->bridge; - hdata->bridge->encoder = encoder; ret = drm_bridge_attach(encoder, hdata->bridge, NULL); if (ret) DRM_ERROR("Failed to attach bridge\n"); diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 257299ec95c4..272c79f5f5bf 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -473,7 +473,7 @@ static void vp_video_buffer(struct mixer_context *ctx, chroma_addr[1] = chroma_addr[0] + 0x40; } else { luma_addr[1] = luma_addr[0] + fb->pitches[0]; - chroma_addr[1] = chroma_addr[0] + fb->pitches[0]; + chroma_addr[1] = chroma_addr[0] + fb->pitches[1]; } } else { luma_addr[1] = 0; @@ -482,6 +482,7 @@ static void vp_video_buffer(struct mixer_context *ctx, spin_lock_irqsave(&ctx->reg_slock, flags); + vp_reg_write(ctx, VP_SHADOW_UPDATE, 1); /* interlace or progressive scan mode */ val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0); vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP); @@ -495,21 +496,23 @@ static void vp_video_buffer(struct mixer_context *ctx, vp_reg_write(ctx, VP_IMG_SIZE_Y, VP_IMG_HSIZE(fb->pitches[0]) | VP_IMG_VSIZE(fb->height)); /* chroma plane for NV12/NV21 is half the height of the luma plane */ - vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) | + vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[1]) | VP_IMG_VSIZE(fb->height / 2)); vp_reg_write(ctx, VP_SRC_WIDTH, state->src.w); - vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h); vp_reg_write(ctx, VP_SRC_H_POSITION, VP_SRC_H_POSITION_VAL(state->src.x)); - vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y); - vp_reg_write(ctx, VP_DST_WIDTH, state->crtc.w); vp_reg_write(ctx, VP_DST_H_POSITION, state->crtc.x); + if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { + vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h / 2); + vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y / 2); vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h / 2); vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y / 2); } else { + vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h); + vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y); vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h); vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y); } @@ -699,6 +702,15 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg) /* interlace scan need to check shadow register */ if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { + if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) && + vp_reg_read(ctx, VP_SHADOW_UPDATE)) + goto out; + + base = mixer_reg_read(ctx, MXR_CFG); + shadow = mixer_reg_read(ctx, MXR_CFG_S); + if (base != shadow) + goto out; + base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0)); shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0)); if (base != shadow) diff --git a/drivers/gpu/drm/exynos/regs-mixer.h b/drivers/gpu/drm/exynos/regs-mixer.h index c311f571bdf9..189cfa2470a8 100644 --- a/drivers/gpu/drm/exynos/regs-mixer.h +++ b/drivers/gpu/drm/exynos/regs-mixer.h @@ -47,6 +47,7 @@ #define MXR_MO 0x0304 #define MXR_RESOLUTION 0x0310 +#define MXR_CFG_S 0x2004 #define MXR_GRAPHIC0_BASE_S 0x2024 #define MXR_GRAPHIC1_BASE_S 0x2044 diff --git a/drivers/gpu/drm/exynos/regs-scaler.h b/drivers/gpu/drm/exynos/regs-scaler.h new file mode 100644 index 000000000000..fc7ccad75e74 --- /dev/null +++ b/drivers/gpu/drm/exynos/regs-scaler.h @@ -0,0 +1,426 @@ +/* drivers/gpu/drm/exynos/regs-scaler.h + * + * Copyright (c) 2017 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com> + * + * Register definition file for Samsung scaler driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef EXYNOS_REGS_SCALER_H +#define EXYNOS_REGS_SCALER_H + +/* Register part */ + +/* Global setting */ +#define SCALER_STATUS 0x0 /* no shadow */ +#define SCALER_CFG 0x4 + +/* Interrupt */ +#define SCALER_INT_EN 0x8 /* no shadow */ +#define SCALER_INT_STATUS 0xc /* no shadow */ + +/* SRC */ +#define SCALER_SRC_CFG 0x10 +#define SCALER_SRC_Y_BASE 0x14 +#define SCALER_SRC_CB_BASE 0x18 +#define SCALER_SRC_CR_BASE 0x294 +#define SCALER_SRC_SPAN 0x1c +#define SCALER_SRC_Y_POS 0x20 +#define SCALER_SRC_WH 0x24 +#define SCALER_SRC_C_POS 0x28 + +/* DST */ +#define SCALER_DST_CFG 0x30 +#define SCALER_DST_Y_BASE 0x34 +#define SCALER_DST_CB_BASE 0x38 +#define SCALER_DST_CR_BASE 0x298 +#define SCALER_DST_SPAN 0x3c +#define SCALER_DST_WH 0x40 +#define SCALER_DST_POS 0x44 + +/* Ratio */ +#define SCALER_H_RATIO 0x50 +#define SCALER_V_RATIO 0x54 + +/* Rotation */ +#define SCALER_ROT_CFG 0x58 + +/* Coefficient */ +/* + * YHCOEF_{x}{A|B|C|D} CHCOEF_{x}{A|B|C|D} + * + * A B C D A B C D + * 0 60 64 68 6c 140 144 148 14c + * 1 70 74 78 7c 150 154 158 15c + * 2 80 84 88 8c 160 164 168 16c + * 3 90 94 98 9c 170 174 178 17c + * 4 a0 a4 a8 ac 180 184 188 18c + * 5 b0 b4 b8 bc 190 194 198 19c + * 6 c0 c4 c8 cc 1a0 1a4 1a8 1ac + * 7 d0 d4 d8 dc 1b0 1b4 1b8 1bc + * 8 e0 e4 e8 ec 1c0 1c4 1c8 1cc + * + * + * YVCOEF_{x}{A|B} CVCOEF_{x}{A|B} + * + * A B A B + * 0 f0 f4 1d0 1d4 + * 1 f8 fc 1d8 1dc + * 2 100 104 1e0 1e4 + * 3 108 10c 1e8 1ec + * 4 110 114 1f0 1f4 + * 5 118 11c 1f8 1fc + * 6 120 124 200 204 + * 7 128 12c 208 20c + * 8 130 134 210 214 + */ +#define _SCALER_HCOEF_DELTA(r, c) ((r) * 0x10 + (c) * 0x4) +#define _SCALER_VCOEF_DELTA(r, c) ((r) * 0x8 + (c) * 0x4) + +#define SCALER_YHCOEF(r, c) (0x60 + _SCALER_HCOEF_DELTA((r), (c))) +#define SCALER_YVCOEF(r, c) (0xf0 + _SCALER_VCOEF_DELTA((r), (c))) +#define SCALER_CHCOEF(r, c) (0x140 + _SCALER_HCOEF_DELTA((r), (c))) +#define SCALER_CVCOEF(r, c) (0x1d0 + _SCALER_VCOEF_DELTA((r), (c))) + + +/* Color Space Conversion */ +#define SCALER_CSC_COEF(x, y) (0x220 + (y) * 0xc + (x) * 0x4) + +/* Dithering */ +#define SCALER_DITH_CFG 0x250 + +/* Version Number */ +#define SCALER_VER 0x260 /* no shadow */ + +/* Cycle count and Timeout */ +#define SCALER_CYCLE_COUNT 0x278 /* no shadow */ +#define SCALER_TIMEOUT_CTRL 0x2c0 /* no shadow */ +#define SCALER_TIMEOUT_CNT 0x2c4 /* no shadow */ + +/* Blending */ +#define SCALER_SRC_BLEND_COLOR 0x280 +#define SCALER_SRC_BLEND_ALPHA 0x284 +#define SCALER_DST_BLEND_COLOR 0x288 +#define SCALER_DST_BLEND_ALPHA 0x28c + +/* Color Fill */ +#define SCALER_FILL_COLOR 0x290 + +/* Multiple Command Queue */ +#define SCALER_ADDR_Q_CONFIG 0x2a0 /* no shadow */ +#define SCALER_SRC_ADDR_Q_STATUS 0x2a4 /* no shadow */ +#define SCALER_SRC_ADDR_Q 0x2a8 /* no shadow */ + +/* CRC */ +#define SCALER_CRC_COLOR00_10 0x2b0 /* no shadow */ +#define SCALER_CRC_COLOR20_30 0x2b4 /* no shadow */ +#define SCALER_CRC_COLOR01_11 0x2b8 /* no shadow */ +#define SCALER_CRC_COLOR21_31 0x2bc /* no shadow */ + +/* Shadow Registers */ +#define SCALER_SHADOW_OFFSET 0x1000 + + +/* Bit definition part */ +#define SCALER_MASK(hi_b, lo_b) ((1 << ((hi_b) - (lo_b) + 1)) - 1) +#define SCALER_GET(reg, hi_b, lo_b) \ + (((reg) >> (lo_b)) & SCALER_MASK(hi_b, lo_b)) +#define SCALER_SET(val, hi_b, lo_b) \ + (((val) & SCALER_MASK(hi_b, lo_b)) << lo_b) + +/* SCALER_STATUS */ +#define SCALER_STATUS_SCALER_RUNNING (1 << 1) +#define SCALER_STATUS_SCALER_READY_CLK_DOWN (1 << 0) + +/* SCALER_CFG */ +#define SCALER_CFG_FILL_EN (1 << 24) +#define SCALER_CFG_BLEND_COLOR_DIVIDE_ALPHA_EN (1 << 17) +#define SCALER_CFG_BLEND_EN (1 << 16) +#define SCALER_CFG_CSC_Y_OFFSET_SRC_EN (1 << 10) +#define SCALER_CFG_CSC_Y_OFFSET_DST_EN (1 << 9) +#define SCALER_CFG_16_BURST_MODE (1 << 8) +#define SCALER_CFG_SOFT_RESET (1 << 1) +#define SCALER_CFG_START_CMD (1 << 0) + +/* SCALER_INT_EN */ +#define SCALER_INT_EN_TIMEOUT (1 << 31) +#define SCALER_INT_EN_ILLEGAL_BLEND (1 << 24) +#define SCALER_INT_EN_ILLEGAL_RATIO (1 << 23) +#define SCALER_INT_EN_ILLEGAL_DST_HEIGHT (1 << 22) +#define SCALER_INT_EN_ILLEGAL_DST_WIDTH (1 << 21) +#define SCALER_INT_EN_ILLEGAL_DST_V_POS (1 << 20) +#define SCALER_INT_EN_ILLEGAL_DST_H_POS (1 << 19) +#define SCALER_INT_EN_ILLEGAL_DST_C_SPAN (1 << 18) +#define SCALER_INT_EN_ILLEGAL_DST_Y_SPAN (1 << 17) +#define SCALER_INT_EN_ILLEGAL_DST_CR_BASE (1 << 16) +#define SCALER_INT_EN_ILLEGAL_DST_CB_BASE (1 << 15) +#define SCALER_INT_EN_ILLEGAL_DST_Y_BASE (1 << 14) +#define SCALER_INT_EN_ILLEGAL_DST_COLOR (1 << 13) +#define SCALER_INT_EN_ILLEGAL_SRC_HEIGHT (1 << 12) +#define SCALER_INT_EN_ILLEGAL_SRC_WIDTH (1 << 11) +#define SCALER_INT_EN_ILLEGAL_SRC_CV_POS (1 << 10) +#define SCALER_INT_EN_ILLEGAL_SRC_CH_POS (1 << 9) +#define SCALER_INT_EN_ILLEGAL_SRC_YV_POS (1 << 8) +#define SCALER_INT_EN_ILLEGAL_SRC_YH_POS (1 << 7) +#define SCALER_INT_EN_ILLEGAL_DST_SPAN (1 << 6) +#define SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN (1 << 5) +#define SCALER_INT_EN_ILLEGAL_SRC_CR_BASE (1 << 4) +#define SCALER_INT_EN_ILLEGAL_SRC_CB_BASE (1 << 3) +#define SCALER_INT_EN_ILLEGAL_SRC_Y_BASE (1 << 2) +#define SCALER_INT_EN_ILLEGAL_SRC_COLOR (1 << 1) +#define SCALER_INT_EN_FRAME_END (1 << 0) + +/* SCALER_INT_STATUS */ +#define SCALER_INT_STATUS_TIMEOUT (1 << 31) +#define SCALER_INT_STATUS_ILLEGAL_BLEND (1 << 24) +#define SCALER_INT_STATUS_ILLEGAL_RATIO (1 << 23) +#define SCALER_INT_STATUS_ILLEGAL_DST_HEIGHT (1 << 22) +#define SCALER_INT_STATUS_ILLEGAL_DST_WIDTH (1 << 21) +#define SCALER_INT_STATUS_ILLEGAL_DST_V_POS (1 << 20) +#define SCALER_INT_STATUS_ILLEGAL_DST_H_POS (1 << 19) +#define SCALER_INT_STATUS_ILLEGAL_DST_C_SPAN (1 << 18) +#define SCALER_INT_STATUS_ILLEGAL_DST_Y_SPAN (1 << 17) +#define SCALER_INT_STATUS_ILLEGAL_DST_CR_BASE (1 << 16) +#define SCALER_INT_STATUS_ILLEGAL_DST_CB_BASE (1 << 15) +#define SCALER_INT_STATUS_ILLEGAL_DST_Y_BASE (1 << 14) +#define SCALER_INT_STATUS_ILLEGAL_DST_COLOR (1 << 13) +#define SCALER_INT_STATUS_ILLEGAL_SRC_HEIGHT (1 << 12) +#define SCALER_INT_STATUS_ILLEGAL_SRC_WIDTH (1 << 11) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CV_POS (1 << 10) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CH_POS (1 << 9) +#define SCALER_INT_STATUS_ILLEGAL_SRC_YV_POS (1 << 8) +#define SCALER_INT_STATUS_ILLEGAL_SRC_YH_POS (1 << 7) +#define SCALER_INT_STATUS_ILLEGAL_DST_SPAN (1 << 6) +#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_SPAN (1 << 5) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CR_BASE (1 << 4) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CB_BASE (1 << 3) +#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_BASE (1 << 2) +#define SCALER_INT_STATUS_ILLEGAL_SRC_COLOR (1 << 1) +#define SCALER_INT_STATUS_FRAME_END (1 << 0) + +/* SCALER_SRC_CFG */ +#define SCALER_SRC_CFG_TILE_EN (1 << 10) +#define SCALER_SRC_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5) +#define SCALER_SRC_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5) +#define SCALER_SRC_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0) +#define SCALER_SRC_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0) +#define SCALER_YUV420_2P_UV 0 +#define SCALER_YUV422_2P_UV 2 +#define SCALER_YUV444_2P_UV 3 +#define SCALER_RGB_565 4 +#define SCALER_ARGB1555 5 +#define SCALER_ARGB8888 6 +#define SCALER_ARGB8888_PRE 7 +#define SCALER_YUV422_1P_YVYU 9 +#define SCALER_YUV422_1P_YUYV 10 +#define SCALER_YUV422_1P_UYVY 11 +#define SCALER_ARGB4444 12 +#define SCALER_L8A8 13 +#define SCALER_RGBA8888 14 +#define SCALER_L8 15 +#define SCALER_YUV420_2P_VU 16 +#define SCALER_YUV422_2P_VU 18 +#define SCALER_YUV444_2P_VU 19 +#define SCALER_YUV420_3P 20 +#define SCALER_YUV422_3P 22 +#define SCALER_YUV444_3P 23 + +/* SCALER_SRC_SPAN */ +#define SCALER_SRC_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16) +#define SCALER_SRC_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16) +#define SCALER_SRC_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0) +#define SCALER_SRC_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0) + +/* SCALER_SRC_Y_POS */ +#define SCALER_SRC_Y_POS_GET_YH_POS(r) SCALER_GET(r, 31, 16) +#define SCALER_SRC_Y_POS_SET_YH_POS(v) SCALER_SET(v, 31, 16) +#define SCALER_SRC_Y_POS_GET_YV_POS(r) SCALER_GET(r, 15, 0) +#define SCALER_SRC_Y_POS_SET_YV_POS(v) SCALER_SET(v, 15, 0) + +/* SCALER_SRC_WH */ +#define SCALER_SRC_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16) +#define SCALER_SRC_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16) +#define SCALER_SRC_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0) +#define SCALER_SRC_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0) + +/* SCALER_SRC_C_POS */ +#define SCALER_SRC_C_POS_GET_CH_POS(r) SCALER_GET(r, 31, 16) +#define SCALER_SRC_C_POS_SET_CH_POS(v) SCALER_SET(v, 31, 16) +#define SCALER_SRC_C_POS_GET_CV_POS(r) SCALER_GET(r, 15, 0) +#define SCALER_SRC_C_POS_SET_CV_POS(v) SCALER_SET(v, 15, 0) + +/* SCALER_DST_CFG */ +#define SCALER_DST_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5) +#define SCALER_DST_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5) +#define SCALER_DST_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0) +#define SCALER_DST_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0) + +/* SCALER_DST_SPAN */ +#define SCALER_DST_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0) + +/* SCALER_DST_WH */ +#define SCALER_DST_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0) + +/* SCALER_DST_POS */ +#define SCALER_DST_POS_GET_H_POS(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_POS_SET_H_POS(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_POS_GET_V_POS(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_POS_SET_V_POS(v) SCALER_SET(v, 13, 0) + +/* SCALER_H_RATIO */ +#define SCALER_H_RATIO_GET(r) SCALER_GET(r, 18, 0) +#define SCALER_H_RATIO_SET(v) SCALER_SET(v, 18, 0) + +/* SCALER_V_RATIO */ +#define SCALER_V_RATIO_GET(r) SCALER_GET(r, 18, 0) +#define SCALER_V_RATIO_SET(v) SCALER_SET(v, 18, 0) + +/* SCALER_ROT_CFG */ +#define SCALER_ROT_CFG_FLIP_X_EN (1 << 3) +#define SCALER_ROT_CFG_FLIP_Y_EN (1 << 2) +#define SCALER_ROT_CFG_GET_ROTMODE(r) SCALER_GET(r, 1, 0) +#define SCALER_ROT_CFG_SET_ROTMODE(v) SCALER_SET(v, 1, 0) +#define SCALER_ROT_MODE_90 1 +#define SCALER_ROT_MODE_180 2 +#define SCALER_ROT_MODE_270 3 + +/* SCALER_HCOEF, SCALER_VCOEF */ +#define SCALER_COEF_SHIFT(i) (16 * (1 - (i) % 2)) +#define SCALER_COEF_GET(r, i) \ + (((r) >> SCALER_COEF_SHIFT(i)) & 0x1ff) +#define SCALER_COEF_SET(v, i) \ + (((v) & 0x1ff) << SCALER_COEF_SHIFT(i)) + +/* SCALER_CSC_COEFxy */ +#define SCALER_CSC_COEF_GET(r) SCALER_GET(r, 11, 0) +#define SCALER_CSC_COEF_SET(v) SCALER_SET(v, 11, 0) + +/* SCALER_DITH_CFG */ +#define SCALER_DITH_CFG_GET_R_TYPE(r) SCALER_GET(r, 8, 6) +#define SCALER_DITH_CFG_SET_R_TYPE(v) SCALER_SET(v, 8, 6) +#define SCALER_DITH_CFG_GET_G_TYPE(r) SCALER_GET(r, 5, 3) +#define SCALER_DITH_CFG_SET_G_TYPE(v) SCALER_SET(v, 5, 3) +#define SCALER_DITH_CFG_GET_B_TYPE(r) SCALER_GET(r, 2, 0) +#define SCALER_DITH_CFG_SET_B_TYPE(v) SCALER_SET(v, 2, 0) + +/* SCALER_TIMEOUT_CTRL */ +#define SCALER_TIMEOUT_CTRL_GET_TIMER_VALUE(r) SCALER_GET(r, 31, 16) +#define SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(v) SCALER_SET(v, 31, 16) +#define SCALER_TIMEOUT_CTRL_GET_TIMER_DIV(r) SCALER_GET(r, 7, 4) +#define SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(v) SCALER_SET(v, 7, 4) +#define SCALER_TIMEOUT_CTRL_TIMER_ENABLE (1 << 0) + +/* SCALER_TIMEOUT_CNT */ +#define SCALER_TIMEOUT_CTRL_GET_TIMER_COUNT(r) SCALER_GET(r, 31, 16) + +/* SCALER_SRC_BLEND_COLOR */ +#define SCALER_SRC_BLEND_COLOR_SEL_INV (1 << 31) +#define SCALER_SRC_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_SRC_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_SRC_BLEND_COLOR_OP_SEL_INV (1 << 28) +#define SCALER_SRC_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_SRC_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_SRC_BLEND_ALPHA */ +#define SCALER_SRC_BLEND_ALPHA_SEL_INV (1 << 31) +#define SCALER_SRC_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_SRC_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_SRC_BLEND_ALPHA_OP_SEL_INV (1 << 28) +#define SCALER_SRC_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_SRC_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_SRC_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0) +#define SCALER_SRC_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0) + +/* SCALER_DST_BLEND_COLOR */ +#define SCALER_DST_BLEND_COLOR_SEL_INV (1 << 31) +#define SCALER_DST_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_DST_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_DST_BLEND_COLOR_OP_SEL_INV (1 << 28) +#define SCALER_DST_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_DST_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_DST_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_DST_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_DST_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_DST_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_DST_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_DST_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_DST_BLEND_ALPHA */ +#define SCALER_DST_BLEND_ALPHA_SEL_INV (1 << 31) +#define SCALER_DST_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_DST_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_DST_BLEND_ALPHA_OP_SEL_INV (1 << 28) +#define SCALER_DST_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_DST_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_DST_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0) +#define SCALER_DST_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0) + +/* SCALER_FILL_COLOR */ +#define SCALER_FILL_COLOR_GET_ALPHA(r) SCALER_GET(r, 31, 24) +#define SCALER_FILL_COLOR_SET_ALPHA(v) SCALER_SET(v, 31, 24) +#define SCALER_FILL_COLOR_GET_FILL_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_FILL_COLOR_SET_FILL_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_FILL_COLOR_GET_FILL_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_FILL_COLOR_SET_FILL_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_FILL_COLOR_GET_FILL_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_FILL_COLOR_SET_FILL_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_ADDR_Q_CONFIG */ +#define SCALER_ADDR_Q_CONFIG_RST (1 << 0) + +/* SCALER_SRC_ADDR_Q_STATUS */ +#define SCALER_SRC_ADDR_Q_STATUS_Y_FULL (1 << 23) +#define SCALER_SRC_ADDR_Q_STATUS_Y_EMPTY (1 << 22) +#define SCALER_SRC_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16) +#define SCALER_SRC_ADDR_Q_STATUS_CB_FULL (1 << 15) +#define SCALER_SRC_ADDR_Q_STATUS_CB_EMPTY (1 << 14) +#define SCALER_SRC_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8) +#define SCALER_SRC_ADDR_Q_STATUS_CR_FULL (1 << 7) +#define SCALER_SRC_ADDR_Q_STATUS_CR_EMPTY (1 << 6) +#define SCALER_SRC_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0) + +/* SCALER_DST_ADDR_Q_STATUS */ +#define SCALER_DST_ADDR_Q_STATUS_Y_FULL (1 << 23) +#define SCALER_DST_ADDR_Q_STATUS_Y_EMPTY (1 << 22) +#define SCALER_DST_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16) +#define SCALER_DST_ADDR_Q_STATUS_CB_FULL (1 << 15) +#define SCALER_DST_ADDR_Q_STATUS_CB_EMPTY (1 << 14) +#define SCALER_DST_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8) +#define SCALER_DST_ADDR_Q_STATUS_CR_FULL (1 << 7) +#define SCALER_DST_ADDR_Q_STATUS_CR_EMPTY (1 << 6) +#define SCALER_DST_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0) + +/* SCALER_CRC_COLOR00_10 */ +#define SCALER_CRC_COLOR00_10_GET_00(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR00_10_GET_10(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR20_30 */ +#define SCALER_CRC_COLOR20_30_GET_20(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR20_30_GET_30(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR01_11 */ +#define SCALER_CRC_COLOR01_11_GET_01(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR01_11_GET_11(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR21_31 */ +#define SCALER_CRC_COLOR21_31_GET_21(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR21_31_GET_31(r) SCALER_GET(r, 15, 0) + +#endif /* EXYNOS_REGS_SCALER_H */ diff --git a/drivers/gpu/drm/i2c/Kconfig b/drivers/gpu/drm/i2c/Kconfig index a6c92beb410a..65d3acb61c03 100644 --- a/drivers/gpu/drm/i2c/Kconfig +++ b/drivers/gpu/drm/i2c/Kconfig @@ -22,8 +22,14 @@ config DRM_I2C_SIL164 config DRM_I2C_NXP_TDA998X tristate "NXP Semiconductors TDA998X HDMI encoder" default m if DRM_TILCDC + select CEC_CORE if CEC_NOTIFIER select SND_SOC_HDMI_CODEC if SND_SOC help Support for NXP Semiconductors TDA998X HDMI encoders. +config DRM_I2C_NXP_TDA9950 + tristate "NXP Semiconductors TDA9950/TDA998X HDMI CEC" + select CEC_NOTIFIER + select CEC_CORE + endmenu diff --git a/drivers/gpu/drm/i2c/Makefile b/drivers/gpu/drm/i2c/Makefile index b20100c18ffb..a962f6f08568 100644 --- a/drivers/gpu/drm/i2c/Makefile +++ b/drivers/gpu/drm/i2c/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_DRM_I2C_SIL164) += sil164.o tda998x-y := tda998x_drv.o obj-$(CONFIG_DRM_I2C_NXP_TDA998X) += tda998x.o +obj-$(CONFIG_DRM_I2C_NXP_TDA9950) += tda9950.o diff --git a/drivers/gpu/drm/i2c/tda9950.c b/drivers/gpu/drm/i2c/tda9950.c new file mode 100644 index 000000000000..3f7396caad48 --- /dev/null +++ b/drivers/gpu/drm/i2c/tda9950.c @@ -0,0 +1,509 @@ +/* + * TDA9950 Consumer Electronics Control driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * The NXP TDA9950 implements the HDMI Consumer Electronics Control + * interface. The host interface is similar to a mailbox: the data + * registers starting at REG_CDR0 are written to send a command to the + * internal CPU, and replies are read from these registers. + * + * As the data registers represent a mailbox, they must be accessed + * as a single I2C transaction. See the TDA9950 data sheet for details. + */ +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/platform_data/tda9950.h> +#include <linux/slab.h> +#include <drm/drm_edid.h> +#include <media/cec.h> +#include <media/cec-notifier.h> + +enum { + REG_CSR = 0x00, + CSR_BUSY = BIT(7), + CSR_INT = BIT(6), + CSR_ERR = BIT(5), + + REG_CER = 0x01, + + REG_CVR = 0x02, + + REG_CCR = 0x03, + CCR_RESET = BIT(7), + CCR_ON = BIT(6), + + REG_ACKH = 0x04, + REG_ACKL = 0x05, + + REG_CCONR = 0x06, + CCONR_ENABLE_ERROR = BIT(4), + CCONR_RETRY_MASK = 7, + + REG_CDR0 = 0x07, + + CDR1_REQ = 0x00, + CDR1_CNF = 0x01, + CDR1_IND = 0x81, + CDR1_ERR = 0x82, + CDR1_IER = 0x83, + + CDR2_CNF_SUCCESS = 0x00, + CDR2_CNF_OFF_STATE = 0x80, + CDR2_CNF_BAD_REQ = 0x81, + CDR2_CNF_CEC_ACCESS = 0x82, + CDR2_CNF_ARB_ERROR = 0x83, + CDR2_CNF_BAD_TIMING = 0x84, + CDR2_CNF_NACK_ADDR = 0x85, + CDR2_CNF_NACK_DATA = 0x86, +}; + +struct tda9950_priv { + struct i2c_client *client; + struct device *hdmi; + struct cec_adapter *adap; + struct tda9950_glue *glue; + u16 addresses; + struct cec_msg rx_msg; + struct cec_notifier *notify; + bool open; +}; + +static int tda9950_write_range(struct i2c_client *client, u8 addr, u8 *p, int cnt) +{ + struct i2c_msg msg; + u8 buf[cnt + 1]; + int ret; + + buf[0] = addr; + memcpy(buf + 1, p, cnt); + + msg.addr = client->addr; + msg.flags = 0; + msg.len = cnt + 1; + msg.buf = buf; + + dev_dbg(&client->dev, "wr 0x%02x: %*ph\n", addr, cnt, p); + + ret = i2c_transfer(client->adapter, &msg, 1); + if (ret < 0) + dev_err(&client->dev, "Error %d writing to cec:0x%x\n", ret, addr); + return ret < 0 ? ret : 0; +} + +static void tda9950_write(struct i2c_client *client, u8 addr, u8 val) +{ + tda9950_write_range(client, addr, &val, 1); +} + +static int tda9950_read_range(struct i2c_client *client, u8 addr, u8 *p, int cnt) +{ + struct i2c_msg msg[2]; + int ret; + + msg[0].addr = client->addr; + msg[0].flags = 0; + msg[0].len = 1; + msg[0].buf = &addr; + msg[1].addr = client->addr; + msg[1].flags = I2C_M_RD; + msg[1].len = cnt; + msg[1].buf = p; + + ret = i2c_transfer(client->adapter, msg, 2); + if (ret < 0) + dev_err(&client->dev, "Error %d reading from cec:0x%x\n", ret, addr); + + dev_dbg(&client->dev, "rd 0x%02x: %*ph\n", addr, cnt, p); + + return ret; +} + +static u8 tda9950_read(struct i2c_client *client, u8 addr) +{ + int ret; + u8 val; + + ret = tda9950_read_range(client, addr, &val, 1); + if (ret < 0) + val = 0; + + return val; +} + +static irqreturn_t tda9950_irq(int irq, void *data) +{ + struct tda9950_priv *priv = data; + unsigned int tx_status; + u8 csr, cconr, buf[19]; + u8 arb_lost_cnt, nack_cnt, err_cnt; + + if (!priv->open) + return IRQ_NONE; + + csr = tda9950_read(priv->client, REG_CSR); + if (!(csr & CSR_INT)) + return IRQ_NONE; + + cconr = tda9950_read(priv->client, REG_CCONR) & CCONR_RETRY_MASK; + + tda9950_read_range(priv->client, REG_CDR0, buf, sizeof(buf)); + + /* + * This should never happen: the data sheet says that there will + * always be a valid message if the interrupt line is asserted. + */ + if (buf[0] == 0) { + dev_warn(&priv->client->dev, "interrupt pending, but no message?\n"); + return IRQ_NONE; + } + + switch (buf[1]) { + case CDR1_CNF: /* transmit result */ + arb_lost_cnt = nack_cnt = err_cnt = 0; + switch (buf[2]) { + case CDR2_CNF_SUCCESS: + tx_status = CEC_TX_STATUS_OK; + break; + + case CDR2_CNF_ARB_ERROR: + tx_status = CEC_TX_STATUS_ARB_LOST; + arb_lost_cnt = cconr; + break; + + case CDR2_CNF_NACK_ADDR: + tx_status = CEC_TX_STATUS_NACK; + nack_cnt = cconr; + break; + + default: /* some other error, refer to TDA9950 docs */ + dev_err(&priv->client->dev, "CNF reply error 0x%02x\n", + buf[2]); + tx_status = CEC_TX_STATUS_ERROR; + err_cnt = cconr; + break; + } + /* TDA9950 executes all retries for us */ + tx_status |= CEC_TX_STATUS_MAX_RETRIES; + cec_transmit_done(priv->adap, tx_status, arb_lost_cnt, + nack_cnt, 0, err_cnt); + break; + + case CDR1_IND: + priv->rx_msg.len = buf[0] - 2; + if (priv->rx_msg.len > CEC_MAX_MSG_SIZE) + priv->rx_msg.len = CEC_MAX_MSG_SIZE; + + memcpy(priv->rx_msg.msg, buf + 2, priv->rx_msg.len); + cec_received_msg(priv->adap, &priv->rx_msg); + break; + + default: /* unknown */ + dev_err(&priv->client->dev, "unknown service id 0x%02x\n", + buf[1]); + break; + } + + return IRQ_HANDLED; +} + +static int tda9950_cec_transmit(struct cec_adapter *adap, u8 attempts, + u32 signal_free_time, struct cec_msg *msg) +{ + struct tda9950_priv *priv = adap->priv; + u8 buf[CEC_MAX_MSG_SIZE + 2]; + + buf[0] = 2 + msg->len; + buf[1] = CDR1_REQ; + memcpy(buf + 2, msg->msg, msg->len); + + if (attempts > 5) + attempts = 5; + + tda9950_write(priv->client, REG_CCONR, attempts); + + return tda9950_write_range(priv->client, REG_CDR0, buf, 2 + msg->len); +} + +static int tda9950_cec_adap_log_addr(struct cec_adapter *adap, u8 addr) +{ + struct tda9950_priv *priv = adap->priv; + u16 addresses; + u8 buf[2]; + + if (addr == CEC_LOG_ADDR_INVALID) + addresses = priv->addresses = 0; + else + addresses = priv->addresses |= BIT(addr); + + /* TDA9950 doesn't want address 15 set */ + addresses &= 0x7fff; + buf[0] = addresses >> 8; + buf[1] = addresses; + + return tda9950_write_range(priv->client, REG_ACKH, buf, 2); +} + +/* + * When operating as part of the TDA998x, we need additional handling + * to initialise and shut down the TDA9950 part of the device. These + * two hooks are provided to allow the TDA998x code to perform those + * activities. + */ +static int tda9950_glue_open(struct tda9950_priv *priv) +{ + int ret = 0; + + if (priv->glue && priv->glue->open) + ret = priv->glue->open(priv->glue->data); + + priv->open = true; + + return ret; +} + +static void tda9950_glue_release(struct tda9950_priv *priv) +{ + priv->open = false; + + if (priv->glue && priv->glue->release) + priv->glue->release(priv->glue->data); +} + +static int tda9950_open(struct tda9950_priv *priv) +{ + struct i2c_client *client = priv->client; + int ret; + + ret = tda9950_glue_open(priv); + if (ret) + return ret; + + /* Reset the TDA9950, and wait 250ms for it to recover */ + tda9950_write(client, REG_CCR, CCR_RESET); + msleep(250); + + tda9950_cec_adap_log_addr(priv->adap, CEC_LOG_ADDR_INVALID); + + /* Start the command processor */ + tda9950_write(client, REG_CCR, CCR_ON); + + return 0; +} + +static void tda9950_release(struct tda9950_priv *priv) +{ + struct i2c_client *client = priv->client; + int timeout = 50; + u8 csr; + + /* Stop the command processor */ + tda9950_write(client, REG_CCR, 0); + + /* Wait up to .5s for it to signal non-busy */ + do { + csr = tda9950_read(client, REG_CSR); + if (!(csr & CSR_BUSY) || --timeout) + break; + msleep(10); + } while (1); + + /* Warn the user that their IRQ may die if it's shared. */ + if (csr & CSR_BUSY) + dev_warn(&client->dev, "command processor failed to stop, irq%d may die (csr=0x%02x)\n", + client->irq, csr); + + tda9950_glue_release(priv); +} + +static int tda9950_cec_adap_enable(struct cec_adapter *adap, bool enable) +{ + struct tda9950_priv *priv = adap->priv; + + if (!enable) { + tda9950_release(priv); + return 0; + } else { + return tda9950_open(priv); + } +} + +static const struct cec_adap_ops tda9950_cec_ops = { + .adap_enable = tda9950_cec_adap_enable, + .adap_log_addr = tda9950_cec_adap_log_addr, + .adap_transmit = tda9950_cec_transmit, +}; + +/* + * When operating as part of the TDA998x, we need to claim additional + * resources. These two hooks permit the management of those resources. + */ +static void tda9950_devm_glue_exit(void *data) +{ + struct tda9950_glue *glue = data; + + if (glue && glue->exit) + glue->exit(glue->data); +} + +static int tda9950_devm_glue_init(struct device *dev, struct tda9950_glue *glue) +{ + int ret; + + if (glue && glue->init) { + ret = glue->init(glue->data); + if (ret) + return ret; + } + + ret = devm_add_action(dev, tda9950_devm_glue_exit, glue); + if (ret) + tda9950_devm_glue_exit(glue); + + return ret; +} + +static void tda9950_cec_del(void *data) +{ + struct tda9950_priv *priv = data; + + cec_delete_adapter(priv->adap); +} + +static int tda9950_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct tda9950_glue *glue = client->dev.platform_data; + struct device *dev = &client->dev; + struct tda9950_priv *priv; + unsigned long irqflags; + int ret; + u8 cvr; + + /* + * We must have I2C functionality: our multi-byte accesses + * must be performed as a single contiguous transaction. + */ + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { + dev_err(&client->dev, + "adapter does not support I2C functionality\n"); + return -ENXIO; + } + + /* We must have an interrupt to be functional. */ + if (client->irq <= 0) { + dev_err(&client->dev, "driver requires an interrupt\n"); + return -ENXIO; + } + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->client = client; + priv->glue = glue; + + i2c_set_clientdata(client, priv); + + /* + * If we're part of a TDA998x, we want the class devices to be + * associated with the HDMI Tx so we have a tight relationship + * between the HDMI interface and the CEC interface. + */ + priv->hdmi = dev; + if (glue && glue->parent) + priv->hdmi = glue->parent; + + priv->adap = cec_allocate_adapter(&tda9950_cec_ops, priv, "tda9950", + CEC_CAP_DEFAULTS, + CEC_MAX_LOG_ADDRS); + if (IS_ERR(priv->adap)) + return PTR_ERR(priv->adap); + + ret = devm_add_action(dev, tda9950_cec_del, priv); + if (ret) { + cec_delete_adapter(priv->adap); + return ret; + } + + ret = tda9950_devm_glue_init(dev, glue); + if (ret) + return ret; + + ret = tda9950_glue_open(priv); + if (ret) + return ret; + + cvr = tda9950_read(client, REG_CVR); + + dev_info(&client->dev, + "TDA9950 CEC interface, hardware version %u.%u\n", + cvr >> 4, cvr & 15); + + tda9950_glue_release(priv); + + irqflags = IRQF_TRIGGER_FALLING; + if (glue) + irqflags = glue->irq_flags; + + ret = devm_request_threaded_irq(dev, client->irq, NULL, tda9950_irq, + irqflags | IRQF_SHARED | IRQF_ONESHOT, + dev_name(&client->dev), priv); + if (ret < 0) + return ret; + + priv->notify = cec_notifier_get(priv->hdmi); + if (!priv->notify) + return -ENOMEM; + + ret = cec_register_adapter(priv->adap, priv->hdmi); + if (ret < 0) { + cec_notifier_put(priv->notify); + return ret; + } + + /* + * CEC documentation says we must not call cec_delete_adapter + * after a successful call to cec_register_adapter(). + */ + devm_remove_action(dev, tda9950_cec_del, priv); + + cec_register_cec_notifier(priv->adap, priv->notify); + + return 0; +} + +static int tda9950_remove(struct i2c_client *client) +{ + struct tda9950_priv *priv = i2c_get_clientdata(client); + + cec_unregister_adapter(priv->adap); + cec_notifier_put(priv->notify); + + return 0; +} + +static struct i2c_device_id tda9950_ids[] = { + { "tda9950", 0 }, + { }, +}; +MODULE_DEVICE_TABLE(i2c, tda9950_ids); + +static struct i2c_driver tda9950_driver = { + .probe = tda9950_probe, + .remove = tda9950_remove, + .driver = { + .name = "tda9950", + }, + .id_table = tda9950_ids, +}; + +module_i2c_driver(tda9950_driver); + +MODULE_AUTHOR("Russell King <rmk+kernel@armlinux.org.uk>"); +MODULE_DESCRIPTION("TDA9950/TDA998x Consumer Electronics Control Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c index 0068d714d1bd..0038c976536a 100644 --- a/drivers/gpu/drm/i2c/tda998x_drv.c +++ b/drivers/gpu/drm/i2c/tda998x_drv.c @@ -16,8 +16,10 @@ */ #include <linux/component.h> +#include <linux/gpio/consumer.h> #include <linux/hdmi.h> #include <linux/module.h> +#include <linux/platform_data/tda9950.h> #include <linux/irq.h> #include <sound/asoundef.h> #include <sound/hdmi-codec.h> @@ -29,6 +31,8 @@ #include <drm/drm_of.h> #include <drm/i2c/tda998x.h> +#include <media/cec-notifier.h> + #define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__) struct tda998x_audio_port { @@ -55,6 +59,7 @@ struct tda998x_priv { struct platform_device *audio_pdev; struct mutex audio_mutex; + struct mutex edid_mutex; wait_queue_head_t wq_edid; volatile int wq_edid_wait; @@ -67,6 +72,9 @@ struct tda998x_priv { struct drm_connector connector; struct tda998x_audio_port audio_port[2]; + struct tda9950_glue cec_glue; + struct gpio_desc *calib; + struct cec_notifier *cec_notify; }; #define conn_to_tda998x_priv(x) \ @@ -345,6 +353,12 @@ struct tda998x_priv { #define REG_CEC_INTSTATUS 0xee /* read */ # define CEC_INTSTATUS_CEC (1 << 0) # define CEC_INTSTATUS_HDMI (1 << 1) +#define REG_CEC_CAL_XOSC_CTRL1 0xf2 +# define CEC_CAL_XOSC_CTRL1_ENA_CAL BIT(0) +#define REG_CEC_DES_FREQ2 0xf5 +# define CEC_DES_FREQ2_DIS_AUTOCAL BIT(7) +#define REG_CEC_CLK 0xf6 +# define CEC_CLK_FRO 0x11 #define REG_CEC_FRO_IM_CLK_CTRL 0xfb /* read/write */ # define CEC_FRO_IM_CLK_CTRL_GHOST_DIS (1 << 7) # define CEC_FRO_IM_CLK_CTRL_ENA_OTP (1 << 6) @@ -359,6 +373,7 @@ struct tda998x_priv { # define CEC_RXSHPDLEV_HPD (1 << 1) #define REG_CEC_ENAMODS 0xff /* read/write */ +# define CEC_ENAMODS_EN_CEC_CLK (1 << 7) # define CEC_ENAMODS_DIS_FRO (1 << 6) # define CEC_ENAMODS_DIS_CCLK (1 << 5) # define CEC_ENAMODS_EN_RXSENS (1 << 2) @@ -417,6 +432,114 @@ cec_read(struct tda998x_priv *priv, u8 addr) return val; } +static void cec_enamods(struct tda998x_priv *priv, u8 mods, bool enable) +{ + int val = cec_read(priv, REG_CEC_ENAMODS); + + if (val < 0) + return; + + if (enable) + val |= mods; + else + val &= ~mods; + + cec_write(priv, REG_CEC_ENAMODS, val); +} + +static void tda998x_cec_set_calibration(struct tda998x_priv *priv, bool enable) +{ + if (enable) { + u8 val; + + cec_write(priv, 0xf3, 0xc0); + cec_write(priv, 0xf4, 0xd4); + + /* Enable automatic calibration mode */ + val = cec_read(priv, REG_CEC_DES_FREQ2); + val &= ~CEC_DES_FREQ2_DIS_AUTOCAL; + cec_write(priv, REG_CEC_DES_FREQ2, val); + + /* Enable free running oscillator */ + cec_write(priv, REG_CEC_CLK, CEC_CLK_FRO); + cec_enamods(priv, CEC_ENAMODS_DIS_FRO, false); + + cec_write(priv, REG_CEC_CAL_XOSC_CTRL1, + CEC_CAL_XOSC_CTRL1_ENA_CAL); + } else { + cec_write(priv, REG_CEC_CAL_XOSC_CTRL1, 0); + } +} + +/* + * Calibration for the internal oscillator: we need to set calibration mode, + * and then pulse the IRQ line low for a 10ms ± 1% period. + */ +static void tda998x_cec_calibration(struct tda998x_priv *priv) +{ + struct gpio_desc *calib = priv->calib; + + mutex_lock(&priv->edid_mutex); + if (priv->hdmi->irq > 0) + disable_irq(priv->hdmi->irq); + gpiod_direction_output(calib, 1); + tda998x_cec_set_calibration(priv, true); + + local_irq_disable(); + gpiod_set_value(calib, 0); + mdelay(10); + gpiod_set_value(calib, 1); + local_irq_enable(); + + tda998x_cec_set_calibration(priv, false); + gpiod_direction_input(calib); + if (priv->hdmi->irq > 0) + enable_irq(priv->hdmi->irq); + mutex_unlock(&priv->edid_mutex); +} + +static int tda998x_cec_hook_init(void *data) +{ + struct tda998x_priv *priv = data; + struct gpio_desc *calib; + + calib = gpiod_get(&priv->hdmi->dev, "nxp,calib", GPIOD_ASIS); + if (IS_ERR(calib)) { + dev_warn(&priv->hdmi->dev, "failed to get calibration gpio: %ld\n", + PTR_ERR(calib)); + return PTR_ERR(calib); + } + + priv->calib = calib; + + return 0; +} + +static void tda998x_cec_hook_exit(void *data) +{ + struct tda998x_priv *priv = data; + + gpiod_put(priv->calib); + priv->calib = NULL; +} + +static int tda998x_cec_hook_open(void *data) +{ + struct tda998x_priv *priv = data; + + cec_enamods(priv, CEC_ENAMODS_EN_CEC_CLK | CEC_ENAMODS_EN_CEC, true); + tda998x_cec_calibration(priv); + + return 0; +} + +static void tda998x_cec_hook_release(void *data) +{ + struct tda998x_priv *priv = data; + + cec_enamods(priv, CEC_ENAMODS_EN_CEC_CLK | CEC_ENAMODS_EN_CEC, false); +} + static int set_page(struct tda998x_priv *priv, u16 reg) { @@ -666,10 +789,13 @@ static irqreturn_t tda998x_irq_thread(int irq, void *data) sta, cec, lvl, flag0, flag1, flag2); if (cec & CEC_RXSHPDINT_HPD) { - if (lvl & CEC_RXSHPDLEV_HPD) + if (lvl & CEC_RXSHPDLEV_HPD) { tda998x_edid_delay_start(priv); - else + } else { schedule_work(&priv->detect_work); + cec_notifier_set_phys_addr(priv->cec_notify, + CEC_PHYS_ADDR_INVALID); + } handled = true; } @@ -990,6 +1116,8 @@ static int tda998x_connector_fill_modes(struct drm_connector *connector, if (connector->edid_blob_ptr) { struct edid *edid = (void *)connector->edid_blob_ptr->data; + cec_notifier_set_phys_addr_from_edid(priv->cec_notify, edid); + priv->sink_has_audio = drm_detect_monitor_audio(edid); } else { priv->sink_has_audio = false; @@ -1033,6 +1161,8 @@ static int read_edid_block(void *data, u8 *buf, unsigned int blk, size_t length) offset = (blk & 1) ? 128 : 0; segptr = blk / 2; + mutex_lock(&priv->edid_mutex); + reg_write(priv, REG_DDC_ADDR, 0xa0); reg_write(priv, REG_DDC_OFFS, offset); reg_write(priv, REG_DDC_SEGM_ADDR, 0x60); @@ -1052,14 +1182,15 @@ static int read_edid_block(void *data, u8 *buf, unsigned int blk, size_t length) msecs_to_jiffies(100)); if (i < 0) { dev_err(&priv->hdmi->dev, "read edid wait err %d\n", i); - return i; + ret = i; + goto failed; } } else { for (i = 100; i > 0; i--) { msleep(1); ret = reg_read(priv, REG_INT_FLAGS_2); if (ret < 0) - return ret; + goto failed; if (ret & INT_FLAGS_2_EDID_BLK_RD) break; } @@ -1067,17 +1198,22 @@ static int read_edid_block(void *data, u8 *buf, unsigned int blk, size_t length) if (i == 0) { dev_err(&priv->hdmi->dev, "read edid timeout\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto failed; } ret = reg_read_range(priv, REG_EDID_DATA_0, buf, length); if (ret != length) { dev_err(&priv->hdmi->dev, "failed to read edid block %d: %d\n", blk, ret); - return ret; + goto failed; } - return 0; + ret = 0; + + failed: + mutex_unlock(&priv->edid_mutex); + return ret; } static int tda998x_connector_get_modes(struct drm_connector *connector) @@ -1432,6 +1568,9 @@ static void tda998x_destroy(struct tda998x_priv *priv) cancel_work_sync(&priv->detect_work); i2c_unregister_device(priv->cec); + + if (priv->cec_notify) + cec_notifier_put(priv->cec_notify); } /* I2C driver functions */ @@ -1481,10 +1620,16 @@ static int tda998x_get_audio_ports(struct tda998x_priv *priv, static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv) { struct device_node *np = client->dev.of_node; + struct i2c_board_info cec_info; u32 video; int rev_lo, rev_hi, ret; - mutex_init(&priv->audio_mutex); /* Protect access from audio thread */ + mutex_init(&priv->mutex); /* protect the page access */ + mutex_init(&priv->audio_mutex); /* protect access from audio thread */ + mutex_init(&priv->edid_mutex); + init_waitqueue_head(&priv->edid_delay_waitq); + timer_setup(&priv->edid_delay_timer, tda998x_edid_delay_done, 0); + INIT_WORK(&priv->detect_work, tda998x_detect_work); priv->vip_cntrl_0 = VIP_CNTRL_0_SWAP_A(2) | VIP_CNTRL_0_SWAP_B(3); priv->vip_cntrl_1 = VIP_CNTRL_1_SWAP_C(0) | VIP_CNTRL_1_SWAP_D(1); @@ -1494,14 +1639,6 @@ static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv) priv->cec_addr = 0x34 + (client->addr & 0x03); priv->current_page = 0xff; priv->hdmi = client; - priv->cec = i2c_new_dummy(client->adapter, priv->cec_addr); - if (!priv->cec) - return -ENODEV; - - mutex_init(&priv->mutex); /* protect the page access */ - init_waitqueue_head(&priv->edid_delay_waitq); - timer_setup(&priv->edid_delay_timer, tda998x_edid_delay_done, 0); - INIT_WORK(&priv->detect_work, tda998x_detect_work); /* wake up the device: */ cec_write(priv, REG_CEC_ENAMODS, @@ -1511,10 +1648,15 @@ static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv) /* read version: */ rev_lo = reg_read(priv, REG_VERSION_LSB); + if (rev_lo < 0) { + dev_err(&client->dev, "failed to read version: %d\n", rev_lo); + return rev_lo; + } + rev_hi = reg_read(priv, REG_VERSION_MSB); - if (rev_lo < 0 || rev_hi < 0) { - ret = rev_lo < 0 ? rev_lo : rev_hi; - goto fail; + if (rev_hi < 0) { + dev_err(&client->dev, "failed to read version: %d\n", rev_hi); + return rev_hi; } priv->rev = rev_lo | rev_hi << 8; @@ -1538,7 +1680,7 @@ static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv) default: dev_err(&client->dev, "found unsupported device: %04x\n", priv->rev); - goto fail; + return -ENXIO; } /* after reset, enable DDC: */ @@ -1554,6 +1696,15 @@ static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv) cec_write(priv, REG_CEC_FRO_IM_CLK_CTRL, CEC_FRO_IM_CLK_CTRL_GHOST_DIS | CEC_FRO_IM_CLK_CTRL_IMCLK_SEL); + /* ensure interrupts are disabled */ + cec_write(priv, REG_CEC_RXSHPDINTENA, 0); + + /* clear pending interrupts */ + cec_read(priv, REG_CEC_RXSHPDINT); + reg_read(priv, REG_INT_FLAGS_0); + reg_read(priv, REG_INT_FLAGS_1); + reg_read(priv, REG_INT_FLAGS_2); + /* initialize the optional IRQ */ if (client->irq) { unsigned long irq_flags; @@ -1561,13 +1712,11 @@ static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv) /* init read EDID waitqueue and HDP work */ init_waitqueue_head(&priv->wq_edid); - /* clear pending interrupts */ - reg_read(priv, REG_INT_FLAGS_0); - reg_read(priv, REG_INT_FLAGS_1); - reg_read(priv, REG_INT_FLAGS_2); - irq_flags = irqd_get_trigger_type(irq_get_irq_data(client->irq)); + + priv->cec_glue.irq_flags = irq_flags; + irq_flags |= IRQF_SHARED | IRQF_ONESHOT; ret = request_threaded_irq(client->irq, NULL, tda998x_irq_thread, irq_flags, @@ -1576,13 +1725,46 @@ static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv) dev_err(&client->dev, "failed to request IRQ#%u: %d\n", client->irq, ret); - goto fail; + goto err_irq; } /* enable HPD irq */ cec_write(priv, REG_CEC_RXSHPDINTENA, CEC_RXSHPDLEV_HPD); } + priv->cec_notify = cec_notifier_get(&client->dev); + if (!priv->cec_notify) { + ret = -ENOMEM; + goto fail; + } + + priv->cec_glue.parent = &client->dev; + priv->cec_glue.data = priv; + priv->cec_glue.init = tda998x_cec_hook_init; + priv->cec_glue.exit = tda998x_cec_hook_exit; + priv->cec_glue.open = tda998x_cec_hook_open; + priv->cec_glue.release = tda998x_cec_hook_release; + + /* + * Some TDA998x are actually two I2C devices merged onto one piece + * of silicon: TDA9989 and TDA19989 combine the HDMI transmitter + * with a slightly modified TDA9950 CEC device. The CEC device + * is at the TDA9950 address, with the address pins strapped across + * to the TDA998x address pins. Hence, it always has the same + * offset. + */ + memset(&cec_info, 0, sizeof(cec_info)); + strlcpy(cec_info.type, "tda9950", sizeof(cec_info.type)); + cec_info.addr = priv->cec_addr; + cec_info.platform_data = &priv->cec_glue; + cec_info.irq = client->irq; + + priv->cec = i2c_new_device(client->adapter, &cec_info); + if (!priv->cec) { + ret = -ENODEV; + goto fail; + } + /* enable EDID read irq: */ reg_set(priv, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD); @@ -1605,12 +1787,18 @@ static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv) tda998x_audio_codec_init(priv, &client->dev); return 0; + fail: /* if encoder_init fails, the encoder slave is never registered, * so cleanup here: */ i2c_unregister_device(priv->cec); - return -ENXIO; + if (priv->cec_notify) + cec_notifier_put(priv->cec_notify); + if (client->irq) + free_irq(client->irq, priv); +err_irq: + return ret; } static void tda998x_encoder_prepare(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 9bee52a949a9..4c6adae23e18 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -18,6 +18,10 @@ subdir-ccflags-y += $(call cc-disable-warning, type-limits) subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) subdir-ccflags-y += $(call cc-disable-warning, implicit-fallthrough) subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) +# clang warnings +subdir-ccflags-y += $(call cc-disable-warning, sign-compare) +subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) +subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable @@ -67,11 +71,11 @@ i915-y += i915_cmd_parser.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ - i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ i915_query.o \ i915_request.o \ + i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ @@ -154,7 +158,8 @@ i915-y += dvo_ch7017.o \ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/i915_random.o \ - selftests/i915_selftest.o + selftests/i915_selftest.o \ + selftests/igt_flush_test.o # virtual gpu code i915-y += i915_vgpu.o diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index d85939bd7b47..b51c05d03f14 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -813,15 +813,31 @@ static inline bool is_force_nonpriv_mmio(unsigned int offset) } static int force_nonpriv_reg_handler(struct parser_exec_state *s, - unsigned int offset, unsigned int index) + unsigned int offset, unsigned int index, char *cmd) { struct intel_gvt *gvt = s->vgpu->gvt; - unsigned int data = cmd_val(s, index + 1); + unsigned int data; + u32 ring_base; + u32 nopid; + struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; + + if (!strcmp(cmd, "lri")) + data = cmd_val(s, index + 1); + else { + gvt_err("Unexpected forcenonpriv 0x%x write from cmd %s\n", + offset, cmd); + return -EINVAL; + } + + ring_base = dev_priv->engine[s->ring_id]->mmio_base; + nopid = i915_mmio_reg_offset(RING_NOPID(ring_base)); - if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) { + if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data) && + data != nopid) { gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", offset, data); - return -EPERM; + patch_value(s, cmd_ptr(s, index), nopid); + return 0; } return 0; } @@ -869,7 +885,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, return -EINVAL; if (is_force_nonpriv_mmio(offset) && - force_nonpriv_reg_handler(s, offset, index)) + force_nonpriv_reg_handler(s, offset, index, cmd)) return -EPERM; if (offset == i915_mmio_reg_offset(DERRMR) || @@ -1604,7 +1620,8 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) { /* BDW decides privilege based on address space */ - if (cmd_val(s, 0) & (1 << 8)) + if (cmd_val(s, 0) & (1 << 8) && + !(s->vgpu->scan_nonprivbb & (1 << s->ring_id))) return 0; } return 1; @@ -1618,6 +1635,8 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) bool bb_end = false; struct intel_vgpu *vgpu = s->vgpu; u32 cmd; + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; *bb_size = 0; @@ -1629,18 +1648,22 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) cmd = cmd_val(s, 0); info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } do { - if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + if (copy_gma_to_hva(s->vgpu, mm, gma, gma + 4, &cmd) < 0) return -EFAULT; info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } @@ -1666,6 +1689,9 @@ static int perform_bb_shadow(struct parser_exec_state *s) unsigned long gma = 0; unsigned long bb_size; int ret = 0; + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; + unsigned long gma_start_offset = 0; /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); @@ -1680,8 +1706,24 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (!bb) return -ENOMEM; + bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true; + + /* the gma_start_offset stores the batch buffer's start gma's + * offset relative to page boundary. so for non-privileged batch + * buffer, the shadowed gem object holds exactly the same page + * layout as original gem object. This is for the convience of + * replacing the whole non-privilged batch buffer page to this + * shadowed one in PPGTT at the same gma address. (this replacing + * action is not implemented yet now, but may be necessary in + * future). + * for prileged batch buffer, we just change start gma address to + * that of shadowed page. + */ + if (bb->ppgtt) + gma_start_offset = gma & ~I915_GTT_PAGE_MASK; + bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv, - roundup(bb_size, PAGE_SIZE)); + roundup(bb_size + gma_start_offset, PAGE_SIZE)); if (IS_ERR(bb->obj)) { ret = PTR_ERR(bb->obj); goto err_free_bb; @@ -1702,9 +1744,9 @@ static int perform_bb_shadow(struct parser_exec_state *s) bb->clflush &= ~CLFLUSH_BEFORE; } - ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + ret = copy_gma_to_hva(s->vgpu, mm, gma, gma + bb_size, - bb->va); + bb->va + gma_start_offset); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); ret = -EFAULT; @@ -1730,7 +1772,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) * buffer's gma in pair. After all, we don't want to pin the shadow * buffer here (too early). */ - s->ip_va = bb->va; + s->ip_va = bb->va + gma_start_offset; s->ip_gma = gma; return 0; err_unmap: @@ -2469,15 +2511,18 @@ static int cmd_parser_exec(struct parser_exec_state *s) info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } s->info = info; trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va, - cmd_length(s), s->buf_type); + cmd_length(s), s->buf_type, s->buf_addr_type, + s->workload, info->name); if (info->handler) { ret = info->handler(s); @@ -2864,6 +2909,7 @@ static int init_cmd_table(struct intel_gvt *gvt) if (info) { gvt_err("%s %s duplicated\n", e->info->name, info->name); + kfree(e); return -EEXIST; } diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index f7d0078eb61b..2ec89bcb59f1 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -124,6 +124,68 @@ static int vgpu_mmio_diff_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(vgpu_mmio_diff); +static int +vgpu_scan_nonprivbb_get(void *data, u64 *val) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; + *val = vgpu->scan_nonprivbb; + return 0; +} + +/* + * set/unset bit engine_id of vgpu->scan_nonprivbb to turn on/off scanning + * of non-privileged batch buffer. e.g. + * if vgpu->scan_nonprivbb=3, then it will scan non-privileged batch buffer + * on engine 0 and 1. + */ +static int +vgpu_scan_nonprivbb_set(void *data, u64 val) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + enum intel_engine_id id; + char buf[128], *s; + int len; + + val &= (1 << I915_NUM_ENGINES) - 1; + + if (vgpu->scan_nonprivbb == val) + return 0; + + if (!val) + goto done; + + len = sprintf(buf, + "gvt: vgpu %d turns on non-privileged batch buffers scanning on Engines:", + vgpu->id); + + s = buf + len; + + for (id = 0; id < I915_NUM_ENGINES; id++) { + struct intel_engine_cs *engine; + + engine = dev_priv->engine[id]; + if (engine && (val & (1 << id))) { + len = snprintf(s, 4, "%d, ", engine->id); + s += len; + } else + val &= ~(1 << id); + } + + if (val) + sprintf(s, "low performance expected."); + + pr_warn("%s\n", buf); + +done: + vgpu->scan_nonprivbb = val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops, + vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set, + "0x%llx\n"); + /** * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU * @vgpu: a vGPU @@ -151,6 +213,11 @@ int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) if (!ent) return -ENOMEM; + ent = debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, + vgpu, &vgpu_scan_nonprivbb_fops); + if (!ent) + return -ENOMEM; + return 0; } diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index b46b86892d58..ea7c1c525b8c 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -67,7 +67,7 @@ #define AUX_NATIVE_REPLY_NAK (0x1 << 4) #define AUX_NATIVE_REPLY_DEFER (0x2 << 4) -#define AUX_BURST_SIZE 16 +#define AUX_BURST_SIZE 20 /* DPCD addresses */ #define DPCD_REV 0x000 diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index efacd8abbedc..05d15a095310 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -99,7 +99,6 @@ struct intel_vgpu_fence { struct intel_vgpu_mmio { void *vreg; void *sreg; - bool disable_warn_untrack; }; #define INTEL_GVT_MAX_BAR_NUM 4 @@ -226,6 +225,7 @@ struct intel_vgpu { struct completion vblank_done; + u32 scan_nonprivbb; }; /* validating GM healthy status*/ diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a33c1c3e4a21..bcbc47a88a70 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -191,6 +191,8 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, unsigned int max_fence = vgpu_fence_sz(vgpu); if (fence_num >= max_fence) { + gvt_vgpu_err("access oob fence reg %d/%d\n", + fence_num, max_fence); /* When guest access oob fence regs without access * pv_info first, we treat guest not supporting GVT, @@ -200,11 +202,6 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); - if (!vgpu->mmio.disable_warn_untrack) { - gvt_vgpu_err("found oob fence register access\n"); - gvt_vgpu_err("total fence %d, access fence %d\n", - max_fence, fence_num); - } memset(p_data, 0, bytes); return -EINVAL; } @@ -477,22 +474,28 @@ static int force_nonpriv_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { u32 reg_nonpriv = *(u32 *)p_data; + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); + u32 ring_base; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ret = -EINVAL; - if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) { - gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n", - vgpu->id, offset, bytes); + if ((bytes != 4) || ((offset & (bytes - 1)) != 0) || ring_id < 0) { + gvt_err("vgpu(%d) ring %d Invalid FORCE_NONPRIV offset %x(%dB)\n", + vgpu->id, ring_id, offset, bytes); return ret; } - if (in_whitelist(reg_nonpriv)) { + ring_base = dev_priv->engine[ring_id]->mmio_base; + + if (in_whitelist(reg_nonpriv) || + reg_nonpriv == i915_mmio_reg_offset(RING_NOPID(ring_base))) { ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data, bytes); - } else { - gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n", - vgpu->id, reg_nonpriv); - } - return ret; + } else + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n", + vgpu->id, reg_nonpriv, offset); + + return 0; } static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, @@ -900,11 +903,14 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, } /* - * Write request format: (command + address) occupies - * 3 bytes, followed by (len + 1) bytes of data. + * Write request format: Headr (command + address + size) occupies + * 4 bytes, followed by (len + 1) bytes of data. See details at + * intel_dp_aux_transfer(). */ - if (WARN_ON((len + 4) > AUX_BURST_SIZE)) + if ((len + 1 + 4) > AUX_BURST_SIZE) { + gvt_vgpu_err("dp_aux_header: len %d is too large\n", len); return -EINVAL; + } /* unpack data from vreg to buf */ for (t = 0; t < 4; t++) { @@ -968,8 +974,10 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, /* * Read reply format: ACK (1 byte) plus (len + 1) bytes of data. */ - if (WARN_ON((len + 2) > AUX_BURST_SIZE)) + if ((len + 2) > AUX_BURST_SIZE) { + gvt_vgpu_err("dp_aux_header: len %d is too large\n", len); return -EINVAL; + } /* read from virtual DPCD to vreg */ /* first 4 bytes: [ACK][addr][addr+1][addr+2] */ @@ -3092,9 +3100,7 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset, */ mmio_info = find_mmio_info(gvt, offset); if (!mmio_info) { - if (!vgpu->mmio.disable_warn_untrack) - gvt_vgpu_err("untracked MMIO %08x len %d\n", - offset, bytes); + gvt_dbg_mmio("untracked MMIO %08x len %d\n", offset, bytes); goto default_rw; } diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 1466d8769ec9..df4e4a07db3d 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -123,6 +123,12 @@ static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn, return -EINVAL; } + if (!pfn_valid(pfn)) { + gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); + vfio_unpin_pages(mdev_dev(vgpu->vdev.mdev), &gfn, 1); + return -EINVAL; + } + /* Setup DMA mapping. */ page = pfn_to_page(pfn); *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, @@ -583,6 +589,17 @@ out: return ret; } +static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) +{ + struct eventfd_ctx *trigger; + + trigger = vgpu->vdev.msi_trigger; + if (trigger) { + eventfd_ctx_put(trigger); + vgpu->vdev.msi_trigger = NULL; + } +} + static void __intel_vgpu_release(struct intel_vgpu *vgpu) { struct kvmgt_guest_info *info; @@ -607,6 +624,8 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) info = (struct kvmgt_guest_info *)vgpu->handle; kvmgt_guest_exit(info); + intel_vgpu_release_msi_eventfd_ctx(vgpu); + vgpu->vdev.kvm = NULL; vgpu->handle = 0; } @@ -987,7 +1006,8 @@ static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu, return PTR_ERR(trigger); } vgpu->vdev.msi_trigger = trigger; - } + } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count) + intel_vgpu_release_msi_eventfd_ctx(vgpu); return 0; } @@ -1592,6 +1612,18 @@ static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) info = (struct kvmgt_guest_info *)handle; vgpu = info->vgpu; + /* + * When guest is poweroff, msi_trigger is set to NULL, but vgpu's + * config and mmio register isn't restored to default during guest + * poweroff. If this vgpu is still used in next vm, this vgpu's pipe + * may be enabled, then once this vgpu is active, it will get inject + * vblank interrupt request. But msi_trigger is null until msi is + * enabled by guest. so if msi_trigger is null, success is still + * returned and don't inject interrupt into guest. + */ + if (vgpu->vdev.msi_trigger == NULL) + return 0; + if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1) return 0; diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 11b71b33f1c0..e4960aff68bd 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -244,8 +244,6 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr) /* set the bit 0:2(Core C-State ) to C0 */ vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0; - - vgpu->mmio.disable_warn_untrack = false; } else { #define GVT_GEN8_MMIO_RESET_OFFSET (0x44200) /* only reset the engine related, so starting with 0x44200 diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index a5bac83d53a9..0f949554d118 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -448,7 +448,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) { - u32 *reg_state = ctx->engine[ring_id].lrc_reg_state; + u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state; u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 75b7bc7b344c..d053cbe1dc94 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -53,7 +53,6 @@ struct vgpu_sched_data { bool active; ktime_t sched_in_time; - ktime_t sched_out_time; ktime_t sched_time; ktime_t left_ts; ktime_t allocated_ts; @@ -66,17 +65,22 @@ struct gvt_sched_data { struct hrtimer timer; unsigned long period; struct list_head lru_runq_head; + ktime_t expire_time; }; -static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu) +static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time) { ktime_t delta_ts; - struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data; + struct vgpu_sched_data *vgpu_data; - delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time; + if (!vgpu || vgpu == vgpu->gvt->idle_vgpu) + return; - vgpu_data->sched_time += delta_ts; - vgpu_data->left_ts -= delta_ts; + vgpu_data = vgpu->sched_data; + delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time); + vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts); + vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts); + vgpu_data->sched_in_time = cur_time; } #define GVT_TS_BALANCE_PERIOD_MS 100 @@ -150,11 +154,7 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) } cur_time = ktime_get(); - if (scheduler->current_vgpu) { - vgpu_data = scheduler->current_vgpu->sched_data; - vgpu_data->sched_out_time = cur_time; - vgpu_update_timeslice(scheduler->current_vgpu); - } + vgpu_update_timeslice(scheduler->current_vgpu, cur_time); vgpu_data = scheduler->next_vgpu->sched_data; vgpu_data->sched_in_time = cur_time; @@ -226,17 +226,22 @@ out: void intel_gvt_schedule(struct intel_gvt *gvt) { struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; - static uint64_t timer_check; + ktime_t cur_time; mutex_lock(&gvt->lock); + cur_time = ktime_get(); if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, (void *)&gvt->service_request)) { - if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS)) + if (cur_time >= sched_data->expire_time) { gvt_balance_timeslice(sched_data); + sched_data->expire_time = ktime_add_ms( + cur_time, GVT_TS_BALANCE_PERIOD_MS); + } } clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request); + vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time); tbs_sched_func(sched_data); mutex_unlock(&gvt->lock); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 638abe84857c..c2d183b91500 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -58,7 +58,7 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -97,7 +97,7 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload, i915_mmio_reg_offset(EU_PERF_CNTL6), }; - if (!workload || !reg_state || workload->ring_id != RCS) + if (workload->ring_id != RCS) return; if (save) { @@ -130,7 +130,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *dst; @@ -283,7 +283,7 @@ static int shadow_context_status_change(struct notifier_block *nb, static void shadow_context_descriptor_update(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u64 desc = 0; desc = ce->lrc_desc; @@ -389,7 +389,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) * shadow_ctx pages invalid. So gvt need to pin itself. After update * the guest context, gvt can unpin the shadow_ctx safely. */ - ring = engine->context_pin(engine, shadow_ctx); + ring = intel_context_pin(shadow_ctx, engine); if (IS_ERR(ring)) { ret = PTR_ERR(ring); gvt_vgpu_err("fail to pin shadow context\n"); @@ -403,7 +403,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) return 0; err_unpin: - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); err_scan: @@ -437,7 +437,7 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) return 0; err_unpin: - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); release_shadow_wa_ctx(&workload->wa_ctx); return ret; } @@ -452,12 +452,6 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) int ret; list_for_each_entry(bb, &workload->shadow_bb, list) { - bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0); - if (IS_ERR(bb->vma)) { - ret = PTR_ERR(bb->vma); - goto err; - } - /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va * is only updated into ring_scan_buffer, not real ring address * allocated in later copy_workload_to_ring_buffer. pls be noted @@ -469,25 +463,53 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) bb->bb_start_cmd_va = workload->shadow_ring_buffer_va + bb->bb_offset; - /* relocate shadow batch buffer */ - bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); - if (gmadr_bytes == 8) - bb->bb_start_cmd_va[2] = 0; + if (bb->ppgtt) { + /* for non-priv bb, scan&shadow is only for + * debugging purpose, so the content of shadow bb + * is the same as original bb. Therefore, + * here, rather than switch to shadow bb's gma + * address, we directly use original batch buffer's + * gma address, and send original bb to hardware + * directly + */ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, + bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + } else { + bb->vma = i915_gem_object_ggtt_pin(bb->obj, + NULL, 0, 0, 0); + if (IS_ERR(bb->vma)) { + ret = PTR_ERR(bb->vma); + goto err; + } - /* No one is going to touch shadow bb from now on. */ - if (bb->clflush & CLFLUSH_AFTER) { - drm_clflush_virt_range(bb->va, bb->obj->base.size); - bb->clflush &= ~CLFLUSH_AFTER; - } + /* relocate shadow batch buffer */ + bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); + if (gmadr_bytes == 8) + bb->bb_start_cmd_va[2] = 0; - ret = i915_gem_object_set_to_gtt_domain(bb->obj, false); - if (ret) - goto err; + /* No one is going to touch shadow bb from now on. */ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, + bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } - i915_gem_obj_finish_shmem_access(bb->obj); - bb->accessing = false; + ret = i915_gem_object_set_to_gtt_domain(bb->obj, + false); + if (ret) + goto err; - i915_vma_move_to_active(bb->vma, workload->req, 0); + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + i915_vma_move_to_active(bb->vma, workload->req, 0); + } } return 0; err: @@ -504,7 +526,7 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) struct intel_vgpu_submission *s = &workload->vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -666,7 +688,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ret = prepare_workload(workload); if (ret) { - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); goto out; } @@ -749,7 +771,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = s->shadow_ctx; int ring_id = workload->ring_id; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *src; @@ -876,7 +898,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) } mutex_lock(&dev_priv->drm.struct_mutex); /* unpin shadow ctx as the shadow_ctx update is done */ - engine->context_unpin(engine, s->shadow_ctx); + intel_context_unpin(s->shadow_ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -1134,9 +1156,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) if (IS_ERR(s->shadow_ctx)) return PTR_ERR(s->shadow_ctx); - if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv)) - s->shadow_ctx->priority = INT_MAX; - bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload", diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 486ed57a4ad1..6c644782193e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -125,6 +125,7 @@ struct intel_vgpu_shadow_bb { unsigned int clflush; bool accessing; unsigned long bb_offset; + bool ppgtt; }; #define workload_q_head(vgpu, ring_id) \ diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 82093f1e8612..1fd64202d74e 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -224,19 +224,25 @@ TRACE_EVENT(oos_sync, TP_printk("%s", __entry->buf) ); +#define GVT_CMD_STR_LEN 40 TRACE_EVENT(gvt_command, - TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, - u32 buf_type), + TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, + u32 cmd_len, u32 buf_type, u32 buf_addr_type, + void *workload, char *cmd_name), - TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type), + TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type, + buf_addr_type, workload, cmd_name), TP_STRUCT__entry( __field(u8, vgpu_id) __field(u8, ring_id) __field(u32, ip_gma) __field(u32, buf_type) + __field(u32, buf_addr_type) __field(u32, cmd_len) + __field(void*, workload) __dynamic_array(u32, raw_cmd, cmd_len) + __array(char, cmd_name, GVT_CMD_STR_LEN) ), TP_fast_assign( @@ -244,17 +250,25 @@ TRACE_EVENT(gvt_command, __entry->ring_id = ring_id; __entry->ip_gma = ip_gma; __entry->buf_type = buf_type; + __entry->buf_addr_type = buf_addr_type; __entry->cmd_len = cmd_len; + __entry->workload = workload; + snprintf(__entry->cmd_name, GVT_CMD_STR_LEN, "%s", cmd_name); memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va)); ), - TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s", + TP_printk("vgpu%d ring %d: address_type %u, buf_type %u, ip_gma %08x,cmd (name=%s,len=%u,raw cmd=%s), workload=%p\n", __entry->vgpu_id, __entry->ring_id, + __entry->buf_addr_type, __entry->buf_type, __entry->ip_gma, - __print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4)) + __entry->cmd_name, + __entry->cmd_len, + __print_array(__get_dynamic_array(raw_cmd), + __entry->cmd_len, 4), + __entry->workload) ); #define GVT_TEMP_STR_LEN 10 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2e6652a9bb9e..13e7b9e4a6e6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -377,16 +377,19 @@ static void print_batch_pool_stats(struct seq_file *m, print_file_stats(m, "[k]batch pool", stats); } -static int per_file_ctx_stats(int id, void *ptr, void *data) +static int per_file_ctx_stats(int idx, void *ptr, void *data) { struct i915_gem_context *ctx = ptr; - int n; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, ctx->i915, id) { + struct intel_context *ce = to_intel_context(ctx, engine); - for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { - if (ctx->engine[n].state) - per_file_stats(0, ctx->engine[n].state->obj, data); - if (ctx->engine[n].ring) - per_file_stats(0, ctx->engine[n].ring->vma->obj, data); + if (ce->state) + per_file_stats(0, ce->state->obj, data); + if (ce->ring) + per_file_stats(0, ce->ring->vma->obj, data); } return 0; @@ -1340,10 +1343,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", + seq_printf(m, "\tseqno = %x [current %x, last %x]\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine), - engine->timeline->inflight_seqnos); + intel_engine_last_submit(engine)); seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, @@ -1960,7 +1962,8 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, '\n'); for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(ctx, engine); seq_printf(m, "%s: ", engine->name); if (ce->state) @@ -2603,6 +2606,26 @@ static const char *psr2_live_status(u32 val) return "unknown"; } +static const char *psr_sink_status(u8 val) +{ + static const char * const sink_status[] = { + "inactive", + "transition to active, capture and display", + "active, display from RFB", + "active, capture and display on sink device timings", + "transition to inactive, capture and display, timing re-sync", + "reserved", + "reserved", + "sink internal error" + }; + + val &= DP_PSR_SINK_STATE_MASK; + if (val < ARRAY_SIZE(sink_status)) + return sink_status[val]; + + return "unknown"; +} + static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -2684,12 +2707,61 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "EDP_PSR2_STATUS: %x [%s]\n", psr2, psr2_live_status(psr2)); } + + if (dev_priv->psr.enabled) { + struct drm_dp_aux *aux = &dev_priv->psr.enabled->aux; + u8 val; + + if (drm_dp_dpcd_readb(aux, DP_PSR_STATUS, &val) == 1) + seq_printf(m, "Sink PSR status: 0x%x [%s]\n", val, + psr_sink_status(val)); + } mutex_unlock(&dev_priv->psr.lock); + if (READ_ONCE(dev_priv->psr.debug)) { + seq_printf(m, "Last attempted entry at: %lld\n", + dev_priv->psr.last_entry_attempt); + seq_printf(m, "Last exit at: %lld\n", + dev_priv->psr.last_exit); + } + + intel_runtime_pm_put(dev_priv); + return 0; +} + +static int +i915_edp_psr_debug_set(void *data, u64 val) +{ + struct drm_i915_private *dev_priv = data; + + if (!CAN_PSR(dev_priv)) + return -ENODEV; + + DRM_DEBUG_KMS("PSR debug %s\n", enableddisabled(val)); + + intel_runtime_pm_get(dev_priv); + intel_psr_irq_control(dev_priv, !!val); intel_runtime_pm_put(dev_priv); + return 0; } +static int +i915_edp_psr_debug_get(void *data, u64 *val) +{ + struct drm_i915_private *dev_priv = data; + + if (!CAN_PSR(dev_priv)) + return -ENODEV; + + *val = READ_ONCE(dev_priv->psr.debug); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_edp_psr_debug_fops, + i915_edp_psr_debug_get, i915_edp_psr_debug_set, + "%llu\n"); + static int i915_sink_crc(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -3296,6 +3368,28 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) seq_printf(m, " fp0: 0x%08x\n", pll->state.hw_state.fp0); seq_printf(m, " fp1: 0x%08x\n", pll->state.hw_state.fp1); seq_printf(m, " wrpll: 0x%08x\n", pll->state.hw_state.wrpll); + seq_printf(m, " cfgcr0: 0x%08x\n", pll->state.hw_state.cfgcr0); + seq_printf(m, " cfgcr1: 0x%08x\n", pll->state.hw_state.cfgcr1); + seq_printf(m, " mg_refclkin_ctl: 0x%08x\n", + pll->state.hw_state.mg_refclkin_ctl); + seq_printf(m, " mg_clktop2_coreclkctl1: 0x%08x\n", + pll->state.hw_state.mg_clktop2_coreclkctl1); + seq_printf(m, " mg_clktop2_hsclkctl: 0x%08x\n", + pll->state.hw_state.mg_clktop2_hsclkctl); + seq_printf(m, " mg_pll_div0: 0x%08x\n", + pll->state.hw_state.mg_pll_div0); + seq_printf(m, " mg_pll_div1: 0x%08x\n", + pll->state.hw_state.mg_pll_div1); + seq_printf(m, " mg_pll_lf: 0x%08x\n", + pll->state.hw_state.mg_pll_lf); + seq_printf(m, " mg_pll_frac_lock: 0x%08x\n", + pll->state.hw_state.mg_pll_frac_lock); + seq_printf(m, " mg_pll_ssc: 0x%08x\n", + pll->state.hw_state.mg_pll_ssc); + seq_printf(m, " mg_pll_bias: 0x%08x\n", + pll->state.hw_state.mg_pll_bias); + seq_printf(m, " mg_pll_tdc_coldst_bias: 0x%08x\n", + pll->state.hw_state.mg_pll_tdc_coldst_bias); } drm_modeset_unlock_all(dev); @@ -3304,24 +3398,13 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) static int i915_wa_registers(struct seq_file *m, void *unused) { - int i; - int ret; - struct intel_engine_cs *engine; struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; struct i915_workarounds *workarounds = &dev_priv->workarounds; - enum intel_engine_id id; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; + int i; intel_runtime_pm_get(dev_priv); seq_printf(m, "Workarounds applied: %d\n", workarounds->count); - for_each_engine(engine, dev_priv, id) - seq_printf(m, "HW whitelist count for %s: %d\n", - engine->name, workarounds->hw_whitelist_count[id]); for (i = 0; i < workarounds->count; ++i) { i915_reg_t addr; u32 mask, value, read; @@ -3337,7 +3420,6 @@ static int i915_wa_registers(struct seq_file *m, void *unused) } intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); return 0; } @@ -4177,119 +4259,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops, "0x%08llx\n"); static int -i915_max_freq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit); - return 0; -} - -static int -i915_max_freq_set(void *data, u64 val) -{ - struct drm_i915_private *dev_priv = data; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 hw_max, hw_min; - int ret; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - return ret; - - /* - * Turbo will still be enabled, but won't go above the set value. - */ - val = intel_freq_opcode(dev_priv, val); - - hw_max = rps->max_freq; - hw_min = rps->min_freq; - - if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - return -EINVAL; - } - - rps->max_freq_softlimit = val; - - if (intel_set_rps(dev_priv, val)) - DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - - mutex_unlock(&dev_priv->pcu_lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops, - i915_max_freq_get, i915_max_freq_set, - "%llu\n"); - -static int -i915_min_freq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit); - return 0; -} - -static int -i915_min_freq_set(void *data, u64 val) -{ - struct drm_i915_private *dev_priv = data; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 hw_max, hw_min; - int ret; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - return ret; - - /* - * Turbo will still be enabled, but won't go below the set value. - */ - val = intel_freq_opcode(dev_priv, val); - - hw_max = rps->max_freq; - hw_min = rps->min_freq; - - if (val < hw_min || - val > hw_max || val > rps->max_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - return -EINVAL; - } - - rps->min_freq_softlimit = val; - - if (intel_set_rps(dev_priv, val)) - DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - - mutex_unlock(&dev_priv->pcu_lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops, - i915_min_freq_get, i915_min_freq_set, - "%llu\n"); - -static int i915_cache_sharing_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; @@ -4850,8 +4819,6 @@ static const struct i915_debugfs_files { const struct file_operations *fops; } i915_debugfs_files[] = { {"i915_wedged", &i915_wedged_fops}, - {"i915_max_freq", &i915_max_freq_fops}, - {"i915_min_freq", &i915_min_freq_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, {"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, {"i915_ring_test_irq", &i915_ring_test_irq_fops}, @@ -4874,7 +4841,8 @@ static const struct i915_debugfs_files { {"i915_guc_log_relay", &i915_guc_log_relay_fops}, {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops}, {"i915_ipc_status", &i915_ipc_status_fops}, - {"i915_drrs_ctl", &i915_drrs_ctl_fops} + {"i915_drrs_ctl", &i915_drrs_ctl_fops}, + {"i915_edp_psr_debug", &i915_edp_psr_debug_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c471a7528a50..9c449b8d8eab 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -101,7 +101,13 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, __builtin_return_address(0), &vaf); if (is_error && !shown_bug_once) { - dev_notice(kdev, "%s", FDO_BUG_MSG); + /* + * Ask the user to file a bug report for the error, except + * if they may have caused the bug by fiddling with unsafe + * module parameters. + */ + if (!test_taint(TAINT_USER)) + dev_notice(kdev, "%s", FDO_BUG_MSG); shown_bug_once = true; } @@ -2468,10 +2474,13 @@ static void vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv, /* * RC6 transitioning can be delayed up to 2 msec (see * valleyview_enable_rps), use 3 msec for safety. + * + * This can fail to turn off the rc6 if the GPU is stuck after a failed + * reset and we are trying to force the machine to sleep. */ if (vlv_wait_for_pw_status(dev_priv, mask, val)) - DRM_ERROR("timeout waiting for GT wells to go %s\n", - onoff(wait_for_on)); + DRM_DEBUG_DRIVER("timeout waiting for GT wells to go %s\n", + onoff(wait_for_on)); } static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e50d9589d6e3..34c125e2d90c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -72,9 +72,10 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_timeline.h" #include "i915_gpu_error.h" #include "i915_request.h" +#include "i915_scheduler.h" +#include "i915_timeline.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -84,8 +85,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20180413" -#define DRIVER_TIMESTAMP 1523611258 +#define DRIVER_DATE "20180514" +#define DRIVER_TIMESTAMP 1526300884 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -609,6 +610,9 @@ struct i915_psr { bool has_hw_tracking; bool psr2_enabled; u8 sink_sync_latency; + bool debug; + ktime_t last_entry_attempt; + ktime_t last_exit; void (*enable_source)(struct intel_dp *, const struct intel_crtc_state *); @@ -1069,6 +1073,7 @@ struct intel_vbt_data { } edp; struct { + bool enable; bool full_link; bool require_aux_wakeup; int idle_frames; @@ -1185,6 +1190,7 @@ struct skl_ddb_allocation { /* packed/y */ struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; struct skl_ddb_entry uv_plane[I915_MAX_PIPES][I915_MAX_PLANES]; + u8 enabled_slices; /* GEN11 has configurable 2 slices */ }; struct skl_ddb_values { @@ -1297,7 +1303,6 @@ struct i915_wa_reg { struct i915_workarounds { struct i915_wa_reg reg[I915_MAX_WA_REGS]; u32 count; - u32 hw_whitelist_count[I915_NUM_ENGINES]; }; struct i915_virtual_gpu { @@ -2056,8 +2061,11 @@ struct drm_i915_private { void (*cleanup_engine)(struct intel_engine_cs *engine); struct list_head timelines; - struct i915_gem_timeline global_timeline; + + struct list_head active_rings; + struct list_head closed_vma; u32 active_requests; + u32 request_serial; /** * Is the GPU currently considered idle, or busy executing @@ -2462,6 +2470,15 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_CNL_REVID(p, since, until) \ (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) +#define ICL_REVID_A0 0x0 +#define ICL_REVID_A2 0x1 +#define ICL_REVID_B0 0x3 +#define ICL_REVID_B2 0x4 +#define ICL_REVID_C0 0x5 + +#define IS_ICL_REVID(p, since, until) \ + (IS_ICELAKE(p) && IS_REVID(p, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks @@ -3159,7 +3176,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, struct intel_rps_client *rps); int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int priority); + const struct i915_sched_attr *attr); #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX int __must_check @@ -3228,16 +3245,6 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } -static inline struct intel_timeline * -i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct i915_address_space *vm; - - vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; - return &vm->timeline.engine[engine->id]; -} - int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4c9d2a6f7d28..3704f4c0c2c9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -141,6 +141,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) { lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(i915->gt.active_requests); + GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); if (!i915->gt.awake) return I915_EPOCH_INVALID; @@ -161,9 +162,10 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) synchronize_irq(i915->drm.irq); intel_engines_park(i915); - i915_gem_timelines_park(i915); + i915_timelines_park(i915); i915_pmu_gt_parked(i915); + i915_vma_parked(i915); i915->gt.awake = false; @@ -564,7 +566,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, return timeout; } -static void __fence_set_priority(struct dma_fence *fence, int prio) +static void __fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { struct i915_request *rq; struct intel_engine_cs *engine; @@ -575,13 +578,16 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) rq = to_request(fence); engine = rq->engine; - rcu_read_lock(); + local_bh_disable(); + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) - engine->schedule(rq, prio); + engine->schedule(rq, attr); rcu_read_unlock(); + local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } -static void fence_set_priority(struct dma_fence *fence, int prio) +static void fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { /* Recurse once into a fence-array */ if (dma_fence_is_array(fence)) { @@ -589,16 +595,16 @@ static void fence_set_priority(struct dma_fence *fence, int prio) int i; for (i = 0; i < array->num_fences; i++) - __fence_set_priority(array->fences[i], prio); + __fence_set_priority(array->fences[i], attr); } else { - __fence_set_priority(fence, prio); + __fence_set_priority(fence, attr); } } int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int prio) + const struct i915_sched_attr *attr) { struct dma_fence *excl; @@ -613,7 +619,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, return ret; for (i = 0; i < count; i++) { - fence_set_priority(shared[i], prio); + fence_set_priority(shared[i], attr); dma_fence_put(shared[i]); } @@ -623,7 +629,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, } if (excl) { - fence_set_priority(excl, prio); + fence_set_priority(excl, attr); dma_fence_put(excl); } return 0; @@ -2966,27 +2972,26 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) struct i915_request *request, *active = NULL; unsigned long flags; - /* We are called by the error capture and reset at a random - * point in time. In particular, note that neither is crucially - * ordered with an interrupt. After a hang, the GPU is dead and we - * assume that no more writes can happen (we waited long enough for - * all writes that were in transaction to be flushed) - adding an + /* + * We are called by the error capture, reset and to dump engine + * state at random points in time. In particular, note that neither is + * crucially ordered with an interrupt. After a hang, the GPU is dead + * and we assume that no more writes can happen (we waited long enough + * for all writes that were in transaction to be flushed) - adding an * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. + * At all other times, we must assume the GPU is still running, but + * we only care about the snapshot of this moment. */ - spin_lock_irqsave(&engine->timeline->lock, flags); - list_for_each_entry(request, &engine->timeline->requests, link) { + spin_lock_irqsave(&engine->timeline.lock, flags); + list_for_each_entry(request, &engine->timeline.requests, link) { if (__i915_request_completed(request, request->global_seqno)) continue; - GEM_BUG_ON(request->engine != engine); - GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &request->fence.flags)); - active = request; break; } - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); return active; } @@ -3107,15 +3112,15 @@ static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_gem_context *hung_ctx = request->ctx; - struct intel_timeline *timeline; + struct i915_timeline *timeline = request->timeline; unsigned long flags; - timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); + GEM_BUG_ON(timeline == &engine->timeline); - spin_lock_irqsave(&engine->timeline->lock, flags); - spin_lock(&timeline->lock); + spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); - list_for_each_entry_continue(request, &engine->timeline->requests, link) + list_for_each_entry_continue(request, &engine->timeline.requests, link) if (request->ctx == hung_ctx) skip_request(request); @@ -3123,7 +3128,7 @@ static void engine_skip_context(struct i915_request *request) skip_request(request); spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } /* Returns the request if it was guilty of the hang */ @@ -3180,11 +3185,11 @@ i915_gem_reset_request(struct intel_engine_cs *engine, dma_fence_set_error(&request->fence, -EAGAIN); /* Rewind the engine to replay the incomplete rq */ - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); request = list_prev_entry(request, link); - if (&request->link == &engine->timeline->requests) + if (&request->link == &engine->timeline.requests) request = NULL; - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } } @@ -3232,7 +3237,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, stalled_mask & ENGINE_MASK(id)); ctx = fetch_and_zero(&engine->last_retired_context); if (ctx) - engine->context_unpin(engine, ctx); + intel_context_unpin(ctx, engine); /* * Ostensibily, we always want a context loaded for powersaving, @@ -3297,10 +3302,10 @@ static void nop_complete_submit_request(struct i915_request *request) request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&request->engine->timeline->lock, flags); + spin_lock_irqsave(&request->engine->timeline.lock, flags); __i915_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); - spin_unlock_irqrestore(&request->engine->timeline->lock, flags); + spin_unlock_irqrestore(&request->engine->timeline.lock, flags); } void i915_gem_set_wedged(struct drm_i915_private *i915) @@ -3310,7 +3315,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) GEM_TRACE("start\n"); - if (drm_debug & DRM_UT_DRIVER) { + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer(__func__); for_each_engine(engine, i915, id) @@ -3369,10 +3374,10 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) * (lockless) lookup doesn't try and wait upon the request as we * reset it. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); intel_engine_init_global_seqno(engine, intel_engine_last_submit(engine)); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); i915_gem_reset_finish_engine(engine); } @@ -3384,8 +3389,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) bool i915_gem_unset_wedged(struct drm_i915_private *i915) { - struct i915_gem_timeline *tl; - int i; + struct i915_timeline *tl; lockdep_assert_held(&i915->drm.struct_mutex); if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) @@ -3404,29 +3408,27 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * No more can be submitted until we reset the wedged bit. */ list_for_each_entry(tl, &i915->gt.timelines, link) { - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - struct i915_request *rq; + struct i915_request *rq; - rq = i915_gem_active_peek(&tl->engine[i].last_request, - &i915->drm.struct_mutex); - if (!rq) - continue; + rq = i915_gem_active_peek(&tl->last_request, + &i915->drm.struct_mutex); + if (!rq) + continue; - /* - * We can't use our normal waiter as we want to - * avoid recursively trying to handle the current - * reset. The basic dma_fence_default_wait() installs - * a callback for dma_fence_signal(), which is - * triggered by our nop handler (indirectly, the - * callback enables the signaler thread which is - * woken by the nop_submit_request() advancing the seqno - * and when the seqno passes the fence, the signaler - * then signals the fence waking us up). - */ - if (dma_fence_default_wait(&rq->fence, true, - MAX_SCHEDULE_TIMEOUT) < 0) - return false; - } + /* + * We can't use our normal waiter as we want to + * avoid recursively trying to handle the current + * reset. The basic dma_fence_default_wait() installs + * a callback for dma_fence_signal(), which is + * triggered by our nop handler (indirectly, the + * callback enables the signaler thread which is + * woken by the nop_submit_request() advancing the seqno + * and when the seqno passes the fence, the signaler + * then signals the fence waking us up). + */ + if (dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT) < 0) + return false; } i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); @@ -3731,17 +3733,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) +static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) { - int ret, i; - - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - ret = i915_gem_active_wait(&tl->engine[i].last_request, flags); - if (ret) - return ret; - } - - return 0; + return i915_gem_active_wait(&tl->last_request, flags); } static int wait_for_engines(struct drm_i915_private *i915) @@ -3759,30 +3753,37 @@ static int wait_for_engines(struct drm_i915_private *i915) int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { - int ret; - /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) return 0; if (flags & I915_WAIT_LOCKED) { - struct i915_gem_timeline *tl; + struct i915_timeline *tl; + int err; lockdep_assert_held(&i915->drm.struct_mutex); list_for_each_entry(tl, &i915->gt.timelines, link) { - ret = wait_for_timeline(tl, flags); - if (ret) - return ret; + err = wait_for_timeline(tl, flags); + if (err) + return err; } i915_retire_requests(i915); - ret = wait_for_engines(i915); + return wait_for_engines(i915); } else { - ret = wait_for_timeline(&i915->gt.global_timeline, flags); - } + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; - return ret; + for_each_engine(engine, i915, id) { + err = wait_for_timeline(&engine->timeline, flags); + if (err) + return err; + } + + return 0; + } } static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) @@ -4796,7 +4797,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, &obj->vma_list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; - i915_vma_close(vma); + i915_vma_destroy(vma); } GEM_BUG_ON(!list_empty(&obj->vma_list)); GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); @@ -4951,7 +4952,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) enum intel_engine_id id; for_each_engine(engine, i915, id) { - GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); + GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); GEM_BUG_ON(engine->last_retired_context != kernel_context); } } @@ -5289,7 +5290,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { struct i915_vma *state; - state = ctx->engine[id].state; + state = to_intel_context(ctx, engine)->state; if (!state) continue; @@ -5597,12 +5598,9 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) if (!dev_priv->priorities) goto err_dependencies; - mutex_lock(&dev_priv->drm.struct_mutex); INIT_LIST_HEAD(&dev_priv->gt.timelines); - err = i915_gem_timeline_init__global(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - if (err) - goto err_priorities; + INIT_LIST_HEAD(&dev_priv->gt.active_rings); + INIT_LIST_HEAD(&dev_priv->gt.closed_vma); i915_gem_init__mm(dev_priv); @@ -5623,8 +5621,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) return 0; -err_priorities: - kmem_cache_destroy(dev_priv->priorities); err_dependencies: kmem_cache_destroy(dev_priv->dependencies); err_requests: @@ -5645,11 +5641,7 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); - - mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_timeline_fini(&dev_priv->gt.global_timeline); WARN_ON(!list_empty(&dev_priv->gt.timelines)); - mutex_unlock(&dev_priv->drm.struct_mutex); kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index deaf78d2ae8b..525920404ede 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -30,6 +30,9 @@ struct drm_i915_private; #ifdef CONFIG_DRM_I915_DEBUG_GEM + +#define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER) + #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ pr_err("%s:%d GEM_BUG_ON(%s)\n", \ __func__, __LINE__, __stringify(condition)); \ @@ -45,6 +48,9 @@ struct drm_i915_private; #define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) #else + +#define GEM_SHOW_DEBUG() (0) + #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9b3834a846e8..33f8a4b3c981 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -117,15 +117,15 @@ static void lut_close(struct i915_gem_context *ctx) static void i915_gem_context_free(struct i915_gem_context *ctx) { - int i; + unsigned int n; lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); i915_ppgtt_put(ctx->ppgtt); - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct intel_context *ce = &ctx->engine[i]; + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; if (!ce->state) continue; @@ -281,7 +281,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, kref_init(&ctx->ref); list_add_tail(&ctx->link, &dev_priv->contexts.list); ctx->i915 = dev_priv; - ctx->priority = I915_PRIORITY_NORMAL; + ctx->sched.priority = I915_PRIORITY_NORMAL; INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -431,7 +431,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; i915_gem_context_clear_bannable(ctx); - ctx->priority = prio; + ctx->sched.priority = prio; ctx->ring_size = PAGE_SIZE; GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); @@ -521,7 +521,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) if (!engine->last_retired_context) continue; - engine->context_unpin(engine, engine->last_retired_context); + intel_context_unpin(engine->last_retired_context, engine); engine->last_retired_context = NULL; } } @@ -577,19 +577,29 @@ void i915_gem_context_close(struct drm_file *file) idr_destroy(&file_priv->context_idr); } -static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +static struct i915_request * +last_request_on_engine(struct i915_timeline *timeline, + struct intel_engine_cs *engine) { - struct i915_gem_timeline *timeline; + struct i915_request *rq; - list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { - struct intel_timeline *tl; + if (timeline == &engine->timeline) + return NULL; - if (timeline == &engine->i915->gt.global_timeline) - continue; + rq = i915_gem_active_raw(&timeline->last_request, + &engine->i915->drm.struct_mutex); + if (rq && rq->engine == engine) + return rq; + + return NULL; +} - tl = &timeline->engine[engine->id]; - if (i915_gem_active_peek(&tl->last_request, - &engine->i915->drm.struct_mutex)) +static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +{ + struct i915_timeline *timeline; + + list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { + if (last_request_on_engine(timeline, engine)) return false; } @@ -599,7 +609,7 @@ static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - struct i915_gem_timeline *timeline; + struct i915_timeline *timeline; enum intel_engine_id id; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -619,11 +629,8 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { struct i915_request *prev; - struct intel_timeline *tl; - tl = &timeline->engine[engine->id]; - prev = i915_gem_active_raw(&tl->last_request, - &dev_priv->drm.struct_mutex); + prev = last_request_on_engine(timeline, engine); if (prev) i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, @@ -753,7 +760,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = i915_gem_context_is_bannable(ctx); break; case I915_CONTEXT_PARAM_PRIORITY: - args->value = ctx->priority; + args->value = ctx->sched.priority; break; default: ret = -EINVAL; @@ -826,7 +833,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, !capable(CAP_SYS_NICE)) ret = -EPERM; else - ctx->priority = priority; + ctx->sched.priority = priority; } break; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 7854262ddfd9..ace3b129c189 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -137,18 +137,7 @@ struct i915_gem_context { */ u32 user_handle; - /** - * @priority: execution and service priority - * - * All clients are equal, but some are more equal than others! - * - * Requests from a context with a greater (more positive) value of - * @priority will be executed before those with a lower @priority - * value, forming a simple QoS. - * - * The &drm_i915_private.kernel_context is assigned the lowest priority. - */ - int priority; + struct i915_sched_attr sched; /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; @@ -160,7 +149,7 @@ struct i915_gem_context { u32 *lrc_reg_state; u64 lrc_desc; int pin_count; - } engine[I915_NUM_ENGINES]; + } __engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ u32 ring_size; @@ -267,6 +256,34 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) return !ctx->file_priv; } +static inline struct intel_context * +to_intel_context(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + return &ctx->__engine[engine->id]; +} + +static inline struct intel_ring * +intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + return engine->context_pin(engine, ctx); +} + +static inline void __intel_context_pin(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + struct intel_context *ce = to_intel_context(ctx, engine); + + GEM_BUG_ON(!ce->pin_count); + ce->pin_count++; +} + +static inline void intel_context_unpin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + engine->context_unpin(engine, ctx); +} + /* i915_gem_context.c */ int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c74f5df3fb5a..f627a8c47c58 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -762,7 +762,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) } /* transfer ref to ctx */ - vma->open_count++; + if (!vma->open_count++) + i915_vma_reopen(vma); list_add(&lut->obj_link, &obj->lut_list); list_add(&lut->ctx_link, &eb->ctx->handles_list); lut->ctx = eb->ctx; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c8eaeacd3471..fcf3a973925b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -110,7 +110,8 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma); static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv) { - /* Note that as an uncached mmio write, this should flush the + /* + * Note that as an uncached mmio write, this will flush the * WCB of the writes into the GGTT before it triggers the invalidate. */ I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); @@ -1161,6 +1162,27 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, vaddr[idx.pde] |= GEN8_PDE_IPS_64K; kunmap_atomic(vaddr); page_size = I915_GTT_PAGE_SIZE_64K; + + /* + * We write all 4K page entries, even when using 64K + * pages. In order to verify that the HW isn't cheating + * by using the 4K PTE instead of the 64K PTE, we want + * to remove all the surplus entries. If the HW skipped + * the 64K PTE, it will read/write into the scratch page + * instead - which we detect as missing results during + * selftests. + */ + if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { + u16 i; + + encode = pte_encode | vma->vm->scratch_page.daddr; + vaddr = kmap_atomic_px(pd->page_table[idx.pde]); + + for (i = 1; i < index; i += 16) + memset64(vaddr + i, encode, 15); + + kunmap_atomic(vaddr); + } } vma->page_sizes.gtt |= page_size; @@ -2111,8 +2133,6 @@ static void i915_address_space_init(struct i915_address_space *vm, struct drm_i915_private *dev_priv, const char *name) { - i915_gem_timeline_init(dev_priv, &vm->timeline, name); - drm_mm_init(&vm->mm, 0, vm->total); vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; @@ -2129,7 +2149,6 @@ static void i915_address_space_fini(struct i915_address_space *vm) if (pagevec_count(&vm->free_pages)) vm_free_pages_release(vm, true); - i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); list_del(&vm->global_link); } @@ -2140,15 +2159,15 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv)) - I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + else if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); /* * To support 64K PTEs we need to first enable the use of the @@ -2222,6 +2241,12 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, void i915_ppgtt_close(struct i915_address_space *vm) { + GEM_BUG_ON(vm->closed); + vm->closed = true; +} + +static void ppgtt_destroy_vma(struct i915_address_space *vm) +{ struct list_head *phases[] = { &vm->active_list, &vm->inactive_list, @@ -2229,15 +2254,12 @@ void i915_ppgtt_close(struct i915_address_space *vm) NULL, }, **phase; - GEM_BUG_ON(vm->closed); vm->closed = true; - for (phase = phases; *phase; phase++) { struct i915_vma *vma, *vn; list_for_each_entry_safe(vma, vn, *phase, vm_link) - if (!i915_vma_is_closed(vma)) - i915_vma_close(vma); + i915_vma_destroy(vma); } } @@ -2248,7 +2270,8 @@ void i915_ppgtt_release(struct kref *kref) trace_i915_ppgtt_release(&ppgtt->base); - /* vmas should already be unbound and destroyed */ + ppgtt_destroy_vma(&ppgtt->base); + GEM_BUG_ON(!list_empty(&ppgtt->base.active_list)); GEM_BUG_ON(!list_empty(&ppgtt->base.inactive_list)); GEM_BUG_ON(!list_empty(&ppgtt->base.unbound_list)); @@ -2417,11 +2440,9 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, for_each_sgt_dma(addr, sgt_iter, vma->pages) gen8_set_pte(gtt_entries++, pte_encode | addr); - wmb(); - - /* This next bit makes the above posting read even more important. We - * want to flush the TLBs only after we're certain all the PTE updates - * have finished. + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. */ ggtt->invalidate(vm->i915); } @@ -2459,11 +2480,10 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, dma_addr_t addr; for_each_sgt_dma(addr, iter, vma->pages) iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); - wmb(); - /* This next bit makes the above posting read even more important. We - * want to flush the TLBs only after we're certain all the PTE updates - * have finished. + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. */ ggtt->invalidate(vm->i915); } @@ -3325,14 +3345,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - - if (INTEL_GEN(dev_priv) >= 9) { - size = gen8_get_total_gtt_size(snb_gmch_ctl); - } else if (IS_CHERRYVIEW(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv)) size = chv_get_total_gtt_size(snb_gmch_ctl); - } else { + else size = gen8_get_total_gtt_size(snb_gmch_ctl); - } ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; ggtt->base.cleanup = gen6_gmch_remove; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6efc017e8bb3..aec4f73574f4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -38,10 +38,9 @@ #include <linux/mm.h> #include <linux/pagevec.h> -#include "i915_gem_timeline.h" - #include "i915_request.h" #include "i915_selftest.h" +#include "i915_timeline.h" #define I915_GTT_PAGE_SIZE_4K BIT(12) #define I915_GTT_PAGE_SIZE_64K BIT(16) @@ -257,7 +256,6 @@ struct i915_pml4 { struct i915_address_space { struct drm_mm mm; - struct i915_gem_timeline timeline; struct drm_i915_private *i915; struct device *dma; /* Every address space belongs to a struct file - except for the global @@ -344,6 +342,7 @@ struct i915_address_space { void (*clear_pages)(struct i915_vma *vma); I915_SELFTEST_DECLARE(struct fault_attr fault_attr); + I915_SELFTEST_DECLARE(bool scrub_64K); }; #define i915_is_ggtt(V) (!(V)->file) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index af915d041281..ad949cc30928 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -51,6 +51,10 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, if (!drm_mm_initialized(&dev_priv->mm.stolen)) return -ENODEV; + /* WaSkipStolenMemoryFirstPage:bdw+ */ + if (INTEL_GEN(dev_priv) >= 8 && start < 4096) + start = 4096; + mutex_lock(&dev_priv->mm.stolen_lock); ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, size, alignment, 0, @@ -343,7 +347,6 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { resource_size_t reserved_base, stolen_top; resource_size_t reserved_total, reserved_size; - resource_size_t stolen_usable_start; mutex_init(&dev_priv->mm.stolen_lock); @@ -435,17 +438,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) (u64)resource_size(&dev_priv->dsm) >> 10, ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); - stolen_usable_start = 0; - /* WaSkipStolenMemoryFirstPage:bdw+ */ - if (INTEL_GEN(dev_priv) >= 8) - stolen_usable_start = 4096; - dev_priv->stolen_usable_size = - resource_size(&dev_priv->dsm) - reserved_total - stolen_usable_start; + resource_size(&dev_priv->dsm) - reserved_total; /* Basic memrange allocator for stolen space. */ - drm_mm_init(&dev_priv->mm.stolen, stolen_usable_start, - dev_priv->stolen_usable_size); + drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c deleted file mode 100644 index e9fd87604067..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "i915_drv.h" -#include "i915_syncmap.h" - -static void __intel_timeline_init(struct intel_timeline *tl, - struct i915_gem_timeline *parent, - u64 context, - struct lock_class_key *lockclass, - const char *lockname) -{ - tl->fence_context = context; - tl->common = parent; - spin_lock_init(&tl->lock); - lockdep_set_class_and_name(&tl->lock, lockclass, lockname); - init_request_active(&tl->last_request, NULL); - INIT_LIST_HEAD(&tl->requests); - i915_syncmap_init(&tl->sync); -} - -static void __intel_timeline_fini(struct intel_timeline *tl) -{ - GEM_BUG_ON(!list_empty(&tl->requests)); - - i915_syncmap_free(&tl->sync); -} - -static int __i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name, - struct lock_class_key *lockclass, - const char *lockname) -{ - unsigned int i; - u64 fences; - - lockdep_assert_held(&i915->drm.struct_mutex); - - /* - * Ideally we want a set of engines on a single leaf as we expect - * to mostly be tracking synchronisation between engines. It is not - * a huge issue if this is not the case, but we may want to mitigate - * any page crossing penalties if they become an issue. - */ - BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); - - timeline->i915 = i915; - timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); - if (!timeline->name) - return -ENOMEM; - - list_add(&timeline->link, &i915->gt.timelines); - - /* Called during early_init before we know how many engines there are */ - fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) - __intel_timeline_init(&timeline->engine[i], - timeline, fences++, - lockclass, lockname); - - return 0; -} - -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name) -{ - static struct lock_class_key class; - - return __i915_gem_timeline_init(i915, timeline, name, - &class, "&timeline->lock"); -} - -int i915_gem_timeline_init__global(struct drm_i915_private *i915) -{ - static struct lock_class_key class; - - return __i915_gem_timeline_init(i915, - &i915->gt.global_timeline, - "[execution]", - &class, "&global_timeline->lock"); -} - -/** - * i915_gem_timelines_park - called when the driver idles - * @i915: the drm_i915_private device - * - * When the driver is completely idle, we know that all of our sync points - * have been signaled and our tracking is then entirely redundant. Any request - * to wait upon an older sync point will be completed instantly as we know - * the fence is signaled and therefore we will not even look them up in the - * sync point map. - */ -void i915_gem_timelines_park(struct drm_i915_private *i915) -{ - struct i915_gem_timeline *timeline; - int i; - - lockdep_assert_held(&i915->drm.struct_mutex); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - /* - * All known fences are completed so we can scrap - * the current sync point tracking and start afresh, - * any attempt to wait upon a previous sync point - * will be skipped as the fence was signaled. - */ - i915_syncmap_free(&tl->sync); - } - } -} - -void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) -{ - int i; - - lockdep_assert_held(&timeline->i915->drm.struct_mutex); - - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) - __intel_timeline_fini(&timeline->engine[i]); - - list_del(&timeline->link); - kfree(timeline->name); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_timeline.c" -#include "selftests/i915_gem_timeline.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index d596a8302ca3..854bd51b9478 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -778,6 +778,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev, I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; + if (!args->user_size) + return -EINVAL; + if (offset_in_page(args->user_ptr | args->user_size)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index effaf982b19b..df234dc23274 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -404,16 +404,17 @@ static const char *bannable(const struct drm_i915_error_context *ctx) static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - const struct drm_i915_error_request *erq) + const struct drm_i915_error_request *erq, + const unsigned long epoch) { if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, erq->priority, - jiffies_to_msecs(jiffies - erq->jiffies), - erq->head, erq->tail); + erq->context, erq->seqno, erq->sched_attr.priority, + jiffies_to_msecs(erq->jiffies - epoch), + erq->start, erq->head, erq->tail); } static void error_print_context(struct drm_i915_error_state_buf *m, @@ -422,12 +423,13 @@ static void error_print_context(struct drm_i915_error_state_buf *m, { err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, - ctx->priority, ctx->ban_score, bannable(ctx), + ctx->sched_attr.priority, ctx->ban_score, bannable(ctx), ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, - const struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee, + const unsigned long epoch) { int n; @@ -497,14 +499,15 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); err_printf(m, " hangcheck action: %s\n", hangcheck_action_to_str(ee->hangcheck_action)); - err_printf(m, " hangcheck action timestamp: %lu, %u ms ago\n", + err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", + jiffies_to_msecs(ee->hangcheck_timestamp - epoch), ee->hangcheck_timestamp, - jiffies_to_msecs(jiffies - ee->hangcheck_timestamp)); + ee->hangcheck_timestamp == epoch ? "; epoch" : ""); err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { err_printf(m, " ELSP[%d]:", n); - error_print_request(m, " ", &ee->execlist[n]); + error_print_request(m, " ", &ee->execlist[n], epoch); } error_print_context(m, " Active context: ", &ee->context); @@ -650,6 +653,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, ts = ktime_to_timespec64(error->uptime); err_printf(m, "Uptime: %lld s %ld us\n", (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); + err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); + err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", + error->capture, + jiffies_to_msecs(jiffies - error->capture), + jiffies_to_msecs(error->capture - error->epoch)); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && @@ -710,7 +718,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].engine_id != -1) - error_print_engine(m, &error->engine[i]); + error_print_engine(m, &error->engine[i], error->epoch); } for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) { @@ -769,7 +777,9 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, dev_priv->engine[i]->name, ee->num_requests); for (j = 0; j < ee->num_requests; j++) - error_print_request(m, " ", &ee->requests[j]); + error_print_request(m, " ", + &ee->requests[j], + error->epoch); } if (IS_ERR(ee->waiters)) { @@ -1278,10 +1288,11 @@ static void record_request(struct i915_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; - erq->priority = request->priotree.priority; + erq->sched_attr = request->sched.attr; erq->ban_score = atomic_read(&request->ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; + erq->start = i915_ggtt_offset(request->ring->vma); erq->head = request->head; erq->tail = request->tail; @@ -1299,7 +1310,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline->requests, link) + list_for_each_entry_from(request, &engine->timeline.requests, link) count++; if (!count) return; @@ -1312,7 +1323,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline->requests, link) { + list_for_each_entry_from(request, &engine->timeline.requests, link) { if (count >= ee->num_requests) { /* * If the ring request list was changed in @@ -1372,7 +1383,7 @@ static void record_context(struct drm_i915_error_context *e, e->handle = ctx->user_handle; e->hw_id = ctx->hw_id; - e->priority = ctx->priority; + e->sched_attr = ctx->sched; e->ban_score = atomic_read(&ctx->ban_score); e->bannable = i915_gem_context_is_bannable(ctx); e->guilty = atomic_read(&ctx->guilty_count); @@ -1472,7 +1483,8 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->ctx = i915_error_object_create(i915, - request->ctx->engine[i].state); + to_intel_context(request->ctx, + engine)->state); error->simulated |= i915_gem_context_no_error_capture(request->ctx); @@ -1735,6 +1747,22 @@ static void capture_params(struct i915_gpu_state *error) #undef DUP } +static unsigned long capture_find_epoch(const struct i915_gpu_state *error) +{ + unsigned long epoch = error->capture; + int i; + + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + const struct drm_i915_error_engine *ee = &error->engine[i]; + + if (ee->hangcheck_stalled && + time_before(ee->hangcheck_timestamp, epoch)) + epoch = ee->hangcheck_timestamp; + } + + return epoch; +} + static int capture(void *data) { struct i915_gpu_state *error = data; @@ -1743,6 +1771,7 @@ static int capture(void *data) error->boottime = ktime_get_boottime(); error->uptime = ktime_sub(ktime_get(), error->i915->gt.last_init_time); + error->capture = jiffies; capture_params(error); capture_gen_state(error); @@ -1756,6 +1785,8 @@ static int capture(void *data) error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); + error->epoch = capture_find_epoch(error); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index c05b6034d718..dac0f8c4c1cf 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -20,6 +20,7 @@ #include "i915_gem.h" #include "i915_gem_gtt.h" #include "i915_params.h" +#include "i915_scheduler.h" struct drm_i915_private; struct intel_overlay_error_state; @@ -30,6 +31,8 @@ struct i915_gpu_state { ktime_t time; ktime_t boottime; ktime_t uptime; + unsigned long capture; + unsigned long epoch; struct drm_i915_private *i915; @@ -122,11 +125,11 @@ struct i915_gpu_state { pid_t pid; u32 handle; u32 hw_id; - int priority; int ban_score; int active; int guilty; bool bannable; + struct i915_sched_attr sched_attr; } context; struct drm_i915_error_object { @@ -147,11 +150,12 @@ struct i915_gpu_state { long jiffies; pid_t pid; u32 context; - int priority; int ban_score; u32 seqno; + u32 start; u32 head; u32 tail; + struct i915_sched_attr sched_attr; } *requests, execlist[EXECLIST_MAX_PORTS]; unsigned int num_ports; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b03d18561b55..f9bc3aaa90d0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -247,9 +247,9 @@ static u32 gen11_gt_engine_identity(struct drm_i915_private * const i915, const unsigned int bank, const unsigned int bit); -static bool gen11_reset_one_iir(struct drm_i915_private * const i915, - const unsigned int bank, - const unsigned int bit) +bool gen11_reset_one_iir(struct drm_i915_private * const i915, + const unsigned int bank, + const unsigned int bit) { void __iomem * const regs = i915->regs; u32 dw; @@ -2464,6 +2464,13 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, if (de_iir & DE_ERR_INT_IVB) ivb_err_int_handler(dev_priv); + if (de_iir & DE_EDP_PSR_INT_HSW) { + u32 psr_iir = I915_READ(EDP_PSR_IIR); + + intel_psr_irq_handler(dev_priv, psr_iir); + I915_WRITE(EDP_PSR_IIR, psr_iir); + } + if (de_iir & DE_AUX_CHANNEL_A_IVB) dp_aux_irq_handler(dev_priv); @@ -2593,11 +2600,25 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) if (master_ctl & GEN8_DE_MISC_IRQ) { iir = I915_READ(GEN8_DE_MISC_IIR); if (iir) { + bool found = false; + I915_WRITE(GEN8_DE_MISC_IIR, iir); ret = IRQ_HANDLED; - if (iir & GEN8_DE_MISC_GSE) + + if (iir & GEN8_DE_MISC_GSE) { intel_opregion_asle_intr(dev_priv); - else + found = true; + } + + if (iir & GEN8_DE_EDP_PSR) { + u32 psr_iir = I915_READ(EDP_PSR_IIR); + + intel_psr_irq_handler(dev_priv, psr_iir); + I915_WRITE(EDP_PSR_IIR, psr_iir); + found = true; + } + + if (!found) DRM_ERROR("Unexpected DE Misc interrupt\n"); } else @@ -3348,6 +3369,11 @@ static void ironlake_irq_reset(struct drm_device *dev) if (IS_GEN7(dev_priv)) I915_WRITE(GEN7_ERR_INT, 0xffffffff); + if (IS_HASWELL(dev_priv)) { + I915_WRITE(EDP_PSR_IMR, 0xffffffff); + I915_WRITE(EDP_PSR_IIR, 0xffffffff); + } + gen5_gt_irq_reset(dev_priv); ibx_irq_reset(dev_priv); @@ -3386,6 +3412,9 @@ static void gen8_irq_reset(struct drm_device *dev) gen8_gt_irq_reset(dev_priv); + I915_WRITE(EDP_PSR_IMR, 0xffffffff); + I915_WRITE(EDP_PSR_IIR, 0xffffffff); + for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) @@ -3762,6 +3791,12 @@ static int ironlake_irq_postinstall(struct drm_device *dev) DE_DP_A_HOTPLUG); } + if (IS_HASWELL(dev_priv)) { + gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR); + intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + display_mask |= DE_EDP_PSR_INT_HSW; + } + dev_priv->irq_mask = ~display_mask; ibx_irq_pre_postinstall(dev); @@ -3872,7 +3907,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) uint32_t de_pipe_enables; u32 de_port_masked = GEN8_AUX_CHANNEL_A; u32 de_port_enables; - u32 de_misc_masked = GEN8_DE_MISC_GSE; + u32 de_misc_masked = GEN8_DE_MISC_GSE | GEN8_DE_EDP_PSR; enum pipe pipe; if (INTEL_GEN(dev_priv) >= 9) { @@ -3897,6 +3932,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; + gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR); + intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + for_each_pipe(dev_priv, pipe) { dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 08108ce5be21..66ea3552c63e 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -164,6 +164,9 @@ i915_param_named_unsafe(guc_firmware_path, charp, 0400, i915_param_named_unsafe(huc_firmware_path, charp, 0400, "HuC firmware path to use instead of the default one"); +i915_param_named_unsafe(dmc_firmware_path, charp, 0400, + "DMC firmware path to use instead of the default one"); + i915_param_named_unsafe(enable_dp_mst, bool, 0600, "Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index c96360398072..6684025b7af8 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -51,6 +51,7 @@ struct drm_printer; param(int, guc_log_level, -1) \ param(char *, guc_firmware_path, NULL) \ param(char *, huc_firmware_path, NULL) \ + param(char *, dmc_firmware_path, NULL) \ param(int, mmio_debug, 0) \ param(int, edp_vswing, 0) \ param(int, reset, 2) \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index bfc906cd4e5e..019bd2d073ad 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1234,7 +1234,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... */ - ring = engine->context_pin(engine, stream->ctx); + ring = intel_context_pin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); if (IS_ERR(ring)) return PTR_ERR(ring); @@ -1246,7 +1246,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * with gen8+ and execlists */ dev_priv->perf.oa.specific_ctx_id = - i915_ggtt_offset(stream->ctx->engine[engine->id].state); + i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state); } return 0; @@ -1271,7 +1271,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) mutex_lock(&dev_priv->drm.struct_mutex); dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - engine->context_unpin(engine, stream->ctx); + intel_context_unpin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -1695,7 +1695,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr const struct i915_oa_config *oa_config) { struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct i915_gem_timeline *timeline; + struct i915_timeline *timeline; struct i915_request *rq; int ret; @@ -1716,15 +1716,11 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { struct i915_request *prev; - struct intel_timeline *tl; - tl = &timeline->engine[engine->id]; - prev = i915_gem_active_raw(&tl->last_request, + prev = i915_gem_active_raw(&timeline->last_request, &dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&rq->submit, - &prev->submit, - GFP_KERNEL); + i915_request_await_dma_fence(rq, &prev->fence); } i915_request_add(rq); @@ -1759,6 +1755,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { + struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct i915_gem_context *ctx; int ret; unsigned int wait_flags = I915_WAIT_LOCKED; @@ -1789,7 +1786,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, /* Update all contexts now that we've stalled the submission. */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct intel_context *ce = &ctx->engine[RCS]; + struct intel_context *ce = to_intel_context(ctx, engine); u32 *regs; /* OA settings will be set upon first use */ @@ -1963,11 +1960,19 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) static void gen7_oa_disable(struct drm_i915_private *dev_priv) { I915_WRITE(GEN7_OACONTROL, 0); + if (intel_wait_for_register(dev_priv, + GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); } static void gen8_oa_disable(struct drm_i915_private *dev_priv) { I915_WRITE(GEN8_OACONTROL, 0); + if (intel_wait_for_register(dev_priv, + GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); } /** diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fb106026a1f4..f11bb213ec07 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3840,6 +3840,7 @@ enum { #define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) +#define MSCUNIT_CLKGATE_DIS (1 << 10) #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) #define GWUNIT_CLKGATE_DIS (1 << 16) @@ -3847,6 +3848,9 @@ enum { #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS (1 << 20) +#define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560) +#define CGPSF_CLKGATE_DIS (1 << 3) + /* * Display engine regs */ @@ -4026,6 +4030,13 @@ enum { #define EDP_PSR_TP1_TIME_0us (3<<4) #define EDP_PSR_IDLE_FRAME_SHIFT 0 +/* Bspec claims those aren't shifted but stay at 0x64800 */ +#define EDP_PSR_IMR _MMIO(0x64834) +#define EDP_PSR_IIR _MMIO(0x64838) +#define EDP_PSR_ERROR(trans) (1 << (((trans) * 8 + 10) & 31)) +#define EDP_PSR_POST_EXIT(trans) (1 << (((trans) * 8 + 9) & 31)) +#define EDP_PSR_PRE_ENTRY(trans) (1 << (((trans) * 8 + 8) & 31)) + #define EDP_PSR_AUX_CTL _MMIO(dev_priv->psr_mmio_base + 0x10) #define EDP_PSR_AUX_CTL_TIME_OUT_MASK (3 << 26) #define EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK (0x1f << 20) @@ -4088,6 +4099,29 @@ enum { #define EDP_PSR2_IDLE_FRAME_MASK 0xf #define EDP_PSR2_IDLE_FRAME_SHIFT 0 +#define _PSR_EVENT_TRANS_A 0x60848 +#define _PSR_EVENT_TRANS_B 0x61848 +#define _PSR_EVENT_TRANS_C 0x62848 +#define _PSR_EVENT_TRANS_D 0x63848 +#define _PSR_EVENT_TRANS_EDP 0x6F848 +#define PSR_EVENT(trans) _MMIO_TRANS2(trans, _PSR_EVENT_TRANS_A) +#define PSR_EVENT_PSR2_WD_TIMER_EXPIRE (1 << 17) +#define PSR_EVENT_PSR2_DISABLED (1 << 16) +#define PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN (1 << 15) +#define PSR_EVENT_SU_CRC_FIFO_UNDERRUN (1 << 14) +#define PSR_EVENT_GRAPHICS_RESET (1 << 12) +#define PSR_EVENT_PCH_INTERRUPT (1 << 11) +#define PSR_EVENT_MEMORY_UP (1 << 10) +#define PSR_EVENT_FRONT_BUFFER_MODIFY (1 << 9) +#define PSR_EVENT_WD_TIMER_EXPIRE (1 << 8) +#define PSR_EVENT_PIPE_REGISTERS_UPDATE (1 << 6) +#define PSR_EVENT_REGISTER_UPDATE (1 << 5) +#define PSR_EVENT_HDCP_ENABLE (1 << 4) +#define PSR_EVENT_KVMR_SESSION_ENABLE (1 << 3) +#define PSR_EVENT_VBI_ENABLE (1 << 2) +#define PSR_EVENT_LPSP_MODE_EXIT (1 << 1) +#define PSR_EVENT_PSR_DISABLE (1 << 0) + #define EDP_PSR2_STATUS _MMIO(0x6f940) #define EDP_PSR2_STATUS_STATE_MASK (0xf<<28) #define EDP_PSR2_STATUS_STATE_SHIFT 28 @@ -6377,9 +6411,9 @@ enum { #define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ #define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ #define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ -#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) /* Pre-ICL */ #define PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE (1 << 28) -#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) /* Pre-ICL */ #define PLANE_COLOR_CSC_MODE_BYPASS (0 << 17) #define PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709 (1 << 17) #define PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709 (2 << 17) @@ -6474,6 +6508,9 @@ enum { #define _PLANE_BUF_CFG_1_B 0x7127c #define _PLANE_BUF_CFG_2_B 0x7137c +#define SKL_DDB_ENTRY_MASK 0x3FF +#define ICL_DDB_ENTRY_MASK 0x7FF +#define DDB_ENTRY_END_SHIFT 16 #define _PLANE_BUF_CFG_1(pipe) \ _PIPE(pipe, _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B) #define _PLANE_BUF_CFG_2(pipe) \ @@ -6837,6 +6874,7 @@ enum { #define DE_PCH_EVENT_IVB (1<<28) #define DE_DP_A_HOTPLUG_IVB (1<<27) #define DE_AUX_CHANNEL_A_IVB (1<<26) +#define DE_EDP_PSR_INT_HSW (1<<19) #define DE_SPRITEC_FLIP_DONE_IVB (1<<14) #define DE_PLANEC_FLIP_DONE_IVB (1<<13) #define DE_PIPEC_VBLANK_IVB (1<<10) @@ -6961,6 +6999,7 @@ enum { #define GEN8_DE_MISC_IIR _MMIO(0x44468) #define GEN8_DE_MISC_IER _MMIO(0x4446c) #define GEN8_DE_MISC_GSE (1 << 27) +#define GEN8_DE_EDP_PSR (1 << 19) #define GEN8_PCU_ISR _MMIO(0x444e0) #define GEN8_PCU_IMR _MMIO(0x444e4) @@ -7191,18 +7230,22 @@ enum { #define GEN7_L3CNTLREG3 _MMIO(0xB024) #define GEN7_L3_CHICKEN_MODE_REGISTER _MMIO(0xB030) -#define GEN7_WA_L3_CHICKEN_MODE 0x20000000 +#define GEN7_WA_L3_CHICKEN_MODE 0x20000000 +#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xB114) +#define GEN11_I2M_WRITE_DISABLE (1 << 28) #define GEN7_L3SQCREG4 _MMIO(0xb034) #define L3SQ_URB_READ_CAM_MATCH_DISABLE (1<<27) #define GEN8_L3SQCREG4 _MMIO(0xb118) -#define GEN8_LQSC_RO_PERF_DIS (1<<27) -#define GEN8_LQSC_FLUSH_COHERENT_LINES (1<<21) +#define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6) +#define GEN8_LQSC_RO_PERF_DIS (1 << 27) +#define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21) /* GEN8 chicken */ #define HDC_CHICKEN0 _MMIO(0x7300) #define CNL_HDC_CHICKEN0 _MMIO(0xE5F0) +#define ICL_HDC_MODE _MMIO(0xE5F4) #define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1<<15) #define HDC_FENCE_DEST_SLM_DISABLE (1<<14) #define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) @@ -7216,6 +7259,9 @@ enum { #define SLICE_ECO_CHICKEN0 _MMIO(0x7308) #define PIXEL_MASK_CAMMING_DISABLE (1 << 14) +#define GEN9_WM_CHICKEN3 _MMIO(0x5588) +#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) + /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG _MMIO(0x9030) #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11) @@ -8214,8 +8260,30 @@ enum { #define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1<<4) #define GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE (1<<6) -#define GEN8_GARBCNTL _MMIO(0xB004) -#define GEN9_GAPS_TSV_CREDIT_DISABLE (1<<7) +#define GEN8_GARBCNTL _MMIO(0xB004) +#define GEN9_GAPS_TSV_CREDIT_DISABLE (1 << 7) +#define GEN11_ARBITRATION_PRIO_ORDER_MASK (0x3f << 22) +#define GEN11_HASH_CTRL_EXCL_MASK (0x7f << 0) +#define GEN11_HASH_CTRL_EXCL_BIT0 (1 << 0) + +#define GEN11_GLBLINVL _MMIO(0xB404) +#define GEN11_BANK_HASH_ADDR_EXCL_MASK (0x7f << 5) +#define GEN11_BANK_HASH_ADDR_EXCL_BIT0 (1 << 5) + +#define GEN10_DFR_RATIO_EN_AND_CHICKEN _MMIO(0x9550) +#define DFR_DISABLE (1 << 9) + +#define GEN11_GACB_PERF_CTRL _MMIO(0x4B80) +#define GEN11_HASH_CTRL_MASK (0x3 << 12 | 0xf << 0) +#define GEN11_HASH_CTRL_BIT0 (1 << 0) +#define GEN11_HASH_CTRL_BIT4 (1 << 12) + +#define GEN11_LSN_UNSLCVC _MMIO(0xB43C) +#define GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC (1 << 9) +#define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) + +#define GAMW_ECO_DEV_RW_IA_REG _MMIO(0x4080) +#define GAMW_ECO_DEV_CTX_RELOAD_DISABLE (1 << 7) /* IVYBRIDGE DPF */ #define GEN7_L3CDERRST1(slice) _MMIO(0xB008 + (slice) * 0x200) /* L3CD Error Status 1 */ @@ -8724,6 +8792,12 @@ enum skl_power_gate { #define PORT_CLK_SEL_NONE (7<<29) #define PORT_CLK_SEL_MASK (7<<29) +/* On ICL+ this is the same as PORT_CLK_SEL, but all bits change. */ +#define DDI_CLK_SEL(port) PORT_CLK_SEL(port) +#define DDI_CLK_SEL_NONE (0x0 << 28) +#define DDI_CLK_SEL_MG (0x8 << 28) +#define DDI_CLK_SEL_MASK (0xF << 28) + /* Transcoder clock selection */ #define _TRANS_CLK_SEL_A 0x46140 #define _TRANS_CLK_SEL_B 0x46144 @@ -8854,6 +8928,7 @@ enum skl_power_gate { * CNL Clocks */ #define DPCLKA_CFGCR0 _MMIO(0x6C200) +#define DPCLKA_CFGCR0_ICL _MMIO(0x164280) #define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port) == PORT_F ? 23 : \ (port)+10)) #define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port) == PORT_F ? 21 : \ @@ -8870,10 +8945,141 @@ enum skl_power_gate { #define PLL_POWER_STATE (1 << 26) #define CNL_DPLL_ENABLE(pll) _MMIO_PLL(pll, DPLL0_ENABLE, DPLL1_ENABLE) +#define _MG_PLL1_ENABLE 0x46030 +#define _MG_PLL2_ENABLE 0x46034 +#define _MG_PLL3_ENABLE 0x46038 +#define _MG_PLL4_ENABLE 0x4603C +/* Bits are the same as DPLL0_ENABLE */ +#define MG_PLL_ENABLE(port) _MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ + _MG_PLL2_ENABLE) + +#define _MG_REFCLKIN_CTL_PORT1 0x16892C +#define _MG_REFCLKIN_CTL_PORT2 0x16992C +#define _MG_REFCLKIN_CTL_PORT3 0x16A92C +#define _MG_REFCLKIN_CTL_PORT4 0x16B92C +#define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) +#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ + _MG_REFCLKIN_CTL_PORT1, \ + _MG_REFCLKIN_CTL_PORT2) + +#define _MG_CLKTOP2_CORECLKCTL1_PORT1 0x1688D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT2 0x1698D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT3 0x16A8D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT4 0x16B8D8 +#define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO(x) ((x) << 16) +#define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) +#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ + _MG_CLKTOP2_CORECLKCTL1_PORT1, \ + _MG_CLKTOP2_CORECLKCTL1_PORT2) + +#define _MG_CLKTOP2_HSCLKCTL_PORT1 0x1688D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT2 0x1698D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT3 0x16A8D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT4 0x16B8D4 +#define MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(x) ((x) << 16) +#define MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(x) ((x) << 14) +#define MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(x) ((x) << 12) +#define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) +#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ + _MG_CLKTOP2_HSCLKCTL_PORT1, \ + _MG_CLKTOP2_HSCLKCTL_PORT2) + +#define _MG_PLL_DIV0_PORT1 0x168A00 +#define _MG_PLL_DIV0_PORT2 0x169A00 +#define _MG_PLL_DIV0_PORT3 0x16AA00 +#define _MG_PLL_DIV0_PORT4 0x16BA00 +#define MG_PLL_DIV0_FRACNEN_H (1 << 30) +#define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8) +#define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0) +#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ + _MG_PLL_DIV0_PORT2) + +#define _MG_PLL_DIV1_PORT1 0x168A04 +#define _MG_PLL_DIV1_PORT2 0x169A04 +#define _MG_PLL_DIV1_PORT3 0x16AA04 +#define _MG_PLL_DIV1_PORT4 0x16BA04 +#define MG_PLL_DIV1_IREF_NDIVRATIO(x) ((x) << 16) +#define MG_PLL_DIV1_DITHER_DIV_1 (0 << 12) +#define MG_PLL_DIV1_DITHER_DIV_2 (1 << 12) +#define MG_PLL_DIV1_DITHER_DIV_4 (2 << 12) +#define MG_PLL_DIV1_DITHER_DIV_8 (3 << 12) +#define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4) +#define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0) +#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ + _MG_PLL_DIV1_PORT2) + +#define _MG_PLL_LF_PORT1 0x168A08 +#define _MG_PLL_LF_PORT2 0x169A08 +#define _MG_PLL_LF_PORT3 0x16AA08 +#define _MG_PLL_LF_PORT4 0x16BA08 +#define MG_PLL_LF_TDCTARGETCNT(x) ((x) << 24) +#define MG_PLL_LF_AFCCNTSEL_256 (0 << 20) +#define MG_PLL_LF_AFCCNTSEL_512 (1 << 20) +#define MG_PLL_LF_GAINCTRL(x) ((x) << 16) +#define MG_PLL_LF_INT_COEFF(x) ((x) << 8) +#define MG_PLL_LF_PROP_COEFF(x) ((x) << 0) +#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ + _MG_PLL_LF_PORT2) + +#define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C +#define _MG_PLL_FRAC_LOCK_PORT2 0x169A0C +#define _MG_PLL_FRAC_LOCK_PORT3 0x16AA0C +#define _MG_PLL_FRAC_LOCK_PORT4 0x16BA0C +#define MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 (1 << 18) +#define MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 (1 << 16) +#define MG_PLL_FRAC_LOCK_LOCKTHRESH(x) ((x) << 11) +#define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10) +#define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8) +#define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0) +#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ + _MG_PLL_FRAC_LOCK_PORT1, \ + _MG_PLL_FRAC_LOCK_PORT2) + +#define _MG_PLL_SSC_PORT1 0x168A10 +#define _MG_PLL_SSC_PORT2 0x169A10 +#define _MG_PLL_SSC_PORT3 0x16AA10 +#define _MG_PLL_SSC_PORT4 0x16BA10 +#define MG_PLL_SSC_EN (1 << 28) +#define MG_PLL_SSC_TYPE(x) ((x) << 26) +#define MG_PLL_SSC_STEPLENGTH(x) ((x) << 16) +#define MG_PLL_SSC_STEPNUM(x) ((x) << 10) +#define MG_PLL_SSC_FLLEN (1 << 9) +#define MG_PLL_SSC_STEPSIZE(x) ((x) << 0) +#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ + _MG_PLL_SSC_PORT2) + +#define _MG_PLL_BIAS_PORT1 0x168A14 +#define _MG_PLL_BIAS_PORT2 0x169A14 +#define _MG_PLL_BIAS_PORT3 0x16AA14 +#define _MG_PLL_BIAS_PORT4 0x16BA14 +#define MG_PLL_BIAS_BIAS_GB_SEL(x) ((x) << 30) +#define MG_PLL_BIAS_INIT_DCOAMP(x) ((x) << 24) +#define MG_PLL_BIAS_BIAS_BONUS(x) ((x) << 16) +#define MG_PLL_BIAS_BIASCAL_EN (1 << 15) +#define MG_PLL_BIAS_CTRIM(x) ((x) << 8) +#define MG_PLL_BIAS_VREF_RDAC(x) ((x) << 5) +#define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) +#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ + _MG_PLL_BIAS_PORT2) + +#define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT2 0x169A18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT3 0x16AA18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT4 0x16BA18 +#define MG_PLL_TDC_COLDST_IREFINT_EN (1 << 27) +#define MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(x) ((x) << 17) +#define MG_PLL_TDC_COLDST_COLDSTART (1 << 16) +#define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2) +#define MG_PLL_TDC_TDCSEL(x) ((x) << 0) +#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ + _MG_PLL_TDC_COLDST_BIAS_PORT1, \ + _MG_PLL_TDC_COLDST_BIAS_PORT2) + #define _CNL_DPLL0_CFGCR0 0x6C000 #define _CNL_DPLL1_CFGCR0 0x6C080 #define DPLL_CFGCR0_HDMI_MODE (1 << 30) #define DPLL_CFGCR0_SSC_ENABLE (1 << 29) +#define DPLL_CFGCR0_SSC_ENABLE_ICL (1 << 25) #define DPLL_CFGCR0_LINK_RATE_MASK (0xf << 25) #define DPLL_CFGCR0_LINK_RATE_2700 (0 << 25) #define DPLL_CFGCR0_LINK_RATE_1350 (1 << 25) @@ -8907,8 +9113,19 @@ enum skl_power_gate { #define DPLL_CFGCR1_PDIV_5 (4 << 2) #define DPLL_CFGCR1_PDIV_7 (8 << 2) #define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) +#define DPLL_CFGCR1_CENTRAL_FREQ_8400 (3 << 0) #define CNL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR1, _CNL_DPLL1_CFGCR1) +#define _ICL_DPLL0_CFGCR0 0x164000 +#define _ICL_DPLL1_CFGCR0 0x164080 +#define ICL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR0, \ + _ICL_DPLL1_CFGCR0) + +#define _ICL_DPLL0_CFGCR1 0x164004 +#define _ICL_DPLL1_CFGCR1 0x164084 +#define ICL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR1, \ + _ICL_DPLL1_CFGCR1) + /* BXT display engine PLL */ #define BXT_DE_PLL_CTL _MMIO(0x6d000) #define BXT_DE_PLL_RATIO(x) (x) /* {60,65,100} * 19.2MHz */ @@ -9680,6 +9897,13 @@ enum skl_power_gate { #define GEN9_MFX1_MOCS(i) _MMIO(0xca00 + (i) * 4) /* Media 1 MOCS registers */ #define GEN9_VEBOX_MOCS(i) _MMIO(0xcb00 + (i) * 4) /* Video MOCS registers */ #define GEN9_BLT_MOCS(i) _MMIO(0xcc00 + (i) * 4) /* Blitter MOCS registers */ +/* Media decoder 2 MOCS registers */ +#define GEN11_MFX2_MOCS(i) _MMIO(0x10000 + (i) * 4) + +#define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0) +#define PMFLUSHDONE_LNICRSDROP (1 << 20) +#define PMFLUSH_GAPL3UNBLOCK (1 << 21) +#define PMFLUSHDONE_LNEBLK (1 << 22) /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9ca9c24b4421..8928894dd9c7 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -49,7 +49,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - return to_request(fence)->timeline->common->name; + return to_request(fence)->timeline->name; } static bool i915_fence_signaled(struct dma_fence *fence) @@ -125,22 +125,22 @@ i915_dependency_free(struct drm_i915_private *i915, } static void -__i915_priotree_add_dependency(struct i915_priotree *pt, - struct i915_priotree *signal, - struct i915_dependency *dep, - unsigned long flags) +__i915_sched_node_add_dependency(struct i915_sched_node *node, + struct i915_sched_node *signal, + struct i915_dependency *dep, + unsigned long flags) { INIT_LIST_HEAD(&dep->dfs_link); list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &pt->signalers_list); + list_add(&dep->signal_link, &node->signalers_list); dep->signaler = signal; dep->flags = flags; } static int -i915_priotree_add_dependency(struct drm_i915_private *i915, - struct i915_priotree *pt, - struct i915_priotree *signal) +i915_sched_node_add_dependency(struct drm_i915_private *i915, + struct i915_sched_node *node, + struct i915_sched_node *signal) { struct i915_dependency *dep; @@ -148,16 +148,18 @@ i915_priotree_add_dependency(struct drm_i915_private *i915, if (!dep) return -ENOMEM; - __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC); + __i915_sched_node_add_dependency(node, signal, dep, + I915_DEPENDENCY_ALLOC); return 0; } static void -i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) +i915_sched_node_fini(struct drm_i915_private *i915, + struct i915_sched_node *node) { - struct i915_dependency *dep, *next; + struct i915_dependency *dep, *tmp; - GEM_BUG_ON(!list_empty(&pt->link)); + GEM_BUG_ON(!list_empty(&node->link)); /* * Everyone we depended upon (the fences we wait to be signaled) @@ -165,8 +167,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) * However, retirement is run independently on each timeline and * so we may be called out-of-order. */ - list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { - GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); + list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { + GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->wait_link); @@ -175,8 +177,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) } /* Remove ourselves from everyone who depends upon us */ - list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { - GEM_BUG_ON(dep->signaler != pt); + list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != node); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->signal_link); @@ -186,17 +188,18 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) } static void -i915_priotree_init(struct i915_priotree *pt) +i915_sched_node_init(struct i915_sched_node *node) { - INIT_LIST_HEAD(&pt->signalers_list); - INIT_LIST_HEAD(&pt->waiters_list); - INIT_LIST_HEAD(&pt->link); - pt->priority = I915_PRIORITY_INVALID; + INIT_LIST_HEAD(&node->signalers_list); + INIT_LIST_HEAD(&node->waiters_list); + INIT_LIST_HEAD(&node->link); + node->attr.priority = I915_PRIORITY_INVALID; } static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct intel_engine_cs *engine; + struct i915_timeline *timeline; enum intel_engine_id id; int ret; @@ -211,34 +214,33 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ for_each_engine(engine, i915, id) { - struct i915_gem_timeline *timeline; - struct intel_timeline *tl = engine->timeline; - GEM_TRACE("%s seqno %d (current %d) -> %d\n", engine->name, - tl->seqno, + engine->timeline.seqno, intel_engine_get_seqno(engine), seqno); - if (!i915_seqno_passed(seqno, tl->seqno)) { + if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { /* Flush any waiters before we reuse the seqno */ intel_engine_disarm_breadcrumbs(engine); + intel_engine_init_hangcheck(engine); GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); } /* Check we are idle before we fiddle with hw state! */ GEM_BUG_ON(!intel_engine_is_idle(engine)); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request)); /* Finally reset hw state */ intel_engine_init_global_seqno(engine, seqno); - tl->seqno = seqno; - - list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->engine[id].global_sync, 0, - sizeof(timeline->engine[id].global_sync)); + engine->timeline.seqno = seqno; } + list_for_each_entry(timeline, &i915->gt.timelines, link) + memset(timeline->global_sync, 0, sizeof(timeline->global_sync)); + + i915->gt.request_serial = seqno; + return 0; } @@ -255,18 +257,22 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) return reset_all_global_seqno(i915, seqno - 1); } -static int reserve_engine(struct intel_engine_cs *engine) +static int reserve_gt(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = engine->i915; - u32 active = ++engine->timeline->inflight_seqnos; - u32 seqno = engine->timeline->seqno; int ret; - /* Reservation is fine until we need to wrap around */ - if (unlikely(add_overflows(seqno, active))) { + /* + * Reservation is fine until we may need to wrap around + * + * By incrementing the serial for every request, we know that no + * individual engine may exceed that serial (as each is reset to 0 + * on any wrap). This protects even the most pessimistic of migrations + * of every request from all engines onto just one. + */ + while (unlikely(++i915->gt.request_serial == 0)) { ret = reset_all_global_seqno(i915, 0); if (ret) { - engine->timeline->inflight_seqnos--; + i915->gt.request_serial--; return ret; } } @@ -277,15 +283,11 @@ static int reserve_engine(struct intel_engine_cs *engine) return 0; } -static void unreserve_engine(struct intel_engine_cs *engine) +static void unreserve_gt(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = engine->i915; - + GEM_BUG_ON(!i915->gt.active_requests); if (!--i915->gt.active_requests) i915_gem_park(i915); - - GEM_BUG_ON(!engine->timeline->inflight_seqnos); - engine->timeline->inflight_seqnos--; } void i915_gem_retire_noop(struct i915_gem_active *active, @@ -296,6 +298,7 @@ void i915_gem_retire_noop(struct i915_gem_active *active, static void advance_ring(struct i915_request *request) { + struct intel_ring *ring = request->ring; unsigned int tail; /* @@ -307,7 +310,8 @@ static void advance_ring(struct i915_request *request) * Note this requires that we are always called in request * completion order. */ - if (list_is_last(&request->ring_link, &request->ring->request_list)) { + GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); + if (list_is_last(&request->ring_link, &ring->request_list)) { /* * We may race here with execlists resubmitting this request * as we retire it. The resubmission will move the ring->tail @@ -317,12 +321,13 @@ static void advance_ring(struct i915_request *request) * noops - they are safe to be replayed on a reset. */ tail = READ_ONCE(request->tail); + list_del(&ring->active_link); } else { tail = request->postfix; } - list_del(&request->ring_link); + list_del_init(&request->ring_link); - request->ring->head = tail; + ring->head = tail; } static void free_capture_list(struct i915_request *request) @@ -338,31 +343,84 @@ static void free_capture_list(struct i915_request *request) } } +static void __retire_engine_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n", + __func__, engine->name, + rq->fence.context, rq->fence.seqno, + rq->global_seqno, + intel_engine_get_seqno(engine)); + + GEM_BUG_ON(!i915_request_completed(rq)); + + local_irq_disable(); + + spin_lock(&engine->timeline.lock); + GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests)); + list_del_init(&rq->link); + spin_unlock(&engine->timeline.lock); + + spin_lock(&rq->lock); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + dma_fence_signal_locked(&rq->fence); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) + intel_engine_cancel_signaling(rq); + if (rq->waitboost) { + GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); + atomic_dec(&rq->i915->gt_pm.rps.num_waiters); + } + spin_unlock(&rq->lock); + + local_irq_enable(); + + /* + * The backing object for the context is done after switching to the + * *next* context. Therefore we cannot retire the previous context until + * the next context has already started running. However, since we + * cannot take the required locks at i915_request_submit() we + * defer the unpinning of the active context to now, retirement of + * the subsequent request. + */ + if (engine->last_retired_context) + intel_context_unpin(engine->last_retired_context, engine); + engine->last_retired_context = rq->ctx; +} + +static void __retire_engine_upto(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + struct i915_request *tmp; + + if (list_empty(&rq->link)) + return; + + do { + tmp = list_first_entry(&engine->timeline.requests, + typeof(*tmp), link); + + GEM_BUG_ON(tmp->engine != engine); + __retire_engine_request(engine, tmp); + } while (tmp != rq); +} + static void i915_request_retire(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", - engine->name, + request->engine->name, request->fence.context, request->fence.seqno, request->global_seqno, - intel_engine_get_seqno(engine)); + intel_engine_get_seqno(request->engine)); lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); GEM_BUG_ON(!i915_request_completed(request)); - GEM_BUG_ON(!request->i915->gt.active_requests); trace_i915_request_retire(request); - spin_lock_irq(&engine->timeline->lock); - list_del_init(&request->link); - spin_unlock_irq(&engine->timeline->lock); - - unreserve_engine(request->engine); advance_ring(request); - free_capture_list(request); /* @@ -398,65 +456,53 @@ static void i915_request_retire(struct i915_request *request) /* Retirement decays the ban score as it is a sign of ctx progress */ atomic_dec_if_positive(&request->ctx->ban_score); + intel_context_unpin(request->ctx, request->engine); - /* - * The backing object for the context is done after switching to the - * *next* context. Therefore we cannot retire the previous context until - * the next context has already started running. However, since we - * cannot take the required locks at i915_request_submit() we - * defer the unpinning of the active context to now, retirement of - * the subsequent request. - */ - if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); - engine->last_retired_context = request->ctx; + __retire_engine_upto(request->engine, request); - spin_lock_irq(&request->lock); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) - dma_fence_signal_locked(&request->fence); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); - if (request->waitboost) { - GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); - atomic_dec(&request->i915->gt_pm.rps.num_waiters); - } - spin_unlock_irq(&request->lock); + unreserve_gt(request->i915); - i915_priotree_fini(request->i915, &request->priotree); + i915_sched_node_fini(request->i915, &request->sched); i915_request_put(request); } void i915_request_retire_upto(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; + struct intel_ring *ring = rq->ring; struct i915_request *tmp; + GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", + rq->engine->name, + rq->fence.context, rq->fence.seqno, + rq->global_seqno, + intel_engine_get_seqno(rq->engine)); + lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!i915_request_completed(rq)); - if (list_empty(&rq->link)) + if (list_empty(&rq->ring_link)) return; do { - tmp = list_first_entry(&engine->timeline->requests, - typeof(*tmp), link); + tmp = list_first_entry(&ring->request_list, + typeof(*tmp), ring_link); i915_request_retire(tmp); } while (tmp != rq); } -static u32 timeline_get_seqno(struct intel_timeline *tl) +static u32 timeline_get_seqno(struct i915_timeline *tl) { return ++tl->seqno; } static void move_to_timeline(struct i915_request *request, - struct intel_timeline *timeline) + struct i915_timeline *timeline) { - GEM_BUG_ON(request->timeline == request->engine->timeline); - lockdep_assert_held(&request->engine->timeline->lock); + GEM_BUG_ON(request->timeline == &request->engine->timeline); + lockdep_assert_held(&request->engine->timeline.lock); - spin_lock(&request->timeline->lock); + spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); } @@ -469,15 +515,15 @@ void __i915_request_submit(struct i915_request *request) GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n", engine->name, request->fence.context, request->fence.seqno, - engine->timeline->seqno + 1, + engine->timeline.seqno + 1, intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); GEM_BUG_ON(request->global_seqno); - seqno = timeline_get_seqno(engine->timeline); + seqno = timeline_get_seqno(&engine->timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); @@ -492,7 +538,7 @@ void __i915_request_submit(struct i915_request *request) request->ring->vaddr + request->postfix); /* Transfer from per-context onto the global per-engine timeline */ - move_to_timeline(request, engine->timeline); + move_to_timeline(request, &engine->timeline); trace_i915_request_execute(request); @@ -505,11 +551,11 @@ void i915_request_submit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_submit(request); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } void __i915_request_unsubmit(struct i915_request *request) @@ -523,17 +569,17 @@ void __i915_request_unsubmit(struct i915_request *request) intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); /* * Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ GEM_BUG_ON(!request->global_seqno); - GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), request->global_seqno)); - engine->timeline->seqno--; + engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); @@ -560,11 +606,11 @@ void i915_request_unsubmit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_unsubmit(request); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static int __i915_sw_fence_call @@ -635,12 +681,12 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ring = engine->context_pin(engine, ctx); + ring = intel_context_pin(ctx, engine); if (IS_ERR(ring)) return ERR_CAST(ring); GEM_BUG_ON(!ring); - ret = reserve_engine(engine); + ret = reserve_gt(i915); if (ret) goto err_unpin; @@ -648,10 +694,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (ret) goto err_unreserve; - /* Move the oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry_or_null(&engine->timeline->requests, - typeof(*rq), link); - if (rq && i915_request_completed(rq)) + /* Move our oldest request to the slab-cache (if not in use!) */ + rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); + if (!list_is_last(&rq->ring_link, &ring->request_list) && + i915_request_completed(rq)) i915_request_retire(rq); /* @@ -711,8 +757,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) } } - rq->timeline = i915_gem_context_lookup_timeline(ctx, engine); - GEM_BUG_ON(rq->timeline == engine->timeline); + INIT_LIST_HEAD(&rq->active_list); + rq->i915 = i915; + rq->engine = engine; + rq->ctx = ctx; + rq->ring = ring; + rq->timeline = ring->timeline; + GEM_BUG_ON(rq->timeline == &engine->timeline); spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, @@ -725,13 +776,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); init_waitqueue_head(&rq->execute); - i915_priotree_init(&rq->priotree); - - INIT_LIST_HEAD(&rq->active_list); - rq->i915 = i915; - rq->engine = engine; - rq->ctx = ctx; - rq->ring = ring; + i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ rq->global_seqno = 0; @@ -768,6 +813,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (ret) goto err_unwind; + /* Keep a second pin for the dual retirement along engine and ring */ + __intel_context_pin(rq->ctx, engine); + /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); return rq; @@ -777,14 +825,14 @@ err_unwind: /* Make sure we didn't add ourselves to external state before freeing */ GEM_BUG_ON(!list_empty(&rq->active_list)); - GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list)); - GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list)); + GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); + GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); kmem_cache_free(i915->requests, rq); err_unreserve: - unreserve_engine(engine); + unreserve_gt(i915); err_unpin: - engine->context_unpin(engine, ctx); + intel_context_unpin(ctx, engine); return ERR_PTR(ret); } @@ -800,9 +848,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; if (to->engine->schedule) { - ret = i915_priotree_add_dependency(to->i915, - &to->priotree, - &from->priotree); + ret = i915_sched_node_add_dependency(to->i915, + &to->sched, + &from->sched); if (ret < 0) return ret; } @@ -880,7 +928,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Squash repeated waits to the same timelines */ if (fence->context != rq->i915->mm.unordered_timeline && - intel_timeline_sync_is_later(rq->timeline, fence)) + i915_timeline_sync_is_later(rq->timeline, fence)) continue; if (dma_fence_is_i915(fence)) @@ -894,7 +942,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Record the latest fence used against each timeline */ if (fence->context != rq->i915->mm.unordered_timeline) - intel_timeline_sync_set(rq->timeline, fence); + i915_timeline_sync_set(rq->timeline, fence); } while (--nchild); return 0; @@ -971,7 +1019,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) { struct intel_engine_cs *engine = request->engine; struct intel_ring *ring = request->ring; - struct intel_timeline *timeline = request->timeline; + struct i915_timeline *timeline = request->timeline; struct i915_request *prev; u32 *cs; int err; @@ -1033,10 +1081,10 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); if (engine->schedule) - __i915_priotree_add_dependency(&request->priotree, - &prev->priotree, - &request->dep, - 0); + __i915_sched_node_add_dependency(&request->sched, + &prev->sched, + &request->dep, + 0); } spin_lock_irq(&timeline->lock); @@ -1047,6 +1095,8 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); + if (list_is_first(&request->ring_link, &ring->request_list)) + list_add(&ring->active_link, &request->i915->gt.active_rings); request->emitted_jiffies = jiffies; /* @@ -1060,12 +1110,11 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ - rcu_read_lock(); + local_bh_disable(); + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) - engine->schedule(request, request->ctx->priority); + engine->schedule(request, &request->ctx->sched); rcu_read_unlock(); - - local_bh_disable(); i915_sw_fence_commit(&request->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ @@ -1354,38 +1403,30 @@ complete: return timeout; } -static void engine_retire_requests(struct intel_engine_cs *engine) +static void ring_retire_requests(struct intel_ring *ring) { struct i915_request *request, *next; - u32 seqno = intel_engine_get_seqno(engine); - LIST_HEAD(retire); - spin_lock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, - &engine->timeline->requests, link) { - if (!i915_seqno_passed(seqno, request->global_seqno)) + &ring->request_list, ring_link) { + if (!i915_request_completed(request)) break; - list_move_tail(&request->link, &retire); - } - spin_unlock_irq(&engine->timeline->lock); - - list_for_each_entry_safe(request, next, &retire, link) i915_request_retire(request); + } } void i915_retire_requests(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct intel_ring *ring, *tmp; lockdep_assert_held(&i915->drm.struct_mutex); if (!i915->gt.active_requests) return; - for_each_engine(engine, i915, id) - engine_retire_requests(engine); + list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) + ring_retire_requests(ring); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 7d6eb82eeb91..eddbd4245cb3 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -28,13 +28,16 @@ #include <linux/dma-fence.h> #include "i915_gem.h" +#include "i915_scheduler.h" #include "i915_sw_fence.h" +#include "i915_scheduler.h" #include <uapi/drm/i915_drm.h> struct drm_file; struct drm_i915_gem_object; struct i915_request; +struct i915_timeline; struct intel_wait { struct rb_node node; @@ -48,44 +51,6 @@ struct intel_signal_node { struct list_head link; }; -struct i915_dependency { - struct i915_priotree *signaler; - struct list_head signal_link; - struct list_head wait_link; - struct list_head dfs_link; - unsigned long flags; -#define I915_DEPENDENCY_ALLOC BIT(0) -}; - -/* - * "People assume that time is a strict progression of cause to effect, but - * actually, from a nonlinear, non-subjective viewpoint, it's more like a big - * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 - * - * Requests exist in a complex web of interdependencies. Each request - * has to wait for some other request to complete before it is ready to be run - * (e.g. we have to wait until the pixels have been rendering into a texture - * before we can copy from it). We track the readiness of a request in terms - * of fences, but we also need to keep the dependency tree for the lifetime - * of the request (beyond the life of an individual fence). We use the tree - * at various points to reorder the requests whilst keeping the requests - * in order with respect to their various dependencies. - */ -struct i915_priotree { - struct list_head signalers_list; /* those before us, we depend upon */ - struct list_head waiters_list; /* those after us, they depend upon us */ - struct list_head link; - int priority; -}; - -enum { - I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, - I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, - I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, - - I915_PRIORITY_INVALID = INT_MIN -}; - struct i915_capture_list { struct i915_capture_list *next; struct i915_vma *vma; @@ -131,7 +96,7 @@ struct i915_request { struct i915_gem_context *ctx; struct intel_engine_cs *engine; struct intel_ring *ring; - struct intel_timeline *timeline; + struct i915_timeline *timeline; struct intel_signal_node signaling; /* @@ -154,7 +119,7 @@ struct i915_request { * to retirement), i.e. bidirectional dependency information for the * request not tied to individual fences. */ - struct i915_priotree priotree; + struct i915_sched_node sched; struct i915_dependency dep; /** @@ -343,10 +308,10 @@ static inline bool i915_request_started(const struct i915_request *rq) seqno - 1); } -static inline bool i915_priotree_signaled(const struct i915_priotree *pt) +static inline bool i915_sched_node_signaled(const struct i915_sched_node *node) { const struct i915_request *rq = - container_of(pt, const struct i915_request, priotree); + container_of(node, const struct i915_request, sched); return i915_request_completed(rq); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h new file mode 100644 index 000000000000..70a42220358d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef _I915_SCHEDULER_H_ +#define _I915_SCHEDULER_H_ + +#include <linux/bitops.h> + +#include <uapi/drm/i915_drm.h> + +enum { + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + I915_PRIORITY_INVALID = INT_MIN +}; + +struct i915_sched_attr { + /** + * @priority: execution and service priority + * + * All clients are equal, but some are more equal than others! + * + * Requests from a context with a greater (more positive) value of + * @priority will be executed before those with a lower @priority + * value, forming a simple QoS. + * + * The &drm_i915_private.kernel_context is assigned the lowest priority. + */ + int priority; +}; + +/* + * "People assume that time is a strict progression of cause to effect, but + * actually, from a nonlinear, non-subjective viewpoint, it's more like a big + * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 + * + * Requests exist in a complex web of interdependencies. Each request + * has to wait for some other request to complete before it is ready to be run + * (e.g. we have to wait until the pixels have been rendering into a texture + * before we can copy from it). We track the readiness of a request in terms + * of fences, but we also need to keep the dependency tree for the lifetime + * of the request (beyond the life of an individual fence). We use the tree + * at various points to reorder the requests whilst keeping the requests + * in order with respect to their various dependencies. + * + * There is no active component to the "scheduler". As we know the dependency + * DAG of each request, we are able to insert it into a sorted queue when it + * is ready, and are able to reorder its portion of the graph to accommodate + * dynamic priority changes. + */ +struct i915_sched_node { + struct list_head signalers_list; /* those before us, we depend upon */ + struct list_head waiters_list; /* those after us, they depend upon us */ + struct list_head link; + struct i915_sched_attr attr; +}; + +struct i915_dependency { + struct i915_sched_node *signaler; + struct list_head signal_link; + struct list_head wait_link; + struct list_head dfs_link; + unsigned long flags; +#define I915_DEPENDENCY_ALLOC BIT(0) +}; + +#endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c new file mode 100644 index 000000000000..4667cc08c416 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016-2018 Intel Corporation + */ + +#include "i915_drv.h" + +#include "i915_timeline.h" +#include "i915_syncmap.h" + +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *timeline, + const char *name) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + + /* + * Ideally we want a set of engines on a single leaf as we expect + * to mostly be tracking synchronisation between engines. It is not + * a huge issue if this is not the case, but we may want to mitigate + * any page crossing penalties if they become an issue. + */ + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + + timeline->name = name; + + list_add(&timeline->link, &i915->gt.timelines); + + /* Called during early_init before we know how many engines there are */ + + timeline->fence_context = dma_fence_context_alloc(1); + + spin_lock_init(&timeline->lock); + + init_request_active(&timeline->last_request, NULL); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); +} + +/** + * i915_timelines_park - called when the driver idles + * @i915: the drm_i915_private device + * + * When the driver is completely idle, we know that all of our sync points + * have been signaled and our tracking is then entirely redundant. Any request + * to wait upon an older sync point will be completed instantly as we know + * the fence is signaled and therefore we will not even look them up in the + * sync point map. + */ +void i915_timelines_park(struct drm_i915_private *i915) +{ + struct i915_timeline *timeline; + + lockdep_assert_held(&i915->drm.struct_mutex); + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + /* + * All known fences are completed so we can scrap + * the current sync point tracking and start afresh, + * any attempt to wait upon a previous sync point + * will be skipped as the fence was signaled. + */ + i915_syncmap_free(&timeline->sync); + } +} + +void i915_timeline_fini(struct i915_timeline *timeline) +{ + GEM_BUG_ON(!list_empty(&timeline->requests)); + + i915_syncmap_free(&timeline->sync); + + list_del(&timeline->link); +} + +struct i915_timeline * +i915_timeline_create(struct drm_i915_private *i915, const char *name) +{ + struct i915_timeline *timeline; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + i915_timeline_init(i915, timeline, name); + kref_init(&timeline->kref); + + return timeline; +} + +void __i915_timeline_free(struct kref *kref) +{ + struct i915_timeline *timeline = + container_of(kref, typeof(*timeline), kref); + + i915_timeline_fini(timeline); + kfree(timeline); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_timeline.c" +#include "selftests/i915_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 33e01bf6aa36..dc2a4632faa7 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -22,27 +22,20 @@ * */ -#ifndef I915_GEM_TIMELINE_H -#define I915_GEM_TIMELINE_H +#ifndef I915_TIMELINE_H +#define I915_TIMELINE_H #include <linux/list.h> +#include <linux/kref.h> #include "i915_request.h" #include "i915_syncmap.h" #include "i915_utils.h" -struct i915_gem_timeline; - -struct intel_timeline { +struct i915_timeline { u64 fence_context; u32 seqno; - /** - * Count of outstanding requests, from the time they are constructed - * to the moment they are retired. Loosely coupled to hardware. - */ - u32 inflight_seqnos; - spinlock_t lock; /** @@ -77,47 +70,57 @@ struct intel_timeline { */ u32 global_sync[I915_NUM_ENGINES]; - struct i915_gem_timeline *common; -}; - -struct i915_gem_timeline { struct list_head link; - - struct drm_i915_private *i915; const char *name; - struct intel_timeline engine[I915_NUM_ENGINES]; + struct kref kref; }; -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *tl, - const char *name); -int i915_gem_timeline_init__global(struct drm_i915_private *i915); -void i915_gem_timelines_park(struct drm_i915_private *i915); -void i915_gem_timeline_fini(struct i915_gem_timeline *tl); +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *tl, + const char *name); +void i915_timeline_fini(struct i915_timeline *tl); + +struct i915_timeline * +i915_timeline_create(struct drm_i915_private *i915, const char *name); -static inline int __intel_timeline_sync_set(struct intel_timeline *tl, - u64 context, u32 seqno) +static inline struct i915_timeline * +i915_timeline_get(struct i915_timeline *timeline) +{ + kref_get(&timeline->kref); + return timeline; +} + +void __i915_timeline_free(struct kref *kref); +static inline void i915_timeline_put(struct i915_timeline *timeline) +{ + kref_put(&timeline->kref, __i915_timeline_free); +} + +static inline int __i915_timeline_sync_set(struct i915_timeline *tl, + u64 context, u32 seqno) { return i915_syncmap_set(&tl->sync, context, seqno); } -static inline int intel_timeline_sync_set(struct intel_timeline *tl, - const struct dma_fence *fence) +static inline int i915_timeline_sync_set(struct i915_timeline *tl, + const struct dma_fence *fence) { - return __intel_timeline_sync_set(tl, fence->context, fence->seqno); + return __i915_timeline_sync_set(tl, fence->context, fence->seqno); } -static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, - u64 context, u32 seqno) +static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl, + u64 context, u32 seqno) { return i915_syncmap_is_later(&tl->sync, context, seqno); } -static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, - const struct dma_fence *fence) +static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, + const struct dma_fence *fence) { - return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); + return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +void i915_timelines_park(struct drm_i915_private *i915); + #endif diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 408827bf5d96..8cc3a256f29d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -679,45 +679,68 @@ DEFINE_EVENT(i915_request, i915_request_execute, TP_ARGS(rq) ); -DECLARE_EVENT_CLASS(i915_request_hw, - TP_PROTO(struct i915_request *rq, unsigned int port), - TP_ARGS(rq, port), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u32, hw_id) - __field(u32, ring) - __field(u32, ctx) - __field(u32, seqno) - __field(u32, global_seqno) - __field(u32, port) - ), - - TP_fast_assign( - __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->ctx->hw_id; - __entry->ring = rq->engine->id; - __entry->ctx = rq->fence.context; - __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; - __entry->port = port; - ), - - TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", - __entry->dev, __entry->hw_id, __entry->ring, - __entry->ctx, __entry->seqno, - __entry->global_seqno, __entry->port) -); +TRACE_EVENT(i915_request_in, + TP_PROTO(struct i915_request *rq, unsigned int port), + TP_ARGS(rq, port), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, hw_id) + __field(u32, ring) + __field(u32, ctx) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, port) + __field(u32, prio) + ), + + TP_fast_assign( + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->prio = rq->sched.attr.priority; + __entry->port = port; + ), -DEFINE_EVENT(i915_request_hw, i915_request_in, - TP_PROTO(struct i915_request *rq, unsigned int port), - TP_ARGS(rq, port) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, prio=%u, global=%u, port=%u", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->prio, __entry->global_seqno, + __entry->port) ); -DEFINE_EVENT(i915_request, i915_request_out, - TP_PROTO(struct i915_request *rq), - TP_ARGS(rq) +TRACE_EVENT(i915_request_out, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, hw_id) + __field(u32, ring) + __field(u32, ctx) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, completed) + ), + + TP_fast_assign( + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->completed = i915_request_completed(rq); + ), + + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, completed?=%u", + __entry->dev, __entry->hw_id, __entry->ring, + __entry->ctx, __entry->seqno, + __entry->global_seqno, __entry->completed) ); + #else #if !defined(TRACE_HEADER_MULTI_READ) static inline void @@ -811,42 +834,6 @@ DEFINE_EVENT(i915_request, i915_request_wait_end, TP_ARGS(rq) ); -TRACE_EVENT(i915_flip_request, - TP_PROTO(int plane, struct drm_i915_gem_object *obj), - - TP_ARGS(plane, obj), - - TP_STRUCT__entry( - __field(int, plane) - __field(struct drm_i915_gem_object *, obj) - ), - - TP_fast_assign( - __entry->plane = plane; - __entry->obj = obj; - ), - - TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj) -); - -TRACE_EVENT(i915_flip_complete, - TP_PROTO(int plane, struct drm_i915_gem_object *obj), - - TP_ARGS(plane, obj), - - TP_STRUCT__entry( - __field(int, plane) - __field(struct drm_i915_gem_object *, obj) - ), - - TP_fast_assign( - __entry->plane = plane; - __entry->obj = obj; - ), - - TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj) -); - TRACE_EVENT_CONDITION(i915_reg_rw, TP_PROTO(bool write, i915_reg_t reg, u64 val, int len, bool trace), diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 0695717522ea..00165ad55fb3 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -120,6 +120,12 @@ static inline u64 ptr_to_u64(const void *ptr) #include <linux/list.h> +static inline int list_is_first(const struct list_head *list, + const struct list_head *head) +{ + return head->next == list; +} + static inline void __list_del_many(struct list_head *head, struct list_head *first) { diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4bda3bd29bf5..9324d476e0a7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -46,8 +46,6 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) - WARN_ON(i915_vma_unbind(vma)); GEM_BUG_ON(!i915_gem_object_is_active(obj)); if (--obj->active_count) @@ -232,7 +230,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj, if (!vma) vma = vma_create(obj, vm, view); - GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma)); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; @@ -684,13 +681,43 @@ err_unpin: return ret; } -static void i915_vma_destroy(struct i915_vma *vma) +void i915_vma_close(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + GEM_BUG_ON(i915_vma_is_closed(vma)); + vma->flags |= I915_VMA_CLOSED; + + /* + * We defer actually closing, unbinding and destroying the VMA until + * the next idle point, or if the object is freed in the meantime. By + * postponing the unbind, we allow for it to be resurrected by the + * client, avoiding the work required to rebind the VMA. This is + * advantageous for DRI, where the client/server pass objects + * between themselves, temporarily opening a local VMA to the + * object, and then closing it again. The same object is then reused + * on the next frame (or two, depending on the depth of the swap queue) + * causing us to rebind the VMA once more. This ends up being a lot + * of wasted work for the steady state. + */ + list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma); +} + +void i915_vma_reopen(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + if (vma->flags & I915_VMA_CLOSED) { + vma->flags &= ~I915_VMA_CLOSED; + list_del(&vma->closed_link); + } +} + +static void __i915_vma_destroy(struct i915_vma *vma) { int i; GEM_BUG_ON(vma->node.allocated); - GEM_BUG_ON(i915_vma_is_active(vma)); - GEM_BUG_ON(!i915_vma_is_closed(vma)); GEM_BUG_ON(vma->fence); for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) @@ -699,6 +726,7 @@ static void i915_vma_destroy(struct i915_vma *vma) list_del(&vma->obj_link); list_del(&vma->vm_link); + rb_erase(&vma->obj_node, &vma->obj->vma_tree); if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -706,15 +734,30 @@ static void i915_vma_destroy(struct i915_vma *vma) kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); } -void i915_vma_close(struct i915_vma *vma) +void i915_vma_destroy(struct i915_vma *vma) { - GEM_BUG_ON(i915_vma_is_closed(vma)); - vma->flags |= I915_VMA_CLOSED; + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - rb_erase(&vma->obj_node, &vma->obj->vma_tree); + GEM_BUG_ON(i915_vma_is_active(vma)); + GEM_BUG_ON(i915_vma_is_pinned(vma)); + + if (i915_vma_is_closed(vma)) + list_del(&vma->closed_link); + + WARN_ON(i915_vma_unbind(vma)); + __i915_vma_destroy(vma); +} + +void i915_vma_parked(struct drm_i915_private *i915) +{ + struct i915_vma *vma, *next; - if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) - WARN_ON(i915_vma_unbind(vma)); + list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { + GEM_BUG_ON(!i915_vma_is_closed(vma)); + i915_vma_destroy(vma); + } + + GEM_BUG_ON(!list_empty(&i915->gt.closed_vma)); } static void __i915_vma_iounmap(struct i915_vma *vma) @@ -804,7 +847,7 @@ int i915_vma_unbind(struct i915_vma *vma) return -EBUSY; if (!drm_mm_node_allocated(&vma->node)) - goto destroy; + return 0; GEM_BUG_ON(obj->bind_count == 0); GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); @@ -841,10 +884,6 @@ int i915_vma_unbind(struct i915_vma *vma) i915_vma_remove(vma); -destroy: - if (unlikely(i915_vma_is_closed(vma))) - i915_vma_destroy(vma); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 8c5022095418..fc4294cfaa91 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -119,6 +119,8 @@ struct i915_vma { /** This vma's place in the eviction list */ struct list_head evict_link; + struct list_head closed_link; + /** * Used for performing relocations during execbuffer insertion. */ @@ -285,6 +287,8 @@ void i915_vma_revoke_mmap(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); +void i915_vma_reopen(struct i915_vma *vma); +void i915_vma_destroy(struct i915_vma *vma); int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); @@ -408,6 +412,8 @@ i915_vma_unpin_fence(struct i915_vma *vma) __i915_vma_unpin_fence(vma); } +void i915_vma_parked(struct drm_i915_private *i915); + #define for_each_until(cond) if (cond) break; else /** diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index eb0579fb5e54..e8bf4cc499e1 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -177,11 +177,16 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ } /* FIXME pre-g4x don't work like this */ - if (intel_state->base.visible) + if (state->visible) crtc_state->active_planes |= BIT(intel_plane->id); else crtc_state->active_planes &= ~BIT(intel_plane->id); + if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) + crtc_state->nv12_planes |= BIT(intel_plane->id); + else + crtc_state->nv12_planes &= ~BIT(intel_plane->id); + return intel_plane_atomic_calc_changes(old_crtc_state, &crtc_state->base, old_plane_state, diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 702d3fab97fc..54270bdde100 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -530,6 +530,7 @@ parse_driver_features(struct drm_i915_private *dev_priv, */ if (!driver->drrs_enabled) dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED; + dev_priv->vbt.psr.enable = driver->psr_enabled; } static void diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 671a6d61e29d..18e643df523e 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -82,7 +82,7 @@ static unsigned long wait_timeout(void) static noinline void missed_breadcrumb(struct intel_engine_cs *engine) { - if (drm_debug & DRM_UT_DRIVER) { + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer(__func__); intel_engine_dump(engine, &p, @@ -130,11 +130,12 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) static void intel_breadcrumbs_fake_irq(struct timer_list *t) { - struct intel_engine_cs *engine = from_timer(engine, t, - breadcrumbs.fake_irq); + struct intel_engine_cs *engine = + from_timer(engine, t, breadcrumbs.fake_irq); struct intel_breadcrumbs *b = &engine->breadcrumbs; - /* The timer persists in case we cannot enable interrupts, + /* + * The timer persists in case we cannot enable interrupts, * or if we have previously seen seqno/interrupt incoherency * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). * Here the worker will wake up every jiffie in order to kick the @@ -148,6 +149,12 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) if (!b->irq_armed) return; + /* If the user has disabled the fake-irq, restore the hangchecking */ + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { + mod_timer(&b->hangcheck, wait_timeout()); + return; + } + mod_timer(&b->fake_irq, jiffies + 1); } @@ -831,8 +838,8 @@ static void cancel_fake_irq(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ del_timer_sync(&b->hangcheck); - del_timer_sync(&b->fake_irq); clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } @@ -840,15 +847,22 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - cancel_fake_irq(engine); spin_lock_irq(&b->irq_lock); + /* + * Leave the fake_irq timer enabled (if it is running), but clear the + * bit so that it turns itself off on its next wake up and goes back + * to the long hangcheck interval if still required. + */ + clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + if (b->irq_enabled) irq_enable(engine); else irq_disable(engine); - /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the + /* + * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the * GPU is active and may have already executed the MI_USER_INTERRUPT * before the CPU is ready to receive. However, the engine is currently * idle (we haven't started it yet), there is no possibility for a @@ -857,9 +871,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) */ clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - if (b->irq_armed) - enable_fake_irq(b); - spin_unlock_irq(&b->irq_lock); } diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 32d24c69da3c..704ddb4d3ca7 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -2302,9 +2302,44 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } +static int skl_dpll0_vco(struct intel_atomic_state *intel_state) +{ + struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev); + struct intel_crtc *crtc; + struct intel_crtc_state *crtc_state; + int vco, i; + + vco = intel_state->cdclk.logical.vco; + if (!vco) + vco = dev_priv->skl_preferred_vco_freq; + + for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) { + if (!crtc_state->base.enable) + continue; + + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + continue; + + /* + * DPLL0 VCO may need to be adjusted to get the correct + * clock for eDP. This will affect cdclk as well. + */ + switch (crtc_state->port_clock / 2) { + case 108000: + case 216000: + vco = 8640000; + break; + default: + vco = 8100000; + break; + } + } + + return vco; +} + static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); int min_cdclk, cdclk, vco; @@ -2312,9 +2347,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) if (min_cdclk < 0) return min_cdclk; - vco = intel_state->cdclk.logical.vco; - if (!vco) - vco = dev_priv->skl_preferred_vco_freq; + vco = skl_dpll0_vco(intel_state); /* * FIXME should also account for plane ratio diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 41e6c75a7f3c..cf9b600cca79 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -35,6 +35,7 @@ */ #define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin" +MODULE_FIRMWARE(I915_CSR_GLK); #define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) #define I915_CSR_CNL "i915/cnl_dmc_ver1_07.bin" @@ -297,7 +298,10 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, csr->version = css_header->version; - if (IS_CANNONLAKE(dev_priv)) { + if (csr->fw_path == i915_modparams.dmc_firmware_path) { + /* Bypass version check for firmware override. */ + required_version = csr->version; + } else if (IS_CANNONLAKE(dev_priv)) { required_version = CNL_CSR_VERSION_REQUIRED; } else if (IS_GEMINILAKE(dev_priv)) { required_version = GLK_CSR_VERSION_REQUIRED; @@ -452,7 +456,9 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) if (!HAS_CSR(dev_priv)) return; - if (IS_CANNONLAKE(dev_priv)) + if (i915_modparams.dmc_firmware_path) + csr->fw_path = i915_modparams.dmc_firmware_path; + else if (IS_CANNONLAKE(dev_priv)) csr->fw_path = I915_CSR_CNL; else if (IS_GEMINILAKE(dev_priv)) csr->fw_path = I915_CSR_GLK; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 92cb26b18a9b..f4a8598a2d39 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -870,6 +870,45 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } } +static const struct icl_combo_phy_ddi_buf_trans * +icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port, + int type, int *n_entries) +{ + u32 voltage = I915_READ(ICL_PORT_COMP_DW3(port)) & VOLTAGE_INFO_MASK; + + if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { + switch (voltage) { + case VOLTAGE_INFO_0_85V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_85V); + return icl_combo_phy_ddi_translations_edp_0_85V; + case VOLTAGE_INFO_0_95V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_95V); + return icl_combo_phy_ddi_translations_edp_0_95V; + case VOLTAGE_INFO_1_05V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_1_05V); + return icl_combo_phy_ddi_translations_edp_1_05V; + default: + MISSING_CASE(voltage); + return NULL; + } + } else { + switch (voltage) { + case VOLTAGE_INFO_0_85V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_85V); + return icl_combo_phy_ddi_translations_dp_hdmi_0_85V; + case VOLTAGE_INFO_0_95V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_95V); + return icl_combo_phy_ddi_translations_dp_hdmi_0_95V; + case VOLTAGE_INFO_1_05V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_1_05V); + return icl_combo_phy_ddi_translations_dp_hdmi_1_05V; + default: + MISSING_CASE(voltage); + return NULL; + } + } +} + static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) { int n_entries, level, default_entry; @@ -1013,6 +1052,25 @@ static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) } } +static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, + const struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + + switch (id) { + default: + MISSING_CASE(id); + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + return DDI_CLK_SEL_NONE; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + return DDI_CLK_SEL_MG; + } +} + /* Starting with Haswell, different DDI ports can work in FDI mode for * connection to the PCH-located connectors. For this, it is necessary to train * both the DDI port and PCH receiver for the desired DDI buffer settings. @@ -2025,7 +2083,13 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) enum port port = encoder->port; int n_entries; - if (IS_CANNONLAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + if (port == PORT_A || port == PORT_B) + icl_get_combo_buf_trans(dev_priv, port, encoder->type, + &n_entries); + else + n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); + } else if (IS_CANNONLAKE(dev_priv)) { if (encoder->type == INTEL_OUTPUT_EDP) cnl_get_buf_trans_edp(dev_priv, &n_entries); else @@ -2182,6 +2246,146 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); } +static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, + u32 level, enum port port, int type) +{ + const struct icl_combo_phy_ddi_buf_trans *ddi_translations = NULL; + u32 n_entries, val; + int ln; + + ddi_translations = icl_get_combo_buf_trans(dev_priv, port, type, + &n_entries); + if (!ddi_translations) + return; + + if (level >= n_entries) { + DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1); + level = n_entries - 1; + } + + /* Set PORT_TX_DW5 Rterm Sel to 110b. */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val &= ~RTERM_SELECT_MASK; + val |= RTERM_SELECT(0x6); + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* Program PORT_TX_DW5 */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + /* Set DisableTap2 and DisableTap3 if MIPI DSI + * Clear DisableTap2 and DisableTap3 for all other Ports + */ + if (type == INTEL_OUTPUT_DSI) { + val |= TAP2_DISABLE; + val |= TAP3_DISABLE; + } else { + val &= ~TAP2_DISABLE; + val &= ~TAP3_DISABLE; + } + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* Program PORT_TX_DW2 */ + val = I915_READ(ICL_PORT_TX_DW2_LN0(port)); + val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | + RCOMP_SCALAR_MASK); + val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_select); + val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_select); + /* Program Rcomp scalar for every table entry */ + val |= RCOMP_SCALAR(ddi_translations[level].dw2_swing_scalar); + I915_WRITE(ICL_PORT_TX_DW2_GRP(port), val); + + /* Program PORT_TX_DW4 */ + /* We cannot write to GRP. It would overwrite individual loadgen. */ + for (ln = 0; ln <= 3; ln++) { + val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); + val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | + CURSOR_COEFF_MASK); + val |= ddi_translations[level].dw4_scaling; + I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); + } +} + +static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, + u32 level, + enum intel_output_type type) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; + int width = 0; + int rate = 0; + u32 val; + int ln = 0; + + if (type == INTEL_OUTPUT_HDMI) { + width = 4; + /* Rate is always < than 6GHz for HDMI */ + } else { + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + + width = intel_dp->lane_count; + rate = intel_dp->link_rate; + } + + /* + * 1. If port type is eDP or DP, + * set PORT_PCS_DW1 cmnkeeper_enable to 1b, + * else clear to 0b. + */ + val = I915_READ(ICL_PORT_PCS_DW1_LN0(port)); + if (type == INTEL_OUTPUT_HDMI) + val &= ~COMMON_KEEPER_EN; + else + val |= COMMON_KEEPER_EN; + I915_WRITE(ICL_PORT_PCS_DW1_GRP(port), val); + + /* 2. Program loadgen select */ + /* + * Program PORT_TX_DW4_LN depending on Bit rate and used lanes + * <= 6 GHz and 4 lanes (LN0=0, LN1=1, LN2=1, LN3=1) + * <= 6 GHz and 1,2 lanes (LN0=0, LN1=1, LN2=1, LN3=0) + * > 6 GHz (LN0=0, LN1=0, LN2=0, LN3=0) + */ + for (ln = 0; ln <= 3; ln++) { + val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); + val &= ~LOADGEN_SELECT; + + if ((rate <= 600000 && width == 4 && ln >= 1) || + (rate <= 600000 && width < 4 && (ln == 1 || ln == 2))) { + val |= LOADGEN_SELECT; + } + I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); + } + + /* 3. Set PORT_CL_DW5 SUS Clock Config to 11b */ + val = I915_READ(ICL_PORT_CL_DW5(port)); + val |= SUS_CLOCK_CONFIG; + I915_WRITE(ICL_PORT_CL_DW5(port), val); + + /* 4. Clear training enable to change swing values */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val &= ~TX_TRAINING_EN; + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* 5. Program swing and de-emphasis */ + icl_ddi_combo_vswing_program(dev_priv, level, port, type); + + /* 6. Set training enable to trigger update */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val |= TX_TRAINING_EN; + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); +} + +static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level, + enum intel_output_type type) +{ + enum port port = encoder->port; + + if (port == PORT_A || port == PORT_B) + icl_combo_phy_ddi_vswing_sequence(encoder, level, type); + else + /* Not Implemented Yet */ + WARN_ON(1); +} + static uint32_t translate_signal_level(int signal_levels) { int i; @@ -2213,7 +2417,9 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) struct intel_encoder *encoder = &dport->base; int level = intel_ddi_dp_level(intel_dp); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, encoder->type); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, encoder->type); else bxt_ddi_vswing_sequence(encoder, level, encoder->type); @@ -2234,6 +2440,71 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) return DDI_BUF_TRANS_SELECT(level); } +void icl_map_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) +{ + struct intel_shared_dpll *pll = crtc_state->shared_dpll; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_connector_state *conn_state; + struct drm_connector *conn; + int i; + + for_each_new_connector_in_state(old_state, conn, conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(conn_state->best_encoder); + enum port port; + uint32_t val; + + if (conn_state->crtc != crtc) + continue; + + port = encoder->port; + mutex_lock(&dev_priv->dpll_lock); + + val = I915_READ(DPCLKA_CFGCR0_ICL); + WARN_ON((val & DPCLKA_CFGCR0_DDI_CLK_OFF(port)) == 0); + + if (port == PORT_A || port == PORT_B) { + val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); + val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); + I915_WRITE(DPCLKA_CFGCR0_ICL, val); + POSTING_READ(DPCLKA_CFGCR0_ICL); + } + + val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); + I915_WRITE(DPCLKA_CFGCR0_ICL, val); + + mutex_unlock(&dev_priv->dpll_lock); + } +} + +void icl_unmap_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(old_conn_state->best_encoder); + enum port port; + + if (old_conn_state->crtc != crtc) + continue; + + port = encoder->port; + mutex_lock(&dev_priv->dpll_lock); + I915_WRITE(DPCLKA_CFGCR0_ICL, + I915_READ(DPCLKA_CFGCR0_ICL) | + DPCLKA_CFGCR0_DDI_CLK_OFF(port)); + mutex_unlock(&dev_priv->dpll_lock); + } +} + static void intel_ddi_clk_select(struct intel_encoder *encoder, const struct intel_shared_dpll *pll) { @@ -2246,7 +2517,11 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, mutex_lock(&dev_priv->dpll_lock); - if (IS_CANNONLAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + if (port >= PORT_C) + I915_WRITE(DDI_CLK_SEL(port), + icl_pll_to_ddi_pll_sel(encoder, pll)); + } else if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); @@ -2284,14 +2559,18 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) { + if (port >= PORT_C) + I915_WRITE(DDI_CLK_SEL(port), DDI_CLK_SEL_NONE); + } else if (IS_CANNONLAKE(dev_priv)) { I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | DPCLKA_CFGCR0_DDI_CLK_OFF(port)); - else if (IS_GEN9_BC(dev_priv)) + } else if (IS_GEN9_BC(dev_priv)) { I915_WRITE(DPLL_CTRL2, I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port)); - else if (INTEL_GEN(dev_priv) < 9) + } else if (INTEL_GEN(dev_priv) < 9) { I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); + } } static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, @@ -2316,7 +2595,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, encoder->type); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, encoder->type); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(encoder, level, encoder->type); @@ -2347,7 +2628,9 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index a32ba72c514e..0fd13df424cf 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -848,7 +848,7 @@ void intel_device_info_runtime_init(struct intel_device_info *info) gen9_sseu_info_init(dev_priv); else if (INTEL_GEN(dev_priv) == 10) gen10_sseu_info_init(dev_priv); - else if (INTEL_INFO(dev_priv)->gen >= 11) + else if (INTEL_GEN(dev_priv) >= 11) gen11_sseu_info_init(dev_priv); /* Initialize command stream timestamp frequency */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 91eb47a1c922..dfd14b09f52c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -88,6 +88,22 @@ static const uint32_t skl_primary_formats[] = { DRM_FORMAT_VYUY, }; +static const uint32_t skl_pri_planar_formats[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, + DRM_FORMAT_NV12, +}; + static const uint64_t skl_format_modifiers_noccs[] = { I915_FORMAT_MOD_Yf_TILED, I915_FORMAT_MOD_Y_TILED, @@ -489,9 +505,21 @@ static const struct intel_limit intel_limits_bxt = { }; static void +skl_wa_528(struct drm_i915_private *dev_priv, int pipe, bool enable) +{ + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return; + + if (enable) + I915_WRITE(CHICKEN_PIPESL_1(pipe), HSW_FBCQ_DIS); + else + I915_WRITE(CHICKEN_PIPESL_1(pipe), 0); +} + +static void skl_wa_clkgate(struct drm_i915_private *dev_priv, int pipe, bool enable) { - if (IS_SKYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) return; if (enable) @@ -3089,6 +3117,29 @@ static int skl_check_main_surface(const struct intel_crtc_state *crtc_state, return 0; } +static int +skl_check_nv12_surface(const struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state) +{ + /* Display WA #1106 */ + if (plane_state->base.rotation != + (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90) && + plane_state->base.rotation != DRM_MODE_ROTATE_270) + return 0; + + /* + * src coordinates are rotated here. + * We check height but report it as width + */ + if (((drm_rect_height(&plane_state->base.src) >> 16) % 4) != 0) { + DRM_DEBUG_KMS("src width must be multiple " + "of 4 for rotated NV12\n"); + return -EINVAL; + } + + return 0; +} + static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; @@ -3172,6 +3223,9 @@ int skl_check_plane_surface(const struct intel_crtc_state *crtc_state, * the main surface setup depends on it. */ if (fb->format->format == DRM_FORMAT_NV12) { + ret = skl_check_nv12_surface(crtc_state, plane_state); + if (ret) + return ret; ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; @@ -3622,20 +3676,19 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; u32 plane_color_ctl = 0; - plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; - plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + if (INTEL_GEN(dev_priv) < 11) { + plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; + plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + } plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; plane_color_ctl |= glk_plane_color_ctl_alpha(fb->format->format); if (intel_format_is_yuv(fb->format->format)) { - if (fb->format->format == DRM_FORMAT_NV12) { - plane_color_ctl |= - PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709; - goto out; - } if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709; else @@ -3644,7 +3697,7 @@ u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE) plane_color_ctl |= PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE; } -out: + return plane_color_ctl; } @@ -4789,8 +4842,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, } if (plane_scaler_check && pixel_format == DRM_FORMAT_NV12 && - (src_h < SKL_MIN_YUV_420_SRC_H || (src_w % 4) != 0 || - (src_h % 4) != 0)) { + (src_h < SKL_MIN_YUV_420_SRC_H || src_w < SKL_MIN_YUV_420_SRC_W)) { DRM_DEBUG_KMS("NV12: src dimensions not met\n"); return -EINVAL; } @@ -5137,6 +5189,22 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s return !old_crtc_state->ips_enabled; } +static bool needs_nv12_wa(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->nv12_planes) + return false; + + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + + if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || + IS_CANNONLAKE(dev_priv)) + return true; + + return false; +} + static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); @@ -5161,7 +5229,6 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) if (old_primary_state) { struct drm_plane_state *new_primary_state = drm_atomic_get_new_plane_state(old_state, primary); - struct drm_framebuffer *fb = new_primary_state->fb; intel_fbc_post_update(crtc); @@ -5169,14 +5236,13 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) (needs_modeset(&pipe_config->base) || !old_primary_state->visible)) intel_post_enable_primary(&crtc->base, pipe_config); + } - /* Display WA 827 */ - if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || - IS_CANNONLAKE(dev_priv)) { - if (fb && fb->format->format == DRM_FORMAT_NV12) - skl_wa_clkgate(dev_priv, crtc->pipe, false); - } - + /* Display WA 827 */ + if (needs_nv12_wa(dev_priv, old_crtc_state) && + !needs_nv12_wa(dev_priv, pipe_config)) { + skl_wa_clkgate(dev_priv, crtc->pipe, false); + skl_wa_528(dev_priv, crtc->pipe, false); } } @@ -5201,14 +5267,6 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, struct intel_plane_state *new_primary_state = intel_atomic_get_new_plane_state(old_intel_state, to_intel_plane(primary)); - struct drm_framebuffer *fb = new_primary_state->base.fb; - - /* Display WA 827 */ - if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || - IS_CANNONLAKE(dev_priv)) { - if (fb && fb->format->format == DRM_FORMAT_NV12) - skl_wa_clkgate(dev_priv, crtc->pipe, true); - } intel_fbc_pre_update(crtc, pipe_config, new_primary_state); /* @@ -5220,6 +5278,13 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); } + /* Display WA 827 */ + if (!needs_nv12_wa(dev_priv, old_crtc_state) && + needs_nv12_wa(dev_priv, pipe_config)) { + skl_wa_clkgate(dev_priv, crtc->pipe, true); + skl_wa_528(dev_priv, crtc->pipe, true); + } + /* * Vblank time updates from the shadow to live plane control register * are blocked if the memory self-refresh mode is active at that @@ -5554,6 +5619,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (intel_crtc->config->shared_dpll) intel_enable_shared_dpll(intel_crtc); + if (INTEL_GEN(dev_priv) >= 11) + icl_map_plls_to_ports(crtc, pipe_config, old_state); + if (intel_crtc_has_dp_encoder(intel_crtc->config)) intel_dp_set_m_n(intel_crtc, M1_N1); @@ -5751,6 +5819,9 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); + + if (INTEL_GEN(dev_priv) >= 11) + icl_unmap_plls_to_ports(crtc, old_crtc_state, old_state); } static void i9xx_pfit_enable(struct intel_crtc *crtc) @@ -11142,39 +11213,42 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); -#define PIPE_CONF_CHECK_X(name) \ +#define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected 0x%08x, found 0x%08x)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_I(name) \ +#define PIPE_CONF_CHECK_I(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %i, found %i)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_BOOL(name) \ +#define PIPE_CONF_CHECK_BOOL(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %s, found %s)\n", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ - } + } \ +} while (0) /* * Checks state where we only read out the enabling, but not the entire * state itself (like full infoframes or ELD for audio). These states * require a full modeset on bootup to fix up. */ -#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) \ +#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) do { \ if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ PIPE_CONF_CHECK_BOOL(name); \ } else { \ @@ -11183,18 +11257,20 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_P(name) \ +#define PIPE_CONF_CHECK_P(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %p, found %p)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_M_N(name) \ +#define PIPE_CONF_CHECK_M_N(name) do { \ if (!intel_compare_link_m_n(¤t_config->name, \ &pipe_config->name,\ adjust)) { \ @@ -11212,14 +11288,15 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, pipe_config->name.link_m, \ pipe_config->name.link_n); \ ret = false; \ - } + } \ +} while (0) /* This is required for BDW+ where there is only one set of registers for * switching between high and low RR. * This macro can be used whenever a comparison has to be made between one * hw state and multiple sw state variables. */ -#define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) \ +#define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) do { \ if (!intel_compare_link_m_n(¤t_config->name, \ &pipe_config->name, adjust) && \ !intel_compare_link_m_n(¤t_config->alt_name, \ @@ -11244,9 +11321,10 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, pipe_config->name.link_m, \ pipe_config->name.link_n); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_FLAGS(name, mask) \ +#define PIPE_CONF_CHECK_FLAGS(name, mask) do { \ if ((current_config->name ^ pipe_config->name) & (mask)) { \ pipe_config_err(adjust, __stringify(name), \ "(%x) (expected %i, found %i)\n", \ @@ -11254,16 +11332,18 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, current_config->name & (mask), \ pipe_config->name & (mask)); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) \ +#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \ if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %i, found %i)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) #define PIPE_CONF_QUIRK(quirk) \ ((current_config->quirks | pipe_config->quirks) & (quirk)) @@ -11372,6 +11452,16 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_X(dpll_hw_state.pll9); PIPE_CONF_CHECK_X(dpll_hw_state.pll10); PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_refclkin_ctl); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_coreclkctl1); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_hsclkctl); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div0); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div1); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_lf); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_frac_lock); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_ssc); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_bias); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_tdc_coldst_bias); PIPE_CONF_CHECK_X(dsi_pll.ctrl); PIPE_CONF_CHECK_X(dsi_pll.div); @@ -11435,6 +11525,11 @@ static void verify_wm_state(struct drm_crtc *crtc, skl_ddb_get_hw_state(dev_priv, &hw_ddb); sw_ddb = &dev_priv->wm.skl_hw.ddb; + if (INTEL_GEN(dev_priv) >= 11) + if (hw_ddb.enabled_slices != sw_ddb->enabled_slices) + DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n", + sw_ddb->enabled_slices, + hw_ddb.enabled_slices); /* planes */ for_each_universal_plane(dev_priv, pipe, plane) { hw_plane_wm = &hw_wm.planes[plane]; @@ -12241,6 +12336,8 @@ static void skl_update_crtcs(struct drm_atomic_state *state) bool progress; enum pipe pipe; int i; + u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; + u8 required_slices = intel_state->wm_results.ddb.enabled_slices; const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {}; @@ -12249,6 +12346,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) if (new_crtc_state->active) entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb; + /* If 2nd DBuf slice required, enable it here */ + if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices) + icl_dbuf_slices_update(dev_priv, required_slices); + /* * Whenever the number of active pipes changes, we need to make sure we * update the pipes in the right order so that their ddb allocations @@ -12299,6 +12400,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) progress = true; } } while (progress); + + /* If 2nd DBuf slice is no more required disable it */ + if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices) + icl_dbuf_slices_update(dev_priv, required_slices); } static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) @@ -12762,6 +12867,15 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) intel_unpin_fb_vma(vma, old_plane_state->flags); } +static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) +{ + struct i915_sched_attr attr = { + .priority = I915_PRIORITY_DISPLAY, + }; + + i915_gem_object_wait_priority(obj, 0, &attr); +} + /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for @@ -12838,7 +12952,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); - i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); + fb_obj_bump_render_priority(obj); mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); @@ -13150,6 +13264,7 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: if (modifier == I915_FORMAT_MOD_Yf_TILED) return true; /* fall through */ @@ -13355,6 +13470,30 @@ static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, return pipe == PIPE_A && plane_id == PLANE_PRIMARY; } +bool skl_plane_has_planar(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id) +{ + if (plane_id == PLANE_PRIMARY) { + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + else if ((INTEL_GEN(dev_priv) == 9 && pipe == PIPE_C) && + !IS_GEMINILAKE(dev_priv)) + return false; + } else if (plane_id >= PLANE_SPRITE0) { + if (plane_id == PLANE_CURSOR) + return false; + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) == 10) { + if (plane_id != PLANE_SPRITE0) + return false; + } else { + if (plane_id != PLANE_SPRITE0 || pipe == PIPE_C || + IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + } + } + return true; +} + static struct intel_plane * intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) { @@ -13419,8 +13558,13 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->has_ccs = skl_plane_has_ccs(dev_priv, pipe, PLANE_PRIMARY); - intel_primary_formats = skl_primary_formats; - num_formats = ARRAY_SIZE(skl_primary_formats); + if (skl_plane_has_planar(dev_priv, pipe, PLANE_PRIMARY)) { + intel_primary_formats = skl_pri_planar_formats; + num_formats = ARRAY_SIZE(skl_pri_planar_formats); + } else { + intel_primary_formats = skl_primary_formats; + num_formats = ARRAY_SIZE(skl_primary_formats); + } if (primary->has_ccs) modifiers = skl_format_modifiers_ccs; @@ -14247,6 +14391,20 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, goto err; } break; + case DRM_FORMAT_NV12: + if (mode_cmd->modifier[0] == I915_FORMAT_MOD_Y_TILED_CCS || + mode_cmd->modifier[0] == I915_FORMAT_MOD_Yf_TILED_CCS) { + DRM_DEBUG_KMS("RC not to be enabled with NV12\n"); + goto err; + } + if (INTEL_GEN(dev_priv) < 9 || IS_SKYLAKE(dev_priv) || + IS_BROXTON(dev_priv)) { + DRM_DEBUG_KMS("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format, + &format_name)); + goto err; + } + break; default: DRM_DEBUG_KMS("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); @@ -14259,6 +14417,14 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); + if (fb->format->format == DRM_FORMAT_NV12 && + (fb->width < SKL_MIN_YUV_420_SRC_W || + fb->height < SKL_MIN_YUV_420_SRC_H || + (fb->width % 4) != 0 || (fb->height % 4) != 0)) { + DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); + return -EINVAL; + } + for (i = 0; i < fb->format->num_planes; i++) { u32 stride_alignment; @@ -15313,6 +15479,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); if (crtc_state->base.active) { intel_mode_from_pipe_config(&crtc->base.mode, crtc_state); + crtc->base.mode.hdisplay = crtc_state->pipe_src_w; + crtc->base.mode.vdisplay = crtc_state->pipe_src_h; intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode)); diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h index 4e7418b345bc..2ef31617614a 100644 --- a/drivers/gpu/drm/i915/intel_display.h +++ b/drivers/gpu/drm/i915/intel_display.h @@ -218,6 +218,10 @@ struct intel_link_m_n { for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ for_each_if((__mask) & BIT(__p)) +#define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \ + for ((__t) = 0; (__t) < I915_MAX_TRANSCODERS; (__t)++) \ + for_each_if ((__mask) & (1 << (__t))) + #define for_each_universal_plane(__dev_priv, __pipe, __p) \ for ((__p) = 0; \ (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 62f82c4298ac..8320f0e8e3be 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1647,9 +1647,17 @@ void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, } } +struct link_config_limits { + int min_clock, max_clock; + int min_lane_count, max_lane_count; + int min_bpp, max_bpp; +}; + static int intel_dp_compute_bpp(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + struct intel_connector *intel_connector = intel_dp->attached_connector; int bpp, bpc; bpp = pipe_config->pipe_bpp; @@ -1658,31 +1666,153 @@ static int intel_dp_compute_bpp(struct intel_dp *intel_dp, if (bpc > 0) bpp = min(bpp, 3*bpc); + if (intel_dp_is_edp(intel_dp)) { + /* Get bpp from vbt only for panels that dont have bpp in edid */ + if (intel_connector->base.display_info.bpc == 0 && + dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp) { + DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n", + dev_priv->vbt.edp.bpp); + bpp = dev_priv->vbt.edp.bpp; + } + } + + return bpp; +} + +/* Adjust link config limits based on compliance test requests. */ +static void +intel_dp_adjust_compliance_config(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + struct link_config_limits *limits) +{ /* For DP Compliance we override the computed bpp for the pipe */ if (intel_dp->compliance.test_data.bpc != 0) { - pipe_config->pipe_bpp = 3*intel_dp->compliance.test_data.bpc; - pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3; - DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", - pipe_config->pipe_bpp); + int bpp = 3 * intel_dp->compliance.test_data.bpc; + + limits->min_bpp = limits->max_bpp = bpp; + pipe_config->dither_force_disable = bpp == 6 * 3; + + DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", bpp); + } + + /* Use values requested by Compliance Test Request */ + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + int index; + + /* Validate the compliance test data since max values + * might have changed due to link train fallback. + */ + if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate, + intel_dp->compliance.test_lane_count)) { + index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + intel_dp->compliance.test_link_rate); + if (index >= 0) + limits->min_clock = limits->max_clock = index; + limits->min_lane_count = limits->max_lane_count = + intel_dp->compliance.test_lane_count; + } } - return bpp; } -static bool intel_edp_compare_alt_mode(struct drm_display_mode *m1, - struct drm_display_mode *m2) +/* Optimize link config in order: max bpp, min clock, min lanes */ +static bool +intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + const struct link_config_limits *limits) { - bool bres = false; + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + int bpp, clock, lane_count; + int mode_rate, link_clock, link_avail; - if (m1 && m2) - bres = (m1->hdisplay == m2->hdisplay && - m1->hsync_start == m2->hsync_start && - m1->hsync_end == m2->hsync_end && - m1->htotal == m2->htotal && - m1->vdisplay == m2->vdisplay && - m1->vsync_start == m2->vsync_start && - m1->vsync_end == m2->vsync_end && - m1->vtotal == m2->vtotal); - return bres; + for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { + mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, + bpp); + + for (clock = limits->min_clock; clock <= limits->max_clock; clock++) { + for (lane_count = limits->min_lane_count; + lane_count <= limits->max_lane_count; + lane_count <<= 1) { + link_clock = intel_dp->common_rates[clock]; + link_avail = intel_dp_max_data_rate(link_clock, + lane_count); + + if (mode_rate <= link_avail) { + pipe_config->lane_count = lane_count; + pipe_config->pipe_bpp = bpp; + pipe_config->port_clock = link_clock; + + return true; + } + } + } + } + + return false; +} + +static bool +intel_dp_compute_link_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) +{ + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct link_config_limits limits; + int common_len; + + common_len = intel_dp_common_len_rate_limit(intel_dp, + intel_dp->max_link_rate); + + /* No common link rates between source and sink */ + WARN_ON(common_len <= 0); + + limits.min_clock = 0; + limits.max_clock = common_len - 1; + + limits.min_lane_count = 1; + limits.max_lane_count = intel_dp_max_lane_count(intel_dp); + + limits.min_bpp = 6 * 3; + limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config); + + if (intel_dp_is_edp(intel_dp)) { + /* + * Use the maximum clock and number of lanes the eDP panel + * advertizes being capable of. The panels are generally + * designed to support only a single clock and lane + * configuration, and typically these values correspond to the + * native resolution of the panel. + */ + limits.min_lane_count = limits.max_lane_count; + limits.min_clock = limits.max_clock; + } + + intel_dp_adjust_compliance_config(intel_dp, pipe_config, &limits); + + DRM_DEBUG_KMS("DP link computation with max lane count %i " + "max rate %d max bpp %d pixel clock %iKHz\n", + limits.max_lane_count, + intel_dp->common_rates[limits.max_clock], + limits.max_bpp, adjusted_mode->crtc_clock); + + /* + * Optimize for slow and wide. This is the place to add alternative + * optimization policy. + */ + if (!intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits)) + return false; + + DRM_DEBUG_KMS("DP lane count %d clock %d bpp %d\n", + pipe_config->lane_count, pipe_config->port_clock, + pipe_config->pipe_bpp); + + DRM_DEBUG_KMS("DP link rate required %i available %i\n", + intel_dp_link_required(adjusted_mode->crtc_clock, + pipe_config->pipe_bpp), + intel_dp_max_data_rate(pipe_config->port_clock, + pipe_config->lane_count)); + + return true; } bool @@ -1698,27 +1828,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, struct intel_connector *intel_connector = intel_dp->attached_connector; struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); - int lane_count, clock; - int min_lane_count = 1; - int max_lane_count = intel_dp_max_lane_count(intel_dp); - /* Conveniently, the link BW constants become indices with a shift...*/ - int min_clock = 0; - int max_clock; - int bpp, mode_rate; - int link_avail, link_clock; - int common_len; - uint8_t link_bw, rate_select; bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_LIMITED_M_N); - common_len = intel_dp_common_len_rate_limit(intel_dp, - intel_dp->max_link_rate); - - /* No common link rates between source and sink */ - WARN_ON(common_len <= 0); - - max_clock = common_len - 1; - if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && port != PORT_A) pipe_config->has_pch_encoder = true; @@ -1731,19 +1843,12 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->has_audio = intel_conn_state->force_audio == HDMI_AUDIO_ON; if (intel_dp_is_edp(intel_dp) && intel_connector->panel.fixed_mode) { - struct drm_display_mode *panel_mode = - intel_connector->panel.alt_fixed_mode; - struct drm_display_mode *req_mode = &pipe_config->base.mode; - - if (!intel_edp_compare_alt_mode(req_mode, panel_mode)) - panel_mode = intel_connector->panel.fixed_mode; - - drm_mode_debug_printmodeline(panel_mode); - - intel_fixed_panel_mode(panel_mode, adjusted_mode); + intel_fixed_panel_mode(intel_connector->panel.fixed_mode, + adjusted_mode); if (INTEL_GEN(dev_priv) >= 9) { int ret; + ret = skl_update_scaler_crtc(pipe_config); if (ret) return ret; @@ -1764,75 +1869,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; - /* Use values requested by Compliance Test Request */ - if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { - int index; - - /* Validate the compliance test data since max values - * might have changed due to link train fallback. - */ - if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate, - intel_dp->compliance.test_lane_count)) { - index = intel_dp_rate_index(intel_dp->common_rates, - intel_dp->num_common_rates, - intel_dp->compliance.test_link_rate); - if (index >= 0) - min_clock = max_clock = index; - min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; - } - } - DRM_DEBUG_KMS("DP link computation with max lane count %i " - "max bw %d pixel clock %iKHz\n", - max_lane_count, intel_dp->common_rates[max_clock], - adjusted_mode->crtc_clock); - - /* Walk through all bpp values. Luckily they're all nicely spaced with 2 - * bpc in between. */ - bpp = intel_dp_compute_bpp(intel_dp, pipe_config); - if (intel_dp_is_edp(intel_dp)) { - - /* Get bpp from vbt only for panels that dont have bpp in edid */ - if (intel_connector->base.display_info.bpc == 0 && - (dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp)) { - DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n", - dev_priv->vbt.edp.bpp); - bpp = dev_priv->vbt.edp.bpp; - } - - /* - * Use the maximum clock and number of lanes the eDP panel - * advertizes being capable of. The panels are generally - * designed to support only a single clock and lane - * configuration, and typically these values correspond to the - * native resolution of the panel. - */ - min_lane_count = max_lane_count; - min_clock = max_clock; - } - - for (; bpp >= 6*3; bpp -= 2*3) { - mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, - bpp); - - for (clock = min_clock; clock <= max_clock; clock++) { - for (lane_count = min_lane_count; - lane_count <= max_lane_count; - lane_count <<= 1) { - - link_clock = intel_dp->common_rates[clock]; - link_avail = intel_dp_max_data_rate(link_clock, - lane_count); - - if (mode_rate <= link_avail) { - goto found; - } - } - } - } - - return false; + if (!intel_dp_compute_link_config(encoder, pipe_config)) + return false; -found: if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* * See: @@ -1840,7 +1879,7 @@ found: * VESA DisplayPort Ver.1.2a - 5.1.1.1 Video Colorimetry */ pipe_config->limited_color_range = - bpp != 18 && + pipe_config->pipe_bpp != 18 && drm_default_rgb_quant_range(adjusted_mode) == HDMI_QUANTIZATION_RANGE_LIMITED; } else { @@ -1848,21 +1887,7 @@ found: intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; } - pipe_config->lane_count = lane_count; - - pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = intel_dp->common_rates[clock]; - - intel_dp_compute_rate(intel_dp, pipe_config->port_clock, - &link_bw, &rate_select); - - DRM_DEBUG_KMS("DP link bw %02x rate select %02x lane count %d clock %d bpp %d\n", - link_bw, rate_select, pipe_config->lane_count, - pipe_config->port_clock, bpp); - DRM_DEBUG_KMS("DP link bw required %i available %i\n", - mode_rate, link_avail); - - intel_link_compute_m_n(bpp, lane_count, + intel_link_compute_m_n(pipe_config->pipe_bpp, pipe_config->lane_count, adjusted_mode->crtc_clock, pipe_config->port_clock, &pipe_config->dp_m_n, @@ -1871,31 +1896,12 @@ found: if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { pipe_config->has_drrs = true; - intel_link_compute_m_n(bpp, lane_count, - intel_connector->panel.downclock_mode->clock, - pipe_config->port_clock, - &pipe_config->dp_m2_n2, - reduce_m_n); - } - - /* - * DPLL0 VCO may need to be adjusted to get the correct - * clock for eDP. This will affect cdclk as well. - */ - if (intel_dp_is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { - int vco; - - switch (pipe_config->port_clock / 2) { - case 108000: - case 216000: - vco = 8640000; - break; - default: - vco = 8100000; - break; - } - - to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco; + intel_link_compute_m_n(pipe_config->pipe_bpp, + pipe_config->lane_count, + intel_connector->panel.downclock_mode->clock, + pipe_config->port_clock, + &pipe_config->dp_m2_n2, + reduce_m_n); } if (!HAS_DDI(dev_priv)) @@ -6128,7 +6134,6 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_connector *connector = &intel_connector->base; struct drm_display_mode *fixed_mode = NULL; - struct drm_display_mode *alt_fixed_mode = NULL; struct drm_display_mode *downclock_mode = NULL; bool has_dpcd; struct drm_display_mode *scan; @@ -6183,14 +6188,13 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, } intel_connector->edid = edid; - /* prefer fixed mode from EDID if available, save an alt mode also */ + /* prefer fixed mode from EDID if available */ list_for_each_entry(scan, &connector->probed_modes, head) { if ((scan->type & DRM_MODE_TYPE_PREFERRED)) { fixed_mode = drm_mode_duplicate(dev, scan); downclock_mode = intel_dp_drrs_init( intel_connector, fixed_mode); - } else if (!alt_fixed_mode) { - alt_fixed_mode = drm_mode_duplicate(dev, scan); + break; } } @@ -6227,8 +6231,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, pipe_name(pipe)); } - intel_panel_init(&intel_connector->panel, fixed_mode, alt_fixed_mode, - downclock_mode); + intel_panel_init(&intel_connector->panel, fixed_mode, downclock_mode); intel_connector->panel.backlight.power = intel_edp_backlight_power; intel_panel_setup_backlight(connector, pipe); diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index f59b59bb0a21..3fcaa98b9055 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -139,6 +139,11 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) intel_dp_compute_rate(intel_dp, intel_dp->link_rate, &link_bw, &rate_select); + if (link_bw) + DRM_DEBUG_KMS("Using LINK_BW_SET value %02x\n", link_bw); + else + DRM_DEBUG_KMS("Using LINK_RATE_SET value %02x\n", rate_select); + /* Write the link configuration data */ link_config[0] = link_bw; link_config[1] = intel_dp->lane_count; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index d5e114e9660b..383fbc15113d 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2218,6 +2218,7 @@ cnl_ddi_calculate_wrpll(int clock, struct skl_wrpll_params *wrpll_params) { u32 afe_clock = clock * 5; + uint32_t ref_clock; u32 dco_min = 7998000; u32 dco_max = 10000000; u32 dco_mid = (dco_min + dco_max) / 2; @@ -2250,8 +2251,17 @@ cnl_ddi_calculate_wrpll(int clock, cnl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv); - cnl_wrpll_params_populate(wrpll_params, best_dco, - dev_priv->cdclk.hw.ref, pdiv, qdiv, kdiv); + ref_clock = dev_priv->cdclk.hw.ref; + + /* + * For ICL, the spec states: if reference frequency is 38.4, use 19.2 + * because the DPLL automatically divides that by 2. + */ + if (IS_ICELAKE(dev_priv) && ref_clock == 38400) + ref_clock = 19200; + + cnl_wrpll_params_populate(wrpll_params, best_dco, ref_clock, pdiv, qdiv, + kdiv); return true; } @@ -2399,6 +2409,644 @@ static const struct intel_dpll_mgr cnl_pll_mgr = { .dump_hw_state = cnl_dump_hw_state, }; +/* + * These values alrea already adjusted: they're the bits we write to the + * registers, not the logical values. + */ +static const struct skl_wrpll_params icl_dp_combo_pll_24MHz_values[] = { + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [0]: 5.4 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [1]: 2.7 */ + .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [2]: 1.62 */ + .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [3]: 3.24 */ + .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x168, .dco_fraction = 0x0000, /* [4]: 2.16 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2}, + { .dco_integer = 0x168, .dco_fraction = 0x0000, /* [5]: 4.32 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x195, .dco_fraction = 0x0000, /* [6]: 6.48 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [7]: 8.1 */ + .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, +}; + +/* Also used for 38.4 MHz values. */ +static const struct skl_wrpll_params icl_dp_combo_pll_19_2MHz_values[] = { + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [0]: 5.4 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [1]: 2.7 */ + .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [2]: 1.62 */ + .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [3]: 3.24 */ + .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1C2, .dco_fraction = 0x0000, /* [4]: 2.16 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2}, + { .dco_integer = 0x1C2, .dco_fraction = 0x0000, /* [5]: 4.32 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1FA, .dco_fraction = 0x2000, /* [6]: 6.48 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [7]: 8.1 */ + .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, +}; + +static bool icl_calc_dp_combo_pll(struct drm_i915_private *dev_priv, int clock, + struct skl_wrpll_params *pll_params) +{ + const struct skl_wrpll_params *params; + + params = dev_priv->cdclk.hw.ref == 24000 ? + icl_dp_combo_pll_24MHz_values : + icl_dp_combo_pll_19_2MHz_values; + + switch (clock) { + case 540000: + *pll_params = params[0]; + break; + case 270000: + *pll_params = params[1]; + break; + case 162000: + *pll_params = params[2]; + break; + case 324000: + *pll_params = params[3]; + break; + case 216000: + *pll_params = params[4]; + break; + case 432000: + *pll_params = params[5]; + break; + case 648000: + *pll_params = params[6]; + break; + case 810000: + *pll_params = params[7]; + break; + default: + MISSING_CASE(clock); + return false; + } + + return true; +} + +static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder, int clock, + struct intel_dpll_hw_state *pll_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + uint32_t cfgcr0, cfgcr1; + struct skl_wrpll_params pll_params = { 0 }; + bool ret; + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + ret = cnl_ddi_calculate_wrpll(clock, dev_priv, &pll_params); + else + ret = icl_calc_dp_combo_pll(dev_priv, clock, &pll_params); + + if (!ret) + return false; + + cfgcr0 = DPLL_CFGCR0_DCO_FRACTION(pll_params.dco_fraction) | + pll_params.dco_integer; + + cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(pll_params.qdiv_ratio) | + DPLL_CFGCR1_QDIV_MODE(pll_params.qdiv_mode) | + DPLL_CFGCR1_KDIV(pll_params.kdiv) | + DPLL_CFGCR1_PDIV(pll_params.pdiv) | + DPLL_CFGCR1_CENTRAL_FREQ_8400; + + pll_state->cfgcr0 = cfgcr0; + pll_state->cfgcr1 = cfgcr1; + return true; +} + +static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) +{ + return id - DPLL_ID_ICL_MGPLL1 + PORT_C; +} + +static enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) +{ + return port - PORT_C + DPLL_ID_ICL_MGPLL1; +} + +static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, + uint32_t *target_dco_khz, + struct intel_dpll_hw_state *state) +{ + uint32_t dco_min_freq, dco_max_freq; + int div1_vals[] = {7, 5, 3, 2}; + unsigned int i; + int div2; + + dco_min_freq = is_dp ? 8100000 : use_ssc ? 8000000 : 7992000; + dco_max_freq = is_dp ? 8100000 : 10000000; + + for (i = 0; i < ARRAY_SIZE(div1_vals); i++) { + int div1 = div1_vals[i]; + + for (div2 = 10; div2 > 0; div2--) { + int dco = div1 * div2 * clock_khz * 5; + int a_divratio, tlinedrv, inputsel, hsdiv; + + if (dco < dco_min_freq || dco > dco_max_freq) + continue; + + if (div2 >= 2) { + a_divratio = is_dp ? 10 : 5; + tlinedrv = 2; + } else { + a_divratio = 5; + tlinedrv = 0; + } + inputsel = is_dp ? 0 : 1; + + switch (div1) { + default: + MISSING_CASE(div1); + case 2: + hsdiv = 0; + break; + case 3: + hsdiv = 1; + break; + case 5: + hsdiv = 2; + break; + case 7: + hsdiv = 3; + break; + } + + *target_dco_khz = dco; + + state->mg_refclkin_ctl = MG_REFCLKIN_CTL_OD_2_MUX(1); + + state->mg_clktop2_coreclkctl1 = + MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(a_divratio); + + state->mg_clktop2_hsclkctl = + MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(tlinedrv) | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(inputsel) | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(hsdiv) | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(div2); + + return true; + } + } + + return false; +} + +/* + * The specification for this function uses real numbers, so the math had to be + * adapted to integer-only calculation, that's why it looks so different. + */ +static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder, int clock, + struct intel_dpll_hw_state *pll_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + int refclk_khz = dev_priv->cdclk.hw.ref; + uint32_t dco_khz, m1div, m2div_int, m2div_rem, m2div_frac; + uint32_t iref_ndiv, iref_trim, iref_pulse_w; + uint32_t prop_coeff, int_coeff; + uint32_t tdc_targetcnt, feedfwgain; + uint64_t ssc_stepsize, ssc_steplen, ssc_steplog; + uint64_t tmp; + bool use_ssc = false; + bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); + + if (!icl_mg_pll_find_divisors(clock, is_dp, use_ssc, &dco_khz, + pll_state)) { + DRM_DEBUG_KMS("Failed to find divisors for clock %d\n", clock); + return false; + } + + m1div = 2; + m2div_int = dco_khz / (refclk_khz * m1div); + if (m2div_int > 255) { + m1div = 4; + m2div_int = dco_khz / (refclk_khz * m1div); + if (m2div_int > 255) { + DRM_DEBUG_KMS("Failed to find mdiv for clock %d\n", + clock); + return false; + } + } + m2div_rem = dco_khz % (refclk_khz * m1div); + + tmp = (uint64_t)m2div_rem * (1 << 22); + do_div(tmp, refclk_khz * m1div); + m2div_frac = tmp; + + switch (refclk_khz) { + case 19200: + iref_ndiv = 1; + iref_trim = 28; + iref_pulse_w = 1; + break; + case 24000: + iref_ndiv = 1; + iref_trim = 25; + iref_pulse_w = 2; + break; + case 38400: + iref_ndiv = 2; + iref_trim = 28; + iref_pulse_w = 1; + break; + default: + MISSING_CASE(refclk_khz); + return false; + } + + /* + * tdc_res = 0.000003 + * tdc_targetcnt = int(2 / (tdc_res * 8 * 50 * 1.1) / refclk_mhz + 0.5) + * + * The multiplication by 1000 is due to refclk MHz to KHz conversion. It + * was supposed to be a division, but we rearranged the operations of + * the formula to avoid early divisions so we don't multiply the + * rounding errors. + * + * 0.000003 * 8 * 50 * 1.1 = 0.00132, also known as 132 / 100000, which + * we also rearrange to work with integers. + * + * The 0.5 transformed to 5 results in a multiplication by 10 and the + * last division by 10. + */ + tdc_targetcnt = (2 * 1000 * 100000 * 10 / (132 * refclk_khz) + 5) / 10; + + /* + * Here we divide dco_khz by 10 in order to allow the dividend to fit in + * 32 bits. That's not a problem since we round the division down + * anyway. + */ + feedfwgain = (use_ssc || m2div_rem > 0) ? + m1div * 1000000 * 100 / (dco_khz * 3 / 10) : 0; + + if (dco_khz >= 9000000) { + prop_coeff = 5; + int_coeff = 10; + } else { + prop_coeff = 4; + int_coeff = 8; + } + + if (use_ssc) { + tmp = (uint64_t)dco_khz * 47 * 32; + do_div(tmp, refclk_khz * m1div * 10000); + ssc_stepsize = tmp; + + tmp = (uint64_t)dco_khz * 1000; + ssc_steplen = DIV_ROUND_UP_ULL(tmp, 32 * 2 * 32); + } else { + ssc_stepsize = 0; + ssc_steplen = 0; + } + ssc_steplog = 4; + + pll_state->mg_pll_div0 = (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) | + MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | + MG_PLL_DIV0_FBDIV_INT(m2div_int); + + pll_state->mg_pll_div1 = MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | + MG_PLL_DIV1_DITHER_DIV_2 | + MG_PLL_DIV1_NDIVRATIO(1) | + MG_PLL_DIV1_FBPREDIV(m1div); + + pll_state->mg_pll_lf = MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | + MG_PLL_LF_AFCCNTSEL_512 | + MG_PLL_LF_GAINCTRL(1) | + MG_PLL_LF_INT_COEFF(int_coeff) | + MG_PLL_LF_PROP_COEFF(prop_coeff); + + pll_state->mg_pll_frac_lock = MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | + MG_PLL_FRAC_LOCK_DCODITHEREN | + MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); + if (use_ssc || m2div_rem > 0) + pll_state->mg_pll_frac_lock |= MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; + + pll_state->mg_pll_ssc = (use_ssc ? MG_PLL_SSC_EN : 0) | + MG_PLL_SSC_TYPE(2) | + MG_PLL_SSC_STEPLENGTH(ssc_steplen) | + MG_PLL_SSC_STEPNUM(ssc_steplog) | + MG_PLL_SSC_FLLEN | + MG_PLL_SSC_STEPSIZE(ssc_stepsize); + + pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART; + + if (refclk_khz != 38400) { + pll_state->mg_pll_tdc_coldst_bias |= + MG_PLL_TDC_COLDST_IREFINT_EN | + MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | + MG_PLL_TDC_COLDST_COLDSTART | + MG_PLL_TDC_TDCOVCCORR_EN | + MG_PLL_TDC_TDCSEL(3); + + pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) | + MG_PLL_BIAS_INIT_DCOAMP(0x3F) | + MG_PLL_BIAS_BIAS_BONUS(10) | + MG_PLL_BIAS_BIASCAL_EN | + MG_PLL_BIAS_CTRIM(12) | + MG_PLL_BIAS_VREF_RDAC(4) | + MG_PLL_BIAS_IREFTRIM(iref_trim); + } + + return true; +} + +static struct intel_shared_dpll * +icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + struct intel_shared_dpll *pll; + struct intel_dpll_hw_state pll_state = {}; + enum port port = encoder->port; + enum intel_dpll_id min, max; + int clock = crtc_state->port_clock; + bool ret; + + switch (port) { + case PORT_A: + case PORT_B: + min = DPLL_ID_ICL_DPLL0; + max = DPLL_ID_ICL_DPLL1; + ret = icl_calc_dpll_state(crtc_state, encoder, clock, + &pll_state); + break; + case PORT_C: + case PORT_D: + case PORT_E: + case PORT_F: + min = icl_port_to_mg_pll_id(port); + max = min; + ret = icl_calc_mg_pll_state(crtc_state, encoder, clock, + &pll_state); + break; + default: + MISSING_CASE(port); + return NULL; + } + + if (!ret) { + DRM_DEBUG_KMS("Could not calculate PLL state.\n"); + return NULL; + } + + crtc_state->dpll_hw_state = pll_state; + + pll = intel_find_shared_dpll(crtc, crtc_state, min, max); + if (!pll) { + DRM_DEBUG_KMS("No PLL selected\n"); + return NULL; + } + + intel_reference_shared_dpll(pll, crtc_state); + + return pll; +} + +static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) +{ + switch (id) { + default: + MISSING_CASE(id); + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + return CNL_DPLL_ENABLE(id); + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); + } +} + +static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll, + struct intel_dpll_hw_state *hw_state) +{ + const enum intel_dpll_id id = pll->info->id; + uint32_t val; + enum port port; + bool ret = false; + + if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + return false; + + val = I915_READ(icl_pll_id_to_enable_reg(id)); + if (!(val & PLL_ENABLE)) + goto out; + + switch (id) { + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); + hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); + break; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + port = icl_mg_pll_id_to_port(id); + hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); + hw_state->mg_clktop2_coreclkctl1 = + I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + hw_state->mg_clktop2_hsclkctl = + I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); + hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); + hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); + hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); + hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); + hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); + hw_state->mg_pll_tdc_coldst_bias = + I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + break; + default: + MISSING_CASE(id); + } + + ret = true; +out: + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + return ret; +} + +static void icl_dpll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + const enum intel_dpll_id id = pll->info->id; + + I915_WRITE(ICL_DPLL_CFGCR0(id), hw_state->cfgcr0); + I915_WRITE(ICL_DPLL_CFGCR1(id), hw_state->cfgcr1); + POSTING_READ(ICL_DPLL_CFGCR1(id)); +} + +static void icl_mg_pll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + enum port port = icl_mg_pll_id_to_port(pll->info->id); + + I915_WRITE(MG_REFCLKIN_CTL(port), hw_state->mg_refclkin_ctl); + I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), + hw_state->mg_clktop2_coreclkctl1); + I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), hw_state->mg_clktop2_hsclkctl); + I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); + I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); + I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); + I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); + I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); + I915_WRITE(MG_PLL_BIAS(port), hw_state->mg_pll_bias); + I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), + hw_state->mg_pll_tdc_coldst_bias); + POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); +} + +static void icl_pll_enable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); + uint32_t val; + + val = I915_READ(enable_reg); + val |= PLL_POWER_ENABLE; + I915_WRITE(enable_reg, val); + + /* + * The spec says we need to "wait" but it also says it should be + * immediate. + */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, + PLL_POWER_STATE, 1)) + DRM_ERROR("PLL %d Power not enabled\n", id); + + switch (id) { + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + icl_dpll_write(dev_priv, pll); + break; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + icl_mg_pll_write(dev_priv, pll); + break; + default: + MISSING_CASE(id); + } + + /* + * DVFS pre sequence would be here, but in our driver the cdclk code + * paths should already be setting the appropriate voltage, hence we do + * nothign here. + */ + + val = I915_READ(enable_reg); + val |= PLL_ENABLE; + I915_WRITE(enable_reg, val); + + if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, PLL_LOCK, + 1)) /* 600us actually. */ + DRM_ERROR("PLL %d not locked\n", id); + + /* DVFS post sequence would be here. See the comment above. */ +} + +static void icl_pll_disable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); + uint32_t val; + + /* The first steps are done by intel_ddi_post_disable(). */ + + /* + * DVFS pre sequence would be here, but in our driver the cdclk code + * paths should already be setting the appropriate voltage, hence we do + * nothign here. + */ + + val = I915_READ(enable_reg); + val &= ~PLL_ENABLE; + I915_WRITE(enable_reg, val); + + /* Timeout is actually 1us. */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, 0, 1)) + DRM_ERROR("PLL %d locked\n", id); + + /* DVFS post sequence would be here. See the comment above. */ + + val = I915_READ(enable_reg); + val &= ~PLL_POWER_ENABLE; + I915_WRITE(enable_reg, val); + + /* + * The spec says we need to "wait" but it also says it should be + * immediate. + */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, 0, + 1)) + DRM_ERROR("PLL %d Power not disabled\n", id); +} + +static void icl_dump_hw_state(struct drm_i915_private *dev_priv, + struct intel_dpll_hw_state *hw_state) +{ + DRM_DEBUG_KMS("dpll_hw_state: cfgcr0: 0x%x, cfgcr1: 0x%x, " + "mg_refclkin_ctl: 0x%x, hg_clktop2_coreclkctl1: 0x%x, " + "mg_clktop2_hsclkctl: 0x%x, mg_pll_div0: 0x%x, " + "mg_pll_div2: 0x%x, mg_pll_lf: 0x%x, " + "mg_pll_frac_lock: 0x%x, mg_pll_ssc: 0x%x, " + "mg_pll_bias: 0x%x, mg_pll_tdc_coldst_bias: 0x%x\n", + hw_state->cfgcr0, hw_state->cfgcr1, + hw_state->mg_refclkin_ctl, + hw_state->mg_clktop2_coreclkctl1, + hw_state->mg_clktop2_hsclkctl, + hw_state->mg_pll_div0, + hw_state->mg_pll_div1, + hw_state->mg_pll_lf, + hw_state->mg_pll_frac_lock, + hw_state->mg_pll_ssc, + hw_state->mg_pll_bias, + hw_state->mg_pll_tdc_coldst_bias); +} + +static const struct intel_shared_dpll_funcs icl_pll_funcs = { + .enable = icl_pll_enable, + .disable = icl_pll_disable, + .get_hw_state = icl_pll_get_hw_state, +}; + +static const struct dpll_info icl_plls[] = { + { "DPLL 0", &icl_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, + { "DPLL 1", &icl_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "MG PLL 1", &icl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, + { "MG PLL 2", &icl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, + { "MG PLL 3", &icl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, + { "MG PLL 4", &icl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, + { }, +}; + +static const struct intel_dpll_mgr icl_pll_mgr = { + .dpll_info = icl_plls, + .get_dpll = icl_get_dpll, + .dump_hw_state = icl_dump_hw_state, +}; + /** * intel_shared_dpll_init - Initialize shared DPLLs * @dev: drm device @@ -2412,7 +3060,9 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + dpll_mgr = &icl_pll_mgr; + else if (IS_CANNONLAKE(dev_priv)) dpll_mgr = &cnl_pll_mgr; else if (IS_GEN9_BC(dev_priv)) dpll_mgr = &skl_pll_mgr; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index 4febfaa90bde..7a0cd564a9ee 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -103,6 +103,32 @@ enum intel_dpll_id { * @DPLL_ID_SKL_DPLL3: SKL and later DPLL3 */ DPLL_ID_SKL_DPLL3 = 3, + + + /** + * @DPLL_ID_ICL_DPLL0: ICL combo PHY DPLL0 + */ + DPLL_ID_ICL_DPLL0 = 0, + /** + * @DPLL_ID_ICL_DPLL1: ICL combo PHY DPLL1 + */ + DPLL_ID_ICL_DPLL1 = 1, + /** + * @DPLL_ID_ICL_MGPLL1: ICL MG PLL 1 port 1 (C) + */ + DPLL_ID_ICL_MGPLL1 = 2, + /** + * @DPLL_ID_ICL_MGPLL2: ICL MG PLL 1 port 2 (D) + */ + DPLL_ID_ICL_MGPLL2 = 3, + /** + * @DPLL_ID_ICL_MGPLL3: ICL MG PLL 1 port 3 (E) + */ + DPLL_ID_ICL_MGPLL3 = 4, + /** + * @DPLL_ID_ICL_MGPLL4: ICL MG PLL 1 port 4 (F) + */ + DPLL_ID_ICL_MGPLL4 = 5, }; #define I915_NUM_PLLS 6 @@ -135,6 +161,21 @@ struct intel_dpll_hw_state { /* bxt */ uint32_t ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, pcsdw12; + + /* + * ICL uses the following, already defined: + * uint32_t cfgcr0, cfgcr1; + */ + uint32_t mg_refclkin_ctl; + uint32_t mg_clktop2_coreclkctl1; + uint32_t mg_clktop2_hsclkctl; + uint32_t mg_pll_div0; + uint32_t mg_pll_div1; + uint32_t mg_pll_lf; + uint32_t mg_pll_frac_lock; + uint32_t mg_pll_ssc; + uint32_t mg_pll_bias; + uint32_t mg_pll_tdc_coldst_bias; }; /** diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3c7921ecf6f4..3e850b745390 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -56,6 +56,8 @@ for (;;) { \ const bool expired__ = ktime_after(ktime_get_raw(), end__); \ OP; \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ if (COND) { \ ret__ = 0; \ break; \ @@ -96,6 +98,8 @@ u64 now = local_clock(); \ if (!(ATOMIC)) \ preempt_enable(); \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ if (COND) { \ ret = 0; \ break; \ @@ -140,6 +144,10 @@ #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) +#define KBps(x) (1000 * (x)) +#define MBps(x) KBps(1000 * (x)) +#define GBps(x) ((u64)1000 * MBps((x))) + /* * Display related stuff */ @@ -269,7 +277,6 @@ struct intel_encoder { struct intel_panel { struct drm_display_mode *fixed_mode; - struct drm_display_mode *alt_fixed_mode; struct drm_display_mode *downclock_mode; /* backlight */ @@ -882,6 +889,7 @@ struct intel_crtc_state { /* bitmask of visible planes (enum plane_id) */ u8 active_planes; + u8 nv12_planes; /* HDMI scrambling status */ bool hdmi_scrambling; @@ -1330,6 +1338,9 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv); void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv); /* i915_irq.c */ +bool gen11_reset_one_iir(struct drm_i915_private * const i915, + const unsigned int bank, + const unsigned int bit); void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); @@ -1399,6 +1410,12 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, bool enable); +void icl_map_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state); +void icl_unmap_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state); unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, int plane, unsigned int height); @@ -1581,8 +1598,6 @@ void bxt_enable_dc9(struct drm_i915_private *dev_priv); void bxt_disable_dc9(struct drm_i915_private *dev_priv); void gen9_enable_dc5(struct drm_i915_private *dev_priv); unsigned int skl_cdclk_get_vco(unsigned int freq); -void skl_enable_dc6(struct drm_i915_private *dev_priv); -void skl_disable_dc6(struct drm_i915_private *dev_priv); void intel_dp_get_m_n(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n); @@ -1835,7 +1850,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv); /* intel_panel.c */ int intel_panel_init(struct intel_panel *panel, struct drm_display_mode *fixed_mode, - struct drm_display_mode *alt_fixed_mode, struct drm_display_mode *downclock_mode); void intel_panel_fini(struct intel_panel *panel); void intel_fixed_panel_mode(const struct drm_display_mode *fixed_mode, @@ -1902,6 +1916,8 @@ void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state); +void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug); +void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir); /* intel_runtime_pm.c */ int intel_power_domains_init(struct drm_i915_private *); @@ -1925,6 +1941,8 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); void intel_display_power_put(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); +void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, + u8 req_slices); static inline void assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) @@ -2063,6 +2081,8 @@ bool skl_plane_get_hw_state(struct intel_plane *plane); bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id); bool intel_format_is_yuv(uint32_t format); +bool skl_plane_has_planar(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 51a1d6868b1e..cf39ca90d887 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1846,7 +1846,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) connector->display_info.width_mm = fixed_mode->width_mm; connector->display_info.height_mm = fixed_mode->height_mm; - intel_panel_init(&intel_connector->panel, fixed_mode, NULL, NULL); + intel_panel_init(&intel_connector->panel, fixed_mode, NULL); intel_panel_setup_backlight(connector, INVALID_PIPE); intel_dsi_add_properties(intel_connector); diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 91c07b0c8db9..4d6ffa7b3e7b 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -647,6 +647,11 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) /* prepare count */ prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul); + if (prepare_cnt > PREPARE_CNT_MAX) { + DRM_DEBUG_KMS("prepare count too high %u\n", prepare_cnt); + prepare_cnt = PREPARE_CNT_MAX; + } + /* exit zero count */ exit_zero_cnt = DIV_ROUND_UP( (ths_prepare_hszero - ths_prepare_ns) * ui_den, @@ -662,32 +667,29 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) if (exit_zero_cnt < (55 * ui_den / ui_num) && (55 * ui_den) % ui_num) exit_zero_cnt += 1; + if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) { + DRM_DEBUG_KMS("exit zero count too high %u\n", exit_zero_cnt); + exit_zero_cnt = EXIT_ZERO_CNT_MAX; + } + /* clk zero count */ clk_zero_cnt = DIV_ROUND_UP( (tclk_prepare_clkzero - ths_prepare_ns) * ui_den, ui_num * mul); + if (clk_zero_cnt > CLK_ZERO_CNT_MAX) { + DRM_DEBUG_KMS("clock zero count too high %u\n", clk_zero_cnt); + clk_zero_cnt = CLK_ZERO_CNT_MAX; + } + /* trail count */ tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail); trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul); - if (prepare_cnt > PREPARE_CNT_MAX || - exit_zero_cnt > EXIT_ZERO_CNT_MAX || - clk_zero_cnt > CLK_ZERO_CNT_MAX || - trail_cnt > TRAIL_CNT_MAX) - DRM_DEBUG_DRIVER("Values crossing maximum limits, restricting to max values\n"); - - if (prepare_cnt > PREPARE_CNT_MAX) - prepare_cnt = PREPARE_CNT_MAX; - - if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) - exit_zero_cnt = EXIT_ZERO_CNT_MAX; - - if (clk_zero_cnt > CLK_ZERO_CNT_MAX) - clk_zero_cnt = CLK_ZERO_CNT_MAX; - - if (trail_cnt > TRAIL_CNT_MAX) + if (trail_cnt > TRAIL_CNT_MAX) { + DRM_DEBUG_KMS("trail count too high %u\n", trail_cnt); trail_cnt = TRAIL_CNT_MAX; + } /* B080 */ intel_dsi->dphy_reg = exit_zero_cnt << 24 | trail_cnt << 16 | diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index eb0c559b2715..a70d767313aa 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -536,7 +536,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) */ intel_panel_init(&intel_connector->panel, intel_dvo_get_current_mode(intel_encoder), - NULL, NULL); + NULL); intel_dvo->panel_wants_dither = true; } diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 1a8370779bbb..1590375f31cb 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -306,7 +306,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv, /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; - spin_lock_init(&engine->stats.lock); + seqlock_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -451,11 +451,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } -static void intel_engine_init_timeline(struct intel_engine_cs *engine) -{ - engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; -} - static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) { i915_gem_batch_pool_init(&engine->batch_pool, engine); @@ -463,21 +458,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) static bool csb_force_mmio(struct drm_i915_private *i915) { - /* - * IOMMU adds unpredictable latency causing the CSB write (from the - * GPU into the HWSP) to only be visible some time after the interrupt - * (missed breadcrumb syndrome). - */ - if (intel_vtd_active()) - return true; - /* Older GVT emulation depends upon intercepting CSB mmio */ if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) return true; - if (IS_CANNONLAKE(i915)) - return true; - return false; } @@ -507,8 +491,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) */ void intel_engine_setup_common(struct intel_engine_cs *engine) { + i915_timeline_init(engine->i915, &engine->timeline, engine->name); + intel_engine_init_execlist(engine); - intel_engine_init_timeline(engine); intel_engine_init_hangcheck(engine); intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); @@ -541,8 +526,6 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) goto err_unref; engine->scratch = vma; - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); return 0; err_unref: @@ -636,9 +619,6 @@ static int init_status_page(struct intel_engine_cs *engine) engine->status_page.vma = vma; engine->status_page.ggtt_offset = i915_ggtt_offset(vma); engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); - - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); return 0; err_unpin: @@ -690,7 +670,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. */ - ring = engine->context_pin(engine, engine->i915->kernel_context); + ring = intel_context_pin(engine->i915->kernel_context, engine); if (IS_ERR(ring)) return PTR_ERR(ring); @@ -699,8 +679,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * we can interrupt the engine at any time. */ if (engine->i915->preempt_context) { - ring = engine->context_pin(engine, - engine->i915->preempt_context); + ring = intel_context_pin(engine->i915->preempt_context, engine); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto err_unpin_kernel; @@ -724,9 +703,9 @@ err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); + intel_context_unpin(engine->i915->preempt_context, engine); err_unpin_kernel: - engine->context_unpin(engine, engine->i915->kernel_context); + intel_context_unpin(engine->i915->kernel_context, engine); return ret; } @@ -754,8 +733,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) i915_gem_object_put(engine->default_state); if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); - engine->context_unpin(engine, engine->i915->kernel_context); + intel_context_unpin(engine->i915->preempt_context, engine); + intel_context_unpin(engine->i915->kernel_context, engine); + + i915_timeline_fini(&engine->timeline); } u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) @@ -1008,7 +989,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) * the last request that remains in the timeline. When idle, it is * the last executed context as tracked by retirement. */ - rq = __i915_gem_active_peek(&engine->timeline->last_request); + rq = __i915_gem_active_peek(&engine->timeline.last_request); if (rq) return rq->ctx == kernel_context; else @@ -1081,6 +1062,8 @@ void intel_engines_unpark(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { if (engine->unpark) engine->unpark(engine); + + intel_engine_init_hangcheck(engine); } } @@ -1113,17 +1096,35 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) return which; } +static int print_sched_attr(struct drm_i915_private *i915, + const struct i915_sched_attr *attr, + char *buf, int x, int len) +{ + if (attr->priority == I915_PRIORITY_INVALID) + return x; + + x += snprintf(buf + x, len - x, + " prio=%d", attr->priority); + + return x; +} + static void print_request(struct drm_printer *m, struct i915_request *rq, const char *prefix) { const char *name = rq->fence.ops->get_timeline_name(&rq->fence); + char buf[80] = ""; + int x = 0; - drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix, + x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); + + drm_printf(m, "%s%x%s [%llx:%x]%s @ %dms: %s\n", + prefix, rq->global_seqno, i915_request_completed(rq) ? "!" : "", rq->fence.context, rq->fence.seqno, - rq->priotree.priority, + buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); } @@ -1266,8 +1267,9 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, rq: ", - idx, count); + "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", + idx, count, + i915_ggtt_offset(rq->ring->vma)); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1289,11 +1291,13 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) { + const int MAX_REQUESTS_TO_SHOW = 8; struct intel_breadcrumbs * const b = &engine->breadcrumbs; const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct i915_request *rq; + struct i915_request *rq, *last; struct rb_node *rb; + int count; if (header) { va_list ap; @@ -1306,12 +1310,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_terminally_wedged(&engine->i915->gpu_error)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); @@ -1320,14 +1323,14 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); - rq = list_first_entry(&engine->timeline->requests, + rq = list_first_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tfirst "); - rq = list_last_entry(&engine->timeline->requests, + rq = list_last_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tlast "); rq = i915_gem_find_active_request(engine); @@ -1359,18 +1362,49 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - spin_lock_irq(&engine->timeline->lock); - list_for_each_entry(rq, &engine->timeline->requests, link) - print_request(m, rq, "\t\tE "); + spin_lock_irq(&engine->timeline.lock); + + last = NULL; + count = 0; + list_for_each_entry(rq, &engine->timeline.requests, link) { + if (count++ < MAX_REQUESTS_TO_SHOW - 1) + print_request(m, rq, "\t\tE "); + else + last = rq; + } + if (last) { + if (count > MAX_REQUESTS_TO_SHOW) { + drm_printf(m, + "\t\t...skipping %d executing requests...\n", + count - MAX_REQUESTS_TO_SHOW); + } + print_request(m, last, "\t\tE "); + } + + last = NULL; + count = 0; drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); for (rb = execlists->first; rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - list_for_each_entry(rq, &p->requests, priotree.link) - print_request(m, rq, "\t\tQ "); + list_for_each_entry(rq, &p->requests, sched.link) { + if (count++ < MAX_REQUESTS_TO_SHOW - 1) + print_request(m, rq, "\t\tQ "); + else + last = rq; + } } - spin_unlock_irq(&engine->timeline->lock); + if (last) { + if (count > MAX_REQUESTS_TO_SHOW) { + drm_printf(m, + "\t\t...skipping %d queued requests...\n", + count - MAX_REQUESTS_TO_SHOW); + } + print_request(m, last, "\t\tQ "); + } + + spin_unlock_irq(&engine->timeline.lock); spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -1435,7 +1469,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) return -ENODEV; tasklet_disable(&execlists->tasklet); - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -1459,7 +1493,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); tasklet_enable(&execlists->tasklet); return err; @@ -1488,12 +1522,13 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) */ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) { + unsigned int seq; ktime_t total; - unsigned long flags; - spin_lock_irqsave(&engine->stats.lock, flags); - total = __intel_engine_get_busy_time(engine); - spin_unlock_irqrestore(&engine->stats.lock, flags); + do { + seq = read_seqbegin(&engine->stats.lock); + total = __intel_engine_get_busy_time(engine); + } while (read_seqretry(&engine->stats.lock, seq)); return total; } @@ -1511,13 +1546,13 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); WARN_ON_ONCE(engine->stats.enabled == 0); if (--engine->stats.enabled == 0) { engine->stats.total = __intel_engine_get_busy_time(engine); engine->stats.active = 0; } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index a00a59a7d9ec..116f4ccf1bbd 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -203,26 +203,6 @@ void intel_guc_fini(struct intel_guc *guc) guc_shared_data_destroy(guc); } -static u32 get_gt_type(struct drm_i915_private *dev_priv) -{ - /* XXX: GT type based on PCI device ID? field seems unused by fw */ - return 0; -} - -static u32 get_core_family(struct drm_i915_private *dev_priv) -{ - u32 gen = INTEL_GEN(dev_priv); - - switch (gen) { - case 9: - return GUC_CORE_FAMILY_GEN9; - - default: - MISSING_CASE(gen); - return GUC_CORE_FAMILY_UNKNOWN; - } -} - static u32 get_log_control_flags(void) { u32 level = i915_modparams.guc_log_level; @@ -255,10 +235,6 @@ void intel_guc_init_params(struct intel_guc *guc) memset(params, 0, sizeof(params)); - params[GUC_CTL_DEVICE_INFO] |= - (get_gt_type(dev_priv) << GUC_CTL_GT_TYPE_SHIFT) | - (get_core_family(dev_priv) << GUC_CTL_CORE_FAMILY_SHIFT); - /* * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one * second. This ARAR is calculated by: diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c index 334cb5202e1c..dcaa3fb71765 100644 --- a/drivers/gpu/drm/i915/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -121,7 +121,8 @@ int intel_guc_ads_create(struct intel_guc *guc) * to find it. Note that we have to skip our header (1 page), * because our GuC shared data is there. */ - kernel_ctx_vma = dev_priv->kernel_context->engine[RCS].state; + kernel_ctx_vma = to_intel_context(dev_priv->kernel_context, + dev_priv->engine[RCS])->state; blob->ads.golden_context_lrca = intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset; diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index d73673f5d30c..0867ba76d445 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -23,9 +23,6 @@ #ifndef _INTEL_GUC_FWIF_H #define _INTEL_GUC_FWIF_H -#define GUC_CORE_FAMILY_GEN9 12 -#define GUC_CORE_FAMILY_UNKNOWN 0x7fffffff - #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 @@ -82,8 +79,6 @@ #define GUC_CTL_ARAT_LOW 2 #define GUC_CTL_DEVICE_INFO 3 -#define GUC_CTL_GT_TYPE_SHIFT 0 -#define GUC_CTL_CORE_FAMILY_SHIFT 7 #define GUC_CTL_LOG_PARAMS 4 #define GUC_LOG_VALID (1 << 0) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 97121230656c..2feb65096966 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -124,9 +124,17 @@ static int reserve_doorbell(struct intel_guc_client *client) return 0; } +static bool has_doorbell(struct intel_guc_client *client) +{ + if (client->doorbell_id == GUC_DOORBELL_INVALID) + return false; + + return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); +} + static void unreserve_doorbell(struct intel_guc_client *client) { - GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); + GEM_BUG_ON(!has_doorbell(client)); __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); client->doorbell_id = GUC_DOORBELL_INVALID; @@ -184,14 +192,6 @@ static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client) return client->vaddr + client->doorbell_offset; } -static bool has_doorbell(struct intel_guc_client *client) -{ - if (client->doorbell_id == GUC_DOORBELL_INVALID) - return false; - - return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); -} - static void __create_doorbell(struct intel_guc_client *client) { struct guc_doorbell_info *doorbell; @@ -207,7 +207,6 @@ static void __destroy_doorbell(struct intel_guc_client *client) struct guc_doorbell_info *doorbell; u16 db_id = client->doorbell_id; - doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_DISABLED; doorbell->cookie = 0; @@ -224,6 +223,9 @@ static int create_doorbell(struct intel_guc_client *client) { int ret; + if (WARN_ON(!has_doorbell(client))) + return -ENODEV; /* internal setup error, should never happen */ + __update_doorbell_desc(client, client->doorbell_id); __create_doorbell(client); @@ -362,7 +364,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, desc->db_id = client->doorbell_id; for_each_engine_masked(engine, dev_priv, client->engines, tmp) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; @@ -659,7 +661,7 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) static inline int rq_prio(const struct i915_request *rq) { - return rq->priotree.priority; + return rq->sched.attr.priority; } static inline int port_prio(const struct execlist_port *port) @@ -667,7 +669,7 @@ static inline int port_prio(const struct execlist_port *port) return rq_prio(port_request(port)); } -static void guc_dequeue(struct intel_engine_cs *engine) +static bool __guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -677,7 +679,8 @@ static void guc_dequeue(struct intel_engine_cs *engine) bool submit = false; struct rb_node *rb; - spin_lock_irq(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); + rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); @@ -692,13 +695,13 @@ static void guc_dequeue(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_PREEMPT); queue_work(engine->i915->guc.preempt_wq, &preempt_work->work); - goto unlock; + return false; } } port++; if (port_isset(port)) - goto unlock; + return false; } GEM_BUG_ON(port_isset(port)); @@ -706,11 +709,11 @@ static void guc_dequeue(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { if (last && rq->ctx != last->ctx) { if (port == last_port) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -719,7 +722,7 @@ static void guc_dequeue(struct intel_engine_cs *engine) port++; } - INIT_LIST_HEAD(&rq->priotree.link); + INIT_LIST_HEAD(&rq->sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); @@ -736,19 +739,34 @@ static void guc_dequeue(struct intel_engine_cs *engine) done: execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; execlists->first = rb; - if (submit) { + if (submit) port_assign(port, last); + if (last) execlists_user_begin(execlists, execlists->port); - guc_submit(engine); - } /* We must always keep the beast fed if we have work piled up */ GEM_BUG_ON(port_isset(execlists->port) && !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); -unlock: - spin_unlock_irq(&engine->timeline->lock); + return submit; +} + +static void guc_dequeue(struct intel_engine_cs *engine) +{ + unsigned long flags; + bool submit; + + local_irq_save(flags); + + spin_lock(&engine->timeline.lock); + submit = __guc_dequeue(engine); + spin_unlock(&engine->timeline.lock); + + if (submit) + guc_submit(engine); + + local_irq_restore(flags); } static void guc_submission_tasklet(unsigned long data) @@ -990,7 +1008,8 @@ static void guc_fill_preempt_context(struct intel_guc *guc) enum intel_engine_id id; for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &client->owner->engine[id]; + struct intel_context *ce = + to_intel_context(client->owner, engine); u32 addr = intel_hws_preempt_done_address(engine); u32 *cs; diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index fd0ffb8328d0..d47e346bd49e 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -356,7 +356,7 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, break; case ENGINE_DEAD: - if (drm_debug & DRM_UT_DRIVER) { + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer("hangcheck"); intel_engine_dump(engine, &p, "%s\n", engine->name); } @@ -452,6 +452,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) void intel_engine_init_hangcheck(struct intel_engine_cs *engine) { memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); + engine->hangcheck.action_timestamp = jiffies; } void intel_hangcheck_init(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c7c85134a84a..15434cad5430 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -177,7 +177,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline int rq_prio(const struct i915_request *rq) { - return rq->priotree.priority; + return rq->sched.attr.priority; } static inline bool need_preempt(const struct intel_engine_cs *engine, @@ -185,7 +185,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, int prio) { return (intel_engine_has_preemption(engine) && - __execlists_need_preempt(prio, rq_prio(last))); + __execlists_need_preempt(prio, rq_prio(last)) && + !i915_request_completed(last)); } /** @@ -223,7 +224,7 @@ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u64 desc; BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); @@ -257,9 +258,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, } static struct i915_priolist * -lookup_priolist(struct intel_engine_cs *engine, - struct i915_priotree *pt, - int prio) +lookup_priolist(struct intel_engine_cs *engine, int prio) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_priolist *p; @@ -330,10 +329,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) struct i915_priolist *uninitialized_var(p); int last_prio = I915_PRIORITY_INVALID; - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline->requests, + &engine->timeline.requests, link) { if (i915_request_completed(rq)) return; @@ -344,10 +343,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != last_prio) { last_prio = rq_prio(rq); - p = lookup_priolist(engine, &rq->priotree, last_prio); + p = lookup_priolist(engine, last_prio); } - list_add(&rq->priotree.link, &p->requests); + GEM_BUG_ON(p->priority != rq_prio(rq)); + list_add(&rq->sched.link, &p->requests); } } @@ -356,10 +356,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) { struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); + unsigned long flags; + + spin_lock_irqsave(&engine->timeline.lock, flags); - spin_lock_irq(&engine->timeline->lock); __unwind_incomplete_requests(engine); - spin_unlock_irq(&engine->timeline->lock); + + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static inline void @@ -397,10 +400,11 @@ execlists_context_schedule_in(struct i915_request *rq) } static inline void -execlists_context_schedule_out(struct i915_request *rq) +execlists_context_schedule_out(struct i915_request *rq, unsigned long status) { intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + execlists_context_status_change(rq, status); + trace_i915_request_out(rq); } static void @@ -414,7 +418,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; + struct intel_context *ce = to_intel_context(rq->ctx, rq->engine); struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; @@ -523,7 +527,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; struct intel_context *ce = - &engine->i915->preempt_context->engine[engine->id]; + to_intel_context(engine->i915->preempt_context, engine); unsigned int n; GEM_BUG_ON(execlists->preempt_complete_status != @@ -552,7 +556,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); } -static void execlists_dequeue(struct intel_engine_cs *engine) +static bool __execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -562,6 +566,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; + lockdep_assert_held(&engine->timeline.lock); + /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute @@ -583,7 +589,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ - spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); @@ -598,7 +603,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(!port_count(&port[0])); if (port_count(&port[0]) > 1) - goto unlock; + return false; /* * If we write to ELSP a second time before the HW has had @@ -608,11 +613,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the HW to indicate that it has had a chance to respond. */ if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) - goto unlock; + return false; if (need_preempt(engine, last, execlists->queue_priority)) { inject_preempt_context(engine); - goto unlock; + return false; } /* @@ -637,7 +642,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * priorities of the ports haven't been switch. */ if (port_count(&port[1])) - goto unlock; + return false; /* * WaIdleLiteRestore:bdw,skl @@ -654,7 +659,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { /* * Can we combine this request with the current port? * It has to be the same context/ringbuffer and not @@ -674,7 +679,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ if (port == last_port) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -688,7 +693,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (ctx_single_port_submission(last->ctx) || ctx_single_port_submission(rq->ctx)) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -701,7 +706,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(port_isset(port)); } - INIT_LIST_HEAD(&rq->priotree.link); + INIT_LIST_HEAD(&rq->sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); last = rq; @@ -742,13 +747,25 @@ done: /* We must always keep the beast fed if we have work piled up */ GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); -unlock: - spin_unlock_irq(&engine->timeline->lock); - - if (submit) { + /* Re-evaluate the executing context setup after each preemptive kick */ + if (last) execlists_user_begin(execlists, execlists->port); + + return submit; +} + +static void execlists_dequeue(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + unsigned long flags; + bool submit; + + spin_lock_irqsave(&engine->timeline.lock, flags); + submit = __execlists_dequeue(engine); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + if (submit) execlists_submit_ports(engine); - } GEM_BUG_ON(port_isset(execlists->port) && !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); @@ -771,12 +788,10 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) intel_engine_get_seqno(rq->engine)); GEM_BUG_ON(!execlists->active); - intel_engine_context_out(rq->engine); - - execlists_context_status_change(rq, - i915_request_completed(rq) ? - INTEL_CONTEXT_SCHEDULE_OUT : - INTEL_CONTEXT_SCHEDULE_PREEMPTED); + execlists_context_schedule_out(rq, + i915_request_completed(rq) ? + INTEL_CONTEXT_SCHEDULE_OUT : + INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_request_put(rq); @@ -789,22 +804,9 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) static void clear_gtiir(struct intel_engine_cs *engine) { - static const u8 gtiir[] = { - [RCS] = 0, - [BCS] = 0, - [VCS] = 1, - [VCS2] = 1, - [VECS] = 3, - }; struct drm_i915_private *dev_priv = engine->i915; int i; - /* TODO: correctly reset irqs for gen11 */ - if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11)) - return; - - GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); - /* * Clear any pending interrupt state. * @@ -812,13 +814,50 @@ static void clear_gtiir(struct intel_engine_cs *engine) * double buffered, and so if we only reset it once there may * still be an interrupt pending. */ - for (i = 0; i < 2; i++) { - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + if (INTEL_GEN(dev_priv) >= 11) { + static const struct { + u8 bank; + u8 bit; + } gen11_gtiir[] = { + [RCS] = {0, GEN11_RCS0}, + [BCS] = {0, GEN11_BCS}, + [_VCS(0)] = {1, GEN11_VCS(0)}, + [_VCS(1)] = {1, GEN11_VCS(1)}, + [_VCS(2)] = {1, GEN11_VCS(2)}, + [_VCS(3)] = {1, GEN11_VCS(3)}, + [_VECS(0)] = {1, GEN11_VECS(0)}, + [_VECS(1)] = {1, GEN11_VECS(1)}, + }; + unsigned long irqflags; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir)); + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + for (i = 0; i < 2; i++) { + gen11_reset_one_iir(dev_priv, + gen11_gtiir[engine->id].bank, + gen11_gtiir[engine->id].bit); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + } else { + static const u8 gtiir[] = { + [RCS] = 0, + [BCS] = 0, + [VCS] = 1, + [VCS2] = 1, + [VECS] = 3, + }; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); + + for (i = 0; i < 2; i++) { + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + engine->irq_keep_mask); + POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); + } + GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & engine->irq_keep_mask); - POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); } - GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & - engine->irq_keep_mask); } static void reset_irq(struct intel_engine_cs *engine) @@ -868,10 +907,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists_cancel_port_requests(execlists); reset_irq(engine); - spin_lock(&engine->timeline->lock); + spin_lock(&engine->timeline.lock); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->timeline->requests, link) { + list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); if (!i915_request_completed(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -882,8 +921,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) while (rb) { struct i915_priolist *p = to_priolist(rb); - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { - INIT_LIST_HEAD(&rq->priotree.link); + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { + INIT_LIST_HEAD(&rq->sched.link); dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); @@ -903,7 +942,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); - spin_unlock(&engine->timeline->lock); + spin_unlock(&engine->timeline.lock); local_irq_restore(flags); } @@ -969,6 +1008,7 @@ static void execlists_submission_tasklet(unsigned long data) head = execlists->csb_head; tail = READ_ONCE(buf[write_idx]); + rmb(); /* Hopefully paired with a wmb() in HW */ } GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", engine->name, @@ -1079,8 +1119,8 @@ static void execlists_submission_tasklet(unsigned long data) */ GEM_BUG_ON(!i915_request_completed(rq)); - execlists_context_schedule_out(rq); - trace_i915_request_out(rq); + execlists_context_schedule_out(rq, + INTEL_CONTEXT_SCHEDULE_OUT); i915_request_put(rq); GEM_TRACE("%s completed ctx=%d\n", @@ -1116,10 +1156,11 @@ static void execlists_submission_tasklet(unsigned long data) } static void queue_request(struct intel_engine_cs *engine, - struct i915_priotree *pt, + struct i915_sched_node *node, int prio) { - list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); + list_add_tail(&node->link, + &lookup_priolist(engine, prio)->requests); } static void __submit_queue(struct intel_engine_cs *engine, int prio) @@ -1140,42 +1181,45 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); - queue_request(engine, &request->priotree, rq_prio(request)); + queue_request(engine, &request->sched, rq_prio(request)); submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); - GEM_BUG_ON(list_empty(&request->priotree.link)); + GEM_BUG_ON(list_empty(&request->sched.link)); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static struct i915_request *pt_to_request(struct i915_priotree *pt) +static struct i915_request *sched_to_request(struct i915_sched_node *node) { - return container_of(pt, struct i915_request, priotree); + return container_of(node, struct i915_request, sched); } static struct intel_engine_cs * -pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) +sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine = pt_to_request(pt)->engine; + struct intel_engine_cs *engine = sched_to_request(node)->engine; GEM_BUG_ON(!locked); if (engine != locked) { - spin_unlock(&locked->timeline->lock); - spin_lock(&engine->timeline->lock); + spin_unlock(&locked->timeline.lock); + spin_lock(&engine->timeline.lock); } return engine; } -static void execlists_schedule(struct i915_request *request, int prio) +static void execlists_schedule(struct i915_request *request, + const struct i915_sched_attr *attr) { - struct intel_engine_cs *engine; + struct i915_priolist *uninitialized_var(pl); + struct intel_engine_cs *engine, *last; struct i915_dependency *dep, *p; struct i915_dependency stack; + const int prio = attr->priority; LIST_HEAD(dfs); GEM_BUG_ON(prio == I915_PRIORITY_INVALID); @@ -1183,23 +1227,23 @@ static void execlists_schedule(struct i915_request *request, int prio) if (i915_request_completed(request)) return; - if (prio <= READ_ONCE(request->priotree.priority)) + if (prio <= READ_ONCE(request->sched.attr.priority)) return; /* Need BKL in order to use the temporary link inside i915_dependency */ lockdep_assert_held(&request->i915->drm.struct_mutex); - stack.signaler = &request->priotree; + stack.signaler = &request->sched; list_add(&stack.dfs_link, &dfs); /* * Recursively bump all dependent priorities to match the new request. * * A naive approach would be to use recursion: - * static void update_priorities(struct i915_priotree *pt, prio) { - * list_for_each_entry(dep, &pt->signalers_list, signal_link) + * static void update_priorities(struct i915_sched_node *node, prio) { + * list_for_each_entry(dep, &node->signalers_list, signal_link) * update_priorities(dep->signal, prio) - * queue_request(pt); + * queue_request(node); * } * but that may have unlimited recursion depth and so runs a very * real risk of overunning the kernel stack. Instead, we build @@ -1211,7 +1255,7 @@ static void execlists_schedule(struct i915_request *request, int prio) * last element in the list is the request we must execute first. */ list_for_each_entry(dep, &dfs, dfs_link) { - struct i915_priotree *pt = dep->signaler; + struct i915_sched_node *node = dep->signaler; /* * Within an engine, there can be no cycle, but we may @@ -1219,14 +1263,14 @@ static void execlists_schedule(struct i915_request *request, int prio) * (redundant dependencies are not eliminated) and across * engines. */ - list_for_each_entry(p, &pt->signalers_list, signal_link) { + list_for_each_entry(p, &node->signalers_list, signal_link) { GEM_BUG_ON(p == dep); /* no cycles! */ - if (i915_priotree_signaled(p->signaler)) + if (i915_sched_node_signaled(p->signaler)) continue; - GEM_BUG_ON(p->signaler->priority < pt->priority); - if (prio > READ_ONCE(p->signaler->priority)) + GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); + if (prio > READ_ONCE(p->signaler->attr.priority)) list_move_tail(&p->dfs_link, &dfs); } } @@ -1237,40 +1281,45 @@ static void execlists_schedule(struct i915_request *request, int prio) * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. */ - if (request->priotree.priority == I915_PRIORITY_INVALID) { - GEM_BUG_ON(!list_empty(&request->priotree.link)); - request->priotree.priority = prio; + if (request->sched.attr.priority == I915_PRIORITY_INVALID) { + GEM_BUG_ON(!list_empty(&request->sched.link)); + request->sched.attr = *attr; if (stack.dfs_link.next == stack.dfs_link.prev) return; __list_del_entry(&stack.dfs_link); } + last = NULL; engine = request->engine; - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { - struct i915_priotree *pt = dep->signaler; + struct i915_sched_node *node = dep->signaler; INIT_LIST_HEAD(&dep->dfs_link); - engine = pt_lock_engine(pt, engine); + engine = sched_lock_engine(node, engine); - if (prio <= pt->priority) + if (prio <= node->attr.priority) continue; - pt->priority = prio; - if (!list_empty(&pt->link)) { - __list_del_entry(&pt->link); - queue_request(engine, pt, prio); + node->attr.priority = prio; + if (!list_empty(&node->link)) { + if (last != engine) { + pl = lookup_priolist(engine, prio); + last = engine; + } + GEM_BUG_ON(pl->priority != prio); + list_move_tail(&node->link, &pl->requests); } if (prio > engine->execlists.queue_priority && - i915_sw_fence_done(&pt_to_request(pt)->submit)) + i915_sw_fence_done(&sched_to_request(node)->submit)) __submit_queue(engine, prio); } - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) @@ -1300,7 +1349,7 @@ static struct intel_ring * execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); void *vaddr; int ret; @@ -1353,7 +1402,7 @@ err: static void execlists_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(ce->pin_count == 0); @@ -1372,8 +1421,8 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, static int execlists_request_alloc(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(request->ctx, request->engine); int ret; GEM_BUG_ON(!ce->pin_count); @@ -1633,6 +1682,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return -EINVAL; switch (INTEL_GEN(engine->i915)) { + case 11: + return 0; case 10: wa_bb_fn[0] = gen10_init_indirectctx_bb; wa_bb_fn[1] = NULL; @@ -1744,9 +1795,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - ret = intel_whitelist_workarounds_apply(engine); - if (ret) - return ret; + intel_whitelist_workarounds_apply(engine); /* We need to disable the AsyncFlip performance optimisations in order * to use MI_WAIT_FOR_EVENT within the CS. It should already be @@ -1769,9 +1818,7 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - ret = intel_whitelist_workarounds_apply(engine); - if (ret) - return ret; + intel_whitelist_workarounds_apply(engine); return 0; } @@ -1780,8 +1827,8 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct i915_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct intel_context *ce; unsigned long flags; + u32 *regs; GEM_TRACE("%s request global=%x, current=%d\n", engine->name, request ? request->global_seqno : 0, @@ -1803,9 +1850,9 @@ static void reset_common_ring(struct intel_engine_cs *engine, reset_irq(engine); /* Push back any incomplete requests for replay after the reset. */ - spin_lock(&engine->timeline->lock); + spin_lock(&engine->timeline.lock); __unwind_incomplete_requests(engine); - spin_unlock(&engine->timeline->lock); + spin_unlock(&engine->timeline.lock); local_irq_restore(flags); @@ -1831,14 +1878,24 @@ static void reset_common_ring(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - ce = &request->ctx->engine[engine->id]; - execlists_init_reg_state(ce->lrc_reg_state, - request->ctx, engine, ce->ring); + regs = to_intel_context(request->ctx, engine)->lrc_reg_state; + if (engine->default_state) { + void *defaults; + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (!IS_ERR(defaults)) { + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + defaults + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + i915_gem_object_unpin_map(engine->default_state); + } + } + execlists_init_reg_state(regs, request->ctx, engine, request->ring); /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ - ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = - i915_ggtt_offset(ce->ring->vma); - ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; + regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); + regs[CTX_RING_HEAD + 1] = request->postfix; request->ring->head = request->postfix; intel_ring_update_space(request->ring); @@ -1899,7 +1956,7 @@ static int gen8_emit_bb_start(struct i915_request *rq, rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); } - cs = intel_ring_begin(rq, 4); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1928,6 +1985,9 @@ static int gen8_emit_bb_start(struct i915_request *rq, (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; intel_ring_advance(rq, cs); return 0; @@ -2070,7 +2130,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) cs = gen8_emit_ggtt_write(cs, request->global_seqno, intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2086,7 +2146,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno, intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2272,9 +2332,13 @@ static int logical_ring_init(struct intel_engine_cs *engine) } engine->execlists.preempt_complete_status = ~0u; - if (engine->i915->preempt_context) + if (engine->i915->preempt_context) { + struct intel_context *ce = + to_intel_context(engine->i915->preempt_context, engine); + engine->execlists.preempt_complete_status = - upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc); + upper_32_bits(ce->lrc_desc); + } return 0; @@ -2528,8 +2592,10 @@ populate_lr_context(struct i915_gem_context *ctx, defaults = i915_gem_object_pin_map(engine->default_state, I915_MAP_WB); - if (IS_ERR(defaults)) - return PTR_ERR(defaults); + if (IS_ERR(defaults)) { + ret = PTR_ERR(defaults); + goto err_unpin_ctx; + } memcpy(vaddr + start, defaults + start, engine->context_size); i915_gem_object_unpin_map(engine->default_state); @@ -2547,19 +2613,20 @@ populate_lr_context(struct i915_gem_context *ctx, _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); +err_unpin_ctx: i915_gem_object_unpin_map(ctx_obj); - - return 0; + return ret; } static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); struct i915_vma *vma; uint32_t context_size; struct intel_ring *ring; + struct i915_timeline *timeline; int ret; if (ce->state) @@ -2575,8 +2642,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ctx_obj = i915_gem_object_create(ctx->i915, context_size); if (IS_ERR(ctx_obj)) { - DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n"); - return PTR_ERR(ctx_obj); + ret = PTR_ERR(ctx_obj); + goto error_deref_obj; } vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL); @@ -2585,7 +2652,14 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - ring = intel_engine_create_ring(engine, ctx->ring_size); + timeline = i915_timeline_create(ctx->i915, ctx->name); + if (IS_ERR(timeline)) { + ret = PTR_ERR(timeline); + goto error_deref_obj; + } + + ring = intel_engine_create_ring(engine, timeline, ctx->ring_size); + i915_timeline_put(timeline); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; @@ -2627,7 +2701,8 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(ctx, engine); u32 *reg; if (!ce->state) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 59d7b86012e9..4ec7d8dd13c8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -108,7 +108,7 @@ static inline uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - return ctx->engine[engine->id].lrc_desc; + return to_intel_context(ctx, engine)->lrc_desc; } #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index d35d2d50f595..d8ece907ff54 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -326,7 +326,8 @@ static void intel_enable_lvds(struct intel_encoder *encoder, I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON); POSTING_READ(lvds_encoder->reg); - if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000)) + + if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 5000)) DRM_ERROR("timed out waiting for panel to power on\n"); intel_panel_enable_backlight(pipe_config, conn_state); @@ -1139,8 +1140,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) out: mutex_unlock(&dev->mode_config.mutex); - intel_panel_init(&intel_connector->panel, fixed_mode, NULL, - downclock_mode); + intel_panel_init(&intel_connector->panel, fixed_mode, downclock_mode); intel_panel_setup_backlight(connector, INVALID_PIPE); lvds_encoder->is_dual_link = compute_is_dual_link_lvds(lvds_encoder); diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c0b34b7943b9..9f0bd6a4cb79 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -178,7 +178,8 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || + IS_ICELAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->table = skylake_mocs_table; result = true; @@ -217,6 +218,8 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) return GEN9_VEBOX_MOCS(index); case VCS2: return GEN9_MFX1_MOCS(index); + case VCS3: + return GEN11_MFX2_MOCS(index); default: MISSING_CASE(engine_id); return INVALID_MMIO_REG; diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 41d00b1603e3..b443278e569c 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1928,13 +1928,11 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel) int intel_panel_init(struct intel_panel *panel, struct drm_display_mode *fixed_mode, - struct drm_display_mode *alt_fixed_mode, struct drm_display_mode *downclock_mode) { intel_panel_init_backlight_funcs(panel); panel->fixed_mode = fixed_mode; - panel->alt_fixed_mode = alt_fixed_mode; panel->downclock_mode = downclock_mode; return 0; @@ -1948,10 +1946,6 @@ void intel_panel_fini(struct intel_panel *panel) if (panel->fixed_mode) drm_mode_destroy(intel_connector->base.dev, panel->fixed_mode); - if (panel->alt_fixed_mode) - drm_mode_destroy(intel_connector->base.dev, - panel->alt_fixed_mode); - if (panel->downclock_mode) drm_mode_destroy(intel_connector->base.dev, panel->downclock_mode); diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 4f367c16e9e5..39a4e4edda07 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -766,13 +766,12 @@ display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o) { int i; - for (i = 0; i < ARRAY_SIZE(pipe_crc_objects); i++) - if (!strcmp(buf, pipe_crc_objects[i])) { - *o = i; - return 0; - } + i = match_string(pipe_crc_objects, ARRAY_SIZE(pipe_crc_objects), buf); + if (i < 0) + return i; - return -EINVAL; + *o = i; + return 0; } static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, @@ -798,13 +797,12 @@ display_crc_ctl_parse_source(const char *buf, enum intel_pipe_crc_source *s) return 0; } - for (i = 0; i < ARRAY_SIZE(pipe_crc_sources); i++) - if (!strcmp(buf, pipe_crc_sources[i])) { - *s = i; - return 0; - } + i = match_string(pipe_crc_sources, ARRAY_SIZE(pipe_crc_sources), buf); + if (i < 0) + return i; - return -EINVAL; + *s = i; + return 0; } static int display_crc_ctl_parse(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4baab858e442..53aaaa3e6886 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3567,6 +3567,23 @@ bool ilk_disable_lp_wm(struct drm_device *dev) return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } +static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv) +{ + u8 enabled_slices; + + /* Slice 1 will always be enabled */ + enabled_slices = 1; + + /* Gen prior to GEN11 have only one DBuf slice */ + if (INTEL_GEN(dev_priv) < 11) + return enabled_slices; + + if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE) + enabled_slices++; + + return enabled_slices; +} + /* * FIXME: We still don't have the proper code detect if we need to apply the WA, * so assume we'll always need it in order to avoid underruns. @@ -3754,9 +3771,42 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) return true; } +static unsigned int intel_get_ddb_size(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *cstate, + const unsigned int total_data_rate, + const int num_active, + struct skl_ddb_allocation *ddb) +{ + const struct drm_display_mode *adjusted_mode; + u64 total_data_bw; + u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size; + + WARN_ON(ddb_size == 0); + + if (INTEL_GEN(dev_priv) < 11) + return ddb_size - 4; /* 4 blocks for bypass path allocation */ + + adjusted_mode = &cstate->base.adjusted_mode; + total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode); + + /* + * 12GB/s is maximum BW supported by single DBuf slice. + */ + if (total_data_bw >= GBps(12) || num_active > 1) { + ddb->enabled_slices = 2; + } else { + ddb->enabled_slices = 1; + ddb_size /= 2; + } + + return ddb_size; +} + static void skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, const struct intel_crtc_state *cstate, + const unsigned int total_data_rate, + struct skl_ddb_allocation *ddb, struct skl_ddb_entry *alloc, /* out */ int *num_active /* out */) { @@ -3779,11 +3829,8 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, else *num_active = hweight32(dev_priv->active_crtcs); - ddb_size = INTEL_INFO(dev_priv)->ddb_size; - WARN_ON(ddb_size == 0); - - if (INTEL_GEN(dev_priv) < 11) - ddb_size -= 4; /* 4 blocks for bypass path allocation */ + ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate, + *num_active, ddb); /* * If the state doesn't change the active CRTC's, then there's @@ -3817,10 +3864,18 @@ static unsigned int skl_cursor_allocation(int num_active) return 8; } -static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) +static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv, + struct skl_ddb_entry *entry, u32 reg) { - entry->start = reg & 0x3ff; - entry->end = (reg >> 16) & 0x3ff; + u16 mask; + + if (INTEL_GEN(dev_priv) >= 11) + mask = ICL_DDB_ENTRY_MASK; + else + mask = SKL_DDB_ENTRY_MASK; + entry->start = reg & mask; + entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask; + if (entry->end) entry->end += 1; } @@ -3837,7 +3892,8 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv, /* Cursor doesn't support NV12/planar, so no extra calculation needed */ if (plane_id == PLANE_CURSOR) { val = I915_READ(CUR_BUF_CFG(pipe)); - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val); return; } @@ -3856,10 +3912,13 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv, val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id)); if (fourcc == DRM_FORMAT_NV12) { - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val2); - skl_ddb_entry_init_from_hw(&ddb->uv_plane[pipe][plane_id], val); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val2); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->uv_plane[pipe][plane_id], val); } else { - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val); } } @@ -3870,6 +3929,8 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, memset(ddb, 0, sizeof(*ddb)); + ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv); + for_each_intel_crtc(&dev_priv->drm, crtc) { enum intel_display_power_domain power_domain; enum plane_id plane_id; @@ -4242,7 +4303,11 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, return 0; } - skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); + total_data_rate = skl_get_total_relative_data_rate(cstate, + plane_data_rate, + uv_plane_data_rate); + skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb, + alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); if (alloc_size == 0) return 0; @@ -4277,9 +4342,6 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * * FIXME: we may not allocate every single block here. */ - total_data_rate = skl_get_total_relative_data_rate(cstate, - plane_data_rate, - uv_plane_data_rate); if (total_data_rate == 0) return 0; @@ -5472,8 +5534,12 @@ void skl_wm_get_hw_state(struct drm_device *dev) /* Fully recompute DDB on first atomic commit */ dev_priv->wm.distrust_bios_wm = true; } else { - /* Easy/common case; just sanitize DDB now if everything off */ - memset(ddb, 0, sizeof(*ddb)); + /* + * Easy/common case; just sanitize DDB now if everything off + * Keep dbuf slice info intact + */ + memset(ddb->plane, 0, sizeof(ddb->plane)); + memset(ddb->uv_plane, 0, sizeof(ddb->uv_plane)); } } @@ -8597,6 +8663,13 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, I915_WRITE(GEN7_MISCCPCTL, misccpctl); } +static void icl_init_clock_gating(struct drm_i915_private *dev_priv) +{ + /* This is not an Wa. Enable to reduce Sampler power */ + I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN, + I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE); +} + static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) { if (!HAS_PCH_CNP(dev_priv)) @@ -9123,7 +9196,9 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv) */ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + dev_priv->display.init_clock_gating = icl_init_clock_gating; + else if (IS_CANNONLAKE(dev_priv)) dev_priv->display.init_clock_gating = cnl_init_clock_gating; else if (IS_COFFEELAKE(dev_priv)) dev_priv->display.init_clock_gating = cfl_init_clock_gating; diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 69a5b276f4d8..db27f2faa1de 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -93,6 +93,114 @@ static void psr_aux_io_power_put(struct intel_dp *intel_dp) intel_display_power_put(dev_priv, psr_aux_domain(intel_dp)); } +void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug) +{ + u32 debug_mask, mask; + + /* No PSR interrupts on VLV/CHV */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + return; + + mask = EDP_PSR_ERROR(TRANSCODER_EDP); + debug_mask = EDP_PSR_POST_EXIT(TRANSCODER_EDP) | + EDP_PSR_PRE_ENTRY(TRANSCODER_EDP); + + if (INTEL_GEN(dev_priv) >= 8) { + mask |= EDP_PSR_ERROR(TRANSCODER_A) | + EDP_PSR_ERROR(TRANSCODER_B) | + EDP_PSR_ERROR(TRANSCODER_C); + + debug_mask |= EDP_PSR_POST_EXIT(TRANSCODER_A) | + EDP_PSR_PRE_ENTRY(TRANSCODER_A) | + EDP_PSR_POST_EXIT(TRANSCODER_B) | + EDP_PSR_PRE_ENTRY(TRANSCODER_B) | + EDP_PSR_POST_EXIT(TRANSCODER_C) | + EDP_PSR_PRE_ENTRY(TRANSCODER_C); + } + + if (debug) + mask |= debug_mask; + + WRITE_ONCE(dev_priv->psr.debug, debug); + I915_WRITE(EDP_PSR_IMR, ~mask); +} + +static void psr_event_print(u32 val, bool psr2_enabled) +{ + DRM_DEBUG_KMS("PSR exit events: 0x%x\n", val); + if (val & PSR_EVENT_PSR2_WD_TIMER_EXPIRE) + DRM_DEBUG_KMS("\tPSR2 watchdog timer expired\n"); + if ((val & PSR_EVENT_PSR2_DISABLED) && psr2_enabled) + DRM_DEBUG_KMS("\tPSR2 disabled\n"); + if (val & PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN) + DRM_DEBUG_KMS("\tSU dirty FIFO underrun\n"); + if (val & PSR_EVENT_SU_CRC_FIFO_UNDERRUN) + DRM_DEBUG_KMS("\tSU CRC FIFO underrun\n"); + if (val & PSR_EVENT_GRAPHICS_RESET) + DRM_DEBUG_KMS("\tGraphics reset\n"); + if (val & PSR_EVENT_PCH_INTERRUPT) + DRM_DEBUG_KMS("\tPCH interrupt\n"); + if (val & PSR_EVENT_MEMORY_UP) + DRM_DEBUG_KMS("\tMemory up\n"); + if (val & PSR_EVENT_FRONT_BUFFER_MODIFY) + DRM_DEBUG_KMS("\tFront buffer modification\n"); + if (val & PSR_EVENT_WD_TIMER_EXPIRE) + DRM_DEBUG_KMS("\tPSR watchdog timer expired\n"); + if (val & PSR_EVENT_PIPE_REGISTERS_UPDATE) + DRM_DEBUG_KMS("\tPIPE registers updated\n"); + if (val & PSR_EVENT_REGISTER_UPDATE) + DRM_DEBUG_KMS("\tRegister updated\n"); + if (val & PSR_EVENT_HDCP_ENABLE) + DRM_DEBUG_KMS("\tHDCP enabled\n"); + if (val & PSR_EVENT_KVMR_SESSION_ENABLE) + DRM_DEBUG_KMS("\tKVMR session enabled\n"); + if (val & PSR_EVENT_VBI_ENABLE) + DRM_DEBUG_KMS("\tVBI enabled\n"); + if (val & PSR_EVENT_LPSP_MODE_EXIT) + DRM_DEBUG_KMS("\tLPSP mode exited\n"); + if ((val & PSR_EVENT_PSR_DISABLE) && !psr2_enabled) + DRM_DEBUG_KMS("\tPSR disabled\n"); +} + +void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) +{ + u32 transcoders = BIT(TRANSCODER_EDP); + enum transcoder cpu_transcoder; + ktime_t time_ns = ktime_get(); + + if (INTEL_GEN(dev_priv) >= 8) + transcoders |= BIT(TRANSCODER_A) | + BIT(TRANSCODER_B) | + BIT(TRANSCODER_C); + + for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { + /* FIXME: Exit PSR and link train manually when this happens. */ + if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) + DRM_DEBUG_KMS("[transcoder %s] PSR aux error\n", + transcoder_name(cpu_transcoder)); + + if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) { + dev_priv->psr.last_entry_attempt = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", + transcoder_name(cpu_transcoder)); + } + + if (psr_iir & EDP_PSR_POST_EXIT(cpu_transcoder)) { + dev_priv->psr.last_exit = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", + transcoder_name(cpu_transcoder)); + + if (INTEL_GEN(dev_priv) >= 9) { + u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); + bool psr2_enabled = dev_priv->psr.psr2_enabled; + + I915_WRITE(PSR_EVENT(cpu_transcoder), val); + psr_event_print(val, psr2_enabled); + } + } + } +} + static bool intel_dp_get_y_coord_required(struct intel_dp *intel_dp) { uint8_t psr_caps = 0; @@ -400,9 +508,8 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) * mesh at all with our frontbuffer tracking. And the hw alone isn't * good enough. */ val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { - val |= EDP_Y_COORDINATE_VALID | EDP_Y_COORDINATE_ENABLE; - } + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + val |= EDP_Y_COORDINATE_ENABLE; val |= EDP_PSR2_FRAME_BEFORE_SU(dev_priv->psr.sink_sync_latency + 1); @@ -604,7 +711,8 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp, I915_WRITE(EDP_PSR_DEBUG, EDP_PSR_DEBUG_MASK_MEMUP | EDP_PSR_DEBUG_MASK_HPD | - EDP_PSR_DEBUG_MASK_LPSP); + EDP_PSR_DEBUG_MASK_LPSP | + EDP_PSR_DEBUG_MASK_DISP_REG_WRITE); } } @@ -1065,9 +1173,12 @@ void intel_psr_init(struct drm_i915_private *dev_priv) if (!dev_priv->psr.sink_support) return; - /* Per platform default: all disabled. */ - if (i915_modparams.enable_psr == -1) + if (i915_modparams.enable_psr == -1) { + i915_modparams.enable_psr = dev_priv->vbt.psr.enable; + + /* Per platform default: all disabled. */ i915_modparams.enable_psr = 0; + } /* Set link_standby x link_off defaults */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8948b30ea5d6..cd4fb796d97d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -558,7 +558,8 @@ static void reset_ring_common(struct intel_engine_cs *engine, */ if (request) { struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(request->ctx, + engine); struct i915_hw_ppgtt *ppgtt; if (ce->state) { @@ -618,9 +619,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - ret = intel_whitelist_workarounds_apply(engine); - if (ret) - return ret; + intel_whitelist_workarounds_apply(engine); /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ if (IS_GEN(dev_priv, 4, 6)) @@ -698,17 +697,17 @@ static void cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->timeline->requests, link) { + list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); if (!i915_request_completed(request)) dma_fence_set_error(&request->fence, -EIO); } /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -1069,7 +1068,6 @@ err: void intel_ring_reset(struct intel_ring *ring, u32 tail) { - GEM_BUG_ON(!list_empty(&ring->request_list)); ring->tail = tail; ring->head = tail; ring->emit = tail; @@ -1121,19 +1119,24 @@ err: } struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size) +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_timeline *timeline, + int size) { struct intel_ring *ring; struct i915_vma *vma; GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + GEM_BUG_ON(timeline == &engine->timeline); + lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&ring->request_list); + ring->timeline = i915_timeline_get(timeline); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -1164,12 +1167,13 @@ intel_ring_free(struct intel_ring *ring) i915_vma_close(ring->vma); __i915_gem_object_release_unless_active(obj); + i915_timeline_put(ring->timeline); kfree(ring); } -static int context_pin(struct i915_gem_context *ctx) +static int context_pin(struct intel_context *ce) { - struct i915_vma *vma = ctx->engine[RCS].state; + struct i915_vma *vma = ce->state; int ret; /* @@ -1260,7 +1264,7 @@ static struct intel_ring * intel_ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); int ret; lockdep_assert_held(&ctx->i915->drm.struct_mutex); @@ -1282,7 +1286,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine, } if (ce->state) { - ret = context_pin(ctx); + ret = context_pin(ce); if (ret) goto err; @@ -1303,7 +1307,7 @@ err: static void intel_ring_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(ce->pin_count == 0); @@ -1322,6 +1326,7 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { struct intel_ring *ring; + struct i915_timeline *timeline; int err; intel_engine_setup_common(engine); @@ -1330,7 +1335,14 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err; - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); + timeline = i915_timeline_create(engine->i915, engine->name); + if (IS_ERR(timeline)) { + err = PTR_ERR(timeline); + goto err; + } + + ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); + i915_timeline_put(timeline); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err; @@ -1431,7 +1443,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags; + *cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, engine)->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1522,7 +1534,7 @@ static int switch_context(struct i915_request *rq) hw_flags = MI_FORCE_RESTORE; } - if (to_ctx->engine[engine->id].state && + if (to_intel_context(to_ctx, engine)->state && (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { GEM_BUG_ON(engine->id != RCS); @@ -1570,7 +1582,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); + GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count); /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just @@ -1721,22 +1733,24 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct i915_request *rq) { - int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - u32 *cs; + int num_dwords; + void *cs; + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); if (num_dwords == 0) return 0; - num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords; + num_dwords = CACHELINE_DWORDS - num_dwords; + GEM_BUG_ON(num_dwords & 1); + cs = intel_ring_begin(rq, num_dwords); if (IS_ERR(cs)) return PTR_ERR(cs); - while (num_dwords--) - *cs++ = MI_NOOP; - + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); intel_ring_advance(rq, cs); + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 717041640135..010750e8ee44 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,17 +3,19 @@ #define _INTEL_RINGBUFFER_H_ #include <linux/hashtable.h> +#include <linux/seqlock.h> #include "i915_gem_batch_pool.h" -#include "i915_gem_timeline.h" #include "i915_reg.h" #include "i915_pmu.h" #include "i915_request.h" #include "i915_selftest.h" +#include "i915_timeline.h" #include "intel_gpu_commands.h" struct drm_printer; +struct i915_sched_attr; #define I915_CMD_HASH_ORDER 9 @@ -127,7 +129,9 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; + struct i915_timeline *timeline; struct list_head request_list; + struct list_head active_link; u32 head; u32 tail; @@ -334,7 +338,8 @@ struct intel_engine_cs { u32 mmio_base; struct intel_ring *buffer; - struct intel_timeline *timeline; + + struct i915_timeline timeline; struct drm_i915_gem_object *default_state; @@ -460,7 +465,8 @@ struct intel_engine_cs { * * Called under the struct_mutex. */ - void (*schedule)(struct i915_request *request, int priority); + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); /* * Cancel all requests on the hardware, or queued for execution. @@ -593,7 +599,7 @@ struct intel_engine_cs { /** * @lock: Lock protecting the below fields. */ - spinlock_t lock; + seqlock_t lock; /** * @enabled: Reference count indicating number of listeners. */ @@ -764,7 +770,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define CNL_HWS_CSB_WRITE_INDEX 0x2f struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size); +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_timeline *timeline, + int size); int intel_ring_pin(struct intel_ring *ring, struct drm_i915_private *i915, unsigned int offset_bias); @@ -882,7 +890,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * wtih serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->seqno); + return READ_ONCE(engine->timeline.seqno); } void intel_engine_get_instdone(struct intel_engine_cs *engine, @@ -1062,7 +1070,7 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) if (READ_ONCE(engine->stats.enabled) == 0) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (engine->stats.enabled > 0) { if (engine->stats.active++ == 0) @@ -1070,7 +1078,7 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) GEM_BUG_ON(engine->stats.active == 0); } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } static inline void intel_engine_context_out(struct intel_engine_cs *engine) @@ -1080,7 +1088,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine) if (READ_ONCE(engine->stats.enabled) == 0) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (engine->stats.enabled > 0) { ktime_t last; @@ -1107,7 +1115,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine) } } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } int intel_enable_engine_stats(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 66de4b2dc8b7..53a6eaa9671a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -542,6 +542,29 @@ void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv) dev_priv->csr.dc_state = val; } +/** + * gen9_set_dc_state - set target display C power state + * @dev_priv: i915 device instance + * @state: target DC power state + * - DC_STATE_DISABLE + * - DC_STATE_EN_UPTO_DC5 + * - DC_STATE_EN_UPTO_DC6 + * - DC_STATE_EN_DC9 + * + * Signal to DMC firmware/HW the target DC power state passed in @state. + * DMC/HW can turn off individual display clocks and power rails when entering + * a deeper DC power state (higher in number) and turns these back when exiting + * that state to a shallower power state (lower in number). The HW will decide + * when to actually enter a given state on an on-demand basis, for instance + * depending on the active state of display pipes. The state of display + * registers backed by affected power rails are saved/restored as needed. + * + * Based on the above enabling a deeper DC power state is asynchronous wrt. + * enabling it. Disabling a deeper power state is synchronous: for instance + * setting %DC_STATE_DISABLE won't complete until all HW resources are turned + * back on and register state is restored. This is guaranteed by the MMIO write + * to DC_STATE_EN blocking until the state is restored. + */ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state) { uint32_t val; @@ -635,7 +658,7 @@ static void assert_can_enable_dc6(struct drm_i915_private *dev_priv) assert_csr_loaded(dev_priv); } -void skl_enable_dc6(struct drm_i915_private *dev_priv) +static void skl_enable_dc6(struct drm_i915_private *dev_priv) { assert_can_enable_dc6(dev_priv); @@ -649,13 +672,6 @@ void skl_enable_dc6(struct drm_i915_private *dev_priv) gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); } -void skl_disable_dc6(struct drm_i915_private *dev_priv) -{ - DRM_DEBUG_KMS("Disabling DC6\n"); - - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); -} - static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -2626,32 +2642,69 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) mutex_unlock(&power_domains->lock); } -static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) +static inline +bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv, + i915_reg_t reg, bool enable) { - I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST); - POSTING_READ(DBUF_CTL); + u32 val, status; + val = I915_READ(reg); + val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST); + I915_WRITE(reg, val); + POSTING_READ(reg); udelay(10); - if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE)) - DRM_ERROR("DBuf power enable timeout\n"); + status = I915_READ(reg) & DBUF_POWER_STATE; + if ((enable && !status) || (!enable && status)) { + DRM_ERROR("DBus power %s timeout!\n", + enable ? "enable" : "disable"); + return false; + } + return true; +} + +static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) +{ + intel_dbuf_slice_set(dev_priv, DBUF_CTL, true); } static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) { - I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST); - POSTING_READ(DBUF_CTL); + intel_dbuf_slice_set(dev_priv, DBUF_CTL, false); +} - udelay(10); +static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) < 11) + return 1; + return 2; +} - if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE) - DRM_ERROR("DBuf power disable timeout!\n"); +void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, + u8 req_slices) +{ + u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; + u32 val; + bool ret; + + if (req_slices > intel_dbuf_max_slices(dev_priv)) { + DRM_ERROR("Invalid number of dbuf slices requested\n"); + return; + } + + if (req_slices == hw_enabled_slices || req_slices == 0) + return; + + val = I915_READ(DBUF_CTL_S2); + if (req_slices > hw_enabled_slices) + ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true); + else + ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false); + + if (ret) + dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices; } -/* - * TODO: we shouldn't always enable DBUF_CTL_S2, we should only enable it when - * needed and keep it disabled as much as possible. - */ static void icl_dbuf_enable(struct drm_i915_private *dev_priv) { I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST); @@ -2663,6 +2716,8 @@ static void icl_dbuf_enable(struct drm_i915_private *dev_priv) if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) || !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)) DRM_ERROR("DBuf power enable timeout\n"); + else + dev_priv->wm.skl_hw.ddb.enabled_slices = 2; } static void icl_dbuf_disable(struct drm_i915_private *dev_priv) @@ -2676,6 +2731,8 @@ static void icl_dbuf_disable(struct drm_i915_private *dev_priv) if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) || (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)) DRM_ERROR("DBuf power disable timeout!\n"); + else + dev_priv->wm.skl_hw.ddb.enabled_slices = 0; } static void icl_mbus_init(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 04428cea8f3b..214cc730642c 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -131,7 +131,7 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) if (scanline < min || scanline > max) break; - if (timeout <= 0) { + if (!timeout) { DRM_ERROR("Potential atomic update failure on pipe %c\n", pipe_name(crtc->pipe)); break; @@ -1011,6 +1011,7 @@ intel_check_sprite_plane(struct intel_plane *plane, src->y2 = (src_y + src_h) << 16; if (intel_format_is_yuv(fb->format->format) && + fb->format->format != DRM_FORMAT_NV12 && (src_x % 2 || src_w % 2)) { DRM_DEBUG_KMS("src x/w (%u, %u) must be a multiple of 2 for YUV planes\n", src_x, src_w); @@ -1179,6 +1180,19 @@ static uint32_t skl_plane_formats[] = { DRM_FORMAT_VYUY, }; +static uint32_t skl_planar_formats[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, + DRM_FORMAT_NV12, +}; + static const uint64_t skl_plane_format_modifiers_noccs[] = { I915_FORMAT_MOD_Yf_TILED, I915_FORMAT_MOD_Y_TILED, @@ -1318,6 +1332,7 @@ static bool skl_plane_format_mod_supported(struct drm_plane *_plane, case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: if (modifier == I915_FORMAT_MOD_Yf_TILED) return true; /* fall through */ @@ -1431,8 +1446,14 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->disable_plane = skl_disable_plane; intel_plane->get_hw_state = skl_plane_get_hw_state; - plane_formats = skl_plane_formats; - num_plane_formats = ARRAY_SIZE(skl_plane_formats); + if (skl_plane_has_planar(dev_priv, pipe, + PLANE_SPRITE0 + plane)) { + plane_formats = skl_planar_formats; + num_plane_formats = ARRAY_SIZE(skl_planar_formats); + } else { + plane_formats = skl_plane_formats; + num_plane_formats = ARRAY_SIZE(skl_plane_formats); + } if (intel_plane->has_ccs) modifiers = skl_plane_format_modifiers_ccs; diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index dc33b12394de..87910aa83267 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -30,7 +30,7 @@ struct drm_i915_private; struct i915_vma; /* Home of GuC, HuC and DMC firmwares */ -#define INTEL_UC_FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" +#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" enum intel_uc_fw_status { INTEL_UC_FIRMWARE_FAIL = -1, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index d6e20f0f4c28..448293eb638d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -139,7 +139,9 @@ fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915, * in the hope that the original ack will be delivered along with * the fallback ack. * - * This workaround is described in HSDES #1604254524 + * This workaround is described in HSDES #1604254524 and it's known as: + * WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl,icl + * although the name is a bit misleading. */ pass = 1; @@ -1394,7 +1396,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 11) { int i; - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; + dev_priv->uncore.funcs.force_wake_get = + fw_domains_get_with_fallback; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER_GEN9, diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index bbbf4ed4aa97..2df3538ceba5 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -270,6 +270,10 @@ static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv) GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ + if (IS_GEN9_LP(dev_priv)) + WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); + return 0; } @@ -441,6 +445,27 @@ static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv) return 0; } +static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + /* Wa_1604370585:icl (pre-prod) + * Formerly known as WaPushConstantDereferenceHoldDisable + */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + PUSH_CONSTANT_DEREF_DISABLE); + + /* WaForceEnableNonCoherent:icl + * This is not the same workaround as in early Gen9 platforms, where + * lacking this could cause system hangs, but coherency performance + * overhead is high and only a few compute workloads really need it + * (the register is whitelisted in hardware now, so UMDs can opt in + * for coherency if they have a good reason). + */ + WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); + + return 0; +} + int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv) { int err = 0; @@ -465,6 +490,8 @@ int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv) err = cfl_ctx_workarounds_init(dev_priv); else if (IS_CANNONLAKE(dev_priv)) err = cnl_ctx_workarounds_init(dev_priv); + else if (IS_ICELAKE(dev_priv)) + err = icl_ctx_workarounds_init(dev_priv); else MISSING_CASE(INTEL_GEN(dev_priv)); if (err) @@ -663,6 +690,90 @@ static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv) _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); } +static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + /* This is not an Wa. Enable for better image quality */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE)); + + /* WaInPlaceDecompressionHang:icl */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaPipelineFlushCoherentLines:icl */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_FLUSH_COHERENT_LINES); + + /* Wa_1405543622:icl + * Formerly known as WaGAPZPriorityScheme + */ + I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) | + GEN11_ARBITRATION_PRIO_ORDER_MASK); + + /* Wa_1604223664:icl + * Formerly known as WaL3BankAddressHashing + */ + I915_WRITE(GEN8_GARBCNTL, + (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) | + GEN11_HASH_CTRL_EXCL_BIT0); + I915_WRITE(GEN11_GLBLINVL, + (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) | + GEN11_BANK_HASH_ADDR_EXCL_BIT0); + + /* WaModifyGamTlbPartitioning:icl */ + I915_WRITE(GEN11_GACB_PERF_CTRL, + (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) | + GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); + + /* Wa_1405733216:icl + * Formerly known as WaDisableCleanEvicts + */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN11_LQSC_CLEAN_EVICT_DISABLE); + + /* Wa_1405766107:icl + * Formerly known as WaCL2SFHalfMaxAlloc + */ + I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) | + GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC | + GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC); + + /* Wa_220166154:icl + * Formerly known as WaDisCtxReload + */ + I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) | + GAMW_ECO_DEV_CTX_RELOAD_DISABLE); + + /* Wa_1405779004:icl (pre-prod) */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0)) + I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, + I915_READ(SLICE_UNIT_LEVEL_CLKGATE) | + MSCUNIT_CLKGATE_DIS); + + /* Wa_1406680159:icl */ + I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, + I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) | + GWUNIT_CLKGATE_DIS); + + /* Wa_1604302699:icl */ + I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER, + I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) | + GEN11_I2M_WRITE_DISABLE); + + /* Wa_1406838659:icl (pre-prod) */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) + I915_WRITE(INF_UNIT_LEVEL_CLKGATE, + I915_READ(INF_UNIT_LEVEL_CLKGATE) | + CGPSF_CLKGATE_DIS); + + /* WaForwardProgressSoftReset:icl */ + I915_WRITE(GEN10_SCRATCH_LNCF2, + I915_READ(GEN10_SCRATCH_LNCF2) | + PMFLUSHDONE_LNICRSDROP | + PMFLUSH_GAPL3UNBLOCK | + PMFLUSHDONE_LNEBLK); +} + void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) < 8) @@ -683,174 +794,156 @@ void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv) cfl_gt_workarounds_apply(dev_priv); else if (IS_CANNONLAKE(dev_priv)) cnl_gt_workarounds_apply(dev_priv); + else if (IS_ICELAKE(dev_priv)) + icl_gt_workarounds_apply(dev_priv); else MISSING_CASE(INTEL_GEN(dev_priv)); } -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, - i915_reg_t reg) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct i915_workarounds *wa = &dev_priv->workarounds; - const unsigned int index = wa->hw_whitelist_count[engine->id]; +struct whitelist { + i915_reg_t reg[RING_MAX_NONPRIV_SLOTS]; + unsigned int count; + u32 nopid; +}; - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) - return -EINVAL; - - I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); - wa->hw_whitelist_count[engine->id]++; +static void whitelist_reg(struct whitelist *w, i915_reg_t reg) +{ + if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS)) + return; - return 0; + w->reg[w->count++] = reg; } -static int bdw_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void bdw_whitelist_build(struct whitelist *w) { - return 0; } -static int chv_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void chv_whitelist_build(struct whitelist *w) { - return 0; } -static int gen9_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void gen9_whitelist_build(struct whitelist *w) { - int ret; - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); - if (ret) - return ret; + whitelist_reg(w, GEN9_CTX_PREEMPT_REG); /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; + whitelist_reg(w, GEN8_CS_CHICKEN1); /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); - if (ret) - return ret; - - return 0; + whitelist_reg(w, GEN8_HDC_CHICKEN1); } -static int skl_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void skl_whitelist_build(struct whitelist *w) { - int ret; - - ret = gen9_whitelist_workarounds_apply(engine); - if (ret) - return ret; + gen9_whitelist_build(w); /* WaDisableLSQCROPERFforOCL:skl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; + whitelist_reg(w, GEN8_L3SQCREG4); } -static int bxt_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void bxt_whitelist_build(struct whitelist *w) { - int ret; - - ret = gen9_whitelist_workarounds_apply(engine); - if (ret) - return ret; - - return 0; + gen9_whitelist_build(w); } -static int kbl_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void kbl_whitelist_build(struct whitelist *w) { - int ret; - - ret = gen9_whitelist_workarounds_apply(engine); - if (ret) - return ret; + gen9_whitelist_build(w); /* WaDisableLSQCROPERFforOCL:kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; + whitelist_reg(w, GEN8_L3SQCREG4); } -static int glk_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void glk_whitelist_build(struct whitelist *w) { - int ret; - - ret = gen9_whitelist_workarounds_apply(engine); - if (ret) - return ret; + gen9_whitelist_build(w); /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ - ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1); - if (ret) - return ret; - - return 0; + whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); } -static int cfl_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void cfl_whitelist_build(struct whitelist *w) { - int ret; + gen9_whitelist_build(w); +} - ret = gen9_whitelist_workarounds_apply(engine); - if (ret) - return ret; +static void cnl_whitelist_build(struct whitelist *w) +{ + /* WaEnablePreemptionGranularityControlByUMD:cnl */ + whitelist_reg(w, GEN8_CS_CHICKEN1); +} - return 0; +static void icl_whitelist_build(struct whitelist *w) +{ } -static int cnl_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static struct whitelist *whitelist_build(struct intel_engine_cs *engine, + struct whitelist *w) { - int ret; + struct drm_i915_private *i915 = engine->i915; - /* WaEnablePreemptionGranularityControlByUMD:cnl */ - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; + GEM_BUG_ON(engine->id != RCS); - return 0; + w->count = 0; + w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base)); + + if (INTEL_GEN(i915) < 8) + return NULL; + else if (IS_BROADWELL(i915)) + bdw_whitelist_build(w); + else if (IS_CHERRYVIEW(i915)) + chv_whitelist_build(w); + else if (IS_SKYLAKE(i915)) + skl_whitelist_build(w); + else if (IS_BROXTON(i915)) + bxt_whitelist_build(w); + else if (IS_KABYLAKE(i915)) + kbl_whitelist_build(w); + else if (IS_GEMINILAKE(i915)) + glk_whitelist_build(w); + else if (IS_COFFEELAKE(i915)) + cfl_whitelist_build(w); + else if (IS_CANNONLAKE(i915)) + cnl_whitelist_build(w); + else if (IS_ICELAKE(i915)) + icl_whitelist_build(w); + else + MISSING_CASE(INTEL_GEN(i915)); + + return w; } -int intel_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void whitelist_apply(struct intel_engine_cs *engine, + const struct whitelist *w) { struct drm_i915_private *dev_priv = engine->i915; - int err = 0; + const u32 base = engine->mmio_base; + unsigned int i; - WARN_ON(engine->id != RCS); + if (!w) + return; - dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - if (INTEL_GEN(dev_priv) < 8) - err = 0; - else if (IS_BROADWELL(dev_priv)) - err = bdw_whitelist_workarounds_apply(engine); - else if (IS_CHERRYVIEW(dev_priv)) - err = chv_whitelist_workarounds_apply(engine); - else if (IS_SKYLAKE(dev_priv)) - err = skl_whitelist_workarounds_apply(engine); - else if (IS_BROXTON(dev_priv)) - err = bxt_whitelist_workarounds_apply(engine); - else if (IS_KABYLAKE(dev_priv)) - err = kbl_whitelist_workarounds_apply(engine); - else if (IS_GEMINILAKE(dev_priv)) - err = glk_whitelist_workarounds_apply(engine); - else if (IS_COFFEELAKE(dev_priv)) - err = cfl_whitelist_workarounds_apply(engine); - else if (IS_CANNONLAKE(dev_priv)) - err = cnl_whitelist_workarounds_apply(engine); - else - MISSING_CASE(INTEL_GEN(dev_priv)); - if (err) - return err; + for (i = 0; i < w->count; i++) + I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), + i915_mmio_reg_offset(w->reg[i])); - DRM_DEBUG_DRIVER("%s: Number of whitelist w/a: %d\n", engine->name, - dev_priv->workarounds.hw_whitelist_count[engine->id]); - return 0; + /* And clear the rest just in case of garbage */ + for (; i < RING_MAX_NONPRIV_SLOTS; i++) + I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid); + + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); } + +void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine) +{ + struct whitelist w; + + whitelist_apply(engine, whitelist_build(engine, &w)); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_workarounds.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h index d9b0cc5afb4a..b11d0623e626 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -12,6 +12,6 @@ int intel_ctx_workarounds_emit(struct i915_request *rq); void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv); -int intel_whitelist_workarounds_apply(struct intel_engine_cs *engine); +void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 05bbef363fff..91c72911be3c 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1091,7 +1091,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, out_vma_unpin: i915_vma_unpin(vma); out_vma_close: - i915_vma_close(vma); + i915_vma_destroy(vma); return err; } @@ -1757,6 +1757,9 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) goto out_unlock; } + if (ctx->ppgtt) + ctx->ppgtt->base.scrub_64K = true; + err = i915_subtests(tests, ctx); out_unlock: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 7ecaed50d0b9..ddb03f009232 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -23,6 +23,7 @@ */ #include "../i915_selftest.h" +#include "igt_flush_test.h" #include "mock_drm.h" #include "huge_gem_object.h" @@ -411,6 +412,8 @@ static int igt_ctx_exec(void *arg) } out_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 8bf6aa573226..a00e2bd08bce 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -11,6 +11,7 @@ */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) +selftest(workarounds, intel_workarounds_live_selftests) selftest(requests, i915_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 3000e6a7d82d..19f1c6a5c8fb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -1,25 +1,7 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ #include "../i915_selftest.h" @@ -35,21 +17,21 @@ struct __igt_sync { bool set; }; -static int __igt_sync(struct intel_timeline *tl, +static int __igt_sync(struct i915_timeline *tl, u64 ctx, const struct __igt_sync *p, const char *name) { int ret; - if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", name, p->name, ctx, p->seqno, yesno(p->expected)); return -EINVAL; } if (p->set) { - ret = __intel_timeline_sync_set(tl, ctx, p->seqno); + ret = __i915_timeline_sync_set(tl, ctx, p->seqno); if (ret) return ret; } @@ -77,37 +59,31 @@ static int igt_sync(void *arg) { "unwrap", UINT_MAX, true, false }, {}, }, *p; - struct intel_timeline *tl; + struct i915_timeline tl; int order, offset; int ret = -ENODEV; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; - + mock_timeline_init(&tl, 0); for (p = pass; p->name; p++) { for (order = 1; order < 64; order++) { for (offset = -1; offset <= (order > 1); offset++) { u64 ctx = BIT_ULL(order) + offset; - ret = __igt_sync(tl, ctx, p, "1"); + ret = __igt_sync(&tl, ctx, p, "1"); if (ret) goto out; } } } - mock_timeline_destroy(tl); - - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_fini(&tl); + mock_timeline_init(&tl, 0); for (order = 1; order < 64; order++) { for (offset = -1; offset <= (order > 1); offset++) { u64 ctx = BIT_ULL(order) + offset; for (p = pass; p->name; p++) { - ret = __igt_sync(tl, ctx, p, "2"); + ret = __igt_sync(&tl, ctx, p, "2"); if (ret) goto out; } @@ -115,7 +91,7 @@ static int igt_sync(void *arg) } out: - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); return ret; } @@ -127,15 +103,13 @@ static unsigned int random_engine(struct rnd_state *rnd) static int bench_sync(void *arg) { struct rnd_state prng; - struct intel_timeline *tl; + struct i915_timeline tl; unsigned long end_time, count; u64 prng32_1M; ktime_t kt; int order, last_order; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Lookups from cache are very fast and so the random number generation * and the loop itself becomes a significant factor in the per-iteration @@ -167,7 +141,7 @@ static int bench_sync(void *arg) do { u64 id = i915_prandom_u64_state(&prng); - __intel_timeline_sync_set(tl, id, 0); + __i915_timeline_sync_set(&tl, id, 0); count++; } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); @@ -182,8 +156,8 @@ static int bench_sync(void *arg) while (end_time--) { u64 id = i915_prandom_u64_state(&prng); - if (!__intel_timeline_sync_is_later(tl, id, 0)) { - mock_timeline_destroy(tl); + if (!__i915_timeline_sync_is_later(&tl, id, 0)) { + mock_timeline_fini(&tl); pr_err("Lookup of %llu failed\n", id); return -EINVAL; } @@ -193,19 +167,17 @@ static int bench_sync(void *arg) pr_info("%s: %lu random lookups, %lluns/lookup\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Benchmark setting the first N (in order) contexts */ count = 0; kt = ktime_get(); end_time = jiffies + HZ/10; do { - __intel_timeline_sync_set(tl, count++, 0); + __i915_timeline_sync_set(&tl, count++, 0); } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); pr_info("%s: %lu in-order insertions, %lluns/insert\n", @@ -215,9 +187,9 @@ static int bench_sync(void *arg) end_time = count; kt = ktime_get(); while (end_time--) { - if (!__intel_timeline_sync_is_later(tl, end_time, 0)) { + if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) { pr_err("Lookup of %lu failed\n", end_time); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); return -EINVAL; } } @@ -225,12 +197,10 @@ static int bench_sync(void *arg) pr_info("%s: %lu in-order lookups, %lluns/lookup\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Benchmark searching for a random context id and maybe changing it */ prandom_seed_state(&prng, i915_selftest.random_seed); @@ -241,8 +211,8 @@ static int bench_sync(void *arg) u32 id = random_engine(&prng); u32 seqno = prandom_u32_state(&prng); - if (!__intel_timeline_sync_is_later(tl, id, seqno)) - __intel_timeline_sync_set(tl, id, seqno); + if (!__i915_timeline_sync_is_later(&tl, id, seqno)) + __i915_timeline_sync_set(&tl, id, seqno); count++; } while (!time_after(jiffies, end_time)); @@ -250,7 +220,7 @@ static int bench_sync(void *arg) kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); /* Benchmark searching for a known context id and changing the seqno */ @@ -258,9 +228,7 @@ static int bench_sync(void *arg) ({ int tmp = last_order; last_order = order; order += tmp; })) { unsigned int mask = BIT(order) - 1; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); count = 0; kt = ktime_get(); @@ -272,8 +240,8 @@ static int bench_sync(void *arg) */ u64 id = (u64)(count & mask) << order; - __intel_timeline_sync_is_later(tl, id, 0); - __intel_timeline_sync_set(tl, id, 0); + __i915_timeline_sync_is_later(&tl, id, 0); + __i915_timeline_sync_set(&tl, id, 0); count++; } while (!time_after(jiffies, end_time)); @@ -281,7 +249,7 @@ static int bench_sync(void *arg) pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", __func__, count, order, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index eb89e301b602..e90f97236e50 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -81,7 +81,7 @@ checked_vma_instance(struct drm_i915_gem_object *obj, } if (i915_vma_compare(vma, vm, view)) { - pr_err("i915_vma_compare failed with create parmaters!\n"); + pr_err("i915_vma_compare failed with create parameters!\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c new file mode 100644 index 000000000000..0d06f559243f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_drv.h" + +#include "../i915_selftest.h" +#include "igt_flush_test.h" + +struct wedge_me { + struct delayed_work work; + struct drm_i915_private *i915; + const void *symbol; +}; + +static void wedge_me(struct work_struct *work) +{ + struct wedge_me *w = container_of(work, typeof(*w), work.work); + + pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); + + GEM_TRACE("%pS timed out.\n", w->symbol); + GEM_TRACE_DUMP(); + + i915_gem_set_wedged(w->i915); +} + +static void __init_wedge(struct wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const void *symbol) +{ + w->i915 = i915; + w->symbol = symbol; + + INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); + schedule_delayed_work(&w->work, timeout); +} + +static void __fini_wedge(struct wedge_me *w) +{ + cancel_delayed_work_sync(&w->work); + destroy_delayed_work_on_stack(&w->work); + w->i915 = NULL; +} + +#define wedge_on_timeout(W, DEV, TIMEOUT) \ + for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ + (W)->i915; \ + __fini_wedge((W))) + +int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) +{ + struct wedge_me w; + + cond_resched(); + + if (flags & I915_WAIT_LOCKED && + i915_gem_switch_to_kernel_context(i915)) { + pr_err("Failed to switch back to kernel context; declaring wedged\n"); + i915_gem_set_wedged(i915); + } + + wedge_on_timeout(&w, i915, HZ) + i915_gem_wait_for_idle(i915, flags); + + return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; +} diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h new file mode 100644 index 000000000000..63e009927c43 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef IGT_FLUSH_TEST_H +#define IGT_FLUSH_TEST_H + +struct drm_i915_private; + +int igt_flush_test(struct drm_i915_private *i915, unsigned int flags); + +#endif /* IGT_FLUSH_TEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 46580026c7fc..d6926e7820e5 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -412,10 +412,11 @@ static int igt_wakeup(void *arg) * that they are ready for the next test. We wait until all * threads are complete and waiting for us (i.e. not a seqno). */ - err = wait_var_event_timeout(&done, !atomic_read(&done), 10 * HZ); - if (err) { + if (!wait_var_event_timeout(&done, + !atomic_read(&done), 10 * HZ)) { pr_err("Timed out waiting for %d remaining waiters\n", atomic_read(&done)); + err = -ETIMEDOUT; break; } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 24f913f26a7b..438e0b045a2c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -26,10 +26,13 @@ #include "../i915_selftest.h" #include "i915_random.h" +#include "igt_flush_test.h" #include "mock_context.h" #include "mock_drm.h" +#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ + struct hang { struct drm_i915_private *i915; struct drm_i915_gem_object *hws; @@ -251,61 +254,6 @@ static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); } -struct wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const void *symbol; -}; - -static void wedge_me(struct work_struct *work) -{ - struct wedge_me *w = container_of(work, typeof(*w), work.work); - - pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); - - GEM_TRACE("%pS timed out.\n", w->symbol); - GEM_TRACE_DUMP(); - - i915_gem_set_wedged(w->i915); -} - -static void __init_wedge(struct wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const void *symbol) -{ - w->i915 = i915; - w->symbol = symbol; - - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __fini_wedge(struct wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ - (W)->i915; \ - __fini_wedge((W))) - -static noinline int -flush_test(struct drm_i915_private *i915, unsigned int flags) -{ - struct wedge_me w; - - cond_resched(); - - wedge_on_timeout(&w, i915, HZ) - i915_gem_wait_for_idle(i915, flags); - - return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; -} - static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; @@ -319,7 +267,7 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - flush_test(h->i915, I915_WAIT_LOCKED); + igt_flush_test(h->i915, I915_WAIT_LOCKED); } static bool wait_until_running(struct hang *h, struct i915_request *rq) @@ -454,6 +402,11 @@ static int igt_global_reset(void *arg) return err; } +static bool wait_for_idle(struct intel_engine_cs *engine) +{ + return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0; +} + static int __igt_reset_engine(struct drm_i915_private *i915, bool active) { struct intel_engine_cs *engine; @@ -481,6 +434,13 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) if (active && !intel_engine_can_store_dword(engine)) continue; + if (!wait_for_idle(engine)) { + pr_err("%s failed to idle before reset\n", + engine->name); + err = -EIO; + break; + } + reset_count = i915_reset_count(&i915->gpu_error); reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); @@ -542,13 +502,26 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) err = -EINVAL; break; } + + if (!wait_for_idle(engine)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("%s failed to idle after reset\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); if (err) break; - err = flush_test(i915, 0); + err = igt_flush_test(i915, 0); if (err) break; } @@ -628,7 +601,7 @@ static int active_engine(void *data) } if (arg->flags & TEST_PRIORITY) - ctx[idx]->priority = + ctx[idx]->sched.priority = i915_prandom_u32_max_state(512, &prng); rq[idx] = i915_request_get(new); @@ -683,7 +656,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, return err; if (flags & TEST_PRIORITY) - h.ctx->priority = 1024; + h.ctx->sched.priority = 1024; } for_each_engine(engine, i915, id) { @@ -696,6 +669,13 @@ static int __igt_reset_engines(struct drm_i915_private *i915, !intel_engine_can_store_dword(engine)) continue; + if (!wait_for_idle(engine)) { + pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n", + engine->name, test_name); + err = -EIO; + break; + } + memset(threads, 0, sizeof(threads)); for_each_engine(other, i915, tmp) { struct task_struct *tsk; @@ -772,6 +752,20 @@ static int __igt_reset_engines(struct drm_i915_private *i915, i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); i915_request_put(rq); } + + if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("i915_reset_engine(%s:%s):" + " failed to idle after reset\n", + engine->name, test_name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); pr_info("i915_reset_engine(%s:%s): %lu resets\n", @@ -826,7 +820,7 @@ unwind: if (err) break; - err = flush_test(i915, 0); + err = igt_flush_test(i915, 0); if (err) break; } @@ -981,7 +975,7 @@ static int wait_for_others(struct drm_i915_private *i915, if (engine == exclude) continue; - if (wait_for(intel_engine_is_idle(engine), 10)) + if (!wait_for_idle(engine)) return -EIO; } @@ -1120,7 +1114,7 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = flush_test(i915, I915_WAIT_LOCKED); + err = igt_flush_test(i915, I915_WAIT_LOCKED); if (err) break; } @@ -1232,7 +1226,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) err = i915_subtests(tests, i915); mutex_lock(&i915->drm.struct_mutex); - flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(i915, I915_WAIT_LOCKED); mutex_unlock(&i915->drm.struct_mutex); i915_modparams.enable_hangcheck = saved_hangcheck; diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 0481e2e01146..1b8a07125150 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -5,6 +5,7 @@ */ #include "../i915_selftest.h" +#include "igt_flush_test.h" #include "mock_context.h" @@ -168,61 +169,6 @@ static u32 hws_seqno(const struct spinner *spin, const struct i915_request *rq) return READ_ONCE(*seqno); } -struct wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const void *symbol; -}; - -static void wedge_me(struct work_struct *work) -{ - struct wedge_me *w = container_of(work, typeof(*w), work.work); - - pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); - - GEM_TRACE("%pS timed out.\n", w->symbol); - GEM_TRACE_DUMP(); - - i915_gem_set_wedged(w->i915); -} - -static void __init_wedge(struct wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const void *symbol) -{ - w->i915 = i915; - w->symbol = symbol; - - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __fini_wedge(struct wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ - (W)->i915; \ - __fini_wedge((W))) - -static noinline int -flush_test(struct drm_i915_private *i915, unsigned int flags) -{ - struct wedge_me w; - - cond_resched(); - - wedge_on_timeout(&w, i915, HZ) - i915_gem_wait_for_idle(i915, flags); - - return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; -} - static void spinner_end(struct spinner *spin) { *spin->batch = MI_BATCH_BUFFER_END; @@ -295,7 +241,7 @@ static int live_sanitycheck(void *arg) } spinner_end(&spin); - if (flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { err = -EIO; goto err_ctx; } @@ -307,7 +253,7 @@ err_ctx: err_spin: spinner_fini(&spin); err_unlock: - flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(i915, I915_WAIT_LOCKED); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -335,12 +281,12 @@ static int live_preempt(void *arg) ctx_hi = kernel_context(i915); if (!ctx_hi) goto err_spin_lo; - ctx_hi->priority = I915_CONTEXT_MAX_USER_PRIORITY; + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->priority = I915_CONTEXT_MIN_USER_PRIORITY; + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; for_each_engine(engine, i915, id) { struct i915_request *rq; @@ -380,7 +326,7 @@ static int live_preempt(void *arg) spinner_end(&spin_hi); spinner_end(&spin_lo); - if (flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { err = -EIO; goto err_ctx_lo; } @@ -396,7 +342,7 @@ err_spin_lo: err_spin_hi: spinner_fini(&spin_hi); err_unlock: - flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(i915, I915_WAIT_LOCKED); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -407,6 +353,7 @@ static int live_late_preempt(void *arg) struct i915_gem_context *ctx_hi, *ctx_lo; struct spinner spin_hi, spin_lo; struct intel_engine_cs *engine; + struct i915_sched_attr attr = {}; enum intel_engine_id id; int err = -ENOMEM; @@ -458,7 +405,8 @@ static int live_late_preempt(void *arg) goto err_wedged; } - engine->schedule(rq, I915_PRIORITY_MAX); + attr.priority = I915_PRIORITY_MAX; + engine->schedule(rq, &attr); if (!wait_for_spinner(&spin_hi, rq)) { pr_err("High priority context failed to preempt the low priority context\n"); @@ -468,7 +416,7 @@ static int live_late_preempt(void *arg) spinner_end(&spin_hi); spinner_end(&spin_lo); - if (flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { err = -EIO; goto err_ctx_lo; } @@ -484,7 +432,7 @@ err_spin_lo: err_spin_hi: spinner_fini(&spin_hi); err_unlock: - flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(i915, I915_WAIT_LOCKED); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -503,5 +451,9 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt), SUBTEST(live_late_preempt), }; + + if (!HAS_EXECLISTS(i915)) + return 0; + return i915_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c new file mode 100644 index 000000000000..17444a3abbb9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -0,0 +1,291 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" + +#include "mock_context.h" + +static struct drm_i915_gem_object * +read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *result; + struct i915_request *rq; + struct i915_vma *vma; + const u32 base = engine->mmio_base; + u32 srm, *cs; + int err; + int i; + + result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(result)) + return result; + + i915_gem_object_set_cache_level(result, I915_CACHE_LLC); + + cs = i915_gem_object_pin_map(result, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_obj; + } + memset(cs, 0xc5, PAGE_SIZE); + i915_gem_object_unpin_map(result); + + vma = i915_vma_instance(result, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_obj; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_pin; + } + + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (INTEL_GEN(ctx->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_req; + } + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i)); + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = 0; + } + intel_ring_advance(rq, cs); + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + reservation_object_lock(vma->resv, NULL); + reservation_object_add_excl_fence(vma->resv, &rq->fence); + reservation_object_unlock(vma->resv); + + i915_gem_object_get(result); + i915_gem_object_set_active_reference(result); + + __i915_request_add(rq, true); + i915_vma_unpin(vma); + + return result; + +err_req: + i915_request_add(rq); +err_pin: + i915_vma_unpin(vma); +err_obj: + i915_gem_object_put(result); + return ERR_PTR(err); +} + +static u32 get_whitelist_reg(const struct whitelist *w, unsigned int i) +{ + return i < w->count ? i915_mmio_reg_offset(w->reg[i]) : w->nopid; +} + +static void print_results(const struct whitelist *w, const u32 *results) +{ + unsigned int i; + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + u32 expected = get_whitelist_reg(w, i); + u32 actual = results[i]; + + pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n", + i, expected, actual); + } +} + +static int check_whitelist(const struct whitelist *w, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *results; + u32 *vaddr; + int err; + int i; + + results = read_nonprivs(ctx, engine); + if (IS_ERR(results)) + return PTR_ERR(results); + + err = i915_gem_object_set_to_cpu_domain(results, false); + if (err) + goto out_put; + + vaddr = i915_gem_object_pin_map(results, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + u32 expected = get_whitelist_reg(w, i); + u32 actual = vaddr[i]; + + if (expected != actual) { + print_results(w, vaddr); + pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n", + i, expected, actual); + + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(results); +out_put: + i915_gem_object_put(results); + return err; +} + +static int do_device_reset(struct intel_engine_cs *engine) +{ + i915_reset(engine->i915, ENGINE_MASK(engine->id), NULL); + return 0; +} + +static int do_engine_reset(struct intel_engine_cs *engine) +{ + return i915_reset_engine(engine, NULL); +} + +static int switch_to_scratch_context(struct intel_engine_cs *engine) +{ + struct i915_gem_context *ctx; + struct i915_request *rq; + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + rq = i915_request_alloc(engine, ctx); + kernel_context_close(ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + + return 0; +} + +static int check_whitelist_across_reset(struct intel_engine_cs *engine, + int (*reset)(struct intel_engine_cs *), + const struct whitelist *w, + const char *name) +{ + struct i915_gem_context *ctx; + int err; + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Invalid whitelist *before* %s reset!\n", name); + goto out; + } + + err = switch_to_scratch_context(engine); + if (err) + goto out; + + err = reset(engine); + if (err) { + pr_err("%s reset failed\n", name); + goto out; + } + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Whitelist not preserved in context across %s reset!\n", + name); + goto out; + } + + kernel_context_close(ctx); + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Invalid whitelist *after* %s reset in fresh context!\n", + name); + goto out; + } + +out: + kernel_context_close(ctx); + return err; +} + +static int live_reset_whitelist(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine = i915->engine[RCS]; + struct i915_gpu_error *error = &i915->gpu_error; + struct whitelist w; + int err = 0; + + /* If we reset the gpu, we should not lose the RING_NONPRIV */ + + if (!engine) + return 0; + + if (!whitelist_build(engine, &w)) + return 0; + + pr_info("Checking %d whitelisted registers (RING_NONPRIV)\n", w.count); + + set_bit(I915_RESET_BACKOFF, &error->flags); + set_bit(I915_RESET_ENGINE + engine->id, &error->flags); + + if (intel_has_reset_engine(i915)) { + err = check_whitelist_across_reset(engine, + do_engine_reset, &w, + "engine"); + if (err) + goto out; + } + + if (intel_has_gpu_reset(i915)) { + err = check_whitelist_across_reset(engine, + do_device_reset, &w, + "device"); + if (err) + goto out; + } + +out: + clear_bit(I915_RESET_ENGINE + engine->id, &error->flags); + clear_bit(I915_RESET_BACKOFF, &error->flags); + return err; +} + +int intel_workarounds_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_reset_whitelist), + }; + int err; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 78a89efa1119..26bf29d97007 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -25,6 +25,11 @@ #include "mock_engine.h" #include "mock_request.h" +struct mock_ring { + struct intel_ring base; + struct i915_timeline timeline; +}; + static struct mock_request *first_request(struct mock_engine *engine) { return list_first_entry_or_null(&engine->hw_queue, @@ -71,14 +76,21 @@ static struct intel_ring * mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - i915_gem_context_get(ctx); + struct intel_context *ce = to_intel_context(ctx, engine); + + if (!ce->pin_count++) + i915_gem_context_get(ctx); + return engine->buffer; } static void mock_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - i915_gem_context_put(ctx); + struct intel_context *ce = to_intel_context(ctx, engine); + + if (!--ce->pin_count) + i915_gem_context_put(ctx); } static int mock_request_alloc(struct i915_request *request) @@ -125,7 +137,7 @@ static void mock_submit_request(struct i915_request *request) static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; - struct intel_ring *ring; + struct mock_ring *ring; BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz); @@ -133,14 +145,25 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - ring->size = sz; - ring->effective_size = sz; - ring->vaddr = (void *)(ring + 1); + i915_timeline_init(engine->i915, &ring->timeline, engine->name); + + ring->base.size = sz; + ring->base.effective_size = sz; + ring->base.vaddr = (void *)(ring + 1); + ring->base.timeline = &ring->timeline; - INIT_LIST_HEAD(&ring->request_list); - intel_ring_update_space(ring); + INIT_LIST_HEAD(&ring->base.request_list); + intel_ring_update_space(&ring->base); - return ring; + return &ring->base; +} + +static void mock_ring_free(struct intel_ring *base) +{ + struct mock_ring *ring = container_of(base, typeof(*ring), base); + + i915_timeline_fini(&ring->timeline); + kfree(ring); } struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -155,12 +178,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, if (!engine) return NULL; - engine->base.buffer = mock_ring(&engine->base); - if (!engine->base.buffer) { - kfree(engine); - return NULL; - } - /* minimal engine setup for requests */ engine->base.i915 = i915; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); @@ -174,9 +191,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; - engine->base.timeline = - &i915->gt.global_timeline.engine[engine->base.id]; - + i915_timeline_init(i915, &engine->base.timeline, engine->base.name); intel_engine_init_breadcrumbs(&engine->base); engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ @@ -185,7 +200,17 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); + engine->base.buffer = mock_ring(&engine->base); + if (!engine->base.buffer) + goto err_breadcrumbs; + return &engine->base; + +err_breadcrumbs: + intel_engine_fini_breadcrumbs(&engine->base); + i915_timeline_fini(&engine->base.timeline); + kfree(engine); + return NULL; } void mock_engine_flush(struct intel_engine_cs *engine) @@ -217,10 +242,12 @@ void mock_engine_free(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); + intel_context_unpin(engine->last_retired_context, engine); + + mock_ring_free(engine->buffer); intel_engine_fini_breadcrumbs(engine); + i915_timeline_fini(&engine->timeline); - kfree(engine->buffer); kfree(engine); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index e6d4b882599a..94baedfa0f74 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -44,6 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915) mock_engine_flush(engine); i915_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); } static void mock_device_release(struct drm_device *dev) @@ -72,8 +73,8 @@ static void mock_device_release(struct drm_device *dev) mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(i915); - i915_gem_timeline_fini(&i915->gt.global_timeline); mutex_unlock(&i915->drm.struct_mutex); + WARN_ON(!list_empty(&i915->gt.timelines)); destroy_workqueue(i915->wq); @@ -223,26 +224,25 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->priorities) goto err_dependencies; - mutex_lock(&i915->drm.struct_mutex); INIT_LIST_HEAD(&i915->gt.timelines); - err = i915_gem_timeline_init__global(i915); - if (err) { - mutex_unlock(&i915->drm.struct_mutex); - goto err_priorities; - } + INIT_LIST_HEAD(&i915->gt.active_rings); + INIT_LIST_HEAD(&i915->gt.closed_vma); + + mutex_lock(&i915->drm.struct_mutex); mock_init_ggtt(i915); - mutex_unlock(&i915->drm.struct_mutex); mkwrite_device_info(i915)->ring_mask = BIT(0); i915->engine[RCS] = mock_engine(i915, "mock", RCS); if (!i915->engine[RCS]) - goto err_priorities; + goto err_unlock; i915->kernel_context = mock_context(i915, NULL); if (!i915->kernel_context) goto err_engine; + mutex_unlock(&i915->drm.struct_mutex); + WARN_ON(i915_gemfs_init(i915)); return i915; @@ -250,7 +250,8 @@ struct drm_i915_private *mock_gem_device(void) err_engine: for_each_engine(engine, i915, id) mock_engine_free(engine); -err_priorities: +err_unlock: + mutex_unlock(&i915->drm.struct_mutex); kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index e96873f96116..36c112088940 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -76,7 +76,6 @@ mock_ppgtt(struct drm_i915_private *i915, INIT_LIST_HEAD(&ppgtt->base.global_link); drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total); - i915_gem_timeline_init(i915, &ppgtt->base.timeline, name); ppgtt->base.clear_range = nop_clear_range; ppgtt->base.insert_page = mock_insert_page; diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index 47b1f47c5812..dcf3b16f5a07 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -1,45 +1,28 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ +#include "../i915_timeline.h" + #include "mock_timeline.h" -struct intel_timeline *mock_timeline(u64 context) +void mock_timeline_init(struct i915_timeline *timeline, u64 context) { - static struct lock_class_key class; - struct intel_timeline *tl; + timeline->fence_context = context; + + spin_lock_init(&timeline->lock); - tl = kzalloc(sizeof(*tl), GFP_KERNEL); - if (!tl) - return NULL; + init_request_active(&timeline->last_request, NULL); + INIT_LIST_HEAD(&timeline->requests); - __intel_timeline_init(tl, NULL, context, &class, "mock"); + i915_syncmap_init(&timeline->sync); - return tl; + INIT_LIST_HEAD(&timeline->link); } -void mock_timeline_destroy(struct intel_timeline *tl) +void mock_timeline_fini(struct i915_timeline *timeline) { - __intel_timeline_fini(tl); - kfree(tl); + i915_timeline_fini(timeline); } diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h index c27ff4639b8b..b6deaa61110d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.h +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h @@ -1,33 +1,15 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ #ifndef __MOCK_TIMELINE__ #define __MOCK_TIMELINE__ -#include "../i915_gem_timeline.h" +struct i915_timeline; -struct intel_timeline *mock_timeline(u64 context); -void mock_timeline_destroy(struct intel_timeline *tl); +void mock_timeline_init(struct i915_timeline *timeline, u64 context); +void mock_timeline_fini(struct i915_timeline *timeline); #endif /* !__MOCK_TIMELINE__ */ diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 294de4549922..119ec0a21de2 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -11,6 +11,7 @@ config DRM_MEDIATEK select DRM_PANEL select MEMORY select MTK_SMI + select VIDEOMODE_HELPERS help Choose this option if you have a Mediatek SoCs. The module will be called mediatek-drm diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index e80a603e5fb0..6c0ea39d5739 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -22,6 +22,7 @@ #include <linux/interrupt.h> #include <linux/types.h> #include <linux/clk.h> +#include <video/videomode.h> #include "mtk_dpi_regs.h" #include "mtk_drm_ddp_comp.h" @@ -429,34 +430,35 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, struct mtk_dpi_sync_param vsync_leven = { 0 }; struct mtk_dpi_sync_param vsync_rodd = { 0 }; struct mtk_dpi_sync_param vsync_reven = { 0 }; - unsigned long pix_rate; + struct videomode vm = { 0 }; unsigned long pll_rate; unsigned int factor; /* let pll_rate can fix the valid range of tvdpll (1G~2GHz) */ - pix_rate = 1000UL * mode->clock; + if (mode->clock <= 27000) - factor = 16 * 3; + factor = 3 << 4; else if (mode->clock <= 84000) - factor = 8 * 3; + factor = 3 << 3; else if (mode->clock <= 167000) - factor = 4 * 3; + factor = 3 << 2; else - factor = 2 * 3; - pll_rate = pix_rate * factor; + factor = 3 << 1; + drm_display_mode_to_videomode(mode, &vm); + pll_rate = vm.pixelclock * factor; dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n", - pll_rate, pix_rate); + pll_rate, vm.pixelclock); clk_set_rate(dpi->tvd_clk, pll_rate); pll_rate = clk_get_rate(dpi->tvd_clk); - pix_rate = pll_rate / factor; - clk_set_rate(dpi->pixel_clk, pix_rate); - pix_rate = clk_get_rate(dpi->pixel_clk); + vm.pixelclock = pll_rate / factor; + clk_set_rate(dpi->pixel_clk, vm.pixelclock); + vm.pixelclock = clk_get_rate(dpi->pixel_clk); dev_dbg(dpi->dev, "Got PLL %lu Hz, pixel clock %lu Hz\n", - pll_rate, pix_rate); + pll_rate, vm.pixelclock); limit.c_bottom = 0x0010; limit.c_top = 0x0FE0; @@ -465,33 +467,31 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, dpi_pol.ck_pol = MTK_DPI_POLARITY_FALLING; dpi_pol.de_pol = MTK_DPI_POLARITY_RISING; - dpi_pol.hsync_pol = mode->flags & DRM_MODE_FLAG_PHSYNC ? + dpi_pol.hsync_pol = vm.flags & DISPLAY_FLAGS_HSYNC_HIGH ? MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING; - dpi_pol.vsync_pol = mode->flags & DRM_MODE_FLAG_PVSYNC ? + dpi_pol.vsync_pol = vm.flags & DISPLAY_FLAGS_VSYNC_HIGH ? MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING; - - hsync.sync_width = mode->hsync_end - mode->hsync_start; - hsync.back_porch = mode->htotal - mode->hsync_end; - hsync.front_porch = mode->hsync_start - mode->hdisplay; + hsync.sync_width = vm.hsync_len; + hsync.back_porch = vm.hback_porch; + hsync.front_porch = vm.hfront_porch; hsync.shift_half_line = false; - - vsync_lodd.sync_width = mode->vsync_end - mode->vsync_start; - vsync_lodd.back_porch = mode->vtotal - mode->vsync_end; - vsync_lodd.front_porch = mode->vsync_start - mode->vdisplay; + vsync_lodd.sync_width = vm.vsync_len; + vsync_lodd.back_porch = vm.vback_porch; + vsync_lodd.front_porch = vm.vfront_porch; vsync_lodd.shift_half_line = false; - if (mode->flags & DRM_MODE_FLAG_INTERLACE && + if (vm.flags & DISPLAY_FLAGS_INTERLACED && mode->flags & DRM_MODE_FLAG_3D_MASK) { vsync_leven = vsync_lodd; vsync_rodd = vsync_lodd; vsync_reven = vsync_lodd; vsync_leven.shift_half_line = true; vsync_reven.shift_half_line = true; - } else if (mode->flags & DRM_MODE_FLAG_INTERLACE && + } else if (vm.flags & DISPLAY_FLAGS_INTERLACED && !(mode->flags & DRM_MODE_FLAG_3D_MASK)) { vsync_leven = vsync_lodd; vsync_leven.shift_half_line = true; - } else if (!(mode->flags & DRM_MODE_FLAG_INTERLACE) && + } else if (!(vm.flags & DISPLAY_FLAGS_INTERLACED) && mode->flags & DRM_MODE_FLAG_3D_MASK) { vsync_rodd = vsync_lodd; } @@ -505,12 +505,12 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, mtk_dpi_config_vsync_reven(dpi, &vsync_reven); mtk_dpi_config_3d(dpi, !!(mode->flags & DRM_MODE_FLAG_3D_MASK)); - mtk_dpi_config_interface(dpi, !!(mode->flags & - DRM_MODE_FLAG_INTERLACE)); - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay / 2); + mtk_dpi_config_interface(dpi, !!(vm.flags & + DISPLAY_FLAGS_INTERLACED)); + if (vm.flags & DISPLAY_FLAGS_INTERLACED) + mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive >> 1); else - mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay); + mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive); mtk_dpi_config_channel_limit(dpi, &limit); mtk_dpi_config_bit_num(dpi, dpi->bit_num); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index f595ac816b55..259b7b0de1d2 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -220,7 +220,7 @@ struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev, mtk_gem = mtk_drm_gem_init(dev, attach->dmabuf->size); if (IS_ERR(mtk_gem)) - return ERR_PTR(PTR_ERR(mtk_gem)); + return ERR_CAST(mtk_gem); expected = sg_dma_address(sg->sgl); for_each_sg(sg->sgl, s, sg->nents, i) { diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 7e5e24c2152a..aa0943ec32b0 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -551,13 +551,12 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi) } /** - * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000 * htotal_time = htotal * byte_per_pixel / num_lanes * overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit * mipi_ratio = (htotal_time + overhead_time) / htotal_time * data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes; */ - pixel_clock = dsi->vm.pixelclock * 1000; + pixel_clock = dsi->vm.pixelclock; htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch + dsi->vm.hsync_len; htotal_bits = htotal * bit_per_pixel; @@ -725,16 +724,7 @@ static void mtk_dsi_encoder_mode_set(struct drm_encoder *encoder, { struct mtk_dsi *dsi = encoder_to_dsi(encoder); - dsi->vm.pixelclock = adjusted->clock; - dsi->vm.hactive = adjusted->hdisplay; - dsi->vm.hback_porch = adjusted->htotal - adjusted->hsync_end; - dsi->vm.hfront_porch = adjusted->hsync_start - adjusted->hdisplay; - dsi->vm.hsync_len = adjusted->hsync_end - adjusted->hsync_start; - - dsi->vm.vactive = adjusted->vdisplay; - dsi->vm.vback_porch = adjusted->vtotal - adjusted->vsync_end; - dsi->vm.vfront_porch = adjusted->vsync_start - adjusted->vdisplay; - dsi->vm.vsync_len = adjusted->vsync_end - adjusted->vsync_start; + drm_display_mode_to_videomode(adjusted, &dsi->vm); } static void mtk_dsi_encoder_disable(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c index 782b1e27f040..7b641fa6dc4d 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_plane.c @@ -98,21 +98,6 @@ static const struct drm_plane_funcs mdp4_plane_funcs = { .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, }; -static int mdp4_plane_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) -{ - struct mdp4_plane *mdp4_plane = to_mdp4_plane(plane); - struct mdp4_kms *mdp4_kms = get_kms(plane); - struct msm_kms *kms = &mdp4_kms->base.base; - struct drm_framebuffer *fb = new_state->fb; - - if (!fb) - return 0; - - DBG("%s: prepare: FB[%u]", mdp4_plane->name, fb->base.id); - return msm_framebuffer_prepare(fb, kms->aspace); -} - static void mdp4_plane_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { @@ -152,7 +137,7 @@ static void mdp4_plane_atomic_update(struct drm_plane *plane, } static const struct drm_plane_helper_funcs mdp4_plane_helper_funcs = { - .prepare_fb = mdp4_plane_prepare_fb, + .prepare_fb = msm_atomic_prepare_fb, .cleanup_fb = mdp4_plane_cleanup_fb, .atomic_check = mdp4_plane_atomic_check, .atomic_update = mdp4_plane_atomic_update, diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index efedcac6e641..24e00274844b 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -430,6 +430,7 @@ static void mdp5_crtc_atomic_disable(struct drm_crtc *crtc, struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state); struct mdp5_kms *mdp5_kms = get_kms(crtc); struct device *dev = &mdp5_kms->pdev->dev; + unsigned long flags; DBG("%s", crtc->name); @@ -445,6 +446,14 @@ static void mdp5_crtc_atomic_disable(struct drm_crtc *crtc, mdp_irq_unregister(&mdp5_kms->base, &mdp5_crtc->err); pm_runtime_put_sync(dev); + if (crtc->state->event && !crtc->state->active) { + WARN_ON(mdp5_crtc->event); + spin_lock_irqsave(&mdp5_kms->dev->event_lock, flags); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + spin_unlock_irqrestore(&mdp5_kms->dev->event_lock, flags); + } + mdp5_crtc->enabled = false; } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 6d8e3a9a6fc0..6e12e275deba 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -70,60 +70,110 @@ static int mdp5_hw_init(struct msm_kms *kms) return 0; } -struct mdp5_state *mdp5_get_state(struct drm_atomic_state *s) +/* Global/shared object state funcs */ + +/* + * This is a helper that returns the private state currently in operation. + * Note that this would return the "old_state" if called in the atomic check + * path, and the "new_state" after the atomic swap has been done. + */ +struct mdp5_global_state * +mdp5_get_existing_global_state(struct mdp5_kms *mdp5_kms) +{ + return to_mdp5_global_state(mdp5_kms->glob_state.state); +} + +/* + * This acquires the modeset lock set aside for global state, creates + * a new duplicated private object state. + */ +struct mdp5_global_state *mdp5_get_global_state(struct drm_atomic_state *s) { struct msm_drm_private *priv = s->dev->dev_private; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); - struct msm_kms_state *state = to_kms_state(s); - struct mdp5_state *new_state; + struct drm_private_state *priv_state; int ret; - if (state->state) - return state->state; - - ret = drm_modeset_lock(&mdp5_kms->state_lock, s->acquire_ctx); + ret = drm_modeset_lock(&mdp5_kms->glob_state_lock, s->acquire_ctx); if (ret) return ERR_PTR(ret); - new_state = kmalloc(sizeof(*mdp5_kms->state), GFP_KERNEL); - if (!new_state) - return ERR_PTR(-ENOMEM); + priv_state = drm_atomic_get_private_obj_state(s, &mdp5_kms->glob_state); + if (IS_ERR(priv_state)) + return ERR_CAST(priv_state); - /* Copy state: */ - new_state->hwpipe = mdp5_kms->state->hwpipe; - new_state->hwmixer = mdp5_kms->state->hwmixer; - if (mdp5_kms->smp) - new_state->smp = mdp5_kms->state->smp; + return to_mdp5_global_state(priv_state); +} + +static struct drm_private_state * +mdp5_global_duplicate_state(struct drm_private_obj *obj) +{ + struct mdp5_global_state *state; + + state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; - state->state = new_state; + __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base); - return new_state; + return &state->base; } -static void mdp5_swap_state(struct msm_kms *kms, struct drm_atomic_state *state) +static void mdp5_global_destroy_state(struct drm_private_obj *obj, + struct drm_private_state *state) { - struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); - swap(to_kms_state(state)->state, mdp5_kms->state); + struct mdp5_global_state *mdp5_state = to_mdp5_global_state(state); + + kfree(mdp5_state); +} + +static const struct drm_private_state_funcs mdp5_global_state_funcs = { + .atomic_duplicate_state = mdp5_global_duplicate_state, + .atomic_destroy_state = mdp5_global_destroy_state, +}; + +static int mdp5_global_obj_init(struct mdp5_kms *mdp5_kms) +{ + struct mdp5_global_state *state; + + drm_modeset_lock_init(&mdp5_kms->glob_state_lock); + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + state->mdp5_kms = mdp5_kms; + + drm_atomic_private_obj_init(&mdp5_kms->glob_state, + &state->base, + &mdp5_global_state_funcs); + return 0; } static void mdp5_prepare_commit(struct msm_kms *kms, struct drm_atomic_state *state) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); struct device *dev = &mdp5_kms->pdev->dev; + struct mdp5_global_state *global_state; + + global_state = mdp5_get_existing_global_state(mdp5_kms); pm_runtime_get_sync(dev); if (mdp5_kms->smp) - mdp5_smp_prepare_commit(mdp5_kms->smp, &mdp5_kms->state->smp); + mdp5_smp_prepare_commit(mdp5_kms->smp, &global_state->smp); } static void mdp5_complete_commit(struct msm_kms *kms, struct drm_atomic_state *state) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); struct device *dev = &mdp5_kms->pdev->dev; + struct mdp5_global_state *global_state; + + global_state = mdp5_get_existing_global_state(mdp5_kms); if (mdp5_kms->smp) - mdp5_smp_complete_commit(mdp5_kms->smp, &mdp5_kms->state->smp); + mdp5_smp_complete_commit(mdp5_kms->smp, &global_state->smp); pm_runtime_put_sync(dev); } @@ -229,7 +279,6 @@ static const struct mdp_kms_funcs kms_funcs = { .irq = mdp5_irq, .enable_vblank = mdp5_enable_vblank, .disable_vblank = mdp5_disable_vblank, - .swap_state = mdp5_swap_state, .prepare_commit = mdp5_prepare_commit, .complete_commit = mdp5_complete_commit, .wait_for_crtc_commit_done = mdp5_wait_for_crtc_commit_done, @@ -727,7 +776,8 @@ static void mdp5_destroy(struct platform_device *pdev) if (mdp5_kms->rpm_enabled) pm_runtime_disable(&pdev->dev); - kfree(mdp5_kms->state); + drm_atomic_private_obj_fini(&mdp5_kms->glob_state); + drm_modeset_lock_fini(&mdp5_kms->glob_state_lock); } static int construct_pipes(struct mdp5_kms *mdp5_kms, int cnt, @@ -880,12 +930,9 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev) mdp5_kms->dev = dev; mdp5_kms->pdev = pdev; - drm_modeset_lock_init(&mdp5_kms->state_lock); - mdp5_kms->state = kzalloc(sizeof(*mdp5_kms->state), GFP_KERNEL); - if (!mdp5_kms->state) { - ret = -ENOMEM; + ret = mdp5_global_obj_init(mdp5_kms); + if (ret) goto fail; - } mdp5_kms->mmio = msm_ioremap(pdev, "mdp_phys", "MDP5"); if (IS_ERR(mdp5_kms->mmio)) { diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h index 425a03d213e5..854dfd30e829 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.h @@ -28,8 +28,6 @@ #include "mdp5_ctl.h" #include "mdp5_smp.h" -struct mdp5_state; - struct mdp5_kms { struct mdp_kms base; @@ -49,11 +47,12 @@ struct mdp5_kms { struct mdp5_cfg_handler *cfg; uint32_t caps; /* MDP capabilities (MDP_CAP_XXX bits) */ - /** - * Global atomic state. Do not access directly, use mdp5_get_state() + /* + * Global private object state, Do not access directly, use + * mdp5_global_get_state() */ - struct mdp5_state *state; - struct drm_modeset_lock state_lock; + struct drm_modeset_lock glob_state_lock; + struct drm_private_obj glob_state; struct mdp5_smp *smp; struct mdp5_ctl_manager *ctlm; @@ -81,19 +80,23 @@ struct mdp5_kms { }; #define to_mdp5_kms(x) container_of(x, struct mdp5_kms, base) -/* Global atomic state for tracking resources that are shared across +/* Global private object state for tracking resources that are shared across * multiple kms objects (planes/crtcs/etc). - * - * For atomic updates which require modifying global state, */ -struct mdp5_state { +#define to_mdp5_global_state(x) container_of(x, struct mdp5_global_state, base) +struct mdp5_global_state { + struct drm_private_state base; + + struct drm_atomic_state *state; + struct mdp5_kms *mdp5_kms; + struct mdp5_hw_pipe_state hwpipe; struct mdp5_hw_mixer_state hwmixer; struct mdp5_smp_state smp; }; -struct mdp5_state *__must_check -mdp5_get_state(struct drm_atomic_state *s); +struct mdp5_global_state * mdp5_get_existing_global_state(struct mdp5_kms *mdp5_kms); +struct mdp5_global_state *__must_check mdp5_get_global_state(struct drm_atomic_state *s); /* Atomic plane state. Subclasses the base drm_plane_state in order to * track assigned hwpipe and hw specific state. diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c index 8a00991f03c7..113e6b569562 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_mixer.c @@ -52,14 +52,14 @@ int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc, { struct msm_drm_private *priv = s->dev->dev_private; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); - struct mdp5_state *state = mdp5_get_state(s); + struct mdp5_global_state *global_state = mdp5_get_global_state(s); struct mdp5_hw_mixer_state *new_state; int i; - if (IS_ERR(state)) - return PTR_ERR(state); + if (IS_ERR(global_state)) + return PTR_ERR(global_state); - new_state = &state->hwmixer; + new_state = &global_state->hwmixer; for (i = 0; i < mdp5_kms->num_hwmixers; i++) { struct mdp5_hw_mixer *cur = mdp5_kms->hwmixers[i]; @@ -129,8 +129,8 @@ int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc, void mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer) { - struct mdp5_state *state = mdp5_get_state(s); - struct mdp5_hw_mixer_state *new_state = &state->hwmixer; + struct mdp5_global_state *global_state = mdp5_get_global_state(s); + struct mdp5_hw_mixer_state *new_state = &global_state->hwmixer; if (!mixer) return; diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c index ff52c49095f9..1ef26bc63163 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c @@ -24,17 +24,19 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, { struct msm_drm_private *priv = s->dev->dev_private; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); - struct mdp5_state *state; + struct mdp5_global_state *new_global_state, *old_global_state; struct mdp5_hw_pipe_state *old_state, *new_state; int i, j; - state = mdp5_get_state(s); - if (IS_ERR(state)) - return PTR_ERR(state); + new_global_state = mdp5_get_global_state(s); + if (IS_ERR(new_global_state)) + return PTR_ERR(new_global_state); - /* grab old_state after mdp5_get_state(), since now we hold lock: */ - old_state = &mdp5_kms->state->hwpipe; - new_state = &state->hwpipe; + /* grab old_state after mdp5_get_global_state(), since now we hold lock: */ + old_global_state = mdp5_get_existing_global_state(mdp5_kms); + + old_state = &old_global_state->hwpipe; + new_state = &new_global_state->hwpipe; for (i = 0; i < mdp5_kms->num_hwpipes; i++) { struct mdp5_hw_pipe *cur = mdp5_kms->hwpipes[i]; @@ -107,7 +109,7 @@ int mdp5_pipe_assign(struct drm_atomic_state *s, struct drm_plane *plane, WARN_ON(r_hwpipe); DBG("%s: alloc SMP blocks", (*hwpipe)->name); - ret = mdp5_smp_assign(mdp5_kms->smp, &state->smp, + ret = mdp5_smp_assign(mdp5_kms->smp, &new_global_state->smp, (*hwpipe)->pipe, blkcfg); if (ret) return -ENOMEM; @@ -132,7 +134,7 @@ void mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) { struct msm_drm_private *priv = s->dev->dev_private; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); - struct mdp5_state *state = mdp5_get_state(s); + struct mdp5_global_state *state = mdp5_get_global_state(s); struct mdp5_hw_pipe_state *new_state = &state->hwpipe; if (!hwpipe) diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c index 6826aa10f3ac..c4f115fe96ff 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c @@ -245,20 +245,6 @@ static const struct drm_plane_funcs mdp5_plane_funcs = { .atomic_print_state = mdp5_plane_atomic_print_state, }; -static int mdp5_plane_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) -{ - struct mdp5_kms *mdp5_kms = get_kms(plane); - struct msm_kms *kms = &mdp5_kms->base.base; - struct drm_framebuffer *fb = new_state->fb; - - if (!new_state->fb) - return 0; - - DBG("%s: prepare: FB[%u]", plane->name, fb->base.id); - return msm_framebuffer_prepare(fb, kms->aspace); -} - static void mdp5_plane_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { @@ -543,7 +529,7 @@ static void mdp5_plane_atomic_async_update(struct drm_plane *plane, } static const struct drm_plane_helper_funcs mdp5_plane_helper_funcs = { - .prepare_fb = mdp5_plane_prepare_fb, + .prepare_fb = msm_atomic_prepare_fb, .cleanup_fb = mdp5_plane_cleanup_fb, .atomic_check = mdp5_plane_atomic_check, .atomic_update = mdp5_plane_atomic_update, diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c index ae4983d9d0a5..96c2b828dba4 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c @@ -340,17 +340,20 @@ void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p) struct mdp5_kms *mdp5_kms = get_kms(smp); struct mdp5_hw_pipe_state *hwpstate; struct mdp5_smp_state *state; + struct mdp5_global_state *global_state; int total = 0, i, j; drm_printf(p, "name\tinuse\tplane\n"); drm_printf(p, "----\t-----\t-----\n"); if (drm_can_sleep()) - drm_modeset_lock(&mdp5_kms->state_lock, NULL); + drm_modeset_lock(&mdp5_kms->glob_state_lock, NULL); + + global_state = mdp5_get_existing_global_state(mdp5_kms); /* grab these *after* we hold the state_lock */ - hwpstate = &mdp5_kms->state->hwpipe; - state = &mdp5_kms->state->smp; + hwpstate = &global_state->hwpipe; + state = &global_state->smp; for (i = 0; i < mdp5_kms->num_hwpipes; i++) { struct mdp5_hw_pipe *hwpipe = mdp5_kms->hwpipes[i]; @@ -374,7 +377,7 @@ void mdp5_smp_dump(struct mdp5_smp *smp, struct drm_printer *p) bitmap_weight(state->state, smp->blk_cnt)); if (drm_can_sleep()) - drm_modeset_unlock(&mdp5_kms->state_lock); + drm_modeset_unlock(&mdp5_kms->glob_state_lock); } void mdp5_smp_destroy(struct mdp5_smp *smp) @@ -384,7 +387,8 @@ void mdp5_smp_destroy(struct mdp5_smp *smp) struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_block *cfg) { - struct mdp5_smp_state *state = &mdp5_kms->state->smp; + struct mdp5_smp_state *state; + struct mdp5_global_state *global_state; struct mdp5_smp *smp = NULL; int ret; @@ -398,6 +402,9 @@ struct mdp5_smp *mdp5_smp_init(struct mdp5_kms *mdp5_kms, const struct mdp5_smp_ smp->blk_cnt = cfg->mmb_count; smp->blk_size = cfg->mmb_size; + global_state = mdp5_get_existing_global_state(mdp5_kms); + state = &global_state->smp; + /* statically tied MMBs cannot be re-allocated: */ bitmap_copy(state->state, cfg->reserved_state, smp->blk_cnt); memcpy(smp->reserved, cfg->reserved, sizeof(smp->reserved)); diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 8baba30d6c65..2f1a2780658a 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -1036,7 +1036,6 @@ static int dsi_tx_buf_alloc(struct msm_dsi_host *msm_host, int size) ret = msm_gem_get_iova(msm_host->tx_gem_obj, priv->kms->aspace, &iova); - mutex_unlock(&dev->struct_mutex); if (ret) { pr_err("%s: failed to get iova, %d\n", __func__, ret); return ret; @@ -1067,9 +1066,20 @@ static int dsi_tx_buf_alloc(struct msm_dsi_host *msm_host, int size) static void dsi_tx_buf_free(struct msm_dsi_host *msm_host) { struct drm_device *dev = msm_host->dev; + struct msm_drm_private *priv; + /* + * This is possible if we're tearing down before we've had a chance to + * fully initialize. A very real possibility if our probe is deferred, + * in which case we'll hit msm_dsi_host_destroy() without having run + * through the dsi_tx_buf_alloc(). + */ + if (!dev) + return; + + priv = dev->dev_private; if (msm_host->tx_gem_obj) { - msm_gem_put_iova(msm_host->tx_gem_obj, 0); + msm_gem_put_iova(msm_host->tx_gem_obj, priv->kms->aspace); drm_gem_object_put_unlocked(msm_host->tx_gem_obj); msm_host->tx_gem_obj = NULL; } diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index bf5f8c39f34d..f0635c3da7f4 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -16,69 +16,8 @@ */ #include "msm_drv.h" -#include "msm_kms.h" #include "msm_gem.h" -#include "msm_fence.h" - -struct msm_commit { - struct drm_device *dev; - struct drm_atomic_state *state; - struct work_struct work; - uint32_t crtc_mask; -}; - -static void commit_worker(struct work_struct *work); - -/* block until specified crtcs are no longer pending update, and - * atomically mark them as pending update - */ -static int start_atomic(struct msm_drm_private *priv, uint32_t crtc_mask) -{ - int ret; - - spin_lock(&priv->pending_crtcs_event.lock); - ret = wait_event_interruptible_locked(priv->pending_crtcs_event, - !(priv->pending_crtcs & crtc_mask)); - if (ret == 0) { - DBG("start: %08x", crtc_mask); - priv->pending_crtcs |= crtc_mask; - } - spin_unlock(&priv->pending_crtcs_event.lock); - - return ret; -} - -/* clear specified crtcs (no longer pending update) - */ -static void end_atomic(struct msm_drm_private *priv, uint32_t crtc_mask) -{ - spin_lock(&priv->pending_crtcs_event.lock); - DBG("end: %08x", crtc_mask); - priv->pending_crtcs &= ~crtc_mask; - wake_up_all_locked(&priv->pending_crtcs_event); - spin_unlock(&priv->pending_crtcs_event.lock); -} - -static struct msm_commit *commit_init(struct drm_atomic_state *state) -{ - struct msm_commit *c = kzalloc(sizeof(*c), GFP_KERNEL); - - if (!c) - return NULL; - - c->dev = state->dev; - c->state = state; - - INIT_WORK(&c->work, commit_worker); - - return c; -} - -static void commit_destroy(struct msm_commit *c) -{ - end_atomic(c->dev->dev_private, c->crtc_mask); - kfree(c); -} +#include "msm_kms.h" static void msm_atomic_wait_for_commit_done(struct drm_device *dev, struct drm_atomic_state *old_state) @@ -97,195 +36,48 @@ static void msm_atomic_wait_for_commit_done(struct drm_device *dev, } } -/* The (potentially) asynchronous part of the commit. At this point - * nothing can fail short of armageddon. - */ -static void complete_commit(struct msm_commit *c, bool async) +int msm_atomic_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *new_state) { - struct drm_atomic_state *state = c->state; - struct drm_device *dev = state->dev; - struct msm_drm_private *priv = dev->dev_private; + struct msm_drm_private *priv = plane->dev->dev_private; struct msm_kms *kms = priv->kms; + struct drm_gem_object *obj; + struct msm_gem_object *msm_obj; + struct dma_fence *fence; - drm_atomic_helper_wait_for_fences(dev, state, false); - - kms->funcs->prepare_commit(kms, state); - - drm_atomic_helper_commit_modeset_disables(dev, state); - - drm_atomic_helper_commit_planes(dev, state, 0); - - drm_atomic_helper_commit_modeset_enables(dev, state); - - /* NOTE: _wait_for_vblanks() only waits for vblank on - * enabled CRTCs. So we end up faulting when disabling - * due to (potentially) unref'ing the outgoing fb's - * before the vblank when the disable has latched. - * - * But if it did wait on disabled (or newly disabled) - * CRTCs, that would be racy (ie. we could have missed - * the irq. We need some way to poll for pipe shut - * down. Or just live with occasionally hitting the - * timeout in the CRTC disable path (which really should - * not be critical path) - */ - - msm_atomic_wait_for_commit_done(dev, state); - - drm_atomic_helper_cleanup_planes(dev, state); + if (!new_state->fb) + return 0; - kms->funcs->complete_commit(kms, state); + obj = msm_framebuffer_bo(new_state->fb, 0); + msm_obj = to_msm_bo(obj); + fence = reservation_object_get_excl_rcu(msm_obj->resv); - drm_atomic_state_put(state); + drm_atomic_set_fence_for_plane(new_state, fence); - commit_destroy(c); -} - -static void commit_worker(struct work_struct *work) -{ - complete_commit(container_of(work, struct msm_commit, work), true); + return msm_framebuffer_prepare(new_state->fb, kms->aspace); } -/** - * drm_atomic_helper_commit - commit validated state object - * @dev: DRM device - * @state: the driver state object - * @nonblock: nonblocking commit - * - * This function commits a with drm_atomic_helper_check() pre-validated state - * object. This can still fail when e.g. the framebuffer reservation fails. - * - * RETURNS - * Zero for success or -errno. - */ -int msm_atomic_commit(struct drm_device *dev, - struct drm_atomic_state *state, bool nonblock) +void msm_atomic_commit_tail(struct drm_atomic_state *state) { + struct drm_device *dev = state->dev; struct msm_drm_private *priv = dev->dev_private; - struct msm_commit *c; - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - struct drm_plane *plane; - struct drm_plane_state *old_plane_state, *new_plane_state; - int i, ret; - - ret = drm_atomic_helper_prepare_planes(dev, state); - if (ret) - return ret; - - /* - * Note that plane->atomic_async_check() should fail if we need - * to re-assign hwpipe or anything that touches global atomic - * state, so we'll never go down the async update path in those - * cases. - */ - if (state->async_update) { - drm_atomic_helper_async_commit(dev, state); - drm_atomic_helper_cleanup_planes(dev, state); - return 0; - } - - c = commit_init(state); - if (!c) { - ret = -ENOMEM; - goto error; - } - - /* - * Figure out what crtcs we have: - */ - for_each_new_crtc_in_state(state, crtc, crtc_state, i) - c->crtc_mask |= drm_crtc_mask(crtc); - - /* - * Figure out what fence to wait for: - */ - for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { - if ((new_plane_state->fb != old_plane_state->fb) && new_plane_state->fb) { - struct drm_gem_object *obj = msm_framebuffer_bo(new_plane_state->fb, 0); - struct msm_gem_object *msm_obj = to_msm_bo(obj); - struct dma_fence *fence = reservation_object_get_excl_rcu(msm_obj->resv); + struct msm_kms *kms = priv->kms; - drm_atomic_set_fence_for_plane(new_plane_state, fence); - } - } + kms->funcs->prepare_commit(kms, state); - /* - * Wait for pending updates on any of the same crtc's and then - * mark our set of crtc's as busy: - */ - ret = start_atomic(dev->dev_private, c->crtc_mask); - if (ret) - goto err_free; + drm_atomic_helper_commit_modeset_disables(dev, state); - BUG_ON(drm_atomic_helper_swap_state(state, false) < 0); + drm_atomic_helper_commit_planes(dev, state, 0); - /* - * This is the point of no return - everything below never fails except - * when the hw goes bonghits. Which means we can commit the new state on - * the software side now. - * - * swap driver private state while still holding state_lock - */ - if (to_kms_state(state)->state) - priv->kms->funcs->swap_state(priv->kms, state); + drm_atomic_helper_commit_modeset_enables(dev, state); - /* - * Everything below can be run asynchronously without the need to grab - * any modeset locks at all under one conditions: It must be guaranteed - * that the asynchronous work has either been cancelled (if the driver - * supports it, which at least requires that the framebuffers get - * cleaned up with drm_atomic_helper_cleanup_planes()) or completed - * before the new state gets committed on the software side with - * drm_atomic_helper_swap_state(). - * - * This scheme allows new atomic state updates to be prepared and - * checked in parallel to the asynchronous completion of the previous - * update. Which is important since compositors need to figure out the - * composition of the next frame right after having submitted the - * current layout. - */ + msm_atomic_wait_for_commit_done(dev, state); - drm_atomic_state_get(state); - if (nonblock) { - queue_work(priv->atomic_wq, &c->work); - return 0; - } + kms->funcs->complete_commit(kms, state); - complete_commit(c, false); + drm_atomic_helper_wait_for_vblanks(dev, state); - return 0; + drm_atomic_helper_commit_hw_done(state); -err_free: - kfree(c); -error: drm_atomic_helper_cleanup_planes(dev, state); - return ret; -} - -struct drm_atomic_state *msm_atomic_state_alloc(struct drm_device *dev) -{ - struct msm_kms_state *state = kzalloc(sizeof(*state), GFP_KERNEL); - - if (!state || drm_atomic_state_init(dev, &state->base) < 0) { - kfree(state); - return NULL; - } - - return &state->base; -} - -void msm_atomic_state_clear(struct drm_atomic_state *s) -{ - struct msm_kms_state *state = to_kms_state(s); - drm_atomic_state_default_clear(&state->base); - kfree(state->state); - state->state = NULL; -} - -void msm_atomic_state_free(struct drm_atomic_state *state) -{ - kfree(to_kms_state(state)->state); - drm_atomic_state_default_release(state); - kfree(state); } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 30cd514d8f7c..021a0b6f9a59 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -41,10 +41,11 @@ static const struct drm_mode_config_funcs mode_config_funcs = { .fb_create = msm_framebuffer_create, .output_poll_changed = drm_fb_helper_output_poll_changed, .atomic_check = drm_atomic_helper_check, - .atomic_commit = msm_atomic_commit, - .atomic_state_alloc = msm_atomic_state_alloc, - .atomic_state_clear = msm_atomic_state_clear, - .atomic_state_free = msm_atomic_state_free, + .atomic_commit = drm_atomic_helper_commit, +}; + +static const struct drm_mode_config_helper_funcs mode_config_helper_funcs = { + .atomic_commit_tail = msm_atomic_commit_tail, }; #ifdef CONFIG_DRM_MSM_REGISTER_LOGGING @@ -384,7 +385,6 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) priv->wq = alloc_ordered_workqueue("msm", 0); priv->atomic_wq = alloc_ordered_workqueue("msm:atomic", 0); - init_waitqueue_head(&priv->pending_crtcs_event); INIT_LIST_HEAD(&priv->inactive_list); INIT_LIST_HEAD(&priv->vblank_ctrl.event_list); @@ -442,6 +442,7 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) } ddev->mode_config.funcs = &mode_config_funcs; + ddev->mode_config.helper_private = &mode_config_helper_funcs; ret = drm_vblank_init(ddev, priv->num_crtcs); if (ret < 0) { diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 48ed5b9a8580..b2da1fbf81e0 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -117,10 +117,6 @@ struct msm_drm_private { struct workqueue_struct *wq; struct workqueue_struct *atomic_wq; - /* crtcs pending async atomic updates: */ - uint32_t pending_crtcs; - wait_queue_head_t pending_crtcs_event; - unsigned int num_planes; struct drm_plane *planes[16]; @@ -160,8 +156,9 @@ struct msm_format { uint32_t pixel_format; }; -int msm_atomic_commit(struct drm_device *dev, - struct drm_atomic_state *state, bool nonblock); +int msm_atomic_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *new_state); +void msm_atomic_commit_tail(struct drm_atomic_state *state); struct drm_atomic_state *msm_atomic_state_alloc(struct drm_device *dev); void msm_atomic_state_clear(struct drm_atomic_state *state); void msm_atomic_state_free(struct drm_atomic_state *state); diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h index aaa329dc020e..dfd92947de2c 100644 --- a/drivers/gpu/drm/msm/msm_kms.h +++ b/drivers/gpu/drm/msm/msm_kms.h @@ -40,8 +40,6 @@ struct msm_kms_funcs { irqreturn_t (*irq)(struct msm_kms *kms); int (*enable_vblank)(struct msm_kms *kms, struct drm_crtc *crtc); void (*disable_vblank)(struct msm_kms *kms, struct drm_crtc *crtc); - /* swap global atomic state: */ - void (*swap_state)(struct msm_kms *kms, struct drm_atomic_state *state); /* modeset, bracketing atomic_commit(): */ void (*prepare_commit)(struct msm_kms *kms, struct drm_atomic_state *state); void (*complete_commit)(struct msm_kms *kms, struct drm_atomic_state *state); @@ -80,18 +78,6 @@ struct msm_kms { struct msm_gem_address_space *aspace; }; -/** - * Subclass of drm_atomic_state, to allow kms backend to have driver - * private global state. The kms backend can do whatever it wants - * with the ->state ptr. On ->atomic_state_clear() the ->state ptr - * is kfree'd and set back to NULL. - */ -struct msm_kms_state { - struct drm_atomic_state base; - void *state; -}; -#define to_kms_state(x) container_of(x, struct msm_kms_state, base) - static inline void msm_kms_init(struct msm_kms *kms, const struct msm_kms_funcs *funcs) { diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index 9c0c650655e9..b17843dd050d 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -38,14 +38,16 @@ nouveau-y += nouveau_vmm.o # DRM - modesetting nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o +nouveau-y += nouveau_bios.o nouveau-y += nouveau_connector.o nouveau-y += nouveau_display.o -nouveau-y += nv50_display.o nouveau-y += nouveau_dp.o nouveau-y += nouveau_fbcon.o nouveau-y += nv04_fbcon.o nouveau-y += nv50_fbcon.o nouveau-y += nvc0_fbcon.o +include $(src)/dispnv04/Kbuild +include $(src)/dispnv50/Kbuild # DRM - command submission nouveau-y += nouveau_abi16.o @@ -59,8 +61,4 @@ nouveau-y += nv50_fence.o nouveau-y += nv84_fence.o nouveau-y += nvc0_fence.o -# DRM - prehistoric modesetting (NV04-G7x) -nouveau-y += nouveau_bios.o -include $(src)/dispnv04/Kbuild - obj-$(CONFIG_DRM_NOUVEAU) += nouveau.o diff --git a/drivers/gpu/drm/nouveau/dispnv50/Kbuild b/drivers/gpu/drm/nouveau/dispnv50/Kbuild new file mode 100644 index 000000000000..849b0f45afb8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/Kbuild @@ -0,0 +1,51 @@ +nouveau-y += dispnv50/disp.o +nouveau-y += dispnv50/lut.o + +nouveau-y += dispnv50/core.o +nouveau-y += dispnv50/core507d.o +nouveau-y += dispnv50/core827d.o +nouveau-y += dispnv50/core907d.o +nouveau-y += dispnv50/core917d.o +nouveau-y += dispnv50/corec37d.o + +nouveau-y += dispnv50/dac507d.o +nouveau-y += dispnv50/dac907d.o + +nouveau-y += dispnv50/pior507d.o + +nouveau-y += dispnv50/sor507d.o +nouveau-y += dispnv50/sor907d.o +nouveau-y += dispnv50/sorc37d.o + +nouveau-y += dispnv50/head.o +nouveau-y += dispnv50/head507d.o +nouveau-y += dispnv50/head827d.o +nouveau-y += dispnv50/head907d.o +nouveau-y += dispnv50/head917d.o +nouveau-y += dispnv50/headc37d.o + +nouveau-y += dispnv50/wimm.o +nouveau-y += dispnv50/wimmc37b.o + +nouveau-y += dispnv50/wndw.o +nouveau-y += dispnv50/wndwc37e.o + +nouveau-y += dispnv50/base.o +nouveau-y += dispnv50/base507c.o +nouveau-y += dispnv50/base827c.o +nouveau-y += dispnv50/base907c.o +nouveau-y += dispnv50/base917c.o + +nouveau-y += dispnv50/curs.o +nouveau-y += dispnv50/curs507a.o +nouveau-y += dispnv50/curs907a.o +nouveau-y += dispnv50/cursc37a.o + +nouveau-y += dispnv50/oimm.o +nouveau-y += dispnv50/oimm507b.o + +nouveau-y += dispnv50/ovly.o +nouveau-y += dispnv50/ovly507e.o +nouveau-y += dispnv50/ovly827e.o +nouveau-y += dispnv50/ovly907e.o +nouveau-y += dispnv50/ovly917e.o diff --git a/drivers/gpu/drm/nouveau/dispnv50/atom.h b/drivers/gpu/drm/nouveau/dispnv50/atom.h new file mode 100644 index 000000000000..908feb1fc60f --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/atom.h @@ -0,0 +1,222 @@ +#ifndef __NV50_KMS_ATOM_H__ +#define __NV50_KMS_ATOM_H__ +#define nv50_atom(p) container_of((p), struct nv50_atom, state) +#include <drm/drm_atomic.h> + +struct nv50_atom { + struct drm_atomic_state state; + + struct list_head outp; + bool lock_core; + bool flush_disable; +}; + +#define nv50_head_atom(p) container_of((p), struct nv50_head_atom, state) + +struct nv50_head_atom { + struct drm_crtc_state state; + + struct { + u32 mask; + u32 olut; + } wndw; + + struct { + u16 iW; + u16 iH; + u16 oW; + u16 oH; + } view; + + struct nv50_head_mode { + bool interlace; + u32 clock; + struct { + u16 active; + u16 synce; + u16 blanke; + u16 blanks; + } h; + struct { + u32 active; + u16 synce; + u16 blanke; + u16 blanks; + u16 blank2s; + u16 blank2e; + u16 blankus; + } v; + } mode; + + struct { + bool visible; + u32 handle; + u64 offset:40; + u8 buffer:1; + u8 mode:4; + u8 size:2; + u8 range:2; + u8 output_mode:2; + } olut; + + struct { + bool visible; + u32 handle; + u64 offset:40; + u8 format; + u8 kind:7; + u8 layout:1; + u8 blockh:4; + u16 blocks:12; + u32 pitch:20; + u16 x; + u16 y; + u16 w; + u16 h; + } core; + + struct { + bool visible; + u32 handle; + u64 offset:40; + u8 layout:2; + u8 format:8; + } curs; + + struct { + u8 depth; + u8 cpp; + u16 x; + u16 y; + u16 w; + u16 h; + } base; + + struct { + u8 cpp; + } ovly; + + struct { + bool enable:1; + u8 bits:2; + u8 mode:4; + } dither; + + struct { + struct { + u16 cos:12; + u16 sin:12; + } sat; + } procamp; + + struct { + u8 nhsync:1; + u8 nvsync:1; + u8 depth:4; + } or; + + union nv50_head_atom_mask { + struct { + bool olut:1; + bool core:1; + bool curs:1; + bool view:1; + bool mode:1; + bool base:1; + bool ovly:1; + bool dither:1; + bool procamp:1; + bool or:1; + }; + u16 mask; + } set, clr; +}; + +static inline struct nv50_head_atom * +nv50_head_atom_get(struct drm_atomic_state *state, struct drm_crtc *crtc) +{ + struct drm_crtc_state *statec = drm_atomic_get_crtc_state(state, crtc); + if (IS_ERR(statec)) + return (void *)statec; + return nv50_head_atom(statec); +} + +#define nv50_wndw_atom(p) container_of((p), struct nv50_wndw_atom, state) + +struct nv50_wndw_atom { + struct drm_plane_state state; + + struct drm_property_blob *ilut; + bool visible; + + struct { + u32 handle; + u16 offset:12; + bool awaken:1; + } ntfy; + + struct { + u32 handle; + u16 offset:12; + u32 acquire; + u32 release; + } sema; + + struct { + u32 handle; + struct { + u64 offset:40; + u8 buffer:1; + u8 enable:2; + u8 mode:4; + u8 size:2; + u8 range:2; + u8 output_mode:2; + } i; + } xlut; + + struct { + u8 mode:2; + u8 interval:4; + + u8 colorspace:2; + u8 format; + u8 kind:7; + u8 layout:1; + u8 blockh:4; + u16 blocks[3]; + u32 pitch[3]; + u16 w; + u16 h; + + u32 handle[6]; + u64 offset[6]; + } image; + + struct { + u16 sx; + u16 sy; + u16 sw; + u16 sh; + u16 dw; + u16 dh; + } scale; + + struct { + u16 x; + u16 y; + } point; + + union nv50_wndw_atom_mask { + struct { + bool ntfy:1; + bool sema:1; + bool xlut:1; + bool image:1; + bool scale:1; + bool point:1; + }; + u8 mask; + } set, clr; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/base.c b/drivers/gpu/drm/nouveau/dispnv50/base.c new file mode 100644 index 000000000000..7c752acf2b48 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/base.c @@ -0,0 +1,53 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "base.h" + +#include <nvif/class.h> + +int +nv50_base_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw) +{ + struct { + s32 oclass; + int version; + int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **); + } bases[] = { + { GK110_DISP_BASE_CHANNEL_DMA, 0, base917c_new }, + { GK104_DISP_BASE_CHANNEL_DMA, 0, base917c_new }, + { GF110_DISP_BASE_CHANNEL_DMA, 0, base907c_new }, + { GT214_DISP_BASE_CHANNEL_DMA, 0, base827c_new }, + { GT200_DISP_BASE_CHANNEL_DMA, 0, base827c_new }, + { G82_DISP_BASE_CHANNEL_DMA, 0, base827c_new }, + { NV50_DISP_BASE_CHANNEL_DMA, 0, base507c_new }, + {} + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + int cid; + + cid = nvif_mclass(&disp->disp->object, bases); + if (cid < 0) { + NV_ERROR(drm, "No supported base class\n"); + return cid; + } + + return bases[cid].new(drm, head, bases[cid].oclass, pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/base.h b/drivers/gpu/drm/nouveau/dispnv50/base.h new file mode 100644 index 000000000000..e7f14f230f35 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/base.h @@ -0,0 +1,31 @@ +#ifndef __NV50_KMS_BASE_H__ +#define __NV50_KMS_BASE_H__ +#include "wndw.h" + +int base507c_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); +int base507c_new_(const struct nv50_wndw_func *, const u32 *format, + struct nouveau_drm *, int head, s32 oclass, + u32 interlock_data, struct nv50_wndw **); +extern const u32 base507c_format[]; +int base507c_acquire(struct nv50_wndw *, struct nv50_wndw_atom *, + struct nv50_head_atom *); +void base507c_release(struct nv50_wndw *, struct nv50_wndw_atom *, + struct nv50_head_atom *); +void base507c_sema_set(struct nv50_wndw *, struct nv50_wndw_atom *); +void base507c_sema_clr(struct nv50_wndw *); +void base507c_ntfy_set(struct nv50_wndw *, struct nv50_wndw_atom *); +void base507c_ntfy_clr(struct nv50_wndw *); +void base507c_xlut_set(struct nv50_wndw *, struct nv50_wndw_atom *); +void base507c_xlut_clr(struct nv50_wndw *); +void base507c_image_clr(struct nv50_wndw *); +void base507c_update(struct nv50_wndw *, u32 *); + +int base827c_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); + +int base907c_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); +extern const struct nv50_wndw_func base907c; + +int base917c_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); + +int nv50_base_new(struct nouveau_drm *, int head, struct nv50_wndw **); +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/base507c.c b/drivers/gpu/drm/nouveau/dispnv50/base507c.c new file mode 100644 index 000000000000..d5e295ca2caa --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/base507c.c @@ -0,0 +1,286 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "base.h" + +#include <nvif/cl507c.h> +#include <nvif/event.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_plane_helper.h> +#include "nouveau_bo.h" + +void +base507c_update(struct nv50_wndw *wndw, u32 *interlock) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 2))) { + evo_mthd(push, 0x0080, 1); + evo_data(push, interlock[NV50_DISP_INTERLOCK_CORE]); + evo_kick(push, &wndw->wndw); + } +} + +void +base507c_image_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 4))) { + evo_mthd(push, 0x0084, 1); + evo_data(push, 0x00000000); + evo_mthd(push, 0x00c0, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +static void +base507c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 10))) { + evo_mthd(push, 0x0084, 1); + evo_data(push, asyw->image.mode << 8 | + asyw->image.interval << 4); + evo_mthd(push, 0x00c0, 1); + evo_data(push, asyw->image.handle[0]); + evo_mthd(push, 0x0800, 5); + evo_data(push, asyw->image.offset[0] >> 8); + evo_data(push, 0x00000000); + evo_data(push, asyw->image.h << 16 | asyw->image.w); + evo_data(push, asyw->image.layout << 20 | + (asyw->image.pitch[0] >> 8) << 8 | + asyw->image.blocks[0] << 8 | + asyw->image.blockh); + evo_data(push, asyw->image.kind << 16 | + asyw->image.format << 8); + evo_kick(push, &wndw->wndw); + } +} + +void +base507c_xlut_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 2))) { + evo_mthd(push, 0x00e0, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +void +base507c_xlut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 2))) { + evo_mthd(push, 0x00e0, 1); + evo_data(push, 0x40000000); + evo_kick(push, &wndw->wndw); + } +} + +int +base507c_ntfy_wait_begun(struct nouveau_bo *bo, u32 offset, + struct nvif_device *device) +{ + s64 time = nvif_msec(device, 2000ULL, + u32 data = nouveau_bo_rd32(bo, offset / 4); + if ((data & 0xc0000000) == 0x40000000) + break; + usleep_range(1, 2); + ); + return time < 0 ? time : 0; +} + +void +base507c_ntfy_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 2))) { + evo_mthd(push, 0x00a4, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +void +base507c_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 3))) { + evo_mthd(push, 0x00a0, 2); + evo_data(push, asyw->ntfy.awaken << 30 | asyw->ntfy.offset); + evo_data(push, asyw->ntfy.handle); + evo_kick(push, &wndw->wndw); + } +} + +void +base507c_ntfy_reset(struct nouveau_bo *bo, u32 offset) +{ + nouveau_bo_wr32(bo, offset / 4, 0x00000000); +} + +void +base507c_sema_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 2))) { + evo_mthd(push, 0x0094, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +void +base507c_sema_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 5))) { + evo_mthd(push, 0x0088, 4); + evo_data(push, asyw->sema.offset); + evo_data(push, asyw->sema.acquire); + evo_data(push, asyw->sema.release); + evo_data(push, asyw->sema.handle); + evo_kick(push, &wndw->wndw); + } +} + +void +base507c_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + asyh->base.cpp = 0; +} + +int +base507c_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + const struct drm_framebuffer *fb = asyw->state.fb; + int ret; + + if (!fb->format->depth) + return -EINVAL; + + ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, true); + if (ret) + return ret; + + if (!wndw->func->ilut) { + if ((asyh->base.cpp != 1) ^ (fb->format->cpp[0] != 1)) + asyh->state.color_mgmt_changed = true; + } + + asyh->base.depth = fb->format->depth; + asyh->base.cpp = fb->format->cpp[0]; + asyh->base.x = asyw->state.src.x1 >> 16; + asyh->base.y = asyw->state.src.y1 >> 16; + asyh->base.w = asyw->state.fb->width; + asyh->base.h = asyw->state.fb->height; + return 0; +} + +const u32 +base507c_format[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ABGR2101010, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + 0 +}; + +static const struct nv50_wndw_func +base507c = { + .acquire = base507c_acquire, + .release = base507c_release, + .sema_set = base507c_sema_set, + .sema_clr = base507c_sema_clr, + .ntfy_reset = base507c_ntfy_reset, + .ntfy_set = base507c_ntfy_set, + .ntfy_clr = base507c_ntfy_clr, + .ntfy_wait_begun = base507c_ntfy_wait_begun, + .olut_core = 1, + .xlut_set = base507c_xlut_set, + .xlut_clr = base507c_xlut_clr, + .image_set = base507c_image_set, + .image_clr = base507c_image_clr, + .update = base507c_update, +}; + +int +base507c_new_(const struct nv50_wndw_func *func, const u32 *format, + struct nouveau_drm *drm, int head, s32 oclass, u32 interlock_data, + struct nv50_wndw **pwndw) +{ + struct nv50_disp_base_channel_dma_v0 args = { + .head = head, + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + struct nv50_wndw *wndw; + int ret; + + ret = nv50_wndw_new_(func, drm->dev, DRM_PLANE_TYPE_PRIMARY, + "base", head, format, BIT(head), + NV50_DISP_INTERLOCK_BASE, interlock_data, &wndw); + if (*pwndw = wndw, ret) + return ret; + + ret = nv50_dmac_create(&drm->client.device, &disp->disp->object, + &oclass, head, &args, sizeof(args), + disp->sync->bo.offset, &wndw->wndw); + if (ret) { + NV_ERROR(drm, "base%04x allocation failed: %d\n", oclass, ret); + return ret; + } + + ret = nvif_notify_init(&wndw->wndw.base.user, wndw->notify.func, + false, NV50_DISP_BASE_CHANNEL_DMA_V0_NTFY_UEVENT, + &(struct nvif_notify_uevent_req) {}, + sizeof(struct nvif_notify_uevent_req), + sizeof(struct nvif_notify_uevent_rep), + &wndw->notify); + if (ret) + return ret; + + wndw->ntfy = NV50_DISP_BASE_NTFY(wndw->id); + wndw->sema = NV50_DISP_BASE_SEM0(wndw->id); + wndw->data = 0x00000000; + return 0; +} + +int +base507c_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return base507c_new_(&base507c, base507c_format, drm, head, oclass, + 0x00000002 << (head * 8), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/base827c.c b/drivers/gpu/drm/nouveau/dispnv50/base827c.c new file mode 100644 index 000000000000..73646819a0d6 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/base827c.c @@ -0,0 +1,71 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "base.h" + +static void +base827c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 10))) { + evo_mthd(push, 0x0084, 1); + evo_data(push, asyw->image.mode << 8 | + asyw->image.interval << 4); + evo_mthd(push, 0x00c0, 1); + evo_data(push, asyw->image.handle[0]); + evo_mthd(push, 0x0800, 5); + evo_data(push, asyw->image.offset[0] >> 8); + evo_data(push, 0x00000000); + evo_data(push, asyw->image.h << 16 | asyw->image.w); + evo_data(push, asyw->image.layout << 20 | + (asyw->image.pitch[0] >> 8) << 8 | + asyw->image.blocks[0] << 8 | + asyw->image.blockh); + evo_data(push, asyw->image.format << 8); + evo_kick(push, &wndw->wndw); + } +} + +static const struct nv50_wndw_func +base827c = { + .acquire = base507c_acquire, + .release = base507c_release, + .sema_set = base507c_sema_set, + .sema_clr = base507c_sema_clr, + .ntfy_reset = base507c_ntfy_reset, + .ntfy_set = base507c_ntfy_set, + .ntfy_clr = base507c_ntfy_clr, + .ntfy_wait_begun = base507c_ntfy_wait_begun, + .olut_core = 1, + .xlut_set = base507c_xlut_set, + .xlut_clr = base507c_xlut_clr, + .image_set = base827c_image_set, + .image_clr = base507c_image_clr, + .update = base507c_update, +}; + +int +base827c_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return base507c_new_(&base827c, base507c_format, drm, head, oclass, + 0x00000002 << (head * 8), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/base907c.c b/drivers/gpu/drm/nouveau/dispnv50/base907c.c new file mode 100644 index 000000000000..a562fc94ce59 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/base907c.c @@ -0,0 +1,110 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "base.h" + +static void +base907c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 10))) { + evo_mthd(push, 0x0084, 1); + evo_data(push, asyw->image.mode << 8 | + asyw->image.interval << 4); + evo_mthd(push, 0x00c0, 1); + evo_data(push, asyw->image.handle[0]); + evo_mthd(push, 0x0400, 5); + evo_data(push, asyw->image.offset[0] >> 8); + evo_data(push, 0x00000000); + evo_data(push, asyw->image.h << 16 | asyw->image.w); + evo_data(push, asyw->image.layout << 24 | + (asyw->image.pitch[0] >> 8) << 8 | + asyw->image.blocks[0] << 8 | + asyw->image.blockh); + evo_data(push, asyw->image.format << 8); + evo_kick(push, &wndw->wndw); + } +} + +static void +base907c_xlut_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 6))) { + evo_mthd(push, 0x00e0, 1); + evo_data(push, 0x00000000); + evo_mthd(push, 0x00e8, 1); + evo_data(push, 0x00000000); + evo_mthd(push, 0x00fc, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +static void +base907c_xlut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 6))) { + evo_mthd(push, 0x00e0, 3); + evo_data(push, asyw->xlut.i.enable << 30 | + asyw->xlut.i.mode << 24); + evo_data(push, asyw->xlut.i.offset >> 8); + evo_data(push, 0x40000000); + evo_mthd(push, 0x00fc, 1); + evo_data(push, asyw->xlut.handle); + evo_kick(push, &wndw->wndw); + } +} + +static void +base907c_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + asyw->xlut.i.mode = 7; + asyw->xlut.i.enable = 2; +} + +const struct nv50_wndw_func +base907c = { + .acquire = base507c_acquire, + .release = base507c_release, + .sema_set = base507c_sema_set, + .sema_clr = base507c_sema_clr, + .ntfy_reset = base507c_ntfy_reset, + .ntfy_set = base507c_ntfy_set, + .ntfy_clr = base507c_ntfy_clr, + .ntfy_wait_begun = base507c_ntfy_wait_begun, + .ilut = base907c_ilut, + .olut_core = true, + .xlut_set = base907c_xlut_set, + .xlut_clr = base907c_xlut_clr, + .image_set = base907c_image_set, + .image_clr = base507c_image_clr, + .update = base507c_update, +}; + +int +base907c_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return base507c_new_(&base907c, base507c_format, drm, head, oclass, + 0x00000002 << (head * 4), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/base917c.c b/drivers/gpu/drm/nouveau/dispnv50/base917c.c new file mode 100644 index 000000000000..54d705bb81a5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/base917c.c @@ -0,0 +1,48 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "base.h" +#include "atom.h" + +const u32 +base917c_format[] = { + DRM_FORMAT_C8, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ABGR2101010, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_ARGB2101010, + 0 +}; + +int +base917c_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return base507c_new_(&base907c, base917c_format, drm, head, oclass, + 0x00000002 << (head * 4), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.c b/drivers/gpu/drm/nouveau/dispnv50/core.c new file mode 100644 index 000000000000..f3c49adb1bdb --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/core.c @@ -0,0 +1,70 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "core.h" + +#include <nvif/class.h> + +void +nv50_core_del(struct nv50_core **pcore) +{ + struct nv50_core *core = *pcore; + if (core) { + nv50_dmac_destroy(&core->chan); + kfree(*pcore); + *pcore = NULL; + } +} + +int +nv50_core_new(struct nouveau_drm *drm, struct nv50_core **pcore) +{ + struct { + s32 oclass; + int version; + int (*new)(struct nouveau_drm *, s32, struct nv50_core **); + } cores[] = { + { GV100_DISP_CORE_CHANNEL_DMA, 0, corec37d_new }, + { GP102_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, + { GP100_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, + { GM200_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, + { GM107_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, + { GK110_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, + { GK104_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, + { GF110_DISP_CORE_CHANNEL_DMA, 0, core907d_new }, + { GT214_DISP_CORE_CHANNEL_DMA, 0, core827d_new }, + { GT206_DISP_CORE_CHANNEL_DMA, 0, core827d_new }, + { GT200_DISP_CORE_CHANNEL_DMA, 0, core827d_new }, + { G82_DISP_CORE_CHANNEL_DMA, 0, core827d_new }, + { NV50_DISP_CORE_CHANNEL_DMA, 0, core507d_new }, + {} + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + int cid; + + cid = nvif_mclass(&disp->disp->object, cores); + if (cid < 0) { + NV_ERROR(drm, "No supported core channel class\n"); + return cid; + } + + return cores[cid].new(drm, cores[cid].oclass, pcore); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.h b/drivers/gpu/drm/nouveau/dispnv50/core.h new file mode 100644 index 000000000000..8470df9dd13d --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/core.h @@ -0,0 +1,50 @@ +#ifndef __NV50_KMS_CORE_H__ +#define __NV50_KMS_CORE_H__ +#include "disp.h" +#include "atom.h" + +struct nv50_core { + const struct nv50_core_func *func; + struct nv50_dmac chan; +}; + +int nv50_core_new(struct nouveau_drm *, struct nv50_core **); +void nv50_core_del(struct nv50_core **); + +struct nv50_core_func { + void (*init)(struct nv50_core *); + void (*ntfy_init)(struct nouveau_bo *, u32 offset); + int (*ntfy_wait_done)(struct nouveau_bo *, u32 offset, + struct nvif_device *); + void (*update)(struct nv50_core *, u32 *interlock, bool ntfy); + + const struct nv50_head_func *head; + const struct nv50_outp_func { + void (*ctrl)(struct nv50_core *, int or, u32 ctrl, + struct nv50_head_atom *); + } *dac, *pior, *sor; +}; + +int core507d_new(struct nouveau_drm *, s32, struct nv50_core **); +int core507d_new_(const struct nv50_core_func *, struct nouveau_drm *, s32, + struct nv50_core **); +void core507d_init(struct nv50_core *); +void core507d_ntfy_init(struct nouveau_bo *, u32); +int core507d_ntfy_wait_done(struct nouveau_bo *, u32, struct nvif_device *); +void core507d_update(struct nv50_core *, u32 *, bool); + +extern const struct nv50_outp_func dac507d; +extern const struct nv50_outp_func sor507d; +extern const struct nv50_outp_func pior507d; + +int core827d_new(struct nouveau_drm *, s32, struct nv50_core **); + +int core907d_new(struct nouveau_drm *, s32, struct nv50_core **); +extern const struct nv50_outp_func dac907d; +extern const struct nv50_outp_func sor907d; + +int core917d_new(struct nouveau_drm *, s32, struct nv50_core **); + +int corec37d_new(struct nouveau_drm *, s32, struct nv50_core **); +extern const struct nv50_outp_func sorc37d; +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/core507d.c b/drivers/gpu/drm/nouveau/dispnv50/core507d.c new file mode 100644 index 000000000000..e7fcfa6e6467 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/core507d.c @@ -0,0 +1,115 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "core.h" +#include "head.h" + +#include <nvif/cl507d.h> + +#include "nouveau_bo.h" + +void +core507d_update(struct nv50_core *core, u32 *interlock, bool ntfy) +{ + u32 *push; + if ((push = evo_wait(&core->chan, 5))) { + if (ntfy) { + evo_mthd(push, 0x0084, 1); + evo_data(push, 0x80000000 | NV50_DISP_CORE_NTFY); + } + evo_mthd(push, 0x0080, 2); + evo_data(push, interlock[NV50_DISP_INTERLOCK_BASE] | + interlock[NV50_DISP_INTERLOCK_OVLY]); + evo_data(push, 0x00000000); + evo_kick(push, &core->chan); + } +} + +int +core507d_ntfy_wait_done(struct nouveau_bo *bo, u32 offset, + struct nvif_device *device) +{ + s64 time = nvif_msec(device, 2000ULL, + if (nouveau_bo_rd32(bo, offset / 4)) + break; + usleep_range(1, 2); + ); + return time < 0 ? time : 0; +} + +void +core507d_ntfy_init(struct nouveau_bo *bo, u32 offset) +{ + nouveau_bo_wr32(bo, offset / 4, 0x00000000); +} + +void +core507d_init(struct nv50_core *core) +{ + u32 *push; + if ((push = evo_wait(&core->chan, 2))) { + evo_mthd(push, 0x0088, 1); + evo_data(push, core->chan.sync.handle); + evo_kick(push, &core->chan); + } +} + +static const struct nv50_core_func +core507d = { + .init = core507d_init, + .ntfy_init = core507d_ntfy_init, + .ntfy_wait_done = core507d_ntfy_wait_done, + .update = core507d_update, + .head = &head507d, + .dac = &dac507d, + .sor = &sor507d, + .pior = &pior507d, +}; + +int +core507d_new_(const struct nv50_core_func *func, struct nouveau_drm *drm, + s32 oclass, struct nv50_core **pcore) +{ + struct nv50_disp_core_channel_dma_v0 args = {}; + struct nv50_disp *disp = nv50_disp(drm->dev); + struct nv50_core *core; + int ret; + + if (!(core = *pcore = kzalloc(sizeof(*core), GFP_KERNEL))) + return -ENOMEM; + core->func = func; + + ret = nv50_dmac_create(&drm->client.device, &disp->disp->object, + &oclass, 0, &args, sizeof(args), + disp->sync->bo.offset, &core->chan); + if (ret) { + NV_ERROR(drm, "core%04x allocation failed: %d\n", oclass, ret); + return ret; + } + + return 0; +} + +int +core507d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore) +{ + return core507d_new_(&core507d, drm, oclass, pcore); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/core827d.c b/drivers/gpu/drm/nouveau/dispnv50/core827d.c new file mode 100644 index 000000000000..6123a068f836 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/core827d.c @@ -0,0 +1,41 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "core.h" +#include "head.h" + +static const struct nv50_core_func +core827d = { + .init = core507d_init, + .ntfy_init = core507d_ntfy_init, + .ntfy_wait_done = core507d_ntfy_wait_done, + .update = core507d_update, + .head = &head827d, + .dac = &dac507d, + .sor = &sor507d, + .pior = &pior507d, +}; + +int +core827d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore) +{ + return core507d_new_(&core827d, drm, oclass, pcore); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegt215.c b/drivers/gpu/drm/nouveau/dispnv50/core907d.c index 08e2b1fa3806..ef822f813435 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegt215.c +++ b/drivers/gpu/drm/nouveau/dispnv50/core907d.c @@ -1,5 +1,5 @@ /* - * Copyright 2015 Red Hat Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,21 +18,23 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "core.h" +#include "head.h" -const struct nv50_disp_dmac_oclass -gt215_disp_base_oclass = { - .base.oclass = GT214_DISP_BASE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_base_new, - .func = &nv50_disp_dmac_func, - .mthd = &g84_disp_base_chan_mthd, - .chid = 1, +static const struct nv50_core_func +core907d = { + .init = core507d_init, + .ntfy_init = core507d_ntfy_init, + .ntfy_wait_done = core507d_ntfy_wait_done, + .update = core507d_update, + .head = &head907d, + .dac = &dac907d, + .sor = &sor907d, }; + +int +core907d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore) +{ + return core507d_new_(&core907d, drm, oclass, pcore); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegt200.c b/drivers/gpu/drm/nouveau/dispnv50/core917d.c index 93451e46570c..392338df5bfd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegt200.c +++ b/drivers/gpu/drm/nouveau/dispnv50/core917d.c @@ -1,5 +1,5 @@ /* - * Copyright 2015 Red Hat Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,21 +18,23 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "core.h" +#include "head.h" -const struct nv50_disp_dmac_oclass -gt200_disp_base_oclass = { - .base.oclass = GT200_DISP_BASE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_base_new, - .func = &nv50_disp_dmac_func, - .mthd = &g84_disp_base_chan_mthd, - .chid = 1, +static const struct nv50_core_func +core917d = { + .init = core507d_init, + .ntfy_init = core507d_ntfy_init, + .ntfy_wait_done = core507d_ntfy_wait_done, + .update = core507d_update, + .head = &head917d, + .dac = &dac907d, + .sor = &sor907d, }; + +int +core917d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore) +{ + return core507d_new_(&core917d, drm, oclass, pcore); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c new file mode 100644 index 000000000000..b5c17c948918 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c @@ -0,0 +1,110 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "core.h" +#include "head.h" + +#include <nouveau_bo.h> + +static void +corec37d_update(struct nv50_core *core, u32 *interlock, bool ntfy) +{ + u32 *push; + if ((push = evo_wait(&core->chan, 9))) { + if (ntfy) { + evo_mthd(push, 0x020c, 1); + evo_data(push, 0x00001000 | NV50_DISP_CORE_NTFY); + } + + evo_mthd(push, 0x0218, 2); + evo_data(push, interlock[NV50_DISP_INTERLOCK_CURS]); + evo_data(push, interlock[NV50_DISP_INTERLOCK_WNDW]); + evo_mthd(push, 0x0200, 1); + evo_data(push, 0x00000001); + + if (ntfy) { + evo_mthd(push, 0x020c, 1); + evo_data(push, 0x00000000); + } + evo_kick(push, &core->chan); + } +} + +int +corec37d_ntfy_wait_done(struct nouveau_bo *bo, u32 offset, + struct nvif_device *device) +{ + u32 data; + s64 time = nvif_msec(device, 2000ULL, + data = nouveau_bo_rd32(bo, offset / 4 + 0); + if ((data & 0xc0000000) == 0x80000000) + break; + usleep_range(1, 2); + ); + return time < 0 ? time : 0; +} + +void +corec37d_ntfy_init(struct nouveau_bo *bo, u32 offset) +{ + nouveau_bo_wr32(bo, offset / 4 + 0, 0x00000000); + nouveau_bo_wr32(bo, offset / 4 + 1, 0x00000000); + nouveau_bo_wr32(bo, offset / 4 + 2, 0x00000000); + nouveau_bo_wr32(bo, offset / 4 + 3, 0x00000000); +} + +void +corec37d_init(struct nv50_core *core) +{ + const u32 windows = 8; /*XXX*/ + u32 *push, i; + if ((push = evo_wait(&core->chan, 2 + 6 * windows + 2))) { + evo_mthd(push, 0x0208, 1); + evo_data(push, core->chan.sync.handle); + for (i = 0; i < windows; i++) { + evo_mthd(push, 0x1000 + (i * 0x080), 3); + evo_data(push, i >> 1); + evo_data(push, 0x00000017); + evo_data(push, 0x00000000); + evo_mthd(push, 0x1010 + (i * 0x080), 1); + evo_data(push, 0x00127fff); + } + evo_mthd(push, 0x0200, 1); + evo_data(push, 0x00000001); + evo_kick(push, &core->chan); + } +} + +static const struct nv50_core_func +corec37d = { + .init = corec37d_init, + .ntfy_init = corec37d_ntfy_init, + .ntfy_wait_done = corec37d_ntfy_wait_done, + .update = corec37d_update, + .head = &headc37d, + .sor = &sorc37d, +}; + +int +corec37d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore) +{ + return core507d_new_(&corec37d, drm, oclass, pcore); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.c b/drivers/gpu/drm/nouveau/dispnv50/curs.c new file mode 100644 index 000000000000..f592087338c4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/curs.c @@ -0,0 +1,52 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "curs.h" + +#include <nvif/class.h> + +int +nv50_curs_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw) +{ + struct { + s32 oclass; + int version; + int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **); + } curses[] = { + { GV100_DISP_CURSOR, 0, cursc37a_new }, + { GK104_DISP_CURSOR, 0, curs907a_new }, + { GF110_DISP_CURSOR, 0, curs907a_new }, + { GT214_DISP_CURSOR, 0, curs507a_new }, + { G82_DISP_CURSOR, 0, curs507a_new }, + { NV50_DISP_CURSOR, 0, curs507a_new }, + {} + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + int cid; + + cid = nvif_mclass(&disp->disp->object, curses); + if (cid < 0) { + NV_ERROR(drm, "No supported cursor immediate class\n"); + return cid; + } + + return curses[cid].new(drm, head, curses[cid].oclass, pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.h b/drivers/gpu/drm/nouveau/dispnv50/curs.h new file mode 100644 index 000000000000..23aff5fd6747 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/curs.h @@ -0,0 +1,14 @@ +#ifndef __NV50_KMS_CURS_H__ +#define __NV50_KMS_CURS_H__ +#include "wndw.h" + +int curs507a_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); +int curs507a_new_(const struct nv50_wimm_func *, struct nouveau_drm *, + int head, s32 oclass, u32 interlock_data, + struct nv50_wndw **); + +int curs907a_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); +int cursc37a_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); + +int nv50_curs_new(struct nouveau_drm *, int head, struct nv50_wndw **); +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c new file mode 100644 index 000000000000..291c08117ab6 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c @@ -0,0 +1,145 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "curs.h" +#include "core.h" +#include "head.h" + +#include <nvif/cl507a.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_plane_helper.h> + +static void +curs507a_update(struct nv50_wndw *wndw, u32 *interlock) +{ + nvif_wr32(&wndw->wimm.base.user, 0x0080, 0x00000000); +} + +static void +curs507a_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + nvif_wr32(&wndw->wimm.base.user, 0x0084, asyw->point.y << 16 | + asyw->point.x); +} + +const struct nv50_wimm_func +curs507a = { + .point = curs507a_point, + .update = curs507a_update, +}; + +static void +curs507a_prepare(struct nv50_wndw *wndw, struct nv50_head_atom *asyh, + struct nv50_wndw_atom *asyw) +{ + u32 handle = nv50_disp(wndw->plane.dev)->core->chan.vram.handle; + u32 offset = asyw->image.offset[0]; + if (asyh->curs.handle != handle || asyh->curs.offset != offset) { + asyh->curs.handle = handle; + asyh->curs.offset = offset; + asyh->set.curs = asyh->curs.visible; + } +} + +static void +curs507a_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + asyh->curs.visible = false; +} + +static int +curs507a_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + struct nv50_head *head = nv50_head(asyw->state.crtc); + int ret; + + ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true); + asyh->curs.visible = asyw->state.visible; + if (ret || !asyh->curs.visible) + return ret; + + if (asyw->image.w != asyw->image.h) + return -EINVAL; + + ret = head->func->curs_layout(head, asyw, asyh); + if (ret) + return ret; + + return head->func->curs_format(head, asyw, asyh); +} + +static const u32 +curs507a_format[] = { + DRM_FORMAT_ARGB8888, + 0 +}; + +static const struct nv50_wndw_func +curs507a_wndw = { + .acquire = curs507a_acquire, + .release = curs507a_release, + .prepare = curs507a_prepare, +}; + +int +curs507a_new_(const struct nv50_wimm_func *func, struct nouveau_drm *drm, + int head, s32 oclass, u32 interlock_data, + struct nv50_wndw **pwndw) +{ + struct nv50_disp_cursor_v0 args = { + .head = head, + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + struct nv50_wndw *wndw; + int ret; + + ret = nv50_wndw_new_(&curs507a_wndw, drm->dev, DRM_PLANE_TYPE_CURSOR, + "curs", head, curs507a_format, BIT(head), + NV50_DISP_INTERLOCK_CURS, interlock_data, &wndw); + if (*pwndw = wndw, ret) + return ret; + + ret = nvif_object_init(&disp->disp->object, 0, oclass, &args, + sizeof(args), &wndw->wimm.base.user); + if (ret) { + NV_ERROR(drm, "curs%04x allocation failed: %d\n", oclass, ret); + return ret; + } + + nvif_object_map(&wndw->wimm.base.user, NULL, 0); + wndw->immd = func; + wndw->ctxdma.parent = &disp->core->chan.base.user; + return 0; +} + +int +curs507a_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return curs507a_new_(&curs507a, drm, head, oclass, + 0x00000001 << (head * 8), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk110.c b/drivers/gpu/drm/nouveau/dispnv50/curs907a.c index a9aa69c82e8e..d742362de03e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk110.c +++ b/drivers/gpu/drm/nouveau/dispnv50/curs907a.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Red Hat Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,17 +18,13 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs */ -#include "changk104.h" - -#include <nvif/class.h> +#include "curs.h" -const struct nvkm_fifo_chan_oclass -gk110_fifo_gpfifo_oclass = { - .base.oclass = KEPLER_CHANNEL_GPFIFO_B, - .base.minver = 0, - .base.maxver = 0, - .ctor = gk104_fifo_gpfifo_new, -}; +int +curs907a_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return curs507a_new_(&curs507a, drm, head, oclass, + 0x00000001 << (head * 4), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/cursc37a.c b/drivers/gpu/drm/nouveau/dispnv50/cursc37a.c new file mode 100644 index 000000000000..23fb29d41efe --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/cursc37a.c @@ -0,0 +1,50 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "curs.h" +#include "atom.h" + +static void +cursc37a_update(struct nv50_wndw *wndw, u32 *interlock) +{ + nvif_wr32(&wndw->wimm.base.user, 0x0200, 0x00000001); +} + +static void +cursc37a_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + nvif_wr32(&wndw->wimm.base.user, 0x0208, asyw->point.y << 16 | + asyw->point.x); +} + +static const struct nv50_wimm_func +cursc37a = { + .point = cursc37a_point, + .update = cursc37a_update, +}; + +int +cursc37a_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return curs507a_new_(&cursc37a, drm, head, oclass, + 0x00000001 << head, pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/dac507d.c b/drivers/gpu/drm/nouveau/dispnv50/dac507d.c new file mode 100644 index 000000000000..2a10ef7d30a8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/dac507d.c @@ -0,0 +1,44 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "core.h" + +static void +dac507d_ctrl(struct nv50_core *core, int or, u32 ctrl, + struct nv50_head_atom *asyh) +{ + u32 *push, sync = 0; + if ((push = evo_wait(&core->chan, 3))) { + if (asyh) { + sync |= asyh->or.nvsync << 1; + sync |= asyh->or.nhsync; + } + evo_mthd(push, 0x0400 + (or * 0x080), 2); + evo_data(push, ctrl); + evo_data(push, sync); + evo_kick(push, &core->chan); + } +} + +const struct nv50_outp_func +dac507d = { + .ctrl = dac507d_ctrl, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c b/drivers/gpu/drm/nouveau/dispnv50/dac907d.c index 00a7f3564450..11e87fa53fac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c +++ b/drivers/gpu/drm/nouveau/dispnv50/dac907d.c @@ -1,5 +1,5 @@ /* - * Copyright 2015 Red Hat Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,20 +18,22 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> */ -#include "channv50.h" -#include "rootnv50.h" +#include "core.h" -#include <nvif/class.h> +static void +dac907d_ctrl(struct nv50_core *core, int or, u32 ctrl, + struct nv50_head_atom *asyh) +{ + u32 *push; + if ((push = evo_wait(&core->chan, 2))) { + evo_mthd(push, 0x0180 + (or * 0x020), 1); + evo_data(push, ctrl); + evo_kick(push, &core->chan); + } +} -const struct nv50_disp_pioc_oclass -gt215_disp_curs_oclass = { - .base.oclass = GT214_DISP_CURSOR, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_curs_new, - .func = &nv50_disp_pioc_func, - .chid = { 7, 7 }, +const struct nv50_outp_func +dac907d = { + .ctrl = dac907d_ctrl, }; diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c new file mode 100644 index 000000000000..b83465ae7c1b --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -0,0 +1,2238 @@ +/* + * Copyright 2011 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "disp.h" +#include "atom.h" +#include "core.h" +#include "head.h" +#include "wndw.h" + +#include <linux/dma-mapping.h> +#include <linux/hdmi.h> + +#include <drm/drmP.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_dp_helper.h> +#include <drm/drm_fb_helper.h> +#include <drm/drm_plane_helper.h> +#include <drm/drm_edid.h> + +#include <nvif/class.h> +#include <nvif/cl0002.h> +#include <nvif/cl5070.h> +#include <nvif/cl507d.h> +#include <nvif/event.h> + +#include "nouveau_drv.h" +#include "nouveau_dma.h" +#include "nouveau_gem.h" +#include "nouveau_connector.h" +#include "nouveau_encoder.h" +#include "nouveau_fence.h" +#include "nouveau_fbcon.h" + +#include <subdev/bios/dp.h> + +/****************************************************************************** + * Atomic state + *****************************************************************************/ + +struct nv50_outp_atom { + struct list_head head; + + struct drm_encoder *encoder; + bool flush_disable; + + union nv50_outp_atom_mask { + struct { + bool ctrl:1; + }; + u8 mask; + } set, clr; +}; + +/****************************************************************************** + * EVO channel + *****************************************************************************/ + +static int +nv50_chan_create(struct nvif_device *device, struct nvif_object *disp, + const s32 *oclass, u8 head, void *data, u32 size, + struct nv50_chan *chan) +{ + struct nvif_sclass *sclass; + int ret, i, n; + + chan->device = device; + + ret = n = nvif_object_sclass_get(disp, &sclass); + if (ret < 0) + return ret; + + while (oclass[0]) { + for (i = 0; i < n; i++) { + if (sclass[i].oclass == oclass[0]) { + ret = nvif_object_init(disp, 0, oclass[0], + data, size, &chan->user); + if (ret == 0) + nvif_object_map(&chan->user, NULL, 0); + nvif_object_sclass_put(&sclass); + return ret; + } + } + oclass++; + } + + nvif_object_sclass_put(&sclass); + return -ENOSYS; +} + +static void +nv50_chan_destroy(struct nv50_chan *chan) +{ + nvif_object_fini(&chan->user); +} + +/****************************************************************************** + * DMA EVO channel + *****************************************************************************/ + +void +nv50_dmac_destroy(struct nv50_dmac *dmac) +{ + nvif_object_fini(&dmac->vram); + nvif_object_fini(&dmac->sync); + + nv50_chan_destroy(&dmac->base); + + nvif_mem_fini(&dmac->push); +} + +int +nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, + const s32 *oclass, u8 head, void *data, u32 size, u64 syncbuf, + struct nv50_dmac *dmac) +{ + struct nouveau_cli *cli = (void *)device->object.client; + struct nv50_disp_core_channel_dma_v0 *args = data; + int ret; + + mutex_init(&dmac->lock); + + ret = nvif_mem_init_map(&cli->mmu, NVIF_MEM_COHERENT, 0x1000, + &dmac->push); + if (ret) + return ret; + + dmac->ptr = dmac->push.object.map.ptr; + + args->pushbuf = nvif_handle(&dmac->push.object); + + ret = nv50_chan_create(device, disp, oclass, head, data, size, + &dmac->base); + if (ret) + return ret; + + if (!syncbuf) + return 0; + + ret = nvif_object_init(&dmac->base.user, 0xf0000000, NV_DMA_IN_MEMORY, + &(struct nv_dma_v0) { + .target = NV_DMA_V0_TARGET_VRAM, + .access = NV_DMA_V0_ACCESS_RDWR, + .start = syncbuf + 0x0000, + .limit = syncbuf + 0x0fff, + }, sizeof(struct nv_dma_v0), + &dmac->sync); + if (ret) + return ret; + + ret = nvif_object_init(&dmac->base.user, 0xf0000001, NV_DMA_IN_MEMORY, + &(struct nv_dma_v0) { + .target = NV_DMA_V0_TARGET_VRAM, + .access = NV_DMA_V0_ACCESS_RDWR, + .start = 0, + .limit = device->info.ram_user - 1, + }, sizeof(struct nv_dma_v0), + &dmac->vram); + if (ret) + return ret; + + return ret; +} + +/****************************************************************************** + * EVO channel helpers + *****************************************************************************/ +u32 * +evo_wait(struct nv50_dmac *evoc, int nr) +{ + struct nv50_dmac *dmac = evoc; + struct nvif_device *device = dmac->base.device; + u32 put = nvif_rd32(&dmac->base.user, 0x0000) / 4; + + mutex_lock(&dmac->lock); + if (put + nr >= (PAGE_SIZE / 4) - 8) { + dmac->ptr[put] = 0x20000000; + + nvif_wr32(&dmac->base.user, 0x0000, 0x00000000); + if (nvif_msec(device, 2000, + if (!nvif_rd32(&dmac->base.user, 0x0004)) + break; + ) < 0) { + mutex_unlock(&dmac->lock); + pr_err("nouveau: evo channel stalled\n"); + return NULL; + } + + put = 0; + } + + return dmac->ptr + put; +} + +void +evo_kick(u32 *push, struct nv50_dmac *evoc) +{ + struct nv50_dmac *dmac = evoc; + nvif_wr32(&dmac->base.user, 0x0000, (push - dmac->ptr) << 2); + mutex_unlock(&dmac->lock); +} + +/****************************************************************************** + * Output path helpers + *****************************************************************************/ +static void +nv50_outp_release(struct nouveau_encoder *nv_encoder) +{ + struct nv50_disp *disp = nv50_disp(nv_encoder->base.base.dev); + struct { + struct nv50_disp_mthd_v1 base; + } args = { + .base.version = 1, + .base.method = NV50_DISP_MTHD_V1_RELEASE, + .base.hasht = nv_encoder->dcb->hasht, + .base.hashm = nv_encoder->dcb->hashm, + }; + + nvif_mthd(&disp->disp->object, 0, &args, sizeof(args)); + nv_encoder->or = -1; + nv_encoder->link = 0; +} + +static int +nv50_outp_acquire(struct nouveau_encoder *nv_encoder) +{ + struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); + struct nv50_disp *disp = nv50_disp(drm->dev); + struct { + struct nv50_disp_mthd_v1 base; + struct nv50_disp_acquire_v0 info; + } args = { + .base.version = 1, + .base.method = NV50_DISP_MTHD_V1_ACQUIRE, + .base.hasht = nv_encoder->dcb->hasht, + .base.hashm = nv_encoder->dcb->hashm, + }; + int ret; + + ret = nvif_mthd(&disp->disp->object, 0, &args, sizeof(args)); + if (ret) { + NV_ERROR(drm, "error acquiring output path: %d\n", ret); + return ret; + } + + nv_encoder->or = args.info.or; + nv_encoder->link = args.info.link; + return 0; +} + +static int +nv50_outp_atomic_check_view(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + struct drm_display_mode *native_mode) +{ + struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode; + struct drm_display_mode *mode = &crtc_state->mode; + struct drm_connector *connector = conn_state->connector; + struct nouveau_conn_atom *asyc = nouveau_conn_atom(conn_state); + struct nouveau_drm *drm = nouveau_drm(encoder->dev); + + NV_ATOMIC(drm, "%s atomic_check\n", encoder->name); + asyc->scaler.full = false; + if (!native_mode) + return 0; + + if (asyc->scaler.mode == DRM_MODE_SCALE_NONE) { + switch (connector->connector_type) { + case DRM_MODE_CONNECTOR_LVDS: + case DRM_MODE_CONNECTOR_eDP: + /* Force use of scaler for non-EDID modes. */ + if (adjusted_mode->type & DRM_MODE_TYPE_DRIVER) + break; + mode = native_mode; + asyc->scaler.full = true; + break; + default: + break; + } + } else { + mode = native_mode; + } + + if (!drm_mode_equal(adjusted_mode, mode)) { + drm_mode_copy(adjusted_mode, mode); + crtc_state->mode_changed = true; + } + + return 0; +} + +static int +nv50_outp_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct nouveau_connector *nv_connector = + nouveau_connector(conn_state->connector); + return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, + nv_connector->native_mode); +} + +/****************************************************************************** + * DAC + *****************************************************************************/ +static void +nv50_dac_disable(struct drm_encoder *encoder) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nv50_core *core = nv50_disp(encoder->dev)->core; + if (nv_encoder->crtc) + core->func->dac->ctrl(core, nv_encoder->or, 0x00000000, NULL); + nv_encoder->crtc = NULL; + nv50_outp_release(nv_encoder); +} + +static void +nv50_dac_enable(struct drm_encoder *encoder) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); + struct nv50_head_atom *asyh = nv50_head_atom(nv_crtc->base.state); + struct nv50_core *core = nv50_disp(encoder->dev)->core; + + nv50_outp_acquire(nv_encoder); + + core->func->dac->ctrl(core, nv_encoder->or, 1 << nv_crtc->index, asyh); + asyh->or.depth = 0; + + nv_encoder->crtc = encoder->crtc; +} + +static enum drm_connector_status +nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nv50_disp *disp = nv50_disp(encoder->dev); + struct { + struct nv50_disp_mthd_v1 base; + struct nv50_disp_dac_load_v0 load; + } args = { + .base.version = 1, + .base.method = NV50_DISP_MTHD_V1_DAC_LOAD, + .base.hasht = nv_encoder->dcb->hasht, + .base.hashm = nv_encoder->dcb->hashm, + }; + int ret; + + args.load.data = nouveau_drm(encoder->dev)->vbios.dactestval; + if (args.load.data == 0) + args.load.data = 340; + + ret = nvif_mthd(&disp->disp->object, 0, &args, sizeof(args)); + if (ret || !args.load.load) + return connector_status_disconnected; + + return connector_status_connected; +} + +static const struct drm_encoder_helper_funcs +nv50_dac_help = { + .atomic_check = nv50_outp_atomic_check, + .enable = nv50_dac_enable, + .disable = nv50_dac_disable, + .detect = nv50_dac_detect +}; + +static void +nv50_dac_destroy(struct drm_encoder *encoder) +{ + drm_encoder_cleanup(encoder); + kfree(encoder); +} + +static const struct drm_encoder_funcs +nv50_dac_func = { + .destroy = nv50_dac_destroy, +}; + +static int +nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe) +{ + struct nouveau_drm *drm = nouveau_drm(connector->dev); + struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device); + struct nvkm_i2c_bus *bus; + struct nouveau_encoder *nv_encoder; + struct drm_encoder *encoder; + int type = DRM_MODE_ENCODER_DAC; + + nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL); + if (!nv_encoder) + return -ENOMEM; + nv_encoder->dcb = dcbe; + + bus = nvkm_i2c_bus_find(i2c, dcbe->i2c_index); + if (bus) + nv_encoder->i2c = &bus->i2c; + + encoder = to_drm_encoder(nv_encoder); + encoder->possible_crtcs = dcbe->heads; + encoder->possible_clones = 0; + drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type, + "dac-%04x-%04x", dcbe->hasht, dcbe->hashm); + drm_encoder_helper_add(encoder, &nv50_dac_help); + + drm_mode_connector_attach_encoder(connector, encoder); + return 0; +} + +/****************************************************************************** + * Audio + *****************************************************************************/ +static void +nv50_audio_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nv50_disp *disp = nv50_disp(encoder->dev); + struct { + struct nv50_disp_mthd_v1 base; + struct nv50_disp_sor_hda_eld_v0 eld; + } args = { + .base.version = 1, + .base.method = NV50_DISP_MTHD_V1_SOR_HDA_ELD, + .base.hasht = nv_encoder->dcb->hasht, + .base.hashm = (0xf0ff & nv_encoder->dcb->hashm) | + (0x0100 << nv_crtc->index), + }; + + nvif_mthd(&disp->disp->object, 0, &args, sizeof(args)); +} + +static void +nv50_audio_enable(struct drm_encoder *encoder, struct drm_display_mode *mode) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); + struct nouveau_connector *nv_connector; + struct nv50_disp *disp = nv50_disp(encoder->dev); + struct __packed { + struct { + struct nv50_disp_mthd_v1 mthd; + struct nv50_disp_sor_hda_eld_v0 eld; + } base; + u8 data[sizeof(nv_connector->base.eld)]; + } args = { + .base.mthd.version = 1, + .base.mthd.method = NV50_DISP_MTHD_V1_SOR_HDA_ELD, + .base.mthd.hasht = nv_encoder->dcb->hasht, + .base.mthd.hashm = (0xf0ff & nv_encoder->dcb->hashm) | + (0x0100 << nv_crtc->index), + }; + + nv_connector = nouveau_encoder_connector_get(nv_encoder); + if (!drm_detect_monitor_audio(nv_connector->edid)) + return; + + memcpy(args.data, nv_connector->base.eld, sizeof(args.data)); + + nvif_mthd(&disp->disp->object, 0, &args, + sizeof(args.base) + drm_eld_size(args.data)); +} + +/****************************************************************************** + * HDMI + *****************************************************************************/ +static void +nv50_hdmi_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nv50_disp *disp = nv50_disp(encoder->dev); + struct { + struct nv50_disp_mthd_v1 base; + struct nv50_disp_sor_hdmi_pwr_v0 pwr; + } args = { + .base.version = 1, + .base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR, + .base.hasht = nv_encoder->dcb->hasht, + .base.hashm = (0xf0ff & nv_encoder->dcb->hashm) | + (0x0100 << nv_crtc->index), + }; + + nvif_mthd(&disp->disp->object, 0, &args, sizeof(args)); +} + +static void +nv50_hdmi_enable(struct drm_encoder *encoder, struct drm_display_mode *mode) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); + struct nv50_disp *disp = nv50_disp(encoder->dev); + struct { + struct nv50_disp_mthd_v1 base; + struct nv50_disp_sor_hdmi_pwr_v0 pwr; + u8 infoframes[2 * 17]; /* two frames, up to 17 bytes each */ + } args = { + .base.version = 1, + .base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR, + .base.hasht = nv_encoder->dcb->hasht, + .base.hashm = (0xf0ff & nv_encoder->dcb->hashm) | + (0x0100 << nv_crtc->index), + .pwr.state = 1, + .pwr.rekey = 56, /* binary driver, and tegra, constant */ + }; + struct nouveau_connector *nv_connector; + u32 max_ac_packet; + union hdmi_infoframe avi_frame; + union hdmi_infoframe vendor_frame; + int ret; + int size; + + nv_connector = nouveau_encoder_connector_get(nv_encoder); + if (!drm_detect_hdmi_monitor(nv_connector->edid)) + return; + + ret = drm_hdmi_avi_infoframe_from_display_mode(&avi_frame.avi, mode, + false); + if (!ret) { + /* We have an AVI InfoFrame, populate it to the display */ + args.pwr.avi_infoframe_length + = hdmi_infoframe_pack(&avi_frame, args.infoframes, 17); + } + + ret = drm_hdmi_vendor_infoframe_from_display_mode(&vendor_frame.vendor.hdmi, + &nv_connector->base, mode); + if (!ret) { + /* We have a Vendor InfoFrame, populate it to the display */ + args.pwr.vendor_infoframe_length + = hdmi_infoframe_pack(&vendor_frame, + args.infoframes + + args.pwr.avi_infoframe_length, + 17); + } + + max_ac_packet = mode->htotal - mode->hdisplay; + max_ac_packet -= args.pwr.rekey; + max_ac_packet -= 18; /* constant from tegra */ + args.pwr.max_ac_packet = max_ac_packet / 32; + + size = sizeof(args.base) + + sizeof(args.pwr) + + args.pwr.avi_infoframe_length + + args.pwr.vendor_infoframe_length; + nvif_mthd(&disp->disp->object, 0, &args, size); + nv50_audio_enable(encoder, mode); +} + +/****************************************************************************** + * MST + *****************************************************************************/ +#define nv50_mstm(p) container_of((p), struct nv50_mstm, mgr) +#define nv50_mstc(p) container_of((p), struct nv50_mstc, connector) +#define nv50_msto(p) container_of((p), struct nv50_msto, encoder) + +struct nv50_mstm { + struct nouveau_encoder *outp; + + struct drm_dp_mst_topology_mgr mgr; + struct nv50_msto *msto[4]; + + bool modified; + bool disabled; + int links; +}; + +struct nv50_mstc { + struct nv50_mstm *mstm; + struct drm_dp_mst_port *port; + struct drm_connector connector; + + struct drm_display_mode *native; + struct edid *edid; + + int pbn; +}; + +struct nv50_msto { + struct drm_encoder encoder; + + struct nv50_head *head; + struct nv50_mstc *mstc; + bool disabled; +}; + +static struct drm_dp_payload * +nv50_msto_payload(struct nv50_msto *msto) +{ + struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev); + struct nv50_mstc *mstc = msto->mstc; + struct nv50_mstm *mstm = mstc->mstm; + int vcpi = mstc->port->vcpi.vcpi, i; + + NV_ATOMIC(drm, "%s: vcpi %d\n", msto->encoder.name, vcpi); + for (i = 0; i < mstm->mgr.max_payloads; i++) { + struct drm_dp_payload *payload = &mstm->mgr.payloads[i]; + NV_ATOMIC(drm, "%s: %d: vcpi %d start 0x%02x slots 0x%02x\n", + mstm->outp->base.base.name, i, payload->vcpi, + payload->start_slot, payload->num_slots); + } + + for (i = 0; i < mstm->mgr.max_payloads; i++) { + struct drm_dp_payload *payload = &mstm->mgr.payloads[i]; + if (payload->vcpi == vcpi) + return payload; + } + + return NULL; +} + +static void +nv50_msto_cleanup(struct nv50_msto *msto) +{ + struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev); + struct nv50_mstc *mstc = msto->mstc; + struct nv50_mstm *mstm = mstc->mstm; + + NV_ATOMIC(drm, "%s: msto cleanup\n", msto->encoder.name); + if (mstc->port && mstc->port->vcpi.vcpi > 0 && !nv50_msto_payload(msto)) + drm_dp_mst_deallocate_vcpi(&mstm->mgr, mstc->port); + if (msto->disabled) { + msto->mstc = NULL; + msto->head = NULL; + msto->disabled = false; + } +} + +static void +nv50_msto_prepare(struct nv50_msto *msto) +{ + struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev); + struct nv50_mstc *mstc = msto->mstc; + struct nv50_mstm *mstm = mstc->mstm; + struct { + struct nv50_disp_mthd_v1 base; + struct nv50_disp_sor_dp_mst_vcpi_v0 vcpi; + } args = { + .base.version = 1, + .base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI, + .base.hasht = mstm->outp->dcb->hasht, + .base.hashm = (0xf0ff & mstm->outp->dcb->hashm) | + (0x0100 << msto->head->base.index), + }; + + NV_ATOMIC(drm, "%s: msto prepare\n", msto->encoder.name); + if (mstc->port && mstc->port->vcpi.vcpi > 0) { + struct drm_dp_payload *payload = nv50_msto_payload(msto); + if (payload) { + args.vcpi.start_slot = payload->start_slot; + args.vcpi.num_slots = payload->num_slots; + args.vcpi.pbn = mstc->port->vcpi.pbn; + args.vcpi.aligned_pbn = mstc->port->vcpi.aligned_pbn; + } + } + + NV_ATOMIC(drm, "%s: %s: %02x %02x %04x %04x\n", + msto->encoder.name, msto->head->base.base.name, + args.vcpi.start_slot, args.vcpi.num_slots, + args.vcpi.pbn, args.vcpi.aligned_pbn); + nvif_mthd(&drm->display->disp.object, 0, &args, sizeof(args)); +} + +static int +nv50_msto_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct nv50_mstc *mstc = nv50_mstc(conn_state->connector); + struct nv50_mstm *mstm = mstc->mstm; + int bpp = conn_state->connector->display_info.bpc * 3; + int slots; + + mstc->pbn = drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock, bpp); + + slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn); + if (slots < 0) + return slots; + + return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, + mstc->native); +} + +static void +nv50_msto_enable(struct drm_encoder *encoder) +{ + struct nv50_head *head = nv50_head(encoder->crtc); + struct nv50_msto *msto = nv50_msto(encoder); + struct nv50_mstc *mstc = NULL; + struct nv50_mstm *mstm = NULL; + struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; + u8 proto, depth; + int slots; + bool r; + + drm_connector_list_iter_begin(encoder->dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + if (connector->state->best_encoder == &msto->encoder) { + mstc = nv50_mstc(connector); + mstm = mstc->mstm; + break; + } + } + drm_connector_list_iter_end(&conn_iter); + + if (WARN_ON(!mstc)) + return; + + slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn); + r = drm_dp_mst_allocate_vcpi(&mstm->mgr, mstc->port, mstc->pbn, slots); + WARN_ON(!r); + + if (!mstm->links++) + nv50_outp_acquire(mstm->outp); + + if (mstm->outp->link & 1) + proto = 0x8; + else + proto = 0x9; + + switch (mstc->connector.display_info.bpc) { + case 6: depth = 0x2; break; + case 8: depth = 0x5; break; + case 10: + default: depth = 0x6; break; + } + + mstm->outp->update(mstm->outp, head->base.index, + nv50_head_atom(head->base.base.state), proto, depth); + + msto->head = head; + msto->mstc = mstc; + mstm->modified = true; +} + +static void +nv50_msto_disable(struct drm_encoder *encoder) +{ + struct nv50_msto *msto = nv50_msto(encoder); + struct nv50_mstc *mstc = msto->mstc; + struct nv50_mstm *mstm = mstc->mstm; + + if (mstc->port) + drm_dp_mst_reset_vcpi_slots(&mstm->mgr, mstc->port); + + mstm->outp->update(mstm->outp, msto->head->base.index, NULL, 0, 0); + mstm->modified = true; + if (!--mstm->links) + mstm->disabled = true; + msto->disabled = true; +} + +static const struct drm_encoder_helper_funcs +nv50_msto_help = { + .disable = nv50_msto_disable, + .enable = nv50_msto_enable, + .atomic_check = nv50_msto_atomic_check, +}; + +static void +nv50_msto_destroy(struct drm_encoder *encoder) +{ + struct nv50_msto *msto = nv50_msto(encoder); + drm_encoder_cleanup(&msto->encoder); + kfree(msto); +} + +static const struct drm_encoder_funcs +nv50_msto = { + .destroy = nv50_msto_destroy, +}; + +static int +nv50_msto_new(struct drm_device *dev, u32 heads, const char *name, int id, + struct nv50_msto **pmsto) +{ + struct nv50_msto *msto; + int ret; + + if (!(msto = *pmsto = kzalloc(sizeof(*msto), GFP_KERNEL))) + return -ENOMEM; + + ret = drm_encoder_init(dev, &msto->encoder, &nv50_msto, + DRM_MODE_ENCODER_DPMST, "%s-mst-%d", name, id); + if (ret) { + kfree(*pmsto); + *pmsto = NULL; + return ret; + } + + drm_encoder_helper_add(&msto->encoder, &nv50_msto_help); + msto->encoder.possible_crtcs = heads; + return 0; +} + +static struct drm_encoder * +nv50_mstc_atomic_best_encoder(struct drm_connector *connector, + struct drm_connector_state *connector_state) +{ + struct nv50_head *head = nv50_head(connector_state->crtc); + struct nv50_mstc *mstc = nv50_mstc(connector); + if (mstc->port) { + struct nv50_mstm *mstm = mstc->mstm; + return &mstm->msto[head->base.index]->encoder; + } + return NULL; +} + +static struct drm_encoder * +nv50_mstc_best_encoder(struct drm_connector *connector) +{ + struct nv50_mstc *mstc = nv50_mstc(connector); + if (mstc->port) { + struct nv50_mstm *mstm = mstc->mstm; + return &mstm->msto[0]->encoder; + } + return NULL; +} + +static enum drm_mode_status +nv50_mstc_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + return MODE_OK; +} + +static int +nv50_mstc_get_modes(struct drm_connector *connector) +{ + struct nv50_mstc *mstc = nv50_mstc(connector); + int ret = 0; + + mstc->edid = drm_dp_mst_get_edid(&mstc->connector, mstc->port->mgr, mstc->port); + drm_mode_connector_update_edid_property(&mstc->connector, mstc->edid); + if (mstc->edid) + ret = drm_add_edid_modes(&mstc->connector, mstc->edid); + + if (!mstc->connector.display_info.bpc) + mstc->connector.display_info.bpc = 8; + + if (mstc->native) + drm_mode_destroy(mstc->connector.dev, mstc->native); + mstc->native = nouveau_conn_native_mode(&mstc->connector); + return ret; +} + +static const struct drm_connector_helper_funcs +nv50_mstc_help = { + .get_modes = nv50_mstc_get_modes, + .mode_valid = nv50_mstc_mode_valid, + .best_encoder = nv50_mstc_best_encoder, + .atomic_best_encoder = nv50_mstc_atomic_best_encoder, +}; + +static enum drm_connector_status +nv50_mstc_detect(struct drm_connector *connector, bool force) +{ + struct nv50_mstc *mstc = nv50_mstc(connector); + if (!mstc->port) + return connector_status_disconnected; + return drm_dp_mst_detect_port(connector, mstc->port->mgr, mstc->port); +} + +static void +nv50_mstc_destroy(struct drm_connector *connector) +{ + struct nv50_mstc *mstc = nv50_mstc(connector); + drm_connector_cleanup(&mstc->connector); + kfree(mstc); +} + +static const struct drm_connector_funcs +nv50_mstc = { + .reset = nouveau_conn_reset, + .detect = nv50_mstc_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = nv50_mstc_destroy, + .atomic_duplicate_state = nouveau_conn_atomic_duplicate_state, + .atomic_destroy_state = nouveau_conn_atomic_destroy_state, + .atomic_set_property = nouveau_conn_atomic_set_property, + .atomic_get_property = nouveau_conn_atomic_get_property, +}; + +static int +nv50_mstc_new(struct nv50_mstm *mstm, struct drm_dp_mst_port *port, + const char *path, struct nv50_mstc **pmstc) +{ + struct drm_device *dev = mstm->outp->base.base.dev; + struct nv50_mstc *mstc; + int ret, i; + + if (!(mstc = *pmstc = kzalloc(sizeof(*mstc), GFP_KERNEL))) + return -ENOMEM; + mstc->mstm = mstm; + mstc->port = port; + + ret = drm_connector_init(dev, &mstc->connector, &nv50_mstc, + DRM_MODE_CONNECTOR_DisplayPort); + if (ret) { + kfree(*pmstc); + *pmstc = NULL; + return ret; + } + + drm_connector_helper_add(&mstc->connector, &nv50_mstc_help); + + mstc->connector.funcs->reset(&mstc->connector); + nouveau_conn_attach_properties(&mstc->connector); + + for (i = 0; i < ARRAY_SIZE(mstm->msto) && mstm->msto[i]; i++) + drm_mode_connector_attach_encoder(&mstc->connector, &mstm->msto[i]->encoder); + + drm_object_attach_property(&mstc->connector.base, dev->mode_config.path_property, 0); + drm_object_attach_property(&mstc->connector.base, dev->mode_config.tile_property, 0); + drm_mode_connector_set_path_property(&mstc->connector, path); + return 0; +} + +static void +nv50_mstm_cleanup(struct nv50_mstm *mstm) +{ + struct nouveau_drm *drm = nouveau_drm(mstm->outp->base.base.dev); + struct drm_encoder *encoder; + int ret; + + NV_ATOMIC(drm, "%s: mstm cleanup\n", mstm->outp->base.base.name); + ret = drm_dp_check_act_status(&mstm->mgr); + + ret = drm_dp_update_payload_part2(&mstm->mgr); + + drm_for_each_encoder(encoder, mstm->outp->base.base.dev) { + if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) { + struct nv50_msto *msto = nv50_msto(encoder); + struct nv50_mstc *mstc = msto->mstc; + if (mstc && mstc->mstm == mstm) + nv50_msto_cleanup(msto); + } + } + + mstm->modified = false; +} + +static void +nv50_mstm_prepare(struct nv50_mstm *mstm) +{ + struct nouveau_drm *drm = nouveau_drm(mstm->outp->base.base.dev); + struct drm_encoder *encoder; + int ret; + + NV_ATOMIC(drm, "%s: mstm prepare\n", mstm->outp->base.base.name); + ret = drm_dp_update_payload_part1(&mstm->mgr); + + drm_for_each_encoder(encoder, mstm->outp->base.base.dev) { + if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) { + struct nv50_msto *msto = nv50_msto(encoder); + struct nv50_mstc *mstc = msto->mstc; + if (mstc && mstc->mstm == mstm) + nv50_msto_prepare(msto); + } + } + + if (mstm->disabled) { + if (!mstm->links) + nv50_outp_release(mstm->outp); + mstm->disabled = false; + } +} + +static void +nv50_mstm_hotplug(struct drm_dp_mst_topology_mgr *mgr) +{ + struct nv50_mstm *mstm = nv50_mstm(mgr); + drm_kms_helper_hotplug_event(mstm->outp->base.base.dev); +} + +static void +nv50_mstm_destroy_connector(struct drm_dp_mst_topology_mgr *mgr, + struct drm_connector *connector) +{ + struct nouveau_drm *drm = nouveau_drm(connector->dev); + struct nv50_mstc *mstc = nv50_mstc(connector); + + drm_connector_unregister(&mstc->connector); + + drm_fb_helper_remove_one_connector(&drm->fbcon->helper, &mstc->connector); + + drm_modeset_lock(&drm->dev->mode_config.connection_mutex, NULL); + mstc->port = NULL; + drm_modeset_unlock(&drm->dev->mode_config.connection_mutex); + + drm_connector_unreference(&mstc->connector); +} + +static void +nv50_mstm_register_connector(struct drm_connector *connector) +{ + struct nouveau_drm *drm = nouveau_drm(connector->dev); + + drm_fb_helper_add_one_connector(&drm->fbcon->helper, connector); + + drm_connector_register(connector); +} + +static struct drm_connector * +nv50_mstm_add_connector(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_port *port, const char *path) +{ + struct nv50_mstm *mstm = nv50_mstm(mgr); + struct nv50_mstc *mstc; + int ret; + + ret = nv50_mstc_new(mstm, port, path, &mstc); + if (ret) { + if (mstc) + mstc->connector.funcs->destroy(&mstc->connector); + return NULL; + } + + return &mstc->connector; +} + +static const struct drm_dp_mst_topology_cbs +nv50_mstm = { + .add_connector = nv50_mstm_add_connector, + .register_connector = nv50_mstm_register_connector, + .destroy_connector = nv50_mstm_destroy_connector, + .hotplug = nv50_mstm_hotplug, +}; + +void +nv50_mstm_service(struct nv50_mstm *mstm) +{ + struct drm_dp_aux *aux = mstm ? mstm->mgr.aux : NULL; + bool handled = true; + int ret; + u8 esi[8] = {}; + + if (!aux) + return; + + while (handled) { + ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT_ESI, esi, 8); + if (ret != 8) { + drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false); + return; + } + + drm_dp_mst_hpd_irq(&mstm->mgr, esi, &handled); + if (!handled) + break; + + drm_dp_dpcd_write(aux, DP_SINK_COUNT_ESI + 1, &esi[1], 3); + } +} + +void +nv50_mstm_remove(struct nv50_mstm *mstm) +{ + if (mstm) + drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false); +} + +static int +nv50_mstm_enable(struct nv50_mstm *mstm, u8 dpcd, int state) +{ + struct nouveau_encoder *outp = mstm->outp; + struct { + struct nv50_disp_mthd_v1 base; + struct nv50_disp_sor_dp_mst_link_v0 mst; + } args = { + .base.version = 1, + .base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_LINK, + .base.hasht = outp->dcb->hasht, + .base.hashm = outp->dcb->hashm, + .mst.state = state, + }; + struct nouveau_drm *drm = nouveau_drm(outp->base.base.dev); + struct nvif_object *disp = &drm->display->disp.object; + int ret; + + if (dpcd >= 0x12) { + ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CTRL, &dpcd); + if (ret < 0) + return ret; + + dpcd &= ~DP_MST_EN; + if (state) + dpcd |= DP_MST_EN; + + ret = drm_dp_dpcd_writeb(mstm->mgr.aux, DP_MSTM_CTRL, dpcd); + if (ret < 0) + return ret; + } + + return nvif_mthd(disp, 0, &args, sizeof(args)); +} + +int +nv50_mstm_detect(struct nv50_mstm *mstm, u8 dpcd[8], int allow) +{ + int ret, state = 0; + + if (!mstm) + return 0; + + if (dpcd[0] >= 0x12) { + ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CAP, &dpcd[1]); + if (ret < 0) + return ret; + + if (!(dpcd[1] & DP_MST_CAP)) + dpcd[0] = 0x11; + else + state = allow; + } + + ret = nv50_mstm_enable(mstm, dpcd[0], state); + if (ret) + return ret; + + ret = drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, state); + if (ret) + return nv50_mstm_enable(mstm, dpcd[0], 0); + + return mstm->mgr.mst_state; +} + +static void +nv50_mstm_fini(struct nv50_mstm *mstm) +{ + if (mstm && mstm->mgr.mst_state) + drm_dp_mst_topology_mgr_suspend(&mstm->mgr); +} + +static void +nv50_mstm_init(struct nv50_mstm *mstm) +{ + if (mstm && mstm->mgr.mst_state) + drm_dp_mst_topology_mgr_resume(&mstm->mgr); +} + +static void +nv50_mstm_del(struct nv50_mstm **pmstm) +{ + struct nv50_mstm *mstm = *pmstm; + if (mstm) { + kfree(*pmstm); + *pmstm = NULL; + } +} + +static int +nv50_mstm_new(struct nouveau_encoder *outp, struct drm_dp_aux *aux, int aux_max, + int conn_base_id, struct nv50_mstm **pmstm) +{ + const int max_payloads = hweight8(outp->dcb->heads); + struct drm_device *dev = outp->base.base.dev; + struct nv50_mstm *mstm; + int ret, i; + u8 dpcd; + + /* This is a workaround for some monitors not functioning + * correctly in MST mode on initial module load. I think + * some bad interaction with the VBIOS may be responsible. + * + * A good ol' off and on again seems to work here ;) + */ + ret = drm_dp_dpcd_readb(aux, DP_DPCD_REV, &dpcd); + if (ret >= 0 && dpcd >= 0x12) + drm_dp_dpcd_writeb(aux, DP_MSTM_CTRL, 0); + + if (!(mstm = *pmstm = kzalloc(sizeof(*mstm), GFP_KERNEL))) + return -ENOMEM; + mstm->outp = outp; + mstm->mgr.cbs = &nv50_mstm; + + ret = drm_dp_mst_topology_mgr_init(&mstm->mgr, dev, aux, aux_max, + max_payloads, conn_base_id); + if (ret) + return ret; + + for (i = 0; i < max_payloads; i++) { + ret = nv50_msto_new(dev, outp->dcb->heads, outp->base.base.name, + i, &mstm->msto[i]); + if (ret) + return ret; + } + + return 0; +} + +/****************************************************************************** + * SOR + *****************************************************************************/ +static void +nv50_sor_update(struct nouveau_encoder *nv_encoder, u8 head, + struct nv50_head_atom *asyh, u8 proto, u8 depth) +{ + struct nv50_disp *disp = nv50_disp(nv_encoder->base.base.dev); + struct nv50_core *core = disp->core; + + if (!asyh) { + nv_encoder->ctrl &= ~BIT(head); + if (!(nv_encoder->ctrl & 0x0000000f)) + nv_encoder->ctrl = 0; + } else { + nv_encoder->ctrl |= proto << 8; + nv_encoder->ctrl |= BIT(head); + asyh->or.depth = depth; + } + + core->func->sor->ctrl(core, nv_encoder->or, nv_encoder->ctrl, asyh); +} + +static void +nv50_sor_disable(struct drm_encoder *encoder) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc); + + nv_encoder->crtc = NULL; + + if (nv_crtc) { + struct nvkm_i2c_aux *aux = nv_encoder->aux; + u8 pwr; + + if (aux) { + int ret = nvkm_rdaux(aux, DP_SET_POWER, &pwr, 1); + if (ret == 0) { + pwr &= ~DP_SET_POWER_MASK; + pwr |= DP_SET_POWER_D3; + nvkm_wraux(aux, DP_SET_POWER, &pwr, 1); + } + } + + nv_encoder->update(nv_encoder, nv_crtc->index, NULL, 0, 0); + nv50_audio_disable(encoder, nv_crtc); + nv50_hdmi_disable(&nv_encoder->base.base, nv_crtc); + nv50_outp_release(nv_encoder); + } +} + +static void +nv50_sor_enable(struct drm_encoder *encoder) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); + struct nv50_head_atom *asyh = nv50_head_atom(nv_crtc->base.state); + struct drm_display_mode *mode = &asyh->state.adjusted_mode; + struct { + struct nv50_disp_mthd_v1 base; + struct nv50_disp_sor_lvds_script_v0 lvds; + } lvds = { + .base.version = 1, + .base.method = NV50_DISP_MTHD_V1_SOR_LVDS_SCRIPT, + .base.hasht = nv_encoder->dcb->hasht, + .base.hashm = nv_encoder->dcb->hashm, + }; + struct nv50_disp *disp = nv50_disp(encoder->dev); + struct drm_device *dev = encoder->dev; + struct nouveau_drm *drm = nouveau_drm(dev); + struct nouveau_connector *nv_connector; + struct nvbios *bios = &drm->vbios; + u8 proto = 0xf; + u8 depth = 0x0; + + nv_connector = nouveau_encoder_connector_get(nv_encoder); + nv_encoder->crtc = encoder->crtc; + nv50_outp_acquire(nv_encoder); + + switch (nv_encoder->dcb->type) { + case DCB_OUTPUT_TMDS: + if (nv_encoder->link & 1) { + proto = 0x1; + /* Only enable dual-link if: + * - Need to (i.e. rate > 165MHz) + * - DCB says we can + * - Not an HDMI monitor, since there's no dual-link + * on HDMI. + */ + if (mode->clock >= 165000 && + nv_encoder->dcb->duallink_possible && + !drm_detect_hdmi_monitor(nv_connector->edid)) + proto |= 0x4; + } else { + proto = 0x2; + } + + nv50_hdmi_enable(&nv_encoder->base.base, mode); + break; + case DCB_OUTPUT_LVDS: + proto = 0x0; + + if (bios->fp_no_ddc) { + if (bios->fp.dual_link) + lvds.lvds.script |= 0x0100; + if (bios->fp.if_is_24bit) + lvds.lvds.script |= 0x0200; + } else { + if (nv_connector->type == DCB_CONNECTOR_LVDS_SPWG) { + if (((u8 *)nv_connector->edid)[121] == 2) + lvds.lvds.script |= 0x0100; + } else + if (mode->clock >= bios->fp.duallink_transition_clk) { + lvds.lvds.script |= 0x0100; + } + + if (lvds.lvds.script & 0x0100) { + if (bios->fp.strapless_is_24bit & 2) + lvds.lvds.script |= 0x0200; + } else { + if (bios->fp.strapless_is_24bit & 1) + lvds.lvds.script |= 0x0200; + } + + if (nv_connector->base.display_info.bpc == 8) + lvds.lvds.script |= 0x0200; + } + + nvif_mthd(&disp->disp->object, 0, &lvds, sizeof(lvds)); + break; + case DCB_OUTPUT_DP: + if (nv_connector->base.display_info.bpc == 6) + depth = 0x2; + else + if (nv_connector->base.display_info.bpc == 8) + depth = 0x5; + else + depth = 0x6; + + if (nv_encoder->link & 1) + proto = 0x8; + else + proto = 0x9; + + nv50_audio_enable(encoder, mode); + break; + default: + BUG(); + break; + } + + nv_encoder->update(nv_encoder, nv_crtc->index, asyh, proto, depth); +} + +static const struct drm_encoder_helper_funcs +nv50_sor_help = { + .atomic_check = nv50_outp_atomic_check, + .enable = nv50_sor_enable, + .disable = nv50_sor_disable, +}; + +static void +nv50_sor_destroy(struct drm_encoder *encoder) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + nv50_mstm_del(&nv_encoder->dp.mstm); + drm_encoder_cleanup(encoder); + kfree(encoder); +} + +static const struct drm_encoder_funcs +nv50_sor_func = { + .destroy = nv50_sor_destroy, +}; + +static int +nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe) +{ + struct nouveau_connector *nv_connector = nouveau_connector(connector); + struct nouveau_drm *drm = nouveau_drm(connector->dev); + struct nvkm_bios *bios = nvxx_bios(&drm->client.device); + struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device); + struct nouveau_encoder *nv_encoder; + struct drm_encoder *encoder; + u8 ver, hdr, cnt, len; + u32 data; + int type, ret; + + switch (dcbe->type) { + case DCB_OUTPUT_LVDS: type = DRM_MODE_ENCODER_LVDS; break; + case DCB_OUTPUT_TMDS: + case DCB_OUTPUT_DP: + default: + type = DRM_MODE_ENCODER_TMDS; + break; + } + + nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL); + if (!nv_encoder) + return -ENOMEM; + nv_encoder->dcb = dcbe; + nv_encoder->update = nv50_sor_update; + + encoder = to_drm_encoder(nv_encoder); + encoder->possible_crtcs = dcbe->heads; + encoder->possible_clones = 0; + drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type, + "sor-%04x-%04x", dcbe->hasht, dcbe->hashm); + drm_encoder_helper_add(encoder, &nv50_sor_help); + + drm_mode_connector_attach_encoder(connector, encoder); + + if (dcbe->type == DCB_OUTPUT_DP) { + struct nv50_disp *disp = nv50_disp(encoder->dev); + struct nvkm_i2c_aux *aux = + nvkm_i2c_aux_find(i2c, dcbe->i2c_index); + if (aux) { + if (disp->disp->object.oclass < GF110_DISP) { + /* HW has no support for address-only + * transactions, so we're required to + * use custom I2C-over-AUX code. + */ + nv_encoder->i2c = &aux->i2c; + } else { + nv_encoder->i2c = &nv_connector->aux.ddc; + } + nv_encoder->aux = aux; + } + + if ((data = nvbios_dp_table(bios, &ver, &hdr, &cnt, &len)) && + ver >= 0x40 && (nvbios_rd08(bios, data + 0x08) & 0x04)) { + ret = nv50_mstm_new(nv_encoder, &nv_connector->aux, 16, + nv_connector->base.base.id, + &nv_encoder->dp.mstm); + if (ret) + return ret; + } + } else { + struct nvkm_i2c_bus *bus = + nvkm_i2c_bus_find(i2c, dcbe->i2c_index); + if (bus) + nv_encoder->i2c = &bus->i2c; + } + + return 0; +} + +/****************************************************************************** + * PIOR + *****************************************************************************/ +static int +nv50_pior_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + int ret = nv50_outp_atomic_check(encoder, crtc_state, conn_state); + if (ret) + return ret; + crtc_state->adjusted_mode.clock *= 2; + return 0; +} + +static void +nv50_pior_disable(struct drm_encoder *encoder) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nv50_core *core = nv50_disp(encoder->dev)->core; + if (nv_encoder->crtc) + core->func->pior->ctrl(core, nv_encoder->or, 0x00000000, NULL); + nv_encoder->crtc = NULL; + nv50_outp_release(nv_encoder); +} + +static void +nv50_pior_enable(struct drm_encoder *encoder) +{ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); + struct nouveau_connector *nv_connector; + struct nv50_head_atom *asyh = nv50_head_atom(nv_crtc->base.state); + struct nv50_core *core = nv50_disp(encoder->dev)->core; + u8 owner = 1 << nv_crtc->index; + u8 proto; + + nv50_outp_acquire(nv_encoder); + + nv_connector = nouveau_encoder_connector_get(nv_encoder); + switch (nv_connector->base.display_info.bpc) { + case 10: asyh->or.depth = 0x6; break; + case 8: asyh->or.depth = 0x5; break; + case 6: asyh->or.depth = 0x2; break; + default: asyh->or.depth = 0x0; break; + } + + switch (nv_encoder->dcb->type) { + case DCB_OUTPUT_TMDS: + case DCB_OUTPUT_DP: + proto = 0x0; + break; + default: + BUG(); + break; + } + + core->func->pior->ctrl(core, nv_encoder->or, (proto << 8) | owner, asyh); + nv_encoder->crtc = encoder->crtc; +} + +static const struct drm_encoder_helper_funcs +nv50_pior_help = { + .atomic_check = nv50_pior_atomic_check, + .enable = nv50_pior_enable, + .disable = nv50_pior_disable, +}; + +static void +nv50_pior_destroy(struct drm_encoder *encoder) +{ + drm_encoder_cleanup(encoder); + kfree(encoder); +} + +static const struct drm_encoder_funcs +nv50_pior_func = { + .destroy = nv50_pior_destroy, +}; + +static int +nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe) +{ + struct nouveau_drm *drm = nouveau_drm(connector->dev); + struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device); + struct nvkm_i2c_bus *bus = NULL; + struct nvkm_i2c_aux *aux = NULL; + struct i2c_adapter *ddc; + struct nouveau_encoder *nv_encoder; + struct drm_encoder *encoder; + int type; + + switch (dcbe->type) { + case DCB_OUTPUT_TMDS: + bus = nvkm_i2c_bus_find(i2c, NVKM_I2C_BUS_EXT(dcbe->extdev)); + ddc = bus ? &bus->i2c : NULL; + type = DRM_MODE_ENCODER_TMDS; + break; + case DCB_OUTPUT_DP: + aux = nvkm_i2c_aux_find(i2c, NVKM_I2C_AUX_EXT(dcbe->extdev)); + ddc = aux ? &aux->i2c : NULL; + type = DRM_MODE_ENCODER_TMDS; + break; + default: + return -ENODEV; + } + + nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL); + if (!nv_encoder) + return -ENOMEM; + nv_encoder->dcb = dcbe; + nv_encoder->i2c = ddc; + nv_encoder->aux = aux; + + encoder = to_drm_encoder(nv_encoder); + encoder->possible_crtcs = dcbe->heads; + encoder->possible_clones = 0; + drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type, + "pior-%04x-%04x", dcbe->hasht, dcbe->hashm); + drm_encoder_helper_add(encoder, &nv50_pior_help); + + drm_mode_connector_attach_encoder(connector, encoder); + return 0; +} + +/****************************************************************************** + * Atomic + *****************************************************************************/ + +static void +nv50_disp_atomic_commit_core(struct nouveau_drm *drm, u32 *interlock) +{ + struct nv50_disp *disp = nv50_disp(drm->dev); + struct nv50_core *core = disp->core; + struct nv50_mstm *mstm; + struct drm_encoder *encoder; + + NV_ATOMIC(drm, "commit core %08x\n", interlock[NV50_DISP_INTERLOCK_BASE]); + + drm_for_each_encoder(encoder, drm->dev) { + if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) { + mstm = nouveau_encoder(encoder)->dp.mstm; + if (mstm && mstm->modified) + nv50_mstm_prepare(mstm); + } + } + + core->func->ntfy_init(disp->sync, NV50_DISP_CORE_NTFY); + core->func->update(core, interlock, true); + if (core->func->ntfy_wait_done(disp->sync, NV50_DISP_CORE_NTFY, + disp->core->chan.base.device)) + NV_ERROR(drm, "core notifier timeout\n"); + + drm_for_each_encoder(encoder, drm->dev) { + if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) { + mstm = nouveau_encoder(encoder)->dp.mstm; + if (mstm && mstm->modified) + nv50_mstm_cleanup(mstm); + } + } +} + +static void +nv50_disp_atomic_commit_tail(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_crtc_state *new_crtc_state, *old_crtc_state; + struct drm_crtc *crtc; + struct drm_plane_state *new_plane_state; + struct drm_plane *plane; + struct nouveau_drm *drm = nouveau_drm(dev); + struct nv50_disp *disp = nv50_disp(dev); + struct nv50_atom *atom = nv50_atom(state); + struct nv50_outp_atom *outp, *outt; + u32 interlock[NV50_DISP_INTERLOCK__SIZE] = {}; + int i; + + NV_ATOMIC(drm, "commit %d %d\n", atom->lock_core, atom->flush_disable); + drm_atomic_helper_wait_for_fences(dev, state, false); + drm_atomic_helper_wait_for_dependencies(state); + drm_atomic_helper_update_legacy_modeset_state(dev, state); + + if (atom->lock_core) + mutex_lock(&disp->mutex); + + /* Disable head(s). */ + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state); + struct nv50_head *head = nv50_head(crtc); + + NV_ATOMIC(drm, "%s: clr %04x (set %04x)\n", crtc->name, + asyh->clr.mask, asyh->set.mask); + if (old_crtc_state->active && !new_crtc_state->active) + drm_crtc_vblank_off(crtc); + + if (asyh->clr.mask) { + nv50_head_flush_clr(head, asyh, atom->flush_disable); + interlock[NV50_DISP_INTERLOCK_CORE] |= 1; + } + } + + /* Disable plane(s). */ + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); + struct nv50_wndw *wndw = nv50_wndw(plane); + + NV_ATOMIC(drm, "%s: clr %02x (set %02x)\n", plane->name, + asyw->clr.mask, asyw->set.mask); + if (!asyw->clr.mask) + continue; + + nv50_wndw_flush_clr(wndw, interlock, atom->flush_disable, asyw); + } + + /* Disable output path(s). */ + list_for_each_entry(outp, &atom->outp, head) { + const struct drm_encoder_helper_funcs *help; + struct drm_encoder *encoder; + + encoder = outp->encoder; + help = encoder->helper_private; + + NV_ATOMIC(drm, "%s: clr %02x (set %02x)\n", encoder->name, + outp->clr.mask, outp->set.mask); + + if (outp->clr.mask) { + help->disable(encoder); + interlock[NV50_DISP_INTERLOCK_CORE] |= 1; + if (outp->flush_disable) { + nv50_disp_atomic_commit_core(drm, interlock); + memset(interlock, 0x00, sizeof(interlock)); + } + } + } + + /* Flush disable. */ + if (interlock[NV50_DISP_INTERLOCK_CORE]) { + if (atom->flush_disable) { + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + struct nv50_wndw *wndw = nv50_wndw(plane); + if (interlock[wndw->interlock.type] & wndw->interlock.data) { + if (wndw->func->update) + wndw->func->update(wndw, interlock); + } + } + + nv50_disp_atomic_commit_core(drm, interlock); + memset(interlock, 0x00, sizeof(interlock)); + } + } + + /* Update output path(s). */ + list_for_each_entry_safe(outp, outt, &atom->outp, head) { + const struct drm_encoder_helper_funcs *help; + struct drm_encoder *encoder; + + encoder = outp->encoder; + help = encoder->helper_private; + + NV_ATOMIC(drm, "%s: set %02x (clr %02x)\n", encoder->name, + outp->set.mask, outp->clr.mask); + + if (outp->set.mask) { + help->enable(encoder); + interlock[NV50_DISP_INTERLOCK_CORE] = 1; + } + + list_del(&outp->head); + kfree(outp); + } + + /* Update head(s). */ + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state); + struct nv50_head *head = nv50_head(crtc); + + NV_ATOMIC(drm, "%s: set %04x (clr %04x)\n", crtc->name, + asyh->set.mask, asyh->clr.mask); + + if (asyh->set.mask) { + nv50_head_flush_set(head, asyh); + interlock[NV50_DISP_INTERLOCK_CORE] = 1; + } + + if (new_crtc_state->active) { + if (!old_crtc_state->active) + drm_crtc_vblank_on(crtc); + if (new_crtc_state->event) + drm_crtc_vblank_get(crtc); + } + } + + /* Update plane(s). */ + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); + struct nv50_wndw *wndw = nv50_wndw(plane); + + NV_ATOMIC(drm, "%s: set %02x (clr %02x)\n", plane->name, + asyw->set.mask, asyw->clr.mask); + if ( !asyw->set.mask && + (!asyw->clr.mask || atom->flush_disable)) + continue; + + nv50_wndw_flush_set(wndw, interlock, asyw); + } + + /* Flush update. */ + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + struct nv50_wndw *wndw = nv50_wndw(plane); + if (interlock[wndw->interlock.type] & wndw->interlock.data) { + if (wndw->func->update) + wndw->func->update(wndw, interlock); + } + } + + if (interlock[NV50_DISP_INTERLOCK_CORE]) { + if (interlock[NV50_DISP_INTERLOCK_BASE] || + !atom->state.legacy_cursor_update) + nv50_disp_atomic_commit_core(drm, interlock); + else + disp->core->func->update(disp->core, interlock, false); + } + + if (atom->lock_core) + mutex_unlock(&disp->mutex); + + /* Wait for HW to signal completion. */ + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); + struct nv50_wndw *wndw = nv50_wndw(plane); + int ret = nv50_wndw_wait_armed(wndw, asyw); + if (ret) + NV_ERROR(drm, "%s: timeout\n", plane->name); + } + + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->event) { + unsigned long flags; + /* Get correct count/ts if racing with vblank irq */ + if (new_crtc_state->active) + drm_crtc_accurate_vblank_count(crtc); + spin_lock_irqsave(&crtc->dev->event_lock, flags); + drm_crtc_send_vblank_event(crtc, new_crtc_state->event); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + + new_crtc_state->event = NULL; + if (new_crtc_state->active) + drm_crtc_vblank_put(crtc); + } + } + + drm_atomic_helper_commit_hw_done(state); + drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_commit_cleanup_done(state); + drm_atomic_state_put(state); +} + +static void +nv50_disp_atomic_commit_work(struct work_struct *work) +{ + struct drm_atomic_state *state = + container_of(work, typeof(*state), commit_work); + nv50_disp_atomic_commit_tail(state); +} + +static int +nv50_disp_atomic_commit(struct drm_device *dev, + struct drm_atomic_state *state, bool nonblock) +{ + struct nouveau_drm *drm = nouveau_drm(dev); + struct drm_plane_state *new_plane_state; + struct drm_plane *plane; + struct drm_crtc *crtc; + bool active = false; + int ret, i; + + ret = pm_runtime_get_sync(dev->dev); + if (ret < 0 && ret != -EACCES) + return ret; + + ret = drm_atomic_helper_setup_commit(state, nonblock); + if (ret) + goto done; + + INIT_WORK(&state->commit_work, nv50_disp_atomic_commit_work); + + ret = drm_atomic_helper_prepare_planes(dev, state); + if (ret) + goto done; + + if (!nonblock) { + ret = drm_atomic_helper_wait_for_fences(dev, state, true); + if (ret) + goto err_cleanup; + } + + ret = drm_atomic_helper_swap_state(state, true); + if (ret) + goto err_cleanup; + + for_each_new_plane_in_state(state, plane, new_plane_state, i) { + struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); + struct nv50_wndw *wndw = nv50_wndw(plane); + + if (asyw->set.image) + nv50_wndw_ntfy_enable(wndw, asyw); + } + + drm_atomic_state_get(state); + + if (nonblock) + queue_work(system_unbound_wq, &state->commit_work); + else + nv50_disp_atomic_commit_tail(state); + + drm_for_each_crtc(crtc, dev) { + if (crtc->state->enable) { + if (!drm->have_disp_power_ref) { + drm->have_disp_power_ref = true; + return 0; + } + active = true; + break; + } + } + + if (!active && drm->have_disp_power_ref) { + pm_runtime_put_autosuspend(dev->dev); + drm->have_disp_power_ref = false; + } + +err_cleanup: + if (ret) + drm_atomic_helper_cleanup_planes(dev, state); +done: + pm_runtime_put_autosuspend(dev->dev); + return ret; +} + +static struct nv50_outp_atom * +nv50_disp_outp_atomic_add(struct nv50_atom *atom, struct drm_encoder *encoder) +{ + struct nv50_outp_atom *outp; + + list_for_each_entry(outp, &atom->outp, head) { + if (outp->encoder == encoder) + return outp; + } + + outp = kzalloc(sizeof(*outp), GFP_KERNEL); + if (!outp) + return ERR_PTR(-ENOMEM); + + list_add(&outp->head, &atom->outp); + outp->encoder = encoder; + return outp; +} + +static int +nv50_disp_outp_atomic_check_clr(struct nv50_atom *atom, + struct drm_connector_state *old_connector_state) +{ + struct drm_encoder *encoder = old_connector_state->best_encoder; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct drm_crtc *crtc; + struct nv50_outp_atom *outp; + + if (!(crtc = old_connector_state->crtc)) + return 0; + + old_crtc_state = drm_atomic_get_old_crtc_state(&atom->state, crtc); + new_crtc_state = drm_atomic_get_new_crtc_state(&atom->state, crtc); + if (old_crtc_state->active && drm_atomic_crtc_needs_modeset(new_crtc_state)) { + outp = nv50_disp_outp_atomic_add(atom, encoder); + if (IS_ERR(outp)) + return PTR_ERR(outp); + + if (outp->encoder->encoder_type == DRM_MODE_ENCODER_DPMST) { + outp->flush_disable = true; + atom->flush_disable = true; + } + outp->clr.ctrl = true; + atom->lock_core = true; + } + + return 0; +} + +static int +nv50_disp_outp_atomic_check_set(struct nv50_atom *atom, + struct drm_connector_state *connector_state) +{ + struct drm_encoder *encoder = connector_state->best_encoder; + struct drm_crtc_state *new_crtc_state; + struct drm_crtc *crtc; + struct nv50_outp_atom *outp; + + if (!(crtc = connector_state->crtc)) + return 0; + + new_crtc_state = drm_atomic_get_new_crtc_state(&atom->state, crtc); + if (new_crtc_state->active && drm_atomic_crtc_needs_modeset(new_crtc_state)) { + outp = nv50_disp_outp_atomic_add(atom, encoder); + if (IS_ERR(outp)) + return PTR_ERR(outp); + + outp->set.ctrl = true; + atom->lock_core = true; + } + + return 0; +} + +static int +nv50_disp_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +{ + struct nv50_atom *atom = nv50_atom(state); + struct drm_connector_state *old_connector_state, *new_connector_state; + struct drm_connector *connector; + struct drm_crtc_state *new_crtc_state; + struct drm_crtc *crtc; + int ret, i; + + /* We need to handle colour management on a per-plane basis. */ + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->color_mgmt_changed) { + ret = drm_atomic_add_affected_planes(state, crtc); + if (ret) + return ret; + } + } + + ret = drm_atomic_helper_check(dev, state); + if (ret) + return ret; + + for_each_oldnew_connector_in_state(state, connector, old_connector_state, new_connector_state, i) { + ret = nv50_disp_outp_atomic_check_clr(atom, old_connector_state); + if (ret) + return ret; + + ret = nv50_disp_outp_atomic_check_set(atom, new_connector_state); + if (ret) + return ret; + } + + return 0; +} + +static void +nv50_disp_atomic_state_clear(struct drm_atomic_state *state) +{ + struct nv50_atom *atom = nv50_atom(state); + struct nv50_outp_atom *outp, *outt; + + list_for_each_entry_safe(outp, outt, &atom->outp, head) { + list_del(&outp->head); + kfree(outp); + } + + drm_atomic_state_default_clear(state); +} + +static void +nv50_disp_atomic_state_free(struct drm_atomic_state *state) +{ + struct nv50_atom *atom = nv50_atom(state); + drm_atomic_state_default_release(&atom->state); + kfree(atom); +} + +static struct drm_atomic_state * +nv50_disp_atomic_state_alloc(struct drm_device *dev) +{ + struct nv50_atom *atom; + if (!(atom = kzalloc(sizeof(*atom), GFP_KERNEL)) || + drm_atomic_state_init(dev, &atom->state) < 0) { + kfree(atom); + return NULL; + } + INIT_LIST_HEAD(&atom->outp); + return &atom->state; +} + +static const struct drm_mode_config_funcs +nv50_disp_func = { + .fb_create = nouveau_user_framebuffer_create, + .output_poll_changed = drm_fb_helper_output_poll_changed, + .atomic_check = nv50_disp_atomic_check, + .atomic_commit = nv50_disp_atomic_commit, + .atomic_state_alloc = nv50_disp_atomic_state_alloc, + .atomic_state_clear = nv50_disp_atomic_state_clear, + .atomic_state_free = nv50_disp_atomic_state_free, +}; + +/****************************************************************************** + * Init + *****************************************************************************/ + +void +nv50_display_fini(struct drm_device *dev) +{ + struct nouveau_encoder *nv_encoder; + struct drm_encoder *encoder; + struct drm_plane *plane; + + drm_for_each_plane(plane, dev) { + struct nv50_wndw *wndw = nv50_wndw(plane); + if (plane->funcs != &nv50_wndw) + continue; + nv50_wndw_fini(wndw); + } + + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) { + nv_encoder = nouveau_encoder(encoder); + nv50_mstm_fini(nv_encoder->dp.mstm); + } + } +} + +int +nv50_display_init(struct drm_device *dev) +{ + struct nv50_core *core = nv50_disp(dev)->core; + struct drm_encoder *encoder; + struct drm_plane *plane; + + core->func->init(core); + + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) { + struct nouveau_encoder *nv_encoder = + nouveau_encoder(encoder); + nv50_mstm_init(nv_encoder->dp.mstm); + } + } + + drm_for_each_plane(plane, dev) { + struct nv50_wndw *wndw = nv50_wndw(plane); + if (plane->funcs != &nv50_wndw) + continue; + nv50_wndw_init(wndw); + } + + return 0; +} + +void +nv50_display_destroy(struct drm_device *dev) +{ + struct nv50_disp *disp = nv50_disp(dev); + + nv50_core_del(&disp->core); + + nouveau_bo_unmap(disp->sync); + if (disp->sync) + nouveau_bo_unpin(disp->sync); + nouveau_bo_ref(NULL, &disp->sync); + + nouveau_display(dev)->priv = NULL; + kfree(disp); +} + +MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)"); +static int nouveau_atomic = 0; +module_param_named(atomic, nouveau_atomic, int, 0400); + +int +nv50_display_create(struct drm_device *dev) +{ + struct nvif_device *device = &nouveau_drm(dev)->client.device; + struct nouveau_drm *drm = nouveau_drm(dev); + struct dcb_table *dcb = &drm->vbios.dcb; + struct drm_connector *connector, *tmp; + struct nv50_disp *disp; + struct dcb_output *dcbe; + int crtcs, ret, i; + + disp = kzalloc(sizeof(*disp), GFP_KERNEL); + if (!disp) + return -ENOMEM; + + mutex_init(&disp->mutex); + + nouveau_display(dev)->priv = disp; + nouveau_display(dev)->dtor = nv50_display_destroy; + nouveau_display(dev)->init = nv50_display_init; + nouveau_display(dev)->fini = nv50_display_fini; + disp->disp = &nouveau_display(dev)->disp; + dev->mode_config.funcs = &nv50_disp_func; + dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP; + if (nouveau_atomic) + dev->driver->driver_features |= DRIVER_ATOMIC; + + /* small shared memory area we use for notifiers and semaphores */ + ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM, + 0, 0x0000, NULL, NULL, &disp->sync); + if (!ret) { + ret = nouveau_bo_pin(disp->sync, TTM_PL_FLAG_VRAM, true); + if (!ret) { + ret = nouveau_bo_map(disp->sync); + if (ret) + nouveau_bo_unpin(disp->sync); + } + if (ret) + nouveau_bo_ref(NULL, &disp->sync); + } + + if (ret) + goto out; + + /* allocate master evo channel */ + ret = nv50_core_new(drm, &disp->core); + if (ret) + goto out; + + /* create crtc objects to represent the hw heads */ + if (disp->disp->object.oclass >= GV100_DISP) + crtcs = nvif_rd32(&device->object, 0x610060) & 0xff; + else + if (disp->disp->object.oclass >= GF110_DISP) + crtcs = nvif_rd32(&device->object, 0x612004) & 0xf; + else + crtcs = 0x3; + + for (i = 0; i < fls(crtcs); i++) { + if (!(crtcs & (1 << i))) + continue; + ret = nv50_head_create(dev, i); + if (ret) + goto out; + } + + /* create encoder/connector objects based on VBIOS DCB table */ + for (i = 0, dcbe = &dcb->entry[0]; i < dcb->entries; i++, dcbe++) { + connector = nouveau_connector_create(dev, dcbe->connector); + if (IS_ERR(connector)) + continue; + + if (dcbe->location == DCB_LOC_ON_CHIP) { + switch (dcbe->type) { + case DCB_OUTPUT_TMDS: + case DCB_OUTPUT_LVDS: + case DCB_OUTPUT_DP: + ret = nv50_sor_create(connector, dcbe); + break; + case DCB_OUTPUT_ANALOG: + ret = nv50_dac_create(connector, dcbe); + break; + default: + ret = -ENODEV; + break; + } + } else { + ret = nv50_pior_create(connector, dcbe); + } + + if (ret) { + NV_WARN(drm, "failed to create encoder %d/%d/%d: %d\n", + dcbe->location, dcbe->type, + ffs(dcbe->or) - 1, ret); + ret = 0; + } + } + + /* cull any connectors we created that don't have an encoder */ + list_for_each_entry_safe(connector, tmp, &dev->mode_config.connector_list, head) { + if (connector->encoder_ids[0]) + continue; + + NV_WARN(drm, "%s has no encoders, removing\n", + connector->name); + connector->funcs->destroy(connector); + } + +out: + if (ret) + nv50_display_destroy(dev); + return ret; +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h new file mode 100644 index 000000000000..e48c5eb35b49 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h @@ -0,0 +1,89 @@ +#ifndef __NV50_KMS_H__ +#define __NV50_KMS_H__ +#include <nvif/mem.h> + +#include "nouveau_display.h" + +struct nv50_disp { + struct nvif_disp *disp; + struct nv50_core *core; + +#define NV50_DISP_SYNC(c, o) ((c) * 0x040 + (o)) +#define NV50_DISP_CORE_NTFY NV50_DISP_SYNC(0 , 0x00) +#define NV50_DISP_WNDW_SEM0(c) NV50_DISP_SYNC(1 + (c), 0x00) +#define NV50_DISP_WNDW_SEM1(c) NV50_DISP_SYNC(1 + (c), 0x10) +#define NV50_DISP_WNDW_NTFY(c) NV50_DISP_SYNC(1 + (c), 0x20) +#define NV50_DISP_BASE_SEM0(c) NV50_DISP_WNDW_SEM0(0 + (c)) +#define NV50_DISP_BASE_SEM1(c) NV50_DISP_WNDW_SEM1(0 + (c)) +#define NV50_DISP_BASE_NTFY(c) NV50_DISP_WNDW_NTFY(0 + (c)) +#define NV50_DISP_OVLY_SEM0(c) NV50_DISP_WNDW_SEM0(4 + (c)) +#define NV50_DISP_OVLY_SEM1(c) NV50_DISP_WNDW_SEM1(4 + (c)) +#define NV50_DISP_OVLY_NTFY(c) NV50_DISP_WNDW_NTFY(4 + (c)) + struct nouveau_bo *sync; + + struct mutex mutex; +}; + +static inline struct nv50_disp * +nv50_disp(struct drm_device *dev) +{ + return nouveau_display(dev)->priv; +} + +struct nv50_disp_interlock { + enum nv50_disp_interlock_type { + NV50_DISP_INTERLOCK_CORE = 0, + NV50_DISP_INTERLOCK_CURS, + NV50_DISP_INTERLOCK_BASE, + NV50_DISP_INTERLOCK_OVLY, + NV50_DISP_INTERLOCK_WNDW, + NV50_DISP_INTERLOCK_WIMM, + NV50_DISP_INTERLOCK__SIZE + } type; + u32 data; +}; + +void corec37d_ntfy_init(struct nouveau_bo *, u32); + +struct nv50_chan { + struct nvif_object user; + struct nvif_device *device; +}; + +struct nv50_dmac { + struct nv50_chan base; + + struct nvif_mem push; + u32 *ptr; + + struct nvif_object sync; + struct nvif_object vram; + + /* Protects against concurrent pushbuf access to this channel, lock is + * grabbed by evo_wait (if the pushbuf reservation is successful) and + * dropped again by evo_kick. */ + struct mutex lock; +}; + +int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, + const s32 *oclass, u8 head, void *data, u32 size, + u64 syncbuf, struct nv50_dmac *dmac); +void nv50_dmac_destroy(struct nv50_dmac *); + +u32 *evo_wait(struct nv50_dmac *, int nr); +void evo_kick(u32 *, struct nv50_dmac *); + +#define evo_mthd(p, m, s) do { \ + const u32 _m = (m), _s = (s); \ + if (drm_debug & DRM_UT_KMS) \ + pr_err("%04x %d %s\n", _m, _s, __func__); \ + *((p)++) = ((_s << 18) | _m); \ +} while(0) + +#define evo_data(p, d) do { \ + const u32 _d = (d); \ + if (drm_debug & DRM_UT_KMS) \ + pr_err("\t%08x\n", _d); \ + *((p)++) = _d; \ +} while(0) +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c new file mode 100644 index 000000000000..4f57e5379796 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/head.c @@ -0,0 +1,511 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "head.h" +#include "base.h" +#include "core.h" +#include "curs.h" +#include "ovly.h" + +#include <nvif/class.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include "nouveau_connector.h" +void +nv50_head_flush_clr(struct nv50_head *head, + struct nv50_head_atom *asyh, bool flush) +{ + union nv50_head_atom_mask clr = { + .mask = asyh->clr.mask & ~(flush ? 0 : asyh->set.mask), + }; + if (clr.olut) head->func->olut_clr(head); + if (clr.core) head->func->core_clr(head); + if (clr.curs) head->func->curs_clr(head); +} + +void +nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + if (asyh->set.view ) head->func->view (head, asyh); + if (asyh->set.mode ) head->func->mode (head, asyh); + if (asyh->set.core ) head->func->core_set(head, asyh); + if (asyh->set.olut ) { + asyh->olut.offset = nv50_lut_load(&head->olut, + asyh->olut.mode <= 1, + asyh->olut.buffer, + asyh->state.gamma_lut); + head->func->olut_set(head, asyh); + } + if (asyh->set.curs ) head->func->curs_set(head, asyh); + if (asyh->set.base ) head->func->base (head, asyh); + if (asyh->set.ovly ) head->func->ovly (head, asyh); + if (asyh->set.dither ) head->func->dither (head, asyh); + if (asyh->set.procamp) head->func->procamp (head, asyh); + if (asyh->set.or ) head->func->or (head, asyh); +} + +static void +nv50_head_atomic_check_procamp(struct nv50_head_atom *armh, + struct nv50_head_atom *asyh, + struct nouveau_conn_atom *asyc) +{ + const int vib = asyc->procamp.color_vibrance - 100; + const int hue = asyc->procamp.vibrant_hue - 90; + const int adj = (vib > 0) ? 50 : 0; + asyh->procamp.sat.cos = ((vib * 2047 + adj) / 100) & 0xfff; + asyh->procamp.sat.sin = ((hue * 2047) / 100) & 0xfff; + asyh->set.procamp = true; +} + +static void +nv50_head_atomic_check_dither(struct nv50_head_atom *armh, + struct nv50_head_atom *asyh, + struct nouveau_conn_atom *asyc) +{ + struct drm_connector *connector = asyc->state.connector; + u32 mode = 0x00; + + if (asyc->dither.mode == DITHERING_MODE_AUTO) { + if (asyh->base.depth > connector->display_info.bpc * 3) + mode = DITHERING_MODE_DYNAMIC2X2; + } else { + mode = asyc->dither.mode; + } + + if (asyc->dither.depth == DITHERING_DEPTH_AUTO) { + if (connector->display_info.bpc >= 8) + mode |= DITHERING_DEPTH_8BPC; + } else { + mode |= asyc->dither.depth; + } + + asyh->dither.enable = mode; + asyh->dither.bits = mode >> 1; + asyh->dither.mode = mode >> 3; + asyh->set.dither = true; +} + +static void +nv50_head_atomic_check_view(struct nv50_head_atom *armh, + struct nv50_head_atom *asyh, + struct nouveau_conn_atom *asyc) +{ + struct drm_connector *connector = asyc->state.connector; + struct drm_display_mode *omode = &asyh->state.adjusted_mode; + struct drm_display_mode *umode = &asyh->state.mode; + int mode = asyc->scaler.mode; + struct edid *edid; + int umode_vdisplay, omode_hdisplay, omode_vdisplay; + + if (connector->edid_blob_ptr) + edid = (struct edid *)connector->edid_blob_ptr->data; + else + edid = NULL; + + if (!asyc->scaler.full) { + if (mode == DRM_MODE_SCALE_NONE) + omode = umode; + } else { + /* Non-EDID LVDS/eDP mode. */ + mode = DRM_MODE_SCALE_FULLSCREEN; + } + + /* For the user-specified mode, we must ignore doublescan and + * the like, but honor frame packing. + */ + umode_vdisplay = umode->vdisplay; + if ((umode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING) + umode_vdisplay += umode->vtotal; + asyh->view.iW = umode->hdisplay; + asyh->view.iH = umode_vdisplay; + /* For the output mode, we can just use the stock helper. */ + drm_mode_get_hv_timing(omode, &omode_hdisplay, &omode_vdisplay); + asyh->view.oW = omode_hdisplay; + asyh->view.oH = omode_vdisplay; + + /* Add overscan compensation if necessary, will keep the aspect + * ratio the same as the backend mode unless overridden by the + * user setting both hborder and vborder properties. + */ + if ((asyc->scaler.underscan.mode == UNDERSCAN_ON || + (asyc->scaler.underscan.mode == UNDERSCAN_AUTO && + drm_detect_hdmi_monitor(edid)))) { + u32 bX = asyc->scaler.underscan.hborder; + u32 bY = asyc->scaler.underscan.vborder; + u32 r = (asyh->view.oH << 19) / asyh->view.oW; + + if (bX) { + asyh->view.oW -= (bX * 2); + if (bY) asyh->view.oH -= (bY * 2); + else asyh->view.oH = ((asyh->view.oW * r) + (r / 2)) >> 19; + } else { + asyh->view.oW -= (asyh->view.oW >> 4) + 32; + if (bY) asyh->view.oH -= (bY * 2); + else asyh->view.oH = ((asyh->view.oW * r) + (r / 2)) >> 19; + } + } + + /* Handle CENTER/ASPECT scaling, taking into account the areas + * removed already for overscan compensation. + */ + switch (mode) { + case DRM_MODE_SCALE_CENTER: + asyh->view.oW = min((u16)umode->hdisplay, asyh->view.oW); + asyh->view.oH = min((u16)umode_vdisplay, asyh->view.oH); + /* fall-through */ + case DRM_MODE_SCALE_ASPECT: + if (asyh->view.oH < asyh->view.oW) { + u32 r = (asyh->view.iW << 19) / asyh->view.iH; + asyh->view.oW = ((asyh->view.oH * r) + (r / 2)) >> 19; + } else { + u32 r = (asyh->view.iH << 19) / asyh->view.iW; + asyh->view.oH = ((asyh->view.oW * r) + (r / 2)) >> 19; + } + break; + default: + break; + } + + asyh->set.view = true; +} + +static int +nv50_head_atomic_check_lut(struct nv50_head *head, + struct nv50_head_atom *asyh) +{ + struct nv50_disp *disp = nv50_disp(head->base.base.dev); + struct drm_property_blob *olut = asyh->state.gamma_lut; + + /* Determine whether core output LUT should be enabled. */ + if (olut) { + /* Check if any window(s) have stolen the core output LUT + * to as an input LUT for legacy gamma + I8 colour format. + */ + if (asyh->wndw.olut) { + /* If any window has stolen the core output LUT, + * all of them must. + */ + if (asyh->wndw.olut != asyh->wndw.mask) + return -EINVAL; + olut = NULL; + } + } + + if (!olut) { + asyh->olut.handle = 0; + return 0; + } + + asyh->olut.handle = disp->core->chan.vram.handle; + asyh->olut.buffer = !asyh->olut.buffer; + head->func->olut(head, asyh); + return 0; +} + +static void +nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct drm_display_mode *mode = &asyh->state.adjusted_mode; + struct nv50_head_mode *m = &asyh->mode; + u32 blankus; + + drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V | CRTC_STEREO_DOUBLE); + + /* + * DRM modes are defined in terms of a repeating interval + * starting with the active display area. The hardware modes + * are defined in terms of a repeating interval starting one + * unit (pixel or line) into the sync pulse. So, add bias. + */ + + m->h.active = mode->crtc_htotal; + m->h.synce = mode->crtc_hsync_end - mode->crtc_hsync_start - 1; + m->h.blanke = mode->crtc_hblank_end - mode->crtc_hsync_start - 1; + m->h.blanks = m->h.blanke + mode->crtc_hdisplay; + + m->v.active = mode->crtc_vtotal; + m->v.synce = mode->crtc_vsync_end - mode->crtc_vsync_start - 1; + m->v.blanke = mode->crtc_vblank_end - mode->crtc_vsync_start - 1; + m->v.blanks = m->v.blanke + mode->crtc_vdisplay; + + /*XXX: Safe underestimate, even "0" works */ + blankus = (m->v.active - mode->crtc_vdisplay - 2) * m->h.active; + blankus *= 1000; + blankus /= mode->crtc_clock; + m->v.blankus = blankus; + + if (mode->flags & DRM_MODE_FLAG_INTERLACE) { + m->v.blank2e = m->v.active + m->v.blanke; + m->v.blank2s = m->v.blank2e + mode->crtc_vdisplay; + m->v.active = (m->v.active * 2) + 1; + m->interlace = true; + } else { + m->v.blank2e = 0; + m->v.blank2s = 1; + m->interlace = false; + } + m->clock = mode->crtc_clock; + + asyh->or.nhsync = !!(mode->flags & DRM_MODE_FLAG_NHSYNC); + asyh->or.nvsync = !!(mode->flags & DRM_MODE_FLAG_NVSYNC); + asyh->set.or = head->func->or != NULL; + asyh->set.mode = true; +} + +static int +nv50_head_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state) +{ + struct nouveau_drm *drm = nouveau_drm(crtc->dev); + struct nv50_head *head = nv50_head(crtc); + struct nv50_head_atom *armh = nv50_head_atom(crtc->state); + struct nv50_head_atom *asyh = nv50_head_atom(state); + struct nouveau_conn_atom *asyc = NULL; + struct drm_connector_state *conns; + struct drm_connector *conn; + int i; + + NV_ATOMIC(drm, "%s atomic_check %d\n", crtc->name, asyh->state.active); + if (asyh->state.active) { + for_each_new_connector_in_state(asyh->state.state, conn, conns, i) { + if (conns->crtc == crtc) { + asyc = nouveau_conn_atom(conns); + break; + } + } + + if (armh->state.active) { + if (asyc) { + if (asyh->state.mode_changed) + asyc->set.scaler = true; + if (armh->base.depth != asyh->base.depth) + asyc->set.dither = true; + } + } else { + if (asyc) + asyc->set.mask = ~0; + asyh->set.mask = ~0; + asyh->set.or = head->func->or != NULL; + } + + if (asyh->state.mode_changed) + nv50_head_atomic_check_mode(head, asyh); + + if (asyh->state.color_mgmt_changed || + memcmp(&armh->wndw, &asyh->wndw, sizeof(asyh->wndw))) { + int ret = nv50_head_atomic_check_lut(head, asyh); + if (ret) + return ret; + + asyh->olut.visible = asyh->olut.handle != 0; + } + + if (asyc) { + if (asyc->set.scaler) + nv50_head_atomic_check_view(armh, asyh, asyc); + if (asyc->set.dither) + nv50_head_atomic_check_dither(armh, asyh, asyc); + if (asyc->set.procamp) + nv50_head_atomic_check_procamp(armh, asyh, asyc); + } + + if (head->func->core_calc) { + head->func->core_calc(head, asyh); + if (!asyh->core.visible) + asyh->olut.visible = false; + } + + asyh->set.base = armh->base.cpp != asyh->base.cpp; + asyh->set.ovly = armh->ovly.cpp != asyh->ovly.cpp; + } else { + asyh->olut.visible = false; + asyh->core.visible = false; + asyh->curs.visible = false; + asyh->base.cpp = 0; + asyh->ovly.cpp = 0; + } + + if (!drm_atomic_crtc_needs_modeset(&asyh->state)) { + if (asyh->core.visible) { + if (memcmp(&armh->core, &asyh->core, sizeof(asyh->core))) + asyh->set.core = true; + } else + if (armh->core.visible) { + asyh->clr.core = true; + } + + if (asyh->curs.visible) { + if (memcmp(&armh->curs, &asyh->curs, sizeof(asyh->curs))) + asyh->set.curs = true; + } else + if (armh->curs.visible) { + asyh->clr.curs = true; + } + + if (asyh->olut.visible) { + if (memcmp(&armh->olut, &asyh->olut, sizeof(asyh->olut))) + asyh->set.olut = true; + } else + if (armh->olut.visible) { + asyh->clr.olut = true; + } + } else { + asyh->clr.olut = armh->olut.visible; + asyh->clr.core = armh->core.visible; + asyh->clr.curs = armh->curs.visible; + asyh->set.olut = asyh->olut.visible; + asyh->set.core = asyh->core.visible; + asyh->set.curs = asyh->curs.visible; + } + + if (asyh->clr.mask || asyh->set.mask) + nv50_atom(asyh->state.state)->lock_core = true; + return 0; +} + +static const struct drm_crtc_helper_funcs +nv50_head_help = { + .atomic_check = nv50_head_atomic_check, +}; + +static void +nv50_head_atomic_destroy_state(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + struct nv50_head_atom *asyh = nv50_head_atom(state); + __drm_atomic_helper_crtc_destroy_state(&asyh->state); + kfree(asyh); +} + +static struct drm_crtc_state * +nv50_head_atomic_duplicate_state(struct drm_crtc *crtc) +{ + struct nv50_head_atom *armh = nv50_head_atom(crtc->state); + struct nv50_head_atom *asyh; + if (!(asyh = kmalloc(sizeof(*asyh), GFP_KERNEL))) + return NULL; + __drm_atomic_helper_crtc_duplicate_state(crtc, &asyh->state); + asyh->wndw = armh->wndw; + asyh->view = armh->view; + asyh->mode = armh->mode; + asyh->olut = armh->olut; + asyh->core = armh->core; + asyh->curs = armh->curs; + asyh->base = armh->base; + asyh->ovly = armh->ovly; + asyh->dither = armh->dither; + asyh->procamp = armh->procamp; + asyh->clr.mask = 0; + asyh->set.mask = 0; + return &asyh->state; +} + +static void +__drm_atomic_helper_crtc_reset(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + if (crtc->state) + crtc->funcs->atomic_destroy_state(crtc, crtc->state); + crtc->state = state; + crtc->state->crtc = crtc; +} + +static void +nv50_head_reset(struct drm_crtc *crtc) +{ + struct nv50_head_atom *asyh; + + if (WARN_ON(!(asyh = kzalloc(sizeof(*asyh), GFP_KERNEL)))) + return; + + __drm_atomic_helper_crtc_reset(crtc, &asyh->state); +} + +static void +nv50_head_destroy(struct drm_crtc *crtc) +{ + struct nv50_head *head = nv50_head(crtc); + nv50_lut_fini(&head->olut); + drm_crtc_cleanup(crtc); + kfree(head); +} + +static const struct drm_crtc_funcs +nv50_head_func = { + .reset = nv50_head_reset, + .gamma_set = drm_atomic_helper_legacy_gamma_set, + .destroy = nv50_head_destroy, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .atomic_duplicate_state = nv50_head_atomic_duplicate_state, + .atomic_destroy_state = nv50_head_atomic_destroy_state, +}; + +int +nv50_head_create(struct drm_device *dev, int index) +{ + struct nouveau_drm *drm = nouveau_drm(dev); + struct nv50_disp *disp = nv50_disp(dev); + struct nv50_head *head; + struct nv50_wndw *curs, *wndw; + struct drm_crtc *crtc; + int ret; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + return -ENOMEM; + + head->func = disp->core->func->head; + head->base.index = index; + + if (disp->disp->object.oclass < GV100_DISP) { + ret = nv50_ovly_new(drm, head->base.index, &wndw); + ret = nv50_base_new(drm, head->base.index, &wndw); + } else { + ret = nv50_wndw_new(drm, DRM_PLANE_TYPE_OVERLAY, + head->base.index * 2 + 1, &wndw); + ret = nv50_wndw_new(drm, DRM_PLANE_TYPE_PRIMARY, + head->base.index * 2 + 0, &wndw); + } + if (ret == 0) + ret = nv50_curs_new(drm, head->base.index, &curs); + if (ret) { + kfree(head); + return ret; + } + + crtc = &head->base.base; + drm_crtc_init_with_planes(dev, crtc, &wndw->plane, &curs->plane, + &nv50_head_func, "head-%d", head->base.index); + drm_crtc_helper_add(crtc, &nv50_head_help); + drm_mode_crtc_set_gamma_size(crtc, 256); + + if (head->func->olut_set) { + ret = nv50_lut_init(disp, &drm->client.mmu, &head->olut); + if (ret) + goto out; + } + +out: + if (ret) + nv50_head_destroy(crtc); + return ret; +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.h b/drivers/gpu/drm/nouveau/dispnv50/head.h new file mode 100644 index 000000000000..37b3248c6dae --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/head.h @@ -0,0 +1,78 @@ +#ifndef __NV50_KMS_HEAD_H__ +#define __NV50_KMS_HEAD_H__ +#define nv50_head(c) container_of((c), struct nv50_head, base.base) +#include "disp.h" +#include "atom.h" +#include "lut.h" + +#include "nouveau_crtc.h" + +struct nv50_head { + const struct nv50_head_func *func; + struct nouveau_crtc base; + struct nv50_lut olut; +}; + +int nv50_head_create(struct drm_device *, int index); +void nv50_head_flush_set(struct nv50_head *, struct nv50_head_atom *); +void nv50_head_flush_clr(struct nv50_head *, struct nv50_head_atom *, bool y); + +struct nv50_head_func { + void (*view)(struct nv50_head *, struct nv50_head_atom *); + void (*mode)(struct nv50_head *, struct nv50_head_atom *); + void (*olut)(struct nv50_head *, struct nv50_head_atom *); + void (*olut_set)(struct nv50_head *, struct nv50_head_atom *); + void (*olut_clr)(struct nv50_head *); + void (*core_calc)(struct nv50_head *, struct nv50_head_atom *); + void (*core_set)(struct nv50_head *, struct nv50_head_atom *); + void (*core_clr)(struct nv50_head *); + int (*curs_layout)(struct nv50_head *, struct nv50_wndw_atom *, + struct nv50_head_atom *); + int (*curs_format)(struct nv50_head *, struct nv50_wndw_atom *, + struct nv50_head_atom *); + void (*curs_set)(struct nv50_head *, struct nv50_head_atom *); + void (*curs_clr)(struct nv50_head *); + void (*base)(struct nv50_head *, struct nv50_head_atom *); + void (*ovly)(struct nv50_head *, struct nv50_head_atom *); + void (*dither)(struct nv50_head *, struct nv50_head_atom *); + void (*procamp)(struct nv50_head *, struct nv50_head_atom *); + void (*or)(struct nv50_head *, struct nv50_head_atom *); +}; + +extern const struct nv50_head_func head507d; +void head507d_view(struct nv50_head *, struct nv50_head_atom *); +void head507d_mode(struct nv50_head *, struct nv50_head_atom *); +void head507d_olut(struct nv50_head *, struct nv50_head_atom *); +void head507d_core_calc(struct nv50_head *, struct nv50_head_atom *); +void head507d_core_clr(struct nv50_head *); +int head507d_curs_layout(struct nv50_head *, struct nv50_wndw_atom *, + struct nv50_head_atom *); +int head507d_curs_format(struct nv50_head *, struct nv50_wndw_atom *, + struct nv50_head_atom *); +void head507d_base(struct nv50_head *, struct nv50_head_atom *); +void head507d_ovly(struct nv50_head *, struct nv50_head_atom *); +void head507d_dither(struct nv50_head *, struct nv50_head_atom *); +void head507d_procamp(struct nv50_head *, struct nv50_head_atom *); + +extern const struct nv50_head_func head827d; + +extern const struct nv50_head_func head907d; +void head907d_view(struct nv50_head *, struct nv50_head_atom *); +void head907d_mode(struct nv50_head *, struct nv50_head_atom *); +void head907d_olut(struct nv50_head *, struct nv50_head_atom *); +void head907d_olut_set(struct nv50_head *, struct nv50_head_atom *); +void head907d_olut_clr(struct nv50_head *); +void head907d_core_set(struct nv50_head *, struct nv50_head_atom *); +void head907d_core_clr(struct nv50_head *); +void head907d_curs_set(struct nv50_head *, struct nv50_head_atom *); +void head907d_curs_clr(struct nv50_head *); +void head907d_ovly(struct nv50_head *, struct nv50_head_atom *); +void head907d_procamp(struct nv50_head *, struct nv50_head_atom *); +void head907d_or(struct nv50_head *, struct nv50_head_atom *); + +extern const struct nv50_head_func head917d; +int head917d_curs_layout(struct nv50_head *, struct nv50_wndw_atom *, + struct nv50_head_atom *); + +extern const struct nv50_head_func headc37d; +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/head507d.c b/drivers/gpu/drm/nouveau/dispnv50/head507d.c new file mode 100644 index 000000000000..51bc5996fd37 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/head507d.c @@ -0,0 +1,325 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "head.h" +#include "core.h" + +void +head507d_procamp(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x08a8 + (head->base.index * 0x400), 1); + evo_data(push, asyh->procamp.sat.sin << 20 | + asyh->procamp.sat.cos << 8); + evo_kick(push, core); + } +} + +void +head507d_dither(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x08a0 + (head->base.index * 0x0400), 1); + evo_data(push, asyh->dither.mode << 3 | + asyh->dither.bits << 1 | + asyh->dither.enable); + evo_kick(push, core); + } +} + +void +head507d_ovly(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 bounds = 0; + u32 *push; + + if (asyh->ovly.cpp) { + switch (asyh->ovly.cpp) { + case 4: bounds |= 0x00000300; break; + case 2: bounds |= 0x00000100; break; + default: + WARN_ON(1); + break; + } + bounds |= 0x00000001; + } else { + bounds |= 0x00000100; + } + + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x0904 + head->base.index * 0x400, 1); + evo_data(push, bounds); + evo_kick(push, core); + } +} + +void +head507d_base(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 bounds = 0; + u32 *push; + + if (asyh->base.cpp) { + switch (asyh->base.cpp) { + case 8: bounds |= 0x00000500; break; + case 4: bounds |= 0x00000300; break; + case 2: bounds |= 0x00000100; break; + case 1: bounds |= 0x00000000; break; + default: + WARN_ON(1); + break; + } + bounds |= 0x00000001; + } + + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x0900 + head->base.index * 0x400, 1); + evo_data(push, bounds); + evo_kick(push, core); + } +} + +static void +head507d_curs_clr(struct nv50_head *head) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x0880 + head->base.index * 0x400, 1); + evo_data(push, 0x05000000); + evo_kick(push, core); + } +} + +static void +head507d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 3))) { + evo_mthd(push, 0x0880 + head->base.index * 0x400, 2); + evo_data(push, 0x80000000 | asyh->curs.layout << 26 | + asyh->curs.format << 24); + evo_data(push, asyh->curs.offset >> 8); + evo_kick(push, core); + } +} + +int +head507d_curs_format(struct nv50_head *head, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + switch (asyw->image.format) { + case 0xcf: asyh->curs.format = 1; break; + default: + WARN_ON(1); + return -EINVAL; + } + return 0; +} + +int +head507d_curs_layout(struct nv50_head *head, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + switch (asyw->image.w) { + case 32: asyh->curs.layout = 0; break; + case 64: asyh->curs.layout = 1; break; + default: + return -EINVAL; + } + return 0; +} + +void +head507d_core_clr(struct nv50_head *head) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x0874 + head->base.index * 0x400, 1); + evo_data(push, 0x00000000); + evo_kick(push, core); + } +} + +static void +head507d_core_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 9))) { + evo_mthd(push, 0x0860 + head->base.index * 0x400, 1); + evo_data(push, asyh->core.offset >> 8); + evo_mthd(push, 0x0868 + head->base.index * 0x400, 4); + evo_data(push, asyh->core.h << 16 | asyh->core.w); + evo_data(push, asyh->core.layout << 20 | + (asyh->core.pitch >> 8) << 8 | + asyh->core.blocks << 8 | + asyh->core.blockh); + evo_data(push, asyh->core.kind << 16 | + asyh->core.format << 8); + evo_data(push, asyh->core.handle); + evo_mthd(push, 0x08c0 + head->base.index * 0x400, 1); + evo_data(push, asyh->core.y << 16 | asyh->core.x); + evo_kick(push, core); + + /* EVO will complain with INVALID_STATE if we have an + * active cursor and (re)specify HeadSetContextDmaIso + * without also updating HeadSetOffsetCursor. + */ + asyh->set.curs = asyh->curs.visible; + asyh->set.olut = asyh->olut.handle != 0; + } +} + +void +head507d_core_calc(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_disp *disp = nv50_disp(head->base.base.dev); + if ((asyh->core.visible = (asyh->base.cpp != 0))) { + asyh->core.x = asyh->base.x; + asyh->core.y = asyh->base.y; + asyh->core.w = asyh->base.w; + asyh->core.h = asyh->base.h; + } else + if ((asyh->core.visible = (asyh->ovly.cpp != 0)) || + (asyh->core.visible = asyh->curs.visible)) { + /*XXX: We need to either find some way of having the + * primary base layer appear black, while still + * being able to display the other layers, or we + * need to allocate a dummy black surface here. + */ + asyh->core.x = 0; + asyh->core.y = 0; + asyh->core.w = asyh->state.mode.hdisplay; + asyh->core.h = asyh->state.mode.vdisplay; + } + asyh->core.handle = disp->core->chan.vram.handle; + asyh->core.offset = 0; + asyh->core.format = 0xcf; + asyh->core.kind = 0; + asyh->core.layout = 1; + asyh->core.blockh = 0; + asyh->core.blocks = 0; + asyh->core.pitch = ALIGN(asyh->core.w, 64) * 4; +} + +static void +head507d_olut_clr(struct nv50_head *head) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x0840 + (head->base.index * 0x400), 1); + evo_data(push, 0x00000000); + evo_kick(push, core); + } +} + +static void +head507d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 3))) { + evo_mthd(push, 0x0840 + (head->base.index * 0x400), 2); + evo_data(push, 0x80000000 | asyh->olut.mode << 30); + evo_data(push, asyh->olut.offset >> 8); + evo_kick(push, core); + } +} + +void +head507d_olut(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + if (asyh->base.cpp == 1) + asyh->olut.mode = 0; + else + asyh->olut.mode = 1; +} + +void +head507d_mode(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + struct nv50_head_mode *m = &asyh->mode; + u32 *push; + if ((push = evo_wait(core, 13))) { + evo_mthd(push, 0x0804 + (head->base.index * 0x400), 2); + evo_data(push, 0x00800000 | m->clock); + evo_data(push, m->interlace ? 0x00000002 : 0x00000000); + evo_mthd(push, 0x0810 + (head->base.index * 0x400), 7); + evo_data(push, 0x00000000); + evo_data(push, m->v.active << 16 | m->h.active ); + evo_data(push, m->v.synce << 16 | m->h.synce ); + evo_data(push, m->v.blanke << 16 | m->h.blanke ); + evo_data(push, m->v.blanks << 16 | m->h.blanks ); + evo_data(push, m->v.blank2e << 16 | m->v.blank2s); + evo_data(push, asyh->mode.v.blankus); + evo_mthd(push, 0x082c + (head->base.index * 0x400), 1); + evo_data(push, 0x00000000); + evo_kick(push, core); + } +} + +void +head507d_view(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 7))) { + evo_mthd(push, 0x08a4 + (head->base.index * 0x400), 1); + evo_data(push, 0x00000000); + evo_mthd(push, 0x08c8 + (head->base.index * 0x400), 1); + evo_data(push, asyh->view.iH << 16 | asyh->view.iW); + evo_mthd(push, 0x08d8 + (head->base.index * 0x400), 2); + evo_data(push, asyh->view.oH << 16 | asyh->view.oW); + evo_data(push, asyh->view.oH << 16 | asyh->view.oW); + evo_kick(push, core); + } +} + +const struct nv50_head_func +head507d = { + .view = head507d_view, + .mode = head507d_mode, + .olut = head507d_olut, + .olut_set = head507d_olut_set, + .olut_clr = head507d_olut_clr, + .core_calc = head507d_core_calc, + .core_set = head507d_core_set, + .core_clr = head507d_core_clr, + .curs_layout = head507d_curs_layout, + .curs_format = head507d_curs_format, + .curs_set = head507d_curs_set, + .curs_clr = head507d_curs_clr, + .base = head507d_base, + .ovly = head507d_ovly, + .dither = head507d_dither, + .procamp = head507d_procamp, +}; diff --git a/drivers/gpu/drm/nouveau/dispnv50/head827d.c b/drivers/gpu/drm/nouveau/dispnv50/head827d.c new file mode 100644 index 000000000000..af5e7bd5978b --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/head827d.c @@ -0,0 +1,124 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "head.h" +#include "core.h" + +static void +head827d_curs_clr(struct nv50_head *head) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 4))) { + evo_mthd(push, 0x0880 + head->base.index * 0x400, 1); + evo_data(push, 0x05000000); + evo_mthd(push, 0x089c + head->base.index * 0x400, 1); + evo_data(push, 0x00000000); + evo_kick(push, core); + } +} + +static void +head827d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 5))) { + evo_mthd(push, 0x0880 + head->base.index * 0x400, 2); + evo_data(push, 0x80000000 | asyh->curs.layout << 26 | + asyh->curs.format << 24); + evo_data(push, asyh->curs.offset >> 8); + evo_mthd(push, 0x089c + head->base.index * 0x400, 1); + evo_data(push, asyh->curs.handle); + evo_kick(push, core); + } +} + +static void +head827d_core_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 9))) { + evo_mthd(push, 0x0860 + head->base.index * 0x400, 1); + evo_data(push, asyh->core.offset >> 8); + evo_mthd(push, 0x0868 + head->base.index * 0x400, 4); + evo_data(push, asyh->core.h << 16 | asyh->core.w); + evo_data(push, asyh->core.layout << 20 | + (asyh->core.pitch >> 8) << 8 | + asyh->core.blocks << 8 | + asyh->core.blockh); + evo_data(push, asyh->core.format << 8); + evo_data(push, asyh->core.handle); + evo_mthd(push, 0x08c0 + head->base.index * 0x400, 1); + evo_data(push, asyh->core.y << 16 | asyh->core.x); + evo_kick(push, core); + } +} + +static void +head827d_olut_clr(struct nv50_head *head) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 4))) { + evo_mthd(push, 0x0840 + (head->base.index * 0x400), 1); + evo_data(push, 0x00000000); + evo_mthd(push, 0x085c + (head->base.index * 0x400), 1); + evo_data(push, 0x00000000); + evo_kick(push, core); + } +} + +static void +head827d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 5))) { + evo_mthd(push, 0x0840 + (head->base.index * 0x400), 2); + evo_data(push, 0x80000000 | asyh->olut.mode << 30); + evo_data(push, asyh->olut.offset >> 8); + evo_mthd(push, 0x085c + (head->base.index * 0x400), 1); + evo_data(push, asyh->olut.handle); + evo_kick(push, core); + } +} + +const struct nv50_head_func +head827d = { + .view = head507d_view, + .mode = head507d_mode, + .olut = head507d_olut, + .olut_set = head827d_olut_set, + .olut_clr = head827d_olut_clr, + .core_calc = head507d_core_calc, + .core_set = head827d_core_set, + .core_clr = head507d_core_clr, + .curs_layout = head507d_curs_layout, + .curs_format = head507d_curs_format, + .curs_set = head827d_curs_set, + .curs_clr = head827d_curs_clr, + .base = head507d_base, + .ovly = head507d_ovly, + .dither = head507d_dither, + .procamp = head507d_procamp, +}; diff --git a/drivers/gpu/drm/nouveau/dispnv50/head907d.c b/drivers/gpu/drm/nouveau/dispnv50/head907d.c new file mode 100644 index 000000000000..633907163eb1 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/head907d.c @@ -0,0 +1,284 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "head.h" +#include "core.h" + +void +head907d_or(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 3))) { + evo_mthd(push, 0x0404 + (head->base.index * 0x300), 2); + evo_data(push, 0x00000001 | asyh->or.depth << 6 | + asyh->or.nvsync << 4 | + asyh->or.nhsync << 3); + evo_data(push, 0x31ec6000 | head->base.index << 25 | + asyh->mode.interlace); + evo_kick(push, core); + } +} + +void +head907d_procamp(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x0498 + (head->base.index * 0x300), 1); + evo_data(push, asyh->procamp.sat.sin << 20 | + asyh->procamp.sat.cos << 8); + evo_kick(push, core); + } +} + +static void +head907d_dither(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x0490 + (head->base.index * 0x0300), 1); + evo_data(push, asyh->dither.mode << 3 | + asyh->dither.bits << 1 | + asyh->dither.enable); + evo_kick(push, core); + } +} + +void +head907d_ovly(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 bounds = 0; + u32 *push; + + if (asyh->ovly.cpp) { + switch (asyh->ovly.cpp) { + case 8: bounds |= 0x00000500; break; + case 4: bounds |= 0x00000300; break; + case 2: bounds |= 0x00000100; break; + default: + WARN_ON(1); + break; + } + bounds |= 0x00000001; + } else { + bounds |= 0x00000100; + } + + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x04d4 + head->base.index * 0x300, 1); + evo_data(push, bounds); + evo_kick(push, core); + } +} + +static void +head907d_base(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 bounds = 0; + u32 *push; + + if (asyh->base.cpp) { + switch (asyh->base.cpp) { + case 8: bounds |= 0x00000500; break; + case 4: bounds |= 0x00000300; break; + case 2: bounds |= 0x00000100; break; + case 1: bounds |= 0x00000000; break; + default: + WARN_ON(1); + break; + } + bounds |= 0x00000001; + } + + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x04d0 + head->base.index * 0x300, 1); + evo_data(push, bounds); + evo_kick(push, core); + } +} + +void +head907d_curs_clr(struct nv50_head *head) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 4))) { + evo_mthd(push, 0x0480 + head->base.index * 0x300, 1); + evo_data(push, 0x05000000); + evo_mthd(push, 0x048c + head->base.index * 0x300, 1); + evo_data(push, 0x00000000); + evo_kick(push, core); + } +} + +void +head907d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 5))) { + evo_mthd(push, 0x0480 + head->base.index * 0x300, 2); + evo_data(push, 0x80000000 | asyh->curs.layout << 26 | + asyh->curs.format << 24); + evo_data(push, asyh->curs.offset >> 8); + evo_mthd(push, 0x048c + head->base.index * 0x300, 1); + evo_data(push, asyh->curs.handle); + evo_kick(push, core); + } +} + +void +head907d_core_clr(struct nv50_head *head) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x0474 + head->base.index * 0x300, 1); + evo_data(push, 0x00000000); + evo_kick(push, core); + } +} + +void +head907d_core_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 9))) { + evo_mthd(push, 0x0460 + head->base.index * 0x300, 1); + evo_data(push, asyh->core.offset >> 8); + evo_mthd(push, 0x0468 + head->base.index * 0x300, 4); + evo_data(push, asyh->core.h << 16 | asyh->core.w); + evo_data(push, asyh->core.layout << 24 | + (asyh->core.pitch >> 8) << 8 | + asyh->core.blocks << 8 | + asyh->core.blockh); + evo_data(push, asyh->core.format << 8); + evo_data(push, asyh->core.handle); + evo_mthd(push, 0x04b0 + head->base.index * 0x300, 1); + evo_data(push, asyh->core.y << 16 | asyh->core.x); + evo_kick(push, core); + } +} + +void +head907d_olut_clr(struct nv50_head *head) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 4))) { + evo_mthd(push, 0x0448 + (head->base.index * 0x300), 1); + evo_data(push, 0x00000000); + evo_mthd(push, 0x045c + (head->base.index * 0x300), 1); + evo_data(push, 0x00000000); + evo_kick(push, core); + } +} + +void +head907d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 5))) { + evo_mthd(push, 0x0448 + (head->base.index * 0x300), 2); + evo_data(push, 0x80000000 | asyh->olut.mode << 24); + evo_data(push, asyh->olut.offset >> 8); + evo_mthd(push, 0x045c + (head->base.index * 0x300), 1); + evo_data(push, asyh->olut.handle); + evo_kick(push, core); + } +} + +void +head907d_olut(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + asyh->olut.mode = 7; +} + +void +head907d_mode(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + struct nv50_head_mode *m = &asyh->mode; + u32 *push; + if ((push = evo_wait(core, 14))) { + evo_mthd(push, 0x0410 + (head->base.index * 0x300), 6); + evo_data(push, 0x00000000); + evo_data(push, m->v.active << 16 | m->h.active ); + evo_data(push, m->v.synce << 16 | m->h.synce ); + evo_data(push, m->v.blanke << 16 | m->h.blanke ); + evo_data(push, m->v.blanks << 16 | m->h.blanks ); + evo_data(push, m->v.blank2e << 16 | m->v.blank2s); + evo_mthd(push, 0x042c + (head->base.index * 0x300), 2); + evo_data(push, 0x00000000); /* ??? */ + evo_data(push, 0xffffff00); + evo_mthd(push, 0x0450 + (head->base.index * 0x300), 3); + evo_data(push, m->clock * 1000); + evo_data(push, 0x00200000); /* ??? */ + evo_data(push, m->clock * 1000); + evo_kick(push, core); + } +} + +void +head907d_view(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 8))) { + evo_mthd(push, 0x0494 + (head->base.index * 0x300), 1); + evo_data(push, 0x00000000); + evo_mthd(push, 0x04b8 + (head->base.index * 0x300), 1); + evo_data(push, asyh->view.iH << 16 | asyh->view.iW); + evo_mthd(push, 0x04c0 + (head->base.index * 0x300), 3); + evo_data(push, asyh->view.oH << 16 | asyh->view.oW); + evo_data(push, asyh->view.oH << 16 | asyh->view.oW); + evo_data(push, asyh->view.oH << 16 | asyh->view.oW); + evo_kick(push, core); + } +} + +const struct nv50_head_func +head907d = { + .view = head907d_view, + .mode = head907d_mode, + .olut = head907d_olut, + .olut_set = head907d_olut_set, + .olut_clr = head907d_olut_clr, + .core_calc = head507d_core_calc, + .core_set = head907d_core_set, + .core_clr = head907d_core_clr, + .curs_layout = head507d_curs_layout, + .curs_format = head507d_curs_format, + .curs_set = head907d_curs_set, + .curs_clr = head907d_curs_clr, + .base = head907d_base, + .ovly = head907d_ovly, + .dither = head907d_dither, + .procamp = head907d_procamp, + .or = head907d_or, +}; diff --git a/drivers/gpu/drm/nouveau/dispnv50/head917d.c b/drivers/gpu/drm/nouveau/dispnv50/head917d.c new file mode 100644 index 000000000000..303df8459ca8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/head917d.c @@ -0,0 +1,100 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "head.h" +#include "core.h" + +static void +head917d_dither(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x04a0 + (head->base.index * 0x0300), 1); + evo_data(push, asyh->dither.mode << 3 | + asyh->dither.bits << 1 | + asyh->dither.enable); + evo_kick(push, core); + } +} + +static void +head917d_base(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 bounds = 0; + u32 *push; + + if (asyh->base.cpp) { + switch (asyh->base.cpp) { + case 8: bounds |= 0x00000500; break; + case 4: bounds |= 0x00000300; break; + case 2: bounds |= 0x00000100; break; + case 1: bounds |= 0x00000000; break; + default: + WARN_ON(1); + break; + } + bounds |= 0x00020001; + } + + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x04d0 + head->base.index * 0x300, 1); + evo_data(push, bounds); + evo_kick(push, core); + } +} + +int +head917d_curs_layout(struct nv50_head *head, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + switch (asyw->state.fb->width) { + case 32: asyh->curs.layout = 0; break; + case 64: asyh->curs.layout = 1; break; + case 128: asyh->curs.layout = 2; break; + case 256: asyh->curs.layout = 3; break; + default: + return -EINVAL; + } + return 0; +} + +const struct nv50_head_func +head917d = { + .view = head907d_view, + .mode = head907d_mode, + .olut = head907d_olut, + .olut_set = head907d_olut_set, + .olut_clr = head907d_olut_clr, + .core_calc = head507d_core_calc, + .core_set = head907d_core_set, + .core_clr = head907d_core_clr, + .curs_layout = head917d_curs_layout, + .curs_format = head507d_curs_format, + .curs_set = head907d_curs_set, + .curs_clr = head907d_curs_clr, + .base = head917d_base, + .ovly = head907d_ovly, + .dither = head917d_dither, + .procamp = head907d_procamp, + .or = head907d_or, +}; diff --git a/drivers/gpu/drm/nouveau/dispnv50/headc37d.c b/drivers/gpu/drm/nouveau/dispnv50/headc37d.c new file mode 100644 index 000000000000..989c14083066 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/headc37d.c @@ -0,0 +1,212 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "head.h" +#include "atom.h" +#include "core.h" + +static void +headc37d_or(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + /*XXX: This is a dirty hack until OR depth handling is + * improved later for deep colour etc. + */ + switch (asyh->or.depth) { + case 6: asyh->or.depth = 5; break; + case 5: asyh->or.depth = 4; break; + case 2: asyh->or.depth = 1; break; + case 0: asyh->or.depth = 4; break; + default: + WARN_ON(1); + break; + } + + evo_mthd(push, 0x2004 + (head->base.index * 0x400), 1); + evo_data(push, 0x00000001 | + asyh->or.depth << 4 | + asyh->or.nvsync << 3 | + asyh->or.nhsync << 2); + evo_kick(push, core); + } +} + +static void +headc37d_procamp(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x2000 + (head->base.index * 0x400), 1); + evo_data(push, 0x80000000 | + asyh->procamp.sat.sin << 16 | + asyh->procamp.sat.cos << 4); + evo_kick(push, core); + } +} + +static void +headc37d_dither(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x2018 + (head->base.index * 0x0400), 1); + evo_data(push, asyh->dither.mode << 8 | + asyh->dither.bits << 4 | + asyh->dither.enable); + evo_kick(push, core); + } +} + +static void +headc37d_curs_clr(struct nv50_head *head) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 4))) { + evo_mthd(push, 0x209c + head->base.index * 0x400, 1); + evo_data(push, 0x000000cf); + evo_mthd(push, 0x2088 + head->base.index * 0x400, 1); + evo_data(push, 0x00000000); + evo_kick(push, core); + } +} + +static void +headc37d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 7))) { + evo_mthd(push, 0x209c + head->base.index * 0x400, 2); + evo_data(push, 0x80000000 | + asyh->curs.layout << 8 | + asyh->curs.format << 0); + evo_data(push, 0x000072ff); + evo_mthd(push, 0x2088 + head->base.index * 0x400, 1); + evo_data(push, asyh->curs.handle); + evo_mthd(push, 0x2090 + head->base.index * 0x400, 1); + evo_data(push, asyh->curs.offset >> 8); + evo_kick(push, core); + } +} + +static int +headc37d_curs_format(struct nv50_head *head, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + asyh->curs.format = asyw->image.format; + return 0; +} + +static void +headc37d_olut_clr(struct nv50_head *head) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 2))) { + evo_mthd(push, 0x20ac + (head->base.index * 0x400), 1); + evo_data(push, 0x00000000); + evo_kick(push, core); + } +} + +static void +headc37d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 4))) { + evo_mthd(push, 0x20a4 + (head->base.index * 0x400), 3); + evo_data(push, asyh->olut.output_mode << 8 | + asyh->olut.range << 4 | + asyh->olut.size); + evo_data(push, asyh->olut.offset >> 8); + evo_data(push, asyh->olut.handle); + evo_kick(push, core); + } +} + +static void +headc37d_olut(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + asyh->olut.mode = 2; + asyh->olut.size = 0; + asyh->olut.range = 0; + asyh->olut.output_mode = 1; +} + +static void +headc37d_mode(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + struct nv50_head_mode *m = &asyh->mode; + u32 *push; + if ((push = evo_wait(core, 12))) { + evo_mthd(push, 0x2064 + (head->base.index * 0x400), 5); + evo_data(push, (m->v.active << 16) | m->h.active ); + evo_data(push, (m->v.synce << 16) | m->h.synce ); + evo_data(push, (m->v.blanke << 16) | m->h.blanke ); + evo_data(push, (m->v.blanks << 16) | m->h.blanks ); + evo_data(push, (m->v.blank2e << 16) | m->v.blank2s); + evo_mthd(push, 0x200c + (head->base.index * 0x400), 1); + evo_data(push, m->clock * 1000); + evo_mthd(push, 0x2028 + (head->base.index * 0x400), 1); + evo_data(push, m->clock * 1000); + /*XXX: HEAD_USAGE_BOUNDS, doesn't belong here. */ + evo_mthd(push, 0x2030 + (head->base.index * 0x400), 1); + evo_data(push, 0x00000124); + evo_kick(push, core); + } +} + +static void +headc37d_view(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan; + u32 *push; + if ((push = evo_wait(core, 4))) { + evo_mthd(push, 0x204c + (head->base.index * 0x400), 1); + evo_data(push, (asyh->view.iH << 16) | asyh->view.iW); + evo_mthd(push, 0x2058 + (head->base.index * 0x400), 1); + evo_data(push, (asyh->view.oH << 16) | asyh->view.oW); + evo_kick(push, core); + } +} + +const struct nv50_head_func +headc37d = { + .view = headc37d_view, + .mode = headc37d_mode, + .olut = headc37d_olut, + .olut_set = headc37d_olut_set, + .olut_clr = headc37d_olut_clr, + .curs_layout = head917d_curs_layout, + .curs_format = headc37d_curs_format, + .curs_set = headc37d_curs_set, + .curs_clr = headc37d_curs_clr, + .dither = headc37d_dither, + .procamp = headc37d_procamp, + .or = headc37d_or, +}; diff --git a/drivers/gpu/drm/nouveau/dispnv50/lut.c b/drivers/gpu/drm/nouveau/dispnv50/lut.c new file mode 100644 index 000000000000..a6b96ae2a22f --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/lut.c @@ -0,0 +1,95 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "lut.h" +#include "disp.h" + +#include <drm/drm_color_mgmt.h> +#include <drm/drm_mode.h> +#include <drm/drm_property.h> + +#include <nvif/class.h> + +u32 +nv50_lut_load(struct nv50_lut *lut, bool legacy, int buffer, + struct drm_property_blob *blob) +{ + struct drm_color_lut *in = (struct drm_color_lut *)blob->data; + void __iomem *mem = lut->mem[buffer].object.map.ptr; + const int size = blob->length / sizeof(*in); + int bits, shift, i; + u16 zero, r, g, b; + u32 addr = lut->mem[buffer].addr; + + /* This can't happen.. But it shuts the compiler up. */ + if (WARN_ON(size != 256)) + return 0; + + if (legacy) { + bits = 11; + shift = 3; + zero = 0x0000; + } else { + bits = 14; + shift = 0; + zero = 0x6000; + } + + for (i = 0; i < size; i++) { + r = (drm_color_lut_extract(in[i]. red, bits) + zero) << shift; + g = (drm_color_lut_extract(in[i].green, bits) + zero) << shift; + b = (drm_color_lut_extract(in[i]. blue, bits) + zero) << shift; + writew(r, mem + (i * 0x08) + 0); + writew(g, mem + (i * 0x08) + 2); + writew(b, mem + (i * 0x08) + 4); + } + + /* INTERPOLATE modes require a "next" entry to interpolate with, + * so we replicate the last entry to deal with this for now. + */ + writew(r, mem + (i * 0x08) + 0); + writew(g, mem + (i * 0x08) + 2); + writew(b, mem + (i * 0x08) + 4); + return addr; +} + +void +nv50_lut_fini(struct nv50_lut *lut) +{ + int i; + for (i = 0; i < ARRAY_SIZE(lut->mem); i++) + nvif_mem_fini(&lut->mem[i]); +} + +int +nv50_lut_init(struct nv50_disp *disp, struct nvif_mmu *mmu, + struct nv50_lut *lut) +{ + const u32 size = disp->disp->object.oclass < GF110_DISP ? 257 : 1025; + int i; + for (i = 0; i < ARRAY_SIZE(lut->mem); i++) { + int ret = nvif_mem_init_map(mmu, NVIF_MEM_VRAM, size * 8, + &lut->mem[i]); + if (ret) + return ret; + } + return 0; +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/lut.h b/drivers/gpu/drm/nouveau/dispnv50/lut.h new file mode 100644 index 000000000000..6d7b8352e4cb --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/lut.h @@ -0,0 +1,15 @@ +#ifndef __NV50_KMS_LUT_H__ +#define __NV50_KMS_LUT_H__ +#include <nvif/mem.h> +struct drm_property_blob; +struct nv50_disp; + +struct nv50_lut { + struct nvif_mem mem[2]; +}; + +int nv50_lut_init(struct nv50_disp *, struct nvif_mmu *, struct nv50_lut *); +void nv50_lut_fini(struct nv50_lut *); +u32 nv50_lut_load(struct nv50_lut *, bool legacy, int buffer, + struct drm_property_blob *); +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/oimm.c b/drivers/gpu/drm/nouveau/dispnv50/oimm.c new file mode 100644 index 000000000000..2a2841d344c8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/oimm.c @@ -0,0 +1,51 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "oimm.h" + +#include <nvif/class.h> + +int +nv50_oimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw) +{ + static const struct { + s32 oclass; + int version; + int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *); + } oimms[] = { + { GK104_DISP_OVERLAY, 0, oimm507b_init }, + { GF110_DISP_OVERLAY, 0, oimm507b_init }, + { GT214_DISP_OVERLAY, 0, oimm507b_init }, + { G82_DISP_OVERLAY, 0, oimm507b_init }, + { NV50_DISP_OVERLAY, 0, oimm507b_init }, + {} + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + int cid; + + cid = nvif_mclass(&disp->disp->object, oimms); + if (cid < 0) { + NV_ERROR(drm, "No supported overlay immediate class\n"); + return cid; + } + + return oimms[cid].init(drm, oimms[cid].oclass, wndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/oimm.h b/drivers/gpu/drm/nouveau/dispnv50/oimm.h new file mode 100644 index 000000000000..6fa51f101e94 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/oimm.h @@ -0,0 +1,8 @@ +#ifndef __NV50_KMS_OIMM_H__ +#define __NV50_KMS_OIMM_H__ +#include "wndw.h" + +int oimm507b_init(struct nouveau_drm *, s32, struct nv50_wndw *); + +int nv50_oimm_init(struct nouveau_drm *, struct nv50_wndw *); +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/oimm507b.c b/drivers/gpu/drm/nouveau/dispnv50/oimm507b.c new file mode 100644 index 000000000000..2ee404b3e19f --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/oimm507b.c @@ -0,0 +1,52 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "oimm.h" + +#include <nvif/cl507b.h> + +static int +oimm507b_init_(const struct nv50_wimm_func *func, struct nouveau_drm *drm, + s32 oclass, struct nv50_wndw *wndw) +{ + struct nv50_disp_overlay_v0 args = { + .head = wndw->id, + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + int ret; + + ret = nvif_object_init(&disp->disp->object, 0, oclass, &args, + sizeof(args), &wndw->wimm.base.user); + if (ret) { + NV_ERROR(drm, "oimm%04x allocation failed: %d\n", oclass, ret); + return ret; + } + + nvif_object_map(&wndw->wimm.base.user, NULL, 0); + wndw->immd = func; + return 0; +} + +int +oimm507b_init(struct nouveau_drm *drm, s32 oclass, struct nv50_wndw *wndw) +{ + return oimm507b_init_(&curs507a, drm, oclass, wndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly.c b/drivers/gpu/drm/nouveau/dispnv50/ovly.c new file mode 100644 index 000000000000..90c246d47604 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/ovly.c @@ -0,0 +1,57 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ovly.h" +#include "oimm.h" + +#include <nvif/class.h> + +int +nv50_ovly_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw) +{ + static const struct { + s32 oclass; + int version; + int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **); + } ovlys[] = { + { GK104_DISP_OVERLAY_CONTROL_DMA, 0, ovly917e_new }, + { GF110_DISP_OVERLAY_CONTROL_DMA, 0, ovly907e_new }, + { GT214_DISP_OVERLAY_CHANNEL_DMA, 0, ovly827e_new }, + { GT200_DISP_OVERLAY_CHANNEL_DMA, 0, ovly827e_new }, + { G82_DISP_OVERLAY_CHANNEL_DMA, 0, ovly827e_new }, + { NV50_DISP_OVERLAY_CHANNEL_DMA, 0, ovly507e_new }, + {} + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + int cid, ret; + + cid = nvif_mclass(&disp->disp->object, ovlys); + if (cid < 0) { + NV_ERROR(drm, "No supported overlay class\n"); + return cid; + } + + ret = ovlys[cid].new(drm, head, ovlys[cid].oclass, pwndw); + if (ret) + return ret; + + return nv50_oimm_init(drm, *pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly.h b/drivers/gpu/drm/nouveau/dispnv50/ovly.h new file mode 100644 index 000000000000..4869d52d1786 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/ovly.h @@ -0,0 +1,30 @@ +#ifndef __NV50_KMS_OVLY_H__ +#define __NV50_KMS_OVLY_H__ +#include "wndw.h" + +int ovly507e_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); +int ovly507e_new_(const struct nv50_wndw_func *, const u32 *format, + struct nouveau_drm *, int head, s32 oclass, + u32 interlock_data, struct nv50_wndw **); +int ovly507e_acquire(struct nv50_wndw *, struct nv50_wndw_atom *, + struct nv50_head_atom *); +void ovly507e_release(struct nv50_wndw *, struct nv50_wndw_atom *, + struct nv50_head_atom *); +void ovly507e_ntfy_set(struct nv50_wndw *, struct nv50_wndw_atom *); +void ovly507e_ntfy_clr(struct nv50_wndw *); +void ovly507e_image_clr(struct nv50_wndw *); +void ovly507e_scale_set(struct nv50_wndw *, struct nv50_wndw_atom *); +void ovly507e_update(struct nv50_wndw *, u32 *); + +extern const u32 ovly827e_format[]; +void ovly827e_ntfy_reset(struct nouveau_bo *, u32); +int ovly827e_ntfy_wait_begun(struct nouveau_bo *, u32, struct nvif_device *); + +extern const struct nv50_wndw_func ovly907e; + +int ovly827e_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); +int ovly907e_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); +int ovly917e_new(struct nouveau_drm *, int, s32, struct nv50_wndw **); + +int nv50_ovly_new(struct nouveau_drm *, int head, struct nv50_wndw **); +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly507e.c b/drivers/gpu/drm/nouveau/dispnv50/ovly507e.c new file mode 100644 index 000000000000..cc417664f823 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/ovly507e.c @@ -0,0 +1,217 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ovly.h" +#include "atom.h" + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_plane_helper.h> + +#include <nvif/cl507e.h> +#include <nvif/event.h> + +void +ovly507e_update(struct nv50_wndw *wndw, u32 *interlock) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 2))) { + evo_mthd(push, 0x0080, 1); + evo_data(push, interlock[NV50_DISP_INTERLOCK_CORE]); + evo_kick(push, &wndw->wndw); + } +} + +void +ovly507e_scale_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 4))) { + evo_mthd(push, 0x00e0, 3); + evo_data(push, asyw->scale.sy << 16 | asyw->scale.sx); + evo_data(push, asyw->scale.sh << 16 | asyw->scale.sw); + evo_data(push, asyw->scale.dw); + evo_kick(push, &wndw->wndw); + } +} + +void +ovly507e_image_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 4))) { + evo_mthd(push, 0x0084, 1); + evo_data(push, 0x00000000); + evo_mthd(push, 0x00c0, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +static void +ovly507e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 12))) { + evo_mthd(push, 0x0084, 1); + evo_data(push, asyw->image.interval << 4); + evo_mthd(push, 0x00c0, 1); + evo_data(push, asyw->image.handle[0]); + evo_mthd(push, 0x0100, 1); + evo_data(push, 0x00000002); + evo_mthd(push, 0x0800, 1); + evo_data(push, asyw->image.offset[0] >> 8); + evo_mthd(push, 0x0808, 3); + evo_data(push, asyw->image.h << 16 | asyw->image.w); + evo_data(push, asyw->image.layout << 20 | + (asyw->image.pitch[0] >> 8) << 8 | + asyw->image.blocks[0] << 8 | + asyw->image.blockh); + evo_data(push, asyw->image.kind << 16 | + asyw->image.format << 8 | + asyw->image.colorspace); + evo_kick(push, &wndw->wndw); + } +} + +void +ovly507e_ntfy_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 2))) { + evo_mthd(push, 0x00a4, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +void +ovly507e_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 3))) { + evo_mthd(push, 0x00a0, 2); + evo_data(push, asyw->ntfy.awaken << 30 | asyw->ntfy.offset); + evo_data(push, asyw->ntfy.handle); + evo_kick(push, &wndw->wndw); + } +} + +void +ovly507e_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + asyh->ovly.cpp = 0; +} + +int +ovly507e_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + const struct drm_framebuffer *fb = asyw->state.fb; + int ret; + + ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true); + if (ret) + return ret; + + asyh->ovly.cpp = fb->format->cpp[0]; + return 0; +} + +#include "nouveau_bo.h" + +static const struct nv50_wndw_func +ovly507e = { + .acquire = ovly507e_acquire, + .release = ovly507e_release, + .ntfy_set = ovly507e_ntfy_set, + .ntfy_clr = ovly507e_ntfy_clr, + .ntfy_reset = base507c_ntfy_reset, + .ntfy_wait_begun = base507c_ntfy_wait_begun, + .image_set = ovly507e_image_set, + .image_clr = ovly507e_image_clr, + .scale_set = ovly507e_scale_set, + .update = ovly507e_update, +}; + +static const u32 +ovly507e_format[] = { + DRM_FORMAT_YUYV, + DRM_FORMAT_UYVY, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ARGB1555, + 0 +}; + +int +ovly507e_new_(const struct nv50_wndw_func *func, const u32 *format, + struct nouveau_drm *drm, int head, s32 oclass, u32 interlock_data, + struct nv50_wndw **pwndw) +{ + struct nv50_disp_overlay_channel_dma_v0 args = { + .head = head, + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + struct nv50_wndw *wndw; + int ret; + + ret = nv50_wndw_new_(func, drm->dev, DRM_PLANE_TYPE_OVERLAY, + "ovly", head, format, BIT(head), + NV50_DISP_INTERLOCK_OVLY, interlock_data, + &wndw); + if (*pwndw = wndw, ret) + return ret; + + ret = nv50_dmac_create(&drm->client.device, &disp->disp->object, + &oclass, 0, &args, sizeof(args), + disp->sync->bo.offset, &wndw->wndw); + if (ret) { + NV_ERROR(drm, "ovly%04x allocation failed: %d\n", oclass, ret); + return ret; + } + + ret = nvif_notify_init(&wndw->wndw.base.user, wndw->notify.func, false, + NV50_DISP_OVERLAY_CHANNEL_DMA_V0_NTFY_UEVENT, + &(struct nvif_notify_uevent_req) {}, + sizeof(struct nvif_notify_uevent_req), + sizeof(struct nvif_notify_uevent_rep), + &wndw->notify); + if (ret) + return ret; + + wndw->ntfy = NV50_DISP_OVLY_NTFY(wndw->id); + wndw->sema = NV50_DISP_OVLY_SEM0(wndw->id); + wndw->data = 0x00000000; + return 0; +} + +int +ovly507e_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return ovly507e_new_(&ovly507e, ovly507e_format, drm, head, oclass, + 0x00000004 << (head * 8), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly827e.c b/drivers/gpu/drm/nouveau/dispnv50/ovly827e.c new file mode 100644 index 000000000000..aaa9fe5a4fc8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/ovly827e.c @@ -0,0 +1,107 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ovly.h" +#include "atom.h" + +#include <nouveau_bo.h> + +static void +ovly827e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 12))) { + evo_mthd(push, 0x0084, 1); + evo_data(push, asyw->image.interval << 4); + evo_mthd(push, 0x00c0, 1); + evo_data(push, asyw->image.handle[0]); + evo_mthd(push, 0x0100, 1); + evo_data(push, 0x00000002); + evo_mthd(push, 0x0800, 1); + evo_data(push, asyw->image.offset[0] >> 8); + evo_mthd(push, 0x0808, 3); + evo_data(push, asyw->image.h << 16 | asyw->image.w); + evo_data(push, asyw->image.layout << 20 | + (asyw->image.pitch[0] >> 8) << 8 | + asyw->image.blocks[0] << 8 | + asyw->image.blockh); + evo_data(push, asyw->image.format << 8 | + asyw->image.colorspace); + evo_kick(push, &wndw->wndw); + } +} + +int +ovly827e_ntfy_wait_begun(struct nouveau_bo *bo, u32 offset, + struct nvif_device *device) +{ + s64 time = nvif_msec(device, 2000ULL, + u32 data = nouveau_bo_rd32(bo, offset / 4 + 3); + if ((data & 0xffff0000) == 0xffff0000) + break; + usleep_range(1, 2); + ); + return time < 0 ? time : 0; +} + +void +ovly827e_ntfy_reset(struct nouveau_bo *bo, u32 offset) +{ + nouveau_bo_wr32(bo, offset / 4 + 0, 0x00000000); + nouveau_bo_wr32(bo, offset / 4 + 1, 0x00000000); + nouveau_bo_wr32(bo, offset / 4 + 2, 0x00000000); + nouveau_bo_wr32(bo, offset / 4 + 3, 0x80000000); +} + +static const struct nv50_wndw_func +ovly827e = { + .acquire = ovly507e_acquire, + .release = ovly507e_release, + .ntfy_set = ovly507e_ntfy_set, + .ntfy_clr = ovly507e_ntfy_clr, + .ntfy_reset = ovly827e_ntfy_reset, + .ntfy_wait_begun = ovly827e_ntfy_wait_begun, + .image_set = ovly827e_image_set, + .image_clr = ovly507e_image_clr, + .scale_set = ovly507e_scale_set, + .update = ovly507e_update, +}; + +const u32 +ovly827e_format[] = { + DRM_FORMAT_YUYV, + DRM_FORMAT_UYVY, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ABGR2101010, + 0 +}; + +int +ovly827e_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return ovly507e_new_(&ovly827e, ovly827e_format, drm, head, oclass, + 0x00000004 << (head * 8), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly907e.c b/drivers/gpu/drm/nouveau/dispnv50/ovly907e.c new file mode 100644 index 000000000000..a3ce53046015 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/ovly907e.c @@ -0,0 +1,70 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ovly.h" +#include "atom.h" + +static void +ovly907e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 12))) { + evo_mthd(push, 0x0084, 1); + evo_data(push, asyw->image.interval << 4); + evo_mthd(push, 0x00c0, 1); + evo_data(push, asyw->image.handle[0]); + evo_mthd(push, 0x0100, 1); + evo_data(push, 0x00000002); + evo_mthd(push, 0x0400, 1); + evo_data(push, asyw->image.offset[0] >> 8); + evo_mthd(push, 0x0408, 3); + evo_data(push, asyw->image.h << 16 | asyw->image.w); + evo_data(push, asyw->image.layout << 24 | + (asyw->image.pitch[0] >> 8) << 8 | + asyw->image.blocks[0] << 8 | + asyw->image.blockh); + evo_data(push, asyw->image.format << 8 | + asyw->image.colorspace); + evo_kick(push, &wndw->wndw); + } +} + +const struct nv50_wndw_func +ovly907e = { + .acquire = ovly507e_acquire, + .release = ovly507e_release, + .ntfy_set = ovly507e_ntfy_set, + .ntfy_clr = ovly507e_ntfy_clr, + .ntfy_reset = ovly827e_ntfy_reset, + .ntfy_wait_begun = ovly827e_ntfy_wait_begun, + .image_set = ovly907e_image_set, + .image_clr = ovly507e_image_clr, + .scale_set = ovly507e_scale_set, + .update = ovly507e_update, +}; + +int +ovly907e_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return ovly507e_new_(&ovly907e, ovly827e_format, drm, head, oclass, + 0x00000004 << (head * 4), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly917e.c b/drivers/gpu/drm/nouveau/dispnv50/ovly917e.c new file mode 100644 index 000000000000..505fa7e78523 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/ovly917e.c @@ -0,0 +1,45 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ovly.h" + +static const u32 +ovly917e_format[] = { + DRM_FORMAT_YUYV, + DRM_FORMAT_UYVY, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ABGR2101010, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_ARGB2101010, + 0 +}; + +int +ovly917e_new(struct nouveau_drm *drm, int head, s32 oclass, + struct nv50_wndw **pwndw) +{ + return ovly507e_new_(&ovly907e, ovly917e_format, drm, head, oclass, + 0x00000004 << (head * 4), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/pior507d.c b/drivers/gpu/drm/nouveau/dispnv50/pior507d.c new file mode 100644 index 000000000000..d2bac6a341dc --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/pior507d.c @@ -0,0 +1,44 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "core.h" + +static void +pior507d_ctrl(struct nv50_core *core, int or, u32 ctrl, + struct nv50_head_atom *asyh) +{ + u32 *push; + if ((push = evo_wait(&core->chan, 2))) { + if (asyh) { + ctrl |= asyh->or.depth << 16; + ctrl |= asyh->or.nvsync << 13; + ctrl |= asyh->or.nhsync << 12; + } + evo_mthd(push, 0x0700 + (or * 0x040), 1); + evo_data(push, ctrl); + evo_kick(push, &core->chan); + } +} + +const struct nv50_outp_func +pior507d = { + .ctrl = pior507d_ctrl, +}; diff --git a/drivers/gpu/drm/nouveau/dispnv50/sor507d.c b/drivers/gpu/drm/nouveau/dispnv50/sor507d.c new file mode 100644 index 000000000000..5222fe6a9b21 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/sor507d.c @@ -0,0 +1,44 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "core.h" + +static void +sor507d_ctrl(struct nv50_core *core, int or, u32 ctrl, + struct nv50_head_atom *asyh) +{ + u32 *push; + if ((push = evo_wait(&core->chan, 2))) { + if (asyh) { + ctrl |= asyh->or.depth << 16; + ctrl |= asyh->or.nvsync << 13; + ctrl |= asyh->or.nhsync << 12; + } + evo_mthd(push, 0x0600 + (or * 0x40), 1); + evo_data(push, ctrl); + evo_kick(push, &core->chan); + } +} + +const struct nv50_outp_func +sor507d = { + .ctrl = sor507d_ctrl, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt200.c b/drivers/gpu/drm/nouveau/dispnv50/sor907d.c index b234547708fc..b0314ec11fb3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt200.c +++ b/drivers/gpu/drm/nouveau/dispnv50/sor907d.c @@ -1,5 +1,5 @@ /* - * Copyright 2015 Red Hat Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,21 +18,24 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> */ -#include "dmacnv50.h" -#include "rootnv50.h" +#include "core.h" #include <nvif/class.h> -const struct nv50_disp_dmac_oclass -gt200_disp_core_oclass = { - .base.oclass = GT200_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &nv50_disp_core_func, - .mthd = &g84_disp_core_chan_mthd, - .chid = 0, +static void +sor907d_ctrl(struct nv50_core *core, int or, u32 ctrl, + struct nv50_head_atom *asyh) +{ + u32 *push; + if ((push = evo_wait(&core->chan, 2))) { + evo_mthd(push, 0x0200 + (or * 0x20), 1); + evo_data(push, ctrl); + evo_kick(push, &core->chan); + } +} + +const struct nv50_outp_func +sor907d = { + .ctrl = sor907d_ctrl, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c b/drivers/gpu/drm/nouveau/dispnv50/sorc37d.c index 2a99db4bf8f8..dff059241c5d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c +++ b/drivers/gpu/drm/nouveau/dispnv50/sorc37d.c @@ -1,5 +1,5 @@ /* - * Copyright 2012 Red Hat Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,20 +18,22 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs */ -#include "channv50.h" -#include "rootnv50.h" +#include "core.h" -#include <nvif/class.h> +static void +sorc37d_ctrl(struct nv50_core *core, int or, u32 ctrl, + struct nv50_head_atom *asyh) +{ + u32 *push; + if ((push = evo_wait(&core->chan, 2))) { + evo_mthd(push, 0x0300 + (or * 0x20), 1); + evo_data(push, ctrl); + evo_kick(push, &core->chan); + } +} -const struct nv50_disp_pioc_oclass -gk104_disp_curs_oclass = { - .base.oclass = GK104_DISP_CURSOR, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_curs_new, - .func = &gf119_disp_pioc_func, - .chid = { 13, 13 }, +const struct nv50_outp_func +sorc37d = { + .ctrl = sorc37d_ctrl, }; diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.c b/drivers/gpu/drm/nouveau/dispnv50/wimm.c new file mode 100644 index 000000000000..fc36e0696407 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.c @@ -0,0 +1,47 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wimm.h" + +#include <nvif/class.h> + +int +nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw) +{ + struct { + s32 oclass; + int version; + int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *); + } wimms[] = { + { GV100_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, + {} + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + int cid; + + cid = nvif_mclass(&disp->disp->object, wimms); + if (cid < 0) { + NV_ERROR(drm, "No supported window immediate class\n"); + return cid; + } + + return wimms[cid].init(drm, wimms[cid].oclass, wndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.h b/drivers/gpu/drm/nouveau/dispnv50/wimm.h new file mode 100644 index 000000000000..363052309be9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.h @@ -0,0 +1,8 @@ +#ifndef __NV50_KMS_WIMM_H__ +#define __NV50_KMS_WIMM_H__ +#include "wndw.h" + +int nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *); + +int wimmc37b_init(struct nouveau_drm *, s32, struct nv50_wndw *); +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c new file mode 100644 index 000000000000..9103b8494279 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c @@ -0,0 +1,86 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wimm.h" +#include "atom.h" +#include "wndw.h" + +#include <nvif/clc37b.h> + +static void +wimmc37b_update(struct nv50_wndw *wndw, u32 *interlock) +{ + u32 *push; + if ((push = evo_wait(&wndw->wimm, 2))) { + evo_mthd(push, 0x0200, 1); + if (interlock[NV50_DISP_INTERLOCK_WNDW] & wndw->interlock.data) + evo_data(push, 0x00000003); + else + evo_data(push, 0x00000001); + evo_kick(push, &wndw->wimm); + } +} + +static void +wimmc37b_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wimm, 2))) { + evo_mthd(push, 0x0208, 1); + evo_data(push, asyw->point.y << 16 | asyw->point.x); + evo_kick(push, &wndw->wimm); + } +} + +static const struct nv50_wimm_func +wimmc37b = { + .point = wimmc37b_point, + .update = wimmc37b_update, +}; + +static int +wimmc37b_init_(const struct nv50_wimm_func *func, struct nouveau_drm *drm, + s32 oclass, struct nv50_wndw *wndw) +{ + struct nvc37b_window_imm_channel_dma_v0 args = { + .pushbuf = 0xb0007b00 | wndw->id, + .index = wndw->id, + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + int ret; + + ret = nv50_dmac_create(&drm->client.device, &disp->disp->object, + &oclass, 0, &args, sizeof(args), 0, + &wndw->wimm); + if (ret) { + NV_ERROR(drm, "wimm%04x allocation failed: %d\n", oclass, ret); + return ret; + } + + wndw->immd = func; + return 0; +} + +int +wimmc37b_init(struct nouveau_drm *drm, s32 oclass, struct nv50_wndw *wndw) +{ + return wimmc37b_init_(&wimmc37b, drm, oclass, wndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c new file mode 100644 index 000000000000..224963b533a6 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -0,0 +1,641 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wndw.h" +#include "wimm.h" + +#include <nvif/class.h> +#include <nvif/cl0002.h> + +#include <drm/drm_atomic_helper.h> +#include "nouveau_bo.h" + +static void +nv50_wndw_ctxdma_del(struct nv50_wndw_ctxdma *ctxdma) +{ + nvif_object_fini(&ctxdma->object); + list_del(&ctxdma->head); + kfree(ctxdma); +} + +static struct nv50_wndw_ctxdma * +nv50_wndw_ctxdma_new(struct nv50_wndw *wndw, struct nouveau_framebuffer *fb) +{ + struct nouveau_drm *drm = nouveau_drm(fb->base.dev); + struct nv50_wndw_ctxdma *ctxdma; + const u8 kind = fb->nvbo->kind; + const u32 handle = 0xfb000000 | kind; + struct { + struct nv_dma_v0 base; + union { + struct nv50_dma_v0 nv50; + struct gf100_dma_v0 gf100; + struct gf119_dma_v0 gf119; + }; + } args = {}; + u32 argc = sizeof(args.base); + int ret; + + list_for_each_entry(ctxdma, &wndw->ctxdma.list, head) { + if (ctxdma->object.handle == handle) + return ctxdma; + } + + if (!(ctxdma = kzalloc(sizeof(*ctxdma), GFP_KERNEL))) + return ERR_PTR(-ENOMEM); + list_add(&ctxdma->head, &wndw->ctxdma.list); + + args.base.target = NV_DMA_V0_TARGET_VRAM; + args.base.access = NV_DMA_V0_ACCESS_RDWR; + args.base.start = 0; + args.base.limit = drm->client.device.info.ram_user - 1; + + if (drm->client.device.info.chipset < 0x80) { + args.nv50.part = NV50_DMA_V0_PART_256; + argc += sizeof(args.nv50); + } else + if (drm->client.device.info.chipset < 0xc0) { + args.nv50.part = NV50_DMA_V0_PART_256; + args.nv50.kind = kind; + argc += sizeof(args.nv50); + } else + if (drm->client.device.info.chipset < 0xd0) { + args.gf100.kind = kind; + argc += sizeof(args.gf100); + } else { + args.gf119.page = GF119_DMA_V0_PAGE_LP; + args.gf119.kind = kind; + argc += sizeof(args.gf119); + } + + ret = nvif_object_init(wndw->ctxdma.parent, handle, NV_DMA_IN_MEMORY, + &args, argc, &ctxdma->object); + if (ret) { + nv50_wndw_ctxdma_del(ctxdma); + return ERR_PTR(ret); + } + + return ctxdma; +} + +int +nv50_wndw_wait_armed(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + struct nv50_disp *disp = nv50_disp(wndw->plane.dev); + if (asyw->set.ntfy) { + return wndw->func->ntfy_wait_begun(disp->sync, + asyw->ntfy.offset, + wndw->wndw.base.device); + } + return 0; +} + +void +nv50_wndw_flush_clr(struct nv50_wndw *wndw, u32 *interlock, bool flush, + struct nv50_wndw_atom *asyw) +{ + union nv50_wndw_atom_mask clr = { + .mask = asyw->clr.mask & ~(flush ? 0 : asyw->set.mask), + }; + if (clr.sema ) wndw->func-> sema_clr(wndw); + if (clr.ntfy ) wndw->func-> ntfy_clr(wndw); + if (clr.xlut ) wndw->func-> xlut_clr(wndw); + if (clr.image) wndw->func->image_clr(wndw); + + interlock[wndw->interlock.type] |= wndw->interlock.data; +} + +void +nv50_wndw_flush_set(struct nv50_wndw *wndw, u32 *interlock, + struct nv50_wndw_atom *asyw) +{ + if (interlock) { + asyw->image.mode = 0; + asyw->image.interval = 1; + } + + if (asyw->set.sema ) wndw->func->sema_set (wndw, asyw); + if (asyw->set.ntfy ) wndw->func->ntfy_set (wndw, asyw); + if (asyw->set.image) wndw->func->image_set(wndw, asyw); + + if (asyw->set.xlut ) { + if (asyw->ilut) { + asyw->xlut.i.offset = + nv50_lut_load(&wndw->ilut, + asyw->xlut.i.mode <= 1, + asyw->xlut.i.buffer, + asyw->ilut); + } + wndw->func->xlut_set(wndw, asyw); + } + + if (asyw->set.scale) wndw->func->scale_set(wndw, asyw); + if (asyw->set.point) { + if (asyw->set.point = false, asyw->set.mask) + interlock[wndw->interlock.type] |= wndw->interlock.data; + interlock[NV50_DISP_INTERLOCK_WIMM] |= wndw->interlock.data; + + wndw->immd->point(wndw, asyw); + wndw->immd->update(wndw, interlock); + } else { + interlock[wndw->interlock.type] |= wndw->interlock.data; + } +} + +void +nv50_wndw_ntfy_enable(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + struct nv50_disp *disp = nv50_disp(wndw->plane.dev); + + asyw->ntfy.handle = wndw->wndw.sync.handle; + asyw->ntfy.offset = wndw->ntfy; + asyw->ntfy.awaken = false; + asyw->set.ntfy = true; + + wndw->func->ntfy_reset(disp->sync, wndw->ntfy); + wndw->ntfy ^= 0x10; +} + +static void +nv50_wndw_atomic_check_release(struct nv50_wndw *wndw, + struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev); + NV_ATOMIC(drm, "%s release\n", wndw->plane.name); + wndw->func->release(wndw, asyw, asyh); + asyw->ntfy.handle = 0; + asyw->sema.handle = 0; +} + +static int +nv50_wndw_atomic_check_acquire_yuv(struct nv50_wndw_atom *asyw) +{ + switch (asyw->state.fb->format->format) { + case DRM_FORMAT_YUYV: asyw->image.format = 0x28; break; + case DRM_FORMAT_UYVY: asyw->image.format = 0x29; break; + default: + WARN_ON(1); + return -EINVAL; + } + asyw->image.colorspace = 1; + return 0; +} + +static int +nv50_wndw_atomic_check_acquire_rgb(struct nv50_wndw_atom *asyw) +{ + switch (asyw->state.fb->format->format) { + case DRM_FORMAT_C8 : asyw->image.format = 0x1e; break; + case DRM_FORMAT_XRGB8888 : + case DRM_FORMAT_ARGB8888 : asyw->image.format = 0xcf; break; + case DRM_FORMAT_RGB565 : asyw->image.format = 0xe8; break; + case DRM_FORMAT_XRGB1555 : + case DRM_FORMAT_ARGB1555 : asyw->image.format = 0xe9; break; + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ABGR2101010: asyw->image.format = 0xd1; break; + case DRM_FORMAT_XBGR8888 : + case DRM_FORMAT_ABGR8888 : asyw->image.format = 0xd5; break; + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_ARGB2101010: asyw->image.format = 0xdf; break; + default: + return -EINVAL; + } + asyw->image.colorspace = 0; + return 0; +} + +static int +nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, bool modeset, + struct nv50_wndw_atom *armw, + struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + struct nouveau_framebuffer *fb = nouveau_framebuffer(asyw->state.fb); + struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev); + int ret; + + NV_ATOMIC(drm, "%s acquire\n", wndw->plane.name); + + if (asyw->state.fb != armw->state.fb || !armw->visible || modeset) { + asyw->image.w = fb->base.width; + asyw->image.h = fb->base.height; + asyw->image.kind = fb->nvbo->kind; + + ret = nv50_wndw_atomic_check_acquire_rgb(asyw); + if (ret) { + ret = nv50_wndw_atomic_check_acquire_yuv(asyw); + if (ret) + return ret; + } + + if (asyw->image.kind) { + asyw->image.layout = 0; + if (drm->client.device.info.chipset >= 0xc0) + asyw->image.blockh = fb->nvbo->mode >> 4; + else + asyw->image.blockh = fb->nvbo->mode; + asyw->image.blocks[0] = fb->base.pitches[0] / 64; + asyw->image.pitch[0] = 0; + } else { + asyw->image.layout = 1; + asyw->image.blockh = 0; + asyw->image.blocks[0] = 0; + asyw->image.pitch[0] = fb->base.pitches[0]; + } + + if (!(asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)) + asyw->image.interval = 1; + else + asyw->image.interval = 0; + asyw->image.mode = asyw->image.interval ? 0 : 1; + asyw->set.image = wndw->func->image_set != NULL; + } + + if (wndw->func->scale_set) { + asyw->scale.sx = asyw->state.src_x >> 16; + asyw->scale.sy = asyw->state.src_y >> 16; + asyw->scale.sw = asyw->state.src_w >> 16; + asyw->scale.sh = asyw->state.src_h >> 16; + asyw->scale.dw = asyw->state.crtc_w; + asyw->scale.dh = asyw->state.crtc_h; + if (memcmp(&armw->scale, &asyw->scale, sizeof(asyw->scale))) + asyw->set.scale = true; + } + + if (wndw->immd) { + asyw->point.x = asyw->state.crtc_x; + asyw->point.y = asyw->state.crtc_y; + if (memcmp(&armw->point, &asyw->point, sizeof(asyw->point))) + asyw->set.point = true; + } + + return wndw->func->acquire(wndw, asyw, asyh); +} + +static void +nv50_wndw_atomic_check_lut(struct nv50_wndw *wndw, + struct nv50_wndw_atom *armw, + struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + struct drm_property_blob *ilut = asyh->state.degamma_lut; + + /* I8 format without an input LUT makes no sense, and the + * HW error-checks for this. + * + * In order to handle legacy gamma, when there's no input + * LUT we need to steal the output LUT and use it instead. + */ + if (!ilut && asyw->state.fb->format->format == DRM_FORMAT_C8) { + /* This should be an error, but there's legacy clients + * that do a modeset before providing a gamma table. + * + * We keep the window disabled to avoid angering HW. + */ + if (!(ilut = asyh->state.gamma_lut)) { + asyw->visible = false; + return; + } + + if (wndw->func->ilut) + asyh->wndw.olut |= BIT(wndw->id); + } else { + asyh->wndw.olut &= ~BIT(wndw->id); + } + + /* Recalculate LUT state. */ + memset(&asyw->xlut, 0x00, sizeof(asyw->xlut)); + if ((asyw->ilut = wndw->func->ilut ? ilut : NULL)) { + wndw->func->ilut(wndw, asyw); + asyw->xlut.handle = wndw->wndw.vram.handle; + asyw->xlut.i.buffer = !asyw->xlut.i.buffer; + asyw->set.xlut = true; + } + + /* Handle setting base SET_OUTPUT_LUT_LO_ENABLE_USE_CORE_LUT. */ + if (wndw->func->olut_core && + (!armw->visible || (armw->xlut.handle && !asyw->xlut.handle))) + asyw->set.xlut = true; + + /* Can't do an immediate flip while changing the LUT. */ + asyh->state.pageflip_flags &= ~DRM_MODE_PAGE_FLIP_ASYNC; +} + +static int +nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state) +{ + struct nouveau_drm *drm = nouveau_drm(plane->dev); + struct nv50_wndw *wndw = nv50_wndw(plane); + struct nv50_wndw_atom *armw = nv50_wndw_atom(wndw->plane.state); + struct nv50_wndw_atom *asyw = nv50_wndw_atom(state); + struct nv50_head_atom *harm = NULL, *asyh = NULL; + bool modeset = false; + int ret; + + NV_ATOMIC(drm, "%s atomic_check\n", plane->name); + + /* Fetch the assembly state for the head the window will belong to, + * and determine whether the window will be visible. + */ + if (asyw->state.crtc) { + asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc); + if (IS_ERR(asyh)) + return PTR_ERR(asyh); + modeset = drm_atomic_crtc_needs_modeset(&asyh->state); + asyw->visible = asyh->state.active; + } else { + asyw->visible = false; + } + + /* Fetch assembly state for the head the window used to belong to. */ + if (armw->state.crtc) { + harm = nv50_head_atom_get(asyw->state.state, armw->state.crtc); + if (IS_ERR(harm)) + return PTR_ERR(harm); + } + + /* LUT configuration can potentially cause the window to be disabled. */ + if (asyw->visible && wndw->func->xlut_set && + (!armw->visible || + asyh->state.color_mgmt_changed || + asyw->state.fb->format->format != + armw->state.fb->format->format)) + nv50_wndw_atomic_check_lut(wndw, armw, asyw, asyh); + + /* Calculate new window state. */ + if (asyw->visible) { + ret = nv50_wndw_atomic_check_acquire(wndw, modeset, + armw, asyw, asyh); + if (ret) + return ret; + + asyh->wndw.mask |= BIT(wndw->id); + } else + if (armw->visible) { + nv50_wndw_atomic_check_release(wndw, asyw, harm); + harm->wndw.mask &= ~BIT(wndw->id); + } else { + return 0; + } + + /* Aside from the obvious case where the window is actively being + * disabled, we might also need to temporarily disable the window + * when performing certain modeset operations. + */ + if (!asyw->visible || modeset) { + asyw->clr.ntfy = armw->ntfy.handle != 0; + asyw->clr.sema = armw->sema.handle != 0; + asyw->clr.xlut = armw->xlut.handle != 0; + if (wndw->func->image_clr) + asyw->clr.image = armw->image.handle[0] != 0; + } + + return 0; +} + +static void +nv50_wndw_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) +{ + struct nouveau_framebuffer *fb = nouveau_framebuffer(old_state->fb); + struct nouveau_drm *drm = nouveau_drm(plane->dev); + + NV_ATOMIC(drm, "%s cleanup: %p\n", plane->name, old_state->fb); + if (!old_state->fb) + return; + + nouveau_bo_unpin(fb->nvbo); +} + +static int +nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) +{ + struct nouveau_framebuffer *fb = nouveau_framebuffer(state->fb); + struct nouveau_drm *drm = nouveau_drm(plane->dev); + struct nv50_wndw *wndw = nv50_wndw(plane); + struct nv50_wndw_atom *asyw = nv50_wndw_atom(state); + struct nv50_head_atom *asyh; + struct nv50_wndw_ctxdma *ctxdma; + int ret; + + NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, state->fb); + if (!asyw->state.fb) + return 0; + + ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM, true); + if (ret) + return ret; + + ctxdma = nv50_wndw_ctxdma_new(wndw, fb); + if (IS_ERR(ctxdma)) { + nouveau_bo_unpin(fb->nvbo); + return PTR_ERR(ctxdma); + } + + asyw->state.fence = reservation_object_get_excl_rcu(fb->nvbo->bo.resv); + asyw->image.handle[0] = ctxdma->object.handle; + asyw->image.offset[0] = fb->nvbo->bo.offset; + + if (wndw->func->prepare) { + asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc); + if (IS_ERR(asyh)) + return PTR_ERR(asyh); + + wndw->func->prepare(wndw, asyh, asyw); + } + + return 0; +} + +static const struct drm_plane_helper_funcs +nv50_wndw_helper = { + .prepare_fb = nv50_wndw_prepare_fb, + .cleanup_fb = nv50_wndw_cleanup_fb, + .atomic_check = nv50_wndw_atomic_check, +}; + +static void +nv50_wndw_atomic_destroy_state(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct nv50_wndw_atom *asyw = nv50_wndw_atom(state); + __drm_atomic_helper_plane_destroy_state(&asyw->state); + kfree(asyw); +} + +static struct drm_plane_state * +nv50_wndw_atomic_duplicate_state(struct drm_plane *plane) +{ + struct nv50_wndw_atom *armw = nv50_wndw_atom(plane->state); + struct nv50_wndw_atom *asyw; + if (!(asyw = kmalloc(sizeof(*asyw), GFP_KERNEL))) + return NULL; + __drm_atomic_helper_plane_duplicate_state(plane, &asyw->state); + asyw->sema = armw->sema; + asyw->ntfy = armw->ntfy; + asyw->ilut = NULL; + asyw->xlut = armw->xlut; + asyw->image = armw->image; + asyw->point = armw->point; + asyw->clr.mask = 0; + asyw->set.mask = 0; + return &asyw->state; +} + +static void +nv50_wndw_reset(struct drm_plane *plane) +{ + struct nv50_wndw_atom *asyw; + + if (WARN_ON(!(asyw = kzalloc(sizeof(*asyw), GFP_KERNEL)))) + return; + + if (plane->state) + plane->funcs->atomic_destroy_state(plane, plane->state); + plane->state = &asyw->state; + plane->state->plane = plane; + plane->state->rotation = DRM_MODE_ROTATE_0; +} + +static void +nv50_wndw_destroy(struct drm_plane *plane) +{ + struct nv50_wndw *wndw = nv50_wndw(plane); + struct nv50_wndw_ctxdma *ctxdma, *ctxtmp; + + list_for_each_entry_safe(ctxdma, ctxtmp, &wndw->ctxdma.list, head) { + nv50_wndw_ctxdma_del(ctxdma); + } + + nvif_notify_fini(&wndw->notify); + nv50_dmac_destroy(&wndw->wimm); + nv50_dmac_destroy(&wndw->wndw); + + nv50_lut_fini(&wndw->ilut); + + drm_plane_cleanup(&wndw->plane); + kfree(wndw); +} + +const struct drm_plane_funcs +nv50_wndw = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = nv50_wndw_destroy, + .reset = nv50_wndw_reset, + .atomic_duplicate_state = nv50_wndw_atomic_duplicate_state, + .atomic_destroy_state = nv50_wndw_atomic_destroy_state, +}; + +static int +nv50_wndw_notify(struct nvif_notify *notify) +{ + return NVIF_NOTIFY_KEEP; +} + +void +nv50_wndw_fini(struct nv50_wndw *wndw) +{ + nvif_notify_put(&wndw->notify); +} + +void +nv50_wndw_init(struct nv50_wndw *wndw) +{ + nvif_notify_get(&wndw->notify); +} + +int +nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, + enum drm_plane_type type, const char *name, int index, + const u32 *format, u32 heads, + enum nv50_disp_interlock_type interlock_type, u32 interlock_data, + struct nv50_wndw **pwndw) +{ + struct nouveau_drm *drm = nouveau_drm(dev); + struct nvif_mmu *mmu = &drm->client.mmu; + struct nv50_disp *disp = nv50_disp(dev); + struct nv50_wndw *wndw; + int nformat; + int ret; + + if (!(wndw = *pwndw = kzalloc(sizeof(*wndw), GFP_KERNEL))) + return -ENOMEM; + wndw->func = func; + wndw->id = index; + wndw->interlock.type = interlock_type; + wndw->interlock.data = interlock_data; + wndw->ctxdma.parent = &wndw->wndw.base.user; + + wndw->ctxdma.parent = &wndw->wndw.base.user; + INIT_LIST_HEAD(&wndw->ctxdma.list); + + for (nformat = 0; format[nformat]; nformat++); + + ret = drm_universal_plane_init(dev, &wndw->plane, heads, &nv50_wndw, + format, nformat, NULL, + type, "%s-%d", name, index); + if (ret) { + kfree(*pwndw); + *pwndw = NULL; + return ret; + } + + drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); + + if (wndw->func->ilut) { + ret = nv50_lut_init(disp, mmu, &wndw->ilut); + if (ret) + return ret; + } + + wndw->notify.func = nv50_wndw_notify; + return 0; +} + +int +nv50_wndw_new(struct nouveau_drm *drm, enum drm_plane_type type, int index, + struct nv50_wndw **pwndw) +{ + struct { + s32 oclass; + int version; + int (*new)(struct nouveau_drm *, enum drm_plane_type, + int, s32, struct nv50_wndw **); + } wndws[] = { + { GV100_DISP_WINDOW_CHANNEL_DMA, 0, wndwc37e_new }, + {} + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + int cid, ret; + + cid = nvif_mclass(&disp->disp->object, wndws); + if (cid < 0) { + NV_ERROR(drm, "No supported window class\n"); + return cid; + } + + ret = wndws[cid].new(drm, type, index, wndws[cid].oclass, pwndw); + if (ret) + return ret; + + return nv50_wimm_init(drm, *pwndw); +} diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h new file mode 100644 index 000000000000..b0b6428034b0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h @@ -0,0 +1,96 @@ +#ifndef __NV50_KMS_WNDW_H__ +#define __NV50_KMS_WNDW_H__ +#define nv50_wndw(p) container_of((p), struct nv50_wndw, plane) +#include "disp.h" +#include "atom.h" +#include "lut.h" + +#include <nvif/notify.h> + +struct nv50_wndw_ctxdma { + struct list_head head; + struct nvif_object object; +}; + +struct nv50_wndw { + const struct nv50_wndw_func *func; + const struct nv50_wimm_func *immd; + int id; + struct nv50_disp_interlock interlock; + + struct { + struct nvif_object *parent; + struct list_head list; + } ctxdma; + + struct drm_plane plane; + + struct nv50_lut ilut; + + struct nv50_dmac wndw; + struct nv50_dmac wimm; + + struct nvif_notify notify; + u16 ntfy; + u16 sema; + u32 data; +}; + +int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *, + enum drm_plane_type, const char *name, int index, + const u32 *format, enum nv50_disp_interlock_type, + u32 interlock_data, u32 heads, struct nv50_wndw **); +void nv50_wndw_init(struct nv50_wndw *); +void nv50_wndw_fini(struct nv50_wndw *); +void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock, + struct nv50_wndw_atom *); +void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush, + struct nv50_wndw_atom *); +void nv50_wndw_ntfy_enable(struct nv50_wndw *, struct nv50_wndw_atom *); +int nv50_wndw_wait_armed(struct nv50_wndw *, struct nv50_wndw_atom *); + +struct nv50_wndw_func { + int (*acquire)(struct nv50_wndw *, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh); + void (*release)(struct nv50_wndw *, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh); + void (*prepare)(struct nv50_wndw *, struct nv50_head_atom *asyh, + struct nv50_wndw_atom *asyw); + + void (*sema_set)(struct nv50_wndw *, struct nv50_wndw_atom *); + void (*sema_clr)(struct nv50_wndw *); + void (*ntfy_reset)(struct nouveau_bo *, u32 offset); + void (*ntfy_set)(struct nv50_wndw *, struct nv50_wndw_atom *); + void (*ntfy_clr)(struct nv50_wndw *); + int (*ntfy_wait_begun)(struct nouveau_bo *, u32 offset, + struct nvif_device *); + void (*ilut)(struct nv50_wndw *, struct nv50_wndw_atom *); + bool olut_core; + void (*xlut_set)(struct nv50_wndw *, struct nv50_wndw_atom *); + void (*xlut_clr)(struct nv50_wndw *); + void (*image_set)(struct nv50_wndw *, struct nv50_wndw_atom *); + void (*image_clr)(struct nv50_wndw *); + void (*scale_set)(struct nv50_wndw *, struct nv50_wndw_atom *); + + void (*update)(struct nv50_wndw *, u32 *interlock); +}; + +extern const struct drm_plane_funcs nv50_wndw; + +void base507c_ntfy_reset(struct nouveau_bo *, u32); +int base507c_ntfy_wait_begun(struct nouveau_bo *, u32, struct nvif_device *); + +struct nv50_wimm_func { + void (*point)(struct nv50_wndw *, struct nv50_wndw_atom *); + + void (*update)(struct nv50_wndw *, u32 *interlock); +}; + +extern const struct nv50_wimm_func curs507a; + +int wndwc37e_new(struct nouveau_drm *, enum drm_plane_type, int, s32, + struct nv50_wndw **); + +int nv50_wndw_new(struct nouveau_drm *, enum drm_plane_type, int index, + struct nv50_wndw **); +#endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c new file mode 100644 index 000000000000..44afb0f069a5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c @@ -0,0 +1,278 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wndw.h" +#include "atom.h" + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_plane_helper.h> +#include <nouveau_bo.h> + +#include <nvif/clc37e.h> + +static void +wndwc37e_ilut_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 2))) { + evo_mthd(push, 0x02b8, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +static void +wndwc37e_ilut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 4))) { + evo_mthd(push, 0x02b0, 3); + evo_data(push, asyw->xlut.i.output_mode << 8 | + asyw->xlut.i.range << 4 | + asyw->xlut.i.size); + evo_data(push, asyw->xlut.i.offset >> 8); + evo_data(push, asyw->xlut.handle); + evo_kick(push, &wndw->wndw); + } +} + +static void +wndwc37e_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + asyw->xlut.i.mode = 2; + asyw->xlut.i.size = 0; + asyw->xlut.i.range = 0; + asyw->xlut.i.output_mode = 1; +} + +static void +wndwc37e_image_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 4))) { + evo_mthd(push, 0x0308, 1); + evo_data(push, 0x00000000); + evo_mthd(push, 0x0240, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +static void +wndwc37e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + + if (!(push = evo_wait(&wndw->wndw, 25))) + return; + + evo_mthd(push, 0x0308, 1); + evo_data(push, asyw->image.mode << 4 | asyw->image.interval); + evo_mthd(push, 0x0224, 4); + evo_data(push, asyw->image.h << 16 | asyw->image.w); + evo_data(push, asyw->image.layout << 4 | asyw->image.blockh); + evo_data(push, asyw->image.colorspace << 8 | asyw->image.format); + evo_data(push, asyw->image.blocks[0] | (asyw->image.pitch[0] >> 6)); + evo_mthd(push, 0x0240, 1); + evo_data(push, asyw->image.handle[0]); + evo_mthd(push, 0x0260, 1); + evo_data(push, asyw->image.offset[0] >> 8); + evo_mthd(push, 0x0290, 1); + evo_data(push, (asyw->state.src_y >> 16) << 16 | + (asyw->state.src_x >> 16)); + evo_mthd(push, 0x0298, 1); + evo_data(push, (asyw->state.src_h >> 16) << 16 | + (asyw->state.src_w >> 16)); + evo_mthd(push, 0x02a4, 1); + evo_data(push, asyw->state.crtc_h << 16 | + asyw->state.crtc_w); + + /*XXX: Composition-related stuff. Need to implement properly. */ + evo_mthd(push, 0x02ec, 1); + evo_data(push, (2 - (wndw->id & 1)) << 4); + evo_mthd(push, 0x02f4, 5); + evo_data(push, 0x00000011); + evo_data(push, 0xffff0000); + evo_data(push, 0xffff0000); + evo_data(push, 0xffff0000); + evo_data(push, 0xffff0000); + evo_kick(push, &wndw->wndw); +} + +static void +wndwc37e_ntfy_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 2))) { + evo_mthd(push, 0x021c, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +static void +wndwc37e_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 3))) { + evo_mthd(push, 0x021c, 2); + evo_data(push, asyw->ntfy.handle); + evo_data(push, asyw->ntfy.offset | asyw->ntfy.awaken); + evo_kick(push, &wndw->wndw); + } +} + +static void +wndwc37e_sema_clr(struct nv50_wndw *wndw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 2))) { + evo_mthd(push, 0x0218, 1); + evo_data(push, 0x00000000); + evo_kick(push, &wndw->wndw); + } +} + +static void +wndwc37e_sema_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 5))) { + evo_mthd(push, 0x020c, 4); + evo_data(push, asyw->sema.offset); + evo_data(push, asyw->sema.acquire); + evo_data(push, asyw->sema.release); + evo_data(push, asyw->sema.handle); + evo_kick(push, &wndw->wndw); + } +} + +static void +wndwc37e_update(struct nv50_wndw *wndw, u32 *interlock) +{ + u32 *push; + if ((push = evo_wait(&wndw->wndw, 5))) { + evo_mthd(push, 0x0370, 2); + evo_data(push, interlock[NV50_DISP_INTERLOCK_CURS] << 1 | + interlock[NV50_DISP_INTERLOCK_CORE]); + evo_data(push, interlock[NV50_DISP_INTERLOCK_WNDW]); + evo_mthd(push, 0x0200, 1); + if (interlock[NV50_DISP_INTERLOCK_WIMM] & wndw->interlock.data) + evo_data(push, 0x00001001); + else + evo_data(push, 0x00000001); + evo_kick(push, &wndw->wndw); + } +} + +static void +wndwc37e_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ +} + +static int +wndwc37e_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, + struct nv50_head_atom *asyh) +{ + return drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, true); +} + +static const u32 +wndwc37e_format[] = { + DRM_FORMAT_C8, + DRM_FORMAT_YUYV, + DRM_FORMAT_UYVY, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ABGR2101010, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_ARGB2101010, + 0 +}; + +static const struct nv50_wndw_func +wndwc37e = { + .acquire = wndwc37e_acquire, + .release = wndwc37e_release, + .sema_set = wndwc37e_sema_set, + .sema_clr = wndwc37e_sema_clr, + .ntfy_set = wndwc37e_ntfy_set, + .ntfy_clr = wndwc37e_ntfy_clr, + .ntfy_reset = corec37d_ntfy_init, + .ntfy_wait_begun = base507c_ntfy_wait_begun, + .ilut = wndwc37e_ilut, + .xlut_set = wndwc37e_ilut_set, + .xlut_clr = wndwc37e_ilut_clr, + .image_set = wndwc37e_image_set, + .image_clr = wndwc37e_image_clr, + .update = wndwc37e_update, +}; + +static int +wndwc37e_new_(const struct nv50_wndw_func *func, struct nouveau_drm *drm, + enum drm_plane_type type, int index, s32 oclass, u32 heads, + struct nv50_wndw **pwndw) +{ + struct nvc37e_window_channel_dma_v0 args = { + .pushbuf = 0xb0007e00 | index, + .index = index, + }; + struct nv50_disp *disp = nv50_disp(drm->dev); + struct nv50_wndw *wndw; + int ret; + + ret = nv50_wndw_new_(func, drm->dev, type, "wndw", index, + wndwc37e_format, heads, NV50_DISP_INTERLOCK_WNDW, + BIT(index), &wndw); + if (*pwndw = wndw, ret) + return ret; + + ret = nv50_dmac_create(&drm->client.device, &disp->disp->object, + &oclass, 0, &args, sizeof(args), + disp->sync->bo.offset, &wndw->wndw); + if (ret) { + NV_ERROR(drm, "qndw%04x allocation failed: %d\n", oclass, ret); + return ret; + } + + wndw->ntfy = NV50_DISP_WNDW_NTFY(wndw->id); + wndw->sema = NV50_DISP_WNDW_SEM0(wndw->id); + wndw->data = 0x00000000; + return 0; +} + +int +wndwc37e_new(struct nouveau_drm *drm, enum drm_plane_type type, int index, + s32 oclass, struct nv50_wndw **pwndw) +{ + return wndwc37e_new_(&wndwc37e, drm, type, index, oclass, + BIT(index >> 1), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h index 2740278d226b..4f5233107f5f 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h @@ -31,6 +31,7 @@ struct nv_device_info_v0 { #define NV_DEVICE_INFO_V0_KEPLER 0x08 #define NV_DEVICE_INFO_V0_MAXWELL 0x09 #define NV_DEVICE_INFO_V0_PASCAL 0x0a +#define NV_DEVICE_INFO_V0_VOLTA 0x0b __u8 family; __u8 pad06[2]; __u64 ram_size; @@ -39,9 +40,55 @@ struct nv_device_info_v0 { char name[64]; }; +struct nv_device_info_v1 { + __u8 version; + __u8 count; + __u8 pad02[6]; + struct nv_device_info_v1_data { + __u64 mthd; /* NV_DEVICE_INFO_* (see below). */ + __u64 data; + } data[]; +}; + struct nv_device_time_v0 { __u8 version; __u8 pad01[7]; __u64 time; }; + +#define NV_DEVICE_INFO_UNIT (0xffffffffULL << 32) +#define NV_DEVICE_INFO(n) ((n) | (0x00000000ULL << 32)) +#define NV_DEVICE_FIFO(n) ((n) | (0x00000001ULL << 32)) + +/* This will be returned for unsupported queries. */ +#define NV_DEVICE_INFO_INVALID ~0ULL + +/* These return a mask of available engines of particular type. */ +#define NV_DEVICE_INFO_ENGINE_SW NV_DEVICE_INFO(0x00000000) +#define NV_DEVICE_INFO_ENGINE_GR NV_DEVICE_INFO(0x00000001) +#define NV_DEVICE_INFO_ENGINE_MPEG NV_DEVICE_INFO(0x00000002) +#define NV_DEVICE_INFO_ENGINE_ME NV_DEVICE_INFO(0x00000003) +#define NV_DEVICE_INFO_ENGINE_CIPHER NV_DEVICE_INFO(0x00000004) +#define NV_DEVICE_INFO_ENGINE_BSP NV_DEVICE_INFO(0x00000005) +#define NV_DEVICE_INFO_ENGINE_VP NV_DEVICE_INFO(0x00000006) +#define NV_DEVICE_INFO_ENGINE_CE NV_DEVICE_INFO(0x00000007) +#define NV_DEVICE_INFO_ENGINE_SEC NV_DEVICE_INFO(0x00000008) +#define NV_DEVICE_INFO_ENGINE_MSVLD NV_DEVICE_INFO(0x00000009) +#define NV_DEVICE_INFO_ENGINE_MSPDEC NV_DEVICE_INFO(0x0000000a) +#define NV_DEVICE_INFO_ENGINE_MSPPP NV_DEVICE_INFO(0x0000000b) +#define NV_DEVICE_INFO_ENGINE_MSENC NV_DEVICE_INFO(0x0000000c) +#define NV_DEVICE_INFO_ENGINE_VIC NV_DEVICE_INFO(0x0000000d) +#define NV_DEVICE_INFO_ENGINE_SEC2 NV_DEVICE_INFO(0x0000000e) +#define NV_DEVICE_INFO_ENGINE_NVDEC NV_DEVICE_INFO(0x0000000f) +#define NV_DEVICE_INFO_ENGINE_NVENC NV_DEVICE_INFO(0x00000010) + +/* Returns the number of available channels. */ +#define NV_DEVICE_FIFO_CHANNELS NV_DEVICE_FIFO(0x00000000) + +/* Returns a mask of available runlists. */ +#define NV_DEVICE_FIFO_RUNLISTS NV_DEVICE_FIFO(0x00000001) + +/* These return a mask of engines available on a particular runlist. */ +#define NV_DEVICE_FIFO_RUNLIST_ENGINES(n) ((n) + NV_DEVICE_FIFO(0x00000010)) +#define NV_DEVICE_FIFO_RUNLIST_ENGINES__SIZE 64 #endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h index 56f5bd81e480..fbfcffc5feb2 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h @@ -4,25 +4,11 @@ struct kepler_channel_gpfifo_a_v0 { __u8 version; - __u8 pad01[5]; + __u8 pad01[1]; __u16 chid; -#define NVA06F_V0_ENGINE_SW 0x00000001 -#define NVA06F_V0_ENGINE_GR 0x00000002 -#define NVA06F_V0_ENGINE_SEC 0x00000004 -#define NVA06F_V0_ENGINE_MSVLD 0x00000010 -#define NVA06F_V0_ENGINE_MSPDEC 0x00000020 -#define NVA06F_V0_ENGINE_MSPPP 0x00000040 -#define NVA06F_V0_ENGINE_MSENC 0x00000080 -#define NVA06F_V0_ENGINE_VIC 0x00000100 -#define NVA06F_V0_ENGINE_NVDEC 0x00000200 -#define NVA06F_V0_ENGINE_NVENC0 0x00000400 -#define NVA06F_V0_ENGINE_NVENC1 0x00000800 -#define NVA06F_V0_ENGINE_CE0 0x00010000 -#define NVA06F_V0_ENGINE_CE1 0x00020000 -#define NVA06F_V0_ENGINE_CE2 0x00040000 - __u32 engines; __u32 ilength; __u64 ioffset; + __u64 runlist; __u64 vmm; }; diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index a7c5bf572788..6db56bd7d67e 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -52,6 +52,8 @@ #define NV04_DISP /* cl0046.h */ 0x00000046 +#define VOLTA_USERMODE_A 0x0000c361 + #define NV03_CHANNEL_DMA /* cl506b.h */ 0x0000006b #define NV10_CHANNEL_DMA /* cl506b.h */ 0x0000006e #define NV17_CHANNEL_DMA /* cl506b.h */ 0x0000176e @@ -66,6 +68,7 @@ #define KEPLER_CHANNEL_GPFIFO_B /* cla06f.h */ 0x0000a16f #define MAXWELL_CHANNEL_GPFIFO_A /* cla06f.h */ 0x0000b06f #define PASCAL_CHANNEL_GPFIFO_A /* cla06f.h */ 0x0000c06f +#define VOLTA_CHANNEL_GPFIFO_A /* cla06f.h */ 0x0000c36f #define NV50_DISP /* cl5070.h */ 0x00005070 #define G82_DISP /* cl5070.h */ 0x00008270 @@ -79,6 +82,7 @@ #define GM200_DISP /* cl5070.h */ 0x00009570 #define GP100_DISP /* cl5070.h */ 0x00009770 #define GP102_DISP /* cl5070.h */ 0x00009870 +#define GV100_DISP /* cl5070.h */ 0x0000c370 #define NV31_MPEG 0x00003174 #define G82_MPEG 0x00008274 @@ -90,6 +94,7 @@ #define GT214_DISP_CURSOR /* cl507a.h */ 0x0000857a #define GF110_DISP_CURSOR /* cl507a.h */ 0x0000907a #define GK104_DISP_CURSOR /* cl507a.h */ 0x0000917a +#define GV100_DISP_CURSOR /* cl507a.h */ 0x0000c37a #define NV50_DISP_OVERLAY /* cl507b.h */ 0x0000507b #define G82_DISP_OVERLAY /* cl507b.h */ 0x0000827b @@ -97,6 +102,8 @@ #define GF110_DISP_OVERLAY /* cl507b.h */ 0x0000907b #define GK104_DISP_OVERLAY /* cl507b.h */ 0x0000917b +#define GV100_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c37b + #define NV50_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000507c #define G82_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000827c #define GT200_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000837c @@ -117,6 +124,7 @@ #define GM200_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000957d #define GP100_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000977d #define GP102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000987d +#define GV100_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c37d #define NV50_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000507e #define G82_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000827e @@ -125,6 +133,8 @@ #define GF110_DISP_OVERLAY_CONTROL_DMA /* cl507e.h */ 0x0000907e #define GK104_DISP_OVERLAY_CONTROL_DMA /* cl507e.h */ 0x0000917e +#define GV100_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c37e + #define NV50_TESLA 0x00005097 #define G82_TESLA 0x00008297 #define GT200_TESLA 0x00008397 @@ -145,6 +155,8 @@ #define PASCAL_A /* cl9097.h */ 0x0000c097 #define PASCAL_B /* cl9097.h */ 0x0000c197 +#define VOLTA_A /* cl9097.h */ 0x0000c397 + #define NV74_BSP 0x000074b0 #define GT212_MSVLD 0x000085b1 @@ -170,6 +182,7 @@ #define MAXWELL_DMA_COPY_A 0x0000b0b5 #define PASCAL_DMA_COPY_A 0x0000c0b5 #define PASCAL_DMA_COPY_B 0x0000c1b5 +#define VOLTA_DMA_COPY_A 0x0000c3b5 #define FERMI_DECOMPRESS 0x000090b8 @@ -183,6 +196,7 @@ #define MAXWELL_COMPUTE_B 0x0000b1c0 #define PASCAL_COMPUTE_A 0x0000c0c0 #define PASCAL_COMPUTE_B 0x0000c1c0 +#define VOLTA_COMPUTE_A 0x0000c3c0 #define NV74_CIPHER 0x000074c1 #endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/clc37b.h b/drivers/gpu/drm/nouveau/include/nvif/clc37b.h new file mode 100644 index 000000000000..89b18189d43b --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/clc37b.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NVIF_CLC37B_H__ +#define __NVIF_CLC37B_H__ + +struct nvc37b_window_imm_channel_dma_v0 { + __u8 version; + __u8 index; + __u8 pad02[6]; + __u64 pushbuf; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/clc37e.h b/drivers/gpu/drm/nouveau/include/nvif/clc37e.h new file mode 100644 index 000000000000..899db9e915ef --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/clc37e.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NVIF_CLC37E_H__ +#define __NVIF_CLC37E_H__ + +struct nvc37e_window_channel_dma_v0 { + __u8 version; + __u8 index; + __u8 pad02[6]; + __u64 pushbuf; +}; + +#define NVC37E_WINDOW_CHANNEL_DMA_V0_NTFY_UEVENT 0x00 +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/device.h b/drivers/gpu/drm/nouveau/include/nvif/device.h index 6edb6266857e..ef839bd1d37e 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/device.h +++ b/drivers/gpu/drm/nouveau/include/nvif/device.h @@ -4,10 +4,18 @@ #include <nvif/object.h> #include <nvif/cl0080.h> +#include <nvif/user.h> struct nvif_device { struct nvif_object object; struct nv_device_info_v0 info; + + struct nvif_fifo_runlist { + u64 engines; + } *runlist; + int runlists; + + struct nvif_user user; }; int nvif_device_init(struct nvif_object *, u32 handle, s32 oclass, void *, u32, @@ -67,6 +75,5 @@ u64 nvif_device_time(struct nvif_device *); #include <engine/fifo.h> #include <engine/gr.h> -#define nvxx_fifo(a) nvxx_device(a)->fifo #define nvxx_gr(a) nvxx_device(a)->gr #endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/disp.h b/drivers/gpu/drm/nouveau/include/nvif/disp.h new file mode 100644 index 000000000000..7c0eda375c01 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/disp.h @@ -0,0 +1,12 @@ +#ifndef __NVIF_DISP_H__ +#define __NVIF_DISP_H__ +#include <nvif/object.h> +struct nvif_device; + +struct nvif_disp { + struct nvif_object object; +}; + +int nvif_disp_ctor(struct nvif_device *, s32 oclass, struct nvif_disp *); +void nvif_disp_dtor(struct nvif_disp *); +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/fifo.h b/drivers/gpu/drm/nouveau/include/nvif/fifo.h new file mode 100644 index 000000000000..e9468c9f9abf --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/fifo.h @@ -0,0 +1,18 @@ +#ifndef __NVIF_FIFO_H__ +#define __NVIF_FIFO_H__ +#include <nvif/device.h> + +/* Returns mask of runlists that support a NV_DEVICE_INFO_ENGINE_* type. */ +u64 nvif_fifo_runlist(struct nvif_device *, u64 engine); + +/* CE-supporting runlists (excluding GRCE, if others exist). */ +static inline u64 +nvif_fifo_runlist_ce(struct nvif_device *device) +{ + u64 runmgr = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR); + u64 runmce = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_CE); + if (runmce && !(runmce &= ~runmgr)) + runmce = runmgr; + return runmce; +} +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/mem.h b/drivers/gpu/drm/nouveau/include/nvif/mem.h index b542fe38398e..80ee4ab0f016 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/mem.h +++ b/drivers/gpu/drm/nouveau/include/nvif/mem.h @@ -15,4 +15,6 @@ int nvif_mem_init_type(struct nvif_mmu *mmu, s32 oclass, int type, u8 page, int nvif_mem_init(struct nvif_mmu *mmu, s32 oclass, u8 type, u8 page, u64 size, void *argv, u32 argc, struct nvif_mem *); void nvif_mem_fini(struct nvif_mem *); + +int nvif_mem_init_map(struct nvif_mmu *, u8 type, u64 size, struct nvif_mem *); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/mmu.h b/drivers/gpu/drm/nouveau/include/nvif/mmu.h index c8cd5b5b0688..747ecf67e403 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvif/mmu.h @@ -8,6 +8,7 @@ struct nvif_mmu { u8 heap_nr; u8 type_nr; u16 kind_nr; + s32 mem; struct { u64 size; diff --git a/drivers/gpu/drm/nouveau/include/nvif/object.h b/drivers/gpu/drm/nouveau/include/nvif/object.h index a2d5244ff2b7..20754d9e6883 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/object.h +++ b/drivers/gpu/drm/nouveau/include/nvif/object.h @@ -99,6 +99,22 @@ struct nvif_mclass { ret; \ }) +#define nvif_sclass(o,m,u) ({ \ + const typeof(m[0]) *_mclass = (m); \ + s32 _oclass = (u); \ + int _cid; \ + if (_oclass) { \ + for (_cid = 0; _mclass[_cid].oclass; _cid++) { \ + if (_mclass[_cid].oclass == _oclass) \ + break; \ + } \ + _cid = _mclass[_cid].oclass ? _cid : -ENOSYS; \ + } else { \ + _cid = nvif_mclass((o), _mclass); \ + } \ + _cid; \ +}) + /*XXX*/ #include <core/object.h> #define nvxx_object(a) ({ \ diff --git a/drivers/gpu/drm/nouveau/include/nvif/user.h b/drivers/gpu/drm/nouveau/include/nvif/user.h new file mode 100644 index 000000000000..03c11826b693 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/user.h @@ -0,0 +1,19 @@ +#ifndef __NVIF_USER_H__ +#define __NVIF_USER_H__ +#include <nvif/object.h> +struct nvif_device; + +struct nvif_user { + const struct nvif_user_func *func; + struct nvif_object object; +}; + +struct nvif_user_func { + void (*doorbell)(struct nvif_user *, u32 token); +}; + +int nvif_user_init(struct nvif_device *); +void nvif_user_fini(struct nvif_device *); + +extern const struct nvif_user_func nvif_userc361; +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index 560265b15ec2..d83d834b7452 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -22,6 +22,7 @@ enum nvkm_devidx { NVKM_SUBDEV_LTC, NVKM_SUBDEV_MMU, NVKM_SUBDEV_BAR, + NVKM_SUBDEV_FAULT, NVKM_SUBDEV_PMU, NVKM_SUBDEV_VOLT, NVKM_SUBDEV_ICCSENSE, @@ -37,7 +38,10 @@ enum nvkm_devidx { NVKM_ENGINE_CE3, NVKM_ENGINE_CE4, NVKM_ENGINE_CE5, - NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE5, + NVKM_ENGINE_CE6, + NVKM_ENGINE_CE7, + NVKM_ENGINE_CE8, + NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE8, NVKM_ENGINE_CIPHER, NVKM_ENGINE_DISP, @@ -109,6 +113,7 @@ struct nvkm_device { NV_E0 = 0xe0, GM100 = 0x110, GP100 = 0x130, + GV100 = 0x140, } card_type; u32 chipset; u8 chiprev; @@ -123,6 +128,7 @@ struct nvkm_device { struct nvkm_bus *bus; struct nvkm_clk *clk; struct nvkm_devinit *devinit; + struct nvkm_fault *fault; struct nvkm_fb *fb; struct nvkm_fuse *fuse; struct nvkm_gpio *gpio; @@ -143,7 +149,7 @@ struct nvkm_device { struct nvkm_volt *volt; struct nvkm_engine *bsp; - struct nvkm_engine *ce[6]; + struct nvkm_engine *ce[9]; struct nvkm_engine *cipher; struct nvkm_disp *disp; struct nvkm_dma *dma; @@ -194,6 +200,7 @@ struct nvkm_device_chip { int (*bus )(struct nvkm_device *, int idx, struct nvkm_bus **); int (*clk )(struct nvkm_device *, int idx, struct nvkm_clk **); int (*devinit )(struct nvkm_device *, int idx, struct nvkm_devinit **); + int (*fault )(struct nvkm_device *, int idx, struct nvkm_fault **); int (*fb )(struct nvkm_device *, int idx, struct nvkm_fb **); int (*fuse )(struct nvkm_device *, int idx, struct nvkm_fuse **); int (*gpio )(struct nvkm_device *, int idx, struct nvkm_gpio **); @@ -214,7 +221,7 @@ struct nvkm_device_chip { int (*volt )(struct nvkm_device *, int idx, struct nvkm_volt **); int (*bsp )(struct nvkm_device *, int idx, struct nvkm_engine **); - int (*ce[6] )(struct nvkm_device *, int idx, struct nvkm_engine **); + int (*ce[9] )(struct nvkm_device *, int idx, struct nvkm_engine **); int (*cipher )(struct nvkm_device *, int idx, struct nvkm_engine **); int (*disp )(struct nvkm_device *, int idx, struct nvkm_disp **); int (*dma )(struct nvkm_device *, int idx, struct nvkm_dma **); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h index ebf8473a39fe..8a2be5b635e2 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h @@ -18,6 +18,7 @@ struct nvkm_engine_func { void *(*dtor)(struct nvkm_engine *); void (*preinit)(struct nvkm_engine *); int (*oneinit)(struct nvkm_engine *); + int (*info)(struct nvkm_engine *, u64 mthd, u64 *data); int (*init)(struct nvkm_engine *); int (*fini)(struct nvkm_engine *, bool suspend); void (*intr)(struct nvkm_engine *); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h index 63df2290177f..85a0777c2ce4 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h @@ -17,6 +17,7 @@ struct nvkm_subdev_func { void *(*dtor)(struct nvkm_subdev *); int (*preinit)(struct nvkm_subdev *); int (*oneinit)(struct nvkm_subdev *); + int (*info)(struct nvkm_subdev *, u64 mthd, u64 *data); int (*init)(struct nvkm_subdev *); int (*fini)(struct nvkm_subdev *, bool suspend); void (*intr)(struct nvkm_subdev *); @@ -29,6 +30,7 @@ void nvkm_subdev_del(struct nvkm_subdev **); int nvkm_subdev_preinit(struct nvkm_subdev *); int nvkm_subdev_init(struct nvkm_subdev *); int nvkm_subdev_fini(struct nvkm_subdev *, bool suspend); +int nvkm_subdev_info(struct nvkm_subdev *, u64, u64 *); void nvkm_subdev_intr(struct nvkm_subdev *); /* subdev logging */ diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h index 553245994450..fc295e1faa19 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h @@ -10,4 +10,5 @@ int gm107_ce_new(struct nvkm_device *, int, struct nvkm_engine **); int gm200_ce_new(struct nvkm_device *, int, struct nvkm_engine **); int gp100_ce_new(struct nvkm_device *, int, struct nvkm_engine **); int gp102_ce_new(struct nvkm_device *, int, struct nvkm_engine **); +int gv100_ce_new(struct nvkm_device *, int, struct nvkm_engine **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h index e83193d3ccab..ef7dc0844d26 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h @@ -35,4 +35,5 @@ int gm107_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gm200_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gp102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); +int gv100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h index 0f9c1c702ed6..f0c1b2c8c78c 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h @@ -27,4 +27,5 @@ int nv04_dma_new(struct nvkm_device *, int, struct nvkm_dma **); int nv50_dma_new(struct nvkm_device *, int, struct nvkm_dma **); int gf100_dma_new(struct nvkm_device *, int, struct nvkm_dma **); int gf119_dma_new(struct nvkm_device *, int, struct nvkm_dma **); +int gv100_dma_new(struct nvkm_device *, int, struct nvkm_dma **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h index c17b3a9bf8fb..7e39fbed2519 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h @@ -4,6 +4,7 @@ #include <core/engine.h> #include <core/object.h> #include <core/event.h> +struct nvkm_fault_data; #define NVKM_FIFO_CHID_NR 4096 @@ -45,6 +46,7 @@ struct nvkm_fifo { struct nvkm_event kevent; /* channel killed */ }; +void nvkm_fifo_fault(struct nvkm_fifo *, struct nvkm_fault_data *); void nvkm_fifo_pause(struct nvkm_fifo *, unsigned long *); void nvkm_fifo_start(struct nvkm_fifo *, unsigned long *); @@ -71,4 +73,5 @@ int gm200_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); int gm20b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); int gp100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); int gp10b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); +int gv100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h index fb18f105fc43..ba1518ff8b66 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h @@ -45,6 +45,8 @@ int gm200_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int gm20b_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int gp100_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int gp102_gr_new(struct nvkm_device *, int, struct nvkm_gr **); +int gp104_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int gp107_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int gp10b_gr_new(struct nvkm_device *, int, struct nvkm_gr **); +int gv100_gr_new(struct nvkm_device *, int, struct nvkm_gr **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h index df34b41838d6..512e25a41803 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h @@ -1,6 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NVBIOS_DP_H__ #define __NVBIOS_DP_H__ + +u16 +nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len); + struct nvbios_dpout { u16 type; u16 mask; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h index 40558064d589..486e7635c29d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h @@ -30,4 +30,5 @@ int mcp89_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gf100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gm107_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gm200_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); +int gv100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h new file mode 100644 index 000000000000..5a77498fe6a0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h @@ -0,0 +1,33 @@ +#ifndef __NVKM_FAULT_H__ +#define __NVKM_FAULT_H__ +#include <core/subdev.h> +#include <core/notify.h> + +struct nvkm_fault { + const struct nvkm_fault_func *func; + struct nvkm_subdev subdev; + + struct nvkm_fault_buffer *buffer[2]; + int buffer_nr; + + struct nvkm_event event; + + struct nvkm_notify nrpfb; +}; + +struct nvkm_fault_data { + u64 addr; + u64 inst; + u64 time; + u8 engine; + u8 valid; + u8 gpc; + u8 hub; + u8 access; + u8 client; + u8 reason; +}; + +int gp100_fault_new(struct nvkm_device *, int, struct nvkm_fault **); +int gv100_fault_new(struct nvkm_device *, int, struct nvkm_fault **); +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h index 92be0e5269c6..96ccc624ee81 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h @@ -83,6 +83,7 @@ int gm20b_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gp100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gp102_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gp10b_fb_new(struct nvkm_device *, int, struct nvkm_fb **); +int gv100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); #include <subdev/bios.h> #include <subdev/bios/ramcfg.h> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h index 95b611554d53..9db5f8293198 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h @@ -21,12 +21,14 @@ struct nvkm_ltc { int zbc_max; u32 zbc_color[NVKM_LTC_MAX_ZBC_CNT][4]; u32 zbc_depth[NVKM_LTC_MAX_ZBC_CNT]; + u32 zbc_stencil[NVKM_LTC_MAX_ZBC_CNT]; }; void nvkm_ltc_tags_clear(struct nvkm_device *, u32 first, u32 count); int nvkm_ltc_zbc_color_get(struct nvkm_ltc *, int index, const u32[4]); int nvkm_ltc_zbc_depth_get(struct nvkm_ltc *, int index, const u32); +int nvkm_ltc_zbc_stencil_get(struct nvkm_ltc *, int index, const u32); void nvkm_ltc_invalidate(struct nvkm_ltc *); void nvkm_ltc_flush(struct nvkm_ltc *); @@ -37,4 +39,5 @@ int gk20a_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); int gp100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); +int gp102_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index baab93398e54..688595545e21 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -129,4 +129,5 @@ int gm200_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gm20b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gp100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gp10b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int gv100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index ece650a0c5f9..e2211bb2cf79 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -23,6 +23,7 @@ #include <nvif/client.h> #include <nvif/driver.h> +#include <nvif/fifo.h> #include <nvif/ioctl.h> #include <nvif/class.h> #include <nvif/cl0002.h> @@ -102,6 +103,7 @@ nouveau_abi16_swclass(struct nouveau_drm *drm) case NV_DEVICE_INFO_V0_KEPLER: case NV_DEVICE_INFO_V0_MAXWELL: case NV_DEVICE_INFO_V0_PASCAL: + case NV_DEVICE_INFO_V0_VOLTA: return NVIF_CLASS_SW_GF100; } @@ -256,6 +258,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv); struct nouveau_abi16_chan *chan; struct nvif_device *device; + u64 engine; int ret; if (unlikely(!abi16)) @@ -268,25 +271,26 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) /* hack to allow channel engine type specification on kepler */ if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { - if (init->fb_ctxdma_handle != ~0) - init->fb_ctxdma_handle = NVA06F_V0_ENGINE_GR; - else { - init->fb_ctxdma_handle = 0; -#define _(A,B) if (init->tt_ctxdma_handle & (A)) init->fb_ctxdma_handle |= (B) - _(0x01, NVA06F_V0_ENGINE_GR); - _(0x02, NVA06F_V0_ENGINE_MSPDEC); - _(0x04, NVA06F_V0_ENGINE_MSPPP); - _(0x08, NVA06F_V0_ENGINE_MSVLD); - _(0x10, NVA06F_V0_ENGINE_CE0); - _(0x20, NVA06F_V0_ENGINE_CE1); - _(0x40, NVA06F_V0_ENGINE_MSENC); -#undef _ + if (init->fb_ctxdma_handle == ~0) { + switch (init->tt_ctxdma_handle) { + case 0x01: engine = NV_DEVICE_INFO_ENGINE_GR ; break; + case 0x02: engine = NV_DEVICE_INFO_ENGINE_MSPDEC; break; + case 0x04: engine = NV_DEVICE_INFO_ENGINE_MSPPP ; break; + case 0x08: engine = NV_DEVICE_INFO_ENGINE_MSVLD ; break; + case 0x30: engine = NV_DEVICE_INFO_ENGINE_CE ; break; + default: + return nouveau_abi16_put(abi16, -ENOSYS); + } + } else { + engine = NV_DEVICE_INFO_ENGINE_GR; } - /* allow flips to be executed if this is a graphics channel */ + if (engine != NV_DEVICE_INFO_ENGINE_CE) + engine = nvif_fifo_runlist(device, engine); + else + engine = nvif_fifo_runlist_ce(device); + init->fb_ctxdma_handle = engine; init->tt_ctxdma_handle = 0; - if (init->fb_ctxdma_handle == NVA06F_V0_ENGINE_GR) - init->tt_ctxdma_handle = 1; } if (init->fb_ctxdma_handle == ~0 || init->tt_ctxdma_handle == ~0) diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 5ffcb6683776..ffb195850314 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -193,7 +193,7 @@ static int nouveau_dsm_power_state(enum vga_switcheroo_client_id id, return nouveau_dsm_set_discrete_state(nouveau_dsm_priv.dhandle, state); } -static int nouveau_dsm_get_client_id(struct pci_dev *pdev) +static enum vga_switcheroo_client_id nouveau_dsm_get_client_id(struct pci_dev *pdev) { /* easy option one - intel vendor ID means Integrated */ if (pdev->vendor == PCI_VENDOR_ID_INTEL) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 6f402c4f2bdd..7214022dfb91 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -214,7 +214,6 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, INIT_LIST_HEAD(&nvbo->entry); INIT_LIST_HEAD(&nvbo->vma_list); nvbo->bo.bdev = &drm->ttm.bdev; - nvbo->cli = cli; /* This is confusing, and doesn't actually mean we want an uncached * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated @@ -1142,6 +1141,8 @@ nouveau_bo_move_init(struct nouveau_drm *drm) struct ttm_mem_reg *, struct ttm_mem_reg *); int (*init)(struct nouveau_channel *, u32 handle); } _methods[] = { + { "COPY", 4, 0xc3b5, nve0_bo_move_copy, nve0_bo_move_init }, + { "GRCE", 0, 0xc3b5, nve0_bo_move_copy, nvc0_bo_move_init }, { "COPY", 4, 0xc1b5, nve0_bo_move_copy, nve0_bo_move_init }, { "GRCE", 0, 0xc1b5, nve0_bo_move_copy, nvc0_bo_move_init }, { "COPY", 4, 0xc0b5, nve0_bo_move_copy, nve0_bo_move_init }, diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index be8e00b49cde..73c48440d4d7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -26,8 +26,6 @@ struct nouveau_bo { struct list_head vma_list; - struct nouveau_cli *cli; - unsigned contig:1; unsigned page:5; unsigned kind:8; diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index af1116655910..92d3115f96b5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -163,12 +163,15 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, return ret; } + chan->push.addr = chan->push.vma->addr; + + if (device->info.family >= NV_DEVICE_INFO_V0_FERMI) + return 0; + args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; args.limit = cli->vmm.vmm.limit - 1; - - chan->push.addr = chan->push.vma->addr; } else if (chan->push.buffer->bo.mem.mem_type == TTM_PL_VRAM) { if (device->info.family == NV_DEVICE_INFO_V0_TNT) { @@ -214,10 +217,11 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, static int nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, - u32 engine, struct nouveau_channel **pchan) + u64 runlist, struct nouveau_channel **pchan) { struct nouveau_cli *cli = (void *)device->object.client; - static const u16 oclasses[] = { PASCAL_CHANNEL_GPFIFO_A, + static const u16 oclasses[] = { VOLTA_CHANNEL_GPFIFO_A, + PASCAL_CHANNEL_GPFIFO_A, MAXWELL_CHANNEL_GPFIFO_A, KEPLER_CHANNEL_GPFIFO_B, KEPLER_CHANNEL_GPFIFO_A, @@ -245,9 +249,9 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, do { if (oclass[0] >= KEPLER_CHANNEL_GPFIFO_A) { args.kepler.version = 0; - args.kepler.engines = engine; args.kepler.ilength = 0x02000; args.kepler.ioffset = 0x10000 + chan->push.addr; + args.kepler.runlist = runlist; args.kepler.vmm = nvif_handle(&cli->vmm.vmm.object); size = sizeof(args.kepler); } else @@ -474,3 +478,28 @@ done: cli->base.super = super; return ret; } + +int +nouveau_channels_init(struct nouveau_drm *drm) +{ + struct { + struct nv_device_info_v1 m; + struct { + struct nv_device_info_v1_data channels; + } v; + } args = { + .m.version = 1, + .m.count = sizeof(args.v) / sizeof(args.v.channels), + .v.channels.mthd = NV_DEVICE_FIFO_CHANNELS, + }; + struct nvif_object *device = &drm->client.device.object; + int ret; + + ret = nvif_object_mthd(device, NV_DEVICE_V0_INFO, &args, sizeof(args)); + if (ret || args.v.channels.mthd == NV_DEVICE_INFO_INVALID) + return -ENODEV; + + drm->chan.nr = args.v.channels.data; + drm->chan.context_base = dma_fence_context_alloc(drm->chan.nr); + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h index 14607c16a2bd..64454c2ebd90 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.h +++ b/drivers/gpu/drm/nouveau/nouveau_chan.h @@ -45,6 +45,7 @@ struct nouveau_channel { atomic_t killed; }; +int nouveau_channels_init(struct nouveau_drm *); int nouveau_channel_new(struct nouveau_drm *, struct nvif_device *, u32 arg0, u32 arg1, struct nouveau_channel **); diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 6ed9cb053dfa..7b557c354307 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -151,7 +151,7 @@ nouveau_conn_atomic_set_property(struct drm_connector *connector, /* ... except prior to G80, where the code * doesn't support such things. */ - if (disp->disp.oclass < NV50_DISP) + if (disp->disp.object.oclass < NV50_DISP) return -EINVAL; break; default: @@ -260,7 +260,7 @@ nouveau_conn_reset(struct drm_connector *connector) asyc->procamp.color_vibrance = 150; asyc->procamp.vibrant_hue = 90; - if (nouveau_display(connector->dev)->disp.oclass < NV50_DISP) { + if (nouveau_display(connector->dev)->disp.object.oclass < NV50_DISP) { switch (connector->connector_type) { case DRM_MODE_CONNECTOR_LVDS: /* See note in nouveau_conn_atomic_set_property(). */ @@ -314,7 +314,7 @@ nouveau_conn_attach_properties(struct drm_connector *connector) case DRM_MODE_CONNECTOR_TV: break; case DRM_MODE_CONNECTOR_VGA: - if (disp->disp.oclass < NV50_DISP) + if (disp->disp.object.oclass < NV50_DISP) break; /* Can only scale on DFPs. */ /* Fall-through. */ default: @@ -1005,7 +1005,7 @@ get_tmds_link_bandwidth(struct drm_connector *connector, bool hdmi) return 112000; } -static int +static enum drm_mode_status nouveau_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { @@ -1321,7 +1321,7 @@ nouveau_connector_create(struct drm_device *dev, int index) } /* HDMI 3D support */ - if ((disp->disp.oclass >= G82_DISP) + if ((disp->disp.object.oclass >= G82_DISP) && ((type == DRM_MODE_CONNECTOR_DisplayPort) || (type == DRM_MODE_CONNECTOR_eDP) || (type == DRM_MODE_CONNECTOR_HDMIA))) @@ -1343,7 +1343,7 @@ nouveau_connector_create(struct drm_device *dev, int index) case DCB_CONNECTOR_LVDS_SPWG: case DCB_CONNECTOR_eDP: /* see note in nouveau_connector_set_property() */ - if (disp->disp.oclass < NV50_DISP) { + if (disp->disp.object.oclass < NV50_DISP) { nv_connector->scaling_mode = DRM_MODE_SCALE_FULLSCREEN; break; } @@ -1366,8 +1366,8 @@ nouveau_connector_create(struct drm_device *dev, int index) break; } - ret = nvif_notify_init(&disp->disp, nouveau_connector_hotplug, true, - NV04_DISP_NTFY_CONN, + ret = nvif_notify_init(&disp->disp.object, nouveau_connector_hotplug, + true, NV04_DISP_NTFY_CONN, &(struct nvif_notify_conn_req_v0) { .mask = NVIF_NOTIFY_CONN_V0_ANY, .conn = index, diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 7d0bec8dd03d..774b429142bc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -116,7 +116,7 @@ nouveau_display_scanoutpos_head(struct drm_crtc *crtc, int *vpos, int *hpos, bool ret = false; do { - ret = nvif_mthd(&disp->disp, 0, &args, sizeof(args)); + ret = nvif_mthd(&disp->disp.object, 0, &args, sizeof(args)); if (ret != 0) return false; @@ -175,7 +175,7 @@ nouveau_display_vblank_init(struct drm_device *dev) list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - ret = nvif_notify_init(&disp->disp, + ret = nvif_notify_init(&disp->disp.object, nouveau_display_vblank_handler, false, NV04_DISP_NTFY_VBLANK, &(struct nvif_notify_head_req_v0) { @@ -454,10 +454,10 @@ nouveau_display_create_properties(struct drm_device *dev) struct nouveau_display *disp = nouveau_display(dev); int gen; - if (disp->disp.oclass < NV50_DISP) + if (disp->disp.object.oclass < NV50_DISP) gen = 0; else - if (disp->disp.oclass < GF110_DISP) + if (disp->disp.object.oclass < GF110_DISP) gen = 1; else gen = 2; @@ -533,31 +533,10 @@ nouveau_display_create(struct drm_device *dev) drm_kms_helper_poll_disable(dev); if (nouveau_modeset != 2 && drm->vbios.dcb.entries) { - static const u16 oclass[] = { - GP102_DISP, - GP100_DISP, - GM200_DISP, - GM107_DISP, - GK110_DISP, - GK104_DISP, - GF110_DISP, - GT214_DISP, - GT206_DISP, - GT200_DISP, - G82_DISP, - NV50_DISP, - NV04_DISP, - }; - int i; - - for (i = 0, ret = -ENODEV; ret && i < ARRAY_SIZE(oclass); i++) { - ret = nvif_object_init(&drm->client.device.object, 0, - oclass[i], NULL, 0, &disp->disp); - } - + ret = nvif_disp_ctor(&drm->client.device, 0, &disp->disp); if (ret == 0) { nouveau_display_create_properties(dev); - if (disp->disp.oclass < NV50_DISP) + if (disp->disp.object.oclass < NV50_DISP) ret = nv04_display_create(dev); else ret = nv50_display_create(dev); @@ -611,7 +590,7 @@ nouveau_display_destroy(struct drm_device *dev) if (disp->dtor) disp->dtor(dev); - nvif_object_fini(&disp->disp); + nvif_disp_dtor(&disp->disp); nouveau_drm(dev)->display = NULL; kfree(disp); diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index 270ba56f2756..54aa7c3fa42d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -2,6 +2,7 @@ #ifndef __NOUVEAU_DISPLAY_H__ #define __NOUVEAU_DISPLAY_H__ #include "nouveau_drv.h" +#include <nvif/disp.h> struct nouveau_framebuffer { struct drm_framebuffer base; @@ -38,7 +39,7 @@ struct nouveau_display { int (*init)(struct drm_device *); void (*fini)(struct drm_device *); - struct nvif_object disp; + struct nvif_disp disp; struct drm_property *dithering_mode; struct drm_property *dithering_depth; diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c index 10e84f6ca2b7..945afd34138e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.c +++ b/drivers/gpu/drm/nouveau/nouveau_dma.c @@ -28,6 +28,8 @@ #include "nouveau_dma.h" #include "nouveau_vmm.h" +#include <nvif/user.h> + void OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords) { @@ -80,18 +82,11 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout) } void -nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, - int delta, int length) +nv50_dma_push(struct nouveau_channel *chan, u64 offset, int length) { - struct nouveau_cli *cli = (void *)chan->user.client; + struct nvif_user *user = &chan->drm->client.device.user; struct nouveau_bo *pb = chan->push.buffer; - struct nouveau_vma *vma; int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; - u64 offset; - - vma = nouveau_vma_find(bo, &cli->vmm); - BUG_ON(!vma); - offset = vma->addr + delta; BUG_ON(chan->dma.ib_free < 1); @@ -105,6 +100,8 @@ nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, nouveau_bo_rd32(pb, 0); nvif_wr32(&chan->user, 0x8c, chan->dma.ib_put); + if (user->func && user->func->doorbell) + user->func->doorbell(user, chan->chid); chan->dma.ib_free--; } diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index 74e10b14a7da..fc5e3f41282d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -31,8 +31,7 @@ #include "nouveau_chan.h" int nouveau_dma_wait(struct nouveau_channel *, int slots, int size); -void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *, - int delta, int length); +void nv50_dma_push(struct nouveau_channel *, u64 addr, int length); /* * There's a hw race condition where you can't jump to your PUT offset, @@ -55,7 +54,6 @@ enum { NvSub2D = 3, /* DO NOT CHANGE - hardcoded for kepler gr fifo */ NvSubCopy = 4, /* DO NOT CHANGE - hardcoded for kepler gr fifo */ - FermiSw = 5, /* DO NOT CHANGE (well.. 6/7 will work...) */ }; /* Object handles - for stuff that's doesn't use handle == oclass. */ @@ -151,7 +149,7 @@ FIRE_RING(struct nouveau_channel *chan) chan->accel_done = true; if (chan->dma.ib_max) { - nv50_dma_push(chan, chan->push.buffer, chan->dma.put << 2, + nv50_dma_push(chan, chan->push.addr + (chan->dma.put << 2), (chan->dma.cur - chan->dma.put) << 2); } else { WRITE_PUT(chan->dma.cur); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index bbbf353682e1..775443c9af94 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -38,6 +38,8 @@ #include <core/tegra.h> #include <nvif/driver.h> +#include <nvif/fifo.h> +#include <nvif/user.h> #include <nvif/class.h> #include <nvif/cl0002.h> @@ -112,24 +114,22 @@ nouveau_name(struct drm_device *dev) } static inline bool -nouveau_cli_work_ready(struct dma_fence *fence, bool wait) +nouveau_cli_work_ready(struct dma_fence *fence) { - if (!dma_fence_is_signaled(fence)) { - if (!wait) - return false; - WARN_ON(dma_fence_wait_timeout(fence, false, 2 * HZ) <= 0); - } + if (!dma_fence_is_signaled(fence)) + return false; dma_fence_put(fence); return true; } static void -nouveau_cli_work_flush(struct nouveau_cli *cli, bool wait) +nouveau_cli_work(struct work_struct *w) { + struct nouveau_cli *cli = container_of(w, typeof(*cli), work); struct nouveau_cli_work *work, *wtmp; mutex_lock(&cli->lock); list_for_each_entry_safe(work, wtmp, &cli->worker, head) { - if (!work->fence || nouveau_cli_work_ready(work->fence, wait)) { + if (!work->fence || nouveau_cli_work_ready(work->fence)) { list_del(&work->head); work->func(work); } @@ -158,16 +158,16 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence, } static void -nouveau_cli_work(struct work_struct *w) -{ - struct nouveau_cli *cli = container_of(w, typeof(*cli), work); - nouveau_cli_work_flush(cli, false); -} - -static void nouveau_cli_fini(struct nouveau_cli *cli) { - nouveau_cli_work_flush(cli, true); + /* All our channels are dead now, which means all the fences they + * own are signalled, and all callback functions have been called. + * + * So, after flushing the workqueue, there should be nothing left. + */ + flush_work(&cli->work); + WARN_ON(!list_empty(&cli->worker)); + usif_client_fini(cli); nouveau_vmm_fini(&cli->vmm); nvif_mmu_fini(&cli->mmu); @@ -307,6 +307,16 @@ nouveau_accel_init(struct nouveau_drm *drm) if (nouveau_noaccel) return; + ret = nouveau_channels_init(drm); + if (ret) + return; + + if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_VOLTA) { + ret = nvif_user_init(device); + if (ret) + return; + } + /* initialise synchronisation routines */ /*XXX: this is crap, but the fence/channel stuff is a little * backwards in some places. this will be fixed. @@ -338,6 +348,7 @@ nouveau_accel_init(struct nouveau_drm *drm) case KEPLER_CHANNEL_GPFIFO_B: case MAXWELL_CHANNEL_GPFIFO_A: case PASCAL_CHANNEL_GPFIFO_A: + case VOLTA_CHANNEL_GPFIFO_A: ret = nvc0_fence_create(drm); break; default: @@ -354,13 +365,12 @@ nouveau_accel_init(struct nouveau_drm *drm) if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { ret = nouveau_channel_new(drm, &drm->client.device, - NVA06F_V0_ENGINE_CE0 | - NVA06F_V0_ENGINE_CE1, - 0, &drm->cechan); + nvif_fifo_runlist_ce(device), 0, + &drm->cechan); if (ret) NV_ERROR(drm, "failed to create ce channel, %d\n", ret); - arg0 = NVA06F_V0_ENGINE_GR; + arg0 = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR); arg1 = 1; } else if (device->info.chipset >= 0xa3 && @@ -386,38 +396,36 @@ nouveau_accel_init(struct nouveau_drm *drm) return; } - ret = nvif_object_init(&drm->channel->user, NVDRM_NVSW, - nouveau_abi16_swclass(drm), NULL, 0, &drm->nvsw); - if (ret == 0) { - ret = RING_SPACE(drm->channel, 2); + if (device->info.family < NV_DEVICE_INFO_V0_TESLA) { + ret = nvif_object_init(&drm->channel->user, NVDRM_NVSW, + nouveau_abi16_swclass(drm), NULL, 0, + &drm->nvsw); if (ret == 0) { - if (device->info.family < NV_DEVICE_INFO_V0_FERMI) { + ret = RING_SPACE(drm->channel, 2); + if (ret == 0) { BEGIN_NV04(drm->channel, NvSubSw, 0, 1); - OUT_RING (drm->channel, NVDRM_NVSW); - } else - if (device->info.family < NV_DEVICE_INFO_V0_KEPLER) { - BEGIN_NVC0(drm->channel, FermiSw, 0, 1); - OUT_RING (drm->channel, 0x001f0000); + OUT_RING (drm->channel, drm->nvsw.handle); + } + + ret = nvif_notify_init(&drm->nvsw, + nouveau_flip_complete, + false, NV04_NVSW_NTFY_UEVENT, + NULL, 0, 0, &drm->flip); + if (ret == 0) + ret = nvif_notify_get(&drm->flip); + if (ret) { + nouveau_accel_fini(drm); + return; } } - ret = nvif_notify_init(&drm->nvsw, nouveau_flip_complete, - false, NV04_NVSW_NTFY_UEVENT, - NULL, 0, 0, &drm->flip); - if (ret == 0) - ret = nvif_notify_get(&drm->flip); if (ret) { + NV_ERROR(drm, "failed to allocate sw class, %d\n", ret); nouveau_accel_fini(drm); return; } } - if (ret) { - NV_ERROR(drm, "failed to allocate software object, %d\n", ret); - nouveau_accel_fini(drm); - return; - } - if (device->info.family < NV_DEVICE_INFO_V0_FERMI) { ret = nvkm_gpuobj_new(nvxx_device(&drm->client.device), 32, 0, false, NULL, &drm->notify); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 881b44b89a01..6e1acaec3400 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -170,6 +170,12 @@ struct nouveau_drm { /* synchronisation */ void *fence; + /* Global channel management. */ + struct { + int nr; + u64 context_base; + } chan; + /* context for accelerated drm-internal operations */ struct nouveau_channel *cechan; struct nouveau_channel *channel; diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h index e28d966946a1..3517f920bf89 100644 --- a/drivers/gpu/drm/nouveau/nouveau_encoder.h +++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h @@ -32,6 +32,7 @@ #include <drm/drm_encoder_slave.h> #include <drm/drm_dp_mst_helper.h> #include "dispnv04/disp.h" +struct nv50_head_atom; #define NV_DPMS_CLEARED 0x80 @@ -68,7 +69,7 @@ struct nouveau_encoder { void (*enc_save)(struct drm_encoder *encoder); void (*enc_restore)(struct drm_encoder *encoder); void (*update)(struct nouveau_encoder *, u8 head, - struct drm_display_mode *, u8 proto, u8 depth); + struct nv50_head_atom *, u8 proto, u8 depth); }; struct nouveau_encoder * diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 503fa94dc06d..412d49bc6e56 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -74,15 +74,14 @@ nouveau_fence_signal(struct nouveau_fence *fence) } static struct nouveau_fence * -nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm) { - struct nouveau_fence_priv *priv = (void*)drm->fence; - +nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm) +{ if (fence->ops != &nouveau_fence_ops_legacy && fence->ops != &nouveau_fence_ops_uevent) return NULL; - if (fence->context < priv->context_base || - fence->context >= priv->context_base + priv->contexts) + if (fence->context < drm->chan.context_base || + fence->context >= drm->chan.context_base + drm->chan.nr) return NULL; return from_fence(fence); @@ -176,7 +175,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); - fctx->context = priv->context_base + chan->chid; + fctx->context = chan->drm->chan.context_base + chan->chid; if (chan == chan->drm->cechan) strcpy(fctx->name, "copy engine channel"); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 5bd8d30d1657..b999e6058046 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -55,8 +55,6 @@ struct nouveau_fence_priv { int (*context_new)(struct nouveau_channel *); void (*context_del)(struct nouveau_channel *); - u32 contexts; - u64 context_base; bool uevent; }; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index e72a7e37eb0a..300daee74209 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -99,6 +99,7 @@ struct nouveau_gem_object_unmap { static void nouveau_gem_object_delete(struct nouveau_vma *vma) { + nouveau_fence_unref(&vma->fence); nouveau_vma_del(&vma); } @@ -114,25 +115,12 @@ nouveau_gem_object_delete_work(struct nouveau_cli_work *w) static void nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma) { - const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM; - struct reservation_object *resv = nvbo->bo.resv; - struct reservation_object_list *fobj; + struct dma_fence *fence = vma->fence ? &vma->fence->base : NULL; struct nouveau_gem_object_unmap *work; - struct dma_fence *fence = NULL; - - fobj = reservation_object_get_list(resv); list_del_init(&vma->head); - if (fobj && fobj->shared_count > 1) - ttm_bo_wait(&nvbo->bo, false, false); - else if (fobj && fobj->shared_count == 1) - fence = rcu_dereference_protected(fobj->shared[0], - reservation_object_held(resv)); - else - fence = reservation_object_get_excl(nvbo->bo.resv); - - if (!fence || !mapped) { + if (!fence) { nouveau_gem_object_delete(vma); return; } @@ -344,9 +332,20 @@ validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence, nvbo = list_entry(op->list.next, struct nouveau_bo, entry); b = &pbbo[nvbo->pbbo_index]; - if (likely(fence)) + if (likely(fence)) { + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); + struct nouveau_vma *vma; + nouveau_bo_fence(nvbo, fence, !!b->write_domains); + if (drm->client.vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { + vma = (void *)(unsigned long)b->user_priv; + nouveau_fence_unref(&vma->fence); + dma_fence_get(&fence->base); + vma->fence = fence; + } + } + if (unlikely(nvbo->validate_mapped)) { ttm_bo_kunmap(&nvbo->kmap); nvbo->validate_mapped = false; @@ -432,7 +431,20 @@ retry: } } - b->user_priv = (uint64_t)(unsigned long)nvbo; + if (cli->vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { + struct nouveau_vmm *vmm = &cli->vmm; + struct nouveau_vma *vma = nouveau_vma_find(nvbo, vmm); + if (!vma) { + NV_PRINTK(err, cli, "vma not found!\n"); + ret = -EINVAL; + break; + } + + b->user_priv = (uint64_t)(unsigned long)vma; + } else { + b->user_priv = (uint64_t)(unsigned long)nvbo; + } + nvbo->reserved_by = file_priv; nvbo->pbbo_index = i; if ((b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM) && @@ -763,10 +775,10 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } for (i = 0; i < req->nr_push; i++) { - struct nouveau_bo *nvbo = (void *)(unsigned long) + struct nouveau_vma *vma = (void *)(unsigned long) bo[push[i].bo_index].user_priv; - nv50_dma_push(chan, nvbo, push[i].offset, + nv50_dma_push(chan, vma->addr + push[i].offset, push[i].length); } } else diff --git a/drivers/gpu/drm/nouveau/nouveau_hwmon.c b/drivers/gpu/drm/nouveau/nouveau_hwmon.c index 7c965648df80..44178b4c3599 100644 --- a/drivers/gpu/drm/nouveau/nouveau_hwmon.c +++ b/drivers/gpu/drm/nouveau/nouveau_hwmon.c @@ -327,7 +327,7 @@ nouveau_temp_is_visible(const void *data, u32 attr, int channel) struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data); struct nvkm_therm *therm = nvxx_therm(&drm->client.device); - if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0) + if (!therm || !therm->attr_get || nvkm_therm_temp_get(therm) < 0) return 0; switch (attr) { @@ -351,8 +351,8 @@ nouveau_pwm_is_visible(const void *data, u32 attr, int channel) struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data); struct nvkm_therm *therm = nvxx_therm(&drm->client.device); - if (therm && therm->attr_get && therm->fan_get && - therm->fan_get(therm) < 0) + if (!therm || !therm->attr_get || !therm->fan_get || + therm->fan_get(therm) < 0) return 0; switch (attr) { @@ -707,13 +707,20 @@ nouveau_hwmon_init(struct drm_device *dev) { #if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE)) struct nouveau_drm *drm = nouveau_drm(dev); + struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device); struct nvkm_therm *therm = nvxx_therm(&drm->client.device); + struct nvkm_volt *volt = nvxx_volt(&drm->client.device); const struct attribute_group *special_groups[N_ATTR_GROUPS]; struct nouveau_hwmon *hwmon; struct device *hwmon_dev; int ret = 0; int i = 0; + if (!iccsense && !therm && !volt) { + NV_DEBUG(drm, "Skipping hwmon registration\n"); + return 0; + } + hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return -ENOMEM; @@ -749,6 +756,9 @@ nouveau_hwmon_fini(struct drm_device *dev) #if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE)) struct nouveau_hwmon *hwmon = nouveau_hwmon(dev); + if (!hwmon) + return; + if (hwmon->hwmon) hwmon_device_unregister(hwmon->hwmon); diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index dff51a0ee028..8c093ca4222e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -63,7 +63,7 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg) { struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_mem *mem; int ret; @@ -103,7 +103,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg) { struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_mem *mem; int ret; @@ -131,7 +131,7 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg) { struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_mem *mem; int ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index f5371d96b003..2032c3e4f6e5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -92,6 +92,7 @@ nouveau_vma_new(struct nouveau_bo *nvbo, struct nouveau_vmm *vmm, vma->refs = 1; vma->addr = ~0ULL; vma->mem = NULL; + vma->fence = NULL; list_add_tail(&vma->head, &nvbo->vma_list); if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM && diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.h b/drivers/gpu/drm/nouveau/nouveau_vmm.h index 5c31f43678d3..7e3b118cf7c4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.h +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.h @@ -11,6 +11,8 @@ struct nouveau_vma { u64 addr; struct nouveau_mem *mem; + + struct nouveau_fence *fence; }; struct nouveau_vma *nouveau_vma_find(struct nouveau_bo *, struct nouveau_vmm *); diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c index fa8f2375c398..c41e82be4893 100644 --- a/drivers/gpu/drm/nouveau/nv04_fence.c +++ b/drivers/gpu/drm/nouveau/nv04_fence.c @@ -109,7 +109,5 @@ nv04_fence_create(struct nouveau_drm *drm) priv->base.dtor = nv04_fence_destroy; priv->base.context_new = nv04_fence_context_new; priv->base.context_del = nv04_fence_context_del; - priv->base.contexts = 15; - priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c index 2998bde29211..4476b712dc84 100644 --- a/drivers/gpu/drm/nouveau/nv10_fence.c +++ b/drivers/gpu/drm/nouveau/nv10_fence.c @@ -103,8 +103,6 @@ nv10_fence_create(struct nouveau_drm *drm) priv->base.dtor = nv10_fence_destroy; priv->base.context_new = nv10_fence_context_new; priv->base.context_del = nv10_fence_context_del; - priv->base.contexts = 31; - priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); spin_lock_init(&priv->lock); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c index 6477b7069e14..5d613d43b84d 100644 --- a/drivers/gpu/drm/nouveau/nv17_fence.c +++ b/drivers/gpu/drm/nouveau/nv17_fence.c @@ -125,8 +125,6 @@ nv17_fence_create(struct nouveau_drm *drm) priv->base.resume = nv17_fence_resume; priv->base.context_new = nv17_fence_context_new; priv->base.context_del = nv10_fence_context_del; - priv->base.contexts = 31; - priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); spin_lock_init(&priv->lock); ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM, diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c deleted file mode 100644 index 8bd739cfd00d..000000000000 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ /dev/null @@ -1,4559 +0,0 @@ -/* - * Copyright 2011 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ - -#include <linux/dma-mapping.h> -#include <linux/hdmi.h> - -#include <drm/drmP.h> -#include <drm/drm_atomic.h> -#include <drm/drm_atomic_helper.h> -#include <drm/drm_crtc_helper.h> -#include <drm/drm_dp_helper.h> -#include <drm/drm_fb_helper.h> -#include <drm/drm_plane_helper.h> -#include <drm/drm_edid.h> - -#include <nvif/class.h> -#include <nvif/cl0002.h> -#include <nvif/cl5070.h> -#include <nvif/cl507a.h> -#include <nvif/cl507b.h> -#include <nvif/cl507c.h> -#include <nvif/cl507d.h> -#include <nvif/cl507e.h> -#include <nvif/event.h> - -#include "nouveau_drv.h" -#include "nouveau_dma.h" -#include "nouveau_gem.h" -#include "nouveau_connector.h" -#include "nouveau_encoder.h" -#include "nouveau_crtc.h" -#include "nouveau_fence.h" -#include "nouveau_fbcon.h" -#include "nv50_display.h" - -#define EVO_DMA_NR 9 - -#define EVO_MASTER (0x00) -#define EVO_FLIP(c) (0x01 + (c)) -#define EVO_OVLY(c) (0x05 + (c)) -#define EVO_OIMM(c) (0x09 + (c)) -#define EVO_CURS(c) (0x0d + (c)) - -/* offsets in shared sync bo of various structures */ -#define EVO_SYNC(c, o) ((c) * 0x0100 + (o)) -#define EVO_MAST_NTFY EVO_SYNC( 0, 0x00) -#define EVO_FLIP_SEM0(c) EVO_SYNC((c) + 1, 0x00) -#define EVO_FLIP_SEM1(c) EVO_SYNC((c) + 1, 0x10) -#define EVO_FLIP_NTFY0(c) EVO_SYNC((c) + 1, 0x20) -#define EVO_FLIP_NTFY1(c) EVO_SYNC((c) + 1, 0x30) - -/****************************************************************************** - * Atomic state - *****************************************************************************/ -#define nv50_atom(p) container_of((p), struct nv50_atom, state) - -struct nv50_atom { - struct drm_atomic_state state; - - struct list_head outp; - bool lock_core; - bool flush_disable; -}; - -struct nv50_outp_atom { - struct list_head head; - - struct drm_encoder *encoder; - bool flush_disable; - - union { - struct { - bool ctrl:1; - }; - u8 mask; - } clr; - - union { - struct { - bool ctrl:1; - }; - u8 mask; - } set; -}; - -#define nv50_head_atom(p) container_of((p), struct nv50_head_atom, state) - -struct nv50_head_atom { - struct drm_crtc_state state; - - struct { - u16 iW; - u16 iH; - u16 oW; - u16 oH; - } view; - - struct nv50_head_mode { - bool interlace; - u32 clock; - struct { - u16 active; - u16 synce; - u16 blanke; - u16 blanks; - } h; - struct { - u32 active; - u16 synce; - u16 blanke; - u16 blanks; - u16 blank2s; - u16 blank2e; - u16 blankus; - } v; - } mode; - - struct { - bool visible; - u32 handle; - u64 offset:40; - u8 mode:4; - } lut; - - struct { - bool visible; - u32 handle; - u64 offset:40; - u8 format; - u8 kind:7; - u8 layout:1; - u8 block:4; - u32 pitch:20; - u16 x; - u16 y; - u16 w; - u16 h; - } core; - - struct { - bool visible; - u32 handle; - u64 offset:40; - u8 layout:1; - u8 format:1; - } curs; - - struct { - u8 depth; - u8 cpp; - u16 x; - u16 y; - u16 w; - u16 h; - } base; - - struct { - u8 cpp; - } ovly; - - struct { - bool enable:1; - u8 bits:2; - u8 mode:4; - } dither; - - struct { - struct { - u16 cos:12; - u16 sin:12; - } sat; - } procamp; - - union { - struct { - bool ilut:1; - bool core:1; - bool curs:1; - }; - u8 mask; - } clr; - - union { - struct { - bool ilut:1; - bool core:1; - bool curs:1; - bool view:1; - bool mode:1; - bool base:1; - bool ovly:1; - bool dither:1; - bool procamp:1; - }; - u16 mask; - } set; -}; - -static inline struct nv50_head_atom * -nv50_head_atom_get(struct drm_atomic_state *state, struct drm_crtc *crtc) -{ - struct drm_crtc_state *statec = drm_atomic_get_crtc_state(state, crtc); - if (IS_ERR(statec)) - return (void *)statec; - return nv50_head_atom(statec); -} - -#define nv50_wndw_atom(p) container_of((p), struct nv50_wndw_atom, state) - -struct nv50_wndw_atom { - struct drm_plane_state state; - u8 interval; - - struct { - u32 handle; - u16 offset:12; - bool awaken:1; - } ntfy; - - struct { - u32 handle; - u16 offset:12; - u32 acquire; - u32 release; - } sema; - - struct { - u8 enable:2; - } lut; - - struct { - u8 mode:2; - u8 interval:4; - - u8 format; - u8 kind:7; - u8 layout:1; - u8 block:4; - u32 pitch:20; - u16 w; - u16 h; - - u32 handle; - u64 offset; - } image; - - struct { - u16 x; - u16 y; - } point; - - union { - struct { - bool ntfy:1; - bool sema:1; - bool image:1; - }; - u8 mask; - } clr; - - union { - struct { - bool ntfy:1; - bool sema:1; - bool image:1; - bool lut:1; - bool point:1; - }; - u8 mask; - } set; -}; - -/****************************************************************************** - * EVO channel - *****************************************************************************/ - -struct nv50_chan { - struct nvif_object user; - struct nvif_device *device; -}; - -static int -nv50_chan_create(struct nvif_device *device, struct nvif_object *disp, - const s32 *oclass, u8 head, void *data, u32 size, - struct nv50_chan *chan) -{ - struct nvif_sclass *sclass; - int ret, i, n; - - chan->device = device; - - ret = n = nvif_object_sclass_get(disp, &sclass); - if (ret < 0) - return ret; - - while (oclass[0]) { - for (i = 0; i < n; i++) { - if (sclass[i].oclass == oclass[0]) { - ret = nvif_object_init(disp, 0, oclass[0], - data, size, &chan->user); - if (ret == 0) - nvif_object_map(&chan->user, NULL, 0); - nvif_object_sclass_put(&sclass); - return ret; - } - } - oclass++; - } - - nvif_object_sclass_put(&sclass); - return -ENOSYS; -} - -static void -nv50_chan_destroy(struct nv50_chan *chan) -{ - nvif_object_fini(&chan->user); -} - -/****************************************************************************** - * PIO EVO channel - *****************************************************************************/ - -struct nv50_pioc { - struct nv50_chan base; -}; - -static void -nv50_pioc_destroy(struct nv50_pioc *pioc) -{ - nv50_chan_destroy(&pioc->base); -} - -static int -nv50_pioc_create(struct nvif_device *device, struct nvif_object *disp, - const s32 *oclass, u8 head, void *data, u32 size, - struct nv50_pioc *pioc) -{ - return nv50_chan_create(device, disp, oclass, head, data, size, - &pioc->base); -} - -/****************************************************************************** - * Overlay Immediate - *****************************************************************************/ - -struct nv50_oimm { - struct nv50_pioc base; -}; - -static int -nv50_oimm_create(struct nvif_device *device, struct nvif_object *disp, - int head, struct nv50_oimm *oimm) -{ - struct nv50_disp_cursor_v0 args = { - .head = head, - }; - static const s32 oclass[] = { - GK104_DISP_OVERLAY, - GF110_DISP_OVERLAY, - GT214_DISP_OVERLAY, - G82_DISP_OVERLAY, - NV50_DISP_OVERLAY, - 0 - }; - - return nv50_pioc_create(device, disp, oclass, head, &args, sizeof(args), - &oimm->base); -} - -/****************************************************************************** - * DMA EVO channel - *****************************************************************************/ - -struct nv50_dmac_ctxdma { - struct list_head head; - struct nvif_object object; -}; - -struct nv50_dmac { - struct nv50_chan base; - dma_addr_t handle; - u32 *ptr; - - struct nvif_object sync; - struct nvif_object vram; - struct list_head ctxdma; - - /* Protects against concurrent pushbuf access to this channel, lock is - * grabbed by evo_wait (if the pushbuf reservation is successful) and - * dropped again by evo_kick. */ - struct mutex lock; -}; - -static void -nv50_dmac_ctxdma_del(struct nv50_dmac_ctxdma *ctxdma) -{ - nvif_object_fini(&ctxdma->object); - list_del(&ctxdma->head); - kfree(ctxdma); -} - -static struct nv50_dmac_ctxdma * -nv50_dmac_ctxdma_new(struct nv50_dmac *dmac, struct nouveau_framebuffer *fb) -{ - struct nouveau_drm *drm = nouveau_drm(fb->base.dev); - struct nv50_dmac_ctxdma *ctxdma; - const u8 kind = fb->nvbo->kind; - const u32 handle = 0xfb000000 | kind; - struct { - struct nv_dma_v0 base; - union { - struct nv50_dma_v0 nv50; - struct gf100_dma_v0 gf100; - struct gf119_dma_v0 gf119; - }; - } args = {}; - u32 argc = sizeof(args.base); - int ret; - - list_for_each_entry(ctxdma, &dmac->ctxdma, head) { - if (ctxdma->object.handle == handle) - return ctxdma; - } - - if (!(ctxdma = kzalloc(sizeof(*ctxdma), GFP_KERNEL))) - return ERR_PTR(-ENOMEM); - list_add(&ctxdma->head, &dmac->ctxdma); - - args.base.target = NV_DMA_V0_TARGET_VRAM; - args.base.access = NV_DMA_V0_ACCESS_RDWR; - args.base.start = 0; - args.base.limit = drm->client.device.info.ram_user - 1; - - if (drm->client.device.info.chipset < 0x80) { - args.nv50.part = NV50_DMA_V0_PART_256; - argc += sizeof(args.nv50); - } else - if (drm->client.device.info.chipset < 0xc0) { - args.nv50.part = NV50_DMA_V0_PART_256; - args.nv50.kind = kind; - argc += sizeof(args.nv50); - } else - if (drm->client.device.info.chipset < 0xd0) { - args.gf100.kind = kind; - argc += sizeof(args.gf100); - } else { - args.gf119.page = GF119_DMA_V0_PAGE_LP; - args.gf119.kind = kind; - argc += sizeof(args.gf119); - } - - ret = nvif_object_init(&dmac->base.user, handle, NV_DMA_IN_MEMORY, - &args, argc, &ctxdma->object); - if (ret) { - nv50_dmac_ctxdma_del(ctxdma); - return ERR_PTR(ret); - } - - return ctxdma; -} - -static void -nv50_dmac_destroy(struct nv50_dmac *dmac, struct nvif_object *disp) -{ - struct nvif_device *device = dmac->base.device; - struct nv50_dmac_ctxdma *ctxdma, *ctxtmp; - - list_for_each_entry_safe(ctxdma, ctxtmp, &dmac->ctxdma, head) { - nv50_dmac_ctxdma_del(ctxdma); - } - - nvif_object_fini(&dmac->vram); - nvif_object_fini(&dmac->sync); - - nv50_chan_destroy(&dmac->base); - - if (dmac->ptr) { - struct device *dev = nvxx_device(device)->dev; - dma_free_coherent(dev, PAGE_SIZE, dmac->ptr, dmac->handle); - } -} - -static int -nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, - const s32 *oclass, u8 head, void *data, u32 size, u64 syncbuf, - struct nv50_dmac *dmac) -{ - struct nv50_disp_core_channel_dma_v0 *args = data; - struct nvif_object pushbuf; - int ret; - - mutex_init(&dmac->lock); - INIT_LIST_HEAD(&dmac->ctxdma); - - dmac->ptr = dma_alloc_coherent(nvxx_device(device)->dev, PAGE_SIZE, - &dmac->handle, GFP_KERNEL); - if (!dmac->ptr) - return -ENOMEM; - - ret = nvif_object_init(&device->object, 0, NV_DMA_FROM_MEMORY, - &(struct nv_dma_v0) { - .target = NV_DMA_V0_TARGET_PCI_US, - .access = NV_DMA_V0_ACCESS_RD, - .start = dmac->handle + 0x0000, - .limit = dmac->handle + 0x0fff, - }, sizeof(struct nv_dma_v0), &pushbuf); - if (ret) - return ret; - - args->pushbuf = nvif_handle(&pushbuf); - - ret = nv50_chan_create(device, disp, oclass, head, data, size, - &dmac->base); - nvif_object_fini(&pushbuf); - if (ret) - return ret; - - ret = nvif_object_init(&dmac->base.user, 0xf0000000, NV_DMA_IN_MEMORY, - &(struct nv_dma_v0) { - .target = NV_DMA_V0_TARGET_VRAM, - .access = NV_DMA_V0_ACCESS_RDWR, - .start = syncbuf + 0x0000, - .limit = syncbuf + 0x0fff, - }, sizeof(struct nv_dma_v0), - &dmac->sync); - if (ret) - return ret; - - ret = nvif_object_init(&dmac->base.user, 0xf0000001, NV_DMA_IN_MEMORY, - &(struct nv_dma_v0) { - .target = NV_DMA_V0_TARGET_VRAM, - .access = NV_DMA_V0_ACCESS_RDWR, - .start = 0, - .limit = device->info.ram_user - 1, - }, sizeof(struct nv_dma_v0), - &dmac->vram); - if (ret) - return ret; - - return ret; -} - -/****************************************************************************** - * Core - *****************************************************************************/ - -struct nv50_mast { - struct nv50_dmac base; -}; - -static int -nv50_core_create(struct nvif_device *device, struct nvif_object *disp, - u64 syncbuf, struct nv50_mast *core) -{ - struct nv50_disp_core_channel_dma_v0 args = { - .pushbuf = 0xb0007d00, - }; - static const s32 oclass[] = { - GP102_DISP_CORE_CHANNEL_DMA, - GP100_DISP_CORE_CHANNEL_DMA, - GM200_DISP_CORE_CHANNEL_DMA, - GM107_DISP_CORE_CHANNEL_DMA, - GK110_DISP_CORE_CHANNEL_DMA, - GK104_DISP_CORE_CHANNEL_DMA, - GF110_DISP_CORE_CHANNEL_DMA, - GT214_DISP_CORE_CHANNEL_DMA, - GT206_DISP_CORE_CHANNEL_DMA, - GT200_DISP_CORE_CHANNEL_DMA, - G82_DISP_CORE_CHANNEL_DMA, - NV50_DISP_CORE_CHANNEL_DMA, - 0 - }; - - return nv50_dmac_create(device, disp, oclass, 0, &args, sizeof(args), - syncbuf, &core->base); -} - -/****************************************************************************** - * Base - *****************************************************************************/ - -struct nv50_sync { - struct nv50_dmac base; - u32 addr; - u32 data; -}; - -static int -nv50_base_create(struct nvif_device *device, struct nvif_object *disp, - int head, u64 syncbuf, struct nv50_sync *base) -{ - struct nv50_disp_base_channel_dma_v0 args = { - .pushbuf = 0xb0007c00 | head, - .head = head, - }; - static const s32 oclass[] = { - GK110_DISP_BASE_CHANNEL_DMA, - GK104_DISP_BASE_CHANNEL_DMA, - GF110_DISP_BASE_CHANNEL_DMA, - GT214_DISP_BASE_CHANNEL_DMA, - GT200_DISP_BASE_CHANNEL_DMA, - G82_DISP_BASE_CHANNEL_DMA, - NV50_DISP_BASE_CHANNEL_DMA, - 0 - }; - - return nv50_dmac_create(device, disp, oclass, head, &args, sizeof(args), - syncbuf, &base->base); -} - -/****************************************************************************** - * Overlay - *****************************************************************************/ - -struct nv50_ovly { - struct nv50_dmac base; -}; - -static int -nv50_ovly_create(struct nvif_device *device, struct nvif_object *disp, - int head, u64 syncbuf, struct nv50_ovly *ovly) -{ - struct nv50_disp_overlay_channel_dma_v0 args = { - .pushbuf = 0xb0007e00 | head, - .head = head, - }; - static const s32 oclass[] = { - GK104_DISP_OVERLAY_CONTROL_DMA, - GF110_DISP_OVERLAY_CONTROL_DMA, - GT214_DISP_OVERLAY_CHANNEL_DMA, - GT200_DISP_OVERLAY_CHANNEL_DMA, - G82_DISP_OVERLAY_CHANNEL_DMA, - NV50_DISP_OVERLAY_CHANNEL_DMA, - 0 - }; - - return nv50_dmac_create(device, disp, oclass, head, &args, sizeof(args), - syncbuf, &ovly->base); -} - -struct nv50_head { - struct nouveau_crtc base; - struct { - struct nouveau_bo *nvbo[2]; - int next; - } lut; - struct nv50_ovly ovly; - struct nv50_oimm oimm; -}; - -#define nv50_head(c) ((struct nv50_head *)nouveau_crtc(c)) -#define nv50_ovly(c) (&nv50_head(c)->ovly) -#define nv50_oimm(c) (&nv50_head(c)->oimm) -#define nv50_chan(c) (&(c)->base.base) -#define nv50_vers(c) nv50_chan(c)->user.oclass - -struct nv50_disp { - struct nvif_object *disp; - struct nv50_mast mast; - - struct nouveau_bo *sync; - - struct mutex mutex; -}; - -static struct nv50_disp * -nv50_disp(struct drm_device *dev) -{ - return nouveau_display(dev)->priv; -} - -#define nv50_mast(d) (&nv50_disp(d)->mast) - -/****************************************************************************** - * EVO channel helpers - *****************************************************************************/ -static u32 * -evo_wait(void *evoc, int nr) -{ - struct nv50_dmac *dmac = evoc; - struct nvif_device *device = dmac->base.device; - u32 put = nvif_rd32(&dmac->base.user, 0x0000) / 4; - - mutex_lock(&dmac->lock); - if (put + nr >= (PAGE_SIZE / 4) - 8) { - dmac->ptr[put] = 0x20000000; - - nvif_wr32(&dmac->base.user, 0x0000, 0x00000000); - if (nvif_msec(device, 2000, - if (!nvif_rd32(&dmac->base.user, 0x0004)) - break; - ) < 0) { - mutex_unlock(&dmac->lock); - pr_err("nouveau: evo channel stalled\n"); - return NULL; - } - - put = 0; - } - - return dmac->ptr + put; -} - -static void -evo_kick(u32 *push, void *evoc) -{ - struct nv50_dmac *dmac = evoc; - nvif_wr32(&dmac->base.user, 0x0000, (push - dmac->ptr) << 2); - mutex_unlock(&dmac->lock); -} - -#define evo_mthd(p, m, s) do { \ - const u32 _m = (m), _s = (s); \ - if (drm_debug & DRM_UT_KMS) \ - pr_err("%04x %d %s\n", _m, _s, __func__); \ - *((p)++) = ((_s << 18) | _m); \ -} while(0) - -#define evo_data(p, d) do { \ - const u32 _d = (d); \ - if (drm_debug & DRM_UT_KMS) \ - pr_err("\t%08x\n", _d); \ - *((p)++) = _d; \ -} while(0) - -/****************************************************************************** - * Plane - *****************************************************************************/ -#define nv50_wndw(p) container_of((p), struct nv50_wndw, plane) - -struct nv50_wndw { - const struct nv50_wndw_func *func; - struct nv50_dmac *dmac; - - struct drm_plane plane; - - struct nvif_notify notify; - u16 ntfy; - u16 sema; - u32 data; -}; - -struct nv50_wndw_func { - void *(*dtor)(struct nv50_wndw *); - int (*acquire)(struct nv50_wndw *, struct nv50_wndw_atom *asyw, - struct nv50_head_atom *asyh); - void (*release)(struct nv50_wndw *, struct nv50_wndw_atom *asyw, - struct nv50_head_atom *asyh); - void (*prepare)(struct nv50_wndw *, struct nv50_head_atom *asyh, - struct nv50_wndw_atom *asyw); - - void (*sema_set)(struct nv50_wndw *, struct nv50_wndw_atom *); - void (*sema_clr)(struct nv50_wndw *); - void (*ntfy_set)(struct nv50_wndw *, struct nv50_wndw_atom *); - void (*ntfy_clr)(struct nv50_wndw *); - int (*ntfy_wait_begun)(struct nv50_wndw *, struct nv50_wndw_atom *); - void (*image_set)(struct nv50_wndw *, struct nv50_wndw_atom *); - void (*image_clr)(struct nv50_wndw *); - void (*lut)(struct nv50_wndw *, struct nv50_wndw_atom *); - void (*point)(struct nv50_wndw *, struct nv50_wndw_atom *); - - u32 (*update)(struct nv50_wndw *, u32 interlock); -}; - -static int -nv50_wndw_wait_armed(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) -{ - if (asyw->set.ntfy) - return wndw->func->ntfy_wait_begun(wndw, asyw); - return 0; -} - -static u32 -nv50_wndw_flush_clr(struct nv50_wndw *wndw, u32 interlock, bool flush, - struct nv50_wndw_atom *asyw) -{ - if (asyw->clr.sema && (!asyw->set.sema || flush)) - wndw->func->sema_clr(wndw); - if (asyw->clr.ntfy && (!asyw->set.ntfy || flush)) - wndw->func->ntfy_clr(wndw); - if (asyw->clr.image && (!asyw->set.image || flush)) - wndw->func->image_clr(wndw); - - return flush ? wndw->func->update(wndw, interlock) : 0; -} - -static u32 -nv50_wndw_flush_set(struct nv50_wndw *wndw, u32 interlock, - struct nv50_wndw_atom *asyw) -{ - if (interlock) { - asyw->image.mode = 0; - asyw->image.interval = 1; - } - - if (asyw->set.sema ) wndw->func->sema_set (wndw, asyw); - if (asyw->set.ntfy ) wndw->func->ntfy_set (wndw, asyw); - if (asyw->set.image) wndw->func->image_set(wndw, asyw); - if (asyw->set.lut ) wndw->func->lut (wndw, asyw); - if (asyw->set.point) wndw->func->point (wndw, asyw); - - return wndw->func->update(wndw, interlock); -} - -static void -nv50_wndw_atomic_check_release(struct nv50_wndw *wndw, - struct nv50_wndw_atom *asyw, - struct nv50_head_atom *asyh) -{ - struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev); - NV_ATOMIC(drm, "%s release\n", wndw->plane.name); - wndw->func->release(wndw, asyw, asyh); - asyw->ntfy.handle = 0; - asyw->sema.handle = 0; -} - -static int -nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, - struct nv50_wndw_atom *asyw, - struct nv50_head_atom *asyh) -{ - struct nouveau_framebuffer *fb = nouveau_framebuffer(asyw->state.fb); - struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev); - int ret; - - NV_ATOMIC(drm, "%s acquire\n", wndw->plane.name); - - asyw->image.w = fb->base.width; - asyw->image.h = fb->base.height; - asyw->image.kind = fb->nvbo->kind; - - if (asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) - asyw->interval = 0; - else - asyw->interval = 1; - - if (asyw->image.kind) { - asyw->image.layout = 0; - if (drm->client.device.info.chipset >= 0xc0) - asyw->image.block = fb->nvbo->mode >> 4; - else - asyw->image.block = fb->nvbo->mode; - asyw->image.pitch = (fb->base.pitches[0] / 4) << 4; - } else { - asyw->image.layout = 1; - asyw->image.block = 0; - asyw->image.pitch = fb->base.pitches[0]; - } - - ret = wndw->func->acquire(wndw, asyw, asyh); - if (ret) - return ret; - - if (asyw->set.image) { - if (!(asyw->image.mode = asyw->interval ? 0 : 1)) - asyw->image.interval = asyw->interval; - else - asyw->image.interval = 0; - } - - return 0; -} - -static int -nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state) -{ - struct nouveau_drm *drm = nouveau_drm(plane->dev); - struct nv50_wndw *wndw = nv50_wndw(plane); - struct nv50_wndw_atom *armw = nv50_wndw_atom(wndw->plane.state); - struct nv50_wndw_atom *asyw = nv50_wndw_atom(state); - struct nv50_head_atom *harm = NULL, *asyh = NULL; - bool varm = false, asyv = false, asym = false; - int ret; - - NV_ATOMIC(drm, "%s atomic_check\n", plane->name); - if (asyw->state.crtc) { - asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc); - if (IS_ERR(asyh)) - return PTR_ERR(asyh); - asym = drm_atomic_crtc_needs_modeset(&asyh->state); - asyv = asyh->state.active; - } - - if (armw->state.crtc) { - harm = nv50_head_atom_get(asyw->state.state, armw->state.crtc); - if (IS_ERR(harm)) - return PTR_ERR(harm); - varm = harm->state.crtc->state->active; - } - - if (asyv) { - asyw->point.x = asyw->state.crtc_x; - asyw->point.y = asyw->state.crtc_y; - if (memcmp(&armw->point, &asyw->point, sizeof(asyw->point))) - asyw->set.point = true; - - ret = nv50_wndw_atomic_check_acquire(wndw, asyw, asyh); - if (ret) - return ret; - } else - if (varm) { - nv50_wndw_atomic_check_release(wndw, asyw, harm); - } else { - return 0; - } - - if (!asyv || asym) { - asyw->clr.ntfy = armw->ntfy.handle != 0; - asyw->clr.sema = armw->sema.handle != 0; - if (wndw->func->image_clr) - asyw->clr.image = armw->image.handle != 0; - asyw->set.lut = wndw->func->lut && asyv; - } - - return 0; -} - -static void -nv50_wndw_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state) -{ - struct nouveau_framebuffer *fb = nouveau_framebuffer(old_state->fb); - struct nouveau_drm *drm = nouveau_drm(plane->dev); - - NV_ATOMIC(drm, "%s cleanup: %p\n", plane->name, old_state->fb); - if (!old_state->fb) - return; - - nouveau_bo_unpin(fb->nvbo); -} - -static int -nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) -{ - struct nouveau_framebuffer *fb = nouveau_framebuffer(state->fb); - struct nouveau_drm *drm = nouveau_drm(plane->dev); - struct nv50_wndw *wndw = nv50_wndw(plane); - struct nv50_wndw_atom *asyw = nv50_wndw_atom(state); - struct nv50_head_atom *asyh; - struct nv50_dmac_ctxdma *ctxdma; - int ret; - - NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, state->fb); - if (!asyw->state.fb) - return 0; - - ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM, true); - if (ret) - return ret; - - ctxdma = nv50_dmac_ctxdma_new(wndw->dmac, fb); - if (IS_ERR(ctxdma)) { - nouveau_bo_unpin(fb->nvbo); - return PTR_ERR(ctxdma); - } - - asyw->state.fence = reservation_object_get_excl_rcu(fb->nvbo->bo.resv); - asyw->image.handle = ctxdma->object.handle; - asyw->image.offset = fb->nvbo->bo.offset; - - if (wndw->func->prepare) { - asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc); - if (IS_ERR(asyh)) - return PTR_ERR(asyh); - - wndw->func->prepare(wndw, asyh, asyw); - } - - return 0; -} - -static const struct drm_plane_helper_funcs -nv50_wndw_helper = { - .prepare_fb = nv50_wndw_prepare_fb, - .cleanup_fb = nv50_wndw_cleanup_fb, - .atomic_check = nv50_wndw_atomic_check, -}; - -static void -nv50_wndw_atomic_destroy_state(struct drm_plane *plane, - struct drm_plane_state *state) -{ - struct nv50_wndw_atom *asyw = nv50_wndw_atom(state); - __drm_atomic_helper_plane_destroy_state(&asyw->state); - kfree(asyw); -} - -static struct drm_plane_state * -nv50_wndw_atomic_duplicate_state(struct drm_plane *plane) -{ - struct nv50_wndw_atom *armw = nv50_wndw_atom(plane->state); - struct nv50_wndw_atom *asyw; - if (!(asyw = kmalloc(sizeof(*asyw), GFP_KERNEL))) - return NULL; - __drm_atomic_helper_plane_duplicate_state(plane, &asyw->state); - asyw->interval = 1; - asyw->sema = armw->sema; - asyw->ntfy = armw->ntfy; - asyw->image = armw->image; - asyw->point = armw->point; - asyw->lut = armw->lut; - asyw->clr.mask = 0; - asyw->set.mask = 0; - return &asyw->state; -} - -static void -nv50_wndw_reset(struct drm_plane *plane) -{ - struct nv50_wndw_atom *asyw; - - if (WARN_ON(!(asyw = kzalloc(sizeof(*asyw), GFP_KERNEL)))) - return; - - if (plane->state) - plane->funcs->atomic_destroy_state(plane, plane->state); - plane->state = &asyw->state; - plane->state->plane = plane; - plane->state->rotation = DRM_MODE_ROTATE_0; -} - -static void -nv50_wndw_destroy(struct drm_plane *plane) -{ - struct nv50_wndw *wndw = nv50_wndw(plane); - void *data; - nvif_notify_fini(&wndw->notify); - data = wndw->func->dtor(wndw); - drm_plane_cleanup(&wndw->plane); - kfree(data); -} - -static const struct drm_plane_funcs -nv50_wndw = { - .update_plane = drm_atomic_helper_update_plane, - .disable_plane = drm_atomic_helper_disable_plane, - .destroy = nv50_wndw_destroy, - .reset = nv50_wndw_reset, - .atomic_duplicate_state = nv50_wndw_atomic_duplicate_state, - .atomic_destroy_state = nv50_wndw_atomic_destroy_state, -}; - -static void -nv50_wndw_fini(struct nv50_wndw *wndw) -{ - nvif_notify_put(&wndw->notify); -} - -static void -nv50_wndw_init(struct nv50_wndw *wndw) -{ - nvif_notify_get(&wndw->notify); -} - -static int -nv50_wndw_ctor(const struct nv50_wndw_func *func, struct drm_device *dev, - enum drm_plane_type type, const char *name, int index, - struct nv50_dmac *dmac, const u32 *format, int nformat, - struct nv50_wndw *wndw) -{ - int ret; - - wndw->func = func; - wndw->dmac = dmac; - - ret = drm_universal_plane_init(dev, &wndw->plane, 0, &nv50_wndw, - format, nformat, NULL, - type, "%s-%d", name, index); - if (ret) - return ret; - - drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); - return 0; -} - -/****************************************************************************** - * Cursor plane - *****************************************************************************/ -#define nv50_curs(p) container_of((p), struct nv50_curs, wndw) - -struct nv50_curs { - struct nv50_wndw wndw; - struct nvif_object chan; -}; - -static u32 -nv50_curs_update(struct nv50_wndw *wndw, u32 interlock) -{ - struct nv50_curs *curs = nv50_curs(wndw); - nvif_wr32(&curs->chan, 0x0080, 0x00000000); - return 0; -} - -static void -nv50_curs_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) -{ - struct nv50_curs *curs = nv50_curs(wndw); - nvif_wr32(&curs->chan, 0x0084, (asyw->point.y << 16) | asyw->point.x); -} - -static void -nv50_curs_prepare(struct nv50_wndw *wndw, struct nv50_head_atom *asyh, - struct nv50_wndw_atom *asyw) -{ - u32 handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle; - u32 offset = asyw->image.offset; - if (asyh->curs.handle != handle || asyh->curs.offset != offset) { - asyh->curs.handle = handle; - asyh->curs.offset = offset; - asyh->set.curs = asyh->curs.visible; - } -} - -static void -nv50_curs_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, - struct nv50_head_atom *asyh) -{ - asyh->curs.visible = false; -} - -static int -nv50_curs_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, - struct nv50_head_atom *asyh) -{ - int ret; - - ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state, - DRM_PLANE_HELPER_NO_SCALING, - DRM_PLANE_HELPER_NO_SCALING, - true, true); - asyh->curs.visible = asyw->state.visible; - if (ret || !asyh->curs.visible) - return ret; - - switch (asyw->state.fb->width) { - case 32: asyh->curs.layout = 0; break; - case 64: asyh->curs.layout = 1; break; - default: - return -EINVAL; - } - - if (asyw->state.fb->width != asyw->state.fb->height) - return -EINVAL; - - switch (asyw->state.fb->format->format) { - case DRM_FORMAT_ARGB8888: asyh->curs.format = 1; break; - default: - WARN_ON(1); - return -EINVAL; - } - - return 0; -} - -static void * -nv50_curs_dtor(struct nv50_wndw *wndw) -{ - struct nv50_curs *curs = nv50_curs(wndw); - nvif_object_fini(&curs->chan); - return curs; -} - -static const u32 -nv50_curs_format[] = { - DRM_FORMAT_ARGB8888, -}; - -static const struct nv50_wndw_func -nv50_curs = { - .dtor = nv50_curs_dtor, - .acquire = nv50_curs_acquire, - .release = nv50_curs_release, - .prepare = nv50_curs_prepare, - .point = nv50_curs_point, - .update = nv50_curs_update, -}; - -static int -nv50_curs_new(struct nouveau_drm *drm, struct nv50_head *head, - struct nv50_curs **pcurs) -{ - static const struct nvif_mclass curses[] = { - { GK104_DISP_CURSOR, 0 }, - { GF110_DISP_CURSOR, 0 }, - { GT214_DISP_CURSOR, 0 }, - { G82_DISP_CURSOR, 0 }, - { NV50_DISP_CURSOR, 0 }, - {} - }; - struct nv50_disp_cursor_v0 args = { - .head = head->base.index, - }; - struct nv50_disp *disp = nv50_disp(drm->dev); - struct nv50_curs *curs; - int cid, ret; - - cid = nvif_mclass(disp->disp, curses); - if (cid < 0) { - NV_ERROR(drm, "No supported cursor immediate class\n"); - return cid; - } - - if (!(curs = *pcurs = kzalloc(sizeof(*curs), GFP_KERNEL))) - return -ENOMEM; - - ret = nv50_wndw_ctor(&nv50_curs, drm->dev, DRM_PLANE_TYPE_CURSOR, - "curs", head->base.index, &disp->mast.base, - nv50_curs_format, ARRAY_SIZE(nv50_curs_format), - &curs->wndw); - if (ret) { - kfree(curs); - return ret; - } - - ret = nvif_object_init(disp->disp, 0, curses[cid].oclass, &args, - sizeof(args), &curs->chan); - if (ret) { - NV_ERROR(drm, "curs%04x allocation failed: %d\n", - curses[cid].oclass, ret); - return ret; - } - - return 0; -} - -/****************************************************************************** - * Primary plane - *****************************************************************************/ -#define nv50_base(p) container_of((p), struct nv50_base, wndw) - -struct nv50_base { - struct nv50_wndw wndw; - struct nv50_sync chan; - int id; -}; - -static int -nv50_base_notify(struct nvif_notify *notify) -{ - return NVIF_NOTIFY_KEEP; -} - -static void -nv50_base_lut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) -{ - struct nv50_base *base = nv50_base(wndw); - u32 *push; - if ((push = evo_wait(&base->chan, 2))) { - evo_mthd(push, 0x00e0, 1); - evo_data(push, asyw->lut.enable << 30); - evo_kick(push, &base->chan); - } -} - -static void -nv50_base_image_clr(struct nv50_wndw *wndw) -{ - struct nv50_base *base = nv50_base(wndw); - u32 *push; - if ((push = evo_wait(&base->chan, 4))) { - evo_mthd(push, 0x0084, 1); - evo_data(push, 0x00000000); - evo_mthd(push, 0x00c0, 1); - evo_data(push, 0x00000000); - evo_kick(push, &base->chan); - } -} - -static void -nv50_base_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) -{ - struct nv50_base *base = nv50_base(wndw); - const s32 oclass = base->chan.base.base.user.oclass; - u32 *push; - if ((push = evo_wait(&base->chan, 10))) { - evo_mthd(push, 0x0084, 1); - evo_data(push, (asyw->image.mode << 8) | - (asyw->image.interval << 4)); - evo_mthd(push, 0x00c0, 1); - evo_data(push, asyw->image.handle); - if (oclass < G82_DISP_BASE_CHANNEL_DMA) { - evo_mthd(push, 0x0800, 5); - evo_data(push, asyw->image.offset >> 8); - evo_data(push, 0x00000000); - evo_data(push, (asyw->image.h << 16) | asyw->image.w); - evo_data(push, (asyw->image.layout << 20) | - asyw->image.pitch | - asyw->image.block); - evo_data(push, (asyw->image.kind << 16) | - (asyw->image.format << 8)); - } else - if (oclass < GF110_DISP_BASE_CHANNEL_DMA) { - evo_mthd(push, 0x0800, 5); - evo_data(push, asyw->image.offset >> 8); - evo_data(push, 0x00000000); - evo_data(push, (asyw->image.h << 16) | asyw->image.w); - evo_data(push, (asyw->image.layout << 20) | - asyw->image.pitch | - asyw->image.block); - evo_data(push, asyw->image.format << 8); - } else { - evo_mthd(push, 0x0400, 5); - evo_data(push, asyw->image.offset >> 8); - evo_data(push, 0x00000000); - evo_data(push, (asyw->image.h << 16) | asyw->image.w); - evo_data(push, (asyw->image.layout << 24) | - asyw->image.pitch | - asyw->image.block); - evo_data(push, asyw->image.format << 8); - } - evo_kick(push, &base->chan); - } -} - -static void -nv50_base_ntfy_clr(struct nv50_wndw *wndw) -{ - struct nv50_base *base = nv50_base(wndw); - u32 *push; - if ((push = evo_wait(&base->chan, 2))) { - evo_mthd(push, 0x00a4, 1); - evo_data(push, 0x00000000); - evo_kick(push, &base->chan); - } -} - -static void -nv50_base_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) -{ - struct nv50_base *base = nv50_base(wndw); - u32 *push; - if ((push = evo_wait(&base->chan, 3))) { - evo_mthd(push, 0x00a0, 2); - evo_data(push, (asyw->ntfy.awaken << 30) | asyw->ntfy.offset); - evo_data(push, asyw->ntfy.handle); - evo_kick(push, &base->chan); - } -} - -static void -nv50_base_sema_clr(struct nv50_wndw *wndw) -{ - struct nv50_base *base = nv50_base(wndw); - u32 *push; - if ((push = evo_wait(&base->chan, 2))) { - evo_mthd(push, 0x0094, 1); - evo_data(push, 0x00000000); - evo_kick(push, &base->chan); - } -} - -static void -nv50_base_sema_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) -{ - struct nv50_base *base = nv50_base(wndw); - u32 *push; - if ((push = evo_wait(&base->chan, 5))) { - evo_mthd(push, 0x0088, 4); - evo_data(push, asyw->sema.offset); - evo_data(push, asyw->sema.acquire); - evo_data(push, asyw->sema.release); - evo_data(push, asyw->sema.handle); - evo_kick(push, &base->chan); - } -} - -static u32 -nv50_base_update(struct nv50_wndw *wndw, u32 interlock) -{ - struct nv50_base *base = nv50_base(wndw); - u32 *push; - - if (!(push = evo_wait(&base->chan, 2))) - return 0; - evo_mthd(push, 0x0080, 1); - evo_data(push, interlock); - evo_kick(push, &base->chan); - - if (base->chan.base.base.user.oclass < GF110_DISP_BASE_CHANNEL_DMA) - return interlock ? 2 << (base->id * 8) : 0; - return interlock ? 2 << (base->id * 4) : 0; -} - -static int -nv50_base_ntfy_wait_begun(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) -{ - struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev); - struct nv50_disp *disp = nv50_disp(wndw->plane.dev); - if (nvif_msec(&drm->client.device, 2000ULL, - u32 data = nouveau_bo_rd32(disp->sync, asyw->ntfy.offset / 4); - if ((data & 0xc0000000) == 0x40000000) - break; - usleep_range(1, 2); - ) < 0) - return -ETIMEDOUT; - return 0; -} - -static void -nv50_base_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, - struct nv50_head_atom *asyh) -{ - asyh->base.cpp = 0; -} - -static int -nv50_base_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, - struct nv50_head_atom *asyh) -{ - const struct drm_framebuffer *fb = asyw->state.fb; - int ret; - - if (!fb->format->depth) - return -EINVAL; - - ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state, - DRM_PLANE_HELPER_NO_SCALING, - DRM_PLANE_HELPER_NO_SCALING, - false, true); - if (ret) - return ret; - - asyh->base.depth = fb->format->depth; - asyh->base.cpp = fb->format->cpp[0]; - asyh->base.x = asyw->state.src.x1 >> 16; - asyh->base.y = asyw->state.src.y1 >> 16; - asyh->base.w = asyw->state.fb->width; - asyh->base.h = asyw->state.fb->height; - - switch (fb->format->format) { - case DRM_FORMAT_C8 : asyw->image.format = 0x1e; break; - case DRM_FORMAT_RGB565 : asyw->image.format = 0xe8; break; - case DRM_FORMAT_XRGB1555 : - case DRM_FORMAT_ARGB1555 : asyw->image.format = 0xe9; break; - case DRM_FORMAT_XRGB8888 : - case DRM_FORMAT_ARGB8888 : asyw->image.format = 0xcf; break; - case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_ABGR2101010: asyw->image.format = 0xd1; break; - case DRM_FORMAT_XBGR8888 : - case DRM_FORMAT_ABGR8888 : asyw->image.format = 0xd5; break; - default: - WARN_ON(1); - return -EINVAL; - } - - asyw->lut.enable = 1; - asyw->set.image = true; - return 0; -} - -static void * -nv50_base_dtor(struct nv50_wndw *wndw) -{ - struct nv50_disp *disp = nv50_disp(wndw->plane.dev); - struct nv50_base *base = nv50_base(wndw); - nv50_dmac_destroy(&base->chan.base, disp->disp); - return base; -} - -static const u32 -nv50_base_format[] = { - DRM_FORMAT_C8, - DRM_FORMAT_RGB565, - DRM_FORMAT_XRGB1555, - DRM_FORMAT_ARGB1555, - DRM_FORMAT_XRGB8888, - DRM_FORMAT_ARGB8888, - DRM_FORMAT_XBGR2101010, - DRM_FORMAT_ABGR2101010, - DRM_FORMAT_XBGR8888, - DRM_FORMAT_ABGR8888, -}; - -static const struct nv50_wndw_func -nv50_base = { - .dtor = nv50_base_dtor, - .acquire = nv50_base_acquire, - .release = nv50_base_release, - .sema_set = nv50_base_sema_set, - .sema_clr = nv50_base_sema_clr, - .ntfy_set = nv50_base_ntfy_set, - .ntfy_clr = nv50_base_ntfy_clr, - .ntfy_wait_begun = nv50_base_ntfy_wait_begun, - .image_set = nv50_base_image_set, - .image_clr = nv50_base_image_clr, - .lut = nv50_base_lut, - .update = nv50_base_update, -}; - -static int -nv50_base_new(struct nouveau_drm *drm, struct nv50_head *head, - struct nv50_base **pbase) -{ - struct nv50_disp *disp = nv50_disp(drm->dev); - struct nv50_base *base; - int ret; - - if (!(base = *pbase = kzalloc(sizeof(*base), GFP_KERNEL))) - return -ENOMEM; - base->id = head->base.index; - base->wndw.ntfy = EVO_FLIP_NTFY0(base->id); - base->wndw.sema = EVO_FLIP_SEM0(base->id); - base->wndw.data = 0x00000000; - - ret = nv50_wndw_ctor(&nv50_base, drm->dev, DRM_PLANE_TYPE_PRIMARY, - "base", base->id, &base->chan.base, - nv50_base_format, ARRAY_SIZE(nv50_base_format), - &base->wndw); - if (ret) { - kfree(base); - return ret; - } - - ret = nv50_base_create(&drm->client.device, disp->disp, base->id, - disp->sync->bo.offset, &base->chan); - if (ret) - return ret; - - return nvif_notify_init(&base->chan.base.base.user, nv50_base_notify, - false, - NV50_DISP_BASE_CHANNEL_DMA_V0_NTFY_UEVENT, - &(struct nvif_notify_uevent_req) {}, - sizeof(struct nvif_notify_uevent_req), - sizeof(struct nvif_notify_uevent_rep), - &base->wndw.notify); -} - -/****************************************************************************** - * Head - *****************************************************************************/ -static void -nv50_head_procamp(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 *push; - if ((push = evo_wait(core, 2))) { - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) - evo_mthd(push, 0x08a8 + (head->base.index * 0x400), 1); - else - evo_mthd(push, 0x0498 + (head->base.index * 0x300), 1); - evo_data(push, (asyh->procamp.sat.sin << 20) | - (asyh->procamp.sat.cos << 8)); - evo_kick(push, core); - } -} - -static void -nv50_head_dither(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 *push; - if ((push = evo_wait(core, 2))) { - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) - evo_mthd(push, 0x08a0 + (head->base.index * 0x0400), 1); - else - if (core->base.user.oclass < GK104_DISP_CORE_CHANNEL_DMA) - evo_mthd(push, 0x0490 + (head->base.index * 0x0300), 1); - else - evo_mthd(push, 0x04a0 + (head->base.index * 0x0300), 1); - evo_data(push, (asyh->dither.mode << 3) | - (asyh->dither.bits << 1) | - asyh->dither.enable); - evo_kick(push, core); - } -} - -static void -nv50_head_ovly(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 bounds = 0; - u32 *push; - - if (asyh->base.cpp) { - switch (asyh->base.cpp) { - case 8: bounds |= 0x00000500; break; - case 4: bounds |= 0x00000300; break; - case 2: bounds |= 0x00000100; break; - default: - WARN_ON(1); - break; - } - bounds |= 0x00000001; - } - - if ((push = evo_wait(core, 2))) { - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) - evo_mthd(push, 0x0904 + head->base.index * 0x400, 1); - else - evo_mthd(push, 0x04d4 + head->base.index * 0x300, 1); - evo_data(push, bounds); - evo_kick(push, core); - } -} - -static void -nv50_head_base(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 bounds = 0; - u32 *push; - - if (asyh->base.cpp) { - switch (asyh->base.cpp) { - case 8: bounds |= 0x00000500; break; - case 4: bounds |= 0x00000300; break; - case 2: bounds |= 0x00000100; break; - case 1: bounds |= 0x00000000; break; - default: - WARN_ON(1); - break; - } - bounds |= 0x00000001; - } - - if ((push = evo_wait(core, 2))) { - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) - evo_mthd(push, 0x0900 + head->base.index * 0x400, 1); - else - evo_mthd(push, 0x04d0 + head->base.index * 0x300, 1); - evo_data(push, bounds); - evo_kick(push, core); - } -} - -static void -nv50_head_curs_clr(struct nv50_head *head) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 *push; - if ((push = evo_wait(core, 4))) { - if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0880 + head->base.index * 0x400, 1); - evo_data(push, 0x05000000); - } else - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0880 + head->base.index * 0x400, 1); - evo_data(push, 0x05000000); - evo_mthd(push, 0x089c + head->base.index * 0x400, 1); - evo_data(push, 0x00000000); - } else { - evo_mthd(push, 0x0480 + head->base.index * 0x300, 1); - evo_data(push, 0x05000000); - evo_mthd(push, 0x048c + head->base.index * 0x300, 1); - evo_data(push, 0x00000000); - } - evo_kick(push, core); - } -} - -static void -nv50_head_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 *push; - if ((push = evo_wait(core, 5))) { - if (core->base.user.oclass < G82_DISP_BASE_CHANNEL_DMA) { - evo_mthd(push, 0x0880 + head->base.index * 0x400, 2); - evo_data(push, 0x80000000 | (asyh->curs.layout << 26) | - (asyh->curs.format << 24)); - evo_data(push, asyh->curs.offset >> 8); - } else - if (core->base.user.oclass < GF110_DISP_BASE_CHANNEL_DMA) { - evo_mthd(push, 0x0880 + head->base.index * 0x400, 2); - evo_data(push, 0x80000000 | (asyh->curs.layout << 26) | - (asyh->curs.format << 24)); - evo_data(push, asyh->curs.offset >> 8); - evo_mthd(push, 0x089c + head->base.index * 0x400, 1); - evo_data(push, asyh->curs.handle); - } else { - evo_mthd(push, 0x0480 + head->base.index * 0x300, 2); - evo_data(push, 0x80000000 | (asyh->curs.layout << 26) | - (asyh->curs.format << 24)); - evo_data(push, asyh->curs.offset >> 8); - evo_mthd(push, 0x048c + head->base.index * 0x300, 1); - evo_data(push, asyh->curs.handle); - } - evo_kick(push, core); - } -} - -static void -nv50_head_core_clr(struct nv50_head *head) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 *push; - if ((push = evo_wait(core, 2))) { - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) - evo_mthd(push, 0x0874 + head->base.index * 0x400, 1); - else - evo_mthd(push, 0x0474 + head->base.index * 0x300, 1); - evo_data(push, 0x00000000); - evo_kick(push, core); - } -} - -static void -nv50_head_core_set(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 *push; - if ((push = evo_wait(core, 9))) { - if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0860 + head->base.index * 0x400, 1); - evo_data(push, asyh->core.offset >> 8); - evo_mthd(push, 0x0868 + head->base.index * 0x400, 4); - evo_data(push, (asyh->core.h << 16) | asyh->core.w); - evo_data(push, asyh->core.layout << 20 | - (asyh->core.pitch >> 8) << 8 | - asyh->core.block); - evo_data(push, asyh->core.kind << 16 | - asyh->core.format << 8); - evo_data(push, asyh->core.handle); - evo_mthd(push, 0x08c0 + head->base.index * 0x400, 1); - evo_data(push, (asyh->core.y << 16) | asyh->core.x); - /* EVO will complain with INVALID_STATE if we have an - * active cursor and (re)specify HeadSetContextDmaIso - * without also updating HeadSetOffsetCursor. - */ - asyh->set.curs = asyh->curs.visible; - } else - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0860 + head->base.index * 0x400, 1); - evo_data(push, asyh->core.offset >> 8); - evo_mthd(push, 0x0868 + head->base.index * 0x400, 4); - evo_data(push, (asyh->core.h << 16) | asyh->core.w); - evo_data(push, asyh->core.layout << 20 | - (asyh->core.pitch >> 8) << 8 | - asyh->core.block); - evo_data(push, asyh->core.format << 8); - evo_data(push, asyh->core.handle); - evo_mthd(push, 0x08c0 + head->base.index * 0x400, 1); - evo_data(push, (asyh->core.y << 16) | asyh->core.x); - } else { - evo_mthd(push, 0x0460 + head->base.index * 0x300, 1); - evo_data(push, asyh->core.offset >> 8); - evo_mthd(push, 0x0468 + head->base.index * 0x300, 4); - evo_data(push, (asyh->core.h << 16) | asyh->core.w); - evo_data(push, asyh->core.layout << 24 | - (asyh->core.pitch >> 8) << 8 | - asyh->core.block); - evo_data(push, asyh->core.format << 8); - evo_data(push, asyh->core.handle); - evo_mthd(push, 0x04b0 + head->base.index * 0x300, 1); - evo_data(push, (asyh->core.y << 16) | asyh->core.x); - } - evo_kick(push, core); - } -} - -static void -nv50_head_lut_clr(struct nv50_head *head) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 *push; - if ((push = evo_wait(core, 4))) { - if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0840 + (head->base.index * 0x400), 1); - evo_data(push, 0x40000000); - } else - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0840 + (head->base.index * 0x400), 1); - evo_data(push, 0x40000000); - evo_mthd(push, 0x085c + (head->base.index * 0x400), 1); - evo_data(push, 0x00000000); - } else { - evo_mthd(push, 0x0440 + (head->base.index * 0x300), 1); - evo_data(push, 0x03000000); - evo_mthd(push, 0x045c + (head->base.index * 0x300), 1); - evo_data(push, 0x00000000); - } - evo_kick(push, core); - } -} - -static void -nv50_head_lut_load(struct drm_property_blob *blob, int mode, - struct nouveau_bo *nvbo) -{ - struct drm_color_lut *in = (struct drm_color_lut *)blob->data; - void __iomem *lut = (u8 *)nvbo_kmap_obj_iovirtual(nvbo); - const int size = blob->length / sizeof(*in); - int bits, shift, i; - u16 zero, r, g, b; - - /* This can't happen.. But it shuts the compiler up. */ - if (WARN_ON(size != 256)) - return; - - switch (mode) { - case 0: /* LORES. */ - case 1: /* HIRES. */ - bits = 11; - shift = 3; - zero = 0x0000; - break; - case 7: /* INTERPOLATE_257_UNITY_RANGE. */ - bits = 14; - shift = 0; - zero = 0x6000; - break; - default: - WARN_ON(1); - return; - } - - for (i = 0; i < size; i++) { - r = (drm_color_lut_extract(in[i]. red, bits) + zero) << shift; - g = (drm_color_lut_extract(in[i].green, bits) + zero) << shift; - b = (drm_color_lut_extract(in[i]. blue, bits) + zero) << shift; - writew(r, lut + (i * 0x08) + 0); - writew(g, lut + (i * 0x08) + 2); - writew(b, lut + (i * 0x08) + 4); - } - - /* INTERPOLATE modes require a "next" entry to interpolate with, - * so we replicate the last entry to deal with this for now. - */ - writew(r, lut + (i * 0x08) + 0); - writew(g, lut + (i * 0x08) + 2); - writew(b, lut + (i * 0x08) + 4); -} - -static void -nv50_head_lut_set(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 *push; - if ((push = evo_wait(core, 7))) { - if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0840 + (head->base.index * 0x400), 2); - evo_data(push, 0x80000000 | asyh->lut.mode << 30); - evo_data(push, asyh->lut.offset >> 8); - } else - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0840 + (head->base.index * 0x400), 2); - evo_data(push, 0x80000000 | asyh->lut.mode << 30); - evo_data(push, asyh->lut.offset >> 8); - evo_mthd(push, 0x085c + (head->base.index * 0x400), 1); - evo_data(push, asyh->lut.handle); - } else { - evo_mthd(push, 0x0440 + (head->base.index * 0x300), 4); - evo_data(push, 0x80000000 | asyh->lut.mode << 24); - evo_data(push, asyh->lut.offset >> 8); - evo_data(push, 0x00000000); - evo_data(push, 0x00000000); - evo_mthd(push, 0x045c + (head->base.index * 0x300), 1); - evo_data(push, asyh->lut.handle); - } - evo_kick(push, core); - } -} - -static void -nv50_head_mode(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - struct nv50_head_mode *m = &asyh->mode; - u32 *push; - if ((push = evo_wait(core, 14))) { - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0804 + (head->base.index * 0x400), 2); - evo_data(push, 0x00800000 | m->clock); - evo_data(push, m->interlace ? 0x00000002 : 0x00000000); - evo_mthd(push, 0x0810 + (head->base.index * 0x400), 7); - evo_data(push, 0x00000000); - evo_data(push, (m->v.active << 16) | m->h.active ); - evo_data(push, (m->v.synce << 16) | m->h.synce ); - evo_data(push, (m->v.blanke << 16) | m->h.blanke ); - evo_data(push, (m->v.blanks << 16) | m->h.blanks ); - evo_data(push, (m->v.blank2e << 16) | m->v.blank2s); - evo_data(push, asyh->mode.v.blankus); - evo_mthd(push, 0x082c + (head->base.index * 0x400), 1); - evo_data(push, 0x00000000); - } else { - evo_mthd(push, 0x0410 + (head->base.index * 0x300), 6); - evo_data(push, 0x00000000); - evo_data(push, (m->v.active << 16) | m->h.active ); - evo_data(push, (m->v.synce << 16) | m->h.synce ); - evo_data(push, (m->v.blanke << 16) | m->h.blanke ); - evo_data(push, (m->v.blanks << 16) | m->h.blanks ); - evo_data(push, (m->v.blank2e << 16) | m->v.blank2s); - evo_mthd(push, 0x042c + (head->base.index * 0x300), 2); - evo_data(push, 0x00000000); /* ??? */ - evo_data(push, 0xffffff00); - evo_mthd(push, 0x0450 + (head->base.index * 0x300), 3); - evo_data(push, m->clock * 1000); - evo_data(push, 0x00200000); /* ??? */ - evo_data(push, m->clock * 1000); - } - evo_kick(push, core); - } -} - -static void -nv50_head_view(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base; - u32 *push; - if ((push = evo_wait(core, 10))) { - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x08a4 + (head->base.index * 0x400), 1); - evo_data(push, 0x00000000); - evo_mthd(push, 0x08c8 + (head->base.index * 0x400), 1); - evo_data(push, (asyh->view.iH << 16) | asyh->view.iW); - evo_mthd(push, 0x08d8 + (head->base.index * 0x400), 2); - evo_data(push, (asyh->view.oH << 16) | asyh->view.oW); - evo_data(push, (asyh->view.oH << 16) | asyh->view.oW); - } else { - evo_mthd(push, 0x0494 + (head->base.index * 0x300), 1); - evo_data(push, 0x00000000); - evo_mthd(push, 0x04b8 + (head->base.index * 0x300), 1); - evo_data(push, (asyh->view.iH << 16) | asyh->view.iW); - evo_mthd(push, 0x04c0 + (head->base.index * 0x300), 3); - evo_data(push, (asyh->view.oH << 16) | asyh->view.oW); - evo_data(push, (asyh->view.oH << 16) | asyh->view.oW); - evo_data(push, (asyh->view.oH << 16) | asyh->view.oW); - } - evo_kick(push, core); - } -} - -static void -nv50_head_flush_clr(struct nv50_head *head, struct nv50_head_atom *asyh, bool y) -{ - if (asyh->clr.ilut && (!asyh->set.ilut || y)) - nv50_head_lut_clr(head); - if (asyh->clr.core && (!asyh->set.core || y)) - nv50_head_core_clr(head); - if (asyh->clr.curs && (!asyh->set.curs || y)) - nv50_head_curs_clr(head); -} - -static void -nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - if (asyh->set.view ) nv50_head_view (head, asyh); - if (asyh->set.mode ) nv50_head_mode (head, asyh); - if (asyh->set.ilut ) { - struct nouveau_bo *nvbo = head->lut.nvbo[head->lut.next]; - struct drm_property_blob *blob = asyh->state.gamma_lut; - if (blob) - nv50_head_lut_load(blob, asyh->lut.mode, nvbo); - asyh->lut.offset = nvbo->bo.offset; - head->lut.next ^= 1; - nv50_head_lut_set(head, asyh); - } - if (asyh->set.core ) nv50_head_core_set(head, asyh); - if (asyh->set.curs ) nv50_head_curs_set(head, asyh); - if (asyh->set.base ) nv50_head_base (head, asyh); - if (asyh->set.ovly ) nv50_head_ovly (head, asyh); - if (asyh->set.dither ) nv50_head_dither (head, asyh); - if (asyh->set.procamp) nv50_head_procamp (head, asyh); -} - -static void -nv50_head_atomic_check_procamp(struct nv50_head_atom *armh, - struct nv50_head_atom *asyh, - struct nouveau_conn_atom *asyc) -{ - const int vib = asyc->procamp.color_vibrance - 100; - const int hue = asyc->procamp.vibrant_hue - 90; - const int adj = (vib > 0) ? 50 : 0; - asyh->procamp.sat.cos = ((vib * 2047 + adj) / 100) & 0xfff; - asyh->procamp.sat.sin = ((hue * 2047) / 100) & 0xfff; - asyh->set.procamp = true; -} - -static void -nv50_head_atomic_check_dither(struct nv50_head_atom *armh, - struct nv50_head_atom *asyh, - struct nouveau_conn_atom *asyc) -{ - struct drm_connector *connector = asyc->state.connector; - u32 mode = 0x00; - - if (asyc->dither.mode == DITHERING_MODE_AUTO) { - if (asyh->base.depth > connector->display_info.bpc * 3) - mode = DITHERING_MODE_DYNAMIC2X2; - } else { - mode = asyc->dither.mode; - } - - if (asyc->dither.depth == DITHERING_DEPTH_AUTO) { - if (connector->display_info.bpc >= 8) - mode |= DITHERING_DEPTH_8BPC; - } else { - mode |= asyc->dither.depth; - } - - asyh->dither.enable = mode; - asyh->dither.bits = mode >> 1; - asyh->dither.mode = mode >> 3; - asyh->set.dither = true; -} - -static void -nv50_head_atomic_check_view(struct nv50_head_atom *armh, - struct nv50_head_atom *asyh, - struct nouveau_conn_atom *asyc) -{ - struct drm_connector *connector = asyc->state.connector; - struct drm_display_mode *omode = &asyh->state.adjusted_mode; - struct drm_display_mode *umode = &asyh->state.mode; - int mode = asyc->scaler.mode; - struct edid *edid; - int umode_vdisplay, omode_hdisplay, omode_vdisplay; - - if (connector->edid_blob_ptr) - edid = (struct edid *)connector->edid_blob_ptr->data; - else - edid = NULL; - - if (!asyc->scaler.full) { - if (mode == DRM_MODE_SCALE_NONE) - omode = umode; - } else { - /* Non-EDID LVDS/eDP mode. */ - mode = DRM_MODE_SCALE_FULLSCREEN; - } - - /* For the user-specified mode, we must ignore doublescan and - * the like, but honor frame packing. - */ - umode_vdisplay = umode->vdisplay; - if ((umode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING) - umode_vdisplay += umode->vtotal; - asyh->view.iW = umode->hdisplay; - asyh->view.iH = umode_vdisplay; - /* For the output mode, we can just use the stock helper. */ - drm_mode_get_hv_timing(omode, &omode_hdisplay, &omode_vdisplay); - asyh->view.oW = omode_hdisplay; - asyh->view.oH = omode_vdisplay; - - /* Add overscan compensation if necessary, will keep the aspect - * ratio the same as the backend mode unless overridden by the - * user setting both hborder and vborder properties. - */ - if ((asyc->scaler.underscan.mode == UNDERSCAN_ON || - (asyc->scaler.underscan.mode == UNDERSCAN_AUTO && - drm_detect_hdmi_monitor(edid)))) { - u32 bX = asyc->scaler.underscan.hborder; - u32 bY = asyc->scaler.underscan.vborder; - u32 r = (asyh->view.oH << 19) / asyh->view.oW; - - if (bX) { - asyh->view.oW -= (bX * 2); - if (bY) asyh->view.oH -= (bY * 2); - else asyh->view.oH = ((asyh->view.oW * r) + (r / 2)) >> 19; - } else { - asyh->view.oW -= (asyh->view.oW >> 4) + 32; - if (bY) asyh->view.oH -= (bY * 2); - else asyh->view.oH = ((asyh->view.oW * r) + (r / 2)) >> 19; - } - } - - /* Handle CENTER/ASPECT scaling, taking into account the areas - * removed already for overscan compensation. - */ - switch (mode) { - case DRM_MODE_SCALE_CENTER: - asyh->view.oW = min((u16)umode->hdisplay, asyh->view.oW); - asyh->view.oH = min((u16)umode_vdisplay, asyh->view.oH); - /* fall-through */ - case DRM_MODE_SCALE_ASPECT: - if (asyh->view.oH < asyh->view.oW) { - u32 r = (asyh->view.iW << 19) / asyh->view.iH; - asyh->view.oW = ((asyh->view.oH * r) + (r / 2)) >> 19; - } else { - u32 r = (asyh->view.iH << 19) / asyh->view.iW; - asyh->view.oH = ((asyh->view.oW * r) + (r / 2)) >> 19; - } - break; - default: - break; - } - - asyh->set.view = true; -} - -static void -nv50_head_atomic_check_lut(struct nv50_head *head, - struct nv50_head_atom *armh, - struct nv50_head_atom *asyh) -{ - struct nv50_disp *disp = nv50_disp(head->base.base.dev); - - /* An I8 surface without an input LUT makes no sense, and - * EVO will throw an error if you try. - * - * Legacy clients actually cause this due to the order in - * which they call ioctls, so we will enable the LUT with - * whatever contents the buffer already contains to avoid - * triggering the error check. - */ - if (!asyh->state.gamma_lut && asyh->base.cpp != 1) { - asyh->lut.handle = 0; - asyh->clr.ilut = armh->lut.visible; - return; - } - - if (disp->disp->oclass < GF110_DISP) { - asyh->lut.mode = (asyh->base.cpp == 1) ? 0 : 1; - asyh->set.ilut = true; - } else { - asyh->lut.mode = 7; - asyh->set.ilut = asyh->state.color_mgmt_changed; - } - asyh->lut.handle = disp->mast.base.vram.handle; -} - -static void -nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh) -{ - struct drm_display_mode *mode = &asyh->state.adjusted_mode; - struct nv50_head_mode *m = &asyh->mode; - u32 blankus; - - drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V | CRTC_STEREO_DOUBLE); - - /* - * DRM modes are defined in terms of a repeating interval - * starting with the active display area. The hardware modes - * are defined in terms of a repeating interval starting one - * unit (pixel or line) into the sync pulse. So, add bias. - */ - - m->h.active = mode->crtc_htotal; - m->h.synce = mode->crtc_hsync_end - mode->crtc_hsync_start - 1; - m->h.blanke = mode->crtc_hblank_end - mode->crtc_hsync_start - 1; - m->h.blanks = m->h.blanke + mode->crtc_hdisplay; - - m->v.active = mode->crtc_vtotal; - m->v.synce = mode->crtc_vsync_end - mode->crtc_vsync_start - 1; - m->v.blanke = mode->crtc_vblank_end - mode->crtc_vsync_start - 1; - m->v.blanks = m->v.blanke + mode->crtc_vdisplay; - - /*XXX: Safe underestimate, even "0" works */ - blankus = (m->v.active - mode->crtc_vdisplay - 2) * m->h.active; - blankus *= 1000; - blankus /= mode->crtc_clock; - m->v.blankus = blankus; - - if (mode->flags & DRM_MODE_FLAG_INTERLACE) { - m->v.blank2e = m->v.active + m->v.blanke; - m->v.blank2s = m->v.blank2e + mode->crtc_vdisplay; - m->v.active = (m->v.active * 2) + 1; - m->interlace = true; - } else { - m->v.blank2e = 0; - m->v.blank2s = 1; - m->interlace = false; - } - m->clock = mode->crtc_clock; - - asyh->set.mode = true; -} - -static int -nv50_head_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state) -{ - struct nouveau_drm *drm = nouveau_drm(crtc->dev); - struct nv50_disp *disp = nv50_disp(crtc->dev); - struct nv50_head *head = nv50_head(crtc); - struct nv50_head_atom *armh = nv50_head_atom(crtc->state); - struct nv50_head_atom *asyh = nv50_head_atom(state); - struct nouveau_conn_atom *asyc = NULL; - struct drm_connector_state *conns; - struct drm_connector *conn; - int i; - - NV_ATOMIC(drm, "%s atomic_check %d\n", crtc->name, asyh->state.active); - if (asyh->state.active) { - for_each_new_connector_in_state(asyh->state.state, conn, conns, i) { - if (conns->crtc == crtc) { - asyc = nouveau_conn_atom(conns); - break; - } - } - - if (armh->state.active) { - if (asyc) { - if (asyh->state.mode_changed) - asyc->set.scaler = true; - if (armh->base.depth != asyh->base.depth) - asyc->set.dither = true; - } - } else { - if (asyc) - asyc->set.mask = ~0; - asyh->set.mask = ~0; - } - - if (asyh->state.mode_changed) - nv50_head_atomic_check_mode(head, asyh); - - if (asyh->state.color_mgmt_changed || - asyh->base.cpp != armh->base.cpp) - nv50_head_atomic_check_lut(head, armh, asyh); - asyh->lut.visible = asyh->lut.handle != 0; - - if (asyc) { - if (asyc->set.scaler) - nv50_head_atomic_check_view(armh, asyh, asyc); - if (asyc->set.dither) - nv50_head_atomic_check_dither(armh, asyh, asyc); - if (asyc->set.procamp) - nv50_head_atomic_check_procamp(armh, asyh, asyc); - } - - if ((asyh->core.visible = (asyh->base.cpp != 0))) { - asyh->core.x = asyh->base.x; - asyh->core.y = asyh->base.y; - asyh->core.w = asyh->base.w; - asyh->core.h = asyh->base.h; - } else - if ((asyh->core.visible = asyh->curs.visible) || - (asyh->core.visible = asyh->lut.visible)) { - /*XXX: We need to either find some way of having the - * primary base layer appear black, while still - * being able to display the other layers, or we - * need to allocate a dummy black surface here. - */ - asyh->core.x = 0; - asyh->core.y = 0; - asyh->core.w = asyh->state.mode.hdisplay; - asyh->core.h = asyh->state.mode.vdisplay; - } - asyh->core.handle = disp->mast.base.vram.handle; - asyh->core.offset = 0; - asyh->core.format = 0xcf; - asyh->core.kind = 0; - asyh->core.layout = 1; - asyh->core.block = 0; - asyh->core.pitch = ALIGN(asyh->core.w, 64) * 4; - asyh->set.base = armh->base.cpp != asyh->base.cpp; - asyh->set.ovly = armh->ovly.cpp != asyh->ovly.cpp; - } else { - asyh->lut.visible = false; - asyh->core.visible = false; - asyh->curs.visible = false; - asyh->base.cpp = 0; - asyh->ovly.cpp = 0; - } - - if (!drm_atomic_crtc_needs_modeset(&asyh->state)) { - if (asyh->core.visible) { - if (memcmp(&armh->core, &asyh->core, sizeof(asyh->core))) - asyh->set.core = true; - } else - if (armh->core.visible) { - asyh->clr.core = true; - } - - if (asyh->curs.visible) { - if (memcmp(&armh->curs, &asyh->curs, sizeof(asyh->curs))) - asyh->set.curs = true; - } else - if (armh->curs.visible) { - asyh->clr.curs = true; - } - } else { - asyh->clr.ilut = armh->lut.visible; - asyh->clr.core = armh->core.visible; - asyh->clr.curs = armh->curs.visible; - asyh->set.ilut = asyh->lut.visible; - asyh->set.core = asyh->core.visible; - asyh->set.curs = asyh->curs.visible; - } - - if (asyh->clr.mask || asyh->set.mask) - nv50_atom(asyh->state.state)->lock_core = true; - return 0; -} - -static const struct drm_crtc_helper_funcs -nv50_head_help = { - .atomic_check = nv50_head_atomic_check, -}; - -static void -nv50_head_atomic_destroy_state(struct drm_crtc *crtc, - struct drm_crtc_state *state) -{ - struct nv50_head_atom *asyh = nv50_head_atom(state); - __drm_atomic_helper_crtc_destroy_state(&asyh->state); - kfree(asyh); -} - -static struct drm_crtc_state * -nv50_head_atomic_duplicate_state(struct drm_crtc *crtc) -{ - struct nv50_head_atom *armh = nv50_head_atom(crtc->state); - struct nv50_head_atom *asyh; - if (!(asyh = kmalloc(sizeof(*asyh), GFP_KERNEL))) - return NULL; - __drm_atomic_helper_crtc_duplicate_state(crtc, &asyh->state); - asyh->view = armh->view; - asyh->mode = armh->mode; - asyh->lut = armh->lut; - asyh->core = armh->core; - asyh->curs = armh->curs; - asyh->base = armh->base; - asyh->ovly = armh->ovly; - asyh->dither = armh->dither; - asyh->procamp = armh->procamp; - asyh->clr.mask = 0; - asyh->set.mask = 0; - return &asyh->state; -} - -static void -__drm_atomic_helper_crtc_reset(struct drm_crtc *crtc, - struct drm_crtc_state *state) -{ - if (crtc->state) - crtc->funcs->atomic_destroy_state(crtc, crtc->state); - crtc->state = state; - crtc->state->crtc = crtc; -} - -static void -nv50_head_reset(struct drm_crtc *crtc) -{ - struct nv50_head_atom *asyh; - - if (WARN_ON(!(asyh = kzalloc(sizeof(*asyh), GFP_KERNEL)))) - return; - - __drm_atomic_helper_crtc_reset(crtc, &asyh->state); -} - -static void -nv50_head_destroy(struct drm_crtc *crtc) -{ - struct nv50_disp *disp = nv50_disp(crtc->dev); - struct nv50_head *head = nv50_head(crtc); - int i; - - nv50_dmac_destroy(&head->ovly.base, disp->disp); - nv50_pioc_destroy(&head->oimm.base); - - for (i = 0; i < ARRAY_SIZE(head->lut.nvbo); i++) - nouveau_bo_unmap_unpin_unref(&head->lut.nvbo[i]); - - drm_crtc_cleanup(crtc); - kfree(crtc); -} - -static const struct drm_crtc_funcs -nv50_head_func = { - .reset = nv50_head_reset, - .gamma_set = drm_atomic_helper_legacy_gamma_set, - .destroy = nv50_head_destroy, - .set_config = drm_atomic_helper_set_config, - .page_flip = drm_atomic_helper_page_flip, - .atomic_duplicate_state = nv50_head_atomic_duplicate_state, - .atomic_destroy_state = nv50_head_atomic_destroy_state, -}; - -static int -nv50_head_create(struct drm_device *dev, int index) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - struct nvif_device *device = &drm->client.device; - struct nv50_disp *disp = nv50_disp(dev); - struct nv50_head *head; - struct nv50_base *base; - struct nv50_curs *curs; - struct drm_crtc *crtc; - int ret, i; - - head = kzalloc(sizeof(*head), GFP_KERNEL); - if (!head) - return -ENOMEM; - - head->base.index = index; - ret = nv50_base_new(drm, head, &base); - if (ret == 0) - ret = nv50_curs_new(drm, head, &curs); - if (ret) { - kfree(head); - return ret; - } - - crtc = &head->base.base; - drm_crtc_init_with_planes(dev, crtc, &base->wndw.plane, - &curs->wndw.plane, &nv50_head_func, - "head-%d", head->base.index); - drm_crtc_helper_add(crtc, &nv50_head_help); - drm_mode_crtc_set_gamma_size(crtc, 256); - - for (i = 0; i < ARRAY_SIZE(head->lut.nvbo); i++) { - ret = nouveau_bo_new_pin_map(&drm->client, 1025 * 8, 0x100, - TTM_PL_FLAG_VRAM, - &head->lut.nvbo[i]); - if (ret) - goto out; - } - - /* allocate overlay resources */ - ret = nv50_oimm_create(device, disp->disp, index, &head->oimm); - if (ret) - goto out; - - ret = nv50_ovly_create(device, disp->disp, index, disp->sync->bo.offset, - &head->ovly); - if (ret) - goto out; - -out: - if (ret) - nv50_head_destroy(crtc); - return ret; -} - -/****************************************************************************** - * Output path helpers - *****************************************************************************/ -static void -nv50_outp_release(struct nouveau_encoder *nv_encoder) -{ - struct nv50_disp *disp = nv50_disp(nv_encoder->base.base.dev); - struct { - struct nv50_disp_mthd_v1 base; - } args = { - .base.version = 1, - .base.method = NV50_DISP_MTHD_V1_RELEASE, - .base.hasht = nv_encoder->dcb->hasht, - .base.hashm = nv_encoder->dcb->hashm, - }; - - nvif_mthd(disp->disp, 0, &args, sizeof(args)); - nv_encoder->or = -1; - nv_encoder->link = 0; -} - -static int -nv50_outp_acquire(struct nouveau_encoder *nv_encoder) -{ - struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); - struct nv50_disp *disp = nv50_disp(drm->dev); - struct { - struct nv50_disp_mthd_v1 base; - struct nv50_disp_acquire_v0 info; - } args = { - .base.version = 1, - .base.method = NV50_DISP_MTHD_V1_ACQUIRE, - .base.hasht = nv_encoder->dcb->hasht, - .base.hashm = nv_encoder->dcb->hashm, - }; - int ret; - - ret = nvif_mthd(disp->disp, 0, &args, sizeof(args)); - if (ret) { - NV_ERROR(drm, "error acquiring output path: %d\n", ret); - return ret; - } - - nv_encoder->or = args.info.or; - nv_encoder->link = args.info.link; - return 0; -} - -static int -nv50_outp_atomic_check_view(struct drm_encoder *encoder, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state, - struct drm_display_mode *native_mode) -{ - struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode; - struct drm_display_mode *mode = &crtc_state->mode; - struct drm_connector *connector = conn_state->connector; - struct nouveau_conn_atom *asyc = nouveau_conn_atom(conn_state); - struct nouveau_drm *drm = nouveau_drm(encoder->dev); - - NV_ATOMIC(drm, "%s atomic_check\n", encoder->name); - asyc->scaler.full = false; - if (!native_mode) - return 0; - - if (asyc->scaler.mode == DRM_MODE_SCALE_NONE) { - switch (connector->connector_type) { - case DRM_MODE_CONNECTOR_LVDS: - case DRM_MODE_CONNECTOR_eDP: - /* Force use of scaler for non-EDID modes. */ - if (adjusted_mode->type & DRM_MODE_TYPE_DRIVER) - break; - mode = native_mode; - asyc->scaler.full = true; - break; - default: - break; - } - } else { - mode = native_mode; - } - - if (!drm_mode_equal(adjusted_mode, mode)) { - drm_mode_copy(adjusted_mode, mode); - crtc_state->mode_changed = true; - } - - return 0; -} - -static int -nv50_outp_atomic_check(struct drm_encoder *encoder, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - struct nouveau_connector *nv_connector = - nouveau_connector(conn_state->connector); - return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, - nv_connector->native_mode); -} - -/****************************************************************************** - * DAC - *****************************************************************************/ -static void -nv50_dac_disable(struct drm_encoder *encoder) -{ - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nv50_mast *mast = nv50_mast(encoder->dev); - const int or = nv_encoder->or; - u32 *push; - - if (nv_encoder->crtc) { - push = evo_wait(mast, 4); - if (push) { - if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0400 + (or * 0x080), 1); - evo_data(push, 0x00000000); - } else { - evo_mthd(push, 0x0180 + (or * 0x020), 1); - evo_data(push, 0x00000000); - } - evo_kick(push, mast); - } - } - - nv_encoder->crtc = NULL; - nv50_outp_release(nv_encoder); -} - -static void -nv50_dac_enable(struct drm_encoder *encoder) -{ - struct nv50_mast *mast = nv50_mast(encoder->dev); - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); - struct drm_display_mode *mode = &nv_crtc->base.state->adjusted_mode; - u32 *push; - - nv50_outp_acquire(nv_encoder); - - push = evo_wait(mast, 8); - if (push) { - if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) { - u32 syncs = 0x00000000; - - if (mode->flags & DRM_MODE_FLAG_NHSYNC) - syncs |= 0x00000001; - if (mode->flags & DRM_MODE_FLAG_NVSYNC) - syncs |= 0x00000002; - - evo_mthd(push, 0x0400 + (nv_encoder->or * 0x080), 2); - evo_data(push, 1 << nv_crtc->index); - evo_data(push, syncs); - } else { - u32 magic = 0x31ec6000 | (nv_crtc->index << 25); - u32 syncs = 0x00000001; - - if (mode->flags & DRM_MODE_FLAG_NHSYNC) - syncs |= 0x00000008; - if (mode->flags & DRM_MODE_FLAG_NVSYNC) - syncs |= 0x00000010; - - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - magic |= 0x00000001; - - evo_mthd(push, 0x0404 + (nv_crtc->index * 0x300), 2); - evo_data(push, syncs); - evo_data(push, magic); - evo_mthd(push, 0x0180 + (nv_encoder->or * 0x020), 1); - evo_data(push, 1 << nv_crtc->index); - } - - evo_kick(push, mast); - } - - nv_encoder->crtc = encoder->crtc; -} - -static enum drm_connector_status -nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector) -{ - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nv50_disp *disp = nv50_disp(encoder->dev); - struct { - struct nv50_disp_mthd_v1 base; - struct nv50_disp_dac_load_v0 load; - } args = { - .base.version = 1, - .base.method = NV50_DISP_MTHD_V1_DAC_LOAD, - .base.hasht = nv_encoder->dcb->hasht, - .base.hashm = nv_encoder->dcb->hashm, - }; - int ret; - - args.load.data = nouveau_drm(encoder->dev)->vbios.dactestval; - if (args.load.data == 0) - args.load.data = 340; - - ret = nvif_mthd(disp->disp, 0, &args, sizeof(args)); - if (ret || !args.load.load) - return connector_status_disconnected; - - return connector_status_connected; -} - -static const struct drm_encoder_helper_funcs -nv50_dac_help = { - .atomic_check = nv50_outp_atomic_check, - .enable = nv50_dac_enable, - .disable = nv50_dac_disable, - .detect = nv50_dac_detect -}; - -static void -nv50_dac_destroy(struct drm_encoder *encoder) -{ - drm_encoder_cleanup(encoder); - kfree(encoder); -} - -static const struct drm_encoder_funcs -nv50_dac_func = { - .destroy = nv50_dac_destroy, -}; - -static int -nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe) -{ - struct nouveau_drm *drm = nouveau_drm(connector->dev); - struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device); - struct nvkm_i2c_bus *bus; - struct nouveau_encoder *nv_encoder; - struct drm_encoder *encoder; - int type = DRM_MODE_ENCODER_DAC; - - nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL); - if (!nv_encoder) - return -ENOMEM; - nv_encoder->dcb = dcbe; - - bus = nvkm_i2c_bus_find(i2c, dcbe->i2c_index); - if (bus) - nv_encoder->i2c = &bus->i2c; - - encoder = to_drm_encoder(nv_encoder); - encoder->possible_crtcs = dcbe->heads; - encoder->possible_clones = 0; - drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type, - "dac-%04x-%04x", dcbe->hasht, dcbe->hashm); - drm_encoder_helper_add(encoder, &nv50_dac_help); - - drm_mode_connector_attach_encoder(connector, encoder); - return 0; -} - -/****************************************************************************** - * Audio - *****************************************************************************/ -static void -nv50_audio_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc) -{ - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nv50_disp *disp = nv50_disp(encoder->dev); - struct { - struct nv50_disp_mthd_v1 base; - struct nv50_disp_sor_hda_eld_v0 eld; - } args = { - .base.version = 1, - .base.method = NV50_DISP_MTHD_V1_SOR_HDA_ELD, - .base.hasht = nv_encoder->dcb->hasht, - .base.hashm = (0xf0ff & nv_encoder->dcb->hashm) | - (0x0100 << nv_crtc->index), - }; - - nvif_mthd(disp->disp, 0, &args, sizeof(args)); -} - -static void -nv50_audio_enable(struct drm_encoder *encoder, struct drm_display_mode *mode) -{ - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); - struct nouveau_connector *nv_connector; - struct nv50_disp *disp = nv50_disp(encoder->dev); - struct __packed { - struct { - struct nv50_disp_mthd_v1 mthd; - struct nv50_disp_sor_hda_eld_v0 eld; - } base; - u8 data[sizeof(nv_connector->base.eld)]; - } args = { - .base.mthd.version = 1, - .base.mthd.method = NV50_DISP_MTHD_V1_SOR_HDA_ELD, - .base.mthd.hasht = nv_encoder->dcb->hasht, - .base.mthd.hashm = (0xf0ff & nv_encoder->dcb->hashm) | - (0x0100 << nv_crtc->index), - }; - - nv_connector = nouveau_encoder_connector_get(nv_encoder); - if (!drm_detect_monitor_audio(nv_connector->edid)) - return; - - memcpy(args.data, nv_connector->base.eld, sizeof(args.data)); - - nvif_mthd(disp->disp, 0, &args, - sizeof(args.base) + drm_eld_size(args.data)); -} - -/****************************************************************************** - * HDMI - *****************************************************************************/ -static void -nv50_hdmi_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc) -{ - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nv50_disp *disp = nv50_disp(encoder->dev); - struct { - struct nv50_disp_mthd_v1 base; - struct nv50_disp_sor_hdmi_pwr_v0 pwr; - } args = { - .base.version = 1, - .base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR, - .base.hasht = nv_encoder->dcb->hasht, - .base.hashm = (0xf0ff & nv_encoder->dcb->hashm) | - (0x0100 << nv_crtc->index), - }; - - nvif_mthd(disp->disp, 0, &args, sizeof(args)); -} - -static void -nv50_hdmi_enable(struct drm_encoder *encoder, struct drm_display_mode *mode) -{ - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); - struct nv50_disp *disp = nv50_disp(encoder->dev); - struct { - struct nv50_disp_mthd_v1 base; - struct nv50_disp_sor_hdmi_pwr_v0 pwr; - u8 infoframes[2 * 17]; /* two frames, up to 17 bytes each */ - } args = { - .base.version = 1, - .base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR, - .base.hasht = nv_encoder->dcb->hasht, - .base.hashm = (0xf0ff & nv_encoder->dcb->hashm) | - (0x0100 << nv_crtc->index), - .pwr.state = 1, - .pwr.rekey = 56, /* binary driver, and tegra, constant */ - }; - struct nouveau_connector *nv_connector; - u32 max_ac_packet; - union hdmi_infoframe avi_frame; - union hdmi_infoframe vendor_frame; - int ret; - int size; - - nv_connector = nouveau_encoder_connector_get(nv_encoder); - if (!drm_detect_hdmi_monitor(nv_connector->edid)) - return; - - ret = drm_hdmi_avi_infoframe_from_display_mode(&avi_frame.avi, mode, - false); - if (!ret) { - /* We have an AVI InfoFrame, populate it to the display */ - args.pwr.avi_infoframe_length - = hdmi_infoframe_pack(&avi_frame, args.infoframes, 17); - } - - ret = drm_hdmi_vendor_infoframe_from_display_mode(&vendor_frame.vendor.hdmi, - &nv_connector->base, mode); - if (!ret) { - /* We have a Vendor InfoFrame, populate it to the display */ - args.pwr.vendor_infoframe_length - = hdmi_infoframe_pack(&vendor_frame, - args.infoframes - + args.pwr.avi_infoframe_length, - 17); - } - - max_ac_packet = mode->htotal - mode->hdisplay; - max_ac_packet -= args.pwr.rekey; - max_ac_packet -= 18; /* constant from tegra */ - args.pwr.max_ac_packet = max_ac_packet / 32; - - size = sizeof(args.base) - + sizeof(args.pwr) - + args.pwr.avi_infoframe_length - + args.pwr.vendor_infoframe_length; - nvif_mthd(disp->disp, 0, &args, size); - nv50_audio_enable(encoder, mode); -} - -/****************************************************************************** - * MST - *****************************************************************************/ -#define nv50_mstm(p) container_of((p), struct nv50_mstm, mgr) -#define nv50_mstc(p) container_of((p), struct nv50_mstc, connector) -#define nv50_msto(p) container_of((p), struct nv50_msto, encoder) - -struct nv50_mstm { - struct nouveau_encoder *outp; - - struct drm_dp_mst_topology_mgr mgr; - struct nv50_msto *msto[4]; - - bool modified; - bool disabled; - int links; -}; - -struct nv50_mstc { - struct nv50_mstm *mstm; - struct drm_dp_mst_port *port; - struct drm_connector connector; - - struct drm_display_mode *native; - struct edid *edid; - - int pbn; -}; - -struct nv50_msto { - struct drm_encoder encoder; - - struct nv50_head *head; - struct nv50_mstc *mstc; - bool disabled; -}; - -static struct drm_dp_payload * -nv50_msto_payload(struct nv50_msto *msto) -{ - struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev); - struct nv50_mstc *mstc = msto->mstc; - struct nv50_mstm *mstm = mstc->mstm; - int vcpi = mstc->port->vcpi.vcpi, i; - - NV_ATOMIC(drm, "%s: vcpi %d\n", msto->encoder.name, vcpi); - for (i = 0; i < mstm->mgr.max_payloads; i++) { - struct drm_dp_payload *payload = &mstm->mgr.payloads[i]; - NV_ATOMIC(drm, "%s: %d: vcpi %d start 0x%02x slots 0x%02x\n", - mstm->outp->base.base.name, i, payload->vcpi, - payload->start_slot, payload->num_slots); - } - - for (i = 0; i < mstm->mgr.max_payloads; i++) { - struct drm_dp_payload *payload = &mstm->mgr.payloads[i]; - if (payload->vcpi == vcpi) - return payload; - } - - return NULL; -} - -static void -nv50_msto_cleanup(struct nv50_msto *msto) -{ - struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev); - struct nv50_mstc *mstc = msto->mstc; - struct nv50_mstm *mstm = mstc->mstm; - - NV_ATOMIC(drm, "%s: msto cleanup\n", msto->encoder.name); - if (mstc->port && mstc->port->vcpi.vcpi > 0 && !nv50_msto_payload(msto)) - drm_dp_mst_deallocate_vcpi(&mstm->mgr, mstc->port); - if (msto->disabled) { - msto->mstc = NULL; - msto->head = NULL; - msto->disabled = false; - } -} - -static void -nv50_msto_prepare(struct nv50_msto *msto) -{ - struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev); - struct nv50_mstc *mstc = msto->mstc; - struct nv50_mstm *mstm = mstc->mstm; - struct { - struct nv50_disp_mthd_v1 base; - struct nv50_disp_sor_dp_mst_vcpi_v0 vcpi; - } args = { - .base.version = 1, - .base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI, - .base.hasht = mstm->outp->dcb->hasht, - .base.hashm = (0xf0ff & mstm->outp->dcb->hashm) | - (0x0100 << msto->head->base.index), - }; - - NV_ATOMIC(drm, "%s: msto prepare\n", msto->encoder.name); - if (mstc->port && mstc->port->vcpi.vcpi > 0) { - struct drm_dp_payload *payload = nv50_msto_payload(msto); - if (payload) { - args.vcpi.start_slot = payload->start_slot; - args.vcpi.num_slots = payload->num_slots; - args.vcpi.pbn = mstc->port->vcpi.pbn; - args.vcpi.aligned_pbn = mstc->port->vcpi.aligned_pbn; - } - } - - NV_ATOMIC(drm, "%s: %s: %02x %02x %04x %04x\n", - msto->encoder.name, msto->head->base.base.name, - args.vcpi.start_slot, args.vcpi.num_slots, - args.vcpi.pbn, args.vcpi.aligned_pbn); - nvif_mthd(&drm->display->disp, 0, &args, sizeof(args)); -} - -static int -nv50_msto_atomic_check(struct drm_encoder *encoder, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - struct nv50_mstc *mstc = nv50_mstc(conn_state->connector); - struct nv50_mstm *mstm = mstc->mstm; - int bpp = conn_state->connector->display_info.bpc * 3; - int slots; - - mstc->pbn = drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock, bpp); - - slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn); - if (slots < 0) - return slots; - - return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, - mstc->native); -} - -static void -nv50_msto_enable(struct drm_encoder *encoder) -{ - struct nv50_head *head = nv50_head(encoder->crtc); - struct nv50_msto *msto = nv50_msto(encoder); - struct nv50_mstc *mstc = NULL; - struct nv50_mstm *mstm = NULL; - struct drm_connector *connector; - struct drm_connector_list_iter conn_iter; - u8 proto, depth; - int slots; - bool r; - - drm_connector_list_iter_begin(encoder->dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { - if (connector->state->best_encoder == &msto->encoder) { - mstc = nv50_mstc(connector); - mstm = mstc->mstm; - break; - } - } - drm_connector_list_iter_end(&conn_iter); - - if (WARN_ON(!mstc)) - return; - - slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn); - r = drm_dp_mst_allocate_vcpi(&mstm->mgr, mstc->port, mstc->pbn, slots); - WARN_ON(!r); - - if (!mstm->links++) - nv50_outp_acquire(mstm->outp); - - if (mstm->outp->link & 1) - proto = 0x8; - else - proto = 0x9; - - switch (mstc->connector.display_info.bpc) { - case 6: depth = 0x2; break; - case 8: depth = 0x5; break; - case 10: - default: depth = 0x6; break; - } - - mstm->outp->update(mstm->outp, head->base.index, - &head->base.base.state->adjusted_mode, proto, depth); - - msto->head = head; - msto->mstc = mstc; - mstm->modified = true; -} - -static void -nv50_msto_disable(struct drm_encoder *encoder) -{ - struct nv50_msto *msto = nv50_msto(encoder); - struct nv50_mstc *mstc = msto->mstc; - struct nv50_mstm *mstm = mstc->mstm; - - if (mstc->port) - drm_dp_mst_reset_vcpi_slots(&mstm->mgr, mstc->port); - - mstm->outp->update(mstm->outp, msto->head->base.index, NULL, 0, 0); - mstm->modified = true; - if (!--mstm->links) - mstm->disabled = true; - msto->disabled = true; -} - -static const struct drm_encoder_helper_funcs -nv50_msto_help = { - .disable = nv50_msto_disable, - .enable = nv50_msto_enable, - .atomic_check = nv50_msto_atomic_check, -}; - -static void -nv50_msto_destroy(struct drm_encoder *encoder) -{ - struct nv50_msto *msto = nv50_msto(encoder); - drm_encoder_cleanup(&msto->encoder); - kfree(msto); -} - -static const struct drm_encoder_funcs -nv50_msto = { - .destroy = nv50_msto_destroy, -}; - -static int -nv50_msto_new(struct drm_device *dev, u32 heads, const char *name, int id, - struct nv50_msto **pmsto) -{ - struct nv50_msto *msto; - int ret; - - if (!(msto = *pmsto = kzalloc(sizeof(*msto), GFP_KERNEL))) - return -ENOMEM; - - ret = drm_encoder_init(dev, &msto->encoder, &nv50_msto, - DRM_MODE_ENCODER_DPMST, "%s-mst-%d", name, id); - if (ret) { - kfree(*pmsto); - *pmsto = NULL; - return ret; - } - - drm_encoder_helper_add(&msto->encoder, &nv50_msto_help); - msto->encoder.possible_crtcs = heads; - return 0; -} - -static struct drm_encoder * -nv50_mstc_atomic_best_encoder(struct drm_connector *connector, - struct drm_connector_state *connector_state) -{ - struct nv50_head *head = nv50_head(connector_state->crtc); - struct nv50_mstc *mstc = nv50_mstc(connector); - if (mstc->port) { - struct nv50_mstm *mstm = mstc->mstm; - return &mstm->msto[head->base.index]->encoder; - } - return NULL; -} - -static struct drm_encoder * -nv50_mstc_best_encoder(struct drm_connector *connector) -{ - struct nv50_mstc *mstc = nv50_mstc(connector); - if (mstc->port) { - struct nv50_mstm *mstm = mstc->mstm; - return &mstm->msto[0]->encoder; - } - return NULL; -} - -static enum drm_mode_status -nv50_mstc_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - return MODE_OK; -} - -static int -nv50_mstc_get_modes(struct drm_connector *connector) -{ - struct nv50_mstc *mstc = nv50_mstc(connector); - int ret = 0; - - mstc->edid = drm_dp_mst_get_edid(&mstc->connector, mstc->port->mgr, mstc->port); - drm_mode_connector_update_edid_property(&mstc->connector, mstc->edid); - if (mstc->edid) - ret = drm_add_edid_modes(&mstc->connector, mstc->edid); - - if (!mstc->connector.display_info.bpc) - mstc->connector.display_info.bpc = 8; - - if (mstc->native) - drm_mode_destroy(mstc->connector.dev, mstc->native); - mstc->native = nouveau_conn_native_mode(&mstc->connector); - return ret; -} - -static const struct drm_connector_helper_funcs -nv50_mstc_help = { - .get_modes = nv50_mstc_get_modes, - .mode_valid = nv50_mstc_mode_valid, - .best_encoder = nv50_mstc_best_encoder, - .atomic_best_encoder = nv50_mstc_atomic_best_encoder, -}; - -static enum drm_connector_status -nv50_mstc_detect(struct drm_connector *connector, bool force) -{ - struct nv50_mstc *mstc = nv50_mstc(connector); - if (!mstc->port) - return connector_status_disconnected; - return drm_dp_mst_detect_port(connector, mstc->port->mgr, mstc->port); -} - -static void -nv50_mstc_destroy(struct drm_connector *connector) -{ - struct nv50_mstc *mstc = nv50_mstc(connector); - drm_connector_cleanup(&mstc->connector); - kfree(mstc); -} - -static const struct drm_connector_funcs -nv50_mstc = { - .reset = nouveau_conn_reset, - .detect = nv50_mstc_detect, - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = nv50_mstc_destroy, - .atomic_duplicate_state = nouveau_conn_atomic_duplicate_state, - .atomic_destroy_state = nouveau_conn_atomic_destroy_state, - .atomic_set_property = nouveau_conn_atomic_set_property, - .atomic_get_property = nouveau_conn_atomic_get_property, -}; - -static int -nv50_mstc_new(struct nv50_mstm *mstm, struct drm_dp_mst_port *port, - const char *path, struct nv50_mstc **pmstc) -{ - struct drm_device *dev = mstm->outp->base.base.dev; - struct nv50_mstc *mstc; - int ret, i; - - if (!(mstc = *pmstc = kzalloc(sizeof(*mstc), GFP_KERNEL))) - return -ENOMEM; - mstc->mstm = mstm; - mstc->port = port; - - ret = drm_connector_init(dev, &mstc->connector, &nv50_mstc, - DRM_MODE_CONNECTOR_DisplayPort); - if (ret) { - kfree(*pmstc); - *pmstc = NULL; - return ret; - } - - drm_connector_helper_add(&mstc->connector, &nv50_mstc_help); - - mstc->connector.funcs->reset(&mstc->connector); - nouveau_conn_attach_properties(&mstc->connector); - - for (i = 0; i < ARRAY_SIZE(mstm->msto) && mstm->msto[i]; i++) - drm_mode_connector_attach_encoder(&mstc->connector, &mstm->msto[i]->encoder); - - drm_object_attach_property(&mstc->connector.base, dev->mode_config.path_property, 0); - drm_object_attach_property(&mstc->connector.base, dev->mode_config.tile_property, 0); - drm_mode_connector_set_path_property(&mstc->connector, path); - return 0; -} - -static void -nv50_mstm_cleanup(struct nv50_mstm *mstm) -{ - struct nouveau_drm *drm = nouveau_drm(mstm->outp->base.base.dev); - struct drm_encoder *encoder; - int ret; - - NV_ATOMIC(drm, "%s: mstm cleanup\n", mstm->outp->base.base.name); - ret = drm_dp_check_act_status(&mstm->mgr); - - ret = drm_dp_update_payload_part2(&mstm->mgr); - - drm_for_each_encoder(encoder, mstm->outp->base.base.dev) { - if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) { - struct nv50_msto *msto = nv50_msto(encoder); - struct nv50_mstc *mstc = msto->mstc; - if (mstc && mstc->mstm == mstm) - nv50_msto_cleanup(msto); - } - } - - mstm->modified = false; -} - -static void -nv50_mstm_prepare(struct nv50_mstm *mstm) -{ - struct nouveau_drm *drm = nouveau_drm(mstm->outp->base.base.dev); - struct drm_encoder *encoder; - int ret; - - NV_ATOMIC(drm, "%s: mstm prepare\n", mstm->outp->base.base.name); - ret = drm_dp_update_payload_part1(&mstm->mgr); - - drm_for_each_encoder(encoder, mstm->outp->base.base.dev) { - if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) { - struct nv50_msto *msto = nv50_msto(encoder); - struct nv50_mstc *mstc = msto->mstc; - if (mstc && mstc->mstm == mstm) - nv50_msto_prepare(msto); - } - } - - if (mstm->disabled) { - if (!mstm->links) - nv50_outp_release(mstm->outp); - mstm->disabled = false; - } -} - -static void -nv50_mstm_hotplug(struct drm_dp_mst_topology_mgr *mgr) -{ - struct nv50_mstm *mstm = nv50_mstm(mgr); - drm_kms_helper_hotplug_event(mstm->outp->base.base.dev); -} - -static void -nv50_mstm_destroy_connector(struct drm_dp_mst_topology_mgr *mgr, - struct drm_connector *connector) -{ - struct nouveau_drm *drm = nouveau_drm(connector->dev); - struct nv50_mstc *mstc = nv50_mstc(connector); - - drm_connector_unregister(&mstc->connector); - - drm_modeset_lock_all(drm->dev); - drm_fb_helper_remove_one_connector(&drm->fbcon->helper, &mstc->connector); - mstc->port = NULL; - drm_modeset_unlock_all(drm->dev); - - drm_connector_unreference(&mstc->connector); -} - -static void -nv50_mstm_register_connector(struct drm_connector *connector) -{ - struct nouveau_drm *drm = nouveau_drm(connector->dev); - - drm_modeset_lock_all(drm->dev); - drm_fb_helper_add_one_connector(&drm->fbcon->helper, connector); - drm_modeset_unlock_all(drm->dev); - - drm_connector_register(connector); -} - -static struct drm_connector * -nv50_mstm_add_connector(struct drm_dp_mst_topology_mgr *mgr, - struct drm_dp_mst_port *port, const char *path) -{ - struct nv50_mstm *mstm = nv50_mstm(mgr); - struct nv50_mstc *mstc; - int ret; - - ret = nv50_mstc_new(mstm, port, path, &mstc); - if (ret) { - if (mstc) - mstc->connector.funcs->destroy(&mstc->connector); - return NULL; - } - - return &mstc->connector; -} - -static const struct drm_dp_mst_topology_cbs -nv50_mstm = { - .add_connector = nv50_mstm_add_connector, - .register_connector = nv50_mstm_register_connector, - .destroy_connector = nv50_mstm_destroy_connector, - .hotplug = nv50_mstm_hotplug, -}; - -void -nv50_mstm_service(struct nv50_mstm *mstm) -{ - struct drm_dp_aux *aux = mstm ? mstm->mgr.aux : NULL; - bool handled = true; - int ret; - u8 esi[8] = {}; - - if (!aux) - return; - - while (handled) { - ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT_ESI, esi, 8); - if (ret != 8) { - drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false); - return; - } - - drm_dp_mst_hpd_irq(&mstm->mgr, esi, &handled); - if (!handled) - break; - - drm_dp_dpcd_write(aux, DP_SINK_COUNT_ESI + 1, &esi[1], 3); - } -} - -void -nv50_mstm_remove(struct nv50_mstm *mstm) -{ - if (mstm) - drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false); -} - -static int -nv50_mstm_enable(struct nv50_mstm *mstm, u8 dpcd, int state) -{ - struct nouveau_encoder *outp = mstm->outp; - struct { - struct nv50_disp_mthd_v1 base; - struct nv50_disp_sor_dp_mst_link_v0 mst; - } args = { - .base.version = 1, - .base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_LINK, - .base.hasht = outp->dcb->hasht, - .base.hashm = outp->dcb->hashm, - .mst.state = state, - }; - struct nouveau_drm *drm = nouveau_drm(outp->base.base.dev); - struct nvif_object *disp = &drm->display->disp; - int ret; - - if (dpcd >= 0x12) { - ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CTRL, &dpcd); - if (ret < 0) - return ret; - - dpcd &= ~DP_MST_EN; - if (state) - dpcd |= DP_MST_EN; - - ret = drm_dp_dpcd_writeb(mstm->mgr.aux, DP_MSTM_CTRL, dpcd); - if (ret < 0) - return ret; - } - - return nvif_mthd(disp, 0, &args, sizeof(args)); -} - -int -nv50_mstm_detect(struct nv50_mstm *mstm, u8 dpcd[8], int allow) -{ - int ret, state = 0; - - if (!mstm) - return 0; - - if (dpcd[0] >= 0x12) { - ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CAP, &dpcd[1]); - if (ret < 0) - return ret; - - if (!(dpcd[1] & DP_MST_CAP)) - dpcd[0] = 0x11; - else - state = allow; - } - - ret = nv50_mstm_enable(mstm, dpcd[0], state); - if (ret) - return ret; - - ret = drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, state); - if (ret) - return nv50_mstm_enable(mstm, dpcd[0], 0); - - return mstm->mgr.mst_state; -} - -static void -nv50_mstm_fini(struct nv50_mstm *mstm) -{ - if (mstm && mstm->mgr.mst_state) - drm_dp_mst_topology_mgr_suspend(&mstm->mgr); -} - -static void -nv50_mstm_init(struct nv50_mstm *mstm) -{ - if (mstm && mstm->mgr.mst_state) - drm_dp_mst_topology_mgr_resume(&mstm->mgr); -} - -static void -nv50_mstm_del(struct nv50_mstm **pmstm) -{ - struct nv50_mstm *mstm = *pmstm; - if (mstm) { - kfree(*pmstm); - *pmstm = NULL; - } -} - -static int -nv50_mstm_new(struct nouveau_encoder *outp, struct drm_dp_aux *aux, int aux_max, - int conn_base_id, struct nv50_mstm **pmstm) -{ - const int max_payloads = hweight8(outp->dcb->heads); - struct drm_device *dev = outp->base.base.dev; - struct nv50_mstm *mstm; - int ret, i; - u8 dpcd; - - /* This is a workaround for some monitors not functioning - * correctly in MST mode on initial module load. I think - * some bad interaction with the VBIOS may be responsible. - * - * A good ol' off and on again seems to work here ;) - */ - ret = drm_dp_dpcd_readb(aux, DP_DPCD_REV, &dpcd); - if (ret >= 0 && dpcd >= 0x12) - drm_dp_dpcd_writeb(aux, DP_MSTM_CTRL, 0); - - if (!(mstm = *pmstm = kzalloc(sizeof(*mstm), GFP_KERNEL))) - return -ENOMEM; - mstm->outp = outp; - mstm->mgr.cbs = &nv50_mstm; - - ret = drm_dp_mst_topology_mgr_init(&mstm->mgr, dev, aux, aux_max, - max_payloads, conn_base_id); - if (ret) - return ret; - - for (i = 0; i < max_payloads; i++) { - ret = nv50_msto_new(dev, outp->dcb->heads, outp->base.base.name, - i, &mstm->msto[i]); - if (ret) - return ret; - } - - return 0; -} - -/****************************************************************************** - * SOR - *****************************************************************************/ -static void -nv50_sor_update(struct nouveau_encoder *nv_encoder, u8 head, - struct drm_display_mode *mode, u8 proto, u8 depth) -{ - struct nv50_dmac *core = &nv50_mast(nv_encoder->base.base.dev)->base; - u32 *push; - - if (!mode) { - nv_encoder->ctrl &= ~BIT(head); - if (!(nv_encoder->ctrl & 0x0000000f)) - nv_encoder->ctrl = 0; - } else { - nv_encoder->ctrl |= proto << 8; - nv_encoder->ctrl |= BIT(head); - } - - if ((push = evo_wait(core, 6))) { - if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) { - if (mode) { - if (mode->flags & DRM_MODE_FLAG_NHSYNC) - nv_encoder->ctrl |= 0x00001000; - if (mode->flags & DRM_MODE_FLAG_NVSYNC) - nv_encoder->ctrl |= 0x00002000; - nv_encoder->ctrl |= depth << 16; - } - evo_mthd(push, 0x0600 + (nv_encoder->or * 0x40), 1); - } else { - if (mode) { - u32 magic = 0x31ec6000 | (head << 25); - u32 syncs = 0x00000001; - if (mode->flags & DRM_MODE_FLAG_NHSYNC) - syncs |= 0x00000008; - if (mode->flags & DRM_MODE_FLAG_NVSYNC) - syncs |= 0x00000010; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - magic |= 0x00000001; - - evo_mthd(push, 0x0404 + (head * 0x300), 2); - evo_data(push, syncs | (depth << 6)); - evo_data(push, magic); - } - evo_mthd(push, 0x0200 + (nv_encoder->or * 0x20), 1); - } - evo_data(push, nv_encoder->ctrl); - evo_kick(push, core); - } -} - -static void -nv50_sor_disable(struct drm_encoder *encoder) -{ - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc); - - nv_encoder->crtc = NULL; - - if (nv_crtc) { - struct nvkm_i2c_aux *aux = nv_encoder->aux; - u8 pwr; - - if (aux) { - int ret = nvkm_rdaux(aux, DP_SET_POWER, &pwr, 1); - if (ret == 0) { - pwr &= ~DP_SET_POWER_MASK; - pwr |= DP_SET_POWER_D3; - nvkm_wraux(aux, DP_SET_POWER, &pwr, 1); - } - } - - nv_encoder->update(nv_encoder, nv_crtc->index, NULL, 0, 0); - nv50_audio_disable(encoder, nv_crtc); - nv50_hdmi_disable(&nv_encoder->base.base, nv_crtc); - nv50_outp_release(nv_encoder); - } -} - -static void -nv50_sor_enable(struct drm_encoder *encoder) -{ - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); - struct drm_display_mode *mode = &nv_crtc->base.state->adjusted_mode; - struct { - struct nv50_disp_mthd_v1 base; - struct nv50_disp_sor_lvds_script_v0 lvds; - } lvds = { - .base.version = 1, - .base.method = NV50_DISP_MTHD_V1_SOR_LVDS_SCRIPT, - .base.hasht = nv_encoder->dcb->hasht, - .base.hashm = nv_encoder->dcb->hashm, - }; - struct nv50_disp *disp = nv50_disp(encoder->dev); - struct drm_device *dev = encoder->dev; - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_connector *nv_connector; - struct nvbios *bios = &drm->vbios; - u8 proto = 0xf; - u8 depth = 0x0; - - nv_connector = nouveau_encoder_connector_get(nv_encoder); - nv_encoder->crtc = encoder->crtc; - nv50_outp_acquire(nv_encoder); - - switch (nv_encoder->dcb->type) { - case DCB_OUTPUT_TMDS: - if (nv_encoder->link & 1) { - proto = 0x1; - /* Only enable dual-link if: - * - Need to (i.e. rate > 165MHz) - * - DCB says we can - * - Not an HDMI monitor, since there's no dual-link - * on HDMI. - */ - if (mode->clock >= 165000 && - nv_encoder->dcb->duallink_possible && - !drm_detect_hdmi_monitor(nv_connector->edid)) - proto |= 0x4; - } else { - proto = 0x2; - } - - nv50_hdmi_enable(&nv_encoder->base.base, mode); - break; - case DCB_OUTPUT_LVDS: - proto = 0x0; - - if (bios->fp_no_ddc) { - if (bios->fp.dual_link) - lvds.lvds.script |= 0x0100; - if (bios->fp.if_is_24bit) - lvds.lvds.script |= 0x0200; - } else { - if (nv_connector->type == DCB_CONNECTOR_LVDS_SPWG) { - if (((u8 *)nv_connector->edid)[121] == 2) - lvds.lvds.script |= 0x0100; - } else - if (mode->clock >= bios->fp.duallink_transition_clk) { - lvds.lvds.script |= 0x0100; - } - - if (lvds.lvds.script & 0x0100) { - if (bios->fp.strapless_is_24bit & 2) - lvds.lvds.script |= 0x0200; - } else { - if (bios->fp.strapless_is_24bit & 1) - lvds.lvds.script |= 0x0200; - } - - if (nv_connector->base.display_info.bpc == 8) - lvds.lvds.script |= 0x0200; - } - - nvif_mthd(disp->disp, 0, &lvds, sizeof(lvds)); - break; - case DCB_OUTPUT_DP: - if (nv_connector->base.display_info.bpc == 6) - depth = 0x2; - else - if (nv_connector->base.display_info.bpc == 8) - depth = 0x5; - else - depth = 0x6; - - if (nv_encoder->link & 1) - proto = 0x8; - else - proto = 0x9; - - nv50_audio_enable(encoder, mode); - break; - default: - BUG(); - break; - } - - nv_encoder->update(nv_encoder, nv_crtc->index, mode, proto, depth); -} - -static const struct drm_encoder_helper_funcs -nv50_sor_help = { - .atomic_check = nv50_outp_atomic_check, - .enable = nv50_sor_enable, - .disable = nv50_sor_disable, -}; - -static void -nv50_sor_destroy(struct drm_encoder *encoder) -{ - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - nv50_mstm_del(&nv_encoder->dp.mstm); - drm_encoder_cleanup(encoder); - kfree(encoder); -} - -static const struct drm_encoder_funcs -nv50_sor_func = { - .destroy = nv50_sor_destroy, -}; - -static int -nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe) -{ - struct nouveau_connector *nv_connector = nouveau_connector(connector); - struct nouveau_drm *drm = nouveau_drm(connector->dev); - struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device); - struct nouveau_encoder *nv_encoder; - struct drm_encoder *encoder; - int type, ret; - - switch (dcbe->type) { - case DCB_OUTPUT_LVDS: type = DRM_MODE_ENCODER_LVDS; break; - case DCB_OUTPUT_TMDS: - case DCB_OUTPUT_DP: - default: - type = DRM_MODE_ENCODER_TMDS; - break; - } - - nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL); - if (!nv_encoder) - return -ENOMEM; - nv_encoder->dcb = dcbe; - nv_encoder->update = nv50_sor_update; - - encoder = to_drm_encoder(nv_encoder); - encoder->possible_crtcs = dcbe->heads; - encoder->possible_clones = 0; - drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type, - "sor-%04x-%04x", dcbe->hasht, dcbe->hashm); - drm_encoder_helper_add(encoder, &nv50_sor_help); - - drm_mode_connector_attach_encoder(connector, encoder); - - if (dcbe->type == DCB_OUTPUT_DP) { - struct nv50_disp *disp = nv50_disp(encoder->dev); - struct nvkm_i2c_aux *aux = - nvkm_i2c_aux_find(i2c, dcbe->i2c_index); - if (aux) { - if (disp->disp->oclass < GF110_DISP) { - /* HW has no support for address-only - * transactions, so we're required to - * use custom I2C-over-AUX code. - */ - nv_encoder->i2c = &aux->i2c; - } else { - nv_encoder->i2c = &nv_connector->aux.ddc; - } - nv_encoder->aux = aux; - } - - /*TODO: Use DP Info Table to check for support. */ - if (disp->disp->oclass >= GF110_DISP) { - ret = nv50_mstm_new(nv_encoder, &nv_connector->aux, 16, - nv_connector->base.base.id, - &nv_encoder->dp.mstm); - if (ret) - return ret; - } - } else { - struct nvkm_i2c_bus *bus = - nvkm_i2c_bus_find(i2c, dcbe->i2c_index); - if (bus) - nv_encoder->i2c = &bus->i2c; - } - - return 0; -} - -/****************************************************************************** - * PIOR - *****************************************************************************/ -static int -nv50_pior_atomic_check(struct drm_encoder *encoder, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - int ret = nv50_outp_atomic_check(encoder, crtc_state, conn_state); - if (ret) - return ret; - crtc_state->adjusted_mode.clock *= 2; - return 0; -} - -static void -nv50_pior_disable(struct drm_encoder *encoder) -{ - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nv50_mast *mast = nv50_mast(encoder->dev); - const int or = nv_encoder->or; - u32 *push; - - if (nv_encoder->crtc) { - push = evo_wait(mast, 4); - if (push) { - if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) { - evo_mthd(push, 0x0700 + (or * 0x040), 1); - evo_data(push, 0x00000000); - } - evo_kick(push, mast); - } - } - - nv_encoder->crtc = NULL; - nv50_outp_release(nv_encoder); -} - -static void -nv50_pior_enable(struct drm_encoder *encoder) -{ - struct nv50_mast *mast = nv50_mast(encoder->dev); - struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); - struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); - struct nouveau_connector *nv_connector; - struct drm_display_mode *mode = &nv_crtc->base.state->adjusted_mode; - u8 owner = 1 << nv_crtc->index; - u8 proto, depth; - u32 *push; - - nv50_outp_acquire(nv_encoder); - - nv_connector = nouveau_encoder_connector_get(nv_encoder); - switch (nv_connector->base.display_info.bpc) { - case 10: depth = 0x6; break; - case 8: depth = 0x5; break; - case 6: depth = 0x2; break; - default: depth = 0x0; break; - } - - switch (nv_encoder->dcb->type) { - case DCB_OUTPUT_TMDS: - case DCB_OUTPUT_DP: - proto = 0x0; - break; - default: - BUG(); - break; - } - - push = evo_wait(mast, 8); - if (push) { - if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) { - u32 ctrl = (depth << 16) | (proto << 8) | owner; - if (mode->flags & DRM_MODE_FLAG_NHSYNC) - ctrl |= 0x00001000; - if (mode->flags & DRM_MODE_FLAG_NVSYNC) - ctrl |= 0x00002000; - evo_mthd(push, 0x0700 + (nv_encoder->or * 0x040), 1); - evo_data(push, ctrl); - } - - evo_kick(push, mast); - } - - nv_encoder->crtc = encoder->crtc; -} - -static const struct drm_encoder_helper_funcs -nv50_pior_help = { - .atomic_check = nv50_pior_atomic_check, - .enable = nv50_pior_enable, - .disable = nv50_pior_disable, -}; - -static void -nv50_pior_destroy(struct drm_encoder *encoder) -{ - drm_encoder_cleanup(encoder); - kfree(encoder); -} - -static const struct drm_encoder_funcs -nv50_pior_func = { - .destroy = nv50_pior_destroy, -}; - -static int -nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe) -{ - struct nouveau_connector *nv_connector = nouveau_connector(connector); - struct nouveau_drm *drm = nouveau_drm(connector->dev); - struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device); - struct nvkm_i2c_bus *bus = NULL; - struct nvkm_i2c_aux *aux = NULL; - struct i2c_adapter *ddc; - struct nouveau_encoder *nv_encoder; - struct drm_encoder *encoder; - int type; - - switch (dcbe->type) { - case DCB_OUTPUT_TMDS: - bus = nvkm_i2c_bus_find(i2c, NVKM_I2C_BUS_EXT(dcbe->extdev)); - ddc = bus ? &bus->i2c : NULL; - type = DRM_MODE_ENCODER_TMDS; - break; - case DCB_OUTPUT_DP: - aux = nvkm_i2c_aux_find(i2c, NVKM_I2C_AUX_EXT(dcbe->extdev)); - ddc = aux ? &nv_connector->aux.ddc : NULL; - type = DRM_MODE_ENCODER_TMDS; - break; - default: - return -ENODEV; - } - - nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL); - if (!nv_encoder) - return -ENOMEM; - nv_encoder->dcb = dcbe; - nv_encoder->i2c = ddc; - nv_encoder->aux = aux; - - encoder = to_drm_encoder(nv_encoder); - encoder->possible_crtcs = dcbe->heads; - encoder->possible_clones = 0; - drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type, - "pior-%04x-%04x", dcbe->hasht, dcbe->hashm); - drm_encoder_helper_add(encoder, &nv50_pior_help); - - drm_mode_connector_attach_encoder(connector, encoder); - return 0; -} - -/****************************************************************************** - * Atomic - *****************************************************************************/ - -static void -nv50_disp_atomic_commit_core(struct nouveau_drm *drm, u32 interlock) -{ - struct nv50_disp *disp = nv50_disp(drm->dev); - struct nv50_dmac *core = &disp->mast.base; - struct nv50_mstm *mstm; - struct drm_encoder *encoder; - u32 *push; - - NV_ATOMIC(drm, "commit core %08x\n", interlock); - - drm_for_each_encoder(encoder, drm->dev) { - if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) { - mstm = nouveau_encoder(encoder)->dp.mstm; - if (mstm && mstm->modified) - nv50_mstm_prepare(mstm); - } - } - - if ((push = evo_wait(core, 5))) { - evo_mthd(push, 0x0084, 1); - evo_data(push, 0x80000000); - evo_mthd(push, 0x0080, 2); - evo_data(push, interlock); - evo_data(push, 0x00000000); - nouveau_bo_wr32(disp->sync, 0, 0x00000000); - evo_kick(push, core); - if (nvif_msec(&drm->client.device, 2000ULL, - if (nouveau_bo_rd32(disp->sync, 0)) - break; - usleep_range(1, 2); - ) < 0) - NV_ERROR(drm, "EVO timeout\n"); - } - - drm_for_each_encoder(encoder, drm->dev) { - if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) { - mstm = nouveau_encoder(encoder)->dp.mstm; - if (mstm && mstm->modified) - nv50_mstm_cleanup(mstm); - } - } -} - -static void -nv50_disp_atomic_commit_tail(struct drm_atomic_state *state) -{ - struct drm_device *dev = state->dev; - struct drm_crtc_state *new_crtc_state, *old_crtc_state; - struct drm_crtc *crtc; - struct drm_plane_state *new_plane_state; - struct drm_plane *plane; - struct nouveau_drm *drm = nouveau_drm(dev); - struct nv50_disp *disp = nv50_disp(dev); - struct nv50_atom *atom = nv50_atom(state); - struct nv50_outp_atom *outp, *outt; - u32 interlock_core = 0; - u32 interlock_chan = 0; - int i; - - NV_ATOMIC(drm, "commit %d %d\n", atom->lock_core, atom->flush_disable); - drm_atomic_helper_wait_for_fences(dev, state, false); - drm_atomic_helper_wait_for_dependencies(state); - drm_atomic_helper_update_legacy_modeset_state(dev, state); - - if (atom->lock_core) - mutex_lock(&disp->mutex); - - /* Disable head(s). */ - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state); - struct nv50_head *head = nv50_head(crtc); - - NV_ATOMIC(drm, "%s: clr %04x (set %04x)\n", crtc->name, - asyh->clr.mask, asyh->set.mask); - if (old_crtc_state->active && !new_crtc_state->active) - drm_crtc_vblank_off(crtc); - - if (asyh->clr.mask) { - nv50_head_flush_clr(head, asyh, atom->flush_disable); - interlock_core |= 1; - } - } - - /* Disable plane(s). */ - for_each_new_plane_in_state(state, plane, new_plane_state, i) { - struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); - struct nv50_wndw *wndw = nv50_wndw(plane); - - NV_ATOMIC(drm, "%s: clr %02x (set %02x)\n", plane->name, - asyw->clr.mask, asyw->set.mask); - if (!asyw->clr.mask) - continue; - - interlock_chan |= nv50_wndw_flush_clr(wndw, interlock_core, - atom->flush_disable, - asyw); - } - - /* Disable output path(s). */ - list_for_each_entry(outp, &atom->outp, head) { - const struct drm_encoder_helper_funcs *help; - struct drm_encoder *encoder; - - encoder = outp->encoder; - help = encoder->helper_private; - - NV_ATOMIC(drm, "%s: clr %02x (set %02x)\n", encoder->name, - outp->clr.mask, outp->set.mask); - - if (outp->clr.mask) { - help->disable(encoder); - interlock_core |= 1; - if (outp->flush_disable) { - nv50_disp_atomic_commit_core(drm, interlock_chan); - interlock_core = 0; - interlock_chan = 0; - } - } - } - - /* Flush disable. */ - if (interlock_core) { - if (atom->flush_disable) { - nv50_disp_atomic_commit_core(drm, interlock_chan); - interlock_core = 0; - interlock_chan = 0; - } - } - - /* Update output path(s). */ - list_for_each_entry_safe(outp, outt, &atom->outp, head) { - const struct drm_encoder_helper_funcs *help; - struct drm_encoder *encoder; - - encoder = outp->encoder; - help = encoder->helper_private; - - NV_ATOMIC(drm, "%s: set %02x (clr %02x)\n", encoder->name, - outp->set.mask, outp->clr.mask); - - if (outp->set.mask) { - help->enable(encoder); - interlock_core = 1; - } - - list_del(&outp->head); - kfree(outp); - } - - /* Update head(s). */ - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state); - struct nv50_head *head = nv50_head(crtc); - - NV_ATOMIC(drm, "%s: set %04x (clr %04x)\n", crtc->name, - asyh->set.mask, asyh->clr.mask); - - if (asyh->set.mask) { - nv50_head_flush_set(head, asyh); - interlock_core = 1; - } - - if (new_crtc_state->active) { - if (!old_crtc_state->active) - drm_crtc_vblank_on(crtc); - if (new_crtc_state->event) - drm_crtc_vblank_get(crtc); - } - } - - /* Update plane(s). */ - for_each_new_plane_in_state(state, plane, new_plane_state, i) { - struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); - struct nv50_wndw *wndw = nv50_wndw(plane); - - NV_ATOMIC(drm, "%s: set %02x (clr %02x)\n", plane->name, - asyw->set.mask, asyw->clr.mask); - if ( !asyw->set.mask && - (!asyw->clr.mask || atom->flush_disable)) - continue; - - interlock_chan |= nv50_wndw_flush_set(wndw, interlock_core, asyw); - } - - /* Flush update. */ - if (interlock_core) { - if (!interlock_chan && atom->state.legacy_cursor_update) { - u32 *push = evo_wait(&disp->mast, 2); - if (push) { - evo_mthd(push, 0x0080, 1); - evo_data(push, 0x00000000); - evo_kick(push, &disp->mast); - } - } else { - nv50_disp_atomic_commit_core(drm, interlock_chan); - } - } - - if (atom->lock_core) - mutex_unlock(&disp->mutex); - - /* Wait for HW to signal completion. */ - for_each_new_plane_in_state(state, plane, new_plane_state, i) { - struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); - struct nv50_wndw *wndw = nv50_wndw(plane); - int ret = nv50_wndw_wait_armed(wndw, asyw); - if (ret) - NV_ERROR(drm, "%s: timeout\n", plane->name); - } - - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { - if (new_crtc_state->event) { - unsigned long flags; - /* Get correct count/ts if racing with vblank irq */ - if (new_crtc_state->active) - drm_crtc_accurate_vblank_count(crtc); - spin_lock_irqsave(&crtc->dev->event_lock, flags); - drm_crtc_send_vblank_event(crtc, new_crtc_state->event); - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - - new_crtc_state->event = NULL; - if (new_crtc_state->active) - drm_crtc_vblank_put(crtc); - } - } - - drm_atomic_helper_commit_hw_done(state); - drm_atomic_helper_cleanup_planes(dev, state); - drm_atomic_helper_commit_cleanup_done(state); - drm_atomic_state_put(state); -} - -static void -nv50_disp_atomic_commit_work(struct work_struct *work) -{ - struct drm_atomic_state *state = - container_of(work, typeof(*state), commit_work); - nv50_disp_atomic_commit_tail(state); -} - -static int -nv50_disp_atomic_commit(struct drm_device *dev, - struct drm_atomic_state *state, bool nonblock) -{ - struct nouveau_drm *drm = nouveau_drm(dev); - struct nv50_disp *disp = nv50_disp(dev); - struct drm_plane_state *new_plane_state; - struct drm_plane *plane; - struct drm_crtc *crtc; - bool active = false; - int ret, i; - - ret = pm_runtime_get_sync(dev->dev); - if (ret < 0 && ret != -EACCES) - return ret; - - ret = drm_atomic_helper_setup_commit(state, nonblock); - if (ret) - goto done; - - INIT_WORK(&state->commit_work, nv50_disp_atomic_commit_work); - - ret = drm_atomic_helper_prepare_planes(dev, state); - if (ret) - goto done; - - if (!nonblock) { - ret = drm_atomic_helper_wait_for_fences(dev, state, true); - if (ret) - goto err_cleanup; - } - - ret = drm_atomic_helper_swap_state(state, true); - if (ret) - goto err_cleanup; - - for_each_new_plane_in_state(state, plane, new_plane_state, i) { - struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); - struct nv50_wndw *wndw = nv50_wndw(plane); - - if (asyw->set.image) { - asyw->ntfy.handle = wndw->dmac->sync.handle; - asyw->ntfy.offset = wndw->ntfy; - asyw->ntfy.awaken = false; - asyw->set.ntfy = true; - nouveau_bo_wr32(disp->sync, wndw->ntfy / 4, 0x00000000); - wndw->ntfy ^= 0x10; - } - } - - drm_atomic_state_get(state); - - if (nonblock) - queue_work(system_unbound_wq, &state->commit_work); - else - nv50_disp_atomic_commit_tail(state); - - drm_for_each_crtc(crtc, dev) { - if (crtc->state->enable) { - if (!drm->have_disp_power_ref) { - drm->have_disp_power_ref = true; - return 0; - } - active = true; - break; - } - } - - if (!active && drm->have_disp_power_ref) { - pm_runtime_put_autosuspend(dev->dev); - drm->have_disp_power_ref = false; - } - -err_cleanup: - if (ret) - drm_atomic_helper_cleanup_planes(dev, state); -done: - pm_runtime_put_autosuspend(dev->dev); - return ret; -} - -static struct nv50_outp_atom * -nv50_disp_outp_atomic_add(struct nv50_atom *atom, struct drm_encoder *encoder) -{ - struct nv50_outp_atom *outp; - - list_for_each_entry(outp, &atom->outp, head) { - if (outp->encoder == encoder) - return outp; - } - - outp = kzalloc(sizeof(*outp), GFP_KERNEL); - if (!outp) - return ERR_PTR(-ENOMEM); - - list_add(&outp->head, &atom->outp); - outp->encoder = encoder; - return outp; -} - -static int -nv50_disp_outp_atomic_check_clr(struct nv50_atom *atom, - struct drm_connector_state *old_connector_state) -{ - struct drm_encoder *encoder = old_connector_state->best_encoder; - struct drm_crtc_state *old_crtc_state, *new_crtc_state; - struct drm_crtc *crtc; - struct nv50_outp_atom *outp; - - if (!(crtc = old_connector_state->crtc)) - return 0; - - old_crtc_state = drm_atomic_get_old_crtc_state(&atom->state, crtc); - new_crtc_state = drm_atomic_get_new_crtc_state(&atom->state, crtc); - if (old_crtc_state->active && drm_atomic_crtc_needs_modeset(new_crtc_state)) { - outp = nv50_disp_outp_atomic_add(atom, encoder); - if (IS_ERR(outp)) - return PTR_ERR(outp); - - if (outp->encoder->encoder_type == DRM_MODE_ENCODER_DPMST) { - outp->flush_disable = true; - atom->flush_disable = true; - } - outp->clr.ctrl = true; - atom->lock_core = true; - } - - return 0; -} - -static int -nv50_disp_outp_atomic_check_set(struct nv50_atom *atom, - struct drm_connector_state *connector_state) -{ - struct drm_encoder *encoder = connector_state->best_encoder; - struct drm_crtc_state *new_crtc_state; - struct drm_crtc *crtc; - struct nv50_outp_atom *outp; - - if (!(crtc = connector_state->crtc)) - return 0; - - new_crtc_state = drm_atomic_get_new_crtc_state(&atom->state, crtc); - if (new_crtc_state->active && drm_atomic_crtc_needs_modeset(new_crtc_state)) { - outp = nv50_disp_outp_atomic_add(atom, encoder); - if (IS_ERR(outp)) - return PTR_ERR(outp); - - outp->set.ctrl = true; - atom->lock_core = true; - } - - return 0; -} - -static int -nv50_disp_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) -{ - struct nv50_atom *atom = nv50_atom(state); - struct drm_connector_state *old_connector_state, *new_connector_state; - struct drm_connector *connector; - int ret, i; - - ret = drm_atomic_helper_check(dev, state); - if (ret) - return ret; - - for_each_oldnew_connector_in_state(state, connector, old_connector_state, new_connector_state, i) { - ret = nv50_disp_outp_atomic_check_clr(atom, old_connector_state); - if (ret) - return ret; - - ret = nv50_disp_outp_atomic_check_set(atom, new_connector_state); - if (ret) - return ret; - } - - return 0; -} - -static void -nv50_disp_atomic_state_clear(struct drm_atomic_state *state) -{ - struct nv50_atom *atom = nv50_atom(state); - struct nv50_outp_atom *outp, *outt; - - list_for_each_entry_safe(outp, outt, &atom->outp, head) { - list_del(&outp->head); - kfree(outp); - } - - drm_atomic_state_default_clear(state); -} - -static void -nv50_disp_atomic_state_free(struct drm_atomic_state *state) -{ - struct nv50_atom *atom = nv50_atom(state); - drm_atomic_state_default_release(&atom->state); - kfree(atom); -} - -static struct drm_atomic_state * -nv50_disp_atomic_state_alloc(struct drm_device *dev) -{ - struct nv50_atom *atom; - if (!(atom = kzalloc(sizeof(*atom), GFP_KERNEL)) || - drm_atomic_state_init(dev, &atom->state) < 0) { - kfree(atom); - return NULL; - } - INIT_LIST_HEAD(&atom->outp); - return &atom->state; -} - -static const struct drm_mode_config_funcs -nv50_disp_func = { - .fb_create = nouveau_user_framebuffer_create, - .output_poll_changed = drm_fb_helper_output_poll_changed, - .atomic_check = nv50_disp_atomic_check, - .atomic_commit = nv50_disp_atomic_commit, - .atomic_state_alloc = nv50_disp_atomic_state_alloc, - .atomic_state_clear = nv50_disp_atomic_state_clear, - .atomic_state_free = nv50_disp_atomic_state_free, -}; - -/****************************************************************************** - * Init - *****************************************************************************/ - -void -nv50_display_fini(struct drm_device *dev) -{ - struct nouveau_encoder *nv_encoder; - struct drm_encoder *encoder; - struct drm_plane *plane; - - drm_for_each_plane(plane, dev) { - struct nv50_wndw *wndw = nv50_wndw(plane); - if (plane->funcs != &nv50_wndw) - continue; - nv50_wndw_fini(wndw); - } - - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) { - nv_encoder = nouveau_encoder(encoder); - nv50_mstm_fini(nv_encoder->dp.mstm); - } - } -} - -int -nv50_display_init(struct drm_device *dev) -{ - struct drm_encoder *encoder; - struct drm_plane *plane; - u32 *push; - - push = evo_wait(nv50_mast(dev), 32); - if (!push) - return -EBUSY; - - evo_mthd(push, 0x0088, 1); - evo_data(push, nv50_mast(dev)->base.sync.handle); - evo_kick(push, nv50_mast(dev)); - - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) { - struct nouveau_encoder *nv_encoder = - nouveau_encoder(encoder); - nv50_mstm_init(nv_encoder->dp.mstm); - } - } - - drm_for_each_plane(plane, dev) { - struct nv50_wndw *wndw = nv50_wndw(plane); - if (plane->funcs != &nv50_wndw) - continue; - nv50_wndw_init(wndw); - } - - return 0; -} - -void -nv50_display_destroy(struct drm_device *dev) -{ - struct nv50_disp *disp = nv50_disp(dev); - - nv50_dmac_destroy(&disp->mast.base, disp->disp); - - nouveau_bo_unmap(disp->sync); - if (disp->sync) - nouveau_bo_unpin(disp->sync); - nouveau_bo_ref(NULL, &disp->sync); - - nouveau_display(dev)->priv = NULL; - kfree(disp); -} - -MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)"); -static int nouveau_atomic = 0; -module_param_named(atomic, nouveau_atomic, int, 0400); - -int -nv50_display_create(struct drm_device *dev) -{ - struct nvif_device *device = &nouveau_drm(dev)->client.device; - struct nouveau_drm *drm = nouveau_drm(dev); - struct dcb_table *dcb = &drm->vbios.dcb; - struct drm_connector *connector, *tmp; - struct nv50_disp *disp; - struct dcb_output *dcbe; - int crtcs, ret, i; - - disp = kzalloc(sizeof(*disp), GFP_KERNEL); - if (!disp) - return -ENOMEM; - - mutex_init(&disp->mutex); - - nouveau_display(dev)->priv = disp; - nouveau_display(dev)->dtor = nv50_display_destroy; - nouveau_display(dev)->init = nv50_display_init; - nouveau_display(dev)->fini = nv50_display_fini; - disp->disp = &nouveau_display(dev)->disp; - dev->mode_config.funcs = &nv50_disp_func; - dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP; - if (nouveau_atomic) - dev->driver->driver_features |= DRIVER_ATOMIC; - - /* small shared memory area we use for notifiers and semaphores */ - ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM, - 0, 0x0000, NULL, NULL, &disp->sync); - if (!ret) { - ret = nouveau_bo_pin(disp->sync, TTM_PL_FLAG_VRAM, true); - if (!ret) { - ret = nouveau_bo_map(disp->sync); - if (ret) - nouveau_bo_unpin(disp->sync); - } - if (ret) - nouveau_bo_ref(NULL, &disp->sync); - } - - if (ret) - goto out; - - /* allocate master evo channel */ - ret = nv50_core_create(device, disp->disp, disp->sync->bo.offset, - &disp->mast); - if (ret) - goto out; - - /* create crtc objects to represent the hw heads */ - if (disp->disp->oclass >= GF110_DISP) - crtcs = nvif_rd32(&device->object, 0x612004) & 0xf; - else - crtcs = 0x3; - - for (i = 0; i < fls(crtcs); i++) { - if (!(crtcs & (1 << i))) - continue; - ret = nv50_head_create(dev, i); - if (ret) - goto out; - } - - /* create encoder/connector objects based on VBIOS DCB table */ - for (i = 0, dcbe = &dcb->entry[0]; i < dcb->entries; i++, dcbe++) { - connector = nouveau_connector_create(dev, dcbe->connector); - if (IS_ERR(connector)) - continue; - - if (dcbe->location == DCB_LOC_ON_CHIP) { - switch (dcbe->type) { - case DCB_OUTPUT_TMDS: - case DCB_OUTPUT_LVDS: - case DCB_OUTPUT_DP: - ret = nv50_sor_create(connector, dcbe); - break; - case DCB_OUTPUT_ANALOG: - ret = nv50_dac_create(connector, dcbe); - break; - default: - ret = -ENODEV; - break; - } - } else { - ret = nv50_pior_create(connector, dcbe); - } - - if (ret) { - NV_WARN(drm, "failed to create encoder %d/%d/%d: %d\n", - dcbe->location, dcbe->type, - ffs(dcbe->or) - 1, ret); - ret = 0; - } - } - - /* cull any connectors we created that don't have an encoder */ - list_for_each_entry_safe(connector, tmp, &dev->mode_config.connector_list, head) { - if (connector->encoder_ids[0]) - continue; - - NV_WARN(drm, "%s has no encoders, removing\n", - connector->name); - connector->funcs->destroy(connector); - } - -out: - if (ret) - nv50_display_destroy(dev); - return ret; -} diff --git a/drivers/gpu/drm/nouveau/nv50_display.h b/drivers/gpu/drm/nouveau/nv50_display.h index 918187cee84b..fbd3b15583bc 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.h +++ b/drivers/gpu/drm/nouveau/nv50_display.h @@ -28,7 +28,6 @@ #define __NV50_DISPLAY_H__ #include "nouveau_display.h" -#include "nouveau_crtc.h" #include "nouveau_reg.h" int nv50_display_create(struct drm_device *); diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c index a369d978e267..a00ecc3de053 100644 --- a/drivers/gpu/drm/nouveau/nv50_fence.c +++ b/drivers/gpu/drm/nouveau/nv50_fence.c @@ -78,8 +78,6 @@ nv50_fence_create(struct nouveau_drm *drm) priv->base.resume = nv17_fence_resume; priv->base.context_new = nv50_fence_context_new; priv->base.context_del = nv10_fence_context_del; - priv->base.contexts = 127; - priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); spin_lock_init(&priv->lock); ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM, diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index 5f0c0c27d5dc..090664899247 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -141,9 +141,9 @@ nv84_fence_suspend(struct nouveau_drm *drm) struct nv84_fence_priv *priv = drm->fence; int i; - priv->suspend = vmalloc(priv->base.contexts * sizeof(u32)); + priv->suspend = vmalloc(drm->chan.nr * sizeof(u32)); if (priv->suspend) { - for (i = 0; i < priv->base.contexts; i++) + for (i = 0; i < drm->chan.nr; i++) priv->suspend[i] = nouveau_bo_rd32(priv->bo, i*4); } @@ -157,7 +157,7 @@ nv84_fence_resume(struct nouveau_drm *drm) int i; if (priv->suspend) { - for (i = 0; i < priv->base.contexts; i++) + for (i = 0; i < drm->chan.nr; i++) nouveau_bo_wr32(priv->bo, i*4, priv->suspend[i]); vfree(priv->suspend); priv->suspend = NULL; @@ -179,7 +179,6 @@ nv84_fence_destroy(struct nouveau_drm *drm) int nv84_fence_create(struct nouveau_drm *drm) { - struct nvkm_fifo *fifo = nvxx_fifo(&drm->client.device); struct nv84_fence_priv *priv; u32 domain; int ret; @@ -194,8 +193,6 @@ nv84_fence_create(struct nouveau_drm *drm) priv->base.context_new = nv84_fence_context_new; priv->base.context_del = nv84_fence_context_del; - priv->base.contexts = fifo->nr; - priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); priv->base.uevent = true; mutex_init(&priv->mutex); @@ -207,7 +204,7 @@ nv84_fence_create(struct nouveau_drm *drm) * will lose CPU/GPU coherency! */ TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED; - ret = nouveau_bo_new(&drm->client, 16 * priv->base.contexts, 0, + ret = nouveau_bo_new(&drm->client, 16 * drm->chan.nr, 0, domain, 0, 0, NULL, NULL, &priv->bo); if (ret == 0) { ret = nouveau_bo_pin(priv->bo, domain, false); diff --git a/drivers/gpu/drm/nouveau/nvif/Kbuild b/drivers/gpu/drm/nouveau/nvif/Kbuild index f1675a4ab6fa..42e8c85caa33 100644 --- a/drivers/gpu/drm/nouveau/nvif/Kbuild +++ b/drivers/gpu/drm/nouveau/nvif/Kbuild @@ -1,8 +1,14 @@ nvif-y := nvif/object.o nvif-y += nvif/client.o nvif-y += nvif/device.o +nvif-y += nvif/disp.o nvif-y += nvif/driver.o +nvif-y += nvif/fifo.o nvif-y += nvif/mem.o nvif-y += nvif/mmu.o nvif-y += nvif/notify.o nvif-y += nvif/vmm.o + +# Usermode classes +nvif-y += nvif/user.o +nvif-y += nvif/userc361.o diff --git a/drivers/gpu/drm/nouveau/nvif/device.c b/drivers/gpu/drm/nouveau/nvif/device.c index 252d8c33215b..1ec101ba3b42 100644 --- a/drivers/gpu/drm/nouveau/nvif/device.c +++ b/drivers/gpu/drm/nouveau/nvif/device.c @@ -37,6 +37,9 @@ nvif_device_time(struct nvif_device *device) void nvif_device_fini(struct nvif_device *device) { + nvif_user_fini(device); + kfree(device->runlist); + device->runlist = NULL; nvif_object_fini(&device->object); } @@ -46,6 +49,8 @@ nvif_device_init(struct nvif_object *parent, u32 handle, s32 oclass, { int ret = nvif_object_init(parent, handle, oclass, data, size, &device->object); + device->runlist = NULL; + device->user.func = NULL; if (ret == 0) { device->info.version = 0; ret = nvif_object_mthd(&device->object, NV_DEVICE_V0_INFO, diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h b/drivers/gpu/drm/nouveau/nvif/disp.c index 214f370c5efd..18c7d064f75c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h +++ b/drivers/gpu/drm/nouveau/nvif/disp.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,35 +18,43 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * */ -#ifndef PP_SOC15_H -#define PP_SOC15_H +#include <nvif/disp.h> +#include <nvif/device.h> -#include "soc15_hw_ip.h" -#include "vega10_ip_offset.h" +#include <nvif/class.h> -inline static uint32_t soc15_get_register_offset( - uint32_t hw_id, - uint32_t inst, - uint32_t segment, - uint32_t offset) +void +nvif_disp_dtor(struct nvif_disp *disp) { - uint32_t reg = 0; - - if (hw_id == THM_HWID) - reg = THM_BASE.instance[inst].segment[segment] + offset; - else if (hw_id == NBIF_HWID) - reg = NBIF_BASE.instance[inst].segment[segment] + offset; - else if (hw_id == MP1_HWID) - reg = MP1_BASE.instance[inst].segment[segment] + offset; - else if (hw_id == DF_HWID) - reg = DF_BASE.instance[inst].segment[segment] + offset; - else if (hw_id == GC_HWID) - reg = GC_BASE.instance[inst].segment[segment] + offset; - else if (hw_id == SMUIO_HWID) - reg = SMUIO_BASE.instance[inst].segment[segment] + offset; - return reg; + nvif_object_fini(&disp->object); } -#endif +int +nvif_disp_ctor(struct nvif_device *device, s32 oclass, struct nvif_disp *disp) +{ + static const struct nvif_mclass disps[] = { + { GV100_DISP, -1 }, + { GP102_DISP, -1 }, + { GP100_DISP, -1 }, + { GM200_DISP, -1 }, + { GM107_DISP, -1 }, + { GK110_DISP, -1 }, + { GK104_DISP, -1 }, + { GF110_DISP, -1 }, + { GT214_DISP, -1 }, + { GT206_DISP, -1 }, + { GT200_DISP, -1 }, + { G82_DISP, -1 }, + { NV50_DISP, -1 }, + { NV04_DISP, -1 }, + {} + }; + int cid = nvif_sclass(&device->object, disps, oclass); + disp->object.client = NULL; + if (cid < 0) + return cid; + + return nvif_object_init(&device->object, 0, disps[cid].oclass, + NULL, 0, &disp->object); +} diff --git a/drivers/gpu/drm/nouveau/nvif/fifo.c b/drivers/gpu/drm/nouveau/nvif/fifo.c new file mode 100644 index 000000000000..99d4fd17543c --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvif/fifo.c @@ -0,0 +1,99 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <nvif/fifo.h> + +static int +nvif_fifo_runlists(struct nvif_device *device) +{ + struct nvif_object *object = &device->object; + struct { + struct nv_device_info_v1 m; + struct { + struct nv_device_info_v1_data runlists; + struct nv_device_info_v1_data runlist[64]; + } v; + } *a; + int ret, i; + + if (device->runlist) + return 0; + + if (!(a = kmalloc(sizeof(*a), GFP_KERNEL))) + return -ENOMEM; + a->m.version = 1; + a->m.count = sizeof(a->v) / sizeof(a->v.runlists); + a->v.runlists.mthd = NV_DEVICE_FIFO_RUNLISTS; + for (i = 0; i < ARRAY_SIZE(a->v.runlist); i++) + a->v.runlist[i].mthd = NV_DEVICE_FIFO_RUNLIST_ENGINES(i); + + ret = nvif_object_mthd(object, NV_DEVICE_V0_INFO, a, sizeof(*a)); + if (ret) + goto done; + + device->runlists = fls64(a->v.runlists.data); + device->runlist = kzalloc(sizeof(*device->runlist) * + device->runlists, GFP_KERNEL); + if (!device->runlist) { + ret = -ENOMEM; + goto done; + } + + for (i = 0; i < device->runlists; i++) { + if (a->v.runlists.data & BIT_ULL(i)) + device->runlist[i].engines = a->v.runlist[i].data; + } + +done: + kfree(a); + return ret; +} + +u64 +nvif_fifo_runlist(struct nvif_device *device, u64 engine) +{ + struct nvif_object *object = &device->object; + struct { + struct nv_device_info_v1 m; + struct { + struct nv_device_info_v1_data engine; + } v; + } a = { + .m.version = 1, + .m.count = sizeof(a.v) / sizeof(a.v.engine), + .v.engine.mthd = engine, + }; + u64 runm = 0; + int ret, i; + + if ((ret = nvif_fifo_runlists(device))) + return runm; + + ret = nvif_object_mthd(object, NV_DEVICE_V0_INFO, &a, sizeof(a)); + if (ret == 0) { + for (i = 0; i < device->runlists; i++) { + if (device->runlist[i].engines & a.v.engine.data) + runm |= BIT_ULL(i); + } + } + + return runm; +} diff --git a/drivers/gpu/drm/nouveau/nvif/mem.c b/drivers/gpu/drm/nouveau/nvif/mem.c index 0f9382c60145..b6ebb3b58673 100644 --- a/drivers/gpu/drm/nouveau/nvif/mem.c +++ b/drivers/gpu/drm/nouveau/nvif/mem.c @@ -24,6 +24,19 @@ #include <nvif/if000a.h> +int +nvif_mem_init_map(struct nvif_mmu *mmu, u8 type, u64 size, struct nvif_mem *mem) +{ + int ret = nvif_mem_init(mmu, mmu->mem, NVIF_MEM_MAPPABLE | type, 0, + size, NULL, 0, mem); + if (ret == 0) { + ret = nvif_object_map(&mem->object, NULL, 0); + if (ret) + nvif_mem_fini(mem); + } + return ret; +} + void nvif_mem_fini(struct nvif_mem *mem) { diff --git a/drivers/gpu/drm/nouveau/nvif/mmu.c b/drivers/gpu/drm/nouveau/nvif/mmu.c index 15d0dcbf7ab4..358ac4f3cf91 100644 --- a/drivers/gpu/drm/nouveau/nvif/mmu.c +++ b/drivers/gpu/drm/nouveau/nvif/mmu.c @@ -36,6 +36,12 @@ nvif_mmu_fini(struct nvif_mmu *mmu) int nvif_mmu_init(struct nvif_object *parent, s32 oclass, struct nvif_mmu *mmu) { + static const struct nvif_mclass mems[] = { + { NVIF_CLASS_MEM_GF100, -1 }, + { NVIF_CLASS_MEM_NV50 , -1 }, + { NVIF_CLASS_MEM_NV04 , -1 }, + {} + }; struct nvif_mmu_v0 args; int ret, i; @@ -54,6 +60,11 @@ nvif_mmu_init(struct nvif_object *parent, s32 oclass, struct nvif_mmu *mmu) mmu->type_nr = args.type_nr; mmu->kind_nr = args.kind_nr; + ret = nvif_mclass(&mmu->object, mems); + if (ret < 0) + goto done; + mmu->mem = mems[ret].oclass; + mmu->heap = kmalloc(sizeof(*mmu->heap) * mmu->heap_nr, GFP_KERNEL); mmu->type = kmalloc(sizeof(*mmu->type) * mmu->type_nr, GFP_KERNEL); if (ret = -ENOMEM, !mmu->heap || !mmu->type) diff --git a/drivers/gpu/drm/nouveau/nvif/user.c b/drivers/gpu/drm/nouveau/nvif/user.c new file mode 100644 index 000000000000..10da3cdca647 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvif/user.c @@ -0,0 +1,64 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <nvif/user.h> +#include <nvif/device.h> + +#include <nvif/class.h> + +void +nvif_user_fini(struct nvif_device *device) +{ + if (device->user.func) { + nvif_object_fini(&device->user.object); + device->user.func = NULL; + } +} + +int +nvif_user_init(struct nvif_device *device) +{ + struct { + s32 oclass; + int version; + const struct nvif_user_func *func; + } users[] = { + { VOLTA_USERMODE_A, -1, &nvif_userc361 }, + {} + }; + int cid, ret; + + if (device->user.func) + return 0; + + cid = nvif_mclass(&device->object, users); + if (cid < 0) + return cid; + + ret = nvif_object_init(&device->object, 0, users[cid].oclass, NULL, 0, + &device->user.object); + if (ret) + return ret; + + nvif_object_map(&device->user.object, NULL, 0); + device->user.func = users[cid].func; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c b/drivers/gpu/drm/nouveau/nvif/userc361.c index 1530a9217aea..19f9958e7e01 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c +++ b/drivers/gpu/drm/nouveau/nvif/userc361.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Red Hat Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,17 +18,16 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs */ -#include "changk104.h" +#include <nvif/user.h> -#include <nvif/class.h> +static void +nvif_userc361_doorbell(struct nvif_user *user, u32 token) +{ + nvif_wr32(&user->object, 0x90, token); +} -const struct nvkm_fifo_chan_oclass -gp100_fifo_gpfifo_oclass = { - .base.oclass = PASCAL_CHANNEL_GPFIFO_A, - .base.minver = 0, - .base.maxver = 0, - .ctor = gk104_fifo_gpfifo_new, +const struct nvif_user_func +nvif_userc361 = { + .doorbell = nvif_userc361_doorbell, }; diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c index 31cdb2d2e1ff..191832be6c65 100644 --- a/drivers/gpu/drm/nouveau/nvif/vmm.c +++ b/drivers/gpu/drm/nouveau/nvif/vmm.c @@ -37,7 +37,7 @@ nvif_vmm_map(struct nvif_vmm *vmm, u64 addr, u64 size, void *argv, u32 argc, struct nvif_mem *mem, u64 offset) { struct nvif_vmm_map_v0 *args; - u8 stack[16]; + u8 stack[48]; int ret; if (sizeof(*args) + argc > sizeof(stack)) { diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c index 657231c3c098..d0322ce85172 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c @@ -83,6 +83,20 @@ nvkm_engine_intr(struct nvkm_subdev *subdev) } static int +nvkm_engine_info(struct nvkm_subdev *subdev, u64 mthd, u64 *data) +{ + struct nvkm_engine *engine = nvkm_engine(subdev); + if (engine->func->info) { + if ((engine = nvkm_engine_ref(engine))) { + int ret = engine->func->info(engine, mthd, data); + nvkm_engine_unref(&engine); + return ret; + } + } + return -ENOSYS; +} + +static int nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_engine *engine = nvkm_engine(subdev); @@ -150,6 +164,7 @@ nvkm_engine_func = { .preinit = nvkm_engine_preinit, .init = nvkm_engine_init, .fini = nvkm_engine_fini, + .info = nvkm_engine_info, .intr = nvkm_engine_intr, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c index a134d225f958..03f676c18aad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c @@ -35,6 +35,7 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = { [NVKM_SUBDEV_BUS ] = "bus", [NVKM_SUBDEV_CLK ] = "clk", [NVKM_SUBDEV_DEVINIT ] = "devinit", + [NVKM_SUBDEV_FAULT ] = "fault", [NVKM_SUBDEV_FB ] = "fb", [NVKM_SUBDEV_FUSE ] = "fuse", [NVKM_SUBDEV_GPIO ] = "gpio", @@ -60,6 +61,9 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = { [NVKM_ENGINE_CE3 ] = "ce3", [NVKM_ENGINE_CE4 ] = "ce4", [NVKM_ENGINE_CE5 ] = "ce5", + [NVKM_ENGINE_CE6 ] = "ce6", + [NVKM_ENGINE_CE7 ] = "ce7", + [NVKM_ENGINE_CE8 ] = "ce8", [NVKM_ENGINE_CIPHER ] = "cipher", [NVKM_ENGINE_DISP ] = "disp", [NVKM_ENGINE_DMAOBJ ] = "dma", @@ -92,6 +96,14 @@ nvkm_subdev_intr(struct nvkm_subdev *subdev) } int +nvkm_subdev_info(struct nvkm_subdev *subdev, u64 mthd, u64 *data) +{ + if (subdev->func->info) + return subdev->func->info(subdev, mthd, data); + return -ENOSYS; +} + +int nvkm_subdev_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_device *device = subdev->device; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild index 255d81ccf916..80d784441904 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild @@ -5,3 +5,4 @@ nvkm-y += nvkm/engine/ce/gm107.o nvkm-y += nvkm/engine/ce/gm200.o nvkm-y += nvkm/engine/ce/gp100.o nvkm-y += nvkm/engine/ce/gp102.o +nvkm-y += nvkm/engine/ce/gv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gv100.c index fa781b5a7e07..fcda3de45857 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gv100.c @@ -1,5 +1,5 @@ /* - * Copyright 2015 Red Hat Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,20 +18,23 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> */ -#include "channv50.h" -#include "rootnv50.h" +#include "priv.h" #include <nvif/class.h> -const struct nv50_disp_pioc_oclass -g84_disp_curs_oclass = { - .base.oclass = G82_DISP_CURSOR, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_curs_new, - .func = &nv50_disp_pioc_func, - .chid = { 7, 7 }, +static const struct nvkm_engine_func +gv100_ce = { + .intr = gp100_ce_intr, + .sclass = { + { -1, -1, VOLTA_DMA_COPY_A }, + {} + } }; + +int +gv100_ce_new(struct nvkm_device *device, int index, + struct nvkm_engine **pengine) +{ + return nvkm_engine_new_(&gv100_ce, device, index, true, pengine); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 05cd674326a6..e294013426ce 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2161,6 +2161,7 @@ nv130_chipset = { .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, + .fault = gp100_fault_new, .fb = gp100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, @@ -2196,13 +2197,14 @@ nv132_chipset = { .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, + .fault = gp100_fault_new, .fb = gp102_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp100_mc_new, .mmu = gp100_mmu_new, .therm = gp100_therm_new, @@ -2231,13 +2233,14 @@ nv134_chipset = { .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, + .fault = gp100_fault_new, .fb = gp102_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp100_mc_new, .mmu = gp100_mmu_new, .therm = gp100_therm_new, @@ -2253,7 +2256,7 @@ nv134_chipset = { .disp = gp102_disp_new, .dma = gf119_dma_new, .fifo = gp100_fifo_new, - .gr = gp102_gr_new, + .gr = gp104_gr_new, .nvdec = gp102_nvdec_new, .sec2 = gp102_sec2_new, .sw = gf100_sw_new, @@ -2266,13 +2269,14 @@ nv136_chipset = { .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, + .fault = gp100_fault_new, .fb = gp102_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp100_mc_new, .mmu = gp100_mmu_new, .therm = gp100_therm_new, @@ -2288,7 +2292,7 @@ nv136_chipset = { .disp = gp102_disp_new, .dma = gf119_dma_new, .fifo = gp100_fifo_new, - .gr = gp102_gr_new, + .gr = gp104_gr_new, .nvdec = gp102_nvdec_new, .sec2 = gp102_sec2_new, .sw = gf100_sw_new, @@ -2301,13 +2305,14 @@ nv137_chipset = { .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, + .fault = gp100_fault_new, .fb = gp102_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp100_mc_new, .mmu = gp100_mmu_new, .therm = gp100_therm_new, @@ -2336,13 +2341,14 @@ nv138_chipset = { .bios = nvkm_bios_new, .bus = gf100_bus_new, .devinit = gm200_devinit_new, + .fault = gp100_fault_new, .fb = gp102_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp100_mc_new, .mmu = gp100_mmu_new, .therm = gp100_therm_new, @@ -2369,11 +2375,12 @@ nv13b_chipset = { .name = "GP10B", .bar = gm20b_bar_new, .bus = gf100_bus_new, + .fault = gp100_fault_new, .fb = gp10b_fb_new, .fuse = gm107_fuse_new, .ibus = gp10b_ibus_new, .imem = gk20a_instmem_new, - .ltc = gp100_ltc_new, + .ltc = gp102_ltc_new, .mc = gp10b_mc_new, .mmu = gp10b_mmu_new, .secboot = gp10b_secboot_new, @@ -2387,6 +2394,46 @@ nv13b_chipset = { .sw = gf100_sw_new, }; +static const struct nvkm_device_chip +nv140_chipset = { + .name = "GV100", + .bar = gm107_bar_new, + .bios = nvkm_bios_new, + .bus = gf100_bus_new, + .devinit = gv100_devinit_new, + .fault = gv100_fault_new, + .fb = gv100_fb_new, + .fuse = gm107_fuse_new, + .gpio = gk104_gpio_new, + .i2c = gm200_i2c_new, + .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, + .ltc = gp102_ltc_new, + .mc = gp100_mc_new, + .mmu = gv100_mmu_new, + .pci = gp100_pci_new, + .pmu = gp102_pmu_new, + .secboot = gp108_secboot_new, + .therm = gp100_therm_new, + .timer = gk20a_timer_new, + .top = gk104_top_new, + .disp = gv100_disp_new, + .ce[0] = gv100_ce_new, + .ce[1] = gv100_ce_new, + .ce[2] = gv100_ce_new, + .ce[3] = gv100_ce_new, + .ce[4] = gv100_ce_new, + .ce[5] = gv100_ce_new, + .ce[6] = gv100_ce_new, + .ce[7] = gv100_ce_new, + .ce[8] = gv100_ce_new, + .dma = gv100_dma_new, + .fifo = gv100_fifo_new, + .gr = gv100_gr_new, + .nvdec = gp102_nvdec_new, + .sec2 = gp102_sec2_new, +}; + static int nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size, struct nvkm_notify *notify) @@ -2420,6 +2467,7 @@ nvkm_device_subdev(struct nvkm_device *device, int index) _(BUS , device->bus , &device->bus->subdev); _(CLK , device->clk , &device->clk->subdev); _(DEVINIT , device->devinit , &device->devinit->subdev); + _(FAULT , device->fault , &device->fault->subdev); _(FB , device->fb , &device->fb->subdev); _(FUSE , device->fuse , &device->fuse->subdev); _(GPIO , device->gpio , &device->gpio->subdev); @@ -2463,6 +2511,9 @@ nvkm_device_engine(struct nvkm_device *device, int index) _(CE3 , device->ce[3] , device->ce[3]); _(CE4 , device->ce[4] , device->ce[4]); _(CE5 , device->ce[5] , device->ce[5]); + _(CE6 , device->ce[6] , device->ce[6]); + _(CE7 , device->ce[7] , device->ce[7]); + _(CE8 , device->ce[8] , device->ce[8]); _(CIPHER , device->cipher , device->cipher); _(DISP , device->disp , &device->disp->engine); _(DMAOBJ , device->dma , &device->dma->engine); @@ -2739,6 +2790,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x110: case 0x120: device->card_type = GM100; break; case 0x130: device->card_type = GP100; break; + case 0x140: device->card_type = GV100; break; default: break; } @@ -2830,6 +2882,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x137: device->chip = &nv137_chipset; break; case 0x138: device->chip = &nv138_chipset; break; case 0x13b: device->chip = &nv13b_chipset; break; + case 0x140: device->chip = &nv140_chipset; break; default: nvdev_error(device, "unknown chipset (%08x)\n", boot0); goto done; @@ -2891,6 +2944,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, _(NVKM_SUBDEV_BUS , bus); _(NVKM_SUBDEV_CLK , clk); _(NVKM_SUBDEV_DEVINIT , devinit); + _(NVKM_SUBDEV_FAULT , fault); _(NVKM_SUBDEV_FB , fb); _(NVKM_SUBDEV_FUSE , fuse); _(NVKM_SUBDEV_GPIO , gpio); @@ -2916,6 +2970,9 @@ nvkm_device_ctor(const struct nvkm_device_func *func, _(NVKM_ENGINE_CE3 , ce[3]); _(NVKM_ENGINE_CE4 , ce[4]); _(NVKM_ENGINE_CE5 , ce[5]); + _(NVKM_ENGINE_CE6 , ce[6]); + _(NVKM_ENGINE_CE7 , ce[7]); + _(NVKM_ENGINE_CE8 , ce[8]); _(NVKM_ENGINE_CIPHER , cipher); _(NVKM_ENGINE_DISP , disp); _(NVKM_ENGINE_DMAOBJ , dma); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h index 08d0bf605722..253ab914a8ef 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h @@ -8,6 +8,7 @@ #include <subdev/bus.h> #include <subdev/clk.h> #include <subdev/devinit.h> +#include <subdev/fault.h> #include <subdev/fb.h> #include <subdev/fuse.h> #include <subdev/gpio.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c index 17adcb4e8854..dde6bbafa709 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c @@ -40,6 +40,66 @@ struct nvkm_udevice { }; static int +nvkm_udevice_info_subdev(struct nvkm_device *device, u64 mthd, u64 *data) +{ + struct nvkm_subdev *subdev; + enum nvkm_devidx subidx; + + switch (mthd & NV_DEVICE_INFO_UNIT) { + case NV_DEVICE_FIFO(0): subidx = NVKM_ENGINE_FIFO; break; + default: + return -EINVAL; + } + + subdev = nvkm_device_subdev(device, subidx); + if (subdev) + return nvkm_subdev_info(subdev, mthd, data); + return -ENODEV; +} + +static void +nvkm_udevice_info_v1(struct nvkm_device *device, + struct nv_device_info_v1_data *args) +{ + if (args->mthd & NV_DEVICE_INFO_UNIT) { + if (nvkm_udevice_info_subdev(device, args->mthd, &args->data)) + args->mthd = NV_DEVICE_INFO_INVALID; + return; + } + + switch (args->mthd) { +#define ENGINE__(A,B,C) NV_DEVICE_INFO_ENGINE_##A: { int _i; \ + for (_i = (B), args->data = 0ULL; _i <= (C); _i++) { \ + if (nvkm_device_engine(device, _i)) \ + args->data |= BIT_ULL(_i); \ + } \ +} +#define ENGINE_A(A) ENGINE__(A, NVKM_ENGINE_##A , NVKM_ENGINE_##A) +#define ENGINE_B(A) ENGINE__(A, NVKM_ENGINE_##A##0, NVKM_ENGINE_##A##_LAST) + case ENGINE_A(SW ); break; + case ENGINE_A(GR ); break; + case ENGINE_A(MPEG ); break; + case ENGINE_A(ME ); break; + case ENGINE_A(CIPHER); break; + case ENGINE_A(BSP ); break; + case ENGINE_A(VP ); break; + case ENGINE_B(CE ); break; + case ENGINE_A(SEC ); break; + case ENGINE_A(MSVLD ); break; + case ENGINE_A(MSPDEC); break; + case ENGINE_A(MSPPP ); break; + case ENGINE_A(MSENC ); break; + case ENGINE_A(VIC ); break; + case ENGINE_A(SEC2 ); break; + case ENGINE_A(NVDEC ); break; + case ENGINE_B(NVENC ); break; + default: + args->mthd = NV_DEVICE_INFO_INVALID; + break; + } +} + +static int nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size) { struct nvkm_object *object = &udev->object; @@ -48,10 +108,21 @@ nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size) struct nvkm_instmem *imem = device->imem; union { struct nv_device_info_v0 v0; + struct nv_device_info_v1 v1; } *args = data; - int ret = -ENOSYS; + int ret = -ENOSYS, i; nvif_ioctl(object, "device info size %d\n", size); + if (!(ret = nvif_unpack(ret, &data, &size, args->v1, 1, 1, true))) { + nvif_ioctl(object, "device info vers %d count %d\n", + args->v1.version, args->v1.count); + if (args->v1.count * sizeof(args->v1.data[0]) == size) { + for (i = 0; i < args->v1.count; i++) + nvkm_udevice_info_v1(device, &args->v1.data[i]); + return 0; + } + return -EINVAL; + } else if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { nvif_ioctl(object, "device info vers %d\n", args->v0.version); } else @@ -103,6 +174,7 @@ nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size) case NV_E0: args->v0.family = NV_DEVICE_INFO_V0_KEPLER; break; case GM100: args->v0.family = NV_DEVICE_INFO_V0_MAXWELL; break; case GP100: args->v0.family = NV_DEVICE_INFO_V0_PASCAL; break; + case GV100: args->v0.family = NV_DEVICE_INFO_V0_VOLTA; break; default: args->v0.family = 0; break; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index 48ce6699183e..3d485dbf310a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -14,12 +14,14 @@ nvkm-y += nvkm/engine/disp/gm107.o nvkm-y += nvkm/engine/disp/gm200.o nvkm-y += nvkm/engine/disp/gp100.o nvkm-y += nvkm/engine/disp/gp102.o +nvkm-y += nvkm/engine/disp/gv100.o nvkm-y += nvkm/engine/disp/vga.o nvkm-y += nvkm/engine/disp/head.o nvkm-y += nvkm/engine/disp/headnv04.o nvkm-y += nvkm/engine/disp/headnv50.o nvkm-y += nvkm/engine/disp/headgf119.o +nvkm-y += nvkm/engine/disp/headgv100.o nvkm-y += nvkm/engine/disp/ior.o nvkm-y += nvkm/engine/disp/dacnv50.o @@ -35,6 +37,7 @@ nvkm-y += nvkm/engine/disp/sorgf119.o nvkm-y += nvkm/engine/disp/sorgk104.o nvkm-y += nvkm/engine/disp/sorgm107.o nvkm-y += nvkm/engine/disp/sorgm200.o +nvkm-y += nvkm/engine/disp/sorgv100.o nvkm-y += nvkm/engine/disp/outp.o nvkm-y += nvkm/engine/disp/dp.o @@ -47,6 +50,7 @@ nvkm-y += nvkm/engine/disp/hdmig84.o nvkm-y += nvkm/engine/disp/hdmigt215.o nvkm-y += nvkm/engine/disp/hdmigf119.o nvkm-y += nvkm/engine/disp/hdmigk104.o +nvkm-y += nvkm/engine/disp/hdmigv100.o nvkm-y += nvkm/engine/disp/conn.o @@ -63,57 +67,49 @@ nvkm-y += nvkm/engine/disp/rootgm107.o nvkm-y += nvkm/engine/disp/rootgm200.o nvkm-y += nvkm/engine/disp/rootgp100.o nvkm-y += nvkm/engine/disp/rootgp102.o +nvkm-y += nvkm/engine/disp/rootgv100.o nvkm-y += nvkm/engine/disp/channv50.o nvkm-y += nvkm/engine/disp/changf119.o +nvkm-y += nvkm/engine/disp/changv100.o nvkm-y += nvkm/engine/disp/dmacnv50.o nvkm-y += nvkm/engine/disp/dmacgf119.o nvkm-y += nvkm/engine/disp/dmacgp102.o +nvkm-y += nvkm/engine/disp/dmacgv100.o nvkm-y += nvkm/engine/disp/basenv50.o nvkm-y += nvkm/engine/disp/baseg84.o -nvkm-y += nvkm/engine/disp/basegt200.o -nvkm-y += nvkm/engine/disp/basegt215.o nvkm-y += nvkm/engine/disp/basegf119.o -nvkm-y += nvkm/engine/disp/basegk104.o -nvkm-y += nvkm/engine/disp/basegk110.o nvkm-y += nvkm/engine/disp/basegp102.o nvkm-y += nvkm/engine/disp/corenv50.o nvkm-y += nvkm/engine/disp/coreg84.o nvkm-y += nvkm/engine/disp/coreg94.o -nvkm-y += nvkm/engine/disp/coregt200.o -nvkm-y += nvkm/engine/disp/coregt215.o nvkm-y += nvkm/engine/disp/coregf119.o nvkm-y += nvkm/engine/disp/coregk104.o -nvkm-y += nvkm/engine/disp/coregk110.o -nvkm-y += nvkm/engine/disp/coregm107.o -nvkm-y += nvkm/engine/disp/coregm200.o -nvkm-y += nvkm/engine/disp/coregp100.o nvkm-y += nvkm/engine/disp/coregp102.o +nvkm-y += nvkm/engine/disp/coregv100.o nvkm-y += nvkm/engine/disp/ovlynv50.o nvkm-y += nvkm/engine/disp/ovlyg84.o nvkm-y += nvkm/engine/disp/ovlygt200.o -nvkm-y += nvkm/engine/disp/ovlygt215.o nvkm-y += nvkm/engine/disp/ovlygf119.o nvkm-y += nvkm/engine/disp/ovlygk104.o nvkm-y += nvkm/engine/disp/ovlygp102.o +nvkm-y += nvkm/engine/disp/wimmgv100.o + +nvkm-y += nvkm/engine/disp/wndwgv100.o + nvkm-y += nvkm/engine/disp/piocnv50.o nvkm-y += nvkm/engine/disp/piocgf119.o nvkm-y += nvkm/engine/disp/cursnv50.o -nvkm-y += nvkm/engine/disp/cursg84.o -nvkm-y += nvkm/engine/disp/cursgt215.o nvkm-y += nvkm/engine/disp/cursgf119.o -nvkm-y += nvkm/engine/disp/cursgk104.o nvkm-y += nvkm/engine/disp/cursgp102.o +nvkm-y += nvkm/engine/disp/cursgv100.o nvkm-y += nvkm/engine/disp/oimmnv50.o -nvkm-y += nvkm/engine/disp/oimmg84.o -nvkm-y += nvkm/engine/disp/oimmgt215.o nvkm-y += nvkm/engine/disp/oimmgf119.o -nvkm-y += nvkm/engine/disp/oimmgk104.o nvkm-y += nvkm/engine/disp/oimmgp102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c index 93a75e5b2791..32fa94a9773f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c @@ -220,6 +220,9 @@ nvkm_disp_fini(struct nvkm_engine *engine, bool suspend) struct nvkm_conn *conn; struct nvkm_outp *outp; + if (disp->func->fini) + disp->func->fini(disp); + list_for_each_entry(outp, &disp->outp, head) { nvkm_outp_fini(outp); } @@ -237,6 +240,7 @@ nvkm_disp_init(struct nvkm_engine *engine) struct nvkm_disp *disp = nvkm_disp(engine); struct nvkm_conn *conn; struct nvkm_outp *outp; + struct nvkm_ior *ior; list_for_each_entry(conn, &disp->conn, head) { nvkm_conn_init(conn); @@ -246,6 +250,19 @@ nvkm_disp_init(struct nvkm_engine *engine) nvkm_outp_init(outp); } + if (disp->func->init) { + int ret = disp->func->init(disp); + if (ret) + return ret; + } + + /* Set 'normal' (ie. when it's attached to a head) state for + * each output resource to 'fully enabled'. + */ + list_for_each_entry(ior, &disp->ior, head) { + ior->func->power(ior, true, true, true, true, true); + } + return 0; } @@ -376,6 +393,12 @@ nvkm_disp_oneinit(struct nvkm_engine *engine) if (ret) return ret; + if (disp->func->oneinit) { + ret = disp->func->oneinit(disp); + if (ret) + return ret; + } + i = 0; list_for_each_entry(head, &disp->head, head) i = max(i, head->id + 1); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/baseg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/baseg84.c index 6d17630a3dee..01253f4a9946 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/baseg84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/baseg84.c @@ -21,10 +21,7 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "channv50.h" static const struct nv50_disp_mthd_list g84_disp_base_mthd_base = { @@ -56,8 +53,8 @@ g84_disp_base_mthd_base = { } }; -const struct nv50_disp_chan_mthd -g84_disp_base_chan_mthd = { +static const struct nv50_disp_chan_mthd +g84_disp_base_mthd = { .name = "Base", .addr = 0x000540, .prev = 0x000004, @@ -68,13 +65,10 @@ g84_disp_base_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -g84_disp_base_oclass = { - .base.oclass = G82_DISP_BASE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_base_new, - .func = &nv50_disp_dmac_func, - .mthd = &g84_disp_base_chan_mthd, - .chid = 1, -}; +int +g84_disp_base_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_base_new_(&nv50_disp_dmac_func, &g84_disp_base_mthd, + disp, 1, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegf119.c index ebcb925e9d90..389e19dfc514 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegf119.c @@ -21,10 +21,7 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "channv50.h" static const struct nv50_disp_mthd_list gf119_disp_base_mthd_base = { @@ -91,7 +88,7 @@ gf119_disp_base_mthd_image = { }; const struct nv50_disp_chan_mthd -gf119_disp_base_chan_mthd = { +gf119_disp_base_mthd = { .name = "Base", .addr = 0x001000, .prev = -0x020000, @@ -102,13 +99,10 @@ gf119_disp_base_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -gf119_disp_base_oclass = { - .base.oclass = GF110_DISP_BASE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_base_new, - .func = &gf119_disp_dmac_func, - .mthd = &gf119_disp_base_chan_mthd, - .chid = 1, -}; +int +gf119_disp_base_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_base_new_(&gf119_disp_dmac_func, &gf119_disp_base_mthd, + disp, 1, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c index 8a3cdeef8d2c..0cb23d673aa0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c @@ -21,18 +21,12 @@ * * Authors: Ben Skeggs <bskeggs@redhat.com> */ -#include "dmacnv50.h" -#include "rootnv50.h" +#include "channv50.h" -#include <nvif/class.h> - -const struct nv50_disp_dmac_oclass -gp102_disp_base_oclass = { - .base.oclass = GK110_DISP_BASE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_base_new, - .func = &gp102_disp_dmac_func, - .mthd = &gf119_disp_base_chan_mthd, - .chid = 1, -}; +int +gp102_disp_base_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_base_new_(&gp102_disp_dmac_func, &gf119_disp_base_mthd, + disp, 1, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basenv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basenv50.c index f1d6b820d482..19eb7dde01f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basenv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basenv50.c @@ -21,33 +21,30 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" +#include "channv50.h" #include "head.h" -#include "rootnv50.h" #include <core/client.h> -#include <nvif/class.h> #include <nvif/cl507c.h> #include <nvif/unpack.h> int -nv50_disp_base_new(const struct nv50_disp_dmac_func *func, - const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int chid, - const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv50_disp_base_new_(const struct nv50_disp_chan_func *func, + const struct nv50_disp_chan_mthd *mthd, + struct nv50_disp *disp, int chid, + const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) { union { struct nv50_disp_base_channel_dma_v0 v0; - } *args = data; + } *args = argv; struct nvkm_object *parent = oclass->parent; - struct nv50_disp *disp = root->disp; int head, ret = -ENOSYS; u64 push; - nvif_ioctl(parent, "create disp base channel dma size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { + nvif_ioctl(parent, "create disp base channel dma size %d\n", argc); + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { nvif_ioctl(parent, "create disp base channel dma vers %d " "pushbuf %016llx head %d\n", args->v0.version, args->v0.pushbuf, args->v0.head); @@ -58,7 +55,7 @@ nv50_disp_base_new(const struct nv50_disp_dmac_func *func, } else return ret; - return nv50_disp_dmac_new_(func, mthd, root, chid + head, + return nv50_disp_dmac_new_(func, mthd, disp, chid + head, head, push, oclass, pobject); } @@ -102,7 +99,7 @@ nv50_disp_base_mthd_image = { }; static const struct nv50_disp_chan_mthd -nv50_disp_base_chan_mthd = { +nv50_disp_base_mthd = { .name = "Base", .addr = 0x000540, .prev = 0x000004, @@ -113,13 +110,10 @@ nv50_disp_base_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -nv50_disp_base_oclass = { - .base.oclass = NV50_DISP_BASE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_base_new, - .func = &nv50_disp_dmac_func, - .mthd = &nv50_disp_base_chan_mthd, - .chid = 1, -}; +int +nv50_disp_base_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_base_new_(&nv50_disp_dmac_func, &nv50_disp_base_mthd, + disp, 1, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/changf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/changf119.c index 17a3d835cb42..29e6dd58ac48 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/changf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/changf119.c @@ -47,3 +47,16 @@ gf119_disp_chan_uevent = { .init = gf119_disp_chan_uevent_init, .fini = gf119_disp_chan_uevent_fini, }; + +void +gf119_disp_chan_intr(struct nv50_disp_chan *chan, bool en) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u64 mask = 0x00000001 << chan->chid.user; + if (!en) { + nvkm_mask(device, 0x610090, mask, 0x00000000); + nvkm_mask(device, 0x6100a0, mask, 0x00000000); + } else { + nvkm_mask(device, 0x6100a0, mask, mask); + } +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/changv100.c index a13315147391..75247c9c7e10 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/changv100.c @@ -1,5 +1,5 @@ /* - * Copyright 2015 Red Hat Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,17 +18,17 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs */ -#include "changk104.h" - -#include <nvif/class.h> +#include "channv50.h" -const struct nvkm_fifo_chan_oclass -gm200_fifo_gpfifo_oclass = { - .base.oclass = MAXWELL_CHANNEL_GPFIFO_A, - .base.minver = 0, - .base.maxver = 0, - .ctor = gk104_fifo_gpfifo_new, +const struct nvkm_event_func +gv100_disp_chan_uevent = { + .ctor = nv50_disp_chan_uevent_ctor, }; + +u64 +gv100_disp_chan_user(struct nv50_disp_chan *chan, u64 *psize) +{ + *psize = 0x1000; + return 0x690000 + ((chan->chid.user - 1) * 0x1000); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c index 723dcbde2ac2..57719f675eec 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c @@ -26,6 +26,7 @@ #include <core/client.h> #include <core/notify.h> +#include <core/oproxy.h> #include <core/ramht.h> #include <engine/dma.h> @@ -65,7 +66,7 @@ nv50_disp_mthd_list(struct nv50_disp *disp, int debug, u32 base, int c, void nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug) { - struct nv50_disp *disp = chan->root->disp; + struct nv50_disp *disp = chan->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; const struct nv50_disp_chan_mthd *mthd = chan->mthd; const struct nv50_disp_mthd_list *list; @@ -154,13 +155,29 @@ nv50_disp_chan_uevent = { .fini = nv50_disp_chan_uevent_fini, }; +u64 +nv50_disp_chan_user(struct nv50_disp_chan *chan, u64 *psize) +{ + *psize = 0x1000; + return 0x640000 + (chan->chid.user * 0x1000); +} + +void +nv50_disp_chan_intr(struct nv50_disp_chan *chan, bool en) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u64 mask = 0x00010001 << chan->chid.user; + const u64 data = en ? 0x00010000 : 0x00000000; + nvkm_mask(device, 0x610028, mask, data); +} + static int nv50_disp_chan_rd32(struct nvkm_object *object, u64 addr, u32 *data) { struct nv50_disp_chan *chan = nv50_disp_chan(object); - struct nv50_disp *disp = chan->root->disp; - struct nvkm_device *device = disp->base.engine.subdev.device; - *data = nvkm_rd32(device, 0x640000 + (chan->chid.user * 0x1000) + addr); + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + u64 size, base = chan->func->user(chan, &size); + *data = nvkm_rd32(device, base + addr); return 0; } @@ -168,9 +185,9 @@ static int nv50_disp_chan_wr32(struct nvkm_object *object, u64 addr, u32 data) { struct nv50_disp_chan *chan = nv50_disp_chan(object); - struct nv50_disp *disp = chan->root->disp; - struct nvkm_device *device = disp->base.engine.subdev.device; - nvkm_wr32(device, 0x640000 + (chan->chid.user * 0x1000) + addr, data); + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + u64 size, base = chan->func->user(chan, &size); + nvkm_wr32(device, base + addr, data); return 0; } @@ -179,7 +196,7 @@ nv50_disp_chan_ntfy(struct nvkm_object *object, u32 type, struct nvkm_event **pevent) { struct nv50_disp_chan *chan = nv50_disp_chan(object); - struct nv50_disp *disp = chan->root->disp; + struct nv50_disp *disp = chan->disp; switch (type) { case NV50_DISP_CORE_CHANNEL_DMA_V0_NTFY_UEVENT: *pevent = &disp->uevent; @@ -195,34 +212,83 @@ nv50_disp_chan_map(struct nvkm_object *object, void *argv, u32 argc, enum nvkm_object_map *type, u64 *addr, u64 *size) { struct nv50_disp_chan *chan = nv50_disp_chan(object); - struct nv50_disp *disp = chan->root->disp; - struct nvkm_device *device = disp->base.engine.subdev.device; + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u64 base = device->func->resource_addr(device, 0); *type = NVKM_OBJECT_MAP_IO; - *addr = device->func->resource_addr(device, 0) + - 0x640000 + (chan->chid.user * 0x1000); - *size = 0x001000; + *addr = base + chan->func->user(chan, size); return 0; } +struct nv50_disp_chan_object { + struct nvkm_oproxy oproxy; + struct nv50_disp *disp; + int hash; +}; + +static void +nv50_disp_chan_child_del_(struct nvkm_oproxy *base) +{ + struct nv50_disp_chan_object *object = + container_of(base, typeof(*object), oproxy); + nvkm_ramht_remove(object->disp->ramht, object->hash); +} + +static const struct nvkm_oproxy_func +nv50_disp_chan_child_func_ = { + .dtor[0] = nv50_disp_chan_child_del_, +}; + static int nv50_disp_chan_child_new(const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) + void *argv, u32 argc, struct nvkm_object **pobject) { struct nv50_disp_chan *chan = nv50_disp_chan(oclass->parent); - return chan->func->child_new(chan, oclass, data, size, pobject); + struct nv50_disp *disp = chan->disp; + struct nvkm_device *device = disp->base.engine.subdev.device; + const struct nvkm_device_oclass *sclass = oclass->priv; + struct nv50_disp_chan_object *object; + int ret; + + if (!(object = kzalloc(sizeof(*object), GFP_KERNEL))) + return -ENOMEM; + nvkm_oproxy_ctor(&nv50_disp_chan_child_func_, oclass, &object->oproxy); + object->disp = disp; + *pobject = &object->oproxy.base; + + ret = sclass->ctor(device, oclass, argv, argc, &object->oproxy.object); + if (ret) + return ret; + + object->hash = chan->func->bind(chan, object->oproxy.object, + oclass->handle); + if (object->hash < 0) + return object->hash; + + return 0; } static int nv50_disp_chan_child_get(struct nvkm_object *object, int index, - struct nvkm_oclass *oclass) + struct nvkm_oclass *sclass) { struct nv50_disp_chan *chan = nv50_disp_chan(object); - if (chan->func->child_get) { - int ret = chan->func->child_get(chan, index, oclass); - if (ret == 0) - oclass->ctor = nv50_disp_chan_child_new; - return ret; + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const struct nvkm_device_oclass *oclass = NULL; + + if (chan->func->bind) + sclass->engine = nvkm_device_engine(device, NVKM_ENGINE_DMAOBJ); + else + sclass->engine = NULL; + + if (sclass->engine && sclass->engine->func->base.sclass) { + sclass->engine->func->base.sclass(sclass, index, &oclass); + if (oclass) { + sclass->ctor = nv50_disp_chan_child_new, + sclass->priv = oclass; + return 0; + } } + return -EINVAL; } @@ -231,6 +297,7 @@ nv50_disp_chan_fini(struct nvkm_object *object, bool suspend) { struct nv50_disp_chan *chan = nv50_disp_chan(object); chan->func->fini(chan); + chan->func->intr(chan, false); return 0; } @@ -238,6 +305,7 @@ static int nv50_disp_chan_init(struct nvkm_object *object) { struct nv50_disp_chan *chan = nv50_disp_chan(object); + chan->func->intr(chan, true); return chan->func->init(chan); } @@ -245,10 +313,11 @@ static void * nv50_disp_chan_dtor(struct nvkm_object *object) { struct nv50_disp_chan *chan = nv50_disp_chan(object); - struct nv50_disp *disp = chan->root->disp; + struct nv50_disp *disp = chan->disp; if (chan->chid.user >= 0) disp->chan[chan->chid.user] = NULL; - return chan->func->dtor ? chan->func->dtor(chan) : chan; + nvkm_memory_unref(&chan->memory); + return chan; } static const struct nvkm_object_func @@ -264,18 +333,22 @@ nv50_disp_chan = { }; int -nv50_disp_chan_ctor(const struct nv50_disp_chan_func *func, +nv50_disp_chan_new_(const struct nv50_disp_chan_func *func, const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int ctrl, int user, int head, + struct nv50_disp *disp, int ctrl, int user, int head, const struct nvkm_oclass *oclass, - struct nv50_disp_chan *chan) + struct nvkm_object **pobject) { - struct nv50_disp *disp = root->disp; + struct nv50_disp_chan *chan; + + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + *pobject = &chan->object; nvkm_object_ctor(&nv50_disp_chan, oclass, &chan->object); chan->func = func; chan->mthd = mthd; - chan->root = root; + chan->disp = disp; chan->chid.ctrl = ctrl; chan->chid.user = user; chan->head = head; @@ -287,20 +360,3 @@ nv50_disp_chan_ctor(const struct nv50_disp_chan_func *func, disp->chan[chan->chid.user] = chan; return 0; } - -int -nv50_disp_chan_new_(const struct nv50_disp_chan_func *func, - const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int ctrl, int user, int head, - const struct nvkm_oclass *oclass, - struct nvkm_object **pobject) -{ - struct nv50_disp_chan *chan; - - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->object; - - return nv50_disp_chan_ctor(func, mthd, root, ctrl, user, - head, oclass, chan); -} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h index 40681db91a02..adc9d76d09cc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h @@ -4,11 +4,12 @@ #define nv50_disp_chan(p) container_of((p), struct nv50_disp_chan, object) #include <core/object.h> #include "nv50.h" +struct nv50_disp_root; struct nv50_disp_chan { const struct nv50_disp_chan_func *func; const struct nv50_disp_chan_mthd *mthd; - struct nv50_disp_root *root; + struct nv50_disp *disp; struct { int ctrl; @@ -17,36 +18,133 @@ struct nv50_disp_chan { int head; struct nvkm_object object; + + struct nvkm_memory *memory; + u64 push; }; struct nv50_disp_chan_func { - void *(*dtor)(struct nv50_disp_chan *); int (*init)(struct nv50_disp_chan *); void (*fini)(struct nv50_disp_chan *); - int (*child_get)(struct nv50_disp_chan *, int index, - struct nvkm_oclass *); - int (*child_new)(struct nv50_disp_chan *, const struct nvkm_oclass *, - void *data, u32 size, struct nvkm_object **); + void (*intr)(struct nv50_disp_chan *, bool en); + u64 (*user)(struct nv50_disp_chan *, u64 *size); + int (*bind)(struct nv50_disp_chan *, struct nvkm_object *, u32 handle); }; -int nv50_disp_chan_ctor(const struct nv50_disp_chan_func *, - const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int ctrl, int user, int head, - const struct nvkm_oclass *, struct nv50_disp_chan *); int nv50_disp_chan_new_(const struct nv50_disp_chan_func *, const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int ctrl, int user, int head, + struct nv50_disp *, int ctrl, int user, int head, + const struct nvkm_oclass *, struct nvkm_object **); +int nv50_disp_dmac_new_(const struct nv50_disp_chan_func *, + const struct nv50_disp_chan_mthd *, + struct nv50_disp *, int chid, int head, u64 push, const struct nvkm_oclass *, struct nvkm_object **); +void nv50_disp_chan_intr(struct nv50_disp_chan *, bool); +u64 nv50_disp_chan_user(struct nv50_disp_chan *, u64 *); extern const struct nv50_disp_chan_func nv50_disp_pioc_func; -extern const struct nv50_disp_chan_func gf119_disp_pioc_func; - -extern const struct nvkm_event_func nv50_disp_chan_uevent; -int nv50_disp_chan_uevent_ctor(struct nvkm_object *, void *, u32, - struct nvkm_notify *); -void nv50_disp_chan_uevent_send(struct nv50_disp *, int); +extern const struct nv50_disp_chan_func nv50_disp_dmac_func; +int nv50_disp_dmac_bind(struct nv50_disp_chan *, struct nvkm_object *, u32); +extern const struct nv50_disp_chan_func nv50_disp_core_func; -extern const struct nvkm_event_func gf119_disp_chan_uevent; +void gf119_disp_chan_intr(struct nv50_disp_chan *, bool); +extern const struct nv50_disp_chan_func gf119_disp_pioc_func; +extern const struct nv50_disp_chan_func gf119_disp_dmac_func; +void gf119_disp_dmac_fini(struct nv50_disp_chan *); +int gf119_disp_dmac_bind(struct nv50_disp_chan *, struct nvkm_object *, u32); +extern const struct nv50_disp_chan_func gf119_disp_core_func; +void gf119_disp_core_fini(struct nv50_disp_chan *); + +extern const struct nv50_disp_chan_func gp102_disp_dmac_func; + +u64 gv100_disp_chan_user(struct nv50_disp_chan *, u64 *); +int gv100_disp_dmac_init(struct nv50_disp_chan *); +void gv100_disp_dmac_fini(struct nv50_disp_chan *); +int gv100_disp_dmac_bind(struct nv50_disp_chan *, struct nvkm_object *, u32); + +int nv50_disp_curs_new_(const struct nv50_disp_chan_func *, + struct nv50_disp *, int ctrl, int user, + const struct nvkm_oclass *, void *argv, u32 argc, + struct nvkm_object **); +int nv50_disp_oimm_new_(const struct nv50_disp_chan_func *, + struct nv50_disp *, int ctrl, int user, + const struct nvkm_oclass *, void *argv, u32 argc, + struct nvkm_object **); +int nv50_disp_base_new_(const struct nv50_disp_chan_func *, + const struct nv50_disp_chan_mthd *, + struct nv50_disp *, int chid, + const struct nvkm_oclass *, void *argv, u32 argc, + struct nvkm_object **); +int nv50_disp_core_new_(const struct nv50_disp_chan_func *, + const struct nv50_disp_chan_mthd *, + struct nv50_disp *, int chid, + const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **); +int nv50_disp_ovly_new_(const struct nv50_disp_chan_func *, + const struct nv50_disp_chan_mthd *, + struct nv50_disp *, int chid, + const struct nvkm_oclass *, void *argv, u32 argc, + struct nvkm_object **); + +int nv50_disp_curs_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int nv50_disp_oimm_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int nv50_disp_base_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int nv50_disp_core_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int nv50_disp_ovly_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); + +int g84_disp_base_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int g84_disp_core_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int g84_disp_ovly_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); + +int g94_disp_core_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); + +int gt200_disp_ovly_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); + +int gf119_disp_curs_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gf119_disp_oimm_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gf119_disp_base_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gf119_disp_core_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gf119_disp_ovly_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); + +int gk104_disp_core_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gk104_disp_ovly_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); + +int gp102_disp_curs_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gp102_disp_oimm_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gp102_disp_base_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gp102_disp_core_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gp102_disp_ovly_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); + +int gv100_disp_curs_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gv100_disp_wimm_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gv100_disp_core_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); +int gv100_disp_wndw_new(const struct nvkm_oclass *, void *, u32, + struct nv50_disp *, struct nvkm_object **); struct nv50_disp_mthd_list { u32 mthd; @@ -76,64 +174,18 @@ extern const struct nv50_disp_mthd_list nv50_disp_core_mthd_sor; extern const struct nv50_disp_mthd_list nv50_disp_core_mthd_pior; extern const struct nv50_disp_mthd_list nv50_disp_base_mthd_image; -extern const struct nv50_disp_chan_mthd g84_disp_core_chan_mthd; +extern const struct nv50_disp_chan_mthd g84_disp_core_mthd; extern const struct nv50_disp_mthd_list g84_disp_core_mthd_dac; extern const struct nv50_disp_mthd_list g84_disp_core_mthd_head; -extern const struct nv50_disp_chan_mthd g84_disp_base_chan_mthd; -extern const struct nv50_disp_chan_mthd g84_disp_ovly_chan_mthd; -extern const struct nv50_disp_chan_mthd g94_disp_core_chan_mthd; +extern const struct nv50_disp_chan_mthd g94_disp_core_mthd; extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_base; extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_dac; extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_sor; extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_pior; -extern const struct nv50_disp_chan_mthd gf119_disp_base_chan_mthd; - -extern const struct nv50_disp_chan_mthd gk104_disp_core_chan_mthd; -extern const struct nv50_disp_chan_mthd gk104_disp_ovly_chan_mthd; - -struct nv50_disp_pioc_oclass { - int (*ctor)(const struct nv50_disp_chan_func *, - const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int ctrl, int user, - const struct nvkm_oclass *, void *data, u32 size, - struct nvkm_object **); - struct nvkm_sclass base; - const struct nv50_disp_chan_func *func; - const struct nv50_disp_chan_mthd *mthd; - struct { - int ctrl; - int user; - } chid; -}; - -extern const struct nv50_disp_pioc_oclass nv50_disp_oimm_oclass; -extern const struct nv50_disp_pioc_oclass nv50_disp_curs_oclass; - -extern const struct nv50_disp_pioc_oclass g84_disp_oimm_oclass; -extern const struct nv50_disp_pioc_oclass g84_disp_curs_oclass; - -extern const struct nv50_disp_pioc_oclass gt215_disp_oimm_oclass; -extern const struct nv50_disp_pioc_oclass gt215_disp_curs_oclass; - -extern const struct nv50_disp_pioc_oclass gf119_disp_oimm_oclass; -extern const struct nv50_disp_pioc_oclass gf119_disp_curs_oclass; - -extern const struct nv50_disp_pioc_oclass gk104_disp_oimm_oclass; -extern const struct nv50_disp_pioc_oclass gk104_disp_curs_oclass; - -extern const struct nv50_disp_pioc_oclass gp102_disp_oimm_oclass; -extern const struct nv50_disp_pioc_oclass gp102_disp_curs_oclass; +extern const struct nv50_disp_chan_mthd gf119_disp_base_mthd; -int nv50_disp_curs_new(const struct nv50_disp_chan_func *, - const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int ctrl, int user, - const struct nvkm_oclass *, void *data, u32 size, - struct nvkm_object **); -int nv50_disp_oimm_new(const struct nv50_disp_chan_func *, - const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int ctrl, int user, - const struct nvkm_oclass *, void *data, u32 size, - struct nvkm_object **); +extern const struct nv50_disp_chan_mthd gk104_disp_core_mthd; +extern const struct nv50_disp_chan_mthd gk104_disp_ovly_mthd; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg84.c index 1baa5c34b327..cfc54aad3e7c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg84.c @@ -21,10 +21,7 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "channv50.h" const struct nv50_disp_mthd_list g84_disp_core_mthd_dac = { @@ -91,7 +88,7 @@ g84_disp_core_mthd_head = { }; const struct nv50_disp_chan_mthd -g84_disp_core_chan_mthd = { +g84_disp_core_mthd = { .name = "Core", .addr = 0x000000, .prev = 0x000004, @@ -105,13 +102,10 @@ g84_disp_core_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -g84_disp_core_oclass = { - .base.oclass = G82_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &nv50_disp_core_func, - .mthd = &g84_disp_core_chan_mthd, - .chid = 0, -}; +int +g84_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_core_new_(&nv50_disp_core_func, &g84_disp_core_mthd, + disp, 0, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c index c65c9f3ff69f..e911925f1182 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c @@ -21,10 +21,7 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "channv50.h" static const struct nv50_disp_mthd_list g94_disp_core_mthd_sor = { @@ -37,7 +34,7 @@ g94_disp_core_mthd_sor = { }; const struct nv50_disp_chan_mthd -g94_disp_core_chan_mthd = { +g94_disp_core_mthd = { .name = "Core", .addr = 0x000000, .prev = 0x000004, @@ -51,13 +48,10 @@ g94_disp_core_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -g94_disp_core_oclass = { - .base.oclass = GT206_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &nv50_disp_core_func, - .mthd = &g94_disp_core_chan_mthd, - .chid = 0, -}; +int +g94_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_core_new_(&nv50_disp_core_func, &g94_disp_core_mthd, + disp, 0, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c index 21fbf89b6319..d162b9cf4eac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c @@ -21,15 +21,10 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" +#include "channv50.h" -#include <core/client.h> #include <subdev/timer.h> -#include <nvif/class.h> -#include <nvif/unpack.h> - const struct nv50_disp_mthd_list gf119_disp_core_mthd_base = { .mthd = 0x0000, @@ -157,7 +152,7 @@ gf119_disp_core_mthd_head = { }; static const struct nv50_disp_chan_mthd -gf119_disp_core_chan_mthd = { +gf119_disp_core_mthd = { .name = "Core", .addr = 0x000000, .prev = -0x020000, @@ -172,10 +167,9 @@ gf119_disp_core_chan_mthd = { }; void -gf119_disp_core_fini(struct nv50_disp_dmac *chan) +gf119_disp_core_fini(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; struct nvkm_device *device = subdev->device; /* deactivate channel */ @@ -188,22 +182,14 @@ gf119_disp_core_fini(struct nv50_disp_dmac *chan) nvkm_error(subdev, "core fini: %08x\n", nvkm_rd32(device, 0x610490)); } - - /* disable error reporting and completion notification */ - nvkm_mask(device, 0x610090, 0x00000001, 0x00000000); - nvkm_mask(device, 0x6100a0, 0x00000001, 0x00000000); } static int -gf119_disp_core_init(struct nv50_disp_dmac *chan) +gf119_disp_core_init(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - /* enable error reporting */ - nvkm_mask(device, 0x6100a0, 0x00000001, 0x00000001); - /* initialise channel for dma command submission */ nvkm_wr32(device, 0x610494, chan->push); nvkm_wr32(device, 0x610498, 0x00010000); @@ -225,20 +211,19 @@ gf119_disp_core_init(struct nv50_disp_dmac *chan) return 0; } -const struct nv50_disp_dmac_func +const struct nv50_disp_chan_func gf119_disp_core_func = { .init = gf119_disp_core_init, .fini = gf119_disp_core_fini, + .intr = gf119_disp_chan_intr, + .user = nv50_disp_chan_user, .bind = gf119_disp_dmac_bind, }; -const struct nv50_disp_dmac_oclass -gf119_disp_core_oclass = { - .base.oclass = GF110_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &gf119_disp_core_func, - .mthd = &gf119_disp_core_chan_mthd, - .chid = 0, -}; +int +gf119_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_core_new_(&gf119_disp_core_func, &gf119_disp_core_mthd, + disp, 0, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk104.c index 088ab222e823..5c800174e079 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk104.c @@ -21,10 +21,7 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "channv50.h" static const struct nv50_disp_mthd_list gk104_disp_core_mthd_head = { @@ -106,7 +103,7 @@ gk104_disp_core_mthd_head = { }; const struct nv50_disp_chan_mthd -gk104_disp_core_chan_mthd = { +gk104_disp_core_mthd = { .name = "Core", .addr = 0x000000, .prev = -0x020000, @@ -120,13 +117,10 @@ gk104_disp_core_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -gk104_disp_core_oclass = { - .base.oclass = GK104_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &gf119_disp_core_func, - .mthd = &gk104_disp_core_chan_mthd, - .chid = 0, -}; +int +gk104_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_core_new_(&gf119_disp_core_func, &gk104_disp_core_mthd, + disp, 0, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk110.c deleted file mode 100644 index df0f45c20108..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk110.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2015 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> - */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> - -const struct nv50_disp_dmac_oclass -gk110_disp_core_oclass = { - .base.oclass = GK110_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &gf119_disp_core_func, - .mthd = &gk104_disp_core_chan_mthd, - .chid = 0, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm107.c deleted file mode 100644 index 9e27f8fd98b6..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm107.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2015 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> - */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> - -const struct nv50_disp_dmac_oclass -gm107_disp_core_oclass = { - .base.oclass = GM107_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &gf119_disp_core_func, - .mthd = &gk104_disp_core_chan_mthd, - .chid = 0, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm200.c deleted file mode 100644 index bb23a8658ac0..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm200.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2015 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> - */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> - -const struct nv50_disp_dmac_oclass -gm200_disp_core_oclass = { - .base.oclass = GM200_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &gf119_disp_core_func, - .mthd = &gk104_disp_core_chan_mthd, - .chid = 0, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c deleted file mode 100644 index d5dff6619d4d..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2015 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> - */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> - -const struct nv50_disp_dmac_oclass -gp100_disp_core_oclass = { - .base.oclass = GP100_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &gf119_disp_core_func, - .mthd = &gk104_disp_core_chan_mthd, - .chid = 0, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c index b0df4b752b8c..5b7f993c73c7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c @@ -21,23 +21,16 @@ * * Authors: Ben Skeggs <bskeggs@redhat.com> */ -#include "dmacnv50.h" -#include "rootnv50.h" +#include "channv50.h" #include <subdev/timer.h> -#include <nvif/class.h> - static int -gp102_disp_core_init(struct nv50_disp_dmac *chan) +gp102_disp_core_init(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - /* enable error reporting */ - nvkm_mask(device, 0x6100a0, 0x00000001, 0x00000001); - /* initialise channel for dma command submission */ nvkm_wr32(device, 0x611494, chan->push); nvkm_wr32(device, 0x611498, 0x00010000); @@ -59,20 +52,19 @@ gp102_disp_core_init(struct nv50_disp_dmac *chan) return 0; } -static const struct nv50_disp_dmac_func +static const struct nv50_disp_chan_func gp102_disp_core_func = { .init = gp102_disp_core_init, .fini = gf119_disp_core_fini, + .intr = gf119_disp_chan_intr, + .user = nv50_disp_chan_user, .bind = gf119_disp_dmac_bind, }; -const struct nv50_disp_dmac_oclass -gp102_disp_core_oclass = { - .base.oclass = GP102_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &gp102_disp_core_func, - .mthd = &gk104_disp_core_chan_mthd, - .chid = 0, -}; +int +gp102_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_core_new_(&gp102_disp_core_func, &gk104_disp_core_mthd, + disp, 0, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt215.c deleted file mode 100644 index 8f5ba2018975..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt215.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2015 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> - */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> - -const struct nv50_disp_dmac_oclass -gt215_disp_core_oclass = { - .base.oclass = GT214_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &nv50_disp_core_func, - .mthd = &g94_disp_core_chan_mthd, - .chid = 0, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregv100.c new file mode 100644 index 000000000000..4592d0e69fec --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregv100.c @@ -0,0 +1,204 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "channv50.h" + +#include <subdev/timer.h> + +const struct nv50_disp_mthd_list +gv100_disp_core_mthd_base = { + .mthd = 0x0000, + .addr = 0x000000, + .data = { + { 0x0200, 0x680200 }, + { 0x0208, 0x680208 }, + { 0x020c, 0x68020c }, + { 0x0210, 0x680210 }, + { 0x0214, 0x680214 }, + { 0x0218, 0x680218 }, + { 0x021c, 0x68021c }, + {} + } +}; + +const struct nv50_disp_mthd_list +gv100_disp_core_mthd_sor = { + .mthd = 0x0020, + .addr = 0x000020, + .data = { + { 0x0300, 0x680300 }, + { 0x0304, 0x680304 }, + { 0x0308, 0x680308 }, + { 0x030c, 0x68030c }, + {} + } +}; + +static const struct nv50_disp_mthd_list +gv100_disp_core_mthd_wndw = { + .mthd = 0x0080, + .addr = 0x000080, + .data = { + { 0x1000, 0x681000 }, + { 0x1004, 0x681004 }, + { 0x1008, 0x681008 }, + { 0x100c, 0x68100c }, + { 0x1010, 0x681010 }, + {} + } +}; + +static const struct nv50_disp_mthd_list +gv100_disp_core_mthd_head = { + .mthd = 0x0400, + .addr = 0x000400, + .data = { + { 0x2000, 0x682000 }, + { 0x2004, 0x682004 }, + { 0x2008, 0x682008 }, + { 0x200c, 0x68200c }, + { 0x2014, 0x682014 }, + { 0x2018, 0x682018 }, + { 0x201c, 0x68201c }, + { 0x2020, 0x682020 }, + { 0x2028, 0x682028 }, + { 0x202c, 0x68202c }, + { 0x2030, 0x682030 }, + { 0x2038, 0x682038 }, + { 0x203c, 0x68203c }, + { 0x2048, 0x682048 }, + { 0x204c, 0x68204c }, + { 0x2050, 0x682050 }, + { 0x2054, 0x682054 }, + { 0x2058, 0x682058 }, + { 0x205c, 0x68205c }, + { 0x2060, 0x682060 }, + { 0x2064, 0x682064 }, + { 0x2068, 0x682068 }, + { 0x206c, 0x68206c }, + { 0x2070, 0x682070 }, + { 0x2074, 0x682074 }, + { 0x2078, 0x682078 }, + { 0x207c, 0x68207c }, + { 0x2080, 0x682080 }, + { 0x2088, 0x682088 }, + { 0x2090, 0x682090 }, + { 0x209c, 0x68209c }, + { 0x20a0, 0x6820a0 }, + { 0x20a4, 0x6820a4 }, + { 0x20a8, 0x6820a8 }, + { 0x20ac, 0x6820ac }, + { 0x218c, 0x68218c }, + { 0x2194, 0x682194 }, + { 0x2198, 0x682198 }, + { 0x219c, 0x68219c }, + { 0x21a0, 0x6821a0 }, + { 0x21a4, 0x6821a4 }, + { 0x2214, 0x682214 }, + { 0x2218, 0x682218 }, + {} + } +}; + +static const struct nv50_disp_chan_mthd +gv100_disp_core_mthd = { + .name = "Core", + .addr = 0x000000, + .prev = 0x008000, + .data = { + { "Global", 1, &gv100_disp_core_mthd_base }, + { "SOR", 4, &gv100_disp_core_mthd_sor }, + { "WINDOW", 8, &gv100_disp_core_mthd_wndw }, + { "HEAD", 4, &gv100_disp_core_mthd_head }, + {} + } +}; + +static int +gv100_disp_core_idle(struct nv50_disp_chan *chan) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + nvkm_msec(device, 2000, + u32 stat = nvkm_rd32(device, 0x610630); + if ((stat & 0x001f0000) == 0x000b0000) + return 0; + ); + return -EBUSY; +} + +static u64 +gv100_disp_core_user(struct nv50_disp_chan *chan, u64 *psize) +{ + *psize = 0x10000; + return 0x680000; +} + +static void +gv100_disp_core_intr(struct nv50_disp_chan *chan, bool en) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u32 mask = 0x00000001; + const u32 data = en ? mask : 0; + nvkm_mask(device, 0x611dac, mask, data); +} + +static void +gv100_disp_core_fini(struct nv50_disp_chan *chan) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + nvkm_mask(device, 0x6104e0, 0x00000010, 0x00000000); + gv100_disp_core_idle(chan); + nvkm_mask(device, 0x6104e0, 0x00000002, 0x00000000); +} + +static int +gv100_disp_core_init(struct nv50_disp_chan *chan) +{ + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + + nvkm_wr32(device, 0x610b24, lower_32_bits(chan->push)); + nvkm_wr32(device, 0x610b20, upper_32_bits(chan->push)); + nvkm_wr32(device, 0x610b28, 0x00000001); + nvkm_wr32(device, 0x610b2c, 0x00000040); + + nvkm_mask(device, 0x6104e0, 0x00000010, 0x00000010); + nvkm_wr32(device, 0x680000, 0x00000000); + nvkm_wr32(device, 0x6104e0, 0x00000013); + return gv100_disp_core_idle(chan); +} + +static const struct nv50_disp_chan_func +gv100_disp_core = { + .init = gv100_disp_core_init, + .fini = gv100_disp_core_fini, + .intr = gv100_disp_core_intr, + .user = gv100_disp_core_user, + .bind = gv100_disp_dmac_bind, +}; + +int +gv100_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_core_new_(&gv100_disp_core, &gv100_disp_core_mthd, + disp, 0, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/corenv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/corenv50.c index b547c8b833ca..55db9a22b4be 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/corenv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/corenv50.c @@ -21,32 +21,30 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" +#include "channv50.h" #include <core/client.h> #include <subdev/timer.h> -#include <nvif/class.h> #include <nvif/cl507d.h> #include <nvif/unpack.h> int -nv50_disp_core_new(const struct nv50_disp_dmac_func *func, - const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int chid, - const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv50_disp_core_new_(const struct nv50_disp_chan_func *func, + const struct nv50_disp_chan_mthd *mthd, + struct nv50_disp *disp, int chid, + const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) { union { struct nv50_disp_core_channel_dma_v0 v0; - } *args = data; + } *args = argv; struct nvkm_object *parent = oclass->parent; u64 push; int ret = -ENOSYS; - nvif_ioctl(parent, "create disp core channel dma size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { + nvif_ioctl(parent, "create disp core channel dma size %d\n", argc); + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { nvif_ioctl(parent, "create disp core channel dma vers %d " "pushbuf %016llx\n", args->v0.version, args->v0.pushbuf); @@ -54,7 +52,7 @@ nv50_disp_core_new(const struct nv50_disp_dmac_func *func, } else return ret; - return nv50_disp_dmac_new_(func, mthd, root, chid, 0, + return nv50_disp_dmac_new_(func, mthd, disp, chid, 0, push, oclass, pobject); } @@ -151,7 +149,7 @@ nv50_disp_core_mthd_head = { }; static const struct nv50_disp_chan_mthd -nv50_disp_core_chan_mthd = { +nv50_disp_core_mthd = { .name = "Core", .addr = 0x000000, .prev = 0x000004, @@ -166,10 +164,9 @@ nv50_disp_core_chan_mthd = { }; static void -nv50_disp_core_fini(struct nv50_disp_dmac *chan) +nv50_disp_core_fini(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; struct nvkm_device *device = subdev->device; /* deactivate channel */ @@ -182,21 +179,14 @@ nv50_disp_core_fini(struct nv50_disp_dmac *chan) nvkm_error(subdev, "core fini: %08x\n", nvkm_rd32(device, 0x610200)); } - - /* disable error reporting and completion notifications */ - nvkm_mask(device, 0x610028, 0x00010001, 0x00000000); } static int -nv50_disp_core_init(struct nv50_disp_dmac *chan) +nv50_disp_core_init(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - /* enable error reporting */ - nvkm_mask(device, 0x610028, 0x00010000, 0x00010000); - /* attempt to unstick channel from some unknown state */ if ((nvkm_rd32(device, 0x610200) & 0x009f0000) == 0x00020000) nvkm_mask(device, 0x610200, 0x00800000, 0x00800000); @@ -224,20 +214,19 @@ nv50_disp_core_init(struct nv50_disp_dmac *chan) return 0; } -const struct nv50_disp_dmac_func +const struct nv50_disp_chan_func nv50_disp_core_func = { .init = nv50_disp_core_init, .fini = nv50_disp_core_fini, + .intr = nv50_disp_chan_intr, + .user = nv50_disp_chan_user, .bind = nv50_disp_dmac_bind, }; -const struct nv50_disp_dmac_oclass -nv50_disp_core_oclass = { - .base.oclass = NV50_DISP_CORE_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_core_new, - .func = &nv50_disp_core_func, - .mthd = &nv50_disp_core_chan_mthd, - .chid = 0, -}; +int +nv50_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_core_new_(&nv50_disp_core_func, &nv50_disp_core_mthd, + disp, 0, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c index 2be6fb052c65..cdda3658dcb3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c @@ -22,16 +22,11 @@ * Authors: Ben Skeggs */ #include "channv50.h" -#include "rootnv50.h" -#include <nvif/class.h> - -const struct nv50_disp_pioc_oclass -gf119_disp_curs_oclass = { - .base.oclass = GF110_DISP_CURSOR, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_curs_new, - .func = &gf119_disp_pioc_func, - .chid = { 13, 13 }, -}; +int +gf119_disp_curs_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_curs_new_(&gf119_disp_pioc_func, disp, 13, 13, + oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c index e958210d8105..1a4601f975e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c @@ -22,16 +22,11 @@ * Authors: Ben Skeggs <bskeggs@redhat.com> */ #include "channv50.h" -#include "rootnv50.h" -#include <nvif/class.h> - -const struct nv50_disp_pioc_oclass -gp102_disp_curs_oclass = { - .base.oclass = GK104_DISP_CURSOR, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_curs_new, - .func = &gf119_disp_pioc_func, - .chid = { 13, 17 }, -}; +int +gp102_disp_curs_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_curs_new_(&gf119_disp_pioc_func, disp, 13, 17, + oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgv100.c new file mode 100644 index 000000000000..a3e4f6900245 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgv100.c @@ -0,0 +1,81 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "channv50.h" + +#include <subdev/timer.h> + +static int +gv100_disp_curs_idle(struct nv50_disp_chan *chan) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u32 soff = (chan->chid.ctrl - 1) * 0x04; + nvkm_msec(device, 2000, + u32 stat = nvkm_rd32(device, 0x610664 + soff); + if ((stat & 0x00070000) == 0x00040000) + return 0; + ); + return -EBUSY; +} + +static void +gv100_disp_curs_intr(struct nv50_disp_chan *chan, bool en) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u32 mask = 0x00010000 << chan->head; + const u32 data = en ? mask : 0; + nvkm_mask(device, 0x611dac, mask, data); +} + +static void +gv100_disp_curs_fini(struct nv50_disp_chan *chan) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u32 hoff = chan->chid.ctrl * 4; + nvkm_mask(device, 0x6104e0 + hoff, 0x00000010, 0x00000010); + gv100_disp_curs_idle(chan); + nvkm_mask(device, 0x6104e0 + hoff, 0x00000001, 0x00000000); +} + +static int +gv100_disp_curs_init(struct nv50_disp_chan *chan) +{ + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + nvkm_wr32(device, 0x6104e0 + chan->chid.ctrl * 4, 0x00000001); + return gv100_disp_curs_idle(chan); +} + +static const struct nv50_disp_chan_func +gv100_disp_curs = { + .init = gv100_disp_curs_init, + .fini = gv100_disp_curs_fini, + .intr = gv100_disp_curs_intr, + .user = gv100_disp_chan_user, +}; + +int +gv100_disp_curs_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_curs_new_(&gv100_disp_curs, disp, 73, 73, + oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c index ab51121b7982..d29758504a5f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c @@ -23,30 +23,26 @@ */ #include "channv50.h" #include "head.h" -#include "rootnv50.h" #include <core/client.h> -#include <nvif/class.h> #include <nvif/cl507a.h> #include <nvif/unpack.h> int -nv50_disp_curs_new(const struct nv50_disp_chan_func *func, - const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int ctrl, int user, - const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv50_disp_curs_new_(const struct nv50_disp_chan_func *func, + struct nv50_disp *disp, int ctrl, int user, + const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) { union { struct nv50_disp_cursor_v0 v0; - } *args = data; + } *args = argv; struct nvkm_object *parent = oclass->parent; - struct nv50_disp *disp = root->disp; int head, ret = -ENOSYS; - nvif_ioctl(parent, "create disp cursor size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { + nvif_ioctl(parent, "create disp cursor size %d\n", argc); + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { nvif_ioctl(parent, "create disp cursor vers %d head %d\n", args->v0.version, args->v0.head); if (!nvkm_head_find(&disp->base, args->v0.head)) @@ -55,16 +51,14 @@ nv50_disp_curs_new(const struct nv50_disp_chan_func *func, } else return ret; - return nv50_disp_chan_new_(func, mthd, root, ctrl + head, user + head, + return nv50_disp_chan_new_(func, NULL, disp, ctrl + head, user + head, head, oclass, pobject); } -const struct nv50_disp_pioc_oclass -nv50_disp_curs_oclass = { - .base.oclass = NV50_DISP_CURSOR, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_curs_new, - .func = &nv50_disp_pioc_func, - .chid = { 7, 7 }, -}; +int +nv50_disp_curs_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_curs_new_(&nv50_disp_pioc_func, disp, 7, 7, + oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacgf119.c index dbd032ef352a..71a94777ea2e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacgf119.c @@ -58,8 +58,13 @@ gf119_dac = { int gf119_dac_new(struct nvkm_disp *disp, int id) { - struct nvkm_device *device = disp->engine.subdev.device; - if (!(nvkm_rd32(device, 0x612004) & (0x00000010 << id))) - return 0; return nvkm_ior_new_(&gf119_dac, disp, DAC, id); } + +int +gf119_dac_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + struct nvkm_device *device = disp->engine.subdev.device; + *pmask = (nvkm_rd32(device, 0x612004) & 0x000000f0) >> 4; + return 4; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacnv50.c index 85e692b12260..558012db35f8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacnv50.c @@ -109,8 +109,13 @@ nv50_dac = { int nv50_dac_new(struct nvkm_disp *disp, int id) { - struct nvkm_device *device = disp->engine.subdev.device; - if (!(nvkm_rd32(device, 0x610184) & (0x00100000 << id))) - return 0; return nvkm_ior_new_(&nv50_dac, disp, DAC, id); } + +int +nv50_dac_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + struct nvkm_device *device = disp->engine.subdev.device; + *pmask = (nvkm_rd32(device, 0x610184) & 0x00700000) >> 20; + return 3; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c index ce7cd74fbd5d..edf7dd0d931d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c @@ -21,29 +21,27 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" +#include "channv50.h" #include <core/ramht.h> #include <subdev/timer.h> int -gf119_disp_dmac_bind(struct nv50_disp_dmac *chan, +gf119_disp_dmac_bind(struct nv50_disp_chan *chan, struct nvkm_object *object, u32 handle) { - return nvkm_ramht_insert(chan->base.root->ramht, object, - chan->base.chid.user, -9, handle, - chan->base.chid.user << 27 | 0x00000001); + return nvkm_ramht_insert(chan->disp->ramht, object, + chan->chid.user, -9, handle, + chan->chid.user << 27 | 0x00000001); } void -gf119_disp_dmac_fini(struct nv50_disp_dmac *chan) +gf119_disp_dmac_fini(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int ctrl = chan->base.chid.ctrl; - int user = chan->base.chid.user; + int ctrl = chan->chid.ctrl; + int user = chan->chid.user; /* deactivate channel */ nvkm_mask(device, 0x610490 + (ctrl * 0x0010), 0x00001010, 0x00001000); @@ -55,23 +53,15 @@ gf119_disp_dmac_fini(struct nv50_disp_dmac *chan) nvkm_error(subdev, "ch %d fini: %08x\n", user, nvkm_rd32(device, 0x610490 + (ctrl * 0x10))); } - - /* disable error reporting and completion notification */ - nvkm_mask(device, 0x610090, 0x00000001 << user, 0x00000000); - nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000000); } static int -gf119_disp_dmac_init(struct nv50_disp_dmac *chan) +gf119_disp_dmac_init(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int ctrl = chan->base.chid.ctrl; - int user = chan->base.chid.user; - - /* enable error reporting */ - nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user); + int ctrl = chan->chid.ctrl; + int user = chan->chid.user; /* initialise channel for dma command submission */ nvkm_wr32(device, 0x610494 + (ctrl * 0x0010), chan->push); @@ -94,9 +84,11 @@ gf119_disp_dmac_init(struct nv50_disp_dmac *chan) return 0; } -const struct nv50_disp_dmac_func +const struct nv50_disp_chan_func gf119_disp_dmac_func = { .init = gf119_disp_dmac_init, .fini = gf119_disp_dmac_fini, + .intr = gf119_disp_chan_intr, + .user = nv50_disp_chan_user, .bind = gf119_disp_dmac_bind, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c index cdead9500343..f21a433199aa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c @@ -21,22 +21,17 @@ * * Authors: Ben Skeggs <bskeggs@redhat.com> */ -#include "dmacnv50.h" -#include "rootnv50.h" +#include "channv50.h" #include <subdev/timer.h> static int -gp102_disp_dmac_init(struct nv50_disp_dmac *chan) +gp102_disp_dmac_init(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int ctrl = chan->base.chid.ctrl; - int user = chan->base.chid.user; - - /* enable error reporting */ - nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user); + int ctrl = chan->chid.ctrl; + int user = chan->chid.user; /* initialise channel for dma command submission */ nvkm_wr32(device, 0x611494 + (ctrl * 0x0010), chan->push); @@ -59,9 +54,11 @@ gp102_disp_dmac_init(struct nv50_disp_dmac *chan) return 0; } -const struct nv50_disp_dmac_func +const struct nv50_disp_chan_func gp102_disp_dmac_func = { .init = gp102_disp_dmac_init, .fini = gf119_disp_dmac_fini, + .intr = gf119_disp_chan_intr, + .user = nv50_disp_chan_user, .bind = gf119_disp_dmac_bind, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgv100.c new file mode 100644 index 000000000000..eac0e42da354 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgv100.c @@ -0,0 +1,77 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "channv50.h" + +#include <core/ramht.h> +#include <subdev/timer.h> + +static int +gv100_disp_dmac_idle(struct nv50_disp_chan *chan) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u32 soff = (chan->chid.ctrl - 1) * 0x04; + nvkm_msec(device, 2000, + u32 stat = nvkm_rd32(device, 0x610664 + soff); + if ((stat & 0x000f0000) == 0x00040000) + return 0; + ); + return -EBUSY; +} + +int +gv100_disp_dmac_bind(struct nv50_disp_chan *chan, + struct nvkm_object *object, u32 handle) +{ + return nvkm_ramht_insert(chan->disp->ramht, object, + chan->chid.user, -9, handle, + chan->chid.user << 25 | 0x00000040); +} + +void +gv100_disp_dmac_fini(struct nv50_disp_chan *chan) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u32 coff = chan->chid.ctrl * 0x04; + nvkm_mask(device, 0x6104e0 + coff, 0x00000010, 0x00000000); + gv100_disp_dmac_idle(chan); + nvkm_mask(device, 0x6104e0 + coff, 0x00000002, 0x00000000); +} + +int +gv100_disp_dmac_init(struct nv50_disp_chan *chan) +{ + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + const u32 uoff = (chan->chid.ctrl - 1) * 0x1000; + const u32 poff = chan->chid.ctrl * 0x10; + const u32 coff = chan->chid.ctrl * 0x04; + + nvkm_wr32(device, 0x610b24 + poff, lower_32_bits(chan->push)); + nvkm_wr32(device, 0x610b20 + poff, upper_32_bits(chan->push)); + nvkm_wr32(device, 0x610b28 + poff, 0x00000001); + nvkm_wr32(device, 0x610b2c + poff, 0x00000040); + + nvkm_mask(device, 0x6104e0 + coff, 0x00000010, 0x00000010); + nvkm_wr32(device, 0x690000 + uoff, 0x00000000); + nvkm_wr32(device, 0x6104e0 + coff, 0x00000013); + return gv100_disp_dmac_idle(chan); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c index 070ec5e18fdb..9e8a9d7a9b68 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c @@ -21,176 +21,68 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" +#include "channv50.h" #include <core/client.h> -#include <core/oproxy.h> #include <core/ramht.h> #include <subdev/fb.h> +#include <subdev/mmu.h> #include <subdev/timer.h> #include <engine/dma.h> -struct nv50_disp_dmac_object { - struct nvkm_oproxy oproxy; - struct nv50_disp_root *root; - int hash; -}; - -static void -nv50_disp_dmac_child_del_(struct nvkm_oproxy *base) -{ - struct nv50_disp_dmac_object *object = - container_of(base, typeof(*object), oproxy); - nvkm_ramht_remove(object->root->ramht, object->hash); -} - -static const struct nvkm_oproxy_func -nv50_disp_dmac_child_func_ = { - .dtor[0] = nv50_disp_dmac_child_del_, -}; - -static int -nv50_disp_dmac_child_new_(struct nv50_disp_chan *base, - const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nv50_disp_dmac *chan = nv50_disp_dmac(base); - struct nv50_disp_root *root = chan->base.root; - struct nvkm_device *device = root->disp->base.engine.subdev.device; - const struct nvkm_device_oclass *sclass = oclass->priv; - struct nv50_disp_dmac_object *object; - int ret; - - if (!(object = kzalloc(sizeof(*object), GFP_KERNEL))) - return -ENOMEM; - nvkm_oproxy_ctor(&nv50_disp_dmac_child_func_, oclass, &object->oproxy); - object->root = root; - *pobject = &object->oproxy.base; - - ret = sclass->ctor(device, oclass, data, size, &object->oproxy.object); - if (ret) - return ret; - - object->hash = chan->func->bind(chan, object->oproxy.object, - oclass->handle); - if (object->hash < 0) - return object->hash; - - return 0; -} - -static int -nv50_disp_dmac_child_get_(struct nv50_disp_chan *base, int index, - struct nvkm_oclass *sclass) -{ - struct nv50_disp_dmac *chan = nv50_disp_dmac(base); - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_device *device = disp->base.engine.subdev.device; - const struct nvkm_device_oclass *oclass = NULL; - - sclass->engine = nvkm_device_engine(device, NVKM_ENGINE_DMAOBJ); - if (sclass->engine && sclass->engine->func->base.sclass) { - sclass->engine->func->base.sclass(sclass, index, &oclass); - if (oclass) { - sclass->priv = oclass; - return 0; - } - } - - return -EINVAL; -} - -static void -nv50_disp_dmac_fini_(struct nv50_disp_chan *base) -{ - struct nv50_disp_dmac *chan = nv50_disp_dmac(base); - chan->func->fini(chan); -} - -static int -nv50_disp_dmac_init_(struct nv50_disp_chan *base) -{ - struct nv50_disp_dmac *chan = nv50_disp_dmac(base); - return chan->func->init(chan); -} - -static void * -nv50_disp_dmac_dtor_(struct nv50_disp_chan *base) -{ - return nv50_disp_dmac(base); -} - -static const struct nv50_disp_chan_func -nv50_disp_dmac_func_ = { - .dtor = nv50_disp_dmac_dtor_, - .init = nv50_disp_dmac_init_, - .fini = nv50_disp_dmac_fini_, - .child_get = nv50_disp_dmac_child_get_, - .child_new = nv50_disp_dmac_child_new_, -}; - int -nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func, +nv50_disp_dmac_new_(const struct nv50_disp_chan_func *func, const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int chid, int head, u64 push, + struct nv50_disp *disp, int chid, int head, u64 push, const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { struct nvkm_client *client = oclass->client; - struct nvkm_dmaobj *dmaobj; - struct nv50_disp_dmac *chan; + struct nv50_disp_chan *chan; int ret; - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - chan->func = func; - - ret = nv50_disp_chan_ctor(&nv50_disp_dmac_func_, mthd, root, - chid, chid, head, oclass, &chan->base); + ret = nv50_disp_chan_new_(func, mthd, disp, chid, chid, head, oclass, + pobject); + chan = nv50_disp_chan(*pobject); if (ret) return ret; - dmaobj = nvkm_dmaobj_search(client, push); - if (IS_ERR(dmaobj)) - return PTR_ERR(dmaobj); + chan->memory = nvkm_umem_search(client, push); + if (IS_ERR(chan->memory)) + return PTR_ERR(chan->memory); - if (dmaobj->limit - dmaobj->start != 0xfff) + if (nvkm_memory_size(chan->memory) < 0x1000) return -EINVAL; - switch (dmaobj->target) { - case NV_MEM_TARGET_VRAM: - chan->push = 0x00000001 | dmaobj->start >> 8; - break; - case NV_MEM_TARGET_PCI_NOSNOOP: - chan->push = 0x00000003 | dmaobj->start >> 8; - break; + switch (nvkm_memory_target(chan->memory)) { + case NVKM_MEM_TARGET_VRAM: chan->push = 0x00000001; break; + case NVKM_MEM_TARGET_NCOH: chan->push = 0x00000002; break; + case NVKM_MEM_TARGET_HOST: chan->push = 0x00000003; break; default: return -EINVAL; } + chan->push |= nvkm_memory_addr(chan->memory) >> 8; return 0; } int -nv50_disp_dmac_bind(struct nv50_disp_dmac *chan, +nv50_disp_dmac_bind(struct nv50_disp_chan *chan, struct nvkm_object *object, u32 handle) { - return nvkm_ramht_insert(chan->base.root->ramht, object, - chan->base.chid.user, -10, handle, - chan->base.chid.user << 28 | - chan->base.chid.user); + return nvkm_ramht_insert(chan->disp->ramht, object, + chan->chid.user, -10, handle, + chan->chid.user << 28 | + chan->chid.user); } static void -nv50_disp_dmac_fini(struct nv50_disp_dmac *chan) +nv50_disp_dmac_fini(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int ctrl = chan->base.chid.ctrl; - int user = chan->base.chid.user; + int ctrl = chan->chid.ctrl; + int user = chan->chid.user; /* deactivate channel */ nvkm_mask(device, 0x610200 + (ctrl * 0x0010), 0x00001010, 0x00001000); @@ -202,22 +94,15 @@ nv50_disp_dmac_fini(struct nv50_disp_dmac *chan) nvkm_error(subdev, "ch %d fini timeout, %08x\n", user, nvkm_rd32(device, 0x610200 + (ctrl * 0x10))); } - - /* disable error reporting and completion notifications */ - nvkm_mask(device, 0x610028, 0x00010001 << user, 0x00000000 << user); } static int -nv50_disp_dmac_init(struct nv50_disp_dmac *chan) +nv50_disp_dmac_init(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->base.root->disp; - struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - int ctrl = chan->base.chid.ctrl; - int user = chan->base.chid.user; - - /* enable error reporting */ - nvkm_mask(device, 0x610028, 0x00010000 << user, 0x00010000 << user); + int ctrl = chan->chid.ctrl; + int user = chan->chid.user; /* initialise channel for dma command submission */ nvkm_wr32(device, 0x610204 + (ctrl * 0x0010), chan->push); @@ -240,9 +125,11 @@ nv50_disp_dmac_init(struct nv50_disp_dmac *chan) return 0; } -const struct nv50_disp_dmac_func +const struct nv50_disp_chan_func nv50_disp_dmac_func = { .init = nv50_disp_dmac_init, .fini = nv50_disp_dmac_fini, + .intr = nv50_disp_chan_intr, + .user = nv50_disp_chan_user, .bind = nv50_disp_dmac_bind, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h deleted file mode 100644 index f9b98211da6a..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NV50_DISP_DMAC_H__ -#define __NV50_DISP_DMAC_H__ -#define nv50_disp_dmac(p) container_of((p), struct nv50_disp_dmac, base) -#include "channv50.h" - -struct nv50_disp_dmac { - const struct nv50_disp_dmac_func *func; - struct nv50_disp_chan base; - u32 push; -}; - -struct nv50_disp_dmac_func { - int (*init)(struct nv50_disp_dmac *); - void (*fini)(struct nv50_disp_dmac *); - int (*bind)(struct nv50_disp_dmac *, struct nvkm_object *, u32 handle); -}; - -int nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *, - const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int chid, int head, u64 push, - const struct nvkm_oclass *, struct nvkm_object **); - -extern const struct nv50_disp_dmac_func nv50_disp_dmac_func; -int nv50_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32); -extern const struct nv50_disp_dmac_func nv50_disp_core_func; - -extern const struct nv50_disp_dmac_func gf119_disp_dmac_func; -void gf119_disp_dmac_fini(struct nv50_disp_dmac *); -int gf119_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32); -extern const struct nv50_disp_dmac_func gf119_disp_core_func; -void gf119_disp_core_fini(struct nv50_disp_dmac *); - -extern const struct nv50_disp_dmac_func gp102_disp_dmac_func; - -struct nv50_disp_dmac_oclass { - int (*ctor)(const struct nv50_disp_dmac_func *, - const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int chid, - const struct nvkm_oclass *, void *data, u32 size, - struct nvkm_object **); - struct nvkm_sclass base; - const struct nv50_disp_dmac_func *func; - const struct nv50_disp_chan_mthd *mthd; - int chid; -}; - -int nv50_disp_core_new(const struct nv50_disp_dmac_func *, - const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int chid, - const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **); -int nv50_disp_base_new(const struct nv50_disp_dmac_func *, - const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int chid, - const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **); -int nv50_disp_ovly_new(const struct nv50_disp_dmac_func *, - const struct nv50_disp_chan_mthd *, - struct nv50_disp_root *, int chid, - const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **); - -extern const struct nv50_disp_dmac_oclass nv50_disp_core_oclass; -extern const struct nv50_disp_dmac_oclass nv50_disp_base_oclass; -extern const struct nv50_disp_dmac_oclass nv50_disp_ovly_oclass; - -extern const struct nv50_disp_dmac_oclass g84_disp_core_oclass; -extern const struct nv50_disp_dmac_oclass g84_disp_base_oclass; -extern const struct nv50_disp_dmac_oclass g84_disp_ovly_oclass; - -extern const struct nv50_disp_dmac_oclass g94_disp_core_oclass; - -extern const struct nv50_disp_dmac_oclass gt200_disp_core_oclass; -extern const struct nv50_disp_dmac_oclass gt200_disp_base_oclass; -extern const struct nv50_disp_dmac_oclass gt200_disp_ovly_oclass; - -extern const struct nv50_disp_dmac_oclass gt215_disp_core_oclass; -extern const struct nv50_disp_dmac_oclass gt215_disp_base_oclass; -extern const struct nv50_disp_dmac_oclass gt215_disp_ovly_oclass; - -extern const struct nv50_disp_dmac_oclass gf119_disp_core_oclass; -extern const struct nv50_disp_dmac_oclass gf119_disp_base_oclass; -extern const struct nv50_disp_dmac_oclass gf119_disp_ovly_oclass; - -extern const struct nv50_disp_dmac_oclass gk104_disp_core_oclass; -extern const struct nv50_disp_dmac_oclass gk104_disp_base_oclass; -extern const struct nv50_disp_dmac_oclass gk104_disp_ovly_oclass; - -extern const struct nv50_disp_dmac_oclass gk110_disp_core_oclass; -extern const struct nv50_disp_dmac_oclass gk110_disp_base_oclass; - -extern const struct nv50_disp_dmac_oclass gm107_disp_core_oclass; - -extern const struct nv50_disp_dmac_oclass gm200_disp_core_oclass; - -extern const struct nv50_disp_dmac_oclass gp100_disp_core_oclass; - -extern const struct nv50_disp_dmac_oclass gp102_disp_core_oclass; -extern const struct nv50_disp_dmac_oclass gp102_disp_base_oclass; -extern const struct nv50_disp_dmac_oclass gp102_disp_ovly_oclass; -#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c index 842e1b72ee42..731f188fc1ee 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c @@ -28,18 +28,20 @@ static const struct nv50_disp_func g84_disp = { + .init = nv50_disp_init, + .fini = nv50_disp_fini, .intr = nv50_disp_intr, .uevent = &nv50_disp_chan_uevent, .super = nv50_disp_super, .root = &g84_disp_root_oclass, - .head.new = nv50_head_new, - .dac = { .nr = 3, .new = nv50_dac_new }, - .sor = { .nr = 2, .new = g84_sor_new }, - .pior = { .nr = 3, .new = nv50_pior_new }, + .head = { .cnt = nv50_head_cnt, .new = nv50_head_new }, + .dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new }, + .sor = { .cnt = nv50_sor_cnt, .new = g84_sor_new }, + .pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new }, }; int g84_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return nv50_disp_new_(&g84_disp, device, index, 2, pdisp); + return nv50_disp_new_(&g84_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c index d184e6ab8918..def54fe1951e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c @@ -28,18 +28,20 @@ static const struct nv50_disp_func g94_disp = { + .init = nv50_disp_init, + .fini = nv50_disp_fini, .intr = nv50_disp_intr, .uevent = &nv50_disp_chan_uevent, .super = nv50_disp_super, .root = &g94_disp_root_oclass, - .head.new = nv50_head_new, - .dac = { .nr = 3, .new = nv50_dac_new }, - .sor = { .nr = 4, .new = g94_sor_new }, - .pior = { .nr = 3, .new = nv50_pior_new }, + .head = { .cnt = nv50_head_cnt, .new = nv50_head_new }, + .dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new }, + .sor = { .cnt = g94_sor_cnt, .new = g94_sor_new }, + .pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new }, }; int g94_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return nv50_disp_new_(&g94_disp, device, index, 2, pdisp); + return nv50_disp_new_(&g94_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c index d8765b57180b..794e90982641 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c @@ -24,8 +24,12 @@ #include "nv50.h" #include "head.h" #include "ior.h" +#include "channv50.h" #include "rootnv50.h" +#include <core/ramht.h> +#include <subdev/timer.h> + void gf119_disp_super(struct work_struct *work) { @@ -164,28 +168,99 @@ gf119_disp_intr(struct nv50_disp *disp) } } +void +gf119_disp_fini(struct nv50_disp *disp) +{ + struct nvkm_device *device = disp->base.engine.subdev.device; + /* disable all interrupts */ + nvkm_wr32(device, 0x6100b0, 0x00000000); +} + int -gf119_disp_new_(const struct nv50_disp_func *func, struct nvkm_device *device, - int index, struct nvkm_disp **pdisp) +gf119_disp_init(struct nv50_disp *disp) { - u32 heads = nvkm_rd32(device, 0x022448); - return nv50_disp_new_(func, device, index, heads, pdisp); + struct nvkm_device *device = disp->base.engine.subdev.device; + struct nvkm_head *head; + u32 tmp; + int i; + + /* The below segments of code copying values from one register to + * another appear to inform EVO of the display capabilities or + * something similar. + */ + + /* ... CRTC caps */ + list_for_each_entry(head, &disp->base.head, head) { + const u32 hoff = head->id * 0x800; + tmp = nvkm_rd32(device, 0x616104 + hoff); + nvkm_wr32(device, 0x6101b4 + hoff, tmp); + tmp = nvkm_rd32(device, 0x616108 + hoff); + nvkm_wr32(device, 0x6101b8 + hoff, tmp); + tmp = nvkm_rd32(device, 0x61610c + hoff); + nvkm_wr32(device, 0x6101bc + hoff, tmp); + } + + /* ... DAC caps */ + for (i = 0; i < disp->dac.nr; i++) { + tmp = nvkm_rd32(device, 0x61a000 + (i * 0x800)); + nvkm_wr32(device, 0x6101c0 + (i * 0x800), tmp); + } + + /* ... SOR caps */ + for (i = 0; i < disp->sor.nr; i++) { + tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800)); + nvkm_wr32(device, 0x6301c4 + (i * 0x800), tmp); + } + + /* steal display away from vbios, or something like that */ + if (nvkm_rd32(device, 0x6100ac) & 0x00000100) { + nvkm_wr32(device, 0x6100ac, 0x00000100); + nvkm_mask(device, 0x6194e8, 0x00000001, 0x00000000); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x6194e8) & 0x00000002)) + break; + ) < 0) + return -EBUSY; + } + + /* point at display engine memory area (hash table, objects) */ + nvkm_wr32(device, 0x610010, (disp->inst->addr >> 8) | 9); + + /* enable supervisor interrupts, disable everything else */ + nvkm_wr32(device, 0x610090, 0x00000000); + nvkm_wr32(device, 0x6100a0, 0x00000000); + nvkm_wr32(device, 0x6100b0, 0x00000307); + + /* disable underflow reporting, preventing an intermittent issue + * on some gk104 boards where the production vbios left this + * setting enabled by default. + * + * ftp://download.nvidia.com/open-gpu-doc/gk104-disable-underflow-reporting/1/gk104-disable-underflow-reporting.txt + */ + list_for_each_entry(head, &disp->base.head, head) { + const u32 hoff = head->id * 0x800; + nvkm_mask(device, 0x616308 + hoff, 0x00000111, 0x00000010); + } + + return 0; } static const struct nv50_disp_func gf119_disp = { + .init = gf119_disp_init, + .fini = gf119_disp_fini, .intr = gf119_disp_intr, .intr_error = gf119_disp_intr_error, .uevent = &gf119_disp_chan_uevent, .super = gf119_disp_super, .root = &gf119_disp_root_oclass, - .head.new = gf119_head_new, - .dac = { .nr = 3, .new = gf119_dac_new }, - .sor = { .nr = 4, .new = gf119_sor_new }, + .head = { .cnt = gf119_head_cnt, .new = gf119_head_new }, + .dac = { .cnt = gf119_dac_cnt, .new = gf119_dac_new }, + .sor = { .cnt = gf119_sor_cnt, .new = gf119_sor_new }, }; int gf119_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return gf119_disp_new_(&gf119_disp, device, index, pdisp); + return nv50_disp_new_(&gf119_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c index e8fe9f315d64..4c3439b1a62d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c @@ -28,18 +28,20 @@ static const struct nv50_disp_func gk104_disp = { + .init = gf119_disp_init, + .fini = gf119_disp_fini, .intr = gf119_disp_intr, .intr_error = gf119_disp_intr_error, .uevent = &gf119_disp_chan_uevent, .super = gf119_disp_super, .root = &gk104_disp_root_oclass, - .head.new = gf119_head_new, - .dac = { .nr = 3, .new = gf119_dac_new }, - .sor = { .nr = 4, .new = gk104_sor_new }, + .head = { .cnt = gf119_head_cnt, .new = gf119_head_new }, + .dac = { .cnt = gf119_dac_cnt, .new = gf119_dac_new }, + .sor = { .cnt = gf119_sor_cnt, .new = gk104_sor_new }, }; int gk104_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return gf119_disp_new_(&gk104_disp, device, index, pdisp); + return nv50_disp_new_(&gk104_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c index 769687502e7a..bc6f4750c942 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c @@ -28,18 +28,20 @@ static const struct nv50_disp_func gk110_disp = { + .init = gf119_disp_init, + .fini = gf119_disp_fini, .intr = gf119_disp_intr, .intr_error = gf119_disp_intr_error, .uevent = &gf119_disp_chan_uevent, .super = gf119_disp_super, .root = &gk110_disp_root_oclass, - .head.new = gf119_head_new, - .dac = { .nr = 3, .new = gf119_dac_new }, - .sor = { .nr = 4, .new = gk104_sor_new }, + .head = { .cnt = gf119_head_cnt, .new = gf119_head_new }, + .dac = { .cnt = gf119_dac_cnt, .new = gf119_dac_new }, + .sor = { .cnt = gf119_sor_cnt, .new = gk104_sor_new }, }; int gk110_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return gf119_disp_new_(&gk110_disp, device, index, pdisp); + return nv50_disp_new_(&gk110_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c index ede70e5d188e..031cf6b03a76 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c @@ -28,18 +28,20 @@ static const struct nv50_disp_func gm107_disp = { + .init = gf119_disp_init, + .fini = gf119_disp_fini, .intr = gf119_disp_intr, .intr_error = gf119_disp_intr_error, .uevent = &gf119_disp_chan_uevent, .super = gf119_disp_super, .root = &gm107_disp_root_oclass, - .head.new = gf119_head_new, - .dac = { .nr = 3, .new = gf119_dac_new }, - .sor = { .nr = 4, .new = gm107_sor_new }, + .head = { .cnt = gf119_head_cnt, .new = gf119_head_new }, + .dac = { .cnt = gf119_dac_cnt, .new = gf119_dac_new }, + .sor = { .cnt = gf119_sor_cnt, .new = gm107_sor_new }, }; int gm107_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return gf119_disp_new_(&gm107_disp, device, index, pdisp); + return nv50_disp_new_(&gm107_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c index 292d3b5f9704..ec9c33a5162d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c @@ -28,18 +28,20 @@ static const struct nv50_disp_func gm200_disp = { + .init = gf119_disp_init, + .fini = gf119_disp_fini, .intr = gf119_disp_intr, .intr_error = gf119_disp_intr_error, .uevent = &gf119_disp_chan_uevent, .super = gf119_disp_super, .root = &gm200_disp_root_oclass, - .head.new = gf119_head_new, - .dac = { .nr = 3, .new = gf119_dac_new }, - .sor = { .nr = 4, .new = gm200_sor_new }, + .head = { .cnt = gf119_head_cnt, .new = gf119_head_new }, + .dac = { .cnt = gf119_dac_cnt, .new = gf119_dac_new }, + .sor = { .cnt = gf119_sor_cnt, .new = gm200_sor_new }, }; int gm200_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return gf119_disp_new_(&gm200_disp, device, index, pdisp); + return nv50_disp_new_(&gm200_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c index 39eb98b2c3a2..fd6216684f6d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c @@ -28,17 +28,19 @@ static const struct nv50_disp_func gp100_disp = { + .init = gf119_disp_init, + .fini = gf119_disp_fini, .intr = gf119_disp_intr, .intr_error = gf119_disp_intr_error, .uevent = &gf119_disp_chan_uevent, .super = gf119_disp_super, .root = &gp100_disp_root_oclass, - .head.new = gf119_head_new, - .sor = { .nr = 4, .new = gm200_sor_new }, + .head = { .cnt = gf119_head_cnt, .new = gf119_head_new }, + .sor = { .cnt = gf119_sor_cnt, .new = gm200_sor_new }, }; int gp100_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return gf119_disp_new_(&gp100_disp, device, index, pdisp); + return nv50_disp_new_(&gp100_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c index 91d70fe18275..3468ddec1270 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c @@ -24,6 +24,7 @@ #include "nv50.h" #include "head.h" #include "ior.h" +#include "channv50.h" #include "rootnv50.h" static void @@ -54,17 +55,19 @@ gp102_disp_intr_error(struct nv50_disp *disp, int chid) static const struct nv50_disp_func gp102_disp = { + .init = gf119_disp_init, + .fini = gf119_disp_fini, .intr = gf119_disp_intr, .intr_error = gp102_disp_intr_error, .uevent = &gf119_disp_chan_uevent, .super = gf119_disp_super, .root = &gp102_disp_root_oclass, - .head.new = gf119_head_new, - .sor = { .nr = 4, .new = gm200_sor_new }, + .head = { .cnt = gf119_head_cnt, .new = gf119_head_new }, + .sor = { .cnt = gf119_sor_cnt, .new = gm200_sor_new }, }; int gp102_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return gf119_disp_new_(&gp102_disp, device, index, pdisp); + return nv50_disp_new_(&gp102_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c index bf00c4e3be3a..f80183701f44 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c @@ -28,18 +28,20 @@ static const struct nv50_disp_func gt200_disp = { + .init = nv50_disp_init, + .fini = nv50_disp_fini, .intr = nv50_disp_intr, .uevent = &nv50_disp_chan_uevent, .super = nv50_disp_super, .root = >200_disp_root_oclass, - .head.new = nv50_head_new, - .dac = { .nr = 3, .new = nv50_dac_new }, - .sor = { .nr = 2, .new = g84_sor_new }, - .pior = { .nr = 3, .new = nv50_pior_new }, + .head = { .cnt = nv50_head_cnt, .new = nv50_head_new }, + .dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new }, + .sor = { .cnt = nv50_sor_cnt, .new = g84_sor_new }, + .pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new }, }; int gt200_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return nv50_disp_new_(>200_disp, device, index, 2, pdisp); + return nv50_disp_new_(>200_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c index 2cdd4d7a98d3..7581efc1357e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c @@ -28,18 +28,20 @@ static const struct nv50_disp_func gt215_disp = { + .init = nv50_disp_init, + .fini = nv50_disp_fini, .intr = nv50_disp_intr, .uevent = &nv50_disp_chan_uevent, .super = nv50_disp_super, .root = >215_disp_root_oclass, - .head.new = nv50_head_new, - .dac = { .nr = 3, .new = nv50_dac_new }, - .sor = { .nr = 4, .new = gt215_sor_new }, - .pior = { .nr = 3, .new = nv50_pior_new }, + .head = { .cnt = nv50_head_cnt, .new = nv50_head_new }, + .dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new }, + .sor = { .cnt = g94_sor_cnt, .new = gt215_sor_new }, + .pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new }, }; int gt215_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return nv50_disp_new_(>215_disp, device, index, 2, pdisp); + return nv50_disp_new_(>215_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c new file mode 100644 index 000000000000..d0a7e3456da1 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c @@ -0,0 +1,427 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nv50.h" +#include "head.h" +#include "ior.h" +#include "channv50.h" +#include "rootnv50.h" + +#include <core/gpuobj.h> +#include <subdev/timer.h> + +static int +gv100_disp_wndw_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + struct nvkm_device *device = disp->engine.subdev.device; + *pmask = nvkm_rd32(device, 0x610064); + return (nvkm_rd32(device, 0x610074) & 0x03f00000) >> 20; +} + +static void +gv100_disp_super(struct work_struct *work) +{ + struct nv50_disp *disp = + container_of(work, struct nv50_disp, supervisor); + struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_head *head; + u32 stat = nvkm_rd32(device, 0x6107a8); + u32 mask[4]; + + nvkm_debug(subdev, "supervisor %d: %08x\n", ffs(disp->super), stat); + list_for_each_entry(head, &disp->base.head, head) { + mask[head->id] = nvkm_rd32(device, 0x6107ac + (head->id * 4)); + HEAD_DBG(head, "%08x", mask[head->id]); + } + + if (disp->super & 0x00000001) { + nv50_disp_chan_mthd(disp->chan[0], NV_DBG_DEBUG); + nv50_disp_super_1(disp); + list_for_each_entry(head, &disp->base.head, head) { + if (!(mask[head->id] & 0x00001000)) + continue; + nv50_disp_super_1_0(disp, head); + } + } else + if (disp->super & 0x00000002) { + list_for_each_entry(head, &disp->base.head, head) { + if (!(mask[head->id] & 0x00001000)) + continue; + nv50_disp_super_2_0(disp, head); + } + nvkm_outp_route(&disp->base); + list_for_each_entry(head, &disp->base.head, head) { + if (!(mask[head->id] & 0x00010000)) + continue; + nv50_disp_super_2_1(disp, head); + } + list_for_each_entry(head, &disp->base.head, head) { + if (!(mask[head->id] & 0x00001000)) + continue; + nv50_disp_super_2_2(disp, head); + } + } else + if (disp->super & 0x00000004) { + list_for_each_entry(head, &disp->base.head, head) { + if (!(mask[head->id] & 0x00001000)) + continue; + nv50_disp_super_3_0(disp, head); + } + } + + list_for_each_entry(head, &disp->base.head, head) + nvkm_wr32(device, 0x6107ac + (head->id * 4), 0x00000000); + nvkm_wr32(device, 0x6107a8, 0x80000000); +} + +static void +gv100_disp_exception(struct nv50_disp *disp, int chid) +{ + struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, 0x611020 + (chid * 12)); + u32 type = (stat & 0x00007000) >> 12; + u32 mthd = (stat & 0x00000fff) << 2; + u32 data = nvkm_rd32(device, 0x611024 + (chid * 12)); + u32 code = nvkm_rd32(device, 0x611028 + (chid * 12)); + + nvkm_error(subdev, "chid %d %08x [type %d mthd %04x] " + "data %08x code %08x\n", + chid, stat, type, mthd, data, code); + + if (chid < ARRAY_SIZE(disp->chan) && disp->chan[chid]) { + switch (mthd) { + case 0x0200: + nv50_disp_chan_mthd(disp->chan[chid], NV_DBG_ERROR); + break; + default: + break; + } + } + + nvkm_wr32(device, 0x611020 + (chid * 12), 0x90000000); +} + +static void +gv100_disp_intr_ctrl_disp(struct nv50_disp *disp) +{ + struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, 0x611c30); + + if (stat & 0x00000007) { + disp->super = (stat & 0x00000007); + queue_work(disp->wq, &disp->supervisor); + nvkm_wr32(device, 0x611860, disp->super); + stat &= ~0x00000007; + } + + /*TODO: I would guess this is VBIOS_RELEASE, however, NFI how to + * ACK it, nor does RM appear to bother. + */ + if (stat & 0x00000008) + stat &= ~0x00000008; + + if (stat & 0x00000100) { + unsigned long wndws = nvkm_rd32(device, 0x611858); + unsigned long other = nvkm_rd32(device, 0x61185c); + int wndw; + + nvkm_wr32(device, 0x611858, wndws); + nvkm_wr32(device, 0x61185c, other); + + /* AWAKEN_OTHER_CORE. */ + if (other & 0x00000001) + nv50_disp_chan_uevent_send(disp, 0); + + /* AWAKEN_WIN_CH(n). */ + for_each_set_bit(wndw, &wndws, disp->wndw.nr) { + nv50_disp_chan_uevent_send(disp, 1 + wndw); + } + } + + if (stat) + nvkm_warn(subdev, "ctrl %08x\n", stat); +} + +static void +gv100_disp_intr_exc_other(struct nv50_disp *disp) +{ + struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, 0x611854); + unsigned long mask; + int head; + + if (stat & 0x00000001) { + nvkm_wr32(device, 0x611854, 0x00000001); + gv100_disp_exception(disp, 0); + stat &= ~0x00000001; + } + + if ((mask = (stat & 0x00ff0000) >> 16)) { + for_each_set_bit(head, &mask, disp->wndw.nr) { + nvkm_wr32(device, 0x611854, 0x00010000 << head); + gv100_disp_exception(disp, 73 + head); + stat &= ~(0x00010000 << head); + } + } + + if (stat) { + nvkm_warn(subdev, "exception %08x\n", stat); + nvkm_wr32(device, 0x611854, stat); + } +} + +static void +gv100_disp_intr_exc_winim(struct nv50_disp *disp) +{ + struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + unsigned long stat = nvkm_rd32(device, 0x611850); + int wndw; + + for_each_set_bit(wndw, &stat, disp->wndw.nr) { + nvkm_wr32(device, 0x611850, BIT(wndw)); + gv100_disp_exception(disp, 33 + wndw); + stat &= ~BIT(wndw); + } + + if (stat) { + nvkm_warn(subdev, "wimm %08x\n", (u32)stat); + nvkm_wr32(device, 0x611850, stat); + } +} + +static void +gv100_disp_intr_exc_win(struct nv50_disp *disp) +{ + struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + unsigned long stat = nvkm_rd32(device, 0x61184c); + int wndw; + + for_each_set_bit(wndw, &stat, disp->wndw.nr) { + nvkm_wr32(device, 0x61184c, BIT(wndw)); + gv100_disp_exception(disp, 1 + wndw); + stat &= ~BIT(wndw); + } + + if (stat) { + nvkm_warn(subdev, "wndw %08x\n", (u32)stat); + nvkm_wr32(device, 0x61184c, stat); + } +} + +static void +gv100_disp_intr_head_timing(struct nv50_disp *disp, int head) +{ + struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, 0x611800 + (head * 0x04)); + + /* LAST_DATA, LOADV. */ + if (stat & 0x00000003) { + nvkm_wr32(device, 0x611800 + (head * 0x04), stat & 0x00000003); + stat &= ~0x00000003; + } + + if (stat & 0x00000004) { + nvkm_disp_vblank(&disp->base, head); + nvkm_wr32(device, 0x611800 + (head * 0x04), 0x00000004); + stat &= ~0x00000004; + } + + if (stat) { + nvkm_warn(subdev, "head %08x\n", stat); + nvkm_wr32(device, 0x611800 + (head * 0x04), stat); + } +} + +static void +gv100_disp_intr(struct nv50_disp *disp) +{ + struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, 0x611ec0); + unsigned long mask; + int head; + + if ((mask = (stat & 0x000000ff))) { + for_each_set_bit(head, &mask, 8) { + gv100_disp_intr_head_timing(disp, head); + stat &= ~BIT(head); + } + } + + if (stat & 0x00000200) { + gv100_disp_intr_exc_win(disp); + stat &= ~0x00000200; + } + + if (stat & 0x00000400) { + gv100_disp_intr_exc_winim(disp); + stat &= ~0x00000400; + } + + if (stat & 0x00000800) { + gv100_disp_intr_exc_other(disp); + stat &= ~0x00000800; + } + + if (stat & 0x00001000) { + gv100_disp_intr_ctrl_disp(disp); + stat &= ~0x00001000; + } + + if (stat) + nvkm_warn(subdev, "intr %08x\n", stat); +} + +static void +gv100_disp_fini(struct nv50_disp *disp) +{ + struct nvkm_device *device = disp->base.engine.subdev.device; + nvkm_wr32(device, 0x611db0, 0x00000000); +} + +static int +gv100_disp_init(struct nv50_disp *disp) +{ + struct nvkm_device *device = disp->base.engine.subdev.device; + struct nvkm_head *head; + int i, j; + u32 tmp; + + /* Claim ownership of display. */ + if (nvkm_rd32(device, 0x6254e8) & 0x00000002) { + nvkm_mask(device, 0x6254e8, 0x00000001, 0x00000000); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x6254e8) & 0x00000002)) + break; + ) < 0) + return -EBUSY; + } + + /* Lock pin capabilities. */ + tmp = nvkm_rd32(device, 0x610068); + nvkm_wr32(device, 0x640008, tmp); + + /* SOR capabilities. */ + for (i = 0; i < disp->sor.nr; i++) { + tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800)); + nvkm_mask(device, 0x640000, 0x00000100 << i, 0x00000100 << i); + nvkm_wr32(device, 0x640144 + (i * 0x08), tmp); + } + + /* Head capabilities. */ + list_for_each_entry(head, &disp->base.head, head) { + const int id = head->id; + + /* RG. */ + tmp = nvkm_rd32(device, 0x616300 + (id * 0x800)); + nvkm_wr32(device, 0x640048 + (id * 0x020), tmp); + + /* POSTCOMP. */ + for (j = 0; j < 6 * 4; j += 4) { + tmp = nvkm_rd32(device, 0x616100 + (id * 0x800) + j); + nvkm_wr32(device, 0x640030 + (id * 0x20) + j, tmp); + } + } + + /* Window capabilities. */ + for (i = 0; i < disp->wndw.nr; i++) { + nvkm_mask(device, 0x640004, 1 << i, 1 << i); + for (j = 0; j < 6 * 4; j += 4) { + tmp = nvkm_rd32(device, 0x630050 + (i * 0x800) + j); + nvkm_wr32(device, 0x6401e4 + (i * 0x20) + j, tmp); + } + } + + /* IHUB capabilities. */ + for (i = 0; i < 4; i++) { + tmp = nvkm_rd32(device, 0x62e000 + (i * 0x04)); + nvkm_wr32(device, 0x640010 + (i * 0x04), tmp); + } + + nvkm_mask(device, 0x610078, 0x00000001, 0x00000001); + + /* Setup instance memory. */ + switch (nvkm_memory_target(disp->inst->memory)) { + case NVKM_MEM_TARGET_VRAM: tmp = 0x00000001; break; + case NVKM_MEM_TARGET_NCOH: tmp = 0x00000002; break; + case NVKM_MEM_TARGET_HOST: tmp = 0x00000003; break; + default: + break; + } + nvkm_wr32(device, 0x610010, 0x00000008 | tmp); + nvkm_wr32(device, 0x610014, disp->inst->addr >> 16); + + /* CTRL_DISP: AWAKEN, ERROR, SUPERVISOR[1-3]. */ + nvkm_wr32(device, 0x611cf0, 0x00000187); /* MSK. */ + nvkm_wr32(device, 0x611db0, 0x00000187); /* EN. */ + + /* EXC_OTHER: CURSn, CORE. */ + nvkm_wr32(device, 0x611cec, disp->head.mask << 16 | + 0x00000001); /* MSK. */ + nvkm_wr32(device, 0x611dac, 0x00000000); /* EN. */ + + /* EXC_WINIM. */ + nvkm_wr32(device, 0x611ce8, disp->wndw.mask); /* MSK. */ + nvkm_wr32(device, 0x611da8, 0x00000000); /* EN. */ + + /* EXC_WIN. */ + nvkm_wr32(device, 0x611ce4, disp->wndw.mask); /* MSK. */ + nvkm_wr32(device, 0x611da4, 0x00000000); /* EN. */ + + /* HEAD_TIMING(n): VBLANK. */ + list_for_each_entry(head, &disp->base.head, head) { + const u32 hoff = head->id * 4; + nvkm_wr32(device, 0x611cc0 + hoff, 0x00000004); /* MSK. */ + nvkm_wr32(device, 0x611d80 + hoff, 0x00000000); /* EN. */ + } + + /* OR. */ + nvkm_wr32(device, 0x611cf4, 0x00000000); /* MSK. */ + nvkm_wr32(device, 0x611db4, 0x00000000); /* EN. */ + return 0; +} + +static const struct nv50_disp_func +gv100_disp = { + .init = gv100_disp_init, + .fini = gv100_disp_fini, + .intr = gv100_disp_intr, + .uevent = &gv100_disp_chan_uevent, + .super = gv100_disp_super, + .root = &gv100_disp_root_oclass, + .wndw = { .cnt = gv100_disp_wndw_cnt }, + .head = { .cnt = gv100_head_cnt, .new = gv100_head_new }, + .sor = { .cnt = gv100_sor_cnt, .new = gv100_sor_new }, + .ramht_size = 0x2000, +}; + +int +gv100_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) +{ + return nv50_disp_new_(&gv100_disp, device, index, pdisp); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c new file mode 100644 index 000000000000..6e3c450eaace --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c @@ -0,0 +1,85 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "hdmi.h" + +void +gv100_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, + u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size) +{ + struct nvkm_device *device = ior->disp->engine.subdev.device; + const u32 ctrl = 0x40000000 * enable | + max_ac_packet << 16 | + rekey; + const u32 hoff = head * 0x800; + const u32 hdmi = head * 0x400; + struct packed_hdmi_infoframe avi_infoframe; + struct packed_hdmi_infoframe vendor_infoframe; + + pack_hdmi_infoframe(&avi_infoframe, avi, avi_size); + pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size); + + if (!(ctrl & 0x40000000)) { + nvkm_mask(device, 0x6165c0 + hoff, 0x40000000, 0x00000000); + nvkm_mask(device, 0x6f0100 + hdmi, 0x00000001, 0x00000000); + nvkm_mask(device, 0x6f00c0 + hdmi, 0x00000001, 0x00000000); + nvkm_mask(device, 0x6f0000 + hdmi, 0x00000001, 0x00000000); + return; + } + + /* AVI InfoFrame (AVI). */ + nvkm_mask(device, 0x6f0000 + hdmi, 0x00000001, 0x00000000); + if (avi_size) { + nvkm_wr32(device, 0x6f0008 + hdmi, avi_infoframe.header); + nvkm_wr32(device, 0x6f000c + hdmi, avi_infoframe.subpack0_low); + nvkm_wr32(device, 0x6f0010 + hdmi, avi_infoframe.subpack0_high); + nvkm_wr32(device, 0x6f0014 + hdmi, avi_infoframe.subpack1_low); + nvkm_wr32(device, 0x6f0018 + hdmi, avi_infoframe.subpack1_high); + nvkm_mask(device, 0x6f0000 + hdmi, 0x00000001, 0x00000001); + } + + /* Vendor-specific InfoFrame (VSI). */ + nvkm_mask(device, 0x6f0100 + hdmi, 0x00010001, 0x00000000); + if (vendor_size) { + nvkm_wr32(device, 0x6f0108 + hdmi, vendor_infoframe.header); + nvkm_wr32(device, 0x6f010c + hdmi, vendor_infoframe.subpack0_low); + nvkm_wr32(device, 0x6f0110 + hdmi, vendor_infoframe.subpack0_high); + nvkm_wr32(device, 0x6f0110 + hdmi, 0x00000000); + nvkm_wr32(device, 0x6f0114 + hdmi, 0x00000000); + nvkm_wr32(device, 0x6f0118 + hdmi, 0x00000000); + nvkm_wr32(device, 0x6f011c + hdmi, 0x00000000); + nvkm_wr32(device, 0x6f0120 + hdmi, 0x00000000); + nvkm_wr32(device, 0x6f0124 + hdmi, 0x00000000); + nvkm_mask(device, 0x6f0100 + hdmi, 0x00000001, 0x00000001); + } + + + /* General Control (GCP). */ + nvkm_mask(device, 0x6f00c0 + hdmi, 0x00000001, 0x00000000); + nvkm_wr32(device, 0x6f00cc + hdmi, 0x00000010); + nvkm_mask(device, 0x6f00c0 + hdmi, 0x00000001, 0x00000001); + + /* Audio Clock Regeneration (ACR). */ + nvkm_wr32(device, 0x6f0080 + hdmi, 0x82000000); + + /* NV_PDISP_SF_HDMI_CTRL. */ + nvkm_mask(device, 0x6165c0 + hoff, 0x401f007f, ctrl); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h index 57030b3a4a75..7d55faf52fcb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h @@ -52,6 +52,14 @@ void nv50_head_rgpos(struct nvkm_head *, u16 *, u16 *); #define HEAD_DBG(h,f,a...) HEAD_MSG((h), debug, f, ##a) int nv04_head_new(struct nvkm_disp *, int id); + +int nv50_head_cnt(struct nvkm_disp *, unsigned long *); int nv50_head_new(struct nvkm_disp *, int id); + +int gf119_head_cnt(struct nvkm_disp *, unsigned long *); int gf119_head_new(struct nvkm_disp *, int id); +void gf119_head_rgclk(struct nvkm_head *, int); + +int gv100_head_cnt(struct nvkm_disp *, unsigned long *); +int gv100_head_new(struct nvkm_disp *, int id); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgf119.c index 9fd7ae331308..e86298b35902 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgf119.c @@ -39,7 +39,7 @@ gf119_head_vblank_get(struct nvkm_head *head) nvkm_mask(device, 0x6100c0 + hoff, 0x00000001, 0x00000001); } -static void +void gf119_head_rgclk(struct nvkm_head *head, int div) { struct nvkm_device *device = head->disp->engine.subdev.device; @@ -92,8 +92,13 @@ gf119_head = { int gf119_head_new(struct nvkm_disp *disp, int id) { - struct nvkm_device *device = disp->engine.subdev.device; - if (!(nvkm_rd32(device, 0x612004) & (0x00000001 << id))) - return 0; return nvkm_head_new_(&gf119_head, disp, id); } + +int +gf119_head_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + struct nvkm_device *device = disp->engine.subdev.device; + *pmask = nvkm_rd32(device, 0x612004) & 0x0000000f; + return nvkm_rd32(device, 0x022448); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgv100.c new file mode 100644 index 000000000000..1a061b42ae5c --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgv100.c @@ -0,0 +1,105 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "head.h" + +static void +gv100_head_vblank_put(struct nvkm_head *head) +{ + struct nvkm_device *device = head->disp->engine.subdev.device; + nvkm_mask(device, 0x611d80 + (head->id * 4), 0x00000004, 0x00000000); +} + +static void +gv100_head_vblank_get(struct nvkm_head *head) +{ + struct nvkm_device *device = head->disp->engine.subdev.device; + nvkm_mask(device, 0x611d80 + (head->id * 4), 0x00000004, 0x00000004); +} + +static void +gv100_head_rgpos(struct nvkm_head *head, u16 *hline, u16 *vline) +{ + struct nvkm_device *device = head->disp->engine.subdev.device; + const u32 hoff = head->id * 0x800; + /* vline read locks hline. */ + *vline = nvkm_rd32(device, 0x616330 + hoff) & 0x0000ffff; + *hline = nvkm_rd32(device, 0x616334 + hoff) & 0x0000ffff; +} + +static void +gv100_head_state(struct nvkm_head *head, struct nvkm_head_state *state) +{ + struct nvkm_device *device = head->disp->engine.subdev.device; + const u32 hoff = (state == &head->arm) * 0x8000 + head->id * 0x400; + u32 data; + + data = nvkm_rd32(device, 0x682064 + hoff); + state->vtotal = (data & 0xffff0000) >> 16; + state->htotal = (data & 0x0000ffff); + data = nvkm_rd32(device, 0x682068 + hoff); + state->vsynce = (data & 0xffff0000) >> 16; + state->hsynce = (data & 0x0000ffff); + data = nvkm_rd32(device, 0x68206c + hoff); + state->vblanke = (data & 0xffff0000) >> 16; + state->hblanke = (data & 0x0000ffff); + data = nvkm_rd32(device, 0x682070 + hoff); + state->vblanks = (data & 0xffff0000) >> 16; + state->hblanks = (data & 0x0000ffff); + state->hz = nvkm_rd32(device, 0x68200c + hoff); + + data = nvkm_rd32(device, 0x682004 + hoff); + switch ((data & 0x000000f0) >> 4) { + case 5: state->or.depth = 30; break; + case 4: state->or.depth = 24; break; + case 1: state->or.depth = 18; break; + default: + state->or.depth = 18; + WARN_ON(1); + break; + } +} + +static const struct nvkm_head_func +gv100_head = { + .state = gv100_head_state, + .rgpos = gv100_head_rgpos, + .rgclk = gf119_head_rgclk, + .vblank_get = gv100_head_vblank_get, + .vblank_put = gv100_head_vblank_put, +}; + +int +gv100_head_new(struct nvkm_disp *disp, int id) +{ + struct nvkm_device *device = disp->engine.subdev.device; + if (!(nvkm_rd32(device, 0x610060) & (0x00000001 << id))) + return 0; + return nvkm_head_new_(&gv100_head, disp, id); +} + +int +gv100_head_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + struct nvkm_device *device = disp->engine.subdev.device; + *pmask = nvkm_rd32(device, 0x610060) & 0x000000ff; + return nvkm_rd32(device, 0x610074) & 0x0000000f; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv50.c index c80d06d5168f..e7d5c397cd29 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv50.c @@ -90,3 +90,10 @@ nv50_head_new(struct nvkm_disp *disp, int id) { return nvkm_head_new_(&nv50_head, disp, id); } + +int +nv50_head_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + *pmask = 3; + return 2; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index 4548c031b937..e0b4e0c5704e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -30,7 +30,7 @@ struct nvkm_ior { UNKNOWN } proto:3; unsigned link:2; - unsigned head:4; + unsigned head:8; } arm, asy; /* Armed DP state. */ @@ -106,7 +106,6 @@ nv50_sor_link(struct nvkm_ior *ior) return nv50_ior_base(ior) + ((ior->asy.link == 2) * 0x80); } -int nv50_sor_new_(const struct nvkm_ior_func *, struct nvkm_disp *, int id); void nv50_sor_state(struct nvkm_ior *, struct nvkm_ior_state *); void nv50_sor_power(struct nvkm_ior *, bool, bool, bool, bool, bool); void nv50_sor_clock(struct nvkm_ior *); @@ -122,7 +121,6 @@ void g94_sor_dp_watermark(struct nvkm_ior *, int, u8); void gt215_sor_dp_audio(struct nvkm_ior *, int, bool); -int gf119_sor_new_(const struct nvkm_ior_func *, struct nvkm_disp *, int id); void gf119_sor_state(struct nvkm_ior *, struct nvkm_ior_state *); void gf119_sor_clock(struct nvkm_ior *); int gf119_sor_dp_links(struct nvkm_ior *, struct nvkm_i2c_aux *); @@ -135,10 +133,15 @@ void gf119_sor_dp_watermark(struct nvkm_ior *, int, u8); void gm107_sor_dp_pattern(struct nvkm_ior *, int); +void gm200_sor_route_set(struct nvkm_outp *, struct nvkm_ior *); +int gm200_sor_route_get(struct nvkm_outp *, int *); +void gm200_sor_dp_drive(struct nvkm_ior *, int, int, int, int, int); + void g84_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); void gt215_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); void gf119_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); void gk104_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); +void gv100_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); void gt215_hda_hpd(struct nvkm_ior *, int, bool); void gt215_hda_eld(struct nvkm_ior *, u8 *, u8); @@ -153,19 +156,34 @@ void gf119_hda_eld(struct nvkm_ior *, u8 *, u8); #define IOR_WARN(i,f,a...) IOR_MSG((i), warn, f, ##a) #define IOR_DBG(i,f,a...) IOR_MSG((i), debug, f, ##a) +int nv50_dac_cnt(struct nvkm_disp *, unsigned long *); int nv50_dac_new(struct nvkm_disp *, int); + +int gf119_dac_cnt(struct nvkm_disp *, unsigned long *); int gf119_dac_new(struct nvkm_disp *, int); +int nv50_pior_cnt(struct nvkm_disp *, unsigned long *); int nv50_pior_new(struct nvkm_disp *, int); +int nv50_sor_cnt(struct nvkm_disp *, unsigned long *); int nv50_sor_new(struct nvkm_disp *, int); + int g84_sor_new(struct nvkm_disp *, int); + +int g94_sor_cnt(struct nvkm_disp *, unsigned long *); int g94_sor_new(struct nvkm_disp *, int); + int mcp77_sor_new(struct nvkm_disp *, int); int gt215_sor_new(struct nvkm_disp *, int); int mcp89_sor_new(struct nvkm_disp *, int); + +int gf119_sor_cnt(struct nvkm_disp *, unsigned long *); int gf119_sor_new(struct nvkm_disp *, int); + int gk104_sor_new(struct nvkm_disp *, int); int gm107_sor_new(struct nvkm_disp *, int); int gm200_sor_new(struct nvkm_disp *, int); + +int gv100_sor_cnt(struct nvkm_disp *, unsigned long *); +int gv100_sor_new(struct nvkm_disp *, int); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c index d7e0fbb12bf1..cfdce23ab83a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c @@ -26,18 +26,20 @@ static const struct nv50_disp_func mcp77_disp = { + .init = nv50_disp_init, + .fini = nv50_disp_fini, .intr = nv50_disp_intr, .uevent = &nv50_disp_chan_uevent, .super = nv50_disp_super, .root = &g94_disp_root_oclass, - .head.new = nv50_head_new, - .dac = { .nr = 3, .new = nv50_dac_new }, - .sor = { .nr = 4, .new = mcp77_sor_new }, - .pior = { .nr = 3, .new = nv50_pior_new }, + .head = { .cnt = nv50_head_cnt, .new = nv50_head_new }, + .dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new }, + .sor = { .cnt = g94_sor_cnt, .new = mcp77_sor_new }, + .pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new }, }; int mcp77_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return nv50_disp_new_(&mcp77_disp, device, index, 2, pdisp); + return nv50_disp_new_(&mcp77_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c index 7b75c57c12ed..85d9329cfa0e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c @@ -26,18 +26,20 @@ static const struct nv50_disp_func mcp89_disp = { + .init = nv50_disp_init, + .fini = nv50_disp_fini, .intr = nv50_disp_intr, .uevent = &nv50_disp_chan_uevent, .super = nv50_disp_super, .root = >215_disp_root_oclass, - .head.new = nv50_head_new, - .dac = { .nr = 3, .new = nv50_dac_new }, - .sor = { .nr = 4, .new = mcp89_sor_new }, - .pior = { .nr = 3, .new = nv50_pior_new }, + .head = { .cnt = nv50_head_cnt, .new = nv50_head_new }, + .dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new }, + .sor = { .cnt = g94_sor_cnt, .new = mcp89_sor_new }, + .pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new }, }; int mcp89_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return nv50_disp_new_(&mcp89_disp, device, index, 2, pdisp); + return nv50_disp_new_(&mcp89_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c index 0c570dbd3021..f89c7b977aa5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c @@ -24,11 +24,12 @@ #include "nv50.h" #include "head.h" #include "ior.h" +#include "channv50.h" #include "rootnv50.h" #include <core/client.h> #include <core/enum.h> -#include <core/gpuobj.h> +#include <core/ramht.h> #include <subdev/bios.h> #include <subdev/bios/disp.h> #include <subdev/bios/init.h> @@ -49,29 +50,115 @@ nv50_disp_intr_(struct nvkm_disp *base) disp->func->intr(disp); } +static void +nv50_disp_fini_(struct nvkm_disp *base) +{ + struct nv50_disp *disp = nv50_disp(base); + disp->func->fini(disp); +} + +static int +nv50_disp_init_(struct nvkm_disp *base) +{ + struct nv50_disp *disp = nv50_disp(base); + return disp->func->init(disp); +} + static void * nv50_disp_dtor_(struct nvkm_disp *base) { struct nv50_disp *disp = nv50_disp(base); + + nvkm_ramht_del(&disp->ramht); + nvkm_gpuobj_del(&disp->inst); + nvkm_event_fini(&disp->uevent); if (disp->wq) destroy_workqueue(disp->wq); + return disp; } +static int +nv50_disp_oneinit_(struct nvkm_disp *base) +{ + struct nv50_disp *disp = nv50_disp(base); + const struct nv50_disp_func *func = disp->func; + struct nvkm_subdev *subdev = &disp->base.engine.subdev; + struct nvkm_device *device = subdev->device; + int ret, i; + + if (func->wndw.cnt) { + disp->wndw.nr = func->wndw.cnt(&disp->base, &disp->wndw.mask); + nvkm_debug(subdev, "Window(s): %d (%08lx)\n", + disp->wndw.nr, disp->wndw.mask); + } + + disp->head.nr = func->head.cnt(&disp->base, &disp->head.mask); + nvkm_debug(subdev, " Head(s): %d (%02lx)\n", + disp->head.nr, disp->head.mask); + for_each_set_bit(i, &disp->head.mask, disp->head.nr) { + ret = func->head.new(&disp->base, i); + if (ret) + return ret; + } + + if (func->dac.cnt) { + disp->dac.nr = func->dac.cnt(&disp->base, &disp->dac.mask); + nvkm_debug(subdev, " DAC(s): %d (%02lx)\n", + disp->dac.nr, disp->dac.mask); + for_each_set_bit(i, &disp->dac.mask, disp->dac.nr) { + ret = func->dac.new(&disp->base, i); + if (ret) + return ret; + } + } + + if (func->pior.cnt) { + disp->pior.nr = func->pior.cnt(&disp->base, &disp->pior.mask); + nvkm_debug(subdev, " PIOR(s): %d (%02lx)\n", + disp->pior.nr, disp->pior.mask); + for_each_set_bit(i, &disp->pior.mask, disp->pior.nr) { + ret = func->pior.new(&disp->base, i); + if (ret) + return ret; + } + } + + disp->sor.nr = func->sor.cnt(&disp->base, &disp->sor.mask); + nvkm_debug(subdev, " SOR(s): %d (%02lx)\n", + disp->sor.nr, disp->sor.mask); + for_each_set_bit(i, &disp->sor.mask, disp->sor.nr) { + ret = func->sor.new(&disp->base, i); + if (ret) + return ret; + } + + ret = nvkm_gpuobj_new(device, 0x10000, 0x10000, false, NULL, + &disp->inst); + if (ret) + return ret; + + return nvkm_ramht_new(device, func->ramht_size ? func->ramht_size : + 0x1000, 0, disp->inst, &disp->ramht); +} + static const struct nvkm_disp_func nv50_disp_ = { .dtor = nv50_disp_dtor_, + .oneinit = nv50_disp_oneinit_, + .init = nv50_disp_init_, + .fini = nv50_disp_fini_, .intr = nv50_disp_intr_, .root = nv50_disp_root_, }; int nv50_disp_new_(const struct nv50_disp_func *func, struct nvkm_device *device, - int index, int heads, struct nvkm_disp **pdisp) + int index, struct nvkm_disp **pdisp) { struct nv50_disp *disp; - int ret, i; + int ret; if (!(disp = kzalloc(sizeof(*disp), GFP_KERNEL))) return -ENOMEM; @@ -85,33 +172,11 @@ nv50_disp_new_(const struct nv50_disp_func *func, struct nvkm_device *device, disp->wq = create_singlethread_workqueue("nvkm-disp"); if (!disp->wq) return -ENOMEM; - INIT_WORK(&disp->supervisor, func->super); - - for (i = 0; func->head.new && i < heads; i++) { - ret = func->head.new(&disp->base, i); - if (ret) - return ret; - } - - for (i = 0; func->dac.new && i < func->dac.nr; i++) { - ret = func->dac.new(&disp->base, i); - if (ret) - return ret; - } - - for (i = 0; func->pior.new && i < func->pior.nr; i++) { - ret = func->pior.new(&disp->base, i); - if (ret) - return ret; - } - for (i = 0; func->sor.new && i < func->sor.nr; i++) { - ret = func->sor.new(&disp->base, i); - if (ret) - return ret; - } + INIT_WORK(&disp->supervisor, func->super); - return nvkm_event_init(func->uevent, 1, 1 + (heads * 4), &disp->uevent); + return nvkm_event_init(func->uevent, 1, ARRAY_SIZE(disp->chan), + &disp->uevent); } static u32 @@ -613,20 +678,96 @@ nv50_disp_intr(struct nv50_disp *disp) } } +void +nv50_disp_fini(struct nv50_disp *disp) +{ + struct nvkm_device *device = disp->base.engine.subdev.device; + /* disable all interrupts */ + nvkm_wr32(device, 0x610024, 0x00000000); + nvkm_wr32(device, 0x610020, 0x00000000); +} + +int +nv50_disp_init(struct nv50_disp *disp) +{ + struct nvkm_device *device = disp->base.engine.subdev.device; + struct nvkm_head *head; + u32 tmp; + int i; + + /* The below segments of code copying values from one register to + * another appear to inform EVO of the display capabilities or + * something similar. NFI what the 0x614004 caps are for.. + */ + tmp = nvkm_rd32(device, 0x614004); + nvkm_wr32(device, 0x610184, tmp); + + /* ... CRTC caps */ + list_for_each_entry(head, &disp->base.head, head) { + tmp = nvkm_rd32(device, 0x616100 + (head->id * 0x800)); + nvkm_wr32(device, 0x610190 + (head->id * 0x10), tmp); + tmp = nvkm_rd32(device, 0x616104 + (head->id * 0x800)); + nvkm_wr32(device, 0x610194 + (head->id * 0x10), tmp); + tmp = nvkm_rd32(device, 0x616108 + (head->id * 0x800)); + nvkm_wr32(device, 0x610198 + (head->id * 0x10), tmp); + tmp = nvkm_rd32(device, 0x61610c + (head->id * 0x800)); + nvkm_wr32(device, 0x61019c + (head->id * 0x10), tmp); + } + + /* ... DAC caps */ + for (i = 0; i < disp->dac.nr; i++) { + tmp = nvkm_rd32(device, 0x61a000 + (i * 0x800)); + nvkm_wr32(device, 0x6101d0 + (i * 0x04), tmp); + } + + /* ... SOR caps */ + for (i = 0; i < disp->sor.nr; i++) { + tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800)); + nvkm_wr32(device, 0x6101e0 + (i * 0x04), tmp); + } + + /* ... PIOR caps */ + for (i = 0; i < disp->pior.nr; i++) { + tmp = nvkm_rd32(device, 0x61e000 + (i * 0x800)); + nvkm_wr32(device, 0x6101f0 + (i * 0x04), tmp); + } + + /* steal display away from vbios, or something like that */ + if (nvkm_rd32(device, 0x610024) & 0x00000100) { + nvkm_wr32(device, 0x610024, 0x00000100); + nvkm_mask(device, 0x6194e8, 0x00000001, 0x00000000); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x6194e8) & 0x00000002)) + break; + ) < 0) + return -EBUSY; + } + + /* point at display engine memory area (hash table, objects) */ + nvkm_wr32(device, 0x610010, (disp->inst->addr >> 8) | 9); + + /* enable supervisor interrupts, disable everything else */ + nvkm_wr32(device, 0x61002c, 0x00000370); + nvkm_wr32(device, 0x610028, 0x00000000); + return 0; +} + static const struct nv50_disp_func nv50_disp = { + .init = nv50_disp_init, + .fini = nv50_disp_fini, .intr = nv50_disp_intr, .uevent = &nv50_disp_chan_uevent, .super = nv50_disp_super, .root = &nv50_disp_root_oclass, - .head.new = nv50_head_new, - .dac = { .nr = 3, .new = nv50_dac_new }, - .sor = { .nr = 2, .new = nv50_sor_new }, - .pior = { .nr = 3, .new = nv50_pior_new }, + .head = { .cnt = nv50_head_cnt, .new = nv50_head_new }, + .dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new }, + .sor = { .cnt = nv50_sor_cnt, .new = nv50_sor_new }, + .pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new }, }; int nv50_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return nv50_disp_new_(&nv50_disp, device, index, 2, pdisp); + return nv50_disp_new_(&nv50_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h index eb0b8acb1c5b..8580382ab248 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h @@ -16,14 +16,26 @@ struct nv50_disp { struct nvkm_event uevent; struct { + unsigned long mask; + int nr; + } wndw, head, dac; + + struct { + unsigned long mask; + int nr; u32 lvdsconf; } sor; struct { + unsigned long mask; + int nr; u8 type[3]; } pior; - struct nv50_disp_chan *chan[21]; + struct nvkm_gpuobj *inst; + struct nvkm_ramht *ramht; + + struct nv50_disp_chan *chan[81]; }; void nv50_disp_super_1(struct nv50_disp *); @@ -34,11 +46,11 @@ void nv50_disp_super_2_2(struct nv50_disp *, struct nvkm_head *); void nv50_disp_super_3_0(struct nv50_disp *, struct nvkm_head *); int nv50_disp_new_(const struct nv50_disp_func *, struct nvkm_device *, - int index, int heads, struct nvkm_disp **); -int gf119_disp_new_(const struct nv50_disp_func *, struct nvkm_device *, - int index, struct nvkm_disp **); + int index, struct nvkm_disp **); struct nv50_disp_func { + int (*init)(struct nv50_disp *); + void (*fini)(struct nv50_disp *); void (*intr)(struct nv50_disp *); void (*intr_error)(struct nv50_disp *, int chid); @@ -48,28 +60,20 @@ struct nv50_disp_func { const struct nvkm_disp_oclass *root; struct { + int (*cnt)(struct nvkm_disp *, unsigned long *mask); int (*new)(struct nvkm_disp *, int id); - } head; + } wndw, head, dac, sor, pior; - struct { - int nr; - int (*new)(struct nvkm_disp *, int id); - } dac; - - struct { - int nr; - int (*new)(struct nvkm_disp *, int id); - } sor; - - struct { - int nr; - int (*new)(struct nvkm_disp *, int id); - } pior; + u16 ramht_size; }; +int nv50_disp_init(struct nv50_disp *); +void nv50_disp_fini(struct nv50_disp *); void nv50_disp_intr(struct nv50_disp *); void nv50_disp_super(struct work_struct *); +int gf119_disp_init(struct nv50_disp *); +void gf119_disp_fini(struct nv50_disp *); void gf119_disp_intr(struct nv50_disp *); void gf119_disp_super(struct work_struct *); void gf119_disp_intr_error(struct nv50_disp *, int); @@ -77,4 +81,12 @@ void gf119_disp_intr_error(struct nv50_disp *, int); void nv50_disp_dptmds_war_2(struct nv50_disp *, struct dcb_output *); void nv50_disp_dptmds_war_3(struct nv50_disp *, struct dcb_output *); void nv50_disp_update_sppll1(struct nv50_disp *); + +extern const struct nvkm_event_func nv50_disp_chan_uevent; +int nv50_disp_chan_uevent_ctor(struct nvkm_object *, void *, u32, + struct nvkm_notify *); +void nv50_disp_chan_uevent_send(struct nv50_disp *, int); + +extern const struct nvkm_event_func gf119_disp_chan_uevent; +extern const struct nvkm_event_func gv100_disp_chan_uevent; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c index 1f9fd3403f07..1ae0bcfc89b9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c @@ -22,16 +22,11 @@ * Authors: Ben Skeggs */ #include "channv50.h" -#include "rootnv50.h" -#include <nvif/class.h> - -const struct nv50_disp_pioc_oclass -gf119_disp_oimm_oclass = { - .base.oclass = GF110_DISP_OVERLAY, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_oimm_new, - .func = &gf119_disp_pioc_func, - .chid = { 9, 9 }, -}; +int +gf119_disp_oimm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_oimm_new_(&gf119_disp_pioc_func, disp, 9, 9, + oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c deleted file mode 100644 index 0c09fe85e952..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ -#include "channv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> - -const struct nv50_disp_pioc_oclass -gk104_disp_oimm_oclass = { - .base.oclass = GK104_DISP_OVERLAY, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_oimm_new, - .func = &gf119_disp_pioc_func, - .chid = { 9, 9 }, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c index abf82365c671..30ffb1008505 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c @@ -22,16 +22,11 @@ * Authors: Ben Skeggs <bskeggs@redhat.com> */ #include "channv50.h" -#include "rootnv50.h" -#include <nvif/class.h> - -const struct nv50_disp_pioc_oclass -gp102_disp_oimm_oclass = { - .base.oclass = GK104_DISP_OVERLAY, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_oimm_new, - .func = &gf119_disp_pioc_func, - .chid = { 9, 13 }, -}; +int +gp102_disp_oimm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_oimm_new_(&gf119_disp_pioc_func, disp, 9, 13, + oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c deleted file mode 100644 index 1281db28aebd..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ -#include "channv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> - -const struct nv50_disp_pioc_oclass -gt215_disp_oimm_oclass = { - .base.oclass = GT214_DISP_OVERLAY, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_oimm_new, - .func = &nv50_disp_pioc_func, - .chid = { 5, 5 }, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c index f3b0fa2c5924..0db99bfe9db9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c @@ -23,30 +23,26 @@ */ #include "channv50.h" #include "head.h" -#include "rootnv50.h" #include <core/client.h> -#include <nvif/class.h> #include <nvif/cl507b.h> #include <nvif/unpack.h> int -nv50_disp_oimm_new(const struct nv50_disp_chan_func *func, - const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int ctrl, int user, - const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv50_disp_oimm_new_(const struct nv50_disp_chan_func *func, + struct nv50_disp *disp, int ctrl, int user, + const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) { union { struct nv50_disp_overlay_v0 v0; - } *args = data; + } *args = argv; struct nvkm_object *parent = oclass->parent; - struct nv50_disp *disp = root->disp; int head, ret = -ENOSYS; - nvif_ioctl(parent, "create disp overlay size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { + nvif_ioctl(parent, "create disp overlay size %d\n", argc); + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { nvif_ioctl(parent, "create disp overlay vers %d head %d\n", args->v0.version, args->v0.head); if (!nvkm_head_find(&disp->base, args->v0.head)) @@ -55,16 +51,14 @@ nv50_disp_oimm_new(const struct nv50_disp_chan_func *func, } else return ret; - return nv50_disp_chan_new_(func, mthd, root, ctrl + head, user + head, + return nv50_disp_chan_new_(func, NULL, disp, ctrl + head, user + head, head, oclass, pobject); } -const struct nv50_disp_pioc_oclass -nv50_disp_oimm_oclass = { - .base.oclass = NV50_DISP_OVERLAY, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_oimm_new, - .func = &nv50_disp_pioc_func, - .chid = { 5, 5 }, -}; +int +nv50_disp_oimm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_oimm_new_(&nv50_disp_pioc_func, disp, 5, 5, + oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlyg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlyg84.c index db6234eebc61..31b915d48699 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlyg84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlyg84.c @@ -21,10 +21,7 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "channv50.h" static const struct nv50_disp_mthd_list g84_disp_ovly_mthd_base = { @@ -54,8 +51,8 @@ g84_disp_ovly_mthd_base = { } }; -const struct nv50_disp_chan_mthd -g84_disp_ovly_chan_mthd = { +static const struct nv50_disp_chan_mthd +g84_disp_ovly_mthd = { .name = "Overlay", .addr = 0x000540, .prev = 0x000004, @@ -65,13 +62,10 @@ g84_disp_ovly_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -g84_disp_ovly_oclass = { - .base.oclass = G82_DISP_OVERLAY_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_ovly_new, - .func = &nv50_disp_dmac_func, - .mthd = &g84_disp_ovly_chan_mthd, - .chid = 3, -}; +int +g84_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_ovly_new_(&nv50_disp_dmac_func, &g84_disp_ovly_mthd, + disp, 3, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygf119.c index 5985879abd23..83fd534c44da 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygf119.c @@ -21,10 +21,7 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "channv50.h" static const struct nv50_disp_mthd_list gf119_disp_ovly_mthd_base = { @@ -79,7 +76,7 @@ gf119_disp_ovly_mthd_base = { }; static const struct nv50_disp_chan_mthd -gf119_disp_ovly_chan_mthd = { +gf119_disp_ovly_mthd = { .name = "Overlay", .addr = 0x001000, .prev = -0x020000, @@ -89,13 +86,10 @@ gf119_disp_ovly_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -gf119_disp_ovly_oclass = { - .base.oclass = GF110_DISP_OVERLAY_CONTROL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_ovly_new, - .func = &gf119_disp_dmac_func, - .mthd = &gf119_disp_ovly_chan_mthd, - .chid = 5, -}; +int +gf119_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_ovly_new_(&gf119_disp_dmac_func, &gf119_disp_ovly_mthd, + disp, 5, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c index 2f0220b39f34..a7acacbc92c1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c @@ -21,10 +21,7 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "channv50.h" static const struct nv50_disp_mthd_list gk104_disp_ovly_mthd_base = { @@ -81,7 +78,7 @@ gk104_disp_ovly_mthd_base = { }; const struct nv50_disp_chan_mthd -gk104_disp_ovly_chan_mthd = { +gk104_disp_ovly_mthd = { .name = "Overlay", .addr = 0x001000, .prev = -0x020000, @@ -91,13 +88,10 @@ gk104_disp_ovly_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -gk104_disp_ovly_oclass = { - .base.oclass = GK104_DISP_OVERLAY_CONTROL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_ovly_new, - .func = &gf119_disp_dmac_func, - .mthd = &gk104_disp_ovly_chan_mthd, - .chid = 5, -}; +int +gk104_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_ovly_new_(&gf119_disp_dmac_func, &gk104_disp_ovly_mthd, + disp, 5, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c index 589bd2f12b41..e0eca6ea914c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c @@ -21,18 +21,12 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" +#include "channv50.h" -#include <nvif/class.h> - -const struct nv50_disp_dmac_oclass -gp102_disp_ovly_oclass = { - .base.oclass = GK104_DISP_OVERLAY_CONTROL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_ovly_new, - .func = &gp102_disp_dmac_func, - .mthd = &gk104_disp_ovly_chan_mthd, - .chid = 5, -}; +int +gp102_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_ovly_new_(&gp102_disp_dmac_func, &gk104_disp_ovly_mthd, + disp, 5, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt200.c index f858053db83d..dc60cd00dc16 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt200.c @@ -21,10 +21,7 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "channv50.h" static const struct nv50_disp_mthd_list gt200_disp_ovly_mthd_base = { @@ -58,7 +55,7 @@ gt200_disp_ovly_mthd_base = { }; static const struct nv50_disp_chan_mthd -gt200_disp_ovly_chan_mthd = { +gt200_disp_ovly_mthd = { .name = "Overlay", .addr = 0x000540, .prev = 0x000004, @@ -68,13 +65,10 @@ gt200_disp_ovly_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -gt200_disp_ovly_oclass = { - .base.oclass = GT200_DISP_OVERLAY_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_ovly_new, - .func = &nv50_disp_dmac_func, - .mthd = >200_disp_ovly_chan_mthd, - .chid = 3, -}; +int +gt200_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_ovly_new_(&nv50_disp_dmac_func, >200_disp_ovly_mthd, + disp, 3, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt215.c deleted file mode 100644 index c947e1e16a37..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt215.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2015 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs <bskeggs@redhat.com> - */ -#include "dmacnv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> - -const struct nv50_disp_dmac_oclass -gt215_disp_ovly_oclass = { - .base.oclass = GT214_DISP_OVERLAY_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_ovly_new, - .func = &nv50_disp_dmac_func, - .mthd = &g84_disp_ovly_chan_mthd, - .chid = 3, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlynv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlynv50.c index 9ebaaa6e9e33..6974c12c4518 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlynv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlynv50.c @@ -21,33 +21,30 @@ * * Authors: Ben Skeggs */ -#include "dmacnv50.h" +#include "channv50.h" #include "head.h" -#include "rootnv50.h" #include <core/client.h> -#include <nvif/class.h> #include <nvif/cl507e.h> #include <nvif/unpack.h> int -nv50_disp_ovly_new(const struct nv50_disp_dmac_func *func, - const struct nv50_disp_chan_mthd *mthd, - struct nv50_disp_root *root, int chid, - const struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv50_disp_ovly_new_(const struct nv50_disp_chan_func *func, + const struct nv50_disp_chan_mthd *mthd, + struct nv50_disp *disp, int chid, + const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) { union { struct nv50_disp_overlay_channel_dma_v0 v0; - } *args = data; + } *args = argv; struct nvkm_object *parent = oclass->parent; - struct nv50_disp *disp = root->disp; int head, ret = -ENOSYS; u64 push; - nvif_ioctl(parent, "create disp overlay channel dma size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { + nvif_ioctl(parent, "create disp overlay channel dma size %d\n", argc); + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { nvif_ioctl(parent, "create disp overlay channel dma vers %d " "pushbuf %016llx head %d\n", args->v0.version, args->v0.pushbuf, args->v0.head); @@ -58,7 +55,7 @@ nv50_disp_ovly_new(const struct nv50_disp_dmac_func *func, } else return ret; - return nv50_disp_dmac_new_(func, mthd, root, chid + head, + return nv50_disp_dmac_new_(func, mthd, disp, chid + head, head, push, oclass, pobject); } @@ -91,7 +88,7 @@ nv50_disp_ovly_mthd_base = { }; static const struct nv50_disp_chan_mthd -nv50_disp_ovly_chan_mthd = { +nv50_disp_ovly_mthd = { .name = "Overlay", .addr = 0x000540, .prev = 0x000004, @@ -101,13 +98,10 @@ nv50_disp_ovly_chan_mthd = { } }; -const struct nv50_disp_dmac_oclass -nv50_disp_ovly_oclass = { - .base.oclass = NV50_DISP_OVERLAY_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_ovly_new, - .func = &nv50_disp_dmac_func, - .mthd = &nv50_disp_ovly_chan_mthd, - .chid = 3, -}; +int +nv50_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return nv50_disp_ovly_new_(&nv50_disp_dmac_func, &nv50_disp_ovly_mthd, + disp, 3, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c index 0abaa6431943..5296e7bee813 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c @@ -29,7 +29,7 @@ static void gf119_disp_pioc_fini(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->root->disp; + struct nv50_disp *disp = chan->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; int ctrl = chan->chid.ctrl; @@ -43,24 +43,17 @@ gf119_disp_pioc_fini(struct nv50_disp_chan *chan) nvkm_error(subdev, "ch %d fini: %08x\n", user, nvkm_rd32(device, 0x610490 + (ctrl * 0x10))); } - - /* disable error reporting and completion notification */ - nvkm_mask(device, 0x610090, 0x00000001 << user, 0x00000000); - nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000000); } static int gf119_disp_pioc_init(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->root->disp; + struct nv50_disp *disp = chan->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; int ctrl = chan->chid.ctrl; int user = chan->chid.user; - /* enable error reporting */ - nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user); - /* activate channel */ nvkm_wr32(device, 0x610490 + (ctrl * 0x10), 0x00000001); if (nvkm_msec(device, 2000, @@ -80,4 +73,6 @@ const struct nv50_disp_chan_func gf119_disp_pioc_func = { .init = gf119_disp_pioc_init, .fini = gf119_disp_pioc_fini, + .intr = gf119_disp_chan_intr, + .user = nv50_disp_chan_user, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c index 0211e0e8a35f..4faed6fce682 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c @@ -29,7 +29,7 @@ static void nv50_disp_pioc_fini(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->root->disp; + struct nv50_disp *disp = chan->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; int ctrl = chan->chid.ctrl; @@ -48,7 +48,7 @@ nv50_disp_pioc_fini(struct nv50_disp_chan *chan) static int nv50_disp_pioc_init(struct nv50_disp_chan *chan) { - struct nv50_disp *disp = chan->root->disp; + struct nv50_disp *disp = chan->disp; struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; int ctrl = chan->chid.ctrl; @@ -82,4 +82,6 @@ const struct nv50_disp_chan_func nv50_disp_pioc_func = { .init = nv50_disp_pioc_init, .fini = nv50_disp_pioc_fini, + .intr = nv50_disp_chan_intr, + .user = nv50_disp_chan_user, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piornv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piornv50.c index 99b3b9050635..e997a207f546 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piornv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piornv50.c @@ -127,8 +127,13 @@ nv50_pior = { int nv50_pior_new(struct nvkm_disp *disp, int id) { - struct nvkm_device *device = disp->engine.subdev.device; - if (!(nvkm_rd32(device, 0x610184) & (0x10000000 << id))) - return 0; return nvkm_ior_new_(&nv50_pior, disp, PIOR, id); } + +int +nv50_pior_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + struct nvkm_device *device = disp->engine.subdev.device; + *pmask = (nvkm_rd32(device, 0x610184) & 0x70000000) >> 28; + return 3; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h index 6c9bfff6d043..ef66c5f38ad5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h @@ -12,6 +12,9 @@ void nvkm_disp_vblank(struct nvkm_disp *, int head); struct nvkm_disp_func { void *(*dtor)(struct nvkm_disp *); + int (*oneinit)(struct nvkm_disp *); + int (*init)(struct nvkm_disp *); + void (*fini)(struct nvkm_disp *); void (*intr)(struct nvkm_disp *); const struct nvkm_disp_oclass *(*root)(struct nvkm_disp *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg84.c index 721e4f74d1fc..1ed371fd7ddf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg84.c @@ -22,22 +22,19 @@ * Authors: Ben Skeggs */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include <nvif/class.h> static const struct nv50_disp_root_func g84_disp_root = { - .init = nv50_disp_root_init, - .fini = nv50_disp_root_fini, - .dmac = { - &g84_disp_core_oclass, - &g84_disp_base_oclass, - &g84_disp_ovly_oclass, - }, - .pioc = { - &g84_disp_oimm_oclass, - &g84_disp_curs_oclass, + .user = { + {{0,0,G82_DISP_CURSOR }, nv50_disp_curs_new }, + {{0,0,G82_DISP_OVERLAY }, nv50_disp_oimm_new }, + {{0,0,G82_DISP_BASE_CHANNEL_DMA }, g84_disp_base_new }, + {{0,0,G82_DISP_CORE_CHANNEL_DMA }, g84_disp_core_new }, + {{0,0,G82_DISP_OVERLAY_CHANNEL_DMA}, g84_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg94.c index 9493f6edf62b..ef579eb00238 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg94.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg94.c @@ -22,22 +22,19 @@ * Authors: Ben Skeggs */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include <nvif/class.h> static const struct nv50_disp_root_func g94_disp_root = { - .init = nv50_disp_root_init, - .fini = nv50_disp_root_fini, - .dmac = { - &g94_disp_core_oclass, - >200_disp_base_oclass, - >200_disp_ovly_oclass, - }, - .pioc = { - &g84_disp_oimm_oclass, - &g84_disp_curs_oclass, + .user = { + {{0,0, G82_DISP_CURSOR }, nv50_disp_curs_new }, + {{0,0, G82_DISP_OVERLAY }, nv50_disp_oimm_new }, + {{0,0,GT200_DISP_BASE_CHANNEL_DMA }, g84_disp_base_new }, + {{0,0,GT206_DISP_CORE_CHANNEL_DMA }, g94_disp_core_new }, + {{0,0,GT200_DISP_OVERLAY_CHANNEL_DMA}, gt200_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgf119.c index 333c8424b413..fe011165dc02 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgf119.c @@ -22,104 +22,19 @@ * Authors: Ben Skeggs */ #include "rootnv50.h" -#include "head.h" -#include "dmacnv50.h" - -#include <core/ramht.h> -#include <subdev/timer.h> +#include "channv50.h" #include <nvif/class.h> -void -gf119_disp_root_fini(struct nv50_disp_root *root) -{ - struct nvkm_device *device = root->disp->base.engine.subdev.device; - /* disable all interrupts */ - nvkm_wr32(device, 0x6100b0, 0x00000000); -} - -int -gf119_disp_root_init(struct nv50_disp_root *root) -{ - struct nv50_disp *disp = root->disp; - struct nvkm_head *head; - struct nvkm_device *device = disp->base.engine.subdev.device; - u32 tmp; - int i; - - /* The below segments of code copying values from one register to - * another appear to inform EVO of the display capabilities or - * something similar. - */ - - /* ... CRTC caps */ - list_for_each_entry(head, &disp->base.head, head) { - const u32 hoff = head->id * 0x800; - tmp = nvkm_rd32(device, 0x616104 + hoff); - nvkm_wr32(device, 0x6101b4 + hoff, tmp); - tmp = nvkm_rd32(device, 0x616108 + hoff); - nvkm_wr32(device, 0x6101b8 + hoff, tmp); - tmp = nvkm_rd32(device, 0x61610c + hoff); - nvkm_wr32(device, 0x6101bc + hoff, tmp); - } - - /* ... DAC caps */ - for (i = 0; i < disp->func->dac.nr; i++) { - tmp = nvkm_rd32(device, 0x61a000 + (i * 0x800)); - nvkm_wr32(device, 0x6101c0 + (i * 0x800), tmp); - } - - /* ... SOR caps */ - for (i = 0; i < disp->func->sor.nr; i++) { - tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800)); - nvkm_wr32(device, 0x6301c4 + (i * 0x800), tmp); - } - - /* steal display away from vbios, or something like that */ - if (nvkm_rd32(device, 0x6100ac) & 0x00000100) { - nvkm_wr32(device, 0x6100ac, 0x00000100); - nvkm_mask(device, 0x6194e8, 0x00000001, 0x00000000); - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x6194e8) & 0x00000002)) - break; - ) < 0) - return -EBUSY; - } - - /* point at display engine memory area (hash table, objects) */ - nvkm_wr32(device, 0x610010, (root->instmem->addr >> 8) | 9); - - /* enable supervisor interrupts, disable everything else */ - nvkm_wr32(device, 0x610090, 0x00000000); - nvkm_wr32(device, 0x6100a0, 0x00000000); - nvkm_wr32(device, 0x6100b0, 0x00000307); - - /* disable underflow reporting, preventing an intermittent issue - * on some gk104 boards where the production vbios left this - * setting enabled by default. - * - * ftp://download.nvidia.com/open-gpu-doc/gk104-disable-underflow-reporting/1/gk104-disable-underflow-reporting.txt - */ - list_for_each_entry(head, &disp->base.head, head) { - const u32 hoff = head->id * 0x800; - nvkm_mask(device, 0x616308 + hoff, 0x00000111, 0x00000010); - } - - return 0; -} - static const struct nv50_disp_root_func gf119_disp_root = { - .init = gf119_disp_root_init, - .fini = gf119_disp_root_fini, - .dmac = { - &gf119_disp_core_oclass, - &gf119_disp_base_oclass, - &gf119_disp_ovly_oclass, - }, - .pioc = { - &gf119_disp_oimm_oclass, - &gf119_disp_curs_oclass, + .user = { + {{0,0,GF110_DISP_CURSOR }, gf119_disp_curs_new }, + {{0,0,GF110_DISP_OVERLAY }, gf119_disp_oimm_new }, + {{0,0,GF110_DISP_BASE_CHANNEL_DMA }, gf119_disp_base_new }, + {{0,0,GF110_DISP_CORE_CHANNEL_DMA }, gf119_disp_core_new }, + {{0,0,GF110_DISP_OVERLAY_CONTROL_DMA}, gf119_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk104.c index 0bfdb1d1c6ab..9e8ffd348b50 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk104.c @@ -22,22 +22,19 @@ * Authors: Ben Skeggs */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include <nvif/class.h> static const struct nv50_disp_root_func gk104_disp_root = { - .init = gf119_disp_root_init, - .fini = gf119_disp_root_fini, - .dmac = { - &gk104_disp_core_oclass, - &gk104_disp_base_oclass, - &gk104_disp_ovly_oclass, - }, - .pioc = { - &gk104_disp_oimm_oclass, - &gk104_disp_curs_oclass, + .user = { + {{0,0,GK104_DISP_CURSOR }, gf119_disp_curs_new }, + {{0,0,GK104_DISP_OVERLAY }, gf119_disp_oimm_new }, + {{0,0,GK104_DISP_BASE_CHANNEL_DMA }, gf119_disp_base_new }, + {{0,0,GK104_DISP_CORE_CHANNEL_DMA }, gk104_disp_core_new }, + {{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gk104_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk110.c index 1e8dbed8a67c..dc85cc1c9490 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk110.c @@ -22,22 +22,19 @@ * Authors: Ben Skeggs */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include <nvif/class.h> static const struct nv50_disp_root_func gk110_disp_root = { - .init = gf119_disp_root_init, - .fini = gf119_disp_root_fini, - .dmac = { - &gk110_disp_core_oclass, - &gk110_disp_base_oclass, - &gk104_disp_ovly_oclass, - }, - .pioc = { - &gk104_disp_oimm_oclass, - &gk104_disp_curs_oclass, + .user = { + {{0,0,GK104_DISP_CURSOR }, gf119_disp_curs_new }, + {{0,0,GK104_DISP_OVERLAY }, gf119_disp_oimm_new }, + {{0,0,GK110_DISP_BASE_CHANNEL_DMA }, gf119_disp_base_new }, + {{0,0,GK110_DISP_CORE_CHANNEL_DMA }, gk104_disp_core_new }, + {{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gk104_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm107.c index 44c55be69e99..e0181ca08840 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm107.c @@ -22,22 +22,19 @@ * Authors: Ben Skeggs */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include <nvif/class.h> static const struct nv50_disp_root_func gm107_disp_root = { - .init = gf119_disp_root_init, - .fini = gf119_disp_root_fini, - .dmac = { - &gm107_disp_core_oclass, - &gk110_disp_base_oclass, - &gk104_disp_ovly_oclass, - }, - .pioc = { - &gk104_disp_oimm_oclass, - &gk104_disp_curs_oclass, + .user = { + {{0,0,GK104_DISP_CURSOR }, gf119_disp_curs_new }, + {{0,0,GK104_DISP_OVERLAY }, gf119_disp_oimm_new }, + {{0,0,GK110_DISP_BASE_CHANNEL_DMA }, gf119_disp_base_new }, + {{0,0,GM107_DISP_CORE_CHANNEL_DMA }, gk104_disp_core_new }, + {{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gk104_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c index 38f5ee1dfc58..e5e590e19f62 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c @@ -22,22 +22,19 @@ * Authors: Ben Skeggs */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include <nvif/class.h> static const struct nv50_disp_root_func gm200_disp_root = { - .init = gf119_disp_root_init, - .fini = gf119_disp_root_fini, - .dmac = { - &gm200_disp_core_oclass, - &gk110_disp_base_oclass, - &gk104_disp_ovly_oclass, - }, - .pioc = { - &gk104_disp_oimm_oclass, - &gk104_disp_curs_oclass, + .user = { + {{0,0,GK104_DISP_CURSOR }, gf119_disp_curs_new }, + {{0,0,GK104_DISP_OVERLAY }, gf119_disp_oimm_new }, + {{0,0,GK110_DISP_BASE_CHANNEL_DMA }, gf119_disp_base_new }, + {{0,0,GM200_DISP_CORE_CHANNEL_DMA }, gk104_disp_core_new }, + {{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gk104_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c index ac8fdd728ec6..762a1a922e05 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c @@ -22,22 +22,19 @@ * Authors: Ben Skeggs <bskeggs@redhat.com> */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include <nvif/class.h> static const struct nv50_disp_root_func gp100_disp_root = { - .init = gf119_disp_root_init, - .fini = gf119_disp_root_fini, - .dmac = { - &gp100_disp_core_oclass, - &gk110_disp_base_oclass, - &gk104_disp_ovly_oclass, - }, - .pioc = { - &gk104_disp_oimm_oclass, - &gk104_disp_curs_oclass, + .user = { + {{0,0,GK104_DISP_CURSOR }, gf119_disp_curs_new }, + {{0,0,GK104_DISP_OVERLAY }, gf119_disp_oimm_new }, + {{0,0,GK110_DISP_BASE_CHANNEL_DMA }, gf119_disp_base_new }, + {{0,0,GP100_DISP_CORE_CHANNEL_DMA }, gk104_disp_core_new }, + {{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gk104_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c index 37122ca579ad..c7f00946c9af 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c @@ -22,22 +22,19 @@ * Authors: Ben Skeggs <bskeggs@redhat.com> */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include <nvif/class.h> static const struct nv50_disp_root_func gp102_disp_root = { - .init = gf119_disp_root_init, - .fini = gf119_disp_root_fini, - .dmac = { - &gp102_disp_core_oclass, - &gp102_disp_base_oclass, - &gp102_disp_ovly_oclass, - }, - .pioc = { - &gp102_disp_oimm_oclass, - &gp102_disp_curs_oclass, + .user = { + {{0,0,GK104_DISP_CURSOR }, gp102_disp_curs_new }, + {{0,0,GK104_DISP_OVERLAY }, gp102_disp_oimm_new }, + {{0,0,GK110_DISP_BASE_CHANNEL_DMA }, gp102_disp_base_new }, + {{0,0,GP102_DISP_CORE_CHANNEL_DMA }, gp102_disp_core_new }, + {{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gp102_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt200.c index 124a0c24f92c..a6963654087c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt200.c @@ -22,22 +22,19 @@ * Authors: Ben Skeggs */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include <nvif/class.h> static const struct nv50_disp_root_func gt200_disp_root = { - .init = nv50_disp_root_init, - .fini = nv50_disp_root_fini, - .dmac = { - >200_disp_core_oclass, - >200_disp_base_oclass, - >200_disp_ovly_oclass, - }, - .pioc = { - &g84_disp_oimm_oclass, - &g84_disp_curs_oclass, + .user = { + {{0,0, G82_DISP_CURSOR }, nv50_disp_curs_new }, + {{0,0, G82_DISP_OVERLAY }, nv50_disp_oimm_new }, + {{0,0,GT200_DISP_BASE_CHANNEL_DMA }, g84_disp_base_new }, + {{0,0,GT200_DISP_CORE_CHANNEL_DMA }, g84_disp_core_new }, + {{0,0,GT200_DISP_OVERLAY_CHANNEL_DMA}, gt200_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt215.c index dff52f30668b..4fe0a3ae8891 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt215.c @@ -22,22 +22,19 @@ * Authors: Ben Skeggs */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include <nvif/class.h> static const struct nv50_disp_root_func gt215_disp_root = { - .init = nv50_disp_root_init, - .fini = nv50_disp_root_fini, - .dmac = { - >215_disp_core_oclass, - >215_disp_base_oclass, - >215_disp_ovly_oclass, - }, - .pioc = { - >215_disp_oimm_oclass, - >215_disp_curs_oclass, + .user = { + {{0,0,GT214_DISP_CURSOR }, nv50_disp_curs_new }, + {{0,0,GT214_DISP_OVERLAY }, nv50_disp_oimm_new }, + {{0,0,GT214_DISP_BASE_CHANNEL_DMA }, g84_disp_base_new }, + {{0,0,GT214_DISP_CORE_CHANNEL_DMA }, g94_disp_core_new }, + {{0,0,GT214_DISP_OVERLAY_CHANNEL_DMA}, g84_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c new file mode 100644 index 000000000000..9c658d632d37 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c @@ -0,0 +1,52 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "rootnv50.h" +#include "channv50.h" + +#include <nvif/class.h> + +static const struct nv50_disp_root_func +gv100_disp_root = { + .user = { + {{0,0,GV100_DISP_CURSOR }, gv100_disp_curs_new }, + {{0,0,GV100_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new }, + {{0,0,GV100_DISP_CORE_CHANNEL_DMA }, gv100_disp_core_new }, + {{0,0,GV100_DISP_WINDOW_CHANNEL_DMA }, gv100_disp_wndw_new }, + {} + }, +}; + +static int +gv100_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass, + void *data, u32 size, struct nvkm_object **pobject) +{ + return nv50_disp_root_new_(&gv100_disp_root, disp, oclass, + data, size, pobject); +} + +const struct nvkm_disp_oclass +gv100_disp_root_oclass = { + .base.oclass = GV100_DISP, + .base.minver = -1, + .base.maxver = -1, + .ctor = gv100_disp_root_new, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c index 1208524aae14..3aa5a2879239 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c @@ -22,14 +22,12 @@ * Authors: Ben Skeggs */ #include "rootnv50.h" -#include "dmacnv50.h" +#include "channv50.h" #include "dp.h" #include "head.h" #include "ior.h" #include <core/client.h> -#include <core/ramht.h> -#include <subdev/timer.h> #include <nvif/class.h> #include <nvif/cl5070.h> @@ -271,23 +269,12 @@ nv50_disp_root_mthd_(struct nvkm_object *object, u32 mthd, void *data, u32 size) } static int -nv50_disp_root_dmac_new_(const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) +nv50_disp_root_child_new_(const struct nvkm_oclass *oclass, + void *argv, u32 argc, struct nvkm_object **pobject) { - const struct nv50_disp_dmac_oclass *sclass = oclass->priv; - struct nv50_disp_root *root = nv50_disp_root(oclass->parent); - return sclass->ctor(sclass->func, sclass->mthd, root, sclass->chid, - oclass, data, size, pobject); -} - -static int -nv50_disp_root_pioc_new_(const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - const struct nv50_disp_pioc_oclass *sclass = oclass->priv; - struct nv50_disp_root *root = nv50_disp_root(oclass->parent); - return sclass->ctor(sclass->func, sclass->mthd, root, sclass->chid.ctrl, - sclass->chid.user, oclass, data, size, pobject); + struct nv50_disp *disp = nv50_disp_root(oclass->parent)->disp; + const struct nv50_disp_user *user = oclass->priv; + return user->ctor(oclass, argv, argc, disp, pobject); } static int @@ -296,68 +283,26 @@ nv50_disp_root_child_get_(struct nvkm_object *object, int index, { struct nv50_disp_root *root = nv50_disp_root(object); - if (index < ARRAY_SIZE(root->func->dmac)) { - sclass->base = root->func->dmac[index]->base; - sclass->priv = root->func->dmac[index]; - sclass->ctor = nv50_disp_root_dmac_new_; - return 0; - } - - index -= ARRAY_SIZE(root->func->dmac); - - if (index < ARRAY_SIZE(root->func->pioc)) { - sclass->base = root->func->pioc[index]->base; - sclass->priv = root->func->pioc[index]; - sclass->ctor = nv50_disp_root_pioc_new_; + if (root->func->user[index].ctor) { + sclass->base = root->func->user[index].base; + sclass->priv = root->func->user + index; + sclass->ctor = nv50_disp_root_child_new_; return 0; } return -EINVAL; } -static int -nv50_disp_root_fini_(struct nvkm_object *object, bool suspend) -{ - struct nv50_disp_root *root = nv50_disp_root(object); - root->func->fini(root); - return 0; -} - -static int -nv50_disp_root_init_(struct nvkm_object *object) -{ - struct nv50_disp_root *root = nv50_disp_root(object); - struct nvkm_ior *ior; - int ret; - - ret = root->func->init(root); - if (ret) - return ret; - - /* Set 'normal' (ie. when it's attached to a head) state for - * each output resource to 'fully enabled'. - */ - list_for_each_entry(ior, &root->disp->base.ior, head) { - ior->func->power(ior, true, true, true, true, true); - } - - return 0; -} - static void * nv50_disp_root_dtor_(struct nvkm_object *object) { struct nv50_disp_root *root = nv50_disp_root(object); - nvkm_ramht_del(&root->ramht); - nvkm_gpuobj_del(&root->instmem); return root; } static const struct nvkm_object_func nv50_disp_root_ = { .dtor = nv50_disp_root_dtor_, - .init = nv50_disp_root_init_, - .fini = nv50_disp_root_fini_, .mthd = nv50_disp_root_mthd_, .ntfy = nvkm_disp_ntfy, .sclass = nv50_disp_root_child_get_, @@ -370,8 +315,6 @@ nv50_disp_root_new_(const struct nv50_disp_root_func *func, { struct nv50_disp *disp = nv50_disp(base); struct nv50_disp_root *root; - struct nvkm_device *device = disp->base.engine.subdev.device; - int ret; if (!(root = kzalloc(sizeof(*root), GFP_KERNEL))) return -ENOMEM; @@ -380,102 +323,18 @@ nv50_disp_root_new_(const struct nv50_disp_root_func *func, nvkm_object_ctor(&nv50_disp_root_, oclass, &root->object); root->func = func; root->disp = disp; - - ret = nvkm_gpuobj_new(disp->base.engine.subdev.device, 0x10000, 0x10000, - false, NULL, &root->instmem); - if (ret) - return ret; - - return nvkm_ramht_new(device, 0x1000, 0, root->instmem, &root->ramht); -} - -void -nv50_disp_root_fini(struct nv50_disp_root *root) -{ - struct nvkm_device *device = root->disp->base.engine.subdev.device; - /* disable all interrupts */ - nvkm_wr32(device, 0x610024, 0x00000000); - nvkm_wr32(device, 0x610020, 0x00000000); -} - -int -nv50_disp_root_init(struct nv50_disp_root *root) -{ - struct nv50_disp *disp = root->disp; - struct nvkm_head *head; - struct nvkm_device *device = disp->base.engine.subdev.device; - u32 tmp; - int i; - - /* The below segments of code copying values from one register to - * another appear to inform EVO of the display capabilities or - * something similar. NFI what the 0x614004 caps are for.. - */ - tmp = nvkm_rd32(device, 0x614004); - nvkm_wr32(device, 0x610184, tmp); - - /* ... CRTC caps */ - list_for_each_entry(head, &disp->base.head, head) { - tmp = nvkm_rd32(device, 0x616100 + (head->id * 0x800)); - nvkm_wr32(device, 0x610190 + (head->id * 0x10), tmp); - tmp = nvkm_rd32(device, 0x616104 + (head->id * 0x800)); - nvkm_wr32(device, 0x610194 + (head->id * 0x10), tmp); - tmp = nvkm_rd32(device, 0x616108 + (head->id * 0x800)); - nvkm_wr32(device, 0x610198 + (head->id * 0x10), tmp); - tmp = nvkm_rd32(device, 0x61610c + (head->id * 0x800)); - nvkm_wr32(device, 0x61019c + (head->id * 0x10), tmp); - } - - /* ... DAC caps */ - for (i = 0; i < disp->func->dac.nr; i++) { - tmp = nvkm_rd32(device, 0x61a000 + (i * 0x800)); - nvkm_wr32(device, 0x6101d0 + (i * 0x04), tmp); - } - - /* ... SOR caps */ - for (i = 0; i < disp->func->sor.nr; i++) { - tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800)); - nvkm_wr32(device, 0x6101e0 + (i * 0x04), tmp); - } - - /* ... PIOR caps */ - for (i = 0; i < disp->func->pior.nr; i++) { - tmp = nvkm_rd32(device, 0x61e000 + (i * 0x800)); - nvkm_wr32(device, 0x6101f0 + (i * 0x04), tmp); - } - - /* steal display away from vbios, or something like that */ - if (nvkm_rd32(device, 0x610024) & 0x00000100) { - nvkm_wr32(device, 0x610024, 0x00000100); - nvkm_mask(device, 0x6194e8, 0x00000001, 0x00000000); - if (nvkm_msec(device, 2000, - if (!(nvkm_rd32(device, 0x6194e8) & 0x00000002)) - break; - ) < 0) - return -EBUSY; - } - - /* point at display engine memory area (hash table, objects) */ - nvkm_wr32(device, 0x610010, (root->instmem->addr >> 8) | 9); - - /* enable supervisor interrupts, disable everything else */ - nvkm_wr32(device, 0x61002c, 0x00000370); - nvkm_wr32(device, 0x610028, 0x00000000); return 0; } static const struct nv50_disp_root_func nv50_disp_root = { - .init = nv50_disp_root_init, - .fini = nv50_disp_root_fini, - .dmac = { - &nv50_disp_core_oclass, - &nv50_disp_base_oclass, - &nv50_disp_ovly_oclass, - }, - .pioc = { - &nv50_disp_oimm_oclass, - &nv50_disp_curs_oclass, + .user = { + {{0,0,NV50_DISP_CURSOR }, nv50_disp_curs_new }, + {{0,0,NV50_DISP_OVERLAY }, nv50_disp_oimm_new }, + {{0,0,NV50_DISP_BASE_CHANNEL_DMA }, nv50_disp_base_new }, + {{0,0,NV50_DISP_CORE_CHANNEL_DMA }, nv50_disp_core_new }, + {{0,0,NV50_DISP_OVERLAY_CHANNEL_DMA}, nv50_disp_ovly_new }, + {} }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h index 4818fa69ae6c..6ca4f9184b51 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h @@ -2,34 +2,27 @@ #ifndef __NV50_DISP_ROOT_H__ #define __NV50_DISP_ROOT_H__ #define nv50_disp_root(p) container_of((p), struct nv50_disp_root, object) +#include <core/object.h> #include "nv50.h" -#include "channv50.h" -#include "dmacnv50.h" struct nv50_disp_root { const struct nv50_disp_root_func *func; struct nv50_disp *disp; struct nvkm_object object; - - struct nvkm_gpuobj *instmem; - struct nvkm_ramht *ramht; }; struct nv50_disp_root_func { - int (*init)(struct nv50_disp_root *); - void (*fini)(struct nv50_disp_root *); - const struct nv50_disp_dmac_oclass *dmac[3]; - const struct nv50_disp_pioc_oclass *pioc[2]; + int blah; + struct nv50_disp_user { + struct nvkm_sclass base; + int (*ctor)(const struct nvkm_oclass *, void *argv, u32 argc, + struct nv50_disp *, struct nvkm_object **); + } user[]; }; int nv50_disp_root_new_(const struct nv50_disp_root_func *, struct nvkm_disp *, const struct nvkm_oclass *, void *data, u32 size, struct nvkm_object **); -int nv50_disp_root_init(struct nv50_disp_root *); -void nv50_disp_root_fini(struct nv50_disp_root *); - -int gf119_disp_root_init(struct nv50_disp_root *); -void gf119_disp_root_fini(struct nv50_disp_root *); extern const struct nvkm_disp_oclass nv50_disp_root_oclass; extern const struct nvkm_disp_oclass g84_disp_root_oclass; @@ -43,4 +36,5 @@ extern const struct nvkm_disp_oclass gm107_disp_root_oclass; extern const struct nvkm_disp_oclass gm200_disp_root_oclass; extern const struct nvkm_disp_oclass gp100_disp_root_oclass; extern const struct nvkm_disp_oclass gp102_disp_root_oclass; +extern const struct nvkm_disp_oclass gv100_disp_root_oclass; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg84.c index f40b909b4ca2..ec3a7db08118 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg84.c @@ -34,5 +34,5 @@ g84_sor = { int g84_sor_new(struct nvkm_disp *disp, int id) { - return nv50_sor_new_(&g84_sor, disp, id); + return nvkm_ior_new_(&g84_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c index 49aeafde0031..4d59d02525d9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c @@ -279,5 +279,13 @@ g94_sor = { int g94_sor_new(struct nvkm_disp *disp, int id) { - return nv50_sor_new_(&g94_sor, disp, id); + return nvkm_ior_new_(&g94_sor, disp, SOR, id); +} + +int +g94_sor_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + struct nvkm_device *device = disp->engine.subdev.device; + *pmask = (nvkm_rd32(device, 0x610184) & 0x0f000000) >> 24; + return 4; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c index 700fc754f28a..e6e6dfbb1283 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c @@ -152,15 +152,6 @@ gf119_sor_state(struct nvkm_ior *sor, struct nvkm_ior_state *state) state->head = ctrl & 0x0000000f; } -int -gf119_sor_new_(const struct nvkm_ior_func *func, struct nvkm_disp *disp, int id) -{ - struct nvkm_device *device = disp->engine.subdev.device; - if (!(nvkm_rd32(device, 0x612004) & (0x00000100 << id))) - return 0; - return nvkm_ior_new_(func, disp, SOR, id); -} - static const struct nvkm_ior_func gf119_sor = { .state = gf119_sor_state, @@ -189,5 +180,13 @@ gf119_sor = { int gf119_sor_new(struct nvkm_disp *disp, int id) { - return gf119_sor_new_(&gf119_sor, disp, id); + return nvkm_ior_new_(&gf119_sor, disp, SOR, id); +} + +int +gf119_sor_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + struct nvkm_device *device = disp->engine.subdev.device; + *pmask = (nvkm_rd32(device, 0x612004) & 0x0000ff00) >> 8; + return 8; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c index a1547bdf490b..b94090edaebf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c @@ -49,5 +49,5 @@ gk104_sor = { int gk104_sor_new(struct nvkm_disp *disp, int id) { - return gf119_sor_new_(&gk104_sor, disp, id); + return nvkm_ior_new_(&gk104_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c index 60230957d82b..e6965dec09c9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c @@ -63,5 +63,5 @@ gm107_sor = { int gm107_sor_new(struct nvkm_disp *disp, int id) { - return gf119_sor_new_(&gm107_sor, disp, id); + return nvkm_ior_new_(&gm107_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c index f9b8107aa2a2..d892bdf04034 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c @@ -23,7 +23,7 @@ */ #include "ior.h" -static void +void gm200_sor_dp_drive(struct nvkm_ior *sor, int ln, int pc, int dc, int pe, int pu) { struct nvkm_device *device = sor->disp->engine.subdev.device; @@ -45,7 +45,7 @@ gm200_sor_dp_drive(struct nvkm_ior *sor, int ln, int pc, int dc, int pe, int pu) nvkm_wr32(device, 0x61c13c + loff, data[3] | (pc << shift)); } -static void +void gm200_sor_route_set(struct nvkm_outp *outp, struct nvkm_ior *ior) { struct nvkm_device *device = outp->disp->engine.subdev.device; @@ -62,7 +62,7 @@ gm200_sor_route_set(struct nvkm_outp *outp, struct nvkm_ior *ior) nvkm_mask(device, 0x612388 + moff, 0x0000001f, link << 4 | sor); } -static int +int gm200_sor_route_get(struct nvkm_outp *outp, int *link) { struct nvkm_device *device = outp->disp->engine.subdev.device; @@ -120,5 +120,5 @@ gm200_sor = { int gm200_sor_new(struct nvkm_disp *disp, int id) { - return gf119_sor_new_(&gm200_sor, disp, id); + return nvkm_ior_new_(&gm200_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.c index da228b54b43e..54d134d4ca1d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.c @@ -65,5 +65,5 @@ gt215_sor = { int gt215_sor_new(struct nvkm_disp *disp, int id) { - return nv50_sor_new_(>215_sor, disp, id); + return nvkm_ior_new_(>215_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c new file mode 100644 index 000000000000..040db8a338de --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c @@ -0,0 +1,120 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ior.h" + +#include <subdev/timer.h> + +static void +gv100_sor_dp_watermark(struct nvkm_ior *sor, int head, u8 watermark) +{ + struct nvkm_device *device = sor->disp->engine.subdev.device; + const u32 hoff = head * 0x800; + nvkm_mask(device, 0x616550 + hoff, 0x0c00003f, 0x08000000 | watermark); +} + +static void +gv100_sor_dp_audio_sym(struct nvkm_ior *sor, int head, u16 h, u32 v) +{ + struct nvkm_device *device = sor->disp->engine.subdev.device; + const u32 hoff = head * 0x800; + nvkm_mask(device, 0x616568 + hoff, 0x0000ffff, h); + nvkm_mask(device, 0x61656c + hoff, 0x00ffffff, v); +} + +static void +gv100_sor_dp_audio(struct nvkm_ior *sor, int head, bool enable) +{ + struct nvkm_device *device = sor->disp->engine.subdev.device; + const u32 hoff = 0x800 * head; + const u32 data = 0x80000000 | (0x00000001 * enable); + const u32 mask = 0x8000000d; + nvkm_mask(device, 0x616560 + hoff, mask, data); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x616560 + hoff) & 0x80000000)) + break; + ); +} + +static void +gv100_sor_state(struct nvkm_ior *sor, struct nvkm_ior_state *state) +{ + struct nvkm_device *device = sor->disp->engine.subdev.device; + const u32 coff = (state == &sor->arm) * 0x8000 + sor->id * 0x20; + u32 ctrl = nvkm_rd32(device, 0x680300 + coff); + + state->proto_evo = (ctrl & 0x00000f00) >> 8; + switch (state->proto_evo) { + case 0: state->proto = LVDS; state->link = 1; break; + case 1: state->proto = TMDS; state->link = 1; break; + case 2: state->proto = TMDS; state->link = 2; break; + case 5: state->proto = TMDS; state->link = 3; break; + case 8: state->proto = DP; state->link = 1; break; + case 9: state->proto = DP; state->link = 2; break; + default: + state->proto = UNKNOWN; + break; + } + + state->head = ctrl & 0x000000ff; +} + +static const struct nvkm_ior_func +gv100_sor = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gv100_sor_state, + .power = nv50_sor_power, + .clock = gf119_sor_clock, + .hdmi = { + .ctrl = gv100_hdmi_ctrl, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = gf119_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .audio = gv100_sor_dp_audio, + .audio_sym = gv100_sor_dp_audio_sym, + .watermark = gv100_sor_dp_watermark, + }, + .hda = { + .hpd = gf119_hda_hpd, + .eld = gf119_hda_eld, + }, +}; + +int +gv100_sor_new(struct nvkm_disp *disp, int id) +{ + return nvkm_ior_new_(&gv100_sor, disp, SOR, id); +} + +int +gv100_sor_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + struct nvkm_device *device = disp->engine.subdev.device; + *pmask = (nvkm_rd32(device, 0x610060) & 0x0000ff00) >> 8; + return (nvkm_rd32(device, 0x610074) & 0x00000f00) >> 8; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp77.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp77.c index c0179ccb956d..8a70dd25b13a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp77.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp77.c @@ -44,5 +44,5 @@ mcp77_sor = { int mcp77_sor_new(struct nvkm_disp *disp, int id) { - return nv50_sor_new_(&mcp77_sor, disp, id); + return nvkm_ior_new_(&mcp77_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.c index 9bb01cd96697..eac9c5be9166 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.c @@ -49,5 +49,5 @@ mcp89_sor = { int mcp89_sor_new(struct nvkm_disp *disp, int id) { - return nv50_sor_new_(&mcp89_sor, disp, id); + return nvkm_ior_new_(&mcp89_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sornv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sornv50.c index f3ebd0c22e7d..b4729f8798af 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sornv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sornv50.c @@ -84,15 +84,6 @@ nv50_sor_state(struct nvkm_ior *sor, struct nvkm_ior_state *state) state->head = ctrl & 0x00000003; } -int -nv50_sor_new_(const struct nvkm_ior_func *func, struct nvkm_disp *disp, int id) -{ - struct nvkm_device *device = disp->engine.subdev.device; - if (!(nvkm_rd32(device, 0x610184) & (0x01000000 << id))) - return 0; - return nvkm_ior_new_(func, disp, SOR, id); -} - static const struct nvkm_ior_func nv50_sor = { .state = nv50_sor_state, @@ -103,5 +94,13 @@ nv50_sor = { int nv50_sor_new(struct nvkm_disp *disp, int id) { - return nv50_sor_new_(&nv50_sor, disp, id); + return nvkm_ior_new_(&nv50_sor, disp, SOR, id); +} + +int +nv50_sor_cnt(struct nvkm_disp *disp, unsigned long *pmask) +{ + struct nvkm_device *device = disp->engine.subdev.device; + *pmask = (nvkm_rd32(device, 0x610184) & 0x03000000) >> 24; + return 2; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/wimmgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/wimmgv100.c new file mode 100644 index 000000000000..89d783368b4f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/wimmgv100.c @@ -0,0 +1,82 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "channv50.h" + +#include <core/client.h> + +#include <nvif/clc37b.h> +#include <nvif/unpack.h> + +static void +gv100_disp_wimm_intr(struct nv50_disp_chan *chan, bool en) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u32 mask = 0x00000001 << chan->head; + const u32 data = en ? mask : 0; + nvkm_mask(device, 0x611da8, mask, data); +} + +const struct nv50_disp_chan_func +gv100_disp_wimm = { + .init = gv100_disp_dmac_init, + .fini = gv100_disp_dmac_fini, + .intr = gv100_disp_wimm_intr, + .user = gv100_disp_chan_user, +}; + +static int +gv100_disp_wimm_new_(const struct nv50_disp_chan_func *func, + const struct nv50_disp_chan_mthd *mthd, + struct nv50_disp *disp, int chid, + const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) +{ + union { + struct nvc37b_window_imm_channel_dma_v0 v0; + } *args = argv; + struct nvkm_object *parent = oclass->parent; + int wndw, ret = -ENOSYS; + u64 push; + + nvif_ioctl(parent, "create window imm channel dma size %d\n", argc); + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + nvif_ioctl(parent, "create window imm channel dma vers %d " + "pushbuf %016llx index %d\n", + args->v0.version, args->v0.pushbuf, args->v0.index); + if (!(disp->wndw.mask & BIT(args->v0.index))) + return -EINVAL; + push = args->v0.pushbuf; + wndw = args->v0.index; + } else + return ret; + + return nv50_disp_dmac_new_(func, mthd, disp, chid + wndw, + wndw, push, oclass, pobject); +} + +int +gv100_disp_wimm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return gv100_disp_wimm_new_(&gv100_disp_wimm, NULL, disp, 33, + oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c new file mode 100644 index 000000000000..98911805aabf --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c @@ -0,0 +1,184 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "channv50.h" + +#include <core/client.h> + +#include <nvif/clc37e.h> +#include <nvif/unpack.h> + +static const struct nv50_disp_mthd_list +gv100_disp_wndw_mthd_base = { + .mthd = 0x0000, + .addr = 0x000000, + .data = { + { 0x0200, 0x690200 }, + { 0x020c, 0x69020c }, + { 0x0210, 0x690210 }, + { 0x0214, 0x690214 }, + { 0x0218, 0x690218 }, + { 0x021c, 0x69021c }, + { 0x0220, 0x690220 }, + { 0x0224, 0x690224 }, + { 0x0228, 0x690228 }, + { 0x022c, 0x69022c }, + { 0x0230, 0x690230 }, + { 0x0234, 0x690234 }, + { 0x0238, 0x690238 }, + { 0x0240, 0x690240 }, + { 0x0244, 0x690244 }, + { 0x0248, 0x690248 }, + { 0x024c, 0x69024c }, + { 0x0250, 0x690250 }, + { 0x0254, 0x690254 }, + { 0x0260, 0x690260 }, + { 0x0264, 0x690264 }, + { 0x0268, 0x690268 }, + { 0x026c, 0x69026c }, + { 0x0270, 0x690270 }, + { 0x0274, 0x690274 }, + { 0x0280, 0x690280 }, + { 0x0284, 0x690284 }, + { 0x0288, 0x690288 }, + { 0x028c, 0x69028c }, + { 0x0290, 0x690290 }, + { 0x0298, 0x690298 }, + { 0x029c, 0x69029c }, + { 0x02a0, 0x6902a0 }, + { 0x02a4, 0x6902a4 }, + { 0x02a8, 0x6902a8 }, + { 0x02ac, 0x6902ac }, + { 0x02b0, 0x6902b0 }, + { 0x02b4, 0x6902b4 }, + { 0x02b8, 0x6902b8 }, + { 0x02bc, 0x6902bc }, + { 0x02c0, 0x6902c0 }, + { 0x02c4, 0x6902c4 }, + { 0x02c8, 0x6902c8 }, + { 0x02cc, 0x6902cc }, + { 0x02d0, 0x6902d0 }, + { 0x02d4, 0x6902d4 }, + { 0x02d8, 0x6902d8 }, + { 0x02dc, 0x6902dc }, + { 0x02e0, 0x6902e0 }, + { 0x02e4, 0x6902e4 }, + { 0x02e8, 0x6902e8 }, + { 0x02ec, 0x6902ec }, + { 0x02f0, 0x6902f0 }, + { 0x02f4, 0x6902f4 }, + { 0x02f8, 0x6902f8 }, + { 0x02fc, 0x6902fc }, + { 0x0300, 0x690300 }, + { 0x0304, 0x690304 }, + { 0x0308, 0x690308 }, + { 0x0310, 0x690310 }, + { 0x0314, 0x690314 }, + { 0x0318, 0x690318 }, + { 0x031c, 0x69031c }, + { 0x0320, 0x690320 }, + { 0x0324, 0x690324 }, + { 0x0328, 0x690328 }, + { 0x032c, 0x69032c }, + { 0x033c, 0x69033c }, + { 0x0340, 0x690340 }, + { 0x0344, 0x690344 }, + { 0x0348, 0x690348 }, + { 0x034c, 0x69034c }, + { 0x0350, 0x690350 }, + { 0x0354, 0x690354 }, + { 0x0358, 0x690358 }, + { 0x0364, 0x690364 }, + { 0x0368, 0x690368 }, + { 0x036c, 0x69036c }, + { 0x0370, 0x690370 }, + { 0x0374, 0x690374 }, + { 0x0380, 0x690380 }, + {} + } +}; + +const struct nv50_disp_chan_mthd +gv100_disp_wndw_mthd = { + .name = "Base", + .addr = 0x001000, + .prev = 0x000800, + .data = { + { "Global", 1, &gv100_disp_wndw_mthd_base }, + {} + } +}; + +static void +gv100_disp_wndw_intr(struct nv50_disp_chan *chan, bool en) +{ + struct nvkm_device *device = chan->disp->base.engine.subdev.device; + const u32 mask = 0x00000001 << chan->head; + const u32 data = en ? mask : 0; + nvkm_mask(device, 0x611da4, mask, data); +} + +const struct nv50_disp_chan_func +gv100_disp_wndw = { + .init = gv100_disp_dmac_init, + .fini = gv100_disp_dmac_fini, + .intr = gv100_disp_wndw_intr, + .user = gv100_disp_chan_user, + .bind = gv100_disp_dmac_bind, +}; + +static int +gv100_disp_wndw_new_(const struct nv50_disp_chan_func *func, + const struct nv50_disp_chan_mthd *mthd, + struct nv50_disp *disp, int chid, + const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) +{ + union { + struct nvc37e_window_channel_dma_v0 v0; + } *args = argv; + struct nvkm_object *parent = oclass->parent; + int wndw, ret = -ENOSYS; + u64 push; + + nvif_ioctl(parent, "create window channel dma size %d\n", argc); + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + nvif_ioctl(parent, "create window channel dma vers %d " + "pushbuf %016llx index %d\n", + args->v0.version, args->v0.pushbuf, args->v0.index); + if (!(disp->wndw.mask & BIT(args->v0.index))) + return -EINVAL; + push = args->v0.pushbuf; + wndw = args->v0.index; + } else + return ret; + + return nv50_disp_dmac_new_(func, mthd, disp, chid + wndw, + wndw, push, oclass, pobject); +} + +int +gv100_disp_wndw_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nv50_disp *disp, struct nvkm_object **pobject) +{ + return gv100_disp_wndw_new_(&gv100_disp_wndw, &gv100_disp_wndw_mthd, + disp, 1, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/dma/Kbuild index c4a2ce9b0d71..e96d1f57f9f9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/Kbuild @@ -3,9 +3,11 @@ nvkm-y += nvkm/engine/dma/nv04.o nvkm-y += nvkm/engine/dma/nv50.o nvkm-y += nvkm/engine/dma/gf100.o nvkm-y += nvkm/engine/dma/gf119.o +nvkm-y += nvkm/engine/dma/gv100.o nvkm-y += nvkm/engine/dma/user.o nvkm-y += nvkm/engine/dma/usernv04.o nvkm-y += nvkm/engine/dma/usernv50.o nvkm-y += nvkm/engine/dma/usergf100.o nvkm-y += nvkm/engine/dma/usergf119.o +nvkm-y += nvkm/engine/dma/usergv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/gv100.c index 5ad5d0f5db05..c65a4c2ea93d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/gv100.c @@ -1,5 +1,5 @@ /* - * Copyright 2012 Red Hat Inc. + * Copyright 2018 Red Hat Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -18,20 +18,17 @@ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs */ -#include "channv50.h" -#include "rootnv50.h" - -#include <nvif/class.h> +#include "priv.h" +#include "user.h" -const struct nv50_disp_pioc_oclass -g84_disp_oimm_oclass = { - .base.oclass = G82_DISP_OVERLAY, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_disp_oimm_new, - .func = &nv50_disp_pioc_func, - .chid = { 5, 5 }, +static const struct nvkm_dma_func +gv100_dma = { + .class_new = gv100_dmaobj_new, }; + +int +gv100_dma_new(struct nvkm_device *device, int index, struct nvkm_dma **pdma) +{ + return nvkm_dma_new_(&gv100_dma, device, index, pdma); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h index 4bbac8a21c71..9fe01fd75474 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h @@ -16,4 +16,6 @@ int gf100_dmaobj_new(struct nvkm_dma *, const struct nvkm_oclass *, void *, u32, struct nvkm_dmaobj **); int gf119_dmaobj_new(struct nvkm_dma *, const struct nvkm_oclass *, void *, u32, struct nvkm_dmaobj **); +int gv100_dmaobj_new(struct nvkm_dma *, const struct nvkm_oclass *, void *, u32, + struct nvkm_dmaobj **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/usergv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/usergv100.c new file mode 100644 index 000000000000..39eba9fc82be --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/usergv100.c @@ -0,0 +1,119 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#define gv100_dmaobj(p) container_of((p), struct gv100_dmaobj, base) +#include "user.h" + +#include <core/client.h> +#include <core/gpuobj.h> +#include <subdev/fb.h> + +#include <nvif/cl0002.h> +#include <nvif/unpack.h> + +struct gv100_dmaobj { + struct nvkm_dmaobj base; + u32 flags0; +}; + +static int +gv100_dmaobj_bind(struct nvkm_dmaobj *base, struct nvkm_gpuobj *parent, + int align, struct nvkm_gpuobj **pgpuobj) +{ + struct gv100_dmaobj *dmaobj = gv100_dmaobj(base); + struct nvkm_device *device = dmaobj->base.dma->engine.subdev.device; + u64 start = dmaobj->base.start >> 8; + u64 limit = dmaobj->base.limit >> 8; + int ret; + + ret = nvkm_gpuobj_new(device, 24, align, false, parent, pgpuobj); + if (ret == 0) { + nvkm_kmap(*pgpuobj); + nvkm_wo32(*pgpuobj, 0x00, dmaobj->flags0); + nvkm_wo32(*pgpuobj, 0x04, lower_32_bits(start)); + nvkm_wo32(*pgpuobj, 0x08, upper_32_bits(start)); + nvkm_wo32(*pgpuobj, 0x0c, lower_32_bits(limit)); + nvkm_wo32(*pgpuobj, 0x10, upper_32_bits(limit)); + nvkm_done(*pgpuobj); + } + + return ret; +} + +static const struct nvkm_dmaobj_func +gv100_dmaobj_func = { + .bind = gv100_dmaobj_bind, +}; + +int +gv100_dmaobj_new(struct nvkm_dma *dma, const struct nvkm_oclass *oclass, + void *data, u32 size, struct nvkm_dmaobj **pdmaobj) +{ + union { + struct gf119_dma_v0 v0; + } *args; + struct nvkm_object *parent = oclass->parent; + struct gv100_dmaobj *dmaobj; + u32 kind, page; + int ret; + + if (!(dmaobj = kzalloc(sizeof(*dmaobj), GFP_KERNEL))) + return -ENOMEM; + *pdmaobj = &dmaobj->base; + + ret = nvkm_dmaobj_ctor(&gv100_dmaobj_func, dma, oclass, + &data, &size, &dmaobj->base); + if (ret) + return ret; + + ret = -ENOSYS; + args = data; + + nvif_ioctl(parent, "create gv100 dma size %d\n", size); + if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { + nvif_ioctl(parent, + "create gv100 dma vers %d page %d kind %02x\n", + args->v0.version, args->v0.page, args->v0.kind); + kind = args->v0.kind != 0; + page = args->v0.page != 0; + } else + if (size == 0) { + kind = 0; + page = GF119_DMA_V0_PAGE_SP; + } else + return ret; + + if (kind) + dmaobj->flags0 |= 0x00100000; + if (page) + dmaobj->flags0 |= 0x00000040; + dmaobj->flags0 |= 0x00000004; /* rw */ + + switch (dmaobj->base.target) { + case NV_MEM_TARGET_VRAM : dmaobj->flags0 |= 0x00000001; break; + case NV_MEM_TARGET_PCI : dmaobj->flags0 |= 0x00000002; break; + case NV_MEM_TARGET_PCI_NOSNOOP: dmaobj->flags0 |= 0x00000003; break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild index 64e51838edf8..f00408577a6a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild @@ -15,6 +15,7 @@ nvkm-y += nvkm/engine/fifo/gm200.o nvkm-y += nvkm/engine/fifo/gm20b.o nvkm-y += nvkm/engine/fifo/gp100.o nvkm-y += nvkm/engine/fifo/gp10b.o +nvkm-y += nvkm/engine/fifo/gv100.o nvkm-y += nvkm/engine/fifo/chan.o nvkm-y += nvkm/engine/fifo/channv50.o @@ -31,6 +32,6 @@ nvkm-y += nvkm/engine/fifo/gpfifonv50.o nvkm-y += nvkm/engine/fifo/gpfifog84.o nvkm-y += nvkm/engine/fifo/gpfifogf100.o nvkm-y += nvkm/engine/fifo/gpfifogk104.o -nvkm-y += nvkm/engine/fifo/gpfifogk110.o -nvkm-y += nvkm/engine/fifo/gpfifogm200.o -nvkm-y += nvkm/engine/fifo/gpfifogp100.o +nvkm-y += nvkm/engine/fifo/gpfifogv100.o + +nvkm-y += nvkm/engine/fifo/usergv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c index 64f6b7654a08..c773caf21f6b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c @@ -30,6 +30,7 @@ #include <subdev/mc.h> #include <nvif/event.h> +#include <nvif/cl0080.h> #include <nvif/unpack.h> void @@ -56,6 +57,12 @@ nvkm_fifo_start(struct nvkm_fifo *fifo, unsigned long *flags) } void +nvkm_fifo_fault(struct nvkm_fifo *fifo, struct nvkm_fault_data *info) +{ + return fifo->func->fault(fifo, info); +} + +void nvkm_fifo_chan_put(struct nvkm_fifo *fifo, unsigned long flags, struct nvkm_fifo_chan **pchan) { @@ -209,6 +216,20 @@ nvkm_fifo_uevent(struct nvkm_fifo *fifo) } static int +nvkm_fifo_class_new_(struct nvkm_device *device, + const struct nvkm_oclass *oclass, void *data, u32 size, + struct nvkm_object **pobject) +{ + struct nvkm_fifo *fifo = nvkm_fifo(oclass->engine); + return fifo->func->class_new(fifo, oclass, data, size, pobject); +} + +static const struct nvkm_device_oclass +nvkm_fifo_class_ = { + .ctor = nvkm_fifo_class_new_, +}; + +static int nvkm_fifo_class_new(struct nvkm_device *device, const struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) @@ -232,13 +253,9 @@ nvkm_fifo_class_get(struct nvkm_oclass *oclass, int index, int c = 0; if (fifo->func->class_get) { - int ret = fifo->func->class_get(fifo, index, &sclass); - if (ret == 0) { - oclass->base = sclass->base; - oclass->engn = sclass; - *class = &nvkm_fifo_class; - return 0; - } + int ret = fifo->func->class_get(fifo, index, oclass); + if (ret == 0) + *class = &nvkm_fifo_class_; return ret; } @@ -271,6 +288,20 @@ nvkm_fifo_fini(struct nvkm_engine *engine, bool suspend) } static int +nvkm_fifo_info(struct nvkm_engine *engine, u64 mthd, u64 *data) +{ + struct nvkm_fifo *fifo = nvkm_fifo(engine); + switch (mthd) { + case NV_DEVICE_FIFO_CHANNELS: *data = fifo->nr; return 0; + default: + if (fifo->func->info) + return fifo->func->info(fifo, mthd, data); + break; + } + return -ENOSYS; +} + +static int nvkm_fifo_oneinit(struct nvkm_engine *engine) { struct nvkm_fifo *fifo = nvkm_fifo(engine); @@ -311,6 +342,7 @@ nvkm_fifo = { .dtor = nvkm_fifo_dtor, .preinit = nvkm_fifo_preinit, .oneinit = nvkm_fifo_oneinit, + .info = nvkm_fifo_info, .init = nvkm_fifo_init, .fini = nvkm_fifo_fini, .intr = nvkm_fifo_intr, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h new file mode 100644 index 000000000000..d0ac60b06720 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h @@ -0,0 +1,11 @@ +#ifndef __NVKM_FIFO_CGRP_H__ +#define __NVKM_FIFO_CGRP_H__ +#include "priv.h" + +struct nvkm_fifo_cgrp { + int id; + struct list_head head; + struct list_head chan; + int chan_nr; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h index 1208e3d9dbe2..8e28ba6b2307 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h @@ -10,6 +10,7 @@ struct gk104_fifo_chan { struct gk104_fifo *fifo; int runl; + struct nvkm_fifo_cgrp *cgrp; struct list_head head; bool killed; @@ -19,11 +20,20 @@ struct gk104_fifo_chan { } engn[NVKM_SUBDEV_NR]; }; -int gk104_fifo_gpfifo_new(struct nvkm_fifo *, const struct nvkm_oclass *, +extern const struct nvkm_fifo_chan_func gk104_fifo_gpfifo_func; + +int gk104_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *, void *data, u32 size, struct nvkm_object **); +void *gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *); +void gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *); +void gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *); +int gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *, struct nvkm_engine *, + struct nvkm_object *); +void gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *, + struct nvkm_engine *); +int gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *); +int gk104_fifo_gpfifo_kick_locked(struct gk104_fifo_chan *); -extern const struct nvkm_fifo_chan_oclass gk104_fifo_gpfifo_oclass; -extern const struct nvkm_fifo_chan_oclass gk110_fifo_gpfifo_oclass; -extern const struct nvkm_fifo_chan_oclass gm200_fifo_gpfifo_oclass; -extern const struct nvkm_fifo_chan_oclass gp100_fifo_gpfifo_oclass; +int gv100_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *, + void *data, u32 size, struct nvkm_object **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c index 84bd703dd897..a99046414a18 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c @@ -22,16 +22,19 @@ * Authors: Ben Skeggs */ #include "gk104.h" +#include "cgrp.h" #include "changk104.h" #include <core/client.h> #include <core/gpuobj.h> #include <subdev/bar.h> +#include <subdev/fault.h> #include <subdev/timer.h> #include <subdev/top.h> #include <engine/sw.h> #include <nvif/class.h> +#include <nvif/cl0080.h> struct gk104_fifo_engine_status { bool busy; @@ -93,15 +96,39 @@ gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn, } static int +gk104_fifo_class_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, + void *argv, u32 argc, struct nvkm_object **pobject) +{ + struct gk104_fifo *fifo = gk104_fifo(base); + if (oclass->engn == &fifo->func->chan) { + const struct gk104_fifo_chan_user *user = oclass->engn; + return user->ctor(fifo, oclass, argv, argc, pobject); + } else + if (oclass->engn == &fifo->func->user) { + const struct gk104_fifo_user_user *user = oclass->engn; + return user->ctor(oclass, argv, argc, pobject); + } + WARN_ON(1); + return -EINVAL; +} + +static int gk104_fifo_class_get(struct nvkm_fifo *base, int index, - const struct nvkm_fifo_chan_oclass **psclass) + struct nvkm_oclass *oclass) { struct gk104_fifo *fifo = gk104_fifo(base); int c = 0; - while ((*psclass = fifo->func->chan[c])) { - if (c++ == index) - return 0; + if (fifo->func->user.ctor && c++ == index) { + oclass->base = fifo->func->user.user; + oclass->engn = &fifo->func->user; + return 0; + } + + if (fifo->func->chan.ctor && c++ == index) { + oclass->base = fifo->func->chan.user; + oclass->engn = &fifo->func->chan; + return 0; } return c; @@ -124,10 +151,12 @@ gk104_fifo_uevent_init(struct nvkm_fifo *fifo) void gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl) { + const struct gk104_fifo_runlist_func *func = fifo->func->runlist; struct gk104_fifo_chan *chan; struct nvkm_subdev *subdev = &fifo->base.engine.subdev; struct nvkm_device *device = subdev->device; struct nvkm_memory *mem; + struct nvkm_fifo_cgrp *cgrp; int nr = 0; int target; @@ -137,9 +166,14 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl) nvkm_kmap(mem); list_for_each_entry(chan, &fifo->runlist[runl].chan, head) { - nvkm_wo32(mem, (nr * 8) + 0, chan->base.chid); - nvkm_wo32(mem, (nr * 8) + 4, 0x00000000); - nr++; + func->chan(chan, mem, nr++ * func->size); + } + + list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) { + func->cgrp(cgrp, mem, nr++ * func->size); + list_for_each_entry(chan, &cgrp->chan, head) { + func->chan(chan, mem, nr++ * func->size); + } } nvkm_done(mem); @@ -155,10 +189,10 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl) (target << 28)); nvkm_wr32(device, 0x002274, (runl << 20) | nr); - if (wait_event_timeout(fifo->runlist[runl].wait, - !(nvkm_rd32(device, 0x002284 + (runl * 0x08)) - & 0x00100000), - msecs_to_jiffies(2000)) == 0) + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x002284 + (runl * 0x08)) & 0x00100000)) + break; + ) < 0) nvkm_error(subdev, "runlist %d update timeout\n", runl); unlock: mutex_unlock(&subdev->mutex); @@ -167,19 +201,45 @@ unlock: void gk104_fifo_runlist_remove(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan) { + struct nvkm_fifo_cgrp *cgrp = chan->cgrp; mutex_lock(&fifo->base.engine.subdev.mutex); - list_del_init(&chan->head); + if (!list_empty(&chan->head)) { + list_del_init(&chan->head); + if (cgrp && !--cgrp->chan_nr) + list_del_init(&cgrp->head); + } mutex_unlock(&fifo->base.engine.subdev.mutex); } void gk104_fifo_runlist_insert(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan) { + struct nvkm_fifo_cgrp *cgrp = chan->cgrp; mutex_lock(&fifo->base.engine.subdev.mutex); - list_add_tail(&chan->head, &fifo->runlist[chan->runl].chan); + if (cgrp) { + if (!cgrp->chan_nr++) + list_add_tail(&cgrp->head, &fifo->runlist[chan->runl].cgrp); + list_add_tail(&chan->head, &cgrp->chan); + } else { + list_add_tail(&chan->head, &fifo->runlist[chan->runl].chan); + } mutex_unlock(&fifo->base.engine.subdev.mutex); } +void +gk104_fifo_runlist_chan(struct gk104_fifo_chan *chan, + struct nvkm_memory *memory, u32 offset) +{ + nvkm_wo32(memory, offset + 0, chan->base.chid); + nvkm_wo32(memory, offset + 4, 0x00000000); +} + +const struct gk104_fifo_runlist_func +gk104_fifo_runlist = { + .size = 8, + .chan = gk104_fifo_runlist_chan, +}; + static void gk104_fifo_recover_work(struct work_struct *w) { @@ -235,6 +295,32 @@ gk104_fifo_recover_runl(struct gk104_fifo *fifo, int runl) schedule_work(&fifo->recover.work); } +static struct gk104_fifo_chan * +gk104_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid) +{ + struct gk104_fifo_chan *chan; + struct nvkm_fifo_cgrp *cgrp; + + list_for_each_entry(chan, &fifo->runlist[runl].chan, head) { + if (chan->base.chid == chid) { + list_del_init(&chan->head); + return chan; + } + } + + list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) { + if (cgrp->id == chid) { + chan = list_first_entry(&cgrp->chan, typeof(*chan), head); + list_del_init(&chan->head); + if (!--cgrp->chan_nr) + list_del_init(&cgrp->head); + return chan; + } + } + + return NULL; +} + static void gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid) { @@ -252,13 +338,10 @@ gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid) return; /* Lookup SW state for channel, and mark it as dead. */ - list_for_each_entry(chan, &fifo->runlist[runl].chan, head) { - if (chan->base.chid == chid) { - list_del_init(&chan->head); - chan->killed = true; - nvkm_fifo_kevent(&fifo->base, chid); - break; - } + chan = gk104_fifo_recover_chid(fifo, runl, chid); + if (chan) { + chan->killed = true; + nvkm_fifo_kevent(&fifo->base, chid); } /* Disable channel. */ @@ -347,6 +430,90 @@ gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn) schedule_work(&fifo->recover.work); } +static void +gk104_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info) +{ + struct gk104_fifo *fifo = gk104_fifo(base); + struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_device *device = subdev->device; + const struct nvkm_enum *er, *ee, *ec, *ea; + struct nvkm_engine *engine = NULL; + struct nvkm_fifo_chan *chan; + unsigned long flags; + char ct[8] = "HUB/", en[16] = ""; + int engn; + + er = nvkm_enum_find(fifo->func->fault.reason, info->reason); + ee = nvkm_enum_find(fifo->func->fault.engine, info->engine); + if (info->hub) { + ec = nvkm_enum_find(fifo->func->fault.hubclient, info->client); + } else { + ec = nvkm_enum_find(fifo->func->fault.gpcclient, info->client); + snprintf(ct, sizeof(ct), "GPC%d/", info->gpc); + } + ea = nvkm_enum_find(fifo->func->fault.access, info->access); + + if (ee && ee->data2) { + switch (ee->data2) { + case NVKM_SUBDEV_BAR: + nvkm_mask(device, 0x001704, 0x00000000, 0x00000000); + break; + case NVKM_SUBDEV_INSTMEM: + nvkm_mask(device, 0x001714, 0x00000000, 0x00000000); + break; + case NVKM_ENGINE_IFB: + nvkm_mask(device, 0x001718, 0x00000000, 0x00000000); + break; + default: + engine = nvkm_device_engine(device, ee->data2); + break; + } + } + + if (ee == NULL) { + enum nvkm_devidx engidx = nvkm_top_fault(device, info->engine); + if (engidx < NVKM_SUBDEV_NR) { + const char *src = nvkm_subdev_name[engidx]; + char *dst = en; + do { + *dst++ = toupper(*src++); + } while(*src); + engine = nvkm_device_engine(device, engidx); + } + } else { + snprintf(en, sizeof(en), "%s", ee->name); + } + + spin_lock_irqsave(&fifo->base.lock, flags); + chan = nvkm_fifo_chan_inst_locked(&fifo->base, info->inst); + + nvkm_error(subdev, + "fault %02x [%s] at %016llx engine %02x [%s] client %02x " + "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n", + info->access, ea ? ea->name : "", info->addr, + info->engine, ee ? ee->name : en, + info->client, ct, ec ? ec->name : "", + info->reason, er ? er->name : "", chan ? chan->chid : -1, + info->inst, chan ? chan->object.client->name : "unknown"); + + /* Kill the channel that caused the fault. */ + if (chan) + gk104_fifo_recover_chan(&fifo->base, chan->chid); + + /* Channel recovery will probably have already done this for the + * correct engine(s), but just in case we can't find the channel + * information... + */ + for (engn = 0; engn < fifo->engine_nr && engine; engn++) { + if (fifo->engine[engn].engine == engine) { + gk104_fifo_recover_engn(fifo, engn); + break; + } + } + + spin_unlock_irqrestore(&fifo->base.lock, flags); +} + static const struct nvkm_enum gk104_fifo_bind_reason[] = { { 0x01, "BIND_NOT_UNBOUND" }, @@ -456,88 +623,21 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit) u32 inst = nvkm_rd32(device, 0x002800 + (unit * 0x10)); u32 valo = nvkm_rd32(device, 0x002804 + (unit * 0x10)); u32 vahi = nvkm_rd32(device, 0x002808 + (unit * 0x10)); - u32 stat = nvkm_rd32(device, 0x00280c + (unit * 0x10)); - u32 gpc = (stat & 0x1f000000) >> 24; - u32 client = (stat & 0x00001f00) >> 8; - u32 write = (stat & 0x00000080); - u32 hub = (stat & 0x00000040); - u32 reason = (stat & 0x0000000f); - const struct nvkm_enum *er, *eu, *ec; - struct nvkm_engine *engine = NULL; - struct nvkm_fifo_chan *chan; - unsigned long flags; - char gpcid[8] = "", en[16] = ""; - int engn; - - er = nvkm_enum_find(fifo->func->fault.reason, reason); - eu = nvkm_enum_find(fifo->func->fault.engine, unit); - if (hub) { - ec = nvkm_enum_find(fifo->func->fault.hubclient, client); - } else { - ec = nvkm_enum_find(fifo->func->fault.gpcclient, client); - snprintf(gpcid, sizeof(gpcid), "GPC%d/", gpc); - } - - if (eu && eu->data2) { - switch (eu->data2) { - case NVKM_SUBDEV_BAR: - nvkm_mask(device, 0x001704, 0x00000000, 0x00000000); - break; - case NVKM_SUBDEV_INSTMEM: - nvkm_mask(device, 0x001714, 0x00000000, 0x00000000); - break; - case NVKM_ENGINE_IFB: - nvkm_mask(device, 0x001718, 0x00000000, 0x00000000); - break; - default: - engine = nvkm_device_engine(device, eu->data2); - break; - } - } - - if (eu == NULL) { - enum nvkm_devidx engidx = nvkm_top_fault(device, unit); - if (engidx < NVKM_SUBDEV_NR) { - const char *src = nvkm_subdev_name[engidx]; - char *dst = en; - do { - *dst++ = toupper(*src++); - } while(*src); - engine = nvkm_device_engine(device, engidx); - } - } else { - snprintf(en, sizeof(en), "%s", eu->name); - } - - spin_lock_irqsave(&fifo->base.lock, flags); - chan = nvkm_fifo_chan_inst_locked(&fifo->base, (u64)inst << 12); - - nvkm_error(subdev, - "%s fault at %010llx engine %02x [%s] client %02x [%s%s] " - "reason %02x [%s] on channel %d [%010llx %s]\n", - write ? "write" : "read", (u64)vahi << 32 | valo, - unit, en, client, gpcid, ec ? ec->name : "", - reason, er ? er->name : "", chan ? chan->chid : -1, - (u64)inst << 12, - chan ? chan->object.client->name : "unknown"); - - - /* Kill the channel that caused the fault. */ - if (chan) - gk104_fifo_recover_chan(&fifo->base, chan->chid); - - /* Channel recovery will probably have already done this for the - * correct engine(s), but just in case we can't find the channel - * information... - */ - for (engn = 0; engn < fifo->engine_nr && engine; engn++) { - if (fifo->engine[engn].engine == engine) { - gk104_fifo_recover_engn(fifo, engn); - break; - } - } - - spin_unlock_irqrestore(&fifo->base.lock, flags); + u32 type = nvkm_rd32(device, 0x00280c + (unit * 0x10)); + struct nvkm_fault_data info; + + info.inst = (u64)inst << 12; + info.addr = ((u64)vahi << 32) | valo; + info.time = 0; + info.engine = unit; + info.valid = 1; + info.gpc = (type & 0x1f000000) >> 24; + info.client = (type & 0x00001f00) >> 8; + info.access = (type & 0x00000080) >> 7; + info.hub = (type & 0x00000040) >> 6; + info.reason = (type & 0x000000ff); + + nvkm_fifo_fault(&fifo->base, &info); } static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = { @@ -766,6 +866,34 @@ gk104_fifo_fini(struct nvkm_fifo *base) } static int +gk104_fifo_info(struct nvkm_fifo *base, u64 mthd, u64 *data) +{ + struct gk104_fifo *fifo = gk104_fifo(base); + switch (mthd) { + case NV_DEVICE_FIFO_RUNLISTS: + *data = (1ULL << fifo->runlist_nr) - 1; + return 0; + case NV_DEVICE_FIFO_RUNLIST_ENGINES(0)... + NV_DEVICE_FIFO_RUNLIST_ENGINES(63): { + int runl = mthd - NV_DEVICE_FIFO_RUNLIST_ENGINES(0), engn; + if (runl < fifo->runlist_nr) { + unsigned long engm = fifo->runlist[runl].engm; + struct nvkm_engine *engine; + *data = 0; + for_each_set_bit(engn, &engm, fifo->engine_nr) { + if ((engine = fifo->engine[engn].engine)) + *data |= BIT_ULL(engine->subdev.index); + } + return 0; + } + } + return -EINVAL; + default: + return -EINVAL; + } +} + +static int gk104_fifo_oneinit(struct nvkm_fifo *base) { struct gk104_fifo *fifo = gk104_fifo(base); @@ -813,19 +941,18 @@ gk104_fifo_oneinit(struct nvkm_fifo *base) kfree(map); for (i = 0; i < fifo->runlist_nr; i++) { - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - 0x8000, 0x1000, false, - &fifo->runlist[i].mem[0]); - if (ret) - return ret; - - ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, - 0x8000, 0x1000, false, - &fifo->runlist[i].mem[1]); - if (ret) - return ret; + for (j = 0; j < ARRAY_SIZE(fifo->runlist[i].mem); j++) { + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, + fifo->base.nr * 2/* TSG+chan */ * + fifo->func->runlist->size, + 0x1000, false, + &fifo->runlist[i].mem[j]); + if (ret) + return ret; + } init_waitqueue_head(&fifo->runlist[i].wait); + INIT_LIST_HEAD(&fifo->runlist[i].cgrp); INIT_LIST_HEAD(&fifo->runlist[i].chan); } @@ -868,6 +995,9 @@ gk104_fifo_init(struct nvkm_fifo *base) nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12); + if (fifo->func->init_pbdma_timeout) + fifo->func->init_pbdma_timeout(fifo); + nvkm_wr32(device, 0x002100, 0xffffffff); nvkm_wr32(device, 0x002140, 0x7fffffff); } @@ -894,13 +1024,16 @@ static const struct nvkm_fifo_func gk104_fifo_ = { .dtor = gk104_fifo_dtor, .oneinit = gk104_fifo_oneinit, + .info = gk104_fifo_info, .init = gk104_fifo_init, .fini = gk104_fifo_fini, .intr = gk104_fifo_intr, + .fault = gk104_fifo_fault, .uevent_init = gk104_fifo_uevent_init, .uevent_fini = gk104_fifo_uevent_fini, .recover_chan = gk104_fifo_recover_chan, .class_get = gk104_fifo_class_get, + .class_new = gk104_fifo_class_new, }; int @@ -919,6 +1052,13 @@ gk104_fifo_new_(const struct gk104_fifo_func *func, struct nvkm_device *device, } const struct nvkm_enum +gk104_fifo_fault_access[] = { + { 0x0, "READ" }, + { 0x1, "WRITE" }, + {} +}; + +const struct nvkm_enum gk104_fifo_fault_engine[] = { { 0x00, "GR", NULL, NVKM_ENGINE_GR }, { 0x01, "DISPLAY" }, @@ -1035,14 +1175,13 @@ gk104_fifo_fault_gpcclient[] = { static const struct gk104_fifo_func gk104_fifo = { + .fault.access = gk104_fifo_fault_access, .fault.engine = gk104_fifo_fault_engine, .fault.reason = gk104_fifo_fault_reason, .fault.hubclient = gk104_fifo_fault_hubclient, .fault.gpcclient = gk104_fifo_fault_gpcclient, - .chan = { - &gk104_fifo_gpfifo_oclass, - NULL - }, + .runlist = &gk104_fifo_runlist, + .chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h index 1579785cf941..d295b81e18d6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h @@ -3,6 +3,7 @@ #define __GK104_FIFO_H__ #define gk104_fifo(p) container_of((p), struct gk104_fifo, base) #include "priv.h" +struct nvkm_fifo_cgrp; #include <core/enum.h> #include <subdev/mmu.h> @@ -31,6 +32,7 @@ struct gk104_fifo { struct nvkm_memory *mem[2]; int next; wait_queue_head_t wait; + struct list_head cgrp; struct list_head chan; u32 engm; } runlist[16]; @@ -43,14 +45,36 @@ struct gk104_fifo { }; struct gk104_fifo_func { + void (*init_pbdma_timeout)(struct gk104_fifo *); + struct { + const struct nvkm_enum *access; const struct nvkm_enum *engine; const struct nvkm_enum *reason; const struct nvkm_enum *hubclient; const struct nvkm_enum *gpcclient; } fault; - const struct nvkm_fifo_chan_oclass *chan[]; + const struct gk104_fifo_runlist_func { + u8 size; + void (*cgrp)(struct nvkm_fifo_cgrp *, + struct nvkm_memory *, u32 offset); + void (*chan)(struct gk104_fifo_chan *, + struct nvkm_memory *, u32 offset); + } *runlist; + + struct gk104_fifo_user_user { + struct nvkm_sclass user; + int (*ctor)(const struct nvkm_oclass *, void *, u32, + struct nvkm_object **); + } user; + + struct gk104_fifo_chan_user { + struct nvkm_sclass user; + int (*ctor)(struct gk104_fifo *, const struct nvkm_oclass *, + void *, u32, struct nvkm_object **); + } chan; + bool cgrp_force; }; int gk104_fifo_new_(const struct gk104_fifo_func *, struct nvkm_device *, @@ -59,30 +83,23 @@ void gk104_fifo_runlist_insert(struct gk104_fifo *, struct gk104_fifo_chan *); void gk104_fifo_runlist_remove(struct gk104_fifo *, struct gk104_fifo_chan *); void gk104_fifo_runlist_commit(struct gk104_fifo *, int runl); -static inline u64 -gk104_fifo_engine_subdev(int engine) -{ - switch (engine) { - case 0: return (1ULL << NVKM_ENGINE_GR) | - (1ULL << NVKM_ENGINE_SW) | - (1ULL << NVKM_ENGINE_CE2); - case 1: return (1ULL << NVKM_ENGINE_MSPDEC); - case 2: return (1ULL << NVKM_ENGINE_MSPPP); - case 3: return (1ULL << NVKM_ENGINE_MSVLD); - case 4: return (1ULL << NVKM_ENGINE_CE0); - case 5: return (1ULL << NVKM_ENGINE_CE1); - case 6: return (1ULL << NVKM_ENGINE_MSENC); - default: - WARN_ON(1); - return 0; - } -} - +extern const struct nvkm_enum gk104_fifo_fault_access[]; extern const struct nvkm_enum gk104_fifo_fault_engine[]; extern const struct nvkm_enum gk104_fifo_fault_reason[]; extern const struct nvkm_enum gk104_fifo_fault_hubclient[]; extern const struct nvkm_enum gk104_fifo_fault_gpcclient[]; +extern const struct gk104_fifo_runlist_func gk104_fifo_runlist; +void gk104_fifo_runlist_chan(struct gk104_fifo_chan *, + struct nvkm_memory *, u32); + +extern const struct gk104_fifo_runlist_func gk110_fifo_runlist; +void gk110_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *, + struct nvkm_memory *, u32); + +void gk208_fifo_init_pbdma_timeout(struct gk104_fifo *); extern const struct nvkm_enum gm107_fifo_fault_engine[]; +extern const struct gk104_fifo_runlist_func gm107_fifo_runlist; + extern const struct nvkm_enum gp100_fifo_fault_engine[]; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c index b2f8ab7bf847..ac7655a130fb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c @@ -22,18 +22,38 @@ * Authors: Ben Skeggs */ #include "gk104.h" +#include "cgrp.h" #include "changk104.h" +#include <core/memory.h> + +#include <nvif/class.h> + +void +gk110_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp, + struct nvkm_memory *memory, u32 offset) +{ + nvkm_wo32(memory, offset + 0, (cgrp->chan_nr << 26) | (128 << 18) | + (3 << 14) | 0x00002000 | cgrp->id); + nvkm_wo32(memory, offset + 4, 0x00000000); +} + +const struct gk104_fifo_runlist_func +gk110_fifo_runlist = { + .size = 8, + .cgrp = gk110_fifo_runlist_cgrp, + .chan = gk104_fifo_runlist_chan, +}; + static const struct gk104_fifo_func gk110_fifo = { + .fault.access = gk104_fifo_fault_access, .fault.engine = gk104_fifo_fault_engine, .fault.reason = gk104_fifo_fault_reason, .fault.hubclient = gk104_fifo_fault_hubclient, .fault.gpcclient = gk104_fifo_fault_gpcclient, - .chan = { - &gk110_fifo_gpfifo_oclass, - NULL - }, + .runlist = &gk110_fifo_runlist, + .chan = {{0,0,KEPLER_CHANNEL_GPFIFO_B}, gk104_fifo_gpfifo_new }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c index 160617d376e4..5ea7e452cc66 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c @@ -24,16 +24,28 @@ #include "gk104.h" #include "changk104.h" +#include <nvif/class.h> + +void +gk208_fifo_init_pbdma_timeout(struct gk104_fifo *fifo) +{ + struct nvkm_device *device = fifo->base.engine.subdev.device; + int i; + + for (i = 0; i < fifo->pbdma_nr; i++) + nvkm_wr32(device, 0x04012c + (i * 0x2000), 0x0000ffff); +} + static const struct gk104_fifo_func gk208_fifo = { + .init_pbdma_timeout = gk208_fifo_init_pbdma_timeout, + .fault.access = gk104_fifo_fault_access, .fault.engine = gk104_fifo_fault_engine, .fault.reason = gk104_fifo_fault_reason, .fault.hubclient = gk104_fifo_fault_hubclient, .fault.gpcclient = gk104_fifo_fault_gpcclient, - .chan = { - &gk104_fifo_gpfifo_oclass, - NULL - }, + .runlist = &gk110_fifo_runlist, + .chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c index be9f5c16ed7d..535a0eb67a5f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c @@ -22,16 +22,18 @@ #include "gk104.h" #include "changk104.h" +#include <nvif/class.h> + static const struct gk104_fifo_func gk20a_fifo = { + .init_pbdma_timeout = gk208_fifo_init_pbdma_timeout, + .fault.access = gk104_fifo_fault_access, .fault.engine = gk104_fifo_fault_engine, .fault.reason = gk104_fifo_fault_reason, .fault.hubclient = gk104_fifo_fault_hubclient, .fault.gpcclient = gk104_fifo_fault_gpcclient, - .chan = { - &gk104_fifo_gpfifo_oclass, - NULL - }, + .runlist = &gk110_fifo_runlist, + .chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c index 29c080683b32..79ae19b1db67 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c @@ -24,6 +24,25 @@ #include "gk104.h" #include "changk104.h" +#include <core/gpuobj.h> + +#include <nvif/class.h> + +static void +gm107_fifo_runlist_chan(struct gk104_fifo_chan *chan, + struct nvkm_memory *memory, u32 offset) +{ + nvkm_wo32(memory, offset + 0, chan->base.chid); + nvkm_wo32(memory, offset + 4, chan->base.inst->addr >> 12); +} + +const struct gk104_fifo_runlist_func +gm107_fifo_runlist = { + .size = 8, + .cgrp = gk110_fifo_runlist_cgrp, + .chan = gm107_fifo_runlist_chan, +}; + const struct nvkm_enum gm107_fifo_fault_engine[] = { { 0x01, "DISPLAY" }, @@ -49,14 +68,14 @@ gm107_fifo_fault_engine[] = { static const struct gk104_fifo_func gm107_fifo = { + .init_pbdma_timeout = gk208_fifo_init_pbdma_timeout, + .fault.access = gk104_fifo_fault_access, .fault.engine = gm107_fifo_fault_engine, .fault.reason = gk104_fifo_fault_reason, .fault.hubclient = gk104_fifo_fault_hubclient, .fault.gpcclient = gk104_fifo_fault_gpcclient, - .chan = { - &gk110_fifo_gpfifo_oclass, - NULL - }, + .runlist = &gm107_fifo_runlist, + .chan = {{0,0,KEPLER_CHANNEL_GPFIFO_B}, gk104_fifo_gpfifo_new }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c index b069f785c5d8..49565faa854d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c @@ -24,16 +24,18 @@ #include "gk104.h" #include "changk104.h" +#include <nvif/class.h> + static const struct gk104_fifo_func gm200_fifo = { + .init_pbdma_timeout = gk208_fifo_init_pbdma_timeout, + .fault.access = gk104_fifo_fault_access, .fault.engine = gm107_fifo_fault_engine, .fault.reason = gk104_fifo_fault_reason, .fault.hubclient = gk104_fifo_fault_hubclient, .fault.gpcclient = gk104_fifo_fault_gpcclient, - .chan = { - &gm200_fifo_gpfifo_oclass, - NULL - }, + .runlist = &gm107_fifo_runlist, + .chan = {{0,0,MAXWELL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c index 2ed87c2e8299..46736513bd11 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c @@ -22,16 +22,18 @@ #include "gk104.h" #include "changk104.h" +#include <nvif/class.h> + static const struct gk104_fifo_func gm20b_fifo = { + .init_pbdma_timeout = gk208_fifo_init_pbdma_timeout, + .fault.access = gk104_fifo_fault_access, .fault.engine = gm107_fifo_fault_engine, .fault.reason = gk104_fifo_fault_reason, .fault.hubclient = gk104_fifo_fault_hubclient, .fault.gpcclient = gk104_fifo_fault_gpcclient, - .chan = { - &gm200_fifo_gpfifo_oclass, - NULL - }, + .runlist = &gm107_fifo_runlist, + .chan = {{0,0,MAXWELL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c index 41f16cf5a918..e2f8f9087d7c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c @@ -24,6 +24,8 @@ #include "gk104.h" #include "changk104.h" +#include <nvif/class.h> + const struct nvkm_enum gp100_fifo_fault_engine[] = { { 0x01, "DISPLAY" }, @@ -50,14 +52,15 @@ gp100_fifo_fault_engine[] = { static const struct gk104_fifo_func gp100_fifo = { + .init_pbdma_timeout = gk208_fifo_init_pbdma_timeout, + .fault.access = gk104_fifo_fault_access, .fault.engine = gp100_fifo_fault_engine, .fault.reason = gk104_fifo_fault_reason, .fault.hubclient = gk104_fifo_fault_hubclient, .fault.gpcclient = gk104_fifo_fault_gpcclient, - .chan = { - &gp100_fifo_gpfifo_oclass, - NULL - }, + .runlist = &gm107_fifo_runlist, + .chan = {{0,0,PASCAL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, + .cgrp_force = true, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c index 4af96c3e69ff..7733bf7c6545 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c @@ -22,16 +22,19 @@ #include "gk104.h" #include "changk104.h" +#include <nvif/class.h> + static const struct gk104_fifo_func gp10b_fifo = { + .init_pbdma_timeout = gk208_fifo_init_pbdma_timeout, + .fault.access = gk104_fifo_fault_access, .fault.engine = gp100_fifo_fault_engine, .fault.reason = gk104_fifo_fault_reason, .fault.hubclient = gk104_fifo_fault_hubclient, .fault.gpcclient = gk104_fifo_fault_gpcclient, - .chan = { - &gp100_fifo_gpfifo_oclass, - NULL - }, + .runlist = &gm107_fifo_runlist, + .chan = {{0,0,PASCAL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new }, + .cgrp_force = true, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c index 80c87521bebe..118b37aea318 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "changk104.h" +#include "cgrp.h" #include <core/client.h> #include <core/gpuobj.h> @@ -33,27 +34,40 @@ #include <nvif/cla06f.h> #include <nvif/unpack.h> -static int -gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan) +int +gk104_fifo_gpfifo_kick_locked(struct gk104_fifo_chan *chan) { struct gk104_fifo *fifo = chan->fifo; struct nvkm_subdev *subdev = &fifo->base.engine.subdev; struct nvkm_device *device = subdev->device; struct nvkm_client *client = chan->base.object.client; + struct nvkm_fifo_cgrp *cgrp = chan->cgrp; int ret = 0; - mutex_lock(&subdev->mutex); - nvkm_wr32(device, 0x002634, chan->base.chid); + if (cgrp) + nvkm_wr32(device, 0x002634, cgrp->id | 0x01000000); + else + nvkm_wr32(device, 0x002634, chan->base.chid); if (nvkm_msec(device, 2000, if (!(nvkm_rd32(device, 0x002634) & 0x00100000)) break; ) < 0) { - nvkm_error(subdev, "channel %d [%s] kick timeout\n", - chan->base.chid, client->name); + nvkm_error(subdev, "%s %d [%s] kick timeout\n", + cgrp ? "tsg" : "channel", + cgrp ? cgrp->id : chan->base.chid, client->name); nvkm_fifo_recover_chan(&fifo->base, chan->base.chid); ret = -ETIMEDOUT; } - mutex_unlock(&subdev->mutex); + return ret; +} + +int +gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan) +{ + int ret; + mutex_lock(&chan->base.fifo->engine.subdev.mutex); + ret = gk104_fifo_gpfifo_kick_locked(chan); + mutex_unlock(&chan->base.fifo->engine.subdev.mutex); return ret; } @@ -62,9 +76,8 @@ gk104_fifo_gpfifo_engine_addr(struct nvkm_engine *engine) { switch (engine->subdev.index) { case NVKM_ENGINE_SW : - case NVKM_ENGINE_CE0 : - case NVKM_ENGINE_CE1 : - case NVKM_ENGINE_CE2 : return 0x0000; + case NVKM_ENGINE_CE0...NVKM_ENGINE_CE_LAST: + return 0; case NVKM_ENGINE_GR : return 0x0210; case NVKM_ENGINE_SEC : return 0x0220; case NVKM_ENGINE_MSPDEC: return 0x0250; @@ -133,7 +146,7 @@ gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, return 0; } -static void +void gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base, struct nvkm_engine *engine) { @@ -142,7 +155,7 @@ gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base, nvkm_gpuobj_del(&chan->engn[engine->subdev.index].inst); } -static int +int gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base, struct nvkm_engine *engine, struct nvkm_object *object) @@ -167,7 +180,7 @@ gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base, chan->engn[engn].vma, NULL, 0); } -static void +void gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base) { struct gk104_fifo_chan *chan = gk104_fifo_chan(base); @@ -185,7 +198,7 @@ gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base) nvkm_wr32(device, 0x800000 + coff, 0x00000000); } -static void +void gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base) { struct gk104_fifo_chan *chan = gk104_fifo_chan(base); @@ -205,13 +218,15 @@ gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base) } } -static void * +void * gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base) { - return gk104_fifo_chan(base); + struct gk104_fifo_chan *chan = gk104_fifo_chan(base); + kfree(chan->cgrp); + return chan; } -static const struct nvkm_fifo_chan_func +const struct nvkm_fifo_chan_func gk104_fifo_gpfifo_func = { .dtor = gk104_fifo_gpfifo_dtor, .init = gk104_fifo_gpfifo_init, @@ -223,62 +238,30 @@ gk104_fifo_gpfifo_func = { .engine_fini = gk104_fifo_gpfifo_engine_fini, }; -struct gk104_fifo_chan_func { - u32 engine; - u64 subdev; -}; - static int -gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func, - struct gk104_fifo *fifo, u32 *engmask, u16 *chid, +gk104_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid, u64 vmm, u64 ioffset, u64 ilength, const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { struct gk104_fifo_chan *chan; - int runlist = -1, ret = -ENOSYS, i, j; - u32 engines = 0, present = 0; + int runlist = ffs(*runlists) -1, ret, i; + unsigned long engm; u64 subdevs = 0; u64 usermem; - if (!vmm) + if (!vmm || runlist < 0 || runlist >= fifo->runlist_nr) return -EINVAL; + *runlists = BIT_ULL(runlist); - /* Determine which downstream engines are present */ - for (i = 0; i < fifo->engine_nr; i++) { - struct nvkm_engine *engine = fifo->engine[i].engine; - if (engine) { - u64 submask = BIT_ULL(engine->subdev.index); - for (j = 0; func[j].subdev; j++) { - if (func[j].subdev & submask) { - present |= func[j].engine; - break; - } - } - - if (!func[j].subdev) - continue; - - if (runlist < 0 && (*engmask & present)) - runlist = fifo->engine[i].runl; - if (runlist == fifo->engine[i].runl) { - engines |= func[j].engine; - subdevs |= func[j].subdev; - } - } - } - - /* Just an engine mask query? All done here! */ - if (!*engmask) { - *engmask = present; - return nvkm_object_new(oclass, NULL, 0, pobject); + engm = fifo->runlist[runlist].engm; + for_each_set_bit(i, &engm, fifo->engine_nr) { + if (fifo->engine[i].engine) + subdevs |= BIT_ULL(fifo->engine[i].engine->subdev.index); } - /* No runlist? No supported engines. */ - *engmask = present; - if (runlist < 0) - return -ENODEV; - *engmask = engines; + if (subdevs & BIT_ULL(NVKM_ENGINE_GR)) + subdevs |= BIT_ULL(NVKM_ENGINE_SW); /* Allocate the channel. */ if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) @@ -297,6 +280,18 @@ gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func, *chid = chan->base.chid; + /* Hack to support GPUs where even individual channels should be + * part of a channel group. + */ + if (fifo->func->cgrp_force) { + if (!(chan->cgrp = kmalloc(sizeof(*chan->cgrp), GFP_KERNEL))) + return -ENOMEM; + chan->cgrp->id = chan->base.chid; + INIT_LIST_HEAD(&chan->cgrp->head); + INIT_LIST_HEAD(&chan->cgrp->chan); + chan->cgrp->chan_nr = 0; + } + /* Clear channel control registers. */ usermem = chan->base.chid * 0x200; ilength = order_base_2(ilength / 8); @@ -328,45 +323,25 @@ gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func, return 0; } -static const struct gk104_fifo_chan_func -gk104_fifo_gpfifo[] = { - { NVA06F_V0_ENGINE_SW | NVA06F_V0_ENGINE_GR, - BIT_ULL(NVKM_ENGINE_SW) | BIT_ULL(NVKM_ENGINE_GR) - }, - { NVA06F_V0_ENGINE_SEC , BIT_ULL(NVKM_ENGINE_SEC ) }, - { NVA06F_V0_ENGINE_MSVLD , BIT_ULL(NVKM_ENGINE_MSVLD ) }, - { NVA06F_V0_ENGINE_MSPDEC, BIT_ULL(NVKM_ENGINE_MSPDEC) }, - { NVA06F_V0_ENGINE_MSPPP , BIT_ULL(NVKM_ENGINE_MSPPP ) }, - { NVA06F_V0_ENGINE_MSENC , BIT_ULL(NVKM_ENGINE_MSENC ) }, - { NVA06F_V0_ENGINE_VIC , BIT_ULL(NVKM_ENGINE_VIC ) }, - { NVA06F_V0_ENGINE_NVDEC , BIT_ULL(NVKM_ENGINE_NVDEC ) }, - { NVA06F_V0_ENGINE_NVENC0, BIT_ULL(NVKM_ENGINE_NVENC0) }, - { NVA06F_V0_ENGINE_NVENC1, BIT_ULL(NVKM_ENGINE_NVENC1) }, - { NVA06F_V0_ENGINE_CE0 , BIT_ULL(NVKM_ENGINE_CE0 ) }, - { NVA06F_V0_ENGINE_CE1 , BIT_ULL(NVKM_ENGINE_CE1 ) }, - { NVA06F_V0_ENGINE_CE2 , BIT_ULL(NVKM_ENGINE_CE2 ) }, - {} -}; - int -gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, +gk104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) { struct nvkm_object *parent = oclass->parent; union { struct kepler_channel_gpfifo_a_v0 v0; } *args = data; - struct gk104_fifo *fifo = gk104_fifo(base); int ret = -ENOSYS; nvif_ioctl(parent, "create channel gpfifo size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " - "ioffset %016llx ilength %08x engine %08x\n", + "ioffset %016llx ilength %08x " + "runlist %016llx\n", args->v0.version, args->v0.vmm, args->v0.ioffset, - args->v0.ilength, args->v0.engines); - return gk104_fifo_gpfifo_new_(gk104_fifo_gpfifo, fifo, - &args->v0.engines, + args->v0.ilength, args->v0.runlist); + return gk104_fifo_gpfifo_new_(fifo, + &args->v0.runlist, &args->v0.chid, args->v0.vmm, args->v0.ioffset, @@ -376,11 +351,3 @@ gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, return ret; } - -const struct nvkm_fifo_chan_oclass -gk104_fifo_gpfifo_oclass = { - .base.oclass = KEPLER_CHANNEL_GPFIFO_A, - .base.minver = 0, - .base.maxver = 0, - .ctor = gk104_fifo_gpfifo_new, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c new file mode 100644 index 000000000000..9598853ced56 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c @@ -0,0 +1,225 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "changk104.h" +#include "cgrp.h" + +#include <core/client.h> +#include <core/gpuobj.h> + +#include <nvif/cla06f.h> +#include <nvif/unpack.h> + +static int +gv100_fifo_gpfifo_engine_valid(struct gk104_fifo_chan *chan, bool ce, bool valid) +{ + struct nvkm_subdev *subdev = &chan->base.fifo->engine.subdev; + struct nvkm_device *device = subdev->device; + const u32 mask = ce ? 0x00020000 : 0x00010000; + const u32 data = valid ? mask : 0x00000000; + int ret; + + /* Block runlist to prevent the channel from being rescheduled. */ + mutex_lock(&subdev->mutex); + nvkm_mask(device, 0x002630, BIT(chan->runl), BIT(chan->runl)); + + /* Preempt the channel. */ + ret = gk104_fifo_gpfifo_kick_locked(chan); + if (ret == 0) { + /* Update engine context validity. */ + nvkm_kmap(chan->base.inst); + nvkm_mo32(chan->base.inst, 0x0ac, mask, data); + nvkm_done(chan->base.inst); + } + + /* Resume runlist. */ + nvkm_mask(device, 0x002630, BIT(chan->runl), 0); + mutex_unlock(&subdev->mutex); + return ret; +} + +static int +gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base, + struct nvkm_engine *engine, bool suspend) +{ + struct gk104_fifo_chan *chan = gk104_fifo_chan(base); + struct nvkm_gpuobj *inst = chan->base.inst; + int ret; + + if (engine->subdev.index >= NVKM_ENGINE_CE0 && + engine->subdev.index <= NVKM_ENGINE_CE_LAST) + return gk104_fifo_gpfifo_kick(chan); + + ret = gv100_fifo_gpfifo_engine_valid(chan, false, false); + if (ret && suspend) + return ret; + + nvkm_kmap(inst); + nvkm_wo32(inst, 0x0210, 0x00000000); + nvkm_wo32(inst, 0x0214, 0x00000000); + nvkm_done(inst); + return ret; +} + +static int +gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base, + struct nvkm_engine *engine) +{ + struct gk104_fifo_chan *chan = gk104_fifo_chan(base); + struct nvkm_gpuobj *inst = chan->base.inst; + u64 addr; + + if (engine->subdev.index >= NVKM_ENGINE_CE0 && + engine->subdev.index <= NVKM_ENGINE_CE_LAST) + return 0; + + addr = chan->engn[engine->subdev.index].vma->addr; + nvkm_kmap(inst); + nvkm_wo32(inst, 0x210, lower_32_bits(addr) | 0x00000004); + nvkm_wo32(inst, 0x214, upper_32_bits(addr)); + nvkm_done(inst); + + return gv100_fifo_gpfifo_engine_valid(chan, false, true); +} + +const struct nvkm_fifo_chan_func +gv100_fifo_gpfifo_func = { + .dtor = gk104_fifo_gpfifo_dtor, + .init = gk104_fifo_gpfifo_init, + .fini = gk104_fifo_gpfifo_fini, + .ntfy = gf100_fifo_chan_ntfy, + .engine_ctor = gk104_fifo_gpfifo_engine_ctor, + .engine_dtor = gk104_fifo_gpfifo_engine_dtor, + .engine_init = gv100_fifo_gpfifo_engine_init, + .engine_fini = gv100_fifo_gpfifo_engine_fini, +}; + +static int +gv100_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid, + u64 vmm, u64 ioffset, u64 ilength, + const struct nvkm_oclass *oclass, + struct nvkm_object **pobject) +{ + struct gk104_fifo_chan *chan; + int runlist = ffs(*runlists) -1, ret, i; + unsigned long engm; + u64 subdevs = 0; + u64 usermem; + + if (!vmm || runlist < 0 || runlist >= fifo->runlist_nr) + return -EINVAL; + *runlists = BIT_ULL(runlist); + + engm = fifo->runlist[runlist].engm; + for_each_set_bit(i, &engm, fifo->engine_nr) { + if (fifo->engine[i].engine) + subdevs |= BIT_ULL(fifo->engine[i].engine->subdev.index); + } + + /* Allocate the channel. */ + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + *pobject = &chan->base.object; + chan->fifo = fifo; + chan->runl = runlist; + INIT_LIST_HEAD(&chan->head); + + ret = nvkm_fifo_chan_ctor(&gv100_fifo_gpfifo_func, &fifo->base, + 0x1000, 0x1000, true, vmm, 0, subdevs, + 1, fifo->user.bar->addr, 0x200, + oclass, &chan->base); + if (ret) + return ret; + + *chid = chan->base.chid; + + /* Hack to support GPUs where even individual channels should be + * part of a channel group. + */ + if (fifo->func->cgrp_force) { + if (!(chan->cgrp = kmalloc(sizeof(*chan->cgrp), GFP_KERNEL))) + return -ENOMEM; + chan->cgrp->id = chan->base.chid; + INIT_LIST_HEAD(&chan->cgrp->head); + INIT_LIST_HEAD(&chan->cgrp->chan); + chan->cgrp->chan_nr = 0; + } + + /* Clear channel control registers. */ + usermem = chan->base.chid * 0x200; + ilength = order_base_2(ilength / 8); + + nvkm_kmap(fifo->user.mem); + for (i = 0; i < 0x200; i += 4) + nvkm_wo32(fifo->user.mem, usermem + i, 0x00000000); + nvkm_done(fifo->user.mem); + usermem = nvkm_memory_addr(fifo->user.mem) + usermem; + + /* RAMFC */ + nvkm_kmap(chan->base.inst); + nvkm_wo32(chan->base.inst, 0x008, lower_32_bits(usermem)); + nvkm_wo32(chan->base.inst, 0x00c, upper_32_bits(usermem)); + nvkm_wo32(chan->base.inst, 0x010, 0x0000face); + nvkm_wo32(chan->base.inst, 0x030, 0x7ffff902); + nvkm_wo32(chan->base.inst, 0x048, lower_32_bits(ioffset)); + nvkm_wo32(chan->base.inst, 0x04c, upper_32_bits(ioffset) | + (ilength << 16)); + nvkm_wo32(chan->base.inst, 0x084, 0x20400000); + nvkm_wo32(chan->base.inst, 0x094, 0x30000001); + nvkm_wo32(chan->base.inst, 0x0e4, 0x00000020); + nvkm_wo32(chan->base.inst, 0x0e8, chan->base.chid); + nvkm_wo32(chan->base.inst, 0x0f4, 0x00001100); + nvkm_wo32(chan->base.inst, 0x0f8, 0x10003080); + nvkm_mo32(chan->base.inst, 0x218, 0x00000000, 0x00000000); + nvkm_wo32(chan->base.inst, 0x220, 0x020a1000); + nvkm_wo32(chan->base.inst, 0x224, 0x00000000); + nvkm_done(chan->base.inst); + return gv100_fifo_gpfifo_engine_valid(chan, true, true); +} + +int +gv100_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, + void *data, u32 size, struct nvkm_object **pobject) +{ + struct nvkm_object *parent = oclass->parent; + union { + struct kepler_channel_gpfifo_a_v0 v0; + } *args = data; + int ret = -ENOSYS; + + nvif_ioctl(parent, "create channel gpfifo size %d\n", size); + if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { + nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx " + "ioffset %016llx ilength %08x " + "runlist %016llx\n", + args->v0.version, args->v0.vmm, args->v0.ioffset, + args->v0.ilength, args->v0.runlist); + return gv100_fifo_gpfifo_new_(fifo, + &args->v0.runlist, + &args->v0.chid, + args->v0.vmm, + args->v0.ioffset, + args->v0.ilength, + oclass, pobject); + } + + return ret; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c new file mode 100644 index 000000000000..4e1d159c0ae7 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c @@ -0,0 +1,306 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gk104.h" +#include "cgrp.h" +#include "changk104.h" +#include "user.h" + +#include <core/gpuobj.h> + +#include <nvif/class.h> + +static void +gv100_fifo_runlist_chan(struct gk104_fifo_chan *chan, + struct nvkm_memory *memory, u32 offset) +{ + struct nvkm_memory *usermem = chan->fifo->user.mem; + const u64 user = nvkm_memory_addr(usermem) + (chan->base.chid * 0x200); + const u64 inst = chan->base.inst->addr; + + nvkm_wo32(memory, offset + 0x0, lower_32_bits(user)); + nvkm_wo32(memory, offset + 0x4, upper_32_bits(user)); + nvkm_wo32(memory, offset + 0x8, lower_32_bits(inst) | chan->base.chid); + nvkm_wo32(memory, offset + 0xc, upper_32_bits(inst)); +} + +static void +gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp, + struct nvkm_memory *memory, u32 offset) +{ + nvkm_wo32(memory, offset + 0x0, (128 << 24) | (3 << 16) | 0x00000001); + nvkm_wo32(memory, offset + 0x4, cgrp->chan_nr); + nvkm_wo32(memory, offset + 0x8, cgrp->id); + nvkm_wo32(memory, offset + 0xc, 0x00000000); +} + +const struct gk104_fifo_runlist_func +gv100_fifo_runlist = { + .size = 16, + .cgrp = gv100_fifo_runlist_cgrp, + .chan = gv100_fifo_runlist_chan, +}; + +static const struct nvkm_enum +gv100_fifo_fault_gpcclient[] = { + { 0x00, "T1_0" }, + { 0x01, "T1_1" }, + { 0x02, "T1_2" }, + { 0x03, "T1_3" }, + { 0x04, "T1_4" }, + { 0x05, "T1_5" }, + { 0x06, "T1_6" }, + { 0x07, "T1_7" }, + { 0x08, "PE_0" }, + { 0x09, "PE_1" }, + { 0x0a, "PE_2" }, + { 0x0b, "PE_3" }, + { 0x0c, "PE_4" }, + { 0x0d, "PE_5" }, + { 0x0e, "PE_6" }, + { 0x0f, "PE_7" }, + { 0x10, "RAST" }, + { 0x11, "GCC" }, + { 0x12, "GPCCS" }, + { 0x13, "PROP_0" }, + { 0x14, "PROP_1" }, + { 0x15, "PROP_2" }, + { 0x16, "PROP_3" }, + { 0x17, "GPM" }, + { 0x18, "LTP_UTLB_0" }, + { 0x19, "LTP_UTLB_1" }, + { 0x1a, "LTP_UTLB_2" }, + { 0x1b, "LTP_UTLB_3" }, + { 0x1c, "LTP_UTLB_4" }, + { 0x1d, "LTP_UTLB_5" }, + { 0x1e, "LTP_UTLB_6" }, + { 0x1f, "LTP_UTLB_7" }, + { 0x20, "RGG_UTLB" }, + { 0x21, "T1_8" }, + { 0x22, "T1_9" }, + { 0x23, "T1_10" }, + { 0x24, "T1_11" }, + { 0x25, "T1_12" }, + { 0x26, "T1_13" }, + { 0x27, "T1_14" }, + { 0x28, "T1_15" }, + { 0x29, "TPCCS_0" }, + { 0x2a, "TPCCS_1" }, + { 0x2b, "TPCCS_2" }, + { 0x2c, "TPCCS_3" }, + { 0x2d, "TPCCS_4" }, + { 0x2e, "TPCCS_5" }, + { 0x2f, "TPCCS_6" }, + { 0x30, "TPCCS_7" }, + { 0x31, "PE_8" }, + { 0x32, "PE_9" }, + { 0x33, "TPCCS_8" }, + { 0x34, "TPCCS_9" }, + { 0x35, "T1_16" }, + { 0x36, "T1_17" }, + { 0x37, "T1_18" }, + { 0x38, "T1_19" }, + { 0x39, "PE_10" }, + { 0x3a, "PE_11" }, + { 0x3b, "TPCCS_10" }, + { 0x3c, "TPCCS_11" }, + { 0x3d, "T1_20" }, + { 0x3e, "T1_21" }, + { 0x3f, "T1_22" }, + { 0x40, "T1_23" }, + { 0x41, "PE_12" }, + { 0x42, "PE_13" }, + { 0x43, "TPCCS_12" }, + { 0x44, "TPCCS_13" }, + { 0x45, "T1_24" }, + { 0x46, "T1_25" }, + { 0x47, "T1_26" }, + { 0x48, "T1_27" }, + { 0x49, "PE_14" }, + { 0x4a, "PE_15" }, + { 0x4b, "TPCCS_14" }, + { 0x4c, "TPCCS_15" }, + { 0x4d, "T1_28" }, + { 0x4e, "T1_29" }, + { 0x4f, "T1_30" }, + { 0x50, "T1_31" }, + { 0x51, "PE_16" }, + { 0x52, "PE_17" }, + { 0x53, "TPCCS_16" }, + { 0x54, "TPCCS_17" }, + { 0x55, "T1_32" }, + { 0x56, "T1_33" }, + { 0x57, "T1_34" }, + { 0x58, "T1_35" }, + { 0x59, "PE_18" }, + { 0x5a, "PE_19" }, + { 0x5b, "TPCCS_18" }, + { 0x5c, "TPCCS_19" }, + { 0x5d, "T1_36" }, + { 0x5e, "T1_37" }, + { 0x5f, "T1_38" }, + { 0x60, "T1_39" }, + {} +}; + +static const struct nvkm_enum +gv100_fifo_fault_hubclient[] = { + { 0x00, "VIP" }, + { 0x01, "CE0" }, + { 0x02, "CE1" }, + { 0x03, "DNISO" }, + { 0x04, "FE" }, + { 0x05, "FECS" }, + { 0x06, "HOST" }, + { 0x07, "HOST_CPU" }, + { 0x08, "HOST_CPU_NB" }, + { 0x09, "ISO" }, + { 0x0a, "MMU" }, + { 0x0b, "NVDEC" }, + { 0x0d, "NVENC1" }, + { 0x0e, "NISO" }, + { 0x0f, "P2P" }, + { 0x10, "PD" }, + { 0x11, "PERF" }, + { 0x12, "PMU" }, + { 0x13, "RASTERTWOD" }, + { 0x14, "SCC" }, + { 0x15, "SCC_NB" }, + { 0x16, "SEC" }, + { 0x17, "SSYNC" }, + { 0x18, "CE2" }, + { 0x19, "XV" }, + { 0x1a, "MMU_NB" }, + { 0x1b, "NVENC0" }, + { 0x1c, "DFALCON" }, + { 0x1d, "SKED" }, + { 0x1e, "AFALCON" }, + { 0x1f, "DONT_CARE" }, + { 0x20, "HSCE0" }, + { 0x21, "HSCE1" }, + { 0x22, "HSCE2" }, + { 0x23, "HSCE3" }, + { 0x24, "HSCE4" }, + { 0x25, "HSCE5" }, + { 0x26, "HSCE6" }, + { 0x27, "HSCE7" }, + { 0x28, "HSCE8" }, + { 0x29, "HSCE9" }, + { 0x2a, "HSHUB" }, + { 0x2b, "PTP_X0" }, + { 0x2c, "PTP_X1" }, + { 0x2d, "PTP_X2" }, + { 0x2e, "PTP_X3" }, + { 0x2f, "PTP_X4" }, + { 0x30, "PTP_X5" }, + { 0x31, "PTP_X6" }, + { 0x32, "PTP_X7" }, + { 0x33, "NVENC2" }, + { 0x34, "VPR_SCRUBBER0" }, + { 0x35, "VPR_SCRUBBER1" }, + { 0x36, "DWBIF" }, + { 0x37, "FBFALCON" }, + { 0x38, "CE_SHIM" }, + { 0x39, "GSP" }, + {} +}; + +static const struct nvkm_enum +gv100_fifo_fault_reason[] = { + { 0x00, "PDE" }, + { 0x01, "PDE_SIZE" }, + { 0x02, "PTE" }, + { 0x03, "VA_LIMIT_VIOLATION" }, + { 0x04, "UNBOUND_INST_BLOCK" }, + { 0x05, "PRIV_VIOLATION" }, + { 0x06, "RO_VIOLATION" }, + { 0x07, "WO_VIOLATION" }, + { 0x08, "PITCH_MASK_VIOLATION" }, + { 0x09, "WORK_CREATION" }, + { 0x0a, "UNSUPPORTED_APERTURE" }, + { 0x0b, "COMPRESSION_FAILURE" }, + { 0x0c, "UNSUPPORTED_KIND" }, + { 0x0d, "REGION_VIOLATION" }, + { 0x0e, "POISONED" }, + { 0x0f, "ATOMIC_VIOLATION" }, + {} +}; + +static const struct nvkm_enum +gv100_fifo_fault_engine[] = { + { 0x01, "DISPLAY" }, + { 0x03, "PTP" }, + { 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR }, + { 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM }, + { 0x06, "PWR_PMU" }, + { 0x08, "IFB", NULL, NVKM_ENGINE_IFB }, + { 0x09, "PERF" }, + { 0x1f, "PHYSICAL" }, + { 0x20, "HOST0" }, + { 0x21, "HOST1" }, + { 0x22, "HOST2" }, + { 0x23, "HOST3" }, + { 0x24, "HOST4" }, + { 0x25, "HOST5" }, + { 0x26, "HOST6" }, + { 0x27, "HOST7" }, + { 0x28, "HOST8" }, + { 0x29, "HOST9" }, + { 0x2a, "HOST10" }, + { 0x2b, "HOST11" }, + { 0x2c, "HOST12" }, + { 0x2d, "HOST13" }, + {} +}; + +static const struct nvkm_enum +gv100_fifo_fault_access[] = { + { 0x0, "VIRT_READ" }, + { 0x1, "VIRT_WRITE" }, + { 0x2, "VIRT_ATOMIC" }, + { 0x3, "VIRT_PREFETCH" }, + { 0x4, "VIRT_ATOMIC_WEAK" }, + { 0x8, "PHYS_READ" }, + { 0x9, "PHYS_WRITE" }, + { 0xa, "PHYS_ATOMIC" }, + { 0xb, "PHYS_PREFETCH" }, + {} +}; + +static const struct gk104_fifo_func +gv100_fifo = { + .init_pbdma_timeout = gk208_fifo_init_pbdma_timeout, + .fault.access = gv100_fifo_fault_access, + .fault.engine = gv100_fifo_fault_engine, + .fault.reason = gv100_fifo_fault_reason, + .fault.hubclient = gv100_fifo_fault_hubclient, + .fault.gpcclient = gv100_fifo_fault_gpcclient, + .runlist = &gv100_fifo_runlist, + .user = {{-1,-1,VOLTA_USERMODE_A }, gv100_fifo_user_new }, + .chan = {{ 0, 0,VOLTA_CHANNEL_GPFIFO_A}, gv100_fifo_gpfifo_new }, + .cgrp_force = true, +}; + +int +gv100_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo) +{ + return gk104_fifo_new_(&gv100_fifo, device, index, 4096, pfifo); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h index ae76b1aaccd4..d5acbba293f4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h @@ -18,16 +18,19 @@ struct nvkm_fifo_chan_oclass; struct nvkm_fifo_func { void *(*dtor)(struct nvkm_fifo *); int (*oneinit)(struct nvkm_fifo *); + int (*info)(struct nvkm_fifo *, u64 mthd, u64 *data); void (*init)(struct nvkm_fifo *); void (*fini)(struct nvkm_fifo *); void (*intr)(struct nvkm_fifo *); + void (*fault)(struct nvkm_fifo *, struct nvkm_fault_data *); void (*pause)(struct nvkm_fifo *, unsigned long *); void (*start)(struct nvkm_fifo *, unsigned long *); void (*uevent_init)(struct nvkm_fifo *); void (*uevent_fini)(struct nvkm_fifo *); void (*recover_chan)(struct nvkm_fifo *, int chid); - int (*class_get)(struct nvkm_fifo *, int index, - const struct nvkm_fifo_chan_oclass **); + int (*class_get)(struct nvkm_fifo *, int index, struct nvkm_oclass *); + int (*class_new)(struct nvkm_fifo *, const struct nvkm_oclass *, + void *, u32, struct nvkm_object **); const struct nvkm_fifo_chan_oclass *chan[]; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h new file mode 100644 index 000000000000..ed840921ebe8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h @@ -0,0 +1,6 @@ +#ifndef __NVKM_FIFO_USER_H__ +#define __NVKM_FIFO_USER_H__ +#include "priv.h" +int gv100_fifo_user_new(const struct nvkm_oclass *, void *, u32, + struct nvkm_object **); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c new file mode 100644 index 000000000000..3dc3b8b312de --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c @@ -0,0 +1,45 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "user.h" + +static int +gv100_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) +{ + struct nvkm_device *device = object->engine->subdev.device; + *addr = 0x810000 + device->func->resource_addr(device, 0); + *size = 0x010000; + *type = NVKM_OBJECT_MAP_IO; + return 0; +} + +static const struct nvkm_object_func +gv100_fifo_user = { + .map = gv100_fifo_user_map, +}; + +int +gv100_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, + struct nvkm_object **pobject) +{ + return nvkm_object_new_(&gv100_fifo_user, oclass, argv, argc, pobject); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild index 8a22558b7b52..93e3733f54e2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild @@ -33,8 +33,10 @@ nvkm-y += nvkm/engine/gr/gm200.o nvkm-y += nvkm/engine/gr/gm20b.o nvkm-y += nvkm/engine/gr/gp100.o nvkm-y += nvkm/engine/gr/gp102.o +nvkm-y += nvkm/engine/gr/gp104.o nvkm-y += nvkm/engine/gr/gp107.o nvkm-y += nvkm/engine/gr/gp10b.o +nvkm-y += nvkm/engine/gr/gv100.o nvkm-y += nvkm/engine/gr/ctxnv40.o nvkm-y += nvkm/engine/gr/ctxnv50.o @@ -54,4 +56,6 @@ nvkm-y += nvkm/engine/gr/ctxgm200.o nvkm-y += nvkm/engine/gr/ctxgm20b.o nvkm-y += nvkm/engine/gr/ctxgp100.o nvkm-y += nvkm/engine/gr/ctxgp102.o +nvkm-y += nvkm/engine/gr/ctxgp104.o nvkm-y += nvkm/engine/gr/ctxgp107.o +nvkm-y += nvkm/engine/gr/ctxgv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index 881015080d83..e813a3f8ea93 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -850,12 +850,17 @@ gf100_grctx_init_gcc_0[] = { }; const struct gf100_gr_pack -gf100_grctx_pack_gpc[] = { +gf100_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf100_grctx_init_prop_0 }, { gf100_grctx_init_gpc_unk_1 }, { gf100_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +const struct gf100_gr_pack +gf100_grctx_pack_gpc_1[] = { { gf100_grctx_init_crstr_0 }, { gf100_grctx_init_gpm_0 }, { gf100_grctx_init_gcc_0 }, @@ -1025,6 +1030,13 @@ gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data, } void +gf100_grctx_generate_r419cb8(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419cb8, 0x00007c00, 0x00000000); +} + +void gf100_grctx_generate_bundle(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; @@ -1080,89 +1092,38 @@ gf100_grctx_generate_unkn(struct gf100_gr *gr) } void -gf100_grctx_generate_tpcid(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - int gpc, tpc, id; - - for (tpc = 0, id = 0; tpc < 4; tpc++) { - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - if (tpc < gr->tpc_nr[gpc]) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), id); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); - id++; - } - - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); - } - } -} - -void -gf100_grctx_generate_r406028(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - u32 tmp[GPC_MAX / 8] = {}, i = 0; - for (i = 0; i < gr->gpc_nr; i++) - tmp[i / 8] |= gr->tpc_nr[i] << ((i % 8) * 4); - for (i = 0; i < 4; i++) { - nvkm_wr32(device, 0x406028 + (i * 4), tmp[i]); - nvkm_wr32(device, 0x405870 + (i * 4), tmp[i]); - } -} - -void gf100_grctx_generate_r4060a8(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - u8 tpcnr[GPC_MAX], data[TPC_MAX]; - int gpc, tpc, i; - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - memset(data, 0x1f, sizeof(data)); - - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - data[tpc] = gpc; + const u8 gpcmax = nvkm_rd32(device, 0x022430); + const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax; + int i, j, sm = 0; + u32 data; + + for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) { + for (data = 0, j = 0; j < 4; j++) { + if (sm < gr->sm_nr) + data |= gr->sm[sm++].gpc << (j * 8); + else + data |= 0x1f << (j * 8); + } + nvkm_wr32(device, 0x4060a8 + (i * 4), data); } - - for (i = 0; i < 4; i++) - nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]); } void -gf100_grctx_generate_r418bb8(struct gf100_gr *gr) +gf100_grctx_generate_rop_mapping(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; u32 data[6] = {}, data2[2] = {}; - u8 tpcnr[GPC_MAX]; u8 shift, ntpcv; - int gpc, tpc, i; - - /* calculate first set of magics */ - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - - data[tpc / 6] |= gpc << ((tpc % 6) * 5); - } + int i; - for (; tpc < 32; tpc++) - data[tpc / 6] |= 7 << ((tpc % 6) * 5); + /* Pack tile map into register format. */ + for (i = 0; i < 32; i++) + data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5); - /* and the second... */ + /* Magic. */ shift = 0; ntpcv = gr->tpc_total; while (!(ntpcv & (1 << 4))) { @@ -1197,40 +1158,214 @@ gf100_grctx_generate_r418bb8(struct gf100_gr *gr) } void -gf100_grctx_generate_r406800(struct gf100_gr *gr) +gf100_grctx_generate_max_ways_evict(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - u64 tpc_mask = 0, tpc_set = 0; - u8 tpcnr[GPC_MAX]; - int gpc, tpc; - int i, a, b; - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (gpc = 0; gpc < gr->gpc_nr; gpc++) - tpc_mask |= ((1ULL << gr->tpc_nr[gpc]) - 1) << (gpc * 8); - - for (i = 0, gpc = -1, b = -1; i < 32; i++) { - a = (i * (gr->tpc_total - 1)) / 32; - if (a != b) { - b = a; - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - tpc_set |= 1ULL << ((gpc * 8) + tpc); + u32 fbps = nvkm_rd32(device, 0x121c74); + if (fbps == 1) + nvkm_mask(device, 0x17e91c, 0x001f0000, 0x00090000); +} + +static const u32 +gf100_grctx_alpha_beta_map[17][32] = { + [1] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, + [2] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + }, + //XXX: 3 + [4] = { + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + }, + //XXX: 5 + //XXX: 6 + [7] = { + 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, + }, + [8] = { + 1, 1, 1, + 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, + 7, 7, 7, + }, + //XXX: 9 + //XXX: 10 + [11] = { + 1, 1, + 2, 2, 2, 2, + 3, 3, 3, + 4, 4, 4, 4, + 5, 5, 5, + 6, 6, 6, + 7, 7, 7, 7, + 8, 8, 8, + 9, 9, 9, 9, + 10, 10, + }, + //XXX: 12 + //XXX: 13 + [14] = { + 1, 1, + 2, 2, + 3, 3, 3, + 4, 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, 8, + 9, 9, + 10, 10, 10, + 11, 11, 11, + 12, 12, + 13, 13, + }, + [15] = { + 1, 1, + 2, 2, + 3, 3, + 4, 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, + 9, 9, 9, + 10, 10, + 11, 11, 11, + 12, 12, + 13, 13, + 14, 14, + }, + [16] = { + 1, 1, + 2, 2, + 3, 3, + 4, 4, + 5, 5, + 6, 6, 6, + 7, 7, + 8, 8, + 9, 9, + 10, 10, 10, + 11, 11, + 12, 12, + 13, 13, + 14, 14, + 15, 15, + }, +}; + +void +gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *gr) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + int i, gpc; + + for (i = 0; i < 32; i++) { + u32 atarget = gf100_grctx_alpha_beta_map[gr->tpc_total][i]; + u32 abits[GPC_MAX] = {}, amask = 0, bmask = 0; + + if (!atarget) { + nvkm_warn(subdev, "missing alpha/beta mapping table\n"); + atarget = max_t(u32, gr->tpc_total * i / 32, 1); + } + + while (atarget) { + for (gpc = 0; atarget && gpc < gr->gpc_nr; gpc++) { + if (abits[gpc] < gr->tpc_nr[gpc]) { + abits[gpc]++; + atarget--; + } + } } - nvkm_wr32(device, 0x406800 + (i * 0x20), lower_32_bits(tpc_set)); - nvkm_wr32(device, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask)); - if (gr->gpc_nr > 4) { - nvkm_wr32(device, 0x406804 + (i * 0x20), upper_32_bits(tpc_set)); - nvkm_wr32(device, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask)); + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + u32 bbits = gr->tpc_nr[gpc] - abits[gpc]; + amask |= ((1 << abits[gpc]) - 1) << (gpc * 8); + bmask |= ((1 << bbits) - 1) << abits[gpc] << (gpc * 8); } + + nvkm_wr32(device, 0x406800 + (i * 0x20), amask); + nvkm_wr32(device, 0x406c00 + (i * 0x20), bmask); } } void +gf100_grctx_generate_tpc_nr(struct gf100_gr *gr, int gpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); +} + +void +gf100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), sm); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); +} + +void +gf100_grctx_generate_floorsweep(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const struct gf100_grctx_func *func = gr->func->grctx; + int gpc, sm, i, j; + u32 data; + + for (sm = 0; sm < gr->sm_nr; sm++) { + func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm); + if (func->tpc_nr) + func->tpc_nr(gr, gr->sm[sm].gpc); + } + + for (gpc = 0, i = 0; i < 4; i++) { + for (data = 0, j = 0; j < 8 && gpc < gr->gpc_nr; j++, gpc++) + data |= gr->tpc_nr[gpc] << (j * 4); + nvkm_wr32(device, 0x406028 + (i * 4), data); + nvkm_wr32(device, 0x405870 + (i * 4), data); + } + + if (func->r4060a8) + func->r4060a8(gr); + + func->rop_mapping(gr); + + if (func->alpha_beta_tables) + func->alpha_beta_tables(gr); + if (func->max_ways_evict) + func->max_ways_evict(gr); + if (func->dist_skip_table) + func->dist_skip_table(gr); + if (func->r406500) + func->r406500(gr); + if (func->gpc_tpc_nr) + func->gpc_tpc_nr(gr); + if (func->r419f78) + func->r419f78(gr); + if (func->tpc_mask) + func->tpc_mask(gr); + if (func->smid_config) + func->smid_config(gr); +} + +void gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -1239,29 +1374,63 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) nvkm_mc_unk260(device, 0); - gf100_gr_mmio(gr, grctx->hub); - gf100_gr_mmio(gr, grctx->gpc); - gf100_gr_mmio(gr, grctx->zcull); - gf100_gr_mmio(gr, grctx->tpc); - gf100_gr_mmio(gr, grctx->ppc); + if (!gr->fuc_sw_ctx) { + gf100_gr_mmio(gr, grctx->hub); + gf100_gr_mmio(gr, grctx->gpc_0); + gf100_gr_mmio(gr, grctx->zcull); + gf100_gr_mmio(gr, grctx->gpc_1); + gf100_gr_mmio(gr, grctx->tpc); + gf100_gr_mmio(gr, grctx->ppc); + } else { + gf100_gr_mmio(gr, gr->fuc_sw_ctx); + } + + gf100_gr_wait_idle(gr); idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - grctx->bundle(info); grctx->pagepool(info); + grctx->bundle(info); grctx->attrib(info); + if (grctx->patch_ltc) + grctx->patch_ltc(info); grctx->unkn(gr); - gf100_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gf100_grctx_generate_r4060a8(gr); - gf100_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); + gf100_grctx_generate_floorsweep(gr); + + gf100_gr_wait_idle(gr); + + if (grctx->r400088) grctx->r400088(gr, false); + if (gr->fuc_bundle) + gf100_gr_icmd(gr, gr->fuc_bundle); + else + gf100_gr_icmd(gr, grctx->icmd); + if (grctx->sw_veid_bundle_init) + gf100_gr_icmd(gr, grctx->sw_veid_bundle_init); + if (grctx->r400088) grctx->r400088(gr, true); - gf100_gr_icmd(gr, grctx->icmd); nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, grctx->mthd); + + if (gr->fuc_method) + gf100_gr_mthd(gr, gr->fuc_method); + else + gf100_gr_mthd(gr, grctx->mthd); nvkm_mc_unk260(device, 1); + + if (grctx->r419cb8) + grctx->r419cb8(gr); + if (grctx->r418800) + grctx->r418800(gr); + if (grctx->r419eb0) + grctx->r419eb0(gr); + if (grctx->r419e00) + grctx->r419e00(gr); + if (grctx->r418e94) + grctx->r418e94(gr); + if (grctx->r419a3c) + grctx->r419a3c(gr); + if (grctx->r408840) + grctx->r408840(gr); } #define CB_RESERVED 0x80000 @@ -1280,6 +1449,32 @@ gf100_grctx_generate(struct gf100_gr *gr) int ret, i; u64 addr; + /* NV_PGRAPH_FE_PWR_MODE_FORCE_ON. */ + nvkm_wr32(device, 0x404170, 0x00000012); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x404170) & 0x00000010)) + break; + ); + + if (grctx->unkn88c) + grctx->unkn88c(gr, true); + + /* Reset FECS. */ + nvkm_wr32(device, 0x409614, 0x00000070); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_mask(device, 0x409614, 0x00000700, 0x00000700); + nvkm_usec(device, 10, NVKM_DELAY); + nvkm_rd32(device, 0x409614); + + if (grctx->unkn88c) + grctx->unkn88c(gr, false); + + /* NV_PGRAPH_FE_PWR_MODE_AUTO. */ + nvkm_wr32(device, 0x404170, 0x00000010); + + /* Init SCC RAM. */ + nvkm_wr32(device, 0x40802c, 0x00000001); + /* Allocate memory to for a "channel", which we'll use to generate * the default context values. */ @@ -1392,7 +1587,8 @@ gf100_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, - .gpc = gf100_grctx_pack_gpc, + .gpc_0 = gf100_grctx_pack_gpc_0, + .gpc_1 = gf100_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf100_grctx_pack_tpc, .icmd = gf100_grctx_pack_icmd, @@ -1404,4 +1600,11 @@ gf100_grctx = { .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 5199e5aa0cb7..33e932bd73b1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -21,19 +21,22 @@ void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int) #define mmio_wr32(a,b,c) mmio_refn((a), (b), (c), 0, -1) struct gf100_grctx_func { + void (*unkn88c)(struct gf100_gr *, bool on); /* main context generation function */ void (*main)(struct gf100_gr *, struct gf100_grctx *); /* context-specific modify-on-first-load list generation function */ void (*unkn)(struct gf100_gr *); /* mmio context data */ const struct gf100_gr_pack *hub; - const struct gf100_gr_pack *gpc; + const struct gf100_gr_pack *gpc_0; + const struct gf100_gr_pack *gpc_1; const struct gf100_gr_pack *zcull; const struct gf100_gr_pack *tpc; const struct gf100_gr_pack *ppc; /* indirect context data, generated with icmds/mthds */ const struct gf100_gr_pack *icmd; const struct gf100_gr_pack *mthd; + const struct gf100_gr_pack *sw_veid_bundle_init; /* bundle circular buffer */ void (*bundle)(struct gf100_grctx *); u32 bundle_size; @@ -48,6 +51,31 @@ struct gf100_grctx_func { u32 attrib_nr; u32 alpha_nr_max; u32 alpha_nr; + u32 gfxp_nr; + /* other patch buffer stuff */ + void (*patch_ltc)(struct gf100_grctx *); + /* floorsweeping */ + void (*sm_id)(struct gf100_gr *, int gpc, int tpc, int sm); + void (*tpc_nr)(struct gf100_gr *, int gpc); + void (*r4060a8)(struct gf100_gr *); + void (*rop_mapping)(struct gf100_gr *); + void (*alpha_beta_tables)(struct gf100_gr *); + void (*max_ways_evict)(struct gf100_gr *); + void (*dist_skip_table)(struct gf100_gr *); + void (*r406500)(struct gf100_gr *); + void (*gpc_tpc_nr)(struct gf100_gr *); + void (*r419f78)(struct gf100_gr *); + void (*tpc_mask)(struct gf100_gr *); + void (*smid_config)(struct gf100_gr *); + /* misc other things */ + void (*r400088)(struct gf100_gr *, bool); + void (*r419cb8)(struct gf100_gr *); + void (*r418800)(struct gf100_gr *); + void (*r419eb0)(struct gf100_gr *); + void (*r419e00)(struct gf100_gr *); + void (*r418e94)(struct gf100_gr *); + void (*r419a3c)(struct gf100_gr *); + void (*r408840)(struct gf100_gr *); }; extern const struct gf100_grctx_func gf100_grctx; @@ -57,11 +85,14 @@ void gf100_grctx_generate_bundle(struct gf100_grctx *); void gf100_grctx_generate_pagepool(struct gf100_grctx *); void gf100_grctx_generate_attrib(struct gf100_grctx *); void gf100_grctx_generate_unkn(struct gf100_gr *); -void gf100_grctx_generate_tpcid(struct gf100_gr *); -void gf100_grctx_generate_r406028(struct gf100_gr *); +void gf100_grctx_generate_floorsweep(struct gf100_gr *); +void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int); +void gf100_grctx_generate_tpc_nr(struct gf100_gr *, int); void gf100_grctx_generate_r4060a8(struct gf100_gr *); -void gf100_grctx_generate_r418bb8(struct gf100_gr *); -void gf100_grctx_generate_r406800(struct gf100_gr *); +void gf100_grctx_generate_rop_mapping(struct gf100_gr *); +void gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *); +void gf100_grctx_generate_max_ways_evict(struct gf100_gr *); +void gf100_grctx_generate_r419cb8(struct gf100_gr *); extern const struct gf100_grctx_func gf108_grctx; void gf108_grctx_generate_attrib(struct gf100_grctx *); @@ -72,22 +103,25 @@ extern const struct gf100_grctx_func gf110_grctx; extern const struct gf100_grctx_func gf117_grctx; void gf117_grctx_generate_attrib(struct gf100_grctx *); +void gf117_grctx_generate_rop_mapping(struct gf100_gr *); +void gf117_grctx_generate_dist_skip_table(struct gf100_gr *); extern const struct gf100_grctx_func gf119_grctx; extern const struct gf100_grctx_func gk104_grctx; +void gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *); +void gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *); + extern const struct gf100_grctx_func gk20a_grctx; -void gk104_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); void gk104_grctx_generate_bundle(struct gf100_grctx *); void gk104_grctx_generate_pagepool(struct gf100_grctx *); +void gk104_grctx_generate_patch_ltc(struct gf100_grctx *); void gk104_grctx_generate_unkn(struct gf100_gr *); -void gk104_grctx_generate_r418bb8(struct gf100_gr *); - -void gm107_grctx_generate_bundle(struct gf100_grctx *); -void gm107_grctx_generate_pagepool(struct gf100_grctx *); -void gm107_grctx_generate_attrib(struct gf100_grctx *); +void gk104_grctx_generate_r418800(struct gf100_gr *); extern const struct gf100_grctx_func gk110_grctx; +void gk110_grctx_generate_r419eb0(struct gf100_gr *); + extern const struct gf100_grctx_func gk110b_grctx; extern const struct gf100_grctx_func gk208_grctx; @@ -95,22 +129,30 @@ extern const struct gf100_grctx_func gm107_grctx; void gm107_grctx_generate_bundle(struct gf100_grctx *); void gm107_grctx_generate_pagepool(struct gf100_grctx *); void gm107_grctx_generate_attrib(struct gf100_grctx *); +void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int); extern const struct gf100_grctx_func gm200_grctx; -void gm200_grctx_generate_tpcid(struct gf100_gr *); -void gm200_grctx_generate_405b60(struct gf100_gr *); +void gm200_grctx_generate_dist_skip_table(struct gf100_gr *); +void gm200_grctx_generate_r406500(struct gf100_gr *); +void gm200_grctx_generate_tpc_mask(struct gf100_gr *); +void gm200_grctx_generate_smid_config(struct gf100_gr *); +void gm200_grctx_generate_r419a3c(struct gf100_gr *); extern const struct gf100_grctx_func gm20b_grctx; extern const struct gf100_grctx_func gp100_grctx; -void gp100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); void gp100_grctx_generate_pagepool(struct gf100_grctx *); +void gp100_grctx_generate_smid_config(struct gf100_gr *); extern const struct gf100_grctx_func gp102_grctx; void gp102_grctx_generate_attrib(struct gf100_grctx *); +extern const struct gf100_grctx_func gp104_grctx; + extern const struct gf100_grctx_func gp107_grctx; +extern const struct gf100_grctx_func gv100_grctx; + /* context init value lists */ extern const struct gf100_gr_pack gf100_grctx_pack_icmd[]; @@ -128,7 +170,8 @@ extern const struct gf100_gr_init gf100_grctx_init_memfmt_0[]; extern const struct gf100_gr_init gf100_grctx_init_rstr2d_0[]; extern const struct gf100_gr_init gf100_grctx_init_scc_0[]; -extern const struct gf100_gr_pack gf100_grctx_pack_gpc[]; +extern const struct gf100_gr_pack gf100_grctx_pack_gpc_0[]; +extern const struct gf100_gr_pack gf100_grctx_pack_gpc_1[]; extern const struct gf100_gr_init gf100_grctx_init_gpc_unk_0[]; extern const struct gf100_gr_init gf100_grctx_init_prop_0[]; extern const struct gf100_gr_init gf100_grctx_init_gpc_unk_1[]; @@ -177,6 +220,8 @@ extern const struct gf100_gr_init gf117_grctx_init_pe_0[]; extern const struct gf100_gr_init gf117_grctx_init_wwdx_0[]; +extern const struct gf100_gr_pack gf117_grctx_pack_gpc_1[]; + extern const struct gf100_gr_init gk104_grctx_init_memfmt_0[]; extern const struct gf100_gr_init gk104_grctx_init_ds_0[]; extern const struct gf100_gr_init gk104_grctx_init_scc_0[]; @@ -186,7 +231,6 @@ extern const struct gf100_gr_init gk104_grctx_init_gpm_0[]; extern const struct gf100_gr_init gk104_grctx_init_pes_0[]; extern const struct gf100_gr_pack gk104_grctx_pack_hub[]; -extern const struct gf100_gr_pack gk104_grctx_pack_gpc[]; extern const struct gf100_gr_pack gk104_grctx_pack_tpc[]; extern const struct gf100_gr_pack gk104_grctx_pack_ppc[]; extern const struct gf100_gr_pack gk104_grctx_pack_icmd[]; @@ -200,7 +244,8 @@ extern const struct gf100_gr_pack gk110_grctx_pack_hub[]; extern const struct gf100_gr_init gk110_grctx_init_pri_0[]; extern const struct gf100_gr_init gk110_grctx_init_cwd_0[]; -extern const struct gf100_gr_pack gk110_grctx_pack_gpc[]; +extern const struct gf100_gr_pack gk110_grctx_pack_gpc_0[]; +extern const struct gf100_gr_pack gk110_grctx_pack_gpc_1[]; extern const struct gf100_gr_init gk110_grctx_init_gpc_unk_2[]; extern const struct gf100_gr_init gk110_grctx_init_tex_0[]; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c index 54fd74e9cca0..7a0564b6e3c7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c @@ -84,7 +84,8 @@ gf104_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, - .gpc = gf100_grctx_pack_gpc, + .gpc_0 = gf100_grctx_pack_gpc_0, + .gpc_1 = gf100_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf104_grctx_pack_tpc, .icmd = gf100_grctx_pack_icmd, @@ -96,4 +97,11 @@ gf104_grctx = { .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c index 82f71b10c06e..dda2c32e6232 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c @@ -667,12 +667,17 @@ gf108_grctx_init_gpm_0[] = { }; static const struct gf100_gr_pack -gf108_grctx_pack_gpc[] = { +gf108_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf100_grctx_init_prop_0 }, { gf100_grctx_init_gpc_unk_1 }, { gf108_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +static const struct gf100_gr_pack +gf108_grctx_pack_gpc_1[] = { { gf100_grctx_init_crstr_0 }, { gf108_grctx_init_gpm_0 }, { gf100_grctx_init_gcc_0 }, @@ -780,7 +785,8 @@ gf108_grctx = { .main = gf100_grctx_generate_main, .unkn = gf108_grctx_generate_unkn, .hub = gf108_grctx_pack_hub, - .gpc = gf108_grctx_pack_gpc, + .gpc_0 = gf108_grctx_pack_gpc_0, + .gpc_1 = gf108_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf108_grctx_pack_tpc, .icmd = gf108_grctx_pack_icmd, @@ -794,4 +800,11 @@ gf108_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x324, .alpha_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c index 7df398b53f8f..f5cca5e6a4f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c @@ -314,15 +314,12 @@ gf110_grctx_init_setup_0[] = { }; static const struct gf100_gr_pack -gf110_grctx_pack_gpc[] = { +gf110_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf100_grctx_init_prop_0 }, { gf100_grctx_init_gpc_unk_1 }, { gf110_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, - { gf100_grctx_init_crstr_0 }, - { gf100_grctx_init_gpm_0 }, - { gf100_grctx_init_gcc_0 }, {} }; @@ -335,7 +332,8 @@ gf110_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, - .gpc = gf110_grctx_pack_gpc, + .gpc_0 = gf110_grctx_pack_gpc_0, + .gpc_1 = gf100_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf100_grctx_pack_tpc, .icmd = gf110_grctx_pack_icmd, @@ -347,4 +345,11 @@ gf110_grctx = { .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c index 19301d88577d..276c282d19aa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -84,12 +84,17 @@ gf117_grctx_init_setup_0[] = { }; static const struct gf100_gr_pack -gf117_grctx_pack_gpc[] = { +gf117_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf119_grctx_init_prop_0 }, { gf119_grctx_init_gpc_unk_1 }, { gf117_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +const struct gf100_gr_pack +gf117_grctx_pack_gpc_1[] = { { gf119_grctx_init_crstr_0 }, { gf108_grctx_init_gpm_0 }, { gf100_grctx_init_gcc_0 }, @@ -180,6 +185,62 @@ gf117_grctx_pack_ppc[] = { ******************************************************************************/ void +gf117_grctx_generate_dist_skip_table(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int i; + + for (i = 0; i < 8; i++) + nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); +} + +void +gf117_grctx_generate_rop_mapping(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 data[6] = {}, data2[2] = {}; + u8 shift, ntpcv; + int i; + + /* Pack tile map into register format. */ + for (i = 0; i < 32; i++) + data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5); + + /* Magic. */ + shift = 0; + ntpcv = gr->tpc_total; + while (!(ntpcv & (1 << 4))) { + ntpcv <<= 1; + shift++; + } + + data2[0] = (ntpcv << 16); + data2[0] |= (shift << 21); + data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24); + for (i = 1; i < 7; i++) + data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5); + + /* GPC_BROADCAST */ + nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); + for (i = 0; i < 6; i++) + nvkm_wr32(device, 0x418b08 + (i * 4), data[i]); + + /* GPC_BROADCAST.TP_BROADCAST */ + nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) | + gr->screen_tile_row_offset | data2[0]); + nvkm_wr32(device, 0x41bfe4, data2[1]); + for (i = 0; i < 6; i++) + nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]); + + /* UNK78xx */ + nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); + for (i = 0; i < 6; i++) + nvkm_wr32(device, 0x40780c + (i * 4), data[i]); +} + +void gf117_grctx_generate_attrib(struct gf100_grctx *info) { struct gf100_gr *gr = info->gr; @@ -217,50 +278,13 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info) } } -static void -gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout; - int i; - - nvkm_mc_unk260(device, 0); - - gf100_gr_mmio(gr, grctx->hub); - gf100_gr_mmio(gr, grctx->gpc); - gf100_gr_mmio(gr, grctx->zcull); - gf100_gr_mmio(gr, grctx->tpc); - gf100_gr_mmio(gr, grctx->ppc); - - idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - - grctx->bundle(info); - grctx->pagepool(info); - grctx->attrib(info); - grctx->unkn(gr); - - gf100_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gf100_grctx_generate_r4060a8(gr); - gk104_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); - - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - - gf100_gr_icmd(gr, grctx->icmd); - nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, grctx->mthd); - nvkm_mc_unk260(device, 1); -} - const struct gf100_grctx_func gf117_grctx = { - .main = gf117_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gf117_grctx_pack_hub, - .gpc = gf117_grctx_pack_gpc, + .gpc_0 = gf117_grctx_pack_gpc_0, + .gpc_1 = gf117_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf117_grctx_pack_tpc, .ppc = gf117_grctx_pack_ppc, @@ -275,4 +299,12 @@ gf117_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x324, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c index 605185b078be..0cfe46366af6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c @@ -431,15 +431,12 @@ gf119_grctx_init_crstr_0[] = { }; static const struct gf100_gr_pack -gf119_grctx_pack_gpc[] = { +gf119_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf119_grctx_init_prop_0 }, { gf119_grctx_init_gpc_unk_1 }, { gf119_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, - { gf119_grctx_init_crstr_0 }, - { gf108_grctx_init_gpm_0 }, - { gf100_grctx_init_gcc_0 }, {} }; @@ -503,7 +500,8 @@ gf119_grctx = { .main = gf100_grctx_generate_main, .unkn = gf108_grctx_generate_unkn, .hub = gf119_grctx_pack_hub, - .gpc = gf119_grctx_pack_gpc, + .gpc_0 = gf119_grctx_pack_gpc_0, + .gpc_1 = gf117_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gf119_grctx_pack_tpc, .icmd = gf119_grctx_pack_icmd, @@ -517,4 +515,11 @@ gf119_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x324, .alpha_nr = 0x218, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .r4060a8 = gf100_grctx_generate_r4060a8, + .rop_mapping = gf100_grctx_generate_rop_mapping, + .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, + .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .r419cb8 = gf100_grctx_generate_r419cb8, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index 825c8fd500bc..304e9d268bad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -739,13 +739,18 @@ gk104_grctx_init_gpm_0[] = { {} }; -const struct gf100_gr_pack -gk104_grctx_pack_gpc[] = { +static const struct gf100_gr_pack +gk104_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf119_grctx_init_prop_0 }, { gf119_grctx_init_gpc_unk_1 }, { gk104_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +static const struct gf100_gr_pack +gk104_grctx_pack_gpc_1[] = { { gf119_grctx_init_crstr_0 }, { gk104_grctx_init_gpm_0 }, { gf100_grctx_init_gcc_0 }, @@ -841,6 +846,32 @@ gk104_grctx_pack_ppc[] = { ******************************************************************************/ void +gk104_grctx_generate_r418800(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + /*XXX: Not real sure where to apply these, there doesn't seem + * to be any pattern to which chipsets it's done on. + * + * Perhaps a VBIOS tweak? + */ + if (0) { + nvkm_mask(device, 0x418800, 0x00200000, 0x00200000); + nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000); + } +} + +void +gk104_grctx_generate_patch_ltc(struct gf100_grctx *info) +{ + struct nvkm_device *device = info->gr->base.engine.subdev.device; + u32 data0 = nvkm_rd32(device, 0x17e91c); + u32 data1 = nvkm_rd32(device, 0x17e920); + /*XXX: Figure out how to modify this correctly! */ + mmio_wr32(info, 0x17e91c, data0); + mmio_wr32(info, 0x17e920, data1); +} + +void gk104_grctx_generate_bundle(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; @@ -881,114 +912,74 @@ gk104_grctx_generate_unkn(struct gf100_gr *gr) nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008); } -void -gk104_grctx_generate_r418bb8(struct gf100_gr *gr) +static void +gk104_grctx_generate_r419f78(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - u32 data[6] = {}, data2[2] = {}; - u8 tpcnr[GPC_MAX]; - u8 shift, ntpcv; - int gpc, tpc, i; - - /* calculate first set of magics */ - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - - gpc = -1; - for (tpc = 0; tpc < gr->tpc_total; tpc++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpcnr[gpc]--; - - data[tpc / 6] |= gpc << ((tpc % 6) * 5); - } - - for (; tpc < 32; tpc++) - data[tpc / 6] |= 7 << ((tpc % 6) * 5); - - /* and the second... */ - shift = 0; - ntpcv = gr->tpc_total; - while (!(ntpcv & (1 << 4))) { - ntpcv <<= 1; - shift++; - } - - data2[0] = (ntpcv << 16); - data2[0] |= (shift << 21); - data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24); - for (i = 1; i < 7; i++) - data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5); - - /* GPC_BROADCAST */ - nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | - gr->screen_tile_row_offset); - for (i = 0; i < 6; i++) - nvkm_wr32(device, 0x418b08 + (i * 4), data[i]); - - /* GPC_BROADCAST.TP_BROADCAST */ - nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) | - gr->screen_tile_row_offset | data2[0]); - nvkm_wr32(device, 0x41bfe4, data2[1]); - for (i = 0; i < 6; i++) - nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]); - - /* UNK78xx */ - nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) | - gr->screen_tile_row_offset); - for (i = 0; i < 6; i++) - nvkm_wr32(device, 0x40780c + (i * 4), data[i]); + nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000); } void -gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout; - int i; - - nvkm_mc_unk260(device, 0); - - gf100_gr_mmio(gr, grctx->hub); - gf100_gr_mmio(gr, grctx->gpc); - gf100_gr_mmio(gr, grctx->zcull); - gf100_gr_mmio(gr, grctx->tpc); - gf100_gr_mmio(gr, grctx->ppc); - - idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - - grctx->bundle(info); - grctx->pagepool(info); - grctx->attrib(info); - grctx->unkn(gr); - - gf100_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); - - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); - nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000); - - gf100_gr_icmd(gr, grctx->icmd); - nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, grctx->mthd); - nvkm_mc_unk260(device, 1); +} - nvkm_mask(device, 0x418800, 0x00200000, 0x00200000); - nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000); +void +gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int i, j, gpc, ppc; + + for (i = 0; i < 32; i++) { + u32 atarget = max_t(u32, gr->tpc_total * i / 32, 1); + u32 btarget = gr->tpc_total - atarget; + bool alpha = atarget < btarget; + u64 amask = 0, bmask = 0; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) { + u32 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc]; + u32 abits, bbits, pmask; + + if (alpha) { + abits = atarget ? ppc_tpcs : 0; + bbits = ppc_tpcs - abits; + } else { + bbits = btarget ? ppc_tpcs : 0; + abits = ppc_tpcs - bbits; + } + + pmask = gr->ppc_tpc_mask[gpc][ppc]; + while (ppc_tpcs-- > abits) + pmask &= pmask - 1; + amask |= (u64)pmask << (gpc * 8); + + pmask ^= gr->ppc_tpc_mask[gpc][ppc]; + bmask |= (u64)pmask << (gpc * 8); + + atarget -= min(abits, atarget); + btarget -= min(bbits, btarget); + if ((abits > 0) || (bbits > 0)) + alpha = !alpha; + } + } + + for (j = 0; j < gr->gpc_nr; j += 4, amask >>= 32, bmask >>= 32) { + nvkm_wr32(device, 0x406800 + (i * 0x20) + j, amask); + nvkm_wr32(device, 0x406c00 + (i * 0x20) + j, bmask); + } + } } const struct gf100_grctx_func gk104_grctx = { - .main = gk104_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk104_grctx_pack_hub, - .gpc = gk104_grctx_pack_gpc, + .gpc_0 = gk104_grctx_pack_gpc_0, + .gpc_1 = gk104_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gk104_grctx_pack_tpc, .ppc = gk104_grctx_pack_ppc, @@ -1005,4 +996,13 @@ gk104_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, + .patch_ltc = gk104_grctx_generate_patch_ltc, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .r419f78 = gk104_grctx_generate_r419f78, + .r418800 = gk104_grctx_generate_r418800, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c index 7b95ec2fe453..86547cfc38dc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c @@ -704,12 +704,17 @@ gk110_grctx_init_gpc_unk_2[] = { }; const struct gf100_gr_pack -gk110_grctx_pack_gpc[] = { +gk110_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gf119_grctx_init_prop_0 }, { gf119_grctx_init_gpc_unk_1 }, { gk110_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +const struct gf100_gr_pack +gk110_grctx_pack_gpc_1[] = { { gf119_grctx_init_crstr_0 }, { gk104_grctx_init_gpm_0 }, { gk110_grctx_init_gpc_unk_2 }, @@ -808,12 +813,20 @@ gk110_grctx_pack_ppc[] = { * PGRAPH context implementation ******************************************************************************/ +void +gk110_grctx_generate_r419eb0(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000); +} + const struct gf100_grctx_func gk110_grctx = { - .main = gk104_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk110_grctx_pack_hub, - .gpc = gk110_grctx_pack_gpc, + .gpc_0 = gk110_grctx_pack_gpc_0, + .gpc_1 = gk110_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gk110_grctx_pack_tpc, .ppc = gk110_grctx_pack_ppc, @@ -830,4 +843,13 @@ gk110_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, + .patch_ltc = gk104_grctx_generate_patch_ltc, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .r418800 = gk104_grctx_generate_r418800, + .r419eb0 = gk110_grctx_generate_r419eb0, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c index 048b1152da44..ebb947bd1446 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c @@ -71,10 +71,11 @@ gk110b_grctx_pack_tpc[] = { const struct gf100_grctx_func gk110b_grctx = { - .main = gk104_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk110_grctx_pack_hub, - .gpc = gk110_grctx_pack_gpc, + .gpc_0 = gk110_grctx_pack_gpc_0, + .gpc_1 = gk110_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gk110b_grctx_pack_tpc, .ppc = gk110_grctx_pack_ppc, @@ -91,4 +92,13 @@ gk110b_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, + .patch_ltc = gk104_grctx_generate_patch_ltc, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .r418800 = gk104_grctx_generate_r418800, + .r419eb0 = gk110_grctx_generate_r419eb0, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c index 67b7a1b43617..4d40512b5c99 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c @@ -443,12 +443,17 @@ gk208_grctx_init_gpm_0[] = { }; static const struct gf100_gr_pack -gk208_grctx_pack_gpc[] = { +gk208_grctx_pack_gpc_0[] = { { gf100_grctx_init_gpc_unk_0 }, { gk208_grctx_init_prop_0 }, { gk208_grctx_init_gpc_unk_1 }, { gk208_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +static const struct gf100_gr_pack +gk208_grctx_pack_gpc_1[] = { { gk208_grctx_init_crstr_0 }, { gk208_grctx_init_gpm_0 }, { gk110_grctx_init_gpc_unk_2 }, @@ -532,10 +537,11 @@ gk208_grctx_pack_ppc[] = { const struct gf100_grctx_func gk208_grctx = { - .main = gk104_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk208_grctx_pack_hub, - .gpc = gk208_grctx_pack_gpc, + .gpc_0 = gk208_grctx_pack_gpc_0, + .gpc_1 = gk208_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gk208_grctx_pack_tpc, .ppc = gk208_grctx_pack_ppc, @@ -552,4 +558,12 @@ gk208_grctx = { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, + .patch_ltc = gk104_grctx_generate_patch_ltc, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .r418800 = gk104_grctx_generate_r418800, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c index da7c35a6a3d2..896d473dcc0f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c @@ -42,10 +42,7 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) grctx->unkn(gr); - gf100_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); + gf100_grctx_generate_floorsweep(gr); for (i = 0; i < 8; i++) nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); @@ -82,4 +79,8 @@ gk20a_grctx = { .attrib_nr = 0x240, .alpha_nr_max = 0x648 + (0x648 / 2), .alpha_nr = 0x648, + .sm_id = gf100_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index 9b43d4ce3eaa..0b3964e6b36e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -744,12 +744,17 @@ gm107_grctx_init_gpc_unk_2[] = { }; static const struct gf100_gr_pack -gm107_grctx_pack_gpc[] = { +gm107_grctx_pack_gpc_0[] = { { gm107_grctx_init_gpc_unk_0 }, { gk208_grctx_init_prop_0 }, { gm107_grctx_init_gpc_unk_1 }, { gm107_grctx_init_setup_0 }, { gf100_grctx_init_zcull_0 }, + {} +}; + +static const struct gf100_gr_pack +gm107_grctx_pack_gpc_1[] = { { gk208_grctx_init_crstr_0 }, { gk104_grctx_init_gpm_0 }, { gm107_grctx_init_gpc_unk_2 }, @@ -860,6 +865,16 @@ gm107_grctx_pack_ppc[] = { * PGRAPH context implementation ******************************************************************************/ +static void +gm107_grctx_generate_r419e00(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419e00, 0x00808080, 0x00808080); + nvkm_mask(device, 0x419ccc, 0x80000000, 0x80000000); + nvkm_mask(device, 0x419f80, 0x80000000, 0x80000000); + nvkm_mask(device, 0x419f88, 0x80000000, 0x80000000); +} + void gm107_grctx_generate_bundle(struct gf100_grctx *info) { @@ -931,75 +946,27 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info) } static void -gm107_grctx_generate_tpcid(struct gf100_gr *gr) +gm107_grctx_generate_r406500(struct gf100_gr *gr) { - struct nvkm_device *device = gr->base.engine.subdev.device; - int gpc, tpc, id; - - for (tpc = 0, id = 0; tpc < 4; tpc++) { - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - if (tpc < gr->tpc_nr[gpc]) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); - id++; - } - - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); - } - } + nvkm_wr32(gr->base.engine.subdev.device, 0x406500, 0x00000001); } -static void -gm107_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +void +gm107_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) { struct nvkm_device *device = gr->base.engine.subdev.device; - const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout; - int i; - - gf100_gr_mmio(gr, grctx->hub); - gf100_gr_mmio(gr, grctx->gpc); - gf100_gr_mmio(gr, grctx->zcull); - gf100_gr_mmio(gr, grctx->tpc); - gf100_gr_mmio(gr, grctx->ppc); - - idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - - grctx->bundle(info); - grctx->pagepool(info); - grctx->attrib(info); - grctx->unkn(gr); - - gm107_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); - gf100_grctx_generate_r406800(gr); - - nvkm_wr32(device, 0x4064d0, 0x00000001); - for (i = 1; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nvkm_wr32(device, 0x406500, 0x00000001); - - nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); - - gf100_gr_icmd(gr, grctx->icmd); - nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, grctx->mthd); - - nvkm_mask(device, 0x419e00, 0x00808080, 0x00808080); - nvkm_mask(device, 0x419ccc, 0x80000000, 0x80000000); - nvkm_mask(device, 0x419f80, 0x80000000, 0x80000000); - nvkm_mask(device, 0x419f88, 0x80000000, 0x80000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); } const struct gf100_grctx_func gm107_grctx = { - .main = gm107_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gm107_grctx_pack_hub, - .gpc = gm107_grctx_pack_gpc, + .gpc_0 = gm107_grctx_pack_gpc_0, + .gpc_1 = gm107_grctx_pack_gpc_1, .zcull = gf100_grctx_pack_zcull, .tpc = gm107_grctx_pack_tpc, .ppc = gm107_grctx_pack_ppc, @@ -1016,4 +983,12 @@ gm107_grctx = { .attrib_nr = 0xaa0, .alpha_nr_max = 0x1800, .alpha_nr = 0x1000, + .sm_id = gm107_grctx_generate_sm_id, + .tpc_nr = gf100_grctx_generate_tpc_nr, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, + .r406500 = gm107_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .r419e00 = gm107_grctx_generate_r419e00, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c index db209d33f486..013d05a0f0f6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c @@ -28,47 +28,34 @@ ******************************************************************************/ void -gm200_grctx_generate_tpcid(struct gf100_gr *gr) +gm200_grctx_generate_r419a3c(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - int gpc, tpc, id; + nvkm_mask(device, 0x419a3c, 0x00000014, 0x00000000); +} - for (tpc = 0, id = 0; tpc < TPC_MAX_PER_GPC; tpc++) { - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - if (tpc < gr->tpc_nr[gpc]) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); - id++; - } - } - } +static void +gm200_grctx_generate_r418e94(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000); + nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000); } void -gm200_grctx_generate_405b60(struct gf100_gr *gr) +gm200_grctx_generate_smid_config(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4); u32 dist[TPC_MAX / 4] = {}; u32 gpcs[GPC_MAX] = {}; - u8 tpcnr[GPC_MAX]; - int tpc, gpc, i; + u8 sm, i; - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - - /* won't result in the same distribution as the binary driver where - * some of the gpcs have more tpcs than others, but this shall do - * for the moment. the code for earlier gpus has this issue too. - */ - for (gpc = -1, i = 0; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while(!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8); - gpcs[gpc] |= i << (tpc * 8); + for (sm = 0; sm < gr->sm_nr; sm++) { + const u8 gpc = gr->sm[sm].gpc; + const u8 tpc = gr->sm[sm].tpc; + dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8); + gpcs[gpc] |= sm << (tpc * 8); } for (i = 0; i < dist_nr; i++) @@ -77,50 +64,46 @@ gm200_grctx_generate_405b60(struct gf100_gr *gr) nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]); } -static void -gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +void +gm200_grctx_generate_tpc_mask(struct gf100_gr *gr) { - struct nvkm_device *device = gr->base.engine.subdev.device; - const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout, tmp; - int i; - - gf100_gr_mmio(gr, gr->fuc_sw_ctx); - - idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - - grctx->bundle(info); - grctx->pagepool(info); - grctx->attrib(info); - grctx->unkn(gr); - - gm200_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); - - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nvkm_wr32(device, 0x406500, 0x00000000); - - nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); - + u32 tmp, i; for (tmp = 0, i = 0; i < gr->gpc_nr; i++) - tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4); - nvkm_wr32(device, 0x4041c4, tmp); + tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * gr->func->tpc_nr); + nvkm_wr32(gr->base.engine.subdev.device, 0x4041c4, tmp); +} - gm200_grctx_generate_405b60(gr); +void +gm200_grctx_generate_r406500(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x406500, 0x00000000); +} - gf100_gr_icmd(gr, gr->fuc_bundle); - nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, gr->fuc_method); +void +gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 data[8] = {}; + int gpc, ppc, i; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc]; + u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc]; + while (ppc_tpcs-- > gr->ppc_tpc_min) + ppc_tpcm &= ppc_tpcm - 1; + ppc_tpcm ^= gr->ppc_tpc_mask[gpc][ppc]; + ((u8 *)data)[gpc] |= ppc_tpcm; + } + } - nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000); - nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000); + for (i = 0; i < ARRAY_SIZE(data); i++) + nvkm_wr32(device, 0x4064d0 + (i * 0x04), data[i]); } const struct gf100_grctx_func gm200_grctx = { - .main = gm200_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .bundle = gm107_grctx_generate_bundle, .bundle_size = 0x3000, @@ -133,4 +116,13 @@ gm200_grctx = { .attrib_nr = 0x400, .alpha_nr_max = 0x1800, .alpha_nr = 0x1000, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gm200_grctx_generate_smid_config, + .r418e94 = gm200_grctx_generate_r418e94, + .r419a3c = gm200_grctx_generate_r419a3c, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c index e5702e3e0a5a..a1d9e114ebeb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c @@ -22,20 +22,6 @@ #include "ctxgf100.h" static void -gm20b_grctx_generate_r406028(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - u32 tpc_per_gpc = 0; - int i; - - for (i = 0; i < gr->gpc_nr; i++) - tpc_per_gpc |= gr->tpc_nr[i] << (4 * i); - - nvkm_wr32(device, 0x406028, tpc_per_gpc); - nvkm_wr32(device, 0x405870, tpc_per_gpc); -} - -static void gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -53,9 +39,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) grctx->unkn(gr); - gm200_grctx_generate_tpcid(gr); - gm20b_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); + gf100_grctx_generate_floorsweep(gr); for (i = 0; i < 8; i++) nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); @@ -68,7 +52,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4); nvkm_wr32(device, 0x4041c4, tmp); - gm200_grctx_generate_405b60(gr); + gm200_grctx_generate_smid_config(gr); gf100_gr_wait_idle(gr); @@ -98,4 +82,6 @@ gm20b_grctx = { .attrib_nr = 0x400, .alpha_nr_max = 0xc00, .alpha_nr = 0x800, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index 88ea322d956c..0b3326262e12 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -36,7 +36,7 @@ gp100_grctx_generate_pagepool(struct gf100_grctx *info) const int s = 8; const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true); mmio_refn(info, 0x40800c, 0x00000000, s, b); - mmio_wr32(info, 0x408010, 0x80000000); + mmio_wr32(info, 0x408010, 0x8007d800); mmio_refn(info, 0x419004, 0x00000000, s, b); mmio_wr32(info, 0x419008, 0x00000000); } @@ -48,14 +48,17 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; - const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 size = roundup(gr->tpc_total * pertpc, 0x80); const int s = 12; - const int b = mmio_vram(info, size, (1 << s), false); const int max_batches = 0xffff; + u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; - u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total; - int gpc, ppc, n = 0; + u32 bo = ao + size; + int gpc, ppc, b, n = 0; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + size += grctx->attrib_nr_max * gr->ppc_nr[gpc] * gr->ppc_tpc_max; + size = ((size * 0x20) + 128) & ~127; + b = mmio_vram(info, size, (1 << s), false); mmio_refn(info, 0x418810, 0x80000000, s, b); mmio_refn(info, 0x419848, 0x10000000, s, b); @@ -69,7 +72,7 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; - const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; + const u32 bs = attrib * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); if (!(gr->ppc_mask[gpc] & (1 << ppc))) @@ -77,7 +80,7 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) mmio_wr32(info, o + 0xc0, bs); mmio_wr32(info, o + 0xf4, bo); mmio_wr32(info, o + 0xf0, bs); - bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; + bo += grctx->attrib_nr_max * gr->ppc_tpc_max; mmio_wr32(info, o + 0xe4, as); mmio_wr32(info, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; @@ -89,79 +92,30 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info) mmio_wr32(info, 0x41befc, 0x00000000); } -static void -gp100_grctx_generate_405b60(struct gf100_gr *gr) +void +gp100_grctx_generate_smid_config(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4); - u32 dist[TPC_MAX / 4] = {}; - u32 gpcs[GPC_MAX * 2] = {}; - u8 tpcnr[GPC_MAX]; - int tpc, gpc, i; - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - - /* won't result in the same distribution as the binary driver where - * some of the gpcs have more tpcs than others, but this shall do - * for the moment. the code for earlier gpus has this issue too. - */ - for (gpc = -1, i = 0; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while(!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8); - gpcs[gpc + (gr->gpc_nr * (tpc / 4))] |= i << (tpc * 8); + u32 dist[TPC_MAX / 4] = {}, gpcs[16] = {}; + u8 sm, i; + + for (sm = 0; sm < gr->sm_nr; sm++) { + const u8 gpc = gr->sm[sm].gpc; + const u8 tpc = gr->sm[sm].tpc; + dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8); + gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= sm << ((tpc % 4) * 8); } for (i = 0; i < dist_nr; i++) nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]); - for (i = 0; i < gr->gpc_nr * 2; i++) + for (i = 0; i < ARRAY_SIZE(gpcs); i++) nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]); } -void -gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - const struct gf100_grctx_func *grctx = gr->func->grctx; - u32 idle_timeout, tmp; - int i; - - gf100_gr_mmio(gr, gr->fuc_sw_ctx); - - idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000); - - grctx->pagepool(info); - grctx->bundle(info); - grctx->attrib(info); - grctx->unkn(gr); - - gm200_grctx_generate_tpcid(gr); - gf100_grctx_generate_r406028(gr); - gk104_grctx_generate_r418bb8(gr); - - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nvkm_wr32(device, 0x406500, 0x00000000); - - nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); - - for (tmp = 0, i = 0; i < gr->gpc_nr; i++) - tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 5); - nvkm_wr32(device, 0x4041c4, tmp); - - gp100_grctx_generate_405b60(gr); - - gf100_gr_icmd(gr, gr->fuc_bundle); - nvkm_wr32(device, 0x404154, idle_timeout); - gf100_gr_mthd(gr, gr->fuc_method); -} - const struct gf100_grctx_func gp100_grctx = { - .main = gp100_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .bundle = gm107_grctx_generate_bundle, .bundle_size = 0x3000, @@ -174,4 +128,12 @@ gp100_grctx = { .attrib_nr = 0x440, .alpha_nr_max = 0xc00, .alpha_nr = 0x800, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, + .r419a3c = gm200_grctx_generate_r419a3c, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c index 7a66b4c2eb18..daee17bf7d0d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -29,6 +29,13 @@ * PGRAPH context implementation ******************************************************************************/ +static void +gp102_grctx_generate_r408840(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x408840, 0x00000003, 0x00000000); +} + void gp102_grctx_generate_attrib(struct gf100_grctx *info) { @@ -36,14 +43,18 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info) const struct gf100_grctx_func *grctx = gr->func->grctx; const u32 alpha = grctx->alpha_nr; const u32 attrib = grctx->attrib_nr; - const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); - const u32 size = roundup(gr->tpc_total * pertpc, 0x80); + const u32 gfxp = grctx->gfxp_nr; const int s = 12; - const int b = mmio_vram(info, size, (1 << s), false); const int max_batches = 0xffff; + u32 size = grctx->alpha_nr_max * gr->tpc_total; u32 ao = 0; - u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total; - int gpc, ppc, n = 0; + u32 bo = ao + size; + int gpc, ppc, b, n = 0; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max; + size = ((size * 0x20) + 128) & ~127; + b = mmio_vram(info, size, (1 << s), false); mmio_refn(info, 0x418810, 0x80000000, s, b); mmio_refn(info, 0x419848, 0x10000000, s, b); @@ -57,17 +68,18 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info) for (gpc = 0; gpc < gr->gpc_nr; gpc++) { for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; - const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; + const u32 bs = attrib * gr->ppc_tpc_max; + const u32 gs = gfxp * gr->ppc_tpc_max; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4)); if (!(gr->ppc_mask[gpc] & (1 << ppc))) continue; - mmio_wr32(info, o + 0xc0, bs); + mmio_wr32(info, o + 0xc0, gs); mmio_wr32(info, p, bs); mmio_wr32(info, o + 0xf4, bo); mmio_wr32(info, o + 0xf0, bs); - bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; + bo += gs; mmio_wr32(info, o + 0xe4, as); mmio_wr32(info, o + 0xf8, ao); ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; @@ -81,7 +93,7 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info) const struct gf100_grctx_func gp102_grctx = { - .main = gp100_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .bundle = gm107_grctx_generate_bundle, .bundle_size = 0x3000, @@ -90,8 +102,18 @@ gp102_grctx = { .pagepool = gp100_grctx_generate_pagepool, .pagepool_size = 0x20000, .attrib = gp102_grctx_generate_attrib, - .attrib_nr_max = 0x5d4, + .attrib_nr_max = 0x4b0, .attrib_nr = 0x320, .alpha_nr_max = 0xc00, .alpha_nr = 0x800, + .gfxp_nr = 0xba8, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, + .r419a3c = gm200_grctx_generate_r419a3c, + .r408840 = gp102_grctx_generate_r408840, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c new file mode 100644 index 000000000000..3b85e3d326b2 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c @@ -0,0 +1,48 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ctxgf100.h" + +const struct gf100_grctx_func +gp104_grctx = { + .main = gf100_grctx_generate_main, + .unkn = gk104_grctx_generate_unkn, + .bundle = gm107_grctx_generate_bundle, + .bundle_size = 0x3000, + .bundle_min_gpm_fifo_depth = 0x180, + .bundle_token_limit = 0x900, + .pagepool = gp100_grctx_generate_pagepool, + .pagepool_size = 0x20000, + .attrib = gp102_grctx_generate_attrib, + .attrib_nr_max = 0x4b0, + .attrib_nr = 0x320, + .alpha_nr_max = 0xc00, + .alpha_nr = 0x800, + .gfxp_nr = 0xba8, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, + .r419a3c = gm200_grctx_generate_r419a3c, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c index 8da91a0b3bd2..5060c5ee5ce0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c @@ -31,7 +31,7 @@ const struct gf100_grctx_func gp107_grctx = { - .main = gp100_grctx_generate_main, + .main = gf100_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .bundle = gm107_grctx_generate_bundle, .bundle_size = 0x3000, @@ -44,4 +44,13 @@ gp107_grctx = { .attrib_nr = 0x540, .alpha_nr_max = 0xc00, .alpha_nr = 0x800, + .gfxp_nr = 0xe94, + .sm_id = gm107_grctx_generate_sm_id, + .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .tpc_mask = gm200_grctx_generate_tpc_mask, + .smid_config = gp100_grctx_generate_smid_config, + .r419a3c = gm200_grctx_generate_r419a3c, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c new file mode 100644 index 000000000000..0990765ef191 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c @@ -0,0 +1,215 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ctxgf100.h" + +/******************************************************************************* + * PGRAPH context implementation + ******************************************************************************/ + +static const struct gf100_gr_init +gv100_grctx_init_sw_veid_bundle_init_0[] = { + { 0x00001000, 64, 0x00100000, 0x00000008 }, + { 0x00000941, 64, 0x00100000, 0x00000000 }, + { 0x0000097e, 64, 0x00100000, 0x00000000 }, + { 0x0000097f, 64, 0x00100000, 0x00000100 }, + { 0x0000035c, 64, 0x00100000, 0x00000000 }, + { 0x0000035d, 64, 0x00100000, 0x00000000 }, + { 0x00000a08, 64, 0x00100000, 0x00000000 }, + { 0x00000a09, 64, 0x00100000, 0x00000000 }, + { 0x00000a0a, 64, 0x00100000, 0x00000000 }, + { 0x00000352, 64, 0x00100000, 0x00000000 }, + { 0x00000353, 64, 0x00100000, 0x00000000 }, + { 0x00000358, 64, 0x00100000, 0x00000000 }, + { 0x00000359, 64, 0x00100000, 0x00000000 }, + { 0x00000370, 64, 0x00100000, 0x00000000 }, + { 0x00000371, 64, 0x00100000, 0x00000000 }, + { 0x00000372, 64, 0x00100000, 0x000fffff }, + { 0x00000366, 64, 0x00100000, 0x00000000 }, + { 0x00000367, 64, 0x00100000, 0x00000000 }, + { 0x00000368, 64, 0x00100000, 0x00000fff }, + { 0x00000623, 64, 0x00100000, 0x00000000 }, + { 0x00000624, 64, 0x00100000, 0x00000000 }, + { 0x0001e100, 1, 0x00000001, 0x02000001 }, + {} +}; + +static const struct gf100_gr_pack +gv100_grctx_pack_sw_veid_bundle_init[] = { + { gv100_grctx_init_sw_veid_bundle_init_0 }, + {} +}; + +static void +gv100_grctx_generate_attrib(struct gf100_grctx *info) +{ + struct gf100_gr *gr = info->gr; + const struct gf100_grctx_func *grctx = gr->func->grctx; + const u32 alpha = grctx->alpha_nr; + const u32 attrib = grctx->attrib_nr; + const u32 gfxp = grctx->gfxp_nr; + const int s = 12; + const int max_batches = 0xffff; + u32 size = grctx->alpha_nr_max * gr->tpc_total; + u32 ao = 0; + u32 bo = ao + size; + int gpc, ppc, b, n = 0; + + size += grctx->gfxp_nr * gr->tpc_total; + size = ((size * 0x20) + 128) & ~127; + b = mmio_vram(info, size, (1 << s), false); + + mmio_refn(info, 0x418810, 0x80000000, s, b); + mmio_refn(info, 0x419848, 0x10000000, s, b); + mmio_refn(info, 0x419c2c, 0x10000000, s, b); + mmio_refn(info, 0x419e00, 0x00000000, s, b); + mmio_wr32(info, 0x419e04, 0x80000000 | size >> 7); + mmio_wr32(info, 0x405830, attrib); + mmio_wr32(info, 0x40585c, alpha); + mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; + const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; + const u32 gs = gfxp * gr->ppc_tpc_nr[gpc][ppc]; + const u32 u = 0x418ea0 + (n * 0x04); + const u32 o = PPC_UNIT(gpc, ppc, 0); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) + continue; + mmio_wr32(info, o + 0xc0, gs); + mmio_wr32(info, o + 0xf4, bo); + mmio_wr32(info, o + 0xf0, bs); + bo += gs; + mmio_wr32(info, o + 0xe4, as); + mmio_wr32(info, o + 0xf8, ao); + ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; + mmio_wr32(info, u, bs); + } + } + + mmio_wr32(info, 0x4181e4, 0x00000100); + mmio_wr32(info, 0x41befc, 0x00000100); +} + +static void +gv100_grctx_generate_rop_mapping(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 data; + int i, j; + + /* Pack tile map into register format. */ + nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); + for (i = 0; i < 11; i++) { + for (data = 0, j = 0; j < 6; j++) + data |= (gr->tile[i * 6 + j] & 0x1f) << (j * 5); + nvkm_wr32(device, 0x418b08 + (i * 4), data); + nvkm_wr32(device, 0x41bf00 + (i * 4), data); + nvkm_wr32(device, 0x40780c + (i * 4), data); + } + + /* GPC_BROADCAST.TP_BROADCAST */ + nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); + for (i = 0, j = 1; i < 5; i++, j += 4) { + u8 v19 = (1 << (j + 0)) % gr->tpc_total; + u8 v20 = (1 << (j + 1)) % gr->tpc_total; + u8 v21 = (1 << (j + 2)) % gr->tpc_total; + u8 v22 = (1 << (j + 3)) % gr->tpc_total; + nvkm_wr32(device, 0x41bfb0 + (i * 4), (v22 << 24) | + (v21 << 16) | + (v20 << 8) | + v19); + } + + /* UNK78xx */ + nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) | + gr->screen_tile_row_offset); +} + +static void +gv100_grctx_generate_r400088(struct gf100_gr *gr, bool on) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x400088, 0x00060000, on ? 0x00060000 : 0x00000000); +} + +static void +gv100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm); +} + +static void +gv100_grctx_generate_unkn(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010); + nvkm_mask(device, 0x41be08, 0x00000004, 0x00000004); + nvkm_mask(device, 0x4064c0, 0x80000000, 0x80000000); + nvkm_mask(device, 0x405800, 0x08000000, 0x08000000); + nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008); +} + +static void +gv100_grctx_unkn88c(struct gf100_gr *gr, bool on) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 mask = 0x00000010, data = on ? mask : 0x00000000; + nvkm_mask(device, 0x40988c, mask, data); + nvkm_rd32(device, 0x40988c); + nvkm_mask(device, 0x41a88c, mask, data); + nvkm_rd32(device, 0x41a88c); + nvkm_mask(device, 0x408a14, mask, data); + nvkm_rd32(device, 0x408a14); +} + +const struct gf100_grctx_func +gv100_grctx = { + .unkn88c = gv100_grctx_unkn88c, + .main = gf100_grctx_generate_main, + .unkn = gv100_grctx_generate_unkn, + .sw_veid_bundle_init = gv100_grctx_pack_sw_veid_bundle_init, + .bundle = gm107_grctx_generate_bundle, + .bundle_size = 0x3000, + .bundle_min_gpm_fifo_depth = 0x180, + .bundle_token_limit = 0x1680, + .pagepool = gp100_grctx_generate_pagepool, + .pagepool_size = 0x20000, + .attrib = gv100_grctx_generate_attrib, + .attrib_nr_max = 0x6c0, + .attrib_nr = 0x480, + .alpha_nr_max = 0xc00, + .alpha_nr = 0x800, + .gfxp_nr = 0xd10, + .sm_id = gv100_grctx_generate_sm_id, + .rop_mapping = gv100_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, + .r406500 = gm200_grctx_generate_r406500, + .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr, + .smid_config = gp100_grctx_generate_smid_config, + .r400088 = gv100_grctx_generate_r400088, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 2f8dc107047d..70d3d41e616c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -32,6 +32,7 @@ #include <subdev/fb.h> #include <subdev/mc.h> #include <subdev/pmu.h> +#include <subdev/therm.h> #include <subdev/timer.h> #include <engine/fifo.h> @@ -91,7 +92,7 @@ gf100_gr_zbc_color_get(struct gf100_gr *gr, int format, memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2)); gr->zbc_color[zbc].format = format; nvkm_ltc_zbc_color_get(ltc, zbc, l2); - gf100_gr_zbc_clear_color(gr, zbc); + gr->func->zbc->clear_color(gr, zbc); return zbc; } @@ -136,10 +137,16 @@ gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format, gr->zbc_depth[zbc].ds = ds; gr->zbc_depth[zbc].l2 = l2; nvkm_ltc_zbc_depth_get(ltc, zbc, l2); - gf100_gr_zbc_clear_depth(gr, zbc); + gr->func->zbc->clear_depth(gr, zbc); return zbc; } +const struct gf100_gr_func_zbc +gf100_gr_zbc = { + .clear_color = gf100_gr_zbc_clear_color, + .clear_depth = gf100_gr_zbc_clear_depth, +}; + /******************************************************************************* * Graphics object classes ******************************************************************************/ @@ -743,21 +750,31 @@ gf100_gr_zbc_init(struct gf100_gr *gr) const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; - int index; + int index, c = ltc->zbc_min, d = ltc->zbc_min, s = ltc->zbc_min; if (!gr->zbc_color[0].format) { - gf100_gr_zbc_color_get(gr, 1, & zero[0], &zero[4]); - gf100_gr_zbc_color_get(gr, 2, & one[0], &one[4]); - gf100_gr_zbc_color_get(gr, 4, &f32_0[0], &f32_0[4]); - gf100_gr_zbc_color_get(gr, 4, &f32_1[0], &f32_1[4]); - gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000); - gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000); - } - - for (index = ltc->zbc_min; index <= ltc->zbc_max; index++) - gf100_gr_zbc_clear_color(gr, index); - for (index = ltc->zbc_min; index <= ltc->zbc_max; index++) - gf100_gr_zbc_clear_depth(gr, index); + gf100_gr_zbc_color_get(gr, 1, & zero[0], &zero[4]); c++; + gf100_gr_zbc_color_get(gr, 2, & one[0], &one[4]); c++; + gf100_gr_zbc_color_get(gr, 4, &f32_0[0], &f32_0[4]); c++; + gf100_gr_zbc_color_get(gr, 4, &f32_1[0], &f32_1[4]); c++; + gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000); d++; + gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000); d++; + if (gr->func->zbc->stencil_get) { + gr->func->zbc->stencil_get(gr, 1, 0x00, 0x00); s++; + gr->func->zbc->stencil_get(gr, 1, 0x01, 0x01); s++; + gr->func->zbc->stencil_get(gr, 1, 0xff, 0xff); s++; + } + } + + for (index = c; index <= ltc->zbc_max; index++) + gr->func->zbc->clear_color(gr, index); + for (index = d; index <= ltc->zbc_max; index++) + gr->func->zbc->clear_depth(gr, index); + + if (gr->func->zbc->clear_stencil) { + for (index = s; index <= ltc->zbc_max; index++) + gr->func->zbc->clear_stencil(gr, index); + } } /** @@ -970,7 +987,7 @@ gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc) nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); } -static const struct nvkm_enum gf100_mp_warp_error[] = { +const struct nvkm_enum gf100_mp_warp_error[] = { { 0x01, "STACK_ERROR" }, { 0x02, "API_STACK_ERROR" }, { 0x03, "RET_EMPTY_STACK_ERROR" }, @@ -995,7 +1012,7 @@ static const struct nvkm_enum gf100_mp_warp_error[] = { {} }; -static const struct nvkm_bitfield gf100_mp_global_error[] = { +const struct nvkm_bitfield gf100_mp_global_error[] = { { 0x00000001, "SM_TO_SM_FAULT" }, { 0x00000002, "L1_ERROR" }, { 0x00000004, "MULTIPLE_WARP_ERRORS" }, @@ -1009,7 +1026,7 @@ static const struct nvkm_bitfield gf100_mp_global_error[] = { {} }; -static void +void gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc) { struct nvkm_subdev *subdev = &gr->base.engine.subdev; @@ -1045,7 +1062,7 @@ gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc) } if (stat & 0x00000002) { - gf100_gr_trap_mp(gr, gpc, tpc); + gr->func->trap_mp(gr, gpc, tpc); stat &= ~0x00000002; } @@ -1611,7 +1628,8 @@ gf100_gr_init_ctxctl_int(struct gf100_gr *gr) /* load register lists */ gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000); - gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000); + gf100_gr_init_csdata(gr, grctx->gpc_0, 0x41a000, 0x000, 0x418000); + gf100_gr_init_csdata(gr, grctx->gpc_1, 0x41a000, 0x000, 0x418000); gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800); gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00); @@ -1651,6 +1669,97 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr) return ret; } +void +gf100_gr_oneinit_sm_id(struct gf100_gr *gr) +{ + int tpc, gpc; + for (tpc = 0; tpc < gr->tpc_max; tpc++) { + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + if (tpc < gr->tpc_nr[gpc]) { + gr->sm[gr->sm_nr].gpc = gpc; + gr->sm[gr->sm_nr].tpc = tpc; + gr->sm_nr++; + } + } + } +} + +void +gf100_gr_oneinit_tiles(struct gf100_gr *gr) +{ + static const u8 primes[] = { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 + }; + int init_frac[GPC_MAX], init_err[GPC_MAX], run_err[GPC_MAX], i, j; + u32 mul_factor, comm_denom; + u8 gpc_map[GPC_MAX]; + bool sorted; + + switch (gr->tpc_total) { + case 15: gr->screen_tile_row_offset = 0x06; break; + case 14: gr->screen_tile_row_offset = 0x05; break; + case 13: gr->screen_tile_row_offset = 0x02; break; + case 11: gr->screen_tile_row_offset = 0x07; break; + case 10: gr->screen_tile_row_offset = 0x06; break; + case 7: + case 5: gr->screen_tile_row_offset = 0x01; break; + case 3: gr->screen_tile_row_offset = 0x02; break; + case 2: + case 1: gr->screen_tile_row_offset = 0x01; break; + default: gr->screen_tile_row_offset = 0x03; + for (i = 0; i < ARRAY_SIZE(primes); i++) { + if (gr->tpc_total % primes[i]) { + gr->screen_tile_row_offset = primes[i]; + break; + } + } + break; + } + + /* Sort GPCs by TPC count, highest-to-lowest. */ + for (i = 0; i < gr->gpc_nr; i++) + gpc_map[i] = i; + sorted = false; + + while (!sorted) { + for (sorted = true, i = 0; i < gr->gpc_nr - 1; i++) { + if (gr->tpc_nr[gpc_map[i + 1]] > + gr->tpc_nr[gpc_map[i + 0]]) { + u8 swap = gpc_map[i]; + gpc_map[i + 0] = gpc_map[i + 1]; + gpc_map[i + 1] = swap; + sorted = false; + } + } + } + + /* Determine tile->GPC mapping */ + mul_factor = gr->gpc_nr * gr->tpc_max; + if (mul_factor & 1) + mul_factor = 2; + else + mul_factor = 1; + + comm_denom = gr->gpc_nr * gr->tpc_max * mul_factor; + + for (i = 0; i < gr->gpc_nr; i++) { + init_frac[i] = gr->tpc_nr[gpc_map[i]] * gr->gpc_nr * mul_factor; + init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2; + run_err[i] = init_frac[i] + init_err[i]; + } + + for (i = 0; i < gr->tpc_total;) { + for (j = 0; j < gr->gpc_nr; j++) { + if ((run_err[j] * 2) >= comm_denom) { + gr->tile[i++] = gpc_map[j]; + run_err[j] += init_frac[j] - comm_denom; + } else { + run_err[j] += init_frac[j]; + } + } + } +} + static int gf100_gr_oneinit(struct nvkm_gr *base) { @@ -1674,55 +1783,27 @@ gf100_gr_oneinit(struct nvkm_gr *base) gr->gpc_nr = nvkm_rd32(device, 0x409604) & 0x0000001f; for (i = 0; i < gr->gpc_nr; i++) { gr->tpc_nr[i] = nvkm_rd32(device, GPC_UNIT(i, 0x2608)); + gr->tpc_max = max(gr->tpc_max, gr->tpc_nr[i]); gr->tpc_total += gr->tpc_nr[i]; gr->ppc_nr[i] = gr->func->ppc_nr; for (j = 0; j < gr->ppc_nr[i]; j++) { - u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4))); - if (mask) - gr->ppc_mask[i] |= (1 << j); - gr->ppc_tpc_nr[i][j] = hweight8(mask); - } - } - - /*XXX: these need figuring out... though it might not even matter */ - switch (device->chipset) { - case 0xc0: - if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */ - gr->screen_tile_row_offset = 0x07; - } else - if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */ - gr->screen_tile_row_offset = 0x05; - } else - if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */ - gr->screen_tile_row_offset = 0x06; + gr->ppc_tpc_mask[i][j] = + nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4))); + if (gr->ppc_tpc_mask[i][j] == 0) + continue; + gr->ppc_mask[i] |= (1 << j); + gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]); + if (gr->ppc_tpc_min == 0 || + gr->ppc_tpc_min > gr->ppc_tpc_nr[i][j]) + gr->ppc_tpc_min = gr->ppc_tpc_nr[i][j]; + if (gr->ppc_tpc_max < gr->ppc_tpc_nr[i][j]) + gr->ppc_tpc_max = gr->ppc_tpc_nr[i][j]; } - break; - case 0xc3: /* 450, 4/0/0/0, 2 */ - gr->screen_tile_row_offset = 0x03; - break; - case 0xc4: /* 460, 3/4/0/0, 4 */ - gr->screen_tile_row_offset = 0x01; - break; - case 0xc1: /* 2/0/0/0, 1 */ - gr->screen_tile_row_offset = 0x01; - break; - case 0xc8: /* 4/4/3/4, 5 */ - gr->screen_tile_row_offset = 0x06; - break; - case 0xce: /* 4/4/0/0, 4 */ - gr->screen_tile_row_offset = 0x03; - break; - case 0xcf: /* 4/0/0/0, 3 */ - gr->screen_tile_row_offset = 0x03; - break; - case 0xd7: - case 0xd9: /* 1/0/0/0, 1 */ - case 0xea: /* gk20a */ - case 0x12b: /* gm20b */ - gr->screen_tile_row_offset = 0x01; - break; } + memset(gr->tile, 0xff, sizeof(gr->tile)); + gr->func->oneinit_tiles(gr); + gr->func->oneinit_sm_id(gr); return 0; } @@ -1914,13 +1995,68 @@ gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device, } void +gf100_gr_init_400054(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x400054, 0x34ce3464); +} + +void +gf100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f); +} + +void +gf100_gr_init_tex_hww_esr(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); +} + +void +gf100_gr_init_419eb4(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000); +} + +void +gf100_gr_init_419cc0(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int gpc, tpc; + + nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); + } +} + +void +gf100_gr_init_40601c(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x40601c, 0xc0000000); +} + +void +gf100_gr_init_fecs_exceptions(struct gf100_gr *gr) +{ + const u32 data = gr->firmware ? 0x000e0000 : 0x000e0001; + nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, data); +} + +void gf100_gr_init_gpc_mmu(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; struct nvkm_fb *fb = device->fb; nvkm_wr32(device, 0x418880, nvkm_rd32(device, 0x100c80) & 0x00000001); - nvkm_wr32(device, 0x4188a4, 0x00000000); + nvkm_wr32(device, 0x4188a4, 0x03000000); nvkm_wr32(device, 0x418888, 0x00000000); nvkm_wr32(device, 0x41888c, 0x00000000); nvkm_wr32(device, 0x418890, 0x00000000); @@ -1929,37 +2065,30 @@ gf100_gr_init_gpc_mmu(struct gf100_gr *gr) nvkm_wr32(device, 0x4188b8, nvkm_memory_addr(fb->mmu_rd) >> 8); } -int -gf100_gr_init(struct gf100_gr *gr) +void +gf100_gr_init_num_active_ltcs(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int i; - - gr->func->init_gpc_mmu(gr); - - gf100_gr_mmio(gr, gr->func->mmio); - - nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001); - - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; + nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); +} - data[i / 8] |= tpc << ((i % 8) * 4); +void +gf100_gr_init_zcull(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); + const u8 tile_nr = ALIGN(gr->tpc_total, 32); + u8 bank[GPC_MAX] = {}, gpc, i, j; + u32 data; + + for (i = 0; i < tile_nr; i += 8) { + for (data = 0, j = 0; j < 8 && i + j < gr->tpc_total; j++) { + data |= bank[gr->tile[i + j]] << (j * 4); + bank[gr->tile[i + j]]++; + } + nvkm_wr32(device, GPC_BCAST(0x0980 + ((i / 8) * 4)), data); } - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); @@ -1968,29 +2097,88 @@ gf100_gr_init(struct gf100_gr *gr) nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); } - if (device->chipset != 0xd7) - nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918); + nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918); +} + +void +gf100_gr_init_vsc_stream_master(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001); +} + +int +gf100_gr_init(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int gpc, tpc, rop; + + if (gr->func->init_419bd8) + gr->func->init_419bd8(gr); + + gr->func->init_gpc_mmu(gr); + + if (gr->fuc_sw_nonctx) + gf100_gr_mmio(gr, gr->fuc_sw_nonctx); else - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); + gf100_gr_mmio(gr, gr->func->mmio); - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); + gf100_gr_wait_idle(gr); + + if (gr->func->init_r405a14) + gr->func->init_r405a14(gr); + + if (gr->func->clkgate_pack) + nvkm_therm_clkgate_init(device->therm, gr->func->clkgate_pack); + + if (gr->func->init_bios) + gr->func->init_bios(gr); + + gr->func->init_vsc_stream_master(gr); + gr->func->init_zcull(gr); + gr->func->init_num_active_ltcs(gr); + if (gr->func->init_rop_active_fbps) + gr->func->init_rop_active_fbps(gr); + if (gr->func->init_bios_2) + gr->func->init_bios_2(gr); + if (gr->func->init_swdx_pes_mask) + gr->func->init_swdx_pes_mask(gr); nvkm_wr32(device, 0x400500, 0x00010001); nvkm_wr32(device, 0x400100, 0xffffffff); nvkm_wr32(device, 0x40013c, 0xffffffff); + nvkm_wr32(device, 0x400124, 0x00000002); + + gr->func->init_fecs_exceptions(gr); + if (gr->func->init_ds_hww_esr_2) + gr->func->init_ds_hww_esr_2(gr); - nvkm_wr32(device, 0x409c24, 0x000f0000); nvkm_wr32(device, 0x404000, 0xc0000000); nvkm_wr32(device, 0x404600, 0xc0000000); nvkm_wr32(device, 0x408030, 0xc0000000); - nvkm_wr32(device, 0x40601c, 0xc0000000); + + if (gr->func->init_40601c) + gr->func->init_40601c(gr); + nvkm_wr32(device, 0x404490, 0xc0000000); nvkm_wr32(device, 0x406018, 0xc0000000); + + if (gr->func->init_sked_hww_esr) + gr->func->init_sked_hww_esr(gr); + nvkm_wr32(device, 0x405840, 0xc0000000); nvkm_wr32(device, 0x405844, 0x00ffffff); - nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); - nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000); + + if (gr->func->init_419cc0) + gr->func->init_419cc0(gr); + if (gr->func->init_419eb4) + gr->func->init_419eb4(gr); + if (gr->func->init_419c9c) + gr->func->init_419c9c(gr); + + if (gr->func->init_ppc_exceptions) + gr->func->init_ppc_exceptions(gr); for (gpc = 0; gpc < gr->gpc_nr; gpc++) { nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); @@ -2000,19 +2188,20 @@ gf100_gr_init(struct gf100_gr *gr) for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); + if (gr->func->init_tex_hww_esr) + gr->func->init_tex_hww_esr(gr, gpc, tpc); nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f); + if (gr->func->init_504430) + gr->func->init_504430(gr, gpc, tpc); + gr->func->init_shader_exceptions(gr, gpc, tpc); } nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); } for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); } @@ -2024,10 +2213,14 @@ gf100_gr_init(struct gf100_gr *gr) nvkm_wr32(device, 0x40011c, 0xffffffff); nvkm_wr32(device, 0x400134, 0xffffffff); - nvkm_wr32(device, 0x400054, 0x34ce3464); + if (gr->func->init_400054) + gr->func->init_400054(gr); gf100_gr_zbc_init(gr); + if (gr->func->init_4188a4) + gr->func->init_4188a4(gr); + return gf100_gr_init_ctxctl(gr); } @@ -2053,13 +2246,27 @@ gf100_gr_gpccs_ucode = { static const struct gf100_gr_func gf100_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf100_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf100_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf100_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index c8ec3fd97155..dc46cf0131db 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -72,6 +72,12 @@ struct gf100_gr_zbc_depth { u32 l2; }; +struct gf100_gr_zbc_stencil { + u32 format; + u32 ds; + u32 l2; +}; + struct gf100_gr { const struct gf100_gr_func *func; struct nvkm_gr base; @@ -95,21 +101,33 @@ struct gf100_gr { struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_CNT]; struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_CNT]; + struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_CNT]; u8 rop_nr; u8 gpc_nr; u8 tpc_nr[GPC_MAX]; + u8 tpc_max; u8 tpc_total; u8 ppc_nr[GPC_MAX]; u8 ppc_mask[GPC_MAX]; + u8 ppc_tpc_mask[GPC_MAX][4]; u8 ppc_tpc_nr[GPC_MAX][4]; + u8 ppc_tpc_min; + u8 ppc_tpc_max; + + u8 screen_tile_row_offset; + u8 tile[TPC_MAX]; + + struct { + u8 gpc; + u8 tpc; + } sm[TPC_MAX]; + u8 sm_nr; struct gf100_gr_data mmio_data[4]; struct gf100_gr_mmio mmio_list[4096/8]; u32 size; u32 *data; - - u8 screen_tile_row_offset; }; int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *, @@ -118,14 +136,43 @@ int gf100_gr_new_(const struct gf100_gr_func *, struct nvkm_device *, int, struct nvkm_gr **); void *gf100_gr_dtor(struct nvkm_gr *); +struct gf100_gr_func_zbc { + void (*clear_color)(struct gf100_gr *, int zbc); + void (*clear_depth)(struct gf100_gr *, int zbc); + int (*stencil_get)(struct gf100_gr *, int format, + const u32 ds, const u32 l2); + void (*clear_stencil)(struct gf100_gr *, int zbc); +}; + struct gf100_gr_func { void (*dtor)(struct gf100_gr *); + void (*oneinit_tiles)(struct gf100_gr *); + void (*oneinit_sm_id)(struct gf100_gr *); int (*init)(struct gf100_gr *); + void (*init_419bd8)(struct gf100_gr *); void (*init_gpc_mmu)(struct gf100_gr *); + void (*init_r405a14)(struct gf100_gr *); + void (*init_bios)(struct gf100_gr *); + void (*init_vsc_stream_master)(struct gf100_gr *); + void (*init_zcull)(struct gf100_gr *); + void (*init_num_active_ltcs)(struct gf100_gr *); void (*init_rop_active_fbps)(struct gf100_gr *); - void (*init_ppc_exceptions)(struct gf100_gr *); + void (*init_bios_2)(struct gf100_gr *); void (*init_swdx_pes_mask)(struct gf100_gr *); - void (*init_num_active_ltcs)(struct gf100_gr *); + void (*init_fecs_exceptions)(struct gf100_gr *); + void (*init_ds_hww_esr_2)(struct gf100_gr *); + void (*init_40601c)(struct gf100_gr *); + void (*init_sked_hww_esr)(struct gf100_gr *); + void (*init_419cc0)(struct gf100_gr *); + void (*init_419eb4)(struct gf100_gr *); + void (*init_419c9c)(struct gf100_gr *); + void (*init_ppc_exceptions)(struct gf100_gr *); + void (*init_tex_hww_esr)(struct gf100_gr *, int gpc, int tpc); + void (*init_504430)(struct gf100_gr *, int gpc, int tpc); + void (*init_shader_exceptions)(struct gf100_gr *, int gpc, int tpc); + void (*init_400054)(struct gf100_gr *); + void (*init_4188a4)(struct gf100_gr *); + void (*trap_mp)(struct gf100_gr *, int gpc, int tpc); void (*set_hww_esr_report_mask)(struct gf100_gr *); const struct gf100_gr_pack *mmio; struct { @@ -135,26 +182,60 @@ struct gf100_gr_func { struct gf100_gr_ucode *ucode; } gpccs; int (*rops)(struct gf100_gr *); + int gpc_nr; + int tpc_nr; int ppc_nr; const struct gf100_grctx_func *grctx; const struct nvkm_therm_clkgate_pack *clkgate_pack; + const struct gf100_gr_func_zbc *zbc; struct nvkm_sclass sclass[]; }; -int gf100_gr_init(struct gf100_gr *); int gf100_gr_rops(struct gf100_gr *); - -int gk104_gr_init(struct gf100_gr *); +void gf100_gr_oneinit_tiles(struct gf100_gr *); +void gf100_gr_oneinit_sm_id(struct gf100_gr *); +int gf100_gr_init(struct gf100_gr *); +void gf100_gr_init_vsc_stream_master(struct gf100_gr *); +void gf100_gr_init_zcull(struct gf100_gr *); +void gf100_gr_init_num_active_ltcs(struct gf100_gr *); +void gf100_gr_init_fecs_exceptions(struct gf100_gr *); +void gf100_gr_init_40601c(struct gf100_gr *); +void gf100_gr_init_419cc0(struct gf100_gr *); +void gf100_gr_init_419eb4(struct gf100_gr *); +void gf100_gr_init_tex_hww_esr(struct gf100_gr *, int, int); +void gf100_gr_init_shader_exceptions(struct gf100_gr *, int, int); +void gf100_gr_init_400054(struct gf100_gr *); +extern const struct gf100_gr_func_zbc gf100_gr_zbc; + +void gf117_gr_init_zcull(struct gf100_gr *); + +void gk104_gr_init_vsc_stream_master(struct gf100_gr *); void gk104_gr_init_rop_active_fbps(struct gf100_gr *); void gk104_gr_init_ppc_exceptions(struct gf100_gr *); +void gk104_gr_init_sked_hww_esr(struct gf100_gr *); + +void gk110_gr_init_419eb4(struct gf100_gr *); + +void gm107_gr_init_504430(struct gf100_gr *, int, int); +void gm107_gr_init_shader_exceptions(struct gf100_gr *, int, int); +void gm107_gr_init_400054(struct gf100_gr *); int gk20a_gr_init(struct gf100_gr *); -int gm200_gr_init(struct gf100_gr *); +void gm200_gr_oneinit_tiles(struct gf100_gr *); +void gm200_gr_oneinit_sm_id(struct gf100_gr *); int gm200_gr_rops(struct gf100_gr *); +void gm200_gr_init_num_active_ltcs(struct gf100_gr *); +void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *); -int gp100_gr_init(struct gf100_gr *); void gp100_gr_init_rop_active_fbps(struct gf100_gr *); +void gp100_gr_init_fecs_exceptions(struct gf100_gr *); +void gp100_gr_init_shader_exceptions(struct gf100_gr *, int, int); +void gp100_gr_zbc_clear_color(struct gf100_gr *, int); +void gp100_gr_zbc_clear_depth(struct gf100_gr *, int); + +void gp102_gr_init_swdx_pes_mask(struct gf100_gr *); +extern const struct gf100_gr_func_zbc gp102_gr_zbc; #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object) #include <core/object.h> @@ -187,7 +268,7 @@ extern const struct nvkm_object_func gf100_fermi; struct gf100_gr_init { u32 addr; u8 count; - u8 pitch; + u32 pitch; u32 data; }; @@ -257,6 +338,9 @@ extern const struct gf100_gr_init gf100_gr_init_be_0[]; extern const struct gf100_gr_init gf100_gr_init_fe_1[]; extern const struct gf100_gr_init gf100_gr_init_pe_1[]; void gf100_gr_init_gpc_mmu(struct gf100_gr *); +void gf100_gr_trap_mp(struct gf100_gr *, int, int); +extern const struct nvkm_bitfield gf100_mp_global_error[]; +extern const struct nvkm_enum gf100_mp_warp_error[]; extern const struct gf100_gr_init gf104_gr_init_ds_0[]; extern const struct gf100_gr_init gf104_gr_init_tex_0[]; @@ -279,6 +363,7 @@ extern const struct gf100_gr_init gf117_gr_init_wwdx_0[]; extern const struct gf100_gr_init gf117_gr_init_cbm_0[]; extern const struct gf100_gr_init gk104_gr_init_main_0[]; +extern const struct gf100_gr_init gk104_gr_init_gpc_unk_2[]; extern const struct gf100_gr_init gk104_gr_init_tpccs_0[]; extern const struct gf100_gr_init gk104_gr_init_pe_0[]; extern const struct gf100_gr_init gk104_gr_init_be_0[]; @@ -306,8 +391,4 @@ extern const struct gf100_gr_init gm107_gr_init_cbm_0[]; void gm107_gr_init_bios(struct gf100_gr *); void gm200_gr_init_gpc_mmu(struct gf100_gr *); - -void gp100_gr_init_num_active_ltcs(struct gf100_gr *gr); - -void gp102_gr_init_swdx_pes_mask(struct gf100_gr *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c index ec0f11983b23..42c2fd9fc04e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c @@ -114,13 +114,27 @@ gf104_gr_pack_mmio[] = { static const struct gf100_gr_func gf104_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf100_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf104_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf104_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c index cc152eb74123..4731a460adc7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c @@ -103,15 +103,36 @@ gf108_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +static void +gf108_gr_init_r405a14(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x405a14, 0x80000000); +} + static const struct gf100_gr_func gf108_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_r405a14 = gf108_gr_init_r405a14, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf100_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf108_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf108_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c index 10d2d73ca8c3..cdf759c8cd7f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c @@ -86,13 +86,27 @@ gf110_gr_pack_mmio[] = { static const struct gf100_gr_func gf110_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf100_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf110_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf110_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c index ac09a07c4150..a4158f84c649 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c @@ -120,16 +120,58 @@ gf117_gr_gpccs_ucode = { .data.size = sizeof(gf117_grgpc_data), }; +void +gf117_gr_init_zcull(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); + const u8 tile_nr = ALIGN(gr->tpc_total, 32); + u8 bank[GPC_MAX] = {}, gpc, i, j; + u32 data; + + for (i = 0; i < tile_nr; i += 8) { + for (data = 0, j = 0; j < 8 && i + j < gr->tpc_total; j++) { + data |= bank[gr->tile[i + j]] << (j * 4); + bank[gr->tile[i + j]]++; + } + nvkm_wr32(device, GPC_BCAST(0x0980 + ((i / 8) * 4)), data); + } + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), + gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | + gr->tpc_total); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); + } + + nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); +} + static const struct gf100_gr_func gf117_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf117_gr_pack_mmio, .fecs.ucode = &gf117_gr_fecs_ucode, .gpccs.ucode = &gf117_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 1, .grctx = &gf117_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c index 7f449ec6f760..4197844870b3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c @@ -177,13 +177,27 @@ gf119_gr_pack_mmio[] = { static const struct gf100_gr_func gf119_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gf100_gr_init_vsc_stream_master, + .init_zcull = gf100_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_40601c = gf100_gr_init_40601c, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gf119_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, .rops = gf100_gr_rops, .grctx = &gf119_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c index 1b52fcb2c49a..477fee3e3715 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c @@ -83,6 +83,12 @@ gk104_gr_init_gpc_unk_1[] = { }; const struct gf100_gr_init +gk104_gr_init_gpc_unk_2[] = { + { 0x418884, 1, 0x04, 0x00000000 }, + {} +}; + +const struct gf100_gr_init gk104_gr_init_tpccs_0[] = { { 0x419d0c, 1, 0x04, 0x00000000 }, { 0x419d10, 1, 0x04, 0x00000014 }, @@ -160,6 +166,7 @@ gk104_gr_pack_mmio[] = { { gf119_gr_init_gpm_0 }, { gk104_gr_init_gpc_unk_1 }, { gf100_gr_init_gcc_0 }, + { gk104_gr_init_gpc_unk_2 }, { gk104_gr_init_tpccs_0 }, { gf119_gr_init_tex_0 }, { gk104_gr_init_pe_0 }, @@ -381,6 +388,21 @@ gk104_clkgate_pack[] = { ******************************************************************************/ void +gk104_gr_init_sked_hww_esr(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x407020, 0x40000000); +} + +static void +gk104_gr_init_fecs_exceptions(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, 0x409ffc, 0x00000000); + nvkm_wr32(device, 0x409c14, 0x00003e3e); + nvkm_wr32(device, 0x409c24, 0x000f0001); +} + +void gk104_gr_init_rop_active_fbps(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -404,112 +426,11 @@ gk104_gr_init_ppc_exceptions(struct gf100_gr *gr) } } -int -gk104_gr_init(struct gf100_gr *gr) +void +gk104_gr_init_vsc_stream_master(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int i; - - gr->func->init_gpc_mmu(gr); - - gf100_gr_mmio(gr, gr->func->mmio); - if (gr->func->clkgate_pack) - nvkm_therm_clkgate_init(gr->base.engine.subdev.device->therm, - gr->func->clkgate_pack); - nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); - - memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), - gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | - gr->tpc_total); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); - } - - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); - - gr->func->init_rop_active_fbps(gr); - - nvkm_wr32(device, 0x400500, 0x00010001); - - nvkm_wr32(device, 0x400100, 0xffffffff); - nvkm_wr32(device, 0x40013c, 0xffffffff); - - nvkm_wr32(device, 0x409ffc, 0x00000000); - nvkm_wr32(device, 0x409c14, 0x00003e3e); - nvkm_wr32(device, 0x409c24, 0x000f0001); - nvkm_wr32(device, 0x404000, 0xc0000000); - nvkm_wr32(device, 0x404600, 0xc0000000); - nvkm_wr32(device, 0x408030, 0xc0000000); - nvkm_wr32(device, 0x404490, 0xc0000000); - nvkm_wr32(device, 0x406018, 0xc0000000); - nvkm_wr32(device, 0x407020, 0x40000000); - nvkm_wr32(device, 0x405840, 0xc0000000); - nvkm_wr32(device, 0x405844, 0x00ffffff); - nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); - nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000); - - gr->func->init_ppc_exceptions(gr); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f); - } - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); - } - - for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); - nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); - } - - nvkm_wr32(device, 0x400108, 0xffffffff); - nvkm_wr32(device, 0x400138, 0xffffffff); - nvkm_wr32(device, 0x400118, 0xffffffff); - nvkm_wr32(device, 0x400130, 0xffffffff); - nvkm_wr32(device, 0x40011c, 0xffffffff); - nvkm_wr32(device, 0x400134, 0xffffffff); - - nvkm_wr32(device, 0x400054, 0x34ce3464); - - gf100_gr_zbc_init(gr); - - return gf100_gr_init_ctxctl(gr); } #include "fuc/hubgk104.fuc3.h" @@ -534,10 +455,23 @@ gk104_gr_gpccs_ucode = { static const struct gf100_gr_func gk104_gr = { - .init = gk104_gr_init, + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .init_fecs_exceptions = gk104_gr_init_fecs_exceptions, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gf100_gr_init_419eb4, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gk104_gr_pack_mmio, .fecs.ucode = &gk104_gr_fecs_ucode, .gpccs.ucode = &gk104_gr_gpccs_ucode, @@ -545,6 +479,7 @@ gk104_gr = { .ppc_nr = 1, .grctx = &gk104_grctx, .clkgate_pack = gk104_clkgate_pack, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c index 4da916a9fc73..7cd628c84e07 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c @@ -143,6 +143,7 @@ gk110_gr_pack_mmio[] = { { gf119_gr_init_gpm_0 }, { gk110_gr_init_gpc_unk_1 }, { gf100_gr_init_gcc_0 }, + { gk104_gr_init_gpc_unk_2 }, { gk104_gr_init_tpccs_0 }, { gk110_gr_init_tex_0 }, { gk104_gr_init_pe_0 }, @@ -334,12 +335,39 @@ gk110_gr_gpccs_ucode = { .data.size = sizeof(gk110_grgpc_data), }; +void +gk110_gr_init_419eb4(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000); + nvkm_mask(device, 0x419eb4, 0x00002000, 0x00002000); + nvkm_mask(device, 0x419eb4, 0x00004000, 0x00004000); + nvkm_mask(device, 0x419eb4, 0x00008000, 0x00008000); + nvkm_mask(device, 0x419eb4, 0x00001000, 0x00000000); + nvkm_mask(device, 0x419eb4, 0x00002000, 0x00000000); + nvkm_mask(device, 0x419eb4, 0x00004000, 0x00000000); + nvkm_mask(device, 0x419eb4, 0x00008000, 0x00000000); +} + static const struct gf100_gr_func gk110_gr = { - .init = gk104_gr_init, + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gk110_gr_init_419eb4, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gk110_gr_pack_mmio, .fecs.ucode = &gk110_gr_fecs_ucode, .gpccs.ucode = &gk110_gr_gpccs_ucode, @@ -347,6 +375,7 @@ gk110_gr = { .ppc_nr = 2, .grctx = &gk110_grctx, .clkgate_pack = gk110_clkgate_pack, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c index 1912c0bfd7ee..a38faa215635 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c @@ -82,6 +82,7 @@ gk110b_gr_pack_mmio[] = { { gf119_gr_init_gpm_0 }, { gk110_gr_init_gpc_unk_1 }, { gf100_gr_init_gcc_0 }, + { gk104_gr_init_gpc_unk_2 }, { gk104_gr_init_tpccs_0 }, { gk110_gr_init_tex_0 }, { gk104_gr_init_pe_0 }, @@ -102,16 +103,30 @@ gk110b_gr_pack_mmio[] = { static const struct gf100_gr_func gk110b_gr = { - .init = gk104_gr_init, + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419eb4 = gk110_gr_init_419eb4, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gk110b_gr_pack_mmio, .fecs.ucode = &gk110_gr_fecs_ucode, .gpccs.ucode = &gk110_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 2, .grctx = &gk110b_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c index 1fc258163f25..58456660e603 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c @@ -121,6 +121,7 @@ gk208_gr_pack_mmio[] = { { gf119_gr_init_gpm_0 }, { gk110_gr_init_gpc_unk_1 }, { gf100_gr_init_gcc_0 }, + { gk104_gr_init_gpc_unk_2 }, { gk104_gr_init_tpccs_0 }, { gk208_gr_init_tex_0 }, { gk104_gr_init_pe_0 }, @@ -161,16 +162,29 @@ gk208_gr_gpccs_ucode = { static const struct gf100_gr_func gk208_gr = { - .init = gk104_gr_init, + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gf100_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_shader_exceptions = gf100_gr_init_shader_exceptions, + .init_400054 = gf100_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gk208_gr_pack_mmio, .fecs.ucode = &gk208_gr_fecs_ucode, .gpccs.ucode = &gk208_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 1, .grctx = &gk208_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index de8b806b88fd..500cb08dd608 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -219,11 +219,7 @@ int gk20a_gr_init(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc; - int ret, i; + int ret; /* Clear SCC RAM */ nvkm_wr32(device, 0x40802c, 0x1); @@ -246,31 +242,7 @@ gk20a_gr_init(struct gf100_gr *gr) nvkm_mask(device, 0x503018, 0x1, 0x1); /* Zcull init */ - memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), - gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | - gr->tpc_total); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); - } - - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); + gr->func->init_zcull(gr); gr->func->init_rop_active_fbps(gr); @@ -310,12 +282,17 @@ gk20a_gr_init(struct gf100_gr *gr) static const struct gf100_gr_func gk20a_gr = { + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, .init = gk20a_gr_init, + .init_zcull = gf117_gr_init_zcull, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .trap_mp = gf100_gr_trap_mp, .set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask, .rops = gf100_gr_rops, .ppc_nr = 1, .grctx = &gk20a_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_A }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c index 2c67fac576d1..92e31d397207 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c @@ -25,6 +25,8 @@ #include "ctxgf100.h" #include <subdev/bios.h> +#include <subdev/bios/bit.h> +#include <subdev/bios/init.h> #include <subdev/bios/P0260.h> #include <subdev/fb.h> @@ -36,6 +38,10 @@ static const struct gf100_gr_init gm107_gr_init_main_0[] = { + { 0x40880c, 1, 0x04, 0x00000000 }, + { 0x408910, 1, 0x04, 0x00000000 }, + { 0x408984, 1, 0x04, 0x00000000 }, + { 0x41a8a0, 1, 0x04, 0x00000000 }, { 0x400080, 1, 0x04, 0x003003c2 }, { 0x400088, 1, 0x04, 0x0001bfe7 }, { 0x40008c, 1, 0x04, 0x00060000 }, @@ -210,14 +216,13 @@ gm107_gr_init_cbm_0[] = { static const struct gf100_gr_init gm107_gr_init_be_0[] = { { 0x408890, 1, 0x04, 0x000000ff }, - { 0x40880c, 1, 0x04, 0x00000000 }, { 0x408850, 1, 0x04, 0x00000004 }, { 0x408878, 1, 0x04, 0x00c81603 }, { 0x40887c, 1, 0x04, 0x80543432 }, { 0x408880, 1, 0x04, 0x0010581e }, { 0x408884, 1, 0x04, 0x00001205 }, { 0x408974, 1, 0x04, 0x000000ff }, - { 0x408910, 9, 0x04, 0x00000000 }, + { 0x408914, 8, 0x04, 0x00000000 }, { 0x408950, 1, 0x04, 0x00000000 }, { 0x408954, 1, 0x04, 0x0000ffff }, { 0x408958, 1, 0x04, 0x00000034 }, @@ -227,7 +232,6 @@ gm107_gr_init_be_0[] = { { 0x408968, 1, 0x04, 0x02808833 }, { 0x40896c, 1, 0x04, 0x01f02438 }, { 0x408970, 1, 0x04, 0x00012c00 }, - { 0x408984, 1, 0x04, 0x00000000 }, { 0x408988, 1, 0x04, 0x08040201 }, { 0x40898c, 1, 0x04, 0x80402010 }, {} @@ -260,6 +264,7 @@ gm107_gr_pack_mmio[] = { { gf100_gr_init_gpm_0 }, { gm107_gr_init_gpc_unk_1 }, { gf100_gr_init_gcc_0 }, + { gk104_gr_init_gpc_unk_2 }, { gm107_gr_init_tpccs_0 }, { gm107_gr_init_tex_0 }, { gm107_gr_init_pe_0 }, @@ -280,6 +285,52 @@ gm107_gr_pack_mmio[] = { ******************************************************************************/ void +gm107_gr_init_400054(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x400054, 0x2c350f63); +} + +void +gm107_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005); +} + +void +gm107_gr_init_504430(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); +} + +static void +gm107_gr_init_bios_2(struct gf100_gr *gr) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_bios *bios = device->bios; + struct bit_entry bit_P; + if (!bit_entry(bios, 'P', &bit_P) && + bit_P.version == 2 && bit_P.length >= 0x2c) { + u32 data = nvbios_rd32(bios, bit_P.offset + 0x28); + if (data) { + u8 ver = nvbios_rd08(bios, data + 0x00); + u8 hdr = nvbios_rd08(bios, data + 0x01); + if (ver == 0x20 && hdr >= 8) { + data = nvbios_rd32(bios, data + 0x04); + if (data) { + u32 save = nvkm_rd32(device, 0x619444); + nvbios_init(subdev, data); + nvkm_wr32(device, 0x619444, save); + } + } + } + } +} + +void gm107_gr_init_bios(struct gf100_gr *gr) { static const struct { @@ -308,115 +359,17 @@ gm107_gr_init_bios(struct gf100_gr *gr) } } -static int -gm107_gr_init(struct gf100_gr *gr) +static void +gm107_gr_init_gpc_mmu(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; struct nvkm_fb *fb = device->fb; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int i; nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000); nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000); nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000); nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(fb->mmu_wr) >> 8); nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(fb->mmu_rd) >> 8); - - gf100_gr_mmio(gr, gr->func->mmio); - - gm107_gr_init_bios(gr); - - nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); - - memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), - gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | - gr->tpc_total); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); - } - - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); - - gr->func->init_rop_active_fbps(gr); - - nvkm_wr32(device, 0x400500, 0x00010001); - - nvkm_wr32(device, 0x400100, 0xffffffff); - nvkm_wr32(device, 0x40013c, 0xffffffff); - nvkm_wr32(device, 0x400124, 0x00000002); - nvkm_wr32(device, 0x409c24, 0x000e0000); - - nvkm_wr32(device, 0x404000, 0xc0000000); - nvkm_wr32(device, 0x404600, 0xc0000000); - nvkm_wr32(device, 0x408030, 0xc0000000); - nvkm_wr32(device, 0x404490, 0xc0000000); - nvkm_wr32(device, 0x406018, 0xc0000000); - nvkm_wr32(device, 0x407020, 0x40000000); - nvkm_wr32(device, 0x405840, 0xc0000000); - nvkm_wr32(device, 0x405844, 0x00ffffff); - nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); - - gr->func->init_ppc_exceptions(gr); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005); - } - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); - } - - for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); - nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); - } - - nvkm_wr32(device, 0x400108, 0xffffffff); - nvkm_wr32(device, 0x400138, 0xffffffff); - nvkm_wr32(device, 0x400118, 0xffffffff); - nvkm_wr32(device, 0x400130, 0xffffffff); - nvkm_wr32(device, 0x40011c, 0xffffffff); - nvkm_wr32(device, 0x400134, 0xffffffff); - - nvkm_wr32(device, 0x400054, 0x2c350f63); - - gf100_gr_zbc_init(gr); - - return gf100_gr_init_ctxctl(gr); } #include "fuc/hubgm107.fuc5.h" @@ -441,15 +394,32 @@ gm107_gr_gpccs_ucode = { static const struct gf100_gr_func gm107_gr = { - .init = gm107_gr_init, + .oneinit_tiles = gf100_gr_oneinit_tiles, + .oneinit_sm_id = gf100_gr_oneinit_sm_id, + .init = gf100_gr_init, + .init_gpc_mmu = gm107_gr_init_gpc_mmu, + .init_bios = gm107_gr_init_bios, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .init_bios_2 = gm107_gr_init_bios_2, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gm107_gr_init_shader_exceptions, + .init_400054 = gm107_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .mmio = gm107_gr_pack_mmio, .fecs.ucode = &gm107_gr_fecs_ucode, .gpccs.ucode = &gm107_gr_gpccs_ucode, .rops = gf100_gr_rops, .ppc_nr = 2, .grctx = &gm107_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c index 6435f1257572..eff30662b984 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c @@ -39,6 +39,22 @@ gm200_gr_rops(struct gf100_gr *gr) } void +gm200_gr_init_ds_hww_esr_2(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, 0x405848, 0xc0000000); + nvkm_mask(device, 0x40584c, 0x00000001, 0x00000001); +} + +void +gm200_gr_init_num_active_ltcs(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); + nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804)); +} + +void gm200_gr_init_gpc_mmu(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -61,111 +77,51 @@ gm200_gr_init_rop_active_fbps(struct gf100_gr *gr) nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */ } -int -gm200_gr_init(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int i; - - gr->func->init_gpc_mmu(gr); - - gf100_gr_mmio(gr, gr->fuc_sw_nonctx); - - gm107_gr_init_bios(gr); - - nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); - - memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), - gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | - gr->tpc_total); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); - } +static u8 +gm200_gr_tile_map_6_24[] = { + 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2, +}; - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); - nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804)); +static u8 +gm200_gr_tile_map_4_16[] = { + 0, 1, 2, 3, 2, 3, 0, 1, 3, 0, 1, 2, 1, 2, 3, 0, +}; - gr->func->init_rop_active_fbps(gr); +static u8 +gm200_gr_tile_map_2_8[] = { + 0, 1, 1, 0, 0, 1, 1, 0, +}; - nvkm_wr32(device, 0x400500, 0x00010001); - nvkm_wr32(device, 0x400100, 0xffffffff); - nvkm_wr32(device, 0x40013c, 0xffffffff); - nvkm_wr32(device, 0x400124, 0x00000002); - nvkm_wr32(device, 0x409c24, 0x000e0000); - nvkm_wr32(device, 0x405848, 0xc0000000); - nvkm_wr32(device, 0x40584c, 0x00000001); - nvkm_wr32(device, 0x404000, 0xc0000000); - nvkm_wr32(device, 0x404600, 0xc0000000); - nvkm_wr32(device, 0x408030, 0xc0000000); - nvkm_wr32(device, 0x404490, 0xc0000000); - nvkm_wr32(device, 0x406018, 0xc0000000); - nvkm_wr32(device, 0x407020, 0x40000000); - nvkm_wr32(device, 0x405840, 0xc0000000); - nvkm_wr32(device, 0x405844, 0x00ffffff); - nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); - - gr->func->init_ppc_exceptions(gr); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005); - } - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); - } +void +gm200_gr_oneinit_sm_id(struct gf100_gr *gr) +{ + /*XXX: There's a different algorithm here I've not yet figured out. */ + gf100_gr_oneinit_sm_id(gr); +} - for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); - nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); +void +gm200_gr_oneinit_tiles(struct gf100_gr *gr) +{ + /*XXX: Not sure what this is about. The algorithm from NVGPU + * seems to work for all boards I tried from earlier (and + * later) GPUs except in these specific configurations. + * + * Let's just hardcode them for now. + */ + if (gr->gpc_nr == 2 && gr->tpc_total == 8) { + memcpy(gr->tile, gm200_gr_tile_map_2_8, gr->tpc_total); + gr->screen_tile_row_offset = 1; + } else + if (gr->gpc_nr == 4 && gr->tpc_total == 16) { + memcpy(gr->tile, gm200_gr_tile_map_4_16, gr->tpc_total); + gr->screen_tile_row_offset = 4; + } else + if (gr->gpc_nr == 6 && gr->tpc_total == 24) { + memcpy(gr->tile, gm200_gr_tile_map_6_24, gr->tpc_total); + gr->screen_tile_row_offset = 5; + } else { + gf100_gr_oneinit_tiles(gr); } - - nvkm_wr32(device, 0x400108, 0xffffffff); - nvkm_wr32(device, 0x400138, 0xffffffff); - nvkm_wr32(device, 0x400118, 0xffffffff); - nvkm_wr32(device, 0x400130, 0xffffffff); - nvkm_wr32(device, 0x40011c, 0xffffffff); - nvkm_wr32(device, 0x400134, 0xffffffff); - - nvkm_wr32(device, 0x400054, 0x2c350f63); - - gf100_gr_zbc_init(gr); - - return gf100_gr_init_ctxctl(gr); } int @@ -208,13 +164,30 @@ gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device, static const struct gf100_gr_func gm200_gr = { - .init = gm200_gr_init, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_bios = gm107_gr_init_bios, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, .init_rop_active_fbps = gm200_gr_init_rop_active_fbps, + .init_fecs_exceptions = gf100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gm107_gr_init_shader_exceptions, + .init_400054 = gm107_gr_init_400054, + .trap_mp = gf100_gr_trap_mp, .rops = gm200_gr_rops, + .tpc_nr = 4, .ppc_nr = 2, .grctx = &gm200_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c index 69479af1d829..a667770ce3cb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c @@ -64,13 +64,18 @@ gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr) static const struct gf100_gr_func gm20b_gr = { + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, .init = gk20a_gr_init, + .init_zcull = gf117_gr_init_zcull, .init_gpc_mmu = gm20b_gr_init_gpc_mmu, .init_rop_active_fbps = gk104_gr_init_rop_active_fbps, + .trap_mp = gf100_gr_trap_mp, .set_hww_esr_report_mask = gm20b_gr_set_hww_esr_report_mask, .rops = gm200_gr_rops, .ppc_nr = 1, .grctx = &gm20b_grctx, + .zbc = &gf100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c index 867a5f7cc5bc..9d0521ce309a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c @@ -29,143 +29,103 @@ /******************************************************************************* * PGRAPH engine/subdev functions ******************************************************************************/ - void -gp100_gr_init_rop_active_fbps(struct gf100_gr *gr) +gp100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc) { struct nvkm_device *device = gr->base.engine.subdev.device; - /*XXX: otherwise identical to gm200 aside from mask.. do everywhere? */ - const u32 fbp_count = nvkm_rd32(device, 0x12006c) & 0x0000000f; - nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */ - nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */ + const int znum = zbc - 1; + const u32 zoff = znum * 4; + + if (gr->zbc_color[zbc].format) { + nvkm_wr32(device, 0x418010 + zoff, gr->zbc_color[zbc].ds[0]); + nvkm_wr32(device, 0x41804c + zoff, gr->zbc_color[zbc].ds[1]); + nvkm_wr32(device, 0x418088 + zoff, gr->zbc_color[zbc].ds[2]); + nvkm_wr32(device, 0x4180c4 + zoff, gr->zbc_color[zbc].ds[3]); + } + + nvkm_mask(device, 0x418100 + ((znum / 4) * 4), + 0x0000007f << ((znum % 4) * 7), + gr->zbc_color[zbc].format << ((znum % 4) * 7)); } void -gp100_gr_init_num_active_ltcs(struct gf100_gr *gr) +gp100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc) { struct nvkm_device *device = gr->base.engine.subdev.device; - - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); - nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804)); + const int znum = zbc - 1; + const u32 zoff = znum * 4; + + if (gr->zbc_depth[zbc].format) + nvkm_wr32(device, 0x418110 + zoff, gr->zbc_depth[zbc].ds); + nvkm_mask(device, 0x41814c + ((znum / 4) * 4), + 0x0000007f << ((znum % 4) * 7), + gr->zbc_depth[zbc].format << ((znum % 4) * 7)); } -int -gp100_gr_init(struct gf100_gr *gr) +static const struct gf100_gr_func_zbc +gp100_gr_zbc = { + .clear_color = gp100_gr_zbc_clear_color, + .clear_depth = gp100_gr_zbc_clear_depth, +}; + +void +gp100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) { struct nvkm_device *device = gr->base.engine.subdev.device; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int i; - - gr->func->init_gpc_mmu(gr); - - gf100_gr_mmio(gr, gr->fuc_sw_nonctx); - - nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); - - memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); - for (i = 0, gpc = -1; i < gr->tpc_total; i++) { - do { - gpc = (gpc + 1) % gr->gpc_nr; - } while (!tpcnr[gpc]); - tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); - nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); - nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); - nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), - gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | - gr->tpc_total); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); - } - - nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); - gr->func->init_num_active_ltcs(gr); - - gr->func->init_rop_active_fbps(gr); - if (gr->func->init_swdx_pes_mask) - gr->func->init_swdx_pes_mask(gr); - - nvkm_wr32(device, 0x400500, 0x00010001); - nvkm_wr32(device, 0x400100, 0xffffffff); - nvkm_wr32(device, 0x40013c, 0xffffffff); - nvkm_wr32(device, 0x400124, 0x00000002); - nvkm_wr32(device, 0x409c24, 0x000f0002); - nvkm_wr32(device, 0x405848, 0xc0000000); - nvkm_mask(device, 0x40584c, 0x00000000, 0x00000001); - nvkm_wr32(device, 0x404000, 0xc0000000); - nvkm_wr32(device, 0x404600, 0xc0000000); - nvkm_wr32(device, 0x408030, 0xc0000000); - nvkm_wr32(device, 0x404490, 0xc0000000); - nvkm_wr32(device, 0x406018, 0xc0000000); - nvkm_wr32(device, 0x407020, 0x40000000); - nvkm_wr32(device, 0x405840, 0xc0000000); - nvkm_wr32(device, 0x405844, 0x00ffffff); - nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105); +} +static void +gp100_gr_init_419c9c(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; nvkm_mask(device, 0x419c9c, 0x00010000, 0x00010000); nvkm_mask(device, 0x419c9c, 0x00020000, 0x00020000); +} - gr->func->init_ppc_exceptions(gr); - - for (gpc = 0; gpc < gr->gpc_nr; gpc++) { - nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); - nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105); - } - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); - } - - for (rop = 0; rop < gr->rop_nr; rop++) { - nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); - nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); - nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); - } - - nvkm_wr32(device, 0x400108, 0xffffffff); - nvkm_wr32(device, 0x400138, 0xffffffff); - nvkm_wr32(device, 0x400118, 0xffffffff); - nvkm_wr32(device, 0x400130, 0xffffffff); - nvkm_wr32(device, 0x40011c, 0xffffffff); - nvkm_wr32(device, 0x400134, 0xffffffff); - - gf100_gr_zbc_init(gr); +void +gp100_gr_init_fecs_exceptions(struct gf100_gr *gr) +{ + nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x000f0002); +} - return gf100_gr_init_ctxctl(gr); +void +gp100_gr_init_rop_active_fbps(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + /*XXX: otherwise identical to gm200 aside from mask.. do everywhere? */ + const u32 fbp_count = nvkm_rd32(device, 0x12006c) & 0x0000000f; + nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */ + nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */ } static const struct gf100_gr_func gp100_gr = { - .init = gp100_gr_init, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_419c9c = gp100_gr_init_419c9c, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, - .init_num_active_ltcs = gp100_gr_init_num_active_ltcs, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .trap_mp = gf100_gr_trap_mp, .rops = gm200_gr_rops, + .gpc_nr = 6, + .tpc_nr = 5, .ppc_nr = 2, .grctx = &gp100_grctx, + .zbc = &gp100_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c index 61e3a0b08559..37f7d739bf80 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c @@ -26,6 +26,62 @@ #include <nvif/class.h> +static void +gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const int znum = zbc - 1; + const u32 zoff = znum * 4; + + if (gr->zbc_stencil[zbc].format) + nvkm_wr32(device, 0x41815c + zoff, gr->zbc_stencil[zbc].ds); + nvkm_mask(device, 0x418198 + ((znum / 4) * 4), + 0x0000007f << ((znum % 4) * 7), + gr->zbc_stencil[zbc].format << ((znum % 4) * 7)); +} + +static int +gp102_gr_zbc_stencil_get(struct gf100_gr *gr, int format, + const u32 ds, const u32 l2) +{ + struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; + int zbc = -ENOSPC, i; + + for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { + if (gr->zbc_stencil[i].format) { + if (gr->zbc_stencil[i].format != format) + continue; + if (gr->zbc_stencil[i].ds != ds) + continue; + if (gr->zbc_stencil[i].l2 != l2) { + WARN_ON(1); + return -EINVAL; + } + return i; + } else { + zbc = (zbc < 0) ? i : zbc; + } + } + + if (zbc < 0) + return zbc; + + gr->zbc_stencil[zbc].format = format; + gr->zbc_stencil[zbc].ds = ds; + gr->zbc_stencil[zbc].l2 = l2; + nvkm_ltc_zbc_stencil_get(ltc, zbc, l2); + gr->func->zbc->clear_stencil(gr, zbc); + return zbc; +} + +const struct gf100_gr_func_zbc +gp102_gr_zbc = { + .clear_color = gp100_gr_zbc_clear_color, + .clear_depth = gp100_gr_zbc_clear_depth, + .stencil_get = gp102_gr_zbc_stencil_get, + .clear_stencil = gp102_gr_zbc_clear_stencil, +}; + void gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr) { @@ -42,15 +98,30 @@ gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr) static const struct gf100_gr_func gp102_gr = { - .init = gp100_gr_init, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, - .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, - .init_num_active_ltcs = gp100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .trap_mp = gf100_gr_trap_mp, .rops = gm200_gr_rops, + .gpc_nr = 6, + .tpc_nr = 5, .ppc_nr = 3, .grctx = &gp102_grctx, + .zbc = &gp102_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c new file mode 100644 index 000000000000..4573c914c021 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c @@ -0,0 +1,66 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ctxgf100.h" + +#include <nvif/class.h> + +static const struct gf100_gr_func +gp104_gr = { + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, + .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, + .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, + .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .trap_mp = gf100_gr_trap_mp, + .rops = gm200_gr_rops, + .gpc_nr = 6, + .tpc_nr = 5, + .ppc_nr = 3, + .grctx = &gp104_grctx, + .zbc = &gp102_gr_zbc, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, PASCAL_B, &gf100_fermi }, + { -1, -1, PASCAL_COMPUTE_B }, + {} + } +}; + +int +gp104_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gm200_gr_new_(&gp104_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c index f7272323f694..812aba91653f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c @@ -28,15 +28,30 @@ static const struct gf100_gr_func gp107_gr = { - .init = gp100_gr_init, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, - .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, - .init_num_active_ltcs = gp100_gr_init_num_active_ltcs, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .trap_mp = gf100_gr_trap_mp, .rops = gm200_gr_rops, + .gpc_nr = 2, + .tpc_nr = 3, .ppc_nr = 1, .grctx = &gp107_grctx, + .zbc = &gp102_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c index 5f3d161a0842..303dceddd4a8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c @@ -25,24 +25,31 @@ #include <nvif/class.h> -static void -gp10b_gr_init_num_active_ltcs(struct gf100_gr *gr) -{ - struct nvkm_device *device = gr->base.engine.subdev.device; - - nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); -} - static const struct gf100_gr_func gp10b_gr = { - .init = gp100_gr_init, + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gf100_gr_init_num_active_ltcs, .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_419cc0 = gf100_gr_init_419cc0, .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, - .init_num_active_ltcs = gp10b_gr_init_num_active_ltcs, + .init_tex_hww_esr = gf100_gr_init_tex_hww_esr, + .init_504430 = gm107_gr_init_504430, + .init_shader_exceptions = gp100_gr_init_shader_exceptions, + .trap_mp = gf100_gr_trap_mp, .rops = gm200_gr_rops, + .gpc_nr = 1, + .tpc_nr = 2, .ppc_nr = 1, .grctx = &gp102_grctx, + .zbc = &gp102_gr_zbc, .sclass = { { -1, -1, FERMI_TWOD_A }, { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c new file mode 100644 index 000000000000..19173ea19096 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c @@ -0,0 +1,120 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ctxgf100.h" + +#include <nvif/class.h> + +static void +gv100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x730)); + u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x734)); + const struct nvkm_enum *warp; + char glob[128]; + + nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr); + warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff); + + nvkm_error(subdev, "GPC%i/TPC%i/MP trap: " + "global %08x [%s] warp %04x [%s]\n", + gpc, tpc, gerr, glob, werr, warp ? warp->name : ""); + + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x730), 0x00000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x734), gerr); +} + +static void +gv100_gr_init_4188a4(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x4188a4, 0x03000000, 0x03000000); +} + +static void +gv100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int sm; + for (sm = 0; sm < 0x100; sm += 0x80) { + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x728 + sm), 0x0085eb64); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x610), 0x00000001); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x72c + sm), 0x00000004); + } +} + +static void +gv100_gr_init_504430(struct gf100_gr *gr, int gpc, int tpc) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0x403f0000); +} + +static void +gv100_gr_init_419bd8(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x419bd8, 0x00000700, 0x00000000); +} + +static const struct gf100_gr_func +gv100_gr = { + .oneinit_tiles = gm200_gr_oneinit_tiles, + .oneinit_sm_id = gm200_gr_oneinit_sm_id, + .init = gf100_gr_init, + .init_419bd8 = gv100_gr_init_419bd8, + .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_vsc_stream_master = gk104_gr_init_vsc_stream_master, + .init_zcull = gf117_gr_init_zcull, + .init_num_active_ltcs = gm200_gr_init_num_active_ltcs, + .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, + .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, + .init_fecs_exceptions = gp100_gr_init_fecs_exceptions, + .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2, + .init_sked_hww_esr = gk104_gr_init_sked_hww_esr, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_504430 = gv100_gr_init_504430, + .init_shader_exceptions = gv100_gr_init_shader_exceptions, + .init_4188a4 = gv100_gr_init_4188a4, + .trap_mp = gv100_gr_trap_mp, + .rops = gm200_gr_rops, + .gpc_nr = 6, + .tpc_nr = 5, + .ppc_nr = 3, + .grctx = &gv100_grctx, + .zbc = &gp102_gr_zbc, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, VOLTA_A, &gf100_fermi }, + { -1, -1, VOLTA_COMPUTE_A }, + {} + } +}; + +int +gv100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gm200_gr_new_(&gv100_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c index 58a59b7db2e5..771e16a16267 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c @@ -506,6 +506,7 @@ nvkm_msgqueue_new(u32 version, struct nvkm_falcon *falcon, break; case 0x0148cdec: case 0x015ccf3e: + case 0x0167d263: ret = msgqueue_0148cdec_new(falcon, sb, queue); break; default: diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild index 3f5d38d74fba..cfdffef1afb9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild @@ -3,6 +3,7 @@ include $(src)/nvkm/subdev/bios/Kbuild include $(src)/nvkm/subdev/bus/Kbuild include $(src)/nvkm/subdev/clk/Kbuild include $(src)/nvkm/subdev/devinit/Kbuild +include $(src)/nvkm/subdev/fault/Kbuild include $(src)/nvkm/subdev/fb/Kbuild include $(src)/nvkm/subdev/fuse/Kbuild include $(src)/nvkm/subdev/gpio/Kbuild diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c index 7c7efa4ea0d0..3133b28f849c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c @@ -25,7 +25,7 @@ #include <subdev/bios/bit.h> #include <subdev/bios/dp.h> -static u16 +u16 nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len) { struct bit_entry d; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c index 2ca23a9157ab..e6e804cee2bc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c @@ -193,7 +193,10 @@ pll_map_type(struct nvkm_bios *bios, u8 type, u32 *reg, u8 *ver, u8 *len) data += hdr; while (cnt--) { if (nvbios_rd08(bios, data + 0) == type) { - *reg = nvbios_rd32(bios, data + 3); + if (*ver < 0x50) + *reg = nvbios_rd32(bios, data + 3); + else + *reg = 0; return data; } data += *len; @@ -361,6 +364,20 @@ nvbios_pll_parse(struct nvkm_bios *bios, u32 type, struct nvbios_pll *info) info->min_p = nvbios_rd08(bios, data + 12); info->max_p = nvbios_rd08(bios, data + 13); break; + case 0x50: + info->refclk = nvbios_rd16(bios, data + 1) * 1000; + /* info->refclk_alt = nvbios_rd16(bios, data + 3) * 1000; */ + info->vco1.min_freq = nvbios_rd16(bios, data + 5) * 1000; + info->vco1.max_freq = nvbios_rd16(bios, data + 7) * 1000; + info->vco1.min_inputfreq = nvbios_rd16(bios, data + 9) * 1000; + info->vco1.max_inputfreq = nvbios_rd16(bios, data + 11) * 1000; + info->vco1.min_m = nvbios_rd08(bios, data + 13); + info->vco1.max_m = nvbios_rd08(bios, data + 14); + info->vco1.min_n = nvbios_rd08(bios, data + 15); + info->vco1.max_n = nvbios_rd08(bios, data + 16); + info->min_p = nvbios_rd08(bios, data + 17); + info->max_p = nvbios_rd08(bios, data + 18); + break; default: nvkm_error(subdev, "unknown pll limits version 0x%02x\n", ver); return -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c index 0f537c22804c..3634cd0630b8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c @@ -78,7 +78,10 @@ pramin_init(struct nvkm_bios *bios, const char *name) * important as we don't want to be touching vram on an * uninitialised board */ - addr = nvkm_rd32(device, 0x619f04); + if (device->card_type >= GV100) + addr = nvkm_rd32(device, 0x625f04); + else + addr = nvkm_rd32(device, 0x619f04); if (!(addr & 0x00000008)) { nvkm_debug(subdev, "... not enabled\n"); return ERR_PTR(-ENODEV); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c index 81c3567d4e67..ba6a868d4c95 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c @@ -109,18 +109,17 @@ nvkm_cstate_valid(struct nvkm_clk *clk, struct nvkm_cstate *cstate, static struct nvkm_cstate * nvkm_cstate_find_best(struct nvkm_clk *clk, struct nvkm_pstate *pstate, - struct nvkm_cstate *start) + struct nvkm_cstate *cstate) { struct nvkm_device *device = clk->subdev.device; struct nvkm_volt *volt = device->volt; - struct nvkm_cstate *cstate; int max_volt; - if (!pstate || !start) + if (!pstate || !cstate) return NULL; if (!volt) - return start; + return cstate; max_volt = volt->max_uv; if (volt->max0_id != 0xff) @@ -133,8 +132,7 @@ nvkm_cstate_find_best(struct nvkm_clk *clk, struct nvkm_pstate *pstate, max_volt = min(max_volt, nvkm_volt_map(volt, volt->max2_id, clk->temp)); - for (cstate = start; &cstate->head != &pstate->list; - cstate = list_prev_entry(cstate, head)) { + list_for_each_entry_from_reverse(cstate, &pstate->list, head) { if (nvkm_cstate_valid(clk, cstate, max_volt, clk->temp)) break; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild index eac88e3dc6e5..50a436926484 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild @@ -12,3 +12,4 @@ nvkm-y += nvkm/subdev/devinit/mcp89.o nvkm-y += nvkm/subdev/devinit/gf100.o nvkm-y += nvkm/subdev/devinit/gm107.o nvkm-y += nvkm/subdev/devinit/gm200.o +nvkm-y += nvkm/subdev/devinit/gv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c index 1730371933df..b80618e35491 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c @@ -107,7 +107,7 @@ pmu_load(struct nv50_devinit *init, u8 type, bool post, return pmu_exec(init, pmu.init_addr_pmu), 0; } -static int +int gm200_devinit_post(struct nvkm_devinit *base, bool post) { struct nv50_devinit *init = nv50_devinit(base); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gv100.c new file mode 100644 index 000000000000..fbde6828bd38 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gv100.c @@ -0,0 +1,79 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nv50.h" + +#include <subdev/bios.h> +#include <subdev/bios/pll.h> +#include <subdev/clk/pll.h> + +static int +gv100_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) +{ + struct nvkm_subdev *subdev = &init->subdev; + struct nvkm_device *device = subdev->device; + struct nvbios_pll info; + int head = type - PLL_VPLL0; + int N, fN, M, P; + int ret; + + ret = nvbios_pll_parse(device->bios, type, &info); + if (ret) + return ret; + + ret = gt215_pll_calc(subdev, &info, freq, &N, &fN, &M, &P); + if (ret < 0) + return ret; + + switch (info.type) { + case PLL_VPLL0: + case PLL_VPLL1: + case PLL_VPLL2: + case PLL_VPLL3: + nvkm_wr32(device, 0x00ef10 + (head * 0x40), fN << 16); + nvkm_wr32(device, 0x00ef04 + (head * 0x40), (P << 16) | + (N << 8) | + (M << 0)); + break; + default: + nvkm_warn(subdev, "%08x/%dKhz unimplemented\n", type, freq); + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct nvkm_devinit_func +gv100_devinit = { + .preinit = gf100_devinit_preinit, + .init = nv50_devinit_init, + .post = gm200_devinit_post, + .pll_set = gv100_devinit_pll_set, + .disable = gm107_devinit_disable, +}; + +int +gv100_devinit_new(struct nvkm_device *device, int index, + struct nvkm_devinit **pinit) +{ + return nv50_devinit_new_(&gv100_devinit, device, index, pinit); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h index 315ebaff1165..9b9f0dc1e192 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h @@ -24,4 +24,6 @@ int gf100_devinit_pll_set(struct nvkm_devinit *, u32, u32); void gf100_devinit_preinit(struct nvkm_devinit *); u64 gm107_devinit_disable(struct nvkm_devinit *); + +int gm200_devinit_post(struct nvkm_devinit *, bool); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild new file mode 100644 index 000000000000..45bb46fb0929 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild @@ -0,0 +1,3 @@ +nvkm-y += nvkm/subdev/fault/base.o +nvkm-y += nvkm/subdev/fault/gp100.o +nvkm-y += nvkm/subdev/fault/gv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c new file mode 100644 index 000000000000..007bf4af33b9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c @@ -0,0 +1,179 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +#include <core/memory.h> +#include <core/notify.h> +#include <subdev/bar.h> +#include <subdev/mmu.h> + +static void +nvkm_fault_ntfy_fini(struct nvkm_event *event, int type, int index) +{ + struct nvkm_fault *fault = container_of(event, typeof(*fault), event); + fault->func->buffer.fini(fault->buffer[index]); +} + +static void +nvkm_fault_ntfy_init(struct nvkm_event *event, int type, int index) +{ + struct nvkm_fault *fault = container_of(event, typeof(*fault), event); + fault->func->buffer.init(fault->buffer[index]); +} + +static int +nvkm_fault_ntfy_ctor(struct nvkm_object *object, void *argv, u32 argc, + struct nvkm_notify *notify) +{ + struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object); + if (argc == 0) { + notify->size = 0; + notify->types = 1; + notify->index = buffer->id; + return 0; + } + return -ENOSYS; +} + +static const struct nvkm_event_func +nvkm_fault_ntfy = { + .ctor = nvkm_fault_ntfy_ctor, + .init = nvkm_fault_ntfy_init, + .fini = nvkm_fault_ntfy_fini, +}; + +static void +nvkm_fault_intr(struct nvkm_subdev *subdev) +{ + struct nvkm_fault *fault = nvkm_fault(subdev); + return fault->func->intr(fault); +} + +static int +nvkm_fault_fini(struct nvkm_subdev *subdev, bool suspend) +{ + struct nvkm_fault *fault = nvkm_fault(subdev); + if (fault->func->fini) + fault->func->fini(fault); + return 0; +} + +static int +nvkm_fault_init(struct nvkm_subdev *subdev) +{ + struct nvkm_fault *fault = nvkm_fault(subdev); + if (fault->func->init) + fault->func->init(fault); + return 0; +} + +static int +nvkm_fault_oneinit_buffer(struct nvkm_fault *fault, int id) +{ + struct nvkm_subdev *subdev = &fault->subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_vmm *bar2 = nvkm_bar_bar2_vmm(device); + struct nvkm_fault_buffer *buffer; + int ret; + + if (!(buffer = kzalloc(sizeof(*buffer), GFP_KERNEL))) + return -ENOMEM; + buffer->fault = fault; + buffer->id = id; + buffer->entries = fault->func->buffer.entries(buffer); + fault->buffer[id] = buffer; + + nvkm_debug(subdev, "buffer %d: %d entries\n", id, buffer->entries); + + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, buffer->entries * + fault->func->buffer.entry_size, 0x1000, true, + &buffer->mem); + if (ret) + return ret; + + ret = nvkm_vmm_get(bar2, 12, nvkm_memory_size(buffer->mem), + &buffer->vma); + if (ret) + return ret; + + return nvkm_memory_map(buffer->mem, 0, bar2, buffer->vma, NULL, 0); +} + +static int +nvkm_fault_oneinit(struct nvkm_subdev *subdev) +{ + struct nvkm_fault *fault = nvkm_fault(subdev); + int ret, i; + + for (i = 0; i < ARRAY_SIZE(fault->buffer); i++) { + if (i < fault->func->buffer.nr) { + ret = nvkm_fault_oneinit_buffer(fault, i); + if (ret) + return ret; + fault->buffer_nr = i + 1; + } + } + + return nvkm_event_init(&nvkm_fault_ntfy, 1, fault->buffer_nr, + &fault->event); +} + +static void * +nvkm_fault_dtor(struct nvkm_subdev *subdev) +{ + struct nvkm_vmm *bar2 = nvkm_bar_bar2_vmm(subdev->device); + struct nvkm_fault *fault = nvkm_fault(subdev); + int i; + + nvkm_event_fini(&fault->event); + + for (i = 0; i < fault->buffer_nr; i++) { + if (fault->buffer[i]) { + nvkm_vmm_put(bar2, &fault->buffer[i]->vma); + nvkm_memory_unref(&fault->buffer[i]->mem); + kfree(fault->buffer[i]); + } + } + + return fault; +} + +static const struct nvkm_subdev_func +nvkm_fault = { + .dtor = nvkm_fault_dtor, + .oneinit = nvkm_fault_oneinit, + .init = nvkm_fault_init, + .fini = nvkm_fault_fini, + .intr = nvkm_fault_intr, +}; + +int +nvkm_fault_new_(const struct nvkm_fault_func *func, struct nvkm_device *device, + int index, struct nvkm_fault **pfault) +{ + struct nvkm_fault *fault; + if (!(fault = *pfault = kzalloc(sizeof(*fault), GFP_KERNEL))) + return -ENOMEM; + nvkm_subdev_ctor(&nvkm_fault, device, index, &fault->subdev); + fault->func = func; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c new file mode 100644 index 000000000000..5e71db2e8d75 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c @@ -0,0 +1,69 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +#include <subdev/mmu.h> + +static void +gp100_fault_buffer_fini(struct nvkm_fault_buffer *buffer) +{ + struct nvkm_device *device = buffer->fault->subdev.device; + nvkm_mask(device, 0x002a70, 0x00000001, 0x00000000); +} + +static void +gp100_fault_buffer_init(struct nvkm_fault_buffer *buffer) +{ + struct nvkm_device *device = buffer->fault->subdev.device; + nvkm_wr32(device, 0x002a74, upper_32_bits(buffer->vma->addr)); + nvkm_wr32(device, 0x002a70, lower_32_bits(buffer->vma->addr)); + nvkm_mask(device, 0x002a70, 0x00000001, 0x00000001); +} + +static u32 +gp100_fault_buffer_entries(struct nvkm_fault_buffer *buffer) +{ + return nvkm_rd32(buffer->fault->subdev.device, 0x002a78); +} + +static void +gp100_fault_intr(struct nvkm_fault *fault) +{ + nvkm_event_send(&fault->event, 1, 0, NULL, 0); +} + +static const struct nvkm_fault_func +gp100_fault = { + .intr = gp100_fault_intr, + .buffer.nr = 1, + .buffer.entry_size = 32, + .buffer.entries = gp100_fault_buffer_entries, + .buffer.init = gp100_fault_buffer_init, + .buffer.fini = gp100_fault_buffer_fini, +}; + +int +gp100_fault_new(struct nvkm_device *device, int index, + struct nvkm_fault **pfault) +{ + return nvkm_fault_new_(&gp100_fault, device, index, pfault); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c new file mode 100644 index 000000000000..73c7728b5969 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c @@ -0,0 +1,206 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +#include <core/memory.h> +#include <subdev/mmu.h> +#include <engine/fifo.h> + +static void +gv100_fault_buffer_process(struct nvkm_fault_buffer *buffer) +{ + struct nvkm_device *device = buffer->fault->subdev.device; + struct nvkm_memory *mem = buffer->mem; + const u32 foff = buffer->id * 0x14; + u32 get = nvkm_rd32(device, 0x100e2c + foff); + u32 put = nvkm_rd32(device, 0x100e30 + foff); + if (put == get) + return; + + nvkm_kmap(mem); + while (get != put) { + const u32 base = get * buffer->fault->func->buffer.entry_size; + const u32 instlo = nvkm_ro32(mem, base + 0x00); + const u32 insthi = nvkm_ro32(mem, base + 0x04); + const u32 addrlo = nvkm_ro32(mem, base + 0x08); + const u32 addrhi = nvkm_ro32(mem, base + 0x0c); + const u32 timelo = nvkm_ro32(mem, base + 0x10); + const u32 timehi = nvkm_ro32(mem, base + 0x14); + const u32 info0 = nvkm_ro32(mem, base + 0x18); + const u32 info1 = nvkm_ro32(mem, base + 0x1c); + struct nvkm_fault_data info; + + if (++get == buffer->entries) + get = 0; + nvkm_wr32(device, 0x100e2c + foff, get); + + info.addr = ((u64)addrhi << 32) | addrlo; + info.inst = ((u64)insthi << 32) | instlo; + info.time = ((u64)timehi << 32) | timelo; + info.engine = (info0 & 0x000000ff); + info.valid = (info1 & 0x80000000) >> 31; + info.gpc = (info1 & 0x1f000000) >> 24; + info.hub = (info1 & 0x00100000) >> 20; + info.access = (info1 & 0x000f0000) >> 16; + info.client = (info1 & 0x00007f00) >> 8; + info.reason = (info1 & 0x0000001f); + + nvkm_fifo_fault(device->fifo, &info); + } + nvkm_done(mem); +} + +static void +gv100_fault_buffer_fini(struct nvkm_fault_buffer *buffer) +{ + struct nvkm_device *device = buffer->fault->subdev.device; + const u32 intr = buffer->id ? 0x08000000 : 0x20000000; + const u32 foff = buffer->id * 0x14; + + nvkm_mask(device, 0x100a34, intr, intr); + nvkm_mask(device, 0x100e34 + foff, 0x80000000, 0x00000000); +} + +static void +gv100_fault_buffer_init(struct nvkm_fault_buffer *buffer) +{ + struct nvkm_device *device = buffer->fault->subdev.device; + const u32 intr = buffer->id ? 0x08000000 : 0x20000000; + const u32 foff = buffer->id * 0x14; + + nvkm_mask(device, 0x100e34 + foff, 0xc0000000, 0x40000000); + nvkm_wr32(device, 0x100e28 + foff, upper_32_bits(buffer->vma->addr)); + nvkm_wr32(device, 0x100e24 + foff, lower_32_bits(buffer->vma->addr)); + nvkm_mask(device, 0x100e34 + foff, 0x80000000, 0x80000000); + nvkm_mask(device, 0x100a2c, intr, intr); +} + +static u32 +gv100_fault_buffer_entries(struct nvkm_fault_buffer *buffer) +{ + struct nvkm_device *device = buffer->fault->subdev.device; + const u32 foff = buffer->id * 0x14; + nvkm_mask(device, 0x100e34 + foff, 0x40000000, 0x40000000); + return nvkm_rd32(device, 0x100e34 + foff) & 0x000fffff; +} + +static int +gv100_fault_ntfy_nrpfb(struct nvkm_notify *notify) +{ + struct nvkm_fault *fault = container_of(notify, typeof(*fault), nrpfb); + gv100_fault_buffer_process(fault->buffer[0]); + return NVKM_NOTIFY_KEEP; +} + +static void +gv100_fault_intr_fault(struct nvkm_fault *fault) +{ + struct nvkm_subdev *subdev = &fault->subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_fault_data info; + const u32 addrlo = nvkm_rd32(device, 0x100e4c); + const u32 addrhi = nvkm_rd32(device, 0x100e50); + const u32 info0 = nvkm_rd32(device, 0x100e54); + const u32 insthi = nvkm_rd32(device, 0x100e58); + const u32 info1 = nvkm_rd32(device, 0x100e5c); + + info.addr = ((u64)addrhi << 32) | addrlo; + info.inst = ((u64)insthi << 32) | (info0 & 0xfffff000); + info.time = 0; + info.engine = (info0 & 0x000000ff); + info.valid = (info1 & 0x80000000) >> 31; + info.gpc = (info1 & 0x1f000000) >> 24; + info.hub = (info1 & 0x00100000) >> 20; + info.access = (info1 & 0x000f0000) >> 16; + info.client = (info1 & 0x00007f00) >> 8; + info.reason = (info1 & 0x0000001f); + + nvkm_fifo_fault(device->fifo, &info); +} + +static void +gv100_fault_intr(struct nvkm_fault *fault) +{ + struct nvkm_subdev *subdev = &fault->subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, 0x100a20); + + if (stat & 0x80000000) { + gv100_fault_intr_fault(fault); + nvkm_wr32(device, 0x100e60, 0x80000000); + stat &= ~0x80000000; + } + + if (stat & 0x20000000) { + if (fault->buffer[0]) { + nvkm_event_send(&fault->event, 1, 0, NULL, 0); + stat &= ~0x20000000; + } + } + + if (stat) { + nvkm_debug(subdev, "intr %08x\n", stat); + } +} + +static void +gv100_fault_fini(struct nvkm_fault *fault) +{ + nvkm_notify_put(&fault->nrpfb); + nvkm_mask(fault->subdev.device, 0x100a34, 0x80000000, 0x80000000); +} + +static void +gv100_fault_init(struct nvkm_fault *fault) +{ + nvkm_mask(fault->subdev.device, 0x100a2c, 0x80000000, 0x80000000); + nvkm_notify_get(&fault->nrpfb); +} + +static const struct nvkm_fault_func +gv100_fault = { + .init = gv100_fault_init, + .fini = gv100_fault_fini, + .intr = gv100_fault_intr, + .buffer.nr = 2, + .buffer.entry_size = 32, + .buffer.entries = gv100_fault_buffer_entries, + .buffer.init = gv100_fault_buffer_init, + .buffer.fini = gv100_fault_buffer_fini, +}; + +int +gv100_fault_new(struct nvkm_device *device, int index, + struct nvkm_fault **pfault) +{ + struct nvkm_fault *fault; + int ret; + + ret = nvkm_fault_new_(&gv100_fault, device, index, &fault); + *pfault = fault; + if (ret) + return ret; + + return nvkm_notify_init(&fault->buffer[0]->object, &fault->event, + gv100_fault_ntfy_nrpfb, false, NULL, 0, 0, + &fault->nrpfb); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h new file mode 100644 index 000000000000..44843ecf12b0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h @@ -0,0 +1,34 @@ +#ifndef __NVKM_FAULT_PRIV_H__ +#define __NVKM_FAULT_PRIV_H__ +#define nvkm_fault_buffer(p) container_of((p), struct nvkm_fault_buffer, object) +#define nvkm_fault(p) container_of((p), struct nvkm_fault, subdev) +#include <subdev/fault.h> + +#include <core/event.h> +#include <core/object.h> + +struct nvkm_fault_buffer { + struct nvkm_object object; + struct nvkm_fault *fault; + int id; + int entries; + struct nvkm_memory *mem; + struct nvkm_vma *vma; +}; + +int nvkm_fault_new_(const struct nvkm_fault_func *, struct nvkm_device *, + int index, struct nvkm_fault **); + +struct nvkm_fault_func { + void (*init)(struct nvkm_fault *); + void (*fini)(struct nvkm_fault *); + void (*intr)(struct nvkm_fault *); + struct { + int nr; + u32 entry_size; + u32 (*entries)(struct nvkm_fault_buffer *); + void (*init)(struct nvkm_fault_buffer *); + void (*fini)(struct nvkm_fault_buffer *); + } buffer; +}; +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild index b4f22cce5d43..969610951263 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild @@ -30,6 +30,7 @@ nvkm-y += nvkm/subdev/fb/gm20b.o nvkm-y += nvkm/subdev/fb/gp100.o nvkm-y += nvkm/subdev/fb/gp102.o nvkm-y += nvkm/subdev/fb/gp10b.o +nvkm-y += nvkm/subdev/fb/gv100.o nvkm-y += nvkm/subdev/fb/ram.o nvkm-y += nvkm/subdev/fb/ramnv04.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c index cdc4e0a2cc6b..e8dc4e913494 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c @@ -46,10 +46,10 @@ gf100_fb_oneinit(struct nvkm_fb *base) { struct gf100_fb *fb = gf100_fb(base); struct nvkm_device *device = fb->base.subdev.device; - int ret, size = 0x1000; + int ret, size = 1 << (fb->base.page ? fb->base.page : 17); size = nvkm_longopt(device->cfgopt, "MmuDebugBufferSize", size); - size = min(size, 0x1000); + size = max(size, 0x1000); ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, size, 0x1000, true, &fb->base.mmu_rd); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c index 8137e19d3292..d3b8c3367152 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c @@ -49,8 +49,6 @@ gm200_fb_init(struct nvkm_fb *base) if (fb->r100c10_page) nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8); - nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); /* 128KiB lpg */ - nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->base.mmu_wr) >> 8); nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->base.mmu_rd) >> 8); nvkm_mask(device, 0x100cc4, 0x00060000, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c index 147f69b30cd8..dffe1f5e1071 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c @@ -26,7 +26,7 @@ #include <core/memory.h> -static void +void gp100_fb_init_unkn(struct nvkm_fb *base) { struct nvkm_device *device = gf100_fb(base)->base.subdev.device; @@ -48,7 +48,7 @@ gp100_fb_init(struct nvkm_fb *base) nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->base.mmu_wr) >> 8); nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->base.mmu_rd) >> 8); nvkm_mask(device, 0x100cc4, 0x00060000, - max(nvkm_memory_size(fb->base.mmu_rd) >> 16, (u64)2) << 17); + min(nvkm_memory_size(fb->base.mmu_rd) >> 16, (u64)2) << 17); } static const struct nvkm_fb_func diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c new file mode 100644 index 000000000000..3c5e02e9794a --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c @@ -0,0 +1,46 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ram.h" + +static int +gv100_fb_init_page(struct nvkm_fb *fb) +{ + return (fb->page == 16) ? 0 : -EINVAL; +} + +static const struct nvkm_fb_func +gv100_fb = { + .dtor = gf100_fb_dtor, + .oneinit = gf100_fb_oneinit, + .init = gp100_fb_init, + .init_page = gv100_fb_init_page, + .init_unkn = gp100_fb_init_unkn, + .ram_new = gp100_ram_new, + .default_bigpage = 16, +}; + +int +gv100_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb) +{ + return gf100_fb_new_(&gv100_fb, device, index, pfb); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h index 414a423e0e55..2857f31466bf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h @@ -68,4 +68,6 @@ int gf100_fb_oneinit(struct nvkm_fb *); int gf100_fb_init_page(struct nvkm_fb *); int gm200_fb_init_page(struct nvkm_fb *); + +void gp100_fb_init_unkn(struct nvkm_fb *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild index 12d6f4f102cb..290ff1c425a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild @@ -4,3 +4,4 @@ nvkm-y += nvkm/subdev/ltc/gk104.o nvkm-y += nvkm/subdev/ltc/gm107.o nvkm-y += nvkm/subdev/ltc/gm200.o nvkm-y += nvkm/subdev/ltc/gp100.o +nvkm-y += nvkm/subdev/ltc/gp102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c index 1f185274d3e6..23242179e600 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c @@ -55,6 +55,14 @@ nvkm_ltc_zbc_depth_get(struct nvkm_ltc *ltc, int index, const u32 depth) return index; } +int +nvkm_ltc_zbc_stencil_get(struct nvkm_ltc *ltc, int index, const u32 stencil) +{ + ltc->zbc_stencil[index] = stencil; + ltc->func->zbc_clear_stencil(ltc, index, stencil); + return index; +} + void nvkm_ltc_invalidate(struct nvkm_ltc *ltc) { @@ -92,6 +100,8 @@ nvkm_ltc_init(struct nvkm_subdev *subdev) for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { ltc->func->zbc_clear_color(ltc, i, ltc->zbc_color[i]); ltc->func->zbc_clear_depth(ltc, i, ltc->zbc_depth[i]); + if (ltc->func->zbc_clear_stencil) + ltc->func->zbc_clear_stencil(ltc, i, ltc->zbc_stencil[i]); } ltc->func->init(ltc); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c index e34d42108019..e923ed76d37a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c @@ -23,7 +23,7 @@ */ #include "priv.h" -static void +void gp100_ltc_intr(struct nvkm_ltc *ltc) { struct nvkm_device *device = ltc->subdev.device; @@ -38,7 +38,7 @@ gp100_ltc_intr(struct nvkm_ltc *ltc) } } -static int +int gp100_ltc_oneinit(struct nvkm_ltc *ltc) { struct nvkm_device *device = ltc->subdev.device; @@ -48,7 +48,7 @@ gp100_ltc_oneinit(struct nvkm_ltc *ltc) return 0; } -static void +void gp100_ltc_init(struct nvkm_ltc *ltc) { /*XXX: PMU LS call to setup tagram address */ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c new file mode 100644 index 000000000000..601747ada655 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c @@ -0,0 +1,51 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +void +gp102_ltc_zbc_clear_stencil(struct nvkm_ltc *ltc, int i, const u32 stencil) +{ + struct nvkm_device *device = ltc->subdev.device; + nvkm_mask(device, 0x17e338, 0x0000000f, i); + nvkm_wr32(device, 0x17e204, stencil); +} + +static const struct nvkm_ltc_func +gp102_ltc = { + .oneinit = gp100_ltc_oneinit, + .init = gp100_ltc_init, + .intr = gp100_ltc_intr, + .cbc_clear = gm107_ltc_cbc_clear, + .cbc_wait = gm107_ltc_cbc_wait, + .zbc = 16, + .zbc_clear_color = gm107_ltc_zbc_clear_color, + .zbc_clear_depth = gm107_ltc_zbc_clear_depth, + .zbc_clear_stencil = gp102_ltc_zbc_clear_stencil, + .invalidate = gf100_ltc_invalidate, + .flush = gf100_ltc_flush, +}; + +int +gp102_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc) +{ + return nvkm_ltc_new_(&gp102_ltc, device, index, pltc); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h index e71cc25cc775..9dcde43c0f3c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h @@ -19,6 +19,7 @@ struct nvkm_ltc_func { int zbc; void (*zbc_clear_color)(struct nvkm_ltc *, int, const u32[4]); void (*zbc_clear_depth)(struct nvkm_ltc *, int, const u32); + void (*zbc_clear_stencil)(struct nvkm_ltc *, int, const u32); void (*invalidate)(struct nvkm_ltc *); void (*flush)(struct nvkm_ltc *); @@ -41,4 +42,8 @@ void gm107_ltc_cbc_clear(struct nvkm_ltc *, u32, u32); void gm107_ltc_cbc_wait(struct nvkm_ltc *); void gm107_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]); void gm107_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32); + +int gp100_ltc_oneinit(struct nvkm_ltc *); +void gp100_ltc_init(struct nvkm_ltc *); +void gp100_ltc_intr(struct nvkm_ltc *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c index 7321ad3758c3..43db245eec9a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c @@ -75,10 +75,28 @@ gp100_mc_intr_mask(struct nvkm_mc *base, u32 mask, u32 intr) spin_unlock_irqrestore(&mc->lock, flags); } +const struct nvkm_mc_map +gp100_mc_intr[] = { + { 0x04000000, NVKM_ENGINE_DISP }, + { 0x00000100, NVKM_ENGINE_FIFO }, + { 0x00000200, NVKM_SUBDEV_FAULT }, + { 0x40000000, NVKM_SUBDEV_IBUS }, + { 0x10000000, NVKM_SUBDEV_BUS }, + { 0x08000000, NVKM_SUBDEV_FB }, + { 0x02000000, NVKM_SUBDEV_LTC }, + { 0x01000000, NVKM_SUBDEV_PMU }, + { 0x00200000, NVKM_SUBDEV_GPIO }, + { 0x00200000, NVKM_SUBDEV_I2C }, + { 0x00100000, NVKM_SUBDEV_TIMER }, + { 0x00040000, NVKM_SUBDEV_THERM }, + { 0x00002000, NVKM_SUBDEV_FB }, + {}, +}; + static const struct nvkm_mc_func gp100_mc = { .init = nv50_mc_init, - .intr = gk104_mc_intr, + .intr = gp100_mc_intr, .intr_unarm = gp100_mc_intr_unarm, .intr_rearm = gp100_mc_intr_rearm, .intr_mask = gp100_mc_intr_mask, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c index 2283e3b74277..ff8629de97d6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c @@ -34,7 +34,7 @@ gp10b_mc_init(struct nvkm_mc *mc) static const struct nvkm_mc_func gp10b_mc = { .init = gp10b_mc_init, - .intr = gk104_mc_intr, + .intr = gp100_mc_intr, .intr_unarm = gp100_mc_intr_unarm, .intr_rearm = gp100_mc_intr_rearm, .intr_mask = gp100_mc_intr_mask, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h index 8869d79c2b59..d9e3691d45b7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h @@ -57,4 +57,6 @@ int gp100_mc_new_(const struct nvkm_mc_func *, struct nvkm_device *, int, extern const struct nvkm_mc_map gk104_mc_intr[]; extern const struct nvkm_mc_map gk104_mc_reset[]; + +extern const struct nvkm_mc_map gp100_mc_intr[]; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild index 67ee983bb026..58a24e3a0598 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild @@ -12,6 +12,7 @@ nvkm-y += nvkm/subdev/mmu/gm200.o nvkm-y += nvkm/subdev/mmu/gm20b.o nvkm-y += nvkm/subdev/mmu/gp100.o nvkm-y += nvkm/subdev/mmu/gp10b.o +nvkm-y += nvkm/subdev/mmu/gv100.o nvkm-y += nvkm/subdev/mmu/mem.o nvkm-y += nvkm/subdev/mmu/memnv04.o @@ -31,6 +32,7 @@ nvkm-y += nvkm/subdev/mmu/vmmgm200.o nvkm-y += nvkm/subdev/mmu/vmmgm20b.o nvkm-y += nvkm/subdev/mmu/vmmgp100.o nvkm-y += nvkm/subdev/mmu/vmmgp10b.o +nvkm-y += nvkm/subdev/mmu/vmmgv100.o nvkm-y += nvkm/subdev/mmu/umem.o nvkm-y += nvkm/subdev/mmu/ummu.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c new file mode 100644 index 000000000000..f666cb57f69e --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c @@ -0,0 +1,43 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "mem.h" +#include "vmm.h" + +#include <core/option.h> + +#include <nvif/class.h> + +static const struct nvkm_mmu_func +gv100_mmu = { + .dma_bits = 47, + .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, + .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, + .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gv100_vmm_new }, + .kind = gm200_mmu_kind, + .kind_sys = true, +}; + +int +gv100_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +{ + return nvkm_mmu_new_(&gv100_mmu, device, index, pmmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index da06e64d8a7d..1a3b0a3724ca 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -236,6 +236,9 @@ int gp100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, int gp10b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); +int gv100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, + struct lock_class_key *, const char *, + struct nvkm_vmm **); #define VMM_PRINT(l,v,p,f,a...) do { \ struct nvkm_vmm *_vmm = (v); \ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c new file mode 100644 index 000000000000..2fa40c16e6d2 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c @@ -0,0 +1,87 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "vmm.h" + +#include <subdev/fb.h> +#include <subdev/ltc.h> + +#include <nvif/ifc00d.h> +#include <nvif/unpack.h> + +int +gv100_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) +{ + u64 data[2], mask; + int ret = gp100_vmm_join(vmm, inst), i; + if (ret) + return ret; + + nvkm_kmap(inst); + data[0] = nvkm_ro32(inst, 0x200); + data[1] = nvkm_ro32(inst, 0x204); + mask = BIT_ULL(0); + + nvkm_wo32(inst, 0x21c, 0x00000000); + + for (i = 0; i < 64; i++) { + if (mask & BIT_ULL(i)) { + nvkm_wo32(inst, 0x2a4 + (i * 0x10), data[1]); + nvkm_wo32(inst, 0x2a0 + (i * 0x10), data[0]); + } else { + nvkm_wo32(inst, 0x2a4 + (i * 0x10), 0x00000001); + nvkm_wo32(inst, 0x2a0 + (i * 0x10), 0x00000001); + } + nvkm_wo32(inst, 0x2a8 + (i * 0x10), 0x00000000); + } + + nvkm_wo32(inst, 0x298, lower_32_bits(mask)); + nvkm_wo32(inst, 0x29c, upper_32_bits(mask)); + nvkm_done(inst); + return 0; +} + +static const struct nvkm_vmm_func +gv100_vmm = { + .join = gv100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, + .valid = gp100_vmm_valid, + .flush = gp100_vmm_flush, + .page = { + { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, + { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, + { 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx }, + { 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxC }, + { 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxC }, + { 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SVHx }, + {} + } +}; + +int +gv100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) +{ + return nv04_vmm_new_(&gv100_vmm, mmu, 0, addr, size, + argv, argc, key, name, pvmm); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp108.c index e8c27ec700de..737a8d50a1f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp108.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp108.c @@ -65,3 +65,24 @@ MODULE_FIRMWARE("nvidia/gp108/nvdec/scrubber.bin"); MODULE_FIRMWARE("nvidia/gp108/sec2/desc.bin"); MODULE_FIRMWARE("nvidia/gp108/sec2/image.bin"); MODULE_FIRMWARE("nvidia/gp108/sec2/sig.bin"); + +MODULE_FIRMWARE("nvidia/gv100/acr/bl.bin"); +MODULE_FIRMWARE("nvidia/gv100/acr/unload_bl.bin"); +MODULE_FIRMWARE("nvidia/gv100/acr/ucode_load.bin"); +MODULE_FIRMWARE("nvidia/gv100/acr/ucode_unload.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/fecs_inst.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/fecs_data.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/gpccs_inst.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/gpccs_data.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/sw_ctx.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/sw_nonctx.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/sw_bundle_init.bin"); +MODULE_FIRMWARE("nvidia/gv100/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/gv100/nvdec/scrubber.bin"); +MODULE_FIRMWARE("nvidia/gv100/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/gv100/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/gv100/sec2/sig.bin"); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c index 6f10b098676c..1e1f1c635cab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c @@ -80,12 +80,11 @@ acr_ls_msgqueue_post_run(struct nvkm_msgqueue *queue, struct nvkm_falcon *falcon, u32 addr_args) { struct nvkm_device *device = falcon->owner->device; - u32 cmdline_size = NVKM_MSGQUEUE_CMDLINE_SIZE; - u8 buf[cmdline_size]; + u8 buf[NVKM_MSGQUEUE_CMDLINE_SIZE]; - memset(buf, 0, cmdline_size); + memset(buf, 0, sizeof(buf)); nvkm_msgqueue_write_cmdline(queue, buf); - nvkm_falcon_load_dmem(falcon, buf, addr_args, cmdline_size, 0); + nvkm_falcon_load_dmem(falcon, buf, addr_args, sizeof(buf), 0); /* rearm the queue so it will wait for the init message */ nvkm_msgqueue_reinit(queue); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c index fea4957291da..4f1f3e890650 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c @@ -48,7 +48,8 @@ gk104_top_oneinit(struct nvkm_top *top) case 0x00000001: /* DATA */ inst = (data & 0x3c000000) >> 26; info->addr = (data & 0x00fff000); - info->fault = (data & 0x000000f8) >> 3; + if (data & 0x00000004) + info->fault = (data & 0x000003f8) >> 3; break; case 0x00000002: /* ENUM */ if (data & 0x00000020) diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c index 5e2e65e88847..7f3ac6b13b56 100644 --- a/drivers/gpu/drm/omapdrm/dss/dispc.c +++ b/drivers/gpu/drm/omapdrm/dss/dispc.c @@ -828,6 +828,12 @@ static void dispc_ovl_set_scale_coef(struct dispc_device *dispc, h_coef = dispc_ovl_get_scale_coef(fir_hinc, true); v_coef = dispc_ovl_get_scale_coef(fir_vinc, five_taps); + if (!h_coef || !v_coef) { + dev_err(&dispc->pdev->dev, "%s: failed to find scale coefs\n", + __func__); + return; + } + for (i = 0; i < 8; i++) { u32 h, hv; @@ -2342,7 +2348,7 @@ static int dispc_ovl_calc_scaling_24xx(struct dispc_device *dispc, } if (in_width > maxsinglelinewidth) { - DSSERR("Cannot scale max input width exceeded"); + DSSERR("Cannot scale max input width exceeded\n"); return -EINVAL; } return 0; @@ -2424,13 +2430,13 @@ again: } if (in_width > (maxsinglelinewidth * 2)) { - DSSERR("Cannot setup scaling"); - DSSERR("width exceeds maximum width possible"); + DSSERR("Cannot setup scaling\n"); + DSSERR("width exceeds maximum width possible\n"); return -EINVAL; } if (in_width > maxsinglelinewidth && *five_taps) { - DSSERR("cannot setup scaling with five taps"); + DSSERR("cannot setup scaling with five taps\n"); return -EINVAL; } return 0; @@ -2472,7 +2478,7 @@ static int dispc_ovl_calc_scaling_44xx(struct dispc_device *dispc, in_width > maxsinglelinewidth && ++*decim_x); if (in_width > maxsinglelinewidth) { - DSSERR("Cannot scale width exceeds max line width"); + DSSERR("Cannot scale width exceeds max line width\n"); return -EINVAL; } @@ -2490,7 +2496,7 @@ static int dispc_ovl_calc_scaling_44xx(struct dispc_device *dispc, * bandwidth. Despite what theory says this appears to * be true also for 16-bit color formats. */ - DSSERR("Not enough bandwidth, too much downscaling (x-decimation factor %d > 4)", *decim_x); + DSSERR("Not enough bandwidth, too much downscaling (x-decimation factor %d > 4)\n", *decim_x); return -EINVAL; } @@ -4633,7 +4639,7 @@ static int dispc_errata_i734_wa_init(struct dispc_device *dispc) i734_buf.size, &i734_buf.paddr, GFP_KERNEL); if (!i734_buf.vaddr) { - dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed", + dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed\n", __func__); return -ENOMEM; } diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c index 97c88861d67a..5879f45f6fc9 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c @@ -679,7 +679,7 @@ static int hdmi_audio_config(struct device *dev, struct omap_dss_audio *dss_audio) { struct omap_hdmi *hd = dev_get_drvdata(dev); - int ret; + int ret = 0; mutex_lock(&hd->lock); diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c index 35ed2add6189..813ba42f2753 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c @@ -922,8 +922,13 @@ int hdmi4_core_init(struct platform_device *pdev, struct hdmi_core_data *core) { const struct hdmi4_features *features; struct resource *res; + const struct soc_device_attribute *soc; - features = soc_device_match(hdmi4_soc_devices)->data; + soc = soc_device_match(hdmi4_soc_devices); + if (!soc) + return -ENODEV; + + features = soc->data; core->cts_swmode = features->cts_swmode; core->audio_use_mclk = features->audio_use_mclk; diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c index d28da9ac3e90..ae1a001d1b83 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c @@ -671,7 +671,7 @@ static int hdmi_audio_config(struct device *dev, struct omap_dss_audio *dss_audio) { struct omap_hdmi *hd = dev_get_drvdata(dev); - int ret; + int ret = 0; mutex_lock(&hd->lock); diff --git a/drivers/gpu/drm/omapdrm/omap_connector.c b/drivers/gpu/drm/omapdrm/omap_connector.c index a0d7b1d905e8..5cde26ac937b 100644 --- a/drivers/gpu/drm/omapdrm/omap_connector.c +++ b/drivers/gpu/drm/omapdrm/omap_connector.c @@ -121,6 +121,9 @@ static int omap_connector_get_modes(struct drm_connector *connector) if (dssdrv->read_edid) { void *edid = kzalloc(MAX_EDID, GFP_KERNEL); + if (!edid) + return 0; + if ((dssdrv->read_edid(dssdev, edid, MAX_EDID) > 0) && drm_edid_is_valid(edid)) { drm_mode_connector_update_edid_property( @@ -139,6 +142,9 @@ static int omap_connector_get_modes(struct drm_connector *connector) struct drm_display_mode *mode = drm_mode_create(dev); struct videomode vm = {0}; + if (!mode) + return 0; + dssdrv->get_timings(dssdev, &vm); drm_display_mode_from_videomode(&vm, mode); @@ -200,6 +206,10 @@ static int omap_connector_mode_valid(struct drm_connector *connector, if (!r) { /* check if vrefresh is still valid */ new_mode = drm_mode_duplicate(dev, mode); + + if (!new_mode) + return MODE_BAD; + new_mode->clock = vm.pixelclock / 1000; new_mode->vrefresh = 0; if (mode->vrefresh == drm_mode_vrefresh(new_mode)) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index f9fa1c90b35c..401c02e9e6b2 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -401,12 +401,16 @@ int tiler_unpin(struct tiler_block *block) struct tiler_block *tiler_reserve_2d(enum tiler_fmt fmt, u16 w, u16 h, u16 align) { - struct tiler_block *block = kzalloc(sizeof(*block), GFP_KERNEL); + struct tiler_block *block; u32 min_align = 128; int ret; unsigned long flags; u32 slot_bytes; + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return ERR_PTR(-ENOMEM); + BUG_ON(!validfmt(fmt)); /* convert width/height to slots */ diff --git a/drivers/gpu/drm/omapdrm/tcm-sita.c b/drivers/gpu/drm/omapdrm/tcm-sita.c index d7f7bc9f061a..817be3c41863 100644 --- a/drivers/gpu/drm/omapdrm/tcm-sita.c +++ b/drivers/gpu/drm/omapdrm/tcm-sita.c @@ -90,7 +90,7 @@ static int l2r_t2b(u16 w, u16 h, u16 a, s16 offset, { int i; unsigned long index; - bool area_free; + bool area_free = false; unsigned long slots_per_band = PAGE_SIZE / slot_bytes; unsigned long bit_offset = (offset > 0) ? offset / slot_bytes : 0; unsigned long curr_bit = bit_offset; diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 02baaaf20e9d..efbd5816082d 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1145,7 +1145,6 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; - struct radeon_framebuffer *radeon_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct radeon_bo *rbo; @@ -1164,19 +1163,15 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - radeon_fb = to_radeon_framebuffer(fb); + if (atomic) target_fb = fb; - } - else { - radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } /* If atomic, assume fb object is pinned & idle & fenced and * just update base pointers */ - obj = radeon_fb->obj; + obj = target_fb->obj[0]; rbo = gem_to_radeon_bo(obj); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) @@ -1441,8 +1436,7 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); if (!atomic && fb && fb != crtc->primary->fb) { - radeon_fb = to_radeon_framebuffer(fb); - rbo = gem_to_radeon_bo(radeon_fb->obj); + rbo = gem_to_radeon_bo(fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; @@ -1463,7 +1457,6 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; - struct radeon_framebuffer *radeon_fb; struct drm_gem_object *obj; struct radeon_bo *rbo; struct drm_framebuffer *target_fb; @@ -1481,16 +1474,12 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - radeon_fb = to_radeon_framebuffer(fb); + if (atomic) target_fb = fb; - } - else { - radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } - obj = radeon_fb->obj; + obj = target_fb->obj[0]; rbo = gem_to_radeon_bo(obj); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) @@ -1641,8 +1630,7 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3); if (!atomic && fb && fb != crtc->primary->fb) { - radeon_fb = to_radeon_framebuffer(fb); - rbo = gem_to_radeon_bo(radeon_fb->obj); + rbo = gem_to_radeon_bo(fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; @@ -2149,11 +2137,9 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct radeon_framebuffer *radeon_fb; struct radeon_bo *rbo; - radeon_fb = to_radeon_framebuffer(crtc->primary->fb); - rbo = gem_to_radeon_bo(radeon_fb->obj); + rbo = gem_to_radeon_bo(crtc->primary->fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r)) DRM_ERROR("failed to reserve rbo before unpin\n"); diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 40be4068ca69..fa5fadaa9bbb 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -526,7 +526,7 @@ static int radeon_atpx_init(void) * look up whether we are the integrated or discrete GPU (all asics). * Returns the client id. */ -static int radeon_atpx_get_client_id(struct pci_dev *pdev) +static enum vga_switcheroo_client_id radeon_atpx_get_client_id(struct pci_dev *pdev) { if (radeon_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev)) return VGA_SWITCHEROO_IGD; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index df9469a8fdb1..2aea2bdff99b 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -852,7 +852,7 @@ static int radeon_lvds_get_modes(struct drm_connector *connector) return ret; } -static int radeon_lvds_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -1012,7 +1012,7 @@ static int radeon_vga_get_modes(struct drm_connector *connector) return ret; } -static int radeon_vga_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1156,7 +1156,7 @@ static int radeon_tv_get_modes(struct drm_connector *connector) return 1; } -static int radeon_tv_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_tv_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { if ((mode->hdisplay > 1024) || (mode->vdisplay > 768)) @@ -1498,7 +1498,7 @@ static void radeon_dvi_force(struct drm_connector *connector) radeon_connector->use_digital = true; } -static int radeon_dvi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_dvi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; @@ -1800,7 +1800,7 @@ out: return ret; } -static int radeon_dp_mode_valid(struct drm_connector *connector, +static enum drm_mode_status radeon_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 48d0e6bd0508..59c8a6647ff2 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1591,7 +1591,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb); + struct drm_framebuffer *fb = crtc->primary->fb; struct radeon_bo *robj; if (radeon_crtc->cursor_bo) { @@ -1603,10 +1603,10 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, } } - if (rfb == NULL || rfb->obj == NULL) { + if (fb == NULL || fb->obj[0] == NULL) { continue; } - robj = gem_to_radeon_bo(rfb->obj); + robj = gem_to_radeon_bo(fb->obj[0]); /* don't unpin kernel fb objects */ if (!radeon_fbdev_robj_is_fb(rdev, robj)) { r = radeon_bo_reserve(robj, false); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 26129b2b082d..9d3ac8b981da 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -32,6 +32,7 @@ #include <linux/pm_runtime.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fb_helper.h> #include <drm/drm_plane_helper.h> #include <drm/drm_edid.h> @@ -478,8 +479,6 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_framebuffer *old_radeon_fb; - struct radeon_framebuffer *new_radeon_fb; struct drm_gem_object *obj; struct radeon_flip_work *work; struct radeon_bo *new_rbo; @@ -501,15 +500,13 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; /* schedule unpin of the old buffer */ - old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb); - obj = old_radeon_fb->obj; + obj = crtc->primary->fb->obj[0]; /* take a reference to the old object */ drm_gem_object_get(obj); work->old_rbo = gem_to_radeon_bo(obj); - new_radeon_fb = to_radeon_framebuffer(fb); - obj = new_radeon_fb->obj; + obj = fb->obj[0]; new_rbo = gem_to_radeon_bo(obj); /* pin the new buffer */ @@ -1285,41 +1282,23 @@ void radeon_compute_pll_legacy(struct radeon_pll *pll, } -static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb) -{ - struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb); - - drm_gem_object_put_unlocked(radeon_fb->obj); - drm_framebuffer_cleanup(fb); - kfree(radeon_fb); -} - -static int radeon_user_framebuffer_create_handle(struct drm_framebuffer *fb, - struct drm_file *file_priv, - unsigned int *handle) -{ - struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb); - - return drm_gem_handle_create(file_priv, radeon_fb->obj, handle); -} - static const struct drm_framebuffer_funcs radeon_fb_funcs = { - .destroy = radeon_user_framebuffer_destroy, - .create_handle = radeon_user_framebuffer_create_handle, + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, }; int radeon_framebuffer_init(struct drm_device *dev, - struct radeon_framebuffer *rfb, + struct drm_framebuffer *fb, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { int ret; - rfb->obj = obj; - drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); - ret = drm_framebuffer_init(dev, &rfb->base, &radeon_fb_funcs); + fb->obj[0] = obj; + drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + ret = drm_framebuffer_init(dev, fb, &radeon_fb_funcs); if (ret) { - rfb->obj = NULL; + fb->obj[0] = NULL; return ret; } return 0; @@ -1331,7 +1310,7 @@ radeon_user_framebuffer_create(struct drm_device *dev, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_gem_object *obj; - struct radeon_framebuffer *radeon_fb; + struct drm_framebuffer *fb; int ret; obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]); @@ -1347,20 +1326,20 @@ radeon_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-EINVAL); } - radeon_fb = kzalloc(sizeof(*radeon_fb), GFP_KERNEL); - if (radeon_fb == NULL) { + fb = kzalloc(sizeof(*fb), GFP_KERNEL); + if (fb == NULL) { drm_gem_object_put_unlocked(obj); return ERR_PTR(-ENOMEM); } - ret = radeon_framebuffer_init(dev, radeon_fb, mode_cmd, obj); + ret = radeon_framebuffer_init(dev, fb, mode_cmd, obj); if (ret) { - kfree(radeon_fb); + kfree(fb); drm_gem_object_put_unlocked(obj); return ERR_PTR(ret); } - return &radeon_fb->base; + return fb; } static const struct drm_mode_config_funcs radeon_mode_funcs = { diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index b28288a781ef..2a7977a23b31 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -168,7 +168,12 @@ int radeon_no_wb; int radeon_modeset = -1; int radeon_dynclks = -1; int radeon_r4xx_atom = 0; +#ifdef __powerpc__ +/* Default to PCI on PowerPC (fdo #95017) */ +int radeon_agpmode = -1; +#else int radeon_agpmode = 0; +#endif int radeon_vram_limit = 0; int radeon_gart_size = -1; /* auto */ int radeon_benchmarking = 0; diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 57c5404a1654..1179034024ae 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -43,7 +43,7 @@ */ struct radeon_fbdev { struct drm_fb_helper helper; - struct radeon_framebuffer rfb; + struct drm_framebuffer fb; struct radeon_device *rdev; }; @@ -246,13 +246,13 @@ static int radeonfb_create(struct drm_fb_helper *helper, info->par = rfbdev; - ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); + ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->fb, &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto out; } - fb = &rfbdev->rfb.base; + fb = &rfbdev->fb; /* setup helper */ rfbdev->helper.fb = fb; @@ -308,15 +308,15 @@ out: static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfbdev) { - struct radeon_framebuffer *rfb = &rfbdev->rfb; + struct drm_framebuffer *fb = &rfbdev->fb; drm_fb_helper_unregister_fbi(&rfbdev->helper); - if (rfb->obj) { - radeonfb_destroy_pinned_object(rfb->obj); - rfb->obj = NULL; - drm_framebuffer_unregister_private(&rfb->base); - drm_framebuffer_cleanup(&rfb->base); + if (fb->obj[0]) { + radeonfb_destroy_pinned_object(fb->obj[0]); + fb->obj[0] = NULL; + drm_framebuffer_unregister_private(fb); + drm_framebuffer_cleanup(fb); } drm_fb_helper_fini(&rfbdev->helper); @@ -400,7 +400,7 @@ bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj) if (!rdev->mode_info.rfbdev) return false; - if (robj == gem_to_radeon_bo(rdev->mode_info.rfbdev->rfb.obj)) + if (robj == gem_to_radeon_bo(rdev->mode_info.rfbdev->fb.obj[0])) return true; return false; } diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 1f1856e0b1e0..35a205ae4318 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -374,7 +374,6 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_framebuffer *radeon_fb; struct drm_framebuffer *target_fb; struct drm_gem_object *obj; struct radeon_bo *rbo; @@ -393,14 +392,10 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, return 0; } - if (atomic) { - radeon_fb = to_radeon_framebuffer(fb); + if (atomic) target_fb = fb; - } - else { - radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + else target_fb = crtc->primary->fb; - } switch (target_fb->format->cpp[0] * 8) { case 8: @@ -423,7 +418,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc, } /* Pin framebuffer & get tilling informations */ - obj = radeon_fb->obj; + obj = target_fb->obj[0]; rbo = gem_to_radeon_bo(obj); retry: r = radeon_bo_reserve(rbo, false); @@ -451,7 +446,7 @@ retry: struct radeon_bo *old_rbo; unsigned long nsize, osize; - old_rbo = gem_to_radeon_bo(to_radeon_framebuffer(fb)->obj); + old_rbo = gem_to_radeon_bo(fb->obj[0]); osize = radeon_bo_size(old_rbo); nsize = radeon_bo_size(rbo); if (nsize <= osize && !radeon_bo_reserve(old_rbo, false)) { @@ -558,8 +553,7 @@ retry: WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch); if (!atomic && fb && fb != crtc->primary->fb) { - radeon_fb = to_radeon_framebuffer(fb); - rbo = gem_to_radeon_bo(radeon_fb->obj); + rbo = gem_to_radeon_bo(fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r != 0)) return r; @@ -1093,11 +1087,9 @@ static void radeon_crtc_disable(struct drm_crtc *crtc) radeon_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { int r; - struct radeon_framebuffer *radeon_fb; struct radeon_bo *rbo; - radeon_fb = to_radeon_framebuffer(crtc->primary->fb); - rbo = gem_to_radeon_bo(radeon_fb->obj); + rbo = gem_to_radeon_bo(crtc->primary->fb->obj[0]); r = radeon_bo_reserve(rbo, false); if (unlikely(r)) DRM_ERROR("failed to reserve rbo before unpin\n"); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 3243e5e01432..fd470d6bf3f4 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -46,7 +46,6 @@ struct radeon_device; #define to_radeon_crtc(x) container_of(x, struct radeon_crtc, base) #define to_radeon_connector(x) container_of(x, struct radeon_connector, base) #define to_radeon_encoder(x) container_of(x, struct radeon_encoder, base) -#define to_radeon_framebuffer(x) container_of(x, struct radeon_framebuffer, base) #define RADEON_MAX_HPD_PINS 7 #define RADEON_MAX_CRTCS 6 @@ -574,11 +573,6 @@ struct radeon_connector { int enabled_attribs; }; -struct radeon_framebuffer { - struct drm_framebuffer base; - struct drm_gem_object *obj; -}; - #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \ ((em) == ATOM_ENCODER_MODE_DP_MST)) @@ -932,7 +926,7 @@ radeon_combios_encoder_crtc_scratch_regs(struct drm_encoder *encoder, int crtc); extern void radeon_combios_encoder_dpms_scratch_regs(struct drm_encoder *encoder, bool on); int radeon_framebuffer_init(struct drm_device *dev, - struct radeon_framebuffer *rfb, + struct drm_framebuffer *rfb, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index c4420538ec85..f2a0bd1e5119 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -767,7 +767,8 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) * Initialization */ -int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) +int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex, + unsigned int hwindex) { static const unsigned int mmio_offsets[] = { DU0_REG_OFFSET, DU1_REG_OFFSET, DU2_REG_OFFSET, DU3_REG_OFFSET @@ -775,7 +776,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) struct rcar_du_device *rcdu = rgrp->dev; struct platform_device *pdev = to_platform_device(rcdu->dev); - struct rcar_du_crtc *rcrtc = &rcdu->crtcs[index]; + struct rcar_du_crtc *rcrtc = &rcdu->crtcs[swindex]; struct drm_crtc *crtc = &rcrtc->crtc; struct drm_plane *primary; unsigned int irqflags; @@ -787,7 +788,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) /* Get the CRTC clock and the optional external clock. */ if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) { - sprintf(clk_name, "du.%u", index); + sprintf(clk_name, "du.%u", hwindex); name = clk_name; } else { name = NULL; @@ -795,16 +796,16 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) rcrtc->clock = devm_clk_get(rcdu->dev, name); if (IS_ERR(rcrtc->clock)) { - dev_err(rcdu->dev, "no clock for CRTC %u\n", index); + dev_err(rcdu->dev, "no clock for DU channel %u\n", hwindex); return PTR_ERR(rcrtc->clock); } - sprintf(clk_name, "dclkin.%u", index); + sprintf(clk_name, "dclkin.%u", hwindex); clk = devm_clk_get(rcdu->dev, clk_name); if (!IS_ERR(clk)) { rcrtc->extclock = clk; } else if (PTR_ERR(rcrtc->clock) == -EPROBE_DEFER) { - dev_info(rcdu->dev, "can't get external clock %u\n", index); + dev_info(rcdu->dev, "can't get external clock %u\n", hwindex); return -EPROBE_DEFER; } @@ -813,13 +814,13 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) spin_lock_init(&rcrtc->vblank_lock); rcrtc->group = rgrp; - rcrtc->mmio_offset = mmio_offsets[index]; - rcrtc->index = index; + rcrtc->mmio_offset = mmio_offsets[hwindex]; + rcrtc->index = hwindex; if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE)) primary = &rcrtc->vsp->planes[rcrtc->vsp_pipe].plane; else - primary = &rgrp->planes[index % 2].plane; + primary = &rgrp->planes[swindex % 2].plane; ret = drm_crtc_init_with_planes(rcdu->ddev, crtc, primary, NULL, &crtc_funcs, NULL); @@ -833,7 +834,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) /* Register the interrupt handler. */ if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) { - irq = platform_get_irq(pdev, index); + /* The IRQ's are associated with the CRTC (sw)index. */ + irq = platform_get_irq(pdev, swindex); irqflags = 0; } else { irq = platform_get_irq(pdev, 0); @@ -841,7 +843,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) } if (irq < 0) { - dev_err(rcdu->dev, "no IRQ for CRTC %u\n", index); + dev_err(rcdu->dev, "no IRQ for CRTC %u\n", swindex); return irq; } @@ -849,7 +851,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) dev_name(rcdu->dev), rcrtc); if (ret < 0) { dev_err(rcdu->dev, - "failed to register IRQ for CRTC %u\n", index); + "failed to register IRQ for CRTC %u\n", swindex); return ret; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h index fdc2bf99bda1..84b5e23a85b1 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h @@ -80,7 +80,8 @@ enum rcar_du_output { RCAR_DU_OUTPUT_MAX, }; -int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index); +int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex, + unsigned int hwindex); void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc); void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c index 3917d839c04c..02aee6cb0e53 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c @@ -40,7 +40,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7743_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7743 has one RGB output and one LVDS output @@ -61,7 +61,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7745 has two RGB outputs @@ -80,7 +80,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = { static const struct rcar_du_device_info rcar_du_r8a7779_info = { .gen = 2, .features = 0, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7779 has two RGB outputs and one (currently unsupported) @@ -102,7 +102,7 @@ static const struct rcar_du_device_info rcar_du_r8a7790_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, .quirks = RCAR_DU_QUIRK_ALIGN_128B, - .num_crtcs = 3, + .channels_mask = BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7790 has one RGB output, two LVDS outputs and one @@ -129,7 +129,7 @@ static const struct rcar_du_device_info rcar_du_r8a7791_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A779[13] has one RGB output, one LVDS output and one @@ -151,7 +151,7 @@ static const struct rcar_du_device_info rcar_du_r8a7792_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* R8A7792 has two RGB outputs. */ [RCAR_DU_OUTPUT_DPAD0] = { @@ -169,7 +169,7 @@ static const struct rcar_du_device_info rcar_du_r8a7794_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7794 has two RGB outputs and one (currently unsupported) @@ -191,7 +191,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 4, + .channels_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7795 has one RGB output, two HDMI outputs and one @@ -215,7 +215,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = { }, }, .num_lvds = 1, - .dpll_ch = BIT(1) | BIT(2), + .dpll_ch = BIT(2) | BIT(1), }; static const struct rcar_du_device_info rcar_du_r8a7796_info = { @@ -223,7 +223,7 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 3, + .channels_mask = BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7796 has one RGB output, one LVDS output and one HDMI @@ -246,12 +246,40 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = { .dpll_ch = BIT(1), }; +static const struct rcar_du_device_info rcar_du_r8a77965_info = { + .gen = 3, + .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK + | RCAR_DU_FEATURE_EXT_CTRL_REGS + | RCAR_DU_FEATURE_VSP1_SOURCE, + .channels_mask = BIT(3) | BIT(1) | BIT(0), + .routes = { + /* + * R8A77965 has one RGB output, one LVDS output and one HDMI + * output. + */ + [RCAR_DU_OUTPUT_DPAD0] = { + .possible_crtcs = BIT(2), + .port = 0, + }, + [RCAR_DU_OUTPUT_HDMI0] = { + .possible_crtcs = BIT(1), + .port = 1, + }, + [RCAR_DU_OUTPUT_LVDS0] = { + .possible_crtcs = BIT(0), + .port = 2, + }, + }, + .num_lvds = 1, + .dpll_ch = BIT(1), +}; + static const struct rcar_du_device_info rcar_du_r8a77970_info = { .gen = 3, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 1, + .channels_mask = BIT(0), .routes = { /* R8A77970 has one RGB output and one LVDS output. */ [RCAR_DU_OUTPUT_DPAD0] = { @@ -277,6 +305,7 @@ static const struct of_device_id rcar_du_of_table[] = { { .compatible = "renesas,du-r8a7794", .data = &rcar_du_r8a7794_info }, { .compatible = "renesas,du-r8a7795", .data = &rcar_du_r8a7795_info }, { .compatible = "renesas,du-r8a7796", .data = &rcar_du_r8a7796_info }, + { .compatible = "renesas,du-r8a77965", .data = &rcar_du_r8a77965_info }, { .compatible = "renesas,du-r8a77970", .data = &rcar_du_r8a77970_info }, { } }; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h index 131d8e88b06c..b3a25e8e07d0 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h @@ -52,7 +52,7 @@ struct rcar_du_output_routing { * @gen: device generation (2 or 3) * @features: device features (RCAR_DU_FEATURE_*) * @quirks: device quirks (RCAR_DU_QUIRK_*) - * @num_crtcs: total number of CRTCs + * @channels_mask: bit mask of available DU channels * @routes: array of CRTC to output routes, indexed by output (RCAR_DU_OUTPUT_*) * @num_lvds: number of internal LVDS encoders */ @@ -60,7 +60,7 @@ struct rcar_du_device_info { unsigned int gen; unsigned int features; unsigned int quirks; - unsigned int num_crtcs; + unsigned int channels_mask; struct rcar_du_output_routing routes[RCAR_DU_OUTPUT_MAX]; unsigned int num_lvds; unsigned int dpll_ch; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c index 2f37ea901873..d539cb290a35 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_group.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c @@ -46,10 +46,13 @@ void rcar_du_group_write(struct rcar_du_group *rgrp, u32 reg, u32 data) static void rcar_du_group_setup_pins(struct rcar_du_group *rgrp) { - u32 defr6 = DEFR6_CODE | DEFR6_ODPM12_DISP; + u32 defr6 = DEFR6_CODE; - if (rgrp->num_crtcs > 1) - defr6 |= DEFR6_ODPM22_DISP; + if (rgrp->channels_mask & BIT(0)) + defr6 |= DEFR6_ODPM02_DISP; + + if (rgrp->channels_mask & BIT(1)) + defr6 |= DEFR6_ODPM12_DISP; rcar_du_group_write(rgrp, DEFR6, defr6); } @@ -80,10 +83,11 @@ static void rcar_du_group_setup_defr8(struct rcar_du_group *rgrp) * On Gen3 VSPD routing can't be configured, but DPAD routing * needs to be set despite having a single option available. */ - u32 crtc = ffs(possible_crtcs) - 1; + unsigned int rgb_crtc = ffs(possible_crtcs) - 1; + struct rcar_du_crtc *crtc = &rcdu->crtcs[rgb_crtc]; - if (crtc / 2 == rgrp->index) - defr8 |= DEFR8_DRGBS_DU(crtc); + if (crtc->index / 2 == rgrp->index) + defr8 |= DEFR8_DRGBS_DU(crtc->index); } rcar_du_group_write(rgrp, DEFR8, defr8); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h index 5e3adc6b31b5..42105aedecc8 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_group.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h @@ -25,6 +25,7 @@ struct rcar_du_device; * @dev: the DU device * @mmio_offset: registers offset in the device memory map * @index: group index + * @channels_mask: bitmask of populated DU channels in this group * @num_crtcs: number of CRTCs in this group (1 or 2) * @use_count: number of users of the group (rcar_du_group_(get|put)) * @used_crtcs: number of CRTCs currently in use @@ -39,6 +40,7 @@ struct rcar_du_group { unsigned int mmio_offset; unsigned int index; + unsigned int channels_mask; unsigned int num_crtcs; unsigned int use_count; unsigned int used_crtcs; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index f4ac0f884f00..f0bc7cc0e913 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -428,7 +428,7 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) struct { struct device_node *np; unsigned int crtcs_mask; - } vsps[RCAR_DU_MAX_VSPS] = { { 0, }, }; + } vsps[RCAR_DU_MAX_VSPS] = { { NULL, }, }; unsigned int vsps_count = 0; unsigned int cells; unsigned int i; @@ -507,6 +507,8 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) struct drm_fbdev_cma *fbdev; unsigned int num_encoders; unsigned int num_groups; + unsigned int swindex; + unsigned int hwindex; unsigned int i; int ret; @@ -520,7 +522,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) dev->mode_config.funcs = &rcar_du_mode_config_funcs; dev->mode_config.helper_private = &rcar_du_mode_config_helper; - rcdu->num_crtcs = rcdu->info->num_crtcs; + rcdu->num_crtcs = hweight8(rcdu->info->channels_mask); ret = rcar_du_properties_init(rcdu); if (ret < 0) @@ -530,7 +532,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) * Initialize vertical blanking interrupts handling. Start with vblank * disabled for all CRTCs. */ - ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1); + ret = drm_vblank_init(dev, (1 << rcdu->num_crtcs) - 1); if (ret < 0) return ret; @@ -545,7 +547,10 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) rgrp->dev = rcdu; rgrp->mmio_offset = mmio_offsets[i]; rgrp->index = i; - rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U); + /* Extract the channel mask for this group only. */ + rgrp->channels_mask = (rcdu->info->channels_mask >> (2 * i)) + & GENMASK(1, 0); + rgrp->num_crtcs = hweight8(rgrp->channels_mask); /* * If we have more than one CRTCs in this group pre-associate @@ -572,10 +577,16 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) } /* Create the CRTCs. */ - for (i = 0; i < rcdu->num_crtcs; ++i) { - struct rcar_du_group *rgrp = &rcdu->groups[i / 2]; + for (swindex = 0, hwindex = 0; swindex < rcdu->num_crtcs; ++hwindex) { + struct rcar_du_group *rgrp; + + /* Skip unpopulated DU channels. */ + if (!(rcdu->info->channels_mask & BIT(hwindex))) + continue; + + rgrp = &rcdu->groups[hwindex / 2]; - ret = rcar_du_crtc_create(rgrp, i); + ret = rcar_du_crtc_create(rgrp, swindex++, hwindex); if (ret < 0) return ret; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_of.c b/drivers/gpu/drm/rcar-du/rcar_du_of.c index 68a0b82cb17e..afef69669bb4 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_of.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_of.c @@ -18,6 +18,7 @@ #include "rcar_du_crtc.h" #include "rcar_du_drv.h" +#include "rcar_du_of.h" /* ----------------------------------------------------------------------------- * Generic Overlay Handling diff --git a/drivers/gpu/drm/rcar-du/rcar_du_of.h b/drivers/gpu/drm/rcar-du/rcar_du_of.h index c2e65a727e91..8dd3fbe96650 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_of.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_of.h @@ -11,7 +11,7 @@ struct of_device_id; -#ifdef CONFIG_DRM_RCAR_LVDS +#if IS_ENABLED(CONFIG_DRM_RCAR_LVDS) void __init rcar_du_of_init(const struct of_device_id *of_ids); #else static inline void rcar_du_of_init(const struct of_device_id *of_ids) { } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_regs.h b/drivers/gpu/drm/rcar-du/rcar_du_regs.h index d5bae99d3cfe..9dfd220ceda1 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_regs.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_regs.h @@ -187,14 +187,14 @@ #define DEFR6 0x000e8 #define DEFR6_CODE (0x7778 << 16) -#define DEFR6_ODPM22_DSMR (0 << 10) -#define DEFR6_ODPM22_DISP (2 << 10) -#define DEFR6_ODPM22_CDE (3 << 10) -#define DEFR6_ODPM22_MASK (3 << 10) -#define DEFR6_ODPM12_DSMR (0 << 8) -#define DEFR6_ODPM12_DISP (2 << 8) -#define DEFR6_ODPM12_CDE (3 << 8) -#define DEFR6_ODPM12_MASK (3 << 8) +#define DEFR6_ODPM12_DSMR (0 << 10) +#define DEFR6_ODPM12_DISP (2 << 10) +#define DEFR6_ODPM12_CDE (3 << 10) +#define DEFR6_ODPM12_MASK (3 << 10) +#define DEFR6_ODPM02_DSMR (0 << 8) +#define DEFR6_ODPM02_DISP (2 << 8) +#define DEFR6_ODPM02_CDE (3 << 8) +#define DEFR6_ODPM02_MASK (3 << 8) #define DEFR6_TCNE1 (1 << 6) #define DEFR6_TCNE0 (1 << 4) #define DEFR6_MLOS1 (1 << 2) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index b3bec0125696..c59f0cfabd33 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -17,6 +17,7 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_cma_helper.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_plane_helper.h> #include <linux/bitops.h> @@ -237,6 +238,10 @@ static int rcar_du_vsp_plane_prepare_fb(struct drm_plane *plane, } } + ret = drm_gem_fb_prepare_fb(plane, state); + if (ret) + goto fail; + return 0; fail: @@ -299,14 +304,12 @@ static const struct drm_plane_helper_funcs rcar_du_vsp_plane_helper_funcs = { static struct drm_plane_state * rcar_du_vsp_plane_atomic_duplicate_state(struct drm_plane *plane) { - struct rcar_du_vsp_plane_state *state; struct rcar_du_vsp_plane_state *copy; if (WARN_ON(!plane->state)) return NULL; - state = to_rcar_vsp_plane_state(plane->state); - copy = kmemdup(state, sizeof(*state), GFP_KERNEL); + copy = kzalloc(sizeof(*copy), GFP_KERNEL); if (copy == NULL) return NULL; diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 0d95888ccc3e..44d480768dfe 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -30,7 +30,7 @@ #include <drm/spsc_queue.h> #define CREATE_TRACE_POINTS -#include <drm/gpu_scheduler_trace.h> +#include "gpu_scheduler_trace.h" #define to_drm_sched_job(sched_job) \ container_of((sched_job), struct drm_sched_job, queue_node) @@ -117,15 +117,15 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) * @sched The pointer to the scheduler * @entity The pointer to a valid drm_sched_entity * @rq The run queue this entity belongs - * @kernel If this is an entity for the kernel - * @jobs The max number of jobs in the job queue + * @guilty atomic_t set to 1 when a job on this queue + * is found to be guilty causing a timeout * * return 0 if succeed. negative error code on failure */ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, - uint32_t jobs, atomic_t *guilty) + atomic_t *guilty) { if (!(sched && entity && rq)) return -EINVAL; @@ -135,9 +135,10 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, entity->rq = rq; entity->sched = sched; entity->guilty = guilty; + entity->fini_status = 0; + entity->last_scheduled = NULL; spin_lock_init(&entity->rq_lock); - spin_lock_init(&entity->queue_lock); spsc_queue_init(&entity->job_queue); atomic_set(&entity->fence_seq, 0); @@ -196,19 +197,30 @@ static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity) return true; } +static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, + struct dma_fence_cb *cb) +{ + struct drm_sched_job *job = container_of(cb, struct drm_sched_job, + finish_cb); + drm_sched_fence_finished(job->s_fence); + WARN_ON(job->s_fence->parent); + dma_fence_put(&job->s_fence->finished); + job->sched->ops->free_job(job); +} + + /** * Destroy a context entity * * @sched Pointer to scheduler instance * @entity The pointer to a valid scheduler entity * - * Cleanup and free the allocated resources. + * Splitting drm_sched_entity_fini() into two functions, The first one is does the waiting, + * removes the entity from the runqueue and returns an error when the process was killed. */ -void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, +void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { - int r; - if (!drm_sched_entity_is_initialized(sched, entity)) return; /** @@ -216,13 +228,28 @@ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, * queued IBs or discard them on SIGKILL */ if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL) - r = -ERESTARTSYS; + entity->fini_status = -ERESTARTSYS; else - r = wait_event_killable(sched->job_scheduled, + entity->fini_status = wait_event_killable(sched->job_scheduled, drm_sched_entity_is_idle(entity)); drm_sched_entity_set_rq(entity, NULL); - if (r) { +} +EXPORT_SYMBOL(drm_sched_entity_do_release); + +/** + * Destroy a context entity + * + * @sched Pointer to scheduler instance + * @entity The pointer to a valid scheduler entity + * + * The second one then goes over the entity and signals all jobs with an error code. + */ +void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) +{ + if (entity->fini_status) { struct drm_sched_job *job; + int r; /* Park the kernel for a moment to make sure it isn't processing * our enity. @@ -240,12 +267,25 @@ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_fence *s_fence = job->s_fence; drm_sched_fence_scheduled(s_fence); dma_fence_set_error(&s_fence->finished, -ESRCH); - drm_sched_fence_finished(s_fence); - WARN_ON(s_fence->parent); - dma_fence_put(&s_fence->finished); - sched->ops->free_job(job); + r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb, + drm_sched_entity_kill_jobs_cb); + if (r == -ENOENT) + drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); + else if (r) + DRM_ERROR("fence add callback failed (%d)\n", r); } } + + dma_fence_put(entity->last_scheduled); + entity->last_scheduled = NULL; +} +EXPORT_SYMBOL(drm_sched_entity_cleanup); + +void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity) +{ + drm_sched_entity_do_release(sched, entity); + drm_sched_entity_cleanup(sched, entity); } EXPORT_SYMBOL(drm_sched_entity_fini); @@ -309,8 +349,13 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) struct dma_fence * fence = entity->dependency; struct drm_sched_fence *s_fence; - if (fence->context == entity->fence_context) { - /* We can ignore fences from ourself */ + if (fence->context == entity->fence_context || + fence->context == entity->fence_context + 1) { + /* + * Fence is a scheduled/finished fence from a job + * which belongs to the same entity, we can ignore + * fences from ourself + */ dma_fence_put(entity->dependency); return false; } @@ -360,6 +405,9 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity) if (entity->guilty && atomic_read(entity->guilty)) dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED); + dma_fence_put(entity->last_scheduled); + entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished); + spsc_queue_pop(&entity->job_queue); return sched_job; } @@ -369,6 +417,10 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity) * * @sched_job The pointer to job required to submit * + * Note: To guarantee that the order of insertion to queue matches + * the job's fence sequence number this function should be + * called with drm_sched_job_init under common lock. + * * Returns 0 for success, negative error code otherwise. */ void drm_sched_entity_push_job(struct drm_sched_job *sched_job, @@ -379,11 +431,8 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job, trace_drm_sched_job(sched_job, entity); - spin_lock(&entity->queue_lock); first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node); - spin_unlock(&entity->queue_lock); - /* first job wakes up scheduler */ if (first) { /* Add the entity to the run queue */ @@ -529,6 +578,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) spin_unlock(&sched->job_list_lock); fence = sched->ops->run_job(s_job); atomic_inc(&sched->hw_rq_count); + if (fence) { s_fence->parent = dma_fence_get(fence); r = dma_fence_add_callback(fence, &s_fence->cb, @@ -548,13 +598,19 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) } EXPORT_SYMBOL(drm_sched_job_recovery); -/* init a sched_job with basic field */ +/** + * Init a sched_job with basic field + * + * Note: Refer to drm_sched_entity_push_job documentation + * for locking considerations. + */ int drm_sched_job_init(struct drm_sched_job *job, struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, void *owner) { job->sched = sched; + job->entity = entity; job->s_priority = entity->rq - sched->sched_rq; job->s_fence = drm_sched_fence_create(entity, owner); if (!job->s_fence) diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h new file mode 100644 index 000000000000..4998ad950a48 --- /dev/null +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -0,0 +1,82 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _GPU_SCHED_TRACE_H_ + +#include <linux/stringify.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +#include <drm/drmP.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gpu_scheduler +#define TRACE_INCLUDE_FILE gpu_scheduler_trace + +TRACE_EVENT(drm_sched_job, + TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity), + TP_ARGS(sched_job, entity), + TP_STRUCT__entry( + __field(struct drm_sched_entity *, entity) + __field(struct dma_fence *, fence) + __field(const char *, name) + __field(uint64_t, id) + __field(u32, job_count) + __field(int, hw_job_count) + ), + + TP_fast_assign( + __entry->entity = entity; + __entry->id = sched_job->id; + __entry->fence = &sched_job->s_fence->finished; + __entry->name = sched_job->sched->name; + __entry->job_count = spsc_queue_count(&entity->job_queue); + __entry->hw_job_count = atomic_read( + &sched_job->sched->hw_rq_count); + ), + TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", + __entry->entity, __entry->id, + __entry->fence, __entry->name, + __entry->job_count, __entry->hw_job_count) +); + +TRACE_EVENT(drm_sched_process_job, + TP_PROTO(struct drm_sched_fence *fence), + TP_ARGS(fence), + TP_STRUCT__entry( + __field(struct dma_fence *, fence) + ), + + TP_fast_assign( + __entry->fence = &fence->finished; + ), + TP_printk("fence=%p signaled", __entry->fence) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/scheduler +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index 69aab086b913..df4461648e3f 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -87,7 +87,7 @@ static bool drm_sched_fence_enable_signaling(struct dma_fence *f) } /** - * amd_sched_fence_free - free up the fence memory + * drm_sched_fence_free - free up the fence memory * * @rcu: RCU callback head * @@ -98,12 +98,11 @@ static void drm_sched_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); struct drm_sched_fence *fence = to_drm_sched_fence(f); - dma_fence_put(fence->parent); kmem_cache_free(sched_fence_slab, fence); } /** - * amd_sched_fence_release_scheduled - callback that fence can be freed + * drm_sched_fence_release_scheduled - callback that fence can be freed * * @fence: fence * @@ -114,11 +113,12 @@ static void drm_sched_fence_release_scheduled(struct dma_fence *f) { struct drm_sched_fence *fence = to_drm_sched_fence(f); + dma_fence_put(fence->parent); call_rcu(&fence->finished.rcu, drm_sched_fence_free); } /** - * amd_sched_fence_release_finished - drop extra reference + * drm_sched_fence_release_finished - drop extra reference * * @f: fence * diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 9f83a65b5ea9..c3afe7b2237e 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -163,28 +163,89 @@ static void tegra_plane_setup_blending_legacy(struct tegra_plane *plane) BLEND_COLOR_KEY_NONE; u32 blendnokey = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255); struct tegra_plane_state *state; + u32 blending[2]; unsigned int i; + /* disable blending for non-overlapping case */ + tegra_plane_writel(plane, blendnokey, DC_WIN_BLEND_NOKEY); + tegra_plane_writel(plane, foreground, DC_WIN_BLEND_1WIN); + state = to_tegra_plane_state(plane->base.state); - /* alpha contribution is 1 minus sum of overlapping windows */ - for (i = 0; i < 3; i++) { - if (state->dependent[i]) - background[i] |= BLEND_CONTROL_DEPENDENT; - } + if (state->opaque) { + /* + * Since custom fix-weight blending isn't utilized and weight + * of top window is set to max, we can enforce dependent + * blending which in this case results in transparent bottom + * window if top window is opaque and if top window enables + * alpha blending, then bottom window is getting alpha value + * of 1 minus the sum of alpha components of the overlapping + * plane. + */ + background[0] |= BLEND_CONTROL_DEPENDENT; + background[1] |= BLEND_CONTROL_DEPENDENT; - /* enable alpha blending if pixel format has an alpha component */ - if (!state->opaque) + /* + * The region where three windows overlap is the intersection + * of the two regions where two windows overlap. It contributes + * to the area if all of the windows on top of it have an alpha + * component. + */ + switch (state->base.normalized_zpos) { + case 0: + if (state->blending[0].alpha && + state->blending[1].alpha) + background[2] |= BLEND_CONTROL_DEPENDENT; + break; + + case 1: + background[2] |= BLEND_CONTROL_DEPENDENT; + break; + } + } else { + /* + * Enable alpha blending if pixel format has an alpha + * component. + */ foreground |= BLEND_CONTROL_ALPHA; - /* - * Disable blending and assume Window A is the bottom-most window, - * Window C is the top-most window and Window B is in the middle. - */ - tegra_plane_writel(plane, blendnokey, DC_WIN_BLEND_NOKEY); - tegra_plane_writel(plane, foreground, DC_WIN_BLEND_1WIN); + /* + * If any of the windows on top of this window is opaque, it + * will completely conceal this window within that area. If + * top window has an alpha component, it is blended over the + * bottom window. + */ + for (i = 0; i < 2; i++) { + if (state->blending[i].alpha && + state->blending[i].top) + background[i] |= BLEND_CONTROL_DEPENDENT; + } + + switch (state->base.normalized_zpos) { + case 0: + if (state->blending[0].alpha && + state->blending[1].alpha) + background[2] |= BLEND_CONTROL_DEPENDENT; + break; - switch (plane->index) { + case 1: + /* + * When both middle and topmost windows have an alpha, + * these windows a mixed together and then the result + * is blended over the bottom window. + */ + if (state->blending[0].alpha && + state->blending[0].top) + background[2] |= BLEND_CONTROL_ALPHA; + + if (state->blending[1].alpha && + state->blending[1].top) + background[2] |= BLEND_CONTROL_ALPHA; + break; + } + } + + switch (state->base.normalized_zpos) { case 0: tegra_plane_writel(plane, background[0], DC_WIN_BLEND_2WIN_X); tegra_plane_writel(plane, background[1], DC_WIN_BLEND_2WIN_Y); @@ -192,8 +253,21 @@ static void tegra_plane_setup_blending_legacy(struct tegra_plane *plane) break; case 1: - tegra_plane_writel(plane, foreground, DC_WIN_BLEND_2WIN_X); - tegra_plane_writel(plane, background[1], DC_WIN_BLEND_2WIN_Y); + /* + * If window B / C is topmost, then X / Y registers are + * matching the order of blending[...] state indices, + * otherwise a swap is required. + */ + if (!state->blending[0].top && state->blending[1].top) { + blending[0] = foreground; + blending[1] = background[1]; + } else { + blending[0] = background[0]; + blending[1] = foreground; + } + + tegra_plane_writel(plane, blending[0], DC_WIN_BLEND_2WIN_X); + tegra_plane_writel(plane, blending[1], DC_WIN_BLEND_2WIN_Y); tegra_plane_writel(plane, background[2], DC_WIN_BLEND_3WIN_XY); break; @@ -224,6 +298,39 @@ static void tegra_plane_setup_blending(struct tegra_plane *plane, tegra_plane_writel(plane, value, DC_WIN_BLEND_LAYER_CONTROL); } +static bool +tegra_plane_use_horizontal_filtering(struct tegra_plane *plane, + const struct tegra_dc_window *window) +{ + struct tegra_dc *dc = plane->dc; + + if (window->src.w == window->dst.w) + return false; + + if (plane->index == 0 && dc->soc->has_win_a_without_filters) + return false; + + return true; +} + +static bool +tegra_plane_use_vertical_filtering(struct tegra_plane *plane, + const struct tegra_dc_window *window) +{ + struct tegra_dc *dc = plane->dc; + + if (window->src.h == window->dst.h) + return false; + + if (plane->index == 0 && dc->soc->has_win_a_without_filters) + return false; + + if (plane->index == 2 && dc->soc->has_win_c_without_vert_filter) + return false; + + return true; +} + static void tegra_dc_setup_window(struct tegra_plane *plane, const struct tegra_dc_window *window) { @@ -361,12 +468,50 @@ static void tegra_dc_setup_window(struct tegra_plane *plane, if (window->bottom_up) value |= V_DIRECTION; + if (tegra_plane_use_horizontal_filtering(plane, window)) { + /* + * Enable horizontal 6-tap filter and set filtering + * coefficients to the default values defined in TRM. + */ + tegra_plane_writel(plane, 0x00008000, DC_WIN_H_FILTER_P(0)); + tegra_plane_writel(plane, 0x3e087ce1, DC_WIN_H_FILTER_P(1)); + tegra_plane_writel(plane, 0x3b117ac1, DC_WIN_H_FILTER_P(2)); + tegra_plane_writel(plane, 0x591b73aa, DC_WIN_H_FILTER_P(3)); + tegra_plane_writel(plane, 0x57256d9a, DC_WIN_H_FILTER_P(4)); + tegra_plane_writel(plane, 0x552f668b, DC_WIN_H_FILTER_P(5)); + tegra_plane_writel(plane, 0x73385e8b, DC_WIN_H_FILTER_P(6)); + tegra_plane_writel(plane, 0x72435583, DC_WIN_H_FILTER_P(7)); + tegra_plane_writel(plane, 0x714c4c8b, DC_WIN_H_FILTER_P(8)); + tegra_plane_writel(plane, 0x70554393, DC_WIN_H_FILTER_P(9)); + tegra_plane_writel(plane, 0x715e389b, DC_WIN_H_FILTER_P(10)); + tegra_plane_writel(plane, 0x71662faa, DC_WIN_H_FILTER_P(11)); + tegra_plane_writel(plane, 0x536d25ba, DC_WIN_H_FILTER_P(12)); + tegra_plane_writel(plane, 0x55731bca, DC_WIN_H_FILTER_P(13)); + tegra_plane_writel(plane, 0x387a11d9, DC_WIN_H_FILTER_P(14)); + tegra_plane_writel(plane, 0x3c7c08f1, DC_WIN_H_FILTER_P(15)); + + value |= H_FILTER; + } + + if (tegra_plane_use_vertical_filtering(plane, window)) { + unsigned int i, k; + + /* + * Enable vertical 2-tap filter and set filtering + * coefficients to the default values defined in TRM. + */ + for (i = 0, k = 128; i < 16; i++, k -= 8) + tegra_plane_writel(plane, k, DC_WIN_V_FILTER_P(i)); + + value |= V_FILTER; + } + tegra_plane_writel(plane, value, DC_WIN_WIN_OPTIONS); - if (dc->soc->supports_blending) - tegra_plane_setup_blending(plane, window); - else + if (dc->soc->has_legacy_blending) tegra_plane_setup_blending_legacy(plane); + else + tegra_plane_setup_blending(plane, window); } static const u32 tegra20_primary_formats[] = { @@ -451,17 +596,18 @@ static int tegra_plane_atomic_check(struct drm_plane *plane, struct drm_plane_state *state) { struct tegra_plane_state *plane_state = to_tegra_plane_state(state); + unsigned int rotation = DRM_MODE_ROTATE_0 | DRM_MODE_REFLECT_Y; struct tegra_bo_tiling *tiling = &plane_state->tiling; struct tegra_plane *tegra = to_tegra_plane(plane); struct tegra_dc *dc = to_tegra_dc(state->crtc); - unsigned int format; int err; /* no need for further checks if the plane is being disabled */ if (!state->crtc) return 0; - err = tegra_plane_format(state->fb->format->format, &format, + err = tegra_plane_format(state->fb->format->format, + &plane_state->format, &plane_state->swap); if (err < 0) return err; @@ -472,22 +618,12 @@ static int tegra_plane_atomic_check(struct drm_plane *plane, * the corresponding opaque formats. However, the opaque formats can * be emulated by disabling alpha blending for the plane. */ - if (!dc->soc->supports_blending) { - if (!tegra_plane_format_has_alpha(format)) { - err = tegra_plane_format_get_alpha(format, &format); - if (err < 0) - return err; - - plane_state->opaque = true; - } else { - plane_state->opaque = false; - } - - tegra_plane_check_dependent(tegra, plane_state); + if (dc->soc->has_legacy_blending) { + err = tegra_plane_setup_legacy_state(tegra, plane_state); + if (err < 0) + return err; } - plane_state->format = format; - err = tegra_fb_get_tiling(state->fb, tiling); if (err < 0) return err; @@ -498,6 +634,13 @@ static int tegra_plane_atomic_check(struct drm_plane *plane, return -EINVAL; } + rotation = drm_rotation_simplify(state->rotation, rotation); + + if (rotation & DRM_MODE_REFLECT_Y) + plane_state->bottom_up = true; + else + plane_state->bottom_up = false; + /* * Tegra doesn't support different strides for U and V planes so we * error out if the user tries to display a framebuffer with such a @@ -558,7 +701,7 @@ static void tegra_plane_atomic_update(struct drm_plane *plane, window.dst.w = drm_rect_width(&plane->state->dst); window.dst.h = drm_rect_height(&plane->state->dst); window.bits_per_pixel = fb->format->cpp[0] * 8; - window.bottom_up = tegra_fb_is_bottom_up(fb); + window.bottom_up = tegra_fb_is_bottom_up(fb) || state->bottom_up; /* copy from state */ window.zpos = plane->state->normalized_zpos; @@ -639,9 +782,15 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm, } drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs); + drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255); - if (dc->soc->supports_blending) - drm_plane_create_zpos_property(&plane->base, 0, 0, 255); + err = drm_plane_create_rotation_property(&plane->base, + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_REFLECT_Y); + if (err < 0) + dev_err(dc->dev, "failed to create rotation property: %d\n", + err); return &plane->base; } @@ -918,9 +1067,15 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm, } drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs); + drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255); - if (dc->soc->supports_blending) - drm_plane_create_zpos_property(&plane->base, 0, 0, 255); + err = drm_plane_create_rotation_property(&plane->base, + DRM_MODE_ROTATE_0, + DRM_MODE_ROTATE_0 | + DRM_MODE_REFLECT_Y); + if (err < 0) + dev_err(dc->dev, "failed to create rotation property: %d\n", + err); return &plane->base; } @@ -1826,7 +1981,6 @@ static irqreturn_t tegra_dc_irq(int irq, void *data) static int tegra_dc_init(struct host1x_client *client) { struct drm_device *drm = dev_get_drvdata(client->parent); - struct iommu_group *group = iommu_group_get(client->dev); unsigned long flags = HOST1X_SYNCPT_CLIENT_MANAGED; struct tegra_dc *dc = host1x_client_to_dc(client); struct tegra_drm *tegra = drm->dev_private; @@ -1838,20 +1992,11 @@ static int tegra_dc_init(struct host1x_client *client) if (!dc->syncpt) dev_warn(dc->dev, "failed to allocate syncpoint\n"); - if (group && tegra->domain) { - if (group != tegra->group) { - err = iommu_attach_group(tegra->domain, group); - if (err < 0) { - dev_err(dc->dev, - "failed to attach to domain: %d\n", - err); - return err; - } - - tegra->group = group; - } - - dc->domain = tegra->domain; + dc->group = host1x_client_iommu_attach(client, true); + if (IS_ERR(dc->group)) { + err = PTR_ERR(dc->group); + dev_err(client->dev, "failed to attach to domain: %d\n", err); + return err; } if (dc->soc->wgrps) @@ -1916,24 +2061,15 @@ cleanup: if (!IS_ERR(primary)) drm_plane_cleanup(primary); - if (group && dc->domain) { - if (group == tegra->group) { - iommu_detach_group(dc->domain, group); - tegra->group = NULL; - } - - dc->domain = NULL; - } + host1x_client_iommu_detach(client, dc->group); + host1x_syncpt_free(dc->syncpt); return err; } static int tegra_dc_exit(struct host1x_client *client) { - struct drm_device *drm = dev_get_drvdata(client->parent); - struct iommu_group *group = iommu_group_get(client->dev); struct tegra_dc *dc = host1x_client_to_dc(client); - struct tegra_drm *tegra = drm->dev_private; int err; devm_free_irq(dc->dev, dc->irq, dc); @@ -1944,15 +2080,7 @@ static int tegra_dc_exit(struct host1x_client *client) return err; } - if (group && dc->domain) { - if (group == tegra->group) { - iommu_detach_group(dc->domain, group); - tegra->group = NULL; - } - - dc->domain = NULL; - } - + host1x_client_iommu_detach(client, dc->group); host1x_syncpt_free(dc->syncpt); return 0; @@ -1968,7 +2096,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = { .supports_interlacing = false, .supports_cursor = false, .supports_block_linear = false, - .supports_blending = false, + .has_legacy_blending = true, .pitch_align = 8, .has_powergate = false, .coupled_pm = true, @@ -1978,6 +2106,8 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = { .num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats), .overlay_formats = tegra20_overlay_formats, .modifiers = tegra20_modifiers, + .has_win_a_without_filters = true, + .has_win_c_without_vert_filter = true, }; static const struct tegra_dc_soc_info tegra30_dc_soc_info = { @@ -1985,7 +2115,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = { .supports_interlacing = false, .supports_cursor = false, .supports_block_linear = false, - .supports_blending = false, + .has_legacy_blending = true, .pitch_align = 8, .has_powergate = false, .coupled_pm = false, @@ -1995,6 +2125,8 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = { .num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats), .overlay_formats = tegra20_overlay_formats, .modifiers = tegra20_modifiers, + .has_win_a_without_filters = false, + .has_win_c_without_vert_filter = false, }; static const struct tegra_dc_soc_info tegra114_dc_soc_info = { @@ -2002,7 +2134,7 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = { .supports_interlacing = false, .supports_cursor = false, .supports_block_linear = false, - .supports_blending = false, + .has_legacy_blending = true, .pitch_align = 64, .has_powergate = true, .coupled_pm = false, @@ -2012,6 +2144,8 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = { .num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats), .overlay_formats = tegra114_overlay_formats, .modifiers = tegra20_modifiers, + .has_win_a_without_filters = false, + .has_win_c_without_vert_filter = false, }; static const struct tegra_dc_soc_info tegra124_dc_soc_info = { @@ -2019,7 +2153,7 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, - .supports_blending = true, + .has_legacy_blending = false, .pitch_align = 64, .has_powergate = true, .coupled_pm = false, @@ -2029,6 +2163,8 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = { .num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats), .overlay_formats = tegra124_overlay_formats, .modifiers = tegra124_modifiers, + .has_win_a_without_filters = false, + .has_win_c_without_vert_filter = false, }; static const struct tegra_dc_soc_info tegra210_dc_soc_info = { @@ -2036,7 +2172,7 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, - .supports_blending = true, + .has_legacy_blending = false, .pitch_align = 64, .has_powergate = true, .coupled_pm = false, @@ -2046,6 +2182,8 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = { .num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats), .overlay_formats = tegra114_overlay_formats, .modifiers = tegra124_modifiers, + .has_win_a_without_filters = false, + .has_win_c_without_vert_filter = false, }; static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = { @@ -2087,7 +2225,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = { .supports_interlacing = true, .supports_cursor = true, .supports_block_linear = true, - .supports_blending = true, + .has_legacy_blending = false, .pitch_align = 64, .has_powergate = false, .coupled_pm = false, diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index d2b50d32de4d..e96f582ca692 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -55,7 +55,7 @@ struct tegra_dc_soc_info { bool supports_interlacing; bool supports_cursor; bool supports_block_linear; - bool supports_blending; + bool has_legacy_blending; unsigned int pitch_align; bool has_powergate; bool coupled_pm; @@ -67,6 +67,8 @@ struct tegra_dc_soc_info { const u32 *overlay_formats; unsigned int num_overlay_formats; const u64 *modifiers; + bool has_win_a_without_filters; + bool has_win_c_without_vert_filter; }; struct tegra_dc { @@ -92,7 +94,7 @@ struct tegra_dc { const struct tegra_dc_soc_info *soc; - struct iommu_domain *domain; + struct iommu_group *group; }; static inline struct tegra_dc * @@ -553,6 +555,9 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc); #define THREAD_NUM(x) (((x) & 0x1f) << 1) #define THREAD_GROUP_ENABLE (1 << 0) +#define DC_WIN_H_FILTER_P(p) (0x601 + (p)) +#define DC_WIN_V_FILTER_P(p) (0x619 + (p)) + #define DC_WIN_CSC_YOF 0x611 #define DC_WIN_CSC_KYRGB 0x612 #define DC_WIN_CSC_KUR 0x613 @@ -566,6 +571,8 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc); #define H_DIRECTION (1 << 0) #define V_DIRECTION (1 << 2) #define COLOR_EXPAND (1 << 6) +#define H_FILTER (1 << 8) +#define V_FILTER (1 << 10) #define CSC_ENABLE (1 << 18) #define WIN_ENABLE (1 << 30) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index a0519612ae2c..776c1513e582 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -98,6 +98,10 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) goto free; } + err = iova_cache_get(); + if (err < 0) + goto domain; + geometry = &tegra->domain->geometry; gem_start = geometry->aperture_start; gem_end = geometry->aperture_end - CARVEOUT_SZ; @@ -191,11 +195,14 @@ config: drm_mode_config_cleanup(drm); if (tegra->domain) { - iommu_domain_free(tegra->domain); - drm_mm_takedown(&tegra->mm); mutex_destroy(&tegra->mm_lock); + drm_mm_takedown(&tegra->mm); put_iova_domain(&tegra->carveout.domain); + iova_cache_put(); } +domain: + if (tegra->domain) + iommu_domain_free(tegra->domain); free: kfree(tegra); return err; @@ -217,10 +224,11 @@ static void tegra_drm_unload(struct drm_device *drm) return; if (tegra->domain) { - iommu_domain_free(tegra->domain); - drm_mm_takedown(&tegra->mm); mutex_destroy(&tegra->mm_lock); + drm_mm_takedown(&tegra->mm); put_iova_domain(&tegra->carveout.domain); + iova_cache_put(); + iommu_domain_free(tegra->domain); } kfree(tegra); @@ -300,46 +308,15 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest, return 0; } -static int host1x_waitchk_copy_from_user(struct host1x_waitchk *dest, - struct drm_tegra_waitchk __user *src, - struct drm_file *file) -{ - u32 cmdbuf; - int err; - - err = get_user(cmdbuf, &src->handle); - if (err < 0) - return err; - - err = get_user(dest->offset, &src->offset); - if (err < 0) - return err; - - err = get_user(dest->syncpt_id, &src->syncpt); - if (err < 0) - return err; - - err = get_user(dest->thresh, &src->thresh); - if (err < 0) - return err; - - dest->bo = host1x_bo_lookup(file, cmdbuf); - if (!dest->bo) - return -ENOENT; - - return 0; -} - int tegra_drm_submit(struct tegra_drm_context *context, struct drm_tegra_submit *args, struct drm_device *drm, struct drm_file *file) { + struct host1x_client *client = &context->client->base; unsigned int num_cmdbufs = args->num_cmdbufs; unsigned int num_relocs = args->num_relocs; - unsigned int num_waitchks = args->num_waitchks; struct drm_tegra_cmdbuf __user *user_cmdbufs; struct drm_tegra_reloc __user *user_relocs; - struct drm_tegra_waitchk __user *user_waitchks; struct drm_tegra_syncpt __user *user_syncpt; struct drm_tegra_syncpt syncpt; struct host1x *host1x = dev_get_drvdata(drm->dev->parent); @@ -351,7 +328,6 @@ int tegra_drm_submit(struct tegra_drm_context *context, user_cmdbufs = u64_to_user_ptr(args->cmdbufs); user_relocs = u64_to_user_ptr(args->relocs); - user_waitchks = u64_to_user_ptr(args->waitchks); user_syncpt = u64_to_user_ptr(args->syncpts); /* We don't yet support other than one syncpt_incr struct per submit */ @@ -363,21 +339,20 @@ int tegra_drm_submit(struct tegra_drm_context *context, return -EINVAL; job = host1x_job_alloc(context->channel, args->num_cmdbufs, - args->num_relocs, args->num_waitchks); + args->num_relocs); if (!job) return -ENOMEM; job->num_relocs = args->num_relocs; - job->num_waitchk = args->num_waitchks; - job->client = (u32)args->context; - job->class = context->client->base.class; + job->client = client; + job->class = client->class; job->serialize = true; /* * Track referenced BOs so that they can be unreferenced after the * submission is complete. */ - num_refs = num_cmdbufs + num_relocs * 2 + num_waitchks; + num_refs = num_cmdbufs + num_relocs * 2; refs = kmalloc_array(num_refs, sizeof(*refs), GFP_KERNEL); if (!refs) { @@ -438,13 +413,13 @@ int tegra_drm_submit(struct tegra_drm_context *context, struct host1x_reloc *reloc; struct tegra_bo *obj; - err = host1x_reloc_copy_from_user(&job->relocarray[num_relocs], + err = host1x_reloc_copy_from_user(&job->relocs[num_relocs], &user_relocs[num_relocs], drm, file); if (err < 0) goto fail; - reloc = &job->relocarray[num_relocs]; + reloc = &job->relocs[num_relocs]; obj = host1x_to_tegra_bo(reloc->cmdbuf.bo); refs[num_refs++] = &obj->gem; @@ -468,30 +443,6 @@ int tegra_drm_submit(struct tegra_drm_context *context, } } - /* copy and resolve waitchks from submit */ - while (num_waitchks--) { - struct host1x_waitchk *wait = &job->waitchk[num_waitchks]; - struct tegra_bo *obj; - - err = host1x_waitchk_copy_from_user( - wait, &user_waitchks[num_waitchks], file); - if (err < 0) - goto fail; - - obj = host1x_to_tegra_bo(wait->bo); - refs[num_refs++] = &obj->gem; - - /* - * The unaligned offset will cause an unaligned write during - * of the waitchks patching, corrupting the commands stream. - */ - if (wait->offset & 3 || - wait->offset >= obj->gem.size) { - err = -EINVAL; - goto fail; - } - } - if (copy_from_user(&syncpt, user_syncpt, sizeof(syncpt))) { err = -EFAULT; goto fail; @@ -1101,6 +1052,52 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra, return 0; } +struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client, + bool shared) +{ + struct drm_device *drm = dev_get_drvdata(client->parent); + struct tegra_drm *tegra = drm->dev_private; + struct iommu_group *group = NULL; + int err; + + if (tegra->domain) { + group = iommu_group_get(client->dev); + if (!group) { + dev_err(client->dev, "failed to get IOMMU group\n"); + return ERR_PTR(-ENODEV); + } + + if (!shared || (shared && (group != tegra->group))) { + err = iommu_attach_group(tegra->domain, group); + if (err < 0) { + iommu_group_put(group); + return ERR_PTR(err); + } + + if (shared && !tegra->group) + tegra->group = group; + } + } + + return group; +} + +void host1x_client_iommu_detach(struct host1x_client *client, + struct iommu_group *group) +{ + struct drm_device *drm = dev_get_drvdata(client->parent); + struct tegra_drm *tegra = drm->dev_private; + + if (group) { + if (group == tegra->group) { + iommu_detach_group(tegra->domain, group); + tegra->group = NULL; + } + + iommu_group_put(group); + } +} + void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma) { struct iova *alloc; diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 4f41aaec8530..92d248784396 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -29,16 +29,10 @@ struct reset_control; -struct tegra_fb { - struct drm_framebuffer base; - struct tegra_bo **planes; - unsigned int num_planes; -}; - #ifdef CONFIG_DRM_FBDEV_EMULATION struct tegra_fbdev { struct drm_fb_helper base; - struct tegra_fb *fb; + struct drm_framebuffer *fb; }; #endif @@ -97,6 +91,7 @@ struct tegra_drm_client { struct host1x_client base; struct list_head list; + unsigned int version; const struct tegra_drm_client_ops *ops; }; @@ -110,6 +105,10 @@ int tegra_drm_register_client(struct tegra_drm *tegra, struct tegra_drm_client *client); int tegra_drm_unregister_client(struct tegra_drm *tegra, struct tegra_drm_client *client); +struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client, + bool shared); +void host1x_client_iommu_detach(struct host1x_client *client, + struct iommu_group *group); int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm); int tegra_drm_exit(struct tegra_drm *tegra); diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index e69434909a42..4c22cdded3c2 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -14,11 +14,7 @@ #include "drm.h" #include "gem.h" - -static inline struct tegra_fb *to_tegra_fb(struct drm_framebuffer *fb) -{ - return container_of(fb, struct tegra_fb, base); -} +#include <drm/drm_gem_framebuffer_helper.h> #ifdef CONFIG_DRM_FBDEV_EMULATION static inline struct tegra_fbdev *to_tegra_fbdev(struct drm_fb_helper *helper) @@ -30,19 +26,14 @@ static inline struct tegra_fbdev *to_tegra_fbdev(struct drm_fb_helper *helper) struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer, unsigned int index) { - struct tegra_fb *fb = to_tegra_fb(framebuffer); - - if (index >= framebuffer->format->num_planes) - return NULL; - - return fb->planes[index]; + return to_tegra_bo(drm_gem_fb_get_obj(framebuffer, index)); } bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer) { - struct tegra_fb *fb = to_tegra_fb(framebuffer); + struct tegra_bo *bo = tegra_fb_get_plane(framebuffer, 0); - if (fb->planes[0]->flags & TEGRA_BO_BOTTOM_UP) + if (bo->flags & TEGRA_BO_BOTTOM_UP) return true; return false; @@ -51,8 +42,7 @@ bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer) int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, struct tegra_bo_tiling *tiling) { - struct tegra_fb *fb = to_tegra_fb(framebuffer); - uint64_t modifier = fb->base.modifier; + uint64_t modifier = framebuffer->modifier; switch (modifier) { case DRM_FORMAT_MOD_LINEAR: @@ -102,46 +92,17 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, return 0; } -static void tegra_fb_destroy(struct drm_framebuffer *framebuffer) -{ - struct tegra_fb *fb = to_tegra_fb(framebuffer); - unsigned int i; - - for (i = 0; i < fb->num_planes; i++) { - struct tegra_bo *bo = fb->planes[i]; - - if (bo) { - if (bo->pages) - vunmap(bo->vaddr); - - drm_gem_object_put_unlocked(&bo->gem); - } - } - - drm_framebuffer_cleanup(framebuffer); - kfree(fb->planes); - kfree(fb); -} - -static int tegra_fb_create_handle(struct drm_framebuffer *framebuffer, - struct drm_file *file, unsigned int *handle) -{ - struct tegra_fb *fb = to_tegra_fb(framebuffer); - - return drm_gem_handle_create(file, &fb->planes[0]->gem, handle); -} - static const struct drm_framebuffer_funcs tegra_fb_funcs = { - .destroy = tegra_fb_destroy, - .create_handle = tegra_fb_create_handle, + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, }; -static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct tegra_bo **planes, - unsigned int num_planes) +static struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct tegra_bo **planes, + unsigned int num_planes) { - struct tegra_fb *fb; + struct drm_framebuffer *fb; unsigned int i; int err; @@ -149,24 +110,15 @@ static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm, if (!fb) return ERR_PTR(-ENOMEM); - fb->planes = kzalloc(num_planes * sizeof(*planes), GFP_KERNEL); - if (!fb->planes) { - kfree(fb); - return ERR_PTR(-ENOMEM); - } - - fb->num_planes = num_planes; + drm_helper_mode_fill_fb_struct(drm, fb, mode_cmd); - drm_helper_mode_fill_fb_struct(drm, &fb->base, mode_cmd); + for (i = 0; i < fb->format->num_planes; i++) + fb->obj[i] = &planes[i]->gem; - for (i = 0; i < fb->num_planes; i++) - fb->planes[i] = planes[i]; - - err = drm_framebuffer_init(drm, &fb->base, &tegra_fb_funcs); + err = drm_framebuffer_init(drm, fb, &tegra_fb_funcs); if (err < 0) { dev_err(drm->dev, "failed to initialize framebuffer: %d\n", err); - kfree(fb->planes); kfree(fb); return ERR_PTR(err); } @@ -181,7 +133,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, unsigned int hsub, vsub, i; struct tegra_bo *planes[4]; struct drm_gem_object *gem; - struct tegra_fb *fb; + struct drm_framebuffer *fb; int err; hsub = drm_format_horz_chroma_subsampling(cmd->pixel_format); @@ -217,7 +169,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, goto unreference; } - return &fb->base; + return fb; unreference: while (i--) @@ -298,7 +250,7 @@ static int tegra_fbdev_probe(struct drm_fb_helper *helper, return PTR_ERR(fbdev->fb); } - fb = &fbdev->fb->base; + fb = fbdev->fb; helper->fb = fb; helper->fbdev = info; @@ -398,8 +350,17 @@ static void tegra_fbdev_exit(struct tegra_fbdev *fbdev) { drm_fb_helper_unregister_fbi(&fbdev->base); - if (fbdev->fb) - drm_framebuffer_remove(&fbdev->fb->base); + if (fbdev->fb) { + struct tegra_bo *bo = tegra_fb_get_plane(fbdev->fb, 0); + + /* Undo the special mapping we made in fbdev probe. */ + if (bo && bo->pages) { + vunmap(bo->vaddr); + bo->vaddr = 0; + } + + drm_framebuffer_remove(fbdev->fb); + } drm_fb_helper_fini(&fbdev->base); tegra_fbdev_free(fbdev); diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index d7661702c11c..4f80100ff5f3 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -422,14 +422,13 @@ int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm, return 0; } -static int tegra_bo_fault(struct vm_fault *vmf) +static vm_fault_t tegra_bo_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *gem = vma->vm_private_data; struct tegra_bo *bo = to_tegra_bo(gem); struct page *page; pgoff_t offset; - int err; if (!bo->pages) return VM_FAULT_SIGBUS; @@ -437,20 +436,7 @@ static int tegra_bo_fault(struct vm_fault *vmf) offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; page = bo->pages[offset]; - err = vm_insert_page(vma, vmf->address, page); - switch (err) { - case -EAGAIN: - case 0: - case -ERESTARTSYS: - case -EINTR: - case -EBUSY: - return VM_FAULT_NOPAGE; - - case -ENOMEM: - return VM_FAULT_OOM; - } - - return VM_FAULT_SIGBUS; + return vmf_insert_page(vma, vmf->address, page); } const struct vm_operations_struct tegra_bo_vm_ops = { @@ -649,6 +635,8 @@ struct dma_buf *tegra_gem_prime_export(struct drm_device *drm, { DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + exp_info.exp_name = KBUILD_MODNAME; + exp_info.owner = drm->driver->fops->owner; exp_info.ops = &tegra_gem_prime_dmabuf_ops; exp_info.size = gem->size; exp_info.flags = flags; diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c index 9a8ea93016a9..673059fd2fcb 100644 --- a/drivers/gpu/drm/tegra/gr2d.c +++ b/drivers/gpu/drm/tegra/gr2d.c @@ -7,16 +7,25 @@ */ #include <linux/clk.h> +#include <linux/iommu.h> +#include <linux/of_device.h> #include "drm.h" #include "gem.h" #include "gr2d.h" +struct gr2d_soc { + unsigned int version; +}; + struct gr2d { + struct iommu_group *group; struct tegra_drm_client client; struct host1x_channel *channel; struct clk *clk; + const struct gr2d_soc *soc; + DECLARE_BITMAP(addr_regs, GR2D_NUM_REGS); }; @@ -31,6 +40,7 @@ static int gr2d_init(struct host1x_client *client) struct drm_device *dev = dev_get_drvdata(client->parent); unsigned long flags = HOST1X_SYNCPT_HAS_BASE; struct gr2d *gr2d = to_gr2d(drm); + int err; gr2d->channel = host1x_channel_request(client->dev); if (!gr2d->channel) @@ -38,24 +48,48 @@ static int gr2d_init(struct host1x_client *client) client->syncpts[0] = host1x_syncpt_request(client, flags); if (!client->syncpts[0]) { - host1x_channel_put(gr2d->channel); - return -ENOMEM; + err = -ENOMEM; + dev_err(client->dev, "failed to request syncpoint: %d\n", err); + goto put; + } + + gr2d->group = host1x_client_iommu_attach(client, false); + if (IS_ERR(gr2d->group)) { + err = PTR_ERR(gr2d->group); + dev_err(client->dev, "failed to attach to domain: %d\n", err); + goto free; + } + + err = tegra_drm_register_client(dev->dev_private, drm); + if (err < 0) { + dev_err(client->dev, "failed to register client: %d\n", err); + goto detach; } - return tegra_drm_register_client(dev->dev_private, drm); + return 0; + +detach: + host1x_client_iommu_detach(client, gr2d->group); +free: + host1x_syncpt_free(client->syncpts[0]); +put: + host1x_channel_put(gr2d->channel); + return err; } static int gr2d_exit(struct host1x_client *client) { struct tegra_drm_client *drm = host1x_to_drm_client(client); struct drm_device *dev = dev_get_drvdata(client->parent); + struct tegra_drm *tegra = dev->dev_private; struct gr2d *gr2d = to_gr2d(drm); int err; - err = tegra_drm_unregister_client(dev->dev_private, drm); + err = tegra_drm_unregister_client(tegra, drm); if (err < 0) return err; + host1x_client_iommu_detach(client, gr2d->group); host1x_syncpt_free(client->syncpts[0]); host1x_channel_put(gr2d->channel); @@ -123,9 +157,17 @@ static const struct tegra_drm_client_ops gr2d_ops = { .submit = tegra_drm_submit, }; +static const struct gr2d_soc tegra20_gr2d_soc = { + .version = 0x20, +}; + +static const struct gr2d_soc tegra30_gr2d_soc = { + .version = 0x30, +}; + static const struct of_device_id gr2d_match[] = { - { .compatible = "nvidia,tegra30-gr2d" }, - { .compatible = "nvidia,tegra20-gr2d" }, + { .compatible = "nvidia,tegra30-gr2d", .data = &tegra20_gr2d_soc }, + { .compatible = "nvidia,tegra20-gr2d", .data = &tegra30_gr2d_soc }, { }, }; MODULE_DEVICE_TABLE(of, gr2d_match); @@ -158,6 +200,8 @@ static int gr2d_probe(struct platform_device *pdev) if (!gr2d) return -ENOMEM; + gr2d->soc = of_device_get_match_data(dev); + syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL); if (!syncpts) return -ENOMEM; @@ -182,6 +226,7 @@ static int gr2d_probe(struct platform_device *pdev) gr2d->client.base.num_syncpts = 1; INIT_LIST_HEAD(&gr2d->client.list); + gr2d->client.version = gr2d->soc->version; gr2d->client.ops = &gr2d_ops; err = host1x_client_register(&gr2d->client.base); diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c index 28c4ef63065b..4778ae999668 100644 --- a/drivers/gpu/drm/tegra/gr3d.c +++ b/drivers/gpu/drm/tegra/gr3d.c @@ -9,7 +9,9 @@ #include <linux/clk.h> #include <linux/host1x.h> +#include <linux/iommu.h> #include <linux/module.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/reset.h> @@ -19,7 +21,12 @@ #include "gem.h" #include "gr3d.h" +struct gr3d_soc { + unsigned int version; +}; + struct gr3d { + struct iommu_group *group; struct tegra_drm_client client; struct host1x_channel *channel; struct clk *clk_secondary; @@ -27,6 +34,8 @@ struct gr3d { struct reset_control *rst_secondary; struct reset_control *rst; + const struct gr3d_soc *soc; + DECLARE_BITMAP(addr_regs, GR3D_NUM_REGS); }; @@ -41,6 +50,7 @@ static int gr3d_init(struct host1x_client *client) struct drm_device *dev = dev_get_drvdata(client->parent); unsigned long flags = HOST1X_SYNCPT_HAS_BASE; struct gr3d *gr3d = to_gr3d(drm); + int err; gr3d->channel = host1x_channel_request(client->dev); if (!gr3d->channel) @@ -48,11 +58,33 @@ static int gr3d_init(struct host1x_client *client) client->syncpts[0] = host1x_syncpt_request(client, flags); if (!client->syncpts[0]) { - host1x_channel_put(gr3d->channel); - return -ENOMEM; + err = -ENOMEM; + dev_err(client->dev, "failed to request syncpoint: %d\n", err); + goto put; } - return tegra_drm_register_client(dev->dev_private, drm); + gr3d->group = host1x_client_iommu_attach(client, false); + if (IS_ERR(gr3d->group)) { + err = PTR_ERR(gr3d->group); + dev_err(client->dev, "failed to attach to domain: %d\n", err); + goto free; + } + + err = tegra_drm_register_client(dev->dev_private, drm); + if (err < 0) { + dev_err(client->dev, "failed to register client: %d\n", err); + goto detach; + } + + return 0; + +detach: + host1x_client_iommu_detach(client, gr3d->group); +free: + host1x_syncpt_free(client->syncpts[0]); +put: + host1x_channel_put(gr3d->channel); + return err; } static int gr3d_exit(struct host1x_client *client) @@ -66,6 +98,7 @@ static int gr3d_exit(struct host1x_client *client) if (err < 0) return err; + host1x_client_iommu_detach(client, gr3d->group); host1x_syncpt_free(client->syncpts[0]); host1x_channel_put(gr3d->channel); @@ -125,10 +158,22 @@ static const struct tegra_drm_client_ops gr3d_ops = { .submit = tegra_drm_submit, }; +static const struct gr3d_soc tegra20_gr3d_soc = { + .version = 0x20, +}; + +static const struct gr3d_soc tegra30_gr3d_soc = { + .version = 0x30, +}; + +static const struct gr3d_soc tegra114_gr3d_soc = { + .version = 0x35, +}; + static const struct of_device_id tegra_gr3d_match[] = { - { .compatible = "nvidia,tegra114-gr3d" }, - { .compatible = "nvidia,tegra30-gr3d" }, - { .compatible = "nvidia,tegra20-gr3d" }, + { .compatible = "nvidia,tegra114-gr3d", .data = &tegra114_gr3d_soc }, + { .compatible = "nvidia,tegra30-gr3d", .data = &tegra30_gr3d_soc }, + { .compatible = "nvidia,tegra20-gr3d", .data = &tegra20_gr3d_soc }, { } }; MODULE_DEVICE_TABLE(of, tegra_gr3d_match); @@ -250,6 +295,8 @@ static int gr3d_probe(struct platform_device *pdev) if (!gr3d) return -ENOMEM; + gr3d->soc = of_device_get_match_data(&pdev->dev); + syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL); if (!syncpts) return -ENOMEM; @@ -307,6 +354,7 @@ static int gr3d_probe(struct platform_device *pdev) gr3d->client.base.num_syncpts = 1; INIT_LIST_HEAD(&gr3d->client.list); + gr3d->client.version = gr3d->soc->version; gr3d->client.ops = &gr3d_ops; err = host1x_client_register(&gr3d->client.base); diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 9a3f23d4780f..8f4fcbb515fb 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -687,7 +687,7 @@ void tegra_display_hub_atomic_commit(struct drm_device *drm, struct device *dev = hub->client.dev; int err; - hub_state = tegra_display_hub_get_state(hub, state); + hub_state = to_tegra_display_hub_state(hub->base.state); if (hub_state->clk) { err = clk_set_rate(hub_state->clk, hub_state->rate); diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 176ef46c615c..d068e8aa3553 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -23,6 +23,7 @@ static void tegra_plane_destroy(struct drm_plane *plane) static void tegra_plane_reset(struct drm_plane *plane) { + struct tegra_plane *p = to_tegra_plane(plane); struct tegra_plane_state *state; if (plane->state) @@ -35,6 +36,8 @@ static void tegra_plane_reset(struct drm_plane *plane) if (state) { plane->state = &state->base; plane->state->plane = plane; + plane->state->zpos = p->index; + plane->state->normalized_zpos = p->index; } } @@ -53,10 +56,11 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane) copy->tiling = state->tiling; copy->format = state->format; copy->swap = state->swap; + copy->bottom_up = state->bottom_up; copy->opaque = state->opaque; - for (i = 0; i < 3; i++) - copy->dependent[i] = state->dependent[i]; + for (i = 0; i < 2; i++) + copy->blending[i] = state->blending[i]; return ©->base; } @@ -267,24 +271,8 @@ static bool __drm_format_has_alpha(u32 format) return false; } -/* - * This is applicable to Tegra20 and Tegra30 only where the opaque formats can - * be emulated using the alpha formats and alpha blending disabled. - */ -bool tegra_plane_format_has_alpha(unsigned int format) -{ - switch (format) { - case WIN_COLOR_DEPTH_B5G5R5A1: - case WIN_COLOR_DEPTH_A1B5G5R5: - case WIN_COLOR_DEPTH_R8G8B8A8: - case WIN_COLOR_DEPTH_B8G8R8A8: - return true; - } - - return false; -} - -int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha) +static int tegra_plane_format_get_alpha(unsigned int opaque, + unsigned int *alpha) { if (tegra_plane_format_is_yuv(opaque, NULL)) { *alpha = opaque; @@ -316,6 +304,67 @@ int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha) return -EINVAL; } +/* + * This is applicable to Tegra20 and Tegra30 only where the opaque formats can + * be emulated using the alpha formats and alpha blending disabled. + */ +static int tegra_plane_setup_opacity(struct tegra_plane *tegra, + struct tegra_plane_state *state) +{ + unsigned int format; + int err; + + switch (state->format) { + case WIN_COLOR_DEPTH_B5G5R5A1: + case WIN_COLOR_DEPTH_A1B5G5R5: + case WIN_COLOR_DEPTH_R8G8B8A8: + case WIN_COLOR_DEPTH_B8G8R8A8: + state->opaque = false; + break; + + default: + err = tegra_plane_format_get_alpha(state->format, &format); + if (err < 0) + return err; + + state->format = format; + state->opaque = true; + break; + } + + return 0; +} + +static int tegra_plane_check_transparency(struct tegra_plane *tegra, + struct tegra_plane_state *state) +{ + struct drm_plane_state *old, *plane_state; + struct drm_plane *plane; + + old = drm_atomic_get_old_plane_state(state->base.state, &tegra->base); + + /* check if zpos / transparency changed */ + if (old->normalized_zpos == state->base.normalized_zpos && + to_tegra_plane_state(old)->opaque == state->opaque) + return 0; + + /* include all sibling planes into this commit */ + drm_for_each_plane(plane, tegra->base.dev) { + struct tegra_plane *p = to_tegra_plane(plane); + + /* skip this plane and planes on different CRTCs */ + if (p == tegra || p->dc != tegra->dc) + continue; + + plane_state = drm_atomic_get_plane_state(state->base.state, + plane); + if (IS_ERR(plane_state)) + return PTR_ERR(plane_state); + } + + return 1; +} + static unsigned int tegra_plane_get_overlap_index(struct tegra_plane *plane, struct tegra_plane *other) { @@ -336,61 +385,98 @@ static unsigned int tegra_plane_get_overlap_index(struct tegra_plane *plane, return index; } -void tegra_plane_check_dependent(struct tegra_plane *tegra, - struct tegra_plane_state *state) +static void tegra_plane_update_transparency(struct tegra_plane *tegra, + struct tegra_plane_state *state) { - struct drm_plane_state *old, *new; + struct drm_plane_state *new; struct drm_plane *plane; - unsigned int zpos[2]; unsigned int i; - for (i = 0; i < 2; i++) - zpos[i] = 0; - - for_each_oldnew_plane_in_state(state->base.state, plane, old, new, i) { + for_each_new_plane_in_state(state->base.state, plane, new, i) { struct tegra_plane *p = to_tegra_plane(plane); unsigned index; /* skip this plane and planes on different CRTCs */ - if (p == tegra || new->crtc != state->base.crtc) + if (p == tegra || p->dc != tegra->dc) continue; index = tegra_plane_get_overlap_index(tegra, p); - state->dependent[index] = false; + if (new->fb && __drm_format_has_alpha(new->fb->format->format)) + state->blending[index].alpha = true; + else + state->blending[index].alpha = false; + + if (new->normalized_zpos > state->base.normalized_zpos) + state->blending[index].top = true; + else + state->blending[index].top = false; /* - * If any of the other planes is on top of this plane and uses - * a format with an alpha component, mark this plane as being - * dependent, meaning it's alpha value will be 1 minus the sum - * of alpha components of the overlapping planes. + * Missing framebuffer means that plane is disabled, in this + * case mark B / C window as top to be able to differentiate + * windows indices order in regards to zPos for the middle + * window X / Y registers programming. */ - if (p->index > tegra->index) { - if (__drm_format_has_alpha(new->fb->format->format)) - state->dependent[index] = true; - - /* keep track of the Z position */ - zpos[index] = p->index; - } + if (!new->fb) + state->blending[index].top = (index == 1); } +} + +static int tegra_plane_setup_transparency(struct tegra_plane *tegra, + struct tegra_plane_state *state) +{ + struct tegra_plane_state *tegra_state; + struct drm_plane_state *new; + struct drm_plane *plane; + int err; /* - * The region where three windows overlap is the intersection of the - * two regions where two windows overlap. It contributes to the area - * if any of the windows on top of it have an alpha component. + * If planes zpos / transparency changed, sibling planes blending + * state may require adjustment and in this case they will be included + * into this atom commit, otherwise blending state is unchanged. */ - for (i = 0; i < 2; i++) - state->dependent[2] = state->dependent[2] || - state->dependent[i]; + err = tegra_plane_check_transparency(tegra, state); + if (err <= 0) + return err; /* - * However, if any of the windows on top of this window is opaque, it - * will completely conceal this window within that area, so avoid the - * window from contributing to the area. + * All planes are now in the atomic state, walk them up and update + * transparency state for each plane. */ - for (i = 0; i < 2; i++) { - if (zpos[i] > tegra->index) - state->dependent[2] = state->dependent[2] && - state->dependent[i]; + drm_for_each_plane(plane, tegra->base.dev) { + struct tegra_plane *p = to_tegra_plane(plane); + + /* skip planes on different CRTCs */ + if (p->dc != tegra->dc) + continue; + + new = drm_atomic_get_new_plane_state(state->base.state, plane); + tegra_state = to_tegra_plane_state(new); + + /* + * There is no need to update blending state for the disabled + * plane. + */ + if (new->fb) + tegra_plane_update_transparency(p, tegra_state); } + + return 0; +} + +int tegra_plane_setup_legacy_state(struct tegra_plane *tegra, + struct tegra_plane_state *state) +{ + int err; + + err = tegra_plane_setup_opacity(tegra, state); + if (err < 0) + return err; + + err = tegra_plane_setup_transparency(tegra, state); + if (err < 0) + return err; + + return 0; } diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h index 6938719e7e5d..e79e6b4a8e0a 100644 --- a/drivers/gpu/drm/tegra/plane.h +++ b/drivers/gpu/drm/tegra/plane.h @@ -34,6 +34,11 @@ static inline struct tegra_plane *to_tegra_plane(struct drm_plane *plane) return container_of(plane, struct tegra_plane, base); } +struct tegra_plane_legacy_blending_state { + bool alpha; + bool top; +}; + struct tegra_plane_state { struct drm_plane_state base; @@ -41,9 +46,11 @@ struct tegra_plane_state { u32 format; u32 swap; + bool bottom_up; + /* used for legacy blending support only */ + struct tegra_plane_legacy_blending_state blending[2]; bool opaque; - bool dependent[3]; }; static inline struct tegra_plane_state * @@ -62,9 +69,7 @@ int tegra_plane_state_add(struct tegra_plane *plane, int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap); bool tegra_plane_format_is_yuv(unsigned int format, bool *planar); -bool tegra_plane_format_has_alpha(unsigned int format); -int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha); -void tegra_plane_check_dependent(struct tegra_plane *tegra, - struct tegra_plane_state *state); +int tegra_plane_setup_legacy_state(struct tegra_plane *tegra, + struct tegra_plane_state *state); #endif /* TEGRA_PLANE_H */ diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index f5794dd49f3b..9f657a63b0bb 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -25,6 +25,7 @@ struct vic_config { const char *firmware; + unsigned int version; }; struct vic { @@ -264,18 +265,21 @@ static const struct tegra_drm_client_ops vic_ops = { static const struct vic_config vic_t124_config = { .firmware = NVIDIA_TEGRA_124_VIC_FIRMWARE, + .version = 0x40, }; #define NVIDIA_TEGRA_210_VIC_FIRMWARE "nvidia/tegra210/vic04_ucode.bin" static const struct vic_config vic_t210_config = { .firmware = NVIDIA_TEGRA_210_VIC_FIRMWARE, + .version = 0x21, }; #define NVIDIA_TEGRA_186_VIC_FIRMWARE "nvidia/tegra186/vic04_ucode.bin" static const struct vic_config vic_t186_config = { .firmware = NVIDIA_TEGRA_186_VIC_FIRMWARE, + .version = 0x18, }; static const struct of_device_id vic_match[] = { @@ -342,6 +346,7 @@ static int vic_probe(struct platform_device *pdev) vic->dev = dev; INIT_LIST_HEAD(&vic->client.list); + vic->client.version = vic->config->version; vic->client.ops = &vic_ops; err = host1x_client_register(&vic->client.base); diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 1b278a22c8b7..1067e702c22c 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -224,7 +224,7 @@ static void tilcdc_crtc_set_clk(struct drm_crtc *crtc) ret = clk_set_rate(priv->clk, req_rate * clkdiv); clk_rate = clk_get_rate(priv->clk); - if (ret < 0) { + if (ret < 0 || tilcdc_pclk_diff(req_rate, clk_rate) > 5) { /* * If we fail to set the clock rate (some architectures don't * use the common clock framework yet and may not implement diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c index 7c2485fe88d8..ea4d59eb8966 100644 --- a/drivers/gpu/drm/ttm/ttm_agp_backend.c +++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 98e06f8bf23b..5d8688e522d1 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA @@ -1175,7 +1176,6 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev, reservation_object_init(&bo->ttm_resv); atomic_inc(&bo->bdev->glob->bo_count); drm_vma_node_reset(&bo->vma_node); - bo->priority = 0; /* * For ttm_bo_type_device buffers, allocate diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c index a7c232dc39cb..18d3debcc949 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_manager.c +++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2007-2010 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2ebbae6067ab..f2c167702eef 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2007-2009 VMware, Inc., Palo Alto, CA., USA @@ -39,6 +40,11 @@ #include <linux/module.h> #include <linux/reservation.h> +struct ttm_transfer_obj { + struct ttm_buffer_object base; + struct ttm_buffer_object *bo; +}; + void ttm_bo_free_old_node(struct ttm_buffer_object *bo) { ttm_bo_mem_put(bo, &bo->mem); @@ -454,7 +460,11 @@ EXPORT_SYMBOL(ttm_bo_move_memcpy); static void ttm_transfered_destroy(struct ttm_buffer_object *bo) { - kfree(bo); + struct ttm_transfer_obj *fbo; + + fbo = container_of(bo, struct ttm_transfer_obj, base); + ttm_bo_unref(&fbo->bo); + kfree(fbo); } /** @@ -475,14 +485,15 @@ static void ttm_transfered_destroy(struct ttm_buffer_object *bo) static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, struct ttm_buffer_object **new_obj) { - struct ttm_buffer_object *fbo; + struct ttm_transfer_obj *fbo; int ret; fbo = kmalloc(sizeof(*fbo), GFP_KERNEL); if (!fbo) return -ENOMEM; - *fbo = *bo; + fbo->base = *bo; + fbo->bo = ttm_bo_reference(bo); /** * Fix up members that we shouldn't copy directly: @@ -490,25 +501,25 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, */ atomic_inc(&bo->bdev->glob->bo_count); - INIT_LIST_HEAD(&fbo->ddestroy); - INIT_LIST_HEAD(&fbo->lru); - INIT_LIST_HEAD(&fbo->swap); - INIT_LIST_HEAD(&fbo->io_reserve_lru); - mutex_init(&fbo->wu_mutex); - fbo->moving = NULL; - drm_vma_node_reset(&fbo->vma_node); - atomic_set(&fbo->cpu_writers, 0); - - kref_init(&fbo->list_kref); - kref_init(&fbo->kref); - fbo->destroy = &ttm_transfered_destroy; - fbo->acc_size = 0; - fbo->resv = &fbo->ttm_resv; - reservation_object_init(fbo->resv); - ret = reservation_object_trylock(fbo->resv); + INIT_LIST_HEAD(&fbo->base.ddestroy); + INIT_LIST_HEAD(&fbo->base.lru); + INIT_LIST_HEAD(&fbo->base.swap); + INIT_LIST_HEAD(&fbo->base.io_reserve_lru); + mutex_init(&fbo->base.wu_mutex); + fbo->base.moving = NULL; + drm_vma_node_reset(&fbo->base.vma_node); + atomic_set(&fbo->base.cpu_writers, 0); + + kref_init(&fbo->base.list_kref); + kref_init(&fbo->base.kref); + fbo->base.destroy = &ttm_transfered_destroy; + fbo->base.acc_size = 0; + fbo->base.resv = &fbo->base.ttm_resv; + reservation_object_init(fbo->base.resv); + ret = reservation_object_trylock(fbo->base.resv); WARN_ON(!ret); - *new_obj = fbo; + *new_obj = &fbo->base; return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 8eba95b3c737..c7ece7613a6a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 3dca206e85f7..e73ae0d22897 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c index 913f4318cdc0..20694b8a01ca 100644 --- a/drivers/gpu/drm/ttm/ttm_lock.c +++ b/drivers/gpu/drm/ttm/ttm_lock.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2007-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index 27856c55dc84..450387c92b63 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index 66fc6395eb54..6ff40c041d79 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index 1aa2baa83959..74f1b1eb1f8e 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2009-2013 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index f0481b7b60c5..06c94e3a5f15 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -910,7 +910,8 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags, while (npages >= HPAGE_PMD_NR) { gfp_t huge_flags = gfp_flags; - huge_flags |= GFP_TRANSHUGE; + huge_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY | + __GFP_KSWAPD_RECLAIM; huge_flags &= ~__GFP_MOVABLE; huge_flags &= ~__GFP_COMP; p = alloc_pages(huge_flags, HPAGE_PMD_ORDER); @@ -1027,11 +1028,15 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) GFP_USER | GFP_DMA32, "uc dma", 0); ttm_page_pool_init_locked(&_manager->wc_pool_huge, - GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP), + (GFP_TRANSHUGE_LIGHT | __GFP_NORETRY | + __GFP_KSWAPD_RECLAIM) & + ~(__GFP_MOVABLE | __GFP_COMP), "wc huge", order); ttm_page_pool_init_locked(&_manager->uc_pool_huge, - GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP) + (GFP_TRANSHUGE_LIGHT | __GFP_NORETRY | + __GFP_KSWAPD_RECLAIM) & + ~(__GFP_MOVABLE | __GFP_COMP) , "uc huge", order); _manager->options.max_size = max_pages; diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index 8a25d1974385..f63d99c302e4 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -910,7 +910,8 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge) gfp_flags |= __GFP_ZERO; if (huge) { - gfp_flags |= GFP_TRANSHUGE; + gfp_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY | + __GFP_KSWAPD_RECLAIM; gfp_flags &= ~__GFP_MOVABLE; gfp_flags &= ~__GFP_COMP; } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 7e672be987b5..a1e543972ca7 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA diff --git a/drivers/gpu/drm/v3d/Kconfig b/drivers/gpu/drm/v3d/Kconfig index a0c0259355bd..1552bf552c94 100644 --- a/drivers/gpu/drm/v3d/Kconfig +++ b/drivers/gpu/drm/v3d/Kconfig @@ -3,6 +3,7 @@ config DRM_V3D depends on ARCH_BCM || ARCH_BCMSTB || COMPILE_TEST depends on DRM depends on COMMON_CLK + depends on MMU select DRM_SCHED help Choose this option if you have a system that has a Broadcom diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 38e8041b5f0c..cdb582043b4f 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -135,7 +135,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file) drm_sched_entity_init(&v3d->queue[i].sched, &v3d_priv->sched_entity[i], &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL], - 32, NULL); + NULL); } file->driver_priv = v3d_priv; diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 868f39a9ae8a..dcadf793ee80 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -741,6 +741,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data) struct vc4_async_flip_state { struct drm_crtc *crtc; struct drm_framebuffer *fb; + struct drm_framebuffer *old_fb; struct drm_pending_vblank_event *event; struct vc4_seqno_cb cb; @@ -770,6 +771,23 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) drm_crtc_vblank_put(crtc); drm_framebuffer_put(flip_state->fb); + + /* Decrement the BO usecnt in order to keep the inc/dec calls balanced + * when the planes are updated through the async update path. + * FIXME: we should move to generic async-page-flip when it's + * available, so that we can get rid of this hand-made cleanup_fb() + * logic. + */ + if (flip_state->old_fb) { + struct drm_gem_cma_object *cma_bo; + struct vc4_bo *bo; + + cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0); + bo = to_vc4_bo(&cma_bo->base); + vc4_bo_dec_usecnt(bo); + drm_framebuffer_put(flip_state->old_fb); + } + kfree(flip_state); up(&vc4->async_modeset); @@ -794,9 +812,22 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + /* Increment the BO usecnt here, so that we never end up with an + * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the + * plane is later updated through the non-async path. + * FIXME: we should move to generic async-page-flip when it's + * available, so that we can get rid of this hand-made prepare_fb() + * logic. + */ + ret = vc4_bo_inc_usecnt(bo); + if (ret) + return ret; + flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL); - if (!flip_state) + if (!flip_state) { + vc4_bo_dec_usecnt(bo); return -ENOMEM; + } drm_framebuffer_get(fb); flip_state->fb = fb; @@ -807,10 +838,23 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, ret = down_interruptible(&vc4->async_modeset); if (ret) { drm_framebuffer_put(fb); + vc4_bo_dec_usecnt(bo); kfree(flip_state); return ret; } + /* Save the current FB before it's replaced by the new one in + * drm_atomic_set_fb_for_plane(). We'll need the old FB in + * vc4_async_page_flip_complete() to decrement the BO usecnt and keep + * it consistent. + * FIXME: we should move to generic async-page-flip when it's + * available, so that we can get rid of this hand-made cleanup_fb() + * logic. + */ + flip_state->old_fb = plane->state->fb; + if (flip_state->old_fb) + drm_framebuffer_get(flip_state->old_fb); + WARN_ON(drm_crtc_vblank_get(crtc) != 0); /* Immediately update the plane's legacy fb pointer, so that later diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c index 72c9dbd81d7f..f185812970da 100644 --- a/drivers/gpu/drm/vc4/vc4_dpi.c +++ b/drivers/gpu/drm/vc4/vc4_dpi.c @@ -96,7 +96,6 @@ struct vc4_dpi { struct platform_device *pdev; struct drm_encoder *encoder; - struct drm_connector *connector; void __iomem *regs; @@ -164,14 +163,31 @@ static void vc4_dpi_encoder_disable(struct drm_encoder *encoder) static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) { + struct drm_device *dev = encoder->dev; struct drm_display_mode *mode = &encoder->crtc->mode; struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder); struct vc4_dpi *dpi = vc4_encoder->dpi; + struct drm_connector_list_iter conn_iter; + struct drm_connector *connector = NULL, *connector_scan; u32 dpi_c = DPI_ENABLE | DPI_OUTPUT_ENABLE_MODE; int ret; - if (dpi->connector->display_info.num_bus_formats) { - u32 bus_format = dpi->connector->display_info.bus_formats[0]; + /* Look up the connector attached to DPI so we can get the + * bus_format. Ideally the bridge would tell us the + * bus_format we want, but it doesn't yet, so assume that it's + * uniform throughout the bridge chain. + */ + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector_scan, &conn_iter) { + if (connector_scan->encoder == encoder) { + connector = connector_scan; + break; + } + } + drm_connector_list_iter_end(&conn_iter); + + if (connector && connector->display_info.num_bus_formats) { + u32 bus_format = connector->display_info.bus_formats[0]; switch (bus_format) { case MEDIA_BUS_FMT_RGB888_1X24: @@ -199,6 +215,9 @@ static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) DRM_ERROR("Unknown media bus format %d\n", bus_format); break; } + } else { + /* Default to 24bit if no connector found. */ + dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, DPI_FORMAT); } if (mode->flags & DRM_MODE_FLAG_NHSYNC) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index d9b8b701d2ce..466d0a27b415 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -130,6 +130,7 @@ static void vc4_close(struct drm_device *dev, struct drm_file *file) struct vc4_file *vc4file = file->driver_priv; vc4_perfmon_close_file(vc4file); + kfree(vc4file); } static const struct vm_operations_struct vc4_vm_ops = { diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 3922f5f5807d..36ffb630bf47 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -507,7 +507,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, * the scl fields here. */ if (num_planes == 1) { - scl0 = vc4_get_scl_field(state, 1); + scl0 = vc4_get_scl_field(state, 0); scl1 = scl0; } else { scl0 = vc4_get_scl_field(state, 1); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index 3c5935f3d49e..aaac8d11e705 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -439,30 +439,22 @@ static int vmw_fb_compute_depth(struct fb_var_screeninfo *var, static int vmwgfx_set_config_internal(struct drm_mode_set *set) { struct drm_crtc *crtc = set->crtc; - struct drm_modeset_acquire_ctx *ctx; - struct drm_device *dev = set->crtc->dev; + struct drm_modeset_acquire_ctx ctx; int ret; - ctx = dev->mode_config.acquire_ctx; + drm_modeset_acquire_init(&ctx, 0); restart: - ret = crtc->funcs->set_config(set, ctx); + ret = crtc->funcs->set_config(set, &ctx); if (ret == -EDEADLK) { - dev->mode_config.acquire_ctx = NULL; - -retry_locking: - drm_modeset_backoff(ctx); - - ret = drm_modeset_lock_all_ctx(dev, ctx); - if (ret) - goto retry_locking; - - dev->mode_config.acquire_ctx = ctx; - + drm_modeset_backoff(&ctx); goto restart; } + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + return ret; } @@ -600,7 +592,6 @@ static int vmw_fb_set_par(struct fb_info *info) } mutex_lock(&par->bo_mutex); - drm_modeset_lock_all(vmw_priv->dev); ret = vmw_fb_kms_framebuffer(info); if (ret) goto out_unlock; @@ -633,7 +624,6 @@ out_unlock: drm_mode_destroy(vmw_priv->dev, old_mode); par->set_mode = mode; - drm_modeset_unlock_all(vmw_priv->dev); mutex_unlock(&par->bo_mutex); return ret; @@ -689,18 +679,14 @@ int vmw_fb_init(struct vmw_private *vmw_priv) par->max_width = fb_width; par->max_height = fb_height; - drm_modeset_lock_all(vmw_priv->dev); ret = vmw_kms_fbdev_init_data(vmw_priv, 0, par->max_width, par->max_height, &par->con, &par->crtc, &init_mode); - if (ret) { - drm_modeset_unlock_all(vmw_priv->dev); + if (ret) goto err_kms; - } info->var.xres = init_mode->hdisplay; info->var.yres = init_mode->vdisplay; - drm_modeset_unlock_all(vmw_priv->dev); /* * Create buffers and alloc memory @@ -808,7 +794,9 @@ int vmw_fb_close(struct vmw_private *vmw_priv) cancel_delayed_work_sync(&par->local_work); unregister_framebuffer(info); + mutex_lock(&par->bo_mutex); (void) vmw_fb_kms_detach(par, true, true); + mutex_unlock(&par->bo_mutex); vfree(par->vmalloc); framebuffer_release(info); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index e6889682205f..ef96ba7432ad 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2600,6 +2600,7 @@ void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx, vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf, out_fence, NULL); + vmw_dmabuf_unreference(&ctx->buf); vmw_resource_unreserve(res, false, NULL, 0); mutex_unlock(&res->dev_priv->cmdbuf_mutex); } @@ -2685,7 +2686,9 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, struct vmw_display_unit *du; struct drm_display_mode *mode; int i = 0; + int ret = 0; + mutex_lock(&dev_priv->dev->mode_config.mutex); list_for_each_entry(con, &dev_priv->dev->mode_config.connector_list, head) { if (i == unit) @@ -2696,7 +2699,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, if (i != unit) { DRM_ERROR("Could not find initial display unit.\n"); - return -EINVAL; + ret = -EINVAL; + goto out_unlock; } if (list_empty(&con->modes)) @@ -2704,7 +2708,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, if (list_empty(&con->modes)) { DRM_ERROR("Could not find initial display mode.\n"); - return -EINVAL; + ret = -EINVAL; + goto out_unlock; } du = vmw_connector_to_du(con); @@ -2725,7 +2730,10 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, head); } - return 0; + out_unlock: + mutex_unlock(&dev_priv->dev->mode_config.mutex); + + return ret; } /** diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 28541b280739..91df51e631b2 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -51,7 +51,7 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb) struct host1x_cdma *cdma = pb_to_cdma(pb); struct host1x *host1x = cdma_to_host1x(cdma); - if (!pb->phys) + if (!pb->mapped) return; if (host1x->domain) { @@ -127,7 +127,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) iommu_free_iova: __free_iova(&host1x->iova, alloc); iommu_free_mem: - dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys); + dma_free_wc(host1x->dev, size, pb->mapped, pb->phys); return err; } @@ -247,7 +247,7 @@ static void cdma_start_timer_locked(struct host1x_cdma *cdma, static void stop_cdma_timer_locked(struct host1x_cdma *cdma) { cancel_delayed_work(&cdma->timeout.wq); - cdma->timeout.client = 0; + cdma->timeout.client = NULL; } /* diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h index 286d49386be9..e97e17b82370 100644 --- a/drivers/gpu/host1x/cdma.h +++ b/drivers/gpu/host1x/cdma.h @@ -44,7 +44,7 @@ struct host1x_job; struct push_buffer { void *mapped; /* mapped pushbuffer memory */ dma_addr_t dma; /* device address of pushbuffer */ - phys_addr_t phys; /* physical address of pushbuffer */ + dma_addr_t phys; /* physical address of pushbuffer */ u32 fence; /* index we've written */ u32 pos; /* index to write to */ u32 size; @@ -58,7 +58,7 @@ struct buffer_timeout { u32 syncpt_val; /* syncpt value when completed */ ktime_t start_ktime; /* starting time */ /* context timeout information */ - int client; + struct host1x_client *client; }; enum cdma_event { diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c index dc77ec452ffc..329e4a3d8ae7 100644 --- a/drivers/gpu/host1x/debug.c +++ b/drivers/gpu/host1x/debug.c @@ -103,7 +103,7 @@ static void show_syncpts(struct host1x *m, struct output *o) static void show_all(struct host1x *m, struct output *o, bool show_fifo) { - int i; + unsigned int i; host1x_hw_show_mlocks(m, o); show_syncpts(m, o); diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 03db71173f5d..f1d5f76e9c33 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -223,10 +223,14 @@ static int host1x_probe(struct platform_device *pdev) struct iommu_domain_geometry *geometry; unsigned long order; + err = iova_cache_get(); + if (err < 0) + goto put_group; + host->domain = iommu_domain_alloc(&platform_bus_type); if (!host->domain) { err = -ENOMEM; - goto put_group; + goto put_cache; } err = iommu_attach_group(host->domain, host->group); @@ -234,6 +238,7 @@ static int host1x_probe(struct platform_device *pdev) if (err == -ENODEV) { iommu_domain_free(host->domain); host->domain = NULL; + iova_cache_put(); iommu_group_put(host->group); host->group = NULL; goto skip_iommu; @@ -308,6 +313,9 @@ fail_detach_device: fail_free_domain: if (host->domain) iommu_domain_free(host->domain); +put_cache: + if (host->group) + iova_cache_put(); put_group: iommu_group_put(host->group); @@ -328,6 +336,7 @@ static int host1x_remove(struct platform_device *pdev) put_iova_domain(&host->iova); iommu_detach_group(host->domain, host->group); iommu_domain_free(host->domain); + iova_cache_put(); iommu_group_put(host->group); } diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index 43e9fabb43a1..36f44ffebe73 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -78,7 +78,6 @@ struct host1x_syncpt_ops { void (*load_wait_base)(struct host1x_syncpt *syncpt); u32 (*load)(struct host1x_syncpt *syncpt); int (*cpu_incr)(struct host1x_syncpt *syncpt); - int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr); void (*assign_to_channel)(struct host1x_syncpt *syncpt, struct host1x_channel *channel); void (*enable_protection)(struct host1x *host); @@ -183,13 +182,6 @@ static inline int host1x_hw_syncpt_cpu_incr(struct host1x *host, return host->syncpt_op->cpu_incr(sp); } -static inline int host1x_hw_syncpt_patch_wait(struct host1x *host, - struct host1x_syncpt *sp, - void *patch_addr) -{ - return host->syncpt_op->patch_wait(sp, patch_addr); -} - static inline void host1x_hw_syncpt_assign_to_channel( struct host1x *host, struct host1x_syncpt *sp, struct host1x_channel *ch) diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 9af758785a11..d188f9068b91 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -104,8 +104,7 @@ static int channel_submit(struct host1x_job *job) sp = host->syncpt + job->syncpt_id; trace_host1x_channel_submit(dev_name(ch->dev), job->num_gathers, job->num_relocs, - job->num_waitchk, job->syncpt_id, - job->syncpt_incrs); + job->syncpt_id, job->syncpt_incrs); /* before error checks, return current max */ prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); @@ -165,7 +164,7 @@ static int channel_submit(struct host1x_job *job) trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval); /* schedule a submit complete interrupt */ - err = host1x_intr_add_action(host, job->syncpt_id, syncval, + err = host1x_intr_add_action(host, sp, syncval, HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch, completed_waiter, NULL); completed_waiter = NULL; diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c index 7dfd47d74f89..a23bb3352d02 100644 --- a/drivers/gpu/host1x/hw/syncpt_hw.c +++ b/drivers/gpu/host1x/hw/syncpt_hw.c @@ -96,16 +96,6 @@ static int syncpt_cpu_incr(struct host1x_syncpt *sp) return 0; } -/* remove a wait pointed to by patch_addr */ -static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr) -{ - u32 override = host1x_class_host_wait_syncpt(HOST1X_SYNCPT_RESERVED, 0); - - *((u32 *)patch_addr) = override; - - return 0; -} - /** * syncpt_assign_to_channel() - Assign syncpoint to channel * @sp: syncpoint @@ -156,7 +146,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = { .load_wait_base = syncpt_read_wait_base, .load = syncpt_load, .cpu_incr = syncpt_cpu_incr, - .patch_wait = syncpt_patch_wait, .assign_to_channel = syncpt_assign_to_channel, .enable_protection = syncpt_enable_protection, }; diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 8b4fad0ab35d..9629c009d10f 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -144,7 +144,7 @@ static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = { static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT]) { struct list_head *head = completed; - int i; + unsigned int i; for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) { action_handler handler = action_handlers[i]; @@ -211,11 +211,11 @@ static void syncpt_thresh_work(struct work_struct *work) host1x_syncpt_load(host->syncpt + id)); } -int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh, - enum host1x_intr_action action, void *data, - struct host1x_waitlist *waiter, void **ref) +int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, + u32 thresh, enum host1x_intr_action action, + void *data, struct host1x_waitlist *waiter, + void **ref) { - struct host1x_syncpt *syncpt; int queue_was_empty; if (waiter == NULL) { @@ -234,19 +234,17 @@ int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh, waiter->data = data; waiter->count = 1; - syncpt = host->syncpt + id; - spin_lock(&syncpt->intr.lock); queue_was_empty = list_empty(&syncpt->intr.wait_head); if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) { /* added at head of list - new threshold value */ - host1x_hw_intr_set_syncpt_threshold(host, id, thresh); + host1x_hw_intr_set_syncpt_threshold(host, syncpt->id, thresh); /* added as first waiter - enable interrupt */ if (queue_was_empty) - host1x_hw_intr_enable_syncpt_intr(host, id); + host1x_hw_intr_enable_syncpt_intr(host, syncpt->id); } spin_unlock(&syncpt->intr.lock); diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h index 1370c2bb75b8..6db96af484fe 100644 --- a/drivers/gpu/host1x/intr.h +++ b/drivers/gpu/host1x/intr.h @@ -22,6 +22,7 @@ #include <linux/interrupt.h> #include <linux/workqueue.h> +struct host1x_syncpt; struct host1x; enum host1x_intr_action { @@ -75,9 +76,10 @@ struct host1x_waitlist { * * This is a non-blocking api. */ -int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh, - enum host1x_intr_action action, void *data, - struct host1x_waitlist *waiter, void **ref); +int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, + u32 thresh, enum host1x_intr_action action, + void *data, struct host1x_waitlist *waiter, + void **ref); /* * Unreference an action submitted to host1x_intr_add_action(). diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index db509ab8874e..e2f4a4d93d20 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -34,8 +34,7 @@ #define HOST1X_WAIT_SYNCPT_OFFSET 0x8 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, - u32 num_cmdbufs, u32 num_relocs, - u32 num_waitchks) + u32 num_cmdbufs, u32 num_relocs) { struct host1x_job *job = NULL; unsigned int num_unpins = num_cmdbufs + num_relocs; @@ -46,7 +45,6 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, total = sizeof(struct host1x_job) + (u64)num_relocs * sizeof(struct host1x_reloc) + (u64)num_unpins * sizeof(struct host1x_job_unpin_data) + - (u64)num_waitchks * sizeof(struct host1x_waitchk) + (u64)num_cmdbufs * sizeof(struct host1x_job_gather) + (u64)num_unpins * sizeof(dma_addr_t) + (u64)num_unpins * sizeof(u32 *); @@ -62,12 +60,10 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, /* Redistribute memory to the structs */ mem += sizeof(struct host1x_job); - job->relocarray = num_relocs ? mem : NULL; + job->relocs = num_relocs ? mem : NULL; mem += num_relocs * sizeof(struct host1x_reloc); job->unpins = num_unpins ? mem : NULL; mem += num_unpins * sizeof(struct host1x_job_unpin_data); - job->waitchk = num_waitchks ? mem : NULL; - mem += num_waitchks * sizeof(struct host1x_waitchk); job->gathers = num_cmdbufs ? mem : NULL; mem += num_cmdbufs * sizeof(struct host1x_job_gather); job->addr_phys = num_unpins ? mem : NULL; @@ -100,84 +96,18 @@ void host1x_job_put(struct host1x_job *job) EXPORT_SYMBOL(host1x_job_put); void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, - u32 words, u32 offset) + unsigned int words, unsigned int offset) { - struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers]; + struct host1x_job_gather *gather = &job->gathers[job->num_gathers]; + + gather->words = words; + gather->bo = bo; + gather->offset = offset; - cur_gather->words = words; - cur_gather->bo = bo; - cur_gather->offset = offset; job->num_gathers++; } EXPORT_SYMBOL(host1x_job_add_gather); -/* - * NULL an already satisfied WAIT_SYNCPT host method, by patching its - * args in the command stream. The method data is changed to reference - * a reserved (never given out or incr) HOST1X_SYNCPT_RESERVED syncpt - * with a matching threshold value of 0, so is guaranteed to be popped - * by the host HW. - */ -static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp, - struct host1x_bo *h, u32 offset) -{ - void *patch_addr = NULL; - - /* patch the wait */ - patch_addr = host1x_bo_kmap(h, offset >> PAGE_SHIFT); - if (patch_addr) { - host1x_syncpt_patch_wait(sp, - patch_addr + (offset & ~PAGE_MASK)); - host1x_bo_kunmap(h, offset >> PAGE_SHIFT, patch_addr); - } else - pr_err("Could not map cmdbuf for wait check\n"); -} - -/* - * Check driver supplied waitchk structs for syncpt thresholds - * that have already been satisfied and NULL the comparison (to - * avoid a wrap condition in the HW). - */ -static int do_waitchks(struct host1x_job *job, struct host1x *host, - struct host1x_job_gather *g) -{ - struct host1x_bo *patch = g->bo; - int i; - - /* compare syncpt vs wait threshold */ - for (i = 0; i < job->num_waitchk; i++) { - struct host1x_waitchk *wait = &job->waitchk[i]; - struct host1x_syncpt *sp = - host1x_syncpt_get(host, wait->syncpt_id); - - /* validate syncpt id */ - if (wait->syncpt_id > host1x_syncpt_nb_pts(host)) - continue; - - /* skip all other gathers */ - if (patch != wait->bo) - continue; - - trace_host1x_syncpt_wait_check(wait->bo, wait->offset, - wait->syncpt_id, wait->thresh, - host1x_syncpt_read_min(sp)); - - if (host1x_syncpt_is_expired(sp, wait->thresh)) { - dev_dbg(host->dev, - "drop WAIT id %u (%s) thresh 0x%x, min 0x%x\n", - wait->syncpt_id, sp->name, wait->thresh, - host1x_syncpt_read_min(sp)); - - host1x_syncpt_patch_offset(sp, patch, - g->offset + wait->offset); - } - - wait->bo = NULL; - } - - return 0; -} - static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { unsigned int i; @@ -186,7 +116,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) job->num_unpins = 0; for (i = 0; i < job->num_relocs; i++) { - struct host1x_reloc *reloc = &job->relocarray[i]; + struct host1x_reloc *reloc = &job->relocs[i]; struct sg_table *sgt; dma_addr_t phys_addr; @@ -267,14 +197,14 @@ unpin: static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) { - int i = 0; u32 last_page = ~0; void *cmdbuf_page_addr = NULL; struct host1x_bo *cmdbuf = g->bo; + unsigned int i; /* pin & patch the relocs for one gather */ for (i = 0; i < job->num_relocs; i++) { - struct host1x_reloc *reloc = &job->relocarray[i]; + struct host1x_reloc *reloc = &job->relocs[i]; u32 reloc_addr = (job->reloc_addr_phys[i] + reloc->target.offset) >> reloc->shift; u32 *target; @@ -331,17 +261,6 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, return true; } -static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf, - unsigned int offset) -{ - offset *= sizeof(u32); - - if (wait->bo != cmdbuf || wait->offset != offset) - return false; - - return true; -} - struct host1x_firewall { struct host1x_job *job; struct device *dev; @@ -349,9 +268,6 @@ struct host1x_firewall { unsigned int num_relocs; struct host1x_reloc *reloc; - unsigned int num_waitchks; - struct host1x_waitchk *waitchk; - struct host1x_bo *cmdbuf; unsigned int offset; @@ -378,20 +294,6 @@ static int check_register(struct host1x_firewall *fw, unsigned long offset) fw->reloc++; } - if (offset == HOST1X_WAIT_SYNCPT_OFFSET) { - if (fw->class != HOST1X_CLASS_HOST1X) - return -EINVAL; - - if (!fw->num_waitchks) - return -EINVAL; - - if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset)) - return -EINVAL; - - fw->num_waitchks--; - fw->waitchk++; - } - return 0; } @@ -550,14 +452,12 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) struct host1x_firewall fw; size_t size = 0; size_t offset = 0; - int i; + unsigned int i; fw.job = job; fw.dev = dev; - fw.reloc = job->relocarray; + fw.reloc = job->relocs; fw.num_relocs = job->num_relocs; - fw.waitchk = job->waitchk; - fw.num_waitchks = job->num_waitchk; fw.class = job->class; for (i = 0; i < job->num_gathers; i++) { @@ -604,8 +504,8 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) offset += g->words * sizeof(u32); } - /* No relocs and waitchks should remain at this point */ - if (fw.num_relocs || fw.num_waitchks) + /* No relocs should remain at this point */ + if (fw.num_relocs) return -EINVAL; return 0; @@ -616,19 +516,6 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) int err; unsigned int i, j; struct host1x *host = dev_get_drvdata(dev->parent); - DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host)); - - bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host)); - for (i = 0; i < job->num_waitchk; i++) { - u32 syncpt_id = job->waitchk[i].syncpt_id; - - if (syncpt_id < host1x_syncpt_nb_pts(host)) - set_bit(syncpt_id, waitchk_mask); - } - - /* get current syncpt values for waitchk */ - for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host)) - host1x_syncpt_load(host->syncpt + i); /* pin memory */ err = pin_job(host, job); @@ -663,10 +550,6 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) err = do_relocs(job, g); if (err) break; - - err = do_waitchks(job, host, g); - if (err) - break; } out: diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 4bda51d503ec..188400e00192 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -20,10 +20,10 @@ #define __HOST1X_JOB_H struct host1x_job_gather { - u32 words; + unsigned int words; dma_addr_t base; struct host1x_bo *bo; - u32 offset; + unsigned int offset; bool handled; }; diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index a2a952adc136..a5dbf1ba4645 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -57,8 +57,8 @@ static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, struct host1x_client *client, unsigned long flags) { - int i; struct host1x_syncpt *sp = host->syncpt; + unsigned int i; char *name; mutex_lock(&host->syncpt_mutex); @@ -255,7 +255,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, } /* schedule a wakeup when the syncpoint value is reached */ - err = host1x_intr_add_action(sp->host, sp->id, thresh, + err = host1x_intr_add_action(sp->host, sp, thresh, HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, &wq, waiter, &ref); if (err) @@ -373,12 +373,6 @@ bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh) return (s32)(current_val - thresh) >= 0; } -/* remove a wait pointed to by patch_addr */ -int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr) -{ - return host1x_hw_syncpt_patch_wait(sp->host, sp, patch_addr); -} - int host1x_syncpt_init(struct host1x *host) { struct host1x_syncpt_base *bases; diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index 9d88d37c2397..d98e22325e9d 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -124,7 +124,4 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp) return sp->id < host1x_syncpt_nb_pts(sp->host); } -/* Patch a wait by replacing it with a wait for syncpt 0 value 0 */ -int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr); - #endif diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 60252fd796f6..0000434a1fbd 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -462,10 +462,11 @@ config HID_LENOVO select NEW_LEDS select LEDS_CLASS ---help--- - Support for Lenovo devices that are not fully compliant with HID standard. + Support for IBM/Lenovo devices that are not fully compliant with HID standard. - Say Y if you want support for the non-compliant features of the Lenovo - Thinkpad standalone keyboards, e.g: + Say Y if you want support for horizontal scrolling of the IBM/Lenovo + Scrollpoint mice or the non-compliant features of the Lenovo Thinkpad + standalone keyboards, e.g: - ThinkPad USB Keyboard with TrackPoint (supports extra LEDs and trackpoint configuration) - ThinkPad Compact Bluetooth Keyboard with TrackPoint (supports Fn keys) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 0b5cc910f62e..46f5ecd11bf7 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -552,6 +552,13 @@ #define USB_VENDOR_ID_HUION 0x256c #define USB_DEVICE_ID_HUION_TABLET 0x006e +#define USB_VENDOR_ID_IBM 0x04b3 +#define USB_DEVICE_ID_IBM_SCROLLPOINT_III 0x3100 +#define USB_DEVICE_ID_IBM_SCROLLPOINT_PRO 0x3103 +#define USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL 0x3105 +#define USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL 0x3108 +#define USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO 0x3109 + #define USB_VENDOR_ID_IDEACOM 0x1cb6 #define USB_DEVICE_ID_IDEACOM_IDC6650 0x6650 #define USB_DEVICE_ID_IDEACOM_IDC6651 0x6651 @@ -684,6 +691,7 @@ #define USB_DEVICE_ID_LENOVO_TPKBD 0x6009 #define USB_DEVICE_ID_LENOVO_CUSBKBD 0x6047 #define USB_DEVICE_ID_LENOVO_CBTKBD 0x6048 +#define USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL 0x6049 #define USB_DEVICE_ID_LENOVO_TPPRODOCK 0x6067 #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 @@ -964,6 +972,7 @@ #define USB_DEVICE_ID_SIS817_TOUCH 0x0817 #define USB_DEVICE_ID_SIS_TS 0x1013 #define USB_DEVICE_ID_SIS1030_TOUCH 0x1030 +#define USB_DEVICE_ID_SIS10FB_TOUCH 0x10fb #define USB_VENDOR_ID_SKYCABLE 0x1223 #define USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER 0x3F07 diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 1ac4ff4d57a6..643b6eb54442 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -6,6 +6,17 @@ * * Copyright (c) 2012 Bernhard Seibold * Copyright (c) 2014 Jamie Lentin <jm@lentin.co.uk> + * + * Linux IBM/Lenovo Scrollpoint mouse driver: + * - IBM Scrollpoint III + * - IBM Scrollpoint Pro + * - IBM Scrollpoint Optical + * - IBM Scrollpoint Optical 800dpi + * - IBM Scrollpoint Optical 800dpi Pro + * - Lenovo Scrollpoint Optical + * + * Copyright (c) 2012 Peter De Wachter <pdewacht@gmail.com> + * Copyright (c) 2018 Peter Ganzhorn <peter.ganzhorn@gmail.com> */ /* @@ -160,6 +171,17 @@ static int lenovo_input_mapping_cptkbd(struct hid_device *hdev, return 0; } +static int lenovo_input_mapping_scrollpoint(struct hid_device *hdev, + struct hid_input *hi, struct hid_field *field, + struct hid_usage *usage, unsigned long **bit, int *max) +{ + if (usage->hid == HID_GD_Z) { + hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL); + return 1; + } + return 0; +} + static int lenovo_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) @@ -172,6 +194,14 @@ static int lenovo_input_mapping(struct hid_device *hdev, case USB_DEVICE_ID_LENOVO_CBTKBD: return lenovo_input_mapping_cptkbd(hdev, hi, field, usage, bit, max); + case USB_DEVICE_ID_IBM_SCROLLPOINT_III: + case USB_DEVICE_ID_IBM_SCROLLPOINT_PRO: + case USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL: + case USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL: + case USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO: + case USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL: + return lenovo_input_mapping_scrollpoint(hdev, hi, field, + usage, bit, max); default: return 0; } @@ -883,6 +913,12 @@ static const struct hid_device_id lenovo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_III) }, + { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_PRO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL) }, + { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL) }, + { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL) }, { } }; diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 963328674e93..cc33622253aa 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -174,6 +174,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, { I2C_VENDOR_ID_RAYD, I2C_PRODUCT_ID_RAYD_3118, I2C_HID_QUIRK_RESEND_REPORT_DESCR }, + { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS10FB_TOUCH, + I2C_HID_QUIRK_RESEND_REPORT_DESCR }, { 0, 0 } }; diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index 157b44aacdff..acc2536c8094 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -77,21 +77,21 @@ static void process_recv(struct ishtp_cl *hid_ishtp_cl, void *recv_buf, struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data; int curr_hid_dev = client_data->cur_hid_dev; - if (data_len < sizeof(struct hostif_msg_hdr)) { - dev_err(&client_data->cl_device->dev, - "[hid-ish]: error, received %u which is less than data header %u\n", - (unsigned int)data_len, - (unsigned int)sizeof(struct hostif_msg_hdr)); - ++client_data->bad_recv_cnt; - ish_hw_reset(hid_ishtp_cl->dev); - return; - } - payload = recv_buf + sizeof(struct hostif_msg_hdr); total_len = data_len; cur_pos = 0; do { + if (cur_pos + sizeof(struct hostif_msg) > total_len) { + dev_err(&client_data->cl_device->dev, + "[hid-ish]: error, received %u which is less than data header %u\n", + (unsigned int)data_len, + (unsigned int)sizeof(struct hostif_msg_hdr)); + ++client_data->bad_recv_cnt; + ish_hw_reset(hid_ishtp_cl->dev); + break; + } + recv_msg = (struct hostif_msg *)(recv_buf + cur_pos); payload_len = recv_msg->hdr.size; @@ -412,9 +412,7 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id, { struct ishtp_hid_data *hid_data = hid->driver_data; struct ishtp_cl_data *client_data = hid_data->client_data; - static unsigned char buf[10]; - unsigned int len; - struct hostif_msg_to_sensor *msg = (struct hostif_msg_to_sensor *)buf; + struct hostif_msg_to_sensor msg = {}; int rv; int i; @@ -426,14 +424,11 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id, return; } - len = sizeof(struct hostif_msg_to_sensor); - - memset(msg, 0, sizeof(struct hostif_msg_to_sensor)); - msg->hdr.command = (report_type == HID_FEATURE_REPORT) ? + msg.hdr.command = (report_type == HID_FEATURE_REPORT) ? HOSTIF_GET_FEATURE_REPORT : HOSTIF_GET_INPUT_REPORT; for (i = 0; i < client_data->num_hid_devices; ++i) { if (hid == client_data->hid_sensor_hubs[i]) { - msg->hdr.device_id = + msg.hdr.device_id = client_data->hid_devices[i].dev_id; break; } @@ -442,8 +437,9 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id, if (i == client_data->num_hid_devices) return; - msg->report_id = report_id; - rv = ishtp_cl_send(client_data->hid_ishtp_cl, buf, len); + msg.report_id = report_id; + rv = ishtp_cl_send(client_data->hid_ishtp_cl, (uint8_t *)&msg, + sizeof(msg)); if (rv) hid_ishtp_trace(client_data, "%s hid %p send failed\n", __func__, hid); diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index f272cdd9bd55..2623a567ffba 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -418,7 +418,7 @@ static struct ishtp_cl_device *ishtp_bus_add_device(struct ishtp_device *dev, list_del(&device->device_link); spin_unlock_irqrestore(&dev->device_list_lock, flags); dev_err(dev->devc, "Failed to register ISHTP client device\n"); - kfree(device); + put_device(&device->dev); return NULL; } diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index b54ef1ffcbec..ee7a37eb159a 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -1213,8 +1213,10 @@ static int __wacom_devm_sysfs_create_group(struct wacom *wacom, devres->root = root; error = sysfs_create_group(devres->root, group); - if (error) + if (error) { + devres_free(devres); return error; + } devres_add(&wacom->hdev->dev, devres); diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index ee270e065ba9..2a972ed6851b 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -61,9 +61,12 @@ config INFINIBAND_ON_DEMAND_PAGING pages on demand instead. config INFINIBAND_ADDR_TRANS - bool + bool "RDMA/CM" depends on INFINIBAND default y + ---help--- + Support for RDMA communication manager (CM). + This allows for a generic connection abstraction over RDMA. config INFINIBAND_ADDR_TRANS_CONFIGFS bool diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index e337b08de2ff..fb2d347f760f 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -291,14 +291,18 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, * so lookup free slot only if requested. */ if (pempty && empty < 0) { - if (data->props & GID_TABLE_ENTRY_INVALID) { - /* Found an invalid (free) entry; allocate it */ - if (data->props & GID_TABLE_ENTRY_DEFAULT) { - if (default_gid) - empty = curr_index; - } else { - empty = curr_index; - } + if (data->props & GID_TABLE_ENTRY_INVALID && + (default_gid == + !!(data->props & GID_TABLE_ENTRY_DEFAULT))) { + /* + * Found an invalid (free) entry; allocate it. + * If default GID is requested, then our + * found slot must be one of the DEFAULT + * reserved slots or we fail. + * This ensures that only DEFAULT reserved + * slots are used for default property GIDs. + */ + empty = curr_index; } } @@ -420,8 +424,10 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, return ret; } -int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, - union ib_gid *gid, struct ib_gid_attr *attr) +static int +_ib_cache_gid_del(struct ib_device *ib_dev, u8 port, + union ib_gid *gid, struct ib_gid_attr *attr, + unsigned long mask, bool default_gid) { struct ib_gid_table *table; int ret = 0; @@ -431,11 +437,7 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, mutex_lock(&table->lock); - ix = find_gid(table, gid, attr, false, - GID_ATTR_FIND_MASK_GID | - GID_ATTR_FIND_MASK_GID_TYPE | - GID_ATTR_FIND_MASK_NETDEV, - NULL); + ix = find_gid(table, gid, attr, default_gid, mask, NULL); if (ix < 0) { ret = -EINVAL; goto out_unlock; @@ -452,6 +454,17 @@ out_unlock: return ret; } +int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, + union ib_gid *gid, struct ib_gid_attr *attr) +{ + unsigned long mask = GID_ATTR_FIND_MASK_GID | + GID_ATTR_FIND_MASK_GID_TYPE | + GID_ATTR_FIND_MASK_DEFAULT | + GID_ATTR_FIND_MASK_NETDEV; + + return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false); +} + int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, struct net_device *ndev) { @@ -728,7 +741,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, unsigned long gid_type_mask, enum ib_cache_gid_default_mode mode) { - union ib_gid gid; + union ib_gid gid = { }; struct ib_gid_attr gid_attr; struct ib_gid_table *table; unsigned int gid_type; @@ -736,7 +749,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid; - make_default_gid(ndev, &gid); + mask = GID_ATTR_FIND_MASK_GID_TYPE | + GID_ATTR_FIND_MASK_DEFAULT | + GID_ATTR_FIND_MASK_NETDEV; memset(&gid_attr, 0, sizeof(gid_attr)); gid_attr.ndev = ndev; @@ -747,12 +762,12 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, gid_attr.gid_type = gid_type; if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) { - mask = GID_ATTR_FIND_MASK_GID_TYPE | - GID_ATTR_FIND_MASK_DEFAULT; + make_default_gid(ndev, &gid); __ib_cache_gid_add(ib_dev, port, &gid, &gid_attr, mask, true); } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) { - ib_cache_gid_del(ib_dev, port, &gid, &gid_attr); + _ib_cache_gid_del(ib_dev, port, &gid, + &gid_attr, mask, true); } } } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 51a641002e10..a693fcd4c513 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -382,6 +382,8 @@ struct cma_hdr { #define CMA_VERSION 0x00 struct cma_req_info { + struct sockaddr_storage listen_addr_storage; + struct sockaddr_storage src_addr_storage; struct ib_device *device; int port; union ib_gid local_gid; @@ -866,7 +868,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; - union ib_gid sgid; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { @@ -889,12 +890,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, if (ret) goto out; - ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, - rdma_ah_read_grh(&qp_attr.ah_attr)->sgid_index, - &sgid, NULL); - if (ret) - goto out; - BUG_ON(id_priv->cma_dev->device != id_priv->id.device); if (conn_param) @@ -1340,11 +1335,11 @@ static bool validate_net_dev(struct net_device *net_dev, } static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, - const struct cma_req_info *req) + struct cma_req_info *req) { - struct sockaddr_storage listen_addr_storage, src_addr_storage; - struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, - *src_addr = (struct sockaddr *)&src_addr_storage; + struct sockaddr *listen_addr = + (struct sockaddr *)&req->listen_addr_storage; + struct sockaddr *src_addr = (struct sockaddr *)&req->src_addr_storage; struct net_device *net_dev; const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL; int err; @@ -1359,11 +1354,6 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, if (!net_dev) return ERR_PTR(-ENODEV); - if (!validate_net_dev(net_dev, listen_addr, src_addr)) { - dev_put(net_dev); - return ERR_PTR(-EHOSTUNREACH); - } - return net_dev; } @@ -1490,15 +1480,51 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, } } + /* + * Net namespace might be getting deleted while route lookup, + * cm_id lookup is in progress. Therefore, perform netdevice + * validation, cm_id lookup under rcu lock. + * RCU lock along with netdevice state check, synchronizes with + * netdevice migrating to different net namespace and also avoids + * case where net namespace doesn't get deleted while lookup is in + * progress. + * If the device state is not IFF_UP, its properties such as ifindex + * and nd_net cannot be trusted to remain valid without rcu lock. + * net/core/dev.c change_net_namespace() ensures to synchronize with + * ongoing operations on net device after device is closed using + * synchronize_net(). + */ + rcu_read_lock(); + if (*net_dev) { + /* + * If netdevice is down, it is likely that it is administratively + * down or it might be migrating to different namespace. + * In that case avoid further processing, as the net namespace + * or ifindex may change. + */ + if (((*net_dev)->flags & IFF_UP) == 0) { + id_priv = ERR_PTR(-EHOSTUNREACH); + goto err; + } + + if (!validate_net_dev(*net_dev, + (struct sockaddr *)&req.listen_addr_storage, + (struct sockaddr *)&req.src_addr_storage)) { + id_priv = ERR_PTR(-EHOSTUNREACH); + goto err; + } + } + bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, rdma_ps_from_service_id(req.service_id), cma_port_from_service_id(req.service_id)); id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); +err: + rcu_read_unlock(); if (IS_ERR(id_priv) && *net_dev) { dev_put(*net_dev); *net_dev = NULL; } - return id_priv; } diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 9821ae900f6d..da12da1c36f6 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -114,7 +114,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, struct sockaddr_storage *mapped_sockaddr, u8 nl_client) { - struct hlist_head *hash_bucket_head; + struct hlist_head *hash_bucket_head = NULL; struct iwpm_mapping_info *map_info; unsigned long flags; int ret = -EINVAL; @@ -142,6 +142,9 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, } } spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); + + if (!hash_bucket_head) + kfree(map_info); return ret; } diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index c50596f7f98a..b28452a55a08 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -59,7 +59,7 @@ module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); static struct list_head ib_mad_port_list; -static u32 ib_mad_client_id = 0; +static atomic_t ib_mad_client_id = ATOMIC_INIT(0); /* Port list lock */ static DEFINE_SPINLOCK(ib_mad_port_list_lock); @@ -377,7 +377,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, } spin_lock_irqsave(&port_priv->reg_lock, flags); - mad_agent_priv->agent.hi_tid = ++ib_mad_client_id; + mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id); /* * Make sure MAD registration (if supplied) diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index cc2966380c0c..c0e4fd55e2cc 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -255,6 +255,7 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, struct net_device *rdma_ndev) { struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev); + unsigned long gid_type_mask; if (!rdma_ndev) return; @@ -264,21 +265,22 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, rcu_read_lock(); - if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) && - is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) == - BONDING_SLAVE_STATE_INACTIVE) { - unsigned long gid_type_mask; - + if (((rdma_ndev != event_ndev && + !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) || + is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) + == + BONDING_SLAVE_STATE_INACTIVE)) { rcu_read_unlock(); + return; + } - gid_type_mask = roce_gid_type_mask_support(ib_dev, port); + rcu_read_unlock(); - ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, - gid_type_mask, - IB_CACHE_GID_DEFAULT_MODE_DELETE); - } else { - rcu_read_unlock(); - } + gid_type_mask = roce_gid_type_mask_support(ib_dev, port); + + ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, + gid_type_mask, + IB_CACHE_GID_DEFAULT_MODE_DELETE); } static void enum_netdev_ipv4_ips(struct ib_device *ib_dev, diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 74329483af6d..eab43b17e9cf 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -159,6 +159,23 @@ static void ucma_put_ctx(struct ucma_context *ctx) complete(&ctx->comp); } +/* + * Same as ucm_get_ctx but requires that ->cm_id->device is valid, eg that the + * CM_ID is bound. + */ +static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id) +{ + struct ucma_context *ctx = ucma_get_ctx(file, id); + + if (IS_ERR(ctx)) + return ctx; + if (!ctx->cm_id->device) { + ucma_put_ctx(ctx); + return ERR_PTR(-EINVAL); + } + return ctx; +} + static void ucma_close_event_id(struct work_struct *work) { struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work); @@ -683,7 +700,7 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - if (!rdma_addr_size_in6(&cmd.src_addr) || + if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) || !rdma_addr_size_in6(&cmd.dst_addr)) return -EINVAL; @@ -734,7 +751,7 @@ static ssize_t ucma_resolve_route(struct ucma_file *file, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1050,7 +1067,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, if (!cmd.conn_param.valid) return -EINVAL; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1092,7 +1109,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1120,7 +1137,7 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1139,7 +1156,7 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1167,15 +1184,10 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file, if (cmd.qp_state > IB_QPS_ERR) return -EINVAL; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx_dev(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); - if (!ctx->cm_id->device) { - ret = -EINVAL; - goto out; - } - resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; @@ -1316,13 +1328,13 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) + return -EINVAL; + ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); - if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE)) - return -EINVAL; - optval = memdup_user(u64_to_user_ptr(cmd.optval), cmd.optlen); if (IS_ERR(optval)) { @@ -1384,7 +1396,7 @@ static ssize_t ucma_process_join(struct ucma_file *file, else return -EINVAL; - ctx = ucma_get_ctx(file, cmd->id); + ctx = ucma_get_ctx_dev(file, cmd->id); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 13cb5e4deb86..21a887c9523b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -691,6 +691,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, mr->device = pd->device; mr->pd = pd; + mr->dm = NULL; mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; @@ -765,6 +766,11 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, mr = uobj->object; + if (mr->dm) { + ret = -EINVAL; + goto put_uobjs; + } + if (cmd.flags & IB_MR_REREG_ACCESS) { ret = ib_check_mr_access(cmd.access_flags); if (ret) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 8c93970dc8f1..8d32c4ae368c 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -234,6 +234,15 @@ static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *met return -EINVAL; } + for (; i < method_spec->num_buckets; i++) { + struct uverbs_attr_spec_hash *attr_spec_bucket = + method_spec->attr_buckets[i]; + + if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask, + attr_spec_bucket->num_attrs)) + return -EINVAL; + } + return 0; } diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c index cbcec3da12f6..b4f016dfa23d 100644 --- a/drivers/infiniband/core/uverbs_std_types_flow_action.c +++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c @@ -363,28 +363,28 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { - .ptr = { + { .ptr = { .type = UVERBS_ATTR_TYPE_PTR_IN, UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - }, + } }, }, }; static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { - .ptr = { + { .ptr = { .type = UVERBS_ATTR_TYPE_PTR_IN, /* No need to specify any data */ .len = 0, - } + } } }, [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { - .ptr = { + { .ptr = { .type = UVERBS_ATTR_TYPE_PTR_IN, UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, - } + } } }, }; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 7eff3aeffe01..6ddfb1fade79 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1656,6 +1656,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; + mr->dm = NULL; mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 6f2b26126c64..2be2e1ac1b5f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -315,7 +315,7 @@ static void advance_oldest_read(struct t4_wq *wq) * Deal with out-of-order and/or completions that complete * prior unsignalled WRs. */ -void c4iw_flush_hw_cq(struct c4iw_cq *chp) +void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp) { struct t4_cqe *hw_cqe, *swcqe, read_cqe; struct c4iw_qp *qhp; @@ -339,6 +339,13 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) if (qhp == NULL) goto next_cqe; + if (flush_qhp != qhp) { + spin_lock(&qhp->lock); + + if (qhp->wq.flushed == 1) + goto next_cqe; + } + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) goto next_cqe; @@ -390,6 +397,8 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) next_cqe: t4_hwcq_consume(&chp->cq); ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); + if (qhp && flush_qhp != qhp) + spin_unlock(&qhp->lock); } } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index feeb8ee6f4a2..44161ca4d2a8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -875,6 +875,11 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->status_page->db_off = 0; + init_completion(&rdev->rqt_compl); + init_completion(&rdev->pbl_compl); + kref_init(&rdev->rqt_kref); + kref_init(&rdev->pbl_kref); + return 0; err_free_status_page_and_wr_log: if (c4iw_wr_log && rdev->wr_log) @@ -893,13 +898,15 @@ destroy_resource: static void c4iw_rdev_close(struct c4iw_rdev *rdev) { - destroy_workqueue(rdev->free_workq); kfree(rdev->wr_log); c4iw_release_dev_ucontext(rdev, &rdev->uctx); free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); c4iw_rqtpool_destroy(rdev); + wait_for_completion(&rdev->pbl_compl); + wait_for_completion(&rdev->rqt_compl); c4iw_ocqp_pool_destroy(rdev); + destroy_workqueue(rdev->free_workq); c4iw_destroy_resource(&rdev->resource); } diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index cc929002c05e..831027717121 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -185,6 +185,10 @@ struct c4iw_rdev { struct wr_log_entry *wr_log; int wr_log_size; struct workqueue_struct *free_workq; + struct completion rqt_compl; + struct completion pbl_compl; + struct kref rqt_kref; + struct kref pbl_kref; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -1049,7 +1053,7 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size); -void c4iw_flush_hw_cq(struct c4iw_cq *chp); +void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp); void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp); int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index de77b6027d69..ae167b686608 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1343,12 +1343,12 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, qhp->wq.flushed = 1; t4_set_wq_in_error(&qhp->wq); - c4iw_flush_hw_cq(rchp); + c4iw_flush_hw_cq(rchp, qhp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); if (schp != rchp) - c4iw_flush_hw_cq(schp); + c4iw_flush_hw_cq(schp, qhp); sq_flushed = c4iw_flush_sq(qhp); spin_unlock(&qhp->lock); diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 3cf25997ed2b..0ef25ae05e6f 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -260,12 +260,22 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT); if (rdev->stats.pbl.cur > rdev->stats.pbl.max) rdev->stats.pbl.max = rdev->stats.pbl.cur; + kref_get(&rdev->pbl_kref); } else rdev->stats.pbl.fail++; mutex_unlock(&rdev->stats.lock); return (u32)addr; } +static void destroy_pblpool(struct kref *kref) +{ + struct c4iw_rdev *rdev; + + rdev = container_of(kref, struct c4iw_rdev, pbl_kref); + gen_pool_destroy(rdev->pbl_pool); + complete(&rdev->pbl_compl); +} + void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { pr_debug("addr 0x%x size %d\n", addr, size); @@ -273,6 +283,7 @@ void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT); mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size); + kref_put(&rdev->pbl_kref, destroy_pblpool); } int c4iw_pblpool_create(struct c4iw_rdev *rdev) @@ -310,7 +321,7 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev) void c4iw_pblpool_destroy(struct c4iw_rdev *rdev) { - gen_pool_destroy(rdev->pbl_pool); + kref_put(&rdev->pbl_kref, destroy_pblpool); } /* @@ -331,12 +342,22 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT); if (rdev->stats.rqt.cur > rdev->stats.rqt.max) rdev->stats.rqt.max = rdev->stats.rqt.cur; + kref_get(&rdev->rqt_kref); } else rdev->stats.rqt.fail++; mutex_unlock(&rdev->stats.lock); return (u32)addr; } +static void destroy_rqtpool(struct kref *kref) +{ + struct c4iw_rdev *rdev; + + rdev = container_of(kref, struct c4iw_rdev, rqt_kref); + gen_pool_destroy(rdev->rqt_pool); + complete(&rdev->rqt_compl); +} + void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) { pr_debug("addr 0x%x size %d\n", addr, size << 6); @@ -344,6 +365,7 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT); mutex_unlock(&rdev->stats.lock); gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6); + kref_put(&rdev->rqt_kref, destroy_rqtpool); } int c4iw_rqtpool_create(struct c4iw_rdev *rdev) @@ -380,7 +402,7 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev) void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) { - gen_pool_destroy(rdev->rqt_pool); + kref_put(&rdev->rqt_kref, destroy_rqtpool); } /* diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index a97055dd4fbd..b5fab55cc275 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -412,7 +412,6 @@ static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix) static int get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) { - int ret; cpumask_var_t diff; struct hfi1_affinity_node *entry; struct cpu_mask_set *set = NULL; @@ -424,10 +423,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd, extra[0] = '\0'; cpumask_clear(&msix->mask); - ret = zalloc_cpumask_var(&diff, GFP_KERNEL); - if (!ret) - return -ENOMEM; - entry = node_affinity_lookup(dd->node); switch (msix->type) { @@ -458,6 +453,9 @@ static int get_irq_affinity(struct hfi1_devdata *dd, * finds its CPU here. */ if (cpu == -1 && set) { + if (!zalloc_cpumask_var(&diff, GFP_KERNEL)) + return -ENOMEM; + if (cpumask_equal(&set->mask, &set->used)) { /* * We've used up all the CPUs, bump up the generation @@ -469,6 +467,8 @@ static int get_irq_affinity(struct hfi1_devdata *dd, cpumask_andnot(diff, &set->mask, &set->used); cpu = cpumask_first(diff); cpumask_set_cpu(cpu, &set->used); + + free_cpumask_var(diff); } cpumask_set_cpu(cpu, &msix->mask); @@ -482,7 +482,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd, hfi1_setup_sdma_notifier(msix); } - free_cpumask_var(diff); return 0; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 46d1475b2154..bd837a048bf4 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -433,31 +433,43 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = pkt->grh; u32 rqpn = 0, bth1; - u16 pkey, rlid, dlid = ib_get_dlid(pkt->hdr); + u16 pkey; + u32 rlid, slid, dlid = 0; u8 hdr_type, sc, svc_type; bool is_mcast = false; + /* can be called from prescan */ if (pkt->etype == RHF_RCV_TYPE_BYPASS) { is_mcast = hfi1_is_16B_mcast(dlid); pkey = hfi1_16B_get_pkey(pkt->hdr); sc = hfi1_16B_get_sc(pkt->hdr); + dlid = hfi1_16B_get_dlid(pkt->hdr); + slid = hfi1_16B_get_slid(pkt->hdr); hdr_type = HFI1_PKT_TYPE_16B; } else { is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); pkey = ib_bth_get_pkey(ohdr); sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf); + dlid = ib_get_dlid(pkt->hdr); + slid = ib_get_slid(pkt->hdr); hdr_type = HFI1_PKT_TYPE_9B; } switch (qp->ibqp.qp_type) { + case IB_QPT_UD: + dlid = ppd->lid; + rlid = slid; + rqpn = ib_get_sqpn(pkt->ohdr); + svc_type = IB_CC_SVCTYPE_UD; + break; case IB_QPT_SMI: case IB_QPT_GSI: - case IB_QPT_UD: - rlid = ib_get_slid(pkt->hdr); + rlid = slid; rqpn = ib_get_sqpn(pkt->ohdr); svc_type = IB_CC_SVCTYPE_UD; break; @@ -482,7 +494,6 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, dlid, rlid, sc, grh); if (!is_mcast && (bth1 & IB_BECN_SMASK)) { - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 lqpn = bth1 & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 32c48265405e..cac2c62bc42d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1537,13 +1537,13 @@ void set_link_ipg(struct hfi1_pportdata *ppd); void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn, u32 rqpn, u8 svc_type); void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, - u32 pkey, u32 slid, u32 dlid, u8 sc5, + u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, - u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u32 remote_qpn, u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp, - u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u32 remote_qpn, u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); #define PKEY_CHECK_INVALID -1 @@ -2437,7 +2437,7 @@ static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr, ((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT); lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) | ((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT); - lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | (pkey << OPA_16B_PKEY_SHIFT); + lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | ((u32)pkey << OPA_16B_PKEY_SHIFT); lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4; hdr->lrh[0] = lrh0; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 33eba2356742..6309edf811df 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -88,9 +88,9 @@ * pio buffers per ctxt, etc.) Zero means use one user context per CPU. */ int num_user_contexts = -1; -module_param_named(num_user_contexts, num_user_contexts, uint, S_IRUGO); +module_param_named(num_user_contexts, num_user_contexts, int, 0444); MODULE_PARM_DESC( - num_user_contexts, "Set max number of user contexts to use"); + num_user_contexts, "Set max number of user contexts to use (default: -1 will use the real (non-HT) CPU count)"); uint krcvqs[RXE_NUM_DATA_VL]; int krcvqsset; @@ -1209,30 +1209,49 @@ static void finalize_asic_data(struct hfi1_devdata *dd, kfree(ad); } -static void __hfi1_free_devdata(struct kobject *kobj) +/** + * hfi1_clean_devdata - cleans up per-unit data structure + * @dd: pointer to a valid devdata structure + * + * It cleans up all data structures set up by + * by hfi1_alloc_devdata(). + */ +static void hfi1_clean_devdata(struct hfi1_devdata *dd) { - struct hfi1_devdata *dd = - container_of(kobj, struct hfi1_devdata, kobj); struct hfi1_asic_data *ad; unsigned long flags; spin_lock_irqsave(&hfi1_devs_lock, flags); - idr_remove(&hfi1_unit_table, dd->unit); - list_del(&dd->list); + if (!list_empty(&dd->list)) { + idr_remove(&hfi1_unit_table, dd->unit); + list_del_init(&dd->list); + } ad = release_asic_data(dd); spin_unlock_irqrestore(&hfi1_devs_lock, flags); - if (ad) - finalize_asic_data(dd, ad); + + finalize_asic_data(dd, ad); free_platform_config(dd); rcu_barrier(); /* wait for rcu callbacks to complete */ free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); free_percpu(dd->send_schedule); free_percpu(dd->tx_opstats); + dd->int_counter = NULL; + dd->rcv_limit = NULL; + dd->send_schedule = NULL; + dd->tx_opstats = NULL; sdma_clean(dd, dd->num_sdma); rvt_dealloc_device(&dd->verbs_dev.rdi); } +static void __hfi1_free_devdata(struct kobject *kobj) +{ + struct hfi1_devdata *dd = + container_of(kobj, struct hfi1_devdata, kobj); + + hfi1_clean_devdata(dd); +} + static struct kobj_type hfi1_devdata_type = { .release = __hfi1_free_devdata, }; @@ -1265,6 +1284,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) return ERR_PTR(-ENOMEM); dd->num_pports = nports; dd->pport = (struct hfi1_pportdata *)(dd + 1); + dd->pcidev = pdev; + pci_set_drvdata(pdev, dd); INIT_LIST_HEAD(&dd->list); idr_preload(GFP_KERNEL); @@ -1331,9 +1352,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) return dd; bail: - if (!list_empty(&dd->list)) - list_del_init(&dd->list); - rvt_dealloc_device(&dd->verbs_dev.rdi); + hfi1_clean_devdata(dd); return ERR_PTR(ret); } diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 83d66e862207..c1c982908b4b 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -163,9 +163,6 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) resource_size_t addr; int ret = 0; - dd->pcidev = pdev; - pci_set_drvdata(pdev, dd); - addr = pci_resource_start(pdev, 0); len = pci_resource_len(pdev, 0); diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index d486355880cb..cbf7faa5038c 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -199,6 +199,7 @@ void free_platform_config(struct hfi1_devdata *dd) { /* Release memory allocated for eprom or fallback file read. */ kfree(dd->platform_config.data); + dd->platform_config.data = NULL; } void get_port_type(struct hfi1_pportdata *ppd) diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 1869f639c3ae..b5966991d647 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -204,6 +204,8 @@ static void clean_i2c_bus(struct hfi1_i2c_bus *bus) void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad) { + if (!ad) + return; clean_i2c_bus(ad->i2c_bus0); ad->i2c_bus0 = NULL; clean_i2c_bus(ad->i2c_bus1); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 3daa94bdae3a..c0071ca4147a 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -733,6 +733,20 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, ohdr->bth[2] = cpu_to_be32(bth2); } +/** + * hfi1_make_ruc_header_16B - build a 16B header + * @qp: the queue pair + * @ohdr: a pointer to the destination header memory + * @bth0: bth0 passed in from the RC/UC builder + * @bth2: bth2 passed in from the RC/UC builder + * @middle: non zero implies indicates ahg "could" be used + * @ps: the current packet state + * + * This routine may disarm ahg under these situations: + * - packet needs a GRH + * - BECN needed + * - migration state not IB_MIG_MIGRATED + */ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, @@ -777,6 +791,12 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, else middle = 0; + if (qp->s_flags & RVT_S_ECN) { + qp->s_flags &= ~RVT_S_ECN; + /* we recently received a FECN, so return a BECN */ + becn = true; + middle = 0; + } if (middle) build_ahg(qp, bth2); else @@ -784,11 +804,6 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, bth0 |= pkey; bth0 |= extra_bytes << 20; - if (qp->s_flags & RVT_S_ECN) { - qp->s_flags &= ~RVT_S_ECN; - /* we recently received a FECN, so return a BECN */ - becn = true; - } hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); if (!ppd->lid) @@ -806,6 +821,20 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, pkey, becn, 0, l4, priv->s_sc); } +/** + * hfi1_make_ruc_header_9B - build a 9B header + * @qp: the queue pair + * @ohdr: a pointer to the destination header memory + * @bth0: bth0 passed in from the RC/UC builder + * @bth2: bth2 passed in from the RC/UC builder + * @middle: non zero implies indicates ahg "could" be used + * @ps: the current packet state + * + * This routine may disarm ahg under these situations: + * - packet needs a GRH + * - BECN needed + * - migration state not IB_MIG_MIGRATED + */ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, @@ -839,6 +868,12 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, else middle = 0; + if (qp->s_flags & RVT_S_ECN) { + qp->s_flags &= ~RVT_S_ECN; + /* we recently received a FECN, so return a BECN */ + bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); + middle = 0; + } if (middle) build_ahg(qp, bth2); else @@ -846,11 +881,6 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, bth0 |= pkey; bth0 |= extra_bytes << 20; - if (qp->s_flags & RVT_S_ECN) { - qp->s_flags &= ~RVT_S_ECN; - /* we recently received a FECN, so return a BECN */ - bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); - } hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, lrh0, diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index bcf3b0bebac8..69c17a5ef038 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -628,7 +628,7 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey) } void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, - u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u32 remote_qpn, u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh) { u64 pbc, pbc_flags = 0; @@ -687,7 +687,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, } void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, - u32 pkey, u32 slid, u32 dlid, u8 sc5, + u16 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh) { u64 pbc, pbc_flags = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 0eeabfbee192..63b5b3edabcb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -912,7 +912,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, obj_per_chunk = buf_chunk_size / obj_size; num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; bt_chunk_num = bt_chunk_size / 8; - if (table->type >= HEM_TYPE_MTT) + if (type >= HEM_TYPE_MTT) num_bt_l0 = bt_chunk_num; table->hem = kcalloc(num_hem, sizeof(*table->hem), @@ -920,7 +920,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, if (!table->hem) goto err_kcalloc_hem_buf; - if (check_whether_bt_num_3(table->type, hop_num)) { + if (check_whether_bt_num_3(type, hop_num)) { unsigned long num_bt_l1; num_bt_l1 = (num_hem + bt_chunk_num - 1) / @@ -939,8 +939,8 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, goto err_kcalloc_l1_dma; } - if (check_whether_bt_num_2(table->type, hop_num) || - check_whether_bt_num_3(table->type, hop_num)) { + if (check_whether_bt_num_2(type, hop_num) || + check_whether_bt_num_3(type, hop_num)) { table->bt_l0 = kcalloc(num_bt_l0, sizeof(*table->bt_l0), GFP_KERNEL); if (!table->bt_l0) @@ -1039,14 +1039,14 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); - hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.trrl_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); - hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_cqe_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8b84ab7800d8..25916e8522ed 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -71,6 +71,11 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, return -EINVAL; } + if (wr->opcode == IB_WR_RDMA_READ) { + dev_err(hr_dev->dev, "Not support inline data!\n"); + return -EINVAL; + } + for (i = 0; i < wr->num_sge; i++) { memcpy(wqe, ((void *)wr->sg_list[i].addr), wr->sg_list[i].length); @@ -148,7 +153,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_UD)) { dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type); - *bad_wr = NULL; + *bad_wr = wr; return -EOPNOTSUPP; } @@ -182,7 +187,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id; - owner_bit = ~(qp->sq.head >> ilog2(qp->sq.wqe_cnt)) & 0x1; + owner_bit = + ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { @@ -456,6 +462,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } else { dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); spin_unlock_irqrestore(&qp->sq.lock, flags); + *bad_wr = wr; return -EOPNOTSUPP; } } @@ -2592,10 +2599,12 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, V2_QPC_BYTE_4_SQPN_S, 0); - roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn); - roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, 0); + if (attr_mask & IB_QP_DEST_QPN) { + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn); + roce_set_field(qpc_mask->byte_56_dqpn_err, + V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); + } roce_set_field(context->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, @@ -2650,8 +2659,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return -EINVAL; } - if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || - (attr_mask & IB_QP_PKEY_INDEX) || (attr_mask & IB_QP_QKEY)) { + if (attr_mask & IB_QP_ALT_PATH) { dev_err(dev, "INIT2RTR attr_mask (0x%x) error\n", attr_mask); return -EINVAL; } @@ -2800,10 +2808,12 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_140_RR_MAX_S, 0); } - roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); - roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, - V2_QPC_BYTE_56_DQPN_S, 0); + if (attr_mask & IB_QP_DEST_QPN) { + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, + V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); + roce_set_field(qpc_mask->byte_56_dqpn_err, + V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); + } /* Configure GID index */ port_num = rdma_ah_get_port_num(&attr->ah_attr); @@ -2845,7 +2855,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, IB_MTU_4096); - else + else if (attr_mask & IB_QP_PATH_MTU) roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S, attr->path_mtu); @@ -2922,11 +2932,9 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return -EINVAL; } - /* If exist optional param, return error */ - if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || - (attr_mask & IB_QP_QKEY) || (attr_mask & IB_QP_PATH_MIG_STATE) || - (attr_mask & IB_QP_CUR_STATE) || - (attr_mask & IB_QP_MIN_RNR_TIMER)) { + /* Not support alternate path and path migration */ + if ((attr_mask & IB_QP_ALT_PATH) || + (attr_mask & IB_QP_PATH_MIG_STATE)) { dev_err(dev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask); return -EINVAL; } @@ -3161,7 +3169,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR)) { + (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) { /* Nothing */ ; } else { @@ -4478,7 +4487,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0, eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS); if (ret) { - dev_err(dev, "[mailbox cmd] creat eqc failed.\n"); + dev_err(dev, "[mailbox cmd] create eqc failed.\n"); goto err_cmd_mbox; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e289a924e789..d4aad34c21e2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -620,7 +620,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, to_hr_ucontext(ib_pd->uobject->context), ucmd.db_addr, &hr_qp->rdb); if (ret) { - dev_err(dev, "rp record doorbell map failed!\n"); + dev_err(dev, "rq record doorbell map failed!\n"); goto err_mtt; } } diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 17f4f151a97f..61d8b06375bb 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -346,7 +346,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, /* Add to the first block the misalignment that it suffers from. */ total_len += (first_block_start & ((1ULL << block_shift) - 1ULL)); last_block_end = current_block_start + current_block_len; - last_block_aligned_end = round_up(last_block_end, 1 << block_shift); + last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift); total_len += (last_block_aligned_end - last_block_end); if (total_len & ((1ULL << block_shift) - 1ULL)) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 50af8915e7ec..199648adac74 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -673,7 +673,8 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx, MLX4_IB_RX_HASH_SRC_PORT_TCP | MLX4_IB_RX_HASH_DST_PORT_TCP | MLX4_IB_RX_HASH_SRC_PORT_UDP | - MLX4_IB_RX_HASH_DST_PORT_UDP)) { + MLX4_IB_RX_HASH_DST_PORT_UDP | + MLX4_IB_RX_HASH_INNER)) { pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n", ucmd->rx_hash_fields_mask); return (-EOPNOTSUPP); diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index bce263b92821..fb4d77be019b 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,6 +1,7 @@ config MLX5_INFINIBAND tristate "Mellanox Connect-IB HCA support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE + depends on INFINIBAND_USER_ACCESS || INFINIBAND_USER_ACCESS=n ---help--- This driver provides low-level InfiniBand support for Mellanox Connect-IB PCI Express host channel adapters (HCAs). diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index daa919e5a442..b4d8ff8ab807 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -52,7 +52,6 @@ #include <linux/mlx5/port.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> -#include <linux/mlx5/fs_helpers.h> #include <linux/list.h> #include <rdma/ib_smi.h> #include <rdma/ib_umem.h> @@ -180,7 +179,7 @@ static int mlx5_netdev_event(struct notifier_block *this, if (rep_ndev == ndev) roce->netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; - } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) { + } else if (ndev->dev.parent == &mdev->pdev->dev) { roce->netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; } @@ -4757,7 +4756,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - return mlx5_get_vector_affinity(dev->mdev, comp_vector); + return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); } /* The mlx5_ib_multiport_mutex should be held when calling this function */ @@ -5427,9 +5426,7 @@ static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev) static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev) { dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); - if (!dev->mdev->priv.uar) - return -ENOMEM; - return 0; + return PTR_ERR_OR_ZERO(dev->mdev->priv.uar); } static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1520a2f20f98..90a9c461cedc 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -866,25 +866,28 @@ static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, int *order) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem *u; int err; - *umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); - err = PTR_ERR_OR_ZERO(*umem); + *umem = NULL; + + u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); + err = PTR_ERR_OR_ZERO(u); if (err) { - *umem = NULL; - mlx5_ib_err(dev, "umem get failed (%d)\n", err); + mlx5_ib_dbg(dev, "umem get failed (%d)\n", err); return err; } - mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, + mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, page_shift, ncont, order); if (!*npages) { mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(*umem); + ib_umem_release(u); return -EINVAL; } + *umem = u; + mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", *npages, *ncont, *order, *page_shift); @@ -1458,13 +1461,12 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, int access_flags = flags & IB_MR_REREG_ACCESS ? new_access_flags : mr->access_flags; - u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address; - u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length; int page_shift = 0; int upd_flags = 0; int npages = 0; int ncont = 0; int order = 0; + u64 addr, len; int err; mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", @@ -1472,6 +1474,17 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); + if (!mr->umem) + return -EINVAL; + + if (flags & IB_MR_REREG_TRANS) { + addr = virt_addr; + len = length; + } else { + addr = mr->umem->address; + len = mr->umem->length; + } + if (flags != IB_MR_REREG_PD) { /* * Replace umem. This needs to be done whether or not UMR is @@ -1479,6 +1492,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, */ flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); + mr->umem = NULL; err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, &npages, &page_shift, &ncont, &order); if (err) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7ed4b70f6447..87b7c1be2a11 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -259,7 +259,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, } else { if (ucmd) { qp->rq.wqe_cnt = ucmd->rq_wqe_count; + if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift)) + return -EINVAL; qp->rq.wqe_shift = ucmd->rq_wqe_shift; + if ((1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) < qp->wq_sig) + return -EINVAL; qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; qp->rq.max_post = qp->rq.wqe_cnt; } else { @@ -2451,18 +2455,18 @@ enum { static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { - if (rate == IB_RATE_PORT_CURRENT) { + if (rate == IB_RATE_PORT_CURRENT) return 0; - } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { + + if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) return -EINVAL; - } else { - while (rate != IB_RATE_2_5_GBPS && - !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - MLX5_CAP_GEN(dev->mdev, stat_rate_support))) - --rate; - } - return rate + MLX5_STAT_RATE_OFFSET; + while (rate != IB_RATE_PORT_CURRENT && + !(1 << (rate + MLX5_STAT_RATE_OFFSET) & + MLX5_CAP_GEN(dev->mdev, stat_rate_support))) + --rate; + + return rate ? rate + MLX5_STAT_RATE_OFFSET : rate; } static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 0a75164cedea..007d5e8a0121 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -461,7 +461,7 @@ static bool nes_nic_send(struct sk_buff *skb, struct net_device *netdev) /** * nes_netdev_start_xmit */ -static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index 61927c165b59..4cf11063e0b5 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -390,7 +390,7 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { .name = "IB_OPCODE_RC_SEND_ONLY_INV", .mask = RXE_IETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_COMP_MASK | RXE_RWR_MASK | RXE_SEND_MASK - | RXE_END_MASK, + | RXE_END_MASK | RXE_START_MASK, .length = RXE_BTH_BYTES + RXE_IETH_BYTES, .offset = { [RXE_BTH] = 0, diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 7bdaf71b8221..785199990457 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -728,7 +728,6 @@ next_wqe: rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { - kfree_skb(skb); rxe_run_task(&qp->req.task, 1); goto exit; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index a65c9969f7fc..955ff3b6da9c 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -742,7 +742,6 @@ static enum resp_states read_reply(struct rxe_qp *qp, err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); if (err) { pr_err("Failed sending RDMA reply.\n"); - kfree_skb(skb); return RESPST_ERR_RNR; } @@ -954,10 +953,8 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, } err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb); - if (err) { + if (err) pr_err_ratelimited("Failed sending ack\n"); - kfree_skb(skb); - } err1: return err; @@ -1141,7 +1138,6 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, if (rc) { pr_err("Failed resending result. This flow is not handled - skb ignored\n"); rxe_drop_ref(qp); - kfree_skb(skb_copy); rc = RESPST_CLEANUP; goto out; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 161ba8c76285..cf291f90b58f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1094,7 +1094,7 @@ drop_and_unlock: spin_unlock_irqrestore(&priv->lock, flags); } -static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct rdma_netdev *rn = netdev_priv(dev); diff --git a/drivers/infiniband/ulp/srp/Kconfig b/drivers/infiniband/ulp/srp/Kconfig index c74ee9633041..99db8fe5173a 100644 --- a/drivers/infiniband/ulp/srp/Kconfig +++ b/drivers/infiniband/ulp/srp/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_SRP tristate "InfiniBand SCSI RDMA Protocol" - depends on SCSI + depends on SCSI && INFINIBAND_ADDR_TRANS select SCSI_SRP_ATTRS ---help--- Support for the SCSI RDMA Protocol over InfiniBand. This diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig index 31ee83d528d9..fb8b7182f05e 100644 --- a/drivers/infiniband/ulp/srpt/Kconfig +++ b/drivers/infiniband/ulp/srpt/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_SRPT tristate "InfiniBand SCSI RDMA Protocol target support" - depends on INFINIBAND && TARGET_CORE + depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE ---help--- Support for the SCSI RDMA Protocol (SRP) Target driver. The diff --git a/drivers/input/input-leds.c b/drivers/input/input-leds.c index 766bf2660116..5f04b2d94635 100644 --- a/drivers/input/input-leds.c +++ b/drivers/input/input-leds.c @@ -88,6 +88,7 @@ static int input_leds_connect(struct input_handler *handler, const struct input_device_id *id) { struct input_leds *leds; + struct input_led *led; unsigned int num_leds; unsigned int led_code; int led_no; @@ -119,14 +120,13 @@ static int input_leds_connect(struct input_handler *handler, led_no = 0; for_each_set_bit(led_code, dev->ledbit, LED_CNT) { - struct input_led *led = &leds->leds[led_no]; + if (!input_led_info[led_code].name) + continue; + led = &leds->leds[led_no]; led->handle = &leds->handle; led->code = led_code; - if (!input_led_info[led_code].name) - continue; - led->cdev.name = kasprintf(GFP_KERNEL, "%s::%s", dev_name(&dev->dev), input_led_info[led_code].name); diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 0a67f235ba88..38f9501acdf0 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -583,7 +583,7 @@ static void alps_process_trackstick_packet_v3(struct psmouse *psmouse) x = (s8)(((packet[0] & 0x20) << 2) | (packet[1] & 0x7f)); y = (s8)(((packet[0] & 0x10) << 3) | (packet[2] & 0x7f)); - z = packet[4] & 0x7c; + z = packet[4] & 0x7f; /* * The x and y values tend to be quite large, and when used diff --git a/drivers/input/rmi4/rmi_spi.c b/drivers/input/rmi4/rmi_spi.c index 76edbf2c1bce..082defc329a8 100644 --- a/drivers/input/rmi4/rmi_spi.c +++ b/drivers/input/rmi4/rmi_spi.c @@ -147,8 +147,11 @@ static int rmi_spi_xfer(struct rmi_spi_xport *rmi_spi, if (len > RMI_SPI_XFER_SIZE_LIMIT) return -EINVAL; - if (rmi_spi->xfer_buf_size < len) - rmi_spi_manage_pools(rmi_spi, len); + if (rmi_spi->xfer_buf_size < len) { + ret = rmi_spi_manage_pools(rmi_spi, len); + if (ret < 0) + return ret; + } if (addr == 0) /* diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 4f15496fec8b..3e613afa10b4 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -362,7 +362,7 @@ config TOUCHSCREEN_HIDEEP If unsure, say N. - To compile this driver as a moudle, choose M here : the + To compile this driver as a module, choose M here : the module will be called hideep_ts. config TOUCHSCREEN_ILI210X diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 5d9699fe1b55..09194721aed2 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -280,7 +280,8 @@ struct mxt_data { struct input_dev *input_dev; char phys[64]; /* device physical location */ struct mxt_object *object_table; - struct mxt_info info; + struct mxt_info *info; + void *raw_info_block; unsigned int irq; unsigned int max_x; unsigned int max_y; @@ -460,12 +461,13 @@ static int mxt_lookup_bootloader_address(struct mxt_data *data, bool retry) { u8 appmode = data->client->addr; u8 bootloader; + u8 family_id = data->info ? data->info->family_id : 0; switch (appmode) { case 0x4a: case 0x4b: /* Chips after 1664S use different scheme */ - if (retry || data->info.family_id >= 0xa2) { + if (retry || family_id >= 0xa2) { bootloader = appmode - 0x24; break; } @@ -692,7 +694,7 @@ mxt_get_object(struct mxt_data *data, u8 type) struct mxt_object *object; int i; - for (i = 0; i < data->info.object_num; i++) { + for (i = 0; i < data->info->object_num; i++) { object = data->object_table + i; if (object->type == type) return object; @@ -1462,12 +1464,12 @@ static int mxt_update_cfg(struct mxt_data *data, const struct firmware *cfg) data_pos += offset; } - if (cfg_info.family_id != data->info.family_id) { + if (cfg_info.family_id != data->info->family_id) { dev_err(dev, "Family ID mismatch!\n"); return -EINVAL; } - if (cfg_info.variant_id != data->info.variant_id) { + if (cfg_info.variant_id != data->info->variant_id) { dev_err(dev, "Variant ID mismatch!\n"); return -EINVAL; } @@ -1512,7 +1514,7 @@ static int mxt_update_cfg(struct mxt_data *data, const struct firmware *cfg) /* Malloc memory to store configuration */ cfg_start_ofs = MXT_OBJECT_START + - data->info.object_num * sizeof(struct mxt_object) + + data->info->object_num * sizeof(struct mxt_object) + MXT_INFO_CHECKSUM_SIZE; config_mem_size = data->mem_size - cfg_start_ofs; config_mem = kzalloc(config_mem_size, GFP_KERNEL); @@ -1563,20 +1565,6 @@ release_mem: return ret; } -static int mxt_get_info(struct mxt_data *data) -{ - struct i2c_client *client = data->client; - struct mxt_info *info = &data->info; - int error; - - /* Read 7-byte info block starting at address 0 */ - error = __mxt_read_reg(client, 0, sizeof(*info), info); - if (error) - return error; - - return 0; -} - static void mxt_free_input_device(struct mxt_data *data) { if (data->input_dev) { @@ -1591,9 +1579,10 @@ static void mxt_free_object_table(struct mxt_data *data) video_unregister_device(&data->dbg.vdev); v4l2_device_unregister(&data->dbg.v4l2); #endif - - kfree(data->object_table); data->object_table = NULL; + data->info = NULL; + kfree(data->raw_info_block); + data->raw_info_block = NULL; kfree(data->msg_buf); data->msg_buf = NULL; data->T5_address = 0; @@ -1609,34 +1598,18 @@ static void mxt_free_object_table(struct mxt_data *data) data->max_reportid = 0; } -static int mxt_get_object_table(struct mxt_data *data) +static int mxt_parse_object_table(struct mxt_data *data, + struct mxt_object *object_table) { struct i2c_client *client = data->client; - size_t table_size; - struct mxt_object *object_table; - int error; int i; u8 reportid; u16 end_address; - table_size = data->info.object_num * sizeof(struct mxt_object); - object_table = kzalloc(table_size, GFP_KERNEL); - if (!object_table) { - dev_err(&data->client->dev, "Failed to allocate memory\n"); - return -ENOMEM; - } - - error = __mxt_read_reg(client, MXT_OBJECT_START, table_size, - object_table); - if (error) { - kfree(object_table); - return error; - } - /* Valid Report IDs start counting from 1 */ reportid = 1; data->mem_size = 0; - for (i = 0; i < data->info.object_num; i++) { + for (i = 0; i < data->info->object_num; i++) { struct mxt_object *object = object_table + i; u8 min_id, max_id; @@ -1660,8 +1633,8 @@ static int mxt_get_object_table(struct mxt_data *data) switch (object->type) { case MXT_GEN_MESSAGE_T5: - if (data->info.family_id == 0x80 && - data->info.version < 0x20) { + if (data->info->family_id == 0x80 && + data->info->version < 0x20) { /* * On mXT224 firmware versions prior to V2.0 * read and discard unused CRC byte otherwise @@ -1716,24 +1689,102 @@ static int mxt_get_object_table(struct mxt_data *data) /* If T44 exists, T5 position has to be directly after */ if (data->T44_address && (data->T5_address != data->T44_address + 1)) { dev_err(&client->dev, "Invalid T44 position\n"); - error = -EINVAL; - goto free_object_table; + return -EINVAL; } data->msg_buf = kcalloc(data->max_reportid, data->T5_msg_size, GFP_KERNEL); - if (!data->msg_buf) { - dev_err(&client->dev, "Failed to allocate message buffer\n"); + if (!data->msg_buf) + return -ENOMEM; + + return 0; +} + +static int mxt_read_info_block(struct mxt_data *data) +{ + struct i2c_client *client = data->client; + int error; + size_t size; + void *id_buf, *buf; + uint8_t num_objects; + u32 calculated_crc; + u8 *crc_ptr; + + /* If info block already allocated, free it */ + if (data->raw_info_block) + mxt_free_object_table(data); + + /* Read 7-byte ID information block starting at address 0 */ + size = sizeof(struct mxt_info); + id_buf = kzalloc(size, GFP_KERNEL); + if (!id_buf) + return -ENOMEM; + + error = __mxt_read_reg(client, 0, size, id_buf); + if (error) + goto err_free_mem; + + /* Resize buffer to give space for rest of info block */ + num_objects = ((struct mxt_info *)id_buf)->object_num; + size += (num_objects * sizeof(struct mxt_object)) + + MXT_INFO_CHECKSUM_SIZE; + + buf = krealloc(id_buf, size, GFP_KERNEL); + if (!buf) { error = -ENOMEM; - goto free_object_table; + goto err_free_mem; + } + id_buf = buf; + + /* Read rest of info block */ + error = __mxt_read_reg(client, MXT_OBJECT_START, + size - MXT_OBJECT_START, + id_buf + MXT_OBJECT_START); + if (error) + goto err_free_mem; + + /* Extract & calculate checksum */ + crc_ptr = id_buf + size - MXT_INFO_CHECKSUM_SIZE; + data->info_crc = crc_ptr[0] | (crc_ptr[1] << 8) | (crc_ptr[2] << 16); + + calculated_crc = mxt_calculate_crc(id_buf, 0, + size - MXT_INFO_CHECKSUM_SIZE); + + /* + * CRC mismatch can be caused by data corruption due to I2C comms + * issue or else device is not using Object Based Protocol (eg i2c-hid) + */ + if ((data->info_crc == 0) || (data->info_crc != calculated_crc)) { + dev_err(&client->dev, + "Info Block CRC error calculated=0x%06X read=0x%06X\n", + calculated_crc, data->info_crc); + error = -EIO; + goto err_free_mem; + } + + data->raw_info_block = id_buf; + data->info = (struct mxt_info *)id_buf; + + dev_info(&client->dev, + "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n", + data->info->family_id, data->info->variant_id, + data->info->version >> 4, data->info->version & 0xf, + data->info->build, data->info->object_num); + + /* Parse object table information */ + error = mxt_parse_object_table(data, id_buf + MXT_OBJECT_START); + if (error) { + dev_err(&client->dev, "Error %d parsing object table\n", error); + mxt_free_object_table(data); + goto err_free_mem; } - data->object_table = object_table; + data->object_table = (struct mxt_object *)(id_buf + MXT_OBJECT_START); return 0; -free_object_table: - mxt_free_object_table(data); +err_free_mem: + kfree(id_buf); return error; } @@ -2046,7 +2097,7 @@ static int mxt_initialize(struct mxt_data *data) int error; while (1) { - error = mxt_get_info(data); + error = mxt_read_info_block(data); if (!error) break; @@ -2077,16 +2128,9 @@ static int mxt_initialize(struct mxt_data *data) msleep(MXT_FW_RESET_TIME); } - /* Get object table information */ - error = mxt_get_object_table(data); - if (error) { - dev_err(&client->dev, "Error %d reading object table\n", error); - return error; - } - error = mxt_acquire_irq(data); if (error) - goto err_free_object_table; + return error; error = request_firmware_nowait(THIS_MODULE, true, MXT_CFG_NAME, &client->dev, GFP_KERNEL, data, @@ -2094,14 +2138,10 @@ static int mxt_initialize(struct mxt_data *data) if (error) { dev_err(&client->dev, "Failed to invoke firmware loader: %d\n", error); - goto err_free_object_table; + return error; } return 0; - -err_free_object_table: - mxt_free_object_table(data); - return error; } static int mxt_set_t7_power_cfg(struct mxt_data *data, u8 sleep) @@ -2162,7 +2202,7 @@ recheck: static u16 mxt_get_debug_value(struct mxt_data *data, unsigned int x, unsigned int y) { - struct mxt_info *info = &data->info; + struct mxt_info *info = data->info; struct mxt_dbg *dbg = &data->dbg; unsigned int ofs, page; unsigned int col = 0; @@ -2490,7 +2530,7 @@ static const struct video_device mxt_video_device = { static void mxt_debug_init(struct mxt_data *data) { - struct mxt_info *info = &data->info; + struct mxt_info *info = data->info; struct mxt_dbg *dbg = &data->dbg; struct mxt_object *object; int error; @@ -2576,7 +2616,6 @@ static int mxt_configure_objects(struct mxt_data *data, const struct firmware *cfg) { struct device *dev = &data->client->dev; - struct mxt_info *info = &data->info; int error; error = mxt_init_t7_power_cfg(data); @@ -2601,11 +2640,6 @@ static int mxt_configure_objects(struct mxt_data *data, mxt_debug_init(data); - dev_info(dev, - "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n", - info->family_id, info->variant_id, info->version >> 4, - info->version & 0xf, info->build, info->object_num); - return 0; } @@ -2614,7 +2648,7 @@ static ssize_t mxt_fw_version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mxt_data *data = dev_get_drvdata(dev); - struct mxt_info *info = &data->info; + struct mxt_info *info = data->info; return scnprintf(buf, PAGE_SIZE, "%u.%u.%02X\n", info->version >> 4, info->version & 0xf, info->build); } @@ -2624,7 +2658,7 @@ static ssize_t mxt_hw_version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mxt_data *data = dev_get_drvdata(dev); - struct mxt_info *info = &data->info; + struct mxt_info *info = data->info; return scnprintf(buf, PAGE_SIZE, "%u.%u\n", info->family_id, info->variant_id); } @@ -2663,7 +2697,7 @@ static ssize_t mxt_object_show(struct device *dev, return -ENOMEM; error = 0; - for (i = 0; i < data->info.object_num; i++) { + for (i = 0; i < data->info->object_num; i++) { object = data->object_table + i; if (!mxt_object_readable(object->type)) @@ -3035,6 +3069,15 @@ static const struct dmi_system_id mxt_dmi_table[] = { .driver_data = samus_platform_data, }, { + /* Samsung Chromebook Pro */ + .ident = "Samsung Chromebook Pro", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Google"), + DMI_MATCH(DMI_PRODUCT_NAME, "Caroline"), + }, + .driver_data = samus_platform_data, + }, + { /* Other Google Chromebooks */ .ident = "Chromebook", .matches = { @@ -3254,6 +3297,11 @@ static SIMPLE_DEV_PM_OPS(mxt_pm_ops, mxt_suspend, mxt_resume); static const struct of_device_id mxt_of_match[] = { { .compatible = "atmel,maxtouch", }, + /* Compatibles listed below are deprecated */ + { .compatible = "atmel,qt602240_ts", }, + { .compatible = "atmel,atmel_mxt_ts", }, + { .compatible = "atmel,atmel_mxt_tp", }, + { .compatible = "atmel,mXT224", }, {}, }; MODULE_DEVICE_TABLE(of, mxt_of_match); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2a99f0f14795..8fb8c737fffe 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -83,7 +83,6 @@ static DEFINE_SPINLOCK(amd_iommu_devtable_lock); static DEFINE_SPINLOCK(pd_bitmap_lock); -static DEFINE_SPINLOCK(iommu_table_lock); /* List of all available dev_data structures */ static LLIST_HEAD(dev_data_list); @@ -3562,6 +3561,7 @@ EXPORT_SYMBOL(amd_iommu_device_info); *****************************************************************************/ static struct irq_chip amd_ir_chip; +static DEFINE_SPINLOCK(iommu_table_lock); static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) { diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f05f3cf90756..ddcbbdb5d658 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -167,40 +167,16 @@ EXPORT_SYMBOL(iommu_put_dma_cookie); * @list: Reserved region list from iommu_get_resv_regions() * * IOMMU drivers can use this to implement their .get_resv_regions callback - * for general non-IOMMU-specific reservations. Currently, this covers host - * bridge windows for PCI devices and GICv3 ITS region reservation on ACPI - * based ARM platforms that may require HW MSI reservation. + * for general non-IOMMU-specific reservations. Currently, this covers GICv3 + * ITS region reservation on ACPI based ARM platforms that may require HW MSI + * reservation. */ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { - struct pci_host_bridge *bridge; - struct resource_entry *window; - - if (!is_of_node(dev->iommu_fwspec->iommu_fwnode) && - iort_iommu_msi_get_resv_regions(dev, list) < 0) - return; - - if (!dev_is_pci(dev)) - return; - - bridge = pci_find_host_bridge(to_pci_dev(dev)->bus); - resource_list_for_each_entry(window, &bridge->windows) { - struct iommu_resv_region *region; - phys_addr_t start; - size_t length; - - if (resource_type(window->res) != IORESOURCE_MEM) - continue; - start = window->res->start - window->offset; - length = window->res->end - window->res->start + 1; - region = iommu_alloc_resv_region(start, length, 0, - IOMMU_RESV_RESERVED); - if (!region) - return; + if (!is_of_node(dev->iommu_fwspec->iommu_fwnode)) + iort_iommu_msi_get_resv_regions(dev, list); - list_add_tail(®ion->list, list); - } } EXPORT_SYMBOL(iommu_dma_get_resv_regions); @@ -229,6 +205,23 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, return 0; } +static void iova_reserve_pci_windows(struct pci_dev *dev, + struct iova_domain *iovad) +{ + struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus); + struct resource_entry *window; + unsigned long lo, hi; + + resource_list_for_each_entry(window, &bridge->windows) { + if (resource_type(window->res) != IORESOURCE_MEM) + continue; + + lo = iova_pfn(iovad, window->res->start - window->offset); + hi = iova_pfn(iovad, window->res->end - window->offset); + reserve_iova(iovad, lo, hi); + } +} + static int iova_reserve_iommu_regions(struct device *dev, struct iommu_domain *domain) { @@ -238,6 +231,9 @@ static int iova_reserve_iommu_regions(struct device *dev, LIST_HEAD(resv_regions); int ret = 0; + if (dev_is_pci(dev)) + iova_reserve_pci_windows(to_pci_dev(dev), iovad); + iommu_get_resv_regions(dev, &resv_regions); list_for_each_entry(region, &resv_regions, list) { unsigned long lo, hi; diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index accf58388bdb..460bed4fc5b1 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1345,7 +1345,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, struct qi_desc desc; if (mask) { - BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1)); + WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1)); addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 66f69af2c219..3062a154a9fb 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1136,7 +1136,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) irte->dest_id = IRTE_DEST(cfg->dest_apicid); /* Update the hardware only if the interrupt is in remapped mode. */ - if (!force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) + if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) modify_irte(&ir_data->irq_2_iommu, irte); } diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 5fc8656c60f9..0468acfa131f 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1098,7 +1098,7 @@ static int rk_iommu_of_xlate(struct device *dev, data->iommu = platform_get_drvdata(iommu_dev); dev->archdata.iommu = data; - of_dev_put(iommu_dev); + platform_device_put(iommu_dev); return 0; } @@ -1175,8 +1175,15 @@ static int rk_iommu_probe(struct platform_device *pdev) for (i = 0; i < iommu->num_clocks; ++i) iommu->clocks[i].id = rk_iommu_clocks[i]; + /* + * iommu clocks should be present for all new devices and devicetrees + * but there are older devicetrees without clocks out in the wild. + * So clocks as optional for the time being. + */ err = devm_clk_bulk_get(iommu->dev, iommu->num_clocks, iommu->clocks); - if (err) + if (err == -ENOENT) + iommu->num_clocks = 0; + else if (err) return err; err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks); diff --git a/drivers/irqchip/qcom-irq-combiner.c b/drivers/irqchip/qcom-irq-combiner.c index f31265937439..7f0c0be322e0 100644 --- a/drivers/irqchip/qcom-irq-combiner.c +++ b/drivers/irqchip/qcom-irq-combiner.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. +/* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -68,7 +68,7 @@ static void combiner_handle_irq(struct irq_desc *desc) bit = readl_relaxed(combiner->regs[reg].addr); status = bit & combiner->regs[reg].enabled; - if (!status) + if (bit && !status) pr_warn_ratelimited("Unexpected IRQ on CPU%d: (%08x %08lx %p)\n", smp_processor_id(), bit, combiner->regs[reg].enabled, diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 004cc3cc6123..7fa2631b422c 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -290,7 +290,7 @@ do { \ if (kthread_should_stop() || \ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \ set_current_state(TASK_RUNNING); \ - return 0; \ + goto out; \ } \ \ schedule(); \ @@ -378,6 +378,9 @@ retry_invalidate: bch_prio_write(ca); } } +out: + wait_for_kthread_stop(); + return 0; } /* Allocation */ diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d338b7086013..3a0cfb237af9 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -392,6 +392,8 @@ struct cached_dev { #define DEFAULT_CACHED_DEV_ERROR_LIMIT 64 atomic_t io_errors; unsigned error_limit; + + char backing_dev_name[BDEVNAME_SIZE]; }; enum alloc_reserve { @@ -464,6 +466,8 @@ struct cache { atomic_long_t meta_sectors_written; atomic_long_t btree_sectors_written; atomic_long_t sectors_written; + + char cache_dev_name[BDEVNAME_SIZE]; }; struct gc_stat { diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 028f7b386e01..4e63c6f6c04d 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -106,7 +106,6 @@ void bch_btree_verify(struct btree *b) void bch_data_verify(struct cached_dev *dc, struct bio *bio) { - char name[BDEVNAME_SIZE]; struct bio *check; struct bio_vec bv, cbv; struct bvec_iter iter, citer = { 0 }; @@ -134,7 +133,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) bv.bv_len), dc->disk.c, "verify failed at dev %s sector %llu", - bdevname(dc->bdev, name), + dc->backing_dev_name, (uint64_t) bio->bi_iter.bi_sector); kunmap_atomic(p1); diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 7fac97ae036e..2ddf8515e6a5 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -52,7 +52,6 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c, /* IO errors */ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) { - char buf[BDEVNAME_SIZE]; unsigned errors; WARN_ONCE(!dc, "NULL pointer of struct cached_dev"); @@ -60,7 +59,7 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) errors = atomic_add_return(1, &dc->io_errors); if (errors < dc->error_limit) pr_err("%s: IO error on backing device, unrecoverable", - bio_devname(bio, buf)); + dc->backing_dev_name); else bch_cached_dev_error(dc); } @@ -105,19 +104,18 @@ void bch_count_io_errors(struct cache *ca, } if (error) { - char buf[BDEVNAME_SIZE]; unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT, &ca->io_errors); errors >>= IO_ERROR_SHIFT; if (errors < ca->set->error_limit) pr_err("%s: IO error on %s%s", - bdevname(ca->bdev, buf), m, + ca->cache_dev_name, m, is_read ? ", recovering." : "."); else bch_cache_set_error(ca->set, "%s: too many IO errors %s", - bdevname(ca->bdev, buf), m); + ca->cache_dev_name, m); } } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index a65e3365eeb9..8e3e8655ed63 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -649,11 +649,8 @@ static void backing_request_endio(struct bio *bio) */ if (unlikely(s->iop.writeback && bio->bi_opf & REQ_PREFLUSH)) { - char buf[BDEVNAME_SIZE]; - - bio_devname(bio, buf); pr_err("Can't flush %s: returned bi_status %i", - buf, bio->bi_status); + dc->backing_dev_name, bio->bi_status); } else { /* set to orig_bio->bi_status in bio_complete() */ s->iop.status = bio->bi_status; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index d90d9e59ca00..3dea06b41d43 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -936,7 +936,6 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) static void cached_dev_detach_finish(struct work_struct *w) { struct cached_dev *dc = container_of(w, struct cached_dev, detach); - char buf[BDEVNAME_SIZE]; struct closure cl; closure_init_stack(&cl); @@ -967,7 +966,7 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_unlock(&bch_register_lock); - pr_info("Caching disabled for %s", bdevname(dc->bdev, buf)); + pr_info("Caching disabled for %s", dc->backing_dev_name); /* Drop ref we took in cached_dev_detach() */ closure_put(&dc->disk.cl); @@ -999,29 +998,28 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, { uint32_t rtime = cpu_to_le32(get_seconds()); struct uuid_entry *u; - char buf[BDEVNAME_SIZE]; struct cached_dev *exist_dc, *t; - bdevname(dc->bdev, buf); - if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) || (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16))) return -ENOENT; if (dc->disk.c) { - pr_err("Can't attach %s: already attached", buf); + pr_err("Can't attach %s: already attached", + dc->backing_dev_name); return -EINVAL; } if (test_bit(CACHE_SET_STOPPING, &c->flags)) { - pr_err("Can't attach %s: shutting down", buf); + pr_err("Can't attach %s: shutting down", + dc->backing_dev_name); return -EINVAL; } if (dc->sb.block_size < c->sb.block_size) { /* Will die */ pr_err("Couldn't attach %s: block size less than set's block size", - buf); + dc->backing_dev_name); return -EINVAL; } @@ -1029,7 +1027,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { pr_err("Tried to attach %s but duplicate UUID already attached", - buf); + dc->backing_dev_name); return -EINVAL; } @@ -1047,13 +1045,15 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, if (!u) { if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - pr_err("Couldn't find uuid for %s in set", buf); + pr_err("Couldn't find uuid for %s in set", + dc->backing_dev_name); return -ENOENT; } u = uuid_find_empty(c); if (!u) { - pr_err("Not caching %s, no room for UUID", buf); + pr_err("Not caching %s, no room for UUID", + dc->backing_dev_name); return -EINVAL; } } @@ -1112,7 +1112,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, up_write(&dc->writeback_lock); pr_info("Caching %s as %s on set %pU", - bdevname(dc->bdev, buf), dc->disk.disk->disk_name, + dc->backing_dev_name, + dc->disk.disk->disk_name, dc->disk.c->sb.set_uuid); return 0; } @@ -1225,10 +1226,10 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cached_dev *dc) { - char name[BDEVNAME_SIZE]; const char *err = "cannot allocate memory"; struct cache_set *c; + bdevname(bdev, dc->backing_dev_name); memcpy(&dc->sb, sb, sizeof(struct cache_sb)); dc->bdev = bdev; dc->bdev->bd_holder = dc; @@ -1237,6 +1238,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page; get_page(sb_page); + if (cached_dev_init(dc, sb->block_size << 9)) goto err; @@ -1247,7 +1249,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) goto err; - pr_info("registered backing device %s", bdevname(bdev, name)); + pr_info("registered backing device %s", dc->backing_dev_name); list_add(&dc->list, &uncached_devices); list_for_each_entry(c, &bch_cache_sets, list) @@ -1259,7 +1261,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, return; err: - pr_notice("error %s: %s", bdevname(bdev, name), err); + pr_notice("error %s: %s", dc->backing_dev_name, err); bcache_device_stop(&dc->disk); } @@ -1367,7 +1369,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) bool bch_cached_dev_error(struct cached_dev *dc) { - char name[BDEVNAME_SIZE]; + struct cache_set *c; if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) return false; @@ -1377,7 +1379,22 @@ bool bch_cached_dev_error(struct cached_dev *dc) smp_mb(); pr_err("stop %s: too many IO errors on backing device %s\n", - dc->disk.disk->disk_name, bdevname(dc->bdev, name)); + dc->disk.disk->disk_name, dc->backing_dev_name); + + /* + * If the cached device is still attached to a cache set, + * even dc->io_disable is true and no more I/O requests + * accepted, cache device internal I/O (writeback scan or + * garbage collection) may still prevent bcache device from + * being stopped. So here CACHE_SET_IO_DISABLE should be + * set to c->flags too, to make the internal I/O to cache + * device rejected and stopped immediately. + * If c is NULL, that means the bcache device is not attached + * to any cache set, then no CACHE_SET_IO_DISABLE bit to set. + */ + c = dc->disk.c; + if (c && test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) + pr_info("CACHE_SET_IO_DISABLE already set"); bcache_device_stop(&dc->disk); return true; @@ -1395,7 +1412,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) return false; if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) - pr_warn("CACHE_SET_IO_DISABLE already set"); + pr_info("CACHE_SET_IO_DISABLE already set"); /* XXX: we can be called from atomic context acquire_console_sem(); @@ -1539,6 +1556,20 @@ static void conditional_stop_bcache_device(struct cache_set *c, */ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.", d->disk->disk_name); + /* + * There might be a small time gap that cache set is + * released but bcache device is not. Inside this time + * gap, regular I/O requests will directly go into + * backing device as no cache set attached to. This + * behavior may also introduce potential inconsistence + * data in writeback mode while cache is dirty. + * Therefore before calling bcache_device_stop() due + * to a broken cache device, dc->io_disable should be + * explicitly set to true. + */ + dc->io_disable = true; + /* make others know io_disable is true earlier */ + smp_mb(); bcache_device_stop(d); } else { /* @@ -2003,12 +2034,10 @@ static int cache_alloc(struct cache *ca) static int register_cache(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cache *ca) { - char name[BDEVNAME_SIZE]; const char *err = NULL; /* must be set for any error case */ int ret = 0; - bdevname(bdev, name); - + bdevname(bdev, ca->cache_dev_name); memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; ca->bdev->bd_holder = ca; @@ -2045,14 +2074,14 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, goto out; } - pr_info("registered cache device %s", name); + pr_info("registered cache device %s", ca->cache_dev_name); out: kobject_put(&ca->kobj); err: if (err) - pr_notice("error %s: %s", name, err); + pr_notice("error %s: %s", ca->cache_dev_name, err); return ret; } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 4a9547cdcdc5..ad45ebe1a74b 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -244,8 +244,10 @@ static void dirty_endio(struct bio *bio) struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; - if (bio->bi_status) + if (bio->bi_status) { SET_KEY_DIRTY(&w->key, false); + bch_count_backing_io_errors(io->dc, bio); + } closure_put(&io->cl); } diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 12aa9ca21d8c..dc385b70e4c3 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1681,8 +1681,9 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign if (block_size <= KMALLOC_MAX_SIZE && (block_size < PAGE_SIZE || !is_power_of_2(block_size))) { - snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size); - c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN, + unsigned align = min(1U << __ffs(block_size), (unsigned)PAGE_SIZE); + snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", block_size); + c->slab_cache = kmem_cache_create(slab_name, block_size, align, SLAB_RECLAIM_ACCOUNT, NULL); if (!c->slab_cache) { r = -ENOMEM; diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c index 1d0af0a21fc7..84814e819e4c 100644 --- a/drivers/md/dm-cache-background-tracker.c +++ b/drivers/md/dm-cache-background-tracker.c @@ -166,7 +166,7 @@ static bool max_work_reached(struct background_tracker *b) atomic_read(&b->pending_demotes) >= b->max_work; } -struct bt_work *alloc_work(struct background_tracker *b) +static struct bt_work *alloc_work(struct background_tracker *b) { if (max_work_reached(b)) return NULL; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 77d9fe58dae2..514fb4aec5d1 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2440,7 +2440,7 @@ static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, str unsigned i; for (i = 0; i < ic->journal_sections; i++) kvfree(sl[i]); - kfree(sl); + kvfree(sl); } static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 580c49cc8079..5903e492bb34 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -23,6 +23,8 @@ #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ +#define MAX_NR_MIRRORS (DM_KCOPYD_MAX_REGIONS + 1) + #define DM_RAID1_HANDLE_ERRORS 0x01 #define DM_RAID1_KEEP_LOG 0x02 #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) @@ -255,7 +257,7 @@ static int mirror_flush(struct dm_target *ti) unsigned long error_bits; unsigned int i; - struct dm_io_region io[ms->nr_mirrors]; + struct dm_io_region io[MAX_NR_MIRRORS]; struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, @@ -651,7 +653,7 @@ static void write_callback(unsigned long error, void *context) static void do_write(struct mirror_set *ms, struct bio *bio) { unsigned int i; - struct dm_io_region io[ms->nr_mirrors], *dest = io; + struct dm_io_region io[MAX_NR_MIRRORS], *dest = io; struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, @@ -1083,7 +1085,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) argc -= args_used; if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 || - nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) { + nr_mirrors < 2 || nr_mirrors > MAX_NR_MIRRORS) { ti->error = "Invalid number of mirrors"; dm_dirty_log_destroy(dl); return -EINVAL; @@ -1404,7 +1406,7 @@ static void mirror_status(struct dm_target *ti, status_type_t type, int num_feature_args = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - char buffer[ms->nr_mirrors + 1]; + char buffer[MAX_NR_MIRRORS + 1]; switch (type) { case STATUSTYPE_INFO: diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4ea404dbcf0b..0a7b0107ca78 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1020,7 +1020,8 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); static struct dm_target *dm_dax_get_live_target(struct mapped_device *md, - sector_t sector, int *srcu_idx) + sector_t sector, int *srcu_idx) + __acquires(md->io_barrier) { struct dm_table *map; struct dm_target *ti; @@ -1037,7 +1038,7 @@ static struct dm_target *dm_dax_get_live_target(struct mapped_device *md, } static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void **kaddr, pfn_t *pfn) { struct mapped_device *md = dax_get_private(dax_dev); sector_t sector = pgoff * PAGE_SECTORS; @@ -1065,7 +1066,7 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, } static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) + void *addr, size_t bytes, struct iov_iter *i) { struct mapped_device *md = dax_get_private(dax_dev); sector_t sector = pgoff * PAGE_SECTORS; diff --git a/drivers/media/i2c/saa7115.c b/drivers/media/i2c/saa7115.c index e216cd768409..b07114b5efb2 100644 --- a/drivers/media/i2c/saa7115.c +++ b/drivers/media/i2c/saa7115.c @@ -20,7 +20,7 @@ // // VBI support (2004) and cleanups (2005) by Hans Verkuil <hverkuil@xs4all.nl> // -// Copyright (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org> +// Copyright (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org> // SAA7111, SAA7113 and SAA7118 support #include "saa711x_regs.h" diff --git a/drivers/media/i2c/saa711x_regs.h b/drivers/media/i2c/saa711x_regs.h index a50d480e101a..44fabe08234d 100644 --- a/drivers/media/i2c/saa711x_regs.h +++ b/drivers/media/i2c/saa711x_regs.h @@ -2,7 +2,7 @@ * SPDX-License-Identifier: GPL-2.0+ * saa711x - Philips SAA711x video decoder register specifications * - * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@infradead.org> + * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org> */ #define R_00_CHIP_VERSION 0x00 diff --git a/drivers/media/i2c/tda7432.c b/drivers/media/i2c/tda7432.c index 1c5c61d829d6..9b4f21237810 100644 --- a/drivers/media/i2c/tda7432.c +++ b/drivers/media/i2c/tda7432.c @@ -8,7 +8,7 @@ * Muting and tone control by Jonathan Isom <jisom@ematic.com> * * Copyright (c) 2000 Eric Sandeen <eric_sandeen@bigfoot.com> - * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@infradead.org> + * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org> * This code is placed under the terms of the GNU General Public License * Based on tda9855.c by Steve VanDeBogart (vandebo@uclink.berkeley.edu) * Which was based on tda8425.c by Greg Alexander (c) 1998 diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c index 2476d812f669..1734ed4ede33 100644 --- a/drivers/media/i2c/tvp5150.c +++ b/drivers/media/i2c/tvp5150.c @@ -2,7 +2,7 @@ // // tvp5150 - Texas Instruments TVP5150A/AM1 and TVP5151 video decoder driver // -// Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@infradead.org> +// Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@kernel.org> #include <dt-bindings/media/tvp5150.h> #include <linux/i2c.h> diff --git a/drivers/media/i2c/tvp5150_reg.h b/drivers/media/i2c/tvp5150_reg.h index c43b7b844021..d3a764cae1a0 100644 --- a/drivers/media/i2c/tvp5150_reg.h +++ b/drivers/media/i2c/tvp5150_reg.h @@ -3,7 +3,7 @@ * * tvp5150 - Texas Instruments TVP5150A/AM1 video decoder registers * - * Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@infradead.org> + * Copyright (c) 2005,2006 Mauro Carvalho Chehab <mchehab@kernel.org> */ #define TVP5150_VD_IN_SRC_SEL_1 0x00 /* Video input source selection #1 */ diff --git a/drivers/media/i2c/tvp7002.c b/drivers/media/i2c/tvp7002.c index a26c1a3f7183..4599b7e28a8d 100644 --- a/drivers/media/i2c/tvp7002.c +++ b/drivers/media/i2c/tvp7002.c @@ -5,7 +5,7 @@ * Author: Santiago Nunez-Corrales <santiago.nunez@ridgerun.com> * * This code is partially based upon the TVP5150 driver - * written by Mauro Carvalho Chehab (mchehab@infradead.org), + * written by Mauro Carvalho Chehab <mchehab@kernel.org>, * the TVP514x driver written by Vaibhav Hiremath <hvaibhav@ti.com> * and the TVP7002 driver in the TI LSP 2.10.00.14. Revisions by * Muralidharan Karicheri and Snehaprabha Narnakaje (TI). diff --git a/drivers/media/i2c/tvp7002_reg.h b/drivers/media/i2c/tvp7002_reg.h index 3c8c8b0a6a4c..7f56ba689dfe 100644 --- a/drivers/media/i2c/tvp7002_reg.h +++ b/drivers/media/i2c/tvp7002_reg.h @@ -5,7 +5,7 @@ * Author: Santiago Nunez-Corrales <santiago.nunez@ridgerun.com> * * This code is partially based upon the TVP5150 driver - * written by Mauro Carvalho Chehab (mchehab@infradead.org), + * written by Mauro Carvalho Chehab <mchehab@kernel.org>, * the TVP514x driver written by Vaibhav Hiremath <hvaibhav@ti.com> * and the TVP7002 driver in the TI LSP 2.10.00.14 * diff --git a/drivers/media/media-devnode.c b/drivers/media/media-devnode.c index 67ac51eff15c..6b87a721dc49 100644 --- a/drivers/media/media-devnode.c +++ b/drivers/media/media-devnode.c @@ -4,7 +4,7 @@ * Copyright (C) 2010 Nokia Corporation * * Based on drivers/media/video/v4l2_dev.c code authored by - * Mauro Carvalho Chehab <mchehab@infradead.org> (version 2) + * Mauro Carvalho Chehab <mchehab@kernel.org> (version 2) * Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1) * * Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com> diff --git a/drivers/media/pci/bt8xx/bttv-audio-hook.c b/drivers/media/pci/bt8xx/bttv-audio-hook.c index 9f1f9169fb5b..346fc7f58839 100644 --- a/drivers/media/pci/bt8xx/bttv-audio-hook.c +++ b/drivers/media/pci/bt8xx/bttv-audio-hook.c @@ -1,7 +1,7 @@ /* * Handlers for board audio hooks, splitted from bttv-cards * - * Copyright (c) 2006 Mauro Carvalho Chehab (mchehab@infradead.org) + * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org> * This code is placed under the terms of the GNU General Public License */ diff --git a/drivers/media/pci/bt8xx/bttv-audio-hook.h b/drivers/media/pci/bt8xx/bttv-audio-hook.h index 159d07adeff8..be16a537a03a 100644 --- a/drivers/media/pci/bt8xx/bttv-audio-hook.h +++ b/drivers/media/pci/bt8xx/bttv-audio-hook.h @@ -1,7 +1,7 @@ /* * Handlers for board audio hooks, splitted from bttv-cards * - * Copyright (c) 2006 Mauro Carvalho Chehab (mchehab@infradead.org) + * Copyright (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org> * This code is placed under the terms of the GNU General Public License */ diff --git a/drivers/media/pci/bt8xx/bttv-cards.c b/drivers/media/pci/bt8xx/bttv-cards.c index 1902732f90e1..2616243b2c49 100644 --- a/drivers/media/pci/bt8xx/bttv-cards.c +++ b/drivers/media/pci/bt8xx/bttv-cards.c @@ -2447,7 +2447,7 @@ struct tvcard bttv_tvcards[] = { }, /* ---- card 0x88---------------------------------- */ [BTTV_BOARD_ACORP_Y878F] = { - /* Mauro Carvalho Chehab <mchehab@infradead.org> */ + /* Mauro Carvalho Chehab <mchehab@kernel.org> */ .name = "Acorp Y878F", .video_inputs = 3, /* .audio_inputs= 1, */ @@ -2688,7 +2688,7 @@ struct tvcard bttv_tvcards[] = { }, [BTTV_BOARD_ENLTV_FM_2] = { /* Encore TV Tuner Pro ENL TV-FM-2 - Mauro Carvalho Chehab <mchehab@infradead.org */ + Mauro Carvalho Chehab <mchehab@kernel.org> */ .name = "Encore ENL TV-FM-2", .video_inputs = 3, /* .audio_inputs= 1, */ diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c index 707f57a9f940..de3f44b8dec6 100644 --- a/drivers/media/pci/bt8xx/bttv-driver.c +++ b/drivers/media/pci/bt8xx/bttv-driver.c @@ -13,7 +13,7 @@ (c) 2005-2006 Nickolay V. Shmyrev <nshmyrev@yandex.ru> Fixes to be fully V4L2 compliant by - (c) 2006 Mauro Carvalho Chehab <mchehab@infradead.org> + (c) 2006 Mauro Carvalho Chehab <mchehab@kernel.org> Cropping and overscan support Copyright (C) 2005, 2006 Michael H. Schimek <mschimek@gmx.at> diff --git a/drivers/media/pci/bt8xx/bttv-i2c.c b/drivers/media/pci/bt8xx/bttv-i2c.c index eccd1e3d717a..c76823eb399d 100644 --- a/drivers/media/pci/bt8xx/bttv-i2c.c +++ b/drivers/media/pci/bt8xx/bttv-i2c.c @@ -8,7 +8,7 @@ & Marcus Metzler (mocm@thp.uni-koeln.de) (c) 1999-2003 Gerd Knorr <kraxel@bytesex.org> - (c) 2005 Mauro Carvalho Chehab <mchehab@infradead.org> + (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org> - Multituner support and i2c address binding This program is free software; you can redistribute it and/or modify diff --git a/drivers/media/pci/cx23885/cx23885-input.c b/drivers/media/pci/cx23885/cx23885-input.c index be49589a61d2..395ff9bba759 100644 --- a/drivers/media/pci/cx23885/cx23885-input.c +++ b/drivers/media/pci/cx23885/cx23885-input.c @@ -13,7 +13,7 @@ * Copyright (C) 2008 <srinivasa.deevi at conexant dot com> * Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it> * Markus Rechberger <mrechberger@gmail.com> - * Mauro Carvalho Chehab <mchehab@infradead.org> + * Mauro Carvalho Chehab <mchehab@kernel.org> * Sascha Sommer <saschasommer@freenet.de> * Copyright (C) 2004, 2005 Chris Pascoe * Copyright (C) 2003, 2004 Gerd Knorr diff --git a/drivers/media/pci/cx88/cx88-alsa.c b/drivers/media/pci/cx88/cx88-alsa.c index ab09bb55cf45..8a28fda703a2 100644 --- a/drivers/media/pci/cx88/cx88-alsa.c +++ b/drivers/media/pci/cx88/cx88-alsa.c @@ -4,7 +4,7 @@ * * (c) 2007 Trent Piepho <xyzzy@speakeasy.org> * (c) 2005,2006 Ricardo Cerqueira <v4l@cerqueira.org> - * (c) 2005 Mauro Carvalho Chehab <mchehab@infradead.org> + * (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org> * Based on a dummy cx88 module by Gerd Knorr <kraxel@bytesex.org> * Based on dummy.c by Jaroslav Kysela <perex@perex.cz> * @@ -103,7 +103,7 @@ MODULE_PARM_DESC(index, "Index value for cx88x capture interface(s)."); MODULE_DESCRIPTION("ALSA driver module for cx2388x based TV cards"); MODULE_AUTHOR("Ricardo Cerqueira"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>"); +MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>"); MODULE_LICENSE("GPL"); MODULE_VERSION(CX88_VERSION); diff --git a/drivers/media/pci/cx88/cx88-blackbird.c b/drivers/media/pci/cx88/cx88-blackbird.c index 0e0952e60795..7a4876cf9f08 100644 --- a/drivers/media/pci/cx88/cx88-blackbird.c +++ b/drivers/media/pci/cx88/cx88-blackbird.c @@ -5,7 +5,7 @@ * (c) 2004 Jelle Foks <jelle@foks.us> * (c) 2004 Gerd Knorr <kraxel@bytesex.org> * - * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org> + * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org> * - video_ioctl2 conversion * * Includes parts from the ivtv driver <http://sourceforge.net/projects/ivtv/> diff --git a/drivers/media/pci/cx88/cx88-core.c b/drivers/media/pci/cx88/cx88-core.c index 8bfa5b7ed91b..60988e95b637 100644 --- a/drivers/media/pci/cx88/cx88-core.c +++ b/drivers/media/pci/cx88/cx88-core.c @@ -4,7 +4,7 @@ * * (c) 2003 Gerd Knorr <kraxel@bytesex.org> [SuSE Labs] * - * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org> + * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org> * - Multituner support * - video_ioctl2 conversion * - PAL/M fixes diff --git a/drivers/media/pci/cx88/cx88-i2c.c b/drivers/media/pci/cx88/cx88-i2c.c index f7692775fb5a..99f88a05a7c9 100644 --- a/drivers/media/pci/cx88/cx88-i2c.c +++ b/drivers/media/pci/cx88/cx88-i2c.c @@ -8,7 +8,7 @@ * (c) 2002 Yurij Sysoev <yurij@naturesoft.net> * (c) 1999-2003 Gerd Knorr <kraxel@bytesex.org> * - * (c) 2005 Mauro Carvalho Chehab <mchehab@infradead.org> + * (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org> * - Multituner support and i2c address binding * * This program is free software; you can redistribute it and/or modify diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c index 9be682cdb644..7b113bad70d2 100644 --- a/drivers/media/pci/cx88/cx88-video.c +++ b/drivers/media/pci/cx88/cx88-video.c @@ -5,7 +5,7 @@ * * (c) 2003-04 Gerd Knorr <kraxel@bytesex.org> [SuSE Labs] * - * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@infradead.org> + * (c) 2005-2006 Mauro Carvalho Chehab <mchehab@kernel.org> * - Multituner support * - video_ioctl2 conversion * - PAL/M fixes diff --git a/drivers/media/radio/radio-aimslab.c b/drivers/media/radio/radio-aimslab.c index 5ef635e72e10..4c52ac6d8bc5 100644 --- a/drivers/media/radio/radio-aimslab.c +++ b/drivers/media/radio/radio-aimslab.c @@ -4,7 +4,7 @@ * Copyright 1997 M. Kirkwood * * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com> - * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org> + * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org> * Converted to new API by Alan Cox <alan@lxorguk.ukuu.org.uk> * Various bugfixes and enhancements by Russell Kroll <rkroll@exploits.org> * diff --git a/drivers/media/radio/radio-aztech.c b/drivers/media/radio/radio-aztech.c index 9e12c6027359..840b7d60462b 100644 --- a/drivers/media/radio/radio-aztech.c +++ b/drivers/media/radio/radio-aztech.c @@ -2,7 +2,7 @@ * radio-aztech.c - Aztech radio card driver * * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@xs4all.nl> - * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org> + * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org> * Adapted to support the Video for Linux API by * Russell Kroll <rkroll@exploits.org>. Based on original tuner code by: * diff --git a/drivers/media/radio/radio-gemtek.c b/drivers/media/radio/radio-gemtek.c index 3ff4c4e1435f..f051f8694ab9 100644 --- a/drivers/media/radio/radio-gemtek.c +++ b/drivers/media/radio/radio-gemtek.c @@ -15,7 +15,7 @@ * Various bugfixes and enhancements by Russell Kroll <rkroll@exploits.org> * * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com> - * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org> + * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org> * * Note: this card seems to swap the left and right audio channels! * diff --git a/drivers/media/radio/radio-maxiradio.c b/drivers/media/radio/radio-maxiradio.c index 95f06f3b35dc..e4e758739246 100644 --- a/drivers/media/radio/radio-maxiradio.c +++ b/drivers/media/radio/radio-maxiradio.c @@ -27,7 +27,7 @@ * BUGS: * - card unmutes if you change frequency * - * (c) 2006, 2007 by Mauro Carvalho Chehab <mchehab@infradead.org>: + * (c) 2006, 2007 by Mauro Carvalho Chehab <mchehab@kernel.org>: * - Conversion to V4L2 API * - Uses video_ioctl2 for parsing and to add debug support */ diff --git a/drivers/media/radio/radio-rtrack2.c b/drivers/media/radio/radio-rtrack2.c index abeaedd8d437..5a1470eb753e 100644 --- a/drivers/media/radio/radio-rtrack2.c +++ b/drivers/media/radio/radio-rtrack2.c @@ -7,7 +7,7 @@ * Various bugfixes and enhancements by Russell Kroll <rkroll@exploits.org> * * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com> - * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org> + * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org> * * Fully tested with actual hardware and the v4l2-compliance tool. */ diff --git a/drivers/media/radio/radio-sf16fmi.c b/drivers/media/radio/radio-sf16fmi.c index fc4e63d36e4c..4f9b97edd9eb 100644 --- a/drivers/media/radio/radio-sf16fmi.c +++ b/drivers/media/radio/radio-sf16fmi.c @@ -13,7 +13,7 @@ * No volume control - only mute/unmute - you have to use line volume * control on SB-part of SF16-FMI/SF16-FMP/SF16-FMD * - * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org> + * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org> */ #include <linux/kernel.h> /* __setup */ diff --git a/drivers/media/radio/radio-terratec.c b/drivers/media/radio/radio-terratec.c index 4f116ea294fb..1af8f29cc7d1 100644 --- a/drivers/media/radio/radio-terratec.c +++ b/drivers/media/radio/radio-terratec.c @@ -17,7 +17,7 @@ * Volume Control is done digitally * * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com> - * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org> + * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org> */ #include <linux/module.h> /* Modules */ diff --git a/drivers/media/radio/radio-trust.c b/drivers/media/radio/radio-trust.c index 26a8c6002121..a4bad322ffff 100644 --- a/drivers/media/radio/radio-trust.c +++ b/drivers/media/radio/radio-trust.c @@ -12,7 +12,7 @@ * Scott McGrath (smcgrath@twilight.vtc.vsc.edu) * William McGrath (wmcgrath@twilight.vtc.vsc.edu) * - * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org> + * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org> */ #include <stdarg.h> diff --git a/drivers/media/radio/radio-typhoon.c b/drivers/media/radio/radio-typhoon.c index eb72a4d13758..d0d67ad85b8f 100644 --- a/drivers/media/radio/radio-typhoon.c +++ b/drivers/media/radio/radio-typhoon.c @@ -25,7 +25,7 @@ * The frequency change is necessary since the card never seems to be * completely silent. * - * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@infradead.org> + * Converted to V4L2 API by Mauro Carvalho Chehab <mchehab@kernel.org> */ #include <linux/module.h> /* Modules */ diff --git a/drivers/media/radio/radio-zoltrix.c b/drivers/media/radio/radio-zoltrix.c index 026e88eef29c..6007cd09b328 100644 --- a/drivers/media/radio/radio-zoltrix.c +++ b/drivers/media/radio/radio-zoltrix.c @@ -27,7 +27,7 @@ * 2002-07-15 - Fix Stereo typo * * 2006-07-24 - Converted to V4L2 API - * by Mauro Carvalho Chehab <mchehab@infradead.org> + * by Mauro Carvalho Chehab <mchehab@kernel.org> * * Converted to the radio-isa framework by Hans Verkuil <hans.verkuil@cisco.com> * diff --git a/drivers/media/rc/keymaps/rc-avermedia-m135a.c b/drivers/media/rc/keymaps/rc-avermedia-m135a.c index f6977df1a75b..d275d98d066a 100644 --- a/drivers/media/rc/keymaps/rc-avermedia-m135a.c +++ b/drivers/media/rc/keymaps/rc-avermedia-m135a.c @@ -12,7 +12,7 @@ * * On Avermedia M135A with IR model RM-JX, the same codes exist on both * Positivo (BR) and original IR, initial version and remote control codes - * added by Mauro Carvalho Chehab <mchehab@infradead.org> + * added by Mauro Carvalho Chehab <mchehab@kernel.org> * * Positivo also ships Avermedia M135A with model RM-K6, extra control * codes added by Herton Ronaldo Krzesinski <herton@mandriva.com.br> diff --git a/drivers/media/rc/keymaps/rc-encore-enltv-fm53.c b/drivers/media/rc/keymaps/rc-encore-enltv-fm53.c index e4e78c1f4123..057c13b765ef 100644 --- a/drivers/media/rc/keymaps/rc-encore-enltv-fm53.c +++ b/drivers/media/rc/keymaps/rc-encore-enltv-fm53.c @@ -9,7 +9,7 @@ #include <linux/module.h> /* Encore ENLTV-FM v5.3 - Mauro Carvalho Chehab <mchehab@infradead.org> + Mauro Carvalho Chehab <mchehab@kernel.org> */ static struct rc_map_table encore_enltv_fm53[] = { diff --git a/drivers/media/rc/keymaps/rc-encore-enltv2.c b/drivers/media/rc/keymaps/rc-encore-enltv2.c index c3d4437a6fda..cd0555924456 100644 --- a/drivers/media/rc/keymaps/rc-encore-enltv2.c +++ b/drivers/media/rc/keymaps/rc-encore-enltv2.c @@ -9,7 +9,7 @@ #include <linux/module.h> /* Encore ENLTV2-FM - silver plastic - "Wand Media" written at the botton - Mauro Carvalho Chehab <mchehab@infradead.org> */ + Mauro Carvalho Chehab <mchehab@kernel.org> */ static struct rc_map_table encore_enltv2[] = { { 0x4c, KEY_POWER2 }, diff --git a/drivers/media/rc/keymaps/rc-kaiomy.c b/drivers/media/rc/keymaps/rc-kaiomy.c index f0f88df18606..a00051339842 100644 --- a/drivers/media/rc/keymaps/rc-kaiomy.c +++ b/drivers/media/rc/keymaps/rc-kaiomy.c @@ -9,7 +9,7 @@ #include <linux/module.h> /* Kaiomy TVnPC U2 - Mauro Carvalho Chehab <mchehab@infradead.org> + Mauro Carvalho Chehab <mchehab@kernel.org> */ static struct rc_map_table kaiomy[] = { diff --git a/drivers/media/rc/keymaps/rc-kworld-plus-tv-analog.c b/drivers/media/rc/keymaps/rc-kworld-plus-tv-analog.c index 453e04377de7..db5edde3eeb1 100644 --- a/drivers/media/rc/keymaps/rc-kworld-plus-tv-analog.c +++ b/drivers/media/rc/keymaps/rc-kworld-plus-tv-analog.c @@ -9,7 +9,7 @@ #include <linux/module.h> /* Kworld Plus TV Analog Lite PCI IR - Mauro Carvalho Chehab <mchehab@infradead.org> + Mauro Carvalho Chehab <mchehab@kernel.org> */ static struct rc_map_table kworld_plus_tv_analog[] = { diff --git a/drivers/media/rc/keymaps/rc-pixelview-new.c b/drivers/media/rc/keymaps/rc-pixelview-new.c index 791130f108ff..e4e34f2ccf74 100644 --- a/drivers/media/rc/keymaps/rc-pixelview-new.c +++ b/drivers/media/rc/keymaps/rc-pixelview-new.c @@ -9,7 +9,7 @@ #include <linux/module.h> /* - Mauro Carvalho Chehab <mchehab@infradead.org> + Mauro Carvalho Chehab <mchehab@kernel.org> present on PV MPEG 8000GT */ diff --git a/drivers/media/tuners/tea5761.c b/drivers/media/tuners/tea5761.c index 88b3e80c38ad..d78a2bdb3e36 100644 --- a/drivers/media/tuners/tea5761.c +++ b/drivers/media/tuners/tea5761.c @@ -2,7 +2,7 @@ // For Philips TEA5761 FM Chip // I2C address is always 0x20 (0x10 at 7-bit mode). // -// Copyright (c) 2005-2007 Mauro Carvalho Chehab (mchehab@infradead.org) +// Copyright (c) 2005-2007 Mauro Carvalho Chehab <mchehab@kernel.org> #include <linux/i2c.h> #include <linux/slab.h> @@ -337,5 +337,5 @@ EXPORT_SYMBOL_GPL(tea5761_attach); EXPORT_SYMBOL_GPL(tea5761_autodetection); MODULE_DESCRIPTION("Philips TEA5761 FM tuner driver"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>"); +MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/media/tuners/tea5767.c b/drivers/media/tuners/tea5767.c index 2b2c064d7dc3..016d0d5ec50b 100644 --- a/drivers/media/tuners/tea5767.c +++ b/drivers/media/tuners/tea5767.c @@ -2,7 +2,7 @@ // For Philips TEA5767 FM Chip used on some TV Cards like Prolink Pixelview // I2C address is always 0xC0. // -// Copyright (c) 2005 Mauro Carvalho Chehab (mchehab@infradead.org) +// Copyright (c) 2005 Mauro Carvalho Chehab <mchehab@kernel.org> // // tea5767 autodetection thanks to Torsten Seeboth and Atsushi Nakagawa // from their contributions on DScaler. @@ -469,5 +469,5 @@ EXPORT_SYMBOL_GPL(tea5767_attach); EXPORT_SYMBOL_GPL(tea5767_autodetection); MODULE_DESCRIPTION("Philips TEA5767 FM tuner driver"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>"); +MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/media/tuners/tuner-xc2028-types.h b/drivers/media/tuners/tuner-xc2028-types.h index bb0437c36c03..50d017a4822a 100644 --- a/drivers/media/tuners/tuner-xc2028-types.h +++ b/drivers/media/tuners/tuner-xc2028-types.h @@ -5,7 +5,7 @@ * This file includes internal tipes to be used inside tuner-xc2028. * Shouldn't be included outside tuner-xc2028 * - * Copyright (c) 2007-2008 Mauro Carvalho Chehab (mchehab@infradead.org) + * Copyright (c) 2007-2008 Mauro Carvalho Chehab <mchehab@kernel.org> */ /* xc3028 firmware types */ diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c index fca85e08ebd7..84744e138982 100644 --- a/drivers/media/tuners/tuner-xc2028.c +++ b/drivers/media/tuners/tuner-xc2028.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // tuner-xc2028 // -// Copyright (c) 2007-2008 Mauro Carvalho Chehab (mchehab@infradead.org) +// Copyright (c) 2007-2008 Mauro Carvalho Chehab <mchehab@kernel.org> // // Copyright (c) 2007 Michel Ludwig (michel.ludwig@gmail.com) // - frontend interface @@ -1518,7 +1518,7 @@ EXPORT_SYMBOL(xc2028_attach); MODULE_DESCRIPTION("Xceive xc2028/xc3028 tuner driver"); MODULE_AUTHOR("Michel Ludwig <michel.ludwig@gmail.com>"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>"); +MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>"); MODULE_LICENSE("GPL v2"); MODULE_FIRMWARE(XC2028_DEFAULT_FIRMWARE); MODULE_FIRMWARE(XC3028L_DEFAULT_FIRMWARE); diff --git a/drivers/media/tuners/tuner-xc2028.h b/drivers/media/tuners/tuner-xc2028.h index 03fd6d4233a4..7b58bc06e35c 100644 --- a/drivers/media/tuners/tuner-xc2028.h +++ b/drivers/media/tuners/tuner-xc2028.h @@ -2,7 +2,7 @@ * SPDX-License-Identifier: GPL-2.0 * tuner-xc2028 * - * Copyright (c) 2007-2008 Mauro Carvalho Chehab (mchehab@infradead.org) + * Copyright (c) 2007-2008 Mauro Carvalho Chehab <mchehab@kernel.org> */ #ifndef __TUNER_XC2028_H__ diff --git a/drivers/media/usb/em28xx/em28xx-camera.c b/drivers/media/usb/em28xx/em28xx-camera.c index 3c2694a16ed1..d1e66b503f4d 100644 --- a/drivers/media/usb/em28xx/em28xx-camera.c +++ b/drivers/media/usb/em28xx/em28xx-camera.c @@ -2,7 +2,7 @@ // // em28xx-camera.c - driver for Empia EM25xx/27xx/28xx USB video capture devices // -// Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@infradead.org> +// Copyright (C) 2009 Mauro Carvalho Chehab <mchehab@kernel.org> // Copyright (C) 2013 Frank Schäfer <fschaefer.oss@googlemail.com> // // This program is free software; you can redistribute it and/or modify diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index 6e0e67d23876..7c3203d7044b 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -5,7 +5,7 @@ // // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it> // Markus Rechberger <mrechberger@gmail.com> -// Mauro Carvalho Chehab <mchehab@infradead.org> +// Mauro Carvalho Chehab <mchehab@kernel.org> // Sascha Sommer <saschasommer@freenet.de> // Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com> // diff --git a/drivers/media/usb/em28xx/em28xx-core.c b/drivers/media/usb/em28xx/em28xx-core.c index 36d341fb65dd..f28995383090 100644 --- a/drivers/media/usb/em28xx/em28xx-core.c +++ b/drivers/media/usb/em28xx/em28xx-core.c @@ -4,7 +4,7 @@ // // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it> // Markus Rechberger <mrechberger@gmail.com> -// Mauro Carvalho Chehab <mchehab@infradead.org> +// Mauro Carvalho Chehab <mchehab@kernel.org> // Sascha Sommer <saschasommer@freenet.de> // Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com> // @@ -32,7 +32,7 @@ #define DRIVER_AUTHOR "Ludovico Cavedon <cavedon@sssup.it>, " \ "Markus Rechberger <mrechberger@gmail.com>, " \ - "Mauro Carvalho Chehab <mchehab@infradead.org>, " \ + "Mauro Carvalho Chehab <mchehab@kernel.org>, " \ "Sascha Sommer <saschasommer@freenet.de>" MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/drivers/media/usb/em28xx/em28xx-dvb.c b/drivers/media/usb/em28xx/em28xx-dvb.c index a54cb8dc52c9..3f493e0b0716 100644 --- a/drivers/media/usb/em28xx/em28xx-dvb.c +++ b/drivers/media/usb/em28xx/em28xx-dvb.c @@ -2,7 +2,7 @@ // // DVB device driver for em28xx // -// (c) 2008-2011 Mauro Carvalho Chehab <mchehab@infradead.org> +// (c) 2008-2011 Mauro Carvalho Chehab <mchehab@kernel.org> // // (c) 2008 Devin Heitmueller <devin.heitmueller@gmail.com> // - Fixes for the driver to properly work with HVR-950 @@ -63,7 +63,7 @@ #include "tc90522.h" #include "qm1d1c0042.h" -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>"); +MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>"); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION(DRIVER_DESC " - digital TV interface"); MODULE_VERSION(EM28XX_VERSION); diff --git a/drivers/media/usb/em28xx/em28xx-i2c.c b/drivers/media/usb/em28xx/em28xx-i2c.c index 9151bccd859a..6458682bc6e2 100644 --- a/drivers/media/usb/em28xx/em28xx-i2c.c +++ b/drivers/media/usb/em28xx/em28xx-i2c.c @@ -4,7 +4,7 @@ // // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it> // Markus Rechberger <mrechberger@gmail.com> -// Mauro Carvalho Chehab <mchehab@infradead.org> +// Mauro Carvalho Chehab <mchehab@kernel.org> // Sascha Sommer <saschasommer@freenet.de> // Copyright (C) 2013 Frank Schäfer <fschaefer.oss@googlemail.com> // diff --git a/drivers/media/usb/em28xx/em28xx-input.c b/drivers/media/usb/em28xx/em28xx-input.c index 2dc1be00b8b8..f84a1208d5d3 100644 --- a/drivers/media/usb/em28xx/em28xx-input.c +++ b/drivers/media/usb/em28xx/em28xx-input.c @@ -4,7 +4,7 @@ // // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it> // Markus Rechberger <mrechberger@gmail.com> -// Mauro Carvalho Chehab <mchehab@infradead.org> +// Mauro Carvalho Chehab <mchehab@kernel.org> // Sascha Sommer <saschasommer@freenet.de> // // This program is free software; you can redistribute it and/or modify diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c index d70ee13cc52e..68571bf36d28 100644 --- a/drivers/media/usb/em28xx/em28xx-video.c +++ b/drivers/media/usb/em28xx/em28xx-video.c @@ -5,7 +5,7 @@ // // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it> // Markus Rechberger <mrechberger@gmail.com> -// Mauro Carvalho Chehab <mchehab@infradead.org> +// Mauro Carvalho Chehab <mchehab@kernel.org> // Sascha Sommer <saschasommer@freenet.de> // Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com> // @@ -44,7 +44,7 @@ #define DRIVER_AUTHOR "Ludovico Cavedon <cavedon@sssup.it>, " \ "Markus Rechberger <mrechberger@gmail.com>, " \ - "Mauro Carvalho Chehab <mchehab@infradead.org>, " \ + "Mauro Carvalho Chehab <mchehab@kernel.org>, " \ "Sascha Sommer <saschasommer@freenet.de>" static unsigned int isoc_debug; diff --git a/drivers/media/usb/em28xx/em28xx.h b/drivers/media/usb/em28xx/em28xx.h index 63c7c6124707..b0378e77ddff 100644 --- a/drivers/media/usb/em28xx/em28xx.h +++ b/drivers/media/usb/em28xx/em28xx.h @@ -4,7 +4,7 @@ * * Copyright (C) 2005 Markus Rechberger <mrechberger@gmail.com> * Ludovico Cavedon <cavedon@sssup.it> - * Mauro Carvalho Chehab <mchehab@infradead.org> + * Mauro Carvalho Chehab <mchehab@kernel.org> * Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com> * * Based on the em2800 driver from Sascha Sommer <saschasommer@freenet.de> diff --git a/drivers/media/usb/gspca/zc3xx-reg.h b/drivers/media/usb/gspca/zc3xx-reg.h index a1bd94e8ce52..71fda38e85e0 100644 --- a/drivers/media/usb/gspca/zc3xx-reg.h +++ b/drivers/media/usb/gspca/zc3xx-reg.h @@ -1,7 +1,7 @@ /* * zc030x registers * - * Copyright (c) 2008 Mauro Carvalho Chehab <mchehab@infradead.org> + * Copyright (c) 2008 Mauro Carvalho Chehab <mchehab@kernel.org> * * The register aliases used here came from this driver: * http://zc0302.sourceforge.net/zc0302.php diff --git a/drivers/media/usb/tm6000/tm6000-cards.c b/drivers/media/usb/tm6000/tm6000-cards.c index 70939e96b856..23df50aa0a4a 100644 --- a/drivers/media/usb/tm6000/tm6000-cards.c +++ b/drivers/media/usb/tm6000/tm6000-cards.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // tm6000-cards.c - driver for TM5600/TM6000/TM6010 USB video capture devices // -// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org> +// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org> #include <linux/init.h> #include <linux/module.h> diff --git a/drivers/media/usb/tm6000/tm6000-core.c b/drivers/media/usb/tm6000/tm6000-core.c index 23a1332d98e6..d3229aa45fcb 100644 --- a/drivers/media/usb/tm6000/tm6000-core.c +++ b/drivers/media/usb/tm6000/tm6000-core.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // tm6000-core.c - driver for TM5600/TM6000/TM6010 USB video capture devices // -// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org> +// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org> // // Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com> // - DVB-T support diff --git a/drivers/media/usb/tm6000/tm6000-i2c.c b/drivers/media/usb/tm6000/tm6000-i2c.c index c9a62bbff27a..659b63febf85 100644 --- a/drivers/media/usb/tm6000/tm6000-i2c.c +++ b/drivers/media/usb/tm6000/tm6000-i2c.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // tm6000-i2c.c - driver for TM5600/TM6000/TM6010 USB video capture devices // -// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org> +// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org> // // Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com> // - Fix SMBus Read Byte command diff --git a/drivers/media/usb/tm6000/tm6000-regs.h b/drivers/media/usb/tm6000/tm6000-regs.h index 21587fcf11e3..d10424673db9 100644 --- a/drivers/media/usb/tm6000/tm6000-regs.h +++ b/drivers/media/usb/tm6000/tm6000-regs.h @@ -2,7 +2,7 @@ * SPDX-License-Identifier: GPL-2.0 * tm6000-regs.h - driver for TM5600/TM6000/TM6010 USB video capture devices * - * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org> + * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org> */ /* diff --git a/drivers/media/usb/tm6000/tm6000-usb-isoc.h b/drivers/media/usb/tm6000/tm6000-usb-isoc.h index 5c615b0a7a46..b275dbce3a1b 100644 --- a/drivers/media/usb/tm6000/tm6000-usb-isoc.h +++ b/drivers/media/usb/tm6000/tm6000-usb-isoc.h @@ -2,7 +2,7 @@ * SPDX-License-Identifier: GPL-2.0 * tm6000-buf.c - driver for TM5600/TM6000/TM6010 USB video capture devices * - * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org> + * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org> */ #include <linux/videodev2.h> diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c index b2399d4266da..aa85fe31c835 100644 --- a/drivers/media/usb/tm6000/tm6000-video.c +++ b/drivers/media/usb/tm6000/tm6000-video.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // tm6000-video.c - driver for TM5600/TM6000/TM6010 USB video capture devices // -// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org> +// Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org> // // Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com> // - Fixed module load/unload diff --git a/drivers/media/usb/tm6000/tm6000.h b/drivers/media/usb/tm6000/tm6000.h index e1e45770e28d..0864ed7314eb 100644 --- a/drivers/media/usb/tm6000/tm6000.h +++ b/drivers/media/usb/tm6000/tm6000.h @@ -2,7 +2,7 @@ * SPDX-License-Identifier: GPL-2.0 * tm6000.h - driver for TM5600/TM6000/TM6010 USB video capture devices * - * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org> + * Copyright (c) 2006-2007 Mauro Carvalho Chehab <mchehab@kernel.org> * * Copyright (c) 2007 Michel Ludwig <michel.ludwig@gmail.com> * - DVB-T support diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c index 1d0b2208e8fb..c080dcc75393 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -10,7 +10,7 @@ * 2 of the License, or (at your option) any later version. * * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1) - * Mauro Carvalho Chehab <mchehab@infradead.org> (version 2) + * Mauro Carvalho Chehab <mchehab@kernel.org> (version 2) * * Fixes: 20000516 Claudio Matsuoka <claudio@conectiva.com> * - Added procfs support @@ -1072,7 +1072,7 @@ static void __exit videodev_exit(void) subsys_initcall(videodev_init); module_exit(videodev_exit) -MODULE_AUTHOR("Alan Cox, Mauro Carvalho Chehab <mchehab@infradead.org>"); +MODULE_AUTHOR("Alan Cox, Mauro Carvalho Chehab <mchehab@kernel.org>"); MODULE_DESCRIPTION("Device registrar for Video4Linux drivers v2"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV_MAJOR(VIDEO_MAJOR); diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index f48c505550e0..de5d96dbe69e 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. * * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> (version 1) - * Mauro Carvalho Chehab <mchehab@infradead.org> (version 2) + * Mauro Carvalho Chehab <mchehab@kernel.org> (version 2) */ #include <linux/mm.h> diff --git a/drivers/media/v4l2-core/videobuf-core.c b/drivers/media/v4l2-core/videobuf-core.c index 2b3981842b4b..7491b337002c 100644 --- a/drivers/media/v4l2-core/videobuf-core.c +++ b/drivers/media/v4l2-core/videobuf-core.c @@ -1,11 +1,11 @@ /* * generic helper functions for handling video4linux capture buffers * - * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org> + * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org> * * Highly based on video-buf written originally by: * (c) 2001,02 Gerd Knorr <kraxel@bytesex.org> - * (c) 2006 Mauro Carvalho Chehab, <mchehab@infradead.org> + * (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org> * (c) 2006 Ted Walther and John Sokol * * This program is free software; you can redistribute it and/or modify @@ -38,7 +38,7 @@ static int debug; module_param(debug, int, 0644); MODULE_DESCRIPTION("helper module to manage video4linux buffers"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>"); +MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>"); MODULE_LICENSE("GPL"); #define dprintk(level, fmt, arg...) \ diff --git a/drivers/media/v4l2-core/videobuf-dma-contig.c b/drivers/media/v4l2-core/videobuf-dma-contig.c index e02353e340dd..f46132504d88 100644 --- a/drivers/media/v4l2-core/videobuf-dma-contig.c +++ b/drivers/media/v4l2-core/videobuf-dma-contig.c @@ -7,7 +7,7 @@ * Copyright (c) 2008 Magnus Damm * * Based on videobuf-vmalloc.c, - * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org> + * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index add2edb23eac..7770034aae28 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -6,11 +6,11 @@ * into PAGE_SIZE chunks). They also assume the driver does not need * to touch the video data. * - * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org> + * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org> * * Highly based on video-buf written originally by: * (c) 2001,02 Gerd Knorr <kraxel@bytesex.org> - * (c) 2006 Mauro Carvalho Chehab, <mchehab@infradead.org> + * (c) 2006 Mauro Carvalho Chehab, <mchehab@kernel.org> * (c) 2006 Ted Walther and John Sokol * * This program is free software; you can redistribute it and/or modify @@ -48,7 +48,7 @@ static int debug; module_param(debug, int, 0644); MODULE_DESCRIPTION("helper module to manage video4linux dma sg buffers"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>"); +MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>"); MODULE_LICENSE("GPL"); #define dprintk(level, fmt, arg...) \ diff --git a/drivers/media/v4l2-core/videobuf-vmalloc.c b/drivers/media/v4l2-core/videobuf-vmalloc.c index 2ff7fcc77b11..45fe781aeeec 100644 --- a/drivers/media/v4l2-core/videobuf-vmalloc.c +++ b/drivers/media/v4l2-core/videobuf-vmalloc.c @@ -6,7 +6,7 @@ * into PAGE_SIZE chunks). They also assume the driver does not need * to touch the video data. * - * (c) 2007 Mauro Carvalho Chehab, <mchehab@infradead.org> + * (c) 2007 Mauro Carvalho Chehab, <mchehab@kernel.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -41,7 +41,7 @@ static int debug; module_param(debug, int, 0644); MODULE_DESCRIPTION("helper module to manage video4linux vmalloc buffers"); -MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@infradead.org>"); +MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>"); MODULE_LICENSE("GPL"); #define dprintk(level, fmt, arg...) \ diff --git a/drivers/mtd/nand/onenand/omap2.c b/drivers/mtd/nand/onenand/omap2.c index 9c159f0dd9a6..321137158ff3 100644 --- a/drivers/mtd/nand/onenand/omap2.c +++ b/drivers/mtd/nand/onenand/omap2.c @@ -375,56 +375,42 @@ static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, { struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); struct onenand_chip *this = mtd->priv; - dma_addr_t dma_src, dma_dst; - int bram_offset; + struct device *dev = &c->pdev->dev; void *buf = (void *)buffer; + dma_addr_t dma_src, dma_dst; + int bram_offset, err; size_t xtra; - int ret; bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; - if (bram_offset & 3 || (size_t)buf & 3 || count < 384) - goto out_copy; - - /* panic_write() may be in an interrupt context */ - if (in_interrupt() || oops_in_progress) + /* + * If the buffer address is not DMA-able, len is not long enough to make + * DMA transfers profitable or panic_write() may be in an interrupt + * context fallback to PIO mode. + */ + if (!virt_addr_valid(buf) || bram_offset & 3 || (size_t)buf & 3 || + count < 384 || in_interrupt() || oops_in_progress ) goto out_copy; - if (buf >= high_memory) { - struct page *p1; - - if (((size_t)buf & PAGE_MASK) != - ((size_t)(buf + count - 1) & PAGE_MASK)) - goto out_copy; - p1 = vmalloc_to_page(buf); - if (!p1) - goto out_copy; - buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK); - } - xtra = count & 3; if (xtra) { count -= xtra; memcpy(buf + count, this->base + bram_offset + count, xtra); } + dma_dst = dma_map_single(dev, buf, count, DMA_FROM_DEVICE); dma_src = c->phys_base + bram_offset; - dma_dst = dma_map_single(&c->pdev->dev, buf, count, DMA_FROM_DEVICE); - if (dma_mapping_error(&c->pdev->dev, dma_dst)) { - dev_err(&c->pdev->dev, - "Couldn't DMA map a %d byte buffer\n", - count); - goto out_copy; - } - ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); - dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE); - - if (ret) { - dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); + if (dma_mapping_error(dev, dma_dst)) { + dev_err(dev, "Couldn't DMA map a %d byte buffer\n", count); goto out_copy; } - return 0; + err = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); + dma_unmap_single(dev, dma_dst, count, DMA_FROM_DEVICE); + if (!err) + return 0; + + dev_err(dev, "timeout waiting for DMA\n"); out_copy: memcpy(buf, this->base + bram_offset, count); @@ -437,49 +423,34 @@ static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, { struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); struct onenand_chip *this = mtd->priv; - dma_addr_t dma_src, dma_dst; - int bram_offset; + struct device *dev = &c->pdev->dev; void *buf = (void *)buffer; - int ret; + dma_addr_t dma_src, dma_dst; + int bram_offset, err; bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; - if (bram_offset & 3 || (size_t)buf & 3 || count < 384) - goto out_copy; - - /* panic_write() may be in an interrupt context */ - if (in_interrupt() || oops_in_progress) + /* + * If the buffer address is not DMA-able, len is not long enough to make + * DMA transfers profitable or panic_write() may be in an interrupt + * context fallback to PIO mode. + */ + if (!virt_addr_valid(buf) || bram_offset & 3 || (size_t)buf & 3 || + count < 384 || in_interrupt() || oops_in_progress ) goto out_copy; - if (buf >= high_memory) { - struct page *p1; - - if (((size_t)buf & PAGE_MASK) != - ((size_t)(buf + count - 1) & PAGE_MASK)) - goto out_copy; - p1 = vmalloc_to_page(buf); - if (!p1) - goto out_copy; - buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK); - } - - dma_src = dma_map_single(&c->pdev->dev, buf, count, DMA_TO_DEVICE); + dma_src = dma_map_single(dev, buf, count, DMA_TO_DEVICE); dma_dst = c->phys_base + bram_offset; - if (dma_mapping_error(&c->pdev->dev, dma_src)) { - dev_err(&c->pdev->dev, - "Couldn't DMA map a %d byte buffer\n", - count); - return -1; - } - - ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); - dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE); - - if (ret) { - dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); + if (dma_mapping_error(dev, dma_src)) { + dev_err(dev, "Couldn't DMA map a %d byte buffer\n", count); goto out_copy; } - return 0; + err = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); + dma_unmap_page(dev, dma_src, count, DMA_TO_DEVICE); + if (!err) + return 0; + + dev_err(dev, "timeout waiting for DMA\n"); out_copy: memcpy(this->base + bram_offset, buf, count); diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 1d779a35ac8e..db5ec4e8bde9 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -1074,7 +1074,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip, return ret; ret = marvell_nfc_wait_op(chip, - chip->data_interface.timings.sdr.tPROG_max); + PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max)); return ret; } @@ -1408,6 +1408,7 @@ marvell_nfc_hw_ecc_bch_write_chunk(struct nand_chip *chip, int chunk, struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip); struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + u32 xtype; int ret; struct marvell_nfc_op nfc_op = { .ndcb[0] = NDCB0_CMD_TYPE(TYPE_WRITE) | NDCB0_LEN_OVRD, @@ -1423,7 +1424,12 @@ marvell_nfc_hw_ecc_bch_write_chunk(struct nand_chip *chip, int chunk, * last naked write. */ if (chunk == 0) { - nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_WRITE_DISPATCH) | + if (lt->nchunks == 1) + xtype = XTYPE_MONOLITHIC_RW; + else + xtype = XTYPE_WRITE_DISPATCH; + + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(xtype) | NDCB0_ADDR_CYC(marvell_nand->addr_cyc) | NDCB0_CMD1(NAND_CMD_SEQIN); nfc_op.ndcb[1] |= NDCB1_ADDRS_PAGE(page); @@ -1494,7 +1500,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct mtd_info *mtd, } ret = marvell_nfc_wait_op(chip, - chip->data_interface.timings.sdr.tPROG_max); + PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max)); marvell_nfc_disable_hw_ecc(chip); diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 72f3a89da513..f28c3a555861 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -706,12 +706,17 @@ static void nand_wait_status_ready(struct mtd_info *mtd, unsigned long timeo) */ int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms) { + const struct nand_sdr_timings *timings; u8 status = 0; int ret; if (!chip->exec_op) return -ENOTSUPP; + /* Wait tWB before polling the STATUS reg. */ + timings = nand_get_sdr_timings(&chip->data_interface); + ndelay(PSEC_TO_NSEC(timings->tWB_max)); + ret = nand_status_op(chip, NULL); if (ret) return ret; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 1ed9529e7bd1..5eb0df2e5464 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -450,7 +450,7 @@ static void rlb_update_client(struct rlb_client_info *client_info) { int i; - if (!client_info->slave) + if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst)) return; for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { @@ -943,6 +943,10 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], skb->priority = TC_PRIO_CONTROL; skb->dev = slave->dev; + netdev_dbg(slave->bond->dev, + "Send learning packet: dev %s mac %pM vlan %d\n", + slave->dev->name, mac_addr, vid); + if (vid) __vlan_hwaccel_put_tag(skb, vlan_proto, vid); @@ -965,14 +969,13 @@ static int alb_upper_dev_walk(struct net_device *upper, void *_data) u8 *mac_addr = data->mac_addr; struct bond_vlan_tag *tags; - if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { - if (strict_match && - ether_addr_equal_64bits(mac_addr, - upper->dev_addr)) { + if (is_vlan_dev(upper) && + bond->nest_level == vlan_get_encap_level(upper) - 1) { + if (upper->addr_assign_type == NET_ADDR_STOLEN) { alb_send_lp_vid(slave, mac_addr, vlan_dev_vlan_proto(upper), vlan_dev_vlan_id(upper)); - } else if (!strict_match) { + } else { alb_send_lp_vid(slave, upper->dev_addr, vlan_dev_vlan_proto(upper), vlan_dev_vlan_id(upper)); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 718e4914e3a0..1f1e97b26f95 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1738,6 +1738,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, if (bond_mode_uses_xmit_hash(bond)) bond_update_slave_arr(bond, NULL); + bond->nest_level = dev_get_nest_level(bond_dev); + netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", slave_dev->name, bond_is_active_slave(new_slave) ? "an active" : "a backup", diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index b1779566c5bb..3c71f1cb205f 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -605,7 +605,7 @@ void can_bus_off(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); - netdev_dbg(dev, "bus-off\n"); + netdev_info(dev, "bus-off\n"); netif_carrier_off(dev); diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 634c51e6b8ae..d53a45bf2a72 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -200,6 +200,7 @@ #define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */ #define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */ #define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6) /* No interrupt for error passive */ +#define FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN BIT(7) /* default to BE register access */ /* Structure of the message buffer */ struct flexcan_mb { @@ -288,6 +289,12 @@ struct flexcan_priv { static const struct flexcan_devtype_data fsl_p1010_devtype_data = { .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE | + FLEXCAN_QUIRK_BROKEN_PERR_STATE | + FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN, +}; + +static const struct flexcan_devtype_data fsl_imx25_devtype_data = { + .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE | FLEXCAN_QUIRK_BROKEN_PERR_STATE, }; @@ -1251,9 +1258,9 @@ static void unregister_flexcandev(struct net_device *dev) static const struct of_device_id flexcan_of_match[] = { { .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, }, { .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, }, - { .compatible = "fsl,imx53-flexcan", .data = &fsl_p1010_devtype_data, }, - { .compatible = "fsl,imx35-flexcan", .data = &fsl_p1010_devtype_data, }, - { .compatible = "fsl,imx25-flexcan", .data = &fsl_p1010_devtype_data, }, + { .compatible = "fsl,imx53-flexcan", .data = &fsl_imx25_devtype_data, }, + { .compatible = "fsl,imx35-flexcan", .data = &fsl_imx25_devtype_data, }, + { .compatible = "fsl,imx25-flexcan", .data = &fsl_imx25_devtype_data, }, { .compatible = "fsl,p1010-flexcan", .data = &fsl_p1010_devtype_data, }, { .compatible = "fsl,vf610-flexcan", .data = &fsl_vf610_devtype_data, }, { .compatible = "fsl,ls1021ar2-flexcan", .data = &fsl_ls1021a_r2_devtype_data, }, @@ -1337,18 +1344,13 @@ static int flexcan_probe(struct platform_device *pdev) priv = netdev_priv(dev); - if (of_property_read_bool(pdev->dev.of_node, "big-endian")) { + if (of_property_read_bool(pdev->dev.of_node, "big-endian") || + devtype_data->quirks & FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN) { priv->read = flexcan_read_be; priv->write = flexcan_write_be; } else { - if (of_device_is_compatible(pdev->dev.of_node, - "fsl,p1010-flexcan")) { - priv->read = flexcan_read_be; - priv->write = flexcan_write_be; - } else { - priv->read = flexcan_read_le; - priv->write = flexcan_write_le; - } + priv->read = flexcan_read_le; + priv->write = flexcan_write_le; } priv->can.clock.freq = clock_freq; diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index 5590c559a8ca..53e320c92a8b 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -91,6 +91,7 @@ #define HI3110_STAT_BUSOFF BIT(2) #define HI3110_STAT_ERRP BIT(3) #define HI3110_STAT_ERRW BIT(4) +#define HI3110_STAT_TXMTY BIT(7) #define HI3110_BTR0_SJW_SHIFT 6 #define HI3110_BTR0_BRP_SHIFT 0 @@ -427,8 +428,10 @@ static int hi3110_get_berr_counter(const struct net_device *net, struct hi3110_priv *priv = netdev_priv(net); struct spi_device *spi = priv->spi; + mutex_lock(&priv->hi3110_lock); bec->txerr = hi3110_read(spi, HI3110_READ_TEC); bec->rxerr = hi3110_read(spi, HI3110_READ_REC); + mutex_unlock(&priv->hi3110_lock); return 0; } @@ -735,10 +738,7 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) } } - if (intf == 0) - break; - - if (intf & HI3110_INT_TXCPLT) { + if (priv->tx_len && statf & HI3110_STAT_TXMTY) { net->stats.tx_packets++; net->stats.tx_bytes += priv->tx_len - 1; can_led_event(net, CAN_LED_EVENT_TX); @@ -748,6 +748,9 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) } netif_wake_queue(net); } + + if (intf == 0) + break; } mutex_unlock(&priv->hi3110_lock); return IRQ_HANDLED; diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 63587b8e6825..daed57d3d209 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1179,7 +1179,7 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev, skb = alloc_can_skb(priv->netdev, &cf); if (!skb) { - stats->tx_dropped++; + stats->rx_dropped++; return; } diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 3d2091099f7f..5b4374f21d76 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3370,6 +3370,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3391,6 +3392,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 0, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3410,6 +3412,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 8, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3431,6 +3434,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3452,6 +3456,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 0, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3472,6 +3477,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 11, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x10, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3493,6 +3499,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3514,6 +3521,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 0, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3535,6 +3543,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3557,6 +3566,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3578,6 +3588,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3600,6 +3611,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3621,6 +3633,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 0, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3641,6 +3654,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .tag_protocol = DSA_TAG_PROTO_DSA, @@ -3663,6 +3677,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3684,6 +3699,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 11, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3707,6 +3723,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3730,6 +3747,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3753,6 +3771,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3776,6 +3795,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3798,6 +3818,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 11, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x10, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3820,6 +3841,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3841,6 +3863,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3863,6 +3886,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3885,6 +3909,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3907,6 +3932,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 80490f66bc06..12b7f4649b25 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -114,6 +114,7 @@ struct mv88e6xxx_info { unsigned int num_gpio; unsigned int max_vid; unsigned int port_base_addr; + unsigned int phy_base_addr; unsigned int global1_addr; unsigned int global2_addr; unsigned int age_time_coeff; diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 0ce627fded48..8d22d66d84b7 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -1118,7 +1118,7 @@ int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip, err = irq; goto out; } - bus->irq[chip->info->port_base_addr + phy] = irq; + bus->irq[chip->info->phy_base_addr + phy] = irq; } return 0; out: diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 32f6d2e24d66..1a1a6380c128 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -95,6 +95,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self) /*rss rings */ cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF); cfg->vecs = min(cfg->vecs, num_online_cpus()); + cfg->vecs = min(cfg->vecs, self->irqvecs); /* cfg->vecs should be power of 2 for RSS */ if (cfg->vecs >= 8U) cfg->vecs = 8U; @@ -246,6 +247,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self) self->ndev->hw_features |= aq_hw_caps->hw_features; self->ndev->features = aq_hw_caps->hw_features; + self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM | + NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_LRO; self->ndev->priv_flags = aq_hw_caps->hw_priv_flags; self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 219b550d1665..faa533a0ec47 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -80,6 +80,7 @@ struct aq_nic_s { struct pci_dev *pdev; unsigned int msix_entry_mask; + u32 irqvecs; }; static inline struct device *aq_nic_get_dev(struct aq_nic_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index ecc6306f940f..a50e08bb4748 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -267,16 +267,16 @@ static int aq_pci_probe(struct pci_dev *pdev, numvecs = min(numvecs, num_online_cpus()); /*enable interrupts */ #if !AQ_CFG_FORCE_LEGACY_INT - err = pci_alloc_irq_vectors(self->pdev, numvecs, numvecs, - PCI_IRQ_MSIX); - - if (err < 0) { - err = pci_alloc_irq_vectors(self->pdev, 1, 1, - PCI_IRQ_MSI | PCI_IRQ_LEGACY); - if (err < 0) - goto err_hwinit; + numvecs = pci_alloc_irq_vectors(self->pdev, 1, numvecs, + PCI_IRQ_MSIX | PCI_IRQ_MSI | + PCI_IRQ_LEGACY); + + if (numvecs < 0) { + err = numvecs; + goto err_hwinit; } #endif + self->irqvecs = numvecs; /* net device init */ aq_nic_cfg_start(self); @@ -298,9 +298,9 @@ err_free_aq_hw: kfree(self->aq_hw); err_ioremap: free_netdev(ndev); -err_pci_func: - pci_release_regions(pdev); err_ndev: + pci_release_regions(pdev); +err_pci_func: pci_disable_device(pdev); return err; } diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f9a3c1a76d5d..f33b25fbca63 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2144,14 +2144,21 @@ static const struct net_device_ops bcm_sysport_netdev_ops = { .ndo_select_queue = bcm_sysport_select_queue, }; -static int bcm_sysport_map_queues(struct net_device *dev, +static int bcm_sysport_map_queues(struct notifier_block *nb, struct dsa_notifier_register_info *info) { - struct bcm_sysport_priv *priv = netdev_priv(dev); struct bcm_sysport_tx_ring *ring; + struct bcm_sysport_priv *priv; struct net_device *slave_dev; unsigned int num_tx_queues; unsigned int q, start, port; + struct net_device *dev; + + priv = container_of(nb, struct bcm_sysport_priv, dsa_notifier); + if (priv->netdev != info->master) + return 0; + + dev = info->master; /* We can't be setting up queue inspection for non directly attached * switches @@ -2174,11 +2181,12 @@ static int bcm_sysport_map_queues(struct net_device *dev, if (priv->is_lite) netif_set_real_num_tx_queues(slave_dev, slave_dev->num_tx_queues / 2); + num_tx_queues = slave_dev->real_num_tx_queues; if (priv->per_port_num_tx_queues && priv->per_port_num_tx_queues != num_tx_queues) - netdev_warn(slave_dev, "asymetric number of per-port queues\n"); + netdev_warn(slave_dev, "asymmetric number of per-port queues\n"); priv->per_port_num_tx_queues = num_tx_queues; @@ -2201,7 +2209,7 @@ static int bcm_sysport_map_queues(struct net_device *dev, return 0; } -static int bcm_sysport_dsa_notifier(struct notifier_block *unused, +static int bcm_sysport_dsa_notifier(struct notifier_block *nb, unsigned long event, void *ptr) { struct dsa_notifier_register_info *info; @@ -2211,7 +2219,7 @@ static int bcm_sysport_dsa_notifier(struct notifier_block *unused, info = ptr; - return notifier_from_errno(bcm_sysport_map_queues(info->master, info)); + return notifier_from_errno(bcm_sysport_map_queues(nb, info)); } #define REV_FMT "v%2x.%02x" diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 08bbb639be1a..9f59b1270a7c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -8733,14 +8733,15 @@ static void tg3_free_consistent(struct tg3 *tp) tg3_mem_rx_release(tp); tg3_mem_tx_release(tp); - /* Protect tg3_get_stats64() from reading freed tp->hw_stats. */ - tg3_full_lock(tp, 0); + /* tp->hw_stats can be referenced safely: + * 1. under rtnl_lock + * 2. or under tp->lock if TG3_FLAG_INIT_COMPLETE is set. + */ if (tp->hw_stats) { dma_free_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats), tp->hw_stats, tp->stats_mapping); tp->hw_stats = NULL; } - tg3_full_unlock(tp); } /* @@ -14178,7 +14179,7 @@ static void tg3_get_stats64(struct net_device *dev, struct tg3 *tp = netdev_priv(dev); spin_lock_bh(&tp->lock); - if (!tp->hw_stats) { + if (!tp->hw_stats || !tg3_flag(tp, INIT_COMPLETE)) { *stats = tp->net_stats_prev; spin_unlock_bh(&tp->lock); return; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 24d2865b8806..005283c7cdfe 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3433,8 +3433,8 @@ static int adap_config_hma(struct adapter *adapter) sgl = adapter->hma.sgt->sgl; node = dev_to_node(adapter->pdev_dev); for_each_sg(sgl, iter, sgt->orig_nents, i) { - newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL, - page_order); + newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL | + __GFP_ZERO, page_order); if (!newpage) { dev_err(adapter->pdev_dev, "Not enough memory for HMA page allocation\n"); @@ -5474,6 +5474,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } spin_lock_init(&adapter->mbox_lock); INIT_LIST_HEAD(&adapter->mlist.list); + adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS; pci_set_drvdata(pdev, adapter); if (func != ent->driver_data) { @@ -5508,8 +5509,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_free_adapter; } - adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS; - /* PCI device has been enabled */ adapter->flags |= DEV_ENABLED; memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map)); diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 4df282ed22c7..0beee2cc2ddd 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -61,7 +61,7 @@ static const char hw_stat_gstrings[][ETH_GSTRING_LEN] = { static const char tx_fw_stat_gstrings[][ETH_GSTRING_LEN] = { "tx-single-collision", "tx-multiple-collision", - "tx-late-collsion", + "tx-late-collision", "tx-aborted-frames", "tx-lost-frames", "tx-carrier-sense-errors", diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 5909a4407e38..7c511f144ed6 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -1014,10 +1014,10 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, desc = ICE_CTL_Q_DESC(cq->rq, ntc); desc_idx = ntc; + cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); flags = le16_to_cpu(desc->flags); if (flags & ICE_AQ_FLAG_ERR) { ret_code = ICE_ERR_AQ_ERROR; - cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive Queue Event received with error 0x%x\n", cq->rq_last_status); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 68af127987bc..cead23e3db0c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -943,8 +943,8 @@ err2: kfree(ipsec->ip_tbl); kfree(ipsec->rx_tbl); kfree(ipsec->tx_tbl); + kfree(ipsec); err1: - kfree(adapter->ipsec); netdev_err(adapter->netdev, "Unable to allocate memory for SA tables"); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 3123267dfba9..9592f3e3e42e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3427,6 +3427,9 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) hw->phy.sfp_setup_needed = false; } + if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + return status; + /* Reset PHY */ if (!hw->phy.reset_disable && hw->phy.ops.reset) hw->phy.ops.reset(hw); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index e3d04f226d57..850f8af95e49 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4137,7 +4137,7 @@ out_drop: return NETDEV_TX_OK; } -static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbevf_ring *tx_ring; diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 4202f9b5b966..6f410235987c 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -942,6 +942,7 @@ struct mvpp2 { struct clk *pp_clk; struct clk *gop_clk; struct clk *mg_clk; + struct clk *mg_core_clk; struct clk *axi_clk; /* List of pointers to port structures */ @@ -8768,18 +8769,27 @@ static int mvpp2_probe(struct platform_device *pdev) err = clk_prepare_enable(priv->mg_clk); if (err < 0) goto err_gop_clk; + + priv->mg_core_clk = devm_clk_get(&pdev->dev, "mg_core_clk"); + if (IS_ERR(priv->mg_core_clk)) { + priv->mg_core_clk = NULL; + } else { + err = clk_prepare_enable(priv->mg_core_clk); + if (err < 0) + goto err_mg_clk; + } } priv->axi_clk = devm_clk_get(&pdev->dev, "axi_clk"); if (IS_ERR(priv->axi_clk)) { err = PTR_ERR(priv->axi_clk); if (err == -EPROBE_DEFER) - goto err_gop_clk; + goto err_mg_core_clk; priv->axi_clk = NULL; } else { err = clk_prepare_enable(priv->axi_clk); if (err < 0) - goto err_gop_clk; + goto err_mg_core_clk; } /* Get system's tclk rate */ @@ -8793,7 +8803,7 @@ static int mvpp2_probe(struct platform_device *pdev) if (priv->hw_version == MVPP22) { err = dma_set_mask(&pdev->dev, MVPP2_DESC_DMA_MASK); if (err) - goto err_mg_clk; + goto err_axi_clk; /* Sadly, the BM pools all share the same register to * store the high 32 bits of their address. So they * must all have the same high 32 bits, which forces @@ -8801,14 +8811,14 @@ static int mvpp2_probe(struct platform_device *pdev) */ err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) - goto err_mg_clk; + goto err_axi_clk; } /* Initialize network controller */ err = mvpp2_init(pdev, priv); if (err < 0) { dev_err(&pdev->dev, "failed to initialize controller\n"); - goto err_mg_clk; + goto err_axi_clk; } /* Initialize ports */ @@ -8821,7 +8831,7 @@ static int mvpp2_probe(struct platform_device *pdev) if (priv->port_count == 0) { dev_err(&pdev->dev, "no ports enabled\n"); err = -ENODEV; - goto err_mg_clk; + goto err_axi_clk; } /* Statistics must be gathered regularly because some of them (like @@ -8849,8 +8859,13 @@ err_port_probe: mvpp2_port_remove(priv->port_list[i]); i++; } -err_mg_clk: +err_axi_clk: clk_disable_unprepare(priv->axi_clk); + +err_mg_core_clk: + if (priv->hw_version == MVPP22) + clk_disable_unprepare(priv->mg_core_clk); +err_mg_clk: if (priv->hw_version == MVPP22) clk_disable_unprepare(priv->mg_clk); err_gop_clk: @@ -8897,6 +8912,7 @@ static int mvpp2_remove(struct platform_device *pdev) return 0; clk_disable_unprepare(priv->axi_clk); + clk_disable_unprepare(priv->mg_core_clk); clk_disable_unprepare(priv->mg_clk); clk_disable_unprepare(priv->pp_clk); clk_disable_unprepare(priv->gop_clk); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index a30a2e95d13f..f11b45001cad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1027,6 +1027,22 @@ static int mlx4_en_set_coalesce(struct net_device *dev, if (!coal->tx_max_coalesced_frames_irq) return -EINVAL; + if (coal->tx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME || + coal->rx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME || + coal->rx_coalesce_usecs_low > MLX4_EN_MAX_COAL_TIME || + coal->rx_coalesce_usecs_high > MLX4_EN_MAX_COAL_TIME) { + netdev_info(dev, "%s: maximum coalesce time supported is %d usecs\n", + __func__, MLX4_EN_MAX_COAL_TIME); + return -ERANGE; + } + + if (coal->tx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS || + coal->rx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS) { + netdev_info(dev, "%s: maximum coalesced frames supported is %d\n", + __func__, MLX4_EN_MAX_COAL_PKTS); + return -ERANGE; + } + priv->rx_frames = (coal->rx_max_coalesced_frames == MLX4_EN_AUTO_CONF) ? MLX4_EN_RX_COAL_TARGET : diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e0adac4a9a19..9670b33fc9b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3324,12 +3324,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, MAX_TX_RINGS, GFP_KERNEL); if (!priv->tx_ring[t]) { err = -ENOMEM; - goto err_free_tx; + goto out; } priv->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS, GFP_KERNEL); if (!priv->tx_cq[t]) { - kfree(priv->tx_ring[t]); err = -ENOMEM; goto out; } @@ -3582,11 +3581,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, return 0; -err_free_tx: - while (t--) { - kfree(priv->tx_ring[t]); - kfree(priv->tx_cq[t]); - } out: mlx4_en_destroy_netdev(dev); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index bfef69235d71..211578ffc70d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1317,7 +1317,7 @@ static int mlx4_mf_unbond(struct mlx4_dev *dev) ret = mlx4_unbond_fs_rules(dev); if (ret) - mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret); + mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret); ret1 = mlx4_unbond_mac_table(dev); if (ret1) { mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index f7c81133594f..ace6545f82e6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -132,6 +132,9 @@ #define MLX4_EN_TX_COAL_PKTS 16 #define MLX4_EN_TX_COAL_TIME 0x10 +#define MLX4_EN_MAX_COAL_PKTS U16_MAX +#define MLX4_EN_MAX_COAL_TIME U16_MAX + #define MLX4_EN_RX_RATE_LOW 400000 #define MLX4_EN_RX_COAL_TIME_LOW 0 #define MLX4_EN_RX_RATE_HIGH 450000 @@ -552,8 +555,8 @@ struct mlx4_en_priv { u16 rx_usecs_low; u32 pkt_rate_high; u16 rx_usecs_high; - u16 sample_interval; - u16 adaptive_rx_coal; + u32 sample_interval; + u32 adaptive_rx_coal; u32 msg_enable; u32 loopback_ok; u32 validate_loopback; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 3d46ef48d5b8..c641d5656b2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1007,12 +1007,14 @@ static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv) mutex_lock(&priv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto out; - new_channels.params = priv->channels.params; mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + /* Skip if tx_min_inline is the same */ if (new_channels.params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index d8f68e4d1018..876c3e4c6193 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -877,13 +877,14 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { }; static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, u16 mtu) { u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; params->hard_mtu = MLX5E_ETH_HARD_MTU; + params->sw_mtu = mtu; params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE; params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE; @@ -931,7 +932,7 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev, priv->channels.params.num_channels = profile->max_nch(mdev); - mlx5e_build_rep_params(mdev, &priv->channels.params); + mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu); mlx5e_build_rep_netdev(netdev); mlx5e_timestamp_init(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 707976482c09..027f54ac1ca2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -290,7 +290,7 @@ static int mlx5e_test_loopback(struct mlx5e_priv *priv) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { netdev_err(priv->netdev, - "\tCan't perform loobpack test while device is down\n"); + "\tCan't perform loopback test while device is down\n"); return -ENODEV; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 4197001f9801..b94276db3ce9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1261,6 +1261,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, f->mask); addr_type = key->addr_type; + /* the HW doesn't support frag first/later */ + if (mask->flags & FLOW_DIS_FIRST_FRAG) + return -EOPNOTSUPP; + if (mask->flags & FLOW_DIS_IS_FRAGMENT) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, @@ -1864,7 +1868,8 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, } ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); - if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { + if (modify_ip_header && ip_proto != IPPROTO_TCP && + ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { pr_info("can't offload re-write of ip proto %d\n", ip_proto); return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 20297108528a..5532aa3675c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -255,7 +255,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, dma_addr = dma_map_single(sq->pdev, skb_data, headlen, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) - return -ENOMEM; + goto dma_unmap_wqe_err; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; @@ -273,7 +273,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) - return -ENOMEM; + goto dma_unmap_wqe_err; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; @@ -285,6 +285,10 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, } return num_dma; + +dma_unmap_wqe_err: + mlx5e_dma_unmap_wqe_err(sq, num_dma); + return -ENOMEM; } static inline void @@ -380,17 +384,15 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, (struct mlx5_wqe_data_seg *)cseg + ds_cnt); if (unlikely(num_dma < 0)) - goto dma_unmap_wqe_err; + goto err_drop; mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, num_bytes, num_dma, wi, cseg); return NETDEV_TX_OK; -dma_unmap_wqe_err: +err_drop: sq->stats.dropped++; - mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); - dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -645,17 +647,15 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, (struct mlx5_wqe_data_seg *)cseg + ds_cnt); if (unlikely(num_dma < 0)) - goto dma_unmap_wqe_err; + goto err_drop; mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, num_bytes, num_dma, wi, cseg); return NETDEV_TX_OK; -dma_unmap_wqe_err: +err_drop: sq->stats.dropped++; - mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); - dev_kfree_skb_any(skb); return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index c1c94974e16b..1814f803bd2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -34,6 +34,9 @@ #include <linux/module.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif #include "mlx5_core.h" #include "fpga/core.h" #include "eswitch.h" @@ -923,3 +926,28 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, MLX5_SET(query_eq_in, in, eq_number, eq->eqn); return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq; + +#ifdef CONFIG_RFS_ACCEL + if (dev->rmap) { + free_irq_cpu_rmap(dev->rmap); + dev->rmap = NULL; + } +#endif + list_for_each_entry(eq, &table->comp_eqs_list, list) + free_irq(eq->irqn, eq); + + free_irq(table->pages_eq.irqn, &table->pages_eq); + free_irq(table->async_eq.irqn, &table->async_eq); + free_irq(table->cmd_eq.irqn, &table->cmd_eq); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (MLX5_CAP_GEN(dev, pg)) + free_irq(table->pfault_eq.irqn, &table->pfault_eq); +#endif + pci_free_irq_vectors(dev->pdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 332bc56306bf..1352d13eedb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2175,26 +2175,35 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, memset(vf_stats, 0, sizeof(*vf_stats)); vf_stats->rx_packets = MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_ib_unicast.packets) + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets) + MLX5_GET_CTR(out, received_eth_broadcast.packets); vf_stats->rx_bytes = MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_ib_unicast.octets) + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_ib_multicast.octets) + MLX5_GET_CTR(out, received_eth_broadcast.octets); vf_stats->tx_packets = MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_unicast.packets) + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.packets) + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); vf_stats->tx_bytes = MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_unicast.octets) + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.octets) + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); vf_stats->multicast = - MLX5_GET_CTR(out, received_eth_multicast.packets); + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets); vf_stats->broadcast = MLX5_GET_CTR(out, received_eth_broadcast.packets); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index de51e7c39bc8..c39c1692e674 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -187,6 +187,7 @@ static void del_sw_ns(struct fs_node *node); static void del_sw_hw_rule(struct fs_node *node); static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, struct mlx5_flow_destination *d2); +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns); static struct mlx5_flow_rule * find_flow_rule(struct fs_fte *fte, struct mlx5_flow_destination *dest); @@ -481,7 +482,8 @@ static void del_sw_hw_rule(struct fs_node *node) if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); + modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; update_fte = true; goto out; @@ -2351,23 +2353,27 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering) static int init_root_ns(struct mlx5_flow_steering *steering) { + int err; + steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); if (!steering->root_ns) - goto cleanup; + return -ENOMEM; - if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node)) - goto cleanup; + err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node); + if (err) + goto out_err; set_prio_attrs(steering->root_ns); - - if (create_anchor_flow_table(steering)) - goto cleanup; + err = create_anchor_flow_table(steering); + if (err) + goto out_err; return 0; -cleanup: - mlx5_cleanup_fs(steering->dev); - return -ENOMEM; +out_err: + cleanup_root_ns(steering->root_ns); + steering->root_ns = NULL; + return err; } static void clean_tree(struct fs_node *node) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 63a8ea31601c..e2c465b0b3f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1587,6 +1587,14 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) mlx5_enter_error_state(dev, true); + /* Some platforms requiring freeing the IRQ's in the shutdown + * flow. If they aren't freed they can't be allocated after + * kexec. There is no need to cleanup the mlx5_core software + * contexts. + */ + mlx5_irq_clear_affinity_hints(dev); + mlx5_core_eq_free_irqs(dev); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 7d001fe6e631..023882d9a22e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -128,6 +128,8 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u32 *out, int outlen); int mlx5_start_eqs(struct mlx5_core_dev *dev); void mlx5_stop_eqs(struct mlx5_core_dev *dev); +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); void mlx5_cq_tasklet_cb(unsigned long data); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 93ea56620a24..e13ac3b8dff7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1100,11 +1100,11 @@ err_emad_init: err_alloc_lag_mapping: mlxsw_ports_fini(mlxsw_core); err_ports_init: - mlxsw_bus->fini(bus_priv); -err_bus_init: if (!reload) devlink_resources_unregister(devlink, NULL); err_register_resources: + mlxsw_bus->fini(bus_priv); +err_bus_init: if (!reload) devlink_free(devlink); err_devlink_alloc: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c11c9a635866..4ed01182a82c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1718,13 +1718,11 @@ __mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *dev = mlxsw_sp_port->dev; int err; - if (bridge_port->bridge_device->multicast_enabled) { - if (bridge_port->bridge_device->multicast_enabled) { - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, - false); - if (err) - netdev_err(dev, "Unable to remove port from SMID\n"); - } + if (bridge_port->bridge_device->multicast_enabled && + !bridge_port->mrouter) { + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); + if (err) + netdev_err(dev, "Unable to remove port from SMID\n"); } err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index b3567a596fc1..80df9a5d4217 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -183,17 +183,21 @@ static int nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, const struct tc_action *action, struct nfp_fl_pre_tunnel *pre_tun, - enum nfp_flower_tun_type tun_type) + enum nfp_flower_tun_type tun_type, + struct net_device *netdev) { size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; + struct net *net; if (ip_tun->options_len) return -EOPNOTSUPP; + net = dev_net(netdev); + set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; @@ -204,6 +208,7 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; + set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; /* Complete pre_tunnel action. */ pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; @@ -511,7 +516,8 @@ nfp_flower_loop_action(const struct tc_action *a, *a_len += sizeof(struct nfp_fl_pre_tunnel); set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type); + err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type, + netdev); if (err) return err; *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index b6c0fd053a50..bee4367a2c38 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -190,7 +190,10 @@ struct nfp_fl_set_ipv4_udp_tun { __be16 reserved; __be64 tun_id __packed; __be32 tun_type_index; - __be32 extra[3]; + __be16 reserved2; + u8 ttl; + u8 reserved3; + __be32 extra[2]; }; /* Metadata with L2 (1W/4B) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index ad02592a82b7..84e3b9f5abb1 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -52,8 +52,6 @@ #define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL -#define NFP_FLOWER_FRAME_HEADROOM 158 - static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn) { return "FLOWER"; @@ -360,7 +358,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) } SET_NETDEV_DEV(repr, &priv->nn->pdev->dev); - nfp_net_get_mac_addr(app->pf, port); + nfp_net_get_mac_addr(app->pf, repr, port); cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port); err = nfp_repr_init(app, repr, @@ -559,22 +557,6 @@ static void nfp_flower_clean(struct nfp_app *app) app->priv = NULL; } -static int -nfp_flower_check_mtu(struct nfp_app *app, struct net_device *netdev, - int new_mtu) -{ - /* The flower fw reserves NFP_FLOWER_FRAME_HEADROOM bytes of the - * supported max MTU to allow for appending tunnel headers. To prevent - * unexpected behaviour this needs to be accounted for. - */ - if (new_mtu > netdev->max_mtu - NFP_FLOWER_FRAME_HEADROOM) { - nfp_err(app->cpp, "New MTU (%d) is not valid\n", new_mtu); - return -EINVAL; - } - - return 0; -} - static bool nfp_flower_check_ack(struct nfp_flower_priv *app_priv) { bool ret; @@ -656,7 +638,6 @@ const struct nfp_app_type app_flower = { .init = nfp_flower_init, .clean = nfp_flower_clean, - .check_mtu = nfp_flower_check_mtu, .repr_change_mtu = nfp_flower_repr_change_mtu, .vnic_alloc = nfp_flower_vnic_alloc, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c index 2a2f2fbc8850..b9618c37403f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c @@ -69,7 +69,7 @@ int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, if (err) return err < 0 ? err : 0; - nfp_net_get_mac_addr(app->pf, nn->port); + nfp_net_get_mac_addr(app->pf, nn->dp.netdev, nn->port); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index add46e28212b..42211083b51f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -171,7 +171,9 @@ void nfp_net_pci_remove(struct nfp_pf *pf); int nfp_hwmon_register(struct nfp_pf *pf); void nfp_hwmon_unregister(struct nfp_pf *pf); -void nfp_net_get_mac_addr(struct nfp_pf *pf, struct nfp_port *port); +void +nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev, + struct nfp_port *port); bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 15fa47f622aa..45cd2092e498 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -67,23 +67,26 @@ /** * nfp_net_get_mac_addr() - Get the MAC address. * @pf: NFP PF handle + * @netdev: net_device to set MAC address on * @port: NFP port structure * * First try to get the MAC address from NSP ETH table. If that * fails generate a random address. */ -void nfp_net_get_mac_addr(struct nfp_pf *pf, struct nfp_port *port) +void +nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev, + struct nfp_port *port) { struct nfp_eth_table_port *eth_port; eth_port = __nfp_port_get_eth_port(port); if (!eth_port) { - eth_hw_addr_random(port->netdev); + eth_hw_addr_random(netdev); return; } - ether_addr_copy(port->netdev->dev_addr, eth_port->mac_addr); - ether_addr_copy(port->netdev->perm_addr, eth_port->mac_addr); + ether_addr_copy(netdev->dev_addr, eth_port->mac_addr); + ether_addr_copy(netdev->perm_addr, eth_port->mac_addr); } static struct nfp_eth_table_port * @@ -511,16 +514,18 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf) return PTR_ERR(mem); } - min_size = NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1); - pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats", - "net.macstats", min_size, - &pf->mac_stats_bar); - if (IS_ERR(pf->mac_stats_mem)) { - if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) { - err = PTR_ERR(pf->mac_stats_mem); - goto err_unmap_ctrl; + if (pf->eth_tbl) { + min_size = NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1); + pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats", + "net.macstats", min_size, + &pf->mac_stats_bar); + if (IS_ERR(pf->mac_stats_mem)) { + if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) { + err = PTR_ERR(pf->mac_stats_mem); + goto err_unmap_ctrl; + } + pf->mac_stats_mem = NULL; } - pf->mac_stats_mem = NULL; } pf->vf_cfg_mem = nfp_net_pf_map_rtsym(pf, "net.vfcfg", diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 27364b7572fc..b092894dd128 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -1170,7 +1170,7 @@ static void *nixge_get_nvmem_address(struct device *dev) cell = nvmem_cell_get(dev, "address"); if (IS_ERR(cell)) - return cell; + return NULL; mac = nvmem_cell_read(cell, &cell_size); nvmem_cell_put(cell); @@ -1183,7 +1183,7 @@ static int nixge_probe(struct platform_device *pdev) struct nixge_priv *priv; struct net_device *ndev; struct resource *dmares; - const char *mac_addr; + const u8 *mac_addr; int err; ndev = alloc_etherdev(sizeof(*priv)); @@ -1202,10 +1202,12 @@ static int nixge_probe(struct platform_device *pdev) ndev->max_mtu = NIXGE_JUMBO_MTU; mac_addr = nixge_get_nvmem_address(&pdev->dev); - if (mac_addr && is_valid_ether_addr(mac_addr)) + if (mac_addr && is_valid_ether_addr(mac_addr)) { ether_addr_copy(ndev->dev_addr, mac_addr); - else + kfree(mac_addr); + } else { eth_hw_addr_random(ndev); + } priv = netdev_priv(ndev); priv->ndev = ndev; diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index e874504e8b28..8667799d0069 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -115,8 +115,7 @@ int qed_l2_alloc(struct qed_hwfn *p_hwfn) void qed_l2_setup(struct qed_hwfn *p_hwfn) { - if (p_hwfn->hw_info.personality != QED_PCI_ETH && - p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE) + if (!QED_IS_L2_PERSONALITY(p_hwfn)) return; mutex_init(&p_hwfn->p_l2_info->lock); @@ -126,8 +125,7 @@ void qed_l2_free(struct qed_hwfn *p_hwfn) { u32 i; - if (p_hwfn->hw_info.personality != QED_PCI_ETH && - p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE) + if (!QED_IS_L2_PERSONALITY(p_hwfn)) return; if (!p_hwfn->p_l2_info) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 74fc626b1ec1..38502815d681 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -2370,7 +2370,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) u8 flags = 0; if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { - DP_INFO(cdev, "Cannot transmit a checksumed packet\n"); + DP_INFO(cdev, "Cannot transmit a checksummed packet\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 9854aa9139af..7870ae2a6f7e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -680,7 +680,7 @@ static int qed_nic_stop(struct qed_dev *cdev) tasklet_disable(p_hwfn->sp_dpc); p_hwfn->b_sp_dpc_enabled = false; DP_VERBOSE(cdev, NETIF_MSG_IFDOWN, - "Disabled sp taskelt [hwfn %d] at %p\n", + "Disabled sp tasklet [hwfn %d] at %p\n", i, p_hwfn->sp_dpc); } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index fb7c2d1562ae..6acfd43c1a4f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -848,7 +848,7 @@ int qed_roce_query_qp(struct qed_hwfn *p_hwfn, if (!(qp->resp_offloaded)) { DP_NOTICE(p_hwfn, - "The responder's qp should be offloded before requester's\n"); + "The responder's qp should be offloaded before requester's\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 50b142fad6b8..1900bf7e67d1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -238,7 +238,7 @@ qede_rdma_get_free_event_node(struct qede_dev *edev) } if (!found) { - event_node = kzalloc(sizeof(*event_node), GFP_KERNEL); + event_node = kzalloc(sizeof(*event_node), GFP_ATOMIC); if (!event_node) { DP_NOTICE(edev, "qedr: Could not allocate memory for rdma work\n"); diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index d24b47b8e0b2..d118da5a10a2 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -2224,7 +2224,7 @@ static void rtl8139_poll_controller(struct net_device *dev) struct rtl8139_private *tp = netdev_priv(dev); const int irq = tp->pci_dev->irq; - disable_irq(irq); + disable_irq_nosync(irq); rtl8139_interrupt(irq, dev); enable_irq(irq); } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 604ae78381ae..c7aac1fc99e8 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4981,6 +4981,9 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) static void rtl_pll_power_up(struct rtl8169_private *tp) { rtl_generic_op(tp, tp->pll_power_ops.up); + + /* give MAC/PHY some time to resume */ + msleep(20); } static void rtl_init_pll_power_ops(struct rtl8169_private *tp) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 63036d9bf3e6..d90a7b1f4088 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4784,8 +4784,9 @@ expire: * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that * the rule is not removed by efx_rps_hash_del() below. */ - ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority, - filter_idx, true) == 0; + if (ret) + ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority, + filter_idx, true) == 0; /* While we can't safely dereference rule (we dropped the lock), we can * still test it for NULL. */ diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 64a94f242027..d2e254f2f72b 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -839,6 +839,8 @@ static void efx_filter_rfs_work(struct work_struct *data) int rc; rc = efx->type->filter_insert(efx, &req->spec, true); + if (rc >= 0) + rc %= efx->type->max_rx_ip_filters; if (efx->rps_hash_table) { spin_lock_bh(&efx->rps_hash_lock); rule = efx_rps_hash_find(efx, &req->spec); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index f081de4f38d7..88c12474a0c3 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3443,7 +3443,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, len = (val & RCR_ENTRY_L2_LEN) >> RCR_ENTRY_L2_LEN_SHIFT; - len -= ETH_FCS_LEN; + append_size = len + ETH_HLEN + ETH_FCS_LEN; addr = (val & RCR_ENTRY_PKT_BUF_ADDR) << RCR_ENTRY_PKT_BUF_ADDR_SHIFT; @@ -3453,7 +3453,6 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, RCR_ENTRY_PKTBUFSZ_SHIFT]; off = addr & ~PAGE_MASK; - append_size = rcr_size; if (num_rcr == 1) { int ptype; @@ -3466,7 +3465,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, else skb_checksum_none_assert(skb); } else if (!(val & RCR_ENTRY_MULTI)) - append_size = len - skb->len; + append_size = append_size - skb->len; niu_rx_skb_append(skb, page, off, append_size, rcr_size); if ((page->index + rp->rbr_block_size) - rcr_size == addr) { diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 74f828412055..28d893b93d30 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1340,6 +1340,8 @@ static inline void cpsw_add_dual_emac_def_ale_entries( cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, ALE_VLAN | ALE_SECURE, slave->port_vlan); + cpsw_ale_control_set(cpsw->ale, slave_port, + ALE_PORT_DROP_UNKNOWN_VLAN, 1); } static void soft_reset_slave(struct cpsw_slave *slave) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index ecc84954c511..da07ccdf84bf 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1840,7 +1840,8 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto rx_handler_failed; } - ret = netdev_upper_dev_link(vf_netdev, ndev, NULL); + ret = netdev_master_upper_dev_link(vf_netdev, ndev, + NULL, NULL, NULL); if (ret != 0) { netdev_err(vf_netdev, "can not set master device %s (err = %d)\n", diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 6b127be781d9..e7ca5b5f39ed 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1288,7 +1288,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, rndis_device->link_state ? "down" : "up"); if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) - return net_device; + goto out; rndis_filter_query_link_speed(rndis_device, net_device); diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 9fb9b565a002..4f684cbcdc57 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -1045,7 +1045,7 @@ static int atusb_probe(struct usb_interface *interface, atusb->tx_dr.bRequest = ATUSB_TX; atusb->tx_dr.wValue = cpu_to_le16(0); - atusb->tx_urb = usb_alloc_urb(0, GFP_ATOMIC); + atusb->tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!atusb->tx_urb) goto fail; diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 55a22c761808..de0d7f28a181 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -1267,7 +1267,7 @@ mcr20a_probe(struct spi_device *spi) ret = mcr20a_get_platform_data(spi, pdata); if (ret < 0) { dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n"); - return ret; + goto free_pdata; } /* init reset gpio */ @@ -1275,7 +1275,7 @@ mcr20a_probe(struct spi_device *spi) ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio, GPIOF_OUT_INIT_HIGH, "reset"); if (ret) - return ret; + goto free_pdata; } /* reset mcr20a */ @@ -1291,7 +1291,8 @@ mcr20a_probe(struct spi_device *spi) hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops); if (!hw) { dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto free_pdata; } /* init mcr20a local data */ @@ -1308,8 +1309,10 @@ mcr20a_probe(struct spi_device *spi) /* init buf */ lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL); - if (!lp->buf) - return -ENOMEM; + if (!lp->buf) { + ret = -ENOMEM; + goto free_dev; + } mcr20a_setup_tx_spi_messages(lp); mcr20a_setup_rx_spi_messages(lp); @@ -1366,6 +1369,8 @@ mcr20a_probe(struct spi_device *spi) free_dev: ieee802154_free_hw(lp->hw); +free_pdata: + kfree(pdata); return ret; } diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 3bb6b66dc7bf..f9c25912eb98 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -720,6 +720,15 @@ static struct phy_driver broadcom_drivers[] = { .get_strings = bcm_phy_get_strings, .get_stats = bcm53xx_phy_get_stats, .probe = bcm53xx_phy_probe, +}, { + .phy_id = PHY_ID_BCM89610, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM89610", + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = bcm54xx_config_init, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, } }; module_phy_driver(broadcom_drivers); @@ -741,6 +750,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCMAC131, 0xfffffff0 }, { PHY_ID_BCM5241, 0xfffffff0 }, { PHY_ID_BCM5395, 0xfffffff0 }, + { PHY_ID_BCM89610, 0xfffffff0 }, { } }; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ac23322a32e1..9e4ba8e80a18 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -535,8 +535,17 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id, /* Grab the bits from PHYIR1, and put them in the upper half */ phy_reg = mdiobus_read(bus, addr, MII_PHYSID1); - if (phy_reg < 0) + if (phy_reg < 0) { + /* if there is no device, return without an error so scanning + * the bus works properly + */ + if (phy_reg == -EIO || phy_reg == -ENODEV) { + *phy_id = 0xffffffff; + return 0; + } + return -EIO; + } *phy_id = (phy_reg & 0xffff) << 16; diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 0381da78d228..fd6c23f69c2f 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -125,7 +125,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, if (id->base.br_nominal) { if (id->base.br_nominal != 255) { br_nom = id->base.br_nominal * 100; - br_min = br_nom + id->base.br_nominal * id->ext.br_min; + br_min = br_nom - id->base.br_nominal * id->ext.br_min; br_max = br_nom + id->base.br_nominal * id->ext.br_max; } else if (id->ext.br_max) { br_nom = 250 * id->ext.br_max; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c853e7410f5a..42565dd33aa6 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1098,6 +1098,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9080, 8)}, {QMI_FIXED_INTF(0x05c6, 0x9083, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, + {QMI_FIXED_INTF(0x05c6, 0x90b2, 3)}, /* ublox R410M */ {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ @@ -1343,6 +1344,18 @@ static int qmi_wwan_probe(struct usb_interface *intf, id->driver_info = (unsigned long)&qmi_wwan_info; } + /* There are devices where the same interface number can be + * configured as different functions. We should only bind to + * vendor specific functions when matching on interface number + */ + if (id->match_flags & USB_DEVICE_ID_MATCH_INT_NUMBER && + desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC) { + dev_dbg(&intf->dev, + "Rejecting interface number match for class %02x\n", + desc->bInterfaceClass); + return -ENODEV; + } + /* Quectel EC20 quirk where we've QMI on interface 4 instead of 0 */ if (quectel_ec20_detected(intf) && desc->bInterfaceNumber == 0) { dev_dbg(&intf->dev, "Quectel EC20 quirk, skipping interface 0\n"); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c index 9277f4c2bfeb..94e177d7c9b5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -459,7 +459,7 @@ static void brcmf_fw_free_request(struct brcmf_fw_request *req) kfree(req); } -static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx) +static int brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx) { struct brcmf_fw *fwctx = ctx; struct brcmf_fw_item *cur; @@ -498,13 +498,10 @@ static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx) brcmf_dbg(TRACE, "nvram %p len %d\n", nvram, nvram_length); cur->nv_data.data = nvram; cur->nv_data.len = nvram_length; - return; + return 0; fail: - brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev)); - fwctx->done(fwctx->dev, -ENOENT, NULL); - brcmf_fw_free_request(fwctx->req); - kfree(fwctx); + return -ENOENT; } static int brcmf_fw_request_next_item(struct brcmf_fw *fwctx, bool async) @@ -553,20 +550,27 @@ static void brcmf_fw_request_done(const struct firmware *fw, void *ctx) brcmf_dbg(TRACE, "enter: firmware %s %sfound\n", cur->path, fw ? "" : "not "); - if (fw) { - if (cur->type == BRCMF_FW_TYPE_BINARY) - cur->binary = fw; - else if (cur->type == BRCMF_FW_TYPE_NVRAM) - brcmf_fw_request_nvram_done(fw, fwctx); - else - release_firmware(fw); - } else if (cur->type == BRCMF_FW_TYPE_NVRAM) { - brcmf_fw_request_nvram_done(NULL, fwctx); - } else if (!(cur->flags & BRCMF_FW_REQF_OPTIONAL)) { + if (!fw) ret = -ENOENT; + + switch (cur->type) { + case BRCMF_FW_TYPE_NVRAM: + ret = brcmf_fw_request_nvram_done(fw, fwctx); + break; + case BRCMF_FW_TYPE_BINARY: + cur->binary = fw; + break; + default: + /* something fishy here so bail out early */ + brcmf_err("unknown fw type: %d\n", cur->type); + release_firmware(fw); + ret = -EINVAL; goto fail; } + if (ret < 0 && !(cur->flags & BRCMF_FW_REQF_OPTIONAL)) + goto fail; + do { if (++fwctx->curpos == fwctx->req->n_items) { ret = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h index 7af3a0f51b77..a17c4a79b8d4 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -30,7 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -749,13 +750,9 @@ struct iwl_scan_req_umac { } __packed; #define IWL_SCAN_REQ_UMAC_SIZE_V8 sizeof(struct iwl_scan_req_umac) -#define IWL_SCAN_REQ_UMAC_SIZE_V7 (sizeof(struct iwl_scan_req_umac) - \ - 4 * sizeof(u8)) -#define IWL_SCAN_REQ_UMAC_SIZE_V6 (sizeof(struct iwl_scan_req_umac) - \ - 2 * sizeof(u8) - sizeof(__le16)) -#define IWL_SCAN_REQ_UMAC_SIZE_V1 (sizeof(struct iwl_scan_req_umac) - \ - 2 * sizeof(__le32) - 2 * sizeof(u8) - \ - sizeof(__le16)) +#define IWL_SCAN_REQ_UMAC_SIZE_V7 48 +#define IWL_SCAN_REQ_UMAC_SIZE_V6 44 +#define IWL_SCAN_REQ_UMAC_SIZE_V1 36 /** * struct iwl_umac_scan_abort diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 8928613e033e..ca0174680af9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -76,6 +76,7 @@ #include "iwl-io.h" #include "iwl-csr.h" #include "fw/acpi.h" +#include "fw/api/nvm-reg.h" /* NVM offsets (in words) definitions */ enum nvm_offsets { @@ -146,8 +147,8 @@ static const u8 iwl_ext_nvm_channels[] = { 149, 153, 157, 161, 165, 169, 173, 177, 181 }; -#define IWL_NUM_CHANNELS ARRAY_SIZE(iwl_nvm_channels) -#define IWL_NUM_CHANNELS_EXT ARRAY_SIZE(iwl_ext_nvm_channels) +#define IWL_NVM_NUM_CHANNELS ARRAY_SIZE(iwl_nvm_channels) +#define IWL_NVM_NUM_CHANNELS_EXT ARRAY_SIZE(iwl_ext_nvm_channels) #define NUM_2GHZ_CHANNELS 14 #define NUM_2GHZ_CHANNELS_EXT 14 #define FIRST_2GHZ_HT_MINUS 5 @@ -301,11 +302,11 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, const u8 *nvm_chan; if (cfg->nvm_type != IWL_NVM_EXT) { - num_of_ch = IWL_NUM_CHANNELS; + num_of_ch = IWL_NVM_NUM_CHANNELS; nvm_chan = &iwl_nvm_channels[0]; num_2ghz_channels = NUM_2GHZ_CHANNELS; } else { - num_of_ch = IWL_NUM_CHANNELS_EXT; + num_of_ch = IWL_NVM_NUM_CHANNELS_EXT; nvm_chan = &iwl_ext_nvm_channels[0]; num_2ghz_channels = NUM_2GHZ_CHANNELS_EXT; } @@ -720,12 +721,12 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, if (cfg->nvm_type != IWL_NVM_EXT) data = kzalloc(sizeof(*data) + sizeof(struct ieee80211_channel) * - IWL_NUM_CHANNELS, + IWL_NVM_NUM_CHANNELS, GFP_KERNEL); else data = kzalloc(sizeof(*data) + sizeof(struct ieee80211_channel) * - IWL_NUM_CHANNELS_EXT, + IWL_NVM_NUM_CHANNELS_EXT, GFP_KERNEL); if (!data) return NULL; @@ -842,24 +843,34 @@ static u32 iwl_nvm_get_regdom_bw_flags(const u8 *nvm_chan, return flags; } +struct regdb_ptrs { + struct ieee80211_wmm_rule *rule; + u32 token; +}; + struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, - int num_of_ch, __le32 *channels, u16 fw_mcc) + int num_of_ch, __le32 *channels, u16 fw_mcc, + u16 geo_info) { int ch_idx; u16 ch_flags; u32 reg_rule_flags, prev_reg_rule_flags = 0; const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ? iwl_ext_nvm_channels : iwl_nvm_channels; - struct ieee80211_regdomain *regd; - int size_of_regd; + struct ieee80211_regdomain *regd, *copy_rd; + int size_of_regd, regd_to_copy, wmms_to_copy; + int size_of_wmms = 0; struct ieee80211_reg_rule *rule; + struct ieee80211_wmm_rule *wmm_rule, *d_wmm, *s_wmm; + struct regdb_ptrs *regdb_ptrs; enum nl80211_band band; int center_freq, prev_center_freq = 0; - int valid_rules = 0; + int valid_rules = 0, n_wmms = 0; + int i; bool new_rule; int max_num_ch = cfg->nvm_type == IWL_NVM_EXT ? - IWL_NUM_CHANNELS_EXT : IWL_NUM_CHANNELS; + IWL_NVM_NUM_CHANNELS_EXT : IWL_NVM_NUM_CHANNELS; if (WARN_ON_ONCE(num_of_ch > NL80211_MAX_SUPP_REG_RULES)) return ERR_PTR(-EINVAL); @@ -875,10 +886,26 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, sizeof(struct ieee80211_regdomain) + num_of_ch * sizeof(struct ieee80211_reg_rule); - regd = kzalloc(size_of_regd, GFP_KERNEL); + if (geo_info & GEO_WMM_ETSI_5GHZ_INFO) + size_of_wmms = + num_of_ch * sizeof(struct ieee80211_wmm_rule); + + regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL); if (!regd) return ERR_PTR(-ENOMEM); + regdb_ptrs = kcalloc(num_of_ch, sizeof(*regdb_ptrs), GFP_KERNEL); + if (!regdb_ptrs) { + copy_rd = ERR_PTR(-ENOMEM); + goto out; + } + + /* set alpha2 from FW. */ + regd->alpha2[0] = fw_mcc >> 8; + regd->alpha2[1] = fw_mcc & 0xff; + + wmm_rule = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd); + for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) { ch_flags = (u16)__le32_to_cpup(channels + ch_idx); band = (ch_idx < NUM_2GHZ_CHANNELS) ? @@ -927,14 +954,66 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, iwl_nvm_print_channel_flags(dev, IWL_DL_LAR, nvm_chan[ch_idx], ch_flags); + + if (!(geo_info & GEO_WMM_ETSI_5GHZ_INFO) || + band == NL80211_BAND_2GHZ) + continue; + + if (!reg_query_regdb_wmm(regd->alpha2, center_freq, + ®db_ptrs[n_wmms].token, wmm_rule)) { + /* Add only new rules */ + for (i = 0; i < n_wmms; i++) { + if (regdb_ptrs[i].token == + regdb_ptrs[n_wmms].token) { + rule->wmm_rule = regdb_ptrs[i].rule; + break; + } + } + if (i == n_wmms) { + rule->wmm_rule = wmm_rule; + regdb_ptrs[n_wmms++].rule = wmm_rule; + wmm_rule++; + } + } } regd->n_reg_rules = valid_rules; + regd->n_wmm_rules = n_wmms; - /* set alpha2 from FW. */ - regd->alpha2[0] = fw_mcc >> 8; - regd->alpha2[1] = fw_mcc & 0xff; + /* + * Narrow down regdom for unused regulatory rules to prevent hole + * between reg rules to wmm rules. + */ + regd_to_copy = sizeof(struct ieee80211_regdomain) + + valid_rules * sizeof(struct ieee80211_reg_rule); + + wmms_to_copy = sizeof(struct ieee80211_wmm_rule) * n_wmms; + + copy_rd = kzalloc(regd_to_copy + wmms_to_copy, GFP_KERNEL); + if (!copy_rd) { + copy_rd = ERR_PTR(-ENOMEM); + goto out; + } + + memcpy(copy_rd, regd, regd_to_copy); + memcpy((u8 *)copy_rd + regd_to_copy, (u8 *)regd + size_of_regd, + wmms_to_copy); + + d_wmm = (struct ieee80211_wmm_rule *)((u8 *)copy_rd + regd_to_copy); + s_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd); + + for (i = 0; i < regd->n_reg_rules; i++) { + if (!regd->reg_rules[i].wmm_rule) + continue; + + copy_rd->reg_rules[i].wmm_rule = d_wmm + + (regd->reg_rules[i].wmm_rule - s_wmm) / + sizeof(struct ieee80211_wmm_rule); + } - return regd; +out: + kfree(regdb_ptrs); + kfree(regd); + return copy_rd; } IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index 306736c7a042..3071a23b7606 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -101,12 +101,14 @@ void iwl_init_sbands(struct device *dev, const struct iwl_cfg *cfg, * * This function parses the regulatory channel data received as a * MCC_UPDATE_CMD command. It returns a newly allocation regulatory domain, - * to be fed into the regulatory core. An ERR_PTR is returned on error. + * to be fed into the regulatory core. In case the geo_info is set handle + * accordingly. An ERR_PTR is returned on error. * If not given to the regulatory core, the user is responsible for freeing * the regdomain returned here with kfree. */ struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, - int num_of_ch, __le32 *channels, u16 fw_mcc); + int num_of_ch, __le32 *channels, u16 fw_mcc, + u16 geo_info); #endif /* __iwl_nvm_parse_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 51b30424575b..90f8c89ea59c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -311,7 +311,8 @@ struct ieee80211_regdomain *iwl_mvm_get_regdomain(struct wiphy *wiphy, regd = iwl_parse_nvm_mcc_info(mvm->trans->dev, mvm->cfg, __le32_to_cpu(resp->n_channels), resp->channels, - __le16_to_cpu(resp->mcc)); + __le16_to_cpu(resp->mcc), + __le16_to_cpu(resp->geo_info)); /* Store the return source id */ src_id = resp->source_id; kfree(resp); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 96d26cfae90b..4a017a0d71ea 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3236,6 +3236,7 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) GENL_SET_ERR_MSG(info,"MAC is no valid source addr"); NL_SET_BAD_ATTR(info->extack, info->attrs[HWSIM_ATTR_PERM_ADDR]); + kfree(hwname); return -EINVAL; } diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c index 8b6b07a936f5..b026e80940a4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c +++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c @@ -158,16 +158,6 @@ static u8 halbtc_get_wifi_central_chnl(struct btc_coexist *btcoexist) static u8 rtl_get_hwpg_single_ant_path(struct rtl_priv *rtlpriv) { - struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params; - - /* override ant_num / ant_path */ - if (mod_params->ant_sel) { - rtlpriv->btcoexist.btc_info.ant_num = - (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1); - - rtlpriv->btcoexist.btc_info.single_ant_path = - (mod_params->ant_sel == 1 ? 0 : 1); - } return rtlpriv->btcoexist.btc_info.single_ant_path; } @@ -178,7 +168,6 @@ static u8 rtl_get_hwpg_bt_type(struct rtl_priv *rtlpriv) static u8 rtl_get_hwpg_ant_num(struct rtl_priv *rtlpriv) { - struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params; u8 num; if (rtlpriv->btcoexist.btc_info.ant_num == ANT_X2) @@ -186,10 +175,6 @@ static u8 rtl_get_hwpg_ant_num(struct rtl_priv *rtlpriv) else num = 1; - /* override ant_num / ant_path */ - if (mod_params->ant_sel) - num = (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1) + 1; - return num; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index e7bbbc95cdb1..b4f3f91b590e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -848,6 +848,9 @@ static bool _rtl8723be_init_mac(struct ieee80211_hw *hw) return false; } + if (rtlpriv->cfg->ops->get_btc_status()) + rtlpriv->btcoexist.btc_ops->btc_power_on_setting(rtlpriv); + bytetmp = rtl_read_byte(rtlpriv, REG_MULTI_FUNC_CTRL); rtl_write_byte(rtlpriv, REG_MULTI_FUNC_CTRL, bytetmp | BIT(3)); @@ -2696,21 +2699,21 @@ void rtl8723be_read_bt_coexist_info_from_hwpg(struct ieee80211_hw *hw, rtlpriv->btcoexist.btc_info.bt_type = BT_RTL8723B; rtlpriv->btcoexist.btc_info.ant_num = (value & 0x1); rtlpriv->btcoexist.btc_info.single_ant_path = - (value & 0x40); /*0xc3[6]*/ + (value & 0x40 ? ANT_AUX : ANT_MAIN); /*0xc3[6]*/ } else { rtlpriv->btcoexist.btc_info.btcoexist = 0; rtlpriv->btcoexist.btc_info.bt_type = BT_RTL8723B; rtlpriv->btcoexist.btc_info.ant_num = ANT_X2; - rtlpriv->btcoexist.btc_info.single_ant_path = 0; + rtlpriv->btcoexist.btc_info.single_ant_path = ANT_MAIN; } /* override ant_num / ant_path */ if (mod_params->ant_sel) { rtlpriv->btcoexist.btc_info.ant_num = - (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1); + (mod_params->ant_sel == 1 ? ANT_X1 : ANT_X2); rtlpriv->btcoexist.btc_info.single_ant_path = - (mod_params->ant_sel == 1 ? 0 : 1); + (mod_params->ant_sel == 1 ? ANT_AUX : ANT_MAIN); } } diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index d27e33960e77..ce1754054a07 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -2823,6 +2823,11 @@ enum bt_ant_num { ANT_X1 = 1, }; +enum bt_ant_path { + ANT_MAIN = 0, + ANT_AUX = 1, +}; + enum bt_co_type { BT_2WIRE = 0, BT_ISSC_3WIRE = 1, diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index b979cf3bce65..88a8b5916624 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -27,7 +27,7 @@ config NVME_FABRICS config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" - depends on INFINIBAND && BLOCK + depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK select NVME_CORE select NVME_FABRICS select SG_POOL diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9df4f71e58ca..99b857e5a7a9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -99,6 +99,7 @@ static struct class *nvme_subsys_class; static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); +static void nvme_put_subsystem(struct nvme_subsystem *subsys); int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { @@ -117,7 +118,8 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) ret = nvme_reset_ctrl(ctrl); if (!ret) { flush_work(&ctrl->reset_work); - if (ctrl->state != NVME_CTRL_LIVE) + if (ctrl->state != NVME_CTRL_LIVE && + ctrl->state != NVME_CTRL_ADMIN_ONLY) ret = -ENETRESET; } @@ -350,6 +352,7 @@ static void nvme_free_ns_head(struct kref *ref) ida_simple_remove(&head->subsys->ns_ida, head->instance); list_del_init(&head->entry); cleanup_srcu_struct(&head->srcu); + nvme_put_subsystem(head->subsys); kfree(head); } @@ -764,6 +767,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, ret = PTR_ERR(meta); goto out_unmap; } + req->cmd_flags |= REQ_INTEGRITY; } } @@ -2860,6 +2864,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_cleanup_srcu; list_add_tail(&head->entry, &ctrl->subsys->nsheads); + + kref_get(&ctrl->subsys->ref); + return head; out_cleanup_srcu: cleanup_srcu_struct(&head->srcu); @@ -2997,31 +3004,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (nvme_init_ns_head(ns, nsid, id)) goto out_free_id; nvme_setup_streams_ns(ctrl, ns); - -#ifdef CONFIG_NVME_MULTIPATH - /* - * If multipathing is enabled we need to always use the subsystem - * instance number for numbering our devices to avoid conflicts - * between subsystems that have multiple controllers and thus use - * the multipath-aware subsystem node and those that have a single - * controller and use the controller node directly. - */ - if (ns->head->disk) { - sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, - ctrl->cntlid, ns->head->instance); - flags = GENHD_FL_HIDDEN; - } else { - sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, - ns->head->instance); - } -#else - /* - * But without the multipath code enabled, multiple controller per - * subsystems are visible as devices and thus we cannot use the - * subsystem instance. - */ - sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); -#endif + nvme_set_disk_name(disk_name, ns, ctrl, &flags); if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { if (nvme_nvm_register(ns, disk_name, node)) { diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 124c458806df..7ae732a77fe8 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -668,6 +668,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->transport); opts->transport = p; break; case NVMF_OPT_NQN: @@ -676,6 +677,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->subsysnqn); opts->subsysnqn = p; nqnlen = strlen(opts->subsysnqn); if (nqnlen >= NVMF_NQN_SIZE) { @@ -698,6 +700,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->traddr); opts->traddr = p; break; case NVMF_OPT_TRSVCID: @@ -706,6 +709,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->trsvcid); opts->trsvcid = p; break; case NVMF_OPT_QUEUE_SIZE: @@ -792,6 +796,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -EINVAL; goto out; } + nvmf_host_put(opts->host); opts->host = nvmf_host_add(p); kfree(p); if (!opts->host) { @@ -817,6 +822,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } + kfree(opts->host_traddr); opts->host_traddr = p; break; case NVMF_OPT_HOST_ID: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 956e0b8e9c4d..d7b664ae5923 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -15,10 +15,32 @@ #include "nvme.h" static bool multipath = true; -module_param(multipath, bool, 0644); +module_param(multipath, bool, 0444); MODULE_PARM_DESC(multipath, "turn on native support for multiple controllers per subsystem"); +/* + * If multipathing is enabled we need to always use the subsystem instance + * number for numbering our devices to avoid conflicts between subsystems that + * have multiple controllers and thus use the multipath-aware subsystem node + * and those that have a single controller and use the controller node + * directly. + */ +void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, + struct nvme_ctrl *ctrl, int *flags) +{ + if (!multipath) { + sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); + } else if (ns->head->disk) { + sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, + ctrl->cntlid, ns->head->instance); + *flags = GENHD_FL_HIDDEN; + } else { + sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, + ns->head->instance); + } +} + void nvme_failover_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 061fecfd44f5..17d2f7cf3fed 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -84,6 +84,11 @@ enum nvme_quirks { * Supports the LighNVM command set if indicated in vs[1]. */ NVME_QUIRK_LIGHTNVM = (1 << 6), + + /* + * Set MEDIUM priority on SQ creation + */ + NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7), }; /* @@ -436,6 +441,8 @@ extern const struct attribute_group nvme_ns_id_attr_group; extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH +void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, + struct nvme_ctrl *ctrl, int *flags); void nvme_failover_req(struct request *req); bool nvme_req_needs_failover(struct request *req, blk_status_t error); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); @@ -461,6 +468,16 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) } #else +/* + * Without the multipath code enabled, multiple controller per subsystems are + * visible as devices and thus we cannot use the subsystem instance. + */ +static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, + struct nvme_ctrl *ctrl, int *flags) +{ + sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); +} + static inline void nvme_failover_req(struct request *req) { } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fbc71fac6f1e..17a0190bd88f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1093,10 +1093,19 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq) { + struct nvme_ctrl *ctrl = &dev->ctrl; struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG; /* + * Some drives have a bug that auto-enables WRRU if MEDIUM isn't + * set. Since URGENT priority is zeroes, it makes all queues + * URGENT. + */ + if (ctrl->quirks & NVME_QUIRK_MEDIUM_PRIO_SQ) + flags |= NVME_SQ_PRIO_MEDIUM; + + /* * Note: we (ab)use the fact that the prp fields survive if no data * is attached to the request. */ @@ -2701,7 +2710,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ - .driver_data = NVME_QUIRK_NO_DEEPEST_PS }, + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_MEDIUM_PRIO_SQ }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 5f4f8b16685f..3c7b61ddb0d1 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -27,7 +27,7 @@ config NVME_TARGET_LOOP config NVME_TARGET_RDMA tristate "NVMe over Fabrics RDMA target support" - depends on INFINIBAND + depends on INFINIBAND && INFINIBAND_ADDR_TRANS depends on NVME_TARGET select SGL_ALLOC help diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 31fdfba556a8..27a8561c0cb9 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -469,6 +469,12 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) nvme_stop_ctrl(&ctrl->ctrl); nvme_loop_shutdown_ctrl(ctrl); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + /* state change failure should never happen */ + WARN_ON_ONCE(1); + return; + } + ret = nvme_loop_configure_admin_queue(ctrl); if (ret) goto out_disable; diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index b35fe88f1851..7baa53e5b1d7 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -102,12 +102,28 @@ static DEFINE_IDR(ovcs_idr); static BLOCKING_NOTIFIER_HEAD(overlay_notify_chain); +/** + * of_overlay_notifier_register() - Register notifier for overlay operations + * @nb: Notifier block to register + * + * Register for notification on overlay operations on device tree nodes. The + * reported actions definied by @of_reconfig_change. The notifier callback + * furthermore receives a pointer to the affected device tree node. + * + * Note that a notifier callback is not supposed to store pointers to a device + * tree node or its content beyond @OF_OVERLAY_POST_REMOVE corresponding to the + * respective node it received. + */ int of_overlay_notifier_register(struct notifier_block *nb) { return blocking_notifier_chain_register(&overlay_notify_chain, nb); } EXPORT_SYMBOL_GPL(of_overlay_notifier_register); +/** + * of_overlay_notifier_register() - Unregister notifier for overlay operations + * @nb: Notifier block to unregister + */ int of_overlay_notifier_unregister(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&overlay_notify_chain, nb); @@ -671,17 +687,13 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs) of_node_put(ovcs->fragments[i].overlay); } kfree(ovcs->fragments); - /* - * TODO - * - * would like to: kfree(ovcs->overlay_tree); - * but can not since drivers may have pointers into this data - * - * would like to: kfree(ovcs->fdt); - * but can not since drivers may have pointers into this data + * There should be no live pointers into ovcs->overlay_tree and + * ovcs->fdt due to the policy that overlay notifiers are not allowed + * to retain pointers into the overlay devicetree. */ - + kfree(ovcs->overlay_tree); + kfree(ovcs->fdt); kfree(ovcs); } diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index acba1f56af3e..126cf19e869b 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1263,7 +1263,7 @@ static struct parisc_driver ccio_driver __refdata = { * I/O Page Directory, the resource map, and initalizing the * U2/Uturn chip into virtual mode. */ -static void +static void __init ccio_ioc_init(struct ioc *ioc) { int i; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a04197ce767d..dbfe7c4f3776 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1910,7 +1910,7 @@ void pci_pme_active(struct pci_dev *dev, bool enable) EXPORT_SYMBOL(pci_pme_active); /** - * pci_enable_wake - enable PCI device as wakeup event source + * __pci_enable_wake - enable PCI device as wakeup event source * @dev: PCI device affected * @state: PCI state from which device will issue wakeup events * @enable: True to enable event generation; false to disable @@ -1928,7 +1928,7 @@ EXPORT_SYMBOL(pci_pme_active); * Error code depending on the platform is returned if both the platform and * the native mechanism fail to enable the generation of wake-up events */ -int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) +static int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) { int ret = 0; @@ -1969,6 +1969,23 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) return ret; } + +/** + * pci_enable_wake - change wakeup settings for a PCI device + * @pci_dev: Target device + * @state: PCI state from which device will issue wakeup events + * @enable: Whether or not to enable event generation + * + * If @enable is set, check device_may_wakeup() for the device before calling + * __pci_enable_wake() for it. + */ +int pci_enable_wake(struct pci_dev *pci_dev, pci_power_t state, bool enable) +{ + if (enable && !device_may_wakeup(&pci_dev->dev)) + return -EINVAL; + + return __pci_enable_wake(pci_dev, state, enable); +} EXPORT_SYMBOL(pci_enable_wake); /** @@ -1981,9 +1998,9 @@ EXPORT_SYMBOL(pci_enable_wake); * should not be called twice in a row to enable wake-up due to PCI PM vs ACPI * ordering constraints. * - * This function only returns error code if the device is not capable of - * generating PME# from both D3_hot and D3_cold, and the platform is unable to - * enable wake-up power for it. + * This function only returns error code if the device is not allowed to wake + * up the system from sleep or it is not capable of generating PME# from both + * D3_hot and D3_cold and the platform is unable to enable wake-up power for it. */ int pci_wake_from_d3(struct pci_dev *dev, bool enable) { @@ -2114,7 +2131,7 @@ int pci_finish_runtime_suspend(struct pci_dev *dev) dev->runtime_d3cold = target_state == PCI_D3cold; - pci_enable_wake(dev, target_state, pci_dev_run_wake(dev)); + __pci_enable_wake(dev, target_state, pci_dev_run_wake(dev)); error = pci_set_power_state(dev, target_state); @@ -2138,16 +2155,16 @@ bool pci_dev_run_wake(struct pci_dev *dev) { struct pci_bus *bus = dev->bus; - if (device_can_wakeup(&dev->dev)) - return true; - if (!dev->pme_support) return false; /* PME-capable in principle, but not from the target power state */ - if (!pci_pme_capable(dev, pci_target_state(dev, false))) + if (!pci_pme_capable(dev, pci_target_state(dev, true))) return false; + if (device_can_wakeup(&dev->dev)) + return true; + while (bus->parent) { struct pci_dev *bridge = bus->self; diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index b1ae1618fefe..fee9225ca559 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1622,22 +1622,30 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) if (!need_valid_mask) { irq_base = devm_irq_alloc_descs(pctrl->dev, -1, 0, - chip->ngpio, NUMA_NO_NODE); + community->npins, NUMA_NO_NODE); if (irq_base < 0) { dev_err(pctrl->dev, "Failed to allocate IRQ numbers\n"); return irq_base; } - } else { - irq_base = 0; } - ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, irq_base, + ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0, handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(pctrl->dev, "failed to add IRQ chip\n"); return ret; } + if (!need_valid_mask) { + for (i = 0; i < community->ngpio_ranges; i++) { + range = &community->gpio_ranges[i]; + + irq_domain_associate_many(chip->irq.domain, irq_base, + range->base, range->npins); + irq_base += range->npins; + } + } + gpiochip_set_chained_irqchip(chip, &chv_gpio_irqchip, irq, chv_gpio_irq_handler); return 0; diff --git a/drivers/pinctrl/intel/pinctrl-sunrisepoint.c b/drivers/pinctrl/intel/pinctrl-sunrisepoint.c index 8870a4100164..fee3435a6f15 100644 --- a/drivers/pinctrl/intel/pinctrl-sunrisepoint.c +++ b/drivers/pinctrl/intel/pinctrl-sunrisepoint.c @@ -36,6 +36,27 @@ .npins = ((e) - (s) + 1), \ } +#define SPTH_GPP(r, s, e, g) \ + { \ + .reg_num = (r), \ + .base = (s), \ + .size = ((e) - (s) + 1), \ + .gpio_base = (g), \ + } + +#define SPTH_COMMUNITY(b, s, e, g) \ + { \ + .barno = (b), \ + .padown_offset = SPT_PAD_OWN, \ + .padcfglock_offset = SPT_PADCFGLOCK, \ + .hostown_offset = SPT_HOSTSW_OWN, \ + .ie_offset = SPT_GPI_IE, \ + .pin_base = (s), \ + .npins = ((e) - (s) + 1), \ + .gpps = (g), \ + .ngpps = ARRAY_SIZE(g), \ + } + /* Sunrisepoint-LP */ static const struct pinctrl_pin_desc sptlp_pins[] = { /* GPP_A */ @@ -531,10 +552,28 @@ static const struct intel_function spth_functions[] = { FUNCTION("i2c2", spth_i2c2_groups), }; +static const struct intel_padgroup spth_community0_gpps[] = { + SPTH_GPP(0, 0, 23, 0), /* GPP_A */ + SPTH_GPP(1, 24, 47, 24), /* GPP_B */ +}; + +static const struct intel_padgroup spth_community1_gpps[] = { + SPTH_GPP(0, 48, 71, 48), /* GPP_C */ + SPTH_GPP(1, 72, 95, 72), /* GPP_D */ + SPTH_GPP(2, 96, 108, 96), /* GPP_E */ + SPTH_GPP(3, 109, 132, 120), /* GPP_F */ + SPTH_GPP(4, 133, 156, 144), /* GPP_G */ + SPTH_GPP(5, 157, 180, 168), /* GPP_H */ +}; + +static const struct intel_padgroup spth_community3_gpps[] = { + SPTH_GPP(0, 181, 191, 192), /* GPP_I */ +}; + static const struct intel_community spth_communities[] = { - SPT_COMMUNITY(0, 0, 47), - SPT_COMMUNITY(1, 48, 180), - SPT_COMMUNITY(2, 181, 191), + SPTH_COMMUNITY(0, 0, 47, spth_community0_gpps), + SPTH_COMMUNITY(1, 48, 180, spth_community1_gpps), + SPTH_COMMUNITY(2, 181, 191, spth_community3_gpps), }; static const struct intel_pinctrl_soc_data spth_soc_data = { diff --git a/drivers/pinctrl/meson/pinctrl-meson-axg.c b/drivers/pinctrl/meson/pinctrl-meson-axg.c index 4b91ff74779b..99a6ceac8e53 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-axg.c +++ b/drivers/pinctrl/meson/pinctrl-meson-axg.c @@ -898,7 +898,7 @@ static struct meson_bank meson_axg_periphs_banks[] = { static struct meson_bank meson_axg_aobus_banks[] = { /* name first last irq pullen pull dir out in */ - BANK("AO", GPIOAO_0, GPIOAO_9, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), + BANK("AO", GPIOAO_0, GPIOAO_13, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), }; static struct meson_pmx_bank meson_axg_periphs_pmx_banks[] = { diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 39d06dd1f63a..bc309c5327ff 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -154,7 +154,7 @@ config DELL_LAPTOP depends on ACPI_VIDEO || ACPI_VIDEO = n depends on RFKILL || RFKILL = n depends on SERIO_I8042 - select DELL_SMBIOS + depends on DELL_SMBIOS select POWER_SUPPLY select LEDS_CLASS select NEW_LEDS diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c index d4aeac3477f5..f086469ea740 100644 --- a/drivers/platform/x86/asus-wireless.c +++ b/drivers/platform/x86/asus-wireless.c @@ -178,8 +178,10 @@ static int asus_wireless_remove(struct acpi_device *adev) { struct asus_wireless_data *data = acpi_driver_data(adev); - if (data->wq) + if (data->wq) { + devm_led_classdev_unregister(&adev->dev, &data->led); destroy_workqueue(data->wq); + } return 0; } diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c index 8e70a627e0bb..cbbafdcaaecb 100644 --- a/drivers/remoteproc/qcom_q6v5_pil.c +++ b/drivers/remoteproc/qcom_q6v5_pil.c @@ -1083,6 +1083,7 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc) dev_err(qproc->dev, "unable to resolve mba region\n"); return ret; } + of_node_put(node); qproc->mba_phys = r.start; qproc->mba_size = resource_size(&r); @@ -1100,6 +1101,7 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc) dev_err(qproc->dev, "unable to resolve mpss region\n"); return ret; } + of_node_put(node); qproc->mpss_phys = qproc->mpss_reloc = r.start; qproc->mpss_size = resource_size(&r); diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 6d9c5832ce47..a9609d971f7f 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1163,7 +1163,7 @@ int rproc_trigger_recovery(struct rproc *rproc) if (ret) return ret; - ret = rproc_stop(rproc, false); + ret = rproc_stop(rproc, true); if (ret) goto unlock_mutex; @@ -1316,7 +1316,7 @@ void rproc_shutdown(struct rproc *rproc) if (!atomic_dec_and_test(&rproc->power)) goto out; - ret = rproc_stop(rproc, true); + ret = rproc_stop(rproc, false); if (ret) { atomic_inc(&rproc->power); goto out; diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c index 64b6de9763ee..1efdf9ff8679 100644 --- a/drivers/rpmsg/rpmsg_char.c +++ b/drivers/rpmsg/rpmsg_char.c @@ -581,4 +581,6 @@ static void rpmsg_chrdev_exit(void) unregister_chrdev_region(rpmsg_major, RPMSG_DEV_MAX); } module_exit(rpmsg_chrdev_exit); + +MODULE_ALIAS("rpmsg:rpmsg_chrdev"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/sbus/char/oradax.c b/drivers/sbus/char/oradax.c index c44d7c7ffc92..1754f55e2fac 100644 --- a/drivers/sbus/char/oradax.c +++ b/drivers/sbus/char/oradax.c @@ -3,7 +3,7 @@ * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or + * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, diff --git a/drivers/scsi/isci/port_config.c b/drivers/scsi/isci/port_config.c index edb7be786c65..9e8de1462593 100644 --- a/drivers/scsi/isci/port_config.c +++ b/drivers/scsi/isci/port_config.c @@ -291,7 +291,7 @@ sci_mpc_agent_validate_phy_configuration(struct isci_host *ihost, * Note: We have not moved the current phy_index so we will actually * compare the startting phy with itself. * This is expected and required to add the phy to the port. */ - while (phy_index < SCI_MAX_PHYS) { + for (; phy_index < SCI_MAX_PHYS; phy_index++) { if ((phy_mask & (1 << phy_index)) == 0) continue; sci_phy_get_sas_address(&ihost->phys[phy_index], @@ -311,7 +311,6 @@ sci_mpc_agent_validate_phy_configuration(struct isci_host *ihost, &ihost->phys[phy_index]); assigned_phy_mask |= (1 << phy_index); - phy_index++; } } diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 8c51d628b52e..a2ec0bc9e9fa 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1722,11 +1722,14 @@ static int storvsc_probe(struct hv_device *device, max_targets = STORVSC_MAX_TARGETS; max_channels = STORVSC_MAX_CHANNELS; /* - * On Windows8 and above, we support sub-channels for storage. + * On Windows8 and above, we support sub-channels for storage + * on SCSI and FC controllers. * The number of sub-channels offerred is based on the number of * VCPUs in the guest. */ - max_sub_channels = (num_cpus / storvsc_vcpus_per_sub_channel); + if (!dev_is_ide) + max_sub_channels = + (num_cpus - 1) / storvsc_vcpus_per_sub_channel; } scsi_driver.can_queue = (max_outstanding_req_per_channel * diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c index 16cab40156ca..aeab05f682d9 100644 --- a/drivers/staging/media/imx/imx-media-csi.c +++ b/drivers/staging/media/imx/imx-media-csi.c @@ -1799,7 +1799,7 @@ static int imx_csi_probe(struct platform_device *pdev) priv->dev->of_node = pdata->of_node; pinctrl = devm_pinctrl_get_select_default(priv->dev); if (IS_ERR(pinctrl)) { - ret = PTR_ERR(priv->vdev); + ret = PTR_ERR(pinctrl); dev_dbg(priv->dev, "devm_pinctrl_get_select_default() failed: %d\n", ret); if (ret != -ENODEV) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 07c814c42648..60429011292a 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -427,8 +427,8 @@ iblock_execute_zero_out(struct block_device *bdev, struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; struct scatterlist *sg = &cmd->t_data_sg[0]; - unsigned char *buf, zero = 0x00, *p = &zero; - int rc, ret; + unsigned char *buf, *not_zero; + int ret; buf = kmap(sg_page(sg)) + sg->offset; if (!buf) @@ -437,10 +437,10 @@ iblock_execute_zero_out(struct block_device *bdev, struct se_cmd *cmd) * Fall back to block_execute_write_same() slow-path if * incoming WRITE_SAME payload does not contain zeros. */ - rc = memcmp(buf, p, cmd->data_length); + not_zero = memchr_inv(buf, 0x00, cmd->data_length); kunmap(sg_page(sg)); - if (rc) + if (not_zero) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; ret = blkdev_issue_zeroout(bdev, diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c index 8a7f24dd9315..0c19fcd56a0d 100644 --- a/drivers/thermal/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/int340x_thermal/int3403_thermal.c @@ -194,6 +194,7 @@ static int int3403_cdev_add(struct int3403_priv *priv) return -EFAULT; } + priv->priv = obj; obj->max_state = p->package.count - 1; obj->cdev = thermal_cooling_device_register(acpi_device_bid(priv->adev), @@ -201,8 +202,6 @@ static int int3403_cdev_add(struct int3403_priv *priv) if (IS_ERR(obj->cdev)) result = PTR_ERR(obj->cdev); - priv->priv = obj; - kfree(buf.pointer); /* TODO: add ACPI notification support */ diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index ed805c7c5ace..ac83f721db24 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -185,6 +185,7 @@ * @regulator: pointer to the TMU regulator structure. * @reg_conf: pointer to structure to register with core thermal. * @ntrip: number of supported trip points. + * @enabled: current status of TMU device * @tmu_initialize: SoC specific TMU initialization method * @tmu_control: SoC specific TMU control method * @tmu_read: SoC specific TMU temperature read method @@ -205,6 +206,7 @@ struct exynos_tmu_data { struct regulator *regulator; struct thermal_zone_device *tzd; unsigned int ntrip; + bool enabled; int (*tmu_initialize)(struct platform_device *pdev); void (*tmu_control)(struct platform_device *pdev, bool on); @@ -398,6 +400,7 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on) mutex_lock(&data->lock); clk_enable(data->clk); data->tmu_control(pdev, on); + data->enabled = on; clk_disable(data->clk); mutex_unlock(&data->lock); } @@ -889,19 +892,24 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on) static int exynos_get_temp(void *p, int *temp) { struct exynos_tmu_data *data = p; + int value, ret = 0; - if (!data || !data->tmu_read) + if (!data || !data->tmu_read || !data->enabled) return -EINVAL; mutex_lock(&data->lock); clk_enable(data->clk); - *temp = code_to_temp(data, data->tmu_read(data)) * MCELSIUS; + value = data->tmu_read(data); + if (value < 0) + ret = value; + else + *temp = code_to_temp(data, value) * MCELSIUS; clk_disable(data->clk); mutex_unlock(&data->lock); - return 0; + return ret; } #ifdef CONFIG_THERMAL_EMULATION diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index c821b4b9647e..7b5cb28ffb35 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -191,7 +191,9 @@ static const unsigned short full_speed_maxpacket_maxes[4] = { static const unsigned short high_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_CONTROL] = 64, [USB_ENDPOINT_XFER_ISOC] = 1024, - [USB_ENDPOINT_XFER_BULK] = 512, + + /* Bulk should be 512, but some devices use 1024: we will warn below */ + [USB_ENDPOINT_XFER_BULK] = 1024, [USB_ENDPOINT_XFER_INT] = 1024, }; static const unsigned short super_speed_maxpacket_maxes[4] = { diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index d83be5651f87..a666e0758a99 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -985,6 +985,7 @@ struct dwc2_hsotg { /* DWC OTG HW Release versions */ #define DWC2_CORE_REV_2_71a 0x4f54271a +#define DWC2_CORE_REV_2_72a 0x4f54272a #define DWC2_CORE_REV_2_80a 0x4f54280a #define DWC2_CORE_REV_2_90a 0x4f54290a #define DWC2_CORE_REV_2_91a 0x4f54291a @@ -992,6 +993,7 @@ struct dwc2_hsotg { #define DWC2_CORE_REV_2_94a 0x4f54294a #define DWC2_CORE_REV_3_00a 0x4f54300a #define DWC2_CORE_REV_3_10a 0x4f54310a +#define DWC2_CORE_REV_4_00a 0x4f54400a #define DWC2_FS_IOT_REV_1_00a 0x5531100a #define DWC2_HS_IOT_REV_1_00a 0x5532100a diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 6c32bf26e48e..83cb5577a52f 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -3928,6 +3928,27 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep, if (index && !hs_ep->isochronous) epctrl |= DXEPCTL_SETD0PID; + /* WA for Full speed ISOC IN in DDMA mode. + * By Clear NAK status of EP, core will send ZLP + * to IN token and assert NAK interrupt relying + * on TxFIFO status only + */ + + if (hsotg->gadget.speed == USB_SPEED_FULL && + hs_ep->isochronous && dir_in) { + /* The WA applies only to core versions from 2.72a + * to 4.00a (including both). Also for FS_IOT_1.00a + * and HS_IOT_1.00a. + */ + u32 gsnpsid = dwc2_readl(hsotg->regs + GSNPSID); + + if ((gsnpsid >= DWC2_CORE_REV_2_72a && + gsnpsid <= DWC2_CORE_REV_4_00a) || + gsnpsid == DWC2_FS_IOT_REV_1_00a || + gsnpsid == DWC2_HS_IOT_REV_1_00a) + epctrl |= DXEPCTL_CNAK; + } + dev_dbg(hsotg->dev, "%s: write DxEPCTL=0x%08x\n", __func__, epctrl); diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 190f95964000..c51b73b3e048 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -358,9 +358,14 @@ static void dwc2_gusbcfg_init(struct dwc2_hsotg *hsotg) static int dwc2_vbus_supply_init(struct dwc2_hsotg *hsotg) { + int ret; + hsotg->vbus_supply = devm_regulator_get_optional(hsotg->dev, "vbus"); - if (IS_ERR(hsotg->vbus_supply)) - return 0; + if (IS_ERR(hsotg->vbus_supply)) { + ret = PTR_ERR(hsotg->vbus_supply); + hsotg->vbus_supply = NULL; + return ret == -ENODEV ? 0 : ret; + } return regulator_enable(hsotg->vbus_supply); } @@ -4342,9 +4347,7 @@ static int _dwc2_hcd_start(struct usb_hcd *hcd) spin_unlock_irqrestore(&hsotg->lock, flags); - dwc2_vbus_supply_init(hsotg); - - return 0; + return dwc2_vbus_supply_init(hsotg); } /* diff --git a/drivers/usb/dwc2/pci.c b/drivers/usb/dwc2/pci.c index 7f21747007f1..bea2e8ec0369 100644 --- a/drivers/usb/dwc2/pci.c +++ b/drivers/usb/dwc2/pci.c @@ -141,8 +141,10 @@ static int dwc2_pci_probe(struct pci_dev *pci, goto err; glue = devm_kzalloc(dev, sizeof(*glue), GFP_KERNEL); - if (!glue) + if (!glue) { + ret = -ENOMEM; goto err; + } ret = platform_device_add(dwc2); if (ret) { diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8796a5ee9bb9..0dedf8a799f4 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -166,7 +166,7 @@ static void dwc3_ep_inc_deq(struct dwc3_ep *dep) dwc3_ep_inc_trb(&dep->trb_dequeue); } -void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep, +static void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep, struct dwc3_request *req, int status) { struct dwc3 *dwc = dep->dwc; @@ -1424,7 +1424,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, dwc->lock); if (!r->trb) - goto out1; + goto out0; if (r->num_pending_sgs) { struct dwc3_trb *trb; diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c index 7889bcc0509a..8b72b192c747 100644 --- a/drivers/usb/gadget/function/f_phonet.c +++ b/drivers/usb/gadget/function/f_phonet.c @@ -221,7 +221,7 @@ static void pn_tx_complete(struct usb_ep *ep, struct usb_request *req) netif_wake_queue(dev); } -static int pn_net_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t pn_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct phonet_port *port = netdev_priv(dev); struct f_phonet *fp; diff --git a/drivers/usb/host/ehci-mem.c b/drivers/usb/host/ehci-mem.c index 4c6c08b675b5..21307d862af6 100644 --- a/drivers/usb/host/ehci-mem.c +++ b/drivers/usb/host/ehci-mem.c @@ -73,9 +73,10 @@ static struct ehci_qh *ehci_qh_alloc (struct ehci_hcd *ehci, gfp_t flags) if (!qh) goto done; qh->hw = (struct ehci_qh_hw *) - dma_pool_zalloc(ehci->qh_pool, flags, &dma); + dma_pool_alloc(ehci->qh_pool, flags, &dma); if (!qh->hw) goto fail; + memset(qh->hw, 0, sizeof *qh->hw); qh->qh_dma = dma; // INIT_LIST_HEAD (&qh->qh_list); INIT_LIST_HEAD (&qh->qtd_list); diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index 28e2a338b481..e56db44708bc 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -1287,7 +1287,7 @@ itd_urb_transaction( } else { alloc_itd: spin_unlock_irqrestore(&ehci->lock, flags); - itd = dma_pool_zalloc(ehci->itd_pool, mem_flags, + itd = dma_pool_alloc(ehci->itd_pool, mem_flags, &itd_dma); spin_lock_irqsave(&ehci->lock, flags); if (!itd) { @@ -1297,6 +1297,7 @@ itd_urb_transaction( } } + memset(itd, 0, sizeof(*itd)); itd->itd_dma = itd_dma; itd->frame = NO_FRAME; list_add(&itd->itd_list, &sched->td_list); @@ -2080,7 +2081,7 @@ sitd_urb_transaction( } else { alloc_sitd: spin_unlock_irqrestore(&ehci->lock, flags); - sitd = dma_pool_zalloc(ehci->sitd_pool, mem_flags, + sitd = dma_pool_alloc(ehci->sitd_pool, mem_flags, &sitd_dma); spin_lock_irqsave(&ehci->lock, flags); if (!sitd) { @@ -2090,6 +2091,7 @@ sitd_urb_transaction( } } + memset(sitd, 0, sizeof(*sitd)); sitd->sitd_dma = sitd_dma; sitd->frame = NO_FRAME; list_add(&sitd->sitd_list, &iso_sched->td_list); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 9b27798ecce5..711da3306b14 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3621,6 +3621,7 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) del_timer_sync(&virt_dev->eps[i].stop_cmd_timer); } xhci_debugfs_remove_slot(xhci, udev->slot_id); + virt_dev->udev = NULL; ret = xhci_disable_slot(xhci, udev->slot_id); if (ret) xhci_free_virt_device(xhci, udev->slot_id); diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index e564695c6c8d..71c5835ea9cd 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -417,7 +417,6 @@ void musb_g_tx(struct musb *musb, u8 epnum) req = next_request(musb_ep); request = &req->request; - trace_musb_req_tx(req); csr = musb_readw(epio, MUSB_TXCSR); musb_dbg(musb, "<== %s, txcsr %04x", musb_ep->end_point.name, csr); @@ -456,6 +455,8 @@ void musb_g_tx(struct musb *musb, u8 epnum) u8 is_dma = 0; bool short_packet = false; + trace_musb_req_tx(req); + if (dma && (csr & MUSB_TXCSR_DMAENAB)) { is_dma = 1; csr |= MUSB_TXCSR_P_WZC_BITS; diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 4fa372c845e1..e7f99d55922a 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -990,7 +990,9 @@ static void musb_bulk_nak_timeout(struct musb *musb, struct musb_hw_ep *ep, /* set tx_reinit and schedule the next qh */ ep->tx_reinit = 1; } - musb_start_urb(musb, is_in, next_qh); + + if (next_qh) + musb_start_urb(musb, is_in, next_qh); } } diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c3f252283ab9..2058852a87fa 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -233,6 +233,8 @@ static void option_instat_callback(struct urb *urb); /* These Quectel products use Qualcomm's vendor ID */ #define QUECTEL_PRODUCT_UC20 0x9003 #define QUECTEL_PRODUCT_UC15 0x9090 +/* These u-blox products use Qualcomm's vendor ID */ +#define UBLOX_PRODUCT_R410M 0x90b2 /* These Yuga products use Qualcomm's vendor ID */ #define YUGA_PRODUCT_CLM920_NC5 0x9625 @@ -1065,6 +1067,9 @@ static const struct usb_device_id option_ids[] = { /* Yuga products use Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5), .driver_info = RSVD(1) | RSVD(4) }, + /* u-blox products using Qualcomm vendor ID */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), + .driver_info = RSVD(1) | RSVD(3) }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), .driver_info = RSVD(4) }, diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index f5373ed2cd45..8ddbecc25d89 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -335,47 +335,48 @@ static int palm_os_3_probe(struct usb_serial *serial, goto exit; } - if (retval == sizeof(*connection_info)) { - connection_info = (struct visor_connection_info *) - transfer_buffer; - - num_ports = le16_to_cpu(connection_info->num_ports); - for (i = 0; i < num_ports; ++i) { - switch ( - connection_info->connections[i].port_function_id) { - case VISOR_FUNCTION_GENERIC: - string = "Generic"; - break; - case VISOR_FUNCTION_DEBUGGER: - string = "Debugger"; - break; - case VISOR_FUNCTION_HOTSYNC: - string = "HotSync"; - break; - case VISOR_FUNCTION_CONSOLE: - string = "Console"; - break; - case VISOR_FUNCTION_REMOTE_FILE_SYS: - string = "Remote File System"; - break; - default: - string = "unknown"; - break; - } - dev_info(dev, "%s: port %d, is for %s use\n", - serial->type->description, - connection_info->connections[i].port, string); - } + if (retval != sizeof(*connection_info)) { + dev_err(dev, "Invalid connection information received from device\n"); + retval = -ENODEV; + goto exit; } - /* - * Handle devices that report invalid stuff here. - */ + + connection_info = (struct visor_connection_info *)transfer_buffer; + + num_ports = le16_to_cpu(connection_info->num_ports); + + /* Handle devices that report invalid stuff here. */ if (num_ports == 0 || num_ports > 2) { dev_warn(dev, "%s: No valid connect info available\n", serial->type->description); num_ports = 2; } + for (i = 0; i < num_ports; ++i) { + switch (connection_info->connections[i].port_function_id) { + case VISOR_FUNCTION_GENERIC: + string = "Generic"; + break; + case VISOR_FUNCTION_DEBUGGER: + string = "Debugger"; + break; + case VISOR_FUNCTION_HOTSYNC: + string = "HotSync"; + break; + case VISOR_FUNCTION_CONSOLE: + string = "Console"; + break; + case VISOR_FUNCTION_REMOTE_FILE_SYS: + string = "Remote File System"; + break; + default: + string = "unknown"; + break; + } + dev_info(dev, "%s: port %d, is for %s use\n", + serial->type->description, + connection_info->connections[i].port, string); + } dev_info(dev, "%s: Number of ports: %d\n", serial->type->description, num_ports); diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c index 677d12138dbd..ded49e3bf2b0 100644 --- a/drivers/usb/typec/tcpm.c +++ b/drivers/usb/typec/tcpm.c @@ -3725,6 +3725,7 @@ void tcpm_unregister_port(struct tcpm_port *port) for (i = 0; i < ARRAY_SIZE(port->port_altmode); i++) typec_unregister_altmode(port->port_altmode[i]); typec_unregister_port(port->typec_port); + usb_role_switch_put(port->role_sw); tcpm_debugfs_exit(port); destroy_workqueue(port->wq); } diff --git a/drivers/usb/typec/tps6598x.c b/drivers/usb/typec/tps6598x.c index 8b8406867c02..4b4c8d271b27 100644 --- a/drivers/usb/typec/tps6598x.c +++ b/drivers/usb/typec/tps6598x.c @@ -73,6 +73,7 @@ struct tps6598x { struct device *dev; struct regmap *regmap; struct mutex lock; /* device lock */ + u8 i2c_protocol:1; struct typec_port *port; struct typec_partner *partner; @@ -80,19 +81,39 @@ struct tps6598x { struct typec_capability typec_cap; }; +static int +tps6598x_block_read(struct tps6598x *tps, u8 reg, void *val, size_t len) +{ + u8 data[len + 1]; + int ret; + + if (!tps->i2c_protocol) + return regmap_raw_read(tps->regmap, reg, val, len); + + ret = regmap_raw_read(tps->regmap, reg, data, sizeof(data)); + if (ret) + return ret; + + if (data[0] < len) + return -EIO; + + memcpy(val, &data[1], len); + return 0; +} + static inline int tps6598x_read16(struct tps6598x *tps, u8 reg, u16 *val) { - return regmap_raw_read(tps->regmap, reg, val, sizeof(u16)); + return tps6598x_block_read(tps, reg, val, sizeof(u16)); } static inline int tps6598x_read32(struct tps6598x *tps, u8 reg, u32 *val) { - return regmap_raw_read(tps->regmap, reg, val, sizeof(u32)); + return tps6598x_block_read(tps, reg, val, sizeof(u32)); } static inline int tps6598x_read64(struct tps6598x *tps, u8 reg, u64 *val) { - return regmap_raw_read(tps->regmap, reg, val, sizeof(u64)); + return tps6598x_block_read(tps, reg, val, sizeof(u64)); } static inline int tps6598x_write16(struct tps6598x *tps, u8 reg, u16 val) @@ -121,8 +142,8 @@ static int tps6598x_read_partner_identity(struct tps6598x *tps) struct tps6598x_rx_identity_reg id; int ret; - ret = regmap_raw_read(tps->regmap, TPS_REG_RX_IDENTITY_SOP, - &id, sizeof(id)); + ret = tps6598x_block_read(tps, TPS_REG_RX_IDENTITY_SOP, + &id, sizeof(id)); if (ret) return ret; @@ -224,13 +245,13 @@ static int tps6598x_exec_cmd(struct tps6598x *tps, const char *cmd, } while (val); if (out_len) { - ret = regmap_raw_read(tps->regmap, TPS_REG_DATA1, - out_data, out_len); + ret = tps6598x_block_read(tps, TPS_REG_DATA1, + out_data, out_len); if (ret) return ret; val = out_data[0]; } else { - ret = regmap_read(tps->regmap, TPS_REG_DATA1, &val); + ret = tps6598x_block_read(tps, TPS_REG_DATA1, &val, sizeof(u8)); if (ret) return ret; } @@ -385,6 +406,16 @@ static int tps6598x_probe(struct i2c_client *client) if (!vid) return -ENODEV; + /* + * Checking can the adapter handle SMBus protocol. If it can not, the + * driver needs to take care of block reads separately. + * + * FIXME: Testing with I2C_FUNC_I2C. regmap-i2c uses I2C protocol + * unconditionally if the adapter has I2C_FUNC_I2C set. + */ + if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + tps->i2c_protocol = true; + ret = tps6598x_read32(tps, TPS_REG_STATUS, &status); if (ret < 0) return ret; |