diff options
158 files changed, 3658 insertions, 1245 deletions
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index 8e2670781c9b..8fdb78f3c6c9 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt @@ -25,7 +25,7 @@ size when creating the filesystem. Currently 3 filesystems support DAX: ext2, ext4 and xfs. Enabling DAX on them is different. -Enabling DAX on ext4 and ext2 +Enabling DAX on ext2 ----------------------------- When mounting the filesystem, use the "-o dax" option on the command line or @@ -33,8 +33,8 @@ add 'dax' to the options in /etc/fstab. This works to enable DAX on all files within the filesystem. It is equivalent to the '-o dax=always' behavior below. -Enabling DAX on xfs -------------------- +Enabling DAX on xfs and ext4 +---------------------------- Summary ------- diff --git a/Documentation/filesystems/ext4/verity.rst b/Documentation/filesystems/ext4/verity.rst index 3e4c0ee0e068..e99ff3fd09f7 100644 --- a/Documentation/filesystems/ext4/verity.rst +++ b/Documentation/filesystems/ext4/verity.rst @@ -39,3 +39,6 @@ is encrypted as well as the data itself. Verity files cannot have blocks allocated past the end of the verity metadata. + +Verity and DAX are not compatible and attempts to set both of these flags +on a file will fail. diff --git a/Documentation/sh/index.rst b/Documentation/sh/index.rst index bc8db7ba894a..0bd405acf68f 100644 --- a/Documentation/sh/index.rst +++ b/Documentation/sh/index.rst @@ -16,18 +16,6 @@ Store Queue API .. kernel-doc:: arch/sh/kernel/cpu/sh4/sq.c :export: -SH-5 ----- - -TLB Interfaces -~~~~~~~~~~~~~~ - -.. kernel-doc:: arch/sh/mm/tlb-sh5.c - :internal: - -.. kernel-doc:: arch/sh/include/asm/tlb_64.h - :internal: - Machine Specific Interfaces =========================== diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h index 67994a7e5816..1dd57ba44327 100644 --- a/arch/ia64/kernel/unwind_i.h +++ b/arch/ia64/kernel/unwind_i.h @@ -42,7 +42,7 @@ enum unw_register_index { struct unw_info_block { u64 header; - u64 desc[0]; /* unwind descriptors */ + u64 desc[]; /* unwind descriptors */ /* personality routine and language-specific data follow behind descriptors */ }; diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index b04e6e72a592..088bd764e0b7 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -34,6 +34,7 @@ KCOV_INSTRUMENT := n PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING +PURGATORY_CFLAGS += $(call cc-option,-fno-stack-protector) # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h index 841580af7f9b..d8d6beaa72c4 100644 --- a/block/partitions/ldm.h +++ b/block/partitions/ldm.h @@ -93,7 +93,7 @@ struct frag { /* VBLK Fragment handling */ u8 num; /* Total number of records */ u8 rec; /* This is record number n */ u8 map; /* Which portions are in use */ - u8 data[0]; + u8 data[]; }; /* In memory LDM database structures. */ diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index 57d3b2e2d007..0b2c20fddb7c 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -120,7 +120,7 @@ static const u32 tegra_ahb_gizmo[] = { struct tegra_ahb { void __iomem *regs; struct device *dev; - u32 ctx[0]; + u32 ctx[]; }; static inline u32 gizmo_readl(struct tegra_ahb *ahb, u32 offset) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 14345a87c7cc..33d0831c99b6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -620,7 +620,7 @@ struct fifo_buffer { unsigned int head_index; unsigned int size; int total; /* sum of all values */ - int values[0]; + int values[]; }; extern struct fifo_buffer *fifo_alloc(unsigned int fifo_size); diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h index e6fc5ad72501..dea59c92ecc1 100644 --- a/drivers/block/drbd/drbd_protocol.h +++ b/drivers/block/drbd/drbd_protocol.h @@ -271,7 +271,7 @@ struct p_rs_param { u32 resync_rate; /* Since protocol version 88 and higher. */ - char verify_alg[0]; + char verify_alg[]; } __packed; struct p_rs_param_89 { @@ -305,7 +305,7 @@ struct p_protocol { u32 two_primaries; /* Since protocol version 87 and higher. */ - char integrity_alg[0]; + char integrity_alg[]; } __packed; @@ -360,7 +360,7 @@ struct p_sizes { u16 dds_flags; /* use enum dds_flags here. */ /* optional queue_limits if (agreed_features & DRBD_FF_WSAME) */ - struct o_qlim qlim[0]; + struct o_qlim qlim[]; } __packed; struct p_state { @@ -409,7 +409,7 @@ struct p_compressed_bm { */ u8 encoding; - u8 code[0]; + u8 code[]; } __packed; struct p_delay_probe93 { diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index b3fdbdc25acb..31e427e273f8 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -223,7 +223,7 @@ struct chcr_authenc_ctx { struct __aead_ctx { struct chcr_gcm_ctx gcm[0]; - struct chcr_authenc_ctx authenc[0]; + struct chcr_authenc_ctx authenc[]; }; struct chcr_aead_ctx { @@ -235,7 +235,7 @@ struct chcr_aead_ctx { u8 nonce[4]; u16 hmac_ctrl; u16 mayverify; - struct __aead_ctx ctx[0]; + struct __aead_ctx ctx[]; }; struct hmac_ctx { @@ -247,7 +247,7 @@ struct hmac_ctx { struct __crypto_ctx { struct hmac_ctx hmacctx[0]; struct ablk_ctx ablkctx[0]; - struct chcr_aead_ctx aeadctx[0]; + struct chcr_aead_ctx aeadctx[]; }; struct chcr_context { @@ -257,7 +257,7 @@ struct chcr_context { unsigned int ntxq; unsigned int nrxq; struct completion cbc_aes_aio_done; - struct __crypto_ctx crypto_ctx[0]; + struct __crypto_ctx crypto_ctx[]; }; struct chcr_hctx_per_wr { diff --git a/drivers/dma/milbeaut-hdmac.c b/drivers/dma/milbeaut-hdmac.c index 8853d442430b..a8cfb59f6efe 100644 --- a/drivers/dma/milbeaut-hdmac.c +++ b/drivers/dma/milbeaut-hdmac.c @@ -77,7 +77,7 @@ struct milbeaut_hdmac_device { struct dma_device ddev; struct clk *clk; void __iomem *reg_base; - struct milbeaut_hdmac_chan channels[0]; + struct milbeaut_hdmac_chan channels[]; }; static struct milbeaut_hdmac_chan * diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c index ab3d2f395378..85a597228fb0 100644 --- a/drivers/dma/milbeaut-xdmac.c +++ b/drivers/dma/milbeaut-xdmac.c @@ -74,7 +74,7 @@ struct milbeaut_xdmac_chan { struct milbeaut_xdmac_device { struct dma_device ddev; void __iomem *reg_base; - struct milbeaut_xdmac_chan channels[0]; + struct milbeaut_xdmac_chan channels[]; }; static struct milbeaut_xdmac_chan * diff --git a/drivers/dma/moxart-dma.c b/drivers/dma/moxart-dma.c index 4ab493d46375..347146a6e1d0 100644 --- a/drivers/dma/moxart-dma.c +++ b/drivers/dma/moxart-dma.c @@ -127,7 +127,7 @@ struct moxart_desc { unsigned int dma_cycles; struct virt_dma_desc vd; uint8_t es; - struct moxart_sg sg[0]; + struct moxart_sg sg[]; }; struct moxart_chan { diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index b9f0d9636620..55fc7400f717 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -225,7 +225,7 @@ struct tegra_dma { u32 global_pause_count; /* Last member of the structure */ - struct tegra_dma_channel channels[0]; + struct tegra_dma_channel channels[]; }; static inline void tdma_write(struct tegra_dma *tdma, u32 reg, u32 val) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index c4a5c170c1f9..35d81bd857f1 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -211,7 +211,7 @@ struct edma_desc { u32 residue; u32 residue_stat; - struct edma_pset pset[0]; + struct edma_pset pset[]; }; struct edma_cc; diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 945b7c604f91..c91e2dc1bb72 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -170,7 +170,7 @@ struct udma_desc { void *metadata; /* pointer to provided metadata buffer (EPIP, PSdata) */ unsigned int hwdesc_count; - struct udma_hwdesc hwdesc[0]; + struct udma_hwdesc hwdesc[]; }; enum udma_chan_state { diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index 39382694fdfc..68e48bf54d78 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -88,7 +88,7 @@ struct timb_dma { struct dma_device dma; void __iomem *membase; struct tasklet_struct tasklet; - struct timb_dma_chan channels[0]; + struct timb_dma_chan channels[]; }; static struct device *chan2dev(struct dma_chan *chan) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index c7ea4f2d5ca6..fb6c651214f3 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -117,7 +117,7 @@ struct inbound_transaction_resource { struct descriptor_resource { struct client_resource resource; struct fw_descriptor descriptor; - u32 data[0]; + u32 data[]; }; struct iso_resource { diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 404a035f104d..439d918bbaaf 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -620,7 +620,7 @@ struct fw_request { u32 request_header[4]; int ack; u32 length; - u32 data[0]; + u32 data[]; }; static void free_response_callback(struct fw_packet *packet, diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index 4b0e4ee655a1..71d5f16f311c 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -191,7 +191,7 @@ struct fw_node { /* Upper layer specific data. */ void *data; - struct fw_node *ports[0]; + struct fw_node *ports[]; }; static inline struct fw_node *fw_node_get(struct fw_node *node) diff --git a/drivers/firewire/nosy.c b/drivers/firewire/nosy.c index 6ca2f5ab6c57..5fd6a60b6741 100644 --- a/drivers/firewire/nosy.c +++ b/drivers/firewire/nosy.c @@ -52,7 +52,7 @@ struct pcl { struct packet { unsigned int length; - char data[0]; + char data[]; }; struct packet_buffer { diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 33269316f111..54fdc39cd0bc 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -111,7 +111,7 @@ struct descriptor_buffer { dma_addr_t buffer_bus; size_t buffer_size; size_t used; - struct descriptor buffer[0]; + struct descriptor buffer[]; }; struct context { diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c index b6180023eba7..8b8127fa8955 100644 --- a/drivers/firmware/dmi-sysfs.c +++ b/drivers/firmware/dmi-sysfs.c @@ -262,7 +262,7 @@ struct dmi_system_event_log { u8 header_format; u8 type_descriptors_supported_count; u8 per_log_type_descriptor_length; - u8 supported_log_type_descriptos[0]; + u8 supported_log_type_descriptos[]; } __packed; #define DMI_SYSFS_SEL_FIELD(_field) \ diff --git a/drivers/firmware/google/memconsole-coreboot.c b/drivers/firmware/google/memconsole-coreboot.c index fd7f0fbec07e..d17e4d6ac9bc 100644 --- a/drivers/firmware/google/memconsole-coreboot.c +++ b/drivers/firmware/google/memconsole-coreboot.c @@ -21,7 +21,7 @@ struct cbmem_cons { u32 size_dont_access_after_boot; u32 cursor; - u8 body[0]; + u8 body[]; } __packed; #define CURSOR_MASK ((1 << 28) - 1) diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c index db0812263d46..d23c5c69ab52 100644 --- a/drivers/firmware/google/vpd.c +++ b/drivers/firmware/google/vpd.c @@ -32,7 +32,7 @@ struct vpd_cbmem { u32 version; u32 ro_size; u32 rw_size; - u8 blob[0]; + u8 blob[]; }; struct vpd_section { diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 96758b71a8db..7127a04bca19 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -104,7 +104,7 @@ struct ibft_control { u16 tgt0_off; u16 nic1_off; u16 tgt1_off; - u16 expansion[0]; + u16 expansion[]; } __attribute__((__packed__)); struct ibft_initiator { diff --git a/drivers/firmware/pcdp.h b/drivers/firmware/pcdp.h index ce75d1da9e84..e02540571c52 100644 --- a/drivers/firmware/pcdp.h +++ b/drivers/firmware/pcdp.h @@ -103,6 +103,6 @@ struct pcdp { u8 creator_id[4]; u32 creator_rev; u32 num_uarts; - struct pcdp_uart uart[0]; /* actual size is num_uarts */ + struct pcdp_uart uart[]; /* actual size is num_uarts */ /* remainder of table is pcdp_device structures */ } __attribute__((packed)); diff --git a/drivers/hwtracing/stm/policy.c b/drivers/hwtracing/stm/policy.c index 4f932a419752..603b4a9969d3 100644 --- a/drivers/hwtracing/stm/policy.c +++ b/drivers/hwtracing/stm/policy.c @@ -34,7 +34,7 @@ struct stp_policy_node { unsigned int first_channel; unsigned int last_channel; /* this is the one that's exposed to the attributes */ - unsigned char priv[0]; + unsigned char priv[]; }; void *stp_policy_node_priv(struct stp_policy_node *pn) diff --git a/drivers/hwtracing/stm/stm.h b/drivers/hwtracing/stm/stm.h index 3569439d53bb..a9be49fc7a6b 100644 --- a/drivers/hwtracing/stm/stm.h +++ b/drivers/hwtracing/stm/stm.h @@ -23,7 +23,7 @@ void *stp_policy_node_priv(struct stp_policy_node *pn); struct stp_master { unsigned int nr_free; - unsigned long chan_map[0]; + unsigned long chan_map[]; }; struct stm_device { @@ -42,7 +42,7 @@ struct stm_device { const struct config_item_type *pdrv_node_type; /* master allocation */ spinlock_t mc_lock; - struct stp_master *masters[0]; + struct stp_master *masters[]; }; #define to_stm_device(_d) \ diff --git a/drivers/media/usb/pwc/pwc.h b/drivers/media/usb/pwc/pwc.h index 3362962d0d00..b02a3c7b7742 100644 --- a/drivers/media/usb/pwc/pwc.h +++ b/drivers/media/usb/pwc/pwc.h @@ -193,7 +193,7 @@ struct pwc_raw_frame { decompressor) */ __u8 cmd[4]; /* the four byte of the command (in case of nala, only the first 3 bytes is filled) */ - __u8 rawframe[0]; /* frame_size = H / 4 * vbandlength */ + __u8 rawframe[]; /* frame_size = H / 4 * vbandlength */ } __packed; /* intermediate buffers with raw data from the USB cam */ diff --git a/drivers/mfd/mt6360-core.c b/drivers/mfd/mt6360-core.c index db8cdf5272c1..e9cacc27d980 100644 --- a/drivers/mfd/mt6360-core.c +++ b/drivers/mfd/mt6360-core.c @@ -412,6 +412,7 @@ MODULE_DEVICE_TABLE(of, mt6360_pmu_of_id); static struct i2c_driver mt6360_pmu_driver = { .driver = { + .name = "mt6360_pmu", .pm = &mt6360_pmu_pm_ops, .of_match_table = of_match_ptr(mt6360_pmu_of_id), }, diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index efd1a1d1f35e..5d3c691a1c66 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -552,6 +552,8 @@ static int bareudp_validate(struct nlattr *tb[], struct nlattr *data[], static int bareudp2info(struct nlattr *data[], struct bareudp_conf *conf, struct netlink_ext_ack *extack) { + memset(conf, 0, sizeof(*conf)); + if (!data[IFLA_BAREUDP_PORT]) { NL_SET_ERR_MSG(extack, "port not specified"); return -EINVAL; diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c index d08a3d559114..6ad83a881039 100644 --- a/drivers/net/can/peak_canfd/peak_pciefd_main.c +++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c @@ -146,7 +146,7 @@ struct pciefd_rx_dma { __le32 irq_status; __le32 sys_time_low; __le32 sys_time_high; - struct pucan_rx_msg msg[0]; + struct pucan_rx_msg msg[]; } __packed __aligned(4); /* Tx Link record */ @@ -194,7 +194,7 @@ struct pciefd_board { struct pci_dev *pci_dev; int can_count; spinlock_t cmd_lock; /* 64-bits cmds must be atomic */ - struct pciefd_can *can[0]; /* array of network devices */ + struct pciefd_can *can[]; /* array of network devices */ }; /* supported device ids. */ diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index d9fa4600f745..4b2de08137be 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -151,10 +151,8 @@ static int e1000_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); static void e1000_restore_vlan(struct e1000_adapter *adapter); -#ifdef CONFIG_PM -static int e1000_suspend(struct pci_dev *pdev, pm_message_t state); -static int e1000_resume(struct pci_dev *pdev); -#endif +static int __maybe_unused e1000_suspend(struct device *dev); +static int __maybe_unused e1000_resume(struct device *dev); static void e1000_shutdown(struct pci_dev *pdev); #ifdef CONFIG_NET_POLL_CONTROLLER @@ -179,16 +177,16 @@ static const struct pci_error_handlers e1000_err_handler = { .resume = e1000_io_resume, }; +static SIMPLE_DEV_PM_OPS(e1000_pm_ops, e1000_suspend, e1000_resume); + static struct pci_driver e1000_driver = { .name = e1000_driver_name, .id_table = e1000_pci_tbl, .probe = e1000_probe, .remove = e1000_remove, -#ifdef CONFIG_PM - /* Power Management Hooks */ - .suspend = e1000_suspend, - .resume = e1000_resume, -#endif + .driver = { + .pm = &e1000_pm_ops, + }, .shutdown = e1000_shutdown, .err_handler = &e1000_err_handler }; @@ -5060,9 +5058,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) struct e1000_hw *hw = &adapter->hw; u32 ctrl, ctrl_ext, rctl, status; u32 wufc = adapter->wol; -#ifdef CONFIG_PM - int retval = 0; -#endif netif_device_detach(netdev); @@ -5076,12 +5071,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) e1000_down(adapter); } -#ifdef CONFIG_PM - retval = pci_save_state(pdev); - if (retval) - return retval; -#endif - status = er32(STATUS); if (status & E1000_STATUS_LU) wufc &= ~E1000_WUFC_LNKC; @@ -5142,37 +5131,26 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) return 0; } -#ifdef CONFIG_PM -static int e1000_suspend(struct pci_dev *pdev, pm_message_t state) +static int __maybe_unused e1000_suspend(struct device *dev) { int retval; + struct pci_dev *pdev = to_pci_dev(dev); bool wake; retval = __e1000_shutdown(pdev, &wake); - if (retval) - return retval; - - if (wake) { - pci_prepare_to_sleep(pdev); - } else { - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - } + device_set_wakeup_enable(dev, wake); - return 0; + return retval; } -static int e1000_resume(struct pci_dev *pdev) +static int __maybe_unused e1000_resume(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 err; - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - pci_save_state(pdev); - if (adapter->need_ioport) err = pci_enable_device(pdev); else @@ -5209,7 +5187,6 @@ static int e1000_resume(struct pci_dev *pdev) return 0; } -#endif static void e1000_shutdown(struct pci_dev *pdev) { diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index a279f4fa9962..6f6479ca1267 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6349,7 +6349,6 @@ fl_out: pm_runtime_put_sync(netdev->dev.parent); } -#ifdef CONFIG_PM_SLEEP /* S0ix implementation */ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) { @@ -6571,7 +6570,6 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS; ew32(CTRL_EXT, mac_data); } -#endif /* CONFIG_PM_SLEEP */ static int e1000e_pm_freeze(struct device *dev) { @@ -6611,11 +6609,17 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) struct net_device *netdev = pci_get_drvdata(pdev); struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 ctrl, ctrl_ext, rctl, status; - /* Runtime suspend should only enable wakeup for link changes */ - u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; + u32 ctrl, ctrl_ext, rctl, status, wufc; int retval = 0; + /* Runtime suspend should only enable wakeup for link changes */ + if (runtime) + wufc = E1000_WUFC_LNKC; + else if (device_may_wakeup(&pdev->dev)) + wufc = adapter->wol; + else + wufc = 0; + status = er32(STATUS); if (status & E1000_STATUS_LU) wufc &= ~E1000_WUFC_LNKC; @@ -6672,7 +6676,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime) if (adapter->hw.phy.type == e1000_phy_igp_3) { e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); } else if (hw->mac.type >= e1000_pch_lpt) { - if (!(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) + if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC))) /* ULP does not support wake from unicast, multicast * or broadcast. */ @@ -6869,7 +6873,6 @@ err_irq: return rc; } -#ifdef CONFIG_PM static int __e1000_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); @@ -6935,8 +6938,7 @@ static int __e1000_resume(struct pci_dev *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int e1000e_pm_suspend(struct device *dev) +static __maybe_unused int e1000e_pm_suspend(struct device *dev) { struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); @@ -6960,7 +6962,7 @@ static int e1000e_pm_suspend(struct device *dev) return rc; } -static int e1000e_pm_resume(struct device *dev) +static __maybe_unused int e1000e_pm_resume(struct device *dev) { struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); @@ -6979,9 +6981,8 @@ static int e1000e_pm_resume(struct device *dev) return e1000e_pm_thaw(dev); } -#endif /* CONFIG_PM_SLEEP */ -static int e1000e_pm_runtime_idle(struct device *dev) +static __maybe_unused int e1000e_pm_runtime_idle(struct device *dev) { struct net_device *netdev = dev_get_drvdata(dev); struct e1000_adapter *adapter = netdev_priv(netdev); @@ -6997,7 +6998,7 @@ static int e1000e_pm_runtime_idle(struct device *dev) return -EBUSY; } -static int e1000e_pm_runtime_resume(struct device *dev) +static __maybe_unused int e1000e_pm_runtime_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -7014,7 +7015,7 @@ static int e1000e_pm_runtime_resume(struct device *dev) return rc; } -static int e1000e_pm_runtime_suspend(struct device *dev) +static __maybe_unused int e1000e_pm_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -7039,7 +7040,6 @@ static int e1000e_pm_runtime_suspend(struct device *dev) return 0; } -#endif /* CONFIG_PM */ static void e1000_shutdown(struct pci_dev *pdev) { diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 5d4ec95be869..24f4d8e0da98 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5983,8 +5983,8 @@ static int mvpp2_remove(struct platform_device *pdev) { struct mvpp2 *priv = platform_get_drvdata(pdev); struct fwnode_handle *fwnode = pdev->dev.fwnode; + int i = 0, poolnum = MVPP2_BM_POOLS_NUM; struct fwnode_handle *port_fwnode; - int i = 0; mvpp2_dbgfs_cleanup(priv); @@ -5998,7 +5998,10 @@ static int mvpp2_remove(struct platform_device *pdev) destroy_workqueue(priv->stats_queue); - for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) { + if (priv->percpu_pools) + poolnum = mvpp2_get_nrxqs(priv) * 2; + + for (i = 0; i < poolnum; i++) { struct mvpp2_bm_pool *bm_pool = &priv->bm_pools[i]; mvpp2_bm_pool_destroy(&pdev->dev, priv, bm_pool); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 5ffa32b75e5f..55af877763ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -978,8 +978,10 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, lossy = !(pfc || pause_en); thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); + mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, &thres_cells); delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, pfc, pause_en); + mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, &delay_cells); total_cells = thres_cells + delay_cells; taken_headroom_cells += total_cells; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 6f96ca50c9ba..6e87457dd635 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -374,6 +374,19 @@ mlxsw_sp_port_vlan_find_by_vid(const struct mlxsw_sp_port *mlxsw_sp_port, return NULL; } +static inline void +mlxsw_sp_port_headroom_8x_adjust(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 *p_size) +{ + /* Ports with eight lanes use two headroom buffers between which the + * configured headroom size is split. Therefore, multiply the calculated + * headroom size by two. + */ + if (mlxsw_sp_port->mapping.width != 8) + return; + *p_size *= 2; +} + enum mlxsw_sp_flood_type { MLXSW_SP_FLOOD_TYPE_UC, MLXSW_SP_FLOOD_TYPE_BC, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 21bfb2f6a6f0..f25a8b084b4b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -312,6 +312,7 @@ static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) if (i == MLXSW_SP_PB_UNUSED) continue; + mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, &size); mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, size); } mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 304eb8c3d8bd..f843545d3478 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -782,6 +782,7 @@ mlxsw_sp_span_port_buffer_update(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) speed = 0; buffsize = mlxsw_sp_span_buffsize_get(mlxsw_sp, speed, mtu); + mlxsw_sp_port_headroom_8x_adjust(mlxsw_sp_port, (u16 *) &buffsize); mlxsw_reg_sbib_pack(sbib_pl, mlxsw_sp_port->local_port, buffsize); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); } diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index c5c5c688b7e2..f1711ac86d0c 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3091,6 +3091,8 @@ static const struct pci_device_id lan743x_pcidev_tbl[] = { { 0, } }; +MODULE_DEVICE_TABLE(pci, lan743x_pcidev_tbl); + static struct pci_driver lan743x_pcidev_driver = { .name = DRIVER_NAME, .id_table = lan743x_pcidev_tbl, diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index e1d097e250ae..31478c0cff87 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -33,7 +33,7 @@ void flush_cpu_work(void); struct op_sample { unsigned long eip; unsigned long event; - unsigned long data[0]; + unsigned long data[]; }; struct op_entry; diff --git a/drivers/phy/samsung/phy-samsung-usb2.h b/drivers/phy/samsung/phy-samsung-usb2.h index 2c1a7d71142b..77fb23bc218f 100644 --- a/drivers/phy/samsung/phy-samsung-usb2.h +++ b/drivers/phy/samsung/phy-samsung-usb2.h @@ -43,7 +43,7 @@ struct samsung_usb2_phy_driver { struct regmap *reg_pmu; struct regmap *reg_sys; spinlock_t lock; - struct samsung_usb2_phy_instance instances[0]; + struct samsung_usb2_phy_instance instances[]; }; struct samsung_usb2_common_phy { diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 0e90c5d4bb2b..eb8ed28533f8 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -39,7 +39,7 @@ struct rio_id_table { u16 start; /* logical minimal id */ u32 max; /* max number of IDs in table */ spinlock_t lock; - unsigned long table[0]; + unsigned long table[]; }; static int next_destid = 0; diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h index 038aec352df7..a01eda720bf6 100644 --- a/drivers/soc/ti/knav_qmss.h +++ b/drivers/soc/ti/knav_qmss.h @@ -67,7 +67,7 @@ struct knav_reg_config { u32 link_ram_size0; u32 link_ram_base1; u32 __pad2[2]; - u32 starvation[0]; + u32 starvation[]; }; struct knav_reg_region { diff --git a/drivers/w1/w1_netlink.h b/drivers/w1/w1_netlink.h index 3041092e84b3..449680a61569 100644 --- a/drivers/w1/w1_netlink.h +++ b/drivers/w1/w1_netlink.h @@ -73,7 +73,7 @@ struct w1_netlink_msg __u32 res; } mst; } id; - __u8 data[0]; + __u8 data[]; }; /** @@ -122,7 +122,7 @@ struct w1_netlink_cmd __u8 cmd; __u8 res; __u16 len; - __u8 data[0]; + __u8 data[]; }; #ifdef __KERNEL__ diff --git a/fs/afs/dir.c b/fs/afs/dir.c index aa1d34141ea3..3e3c2bf0a722 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -648,7 +648,7 @@ static void afs_do_lookup_success(struct afs_operation *op) vp = &op->file[0]; abort_code = vp->scb.status.abort_code; if (abort_code != 0) { - op->abort_code = abort_code; + op->ac.abort_code = abort_code; op->error = afs_abort_to_error(abort_code); } break; @@ -696,10 +696,11 @@ static const struct afs_operation_ops afs_inline_bulk_status_operation = { .success = afs_do_lookup_success, }; -static const struct afs_operation_ops afs_fetch_status_operation = { +static const struct afs_operation_ops afs_lookup_fetch_status_operation = { .issue_afs_rpc = afs_fs_fetch_status, .issue_yfs_rpc = yfs_fs_fetch_status, .success = afs_do_lookup_success, + .aborted = afs_check_for_remote_deletion, }; /* @@ -1236,6 +1237,17 @@ void afs_d_release(struct dentry *dentry) _enter("%pd", dentry); } +void afs_check_for_remote_deletion(struct afs_operation *op) +{ + struct afs_vnode *vnode = op->file[0].vnode; + + switch (op->ac.abort_code) { + case VNOVNODE: + set_bit(AFS_VNODE_DELETED, &vnode->flags); + afs_break_callback(vnode, afs_cb_break_for_deleted); + } +} + /* * Create a new inode for create/mkdir/symlink */ @@ -1268,7 +1280,7 @@ static void afs_vnode_new_inode(struct afs_operation *op) static void afs_create_success(struct afs_operation *op) { _enter("op=%08x", op->debug_id); - afs_check_for_remote_deletion(op, op->file[0].vnode); + op->ctime = op->file[0].scb.status.mtime_client; afs_vnode_commit_status(op, &op->file[0]); afs_update_dentry_version(op, &op->file[0], op->dentry); afs_vnode_new_inode(op); @@ -1302,6 +1314,7 @@ static const struct afs_operation_ops afs_mkdir_operation = { .issue_afs_rpc = afs_fs_make_dir, .issue_yfs_rpc = yfs_fs_make_dir, .success = afs_create_success, + .aborted = afs_check_for_remote_deletion, .edit_dir = afs_create_edit_dir, .put = afs_create_put, }; @@ -1325,6 +1338,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) afs_op_set_vnode(op, 0, dvnode); op->file[0].dv_delta = 1; + op->file[0].update_ctime = true; op->dentry = dentry; op->create.mode = S_IFDIR | mode; op->create.reason = afs_edit_dir_for_mkdir; @@ -1350,7 +1364,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry) static void afs_rmdir_success(struct afs_operation *op) { _enter("op=%08x", op->debug_id); - afs_check_for_remote_deletion(op, op->file[0].vnode); + op->ctime = op->file[0].scb.status.mtime_client; afs_vnode_commit_status(op, &op->file[0]); afs_update_dentry_version(op, &op->file[0], op->dentry); } @@ -1382,6 +1396,7 @@ static const struct afs_operation_ops afs_rmdir_operation = { .issue_afs_rpc = afs_fs_remove_dir, .issue_yfs_rpc = yfs_fs_remove_dir, .success = afs_rmdir_success, + .aborted = afs_check_for_remote_deletion, .edit_dir = afs_rmdir_edit_dir, .put = afs_rmdir_put, }; @@ -1404,6 +1419,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) afs_op_set_vnode(op, 0, dvnode); op->file[0].dv_delta = 1; + op->file[0].update_ctime = true; op->dentry = dentry; op->ops = &afs_rmdir_operation; @@ -1479,7 +1495,8 @@ static void afs_dir_remove_link(struct afs_operation *op) static void afs_unlink_success(struct afs_operation *op) { _enter("op=%08x", op->debug_id); - afs_check_for_remote_deletion(op, op->file[0].vnode); + op->ctime = op->file[0].scb.status.mtime_client; + afs_check_dir_conflict(op, &op->file[0]); afs_vnode_commit_status(op, &op->file[0]); afs_vnode_commit_status(op, &op->file[1]); afs_update_dentry_version(op, &op->file[0], op->dentry); @@ -1511,6 +1528,7 @@ static const struct afs_operation_ops afs_unlink_operation = { .issue_afs_rpc = afs_fs_remove_file, .issue_yfs_rpc = yfs_fs_remove_file, .success = afs_unlink_success, + .aborted = afs_check_for_remote_deletion, .edit_dir = afs_unlink_edit_dir, .put = afs_unlink_put, }; @@ -1537,6 +1555,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) afs_op_set_vnode(op, 0, dvnode); op->file[0].dv_delta = 1; + op->file[0].update_ctime = true; /* Try to make sure we have a callback promise on the victim. */ ret = afs_validate(vnode, op->key); @@ -1561,9 +1580,25 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); op->file[1].vnode = vnode; + op->file[1].update_ctime = true; + op->file[1].op_unlinked = true; op->dentry = dentry; op->ops = &afs_unlink_operation; - return afs_do_sync_operation(op); + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + + /* If there was a conflict with a third party, check the status of the + * unlinked vnode. + */ + if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) { + op->file[1].update_ctime = false; + op->fetch_status.which = 1; + op->ops = &afs_fetch_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + } + + return afs_put_operation(op); error: return afs_put_operation(op); @@ -1573,6 +1608,7 @@ static const struct afs_operation_ops afs_create_operation = { .issue_afs_rpc = afs_fs_create_file, .issue_yfs_rpc = yfs_fs_create_file, .success = afs_create_success, + .aborted = afs_check_for_remote_deletion, .edit_dir = afs_create_edit_dir, .put = afs_create_put, }; @@ -1601,6 +1637,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, afs_op_set_vnode(op, 0, dvnode); op->file[0].dv_delta = 1; + op->file[0].update_ctime = true; op->dentry = dentry; op->create.mode = S_IFREG | mode; @@ -1620,6 +1657,7 @@ static void afs_link_success(struct afs_operation *op) struct afs_vnode_param *vp = &op->file[1]; _enter("op=%08x", op->debug_id); + op->ctime = dvp->scb.status.mtime_client; afs_vnode_commit_status(op, dvp); afs_vnode_commit_status(op, vp); afs_update_dentry_version(op, dvp, op->dentry); @@ -1640,6 +1678,7 @@ static const struct afs_operation_ops afs_link_operation = { .issue_afs_rpc = afs_fs_link, .issue_yfs_rpc = yfs_fs_link, .success = afs_link_success, + .aborted = afs_check_for_remote_deletion, .edit_dir = afs_create_edit_dir, .put = afs_link_put, }; @@ -1672,6 +1711,8 @@ static int afs_link(struct dentry *from, struct inode *dir, afs_op_set_vnode(op, 0, dvnode); afs_op_set_vnode(op, 1, vnode); op->file[0].dv_delta = 1; + op->file[0].update_ctime = true; + op->file[1].update_ctime = true; op->dentry = dentry; op->dentry_2 = from; @@ -1689,6 +1730,7 @@ static const struct afs_operation_ops afs_symlink_operation = { .issue_afs_rpc = afs_fs_symlink, .issue_yfs_rpc = yfs_fs_symlink, .success = afs_create_success, + .aborted = afs_check_for_remote_deletion, .edit_dir = afs_create_edit_dir, .put = afs_create_put, }; @@ -1740,9 +1782,13 @@ static void afs_rename_success(struct afs_operation *op) { _enter("op=%08x", op->debug_id); + op->ctime = op->file[0].scb.status.mtime_client; + afs_check_dir_conflict(op, &op->file[1]); afs_vnode_commit_status(op, &op->file[0]); - if (op->file[1].vnode != op->file[0].vnode) + if (op->file[1].vnode != op->file[0].vnode) { + op->ctime = op->file[1].scb.status.mtime_client; afs_vnode_commit_status(op, &op->file[1]); + } } static void afs_rename_edit_dir(struct afs_operation *op) @@ -1860,6 +1906,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */ op->file[0].dv_delta = 1; op->file[1].dv_delta = 1; + op->file[0].update_ctime = true; + op->file[1].update_ctime = true; op->dentry = old_dentry; op->dentry_2 = new_dentry; diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c index b14e3d9a25e2..04f75a44f243 100644 --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c @@ -16,6 +16,7 @@ static void afs_silly_rename_success(struct afs_operation *op) { _enter("op=%08x", op->debug_id); + afs_check_dir_conflict(op, &op->file[0]); afs_vnode_commit_status(op, &op->file[0]); } @@ -69,6 +70,11 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode return PTR_ERR(op); afs_op_set_vnode(op, 0, dvnode); + afs_op_set_vnode(op, 1, dvnode); + op->file[0].dv_delta = 1; + op->file[1].dv_delta = 1; + op->file[0].update_ctime = true; + op->file[1].update_ctime = true; op->dentry = old; op->dentry_2 = new; @@ -129,6 +135,7 @@ int afs_sillyrename(struct afs_vnode *dvnode, struct afs_vnode *vnode, switch (ret) { case 0: /* The rename succeeded. */ + set_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags); d_move(dentry, sdentry); break; case -ERESTARTSYS: @@ -148,19 +155,11 @@ out: static void afs_silly_unlink_success(struct afs_operation *op) { - struct afs_vnode *vnode = op->file[1].vnode; - _enter("op=%08x", op->debug_id); - afs_check_for_remote_deletion(op, op->file[0].vnode); + afs_check_dir_conflict(op, &op->file[0]); afs_vnode_commit_status(op, &op->file[0]); afs_vnode_commit_status(op, &op->file[1]); afs_update_dentry_version(op, &op->file[0], op->dentry); - - drop_nlink(&vnode->vfs_inode); - if (vnode->vfs_inode.i_nlink == 0) { - set_bit(AFS_VNODE_DELETED, &vnode->flags); - clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); - } } static void afs_silly_unlink_edit_dir(struct afs_operation *op) @@ -181,6 +180,7 @@ static const struct afs_operation_ops afs_silly_unlink_operation = { .issue_afs_rpc = afs_fs_remove_file, .issue_yfs_rpc = yfs_fs_remove_file, .success = afs_silly_unlink_success, + .aborted = afs_check_for_remote_deletion, .edit_dir = afs_silly_unlink_edit_dir, }; @@ -200,12 +200,30 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode afs_op_set_vnode(op, 0, dvnode); afs_op_set_vnode(op, 1, vnode); + op->file[0].dv_delta = 1; + op->file[0].update_ctime = true; + op->file[1].op_unlinked = true; + op->file[1].update_ctime = true; op->dentry = dentry; op->ops = &afs_silly_unlink_operation; trace_afs_silly_rename(vnode, true); - return afs_do_sync_operation(op); + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + + /* If there was a conflict with a third party, check the status of the + * unlinked vnode. + */ + if (op->error == 0 && (op->flags & AFS_OPERATION_DIR_CONFLICT)) { + op->file[1].update_ctime = false; + op->fetch_status.which = 1; + op->ops = &afs_fetch_status_operation; + afs_begin_vnode_operation(op); + afs_wait_for_operation(op); + } + + return afs_put_operation(op); } /* diff --git a/fs/afs/file.c b/fs/afs/file.c index 506c47471b42..6f6ed1605cfe 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -225,7 +225,6 @@ static void afs_fetch_data_success(struct afs_operation *op) struct afs_vnode *vnode = op->file[0].vnode; _enter("op=%08x", op->debug_id); - afs_check_for_remote_deletion(op, vnode); afs_vnode_commit_status(op, &op->file[0]); afs_stat_v(vnode, n_fetches); atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes); @@ -240,6 +239,7 @@ static const struct afs_operation_ops afs_fetch_data_operation = { .issue_afs_rpc = afs_fs_fetch_data, .issue_yfs_rpc = yfs_fs_fetch_data, .success = afs_fetch_data_success, + .aborted = afs_check_for_remote_deletion, .put = afs_fetch_data_put, }; diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 71eea2a908c7..ffb8575345ca 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -175,10 +175,7 @@ static void afs_kill_lockers_enoent(struct afs_vnode *vnode) static void afs_lock_success(struct afs_operation *op) { - struct afs_vnode *vnode = op->file[0].vnode; - _enter("op=%08x", op->debug_id); - afs_check_for_remote_deletion(op, vnode); afs_vnode_commit_status(op, &op->file[0]); } @@ -186,6 +183,7 @@ static const struct afs_operation_ops afs_set_lock_operation = { .issue_afs_rpc = afs_fs_set_lock, .issue_yfs_rpc = yfs_fs_set_lock, .success = afs_lock_success, + .aborted = afs_check_for_remote_deletion, }; /* diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index 2d2dff5688a4..c264839b2fd0 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -187,9 +187,17 @@ void afs_wait_for_operation(struct afs_operation *op) op->error = afs_wait_for_call_to_complete(op->call, &op->ac); } - if (op->error == 0) { + switch (op->error) { + case 0: _debug("success"); op->ops->success(op); + break; + case -ECONNABORTED: + if (op->ops->aborted) + op->ops->aborted(op); + break; + default: + break; } afs_end_vnode_operation(op); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index cd0a0060950b..1d13d2e882ad 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -165,9 +165,11 @@ static void afs_apply_status(struct afs_operation *op, { struct afs_file_status *status = &vp->scb.status; struct afs_vnode *vnode = vp->vnode; + struct inode *inode = &vnode->vfs_inode; struct timespec64 t; umode_t mode; bool data_changed = false; + bool change_size = vp->set_size; _enter("{%llx:%llu.%u} %s", vp->fid.vid, vp->fid.vnode, vp->fid.unique, @@ -186,25 +188,25 @@ static void afs_apply_status(struct afs_operation *op, } if (status->nlink != vnode->status.nlink) - set_nlink(&vnode->vfs_inode, status->nlink); + set_nlink(inode, status->nlink); if (status->owner != vnode->status.owner) - vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner); + inode->i_uid = make_kuid(&init_user_ns, status->owner); if (status->group != vnode->status.group) - vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group); + inode->i_gid = make_kgid(&init_user_ns, status->group); if (status->mode != vnode->status.mode) { - mode = vnode->vfs_inode.i_mode; + mode = inode->i_mode; mode &= ~S_IALLUGO; mode |= status->mode; - WRITE_ONCE(vnode->vfs_inode.i_mode, mode); + WRITE_ONCE(inode->i_mode, mode); } t = status->mtime_client; - vnode->vfs_inode.i_ctime = t; - vnode->vfs_inode.i_mtime = t; - vnode->vfs_inode.i_atime = t; + inode->i_mtime = t; + if (vp->update_ctime) + inode->i_ctime = op->ctime; if (vnode->status.data_version != status->data_version) data_changed = true; @@ -226,6 +228,7 @@ static void afs_apply_status(struct afs_operation *op, } else { set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); } + change_size = true; } else if (vnode->status.type == AFS_FTYPE_DIR) { /* Expected directory change is handled elsewhere so * that we can locally edit the directory and save on a @@ -233,11 +236,22 @@ static void afs_apply_status(struct afs_operation *op, */ if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) data_changed = false; + change_size = true; } if (data_changed) { - inode_set_iversion_raw(&vnode->vfs_inode, status->data_version); - afs_set_i_size(vnode, status->size); + inode_set_iversion_raw(inode, status->data_version); + + /* Only update the size if the data version jumped. If the + * file is being modified locally, then we might have our own + * idea of what the size should be that's not the same as + * what's on the server. + */ + if (change_size) { + afs_set_i_size(vnode, status->size); + inode->i_ctime = t; + inode->i_atime = t; + } } } @@ -267,32 +281,39 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v _enter(""); - ASSERTCMP(op->error, ==, 0); - write_seqlock(&vnode->cb_lock); if (vp->scb.have_error) { + /* A YFS server will return this from RemoveFile2 and AFS and + * YFS will return this from InlineBulkStatus. + */ if (vp->scb.status.abort_code == VNOVNODE) { set_bit(AFS_VNODE_DELETED, &vnode->flags); clear_nlink(&vnode->vfs_inode); __afs_break_callback(vnode, afs_cb_break_for_deleted); + op->flags &= ~AFS_OPERATION_DIR_CONFLICT; } - } else { - if (vp->scb.have_status) - afs_apply_status(op, vp); + } else if (vp->scb.have_status) { + afs_apply_status(op, vp); if (vp->scb.have_cb) afs_apply_callback(op, vp); + } else if (vp->op_unlinked && !(op->flags & AFS_OPERATION_DIR_CONFLICT)) { + drop_nlink(&vnode->vfs_inode); + if (vnode->vfs_inode.i_nlink == 0) { + set_bit(AFS_VNODE_DELETED, &vnode->flags); + __afs_break_callback(vnode, afs_cb_break_for_deleted); + } } write_sequnlock(&vnode->cb_lock); - if (op->error == 0 && vp->scb.have_status) + if (vp->scb.have_status) afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb); } static void afs_fetch_status_success(struct afs_operation *op) { - struct afs_vnode_param *vp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; struct afs_vnode *vnode = vp->vnode; int ret; @@ -306,10 +327,11 @@ static void afs_fetch_status_success(struct afs_operation *op) } } -static const struct afs_operation_ops afs_fetch_status_operation = { +const struct afs_operation_ops afs_fetch_status_operation = { .issue_afs_rpc = afs_fs_fetch_status, .issue_yfs_rpc = yfs_fs_fetch_status, .success = afs_fetch_status_success, + .aborted = afs_check_for_remote_deletion, }; /* @@ -716,6 +738,9 @@ int afs_getattr(const struct path *path, struct kstat *stat, do { read_seqbegin_or_lock(&vnode->cb_lock, &seq); generic_fillattr(inode, stat); + if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) && + stat->nlink > 0) + stat->nlink -= 1; } while (need_seqretry(&vnode->cb_lock, seq)); done_seqretry(&vnode->cb_lock, seq); @@ -785,7 +810,15 @@ void afs_evict_inode(struct inode *inode) static void afs_setattr_success(struct afs_operation *op) { + struct inode *inode = &op->file[0].vnode->vfs_inode; + afs_vnode_commit_status(op, &op->file[0]); + if (op->setattr.attr->ia_valid & ATTR_SIZE) { + loff_t i_size = inode->i_size, size = op->setattr.attr->ia_size; + if (size > i_size) + pagecache_isize_extended(inode, i_size, size); + truncate_pagecache(inode, size); + } } static const struct afs_operation_ops afs_setattr_operation = { @@ -801,17 +834,31 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) { struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); + int ret; _enter("{%llx:%llu},{n=%pd},%x", vnode->fid.vid, vnode->fid.vnode, dentry, attr->ia_valid); if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | - ATTR_MTIME))) { + ATTR_MTIME | ATTR_MTIME_SET | ATTR_TIMES_SET | + ATTR_TOUCH))) { _leave(" = 0 [unsupported]"); return 0; } + if (attr->ia_valid & ATTR_SIZE) { + if (!S_ISREG(vnode->vfs_inode.i_mode)) + return -EISDIR; + + ret = inode_newsize_ok(&vnode->vfs_inode, attr->ia_size); + if (ret) + return ret; + + if (attr->ia_size == i_size_read(&vnode->vfs_inode)) + attr->ia_valid &= ~ATTR_SIZE; + } + /* flush any dirty data outstanding on a regular file */ if (S_ISREG(vnode->vfs_inode.i_mode)) filemap_write_and_wait(vnode->vfs_inode.i_mapping); @@ -825,8 +872,12 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) afs_op_set_vnode(op, 0, vnode); op->setattr.attr = attr; - if (attr->ia_valid & ATTR_SIZE) + if (attr->ia_valid & ATTR_SIZE) { op->file[0].dv_delta = 1; + op->file[0].set_size = true; + } + op->ctime = attr->ia_ctime; + op->file[0].update_ctime = 1; op->ops = &afs_setattr_operation; return afs_do_sync_operation(op); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 0c9806ef2a19..573a5922c3bb 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -634,6 +634,7 @@ struct afs_vnode { #define AFS_VNODE_AUTOCELL 6 /* set if Vnode is an auto mount point */ #define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */ #define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */ +#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */ struct list_head wb_keys; /* List of keys available for writeback */ struct list_head pending_locks; /* locks waiting to be granted */ @@ -744,8 +745,11 @@ struct afs_vnode_param { afs_dataversion_t dv_before; /* Data version before the call */ unsigned int cb_break_before; /* cb_break + cb_s_break before the call */ u8 dv_delta; /* Expected change in data version */ - bool put_vnode; /* T if we have a ref on the vnode */ - bool need_io_lock; /* T if we need the I/O lock on this */ + bool put_vnode:1; /* T if we have a ref on the vnode */ + bool need_io_lock:1; /* T if we need the I/O lock on this */ + bool update_ctime:1; /* Need to update the ctime */ + bool set_size:1; /* Must update i_size */ + bool op_unlinked:1; /* True if file was unlinked by op */ }; /* @@ -766,9 +770,9 @@ struct afs_operation { struct dentry *dentry; /* Dentry to be altered */ struct dentry *dentry_2; /* Second dentry to be altered */ struct timespec64 mtime; /* Modification time to record */ + struct timespec64 ctime; /* Change time to set */ short nr_files; /* Number of entries in file[], more_files */ short error; - unsigned int abort_code; unsigned int debug_id; unsigned int cb_v_break; /* Volume break counter before op */ @@ -837,6 +841,7 @@ struct afs_operation { #define AFS_OPERATION_LOCK_1 0x0200 /* Set if have io_lock on file[1] */ #define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */ #define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */ +#define AFS_OPERATION_DIR_CONFLICT 0x1000 /* Set if we detected a 3rd-party dir change */ }; /* @@ -932,6 +937,7 @@ extern const struct address_space_operations afs_dir_aops; extern const struct dentry_operations afs_fs_dentry_operations; extern void afs_d_release(struct dentry *); +extern void afs_check_for_remote_deletion(struct afs_operation *); /* * dir_edit.c @@ -1063,6 +1069,8 @@ extern int afs_wait_for_one_fs_probe(struct afs_server *, bool); /* * inode.c */ +extern const struct afs_operation_ops afs_fetch_status_operation; + extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *); extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *); extern int afs_ilookup5_test_by_fid(struct inode *, void *); @@ -1435,7 +1443,6 @@ extern ssize_t afs_listxattr(struct dentry *, char *, size_t); /* * yfsclient.c */ -extern void yfs_fs_fetch_file_status(struct afs_operation *); extern void yfs_fs_fetch_data(struct afs_operation *); extern void yfs_fs_create_file(struct afs_operation *); extern void yfs_fs_make_dir(struct afs_operation *); @@ -1481,15 +1488,6 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode) return &vnode->vfs_inode; } -static inline void afs_check_for_remote_deletion(struct afs_operation *op, - struct afs_vnode *vnode) -{ - if (op->error == -ENOENT) { - set_bit(AFS_VNODE_DELETED, &vnode->flags); - afs_break_callback(vnode, afs_cb_break_for_deleted); - } -} - /* * Note that a dentry got changed. We need to set d_fsdata to the data version * number derived from the result of the operation. It doesn't matter if @@ -1504,6 +1502,18 @@ static inline void afs_update_dentry_version(struct afs_operation *op, (void *)(unsigned long)dir_vp->scb.status.data_version; } +/* + * Check for a conflicting operation on a directory that we just unlinked from. + * If someone managed to sneak a link or an unlink in on the file we just + * unlinked, we won't be able to trust nlink on an AFS file (but not YFS). + */ +static inline void afs_check_dir_conflict(struct afs_operation *op, + struct afs_vnode_param *dvp) +{ + if (dvp->dv_before + dvp->dv_delta != dvp->scb.status.data_version) + op->flags |= AFS_OPERATION_DIR_CONFLICT; +} + static inline int afs_io_error(struct afs_call *call, enum afs_io_error where) { trace_afs_io_error(call->debug_id, -EIO, where); diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 52b19e9c1535..5334f1bd2bca 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -83,6 +83,7 @@ int afs_abort_to_error(u32 abort_code) case UAENOLCK: return -ENOLCK; case UAENOTEMPTY: return -ENOTEMPTY; case UAELOOP: return -ELOOP; + case UAEOVERFLOW: return -EOVERFLOW; case UAENOMEDIUM: return -ENOMEDIUM; case UAEDQUOT: return -EDQUOT; diff --git a/fs/afs/write.c b/fs/afs/write.c index 768497f82aee..7437806332d9 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -194,11 +194,11 @@ int afs_write_end(struct file *file, struct address_space *mapping, i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) { - spin_lock(&vnode->wb_lock); + write_seqlock(&vnode->cb_lock); i_size = i_size_read(&vnode->vfs_inode); if (maybe_i_size > i_size) i_size_write(&vnode->vfs_inode, maybe_i_size); - spin_unlock(&vnode->wb_lock); + write_sequnlock(&vnode->cb_lock); } if (!PageUptodate(page)) { @@ -393,6 +393,7 @@ static void afs_store_data_success(struct afs_operation *op) { struct afs_vnode *vnode = op->file[0].vnode; + op->ctime = op->file[0].scb.status.mtime_client; afs_vnode_commit_status(op, &op->file[0]); if (op->error == 0) { afs_pages_written_back(vnode, op->store.first, op->store.last); @@ -491,6 +492,7 @@ static int afs_write_back_from_locked_page(struct address_space *mapping, unsigned long count, priv; unsigned n, offset, to, f, t; pgoff_t start, first, last; + loff_t i_size, end; int loop, ret; _enter(",%lx", primary_page->index); @@ -591,7 +593,12 @@ no_more: first = primary_page->index; last = first + count - 1; + end = (loff_t)last * PAGE_SIZE + to; + i_size = i_size_read(&vnode->vfs_inode); + _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); + if (end > i_size) + to = i_size & ~PAGE_MASK; ret = afs_store_data(mapping, first, last, offset, to); switch (ret) { @@ -844,6 +851,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) vmf->page->index, priv); SetPagePrivate(vmf->page); set_page_private(vmf->page, priv); + file_update_time(file); sb_end_pagefault(inode->i_sb); return VM_FAULT_LOCKED; diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 52d5af5fcd44..8c24fdc899e3 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -330,29 +330,6 @@ static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp, } /* - * Deliver a reply that's a status, callback and volsync. - */ -static int yfs_deliver_fs_status_cb_and_volsync(struct afs_call *call) -{ - struct afs_operation *op = call->op; - const __be32 *bp; - int ret; - - ret = afs_transfer_reply(call); - if (ret < 0) - return ret; - - /* unmarshall the reply once we've received all of it */ - bp = call->buffer; - xdr_decode_YFSFetchStatus(&bp, call, &op->file[0].scb); - xdr_decode_YFSCallBack(&bp, call, &op->file[0].scb); - xdr_decode_YFSVolSync(&bp, &op->volsync); - - _leave(" = 0 [done]"); - return 0; -} - -/* * Deliver reply data to operations that just return a file status and a volume * sync record. */ @@ -375,48 +352,6 @@ static int yfs_deliver_status_and_volsync(struct afs_call *call) } /* - * YFS.FetchStatus operation type - */ -static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = { - .name = "YFS.FetchStatus(vnode)", - .op = yfs_FS_FetchStatus, - .deliver = yfs_deliver_fs_status_cb_and_volsync, - .destructor = afs_flat_call_destructor, -}; - -/* - * Fetch the status information for a file. - */ -void yfs_fs_fetch_file_status(struct afs_operation *op) -{ - struct afs_vnode_param *vp = &op->file[0]; - struct afs_call *call; - __be32 *bp; - - _enter(",%x,{%llx:%llu},,", - key_serial(op->key), vp->fid.vid, vp->fid.vnode); - - call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchStatus_vnode, - sizeof(__be32) * 2 + - sizeof(struct yfs_xdr_YFSFid), - sizeof(struct yfs_xdr_YFSFetchStatus) + - sizeof(struct yfs_xdr_YFSCallBack) + - sizeof(struct yfs_xdr_YFSVolSync)); - if (!call) - return afs_op_nomem(op); - - /* marshall the parameters */ - bp = call->request; - bp = xdr_encode_u32(bp, YFSFETCHSTATUS); - bp = xdr_encode_u32(bp, 0); /* RPC flags */ - bp = xdr_encode_YFSFid(bp, &vp->fid); - yfs_check_req(call, bp); - - trace_afs_make_fs_call(call, &vp->fid); - afs_make_op_call(op, call, GFP_NOFS); -} - -/* * Deliver reply data to an YFS.FetchData64. */ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) @@ -1605,12 +1540,36 @@ void yfs_fs_release_lock(struct afs_operation *op) } /* + * Deliver a reply to YFS.FetchStatus + */ +static int yfs_deliver_fs_fetch_status(struct afs_call *call) +{ + struct afs_operation *op = call->op; + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; + const __be32 *bp; + int ret; + + ret = afs_transfer_reply(call); + if (ret < 0) + return ret; + + /* unmarshall the reply once we've received all of it */ + bp = call->buffer; + xdr_decode_YFSFetchStatus(&bp, call, &vp->scb); + xdr_decode_YFSCallBack(&bp, call, &vp->scb); + xdr_decode_YFSVolSync(&bp, &op->volsync); + + _leave(" = 0 [done]"); + return 0; +} + +/* * YFS.FetchStatus operation type */ static const struct afs_call_type yfs_RXYFSFetchStatus = { .name = "YFS.FetchStatus", .op = yfs_FS_FetchStatus, - .deliver = yfs_deliver_fs_status_cb_and_volsync, + .deliver = yfs_deliver_fs_fetch_status, .destructor = afs_flat_call_destructor, }; @@ -1619,7 +1578,7 @@ static const struct afs_call_type yfs_RXYFSFetchStatus = { */ void yfs_fs_fetch_status(struct afs_operation *op) { - struct afs_vnode_param *vp = &op->file[0]; + struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; struct afs_call *call; __be32 *bp; @@ -67,7 +67,7 @@ struct aio_ring { unsigned header_length; /* size of aio_ring */ - struct io_event io_events[0]; + struct io_event io_events[]; }; /* 128 bytes + ring size */ /* diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 4ccb3c9189d8..2e42f47a7f98 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -9,7 +9,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \ extents_status.o file.o fsmap.o fsync.o hash.o ialloc.o \ indirect.o inline.o inode.o ioctl.o mballoc.o migrate.o \ mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \ - super.o symlink.o sysfs.o xattr.o xattr_trusted.o xattr_user.o + super.o symlink.o sysfs.o xattr.o xattr_hurd.o xattr_trusted.o \ + xattr_user.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index c654205f648d..1d82336b1cd4 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -675,6 +675,7 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, struct qstr qstr = {.name = str, .len = len }; const struct dentry *parent = READ_ONCE(dentry->d_parent); const struct inode *inode = READ_ONCE(parent->d_inode); + char strbuf[DNAME_INLINE_LEN]; if (!inode || !IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { @@ -683,6 +684,21 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, return memcmp(str, name->name, len); } + /* + * If the dentry name is stored in-line, then it may be concurrently + * modified by a rename. If this happens, the VFS will eventually retry + * the lookup, so it doesn't matter what ->d_compare() returns. + * However, it's unsafe to call utf8_strncasecmp() with an unstable + * string. Therefore, we have to copy the name into a temporary buffer. + */ + if (len <= DNAME_INLINE_LEN - 1) { + memcpy(strbuf, str, len); + strbuf[len] = 0; + qstr.name = strbuf; + /* prevent compiler from optimizing out the temporary buffer */ + barrier(); + } + return ext4_ci_compare(inode, name, &qstr, false); } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b08841f70b69..42f5060f3cdf 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -426,13 +426,16 @@ struct flex_groups { #define EXT4_VERITY_FL 0x00100000 /* Verity protected inode */ #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ /* 0x00400000 was formerly EXT4_EOFBLOCKS_FL */ + +#define EXT4_DAX_FL 0x02000000 /* Inode is DAX */ + #define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ #define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_FL_USER_VISIBLE 0x705BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x604BC0FF /* User modifiable flags */ +#define EXT4_FL_USER_VISIBLE 0x725BDFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x624BC0FF /* User modifiable flags */ /* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */ #define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \ @@ -440,14 +443,16 @@ struct flex_groups { EXT4_APPEND_FL | \ EXT4_NODUMP_FL | \ EXT4_NOATIME_FL | \ - EXT4_PROJINHERIT_FL) + EXT4_PROJINHERIT_FL | \ + EXT4_DAX_FL) /* Flags that should be inherited by new inodes from their parent. */ #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\ - EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL) + EXT4_PROJINHERIT_FL | EXT4_CASEFOLD_FL |\ + EXT4_DAX_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ #define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL | EXT4_CASEFOLD_FL |\ @@ -459,6 +464,10 @@ struct flex_groups { /* The only flags that should be swapped */ #define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL) +/* Flags which are mutually exclusive to DAX */ +#define EXT4_DAX_MUT_EXCL (EXT4_VERITY_FL | EXT4_ENCRYPT_FL |\ + EXT4_JOURNAL_DATA_FL) + /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) { @@ -499,6 +508,7 @@ enum { EXT4_INODE_VERITY = 20, /* Verity protected inode */ EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ /* 22 was formerly EXT4_INODE_EOFBLOCKS */ + EXT4_INODE_DAX = 25, /* Inode is DAX */ EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */ EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */ EXT4_INODE_CASEFOLD = 30, /* Casefolded directory */ @@ -1135,9 +1145,9 @@ struct ext4_inode_info { #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ #ifdef CONFIG_FS_DAX -#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */ +#define EXT4_MOUNT_DAX_ALWAYS 0x00200 /* Direct Access */ #else -#define EXT4_MOUNT_DAX 0 +#define EXT4_MOUNT_DAX_ALWAYS 0 #endif #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ @@ -1180,6 +1190,8 @@ struct ext4_inode_info { blocks */ #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated file systems */ +#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */ +#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */ #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly specified journal checksum */ @@ -1992,6 +2004,7 @@ static inline bool ext4_has_incompat_features(struct super_block *sb) */ #define EXT4_FLAGS_RESIZING 0 #define EXT4_FLAGS_SHUTDOWN 1 +#define EXT4_FLAGS_BDEV_IS_DAX 2 static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi) { @@ -2705,7 +2718,7 @@ extern int ext4_can_truncate(struct inode *inode); extern int ext4_truncate(struct inode *); extern int ext4_break_layouts(struct inode *); extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); -extern void ext4_set_inode_flags(struct inode *); +extern void ext4_set_inode_flags(struct inode *, bool init); extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7d088ff1e902..221f240eae60 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2844,7 +2844,7 @@ again: * in use to avoid freeing it when removing blocks. */ if (sbi->s_cluster_ratio > 1) { - pblk = ext4_ext_pblock(ex) + end - ee_block + 2; + pblk = ext4_ext_pblock(ex) + end - ee_block + 1; partial.pclu = EXT4_B2C(sbi, pblk); partial.state = nofree; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 54d324e80fe5..df25d38d6539 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1116,7 +1116,7 @@ got: ei->i_block_group = group; ei->i_last_alloc_group = ~0; - ext4_set_inode_flags(inode); + ext4_set_inode_flags(inode, true); if (IS_DIRSYNC(inode)) ext4_handle_sync(handle); if (insert_inode_locked(inode) < 0) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 40ec5c7ef0d3..10dd470876b3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4403,9 +4403,11 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); } -static bool ext4_should_use_dax(struct inode *inode) +static bool ext4_should_enable_dax(struct inode *inode) { - if (!test_opt(inode->i_sb, DAX)) + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + + if (test_opt2(inode->i_sb, DAX_NEVER)) return false; if (!S_ISREG(inode->i_mode)) return false; @@ -4417,14 +4419,21 @@ static bool ext4_should_use_dax(struct inode *inode) return false; if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY)) return false; - return true; + if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) + return false; + if (test_opt(inode->i_sb, DAX_ALWAYS)) + return true; + + return ext4_test_inode_flag(inode, EXT4_INODE_DAX); } -void ext4_set_inode_flags(struct inode *inode) +void ext4_set_inode_flags(struct inode *inode, bool init) { unsigned int flags = EXT4_I(inode)->i_flags; unsigned int new_fl = 0; + WARN_ON_ONCE(IS_DAX(inode) && init); + if (flags & EXT4_SYNC_FL) new_fl |= S_SYNC; if (flags & EXT4_APPEND_FL) @@ -4435,8 +4444,13 @@ void ext4_set_inode_flags(struct inode *inode) new_fl |= S_NOATIME; if (flags & EXT4_DIRSYNC_FL) new_fl |= S_DIRSYNC; - if (ext4_should_use_dax(inode)) + + /* Because of the way inode_set_flags() works we must preserve S_DAX + * here if already set. */ + new_fl |= (inode->i_flags & S_DAX); + if (init && ext4_should_enable_dax(inode)) new_fl |= S_DAX; + if (flags & EXT4_ENCRYPT_FL) new_fl |= S_ENCRYPTED; if (flags & EXT4_CASEFOLD_FL) @@ -4650,7 +4664,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, * not initialized on a new filesystem. */ } ei->i_flags = le32_to_cpu(raw_inode->i_flags); - ext4_set_inode_flags(inode); + ext4_set_inode_flags(inode, true); inode->i_blocks = ext4_inode_blocks(raw_inode, ei); ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); if (ext4_has_feature_64bit(sb)) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 2162db0c747d..999cf6add39c 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -292,6 +292,38 @@ static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid, return 0; } +static void ext4_dax_dontcache(struct inode *inode, unsigned int flags) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + + if (S_ISDIR(inode->i_mode)) + return; + + if (test_opt2(inode->i_sb, DAX_NEVER) || + test_opt(inode->i_sb, DAX_ALWAYS)) + return; + + if ((ei->i_flags ^ flags) & EXT4_DAX_FL) + d_mark_dontcache(inode); +} + +static bool dax_compatible(struct inode *inode, unsigned int oldflags, + unsigned int flags) +{ + if (flags & EXT4_DAX_FL) { + if ((oldflags & EXT4_DAX_MUT_EXCL) || + ext4_test_inode_state(inode, + EXT4_STATE_VERITY_IN_PROGRESS)) { + return false; + } + } + + if ((flags & EXT4_DAX_MUT_EXCL) && (oldflags & EXT4_DAX_FL)) + return false; + + return true; +} + static int ext4_ioctl_setflags(struct inode *inode, unsigned int flags) { @@ -300,7 +332,6 @@ static int ext4_ioctl_setflags(struct inode *inode, int err = -EPERM, migrate = 0; struct ext4_iloc iloc; unsigned int oldflags, mask, i; - unsigned int jflag; struct super_block *sb = inode->i_sb; /* Is it quota file? Do not allow user to mess with it */ @@ -309,9 +340,6 @@ static int ext4_ioctl_setflags(struct inode *inode, oldflags = ei->i_flags; - /* The JOURNAL_DATA flag is modifiable only by root */ - jflag = flags & EXT4_JOURNAL_DATA_FL; - err = vfs_ioc_setflags_prepare(inode, oldflags, flags); if (err) goto flags_out; @@ -320,10 +348,16 @@ static int ext4_ioctl_setflags(struct inode *inode, * The JOURNAL_DATA flag can only be changed by * the relevant capability. */ - if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { + if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { if (!capable(CAP_SYS_RESOURCE)) goto flags_out; } + + if (!dax_compatible(inode, oldflags, flags)) { + err = -EOPNOTSUPP; + goto flags_out; + } + if ((flags ^ oldflags) & EXT4_EXTENTS_FL) migrate = 1; @@ -369,6 +403,8 @@ static int ext4_ioctl_setflags(struct inode *inode, if (err) goto flags_err; + ext4_dax_dontcache(inode, flags); + for (i = 0, mask = 1; i < 32; i++, mask <<= 1) { if (!(mask & EXT4_FL_USER_MODIFIABLE)) continue; @@ -381,7 +417,8 @@ static int ext4_ioctl_setflags(struct inode *inode, ext4_clear_inode_flag(inode, i); } - ext4_set_inode_flags(inode); + ext4_set_inode_flags(inode, false); + inode->i_ctime = current_time(inode); err = ext4_mark_iloc_dirty(handle, inode, &iloc); @@ -390,17 +427,18 @@ flags_err: if (err) goto flags_out; - if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { + if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { /* * Changes to the journaling mode can cause unsafe changes to - * S_DAX if we are using the DAX mount option. + * S_DAX if the inode is DAX */ - if (test_opt(inode->i_sb, DAX)) { + if (IS_DAX(inode)) { err = -EBUSY; goto flags_out; } - err = ext4_change_inode_journal_flag(inode, jflag); + err = ext4_change_inode_journal_flag(inode, + flags & EXT4_JOURNAL_DATA_FL); if (err) goto flags_out; } @@ -527,12 +565,15 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags) xflags |= FS_XFLAG_NOATIME; if (iflags & EXT4_PROJINHERIT_FL) xflags |= FS_XFLAG_PROJINHERIT; + if (iflags & EXT4_DAX_FL) + xflags |= FS_XFLAG_DAX; return xflags; } #define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ - FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) + FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT | \ + FS_XFLAG_DAX) /* Transfer xflags flags to internal */ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) @@ -551,6 +592,8 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) iflags |= EXT4_NOATIME_FL; if (xflags & FS_XFLAG_PROJINHERIT) iflags |= EXT4_PROJINHERIT_FL; + if (xflags & FS_XFLAG_DAX) + iflags |= EXT4_DAX_FL; return iflags; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a9083113a8c0..c0a331e2feb0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4708,7 +4708,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, } ac->ac_op = EXT4_MB_HISTORY_PREALLOC; - seq = *this_cpu_ptr(&discard_pa_seq); + seq = this_cpu_read(discard_pa_seq); if (!ext4_mb_use_preallocated(ac)) { ac->ac_op = EXT4_MB_HISTORY_ALLOC; ext4_mb_normalize_request(ac, ar); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c668f6b42374..330957ed1f05 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -522,9 +522,6 @@ static void ext4_handle_error(struct super_block *sb) smp_wmb(); sb->s_flags |= SB_RDONLY; } else if (test_opt(sb, ERRORS_PANIC)) { - if (EXT4_SB(sb)->s_journal && - !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) - return; panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); } @@ -725,23 +722,20 @@ void __ext4_abort(struct super_block *sb, const char *function, va_end(args); if (sb_rdonly(sb) == 0) { - ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; + if (EXT4_SB(sb)->s_journal) + jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); + + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); /* * Make sure updated value of ->s_mount_flags will be visible * before ->s_flags update */ smp_wmb(); sb->s_flags |= SB_RDONLY; - if (EXT4_SB(sb)->s_journal) - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); } - if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) { - if (EXT4_SB(sb)->s_journal && - !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) - return; + if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) panic("EXT4-fs panic from previous error\n"); - } } void __ext4_msg(struct super_block *sb, @@ -1324,6 +1318,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode))) return -EINVAL; + if (ext4_test_inode_flag(inode, EXT4_INODE_DAX)) + return -EOPNOTSUPP; + res = ext4_convert_inline_data(inode); if (res) return res; @@ -1349,7 +1346,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, * Update inode->i_flags - S_ENCRYPTED will be enabled, * S_DAX may be disabled */ - ext4_set_inode_flags(inode); + ext4_set_inode_flags(inode, false); } return res; } @@ -1376,7 +1373,7 @@ retry: * Update inode->i_flags - S_ENCRYPTED will be enabled, * S_DAX may be disabled */ - ext4_set_inode_flags(inode); + ext4_set_inode_flags(inode, false); res = ext4_mark_inode_dirty(handle, inode); if (res) EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); @@ -1514,7 +1511,8 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, - Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax, + Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, + Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error, Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize, @@ -1581,6 +1579,9 @@ static const match_table_t tokens = { {Opt_nobarrier, "nobarrier"}, {Opt_i_version, "i_version"}, {Opt_dax, "dax"}, + {Opt_dax_always, "dax=always"}, + {Opt_dax_inode, "dax=inode"}, + {Opt_dax_never, "dax=never"}, {Opt_stripe, "stripe=%u"}, {Opt_delalloc, "delalloc"}, {Opt_warn_on_error, "warn_on_error"}, @@ -1729,6 +1730,7 @@ static int clear_qf_name(struct super_block *sb, int qtype) #define MOPT_NO_EXT3 0x0200 #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) #define MOPT_STRING 0x0400 +#define MOPT_SKIP 0x0800 static const struct mount_opts { int token; @@ -1778,7 +1780,13 @@ static const struct mount_opts { {Opt_min_batch_time, 0, MOPT_GTE0}, {Opt_inode_readahead_blks, 0, MOPT_GTE0}, {Opt_init_itable, 0, MOPT_GTE0}, - {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET}, + {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP}, + {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS, + MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP}, + {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE, + MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP}, + {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER, + MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP}, {Opt_stripe, 0, MOPT_GTE0}, {Opt_resuid, 0, MOPT_GTE0}, {Opt_resgid, 0, MOPT_GTE0}, @@ -2123,13 +2131,56 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, } sbi->s_jquota_fmt = m->mount_opt; #endif - } else if (token == Opt_dax) { + } else if (token == Opt_dax || token == Opt_dax_always || + token == Opt_dax_inode || token == Opt_dax_never) { #ifdef CONFIG_FS_DAX - ext4_msg(sb, KERN_WARNING, - "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); - sbi->s_mount_opt |= m->mount_opt; + switch (token) { + case Opt_dax: + case Opt_dax_always: + if (is_remount && + (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || + (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) { + fail_dax_change_remount: + ext4_msg(sb, KERN_ERR, "can't change " + "dax mount option while remounting"); + return -1; + } + if (is_remount && + (test_opt(sb, DATA_FLAGS) == + EXT4_MOUNT_JOURNAL_DATA)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and dax"); + return -1; + } + ext4_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS; + sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER; + break; + case Opt_dax_never: + if (is_remount && + (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) || + (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) + goto fail_dax_change_remount; + sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER; + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS; + break; + case Opt_dax_inode: + if (is_remount && + ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) || + (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) || + !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) + goto fail_dax_change_remount; + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS; + sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER; + /* Strictly for printing options */ + sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE; + break; + } #else ext4_msg(sb, KERN_INFO, "dax option not supported"); + sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER; + sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS; return -1; #endif } else if (token == Opt_data_err_abort) { @@ -2293,7 +2344,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, for (m = ext4_mount_opts; m->token != Opt_err; m++) { int want_set = m->flags & MOPT_SET; if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || - (m->flags & MOPT_CLEAR_ERR)) + (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP) continue; if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) continue; /* skip if same as the default */ @@ -2353,6 +2404,17 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, fscrypt_show_test_dummy_encryption(seq, sep, sb); + if (test_opt(sb, DAX_ALWAYS)) { + if (IS_EXT2_SB(sb)) + SEQ_OPTS_PUTS("dax"); + else + SEQ_OPTS_PUTS("dax=always"); + } else if (test_opt2(sb, DAX_NEVER)) { + SEQ_OPTS_PUTS("dax=never"); + } else if (test_opt2(sb, DAX_INODE)) { + SEQ_OPTS_PUTS("dax=inode"); + } + ext4_show_quota_options(seq, sb); return 0; } @@ -2383,6 +2445,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, ext4_msg(sb, KERN_ERR, "revision level too high, " "forcing read-only mode"); err = -EROFS; + goto done; } if (read_only) goto done; @@ -4017,7 +4080,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "both data=journal and delalloc"); goto failed_mount; } - if (test_opt(sb, DAX)) { + if (test_opt(sb, DAX_ALWAYS)) { ext4_msg(sb, KERN_ERR, "can't mount with " "both data=journal and dax"); goto failed_mount; @@ -4127,13 +4190,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { + if (bdev_dax_supported(sb->s_bdev, blocksize)) + set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); + + if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) { if (ext4_has_feature_inline_data(sb)) { ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem" " that may contain inline data"); goto failed_mount; } - if (!bdev_dax_supported(sb->s_bdev, blocksize)) { + if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) { ext4_msg(sb, KERN_ERR, "DAX unsupported by block device."); goto failed_mount; @@ -5447,12 +5513,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) err = -EINVAL; goto restore_opts; } - if (test_opt(sb, DAX)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "both data=journal and dax"); - err = -EINVAL; - goto restore_opts; - } } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) { if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { ext4_msg(sb, KERN_ERR, "can't mount with " @@ -5468,12 +5528,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) { - ext4_msg(sb, KERN_WARNING, "warning: refusing change of " - "dax flag with busy inodes while remounting"); - sbi->s_mount_opt ^= EXT4_MOUNT_DAX; - } - if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user"); diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index dec1244dd062..bbd5e7e0632b 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -113,6 +113,9 @@ static int ext4_begin_enable_verity(struct file *filp) handle_t *handle; int err; + if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX)) + return -EINVAL; + if (ext4_verity_in_progress(inode)) return -EBUSY; @@ -241,7 +244,7 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc, if (err) goto out_stop; ext4_set_inode_flag(inode, EXT4_INODE_VERITY); - ext4_set_inode_flags(inode); + ext4_set_inode_flags(inode, false); err = ext4_mark_iloc_dirty(handle, inode, &iloc); } out_stop: diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 9b29a40738ac..7d2f6576d954 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -93,6 +93,7 @@ static const struct xattr_handler * const ext4_xattr_handler_map[] = { #ifdef CONFIG_EXT4_FS_SECURITY [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, #endif + [EXT4_XATTR_INDEX_HURD] = &ext4_xattr_hurd_handler, }; const struct xattr_handler *ext4_xattr_handlers[] = { @@ -105,6 +106,7 @@ const struct xattr_handler *ext4_xattr_handlers[] = { #ifdef CONFIG_EXT4_FS_SECURITY &ext4_xattr_security_handler, #endif + &ext4_xattr_hurd_handler, NULL }; diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index ffe21ac77f78..730b91fa0dd7 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -124,6 +124,7 @@ struct ext4_xattr_inode_array { extern const struct xattr_handler ext4_xattr_user_handler; extern const struct xattr_handler ext4_xattr_trusted_handler; extern const struct xattr_handler ext4_xattr_security_handler; +extern const struct xattr_handler ext4_xattr_hurd_handler; #define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c" diff --git a/fs/ext4/xattr_hurd.c b/fs/ext4/xattr_hurd.c new file mode 100644 index 000000000000..8cfa74a56361 --- /dev/null +++ b/fs/ext4/xattr_hurd.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/fs/ext4/xattr_hurd.c + * Handler for extended gnu attributes for the Hurd. + * + * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> + * Copyright (C) 2020 by Jan (janneke) Nieuwenhuizen, <janneke@gnu.org> + */ + +#include <linux/init.h> +#include <linux/string.h> +#include "ext4.h" +#include "xattr.h" + +static bool +ext4_xattr_hurd_list(struct dentry *dentry) +{ + return test_opt(dentry->d_sb, XATTR_USER); +} + +static int +ext4_xattr_hurd_get(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; + + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_HURD, + name, buffer, size); +} + +static int +ext4_xattr_hurd_set(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; + + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_HURD, + name, value, size, flags); +} + +const struct xattr_handler ext4_xattr_hurd_handler = { + .prefix = XATTR_HURD_PREFIX, + .list = ext4_xattr_hurd_list, + .get = ext4_xattr_hurd_get, + .set = ext4_xattr_hurd_set, +}; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index a49d0e670ddf..e4944436e733 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1140,6 +1140,7 @@ static journal_t *journal_init_common(struct block_device *bdev, init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); init_waitqueue_head(&journal->j_wait_reserved); + mutex_init(&journal->j_abort_mutex); mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); @@ -1402,7 +1403,8 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags) printk(KERN_ERR "JBD2: Error %d detected when updating " "journal superblock for %s.\n", ret, journal->j_devname); - jbd2_journal_abort(journal, ret); + if (!is_journal_aborted(journal)) + jbd2_journal_abort(journal, ret); } return ret; @@ -2154,6 +2156,13 @@ void jbd2_journal_abort(journal_t *journal, int errno) transaction_t *transaction; /* + * Lock the aborting procedure until everything is done, this avoid + * races between filesystem's error handling flow (e.g. ext4_abort()), + * ensure panic after the error info is written into journal's + * superblock. + */ + mutex_lock(&journal->j_abort_mutex); + /* * ESHUTDOWN always takes precedence because a file system check * caused by any other journal abort error is not required after * a shutdown triggered. @@ -2167,6 +2176,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) journal->j_errno = errno; jbd2_journal_update_sb_errno(journal); } + mutex_unlock(&journal->j_abort_mutex); return; } @@ -2188,10 +2198,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) * layer could realise that a filesystem check is needed. */ jbd2_journal_update_sb_errno(journal); - - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_REC_ERR; - write_unlock(&journal->j_state_lock); + mutex_unlock(&journal->j_abort_mutex); } /** diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index 0637271f3770..8ff4d1a1e774 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -259,7 +259,7 @@ struct jffs2_full_dirent uint32_t ino; /* == zero for unlink */ unsigned int nhash; unsigned char type; - unsigned char name[0]; + unsigned char name[]; }; /* diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h index 60207a2ae952..e4131cb1f1d4 100644 --- a/fs/jffs2/summary.h +++ b/fs/jffs2/summary.h @@ -61,7 +61,7 @@ struct jffs2_sum_dirent_flash jint32_t ino; /* == zero for unlink */ uint8_t nsize; /* dirent name size */ uint8_t type; /* dirent type */ - uint8_t name[0]; /* dirent name */ + uint8_t name[]; /* dirent name */ } __attribute__((packed)); struct jffs2_sum_xattr_flash @@ -117,7 +117,7 @@ struct jffs2_sum_dirent_mem jint32_t ino; /* == zero for unlink */ uint8_t nsize; /* dirent name size */ uint8_t type; /* dirent type */ - uint8_t name[0]; /* dirent name */ + uint8_t name[]; /* dirent name */ } __attribute__((packed)); struct jffs2_sum_xattr_mem diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 7187bd1a30ea..8d64edb80ebf 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -262,7 +262,7 @@ struct squashfs_dir_index { __le32 index; __le32 start_block; __le32 size; - unsigned char name[0]; + unsigned char name[]; }; struct squashfs_base_inode { @@ -327,7 +327,7 @@ struct squashfs_symlink_inode { __le32 inode_number; __le32 nlink; __le32 symlink_size; - char symlink[0]; + char symlink[]; }; struct squashfs_reg_inode { @@ -341,7 +341,7 @@ struct squashfs_reg_inode { __le32 fragment; __le32 offset; __le32 file_size; - __le16 block_list[0]; + __le16 block_list[]; }; struct squashfs_lreg_inode { @@ -358,7 +358,7 @@ struct squashfs_lreg_inode { __le32 fragment; __le32 offset; __le32 xattr; - __le16 block_list[0]; + __le16 block_list[]; }; struct squashfs_dir_inode { @@ -389,7 +389,7 @@ struct squashfs_ldir_inode { __le16 i_count; __le16 offset; __le32 xattr; - struct squashfs_dir_index index[0]; + struct squashfs_dir_index index[]; }; union squashfs_inode { @@ -410,7 +410,7 @@ struct squashfs_dir_entry { __le16 inode_number; __le16 type; __le16 size; - char name[0]; + char name[]; }; struct squashfs_dir_header { @@ -428,12 +428,12 @@ struct squashfs_fragment_entry { struct squashfs_xattr_entry { __le16 type; __le16 size; - char data[0]; + char data[]; }; struct squashfs_xattr_val { __le32 vsize; - char value[0]; + char value[]; }; struct squashfs_xattr_id { diff --git a/include/drm/drm_displayid.h b/include/drm/drm_displayid.h index 27bdd273fc4e..77941efb5426 100644 --- a/include/drm/drm_displayid.h +++ b/include/drm/drm_displayid.h @@ -89,7 +89,7 @@ struct displayid_detailed_timings_1 { struct displayid_detailed_timing_block { struct displayid_block base; - struct displayid_detailed_timings_1 timings[0]; + struct displayid_detailed_timings_1 timings[]; }; #define for_each_displayid_db(displayid, block, idx, length) \ diff --git a/include/keys/encrypted-type.h b/include/keys/encrypted-type.h index 9e9ccb20d586..38afb341c3f2 100644 --- a/include/keys/encrypted-type.h +++ b/include/keys/encrypted-type.h @@ -27,7 +27,7 @@ struct encrypted_key_payload { unsigned short payload_datalen; /* payload data length */ unsigned short encrypted_key_format; /* encrypted key format */ u8 *decrypted_data; /* decrypted data */ - u8 payload_data[0]; /* payload data + datablob + hmac */ + u8 payload_data[]; /* payload data + datablob + hmac */ }; extern struct key_type key_type_encrypted; diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h index a183278c3e9e..2b0b15a71228 100644 --- a/include/keys/rxrpc-type.h +++ b/include/keys/rxrpc-type.h @@ -28,7 +28,7 @@ struct rxkad_key { u8 primary_flag; /* T if key for primary cell for this user */ u16 ticket_len; /* length of ticket[] */ u8 session_key[8]; /* DES session key */ - u8 ticket[0]; /* the encrypted ticket */ + u8 ticket[]; /* the encrypted ticket */ }; /* @@ -100,7 +100,7 @@ struct rxrpc_key_data_v1 { u32 expiry; /* time_t */ u32 kvno; u8 session_key[8]; - u8 ticket[0]; + u8 ticket[]; }; /* diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07052d44bca1..1e1501ee53ce 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -92,6 +92,10 @@ struct bpf_map_ops { int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + + /* BTF name and id of struct allocated by map_alloc */ + const char * const map_btf_name; + int *map_btf_id; }; struct bpf_map_memory { @@ -1109,6 +1113,11 @@ static inline bool bpf_allow_ptr_leaks(void) return perfmon_capable(); } +static inline bool bpf_allow_ptr_to_map_access(void) +{ + return perfmon_capable(); +} + static inline bool bpf_bypass_spec_v1(void) { return perfmon_capable(); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index ca08db4ffb5f..53c7bd568c5d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -379,6 +379,7 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; + bool allow_ptr_to_map_access; bool bpf_capable; bool bypass_spec_v1; bool bypass_spec_v4; diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index a954def26c0d..900b9f4e0605 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -34,7 +34,7 @@ struct can_skb_priv { int ifindex; int skbcnt; - struct can_frame cf[0]; + struct can_frame cf[]; }; static inline struct can_skb_priv *can_skb_prv(struct sk_buff *skb) diff --git a/include/linux/cb710.h b/include/linux/cb710.h index 60de3fedd3a7..405657a9a0d5 100644 --- a/include/linux/cb710.h +++ b/include/linux/cb710.h @@ -36,7 +36,7 @@ struct cb710_chip { unsigned slot_mask; unsigned slots; spinlock_t irq_lock; - struct cb710_slot slot[0]; + struct cb710_slot slot[]; }; /* NOTE: cb710_chip.slots is modified only during device init/exit and diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index e1c03339918f..6283917edd90 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -153,7 +153,7 @@ struct dma_interleaved_template { bool dst_sgl; size_t numf; size_t frame_size; - struct data_chunk sgl[0]; + struct data_chunk sgl[]; }; /** @@ -535,7 +535,7 @@ struct dmaengine_unmap_data { struct device *dev; struct kref kref; size_t len; - dma_addr_t addr[0]; + dma_addr_t addr[]; }; struct dma_async_tx_descriptor; diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index ce0b5fbf239d..3f0b19dcfae7 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -46,7 +46,7 @@ struct fscache_cache_tag { unsigned long flags; #define FSCACHE_TAG_RESERVED 0 /* T if tag is reserved for a cache */ atomic_t usage; - char name[0]; /* tag name */ + char name[]; /* tag name */ }; /* diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f613d8529863..d56128df2aff 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -766,6 +766,11 @@ struct journal_s int j_errno; /** + * @j_abort_mutex: Lock the whole aborting procedure. + */ + struct mutex j_abort_mutex; + + /** * @j_sb_buffer: The first part of the superblock buffer. */ struct buffer_head *j_sb_buffer; @@ -1247,7 +1252,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ -#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ /* * Function declarations for the journaling transaction and buffer diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1776eb2e43a4..ea67910ae6b7 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -208,7 +208,7 @@ struct crash_mem_range { struct crash_mem { unsigned int max_nr_ranges; unsigned int nr_ranges; - struct crash_mem_range ranges[0]; + struct crash_mem_range ranges[]; }; extern int crash_exclude_mem_range(struct crash_mem *mem, diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 594265bfd390..e210af75ee38 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -161,7 +161,7 @@ struct kretprobe_instance { kprobe_opcode_t *ret_addr; struct task_struct *task; void *fp; - char data[0]; + char data[]; }; struct kretprobe_blackpoint { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 62ec926c78a0..d564855243d8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -409,7 +409,7 @@ struct kvm_irq_routing_table { * Array indexed by gsi. Each entry contains list of irq chips * the gsi is connected to. */ - struct hlist_head map[0]; + struct hlist_head map[]; }; #endif diff --git a/include/linux/libata.h b/include/linux/libata.h index af832852e620..8bf5e59a7859 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -609,7 +609,7 @@ struct ata_host { struct task_struct *eh_owner; struct ata_port *simplex_claimed; /* channel owning the DMA */ - struct ata_port *ports[0]; + struct ata_port *ports[]; }; struct ata_queued_cmd { diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 8ccd82105de8..76731230bbc5 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -221,7 +221,7 @@ struct sctp_datahdr { __be16 stream; __be16 ssn; __u32 ppid; - __u8 payload[0]; + __u8 payload[]; }; struct sctp_data_chunk { @@ -269,7 +269,7 @@ struct sctp_inithdr { __be16 num_outbound_streams; __be16 num_inbound_streams; __be32 initial_tsn; - __u8 params[0]; + __u8 params[]; }; struct sctp_init_chunk { @@ -299,13 +299,13 @@ struct sctp_cookie_preserve_param { /* Section 3.3.2.1 Host Name Address (11) */ struct sctp_hostname_param { struct sctp_paramhdr param_hdr; - uint8_t hostname[0]; + uint8_t hostname[]; }; /* Section 3.3.2.1 Supported Address Types (12) */ struct sctp_supported_addrs_param { struct sctp_paramhdr param_hdr; - __be16 types[0]; + __be16 types[]; }; /* ADDIP Section 3.2.6 Adaptation Layer Indication */ @@ -317,25 +317,25 @@ struct sctp_adaptation_ind_param { /* ADDIP Section 4.2.7 Supported Extensions Parameter */ struct sctp_supported_ext_param { struct sctp_paramhdr param_hdr; - __u8 chunks[0]; + __u8 chunks[]; }; /* AUTH Section 3.1 Random */ struct sctp_random_param { struct sctp_paramhdr param_hdr; - __u8 random_val[0]; + __u8 random_val[]; }; /* AUTH Section 3.2 Chunk List */ struct sctp_chunks_param { struct sctp_paramhdr param_hdr; - __u8 chunks[0]; + __u8 chunks[]; }; /* AUTH Section 3.3 HMAC Algorithm */ struct sctp_hmac_algo_param { struct sctp_paramhdr param_hdr; - __be16 hmac_ids[0]; + __be16 hmac_ids[]; }; /* RFC 2960. Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2): @@ -350,7 +350,7 @@ struct sctp_initack_chunk { /* Section 3.3.3.1 State Cookie (7) */ struct sctp_cookie_param { struct sctp_paramhdr p; - __u8 body[0]; + __u8 body[]; }; /* Section 3.3.3.1 Unrecognized Parameters (8) */ @@ -384,7 +384,7 @@ struct sctp_sackhdr { __be32 a_rwnd; __be16 num_gap_ack_blocks; __be16 num_dup_tsns; - union sctp_sack_variable variable[0]; + union sctp_sack_variable variable[]; }; struct sctp_sack_chunk { @@ -436,7 +436,7 @@ struct sctp_shutdown_chunk { struct sctp_errhdr { __be16 cause; __be16 length; - __u8 variable[0]; + __u8 variable[]; }; struct sctp_operr_chunk { @@ -594,7 +594,7 @@ struct sctp_fwdtsn_skip { struct sctp_fwdtsn_hdr { __be32 new_cum_tsn; - struct sctp_fwdtsn_skip skip[0]; + struct sctp_fwdtsn_skip skip[]; }; struct sctp_fwdtsn_chunk { @@ -611,7 +611,7 @@ struct sctp_ifwdtsn_skip { struct sctp_ifwdtsn_hdr { __be32 new_cum_tsn; - struct sctp_ifwdtsn_skip skip[0]; + struct sctp_ifwdtsn_skip skip[]; }; struct sctp_ifwdtsn_chunk { @@ -658,7 +658,7 @@ struct sctp_addip_param { struct sctp_addiphdr { __be32 serial; - __u8 params[0]; + __u8 params[]; }; struct sctp_addip_chunk { @@ -718,7 +718,7 @@ struct sctp_addip_chunk { struct sctp_authhdr { __be16 shkey_id; __be16 hmac_id; - __u8 hmac[0]; + __u8 hmac[]; }; struct sctp_auth_chunk { @@ -733,7 +733,7 @@ struct sctp_infox { struct sctp_reconf_chunk { struct sctp_chunkhdr chunk_hdr; - __u8 params[0]; + __u8 params[]; }; struct sctp_strreset_outreq { @@ -741,13 +741,13 @@ struct sctp_strreset_outreq { __be32 request_seq; __be32 response_seq; __be32 send_reset_at_tsn; - __be16 list_of_streams[0]; + __be16 list_of_streams[]; }; struct sctp_strreset_inreq { struct sctp_paramhdr param_hdr; __be32 request_seq; - __be16 list_of_streams[0]; + __be16 list_of_streams[]; }; struct sctp_strreset_tsnreq { diff --git a/include/linux/tifm.h b/include/linux/tifm.h index 299cbb8c63bb..44073d06710f 100644 --- a/include/linux/tifm.h +++ b/include/linux/tifm.h @@ -124,7 +124,7 @@ struct tifm_adapter { int (*has_ms_pif)(struct tifm_adapter *fm, struct tifm_dev *sock); - struct tifm_dev *sockets[0]; + struct tifm_dev *sockets[]; }; struct tifm_adapter *tifm_alloc_adapter(unsigned int num_sockets, diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d7338bfd7b0f..16e8b2f8d006 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -161,10 +161,51 @@ struct nf_flow_route { struct flow_offload *flow_offload_alloc(struct nf_conn *ct); void flow_offload_free(struct flow_offload *flow); -int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, - flow_setup_cb_t *cb, void *cb_priv); -void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, - flow_setup_cb_t *cb, void *cb_priv); +static inline int +nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct flow_block *block = &flow_table->flow_block; + struct flow_block_cb *block_cb; + int err = 0; + + down_write(&flow_table->flow_block_lock); + block_cb = flow_block_cb_lookup(block, cb, cb_priv); + if (block_cb) { + err = -EEXIST; + goto unlock; + } + + block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL); + if (IS_ERR(block_cb)) { + err = PTR_ERR(block_cb); + goto unlock; + } + + list_add_tail(&block_cb->list, &block->cb_list); + +unlock: + up_write(&flow_table->flow_block_lock); + return err; +} + +static inline void +nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, + flow_setup_cb_t *cb, void *cb_priv) +{ + struct flow_block *block = &flow_table->flow_block; + struct flow_block_cb *block_cb; + + down_write(&flow_table->flow_block_lock); + block_cb = flow_block_cb_lookup(block, cb, cb_priv); + if (block_cb) { + list_del(&block_cb->list); + flow_block_cb_free(block_cb); + } else { + WARN_ON(true); + } + up_write(&flow_table->flow_block_lock); +} int flow_offload_route_init(struct flow_offload *flow, const struct nf_flow_route *route); diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index 79654bcb9a29..8250d6f0a462 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -66,7 +66,16 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a) #endif /* CONFIG_NF_CONNTRACK */ #if IS_ENABLED(CONFIG_NET_ACT_CT) -void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie); +static inline void +tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) +{ + enum ip_conntrack_info ctinfo = cookie & NFCT_INFOMASK; + struct nf_conn *ct; + + ct = (struct nf_conn *)(cookie & NFCT_PTRMASK); + nf_conntrack_get(&ct->ct_general); + nf_ct_set(skb, ct, ctinfo); +} #else static inline void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) { } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 974a71342aea..9d3923e6b860 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -653,7 +653,7 @@ union bpf_attr { * Map value associated to *key*, or **NULL** if no entry was * found. * - * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) * Description * Add or update the value of the entry associated to *key* in * *map* with *value*. *flags* is one of: @@ -671,13 +671,13 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * long bpf_map_delete_elem(struct bpf_map *map, const void *key) * Description * Delete entry with *key* from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) * Description * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. @@ -695,7 +695,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...) * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) @@ -775,7 +775,7 @@ union bpf_attr { * Return * The SMP id of the processor running the program. * - * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. *flags* are a combination of @@ -792,7 +792,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) * Description * Recompute the layer 3 (e.g. IP) checksum for the packet * associated to *skb*. Computation is incremental, so the helper @@ -817,7 +817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) * Description * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the * packet associated to *skb*. Computation is incremental, so the @@ -849,7 +849,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) * Description * This special helper is used to trigger a "tail call", or in * other words, to jump into another eBPF program. The same stack @@ -880,7 +880,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) * Description * Clone and redirect the packet associated to *skb* to another * net device of index *ifindex*. Both ingress and egress @@ -916,7 +916,7 @@ union bpf_attr { * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. * - * int bpf_get_current_comm(void *buf, u32 size_of_buf) + * long bpf_get_current_comm(void *buf, u32 size_of_buf) * Description * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. The **comm** attribute contains the name of @@ -953,7 +953,7 @@ union bpf_attr { * Return * The classid, or 0 for the default unconfigured classid. * - * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) * Description * Push a *vlan_tci* (VLAN tag control information) of protocol * *vlan_proto* to the packet associated to *skb*, then update @@ -969,7 +969,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_vlan_pop(struct sk_buff *skb) + * long bpf_skb_vlan_pop(struct sk_buff *skb) * Description * Pop a VLAN header from the packet associated to *skb*. * @@ -981,7 +981,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Get tunnel metadata. This helper takes a pointer *key* to an * empty **struct bpf_tunnel_key** of **size**, that will be @@ -1032,7 +1032,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Populate tunnel metadata for packet associated to *skb.* The * tunnel metadata is set to the contents of *key*, of *size*. The @@ -1098,7 +1098,7 @@ union bpf_attr { * The value of the perf event counter read from the map, or a * negative error code in case of failure. * - * int bpf_redirect(u32 ifindex, u64 flags) + * long bpf_redirect(u32 ifindex, u64 flags) * Description * Redirect the packet to another net device of index *ifindex*. * This helper is somewhat similar to **bpf_clone_redirect**\ @@ -1145,7 +1145,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1190,7 +1190,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) + * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) * Description * This helper was provided as an easy way to load data from a * packet. It can be used to load *len* bytes from *offset* from @@ -1207,7 +1207,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) + * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1276,7 +1276,7 @@ union bpf_attr { * The checksum result, or a negative error code in case of * failure. * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* @@ -1294,7 +1294,7 @@ union bpf_attr { * Return * The size of the option data retrieved. * - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. @@ -1304,7 +1304,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) * Description * Change the protocol of the *skb* to *proto*. Currently * supported are transition from IPv4 to IPv6, and from IPv6 to @@ -1331,7 +1331,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * long bpf_skb_change_type(struct sk_buff *skb, u32 type) * Description * Change the packet type for the packet associated to *skb*. This * comes down to setting *skb*\ **->pkt_type** to *type*, except @@ -1358,7 +1358,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) * Description * Check whether *skb* is a descendant of the cgroup2 held by * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. @@ -1389,7 +1389,7 @@ union bpf_attr { * Return * A pointer to the current task struct. * - * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * long bpf_probe_write_user(void *dst, const void *src, u32 len) * Description * Attempt in a safe way to write *len* bytes from the buffer * *src* to *dst* in memory. It only works for threads that are in @@ -1408,7 +1408,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) * Description * Check whether the probe is being run is the context of a given * subset of the cgroup2 hierarchy. The cgroup2 to test is held by @@ -1420,7 +1420,7 @@ union bpf_attr { * * 1, if the *skb* task does not belong to the cgroup2. * * A negative error code, if an error occurred. * - * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) * Description * Resize (trim or grow) the packet associated to *skb* to the * new *len*. The *flags* are reserved for future usage, and must @@ -1444,7 +1444,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * long bpf_skb_pull_data(struct sk_buff *skb, u32 len) * Description * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes @@ -1500,7 +1500,7 @@ union bpf_attr { * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. * - * int bpf_get_numa_node_id(void) + * long bpf_get_numa_node_id(void) * Description * Return the id of the current NUMA node. The primary use case * for this helper is the selection of sockets for the local NUMA @@ -1511,7 +1511,7 @@ union bpf_attr { * Return * The id of current NUMA node. * - * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) * Description * Grows headroom of packet associated to *skb* and adjusts the * offset of the MAC header accordingly, adding *len* bytes of @@ -1532,7 +1532,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that * it is possible to use a negative value for *delta*. This helper @@ -1547,7 +1547,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for @@ -1595,14 +1595,14 @@ union bpf_attr { * is returned (note that **overflowuid** might also be the actual * UID value for the socket). * - * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * long bpf_set_hash(struct sk_buff *skb, u32 hash) * Description * Set the full hash for *skb* (set the field *skb*\ **->hash**) * to value *hash*. * Return * 0 * - * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1630,7 +1630,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -1676,7 +1676,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -1697,7 +1697,7 @@ union bpf_attr { * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. * - * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) + * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and @@ -1708,7 +1708,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a *map* referencing sockets. The * *skops* is used as a new value for the entry associated to @@ -1727,7 +1727,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) * Description * Adjust the address pointed by *xdp_md*\ **->data_meta** by * *delta* (which can be positive or negative). Note that this @@ -1756,7 +1756,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) * Description * Read the value of a perf event counter, and store it into *buf* * of size *buf_size*. This helper relies on a *map* of type @@ -1806,7 +1806,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description * For en eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in @@ -1817,7 +1817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1842,7 +1842,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_regs *regs, u64 rc) + * long bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. @@ -1867,7 +1867,7 @@ union bpf_attr { * Return * 0 * - * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) * Description * Attempt to set the value of the **bpf_sock_ops_cb_flags** field * for the full TCP socket associated to *bpf_sock_ops* to @@ -1911,7 +1911,7 @@ union bpf_attr { * be set is returned (which comes down to 0 if all bits were set * as required). * - * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -1925,7 +1925,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, apply the verdict of the eBPF program to * the next *bytes* (number of bytes) of message *msg*. @@ -1959,7 +1959,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, prevent the execution of the verdict eBPF * program for message *msg* until *bytes* (byte number) have been @@ -1977,7 +1977,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) * Description * For socket policies, pull in non-linear data from user space * for *msg* and set pointers *msg*\ **->data** and *msg*\ @@ -2008,7 +2008,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) * Description * Bind the socket associated to *ctx* to the address pointed by * *addr*, of length *addr_len*. This allows for making outgoing @@ -2026,7 +2026,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is * possible to both shrink and grow the packet tail. @@ -2040,7 +2040,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) * Description * Retrieve the XFRM state (IP transform framework, see also * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. @@ -2056,7 +2056,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) + * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer @@ -2089,7 +2089,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) + * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -2111,7 +2111,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) * Description * Do FIB lookup in kernel tables using parameters in *params*. * If lookup is successful and result shows packet is to be @@ -2142,7 +2142,7 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * - * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to @@ -2161,7 +2161,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -2175,7 +2175,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. @@ -2189,7 +2189,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) * Description * Encapsulate the packet associated to *skb* within a Layer 3 * protocol header. This header is provided in the buffer at @@ -2226,7 +2226,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. Only the flags, tag and TLVs @@ -2241,7 +2241,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) * Description * Adjust the size allocated to TLVs in the outermost IPv6 * Segment Routing Header contained in the packet associated to @@ -2257,7 +2257,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) * Description * Apply an IPv6 Segment Routing action of type *action* to the * packet associated to *skb*. Each action takes a parameter @@ -2286,7 +2286,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_repeat(void *ctx) + * long bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded repeat key message. This delays @@ -2305,7 +2305,7 @@ union bpf_attr { * Return * 0 * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded key press with *scancode*, @@ -2370,7 +2370,7 @@ union bpf_attr { * Return * A pointer to the local storage area. * - * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. @@ -2471,7 +2471,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(struct bpf_sock *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -2479,7 +2479,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) * Description * Push an element *value* in *map*. *flags* is one of: * @@ -2489,19 +2489,19 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * long bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * long bpf_map_peek_elem(struct bpf_map *map, void *value) * Description * Get an element from *map* without removing it. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * For socket policies, insert *len* bytes into *msg* at offset * *start*. @@ -2517,7 +2517,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if @@ -2529,7 +2529,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded pointer movement. @@ -2543,7 +2543,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_lock(struct bpf_spin_lock *lock) + * long bpf_spin_lock(struct bpf_spin_lock *lock) * Description * Acquire a spinlock represented by the pointer *lock*, which is * stored as part of a value of a map. Taking the lock allows to @@ -2591,7 +2591,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * long bpf_spin_unlock(struct bpf_spin_lock *lock) * Description * Release the *lock* previously locked by a call to * **bpf_spin_lock**\ (\ *lock*\ ). @@ -2614,7 +2614,7 @@ union bpf_attr { * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * - * int bpf_skb_ecn_set_ce(struct sk_buff *skb) + * long bpf_skb_ecn_set_ce(struct sk_buff *skb) * Description * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** @@ -2651,7 +2651,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2666,7 +2666,7 @@ union bpf_attr { * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. * - * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) * Description * Get name of sysctl in /proc/sys/ and copy it into provided by * program buffer *buf* of size *buf_len*. @@ -2682,7 +2682,7 @@ union bpf_attr { * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). * - * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get current value of sysctl as it is presented in /proc/sys * (incl. newline, etc), and copy it as a string into provided @@ -2701,7 +2701,7 @@ union bpf_attr { * **-EINVAL** if current value was unavailable, e.g. because * sysctl is uninitialized and read returns -EIO for it. * - * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get new value being written by user space to sysctl (before * the actual write happens) and copy it as a string into @@ -2718,7 +2718,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) * Description * Override new value being written by user space to sysctl with * value provided by program in buffer *buf* of size *buf_len*. @@ -2735,7 +2735,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to a long integer according to the given base @@ -2759,7 +2759,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to an unsigned long integer according to the @@ -2810,7 +2810,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return @@ -2818,7 +2818,7 @@ union bpf_attr { * * **-ENOENT** if the bpf-local-storage cannot be found. * - * int bpf_send_signal(u32 sig) + * long bpf_send_signal(u32 sig) * Description * Send signal *sig* to the process of the current task. * The signal may be delivered to any of this process's threads. @@ -2859,7 +2859,7 @@ union bpf_attr { * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 * - * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -2883,21 +2883,21 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from user space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from kernel space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe user address * *unsafe_ptr* to *dst*. The *size* should include the @@ -2941,7 +2941,7 @@ union bpf_attr { * including the trailing NUL character. On error, a negative * value. * - * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. @@ -2949,14 +2949,14 @@ union bpf_attr { * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. * - * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) + * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt) * Description * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_send_signal_thread(u32 sig) + * long bpf_send_signal_thread(u32 sig) * Description * Send signal *sig* to the thread corresponding to the current task. * Return @@ -2976,7 +2976,7 @@ union bpf_attr { * Return * The 64 bit jiffies * - * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) * Description * For an eBPF program attached to a perf event, retrieve the * branch records (**struct perf_branch_entry**) associated to *ctx* @@ -2995,7 +2995,7 @@ union bpf_attr { * * **-ENOENT** if architecture does not support branch records. * - * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) * Description * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. @@ -3007,7 +3007,7 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * - * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -3062,7 +3062,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) * Description * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, @@ -3097,7 +3097,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) * Description * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print * out the format string. @@ -3126,7 +3126,7 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * long bpf_seq_write(struct seq_file *m, const void *data, u32 len) * Description * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file. The *data* and *len* represent the @@ -3221,7 +3221,7 @@ union bpf_attr { * Return * Requested value, or 0, if flags are not recognized. * - * int bpf_csum_level(struct sk_buff *skb, u64 level) + * long bpf_csum_level(struct sk_buff *skb, u64 level) * Description * Change the skbs checksum level by one layer up or down, or * reset it entirely to none in order to have the stack perform diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 379a612f8f1d..f44eb0a04afd 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -262,6 +262,7 @@ struct fsxattr { #define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#define FS_DAX_FL 0x02000000 /* Inode is DAX */ #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */ diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index c1395b5bd432..9463db2dfa9d 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -7,6 +7,7 @@ Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> + Copyright (c) 2020 Jan (janneke) Nieuwenhuizen <janneke@gnu.org> */ #include <linux/libc-compat.h> @@ -31,6 +32,9 @@ #define XATTR_BTRFS_PREFIX "btrfs." #define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1) +#define XATTR_HURD_PREFIX "gnu." +#define XATTR_HURD_PREFIX_LEN (sizeof(XATTR_HURD_PREFIX) - 1) + #define XATTR_SECURITY_PREFIX "security." #define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 11584618e861..ec5cd11032aa 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -494,6 +494,7 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) vma->vm_pgoff + pgoff); } +static int array_map_btf_id; const struct bpf_map_ops array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, @@ -510,8 +511,11 @@ const struct bpf_map_ops array_map_ops = { .map_check_btf = array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, + .map_btf_name = "bpf_array", + .map_btf_id = &array_map_btf_id, }; +static int percpu_array_map_btf_id; const struct bpf_map_ops percpu_array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, @@ -522,6 +526,8 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_delete_elem = array_map_delete_elem, .map_seq_show_elem = percpu_array_map_seq_show_elem, .map_check_btf = array_map_check_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &percpu_array_map_btf_id, }; static int fd_array_map_alloc_check(union bpf_attr *attr) @@ -868,6 +874,7 @@ static void prog_array_map_free(struct bpf_map *map) fd_array_map_free(map); } +static int prog_array_map_btf_id; const struct bpf_map_ops prog_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = prog_array_map_alloc, @@ -883,6 +890,8 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = prog_array_map_clear, .map_seq_show_elem = prog_array_map_seq_show_elem, + .map_btf_name = "bpf_array", + .map_btf_id = &prog_array_map_btf_id, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, @@ -961,6 +970,7 @@ static void perf_event_fd_array_release(struct bpf_map *map, rcu_read_unlock(); } +static int perf_event_array_map_btf_id; const struct bpf_map_ops perf_event_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, @@ -972,6 +982,8 @@ const struct bpf_map_ops perf_event_array_map_ops = { .map_fd_put_ptr = perf_event_fd_array_put_ptr, .map_release = perf_event_fd_array_release, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &perf_event_array_map_btf_id, }; #ifdef CONFIG_CGROUPS @@ -994,6 +1006,7 @@ static void cgroup_fd_array_free(struct bpf_map *map) fd_array_map_free(map); } +static int cgroup_array_map_btf_id; const struct bpf_map_ops cgroup_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, @@ -1004,6 +1017,8 @@ const struct bpf_map_ops cgroup_array_map_ops = { .map_fd_get_ptr = cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &cgroup_array_map_btf_id, }; #endif @@ -1077,6 +1092,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map, return insn - insn_buf; } +static int array_of_maps_map_btf_id; const struct bpf_map_ops array_of_maps_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_of_map_alloc, @@ -1089,4 +1105,6 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = array_of_map_gen_lookup, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &array_of_maps_map_btf_id, }; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index c6b0decaa46a..969c5d47f81f 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -611,6 +611,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) return map; } +static int bpf_struct_ops_map_btf_id; const struct bpf_map_ops bpf_struct_ops_map_ops = { .map_alloc_check = bpf_struct_ops_map_alloc_check, .map_alloc = bpf_struct_ops_map_alloc, @@ -620,6 +621,8 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = { .map_delete_elem = bpf_struct_ops_map_delete_elem, .map_update_elem = bpf_struct_ops_map_update_elem, .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem, + .map_btf_name = "bpf_struct_ops_map", + .map_btf_id = &bpf_struct_ops_map_btf_id, }; /* "const void *" because some subsystem is diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 58c9af1d4808..e377d1981730 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3571,6 +3571,41 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, return ctx_type; } +static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = { +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) +#define BPF_LINK_TYPE(_id, _name) +#define BPF_MAP_TYPE(_id, _ops) \ + [_id] = &_ops, +#include <linux/bpf_types.h> +#undef BPF_PROG_TYPE +#undef BPF_LINK_TYPE +#undef BPF_MAP_TYPE +}; + +static int btf_vmlinux_map_ids_init(const struct btf *btf, + struct bpf_verifier_log *log) +{ + const struct bpf_map_ops *ops; + int i, btf_id; + + for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) { + ops = btf_vmlinux_map_ops[i]; + if (!ops || (!ops->map_btf_name && !ops->map_btf_id)) + continue; + if (!ops->map_btf_name || !ops->map_btf_id) { + bpf_log(log, "map type %d is misconfigured\n", i); + return -EINVAL; + } + btf_id = btf_find_by_name_kind(btf, ops->map_btf_name, + BTF_KIND_STRUCT); + if (btf_id < 0) + return btf_id; + *ops->map_btf_id = btf_id; + } + + return 0; +} + static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, struct btf *btf, const struct btf_type *t, @@ -3591,7 +3626,7 @@ struct btf *btf_parse_vmlinux(void) struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; struct btf *btf = NULL; - int err, i; + int err, btf_id; env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); if (!env) @@ -3625,24 +3660,18 @@ struct btf *btf_parse_vmlinux(void) goto errout; /* find struct bpf_ctx_convert for type checking later */ - for (i = 1; i <= btf->nr_types; i++) { - const struct btf_type *t; - const char *tname; - - t = btf_type_by_id(btf, i); - if (!__btf_type_is_struct(t)) - continue; - tname = __btf_name_by_offset(btf, t->name_off); - if (!strcmp(tname, "bpf_ctx_convert")) { - /* btf_parse_vmlinux() runs under bpf_verifier_lock */ - bpf_ctx_convert.t = t; - break; - } - } - if (i > btf->nr_types) { - err = -ENOENT; + btf_id = btf_find_by_name_kind(btf, "bpf_ctx_convert", BTF_KIND_STRUCT); + if (btf_id < 0) { + err = btf_id; goto errout; } + /* btf_parse_vmlinux() runs under bpf_verifier_lock */ + bpf_ctx_convert.t = btf_type_by_id(btf, btf_id); + + /* find bpf map structs for map_ptr access checking */ + err = btf_vmlinux_map_ids_init(btf, log); + if (err < 0) + goto errout; bpf_struct_ops_init(btf, log); diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 27595fc6da56..bd8658055c16 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -543,6 +543,7 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key) return 0; } +static int cpu_map_btf_id; const struct bpf_map_ops cpu_map_ops = { .map_alloc = cpu_map_alloc, .map_free = cpu_map_free, @@ -551,6 +552,8 @@ const struct bpf_map_ops cpu_map_ops = { .map_lookup_elem = cpu_map_lookup_elem, .map_get_next_key = cpu_map_get_next_key, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_cpu_map", + .map_btf_id = &cpu_map_btf_id, }; static int bq_flush_to_queue(struct xdp_bulk_queue *bq) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 0cbb72cdaf63..58acc46861ef 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -747,6 +747,7 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, map, key, value, map_flags); } +static int dev_map_btf_id; const struct bpf_map_ops dev_map_ops = { .map_alloc = dev_map_alloc, .map_free = dev_map_free, @@ -755,8 +756,11 @@ const struct bpf_map_ops dev_map_ops = { .map_update_elem = dev_map_update_elem, .map_delete_elem = dev_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_dtab", + .map_btf_id = &dev_map_btf_id, }; +static int dev_map_hash_map_btf_id; const struct bpf_map_ops dev_map_hash_ops = { .map_alloc = dev_map_alloc, .map_free = dev_map_free, @@ -765,6 +769,8 @@ const struct bpf_map_ops dev_map_hash_ops = { .map_update_elem = dev_map_hash_update_elem, .map_delete_elem = dev_map_hash_delete_elem, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_dtab", + .map_btf_id = &dev_map_hash_map_btf_id, }; static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index b4b288a3c3c9..acd06081d81d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1614,6 +1614,7 @@ htab_lru_map_lookup_and_delete_batch(struct bpf_map *map, true, false); } +static int htab_map_btf_id; const struct bpf_map_ops htab_map_ops = { .map_alloc_check = htab_map_alloc_check, .map_alloc = htab_map_alloc, @@ -1625,8 +1626,11 @@ const struct bpf_map_ops htab_map_ops = { .map_gen_lookup = htab_map_gen_lookup, .map_seq_show_elem = htab_map_seq_show_elem, BATCH_OPS(htab), + .map_btf_name = "bpf_htab", + .map_btf_id = &htab_map_btf_id, }; +static int htab_lru_map_btf_id; const struct bpf_map_ops htab_lru_map_ops = { .map_alloc_check = htab_map_alloc_check, .map_alloc = htab_map_alloc, @@ -1639,6 +1643,8 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_gen_lookup = htab_lru_map_gen_lookup, .map_seq_show_elem = htab_map_seq_show_elem, BATCH_OPS(htab_lru), + .map_btf_name = "bpf_htab", + .map_btf_id = &htab_lru_map_btf_id, }; /* Called from eBPF program */ @@ -1743,6 +1749,7 @@ static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } +static int htab_percpu_map_btf_id; const struct bpf_map_ops htab_percpu_map_ops = { .map_alloc_check = htab_map_alloc_check, .map_alloc = htab_map_alloc, @@ -1753,8 +1760,11 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_delete_elem = htab_map_delete_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, BATCH_OPS(htab_percpu), + .map_btf_name = "bpf_htab", + .map_btf_id = &htab_percpu_map_btf_id, }; +static int htab_lru_percpu_map_btf_id; const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_alloc_check = htab_map_alloc_check, .map_alloc = htab_map_alloc, @@ -1765,6 +1775,8 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_delete_elem = htab_lru_map_delete_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, BATCH_OPS(htab_lru_percpu), + .map_btf_name = "bpf_htab", + .map_btf_id = &htab_lru_percpu_map_btf_id, }; static int fd_htab_map_alloc_check(union bpf_attr *attr) @@ -1887,6 +1899,7 @@ static void htab_of_map_free(struct bpf_map *map) fd_htab_map_free(map); } +static int htab_of_maps_map_btf_id; const struct bpf_map_ops htab_of_maps_map_ops = { .map_alloc_check = fd_htab_map_alloc_check, .map_alloc = htab_of_map_alloc, @@ -1899,4 +1912,6 @@ const struct bpf_map_ops htab_of_maps_map_ops = { .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = htab_of_map_gen_lookup, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_htab", + .map_btf_id = &htab_of_maps_map_btf_id, }; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 33d01866bcc2..51bd5a8cb01b 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -409,6 +409,7 @@ static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key, rcu_read_unlock(); } +static int cgroup_storage_map_btf_id; const struct bpf_map_ops cgroup_storage_map_ops = { .map_alloc = cgroup_storage_map_alloc, .map_free = cgroup_storage_map_free, @@ -418,6 +419,8 @@ const struct bpf_map_ops cgroup_storage_map_ops = { .map_delete_elem = cgroup_storage_delete_elem, .map_check_btf = cgroup_storage_check_btf, .map_seq_show_elem = cgroup_storage_seq_show_elem, + .map_btf_name = "bpf_cgroup_storage_map", + .map_btf_id = &cgroup_storage_map_btf_id, }; int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index c8cc4e4cf98d..1abd4e3f906d 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -735,6 +735,7 @@ static int trie_check_btf(const struct bpf_map *map, -EINVAL : 0; } +static int trie_map_btf_id; const struct bpf_map_ops trie_map_ops = { .map_alloc = trie_alloc, .map_free = trie_free, @@ -743,4 +744,6 @@ const struct bpf_map_ops trie_map_ops = { .map_update_elem = trie_update_elem, .map_delete_elem = trie_delete_elem, .map_check_btf = trie_check_btf, + .map_btf_name = "lpm_trie", + .map_btf_id = &trie_map_btf_id, }; diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 05c8e043b9d2..80c66a6d7c54 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -262,6 +262,7 @@ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key, return -EINVAL; } +static int queue_map_btf_id; const struct bpf_map_ops queue_map_ops = { .map_alloc_check = queue_stack_map_alloc_check, .map_alloc = queue_stack_map_alloc, @@ -273,8 +274,11 @@ const struct bpf_map_ops queue_map_ops = { .map_pop_elem = queue_map_pop_elem, .map_peek_elem = queue_map_peek_elem, .map_get_next_key = queue_stack_map_get_next_key, + .map_btf_name = "bpf_queue_stack", + .map_btf_id = &queue_map_btf_id, }; +static int stack_map_btf_id; const struct bpf_map_ops stack_map_ops = { .map_alloc_check = queue_stack_map_alloc_check, .map_alloc = queue_stack_map_alloc, @@ -286,4 +290,6 @@ const struct bpf_map_ops stack_map_ops = { .map_pop_elem = stack_map_pop_elem, .map_peek_elem = stack_map_peek_elem, .map_get_next_key = queue_stack_map_get_next_key, + .map_btf_name = "bpf_queue_stack", + .map_btf_id = &stack_map_btf_id, }; diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 21cde24386db..a09922f656e4 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -345,6 +345,7 @@ static int reuseport_array_get_next_key(struct bpf_map *map, void *key, return 0; } +static int reuseport_array_map_btf_id; const struct bpf_map_ops reuseport_array_ops = { .map_alloc_check = reuseport_array_alloc_check, .map_alloc = reuseport_array_alloc, @@ -352,4 +353,6 @@ const struct bpf_map_ops reuseport_array_ops = { .map_lookup_elem = reuseport_array_lookup_elem, .map_get_next_key = reuseport_array_get_next_key, .map_delete_elem = reuseport_array_delete_elem, + .map_btf_name = "reuseport_array", + .map_btf_id = &reuseport_array_map_btf_id, }; diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 180414bb0d3e..dbf37aff4827 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -294,6 +294,7 @@ static __poll_t ringbuf_map_poll(struct bpf_map *map, struct file *filp, return 0; } +static int ringbuf_map_btf_id; const struct bpf_map_ops ringbuf_map_ops = { .map_alloc = ringbuf_map_alloc, .map_free = ringbuf_map_free, @@ -303,6 +304,8 @@ const struct bpf_map_ops ringbuf_map_ops = { .map_update_elem = ringbuf_map_update_elem, .map_delete_elem = ringbuf_map_delete_elem, .map_get_next_key = ringbuf_map_get_next_key, + .map_btf_name = "bpf_ringbuf_map", + .map_btf_id = &ringbuf_map_btf_id, }; /* Given pointer to ring buffer record metadata and struct bpf_ringbuf itself, diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 599488f25e40..27dc9b1b08a5 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -613,6 +613,7 @@ static void stack_map_free(struct bpf_map *map) put_callchain_buffers(); } +static int stack_trace_map_btf_id; const struct bpf_map_ops stack_trace_map_ops = { .map_alloc = stack_map_alloc, .map_free = stack_map_free, @@ -621,6 +622,8 @@ const struct bpf_map_ops stack_trace_map_ops = { .map_update_elem = stack_map_update_elem, .map_delete_elem = stack_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_stack_map", + .map_btf_id = &stack_trace_map_btf_id, }; static int __init stack_map_init(void) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 34cde841ab68..7460f967cb75 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1351,6 +1351,19 @@ static void mark_reg_not_init(struct bpf_verifier_env *env, __mark_reg_not_init(env, regs + regno); } +static void mark_btf_ld_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, u32 regno, + enum bpf_reg_type reg_type, u32 btf_id) +{ + if (reg_type == SCALAR_VALUE) { + mark_reg_unknown(env, regs, regno); + return; + } + mark_reg_known_zero(env, regs, regno); + regs[regno].type = PTR_TO_BTF_ID; + regs[regno].btf_id = btf_id; +} + #define DEF_NOT_SUBREG (0) static void init_reg_state(struct bpf_verifier_env *env, struct bpf_func_state *state) @@ -3182,19 +3195,68 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (ret < 0) return ret; - if (atype == BPF_READ && value_regno >= 0) { - if (ret == SCALAR_VALUE) { - mark_reg_unknown(env, regs, value_regno); - return 0; - } - mark_reg_known_zero(env, regs, value_regno); - regs[value_regno].type = PTR_TO_BTF_ID; - regs[value_regno].btf_id = btf_id; + if (atype == BPF_READ && value_regno >= 0) + mark_btf_ld_reg(env, regs, value_regno, ret, btf_id); + + return 0; +} + +static int check_ptr_to_map_access(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, + int regno, int off, int size, + enum bpf_access_type atype, + int value_regno) +{ + struct bpf_reg_state *reg = regs + regno; + struct bpf_map *map = reg->map_ptr; + const struct btf_type *t; + const char *tname; + u32 btf_id; + int ret; + + if (!btf_vmlinux) { + verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n"); + return -ENOTSUPP; } + if (!map->ops->map_btf_id || !*map->ops->map_btf_id) { + verbose(env, "map_ptr access not supported for map type %d\n", + map->map_type); + return -ENOTSUPP; + } + + t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id); + tname = btf_name_by_offset(btf_vmlinux, t->name_off); + + if (!env->allow_ptr_to_map_access) { + verbose(env, + "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", + tname); + return -EPERM; + } + + if (off < 0) { + verbose(env, "R%d is %s invalid negative access: off=%d\n", + regno, tname, off); + return -EACCES; + } + + if (atype != BPF_READ) { + verbose(env, "only read from %s is supported\n", tname); + return -EACCES; + } + + ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id); + if (ret < 0) + return ret; + + if (value_regno >= 0) + mark_btf_ld_reg(env, regs, value_regno, ret, btf_id); + return 0; } + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -3363,6 +3425,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (reg->type == PTR_TO_BTF_ID) { err = check_ptr_to_btf_access(env, regs, regno, off, size, t, value_regno); + } else if (reg->type == CONST_PTR_TO_MAP) { + err = check_ptr_to_map_access(env, regs, regno, off, size, t, + value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -5031,6 +5096,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (BPF_CLASS(insn->code) != BPF_ALU64) { /* 32-bit ALU ops on pointers produce (meaningless) scalars */ + if (opcode == BPF_SUB && env->allow_ptr_leaks) { + __mark_reg_unknown(env, dst_reg); + return 0; + } + verbose(env, "R%d 32-bit pointer arithmetic prohibited\n", dst); @@ -10946,6 +11016,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->strict_alignment = false; env->allow_ptr_leaks = bpf_allow_ptr_leaks(); + env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access(); env->bypass_spec_v1 = bpf_bypass_spec_v1(); env->bypass_spec_v4 = bpf_bypass_spec_v4(); env->bpf_capable = bpf_capable(); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index a0ff9e200ef6..a22b62813f8c 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -236,7 +236,7 @@ struct trace_probe_event { struct trace_event_call call; struct list_head files; struct list_head probes; - struct trace_uprobe_filter filter[0]; + struct trace_uprobe_filter filter[]; }; struct trace_probe { diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index d2c4d16dadba..6f921c4ddc2c 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -11,8 +11,6 @@ #include <uapi/linux/sock_diag.h> #include <uapi/linux/btf.h> -static atomic_t cache_idx; - #define SK_STORAGE_CREATE_FLAG_MASK \ (BPF_F_NO_PREALLOC | BPF_F_CLONE) @@ -81,6 +79,9 @@ struct bpf_sk_storage_elem { #define SDATA(_SELEM) (&(_SELEM)->sdata) #define BPF_SK_STORAGE_CACHE_SIZE 16 +static DEFINE_SPINLOCK(cache_idx_lock); +static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE]; + struct bpf_sk_storage { struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE]; struct hlist_head list; /* List of bpf_sk_storage_elem */ @@ -512,6 +513,37 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map) return 0; } +static u16 cache_idx_get(void) +{ + u64 min_usage = U64_MAX; + u16 i, res = 0; + + spin_lock(&cache_idx_lock); + + for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) { + if (cache_idx_usage_counts[i] < min_usage) { + min_usage = cache_idx_usage_counts[i]; + res = i; + + /* Found a free cache_idx */ + if (!min_usage) + break; + } + } + cache_idx_usage_counts[res]++; + + spin_unlock(&cache_idx_lock); + + return res; +} + +static void cache_idx_free(u16 idx) +{ + spin_lock(&cache_idx_lock); + cache_idx_usage_counts[idx]--; + spin_unlock(&cache_idx_lock); +} + /* Called by __sk_destruct() & bpf_sk_storage_clone() */ void bpf_sk_storage_free(struct sock *sk) { @@ -560,6 +592,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) smap = (struct bpf_sk_storage_map *)map; + cache_idx_free(smap->cache_idx); + /* Note that this map might be concurrently cloned from * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone * RCU read section to finish before proceeding. New RCU @@ -673,8 +707,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) } smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size; - smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) % - BPF_SK_STORAGE_CACHE_SIZE; + smap->cache_idx = cache_idx_get(); return &smap->map; } @@ -886,6 +919,7 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) return -ENOENT; } +static int sk_storage_map_btf_id; const struct bpf_map_ops sk_storage_map_ops = { .map_alloc_check = bpf_sk_storage_map_alloc_check, .map_alloc = bpf_sk_storage_map_alloc, @@ -895,6 +929,8 @@ const struct bpf_map_ops sk_storage_map_ops = { .map_update_elem = bpf_fd_sk_storage_update_elem, .map_delete_elem = bpf_fd_sk_storage_delete_elem, .map_check_btf = bpf_sk_storage_map_check_btf, + .map_btf_name = "bpf_sk_storage_map", + .map_btf_id = &sk_storage_map_btf_id, }; const struct bpf_func_proto bpf_sk_storage_get_proto = { diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4059f94e9bb5..4c1123c749bb 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -643,6 +643,7 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = { .arg4_type = ARG_ANYTHING, }; +static int sock_map_btf_id; const struct bpf_map_ops sock_map_ops = { .map_alloc = sock_map_alloc, .map_free = sock_map_free, @@ -653,9 +654,11 @@ const struct bpf_map_ops sock_map_ops = { .map_lookup_elem = sock_map_lookup, .map_release_uref = sock_map_release_progs, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_stab", + .map_btf_id = &sock_map_btf_id, }; -struct bpf_htab_elem { +struct bpf_shtab_elem { struct rcu_head rcu; u32 hash; struct sock *sk; @@ -663,14 +666,14 @@ struct bpf_htab_elem { u8 key[]; }; -struct bpf_htab_bucket { +struct bpf_shtab_bucket { struct hlist_head head; raw_spinlock_t lock; }; -struct bpf_htab { +struct bpf_shtab { struct bpf_map map; - struct bpf_htab_bucket *buckets; + struct bpf_shtab_bucket *buckets; u32 buckets_num; u32 elem_size; struct sk_psock_progs progs; @@ -682,17 +685,17 @@ static inline u32 sock_hash_bucket_hash(const void *key, u32 len) return jhash(key, len, 0); } -static struct bpf_htab_bucket *sock_hash_select_bucket(struct bpf_htab *htab, - u32 hash) +static struct bpf_shtab_bucket *sock_hash_select_bucket(struct bpf_shtab *htab, + u32 hash) { return &htab->buckets[hash & (htab->buckets_num - 1)]; } -static struct bpf_htab_elem * +static struct bpf_shtab_elem * sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key, u32 key_size) { - struct bpf_htab_elem *elem; + struct bpf_shtab_elem *elem; hlist_for_each_entry_rcu(elem, head, node) { if (elem->hash == hash && @@ -705,10 +708,10 @@ sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key, static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 key_size = map->key_size, hash; - struct bpf_htab_bucket *bucket; - struct bpf_htab_elem *elem; + struct bpf_shtab_bucket *bucket; + struct bpf_shtab_elem *elem; WARN_ON_ONCE(!rcu_read_lock_held()); @@ -719,8 +722,8 @@ static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key) return elem ? elem->sk : NULL; } -static void sock_hash_free_elem(struct bpf_htab *htab, - struct bpf_htab_elem *elem) +static void sock_hash_free_elem(struct bpf_shtab *htab, + struct bpf_shtab_elem *elem) { atomic_dec(&htab->count); kfree_rcu(elem, rcu); @@ -729,9 +732,9 @@ static void sock_hash_free_elem(struct bpf_htab *htab, static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, void *link_raw) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct bpf_htab_elem *elem_probe, *elem = link_raw; - struct bpf_htab_bucket *bucket; + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + struct bpf_shtab_elem *elem_probe, *elem = link_raw; + struct bpf_shtab_bucket *bucket; WARN_ON_ONCE(!rcu_read_lock_held()); bucket = sock_hash_select_bucket(htab, elem->hash); @@ -753,10 +756,10 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, static int sock_hash_delete_elem(struct bpf_map *map, void *key) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 hash, key_size = map->key_size; - struct bpf_htab_bucket *bucket; - struct bpf_htab_elem *elem; + struct bpf_shtab_bucket *bucket; + struct bpf_shtab_elem *elem; int ret = -ENOENT; hash = sock_hash_bucket_hash(key, key_size); @@ -774,12 +777,12 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key) return ret; } -static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab, - void *key, u32 key_size, - u32 hash, struct sock *sk, - struct bpf_htab_elem *old) +static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab, + void *key, u32 key_size, + u32 hash, struct sock *sk, + struct bpf_shtab_elem *old) { - struct bpf_htab_elem *new; + struct bpf_shtab_elem *new; if (atomic_inc_return(&htab->count) > htab->map.max_entries) { if (!old) { @@ -803,10 +806,10 @@ static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab, static int sock_hash_update_common(struct bpf_map *map, void *key, struct sock *sk, u64 flags) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 key_size = map->key_size, hash; - struct bpf_htab_elem *elem, *elem_new; - struct bpf_htab_bucket *bucket; + struct bpf_shtab_elem *elem, *elem_new; + struct bpf_shtab_bucket *bucket; struct sk_psock_link *link; struct sk_psock *psock; int ret; @@ -916,8 +919,8 @@ out: static int sock_hash_get_next_key(struct bpf_map *map, void *key, void *key_next) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct bpf_htab_elem *elem, *elem_next; + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + struct bpf_shtab_elem *elem, *elem_next; u32 hash, key_size = map->key_size; struct hlist_head *head; int i = 0; @@ -931,7 +934,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key, goto find_first_elem; elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)), - struct bpf_htab_elem, node); + struct bpf_shtab_elem, node); if (elem_next) { memcpy(key_next, elem_next->key, key_size); return 0; @@ -943,7 +946,7 @@ find_first_elem: for (; i < htab->buckets_num; i++) { head = &sock_hash_select_bucket(htab, i)->head; elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), - struct bpf_htab_elem, node); + struct bpf_shtab_elem, node); if (elem_next) { memcpy(key_next, elem_next->key, key_size); return 0; @@ -955,7 +958,7 @@ find_first_elem: static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) { - struct bpf_htab *htab; + struct bpf_shtab *htab; int i, err; u64 cost; @@ -977,15 +980,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) bpf_map_init_from_attr(&htab->map, attr); htab->buckets_num = roundup_pow_of_two(htab->map.max_entries); - htab->elem_size = sizeof(struct bpf_htab_elem) + + htab->elem_size = sizeof(struct bpf_shtab_elem) + round_up(htab->map.key_size, 8); if (htab->buckets_num == 0 || - htab->buckets_num > U32_MAX / sizeof(struct bpf_htab_bucket)) { + htab->buckets_num > U32_MAX / sizeof(struct bpf_shtab_bucket)) { err = -EINVAL; goto free_htab; } - cost = (u64) htab->buckets_num * sizeof(struct bpf_htab_bucket) + + cost = (u64) htab->buckets_num * sizeof(struct bpf_shtab_bucket) + (u64) htab->elem_size * htab->map.max_entries; if (cost >= U32_MAX - PAGE_SIZE) { err = -EINVAL; @@ -996,7 +999,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) goto free_htab; htab->buckets = bpf_map_area_alloc(htab->buckets_num * - sizeof(struct bpf_htab_bucket), + sizeof(struct bpf_shtab_bucket), htab->map.numa_node); if (!htab->buckets) { bpf_map_charge_finish(&htab->map.memory); @@ -1017,10 +1020,10 @@ free_htab: static void sock_hash_free(struct bpf_map *map) { - struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct bpf_htab_bucket *bucket; + struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); + struct bpf_shtab_bucket *bucket; struct hlist_head unlink_list; - struct bpf_htab_elem *elem; + struct bpf_shtab_elem *elem; struct hlist_node *node; int i; @@ -1096,7 +1099,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key) static void sock_hash_release_progs(struct bpf_map *map) { - psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs); + psock_progs_drop(&container_of(map, struct bpf_shtab, map)->progs); } BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops, @@ -1176,6 +1179,7 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = { .arg4_type = ARG_ANYTHING, }; +static int sock_hash_map_btf_id; const struct bpf_map_ops sock_hash_ops = { .map_alloc = sock_hash_alloc, .map_free = sock_hash_free, @@ -1186,6 +1190,8 @@ const struct bpf_map_ops sock_hash_ops = { .map_lookup_elem_sys_only = sock_hash_lookup_sys, .map_release_uref = sock_hash_release_progs, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_shtab", + .map_btf_id = &sock_hash_map_btf_id, }; static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) @@ -1194,7 +1200,7 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) case BPF_MAP_TYPE_SOCKMAP: return &container_of(map, struct bpf_stab, map)->progs; case BPF_MAP_TYPE_SOCKHASH: - return &container_of(map, struct bpf_htab, map)->progs; + return &container_of(map, struct bpf_shtab, map)->progs; default: break; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 83330a6cb242..12fda8f27b08 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4605,7 +4605,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { coalesce_done: - tcp_grow_window(sk, skb); + /* For non sack flows, do not grow window to force DUPACK + * and trigger fast retransmit. + */ + if (tcp_is_sack(tp)) + tcp_grow_window(sk, skb); kfree_skb_partial(skb, fragstolen); skb = NULL; goto add_sack; @@ -4689,7 +4693,11 @@ add_sack: tcp_sack_new_ofo_skb(sk, seq, end_seq); end: if (skb) { - tcp_grow_window(sk, skb); + /* For non sack flows, do not grow window to force DUPACK + * and trigger fast retransmit. + */ + if (tcp_is_sack(tp)) + tcp_grow_window(sk, skb); skb_condense(skb); skb_set_owner_r(skb, sk); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bf132575040d..bbdb74b8bc3c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1053,8 +1053,10 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) err = tcp_set_ulp(sf->sk, "mptcp"); release_sock(sf->sk); - if (err) + if (err) { + sock_release(sf); return err; + } /* the newly created socket really belongs to the owning MPTCP master * socket, even if for additional subflows the allocation is performed diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 6a3034f84ab6..afa85171df38 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -387,51 +387,6 @@ static void nf_flow_offload_work_gc(struct work_struct *work) queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } -int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, - flow_setup_cb_t *cb, void *cb_priv) -{ - struct flow_block *block = &flow_table->flow_block; - struct flow_block_cb *block_cb; - int err = 0; - - down_write(&flow_table->flow_block_lock); - block_cb = flow_block_cb_lookup(block, cb, cb_priv); - if (block_cb) { - err = -EEXIST; - goto unlock; - } - - block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL); - if (IS_ERR(block_cb)) { - err = PTR_ERR(block_cb); - goto unlock; - } - - list_add_tail(&block_cb->list, &block->cb_list); - -unlock: - up_write(&flow_table->flow_block_lock); - return err; -} -EXPORT_SYMBOL_GPL(nf_flow_table_offload_add_cb); - -void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, - flow_setup_cb_t *cb, void *cb_priv) -{ - struct flow_block *block = &flow_table->flow_block; - struct flow_block_cb *block_cb; - - down_write(&flow_table->flow_block_lock); - block_cb = flow_block_cb_lookup(block, cb, cb_priv); - if (block_cb) { - list_del(&block_cb->list); - flow_block_cb_free(block_cb); - } else { - WARN_ON(true); - } - up_write(&flow_table->flow_block_lock); -} -EXPORT_SYMBOL_GPL(nf_flow_table_offload_del_cb); static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, __be16 port, __be16 new_port) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index e29f0f45d688..e9f3576cbf71 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1543,17 +1543,6 @@ static void __exit ct_cleanup_module(void) destroy_workqueue(act_ct_wq); } -void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie) -{ - enum ip_conntrack_info ctinfo = cookie & NFCT_INFOMASK; - struct nf_conn *ct; - - ct = (struct nf_conn *)(cookie & NFCT_PTRMASK); - nf_conntrack_get(&ct->ct_general); - nf_ct_set(skb, ct, ctinfo); -} -EXPORT_SYMBOL_GPL(tcf_ct_flow_table_restore_skb); - module_init(ct_init_module); module_exit(ct_cleanup_module); MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>"); diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 1dc7208c71ba..8367adbbe9df 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -254,6 +254,7 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, spin_unlock_bh(&map->lock); } +static int xsk_map_btf_id; const struct bpf_map_ops xsk_map_ops = { .map_alloc = xsk_map_alloc, .map_free = xsk_map_free, @@ -264,4 +265,6 @@ const struct bpf_map_ops xsk_map_ops = { .map_update_elem = xsk_map_update_elem, .map_delete_elem = xsk_map_delete_elem, .map_check_btf = map_check_no_btf, + .map_btf_name = "xsk_map", + .map_btf_id = &xsk_map_btf_id, }; diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c index 32234481ad7d..ad3e56042f96 100644 --- a/samples/mei/mei-amt-version.c +++ b/samples/mei/mei-amt-version.c @@ -267,7 +267,7 @@ struct amt_host_if_msg_header { struct amt_host_if_resp_header { struct amt_host_if_msg_header header; uint32_t status; - unsigned char data[0]; + unsigned char data[]; } __attribute__((packed)); const uuid_le MEI_IAMTHIF = UUID_LE(0x12f80028, 0xb4b7, 0x4b2d, \ diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index 13e5fbafdf2f..66a6d511b524 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -27,7 +27,10 @@ parse_symbol() { elif [[ "${modcache[$module]+isset}" == "isset" ]]; then local objfile=${modcache[$module]} else - [[ $modpath == "" ]] && return + if [[ $modpath == "" ]]; then + echo "WARNING! Modules path isn't set, but is needed to parse this symbol" >&2 + return + fi local objfile=$(find "$modpath" -name "${module//_/[-_]}.ko*" -print -quit) [[ $objfile == "" ]] && return modcache[$module]=$objfile diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index 298b73794d8b..16c1894c29bb 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -107,7 +107,7 @@ struct ima_digest_data { } ng; u8 data[2]; } xattr; - u8 digest[0]; + u8 digest[]; } __packed; /* @@ -119,7 +119,7 @@ struct signature_v2_hdr { uint8_t hash_algo; /* Digest algorithm [enum hash_algo] */ __be32 keyid; /* IMA key identifier - not X509/PGP specific */ __be16 sig_size; /* signature size */ - uint8_t sig[0]; /* signature payload */ + uint8_t sig[]; /* signature payload */ } __packed; /* integrity data associated with an inode */ diff --git a/sound/soc/sof/probe.h b/sound/soc/sof/probe.h index b04b728c7224..5e159ab239fa 100644 --- a/sound/soc/sof/probe.h +++ b/sound/soc/sof/probe.h @@ -36,7 +36,7 @@ struct sof_probe_point_desc { struct sof_ipc_probe_dma_add_params { struct sof_ipc_cmd_hdr hdr; unsigned int num_elems; - struct sof_probe_dma dma[0]; + struct sof_probe_dma dma[]; } __packed; struct sof_ipc_probe_info_params { @@ -51,19 +51,19 @@ struct sof_ipc_probe_info_params { struct sof_ipc_probe_dma_remove_params { struct sof_ipc_cmd_hdr hdr; unsigned int num_elems; - unsigned int stream_tag[0]; + unsigned int stream_tag[]; } __packed; struct sof_ipc_probe_point_add_params { struct sof_ipc_cmd_hdr hdr; unsigned int num_elems; - struct sof_probe_point_desc desc[0]; + struct sof_probe_point_desc desc[]; } __packed; struct sof_ipc_probe_point_remove_params { struct sof_ipc_cmd_hdr hdr; unsigned int num_elems; - unsigned int buffer_id[0]; + unsigned int buffer_id[]; } __packed; int sof_ipc_probe_init(struct snd_sof_dev *sdev, diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index 26cde83e1ca3..3e601bcfd461 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,10 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only *.d -/_bpftool +/bpftool-bootstrap /bpftool bpftool*.8 bpf-helpers.* FEATURE-DUMP.bpftool feature libbpf -profiler.skel.h +/*.skel.h +/vmlinux.h diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index ce3a724f50c1..896f4c6c2870 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -36,6 +36,11 @@ DESCRIPTION otherwise list all BTF objects currently loaded on the system. + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BTF + objects. On such kernels bpftool will automatically emit this + information as well. + **bpftool btf dump** *BTF_SRC* Dump BTF entries from a given *BTF_SRC*. diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 0e43d7b06c11..38b0949a185b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -37,6 +37,11 @@ DESCRIPTION zero or more named attributes, some of which depend on type of link. + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BPF + links. On such kernels bpftool will automatically emit this + information as well. + **bpftool link pin** *LINK* *FILE* Pin link *LINK* as *FILE*. @@ -82,6 +87,7 @@ EXAMPLES 10: cgroup prog 25 cgroup_id 614 attach_type egress + pids test_progs(223) **# bpftool --json --pretty link show** @@ -91,7 +97,12 @@ EXAMPLES "type": "cgroup", "prog_id": 25, "cgroup_id": 614, - "attach_type": "egress" + "attach_type": "egress", + "pids": [{ + "pid": 223, + "comm": "test_progs" + } + ] } ] diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 31101643e57c..5bc2123e9944 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -62,6 +62,11 @@ DESCRIPTION Output will start with map ID followed by map type and zero or more named attributes (depending on kernel version). + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BPF + maps. On such kernels bpftool will automatically emit this + information as well. + **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*] Create a new map with given parameters and pin it to *bpffs* as *FILE*. @@ -180,7 +185,8 @@ EXAMPLES :: 10: hash name some_map flags 0x0 - key 4B value 8B max_entries 2048 memlock 167936B + key 4B value 8B max_entries 2048 memlock 167936B + pids systemd(1) The following three commands are equivalent: diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 2b254959d488..412ea3d9bf7f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -75,6 +75,11 @@ DESCRIPTION program run. Activation or deactivation of the feature is performed via the **kernel.bpf_stats_enabled** sysctl knob. + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BPF + programs. On such kernels bpftool will automatically emit this + information as well. + **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] Dump eBPF instructions of the programs from the kernel. By default, eBPF will be disassembled and printed to standard @@ -243,6 +248,7 @@ EXAMPLES 10: xdp name some_prog tag 005a3d2123620c8b gpl run_time_ns 81632 run_cnt 10 loaded_at 2017-09-29T20:11:00+0000 uid 0 xlated 528B jited 370B memlock 4096B map_ids 10 + pids systemd(1) **# bpftool --json --pretty prog show** @@ -262,6 +268,11 @@ EXAMPLES "bytes_jited": 370, "bytes_memlock": 4096, "map_ids": [10 + ], + "pids": [{ + "pid": 1, + "comm": "systemd" + } ] } ] diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 9e85f101be85..8c6563e56ffc 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -42,6 +42,7 @@ CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS)) CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ + -I$(if $(OUTPUT),$(OUTPUT),.) \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ @@ -61,9 +62,9 @@ CLANG ?= clang FEATURE_USER = .bpftool FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \ - clang-bpf-global-var + clang-bpf-co-re FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \ - clang-bpf-global-var + clang-bpf-co-re check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -116,40 +117,56 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT SRCS += $(BFD_SRCS) endif +BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap) + +BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o -_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o -ifeq ($(feature-clang-bpf-global-var),1) - __OBJS = $(OBJS) -else - __OBJS = $(_OBJS) -endif +VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -$(OUTPUT)_prog.o: prog.c - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $< +ifneq ($(VMLINUX_BTF),) +ifeq ($(feature-clang-bpf-co-re),1) -$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS) +BUILD_BPF_SKELS := 1 -skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF) +$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) + $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@ + +$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ + -I$(if $(OUTPUT),$(OUTPUT),.) \ -I$(srctree)/tools/include/uapi/ \ - -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \ + -I$(LIBBPF_PATH) \ + -I$(srctree)/tools/lib \ -g -O2 -target bpf -c $< -o $@ -profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o - $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@ +$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) + $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@ -$(OUTPUT)prog.o: prog.c profiler.skel.h - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< +$(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h + +$(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h + +endif +endif + +CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< $(OUTPUT)feature.o: | zdep -$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS) +$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS) + +$(OUTPUT)bpftool: $(OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) $(OUTPUT)%.o: %.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< @@ -157,7 +174,7 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d - $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o + $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h $(Q)$(RM) -r -- $(OUTPUT)libbpf/ $(call QUIET_CLEAN, core-gen) $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool @@ -192,6 +209,7 @@ FORCE: zdep: @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi +.SECONDARY: .PHONY: all FORCE clean install uninstall zdep .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index faac8189b285..fc9bc7a23db6 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -809,6 +809,7 @@ show_btf_plain(struct bpf_btf_info *info, int fd, printf("%s%u", n++ == 0 ? " map_ids " : ",", obj->obj_id); } + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); } @@ -841,6 +842,9 @@ show_btf_json(struct bpf_btf_info *info, int fd, jsonw_uint(json_wtr, obj->obj_id); } jsonw_end_array(json_wtr); /* map_ids */ + + emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */ + jsonw_end_object(json_wtr); /* btf object */ } @@ -893,6 +897,7 @@ static int do_show(int argc, char **argv) close(fd); return err; } + build_obj_refs_table(&refs_table, BPF_OBJ_BTF); if (fd >= 0) { err = show_btf(fd, &btf_prog_table, &btf_map_table); @@ -939,6 +944,7 @@ static int do_show(int argc, char **argv) exit_free: delete_btf_table(&btf_prog_table); delete_btf_table(&btf_map_table); + delete_obj_refs_table(&refs_table); return err; } diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index c47bdc65de8e..6c864c3683fc 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -581,3 +581,311 @@ print_all_levels(__maybe_unused enum libbpf_print_level level, { return vfprintf(stderr, format, args); } + +static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) +{ + unsigned int id = 0; + int fd, nb_fds = 0; + void *tmp; + int err; + + while (true) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + + err = bpf_prog_get_next_id(id, &id); + if (err) { + if (errno != ENOENT) { + p_err("%s", strerror(errno)); + goto err_close_fds; + } + return nb_fds; + } + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) { + p_err("can't get prog by id (%u): %s", + id, strerror(errno)); + goto err_close_fds; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get prog info (%u): %s", + id, strerror(errno)); + goto err_close_fd; + } + + if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || + (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { + close(fd); + continue; + } + + if (nb_fds > 0) { + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); + if (!tmp) { + p_err("failed to realloc"); + goto err_close_fd; + } + *fds = tmp; + } + (*fds)[nb_fds++] = fd; + } + +err_close_fd: + close(fd); +err_close_fds: + while (--nb_fds >= 0) + close((*fds)[nb_fds]); + return -1; +} + +int prog_parse_fds(int *argc, char ***argv, int **fds) +{ + if (is_prefix(**argv, "id")) { + unsigned int id; + char *endptr; + + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as ID", **argv); + return -1; + } + NEXT_ARGP(); + + (*fds)[0] = bpf_prog_get_fd_by_id(id); + if ((*fds)[0] < 0) { + p_err("get by id (%u): %s", id, strerror(errno)); + return -1; + } + return 1; + } else if (is_prefix(**argv, "tag")) { + unsigned char tag[BPF_TAG_SIZE]; + + NEXT_ARGP(); + + if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2, + tag + 3, tag + 4, tag + 5, tag + 6, tag + 7) + != BPF_TAG_SIZE) { + p_err("can't parse tag"); + return -1; + } + NEXT_ARGP(); + + return prog_fd_by_nametag(tag, fds, true); + } else if (is_prefix(**argv, "name")) { + char *name; + + NEXT_ARGP(); + + name = **argv; + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + p_err("can't parse name"); + return -1; + } + NEXT_ARGP(); + + return prog_fd_by_nametag(name, fds, false); + } else if (is_prefix(**argv, "pinned")) { + char *path; + + NEXT_ARGP(); + + path = **argv; + NEXT_ARGP(); + + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); + if ((*fds)[0] < 0) + return -1; + return 1; + } + + p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv); + return -1; +} + +int prog_parse_fd(int *argc, char ***argv) +{ + int *fds = NULL; + int nb_fds, fd; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = prog_parse_fds(argc, argv, &fds); + if (nb_fds != 1) { + if (nb_fds > 1) { + p_err("several programs match this handle"); + while (nb_fds--) + close(fds[nb_fds]); + } + fd = -1; + goto exit_free; + } + + fd = fds[0]; +exit_free: + free(fds); + return fd; +} + +static int map_fd_by_name(char *name, int **fds) +{ + unsigned int id = 0; + int fd, nb_fds = 0; + void *tmp; + int err; + + while (true) { + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + + err = bpf_map_get_next_id(id, &id); + if (err) { + if (errno != ENOENT) { + p_err("%s", strerror(errno)); + goto err_close_fds; + } + return nb_fds; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) { + p_err("can't get map by id (%u): %s", + id, strerror(errno)); + goto err_close_fds; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get map info (%u): %s", + id, strerror(errno)); + goto err_close_fd; + } + + if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) { + close(fd); + continue; + } + + if (nb_fds > 0) { + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); + if (!tmp) { + p_err("failed to realloc"); + goto err_close_fd; + } + *fds = tmp; + } + (*fds)[nb_fds++] = fd; + } + +err_close_fd: + close(fd); +err_close_fds: + while (--nb_fds >= 0) + close((*fds)[nb_fds]); + return -1; +} + +int map_parse_fds(int *argc, char ***argv, int **fds) +{ + if (is_prefix(**argv, "id")) { + unsigned int id; + char *endptr; + + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as ID", **argv); + return -1; + } + NEXT_ARGP(); + + (*fds)[0] = bpf_map_get_fd_by_id(id); + if ((*fds)[0] < 0) { + p_err("get map by id (%u): %s", id, strerror(errno)); + return -1; + } + return 1; + } else if (is_prefix(**argv, "name")) { + char *name; + + NEXT_ARGP(); + + name = **argv; + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + p_err("can't parse name"); + return -1; + } + NEXT_ARGP(); + + return map_fd_by_name(name, fds); + } else if (is_prefix(**argv, "pinned")) { + char *path; + + NEXT_ARGP(); + + path = **argv; + NEXT_ARGP(); + + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); + if ((*fds)[0] < 0) + return -1; + return 1; + } + + p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv); + return -1; +} + +int map_parse_fd(int *argc, char ***argv) +{ + int *fds = NULL; + int nb_fds, fd; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = map_parse_fds(argc, argv, &fds); + if (nb_fds != 1) { + if (nb_fds > 1) { + p_err("several maps match this handle"); + while (nb_fds--) + close(fds[nb_fds]); + } + fd = -1; + goto exit_free; + } + + fd = fds[0]; +exit_free: + free(fds); + return fd; +} + +int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) +{ + int err; + int fd; + + fd = map_parse_fd(argc, argv); + if (fd < 0) + return -1; + + err = bpf_obj_get_info_by_fd(fd, info, info_len); + if (err) { + p_err("can't get map info: %s", strerror(errno)); + close(fd); + return err; + } + + return fd; +} diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index fca57ee8fafe..7329f3134283 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -143,6 +143,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) } jsonw_end_array(json_wtr); } + + emit_obj_refs_json(&refs_table, info->id, json_wtr); + jsonw_end_object(json_wtr); return 0; @@ -212,6 +215,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\tpinned %s", obj->path); } } + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); @@ -257,6 +261,7 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&link_table, BPF_OBJ_LINK); + build_obj_refs_table(&refs_table, BPF_OBJ_LINK); if (argc == 2) { fd = link_parse_fd(&argc, &argv); @@ -296,6 +301,8 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); + delete_obj_refs_table(&refs_table); + return errno == ENOENT ? 0 : -1; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 46bd716a9d86..4a191fcbeb82 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,6 +31,7 @@ bool relaxed_maps; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; struct pinned_obj_table link_table; +struct obj_refs_table refs_table; static void __noreturn clean_and_exit(int i) { @@ -92,9 +93,16 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv, if (argc < 1 && cmds[0].func) return cmds[0].func(argc, argv); - for (i = 0; cmds[i].func; i++) - if (is_prefix(*argv, cmds[i].cmd)) + for (i = 0; cmds[i].cmd; i++) { + if (is_prefix(*argv, cmds[i].cmd)) { + if (!cmds[i].func) { + p_err("command '%s' is not supported in bootstrap mode", + cmds[i].cmd); + return -1; + } return cmds[i].func(argc - 1, argv + 1); + } + } help(argc - 1, argv + 1); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 5cdf0bc049bd..ce26271e5f0c 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -127,11 +127,13 @@ static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { extern const char * const map_type_name[]; extern const size_t map_type_name_size; +/* keep in sync with the definition in skeleton/pid_iter.bpf.c */ enum bpf_obj_type { BPF_OBJ_UNKNOWN, BPF_OBJ_PROG, BPF_OBJ_MAP, BPF_OBJ_LINK, + BPF_OBJ_BTF, }; extern const char *bin_name; @@ -139,12 +141,14 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; extern bool show_pinned; +extern bool show_pids; extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; extern struct pinned_obj_table link_table; +extern struct obj_refs_table refs_table; void __printf(1, 2) p_err(const char *fmt, ...); void __printf(1, 2) p_info(const char *fmt, ...); @@ -168,12 +172,35 @@ struct pinned_obj { struct hlist_node hash; }; +struct obj_refs_table { + DECLARE_HASHTABLE(table, 16); +}; + +struct obj_ref { + int pid; + char comm[16]; +}; + +struct obj_refs { + struct hlist_node node; + __u32 id; + int ref_cnt; + struct obj_ref *refs; +}; + struct btf; struct bpf_line_info; int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct pinned_obj_table *tab); +__weak int build_obj_refs_table(struct obj_refs_table *table, + enum bpf_obj_type type); +__weak void delete_obj_refs_table(struct obj_refs_table *table); +__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, + json_writer_t *json_wtr); +__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, + const char *prefix); void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); @@ -194,23 +221,28 @@ int mount_bpffs_for_pin(const char *name); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); int do_pin_fd(int fd, const char *name); -int do_prog(int argc, char **arg); -int do_map(int argc, char **arg); -int do_link(int argc, char **arg); -int do_event_pipe(int argc, char **argv); -int do_cgroup(int argc, char **arg); -int do_perf(int argc, char **arg); -int do_net(int argc, char **arg); -int do_tracelog(int argc, char **arg); -int do_feature(int argc, char **argv); -int do_btf(int argc, char **argv); +/* commands available in bootstrap mode */ int do_gen(int argc, char **argv); -int do_struct_ops(int argc, char **argv); -int do_iter(int argc, char **argv); +int do_btf(int argc, char **argv); + +/* non-bootstrap only commands */ +int do_prog(int argc, char **arg) __weak; +int do_map(int argc, char **arg) __weak; +int do_link(int argc, char **arg) __weak; +int do_event_pipe(int argc, char **argv) __weak; +int do_cgroup(int argc, char **arg) __weak; +int do_perf(int argc, char **arg) __weak; +int do_net(int argc, char **arg) __weak; +int do_tracelog(int argc, char **arg) __weak; +int do_feature(int argc, char **argv) __weak; +int do_struct_ops(int argc, char **argv) __weak; +int do_iter(int argc, char **argv) __weak; int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); +int prog_parse_fds(int *argc, char ***argv, int **fds); int map_parse_fd(int *argc, char ***argv); +int map_parse_fds(int *argc, char ***argv, int **fds); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); struct bpf_prog_linfo; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 99109a6afe17..bbb74d387fb0 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -93,162 +93,6 @@ static void *alloc_value(struct bpf_map_info *info) return malloc(info->value_size); } -static int map_fd_by_name(char *name, int **fds) -{ - unsigned int id = 0; - int fd, nb_fds = 0; - void *tmp; - int err; - - while (true) { - struct bpf_map_info info = {}; - __u32 len = sizeof(info); - - err = bpf_map_get_next_id(id, &id); - if (err) { - if (errno != ENOENT) { - p_err("%s", strerror(errno)); - goto err_close_fds; - } - return nb_fds; - } - - fd = bpf_map_get_fd_by_id(id); - if (fd < 0) { - p_err("can't get map by id (%u): %s", - id, strerror(errno)); - goto err_close_fds; - } - - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) { - p_err("can't get map info (%u): %s", - id, strerror(errno)); - goto err_close_fd; - } - - if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) { - close(fd); - continue; - } - - if (nb_fds > 0) { - tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); - if (!tmp) { - p_err("failed to realloc"); - goto err_close_fd; - } - *fds = tmp; - } - (*fds)[nb_fds++] = fd; - } - -err_close_fd: - close(fd); -err_close_fds: - while (--nb_fds >= 0) - close((*fds)[nb_fds]); - return -1; -} - -static int map_parse_fds(int *argc, char ***argv, int **fds) -{ - if (is_prefix(**argv, "id")) { - unsigned int id; - char *endptr; - - NEXT_ARGP(); - - id = strtoul(**argv, &endptr, 0); - if (*endptr) { - p_err("can't parse %s as ID", **argv); - return -1; - } - NEXT_ARGP(); - - (*fds)[0] = bpf_map_get_fd_by_id(id); - if ((*fds)[0] < 0) { - p_err("get map by id (%u): %s", id, strerror(errno)); - return -1; - } - return 1; - } else if (is_prefix(**argv, "name")) { - char *name; - - NEXT_ARGP(); - - name = **argv; - if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { - p_err("can't parse name"); - return -1; - } - NEXT_ARGP(); - - return map_fd_by_name(name, fds); - } else if (is_prefix(**argv, "pinned")) { - char *path; - - NEXT_ARGP(); - - path = **argv; - NEXT_ARGP(); - - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); - if ((*fds)[0] < 0) - return -1; - return 1; - } - - p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv); - return -1; -} - -int map_parse_fd(int *argc, char ***argv) -{ - int *fds = NULL; - int nb_fds, fd; - - fds = malloc(sizeof(int)); - if (!fds) { - p_err("mem alloc failed"); - return -1; - } - nb_fds = map_parse_fds(argc, argv, &fds); - if (nb_fds != 1) { - if (nb_fds > 1) { - p_err("several maps match this handle"); - while (nb_fds--) - close(fds[nb_fds]); - } - fd = -1; - goto exit_free; - } - - fd = fds[0]; -exit_free: - free(fds); - return fd; -} - -int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) -{ - int err; - int fd; - - fd = map_parse_fd(argc, argv); - if (fd < 0) - return -1; - - err = bpf_obj_get_info_by_fd(fd, info, info_len); - if (err) { - p_err("can't get map info: %s", strerror(errno)); - close(fd); - return err; - } - - return fd; -} - static int do_dump_btf(const struct btf_dumper *d, struct bpf_map_info *map_info, void *key, void *value) @@ -666,6 +510,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_end_array(json_wtr); } + emit_obj_refs_json(&refs_table, info->id, json_wtr); + jsonw_end_object(json_wtr); return 0; @@ -753,6 +599,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (frozen) printf("%sfrozen", info->btf_id ? " " : ""); + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + printf("\n"); return 0; } @@ -811,6 +659,7 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&map_table, BPF_OBJ_MAP); + build_obj_refs_table(&refs_table, BPF_OBJ_MAP); if (argc == 2) return do_show_subset(argc, argv); @@ -854,6 +703,8 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); + delete_obj_refs_table(&refs_table); + return errno == ENOENT ? 0 : -1; } diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c new file mode 100644 index 000000000000..3474a91743ff --- /dev/null +++ b/tools/bpf/bpftool/pids.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2020 Facebook */ +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bpf/bpf.h> + +#include "main.h" +#include "skeleton/pid_iter.h" + +#ifdef BPFTOOL_WITHOUT_SKELETONS + +int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +{ + p_err("bpftool built without PID iterator support"); + return -ENOTSUP; +} +void delete_obj_refs_table(struct obj_refs_table *table) {} + +#else /* BPFTOOL_WITHOUT_SKELETONS */ + +#include "pid_iter.skel.h" + +static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) +{ + struct obj_refs *refs; + struct obj_ref *ref; + void *tmp; + int i; + + hash_for_each_possible(table->table, refs, node, e->id) { + if (refs->id != e->id) + continue; + + for (i = 0; i < refs->ref_cnt; i++) { + if (refs->refs[i].pid == e->pid) + return; + } + + tmp = realloc(refs->refs, (refs->ref_cnt + 1) * sizeof(*ref)); + if (!tmp) { + p_err("failed to re-alloc memory for ID %u, PID %d, COMM %s...", + e->id, e->pid, e->comm); + return; + } + refs->refs = tmp; + ref = &refs->refs[refs->ref_cnt]; + ref->pid = e->pid; + memcpy(ref->comm, e->comm, sizeof(ref->comm)); + refs->ref_cnt++; + + return; + } + + /* new ref */ + refs = calloc(1, sizeof(*refs)); + if (!refs) { + p_err("failed to alloc memory for ID %u, PID %d, COMM %s...", + e->id, e->pid, e->comm); + return; + } + + refs->id = e->id; + refs->refs = malloc(sizeof(*refs->refs)); + if (!refs->refs) { + free(refs); + p_err("failed to alloc memory for ID %u, PID %d, COMM %s...", + e->id, e->pid, e->comm); + return; + } + ref = &refs->refs[0]; + ref->pid = e->pid; + memcpy(ref->comm, e->comm, sizeof(ref->comm)); + refs->ref_cnt = 1; + hash_add(table->table, &refs->node, e->id); +} + +static int __printf(2, 0) +libbpf_print_none(__maybe_unused enum libbpf_print_level level, + __maybe_unused const char *format, + __maybe_unused va_list args) +{ + return 0; +} + +int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +{ + char buf[4096]; + struct pid_iter_bpf *skel; + struct pid_iter_entry *e; + int err, ret, fd = -1, i; + libbpf_print_fn_t default_print; + + hash_init(table->table); + set_max_rlimit(); + + skel = pid_iter_bpf__open(); + if (!skel) { + p_err("failed to open PID iterator skeleton"); + return -1; + } + + skel->rodata->obj_type = type; + + /* we don't want output polluted with libbpf errors if bpf_iter is not + * supported + */ + default_print = libbpf_set_print(libbpf_print_none); + err = pid_iter_bpf__load(skel); + libbpf_set_print(default_print); + if (err) { + /* too bad, kernel doesn't support BPF iterators yet */ + err = 0; + goto out; + } + err = pid_iter_bpf__attach(skel); + if (err) { + /* if we loaded above successfully, attach has to succeed */ + p_err("failed to attach PID iterator: %d", err); + goto out; + } + + fd = bpf_iter_create(bpf_link__fd(skel->links.iter)); + if (fd < 0) { + err = -errno; + p_err("failed to create PID iterator session: %d", err); + goto out; + } + + while (true) { + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) { + err = -errno; + p_err("failed to read PID iterator output: %d", err); + goto out; + } + if (ret == 0) + break; + if (ret % sizeof(*e)) { + err = -EINVAL; + p_err("invalid PID iterator output format"); + goto out; + } + ret /= sizeof(*e); + + e = (void *)buf; + for (i = 0; i < ret; i++, e++) { + add_ref(table, e); + } + } + err = 0; +out: + if (fd >= 0) + close(fd); + pid_iter_bpf__destroy(skel); + return err; +} + +void delete_obj_refs_table(struct obj_refs_table *table) +{ + struct obj_refs *refs; + struct hlist_node *tmp; + unsigned int bkt; + + hash_for_each_safe(table->table, bkt, tmp, refs, node) { + hash_del(&refs->node); + free(refs->refs); + free(refs); + } +} + +void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr) +{ + struct obj_refs *refs; + struct obj_ref *ref; + int i; + + if (hash_empty(table->table)) + return; + + hash_for_each_possible(table->table, refs, node, id) { + if (refs->id != id) + continue; + if (refs->ref_cnt == 0) + break; + + jsonw_name(json_wtr, "pids"); + jsonw_start_array(json_wtr); + for (i = 0; i < refs->ref_cnt; i++) { + ref = &refs->refs[i]; + jsonw_start_object(json_wtr); + jsonw_int_field(json_wtr, "pid", ref->pid); + jsonw_string_field(json_wtr, "comm", ref->comm); + jsonw_end_object(json_wtr); + } + jsonw_end_array(json_wtr); + break; + } +} + +void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) +{ + struct obj_refs *refs; + struct obj_ref *ref; + int i; + + if (hash_empty(table->table)) + return; + + hash_for_each_possible(table->table, refs, node, id) { + if (refs->id != id) + continue; + if (refs->ref_cnt == 0) + break; + + printf("%s", prefix); + for (i = 0; i < refs->ref_cnt; i++) { + ref = &refs->refs[i]; + printf("%s%s(%d)", i == 0 ? "" : ", ", ref->comm, ref->pid); + } + break; + } +} + + +#endif diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index a5eff83496f2..e21fa8ad2efa 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -86,158 +86,6 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) strftime(buf, size, "%FT%T%z", &load_tm); } -static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) -{ - unsigned int id = 0; - int fd, nb_fds = 0; - void *tmp; - int err; - - while (true) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); - - err = bpf_prog_get_next_id(id, &id); - if (err) { - if (errno != ENOENT) { - p_err("%s", strerror(errno)); - goto err_close_fds; - } - return nb_fds; - } - - fd = bpf_prog_get_fd_by_id(id); - if (fd < 0) { - p_err("can't get prog by id (%u): %s", - id, strerror(errno)); - goto err_close_fds; - } - - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) { - p_err("can't get prog info (%u): %s", - id, strerror(errno)); - goto err_close_fd; - } - - if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || - (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { - close(fd); - continue; - } - - if (nb_fds > 0) { - tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); - if (!tmp) { - p_err("failed to realloc"); - goto err_close_fd; - } - *fds = tmp; - } - (*fds)[nb_fds++] = fd; - } - -err_close_fd: - close(fd); -err_close_fds: - while (--nb_fds >= 0) - close((*fds)[nb_fds]); - return -1; -} - -static int prog_parse_fds(int *argc, char ***argv, int **fds) -{ - if (is_prefix(**argv, "id")) { - unsigned int id; - char *endptr; - - NEXT_ARGP(); - - id = strtoul(**argv, &endptr, 0); - if (*endptr) { - p_err("can't parse %s as ID", **argv); - return -1; - } - NEXT_ARGP(); - - (*fds)[0] = bpf_prog_get_fd_by_id(id); - if ((*fds)[0] < 0) { - p_err("get by id (%u): %s", id, strerror(errno)); - return -1; - } - return 1; - } else if (is_prefix(**argv, "tag")) { - unsigned char tag[BPF_TAG_SIZE]; - - NEXT_ARGP(); - - if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2, - tag + 3, tag + 4, tag + 5, tag + 6, tag + 7) - != BPF_TAG_SIZE) { - p_err("can't parse tag"); - return -1; - } - NEXT_ARGP(); - - return prog_fd_by_nametag(tag, fds, true); - } else if (is_prefix(**argv, "name")) { - char *name; - - NEXT_ARGP(); - - name = **argv; - if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { - p_err("can't parse name"); - return -1; - } - NEXT_ARGP(); - - return prog_fd_by_nametag(name, fds, false); - } else if (is_prefix(**argv, "pinned")) { - char *path; - - NEXT_ARGP(); - - path = **argv; - NEXT_ARGP(); - - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); - if ((*fds)[0] < 0) - return -1; - return 1; - } - - p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv); - return -1; -} - -int prog_parse_fd(int *argc, char ***argv) -{ - int *fds = NULL; - int nb_fds, fd; - - fds = malloc(sizeof(int)); - if (!fds) { - p_err("mem alloc failed"); - return -1; - } - nb_fds = prog_parse_fds(argc, argv, &fds); - if (nb_fds != 1) { - if (nb_fds > 1) { - p_err("several programs match this handle"); - while (nb_fds--) - close(fds[nb_fds]); - } - fd = -1; - goto exit_free; - } - - fd = fds[0]; -exit_free: - free(fds); - return fd; -} - static void show_prog_maps(int fd, __u32 num_maps) { struct bpf_prog_info info = {}; @@ -342,6 +190,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_end_array(json_wtr); } + emit_obj_refs_json(&refs_table, info->id, json_wtr); + jsonw_end_object(json_wtr); } @@ -408,6 +258,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (info->btf_id) printf("\n\tbtf_id %d", info->btf_id); + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + printf("\n"); } @@ -473,6 +325,7 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); + build_obj_refs_table(&refs_table, BPF_OBJ_PROG); if (argc == 2) return do_show_subset(argc, argv); @@ -514,6 +367,8 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); + delete_obj_refs_table(&refs_table); + return err; } diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c new file mode 100644 index 000000000000..8468a608911e --- /dev/null +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (c) 2020 Facebook */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_tracing.h> +#include "pid_iter.h" + +/* keep in sync with the definition in main.h */ +enum bpf_obj_type { + BPF_OBJ_UNKNOWN, + BPF_OBJ_PROG, + BPF_OBJ_MAP, + BPF_OBJ_LINK, + BPF_OBJ_BTF, +}; + +extern const void bpf_link_fops __ksym; +extern const void bpf_map_fops __ksym; +extern const void bpf_prog_fops __ksym; +extern const void btf_fops __ksym; + +const volatile enum bpf_obj_type obj_type = BPF_OBJ_UNKNOWN; + +static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type) +{ + switch (type) { + case BPF_OBJ_PROG: + return BPF_CORE_READ((struct bpf_prog *)ent, aux, id); + case BPF_OBJ_MAP: + return BPF_CORE_READ((struct bpf_map *)ent, id); + case BPF_OBJ_BTF: + return BPF_CORE_READ((struct btf *)ent, id); + case BPF_OBJ_LINK: + return BPF_CORE_READ((struct bpf_link *)ent, id); + default: + return 0; + } +} + +SEC("iter/task_file") +int iter(struct bpf_iter__task_file *ctx) +{ + struct file *file = ctx->file; + struct task_struct *task = ctx->task; + struct pid_iter_entry e; + const void *fops; + + if (!file || !task) + return 0; + + switch (obj_type) { + case BPF_OBJ_PROG: + fops = &bpf_prog_fops; + break; + case BPF_OBJ_MAP: + fops = &bpf_map_fops; + break; + case BPF_OBJ_BTF: + fops = &btf_fops; + break; + case BPF_OBJ_LINK: + fops = &bpf_link_fops; + break; + default: + return 0; + } + + if (file->f_op != fops) + return 0; + + e.pid = task->tgid; + e.id = get_obj_id(file->private_data, obj_type); + bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm); + bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/bpf/bpftool/skeleton/pid_iter.h b/tools/bpf/bpftool/skeleton/pid_iter.h new file mode 100644 index 000000000000..5692cf257adb --- /dev/null +++ b/tools/bpf/bpftool/skeleton/pid_iter.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2020 Facebook */ +#ifndef __PID_ITER_H +#define __PID_ITER_H + +struct pid_iter_entry { + __u32 id; + int pid; + char comm[16]; +}; + +#endif diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index 20034c12f7c5..4e3512f700c0 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -1,7 +1,6 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2020 Facebook -#include "profiler.h" -#include <linux/bpf.h> +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -116,4 +115,4 @@ int BPF_PROG(fexit_XXX) return 0; } -char LICENSE[] SEC("license") = "GPL"; +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h deleted file mode 100644 index 1f767e9510f7..000000000000 --- a/tools/bpf/bpftool/skeleton/profiler.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __PROFILER_H -#define __PROFILER_H - -/* useful typedefs from vmlinux.h */ - -typedef signed char __s8; -typedef unsigned char __u8; -typedef short int __s16; -typedef short unsigned int __u16; -typedef int __s32; -typedef unsigned int __u32; -typedef long long int __s64; -typedef long long unsigned int __u64; - -typedef __s8 s8; -typedef __u8 u8; -typedef __s16 s16; -typedef __u16 u16; -typedef __s32 s32; -typedef __u32 u32; -typedef __s64 s64; -typedef __u64 u64; - -enum { - false = 0, - true = 1, -}; - -#ifdef __CHECKER__ -#define __bitwise__ __attribute__((bitwise)) -#else -#define __bitwise__ -#endif - -typedef __u16 __bitwise__ __le16; -typedef __u16 __bitwise__ __be16; -typedef __u32 __bitwise__ __le32; -typedef __u32 __bitwise__ __be32; -typedef __u64 __bitwise__ __le64; -typedef __u64 __bitwise__ __be64; - -typedef __u16 __bitwise__ __sum16; -typedef __u32 __bitwise__ __wsum; - -#endif /* __PROFILER_H */ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index b1f0321180f5..88371f7f0369 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -68,7 +68,7 @@ FILES= \ test-llvm-version.bin \ test-libaio.bin \ test-libzstd.bin \ - test-clang-bpf-global-var.bin \ + test-clang-bpf-co-re.bin \ test-file-handle.bin \ test-libpfm4.bin @@ -325,7 +325,7 @@ $(OUTPUT)test-libaio.bin: $(OUTPUT)test-libzstd.bin: $(BUILD) -lzstd -$(OUTPUT)test-clang-bpf-global-var.bin: +$(OUTPUT)test-clang-bpf-co-re.bin: $(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \ grep BTF_KIND_VAR diff --git a/tools/build/feature/test-clang-bpf-co-re.c b/tools/build/feature/test-clang-bpf-co-re.c new file mode 100644 index 000000000000..cb5265bfdd83 --- /dev/null +++ b/tools/build/feature/test-clang-bpf-co-re.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +struct test { + int a; + int b; +} __attribute__((preserve_access_index)); + +volatile struct test global_value_for_test = {}; diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c deleted file mode 100644 index 221f1481d52e..000000000000 --- a/tools/build/feature/test-clang-bpf-global-var.c +++ /dev/null @@ -1,4 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2020 Facebook - -volatile int global_value_for_test = 1; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 974a71342aea..9d3923e6b860 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -653,7 +653,7 @@ union bpf_attr { * Map value associated to *key*, or **NULL** if no entry was * found. * - * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) * Description * Add or update the value of the entry associated to *key* in * *map* with *value*. *flags* is one of: @@ -671,13 +671,13 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * long bpf_map_delete_elem(struct bpf_map *map, const void *key) * Description * Delete entry with *key* from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) * Description * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. @@ -695,7 +695,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...) * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) @@ -775,7 +775,7 @@ union bpf_attr { * Return * The SMP id of the processor running the program. * - * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. *flags* are a combination of @@ -792,7 +792,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) * Description * Recompute the layer 3 (e.g. IP) checksum for the packet * associated to *skb*. Computation is incremental, so the helper @@ -817,7 +817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) * Description * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the * packet associated to *skb*. Computation is incremental, so the @@ -849,7 +849,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) * Description * This special helper is used to trigger a "tail call", or in * other words, to jump into another eBPF program. The same stack @@ -880,7 +880,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) * Description * Clone and redirect the packet associated to *skb* to another * net device of index *ifindex*. Both ingress and egress @@ -916,7 +916,7 @@ union bpf_attr { * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. * - * int bpf_get_current_comm(void *buf, u32 size_of_buf) + * long bpf_get_current_comm(void *buf, u32 size_of_buf) * Description * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. The **comm** attribute contains the name of @@ -953,7 +953,7 @@ union bpf_attr { * Return * The classid, or 0 for the default unconfigured classid. * - * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) * Description * Push a *vlan_tci* (VLAN tag control information) of protocol * *vlan_proto* to the packet associated to *skb*, then update @@ -969,7 +969,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_vlan_pop(struct sk_buff *skb) + * long bpf_skb_vlan_pop(struct sk_buff *skb) * Description * Pop a VLAN header from the packet associated to *skb*. * @@ -981,7 +981,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Get tunnel metadata. This helper takes a pointer *key* to an * empty **struct bpf_tunnel_key** of **size**, that will be @@ -1032,7 +1032,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Populate tunnel metadata for packet associated to *skb.* The * tunnel metadata is set to the contents of *key*, of *size*. The @@ -1098,7 +1098,7 @@ union bpf_attr { * The value of the perf event counter read from the map, or a * negative error code in case of failure. * - * int bpf_redirect(u32 ifindex, u64 flags) + * long bpf_redirect(u32 ifindex, u64 flags) * Description * Redirect the packet to another net device of index *ifindex*. * This helper is somewhat similar to **bpf_clone_redirect**\ @@ -1145,7 +1145,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1190,7 +1190,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) + * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) * Description * This helper was provided as an easy way to load data from a * packet. It can be used to load *len* bytes from *offset* from @@ -1207,7 +1207,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) + * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1276,7 +1276,7 @@ union bpf_attr { * The checksum result, or a negative error code in case of * failure. * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* @@ -1294,7 +1294,7 @@ union bpf_attr { * Return * The size of the option data retrieved. * - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. @@ -1304,7 +1304,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) * Description * Change the protocol of the *skb* to *proto*. Currently * supported are transition from IPv4 to IPv6, and from IPv6 to @@ -1331,7 +1331,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * long bpf_skb_change_type(struct sk_buff *skb, u32 type) * Description * Change the packet type for the packet associated to *skb*. This * comes down to setting *skb*\ **->pkt_type** to *type*, except @@ -1358,7 +1358,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) * Description * Check whether *skb* is a descendant of the cgroup2 held by * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. @@ -1389,7 +1389,7 @@ union bpf_attr { * Return * A pointer to the current task struct. * - * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * long bpf_probe_write_user(void *dst, const void *src, u32 len) * Description * Attempt in a safe way to write *len* bytes from the buffer * *src* to *dst* in memory. It only works for threads that are in @@ -1408,7 +1408,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) * Description * Check whether the probe is being run is the context of a given * subset of the cgroup2 hierarchy. The cgroup2 to test is held by @@ -1420,7 +1420,7 @@ union bpf_attr { * * 1, if the *skb* task does not belong to the cgroup2. * * A negative error code, if an error occurred. * - * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) * Description * Resize (trim or grow) the packet associated to *skb* to the * new *len*. The *flags* are reserved for future usage, and must @@ -1444,7 +1444,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * long bpf_skb_pull_data(struct sk_buff *skb, u32 len) * Description * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes @@ -1500,7 +1500,7 @@ union bpf_attr { * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. * - * int bpf_get_numa_node_id(void) + * long bpf_get_numa_node_id(void) * Description * Return the id of the current NUMA node. The primary use case * for this helper is the selection of sockets for the local NUMA @@ -1511,7 +1511,7 @@ union bpf_attr { * Return * The id of current NUMA node. * - * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) * Description * Grows headroom of packet associated to *skb* and adjusts the * offset of the MAC header accordingly, adding *len* bytes of @@ -1532,7 +1532,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that * it is possible to use a negative value for *delta*. This helper @@ -1547,7 +1547,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for @@ -1595,14 +1595,14 @@ union bpf_attr { * is returned (note that **overflowuid** might also be the actual * UID value for the socket). * - * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * long bpf_set_hash(struct sk_buff *skb, u32 hash) * Description * Set the full hash for *skb* (set the field *skb*\ **->hash**) * to value *hash*. * Return * 0 * - * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1630,7 +1630,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -1676,7 +1676,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -1697,7 +1697,7 @@ union bpf_attr { * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. * - * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) + * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and @@ -1708,7 +1708,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a *map* referencing sockets. The * *skops* is used as a new value for the entry associated to @@ -1727,7 +1727,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) * Description * Adjust the address pointed by *xdp_md*\ **->data_meta** by * *delta* (which can be positive or negative). Note that this @@ -1756,7 +1756,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) * Description * Read the value of a perf event counter, and store it into *buf* * of size *buf_size*. This helper relies on a *map* of type @@ -1806,7 +1806,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description * For en eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in @@ -1817,7 +1817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1842,7 +1842,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_regs *regs, u64 rc) + * long bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. @@ -1867,7 +1867,7 @@ union bpf_attr { * Return * 0 * - * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) * Description * Attempt to set the value of the **bpf_sock_ops_cb_flags** field * for the full TCP socket associated to *bpf_sock_ops* to @@ -1911,7 +1911,7 @@ union bpf_attr { * be set is returned (which comes down to 0 if all bits were set * as required). * - * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -1925,7 +1925,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, apply the verdict of the eBPF program to * the next *bytes* (number of bytes) of message *msg*. @@ -1959,7 +1959,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, prevent the execution of the verdict eBPF * program for message *msg* until *bytes* (byte number) have been @@ -1977,7 +1977,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) * Description * For socket policies, pull in non-linear data from user space * for *msg* and set pointers *msg*\ **->data** and *msg*\ @@ -2008,7 +2008,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) * Description * Bind the socket associated to *ctx* to the address pointed by * *addr*, of length *addr_len*. This allows for making outgoing @@ -2026,7 +2026,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is * possible to both shrink and grow the packet tail. @@ -2040,7 +2040,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) * Description * Retrieve the XFRM state (IP transform framework, see also * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. @@ -2056,7 +2056,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) + * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer @@ -2089,7 +2089,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) + * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -2111,7 +2111,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) * Description * Do FIB lookup in kernel tables using parameters in *params*. * If lookup is successful and result shows packet is to be @@ -2142,7 +2142,7 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * - * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to @@ -2161,7 +2161,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -2175,7 +2175,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. @@ -2189,7 +2189,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) * Description * Encapsulate the packet associated to *skb* within a Layer 3 * protocol header. This header is provided in the buffer at @@ -2226,7 +2226,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. Only the flags, tag and TLVs @@ -2241,7 +2241,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) * Description * Adjust the size allocated to TLVs in the outermost IPv6 * Segment Routing Header contained in the packet associated to @@ -2257,7 +2257,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) * Description * Apply an IPv6 Segment Routing action of type *action* to the * packet associated to *skb*. Each action takes a parameter @@ -2286,7 +2286,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_repeat(void *ctx) + * long bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded repeat key message. This delays @@ -2305,7 +2305,7 @@ union bpf_attr { * Return * 0 * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded key press with *scancode*, @@ -2370,7 +2370,7 @@ union bpf_attr { * Return * A pointer to the local storage area. * - * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. @@ -2471,7 +2471,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(struct bpf_sock *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -2479,7 +2479,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) * Description * Push an element *value* in *map*. *flags* is one of: * @@ -2489,19 +2489,19 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * long bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * long bpf_map_peek_elem(struct bpf_map *map, void *value) * Description * Get an element from *map* without removing it. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * For socket policies, insert *len* bytes into *msg* at offset * *start*. @@ -2517,7 +2517,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if @@ -2529,7 +2529,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded pointer movement. @@ -2543,7 +2543,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_lock(struct bpf_spin_lock *lock) + * long bpf_spin_lock(struct bpf_spin_lock *lock) * Description * Acquire a spinlock represented by the pointer *lock*, which is * stored as part of a value of a map. Taking the lock allows to @@ -2591,7 +2591,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * long bpf_spin_unlock(struct bpf_spin_lock *lock) * Description * Release the *lock* previously locked by a call to * **bpf_spin_lock**\ (\ *lock*\ ). @@ -2614,7 +2614,7 @@ union bpf_attr { * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * - * int bpf_skb_ecn_set_ce(struct sk_buff *skb) + * long bpf_skb_ecn_set_ce(struct sk_buff *skb) * Description * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** @@ -2651,7 +2651,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2666,7 +2666,7 @@ union bpf_attr { * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. * - * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) * Description * Get name of sysctl in /proc/sys/ and copy it into provided by * program buffer *buf* of size *buf_len*. @@ -2682,7 +2682,7 @@ union bpf_attr { * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). * - * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get current value of sysctl as it is presented in /proc/sys * (incl. newline, etc), and copy it as a string into provided @@ -2701,7 +2701,7 @@ union bpf_attr { * **-EINVAL** if current value was unavailable, e.g. because * sysctl is uninitialized and read returns -EIO for it. * - * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get new value being written by user space to sysctl (before * the actual write happens) and copy it as a string into @@ -2718,7 +2718,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) * Description * Override new value being written by user space to sysctl with * value provided by program in buffer *buf* of size *buf_len*. @@ -2735,7 +2735,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to a long integer according to the given base @@ -2759,7 +2759,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to an unsigned long integer according to the @@ -2810,7 +2810,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return @@ -2818,7 +2818,7 @@ union bpf_attr { * * **-ENOENT** if the bpf-local-storage cannot be found. * - * int bpf_send_signal(u32 sig) + * long bpf_send_signal(u32 sig) * Description * Send signal *sig* to the process of the current task. * The signal may be delivered to any of this process's threads. @@ -2859,7 +2859,7 @@ union bpf_attr { * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 * - * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -2883,21 +2883,21 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from user space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from kernel space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe user address * *unsafe_ptr* to *dst*. The *size* should include the @@ -2941,7 +2941,7 @@ union bpf_attr { * including the trailing NUL character. On error, a negative * value. * - * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. @@ -2949,14 +2949,14 @@ union bpf_attr { * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. * - * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) + * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt) * Description * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_send_signal_thread(u32 sig) + * long bpf_send_signal_thread(u32 sig) * Description * Send signal *sig* to the thread corresponding to the current task. * Return @@ -2976,7 +2976,7 @@ union bpf_attr { * Return * The 64 bit jiffies * - * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) * Description * For an eBPF program attached to a perf event, retrieve the * branch records (**struct perf_branch_entry**) associated to *ctx* @@ -2995,7 +2995,7 @@ union bpf_attr { * * **-ENOENT** if architecture does not support branch records. * - * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) * Description * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. @@ -3007,7 +3007,7 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * - * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -3062,7 +3062,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) * Description * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, @@ -3097,7 +3097,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) * Description * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print * out the format string. @@ -3126,7 +3126,7 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * long bpf_seq_write(struct seq_file *m, const void *data, u32 len) * Description * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file. The *data* and *len* represent the @@ -3221,7 +3221,7 @@ union bpf_attr { * Return * Requested value, or 0, if flags are not recognized. * - * int bpf_csum_level(struct sk_buff *skb, u64 level) + * long bpf_csum_level(struct sk_buff *skb, u64 level) * Description * Change the skbs checksum level by one layer up or down, or * reset it entirely to none in order to have the stack perform diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 7009dc90e012..eae5cccff761 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -217,7 +217,7 @@ enum bpf_field_info_kind { */ #define BPF_CORE_READ_INTO(dst, src, a, ...) \ ({ \ - ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \ + ___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__) \ }) /* @@ -227,7 +227,7 @@ enum bpf_field_info_kind { */ #define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ ({ \ - ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \ + ___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\ }) /* @@ -254,8 +254,8 @@ enum bpf_field_info_kind { */ #define BPF_CORE_READ(src, a, ...) \ ({ \ - ___type(src, a, ##__VA_ARGS__) __r; \ - BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ __r; \ }) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index f67dce2af802..a510d8ed716f 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -75,5 +75,6 @@ enum libbpf_tristate { }; #define __kconfig __attribute__((section(".kconfig"))) +#define __ksym __attribute__((section(".ksyms"))) #endif diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 70c1b7ec2bd0..06cd1731c154 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -168,6 +168,11 @@ static inline bool btf_kflag(const struct btf_type *t) return BTF_INFO_KFLAG(t->info); } +static inline bool btf_is_void(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_UNKN; +} + static inline bool btf_is_int(const struct btf_type *t) { return btf_kind(t) == BTF_KIND_INT; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 477c679ed945..18461deb1b19 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -285,6 +285,7 @@ struct bpf_struct_ops { #define BSS_SEC ".bss" #define RODATA_SEC ".rodata" #define KCONFIG_SEC ".kconfig" +#define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" enum libbpf_map_type { @@ -310,6 +311,7 @@ struct bpf_map { int map_ifindex; int inner_map_fd; struct bpf_map_def def; + __u32 numa_node; __u32 btf_var_idx; __u32 btf_key_type_id; __u32 btf_value_type_id; @@ -329,24 +331,39 @@ struct bpf_map { enum extern_type { EXT_UNKNOWN, - EXT_CHAR, - EXT_BOOL, - EXT_INT, - EXT_TRISTATE, - EXT_CHAR_ARR, + EXT_KCFG, + EXT_KSYM, +}; + +enum kcfg_type { + KCFG_UNKNOWN, + KCFG_CHAR, + KCFG_BOOL, + KCFG_INT, + KCFG_TRISTATE, + KCFG_CHAR_ARR, }; struct extern_desc { - const char *name; + enum extern_type type; int sym_idx; int btf_id; - enum extern_type type; - int sz; - int align; - int data_off; - bool is_signed; - bool is_weak; + int sec_btf_id; + const char *name; bool is_set; + bool is_weak; + union { + struct { + enum kcfg_type type; + int sz; + int align; + int data_off; + bool is_signed; + } kcfg; + struct { + unsigned long long addr; + } ksym; + }; }; static LIST_HEAD(bpf_objects_list); @@ -1423,19 +1440,19 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, return NULL; } -static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, - char value) +static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, + char value) { - switch (ext->type) { - case EXT_BOOL: + switch (ext->kcfg.type) { + case KCFG_BOOL: if (value == 'm') { - pr_warn("extern %s=%c should be tristate or char\n", + pr_warn("extern (kcfg) %s=%c should be tristate or char\n", ext->name, value); return -EINVAL; } *(bool *)ext_val = value == 'y' ? true : false; break; - case EXT_TRISTATE: + case KCFG_TRISTATE: if (value == 'y') *(enum libbpf_tristate *)ext_val = TRI_YES; else if (value == 'm') @@ -1443,14 +1460,14 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, else /* value == 'n' */ *(enum libbpf_tristate *)ext_val = TRI_NO; break; - case EXT_CHAR: + case KCFG_CHAR: *(char *)ext_val = value; break; - case EXT_UNKNOWN: - case EXT_INT: - case EXT_CHAR_ARR: + case KCFG_UNKNOWN: + case KCFG_INT: + case KCFG_CHAR_ARR: default: - pr_warn("extern %s=%c should be bool, tristate, or char\n", + pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n", ext->name, value); return -EINVAL; } @@ -1458,29 +1475,29 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, return 0; } -static int set_ext_value_str(struct extern_desc *ext, char *ext_val, - const char *value) +static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, + const char *value) { size_t len; - if (ext->type != EXT_CHAR_ARR) { - pr_warn("extern %s=%s should char array\n", ext->name, value); + if (ext->kcfg.type != KCFG_CHAR_ARR) { + pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value); return -EINVAL; } len = strlen(value); if (value[len - 1] != '"') { - pr_warn("extern '%s': invalid string config '%s'\n", + pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", ext->name, value); return -EINVAL; } /* strip quotes */ len -= 2; - if (len >= ext->sz) { - pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", - ext->name, value, len, ext->sz - 1); - len = ext->sz - 1; + if (len >= ext->kcfg.sz) { + pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", + ext->name, value, len, ext->kcfg.sz - 1); + len = ext->kcfg.sz - 1; } memcpy(ext_val, value + 1, len); ext_val[len] = '\0'; @@ -1507,11 +1524,11 @@ static int parse_u64(const char *value, __u64 *res) return 0; } -static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v) +static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) { - int bit_sz = ext->sz * 8; + int bit_sz = ext->kcfg.sz * 8; - if (ext->sz == 8) + if (ext->kcfg.sz == 8) return true; /* Validate that value stored in u64 fits in integer of `ext->sz` @@ -1526,26 +1543,26 @@ static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v) * For unsigned target integer, check that all the (64 - Y) bits are * zero. */ - if (ext->is_signed) + if (ext->kcfg.is_signed) return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); else return (v >> bit_sz) == 0; } -static int set_ext_value_num(struct extern_desc *ext, void *ext_val, - __u64 value) +static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, + __u64 value) { - if (ext->type != EXT_INT && ext->type != EXT_CHAR) { - pr_warn("extern %s=%llu should be integer\n", + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { + pr_warn("extern (kcfg) %s=%llu should be integer\n", ext->name, (unsigned long long)value); return -EINVAL; } - if (!is_ext_value_in_range(ext, value)) { - pr_warn("extern %s=%llu value doesn't fit in %d bytes\n", - ext->name, (unsigned long long)value, ext->sz); + if (!is_kcfg_value_in_range(ext, value)) { + pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n", + ext->name, (unsigned long long)value, ext->kcfg.sz); return -ERANGE; } - switch (ext->sz) { + switch (ext->kcfg.sz) { case 1: *(__u8 *)ext_val = value; break; case 2: *(__u16 *)ext_val = value; break; case 4: *(__u32 *)ext_val = value; break; @@ -1591,30 +1608,30 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj, if (!ext || ext->is_set) return 0; - ext_val = data + ext->data_off; + ext_val = data + ext->kcfg.data_off; value = sep + 1; switch (*value) { case 'y': case 'n': case 'm': - err = set_ext_value_tri(ext, ext_val, *value); + err = set_kcfg_value_tri(ext, ext_val, *value); break; case '"': - err = set_ext_value_str(ext, ext_val, value); + err = set_kcfg_value_str(ext, ext_val, value); break; default: /* assume integer */ err = parse_u64(value, &num); if (err) { - pr_warn("extern %s=%s should be integer\n", + pr_warn("extern (kcfg) %s=%s should be integer\n", ext->name, value); return err; } - err = set_ext_value_num(ext, ext_val, num); + err = set_kcfg_value_num(ext, ext_val, num); break; } if (err) return err; - pr_debug("extern %s=%s\n", ext->name, value); + pr_debug("extern (kcfg) %s=%s\n", ext->name, value); return 0; } @@ -1685,16 +1702,20 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj, static int bpf_object__init_kconfig_map(struct bpf_object *obj) { - struct extern_desc *last_ext; + struct extern_desc *last_ext = NULL, *ext; size_t map_sz; - int err; + int i, err; - if (obj->nr_extern == 0) - return 0; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (ext->type == EXT_KCFG) + last_ext = ext; + } - last_ext = &obj->externs[obj->nr_extern - 1]; - map_sz = last_ext->data_off + last_ext->sz; + if (!last_ext) + return 0; + map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, obj->efile.symbols_shndx, NULL, map_sz); @@ -1957,6 +1978,10 @@ static int parse_btf_map_def(struct bpf_object *obj, return -EINVAL; pr_debug("map '%s': found map_flags = %u.\n", map->name, map->def.map_flags); + } else if (strcmp(name, "numa_node") == 0) { + if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node)) + return -EINVAL; + pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node); } else if (strcmp(name, "key_size") == 0) { __u32 sz; @@ -2709,8 +2734,33 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name) return -ENOENT; } -static enum extern_type find_extern_type(const struct btf *btf, int id, - bool *is_signed) +static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { + const struct btf_var_secinfo *vs; + const struct btf_type *t; + int i, j, n; + + if (!btf) + return -ESRCH; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(btf, i); + + if (!btf_is_datasec(t)) + continue; + + vs = btf_var_secinfos(t); + for (j = 0; j < btf_vlen(t); j++, vs++) { + if (vs->type == ext_btf_id) + return i; + } + } + + return -ENOENT; +} + +static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, + bool *is_signed) { const struct btf_type *t; const char *name; @@ -2725,29 +2775,29 @@ static enum extern_type find_extern_type(const struct btf *btf, int id, int enc = btf_int_encoding(t); if (enc & BTF_INT_BOOL) - return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN; + return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN; if (is_signed) *is_signed = enc & BTF_INT_SIGNED; if (t->size == 1) - return EXT_CHAR; + return KCFG_CHAR; if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) - return EXT_UNKNOWN; - return EXT_INT; + return KCFG_UNKNOWN; + return KCFG_INT; } case BTF_KIND_ENUM: if (t->size != 4) - return EXT_UNKNOWN; + return KCFG_UNKNOWN; if (strcmp(name, "libbpf_tristate")) - return EXT_UNKNOWN; - return EXT_TRISTATE; + return KCFG_UNKNOWN; + return KCFG_TRISTATE; case BTF_KIND_ARRAY: if (btf_array(t)->nelems == 0) - return EXT_UNKNOWN; - if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR) - return EXT_UNKNOWN; - return EXT_CHAR_ARR; + return KCFG_UNKNOWN; + if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR) + return KCFG_UNKNOWN; + return KCFG_CHAR_ARR; default: - return EXT_UNKNOWN; + return KCFG_UNKNOWN; } } @@ -2756,23 +2806,45 @@ static int cmp_externs(const void *_a, const void *_b) const struct extern_desc *a = _a; const struct extern_desc *b = _b; - /* descending order by alignment requirements */ - if (a->align != b->align) - return a->align > b->align ? -1 : 1; - /* ascending order by size, within same alignment class */ - if (a->sz != b->sz) - return a->sz < b->sz ? -1 : 1; + if (a->type != b->type) + return a->type < b->type ? -1 : 1; + + if (a->type == EXT_KCFG) { + /* descending order by alignment requirements */ + if (a->kcfg.align != b->kcfg.align) + return a->kcfg.align > b->kcfg.align ? -1 : 1; + /* ascending order by size, within same alignment class */ + if (a->kcfg.sz != b->kcfg.sz) + return a->kcfg.sz < b->kcfg.sz ? -1 : 1; + } + /* resolve ties by name */ return strcmp(a->name, b->name); } +static int find_int_btf_id(const struct btf *btf) +{ + const struct btf_type *t; + int i, n; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(btf, i); + + if (btf_is_int(t) && btf_int_bits(t) == 32) + return i; + } + + return 0; +} + static int bpf_object__collect_externs(struct bpf_object *obj) { + struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; const struct btf_type *t; struct extern_desc *ext; - int i, n, off, btf_id; - struct btf_type *sec; - const char *ext_name; + int i, n, off; + const char *ext_name, *sec_name; Elf_Scn *scn; GElf_Shdr sh; @@ -2818,22 +2890,50 @@ static int bpf_object__collect_externs(struct bpf_object *obj) ext->name = btf__name_by_offset(obj->btf, t->name_off); ext->sym_idx = i; ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; - ext->sz = btf__resolve_size(obj->btf, t->type); - if (ext->sz <= 0) { - pr_warn("failed to resolve size of extern '%s': %d\n", - ext_name, ext->sz); - return ext->sz; - } - ext->align = btf__align_of(obj->btf, t->type); - if (ext->align <= 0) { - pr_warn("failed to determine alignment of extern '%s': %d\n", - ext_name, ext->align); - return -EINVAL; - } - ext->type = find_extern_type(obj->btf, t->type, - &ext->is_signed); - if (ext->type == EXT_UNKNOWN) { - pr_warn("extern '%s' type is unsupported\n", ext_name); + + ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); + if (ext->sec_btf_id <= 0) { + pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", + ext_name, ext->btf_id, ext->sec_btf_id); + return ext->sec_btf_id; + } + sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id); + sec_name = btf__name_by_offset(obj->btf, sec->name_off); + + if (strcmp(sec_name, KCONFIG_SEC) == 0) { + kcfg_sec = sec; + ext->type = EXT_KCFG; + ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); + if (ext->kcfg.sz <= 0) { + pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n", + ext_name, ext->kcfg.sz); + return ext->kcfg.sz; + } + ext->kcfg.align = btf__align_of(obj->btf, t->type); + if (ext->kcfg.align <= 0) { + pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n", + ext_name, ext->kcfg.align); + return -EINVAL; + } + ext->kcfg.type = find_kcfg_type(obj->btf, t->type, + &ext->kcfg.is_signed); + if (ext->kcfg.type == KCFG_UNKNOWN) { + pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name); + return -ENOTSUP; + } + } else if (strcmp(sec_name, KSYMS_SEC) == 0) { + const struct btf_type *vt; + + ksym_sec = sec; + ext->type = EXT_KSYM; + + vt = skip_mods_and_typedefs(obj->btf, t->type, NULL); + if (!btf_is_void(vt)) { + pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name); + return -ENOTSUP; + } + } else { + pr_warn("unrecognized extern section '%s'\n", sec_name); return -ENOTSUP; } } @@ -2842,42 +2942,80 @@ static int bpf_object__collect_externs(struct bpf_object *obj) if (!obj->nr_extern) return 0; - /* sort externs by (alignment, size, name) and calculate their offsets - * within a map */ + /* sort externs by type, for kcfg ones also by (align, size, name) */ qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); - off = 0; - for (i = 0; i < obj->nr_extern; i++) { - ext = &obj->externs[i]; - ext->data_off = roundup(off, ext->align); - off = ext->data_off + ext->sz; - pr_debug("extern #%d: symbol %d, off %u, name %s\n", - i, ext->sym_idx, ext->data_off, ext->name); - } - btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC); - if (btf_id <= 0) { - pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC); - return -ESRCH; - } + /* for .ksyms section, we need to turn all externs into allocated + * variables in BTF to pass kernel verification; we do this by + * pretending that each extern is a 8-byte variable + */ + if (ksym_sec) { + /* find existing 4-byte integer type in BTF to use for fake + * extern variables in DATASEC + */ + int int_btf_id = find_int_btf_id(obj->btf); - sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id); - sec->size = off; - n = btf_vlen(sec); - for (i = 0; i < n; i++) { - struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; - - t = btf__type_by_id(obj->btf, vs->type); - ext_name = btf__name_by_offset(obj->btf, t->name_off); - ext = find_extern_by_name(obj, ext_name); - if (!ext) { - pr_warn("failed to find extern definition for BTF var '%s'\n", - ext_name); - return -ESRCH; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (ext->type != EXT_KSYM) + continue; + pr_debug("extern (ksym) #%d: symbol %d, name %s\n", + i, ext->sym_idx, ext->name); + } + + sec = ksym_sec; + n = btf_vlen(sec); + for (i = 0, off = 0; i < n; i++, off += sizeof(int)) { + struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; + struct btf_type *vt; + + vt = (void *)btf__type_by_id(obj->btf, vs->type); + ext_name = btf__name_by_offset(obj->btf, vt->name_off); + ext = find_extern_by_name(obj, ext_name); + if (!ext) { + pr_warn("failed to find extern definition for BTF var '%s'\n", + ext_name); + return -ESRCH; + } + btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + vt->type = int_btf_id; + vs->offset = off; + vs->size = sizeof(int); } - vs->offset = ext->data_off; - btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + sec->size = off; } + if (kcfg_sec) { + sec = kcfg_sec; + /* for kcfg externs calculate their offsets within a .kconfig map */ + off = 0; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (ext->type != EXT_KCFG) + continue; + + ext->kcfg.data_off = roundup(off, ext->kcfg.align); + off = ext->kcfg.data_off + ext->kcfg.sz; + pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n", + i, ext->sym_idx, ext->kcfg.data_off, ext->name); + } + sec->size = off; + n = btf_vlen(sec); + for (i = 0; i < n; i++) { + struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; + + t = btf__type_by_id(obj->btf, vs->type); + ext_name = btf__name_by_offset(obj->btf, t->name_off); + ext = find_extern_by_name(obj, ext_name); + if (!ext) { + pr_warn("failed to find extern definition for BTF var '%s'\n", + ext_name); + return -ESRCH; + } + btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + vs->offset = ext->kcfg.data_off; + } + } return 0; } @@ -3007,11 +3145,11 @@ static int bpf_program__record_reloc(struct bpf_program *prog, sym_idx); return -LIBBPF_ERRNO__RELOC; } - pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n", - i, ext->name, ext->sym_idx, ext->data_off, insn_idx); + pr_debug("found extern #%d '%s' (sym %d) for insn %u\n", + i, ext->name, ext->sym_idx, insn_idx); reloc_desc->type = RELO_EXTERN; reloc_desc->insn_idx = insn_idx; - reloc_desc->sym_off = ext->data_off; + reloc_desc->sym_off = i; /* sym_off stores extern index */ return 0; } @@ -3222,20 +3360,27 @@ err_free_new_name: return err; } -int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +__u32 bpf_map__max_entries(const struct bpf_map *map) { - if (!map || !max_entries) - return -EINVAL; + return map->def.max_entries; +} - /* If map already created, its attributes can't be changed. */ +int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) +{ if (map->fd >= 0) return -EBUSY; - map->def.max_entries = max_entries; - return 0; } +int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +{ + if (!map || !max_entries) + return -EINVAL; + + return bpf_map__set_max_entries(map, max_entries); +} + static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -3603,6 +3748,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) create_attr.map_flags = def->map_flags; create_attr.key_size = def->key_size; create_attr.value_size = def->value_size; + create_attr.numa_node = map->numa_node; if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { int nr_cpus; @@ -4928,6 +5074,7 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + struct extern_desc *ext; if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { pr_warn("relocation out of range: '%s'\n", @@ -4946,9 +5093,15 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) insn[0].imm = obj->maps[relo->map_idx].fd; break; case RELO_EXTERN: - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; - insn[1].imm = relo->sym_off; + ext = &obj->externs[relo->sym_off]; + if (ext->type == EXT_KCFG) { + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + insn[1].imm = ext->kcfg.data_off; + } else /* EXT_KSYM */ { + insn[0].imm = (__u32)ext->ksym.addr; + insn[1].imm = ext->ksym.addr >> 32; + } break; case RELO_CALL: err = bpf_program__reloc_text(prog, obj, relo); @@ -5567,56 +5720,114 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) return 0; } +static int bpf_object__read_kallsyms_file(struct bpf_object *obj) +{ + char sym_type, sym_name[500]; + unsigned long long sym_addr; + struct extern_desc *ext; + int ret, err = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (!f) { + err = -errno; + pr_warn("failed to open /proc/kallsyms: %d\n", err); + return err; + } + + while (true) { + ret = fscanf(f, "%llx %c %499s%*[^\n]\n", + &sym_addr, &sym_type, sym_name); + if (ret == EOF && feof(f)) + break; + if (ret != 3) { + pr_warn("failed to read kallasyms entry: %d\n", ret); + err = -EINVAL; + goto out; + } + + ext = find_extern_by_name(obj, sym_name); + if (!ext || ext->type != EXT_KSYM) + continue; + + if (ext->is_set && ext->ksym.addr != sym_addr) { + pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", + sym_name, ext->ksym.addr, sym_addr); + err = -EINVAL; + goto out; + } + if (!ext->is_set) { + ext->is_set = true; + ext->ksym.addr = sym_addr; + pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr); + } + } + +out: + fclose(f); + return err; +} + static int bpf_object__resolve_externs(struct bpf_object *obj, const char *extra_kconfig) { - bool need_config = false; + bool need_config = false, need_kallsyms = false; struct extern_desc *ext; + void *kcfg_data = NULL; int err, i; - void *data; if (obj->nr_extern == 0) return 0; - data = obj->maps[obj->kconfig_map_idx].mmaped; + if (obj->kconfig_map_idx >= 0) + kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped; for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; - if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { - void *ext_val = data + ext->data_off; + if (ext->type == EXT_KCFG && + strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + void *ext_val = kcfg_data + ext->kcfg.data_off; __u32 kver = get_kernel_version(); if (!kver) { pr_warn("failed to get kernel version\n"); return -EINVAL; } - err = set_ext_value_num(ext, ext_val, kver); + err = set_kcfg_value_num(ext, ext_val, kver); if (err) return err; - pr_debug("extern %s=0x%x\n", ext->name, kver); - } else if (strncmp(ext->name, "CONFIG_", 7) == 0) { + pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); + } else if (ext->type == EXT_KCFG && + strncmp(ext->name, "CONFIG_", 7) == 0) { need_config = true; + } else if (ext->type == EXT_KSYM) { + need_kallsyms = true; } else { pr_warn("unrecognized extern '%s'\n", ext->name); return -EINVAL; } } if (need_config && extra_kconfig) { - err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data); + err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data); if (err) return -EINVAL; need_config = false; for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; - if (!ext->is_set) { + if (ext->type == EXT_KCFG && !ext->is_set) { need_config = true; break; } } } if (need_config) { - err = bpf_object__read_kconfig_file(obj, data); + err = bpf_object__read_kconfig_file(obj, kcfg_data); + if (err) + return -EINVAL; + } + if (need_kallsyms) { + err = bpf_object__read_kallsyms_file(obj); if (err) return -EINVAL; } @@ -7088,6 +7299,71 @@ const char *bpf_map__name(const struct bpf_map *map) return map ? map->name : NULL; } +enum bpf_map_type bpf_map__type(const struct bpf_map *map) +{ + return map->def.type; +} + +int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.type = type; + return 0; +} + +__u32 bpf_map__map_flags(const struct bpf_map *map) +{ + return map->def.map_flags; +} + +int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.map_flags = flags; + return 0; +} + +__u32 bpf_map__numa_node(const struct bpf_map *map) +{ + return map->numa_node; +} + +int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) +{ + if (map->fd >= 0) + return -EBUSY; + map->numa_node = numa_node; + return 0; +} + +__u32 bpf_map__key_size(const struct bpf_map *map) +{ + return map->def.key_size; +} + +int bpf_map__set_key_size(struct bpf_map *map, __u32 size) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.key_size = size; + return 0; +} + +__u32 bpf_map__value_size(const struct bpf_map *map) +{ + return map->def.value_size; +} + +int bpf_map__set_value_size(struct bpf_map *map, __u32 size) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.value_size = size; + return 0; +} + __u32 bpf_map__btf_key_type_id(const struct bpf_map *map) { return map ? map->btf_key_type_id : 0; @@ -7140,9 +7416,17 @@ bool bpf_map__is_internal(const struct bpf_map *map) return map->libbpf_type != LIBBPF_MAP_UNSPEC; } -void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) +__u32 bpf_map__ifindex(const struct bpf_map *map) { + return map->map_ifindex; +} + +int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) +{ + if (map->fd >= 0) + return -EBUSY; map->map_ifindex = ifindex; + return 0; } int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 334437af3014..fdd279fb1866 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -418,11 +418,38 @@ bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); +/* get/set map FD */ LIBBPF_API int bpf_map__fd(const struct bpf_map *map); +LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); +/* get map definition */ LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); +/* get map name */ LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); +/* get/set map type */ +LIBBPF_API enum bpf_map_type bpf_map__type(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type); +/* get/set map size (max_entries) */ +LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries); +LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); +/* get/set map flags */ +LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags); +/* get/set map NUMA node */ +LIBBPF_API __u32 bpf_map__numa_node(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node); +/* get/set map key size */ +LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size); +/* get/set map value size */ +LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size); +/* get map key/value BTF type IDs */ LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); +/* get/set map if_index */ +LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, @@ -430,11 +457,8 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); -LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); -LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); -LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f732c77b7ed0..9914e0db4859 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -270,3 +270,20 @@ LIBBPF_0.0.9 { ring_buffer__new; ring_buffer__poll; } LIBBPF_0.0.8; + +LIBBPF_0.1.0 { + global: + bpf_map__ifindex; + bpf_map__key_size; + bpf_map__map_flags; + bpf_map__max_entries; + bpf_map__numa_node; + bpf_map__set_key_size; + bpf_map__set_map_flags; + bpf_map__set_max_entries; + bpf_map__set_numa_node; + bpf_map__set_type; + bpf_map__set_value_size; + bpf_map__type; + bpf_map__value_size; +} LIBBPF_0.0.9; diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index db3c07beb9d1..b5f7a996c4d0 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -51,7 +51,7 @@ struct nd_cmd_translate_spa { __u32 nfit_device_handle; __u32 _reserved; __u64 dpa; - } __packed devices[0]; + } __packed devices[]; } __packed; @@ -74,7 +74,7 @@ struct nd_cmd_ars_err_inj_stat { struct nd_error_stat_query_record { __u64 err_inj_stat_spa_range_base; __u64 err_inj_stat_spa_range_length; - } __packed record[0]; + } __packed record[]; } __packed; #define ND_INTEL_SMART 1 @@ -180,7 +180,7 @@ struct nd_intel_fw_send_data { __u32 context; __u32 offset; __u32 length; - __u8 data[0]; + __u8 data[]; /* this field is not declared due ot variable data from input */ /* __u32 status; */ } __packed; diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c new file mode 100644 index 000000000000..e3d6777226a8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <test_progs.h> +#include "test_ksyms.skel.h" +#include <sys/stat.h> + +static int duration; + +static __u64 kallsyms_find(const char *sym) +{ + char type, name[500]; + __u64 addr, res = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno)) + return 0; + + while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) { + if (strcmp(name, sym) == 0) { + res = addr; + goto out; + } + } + + CHECK(false, "not_found", "symbol %s not found\n", sym); +out: + fclose(f); + return res; +} + +void test_ksyms(void) +{ + __u64 link_fops_addr = kallsyms_find("bpf_link_fops"); + const char *btf_path = "/sys/kernel/btf/vmlinux"; + struct test_ksyms *skel; + struct test_ksyms__data *data; + struct stat st; + __u64 btf_size; + int err; + + if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno)) + return; + btf_size = st.st_size; + + skel = test_ksyms__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) + return; + + err = test_ksyms__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = skel->data; + CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops", + "got 0x%llx, exp 0x%llx\n", + data->out__bpf_link_fops, link_fops_addr); + CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1", + "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0); + CHECK(data->out__btf_size != btf_size, "btf_size", + "got %llu, exp %llu\n", data->out__btf_size, btf_size); + CHECK(data->out__per_cpu_start != 0, "__per_cpu_start", + "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0); + +cleanup: + test_ksyms__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c new file mode 100644 index 000000000000..c230a573c373 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <test_progs.h> +#include <network_helpers.h> + +#include "map_ptr_kern.skel.h" + +void test_map_ptr(void) +{ + struct map_ptr_kern *skel; + __u32 duration = 0, retval; + char buf[128]; + int err; + + skel = map_ptr_kern__open_and_load(); + if (CHECK(!skel, "skel_open_load", "open_load failed\n")) + return; + + err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4, + sizeof(pkt_v4), buf, NULL, &retval, NULL); + + if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno)) + goto cleanup; + + if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval, + skel->bss->g_map_type, skel->bss->g_line)) + goto cleanup; + +cleanup: + map_ptr_kern__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c new file mode 100644 index 000000000000..c75525eab02c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/varlen.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <time.h> +#include "test_varlen.skel.h" + +#define CHECK_VAL(got, exp) \ + CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \ + (long)(got), (long)(exp)) + +void test_varlen(void) +{ + int duration = 0, err; + struct test_varlen* skel; + struct test_varlen__bss *bss; + struct test_varlen__data *data; + const char str1[] = "Hello, "; + const char str2[] = "World!"; + const char exp_str[] = "Hello, \0World!\0"; + const int size1 = sizeof(str1); + const int size2 = sizeof(str2); + + skel = test_varlen__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + data = skel->data; + + err = test_varlen__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + bss->test_pid = getpid(); + + /* trigger everything */ + memcpy(bss->buf_in1, str1, size1); + memcpy(bss->buf_in2, str2, size2); + bss->capture = true; + usleep(1); + bss->capture = false; + + CHECK_VAL(bss->payload1_len1, size1); + CHECK_VAL(bss->payload1_len2, size2); + CHECK_VAL(bss->total1, size1 + size2); + CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload2_len1, size1); + CHECK_VAL(data->payload2_len2, size2); + CHECK_VAL(data->total2, size1 + size2); + CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload3_len1, size1); + CHECK_VAL(data->payload3_len2, size2); + CHECK_VAL(data->total3, size1 + size2); + CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload4_len1, size1); + CHECK_VAL(data->payload4_len2, size2); + CHECK_VAL(data->total4, size1 + size2); + CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check", + "doesn't match!"); +cleanup: + test_varlen__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c new file mode 100644 index 000000000000..473665cac67e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -0,0 +1,686 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define LOOP_BOUND 0xf +#define MAX_ENTRIES 8 +#define HALF_ENTRIES (MAX_ENTRIES >> 1) + +_Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND"); + +enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC; +__u32 g_line = 0; + +#define VERIFY_TYPE(type, func) ({ \ + g_map_type = type; \ + if (!func()) \ + return 0; \ +}) + + +#define VERIFY(expr) ({ \ + g_line = __LINE__; \ + if (!(expr)) \ + return 0; \ +}) + +struct bpf_map_memory { + __u32 pages; +} __attribute__((preserve_access_index)); + +struct bpf_map { + enum bpf_map_type map_type; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 id; + struct bpf_map_memory memory; +} __attribute__((preserve_access_index)); + +static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size, + __u32 value_size, __u32 max_entries) +{ + VERIFY(map->map_type == g_map_type); + VERIFY(map->key_size == key_size); + VERIFY(map->value_size == value_size); + VERIFY(map->max_entries == max_entries); + VERIFY(map->id > 0); + VERIFY(map->memory.pages > 0); + + return 1; +} + +static inline int check_bpf_map_ptr(struct bpf_map *indirect, + struct bpf_map *direct) +{ + VERIFY(indirect->map_type == direct->map_type); + VERIFY(indirect->key_size == direct->key_size); + VERIFY(indirect->value_size == direct->value_size); + VERIFY(indirect->max_entries == direct->max_entries); + VERIFY(indirect->id == direct->id); + VERIFY(indirect->memory.pages == direct->memory.pages); + + return 1; +} + +static inline int check(struct bpf_map *indirect, struct bpf_map *direct, + __u32 key_size, __u32 value_size, __u32 max_entries) +{ + VERIFY(check_bpf_map_ptr(indirect, direct)); + VERIFY(check_bpf_map_fields(indirect, key_size, value_size, + max_entries)); + return 1; +} + +static inline int check_default(struct bpf_map *indirect, + struct bpf_map *direct) +{ + VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32), + MAX_ENTRIES)); + return 1; +} + +typedef struct { + int counter; +} atomic_t; + +struct bpf_htab { + struct bpf_map map; + atomic_t count; + __u32 n_buckets; + __u32 elem_size; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); /* to test bpf_htab.count */ + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_hash SEC(".maps"); + +static inline int check_hash(void) +{ + struct bpf_htab *hash = (struct bpf_htab *)&m_hash; + struct bpf_map *map = (struct bpf_map *)&m_hash; + int i; + + VERIFY(check_default(&hash->map, map)); + + VERIFY(hash->n_buckets == MAX_ENTRIES); + VERIFY(hash->elem_size == 64); + + VERIFY(hash->count.counter == 0); + for (i = 0; i < HALF_ENTRIES; ++i) { + const __u32 key = i; + const __u32 val = 1; + + if (bpf_map_update_elem(hash, &key, &val, 0)) + return 0; + } + VERIFY(hash->count.counter == HALF_ENTRIES); + + return 1; +} + +struct bpf_array { + struct bpf_map map; + __u32 elem_size; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_array SEC(".maps"); + +static inline int check_array(void) +{ + struct bpf_array *array = (struct bpf_array *)&m_array; + struct bpf_map *map = (struct bpf_map *)&m_array; + int i, n_lookups = 0, n_keys = 0; + + VERIFY(check_default(&array->map, map)); + + VERIFY(array->elem_size == 8); + + for (i = 0; i < array->map.max_entries && i < LOOP_BOUND; ++i) { + const __u32 key = i; + __u32 *val = bpf_map_lookup_elem(array, &key); + + ++n_lookups; + if (val) + ++n_keys; + } + + VERIFY(n_lookups == MAX_ENTRIES); + VERIFY(n_keys == MAX_ENTRIES); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_prog_array SEC(".maps"); + +static inline int check_prog_array(void) +{ + struct bpf_array *prog_array = (struct bpf_array *)&m_prog_array; + struct bpf_map *map = (struct bpf_map *)&m_prog_array; + + VERIFY(check_default(&prog_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_perf_event_array SEC(".maps"); + +static inline int check_perf_event_array(void) +{ + struct bpf_array *perf_event_array = (struct bpf_array *)&m_perf_event_array; + struct bpf_map *map = (struct bpf_map *)&m_perf_event_array; + + VERIFY(check_default(&perf_event_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_percpu_hash SEC(".maps"); + +static inline int check_percpu_hash(void) +{ + struct bpf_htab *percpu_hash = (struct bpf_htab *)&m_percpu_hash; + struct bpf_map *map = (struct bpf_map *)&m_percpu_hash; + + VERIFY(check_default(&percpu_hash->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_percpu_array SEC(".maps"); + +static inline int check_percpu_array(void) +{ + struct bpf_array *percpu_array = (struct bpf_array *)&m_percpu_array; + struct bpf_map *map = (struct bpf_map *)&m_percpu_array; + + VERIFY(check_default(&percpu_array->map, map)); + + return 1; +} + +struct bpf_stack_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u64); +} m_stack_trace SEC(".maps"); + +static inline int check_stack_trace(void) +{ + struct bpf_stack_map *stack_trace = + (struct bpf_stack_map *)&m_stack_trace; + struct bpf_map *map = (struct bpf_map *)&m_stack_trace; + + VERIFY(check(&stack_trace->map, map, sizeof(__u32), sizeof(__u64), + MAX_ENTRIES)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_cgroup_array SEC(".maps"); + +static inline int check_cgroup_array(void) +{ + struct bpf_array *cgroup_array = (struct bpf_array *)&m_cgroup_array; + struct bpf_map *map = (struct bpf_map *)&m_cgroup_array; + + VERIFY(check_default(&cgroup_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_lru_hash SEC(".maps"); + +static inline int check_lru_hash(void) +{ + struct bpf_htab *lru_hash = (struct bpf_htab *)&m_lru_hash; + struct bpf_map *map = (struct bpf_map *)&m_lru_hash; + + VERIFY(check_default(&lru_hash->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_lru_percpu_hash SEC(".maps"); + +static inline int check_lru_percpu_hash(void) +{ + struct bpf_htab *lru_percpu_hash = (struct bpf_htab *)&m_lru_percpu_hash; + struct bpf_map *map = (struct bpf_map *)&m_lru_percpu_hash; + + VERIFY(check_default(&lru_percpu_hash->map, map)); + + return 1; +} + +struct lpm_trie { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct lpm_key { + struct bpf_lpm_trie_key trie_key; + __u32 data; +}; + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, MAX_ENTRIES); + __type(key, struct lpm_key); + __type(value, __u32); +} m_lpm_trie SEC(".maps"); + +static inline int check_lpm_trie(void) +{ + struct lpm_trie *lpm_trie = (struct lpm_trie *)&m_lpm_trie; + struct bpf_map *map = (struct bpf_map *)&m_lpm_trie; + + VERIFY(check(&lpm_trie->map, map, sizeof(struct lpm_key), sizeof(__u32), + MAX_ENTRIES)); + + return 1; +} + +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); + }); +} m_array_of_maps SEC(".maps") = { + .values = { (void *)&inner_map, 0, 0, 0, 0, 0, 0, 0, 0 }, +}; + +static inline int check_array_of_maps(void) +{ + struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps; + struct bpf_map *map = (struct bpf_map *)&m_array_of_maps; + + VERIFY(check_default(&array_of_maps->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); + __array(values, struct inner_map); +} m_hash_of_maps SEC(".maps") = { + .values = { + [2] = &inner_map, + }, +}; + +static inline int check_hash_of_maps(void) +{ + struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps; + struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps; + + VERIFY(check_default(&hash_of_maps->map, map)); + + return 1; +} + +struct bpf_dtab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_devmap SEC(".maps"); + +static inline int check_devmap(void) +{ + struct bpf_dtab *devmap = (struct bpf_dtab *)&m_devmap; + struct bpf_map *map = (struct bpf_map *)&m_devmap; + + VERIFY(check_default(&devmap->map, map)); + + return 1; +} + +struct bpf_stab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_sockmap SEC(".maps"); + +static inline int check_sockmap(void) +{ + struct bpf_stab *sockmap = (struct bpf_stab *)&m_sockmap; + struct bpf_map *map = (struct bpf_map *)&m_sockmap; + + VERIFY(check_default(&sockmap->map, map)); + + return 1; +} + +struct bpf_cpu_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_cpumap SEC(".maps"); + +static inline int check_cpumap(void) +{ + struct bpf_cpu_map *cpumap = (struct bpf_cpu_map *)&m_cpumap; + struct bpf_map *map = (struct bpf_map *)&m_cpumap; + + VERIFY(check_default(&cpumap->map, map)); + + return 1; +} + +struct xsk_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_XSKMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_xskmap SEC(".maps"); + +static inline int check_xskmap(void) +{ + struct xsk_map *xskmap = (struct xsk_map *)&m_xskmap; + struct bpf_map *map = (struct bpf_map *)&m_xskmap; + + VERIFY(check_default(&xskmap->map, map)); + + return 1; +} + +struct bpf_shtab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_sockhash SEC(".maps"); + +static inline int check_sockhash(void) +{ + struct bpf_shtab *sockhash = (struct bpf_shtab *)&m_sockhash; + struct bpf_map *map = (struct bpf_map *)&m_sockhash; + + VERIFY(check_default(&sockhash->map, map)); + + return 1; +} + +struct bpf_cgroup_storage_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u32); +} m_cgroup_storage SEC(".maps"); + +static inline int check_cgroup_storage(void) +{ + struct bpf_cgroup_storage_map *cgroup_storage = + (struct bpf_cgroup_storage_map *)&m_cgroup_storage; + struct bpf_map *map = (struct bpf_map *)&m_cgroup_storage; + + VERIFY(check(&cgroup_storage->map, map, + sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0)); + + return 1; +} + +struct reuseport_array { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_reuseport_sockarray SEC(".maps"); + +static inline int check_reuseport_sockarray(void) +{ + struct reuseport_array *reuseport_sockarray = + (struct reuseport_array *)&m_reuseport_sockarray; + struct bpf_map *map = (struct bpf_map *)&m_reuseport_sockarray; + + VERIFY(check_default(&reuseport_sockarray->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u32); +} m_percpu_cgroup_storage SEC(".maps"); + +static inline int check_percpu_cgroup_storage(void) +{ + struct bpf_cgroup_storage_map *percpu_cgroup_storage = + (struct bpf_cgroup_storage_map *)&m_percpu_cgroup_storage; + struct bpf_map *map = (struct bpf_map *)&m_percpu_cgroup_storage; + + VERIFY(check(&percpu_cgroup_storage->map, map, + sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0)); + + return 1; +} + +struct bpf_queue_stack { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_QUEUE); + __uint(max_entries, MAX_ENTRIES); + __type(value, __u32); +} m_queue SEC(".maps"); + +static inline int check_queue(void) +{ + struct bpf_queue_stack *queue = (struct bpf_queue_stack *)&m_queue; + struct bpf_map *map = (struct bpf_map *)&m_queue; + + VERIFY(check(&queue->map, map, 0, sizeof(__u32), MAX_ENTRIES)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_STACK); + __uint(max_entries, MAX_ENTRIES); + __type(value, __u32); +} m_stack SEC(".maps"); + +static inline int check_stack(void) +{ + struct bpf_queue_stack *stack = (struct bpf_queue_stack *)&m_stack; + struct bpf_map *map = (struct bpf_map *)&m_stack; + + VERIFY(check(&stack->map, map, 0, sizeof(__u32), MAX_ENTRIES)); + + return 1; +} + +struct bpf_sk_storage_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, __u32); + __type(value, __u32); +} m_sk_storage SEC(".maps"); + +static inline int check_sk_storage(void) +{ + struct bpf_sk_storage_map *sk_storage = + (struct bpf_sk_storage_map *)&m_sk_storage; + struct bpf_map *map = (struct bpf_map *)&m_sk_storage; + + VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_devmap_hash SEC(".maps"); + +static inline int check_devmap_hash(void) +{ + struct bpf_dtab *devmap_hash = (struct bpf_dtab *)&m_devmap_hash; + struct bpf_map *map = (struct bpf_map *)&m_devmap_hash; + + VERIFY(check_default(&devmap_hash->map, map)); + + return 1; +} + +struct bpf_ringbuf_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} m_ringbuf SEC(".maps"); + +static inline int check_ringbuf(void) +{ + struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf; + struct bpf_map *map = (struct bpf_map *)&m_ringbuf; + + VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12)); + + return 1; +} + +SEC("cgroup_skb/egress") +int cg_skb(void *ctx) +{ + VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash); + VERIFY_TYPE(BPF_MAP_TYPE_ARRAY, check_array); + VERIFY_TYPE(BPF_MAP_TYPE_PROG_ARRAY, check_prog_array); + VERIFY_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, check_perf_event_array); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_HASH, check_percpu_hash); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, check_percpu_array); + VERIFY_TYPE(BPF_MAP_TYPE_STACK_TRACE, check_stack_trace); + VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, check_cgroup_array); + VERIFY_TYPE(BPF_MAP_TYPE_LRU_HASH, check_lru_hash); + VERIFY_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, check_lru_percpu_hash); + VERIFY_TYPE(BPF_MAP_TYPE_LPM_TRIE, check_lpm_trie); + VERIFY_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, check_array_of_maps); + VERIFY_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, check_hash_of_maps); + VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP, check_devmap); + VERIFY_TYPE(BPF_MAP_TYPE_SOCKMAP, check_sockmap); + VERIFY_TYPE(BPF_MAP_TYPE_CPUMAP, check_cpumap); + VERIFY_TYPE(BPF_MAP_TYPE_XSKMAP, check_xskmap); + VERIFY_TYPE(BPF_MAP_TYPE_SOCKHASH, check_sockhash); + VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, check_cgroup_storage); + VERIFY_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + check_reuseport_sockarray); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + check_percpu_cgroup_storage); + VERIFY_TYPE(BPF_MAP_TYPE_QUEUE, check_queue); + VERIFY_TYPE(BPF_MAP_TYPE_STACK, check_stack); + VERIFY_TYPE(BPF_MAP_TYPE_SK_STORAGE, check_sk_storage); + VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, check_devmap_hash); + VERIFY_TYPE(BPF_MAP_TYPE_RINGBUF, check_ringbuf); + + return 1; +} + +__u32 _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ksyms.c b/tools/testing/selftests/bpf/progs/test_ksyms.c new file mode 100644 index 000000000000..6c9cbb5a3bdf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__u64 out__bpf_link_fops = -1; +__u64 out__bpf_link_fops1 = -1; +__u64 out__btf_size = -1; +__u64 out__per_cpu_start = -1; + +extern const void bpf_link_fops __ksym; +extern const void __start_BTF __ksym; +extern const void __stop_BTF __ksym; +extern const void __per_cpu_start __ksym; +/* non-existing symbol, weak, default to zero */ +extern const void bpf_link_fops1 __ksym __weak; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + out__bpf_link_fops = (__u64)&bpf_link_fops; + out__btf_size = (__u64)(&__stop_BTF - &__start_BTF); + out__per_cpu_start = (__u64)&__per_cpu_start; + + out__bpf_link_fops1 = (__u64)&bpf_link_fops1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c new file mode 100644 index 000000000000..cd4b72c55dfe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#define MAX_LEN 256 + +char buf_in1[MAX_LEN] = {}; +char buf_in2[MAX_LEN] = {}; + +int test_pid = 0; +bool capture = false; + +/* .bss */ +long payload1_len1 = 0; +long payload1_len2 = 0; +long total1 = 0; +char payload1[MAX_LEN + MAX_LEN] = {}; + +/* .data */ +int payload2_len1 = -1; +int payload2_len2 = -1; +int total2 = -1; +char payload2[MAX_LEN + MAX_LEN] = { 1 }; + +int payload3_len1 = -1; +int payload3_len2 = -1; +int total3= -1; +char payload3[MAX_LEN + MAX_LEN] = { 1 }; + +int payload4_len1 = -1; +int payload4_len2 = -1; +int total4= -1; +char payload4[MAX_LEN + MAX_LEN] = { 1 }; + +SEC("raw_tp/sys_enter") +int handler64_unsigned(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload1; + u64 len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len <= MAX_LEN) { + payload += len; + payload1_len1 = len; + } + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len <= MAX_LEN) { + payload += len; + payload1_len2 = len; + } + + total1 = payload - (void *)payload1; + + return 0; +} + +SEC("raw_tp/sys_exit") +int handler64_signed(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload3; + long len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len >= 0) { + payload += len; + payload3_len1 = len; + } + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len >= 0) { + payload += len; + payload3_len2 = len; + } + total3 = payload - (void *)payload3; + + return 0; +} + +SEC("tp/raw_syscalls/sys_enter") +int handler32_unsigned(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload2; + u32 len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len <= MAX_LEN) { + payload += len; + payload2_len1 = len; + } + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len <= MAX_LEN) { + payload += len; + payload2_len2 = len; + } + + total2 = payload - (void *)payload2; + + return 0; +} + +SEC("tp/raw_syscalls/sys_exit") +int handler32_signed(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload4; + int len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len >= 0) { + payload += len; + payload4_len1 = len; + } + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len >= 0) { + payload += len; + payload4_len2 = len; + } + total4 = payload - (void *)payload4; + + return 0; +} + +SEC("tp/syscalls/sys_exit_getpid") +int handler_exit(void *regs) +{ + long bla; + + if (bpf_probe_read_kernel(&bla, sizeof(bla), 0)) + return 1; + else + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c new file mode 100644 index 000000000000..b52209db8250 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -0,0 +1,62 @@ +{ + "bpf_map_ptr: read with negative offset rejected", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "R1 is bpf_array invalid negative access: off=-8", +}, +{ + "bpf_map_ptr: write rejected", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "only read from bpf_array is supported", +}, +{ + "bpf_map_ptr: read non-existent field rejected", + .insns = { + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4", +}, +{ + "bpf_map_ptr: read ops field accepted", + .insns = { + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = ACCEPT, + .retval = 1, +}, diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c index cd26ee6b7b1d..1f2b8c4cb26d 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c @@ -56,7 +56,7 @@ .fixup_map_in_map = { 16 }, .fixup_map_array_48b = { 13 }, .result = REJECT, - .errstr = "R0 invalid mem access 'map_ptr'", + .errstr = "only read from bpf_array is supported", }, { "cond: two branches returning different map pointers for lookup (tail, tail)", diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 97ee658e1242..ed4e76b24649 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -836,3 +836,41 @@ .errstr = "R0 invalid mem access 'inv'", .errstr_unpriv = "R0 pointer -= pointer prohibited", }, +{ + "32bit pkt_ptr -= scalar", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2), + BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_7), + BPF_ALU32_REG(BPF_SUB, BPF_REG_6, BPF_REG_4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "32bit scalar -= pkt_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2), + BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_6), + BPF_ALU32_REG(BPF_SUB, BPF_REG_4, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, |