diff options
author | Martin K. Petersen <martin.petersen@oracle.com> | 2025-04-28 21:48:37 -0400 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2025-04-28 21:48:37 -0400 |
commit | 6b08fe7763de9e388e6b224af2e39fa5083c44d8 (patch) | |
tree | ad164e3d8991d1336927e51f82d776b9b23b41d3 /include | |
parent | a0d1cf505d3f3a98388c2d05fe03b126dfda3513 (diff) | |
parent | 268975a87c7b6f6b0ceb62df236c1e1b08b89379 (diff) |
Merge patch series "target: Remove atomics from main IO path"
Mike Christie <michael.christie@oracle.com> says:
The following patches made over Linus's tree remove the atomic use from
the main IO path. There was a handful of atomic_longs used just used
for stats and a couple atomics used for handling ordered commands. These
patches move the stats to per cpu, and moves the ordered tracking to a
per cpu counter.
With the patches 8K IOPS increases by up to 33% when running fio
with numjobs >= 4 and using the vhost-scsi target with virtio-scsi
and virtio num_queues >= 4 (jobs and queues match, and virtqueue_size
and cmd_per_lun are increased to match the total iodepth of all
jobs).
Link: https://lore.kernel.org/r/20250424032741.16216-1-michael.christie@oracle.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'include')
-rw-r--r-- | include/target/target_core_base.h | 24 |
1 files changed, 16 insertions, 8 deletions
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 97099a5e3f6c..a52d4967c0d3 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -157,6 +157,7 @@ enum se_cmd_flags_table { SCF_USE_CPUID = (1 << 16), SCF_TASK_ATTR_SET = (1 << 17), SCF_TREAT_READ_AS_NORMAL = (1 << 18), + SCF_TASK_ORDERED_SYNC = (1 << 19), }; /* @@ -669,15 +670,19 @@ struct se_lun_acl { struct se_ml_stat_grps ml_stat_grps; }; +struct se_dev_entry_io_stats { + u32 total_cmds; + u32 read_bytes; + u32 write_bytes; +}; + struct se_dev_entry { u64 mapped_lun; u64 pr_res_key; u64 creation_time; bool lun_access_ro; u32 attach_count; - atomic_long_t total_cmds; - atomic_long_t read_bytes; - atomic_long_t write_bytes; + struct se_dev_entry_io_stats __percpu *stats; /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */ struct kref pr_kref; struct completion pr_comp; @@ -800,6 +805,12 @@ struct se_device_queue { struct se_cmd_queue sq; }; +struct se_dev_io_stats { + u32 total_cmds; + u32 read_bytes; + u32 write_bytes; +}; + struct se_device { /* Used for SAM Task Attribute ordering */ u32 dev_cur_ordered_id; @@ -821,13 +832,10 @@ struct se_device { atomic_long_t num_resets; atomic_long_t aborts_complete; atomic_long_t aborts_no_task; - atomic_long_t num_cmds; - atomic_long_t read_bytes; - atomic_long_t write_bytes; + struct se_dev_io_stats __percpu *stats; /* Active commands on this virtual SE device */ - atomic_t non_ordered; + struct percpu_ref non_ordered; bool ordered_sync_in_progress; - atomic_t delayed_cmd_count; atomic_t dev_qf_count; u32 export_count; spinlock_t delayed_cmd_lock; |