diff options
Diffstat (limited to 'fs/ksmbd')
-rw-r--r-- | fs/ksmbd/connection.c | 22 | ||||
-rw-r--r-- | fs/ksmbd/connection.h | 27 | ||||
-rw-r--r-- | fs/ksmbd/ksmbd_netlink.h | 3 | ||||
-rw-r--r-- | fs/ksmbd/misc.c | 50 | ||||
-rw-r--r-- | fs/ksmbd/misc.h | 3 | ||||
-rw-r--r-- | fs/ksmbd/oplock.c | 30 | ||||
-rw-r--r-- | fs/ksmbd/oplock.h | 2 | ||||
-rw-r--r-- | fs/ksmbd/smb2misc.c | 2 | ||||
-rw-r--r-- | fs/ksmbd/smb2pdu.c | 208 | ||||
-rw-r--r-- | fs/ksmbd/smb_common.c | 4 | ||||
-rw-r--r-- | fs/ksmbd/smbacl.c | 1 | ||||
-rw-r--r-- | fs/ksmbd/transport_ipc.c | 3 | ||||
-rw-r--r-- | fs/ksmbd/transport_rdma.c | 373 | ||||
-rw-r--r-- | fs/ksmbd/transport_rdma.h | 8 | ||||
-rw-r--r-- | fs/ksmbd/transport_tcp.c | 2 | ||||
-rw-r--r-- | fs/ksmbd/vfs.c | 20 | ||||
-rw-r--r-- | fs/ksmbd/vfs.h | 2 | ||||
-rw-r--r-- | fs/ksmbd/vfs_cache.c | 2 | ||||
-rw-r--r-- | fs/ksmbd/vfs_cache.h | 1 |
19 files changed, 420 insertions, 343 deletions
diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 208d2cff7bd3..e8f476c5f189 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -62,7 +62,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) atomic_set(&conn->req_running, 0); atomic_set(&conn->r_count, 0); conn->total_credits = 1; - conn->outstanding_credits = 1; + conn->outstanding_credits = 0; init_waitqueue_head(&conn->req_running_q); INIT_LIST_HEAD(&conn->conns_list); @@ -205,31 +205,31 @@ int ksmbd_conn_write(struct ksmbd_work *work) return 0; } -int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf, - unsigned int buflen, u32 remote_key, u64 remote_offset, - u32 remote_len) +int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, + void *buf, unsigned int buflen, + struct smb2_buffer_desc_v1 *desc, + unsigned int desc_len) { int ret = -EINVAL; if (conn->transport->ops->rdma_read) ret = conn->transport->ops->rdma_read(conn->transport, buf, buflen, - remote_key, remote_offset, - remote_len); + desc, desc_len); return ret; } -int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf, - unsigned int buflen, u32 remote_key, - u64 remote_offset, u32 remote_len) +int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, + void *buf, unsigned int buflen, + struct smb2_buffer_desc_v1 *desc, + unsigned int desc_len) { int ret = -EINVAL; if (conn->transport->ops->rdma_write) ret = conn->transport->ops->rdma_write(conn->transport, buf, buflen, - remote_key, remote_offset, - remote_len); + desc, desc_len); return ret; } diff --git a/fs/ksmbd/connection.h b/fs/ksmbd/connection.h index 7a59aacb5daa..98c1cbe45ec9 100644 --- a/fs/ksmbd/connection.h +++ b/fs/ksmbd/connection.h @@ -122,11 +122,14 @@ struct ksmbd_transport_ops { int (*writev)(struct ksmbd_transport *t, struct kvec *iovs, int niov, int size, bool need_invalidate_rkey, unsigned int remote_key); - int (*rdma_read)(struct ksmbd_transport *t, void *buf, unsigned int len, - u32 remote_key, u64 remote_offset, u32 remote_len); - int (*rdma_write)(struct ksmbd_transport *t, void *buf, - unsigned int len, u32 remote_key, u64 remote_offset, - u32 remote_len); + int (*rdma_read)(struct ksmbd_transport *t, + void *buf, unsigned int len, + struct smb2_buffer_desc_v1 *desc, + unsigned int desc_len); + int (*rdma_write)(struct ksmbd_transport *t, + void *buf, unsigned int len, + struct smb2_buffer_desc_v1 *desc, + unsigned int desc_len); }; struct ksmbd_transport { @@ -148,12 +151,14 @@ struct ksmbd_conn *ksmbd_conn_alloc(void); void ksmbd_conn_free(struct ksmbd_conn *conn); bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); int ksmbd_conn_write(struct ksmbd_work *work); -int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, void *buf, - unsigned int buflen, u32 remote_key, u64 remote_offset, - u32 remote_len); -int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, void *buf, - unsigned int buflen, u32 remote_key, u64 remote_offset, - u32 remote_len); +int ksmbd_conn_rdma_read(struct ksmbd_conn *conn, + void *buf, unsigned int buflen, + struct smb2_buffer_desc_v1 *desc, + unsigned int desc_len); +int ksmbd_conn_rdma_write(struct ksmbd_conn *conn, + void *buf, unsigned int buflen, + struct smb2_buffer_desc_v1 *desc, + unsigned int desc_len); void ksmbd_conn_enqueue_request(struct ksmbd_work *work); int ksmbd_conn_try_dequeue_request(struct ksmbd_work *work); void ksmbd_conn_init_server_callbacks(struct ksmbd_conn_ops *ops); diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h index ebe6ca08467a..52aa0adeb951 100644 --- a/fs/ksmbd/ksmbd_netlink.h +++ b/fs/ksmbd/ksmbd_netlink.h @@ -104,7 +104,8 @@ struct ksmbd_startup_request { */ __u32 sub_auth[3]; /* Subauth value for Security ID */ __u32 smb2_max_credits; /* MAX credits */ - __u32 reserved[128]; /* Reserved room */ + __u32 smbd_max_io_size; /* smbd read write size */ + __u32 reserved[127]; /* Reserved room */ __u32 ifc_list_sz; /* interfaces list size */ __s8 ____payload[]; }; diff --git a/fs/ksmbd/misc.c b/fs/ksmbd/misc.c index 60e7ac62c917..df991107ad2c 100644 --- a/fs/ksmbd/misc.c +++ b/fs/ksmbd/misc.c @@ -20,7 +20,7 @@ * wildcard '*' and '?' * TODO : implement consideration about DOS_DOT, DOS_QM and DOS_STAR * - * @string: string to compare with a pattern + * @str: string to compare with a pattern * @len: string length * @pattern: pattern string which might include wildcard '*' and '?' * @@ -152,25 +152,47 @@ out: /** * convert_to_nt_pathname() - extract and return windows path string * whose share directory prefix was removed from file path - * @filename : unix filename - * @sharepath: share path string + * @share: ksmbd_share_config pointer + * @path: path to report * * Return : windows path string or error */ -char *convert_to_nt_pathname(char *filename) +char *convert_to_nt_pathname(struct ksmbd_share_config *share, + struct path *path) { - char *ab_pathname; + char *pathname, *ab_pathname, *nt_pathname; + int share_path_len = share->path_sz; - if (strlen(filename) == 0) - filename = "\\"; + pathname = kmalloc(PATH_MAX, GFP_KERNEL); + if (!pathname) + return ERR_PTR(-EACCES); - ab_pathname = kstrdup(filename, GFP_KERNEL); - if (!ab_pathname) - return NULL; + ab_pathname = d_path(path, pathname, PATH_MAX); + if (IS_ERR(ab_pathname)) { + nt_pathname = ERR_PTR(-EACCES); + goto free_pathname; + } + + if (strncmp(ab_pathname, share->path, share_path_len)) { + nt_pathname = ERR_PTR(-EACCES); + goto free_pathname; + } + + nt_pathname = kzalloc(strlen(&ab_pathname[share_path_len]) + 2, GFP_KERNEL); + if (!nt_pathname) { + nt_pathname = ERR_PTR(-ENOMEM); + goto free_pathname; + } + if (ab_pathname[share_path_len] == '\0') + strcpy(nt_pathname, "/"); + strcat(nt_pathname, &ab_pathname[share_path_len]); + + ksmbd_conv_path_to_windows(nt_pathname); - ksmbd_conv_path_to_windows(ab_pathname); - return ab_pathname; +free_pathname: + kfree(pathname); + return nt_pathname; } int get_nlink(struct kstat *st) @@ -228,8 +250,8 @@ char *ksmbd_extract_sharename(char *treename) /** * convert_to_unix_name() - convert windows name to unix format - * @path: name to be converted - * @tid: tree id of mathing share + * @share: ksmbd_share_config pointer + * @name: file name that is relative to share * * Return: converted name on success, otherwise NULL */ diff --git a/fs/ksmbd/misc.h b/fs/ksmbd/misc.h index 253366bd0951..aae2a252945f 100644 --- a/fs/ksmbd/misc.h +++ b/fs/ksmbd/misc.h @@ -14,7 +14,8 @@ struct ksmbd_file; int match_pattern(const char *str, size_t len, const char *pattern); int ksmbd_validate_filename(char *filename); int parse_stream_name(char *filename, char **stream_name, int *s_type); -char *convert_to_nt_pathname(char *filename); +char *convert_to_nt_pathname(struct ksmbd_share_config *share, + struct path *path); int get_nlink(struct kstat *st); void ksmbd_conv_path_to_unix(char *path); void ksmbd_strip_last_slash(char *path); diff --git a/fs/ksmbd/oplock.c b/fs/ksmbd/oplock.c index 23871b18a429..8b5560574d4c 100644 --- a/fs/ksmbd/oplock.c +++ b/fs/ksmbd/oplock.c @@ -1694,33 +1694,3 @@ out: read_unlock(&lease_list_lock); return ret_op; } - -int smb2_check_durable_oplock(struct ksmbd_file *fp, - struct lease_ctx_info *lctx, char *name) -{ - struct oplock_info *opinfo = opinfo_get(fp); - int ret = 0; - - if (opinfo && opinfo->is_lease) { - if (!lctx) { - pr_err("open does not include lease\n"); - ret = -EBADF; - goto out; - } - if (memcmp(opinfo->o_lease->lease_key, lctx->lease_key, - SMB2_LEASE_KEY_SIZE)) { - pr_err("invalid lease key\n"); - ret = -EBADF; - goto out; - } - if (name && strcmp(fp->filename, name)) { - pr_err("invalid name reconnect %s\n", name); - ret = -EINVAL; - goto out; - } - } -out: - if (opinfo) - opinfo_put(opinfo); - return ret; -} diff --git a/fs/ksmbd/oplock.h b/fs/ksmbd/oplock.h index 0cf7a2b5bbc0..09753448f779 100644 --- a/fs/ksmbd/oplock.h +++ b/fs/ksmbd/oplock.h @@ -124,6 +124,4 @@ struct oplock_info *lookup_lease_in_table(struct ksmbd_conn *conn, int find_same_lease_key(struct ksmbd_session *sess, struct ksmbd_inode *ci, struct lease_ctx_info *lctx); void destroy_lease_table(struct ksmbd_conn *conn); -int smb2_check_durable_oplock(struct ksmbd_file *fp, - struct lease_ctx_info *lctx, char *name); #endif /* __KSMBD_OPLOCK_H */ diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c index 4a9460153b59..f8f456377a51 100644 --- a/fs/ksmbd/smb2misc.c +++ b/fs/ksmbd/smb2misc.c @@ -338,7 +338,7 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn, ret = 1; } - if ((u64)conn->outstanding_credits + credit_charge > conn->vals->max_credits) { + if ((u64)conn->outstanding_credits + credit_charge > conn->total_credits) { ksmbd_debug(SMB, "Limits exceeding the maximum allowable outstanding requests, given : %u, pending : %u\n", credit_charge, conn->outstanding_credits); ret = 1; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 3bf6c56c654c..353f047e783c 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -11,6 +11,7 @@ #include <linux/statfs.h> #include <linux/ethtool.h> #include <linux/falloc.h> +#include <linux/mount.h> #include "glob.h" #include "smbfsctl.h" @@ -2918,7 +2919,6 @@ int smb2_open(struct ksmbd_work *work) goto err_out; } - fp->filename = name; fp->cdoption = req->CreateDisposition; fp->daccess = daccess; fp->saccess = req->ShareAccess; @@ -3270,14 +3270,13 @@ err_out1: if (!rsp->hdr.Status) rsp->hdr.Status = STATUS_UNEXPECTED_IO_ERROR; - if (!fp || !fp->filename) - kfree(name); if (fp) ksmbd_fd_put(work, fp); smb2_set_err_rsp(work); ksmbd_debug(SMB, "Error response: %x\n", rsp->hdr.Status); } + kfree(name); kfree(lc); return 0; @@ -3895,8 +3894,6 @@ int smb2_query_dir(struct ksmbd_work *work) ksmbd_debug(SMB, "Search pattern is %s\n", srch_ptr); } - ksmbd_debug(SMB, "Directory name is %s\n", dir_fp->filename); - if (srch_flag & SMB2_REOPEN || srch_flag & SMB2_RESTART_SCANS) { ksmbd_debug(SMB, "Restart directory scan\n"); generic_file_llseek(dir_fp->filp, 0, SEEK_SET); @@ -3941,6 +3938,12 @@ int smb2_query_dir(struct ksmbd_work *work) set_ctx_actor(&dir_fp->readdir_data.ctx, __query_dir); rc = iterate_dir(dir_fp->filp, &dir_fp->readdir_data.ctx); + /* + * req->OutputBufferLength is too small to contain even one entry. + * In this case, it immediately returns OutputBufferLength 0 to client. + */ + if (!d_info.out_buf_len && !d_info.num_entry) + goto no_buf_len; if (rc == 0) restart_ctx(&dir_fp->readdir_data.ctx); if (rc == -ENOSPC) @@ -3967,10 +3970,12 @@ int smb2_query_dir(struct ksmbd_work *work) rsp->Buffer[0] = 0; inc_rfc1001_len(work->response_buf, 9); } else { +no_buf_len: ((struct file_directory_info *) ((char *)rsp->Buffer + d_info.last_entry_offset)) ->NextEntryOffset = 0; - d_info.data_count -= d_info.last_entry_off_align; + if (d_info.data_count >= d_info.last_entry_off_align) + d_info.data_count -= d_info.last_entry_off_align; rsp->StructureSize = cpu_to_le16(9); rsp->OutputBufferOffset = cpu_to_le16(72); @@ -4390,9 +4395,9 @@ static int get_file_all_info(struct ksmbd_work *work, return -EACCES; } - filename = convert_to_nt_pathname(fp->filename); - if (!filename) - return -ENOMEM; + filename = convert_to_nt_pathname(work->tcon->share_conf, &fp->filp->f_path); + if (IS_ERR(filename)) + return PTR_ERR(filename); inode = file_inode(fp->filp); generic_fillattr(file_mnt_user_ns(fp->filp), inode, &stat); @@ -4999,15 +5004,17 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, case FS_SECTOR_SIZE_INFORMATION: { struct smb3_fs_ss_info *info; + unsigned int sector_size = + min_t(unsigned int, path.mnt->mnt_sb->s_blocksize, 4096); info = (struct smb3_fs_ss_info *)(rsp->Buffer); - info->LogicalBytesPerSector = cpu_to_le32(stfs.f_bsize); + info->LogicalBytesPerSector = cpu_to_le32(sector_size); info->PhysicalBytesPerSectorForAtomicity = - cpu_to_le32(stfs.f_bsize); - info->PhysicalBytesPerSectorForPerf = cpu_to_le32(stfs.f_bsize); + cpu_to_le32(sector_size); + info->PhysicalBytesPerSectorForPerf = cpu_to_le32(sector_size); info->FSEffPhysicalBytesPerSectorForAtomicity = - cpu_to_le32(stfs.f_bsize); + cpu_to_le32(sector_size); info->Flags = cpu_to_le32(SSINFO_FLAGS_ALIGNED_DEVICE | SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE); info->ByteOffsetForSectorAlignment = 0; @@ -5683,8 +5690,7 @@ static int set_file_allocation_info(struct ksmbd_work *work, size = i_size_read(inode); rc = ksmbd_vfs_truncate(work, fp, alloc_blks * 512); if (rc) { - pr_err("truncate failed! filename : %s, err %d\n", - fp->filename, rc); + pr_err("truncate failed!, err %d\n", rc); return rc; } if (size < alloc_blks * 512) @@ -5714,12 +5720,10 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp, * truncated range. */ if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) { - ksmbd_debug(SMB, "filename : %s truncated to newsize %lld\n", - fp->filename, newsize); + ksmbd_debug(SMB, "truncated to newsize %lld\n", newsize); rc = ksmbd_vfs_truncate(work, fp, newsize); if (rc) { - ksmbd_debug(SMB, "truncate failed! filename : %s err %d\n", - fp->filename, rc); + ksmbd_debug(SMB, "truncate failed!, err %d\n", rc); if (rc != -EAGAIN) rc = -EBADF; return rc; @@ -5765,8 +5769,10 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp, if (parent_fp) { if (parent_fp->daccess & FILE_DELETE_LE) { pr_err("parent dir is opened with delete access\n"); + ksmbd_fd_put(work, parent_fp); return -ESHARE; } + ksmbd_fd_put(work, parent_fp); } next: return smb2_rename(work, fp, user_ns, rename_info, @@ -6118,7 +6124,6 @@ out: static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work, struct smb2_buffer_desc_v1 *desc, __le32 Channel, - __le16 ChannelInfoOffset, __le16 ChannelInfoLength) { unsigned int i, ch_count; @@ -6136,15 +6141,13 @@ static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work, le32_to_cpu(desc[i].length)); } } - if (ch_count != 1) { - ksmbd_debug(RDMA, "RDMA multiple buffer descriptors %d are not supported yet\n", - ch_count); + if (!ch_count) return -EINVAL; - } work->need_invalidate_rkey = (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE); - work->remote_key = le32_to_cpu(desc->token); + if (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) + work->remote_key = le32_to_cpu(desc->token); return 0; } @@ -6152,14 +6155,12 @@ static ssize_t smb2_read_rdma_channel(struct ksmbd_work *work, struct smb2_read_req *req, void *data_buf, size_t length) { - struct smb2_buffer_desc_v1 *desc = - (struct smb2_buffer_desc_v1 *)&req->Buffer[0]; int err; err = ksmbd_conn_rdma_write(work->conn, data_buf, length, - le32_to_cpu(desc->token), - le64_to_cpu(desc->offset), - le32_to_cpu(desc->length)); + (struct smb2_buffer_desc_v1 *) + ((char *)req + le16_to_cpu(req->ReadChannelInfoOffset)), + le16_to_cpu(req->ReadChannelInfoLength)); if (err) return err; @@ -6182,6 +6183,8 @@ int smb2_read(struct ksmbd_work *work) size_t length, mincount; ssize_t nbytes = 0, remain_bytes = 0; int err = 0; + bool is_rdma_channel = false; + unsigned int max_read_size = conn->vals->max_read_size; WORK_BUFFERS(work, req, rsp); @@ -6193,6 +6196,11 @@ int smb2_read(struct ksmbd_work *work) if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE || req->Channel == SMB2_CHANNEL_RDMA_V1) { + is_rdma_channel = true; + max_read_size = get_smbd_max_read_write_size(); + } + + if (is_rdma_channel == true) { unsigned int ch_offset = le16_to_cpu(req->ReadChannelInfoOffset); if (ch_offset < offsetof(struct smb2_read_req, Buffer)) { @@ -6203,7 +6211,6 @@ int smb2_read(struct ksmbd_work *work) (struct smb2_buffer_desc_v1 *) ((char *)req + ch_offset), req->Channel, - req->ReadChannelInfoOffset, req->ReadChannelInfoLength); if (err) goto out; @@ -6225,9 +6232,9 @@ int smb2_read(struct ksmbd_work *work) length = le32_to_cpu(req->Length); mincount = le32_to_cpu(req->MinimumCount); - if (length > conn->vals->max_read_size) { + if (length > max_read_size) { ksmbd_debug(SMB, "limiting read size to max size(%u)\n", - conn->vals->max_read_size); + max_read_size); err = -EINVAL; goto out; } @@ -6259,8 +6266,7 @@ int smb2_read(struct ksmbd_work *work) ksmbd_debug(SMB, "nbytes %zu, offset %lld mincount %zu\n", nbytes, offset, mincount); - if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE || - req->Channel == SMB2_CHANNEL_RDMA_V1) { + if (is_rdma_channel == true) { /* write data to the client using rdma channel */ remain_bytes = smb2_read_rdma_channel(work, req, work->aux_payload_buf, @@ -6330,23 +6336,18 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work) length = le32_to_cpu(req->Length); id = req->VolatileFileId; - if (le16_to_cpu(req->DataOffset) == - offsetof(struct smb2_write_req, Buffer)) { - data_buf = (char *)&req->Buffer[0]; - } else { - if ((u64)le16_to_cpu(req->DataOffset) + length > - get_rfc1002_len(work->request_buf)) { - pr_err("invalid write data offset %u, smb_len %u\n", - le16_to_cpu(req->DataOffset), - get_rfc1002_len(work->request_buf)); - err = -EINVAL; - goto out; - } - - data_buf = (char *)(((char *)&req->hdr.ProtocolId) + - le16_to_cpu(req->DataOffset)); + if ((u64)le16_to_cpu(req->DataOffset) + length > + get_rfc1002_len(work->request_buf)) { + pr_err("invalid write data offset %u, smb_len %u\n", + le16_to_cpu(req->DataOffset), + get_rfc1002_len(work->request_buf)); + err = -EINVAL; + goto out; } + data_buf = (char *)(((char *)&req->hdr.ProtocolId) + + le16_to_cpu(req->DataOffset)); + rpc_resp = ksmbd_rpc_write(work->sess, id, data_buf, length); if (rpc_resp) { if (rpc_resp->flags == KSMBD_RPC_ENOTIMPLEMENTED) { @@ -6386,21 +6387,18 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work, struct ksmbd_file *fp, loff_t offset, size_t length, bool sync) { - struct smb2_buffer_desc_v1 *desc; char *data_buf; int ret; ssize_t nbytes; - desc = (struct smb2_buffer_desc_v1 *)&req->Buffer[0]; - data_buf = kvmalloc(length, GFP_KERNEL | __GFP_ZERO); if (!data_buf) return -ENOMEM; ret = ksmbd_conn_rdma_read(work->conn, data_buf, length, - le32_to_cpu(desc->token), - le64_to_cpu(desc->offset), - le32_to_cpu(desc->length)); + (struct smb2_buffer_desc_v1 *) + ((char *)req + le16_to_cpu(req->WriteChannelInfoOffset)), + le16_to_cpu(req->WriteChannelInfoLength)); if (ret < 0) { kvfree(data_buf); return ret; @@ -6429,8 +6427,9 @@ int smb2_write(struct ksmbd_work *work) size_t length; ssize_t nbytes; char *data_buf; - bool writethrough = false; + bool writethrough = false, is_rdma_channel = false; int err = 0; + unsigned int max_write_size = work->conn->vals->max_write_size; WORK_BUFFERS(work, req, rsp); @@ -6439,8 +6438,17 @@ int smb2_write(struct ksmbd_work *work) return smb2_write_pipe(work); } + offset = le64_to_cpu(req->Offset); + length = le32_to_cpu(req->Length); + if (req->Channel == SMB2_CHANNEL_RDMA_V1 || req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) { + is_rdma_channel = true; + max_write_size = get_smbd_max_read_write_size(); + length = le32_to_cpu(req->RemainingBytes); + } + + if (is_rdma_channel == true) { unsigned int ch_offset = le16_to_cpu(req->WriteChannelInfoOffset); if (req->Length != 0 || req->DataOffset != 0 || @@ -6452,7 +6460,6 @@ int smb2_write(struct ksmbd_work *work) (struct smb2_buffer_desc_v1 *) ((char *)req + ch_offset), req->Channel, - req->WriteChannelInfoOffset, req->WriteChannelInfoLength); if (err) goto out; @@ -6476,41 +6483,28 @@ int smb2_write(struct ksmbd_work *work) goto out; } - offset = le64_to_cpu(req->Offset); - length = le32_to_cpu(req->Length); - - if (length > work->conn->vals->max_write_size) { + if (length > max_write_size) { ksmbd_debug(SMB, "limiting write size to max size(%u)\n", - work->conn->vals->max_write_size); + max_write_size); err = -EINVAL; goto out; } + ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags)); if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH) writethrough = true; - if (req->Channel != SMB2_CHANNEL_RDMA_V1 && - req->Channel != SMB2_CHANNEL_RDMA_V1_INVALIDATE) { - if (le16_to_cpu(req->DataOffset) == - offsetof(struct smb2_write_req, Buffer)) { - data_buf = (char *)&req->Buffer[0]; - } else { - if ((u64)le16_to_cpu(req->DataOffset) + length > - get_rfc1002_len(work->request_buf)) { - pr_err("invalid write data offset %u, smb_len %u\n", - le16_to_cpu(req->DataOffset), - get_rfc1002_len(work->request_buf)); - err = -EINVAL; - goto out; - } - - data_buf = (char *)(((char *)&req->hdr.ProtocolId) + - le16_to_cpu(req->DataOffset)); + if (is_rdma_channel == false) { + if ((u64)le16_to_cpu(req->DataOffset) + length > + get_rfc1002_len(work->request_buf)) { + pr_err("invalid write data offset %u, smb_len %u\n", + le16_to_cpu(req->DataOffset), + get_rfc1002_len(work->request_buf)); + err = -EINVAL; + goto out; } - - ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags)); - if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH) - writethrough = true; + data_buf = (char *)(((char *)&req->hdr.ProtocolId) + + le16_to_cpu(req->DataOffset)); ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n", fp->filp->f_path.dentry, offset, length); @@ -6522,8 +6516,7 @@ int smb2_write(struct ksmbd_work *work) /* read data from the client using rdma channel, and * write the data. */ - nbytes = smb2_write_rdma_channel(work, req, fp, offset, - le32_to_cpu(req->RemainingBytes), + nbytes = smb2_write_rdma_channel(work, req, fp, offset, length, writethrough); if (nbytes < 0) { err = (int)nbytes; @@ -7707,7 +7700,7 @@ int smb2_ioctl(struct ksmbd_work *work) { struct file_zero_data_information *zero_data; struct ksmbd_file *fp; - loff_t off, len; + loff_t off, len, bfz; if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { ksmbd_debug(SMB, @@ -7724,19 +7717,26 @@ int smb2_ioctl(struct ksmbd_work *work) zero_data = (struct file_zero_data_information *)&req->Buffer[0]; - fp = ksmbd_lookup_fd_fast(work, id); - if (!fp) { - ret = -ENOENT; + off = le64_to_cpu(zero_data->FileOffset); + bfz = le64_to_cpu(zero_data->BeyondFinalZero); + if (off > bfz) { + ret = -EINVAL; goto out; } - off = le64_to_cpu(zero_data->FileOffset); - len = le64_to_cpu(zero_data->BeyondFinalZero) - off; + len = bfz - off; + if (len) { + fp = ksmbd_lookup_fd_fast(work, id); + if (!fp) { + ret = -ENOENT; + goto out; + } - ret = ksmbd_vfs_zero_data(work, fp, off, len); - ksmbd_fd_put(work, fp); - if (ret < 0) - goto out; + ret = ksmbd_vfs_zero_data(work, fp, off, len); + ksmbd_fd_put(work, fp); + if (ret < 0) + goto out; + } break; } case FSCTL_QUERY_ALLOCATED_RANGES: @@ -7810,14 +7810,24 @@ int smb2_ioctl(struct ksmbd_work *work) src_off = le64_to_cpu(dup_ext->SourceFileOffset); dst_off = le64_to_cpu(dup_ext->TargetFileOffset); length = le64_to_cpu(dup_ext->ByteCount); - cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp, - dst_off, length, 0); + /* + * XXX: It is not clear if FSCTL_DUPLICATE_EXTENTS_TO_FILE + * should fall back to vfs_copy_file_range(). This could be + * beneficial when re-exporting nfs/smb mount, but note that + * this can result in partial copy that returns an error status. + * If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented, + * fall back to vfs_copy_file_range(), should be avoided when + * the flag DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC is set. + */ + cloned = vfs_clone_file_range(fp_in->filp, src_off, + fp_out->filp, dst_off, length, 0); if (cloned == -EXDEV || cloned == -EOPNOTSUPP) { ret = -EOPNOTSUPP; goto dup_ext_out; } else if (cloned != length) { cloned = vfs_copy_file_range(fp_in->filp, src_off, - fp_out->filp, dst_off, length, 0); + fp_out->filp, dst_off, + length, 0); if (cloned != length) { if (cloned < 0) ret = cloned; diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index 9a7e211dbf4f..7f8ab14fb8ec 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -140,8 +140,10 @@ int ksmbd_verify_smb_message(struct ksmbd_work *work) hdr = work->request_buf; if (*(__le32 *)hdr->Protocol == SMB1_PROTO_NUMBER && - hdr->Command == SMB_COM_NEGOTIATE) + hdr->Command == SMB_COM_NEGOTIATE) { + work->conn->outstanding_credits++; return 0; + } return -EINVAL; } diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c index 6ecf55ea1fed..38f23bf981ac 100644 --- a/fs/ksmbd/smbacl.c +++ b/fs/ksmbd/smbacl.c @@ -1261,6 +1261,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, if (!access_bits) access_bits = SET_MINIMUM_RIGHTS; + posix_acl_release(posix_acls); goto check_access_bits; } } diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c index 3ad6881e0f7e..7cb0eeb07c80 100644 --- a/fs/ksmbd/transport_ipc.c +++ b/fs/ksmbd/transport_ipc.c @@ -26,6 +26,7 @@ #include "mgmt/ksmbd_ida.h" #include "connection.h" #include "transport_tcp.h" +#include "transport_rdma.h" #define IPC_WAIT_TIMEOUT (2 * HZ) @@ -303,6 +304,8 @@ static int ipc_server_config_on_startup(struct ksmbd_startup_request *req) init_smb2_max_trans_size(req->smb2_max_trans); if (req->smb2_max_credits) init_smb2_max_credits(req->smb2_max_credits); + if (req->smbd_max_io_size) + init_smbd_max_io_size(req->smbd_max_io_size); ret = ksmbd_set_netbios_name(req->netbios_name); ret |= ksmbd_set_server_string(req->server_string); diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index e646d79554b8..35b55ee94fe5 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -5,16 +5,6 @@ * * Author(s): Long Li <longli@microsoft.com>, * Hyunchul Lee <hyc.lee@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. */ #define SUBMOD_NAME "smb_direct" @@ -80,9 +70,7 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024; /* The maximum single-message size which can be received */ static int smb_direct_max_receive_size = 8192; -static int smb_direct_max_read_write_size = 524224; - -static int smb_direct_max_outstanding_rw_ops = 8; +static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE; static LIST_HEAD(smb_direct_device_list); static DEFINE_RWLOCK(smb_direct_device_lock); @@ -147,18 +135,18 @@ struct smb_direct_transport { atomic_t send_credits; spinlock_t lock_new_recv_credits; int new_recv_credits; - atomic_t rw_avail_ops; + int max_rw_credits; + int pages_per_rw_credit; + atomic_t rw_credits; wait_queue_head_t wait_send_credits; - wait_queue_head_t wait_rw_avail_ops; + wait_queue_head_t wait_rw_credits; mempool_t *sendmsg_mempool; struct kmem_cache *sendmsg_cache; mempool_t *recvmsg_mempool; struct kmem_cache *recvmsg_cache; - wait_queue_head_t wait_send_payload_pending; - atomic_t send_payload_pending; wait_queue_head_t wait_send_pending; atomic_t send_pending; @@ -208,12 +196,25 @@ struct smb_direct_recvmsg { struct smb_direct_rdma_rw_msg { struct smb_direct_transport *t; struct ib_cqe cqe; + int status; struct completion *completion; + struct list_head list; struct rdma_rw_ctx rw_ctx; struct sg_table sgt; struct scatterlist sg_list[]; }; +void init_smbd_max_io_size(unsigned int sz) +{ + sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE); + smb_direct_max_read_write_size = sz; +} + +unsigned int get_smbd_max_read_write_size(void) +{ + return smb_direct_max_read_write_size; +} + static inline int get_buf_page_count(void *buf, int size) { return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) - @@ -377,7 +378,7 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) t->reassembly_queue_length = 0; init_waitqueue_head(&t->wait_reassembly_queue); init_waitqueue_head(&t->wait_send_credits); - init_waitqueue_head(&t->wait_rw_avail_ops); + init_waitqueue_head(&t->wait_rw_credits); spin_lock_init(&t->receive_credit_lock); spin_lock_init(&t->recvmsg_queue_lock); @@ -386,8 +387,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) spin_lock_init(&t->empty_recvmsg_queue_lock); INIT_LIST_HEAD(&t->empty_recvmsg_queue); - init_waitqueue_head(&t->wait_send_payload_pending); - atomic_set(&t->send_payload_pending, 0); init_waitqueue_head(&t->wait_send_pending); atomic_set(&t->send_pending, 0); @@ -417,8 +416,6 @@ static void free_transport(struct smb_direct_transport *t) wake_up_interruptible(&t->wait_send_credits); ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n"); - wait_event(t->wait_send_payload_pending, - atomic_read(&t->send_payload_pending) == 0); wait_event(t->wait_send_pending, atomic_read(&t->send_pending) == 0); @@ -569,6 +566,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) } t->negotiation_requested = true; t->full_packet_received = true; + t->status = SMB_DIRECT_CS_CONNECTED; enqueue_reassembly(t, recvmsg, 0); wake_up_interruptible(&t->wait_status); break; @@ -873,13 +871,8 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) smb_direct_disconnect_rdma_connection(t); } - if (sendmsg->num_sge > 1) { - if (atomic_dec_and_test(&t->send_payload_pending)) - wake_up(&t->wait_send_payload_pending); - } else { - if (atomic_dec_and_test(&t->send_pending)) - wake_up(&t->wait_send_pending); - } + if (atomic_dec_and_test(&t->send_pending)) + wake_up(&t->wait_send_pending); /* iterate and free the list of messages in reverse. the list's head * is invalid. @@ -911,21 +904,12 @@ static int smb_direct_post_send(struct smb_direct_transport *t, { int ret; - if (wr->num_sge > 1) - atomic_inc(&t->send_payload_pending); - else - atomic_inc(&t->send_pending); - + atomic_inc(&t->send_pending); ret = ib_post_send(t->qp, wr, NULL); if (ret) { pr_err("failed to post send: %d\n", ret); - if (wr->num_sge > 1) { - if (atomic_dec_and_test(&t->send_payload_pending)) - wake_up(&t->wait_send_payload_pending); - } else { - if (atomic_dec_and_test(&t->send_pending)) - wake_up(&t->wait_send_pending); - } + if (atomic_dec_and_test(&t->send_pending)) + wake_up(&t->wait_send_pending); smb_direct_disconnect_rdma_connection(t); } return ret; @@ -983,18 +967,19 @@ static int smb_direct_flush_send_list(struct smb_direct_transport *t, } static int wait_for_credits(struct smb_direct_transport *t, - wait_queue_head_t *waitq, atomic_t *credits) + wait_queue_head_t *waitq, atomic_t *total_credits, + int needed) { int ret; do { - if (atomic_dec_return(credits) >= 0) + if (atomic_sub_return(needed, total_credits) >= 0) return 0; - atomic_inc(credits); + atomic_add(needed, total_credits); ret = wait_event_interruptible(*waitq, - atomic_read(credits) > 0 || - t->status != SMB_DIRECT_CS_CONNECTED); + atomic_read(total_credits) >= needed || + t->status != SMB_DIRECT_CS_CONNECTED); if (t->status != SMB_DIRECT_CS_CONNECTED) return -ENOTCONN; @@ -1015,7 +1000,19 @@ static int wait_for_send_credits(struct smb_direct_transport *t, return ret; } - return wait_for_credits(t, &t->wait_send_credits, &t->send_credits); + return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1); +} + +static int wait_for_rw_credits(struct smb_direct_transport *t, int credits) +{ + return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits); +} + +static int calc_rw_credits(struct smb_direct_transport *t, + char *buf, unsigned int len) +{ + return DIV_ROUND_UP(get_buf_page_count(buf, len), + t->pages_per_rw_credit); } static int smb_direct_create_header(struct smb_direct_transport *t, @@ -1086,7 +1083,7 @@ static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nen int offset, len; int i = 0; - if (nentries < get_buf_page_count(buf, size)) + if (size <= 0 || nentries < get_buf_page_count(buf, size)) return -EINVAL; offset = offset_in_page(buf); @@ -1118,7 +1115,7 @@ static int get_mapped_sg_list(struct ib_device *device, void *buf, int size, int npages; npages = get_sg_list(buf, size, sg_list, nentries); - if (npages <= 0) + if (npages < 0) return -EINVAL; return ib_dma_map_sg(device, sg_list, npages, dir); } @@ -1313,11 +1310,21 @@ done: * that means all the I/Os have been out and we are good to return */ - wait_event(st->wait_send_payload_pending, - atomic_read(&st->send_payload_pending) == 0); + wait_event(st->wait_send_pending, + atomic_read(&st->send_pending) == 0); return ret; } +static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t, + struct smb_direct_rdma_rw_msg *msg, + enum dma_data_direction dir) +{ + rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port, + msg->sgt.sgl, msg->sgt.nents, dir); + sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); + kfree(msg); +} + static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, enum dma_data_direction dir) { @@ -1326,19 +1333,14 @@ static void read_write_done(struct ib_cq *cq, struct ib_wc *wc, struct smb_direct_transport *t = msg->t; if (wc->status != IB_WC_SUCCESS) { + msg->status = -EIO; pr_err("read/write error. opcode = %d, status = %s(%d)\n", wc->opcode, ib_wc_status_msg(wc->status), wc->status); - smb_direct_disconnect_rdma_connection(t); + if (wc->status != IB_WC_WR_FLUSH_ERR) + smb_direct_disconnect_rdma_connection(t); } - if (atomic_inc_return(&t->rw_avail_ops) > 0) - wake_up(&t->wait_rw_avail_ops); - - rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port, - msg->sg_list, msg->sgt.nents, dir); - sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); complete(msg->completion); - kfree(msg); } static void read_done(struct ib_cq *cq, struct ib_wc *wc) @@ -1351,94 +1353,141 @@ static void write_done(struct ib_cq *cq, struct ib_wc *wc) read_write_done(cq, wc, DMA_TO_DEVICE); } -static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf, - int buf_len, u32 remote_key, u64 remote_offset, - u32 remote_len, bool is_read) +static int smb_direct_rdma_xmit(struct smb_direct_transport *t, + void *buf, int buf_len, + struct smb2_buffer_desc_v1 *desc, + unsigned int desc_len, + bool is_read) { - struct smb_direct_rdma_rw_msg *msg; - int ret; + struct smb_direct_rdma_rw_msg *msg, *next_msg; + int i, ret; DECLARE_COMPLETION_ONSTACK(completion); - struct ib_send_wr *first_wr = NULL; + struct ib_send_wr *first_wr; + LIST_HEAD(msg_list); + char *desc_buf; + int credits_needed; + unsigned int desc_buf_len; + size_t total_length = 0; - ret = wait_for_credits(t, &t->wait_rw_avail_ops, &t->rw_avail_ops); + if (t->status != SMB_DIRECT_CS_CONNECTED) + return -ENOTCONN; + + /* calculate needed credits */ + credits_needed = 0; + desc_buf = buf; + for (i = 0; i < desc_len / sizeof(*desc); i++) { + desc_buf_len = le32_to_cpu(desc[i].length); + + credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len); + desc_buf += desc_buf_len; + total_length += desc_buf_len; + if (desc_buf_len == 0 || total_length > buf_len || + total_length > t->max_rdma_rw_size) + return -EINVAL; + } + + ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", + is_read ? "read" : "write", buf_len, credits_needed); + + ret = wait_for_rw_credits(t, credits_needed); if (ret < 0) return ret; - /* TODO: mempool */ - msg = kmalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) + - sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL); - if (!msg) { - atomic_inc(&t->rw_avail_ops); - return -ENOMEM; - } + /* build rdma_rw_ctx for each descriptor */ + desc_buf = buf; + for (i = 0; i < desc_len / sizeof(*desc); i++) { + msg = kzalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) + + sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } - msg->sgt.sgl = &msg->sg_list[0]; - ret = sg_alloc_table_chained(&msg->sgt, - get_buf_page_count(buf, buf_len), - msg->sg_list, SG_CHUNK_SIZE); - if (ret) { - atomic_inc(&t->rw_avail_ops); - kfree(msg); - return -ENOMEM; - } + desc_buf_len = le32_to_cpu(desc[i].length); - ret = get_sg_list(buf, buf_len, msg->sgt.sgl, msg->sgt.orig_nents); - if (ret <= 0) { - pr_err("failed to get pages\n"); - goto err; - } + msg->t = t; + msg->cqe.done = is_read ? read_done : write_done; + msg->completion = &completion; - ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port, - msg->sg_list, get_buf_page_count(buf, buf_len), - 0, remote_offset, remote_key, - is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); - if (ret < 0) { - pr_err("failed to init rdma_rw_ctx: %d\n", ret); - goto err; + msg->sgt.sgl = &msg->sg_list[0]; + ret = sg_alloc_table_chained(&msg->sgt, + get_buf_page_count(desc_buf, desc_buf_len), + msg->sg_list, SG_CHUNK_SIZE); + if (ret) { + kfree(msg); + ret = -ENOMEM; + goto out; + } + + ret = get_sg_list(desc_buf, desc_buf_len, + msg->sgt.sgl, msg->sgt.orig_nents); + if (ret < 0) { + sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); + kfree(msg); + goto out; + } + + ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port, + msg->sgt.sgl, + get_buf_page_count(desc_buf, desc_buf_len), + 0, + le64_to_cpu(desc[i].offset), + le32_to_cpu(desc[i].token), + is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); + if (ret < 0) { + pr_err("failed to init rdma_rw_ctx: %d\n", ret); + sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); + kfree(msg); + goto out; + } + + list_add_tail(&msg->list, &msg_list); + desc_buf += desc_buf_len; } - msg->t = t; - msg->cqe.done = is_read ? read_done : write_done; - msg->completion = &completion; - first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port, - &msg->cqe, NULL); + /* concatenate work requests of rdma_rw_ctxs */ + first_wr = NULL; + list_for_each_entry_reverse(msg, &msg_list, list) { + first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port, + &msg->cqe, first_wr); + } ret = ib_post_send(t->qp, first_wr, NULL); if (ret) { - pr_err("failed to post send wr: %d\n", ret); - goto err; + pr_err("failed to post send wr for RDMA R/W: %d\n", ret); + goto out; } + msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list); wait_for_completion(&completion); - return 0; - -err: - atomic_inc(&t->rw_avail_ops); - if (first_wr) - rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port, - msg->sg_list, msg->sgt.nents, - is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); - sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE); - kfree(msg); + ret = msg->status; +out: + list_for_each_entry_safe(msg, next_msg, &msg_list, list) { + list_del(&msg->list); + smb_direct_free_rdma_rw_msg(t, msg, + is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE); + } + atomic_add(credits_needed, &t->rw_credits); + wake_up(&t->wait_rw_credits); return ret; } -static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf, - unsigned int buflen, u32 remote_key, - u64 remote_offset, u32 remote_len) +static int smb_direct_rdma_write(struct ksmbd_transport *t, + void *buf, unsigned int buflen, + struct smb2_buffer_desc_v1 *desc, + unsigned int desc_len) { return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, - remote_key, remote_offset, - remote_len, false); + desc, desc_len, false); } -static int smb_direct_rdma_read(struct ksmbd_transport *t, void *buf, - unsigned int buflen, u32 remote_key, - u64 remote_offset, u32 remote_len) +static int smb_direct_rdma_read(struct ksmbd_transport *t, + void *buf, unsigned int buflen, + struct smb2_buffer_desc_v1 *desc, + unsigned int desc_len) { return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen, - remote_key, remote_offset, - remote_len, true); + desc, desc_len, true); } static void smb_direct_disconnect(struct ksmbd_transport *t) @@ -1638,41 +1687,57 @@ out_err: return ret; } +static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t) +{ + return min_t(unsigned int, + t->cm_id->device->attrs.max_fast_reg_page_list_len, + 256); +} + static int smb_direct_init_params(struct smb_direct_transport *t, struct ib_qp_cap *cap) { struct ib_device *device = t->cm_id->device; - int max_send_sges, max_pages, max_rw_wrs, max_send_wrs; + int max_send_sges, max_rw_wrs, max_send_wrs; + unsigned int max_sge_per_wr, wrs_per_credit; - /* need 2 more sge. because a SMB_DIRECT header will be mapped, - * and maybe a send buffer could be not page aligned. + /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, + * SMB2 response could be mapped. */ t->max_send_size = smb_direct_max_send_size; - max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 2; + max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3; if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) { pr_err("max_send_size %d is too large\n", t->max_send_size); return -EINVAL; } - /* - * allow smb_direct_max_outstanding_rw_ops of in-flight RDMA - * read/writes. HCA guarantees at least max_send_sge of sges for - * a RDMA read/write work request, and if memory registration is used, - * we need reg_mr, local_inv wrs for each read/write. + /* Calculate the number of work requests for RDMA R/W. + * The maximum number of pages which can be registered + * with one Memory region can be transferred with one + * R/W credit. And at least 4 work requests for each credit + * are needed for MR registration, RDMA R/W, local & remote + * MR invalidation. */ t->max_rdma_rw_size = smb_direct_max_read_write_size; - max_pages = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; - max_rw_wrs = DIV_ROUND_UP(max_pages, SMB_DIRECT_MAX_SEND_SGES); - max_rw_wrs += rdma_rw_mr_factor(device, t->cm_id->port_num, - max_pages) * 2; - max_rw_wrs *= smb_direct_max_outstanding_rw_ops; + t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t); + t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size, + (t->pages_per_rw_credit - 1) * + PAGE_SIZE); + + max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, + device->attrs.max_sge_rd); + max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, + max_send_sges); + wrs_per_credit = max_t(unsigned int, 4, + DIV_ROUND_UP(t->pages_per_rw_credit, + max_sge_per_wr) + 1); + max_rw_wrs = t->max_rw_credits * wrs_per_credit; max_send_wrs = smb_direct_send_credit_target + max_rw_wrs; if (max_send_wrs > device->attrs.max_cqe || max_send_wrs > device->attrs.max_qp_wr) { - pr_err("consider lowering send_credit_target = %d, or max_outstanding_rw_ops = %d\n", - smb_direct_send_credit_target, - smb_direct_max_outstanding_rw_ops); + pr_err("consider lowering send_credit_target = %d\n", + smb_direct_send_credit_target); pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", device->attrs.max_cqe, device->attrs.max_qp_wr); return -EINVAL; @@ -1687,11 +1752,6 @@ static int smb_direct_init_params(struct smb_direct_transport *t, return -EINVAL; } - if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) { - pr_err("warning: device max_send_sge = %d too small\n", - device->attrs.max_send_sge); - return -EINVAL; - } if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) { pr_err("warning: device max_recv_sge = %d too small\n", device->attrs.max_recv_sge); @@ -1707,7 +1767,7 @@ static int smb_direct_init_params(struct smb_direct_transport *t, t->send_credit_target = smb_direct_send_credit_target; atomic_set(&t->send_credits, 0); - atomic_set(&t->rw_avail_ops, smb_direct_max_outstanding_rw_ops); + atomic_set(&t->rw_credits, t->max_rw_credits); t->max_send_size = smb_direct_max_send_size; t->max_recv_size = smb_direct_max_receive_size; @@ -1715,12 +1775,10 @@ static int smb_direct_init_params(struct smb_direct_transport *t, cap->max_send_wr = max_send_wrs; cap->max_recv_wr = t->recv_credit_max; - cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES; + cap->max_send_sge = max_sge_per_wr; cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES; cap->max_inline_data = 0; - cap->max_rdma_ctxs = - rdma_rw_mr_factor(device, t->cm_id->port_num, max_pages) * - smb_direct_max_outstanding_rw_ops; + cap->max_rdma_ctxs = t->max_rw_credits; return 0; } @@ -1813,7 +1871,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, } t->send_cq = ib_alloc_cq(t->cm_id->device, t, - t->send_credit_target, 0, IB_POLL_WORKQUEUE); + smb_direct_send_credit_target + cap->max_rdma_ctxs, + 0, IB_POLL_WORKQUEUE); if (IS_ERR(t->send_cq)) { pr_err("Can't create RDMA send CQ\n"); ret = PTR_ERR(t->send_cq); @@ -1822,8 +1881,7 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, } t->recv_cq = ib_alloc_cq(t->cm_id->device, t, - cap->max_send_wr + cap->max_rdma_ctxs, - 0, IB_POLL_WORKQUEUE); + t->recv_credit_max, 0, IB_POLL_WORKQUEUE); if (IS_ERR(t->recv_cq)) { pr_err("Can't create RDMA recv CQ\n"); ret = PTR_ERR(t->recv_cq); @@ -1852,17 +1910,12 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1; if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) { - int pages_per_mr, mr_count; - - pages_per_mr = min_t(int, pages_per_rw, - t->cm_id->device->attrs.max_fast_reg_page_list_len); - mr_count = DIV_ROUND_UP(pages_per_rw, pages_per_mr) * - atomic_read(&t->rw_avail_ops); - ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, mr_count, - IB_MR_TYPE_MEM_REG, pages_per_mr, 0); + ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs, + t->max_rw_credits, IB_MR_TYPE_MEM_REG, + t->pages_per_rw_credit, 0); if (ret) { pr_err("failed to init mr pool count %d pages %d\n", - mr_count, pages_per_mr); + t->max_rw_credits, t->pages_per_rw_credit); goto err; } } diff --git a/fs/ksmbd/transport_rdma.h b/fs/ksmbd/transport_rdma.h index 5567d93a6f96..77aee4e5c9dc 100644 --- a/fs/ksmbd/transport_rdma.h +++ b/fs/ksmbd/transport_rdma.h @@ -7,6 +7,10 @@ #ifndef __KSMBD_TRANSPORT_RDMA_H__ #define __KSMBD_TRANSPORT_RDMA_H__ +#define SMBD_DEFAULT_IOSIZE (8 * 1024 * 1024) +#define SMBD_MIN_IOSIZE (512 * 1024) +#define SMBD_MAX_IOSIZE (16 * 1024 * 1024) + /* SMB DIRECT negotiation request packet [MS-SMBD] 2.2.1 */ struct smb_direct_negotiate_req { __le16 min_version; @@ -52,10 +56,14 @@ struct smb_direct_data_transfer { int ksmbd_rdma_init(void); void ksmbd_rdma_destroy(void); bool ksmbd_rdma_capable_netdev(struct net_device *netdev); +void init_smbd_max_io_size(unsigned int sz); +unsigned int get_smbd_max_read_write_size(void); #else static inline int ksmbd_rdma_init(void) { return 0; } static inline int ksmbd_rdma_destroy(void) { return 0; } static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; } +static inline void init_smbd_max_io_size(unsigned int sz) { } +static inline unsigned int get_smbd_max_read_write_size(void) { return 0; } #endif #endif /* __KSMBD_TRANSPORT_RDMA_H__ */ diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 8fef9de787d3..143bba4e4db8 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -230,7 +230,7 @@ static int ksmbd_kthread_fn(void *p) break; } ret = kernel_accept(iface->ksmbd_socket, &client_sk, - O_NONBLOCK); + SOCK_NONBLOCK); mutex_unlock(&iface->sock_release_lock); if (ret) { if (ret == -EAGAIN) diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 9cebb6ba555b..7c849024999f 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -398,8 +398,7 @@ int ksmbd_vfs_read(struct ksmbd_work *work, struct ksmbd_file *fp, size_t count, nbytes = kernel_read(filp, rbuf, count, pos); if (nbytes < 0) { - pr_err("smb read failed for (%s), err = %zd\n", - fp->filename, nbytes); + pr_err("smb read failed, err = %zd\n", nbytes); return nbytes; } @@ -875,8 +874,7 @@ int ksmbd_vfs_truncate(struct ksmbd_work *work, err = vfs_truncate(&filp->f_path, size); if (err) - pr_err("truncate failed for filename : %s err %d\n", - fp->filename, err); + pr_err("truncate failed, err %d\n", err); return err; } @@ -965,7 +963,7 @@ ssize_t ksmbd_vfs_getxattr(struct user_namespace *user_ns, */ int ksmbd_vfs_setxattr(struct user_namespace *user_ns, struct dentry *dentry, const char *attr_name, - const void *attr_value, size_t attr_size, int flags) + void *attr_value, size_t attr_size, int flags) { int err; @@ -1017,7 +1015,9 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len); - return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len); + return vfs_fallocate(fp->filp, + FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, + off, len); } int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, @@ -1048,7 +1048,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, *out_count = 0; end = start + length; while (start < end && *out_count < in_count) { - extent_start = f->f_op->llseek(f, start, SEEK_DATA); + extent_start = vfs_llseek(f, start, SEEK_DATA); if (extent_start < 0) { if (extent_start != -ENXIO) ret = (int)extent_start; @@ -1058,7 +1058,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, if (extent_start >= end) break; - extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE); + extent_end = vfs_llseek(f, extent_start, SEEK_HOLE); if (extent_end < 0) { if (extent_end != -ENXIO) ret = (int)extent_end; @@ -1779,6 +1779,10 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, + len, 0); if (ret < 0) return ret; diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h index 8c37aaf936ab..70da4c0ba7ad 100644 --- a/fs/ksmbd/vfs.h +++ b/fs/ksmbd/vfs.h @@ -109,7 +109,7 @@ ssize_t ksmbd_vfs_casexattr_len(struct user_namespace *user_ns, int attr_name_len); int ksmbd_vfs_setxattr(struct user_namespace *user_ns, struct dentry *dentry, const char *attr_name, - const void *attr_value, size_t attr_size, int flags); + void *attr_value, size_t attr_size, int flags); int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, size_t *xattr_stream_name_size, int s_type); int ksmbd_vfs_remove_xattr(struct user_namespace *user_ns, diff --git a/fs/ksmbd/vfs_cache.c b/fs/ksmbd/vfs_cache.c index 29c1db66bd0f..c4d59d2735f0 100644 --- a/fs/ksmbd/vfs_cache.c +++ b/fs/ksmbd/vfs_cache.c @@ -328,7 +328,6 @@ static void __ksmbd_close_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp) kfree(smb_lock); } - kfree(fp->filename); if (ksmbd_stream_fd(fp)) kfree(fp->stream.name); kmem_cache_free(filp_cache, fp); @@ -497,6 +496,7 @@ struct ksmbd_file *ksmbd_lookup_fd_inode(struct inode *inode) list_for_each_entry(lfp, &ci->m_fp_list, node) { if (inode == file_inode(lfp->filp)) { atomic_dec(&ci->m_count); + lfp = ksmbd_fp_get(lfp); read_unlock(&ci->m_lock); return lfp; } diff --git a/fs/ksmbd/vfs_cache.h b/fs/ksmbd/vfs_cache.h index 36239ce31afd..fcb13413fa8d 100644 --- a/fs/ksmbd/vfs_cache.h +++ b/fs/ksmbd/vfs_cache.h @@ -62,7 +62,6 @@ struct ksmbd_inode { struct ksmbd_file { struct file *filp; - char *filename; u64 persistent_id; u64 volatile_id; |