summaryrefslogtreecommitdiff
path: root/net/ipv4/sysctl_net_ipv4.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/sysctl_net_ipv4.c')
-rw-r--r--net/ipv4/sysctl_net_ipv4.c283
1 files changed, 231 insertions, 52 deletions
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0d0cc4ef2b85..a1a50a5c80dc 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -25,16 +25,19 @@ static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
+static int tcp_app_win_max = 31;
static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
static int tcp_min_snd_mss_max = 65535;
+static int tcp_rto_max_max = TCP_RTO_MAX_SEC * MSEC_PER_SEC;
static int ip_privileged_port_min;
static int ip_privileged_port_max = 65535;
static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
-static int ip_ping_group_range_min[] = { 0, 0 };
-static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
+static int tcp_syn_linear_timeouts_max = MAX_TCP_SYNCNT;
+static unsigned long ip_ping_group_range_min[] = { 0, 0 };
+static unsigned long ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;
static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
@@ -43,31 +46,31 @@ static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024;
static unsigned int udp_child_hash_entries_max = UDP_HTABLE_SIZE_MAX;
static int tcp_plb_max_rounds = 31;
static int tcp_plb_max_cong_thresh = 256;
+static unsigned int tcp_tw_reuse_delay_max = TCP_PAWS_MSL * MSEC_PER_SEC;
+static int tcp_ecn_mode_max = 2;
+static u32 icmp_errors_extension_mask_all =
+ GENMASK_U8(ICMP_ERR_EXT_COUNT - 1, 0);
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
/* Update system visible IP port range */
-static void set_local_port_range(struct net *net, int range[2])
+static void set_local_port_range(struct net *net, unsigned int low, unsigned int high)
{
- bool same_parity = !((range[0] ^ range[1]) & 1);
+ bool same_parity = !((low ^ high) & 1);
- write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
if (same_parity && !net->ipv4.ip_local_ports.warned) {
net->ipv4.ip_local_ports.warned = true;
pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
}
- net->ipv4.ip_local_ports.range[0] = range[0];
- net->ipv4.ip_local_ports.range[1] = range[1];
- write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
+ WRITE_ONCE(net->ipv4.ip_local_ports.range, high << 16 | low);
}
/* Validate changes from /proc interface. */
-static int ipv4_local_port_range(struct ctl_table *table, int write,
+static int ipv4_local_port_range(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net =
- container_of(table->data, struct net, ipv4.ip_local_ports.range);
+ struct net *net = table->data;
int ret;
int range[2];
struct ctl_table tmp = {
@@ -91,14 +94,14 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
(range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
ret = -EINVAL;
else
- set_local_port_range(net, range);
+ set_local_port_range(net, range[0], range[1]);
}
return ret;
}
/* Validate changes from /proc interface. */
-static int ipv4_privileged_ports(struct ctl_table *table, int write,
+static int ipv4_privileged_ports(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(table->data, struct net,
@@ -132,7 +135,8 @@ static int ipv4_privileged_ports(struct ctl_table *table, int write,
return ret;
}
-static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
+static void inet_get_ping_group_range_table(const struct ctl_table *table,
+ kgid_t *low, kgid_t *high)
{
kgid_t *data = table->data;
struct net *net =
@@ -147,7 +151,8 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low
}
/* Update system visible IP port range */
-static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
+static void set_ping_group_range(const struct ctl_table *table,
+ kgid_t low, kgid_t high)
{
kgid_t *data = table->data;
struct net *net =
@@ -159,12 +164,12 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
}
/* Validate changes from /proc interface. */
-static int ipv4_ping_group_range(struct ctl_table *table, int write,
+static int ipv4_ping_group_range(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct user_namespace *user_ns = current_user_ns();
int ret;
- gid_t urange[2];
+ unsigned long urange[2];
kgid_t low, high;
struct ctl_table tmp = {
.data = &urange,
@@ -177,7 +182,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
inet_get_ping_group_range_table(table, &low, &high);
urange[0] = from_kgid_munged(user_ns, low);
urange[1] = from_kgid_munged(user_ns, high);
- ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0) {
low = make_kgid(user_ns, urange[0]);
@@ -194,7 +199,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
return ret;
}
-static int ipv4_fwd_update_priority(struct ctl_table *table, int write,
+static int ipv4_fwd_update_priority(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net;
@@ -210,7 +215,7 @@ static int ipv4_fwd_update_priority(struct ctl_table *table, int write,
return ret;
}
-static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
+static int proc_tcp_congestion_control(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(ctl->data, struct net,
@@ -230,7 +235,7 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
return ret;
}
-static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
+static int proc_tcp_available_congestion_control(const struct ctl_table *ctl,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -246,7 +251,7 @@ static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
return ret;
}
-static int proc_allowed_congestion_control(struct ctl_table *ctl,
+static int proc_allowed_congestion_control(const struct ctl_table *ctl,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -283,7 +288,7 @@ static int sscanf_key(char *buf, __le32 *key)
return ret;
}
-static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
+static int proc_tcp_fastopen_key(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(table->data, struct net,
@@ -354,7 +359,7 @@ bad_key:
return ret;
}
-static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
+static int proc_tfo_blackhole_detect_timeout(const struct ctl_table *table,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
@@ -369,7 +374,7 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
return ret;
}
-static int proc_tcp_available_ulp(struct ctl_table *ctl,
+static int proc_tcp_available_ulp(const struct ctl_table *ctl,
int write, void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -386,7 +391,7 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
return ret;
}
-static int proc_tcp_ehash_entries(struct ctl_table *table, int write,
+static int proc_tcp_ehash_entries(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(table->data, struct net,
@@ -410,7 +415,7 @@ static int proc_tcp_ehash_entries(struct ctl_table *table, int write,
return proc_dointvec(&tbl, write, buffer, lenp, ppos);
}
-static int proc_udp_hash_entries(struct ctl_table *table, int write,
+static int proc_udp_hash_entries(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net = container_of(table->data, struct net,
@@ -434,7 +439,7 @@ static int proc_udp_hash_entries(struct ctl_table *table, int write,
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
+static int proc_fib_multipath_hash_policy(const struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -449,7 +454,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
return ret;
}
-static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
+static int proc_fib_multipath_hash_fields(const struct ctl_table *table, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -464,6 +469,61 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
return ret;
}
+
+static u32 proc_fib_multipath_hash_rand_seed __ro_after_init;
+
+static void proc_fib_multipath_hash_init_rand_seed(void)
+{
+ get_random_bytes(&proc_fib_multipath_hash_rand_seed,
+ sizeof(proc_fib_multipath_hash_rand_seed));
+}
+
+static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
+{
+ struct sysctl_fib_multipath_hash_seed new = {
+ .user_seed = user_seed,
+ .mp_seed = (user_seed ? user_seed :
+ proc_fib_multipath_hash_rand_seed),
+ };
+
+ WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new);
+}
+
+static int proc_fib_multipath_hash_seed(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct sysctl_fib_multipath_hash_seed *mphs;
+ struct net *net = table->data;
+ struct ctl_table tmp;
+ u32 user_seed;
+ int ret;
+
+ mphs = &net->ipv4.sysctl_fib_multipath_hash_seed;
+ user_seed = mphs->user_seed;
+
+ tmp = *table;
+ tmp.data = &user_seed;
+
+ ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+ if (write && ret == 0) {
+ proc_fib_multipath_hash_set_seed(net, user_seed);
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
+ }
+
+ return ret;
+}
+#else
+
+static void proc_fib_multipath_hash_init_rand_seed(void)
+{
+}
+
+static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed)
+{
+}
+
#endif
static struct ctl_table ipv4_table[] = {
@@ -546,22 +606,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_tcp_available_ulp,
},
{
- .procname = "icmp_msgs_per_sec",
- .data = &sysctl_icmp_msgs_per_sec,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "icmp_msgs_burst",
- .data = &sysctl_icmp_msgs_burst,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
@@ -577,7 +621,6 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &sysctl_fib_sync_mem_min,
.extra2 = &sysctl_fib_sync_mem_max,
},
- { }
};
static struct ctl_table ipv4_net_table[] = {
@@ -634,6 +677,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra2 = SYSCTL_ONE
},
{
+ .procname = "icmp_errors_extension_mask",
+ .data = &init_net.ipv4.sysctl_icmp_errors_extension_mask,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &icmp_errors_extension_mask_all,
+ },
+ {
.procname = "icmp_ratelimit",
.data = &init_net.ipv4.sysctl_icmp_ratelimit,
.maxlen = sizeof(int),
@@ -648,6 +700,22 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "icmp_msgs_per_sec",
+ .data = &init_net.ipv4.sysctl_icmp_msgs_per_sec,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "icmp_msgs_burst",
+ .data = &init_net.ipv4.sysctl_icmp_msgs_burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
.procname = "ping_group_range",
.data = &init_net.ipv4.ping_group_range.range,
.maxlen = sizeof(gid_t)*2,
@@ -672,9 +740,27 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
+ .extra2 = &tcp_ecn_mode_max,
+ },
+ {
+ .procname = "tcp_ecn_option",
+ .data = &init_net.ipv4.sysctl_tcp_ecn_option,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
{
+ .procname = "tcp_ecn_option_beacon",
+ .data = &init_net.ipv4.sysctl_tcp_ecn_option_beacon,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_THREE,
+ },
+ {
.procname = "tcp_ecn_fallback",
.data = &init_net.ipv4.sysctl_tcp_ecn_fallback,
.maxlen = sizeof(u8),
@@ -731,8 +817,8 @@ static struct ctl_table ipv4_net_table[] = {
},
{
.procname = "ip_local_port_range",
- .maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
- .data = &init_net.ipv4.ip_local_ports.range,
+ .maxlen = 0,
+ .data = &init_net,
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
@@ -1012,6 +1098,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra2 = SYSCTL_TWO,
},
{
+ .procname = "tcp_tw_reuse_delay",
+ .data = &init_net.ipv4.sysctl_tcp_tw_reuse_delay,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = &tcp_tw_reuse_delay_max,
+ },
+ {
.procname = "tcp_max_syn_backlog",
.data = &init_net.ipv4.sysctl_max_syn_backlog,
.maxlen = sizeof(int),
@@ -1073,6 +1168,13 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &fib_multipath_hash_fields_all_mask,
},
+ {
+ .procname = "fib_multipath_hash_seed",
+ .data = &init_net,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_fib_multipath_hash_seed,
+ },
#endif
{
.procname = "ip_unprivileged_port_start",
@@ -1198,6 +1300,8 @@ static struct ctl_table ipv4_net_table[] = {
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &tcp_app_win_max,
},
{
.procname = "tcp_adv_win_scale",
@@ -1239,6 +1343,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dou8vec_minmax,
},
{
+ .procname = "tcp_rcvbuf_low_rtt",
+ .data = &init_net.ipv4.sysctl_tcp_rcvbuf_low_rtt,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
+ },
+ {
.procname = "tcp_tso_win_divisor",
.data = &init_net.ipv4.sysctl_tcp_tso_win_divisor,
.maxlen = sizeof(u8),
@@ -1348,6 +1461,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_doulongvec_minmax,
},
{
+ .procname = "tcp_comp_sack_rtt_percent",
+ .data = &init_net.ipv4.sysctl_tcp_comp_sack_rtt_percent,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_ONE_THOUSAND,
+ },
+ {
.procname = "tcp_comp_sack_slack_ns",
.data = &init_net.ipv4.sysctl_tcp_comp_sack_slack_ns,
.maxlen = sizeof(unsigned long),
@@ -1363,6 +1485,15 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
},
{
+ .procname = "tcp_backlog_ack_defer",
+ .data = &init_net.ipv4.sysctl_tcp_backlog_ack_defer,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
.procname = "tcp_reflect_tos",
.data = &init_net.ipv4.sysctl_tcp_reflect_tos,
.maxlen = sizeof(u8),
@@ -1467,11 +1598,54 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &tcp_plb_max_cong_thresh,
},
- { }
+ {
+ .procname = "tcp_syn_linear_timeouts",
+ .data = &init_net.ipv4.sysctl_tcp_syn_linear_timeouts,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &tcp_syn_linear_timeouts_max,
+ },
+ {
+ .procname = "tcp_shrink_window",
+ .data = &init_net.ipv4.sysctl_tcp_shrink_window,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
+ .procname = "tcp_pingpong_thresh",
+ .data = &init_net.ipv4.sysctl_tcp_pingpong_thresh,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ {
+ .procname = "tcp_rto_min_us",
+ .data = &init_net.ipv4.sysctl_tcp_rto_min_us,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
+ {
+ .procname = "tcp_rto_max_ms",
+ .data = &init_net.ipv4.sysctl_tcp_rto_max_ms,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE_THOUSAND,
+ .extra2 = &tcp_rto_max_max,
+ },
};
static __net_init int ipv4_sysctl_init_net(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(ipv4_net_table);
struct ctl_table *table;
table = ipv4_net_table;
@@ -1482,7 +1656,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!table)
goto err_alloc;
- for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) {
+ for (i = 0; i < table_size; i++) {
if (table[i].data) {
/* Update the variables to point into
* the current struct net
@@ -1497,7 +1671,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
}
}
- net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
+ net->ipv4.ipv4_hdr = register_net_sysctl_sz(net, "net/ipv4", table,
+ table_size);
if (!net->ipv4.ipv4_hdr)
goto err_reg;
@@ -1505,6 +1680,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
if (!net->ipv4.sysctl_local_reserved_ports)
goto err_ports;
+ proc_fib_multipath_hash_set_seed(net, 0);
+
return 0;
err_ports:
@@ -1518,7 +1695,7 @@ err_alloc:
static __net_exit void ipv4_sysctl_exit_net(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
kfree(net->ipv4.sysctl_local_reserved_ports);
table = net->ipv4.ipv4_hdr->ctl_table_arg;
@@ -1539,6 +1716,8 @@ static __init int sysctl_ipv4_init(void)
if (!hdr)
return -ENOMEM;
+ proc_fib_multipath_hash_init_rand_seed();
+
if (register_pernet_subsys(&ipv4_sysctl_ops)) {
unregister_net_sysctl_table(hdr);
return -ENOMEM;