summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xsamples/bpf/do_hbm_test.sh436
1 files changed, 436 insertions, 0 deletions
diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh
new file mode 100755
index 000000000000..56c8b4115c95
--- /dev/null
+++ b/samples/bpf/do_hbm_test.sh
@@ -0,0 +1,436 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 Facebook
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of version 2 of the GNU General Public
+# License as published by the Free Software Foundation.
+
+Usage() {
+ echo "Script for testing HBM (Host Bandwidth Manager) framework."
+ echo "It creates a cgroup to use for testing and load a BPF program to limit"
+ echo "egress or ingress bandwidht. It then uses iperf3 or netperf to create"
+ echo "loads. The output is the goodput in Mbps (unless -D was used)."
+ echo ""
+ echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>] [-D]"
+ echo " [-d=<delay>|--delay=<delay>] [--debug] [-E]"
+ echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]"
+ echo " [-l] [-N] [-p=<port>|--port=<port>] [-P]"
+ echo " [-q=<qdisc>] [-R] [-s=<server>|--server=<server]"
+ echo " [-S|--stats] -t=<time>|--time=<time>] [-w] [cubic|dctcp]"
+ echo " Where:"
+ echo " out egress (default)"
+ echo " -b or --bpf BPF program filename to load and attach."
+ echo " Default is hbm_out_kern.o for egress,"
+ echo " -c or -cc TCP congestion control (cubic or dctcp)"
+ echo " --debug print BPF trace buffer"
+ echo " -d or --delay add a delay in ms using netem"
+ echo " -D In addition to the goodput in Mbps, it also outputs"
+ echo " other detailed information. This information is"
+ echo " test dependent (i.e. iperf3 or netperf)."
+ echo " -E enable ECN (not required for dctcp)"
+ echo " -f or --flows number of concurrent flows (default=1)"
+ echo " -i or --id cgroup id (an integer, default is 1)"
+ echo " -N use netperf instead of iperf3"
+ echo " -l do not limit flows using loopback"
+ echo " -h Help"
+ echo " -p or --port iperf3 port (default is 5201)"
+ echo " -P use an iperf3 instance for each flow"
+ echo " -q use the specified qdisc"
+ echo " -r or --rate rate in Mbps (default 1s 1Gbps)"
+ echo " -R Use TCP_RR for netperf. 1st flow has req"
+ echo " size of 10KB, rest of 1MB. Reply in all"
+ echo " cases is 1 byte."
+ echo " More detailed output for each flow can be found"
+ echo " in the files netperf.<cg>.<flow>, where <cg> is the"
+ echo " cgroup id as specified with the -i flag, and <flow>"
+ echo " is the flow id starting at 1 and increasing by 1 for"
+ echo " flow (as specified by -f)."
+ echo " -s or --server hostname of netperf server. Used to create netperf"
+ echo " test traffic between to hosts (default is within host)"
+ echo " netserver must be running on the host."
+ echo " -S or --stats whether to update hbm stats (default is yes)."
+ echo " -t or --time duration of iperf3 in seconds (default=5)"
+ echo " -w Work conserving flag. cgroup can increase its"
+ echo " bandwidth beyond the rate limit specified"
+ echo " while there is available bandwidth. Current"
+ echo " implementation assumes there is only one NIC"
+ echo " (eth0), but can be extended to support multiple"
+ echo " NICs."
+ echo " cubic or dctcp specify which TCP CC to use"
+ echo " "
+ exit
+}
+
+#set -x
+
+debug_flag=0
+args="$@"
+name="$0"
+netem=0
+cc=x
+dir="-o"
+dir_name="out"
+dur=5
+flows=1
+id=1
+prog=""
+port=5201
+rate=1000
+multi_iperf=0
+flow_cnt=1
+use_netperf=0
+rr=0
+ecn=0
+details=0
+server=""
+qdisc=""
+flags=""
+do_stats=0
+
+function start_hbm () {
+ rm -f hbm.out
+ echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
+ echo " " >> hbm.out
+ ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 &
+ echo $!
+}
+
+processArgs () {
+ for i in $args ; do
+ case $i in
+ # Support for upcomming ingress rate limiting
+ #in) # support for upcoming ingress rate limiting
+ # dir="-i"
+ # dir_name="in"
+ # ;;
+ out)
+ dir="-o"
+ dir_name="out"
+ ;;
+ -b=*|--bpf=*)
+ prog="${i#*=}"
+ ;;
+ -c=*|--cc=*)
+ cc="${i#*=}"
+ ;;
+ --debug)
+ flags="$flags -d"
+ debug_flag=1
+ ;;
+ -d=*|--delay=*)
+ netem="${i#*=}"
+ ;;
+ -D)
+ details=1
+ ;;
+ -E)
+ ecn=1
+ ;;
+ # Support for upcomming fq Early Departure Time egress rate limiting
+ #--edt)
+ # prog="hbm_out_edt_kern.o"
+ # qdisc="fq"
+ # ;;
+ -f=*|--flows=*)
+ flows="${i#*=}"
+ ;;
+ -i=*|--id=*)
+ id="${i#*=}"
+ ;;
+ -l)
+ flags="$flags -l"
+ ;;
+ -N)
+ use_netperf=1
+ ;;
+ -p=*|--port=*)
+ port="${i#*=}"
+ ;;
+ -P)
+ multi_iperf=1
+ ;;
+ -q=*)
+ qdisc="${i#*=}"
+ ;;
+ -r=*|--rate=*)
+ rate="${i#*=}"
+ ;;
+ -R)
+ rr=1
+ ;;
+ -s=*|--server=*)
+ server="${i#*=}"
+ ;;
+ -S|--stats)
+ flags="$flags -s"
+ do_stats=1
+ ;;
+ -t=*|--time=*)
+ dur="${i#*=}"
+ ;;
+ -w)
+ flags="$flags -w"
+ ;;
+ cubic)
+ cc=cubic
+ ;;
+ dctcp)
+ cc=dctcp
+ ;;
+ *)
+ echo "Unknown arg:$i"
+ Usage
+ ;;
+ esac
+ done
+}
+
+processArgs
+
+if [ $debug_flag -eq 1 ] ; then
+ rm -f hbm_out.log
+fi
+
+hbm_pid=$(start_hbm)
+usleep 100000
+
+host=`hostname`
+cg_base_dir=/sys/fs/cgroup
+cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"
+
+echo $$ >> $cg_dir/cgroup.procs
+
+ulimit -l unlimited
+
+rm -f ss.out
+rm -f hbm.[0-9]*.$dir_name
+if [ $ecn -ne 0 ] ; then
+ sysctl -w -q -n net.ipv4.tcp_ecn=1
+fi
+
+if [ $use_netperf -eq 0 ] ; then
+ cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
+ if [ "$cc" != "x" ] ; then
+ sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
+ fi
+fi
+
+if [ "$netem" -ne "0" ] ; then
+ if [ "$qdisc" != "" ] ; then
+ echo "WARNING: Ignoring -q options because -d option used"
+ fi
+ tc qdisc del dev lo root > /dev/null 2>&1
+ tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
+elif [ "$qdisc" != "" ] ; then
+ tc qdisc del dev lo root > /dev/null 2>&1
+ tc qdisc add dev lo root $qdisc > /dev/null 2>&1
+fi
+
+n=0
+m=$[$dur * 5]
+hn="::1"
+if [ $use_netperf -ne 0 ] ; then
+ if [ "$server" != "" ] ; then
+ hn=$server
+ fi
+fi
+
+( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &
+
+if [ $use_netperf -ne 0 ] ; then
+ begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
+ awk '{ print $1 }'`
+ if [ "$begNetserverPid" == "" ] ; then
+ if [ "$server" == "" ] ; then
+ ( ./netserver > /dev/null 2>&1) &
+ usleep 100000
+ fi
+ fi
+ flow_cnt=1
+ if [ "$server" == "" ] ; then
+ np_server=$host
+ else
+ np_server=$server
+ fi
+ if [ "$cc" == "x" ] ; then
+ np_cc=""
+ else
+ np_cc="-K $cc,$cc"
+ fi
+ replySize=1
+ while [ $flow_cnt -le $flows ] ; do
+ if [ $rr -ne 0 ] ; then
+ reqSize=1M
+ if [ $flow_cnt -eq 1 ] ; then
+ reqSize=10K
+ fi
+ if [ "$dir" == "-i" ] ; then
+ replySize=$reqSize
+ reqSize=1
+ fi
+ ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
+ else
+ if [ "$dir" == "-i" ] ; then
+ ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
+ else
+ ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
+ fi
+ fi
+ flow_cnt=$[flow_cnt+1]
+ done
+
+# sleep for duration of test (plus some buffer)
+ n=$[dur+2]
+ sleep $n
+
+# force graceful termination of netperf
+ pids=`pgrep netperf`
+ for p in $pids ; do
+ kill -SIGALRM $p
+ done
+
+ flow_cnt=1
+ rate=0
+ if [ $details -ne 0 ] ; then
+ echo ""
+ echo "Details for HBM in cgroup $id"
+ if [ $do_stats -eq 1 ] ; then
+ if [ -e hbm.$id.$dir_name ] ; then
+ cat hbm.$id.$dir_name
+ fi
+ fi
+ fi
+ while [ $flow_cnt -le $flows ] ; do
+ if [ "$dir" == "-i" ] ; then
+ r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
+ else
+ r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
+ fi
+ echo "rate for flow $flow_cnt: $r"
+ rate=$[rate+r]
+ if [ $details -ne 0 ] ; then
+ echo "-----"
+ echo "Details for cgroup $id, flow $flow_cnt"
+ cat netperf.$id.$flow_cnt
+ fi
+ flow_cnt=$[flow_cnt+1]
+ done
+ if [ $details -ne 0 ] ; then
+ echo ""
+ delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
+ echo "PING AVG DELAY:$delay"
+ echo "AGGREGATE_GOODPUT:$rate"
+ else
+ echo $rate
+ fi
+elif [ $multi_iperf -eq 0 ] ; then
+ (iperf3 -s -p $port -1 > /dev/null 2>&1) &
+ usleep 100000
+ iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
+ rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
+ rate=`echo $rates | grep -o "[0-9]*$"`
+
+ if [ $details -ne 0 ] ; then
+ echo ""
+ echo "Details for HBM in cgroup $id"
+ if [ $do_stats -eq 1 ] ; then
+ if [ -e hbm.$id.$dir_name ] ; then
+ cat hbm.$id.$dir_name
+ fi
+ fi
+ delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
+ echo "PING AVG DELAY:$delay"
+ echo "AGGREGATE_GOODPUT:$rate"
+ else
+ echo $rate
+ fi
+else
+ flow_cnt=1
+ while [ $flow_cnt -le $flows ] ; do
+ (iperf3 -s -p $port -1 > /dev/null 2>&1) &
+ ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
+ port=$[port+1]
+ flow_cnt=$[flow_cnt+1]
+ done
+ n=$[dur+1]
+ sleep $n
+ flow_cnt=1
+ rate=0
+ if [ $details -ne 0 ] ; then
+ echo ""
+ echo "Details for HBM in cgroup $id"
+ if [ $do_stats -eq 1 ] ; then
+ if [ -e hbm.$id.$dir_name ] ; then
+ cat hbm.$id.$dir_name
+ fi
+ fi
+ fi
+
+ while [ $flow_cnt -le $flows ] ; do
+ r=`cat iperf3.$id.$flow_cnt`
+# echo "rate for flow $flow_cnt: $r"
+ if [ $details -ne 0 ] ; then
+ echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
+ fi
+ rate=$[rate+r]
+ flow_cnt=$[flow_cnt+1]
+ done
+ if [ $details -ne 0 ] ; then
+ delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
+ echo "PING AVG DELAY:$delay"
+ echo "AGGREGATE_GOODPUT:$rate"
+ else
+ echo $rate
+ fi
+fi
+
+if [ $use_netperf -eq 0 ] ; then
+ sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
+fi
+if [ $ecn -ne 0 ] ; then
+ sysctl -w -q -n net.ipv4.tcp_ecn=0
+fi
+if [ "$netem" -ne "0" ] ; then
+ tc qdisc del dev lo root > /dev/null 2>&1
+fi
+
+sleep 2
+
+hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
+if [ "$hbmPid" == "$hbm_pid" ] ; then
+ kill $hbm_pid
+fi
+
+sleep 1
+
+# Detach any BPF programs that may have lingered
+ttx=`bpftool cgroup tree | grep hbm`
+v=2
+for x in $ttx ; do
+ if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
+ cg=$x ; v=0
+ else
+ if [ $v -eq 0 ] ; then
+ id=$x ; v=1
+ else
+ if [ $v -eq 1 ] ; then
+ type=$x ; bpftool cgroup detach $cg $type id $id
+ v=0
+ fi
+ fi
+ fi
+done
+
+if [ $use_netperf -ne 0 ] ; then
+ if [ "$server" == "" ] ; then
+ if [ "$begNetserverPid" == "" ] ; then
+ netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
+ if [ "$netserverPid" != "" ] ; then
+ kill $netserverPid
+ fi
+ fi
+ fi
+fi
+exit