chore: fix miscellaneous issues in scaletest scripts (#8006)

* chore: scaletest: plumb through more options

* bump terraform version

* scaletest.sh: pprof during traffic gen

* cli/scaletest: actually wait for prometheus metrics to be scraped

* increase prometheus wait
This commit is contained in:
Cian Johnston
2023-06-14 01:38:04 -07:00
committed by GitHub
parent 1da2570e14
commit df842b31e8
4 changed files with 83 additions and 26 deletions

View File

@ -902,10 +902,10 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
_, _ = fmt.Fprintln(inv.Stderr, "\nUploading traces...")
if err := closeTracing(ctx); err != nil {
_, _ = fmt.Fprintf(inv.Stderr, "\nError uploading traces: %+v\n", err)
}
// Wait for prometheus metrics to be scraped
_, _ = fmt.Fprintf(inv.Stderr, "Waiting %s for prometheus metrics to be scraped\n", scaletestPrometheusWait)
<-time.After(scaletestPrometheusWait)
}
}()
tracer := tracerProvider.Tracer(scaletestTracerName)

View File

@ -2,29 +2,68 @@
set -euo pipefail
if [[ $# -lt 1 ]]; then
echo "Usage: $0 <loadtest name>"
exit 1
fi
PROJECT_ROOT="$(git rev-parse --show-toplevel)"
# shellcheck source=scripts/lib.sh
source "${PROJECT_ROOT}/scripts/lib.sh"
# Allow toggling verbose output
[[ -n ${VERBOSE:-} ]] && set -x
LOADTEST_NAME="$1"
PROJECT_ROOT="$(git rev-parse --show-toplevel)"
SCALETEST_NAME="${SCALETEST_NAME:-}"
SCALETEST_TRAFFIC_BYTES_PER_TICK="${SCALETEST_TRAFFIC_BYTES_PER_TICK:-1024}"
SCALETEST_TRAFFIC_TICK_INTERVAL="${SCALETEST_TRAFFIC_TICK_INTERVAL:-100ms}"
script_name=$(basename "$0")
args="$(getopt -o "" -l help,name:,traffic-bytes-per-tick:,traffic-tick-interval:, -- "$@")"
eval set -- "$args"
# Consume the options normalised by getopt (installed via `eval set -- "$args"`).
# Each value-taking option reads its argument from $2 and shifts both words;
# the loop ends at the `--` separator that getopt appends after the options.
while true; do
	case "$1" in
	--help)
		# Placeholders name the values the flags accept: a byte count and a
		# tick interval (e.g. 100ms).
		echo "Usage: $script_name --name <name> [--traffic-bytes-per-tick <bytes-per-tick>] [--traffic-tick-interval <tick-interval>]"
		exit 1
		;;
	--name)
		SCALETEST_NAME="$2"
		shift 2
		;;
	--traffic-bytes-per-tick)
		SCALETEST_TRAFFIC_BYTES_PER_TICK="$2"
		shift 2
		;;
	--traffic-tick-interval)
		SCALETEST_TRAFFIC_TICK_INTERVAL="$2"
		shift 2
		;;
	--)
		# End of options: drop the separator and stop parsing.
		shift
		break
		;;
	*)
		# NOTE(review): `error` is not defined in this script; presumably it
		# comes from scripts/lib.sh and exits. If it returned, this loop would
		# never advance past the bad option -- confirm.
		error "Unrecognized option: $1"
		;;
	esac
done
dependencies kubectl
if [[ -z "${SCALETEST_NAME}" ]]; then
echo "Must specify --name"
exit 1
fi
CODER_TOKEN=$("${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" tokens create)
CODER_URL="http://coder.coder-${LOADTEST_NAME}.svc.cluster.local"
export KUBECONFIG="${PROJECT_ROOT}/scaletest/.coderv2/${LOADTEST_NAME}-cluster.kubeconfig"
CODER_URL="http://coder.coder-${SCALETEST_NAME}.svc.cluster.local"
export KUBECONFIG="${PROJECT_ROOT}/scaletest/.coderv2/${SCALETEST_NAME}-cluster.kubeconfig"
# Clean up any pre-existing pods
kubectl -n "coder-${LOADTEST_NAME}" delete pod coder-scaletest-workspace-traffic --force || true
kubectl -n "coder-${SCALETEST_NAME}" delete pod coder-scaletest-workspace-traffic --force || true
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: coder-scaletest-workspace-traffic
namespace: coder-${LOADTEST_NAME}
namespace: coder-${SCALETEST_NAME}
labels:
app.kubernetes.io/name: coder-scaletest-workspace-traffic
spec:
@ -36,12 +75,12 @@ spec:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- ${LOADTEST_NAME}-misc
- ${SCALETEST_NAME}-misc
containers:
- command:
- sh
- -c
- "curl -fsSL $CODER_URL/bin/coder-linux-amd64 -o /tmp/coder && chmod +x /tmp/coder && /tmp/coder --verbose --url=$CODER_URL --token=$CODER_TOKEN scaletest workspace-traffic --concurrency=0 --bytes-per-tick=4096 --tick-interval=100ms"
- "curl -fsSL $CODER_URL/bin/coder-linux-amd64 -o /tmp/coder && chmod +x /tmp/coder && /tmp/coder --verbose --url=$CODER_URL --token=$CODER_TOKEN scaletest workspace-traffic --concurrency=0 --bytes-per-tick=${SCALETEST_TRAFFIC_BYTES_PER_TICK} --tick-interval=${SCALETEST_TRAFFIC_TICK_INTERVAL} --scaletest-prometheus-wait=60s"
env:
- name: CODER_URL
value: $CODER_URL
@ -51,21 +90,18 @@ spec:
value: "0.0.0.0:21112"
- name: CODER_SCALETEST_JOB_TIMEOUT
value: "30m"
- name: CODER_SCALETEST_CONCURRENCY
value: "0"
- name: CODER_SCALETEST_WORKSPACE_TRAFFIC_BYTES_PER_TICK
value: "2048"
ports:
- containerPort: 21112
name: prometheus-http
protocol: TCP
name: cli
image: docker.io/codercom/enterprise-minimal:ubuntu
restartPolicy: Never
---
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
namespace: coder-${LOADTEST_NAME}
namespace: coder-${SCALETEST_NAME}
name: coder-workspacetraffic-monitoring
spec:
selector:

View File

@ -15,18 +15,25 @@ SCALETEST_PROJECT="${SCALETEST_PROJECT:-}"
SCALETEST_PROMETHEUS_REMOTE_WRITE_USER="${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER:-}"
SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD="${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD:-}"
SCALETEST_SKIP_CLEANUP="${SCALETEST_SKIP_CLEANUP:-0}"
# Defaults for the tunables below; each may be overridden from the environment
# or by the matching command-line option.
# Number of workspaces created concurrently.
SCALETEST_CREATE_CONCURRENCY="${SCALETEST_CREATE_CONCURRENCY:-10}"
# Bytes sent to each workspace per tick during traffic generation.
SCALETEST_TRAFFIC_BYTES_PER_TICK="${SCALETEST_TRAFFIC_BYTES_PER_TICK:-1024}"
# Interval between ticks. This is forwarded to `--tick-interval`, which takes a
# duration, so the default must carry a unit (a bare "10" has none).
SCALETEST_TRAFFIC_TICK_INTERVAL="${SCALETEST_TRAFFIC_TICK_INTERVAL:-100ms}"
script_name=$(basename "$0")
args="$(getopt -o "" -l dry-run,help,name:,num-workspaces:,project:,scenario:,skip-cleanup -- "$@")"
args="$(getopt -o "" -l create-concurrency:,dry-run,help,name:,num-workspaces:,project:,scenario:,skip-cleanup,traffic-bytes-per-tick:,traffic-tick-interval:, -- "$@")"
eval set -- "$args"
while true; do
case "$1" in
--create-concurrency)
SCALETEST_CREATE_CONCURRENCY="$2"
shift 2
;;
--dry-run)
DRY_RUN=1
shift
;;
--help)
echo "Usage: $script_name --name <name> --project <project> --num-workspaces <num-workspaces> --scenario <scenario> [--dry-run] [--skip-cleanup]"
echo "Usage: $script_name --name <name> --project <project> --num-workspaces <num-workspaces> --scenario <scenario> [--dry-run] [--skip-cleanup] [--create-concurrency=<create-concurrency>]"
exit 1
;;
--name)
@ -49,6 +56,14 @@ while true; do
SCALETEST_SKIP_CLEANUP=1
shift
;;
--traffic-bytes-per-tick)
SCALETEST_TRAFFIC_BYTES_PER_TICK="$2"
shift 2
;;
--traffic-tick-interval)
SCALETEST_TRAFFIC_TICK_INTERVAL="$2"
shift 2
;;
--)
shift
break
@ -144,16 +159,21 @@ echo "Creating ${SCALETEST_NUM_WORKSPACES} workspaces."
DRY_RUN="$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" scaletest create-workspaces \
--count "${SCALETEST_NUM_WORKSPACES}" \
--template=kubernetes \
--concurrency 10 \
--concurrency "${SCALETEST_CREATE_CONCURRENCY}" \
--no-cleanup
echo "Sleeping 10 minutes to establish a baseline measurement."
maybedryrun "$DRY_RUN" sleep 600
echo "Sending traffic to workspaces"
maybedryrun "$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_workspacetraffic.sh" "${SCALETEST_NAME}"
maybedryrun "$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_workspacetraffic.sh" \
--name "${SCALETEST_NAME}" \
--traffic-bytes-per-tick "${SCALETEST_TRAFFIC_BYTES_PER_TICK}" \
--traffic-tick-interval "${SCALETEST_TRAFFIC_TICK_INTERVAL}"
maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" wait pods coder-scaletest-workspace-traffic --for condition=Ready
maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" logs -f pod/coder-scaletest-workspace-traffic
echo "Sleeping 15 minutes for traffic generation"
maybedryrun "$DRY_RUN" sleep 900
echo "Starting pprof"
maybedryrun "$DRY_RUN" kubectl -n "coder-${SCALETEST_NAME}" port-forward deployment/coder 6061:6060 &
@ -168,6 +188,7 @@ while ! maybedryrun "$DRY_RUN" timeout 1 bash -c "echo > /dev/tcp/localhost/6061
echo "pprof failed to become ready in time!"
exit 1
fi
# Increment arithmetically. A plain `+=1` on a variable without the integer
# attribute appends the character "1" (0 -> 01 -> 011), so a numeric retry
# limit would never be matched.
pprof_attempt_counter=$((pprof_attempt_counter + 1))
maybedryrun "$DRY_RUN" sleep 3
done

View File

@ -31,5 +31,5 @@ terraform {
}
}
required_version = "~> 1.4.0"
required_version = "~> 1.5.0"
}