chore: fix miscellaneous issues in scaletest scripts (#8006)

* chore: scaletest: plumb through more options

* bump terraform required_version from "~> 1.4.0" to "~> 1.5.0"

* scaletest.sh: pprof during traffic generation

* cli/scaletest: actually wait for prometheus metrics to be scraped

* increase prometheus wait
This commit is contained in:
Cian Johnston
2023-06-14 01:38:04 -07:00
committed by GitHub
parent 1da2570e14
commit df842b31e8
4 changed files with 83 additions and 26 deletions

View File

@ -902,10 +902,10 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
_, _ = fmt.Fprintln(inv.Stderr, "\nUploading traces...") _, _ = fmt.Fprintln(inv.Stderr, "\nUploading traces...")
if err := closeTracing(ctx); err != nil { if err := closeTracing(ctx); err != nil {
_, _ = fmt.Fprintf(inv.Stderr, "\nError uploading traces: %+v\n", err) _, _ = fmt.Fprintf(inv.Stderr, "\nError uploading traces: %+v\n", err)
}
// Wait for prometheus metrics to be scraped // Wait for prometheus metrics to be scraped
_, _ = fmt.Fprintf(inv.Stderr, "Waiting %s for prometheus metrics to be scraped\n", scaletestPrometheusWait) _, _ = fmt.Fprintf(inv.Stderr, "Waiting %s for prometheus metrics to be scraped\n", scaletestPrometheusWait)
<-time.After(scaletestPrometheusWait) <-time.After(scaletestPrometheusWait)
}
}() }()
tracer := tracerProvider.Tracer(scaletestTracerName) tracer := tracerProvider.Tracer(scaletestTracerName)

View File

@ -2,29 +2,68 @@
set -euo pipefail set -euo pipefail
if [[ $# -lt 1 ]]; then PROJECT_ROOT="$(git rev-parse --show-toplevel)"
echo "Usage: $0 <loadtest name>" # shellcheck source=scripts/lib.sh
exit 1 source "${PROJECT_ROOT}/scripts/lib.sh"
fi
# Allow toggling verbose output # Allow toggling verbose output
[[ -n ${VERBOSE:-} ]] && set -x [[ -n ${VERBOSE:-} ]] && set -x
LOADTEST_NAME="$1" SCALETEST_NAME="${SCALETEST_NAME:-}"
PROJECT_ROOT="$(git rev-parse --show-toplevel)" SCALETEST_TRAFFIC_BYTES_PER_TICK="${SCALETEST_TRAFFIC_BYTES_PER_TICK:-1024}"
SCALETEST_TRAFFIC_TICK_INTERVAL="${SCALETEST_TRAFFIC_TICK_INTERVAL:-100ms}"
script_name=$(basename "$0")
args="$(getopt -o "" -l help,name:,traffic-bytes-per-tick:,traffic-tick-interval:, -- "$@")"
eval set -- "$args"
while true; do
case "$1" in
--help)
echo "Usage: $script_name --name <name> [--traffic-bytes-per-tick <bytes_per-tick>] [--traffic-tick-interval <ticks_per_second]"
exit 1
;;
--name)
SCALETEST_NAME="$2"
shift 2
;;
--traffic-bytes-per-tick)
SCALETEST_TRAFFIC_BYTES_PER_TICK="$2"
shift 2
;;
--traffic-tick-interval)
SCALETEST_TRAFFIC_TICK_INTERVAL="$2"
shift 2
;;
--)
shift
break
;;
*)
error "Unrecognized option: $1"
;;
esac
done
dependencies kubectl
if [[ -z "${SCALETEST_NAME}" ]]; then
echo "Must specify --name"
exit 1
fi
CODER_TOKEN=$("${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" tokens create) CODER_TOKEN=$("${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" tokens create)
CODER_URL="http://coder.coder-${LOADTEST_NAME}.svc.cluster.local" CODER_URL="http://coder.coder-${SCALETEST_NAME}.svc.cluster.local"
export KUBECONFIG="${PROJECT_ROOT}/scaletest/.coderv2/${LOADTEST_NAME}-cluster.kubeconfig" export KUBECONFIG="${PROJECT_ROOT}/scaletest/.coderv2/${SCALETEST_NAME}-cluster.kubeconfig"
# Clean up any pre-existing pods # Clean up any pre-existing pods
kubectl -n "coder-${LOADTEST_NAME}" delete pod coder-scaletest-workspace-traffic --force || true kubectl -n "coder-${SCALETEST_NAME}" delete pod coder-scaletest-workspace-traffic --force || true
cat <<EOF | kubectl apply -f - cat <<EOF | kubectl apply -f -
apiVersion: v1 apiVersion: v1
kind: Pod kind: Pod
metadata: metadata:
name: coder-scaletest-workspace-traffic name: coder-scaletest-workspace-traffic
namespace: coder-${LOADTEST_NAME} namespace: coder-${SCALETEST_NAME}
labels: labels:
app.kubernetes.io/name: coder-scaletest-workspace-traffic app.kubernetes.io/name: coder-scaletest-workspace-traffic
spec: spec:
@ -36,12 +75,12 @@ spec:
- key: cloud.google.com/gke-nodepool - key: cloud.google.com/gke-nodepool
operator: In operator: In
values: values:
- ${LOADTEST_NAME}-misc - ${SCALETEST_NAME}-misc
containers: containers:
- command: - command:
- sh - sh
- -c - -c
- "curl -fsSL $CODER_URL/bin/coder-linux-amd64 -o /tmp/coder && chmod +x /tmp/coder && /tmp/coder --verbose --url=$CODER_URL --token=$CODER_TOKEN scaletest workspace-traffic --concurrency=0 --bytes-per-tick=4096 --tick-interval=100ms" - "curl -fsSL $CODER_URL/bin/coder-linux-amd64 -o /tmp/coder && chmod +x /tmp/coder && /tmp/coder --verbose --url=$CODER_URL --token=$CODER_TOKEN scaletest workspace-traffic --concurrency=0 --bytes-per-tick=${SCALETEST_TRAFFIC_BYTES_PER_TICK} --tick-interval=${SCALETEST_TRAFFIC_TICK_INTERVAL} --scaletest-prometheus-wait=60s"
env: env:
- name: CODER_URL - name: CODER_URL
value: $CODER_URL value: $CODER_URL
@ -51,21 +90,18 @@ spec:
value: "0.0.0.0:21112" value: "0.0.0.0:21112"
- name: CODER_SCALETEST_JOB_TIMEOUT - name: CODER_SCALETEST_JOB_TIMEOUT
value: "30m" value: "30m"
- name: CODER_SCALETEST_CONCURRENCY
value: "0"
- name: CODER_SCALETEST_WORKSPACE_TRAFFIC_BYTES_PER_TICK
value: "2048"
ports: ports:
- containerPort: 21112 - containerPort: 21112
name: prometheus-http name: prometheus-http
protocol: TCP protocol: TCP
name: cli name: cli
image: docker.io/codercom/enterprise-minimal:ubuntu image: docker.io/codercom/enterprise-minimal:ubuntu
restartPolicy: Never
--- ---
apiVersion: monitoring.coreos.com/v1 apiVersion: monitoring.coreos.com/v1
kind: PodMonitor kind: PodMonitor
metadata: metadata:
namespace: coder-${LOADTEST_NAME} namespace: coder-${SCALETEST_NAME}
name: coder-workspacetraffic-monitoring name: coder-workspacetraffic-monitoring
spec: spec:
selector: selector:

View File

@ -15,18 +15,25 @@ SCALETEST_PROJECT="${SCALETEST_PROJECT:-}"
SCALETEST_PROMETHEUS_REMOTE_WRITE_USER="${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER:-}" SCALETEST_PROMETHEUS_REMOTE_WRITE_USER="${SCALETEST_PROMETHEUS_REMOTE_WRITE_USER:-}"
SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD="${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD:-}" SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD="${SCALETEST_PROMETHEUS_REMOTE_WRITE_PASSWORD:-}"
SCALETEST_SKIP_CLEANUP="${SCALETEST_SKIP_CLEANUP:-0}" SCALETEST_SKIP_CLEANUP="${SCALETEST_SKIP_CLEANUP:-0}"
SCALETEST_CREATE_CONCURRENCY="${SCALETEST_CREATE_CONCURRENCY:-10}"
SCALETEST_TRAFFIC_BYTES_PER_TICK="${SCALETEST_TRAFFIC_BYTES_PER_TICK:-1024}"
SCALETEST_TRAFFIC_TICK_INTERVAL="${SCALETEST_TRAFFIC_TICK_INTERVAL:-10}"
script_name=$(basename "$0") script_name=$(basename "$0")
args="$(getopt -o "" -l dry-run,help,name:,num-workspaces:,project:,scenario:,skip-cleanup -- "$@")" args="$(getopt -o "" -l create-concurrency:,dry-run,help,name:,num-workspaces:,project:,scenario:,skip-cleanup,traffic-bytes-per-tick:,traffic-tick-interval:, -- "$@")"
eval set -- "$args" eval set -- "$args"
while true; do while true; do
case "$1" in case "$1" in
--create-concurrency)
SCALETEST_CREATE_CONCURRENCY="$2"
shift 2
;;
--dry-run) --dry-run)
DRY_RUN=1 DRY_RUN=1
shift shift
;; ;;
--help) --help)
echo "Usage: $script_name --name <name> --project <project> --num-workspaces <num-workspaces> --scenario <scenario> [--dry-run] [--skip-cleanup]" echo "Usage: $script_name --name <name> --project <project> --num-workspaces <num-workspaces> --scenario <scenario> [--dry-run] [--skip-cleanup] [--create-concurrency=<create-concurrency>]"
exit 1 exit 1
;; ;;
--name) --name)
@ -49,6 +56,14 @@ while true; do
SCALETEST_SKIP_CLEANUP=1 SCALETEST_SKIP_CLEANUP=1
shift shift
;; ;;
--traffic-bytes-per-tick)
SCALETEST_TRAFFIC_BYTES_PER_TICK="$2"
shift 2
;;
--traffic-tick-interval)
SCALETEST_TRAFFIC_TICK_INTERVAL="$2"
shift 2
;;
--) --)
shift shift
break break
@ -144,16 +159,21 @@ echo "Creating ${SCALETEST_NUM_WORKSPACES} workspaces."
DRY_RUN="$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" scaletest create-workspaces \ DRY_RUN="$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_shim.sh" scaletest create-workspaces \
--count "${SCALETEST_NUM_WORKSPACES}" \ --count "${SCALETEST_NUM_WORKSPACES}" \
--template=kubernetes \ --template=kubernetes \
--concurrency 10 \ --concurrency "${SCALETEST_CREATE_CONCURRENCY}" \
--no-cleanup --no-cleanup
echo "Sleeping 10 minutes to establish a baseline measurement." echo "Sleeping 10 minutes to establish a baseline measurement."
maybedryrun "$DRY_RUN" sleep 600 maybedryrun "$DRY_RUN" sleep 600
echo "Sending traffic to workspaces" echo "Sending traffic to workspaces"
maybedryrun "$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_workspacetraffic.sh" "${SCALETEST_NAME}" maybedryrun "$DRY_RUN" "${PROJECT_ROOT}/scaletest/lib/coder_workspacetraffic.sh" \
--name "${SCALETEST_NAME}" \
--traffic-bytes-per-tick "${SCALETEST_TRAFFIC_BYTES_PER_TICK}" \
--traffic-tick-interval "${SCALETEST_TRAFFIC_TICK_INTERVAL}"
maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" wait pods coder-scaletest-workspace-traffic --for condition=Ready maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" wait pods coder-scaletest-workspace-traffic --for condition=Ready
maybedryrun "$DRY_RUN" kubectl --kubeconfig="${KUBECONFIG}" -n "coder-${SCALETEST_NAME}" logs -f pod/coder-scaletest-workspace-traffic
echo "Sleeping 15 minutes for traffic generation"
maybedryrun "$DRY_RUN" sleep 900
echo "Starting pprof" echo "Starting pprof"
maybedryrun "$DRY_RUN" kubectl -n "coder-${SCALETEST_NAME}" port-forward deployment/coder 6061:6060 & maybedryrun "$DRY_RUN" kubectl -n "coder-${SCALETEST_NAME}" port-forward deployment/coder 6061:6060 &
@ -168,6 +188,7 @@ while ! maybedryrun "$DRY_RUN" timeout 1 bash -c "echo > /dev/tcp/localhost/6061
echo "pprof failed to become ready in time!" echo "pprof failed to become ready in time!"
exit 1 exit 1
fi fi
pprof_attempt_counter+=1
maybedryrun "$DRY_RUN" sleep 3 maybedryrun "$DRY_RUN" sleep 3
done done

View File

@ -31,5 +31,5 @@ terraform {
} }
} }
required_version = "~> 1.4.0" required_version = "~> 1.5.0"
} }