chore: add terraform for spinning up load test cluster (#7504)

Adds terraform configs for spinning up loadtest environments
This commit is contained in:
Cian Johnston
2023-05-15 15:56:47 +01:00
committed by GitHub
parent dab1d1fe20
commit 854e974bb4
14 changed files with 786 additions and 4 deletions

7
.gitignore vendored
View File

@ -48,9 +48,14 @@ site/stats/
*.lock.hcl *.lock.hcl
.terraform/ .terraform/
/.coderv2/* **/.coderv2/*
**/__debug_bin **/__debug_bin
# direnv # direnv
.envrc .envrc
*.test *.test
# Loadtesting
./scaletest/terraform/.terraform
./scaletest/terraform/.terraform.lock.hcl
terraform.tfstate.*

View File

@ -51,12 +51,17 @@ site/stats/
*.lock.hcl *.lock.hcl
.terraform/ .terraform/
/.coderv2/* **/.coderv2/*
**/__debug_bin **/__debug_bin
# direnv # direnv
.envrc .envrc
*.test *.test
# Loadtesting
./scaletest/terraform/.terraform
./scaletest/terraform/.terraform.lock.hcl
terraform.tfstate.*
# .prettierignore.include: # .prettierignore.include:
# Helm templates contain variables that are invalid YAML and can't be formatted # Helm templates contain variables that are invalid YAML and can't be formatted
# by Prettier. # by Prettier.

View File

@ -0,0 +1,40 @@
# Load Test Terraform
This folder contains Terraform code and scripts to aid in performing load tests of Coder.
It does the following:
- Creates a GCP VPC.
- Creates a CloudSQL instance with a global peering rule so it's accessible inside the VPC.
- Creates a GKE cluster inside the VPC with separate nodegroups for Coder and workspaces.
- Installs Coder in a new namespace, using the CloudSQL instance.
## Usage
> You must have an existing Google Cloud project available.
1. Create a file named `override.tfvars` with the following content, modifying as appropriate:
```terraform
name = "some_unique_identifier"
project_id = "some_google_project_id"
```
1. Inspect `vars.tf` and override any other variables you deem necessary.
1. Run `terraform init`.
1. Run `terraform plan -var-file=override.tfvars` and inspect the output.
If you are not satisfied, modify `override.tfvars` until you are.
1. Run `terraform apply -var-file=override.tfvars`. This will spin up a pre-configured environment
and emit the Coder URL as an output.
1. Run `coder_init.sh <coder_url>` to set up an initial user and a pre-configured Kubernetes
template. It will also download the Coder CLI from the Coder instance locally.
1. Do whatever you need to do with the Coder instance.
> To run Coder commands against the instance, you can use `coder_shim.sh <command>`.
> You don't need to run `coder login` yourself.
1. When you are finished, you can run `terraform destroy -var-file=override.tfvars`.

View File

@ -0,0 +1,250 @@
# Credentials of the gcloud identity running terraform; supplies the OAuth
# access token used to authenticate the kubernetes and helm providers below.
data "google_client_config" "default" {}

locals {
  # Official Coder Helm repository and chart.
  coder_helm_repo  = "https://helm.coder.com/v2"
  coder_helm_chart = "coder"
  # Release and namespace are derived from var.name so multiple loadtest
  # environments can coexist in one project.
  coder_release_name = var.name
  coder_namespace    = "coder-${var.name}"
  # NOTE(review): coder_admin_email/coder_admin_user appear unused in this
  # file — coder_init.sh hardcodes its own values; confirm before removing.
  coder_admin_email = "admin@coder.com"
  coder_admin_user  = "coder"
  # Static external IP reserved in vpc.tf. Plain HTTP (no TLS) is acceptable
  # for a throwaway loadtest environment.
  coder_address = google_compute_address.coder.address
  coder_url     = "http://${google_compute_address.coder.address}"
}

# Connect to the GKE cluster created in gke.tf using the caller's token.
provider "kubernetes" {
  host                   = "https://${google_container_cluster.primary.endpoint}"
  cluster_ca_certificate = base64decode(google_container_cluster.primary.master_auth.0.cluster_ca_certificate)
  token                  = data.google_client_config.default.access_token
}

# Helm requires its own copy of the same cluster connection settings.
provider "helm" {
  kubernetes {
    host                   = "https://${google_container_cluster.primary.endpoint}"
    cluster_ca_certificate = base64decode(google_container_cluster.primary.master_auth.0.cluster_ca_certificate)
    token                  = data.google_client_config.default.access_token
  }
}
# Dedicated namespace for the Coder deployment. Depends on the coder node
# pool so that pods created here have nodes to be scheduled onto.
resource "kubernetes_namespace" "coder_namespace" {
  metadata {
    name = local.coder_namespace
  }
  depends_on = [
    google_container_node_pool.coder
  ]
}

# Randomly generated database passwords.
# NOTE(review): postgres-admin-password appears unused in the visible config
# (sql.tf only references coder-postgres-password) — confirm before removing.
resource "random_password" "postgres-admin-password" {
  length = 12
}

# Password for the Coder application's database user (see google_sql_user in
# sql.tf); urlencode()d into the connection URL below.
resource "random_password" "coder-postgres-password" {
  length = 12
}
# Kubernetes secret holding the Postgres connection URL; consumed by the
# Coder pod through the CODER_PG_CONNECTION_URL env var in the Helm values.
resource "kubernetes_secret" "coder-db" {
  type = "" # Opaque
  metadata {
    name      = "coder-db-url"
    namespace = kubernetes_namespace.coder_namespace.metadata.0.name
  }
  data = {
    # Password is urlencode()d so special characters survive URL parsing.
    # sslmode=disable: traffic stays inside the private VPC peering.
    url = "postgres://${google_sql_user.coder.name}:${urlencode(random_password.coder-postgres-password.result)}@${google_sql_database_instance.db.private_ip_address}/${google_sql_database.coder.name}?sslmode=disable"
  }
}
# Installs Coder via Helm into the dedicated namespace: pods are pinned to the
# coder node pool, replicas are spread across nodes, the database URL comes
# from the coder-db secret, and the service is bound to the reserved static IP.
resource "helm_release" "coder-chart" {
  repository = local.coder_helm_repo
  chart      = local.coder_helm_chart
  name       = local.coder_release_name
  # null (the default) installs the latest published chart version.
  version   = var.coder_chart_version
  namespace = kubernetes_namespace.coder_namespace.metadata.0.name
  depends_on = [
    google_container_node_pool.coder,
  ]
  values = [<<EOF
coder:
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: "cloud.google.com/gke-nodepool"
                operator: "In"
                values: ["${google_container_node_pool.coder.name}"]
    podAntiAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 1
          podAffinityTerm:
            topologyKey: "kubernetes.io/hostname"
            labelSelector:
              matchExpressions:
                - key: "app.kubernetes.io/instance"
                  operator: "In"
                  values: ["${local.coder_release_name}"]
  env:
    - name: "CODER_CACHE_DIRECTORY"
      value: "/tmp/coder"
    - name: "CODER_ENABLE_TELEMETRY"
      value: "false"
    - name: "CODER_LOGGING_HUMAN"
      value: "/dev/null"
    - name: "CODER_LOGGING_STACKDRIVER"
      value: "/dev/stderr"
    - name: "CODER_PG_CONNECTION_URL"
      valueFrom:
        secretKeyRef:
          name: "${kubernetes_secret.coder-db.metadata.0.name}"
          key: url
    - name: "CODER_PROMETHEUS_ENABLE"
      value: "true"
    - name: "CODER_VERBOSE"
      value: "true"
  image:
    repo: ${var.coder_image_repo}
    tag: ${var.coder_image_tag}
  replicaCount: "${var.coder_replicas}"
  resources:
    requests:
      cpu: "${var.coder_cpu}"
      memory: "${var.coder_mem}"
    limits:
      cpu: "${var.coder_cpu}"
      memory: "${var.coder_mem}"
  securityContext:
    readOnlyRootFilesystem: true
  service:
    enable: true
    loadBalancerIP: "${local.coder_address}"
  volumeMounts:
    - mountPath: "/tmp"
      name: cache
      readOnly: false
  volumes:
    - emptyDir:
        sizeLimit: 1024Mi
      name: cache
EOF
  ]
}
# PodMonitoring manifest for GKE Managed Prometheus, written to disk so it
# can be applied with kubectl below (the google provider exposes no
# PodMonitoring resource). Scrapes Coder's prometheus-http port every 30s.
resource "local_file" "coder-monitoring-manifest" {
  filename = "${path.module}/.coderv2/coder-monitoring.yaml"
  content  = <<EOF
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  namespace: ${kubernetes_namespace.coder_namespace.metadata.0.name}
  name: coder-monitoring
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: coder
  endpoints:
    - port: prometheus-http
      interval: 30s
EOF
}

# Applies the manifest out-of-band: fetches cluster credentials into a
# dedicated kubeconfig (so the user's own kubeconfig is untouched), then
# kubectl-applies the PodMonitoring object. Requires gcloud and kubectl on
# the machine running terraform.
resource "null_resource" "coder-monitoring-manifest_apply" {
  provisioner "local-exec" {
    working_dir = "${abspath(path.module)}/.coderv2"
    command     = <<EOF
KUBECONFIG=${var.name}-cluster.kubeconfig gcloud container clusters get-credentials ${var.name}-cluster --project=${var.project_id} --zone=${var.zone} && \
KUBECONFIG=${var.name}-cluster.kubeconfig kubectl apply -f ${abspath(local_file.coder-monitoring-manifest.filename)}
EOF
  }
}
# Writes a Coder workspace template to disk for coder_init.sh to import.
# "$${...}" sequences are escaped so they are evaluated by the *inner*
# template at workspace build time; plain "${...}" is interpolated now, baking
# the namespace, workspace image, and workspaces node pool into the template.
resource "local_file" "kubernetes_template" {
  filename = "${path.module}/.coderv2/templates/kubernetes/main.tf"
  content  = <<EOF
terraform {
  required_providers {
    coder = {
      source  = "coder/coder"
      version = "~> 0.7.0"
    }
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "~> 2.18"
    }
  }
}

provider "coder" {}

provider "kubernetes" {
  config_path = null # always use host
}

data "coder_workspace" "me" {}

resource "coder_agent" "main" {
  os                     = "linux"
  arch                   = "amd64"
  startup_script_timeout = 180
  startup_script         = ""
}

resource "kubernetes_pod" "main" {
  count = data.coder_workspace.me.start_count
  metadata {
    name      = "coder-$${lower(data.coder_workspace.me.owner)}-$${lower(data.coder_workspace.me.name)}"
    namespace = "${kubernetes_namespace.coder_namespace.metadata.0.name}"
    labels = {
      "app.kubernetes.io/name"     = "coder-workspace"
      "app.kubernetes.io/instance" = "coder-workspace-$${lower(data.coder_workspace.me.owner)}-$${lower(data.coder_workspace.me.name)}"
    }
  }
  spec {
    security_context {
      run_as_user = "1000"
      fs_group    = "1000"
    }
    container {
      name              = "dev"
      image             = "${var.workspace_image}"
      image_pull_policy = "Always"
      command           = ["sh", "-c", coder_agent.main.init_script]
      security_context {
        run_as_user = "1000"
      }
      env {
        name  = "CODER_AGENT_TOKEN"
        value = coder_agent.main.token
      }
      resources {
        requests = {
          "cpu"    = "0.1"
          "memory" = "128Mi"
        }
        limits = {
          "cpu"    = "1"
          "memory" = "1Gi"
        }
      }
    }
    affinity {
      node_affinity {
        required_during_scheduling_ignored_during_execution {
          node_selector_term {
            match_expressions {
              key      = "cloud.google.com/gke-nodepool"
              operator = "In"
              values   = ["${google_container_node_pool.workspaces.name}"]
            }
          }
        }
      }
    }
  }
}
EOF
}

# The deployment's HTTP URL (static IP from vpc.tf); passed to coder_init.sh.
output "coder_url" {
  description = "URL of the Coder deployment"
  value       = local.coder_url
}

View File

@ -0,0 +1,51 @@
#!/usr/bin/env bash

# Bootstraps a freshly provisioned Coder deployment for load testing:
# downloads the Coder CLI from the deployment itself, creates the first
# (admin) user, writes the credentials to disk, and imports the kubernetes
# workspace template generated by terraform.
#
# Usage: coder_init.sh <coder URL>
set -euo pipefail

if [[ $# -lt 1 ]]; then
	echo "Usage: $0 <coder URL>"
	exit 1
fi

# Allow toggling verbose output
[[ -n ${VERBOSE:-} ]] && set -x

CODER_URL=$1
CONFIG_DIR="${PWD}/.coderv2"

# Map the machine architecture to Coder's release-asset naming.
# uname -m is POSIX, unlike arch(1), and we also handle ARM hosts.
ARCH="$(uname -m)"
case "$ARCH" in
x86_64) ARCH="amd64" ;;
aarch64 | arm64) ARCH="arm64" ;;
esac
PLATFORM="$(uname | tr '[:upper:]' '[:lower:]')"

mkdir -p "${CONFIG_DIR}"
echo "Fetching Coder CLI for first-time setup!"
# -k: the loadtest deployment serves plain HTTP / untrusted TLS.
curl -fsSLk "${CODER_URL}/bin/coder-${PLATFORM}-${ARCH}" -o "${CONFIG_DIR}/coder"
chmod +x "${CONFIG_DIR}/coder"

# head exits as soon as it has 16 chars, which would otherwise trip pipefail.
set +o pipefail
RANDOM_ADMIN_PASSWORD=$(tr </dev/urandom -dc _A-Z-a-z-0-9 | head -c16)
set -o pipefail

CODER_FIRST_USER_EMAIL="admin@coder.com"
CODER_FIRST_USER_USERNAME="coder"
CODER_FIRST_USER_PASSWORD="${RANDOM_ADMIN_PASSWORD}"
CODER_FIRST_USER_TRIAL="false"
echo "Running login command!"
"${CONFIG_DIR}/coder" login "${CODER_URL}" \
	--global-config="${CONFIG_DIR}" \
	--first-user-username="${CODER_FIRST_USER_USERNAME}" \
	--first-user-email="${CODER_FIRST_USER_EMAIL}" \
	--first-user-password="${CODER_FIRST_USER_PASSWORD}" \
	--first-user-trial="${CODER_FIRST_USER_TRIAL}"

echo "Writing credentials to ${CONFIG_DIR}/coder.env"
cat <<EOF >"${CONFIG_DIR}/coder.env"
CODER_FIRST_USER_EMAIL=admin@coder.com
CODER_FIRST_USER_USERNAME=coder
CODER_FIRST_USER_PASSWORD="${RANDOM_ADMIN_PASSWORD}"
CODER_FIRST_USER_TRIAL="${CODER_FIRST_USER_TRIAL}"
EOF

echo "Importing kubernetes template"
"${CONFIG_DIR}/coder" templates create --global-config="${CONFIG_DIR}" \
	--directory "${CONFIG_DIR}/templates/kubernetes" --yes kubernetes

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Shim for running Coder CLI commands against a loadtest cluster without
# clobbering your personal Coder session or deployment URL: it points the
# downloaded CLI at the loadtest config directory next to this script.
shim_dir=$(dirname "${BASH_SOURCE[0]}")
exec "${shim_dir}/.coderv2/coder" --global-config "${shim_dir}/.coderv2" "$@"

View File

@ -0,0 +1,125 @@
# Default Compute Engine service account for the project; every node pool
# below runs under this identity.
data "google_compute_default_service_account" "default" {
  project = var.project_id
}

# Zonal GKE cluster hosting the loadtest environment. The mandatory default
# node pool is immediately removed in favour of the dedicated pools below.
resource "google_container_cluster" "primary" {
  name       = var.name
  location   = var.zone
  project    = var.project_id
  network    = google_compute_network.vpc.name
  subnetwork = google_compute_subnetwork.subnet.name
  # VPC-native (alias IP) networking so pods can reach the private CloudSQL
  # instance over the VPC peering.
  networking_mode = "VPC_NATIVE"
  ip_allocation_policy { # Required with networking_mode=VPC_NATIVE
  }
  # NOTE(review): var.k8s_version is declared in vars.tf but not referenced
  # here — the STABLE release channel chooses the version. Confirm intent.
  release_channel {
    channel = "STABLE"
  }
  initial_node_count       = 1
  remove_default_node_pool = true
  network_policy {
    enabled = true
  }
  depends_on = [
    google_project_service.api["container.googleapis.com"]
  ]
  # Managed Prometheus scrapes the PodMonitoring object created in coder.tf.
  monitoring_config {
    enable_components = ["SYSTEM_COMPONENTS"]
    managed_prometheus {
      enabled = true
    }
  }
  workload_identity_config {
    workload_pool = "${data.google_project.project.project_id}.svc.id.goog"
  }
}
# Node pool dedicated to the Coder control plane (see nodeAffinity in the
# Helm values in coder.tf).
resource "google_container_node_pool" "coder" {
  name       = "${var.name}-coder"
  location   = var.zone
  project    = var.project_id
  cluster    = google_container_cluster.primary.name
  node_count = var.nodepool_size_coder
  node_config {
    # Standard GKE node scopes: logging, monitoring, tracing, image pulls.
    oauth_scopes = [
      "https://www.googleapis.com/auth/logging.write",
      "https://www.googleapis.com/auth/monitoring",
      "https://www.googleapis.com/auth/trace.append",
      "https://www.googleapis.com/auth/devstorage.read_only",
      "https://www.googleapis.com/auth/service.management.readonly",
      "https://www.googleapis.com/auth/servicecontrol",
    ]
    disk_size_gb    = var.node_disk_size_gb
    machine_type    = var.nodepool_machine_type_coder
    image_type      = var.node_image_type
    preemptible     = var.node_preemptible
    service_account = data.google_compute_default_service_account.default.email
    tags            = ["gke-node", "${var.project_id}-gke"]
    labels = {
      env = var.project_id
    }
    metadata = {
      # Legacy metadata endpoints leak instance credentials; keep disabled.
      disable-legacy-endpoints = "true"
    }
  }
}
# Node pool dedicated to workspace pods (the generated kubernetes template in
# coder.tf pins workspaces here via node affinity).
resource "google_container_node_pool" "workspaces" {
  name       = "${var.name}-workspaces"
  location   = var.zone
  project    = var.project_id
  cluster    = google_container_cluster.primary.name
  node_count = var.nodepool_size_workspaces
  node_config {
    # Standard GKE node scopes: logging, monitoring, tracing, image pulls.
    oauth_scopes = [
      "https://www.googleapis.com/auth/logging.write",
      "https://www.googleapis.com/auth/monitoring",
      "https://www.googleapis.com/auth/trace.append",
      "https://www.googleapis.com/auth/devstorage.read_only",
      "https://www.googleapis.com/auth/service.management.readonly",
      "https://www.googleapis.com/auth/servicecontrol",
    ]
    disk_size_gb    = var.node_disk_size_gb
    machine_type    = var.nodepool_machine_type_workspaces
    image_type      = var.node_image_type
    preemptible     = var.node_preemptible
    service_account = data.google_compute_default_service_account.default.email
    tags            = ["gke-node", "${var.project_id}-gke"]
    labels = {
      env = var.project_id
    }
    metadata = {
      # Legacy metadata endpoints leak instance credentials; keep disabled.
      disable-legacy-endpoints = "true"
    }
  }
}
# Node pool for everything else (e.g. load-generation workloads), keeping
# them off the Coder and workspace pools.
resource "google_container_node_pool" "misc" {
  name       = "${var.name}-misc"
  location   = var.zone
  project    = var.project_id
  cluster    = google_container_cluster.primary.name
  node_count = var.nodepool_size_misc
  node_config {
    # Standard GKE node scopes: logging, monitoring, tracing, image pulls.
    oauth_scopes = [
      "https://www.googleapis.com/auth/logging.write",
      "https://www.googleapis.com/auth/monitoring",
      "https://www.googleapis.com/auth/trace.append",
      "https://www.googleapis.com/auth/devstorage.read_only",
      "https://www.googleapis.com/auth/service.management.readonly",
      "https://www.googleapis.com/auth/servicecontrol",
    ]
    disk_size_gb    = var.node_disk_size_gb
    machine_type    = var.nodepool_machine_type_misc
    image_type      = var.node_image_type
    preemptible     = var.node_preemptible
    service_account = data.google_compute_default_service_account.default.email
    tags            = ["gke-node", "${var.project_id}-gke"]
    labels = {
      env = var.project_id
    }
    metadata = {
      # Legacy metadata endpoints leak instance credentials; keep disabled.
      disable-legacy-endpoints = "true"
    }
  }
}

View File

@ -0,0 +1,53 @@
# Private-IP-only CloudSQL Postgres instance backing the Coder deployment.
resource "google_sql_database_instance" "db" {
  name             = var.name
  region           = var.region
  database_version = var.cloudsql_version
  # Loadtest environments are throwaway: allow `terraform destroy` to delete.
  deletion_protection = false
  # The private IP requires the VPC service-networking peering to exist first.
  depends_on = [google_service_networking_connection.private_vpc_connection]
  settings {
    tier              = var.cloudsql_tier
    activation_policy = "ALWAYS"
    availability_type = "ZONAL"
    # Keep the DB in the same zone as the GKE cluster.
    location_preference {
      zone = var.zone
    }
    database_flags {
      name  = "max_connections"
      value = var.cloudsql_max_connections
    }
    ip_configuration {
      # No public IP; reachable only from inside the peered VPC.
      ipv4_enabled    = false
      private_network = google_compute_network.vpc.id
    }
    insights_config {
      query_insights_enabled  = true
      query_string_length     = 1024
      record_application_tags = false
      record_client_address   = false
    }
  }
}

# Application database used by Coder (referenced in the connection URL built
# in coder.tf).
resource "google_sql_database" "coder" {
  project  = var.project_id
  instance = google_sql_database_instance.db.id
  name     = "${var.name}-coder"
  # required for postgres, otherwise db fails to delete
  deletion_policy = "ABANDON"
}

# Database user Coder connects as; the password is generated in coder.tf.
resource "google_sql_user" "coder" {
  project  = var.project_id
  instance = google_sql_database_instance.db.id
  name     = "${var.name}-coder"
  type     = "BUILT_IN"
  password = random_password.coder-postgres-password.result
  # required for postgres, otherwise user fails to delete
  deletion_policy = "ABANDON"
}

View File

@ -0,0 +1,32 @@
# Default region/project for all google provider resources.
provider "google" {
  region  = var.region
  project = var.project_id
}

locals {
  # Google Cloud APIs that must be enabled before resources are provisioned;
  # the ".googleapis.com" suffix is appended below.
  project_apis = [
    "cloudtrace",
    "compute",
    "container",
    "logging",
    "monitoring",
    "servicemanagement",
    "servicenetworking",
    "sqladmin",
    "stackdriver",
    "storage-api",
  ]
}

data "google_project" "project" {
  project_id = var.project_id
}

# Enables each required API. Dependent services and the APIs themselves are
# left enabled on destroy so other workloads in the project are undisturbed.
resource "google_project_service" "api" {
  for_each                   = toset(local.project_apis)
  project                    = data.google_project.project.project_id
  service                    = "${each.value}.googleapis.com"
  disable_dependent_services = false
  disable_on_destroy         = false
}

View File

@ -0,0 +1,39 @@
# Dedicated VPC for the loadtest cluster; subnets are created explicitly.
resource "google_compute_network" "vpc" {
  project                 = var.project_id
  name                    = var.name
  auto_create_subnetworks = "false"
  depends_on = [
    google_project_service.api["compute.googleapis.com"]
  ]
}

# Regional subnet for the GKE nodes.
resource "google_compute_subnetwork" "subnet" {
  name          = var.name
  project       = var.project_id
  region        = var.region
  network       = google_compute_network.vpc.name
  ip_cidr_range = "10.10.0.0/24"
}

# Internal address range reserved for the CloudSQL private-services peering.
resource "google_compute_global_address" "sql_peering" {
  project       = var.project_id
  name          = "${var.name}-sql-peering"
  purpose       = "VPC_PEERING"
  address_type  = "INTERNAL"
  prefix_length = 16
  network       = google_compute_network.vpc.id
}

# Static external IP attached to the Coder LoadBalancer service (coder.tf).
resource "google_compute_address" "coder" {
  project      = var.project_id
  region       = var.region
  name         = "${var.name}-coder"
  address_type = "EXTERNAL"
  network_tier = "PREMIUM"
}

# Peers the VPC with Google's service-producer network so the CloudSQL
# instance receives a private IP inside the reserved range above.
resource "google_service_networking_connection" "private_vpc_connection" {
  network                 = google_compute_network.vpc.id
  service                 = "servicenetworking.googleapis.com"
  reserved_peering_ranges = [google_compute_global_address.sql_peering.name]
}

View File

@ -0,0 +1,35 @@
# Provider and Terraform version constraints for the loadtest configuration.
terraform {
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = "~> 4.36"
    }
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "~> 2.20"
    }
    helm = {
      source  = "hashicorp/helm"
      version = "~> 2.9"
    }
    random = {
      source  = "hashicorp/random"
      version = "~> 3.5"
    }
    # NOTE(review): tls and docker are not referenced in the visible configs;
    # confirm they are needed before removing.
    tls = {
      source  = "hashicorp/tls"
      version = "~> 4.0"
    }
    docker = {
      source  = "kreuzwerker/docker"
      version = "~> 3.0"
    }
  }
  required_version = "~> 1.4.0"
}

129
scaletest/terraform/vars.tf Normal file
View File

@ -0,0 +1,129 @@
// Input variables for the loadtest environment. Override via override.tfvars
// (see README). Only project_id and name are required.

variable "project_id" {
  description = "The project in which to provision resources"
}

// Used as a prefix on (and namespace suffix for) every resource so multiple
// loadtest environments can coexist in one project.
variable "name" {
  description = "Adds a prefix to resources."
}

variable "region" {
  description = "GCP region in which to provision resources."
  default     = "us-east1"
}

variable "zone" {
  description = "GCP zone in which to provision resources."
  default     = "us-east1-c"
}

variable "k8s_version" {
  description = "Kubernetes version to provision."
  default     = "1.24"
}

variable "node_disk_size_gb" {
  description = "Size of the root disk for cluster nodes."
  default     = 100
}

variable "node_image_type" {
  description = "Image type to use for cluster nodes."
  default     = "cos_containerd"
}

// Preemptible nodes are way cheaper, but can be pulled out
// from under you at any time. Caveat emptor.
variable "node_preemptible" {
  description = "Use preemptible nodes."
  default     = false
}

// We create three nodepools:
// - One for the Coder control plane
// - One for workspaces
// - One for everything else (for example, load generation)

// These variables control the node pool dedicated to Coder.
variable "nodepool_machine_type_coder" {
  description = "Machine type to use for Coder control plane nodepool."
  default     = "t2d-standard-4"
}

variable "nodepool_size_coder" {
  description = "Number of cluster nodes for the Coder control plane nodepool."
  default     = 1
}

// These variables control the node pool dedicated to workspaces.
variable "nodepool_machine_type_workspaces" {
  description = "Machine type to use for the workspaces nodepool."
  default     = "t2d-standard-4"
}

variable "nodepool_size_workspaces" {
  description = "Number of cluster nodes for the workspaces nodepool."
  default     = 1
}

// These variables control the node pool for everything else.
variable "nodepool_machine_type_misc" {
  description = "Machine type to use for the misc nodepool."
  default     = "t2d-standard-4"
}

variable "nodepool_size_misc" {
  description = "Number of cluster nodes for the misc nodepool."
  default     = 1
}

// These variables control the size of the database to be used by Coder.
variable "cloudsql_version" {
  description = "CloudSQL version to provision"
  default     = "POSTGRES_14"
}

variable "cloudsql_tier" {
  description = "CloudSQL database tier."
  default     = "db-f1-micro"
}

variable "cloudsql_max_connections" {
  description = "CloudSQL database max_connections"
  default     = 500
}

// These variables control the Coder deployment.
variable "coder_replicas" {
  description = "Number of Coder replicas to provision"
  default     = 1
}

variable "coder_cpu" {
  description = "CPU to allocate to Coder"
  default     = "1000m"
}

variable "coder_mem" {
  description = "Memory to allocate to Coder"
  default     = "1024Mi"
}

// null means "install the latest published chart version".
variable "coder_chart_version" {
  description = "Version of the Coder Helm chart to install. Defaults to latest."
  default     = null
}

variable "coder_image_repo" {
  description = "Repository to use for Coder image."
  default     = "ghcr.io/coder/coder"
}

variable "coder_image_tag" {
  description = "Tag to use for Coder image."
  default     = "latest"
}

variable "workspace_image" {
  description = "Image and tag to use for workspaces."
  default     = "docker.io/codercom/enterprise-minimal:ubuntu"
}

View File

@ -51,12 +51,17 @@ stats/
*.lock.hcl *.lock.hcl
.terraform/ .terraform/
../.coderv2/* **/.coderv2/*
**/__debug_bin **/__debug_bin
# direnv # direnv
.envrc .envrc
*.test *.test
# Loadtesting
.././scaletest/terraform/.terraform
.././scaletest/terraform/.terraform.lock.hcl
terraform.tfstate.*
# .prettierignore.include: # .prettierignore.include:
# Helm templates contain variables that are invalid YAML and can't be formatted # Helm templates contain variables that are invalid YAML and can't be formatted
# by Prettier. # by Prettier.

View File

@ -51,12 +51,17 @@ stats/
*.lock.hcl *.lock.hcl
.terraform/ .terraform/
../.coderv2/* **/.coderv2/*
**/__debug_bin **/__debug_bin
# direnv # direnv
.envrc .envrc
*.test *.test
# Loadtesting
.././scaletest/terraform/.terraform
.././scaletest/terraform/.terraform.lock.hcl
terraform.tfstate.*
# .prettierignore.include: # .prettierignore.include:
# Helm templates contain variables that are invalid YAML and can't be formatted # Helm templates contain variables that are invalid YAML and can't be formatted
# by Prettier. # by Prettier.