Compare commits

...

15 Commits

Author SHA1 Message Date
Vladyslav Matsiiako
506c30bcdb update docs note 2024-11-19 14:47:39 -05:00
Maidul Islam
735ad4ff65 Merge pull request #1924 from Infisical/misc/metrics-observability
feat: added setup for production observability (metrics via OTEL)
2024-11-19 13:56:29 -05:00
Sheen Capadngan
41e36dfcef misc: updated service name 2024-11-20 02:34:46 +08:00
Daniel Hougaard
421d8578b7 Merge pull request #2756 from Infisical/daniel/access-token-cleanup
fix(identity): remove access tokens when auth method is removed
2024-11-19 22:31:51 +04:00
Sheen Capadngan
d6c37c1065 misc: added metrics setup to self-host docs 2024-11-20 01:01:41 +08:00
Sheen Capadngan
d71c85e052 misc: finalized config files 2024-11-20 00:28:27 +08:00
Sheen Capadngan
0693f81d0a misc: finalized instrumentation setup 2024-11-19 23:25:20 +08:00
Sheen Capadngan
7c84adc1c2 misc: added new package to lock 2024-11-18 23:04:01 +08:00
Sheen Capadngan
fa8d6735a1 misc: reverted package lock 2024-11-18 23:00:55 +08:00
Sheen Capadngan
a6137f267d Merge remote-tracking branch 'origin/main' into misc/metrics-observability 2024-11-18 22:54:14 +08:00
Sheen Capadngan
fc3a409164 misc: added support for more config options 2024-06-12 01:39:06 +08:00
Sheen Capadngan
ffc58b0313 Merge remote-tracking branch 'origin/main' into misc/metrics-observability 2024-06-11 23:50:08 +08:00
Sheen Capadngan
9a7e05369c misc: added env-based flag for enabling telemetry 2024-06-06 00:56:11 +08:00
Sheen Capadngan
33b49f4466 misc: finalized config files 2024-06-06 00:42:24 +08:00
Sheen Capadngan
60895537a7 misc: initial working setup for metrics observabilit 2024-06-05 21:46:10 +08:00
21 changed files with 1866 additions and 51 deletions

View File

@@ -74,6 +74,14 @@ CAPTCHA_SECRET=
NEXT_PUBLIC_CAPTCHA_SITE_KEY=
OTEL_TELEMETRY_COLLECTION_ENABLED=
OTEL_EXPORT_TYPE=
OTEL_EXPORT_OTLP_ENDPOINT=
OTEL_OTLP_PUSH_INTERVAL=
OTEL_COLLECTOR_BASIC_AUTH_USERNAME=
OTEL_COLLECTOR_BASIC_AUTH_PASSWORD=
PLAIN_API_KEY=
PLAIN_WISH_LABEL_IDS=

View File

@@ -10,6 +10,9 @@ up-dev:
up-dev-ldap:
docker compose -f docker-compose.dev.yml --profile ldap up --build
up-dev-metrics:
docker compose -f docker-compose.dev.yml --profile metrics up --build
up-prod:
docker-compose -f docker-compose.prod.yml up --build
@@ -27,4 +30,3 @@ reviewable-api:
npm run type:check
reviewable: reviewable-ui reviewable-api

1606
backend/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -140,6 +140,14 @@
"@octokit/plugin-retry": "^5.0.5",
"@octokit/rest": "^20.0.2",
"@octokit/webhooks-types": "^7.3.1",
"@opentelemetry/api": "^1.9.0",
"@opentelemetry/auto-instrumentations-node": "^0.53.0",
"@opentelemetry/exporter-metrics-otlp-proto": "^0.55.0",
"@opentelemetry/exporter-prometheus": "^0.55.0",
"@opentelemetry/instrumentation": "^0.55.0",
"@opentelemetry/resources": "^1.28.0",
"@opentelemetry/sdk-metrics": "^1.28.0",
"@opentelemetry/semantic-conventions": "^1.27.0",
"@peculiar/asn1-schema": "^2.3.8",
"@peculiar/x509": "^1.12.1",
"@serdnam/pino-cloudwatch-transport": "^1.0.4",

View File

@@ -157,6 +157,15 @@ const envSchema = z
INFISICAL_CLOUD: zodStrBool.default("false"),
MAINTENANCE_MODE: zodStrBool.default("false"),
CAPTCHA_SECRET: zpStr(z.string().optional()),
// TELEMETRY
OTEL_TELEMETRY_COLLECTION_ENABLED: zodStrBool.default("false"),
OTEL_EXPORT_OTLP_ENDPOINT: zpStr(z.string().optional()),
OTEL_OTLP_PUSH_INTERVAL: z.coerce.number().default(30000),
OTEL_COLLECTOR_BASIC_AUTH_USERNAME: zpStr(z.string().optional()),
OTEL_COLLECTOR_BASIC_AUTH_PASSWORD: zpStr(z.string().optional()),
OTEL_EXPORT_TYPE: z.enum(["prometheus", "otlp"]).optional(),
PLAIN_API_KEY: zpStr(z.string().optional()),
PLAIN_WISH_LABEL_IDS: zpStr(z.string().optional()),
DISABLE_AUDIT_LOG_GENERATION: zodStrBool.default("false"),
@@ -203,11 +212,11 @@ let envCfg: Readonly<z.infer<typeof envSchema>>;
export const getConfig = () => envCfg;
// cannot import singleton logger directly as it needs config to load various transport
export const initEnvConfig = (logger: Logger) => {
export const initEnvConfig = (logger?: Logger) => {
const parsedEnv = envSchema.safeParse(process.env);
if (!parsedEnv.success) {
logger.error("Invalid environment variables. Check the error below");
logger.error(parsedEnv.error.issues);
(logger ?? console).error("Invalid environment variables. Check the error below");
(logger ?? console).error(parsedEnv.error.issues);
process.exit(-1);
}

View File

@@ -0,0 +1,91 @@
import opentelemetry, { diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-proto";
import { PrometheusExporter } from "@opentelemetry/exporter-prometheus";
import { registerInstrumentations } from "@opentelemetry/instrumentation";
import { Resource } from "@opentelemetry/resources";
import { AggregationTemporality, MeterProvider, PeriodicExportingMetricReader } from "@opentelemetry/sdk-metrics";
import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from "@opentelemetry/semantic-conventions";
import dotenv from "dotenv";
import { initEnvConfig } from "../config/env";
dotenv.config();
const initTelemetryInstrumentation = ({
exportType,
otlpURL,
otlpUser,
otlpPassword,
otlpPushInterval
}: {
exportType?: string;
otlpURL?: string;
otlpUser?: string;
otlpPassword?: string;
otlpPushInterval?: number;
}) => {
diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG);
const resource = Resource.default().merge(
new Resource({
[ATTR_SERVICE_NAME]: "infisical-core",
[ATTR_SERVICE_VERSION]: "0.1.0"
})
);
const metricReaders = [];
switch (exportType) {
case "prometheus": {
const promExporter = new PrometheusExporter();
metricReaders.push(promExporter);
break;
}
case "otlp": {
const otlpExporter = new OTLPMetricExporter({
url: `${otlpURL}/v1/metrics`,
headers: {
Authorization: `Basic ${btoa(`${otlpUser}:${otlpPassword}`)}`
},
temporalityPreference: AggregationTemporality.DELTA
});
metricReaders.push(
new PeriodicExportingMetricReader({
exporter: otlpExporter,
exportIntervalMillis: otlpPushInterval
})
);
break;
}
default:
throw new Error("Invalid OTEL export type");
}
const meterProvider = new MeterProvider({
resource,
readers: metricReaders
});
opentelemetry.metrics.setGlobalMeterProvider(meterProvider);
registerInstrumentations({
instrumentations: [getNodeAutoInstrumentations()]
});
};
const setupTelemetry = () => {
const appCfg = initEnvConfig();
if (appCfg.OTEL_TELEMETRY_COLLECTION_ENABLED) {
console.log("Initializing telemetry instrumentation");
initTelemetryInstrumentation({
otlpURL: appCfg.OTEL_EXPORT_OTLP_ENDPOINT,
otlpUser: appCfg.OTEL_COLLECTOR_BASIC_AUTH_USERNAME,
otlpPassword: appCfg.OTEL_COLLECTOR_BASIC_AUTH_PASSWORD,
otlpPushInterval: appCfg.OTEL_OTLP_PUSH_INTERVAL,
exportType: appCfg.OTEL_EXPORT_TYPE
});
}
};
void setupTelemetry();

View File

@@ -1,3 +1,5 @@
import "./lib/telemetry/instrumentation";
import dotenv from "dotenv";
import path from "path";
@@ -18,6 +20,7 @@ dotenv.config();
const run = async () => {
const logger = await initLogger();
const appCfg = initEnvConfig(logger);
const db = initDbConnection({
dbConnectionUri: appCfg.DB_CONNECTION_URI,
dbRootCert: appCfg.DB_ROOT_CERT,

View File

@@ -22,6 +22,7 @@ import { TSmtpService } from "@app/services/smtp/smtp-service";
import { globalRateLimiterCfg } from "./config/rateLimiter";
import { addErrorsToResponseSchemas } from "./plugins/add-errors-to-response-schemas";
import { apiMetrics } from "./plugins/api-metrics";
import { fastifyErrHandler } from "./plugins/error-handler";
import { registerExternalNextjs } from "./plugins/external-nextjs";
import { serializerCompiler, validatorCompiler, ZodTypeProvider } from "./plugins/fastify-zod";
@@ -86,6 +87,10 @@ export const main = async ({ db, hsmModule, auditLogDb, smtp, logger, queue, key
// pull ip based on various proxy headers
await server.register(fastifyIp);
if (appCfg.OTEL_TELEMETRY_COLLECTION_ENABLED) {
await server.register(apiMetrics);
}
await server.register(fastifySwagger);
await server.register(fastifyFormBody);
await server.register(fastifyErrHandler);

View File

@@ -0,0 +1,21 @@
import opentelemetry from "@opentelemetry/api";
import fp from "fastify-plugin";
export const apiMetrics = fp(async (fastify) => {
const apiMeter = opentelemetry.metrics.getMeter("API");
const latencyHistogram = apiMeter.createHistogram("API_latency", {
unit: "ms"
});
fastify.addHook("onResponse", async (request, reply) => {
const { method } = request;
const route = request.routerPath;
const { statusCode } = reply;
latencyHistogram.record(reply.elapsedTime, {
route,
method,
statusCode
});
});
});

View File

@@ -86,6 +86,7 @@ services:
- .env
ports:
- 4000:4000
- 9464:9464 # for OTEL collection of Prometheus metrics
environment:
- NODE_ENV=development
- DB_CONNECTION_URI=postgres://infisical:infisical@db/infisical?sslmode=disable
@@ -95,6 +96,42 @@ services:
extra_hosts:
- "host.docker.internal:host-gateway"
prometheus:
image: prom/prometheus
volumes:
- ./prometheus.dev.yml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090"
command:
- "--config.file=/etc/prometheus/prometheus.yml"
profiles: [metrics]
otel-collector:
image: otel/opentelemetry-collector-contrib
volumes:
- ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml
ports:
- 1888:1888 # pprof extension
- 8888:8888 # Prometheus metrics exposed by the Collector
- 8889:8889 # Prometheus exporter metrics
- 13133:13133 # health_check extension
- 4317:4317 # OTLP gRPC receiver
- 4318:4318 # OTLP http receiver
- 55679:55679 # zpages extension
profiles: [metrics-otel]
grafana:
image: grafana/grafana
container_name: grafana
restart: unless-stopped
environment:
- GF_LOG_LEVEL=debug
ports:
- "3005:3000"
volumes:
- "grafana_storage:/var/lib/grafana"
profiles: [metrics]
frontend:
container_name: infisical-dev-frontend
restart: unless-stopped
@@ -166,3 +203,4 @@ volumes:
driver: local
ldap_data:
ldap_config:
grafana_storage:

View File

@@ -3,6 +3,3 @@ title: "Bulk Create"
openapi: "POST /api/v3/secrets/batch/raw"
---
<Tip>
This endpoint requires you to disable end-to-end encryption. For more information, you should consult this [note](https://infisical.com/docs/api-reference/overview/examples/note).
</Tip>

View File

@@ -3,6 +3,3 @@ title: "Create"
openapi: "POST /api/v3/secrets/raw/{secretName}"
---
<Tip>
This endpoint requires you to disable end-to-end encryption. For more information, you should consult this [note](https://infisical.com/docs/api-reference/overview/examples/note).
</Tip>

View File

@@ -3,6 +3,3 @@ title: "Bulk Delete"
openapi: "DELETE /api/v3/secrets/batch/raw"
---
<Tip>
This endpoint requires you to disable end-to-end encryption. For more information, you should consult this [note](https://infisical.com/docs/api-reference/overview/examples/note).
</Tip>

View File

@@ -3,6 +3,3 @@ title: "Delete"
openapi: "DELETE /api/v3/secrets/raw/{secretName}"
---
<Tip>
This endpoint requires you to disable end-to-end encryption. For more information, you should consult this [note](https://infisical.com/docs/api-reference/overview/examples/note).
</Tip>

View File

@@ -2,7 +2,3 @@
title: "List"
openapi: "GET /api/v3/secrets/raw"
---
<Tip>
This endpoint requires you to disable end-to-end encryption. For more information, you should consult this [note](https://infisical.com/docs/api-reference/overview/examples/note).
</Tip>

View File

@@ -3,6 +3,3 @@ title: "Retrieve"
openapi: "GET /api/v3/secrets/raw/{secretName}"
---
<Tip>
This endpoint requires you to disable end-to-end encryption. For more information, you should consult this [note](https://infisical.com/docs/api-reference/overview/examples/note).
</Tip>

View File

@@ -3,6 +3,3 @@ title: "Bulk Update"
openapi: "PATCH /api/v3/secrets/batch/raw"
---
<Tip>
This endpoint requires you to disable end-to-end encryption. For more information, you should consult this [note](https://infisical.com/docs/api-reference/overview/examples/note).
</Tip>

View File

@@ -2,7 +2,3 @@
title: "Update"
openapi: "PATCH /api/v3/secrets/raw/{secretName}"
---
<Tip>
This endpoint requires you to disable end-to-end encryption. For more information, you should consult this [note](https://infisical.com/docs/api-reference/overview/examples/note).
</Tip>

View File

@@ -30,7 +30,8 @@ Used to configure platform-specific security and operational settings
</ParamField>
<ParamField query="TELEMETRY_ENABLED" type="string" default="true" optional>
Telemetry helps us improve Infisical but if you want to dsiable it you may set this to `false`.
Telemetry helps us improve Infisical but if you want to disable it you may set
this to `false`.
</ParamField>
## Data Layer
@@ -65,8 +66,9 @@ DB_READ_REPLICAS=[{"DB_CONNECTION_URI":""}]
Use the command below to encode your certificate:
`echo "<certificate>" | base64`
If not provided it will use master SSL certificate.
If not provided it will use master SSL certificate.
</ParamField>
</Expandable>
</ParamField>
@@ -350,8 +352,9 @@ Optional (for TLS/SSL):
TLS: Available on the same ports (2525, 80, 25, 8025, or 587)
SSL: Available on ports 465, 8465, and 443
</Note>
</Accordion>
</Accordion>
## Authentication
@@ -518,3 +521,36 @@ To help you sync secrets from Infisical to services such as Github and Gitlab, I
OAuth2 client secret for Gitlab integration
</ParamField>
</Accordion>
## Observability
You can configure Infisical to collect and expose telemetry data for analytics and monitoring.
<ParamField
query="OTEL_TELEMETRY_COLLECTION_ENABLED"
type="string"
default="false"
>
Whether or not to collect and expose telemetry data.
</ParamField>
<ParamField query="OTEL_EXPORT_TYPE" type="enum" optional>
Supported types are `prometheus` and `otlp`.
If export type is set to `prometheus`, metric data will be exposed in port 9464 in the `/metrics` path.
If export type is set to `otlp`, you will have to configure a value for `OTEL_EXPORT_OTLP_ENDPOINT`.
</ParamField>
<ParamField query="OTEL_EXPORT_OTLP_ENDPOINT" type="string">
Where telemetry data would be pushed to for collection. This is only
applicable when `OTEL_EXPORT_TYPE` is set to `otlp`.
</ParamField>
<ParamField query="OTEL_COLLECTOR_BASIC_AUTH_USERNAME" type="string">
The username for authenticating with the telemetry collector.
</ParamField>
<ParamField query="OTEL_COLLECTOR_BASIC_AUTH_PASSWORD" type="string">
The password for authenticating with the telemetry collector.
</ParamField>

View File

@@ -0,0 +1,45 @@
extensions:
health_check:
pprof:
zpages:
basicauth/server:
htpasswd:
inline: |
infisical:infisical
receivers:
otlp:
protocols:
http:
endpoint: 0.0.0.0:4318
auth:
authenticator: basicauth/server
prometheus:
config:
scrape_configs:
- job_name: otel-collector
scrape_interval: 30s
static_configs:
- targets: [backend:9464]
metric_relabel_configs:
- action: labeldrop
regex: "service_instance_id|service_name"
processors:
batch:
exporters:
prometheus:
endpoint: "0.0.0.0:8889"
auth:
authenticator: basicauth/server
resource_to_telemetry_conversion:
enabled: true
service:
extensions: [basicauth/server, health_check, pprof, zpages]
pipelines:
metrics:
receivers: [otlp]
processors: [batch]
exporters: [prometheus]

5
prometheus.dev.yml Normal file
View File

@@ -0,0 +1,5 @@
scrape_configs:
- job_name: "metric-collector"
scrape_interval: 30s
static_configs:
- targets: ["backend:9464"]