# NOTE(review): This appears to be a Coolify service template (docker-compose
# style) for PostHog whose original line wrapping has been lost -- the whole
# document is collapsed onto a few very long lines and will not parse as the
# intended YAML until the structure is restored from the upstream template.
# NOTE(review): The embedded ClickHouse config.xml / users.xml bind-mount
# contents (later in this file) have had their XML tags stripped; only element
# values remain. They cannot be reconstructed from this file alone -- restore
# them from the upstream source before deploying.
# NOTE(review): the embedded config.xml carries a hard-coded Sentry DSN
# (ingest.sentry.io); confirm it is the intended public hobby-deploy DSN and
# not a secret that belongs in an environment variable.
# NOTE(review): web/worker set KAFKA_HOSTS=kafka while the plugins service
# sets KAFKA_HOSTS=kafka:9092 -- presumably equivalent (default broker port
# 9092), but they should be unified; verify against the PostHog image defaults.
# documentation: https://posthog.com # slogan: The single platform to analyze, test, observe, and deploy new features # tags: analytics, product, open-source, self-hosted, ab-testing, event-tracking # logo: svgs/posthog.svg # minversion: 4.0.0-beta.222 services: db: image: postgres:12-alpine volumes: - posthog-postgres-data:/var/lib/postgresql/data environment: - POSTGRES_USER=posthog - POSTGRES_DB=posthog - POSTGRES_PASSWORD=$SERVICE_PASSWORD_POSTGRES healthcheck: test: ["CMD-SHELL", "pg_isready -U posthog"] interval: 2s timeout: 10s retries: 15 redis: image: redis:6.2.7-alpine command: redis-server --maxmemory-policy allkeys-lru --maxmemory 200mb clickhouse: image: clickhouse/clickhouse-server:23.11.2.11-alpine volumes: - type: bind source: ./idl/events_dead_letter_queue.json target: /idl/events_dead_letter_queue.json content: | { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "file://posthog/idl/events_dead_letter_queue.json", "title": "events_dead_letter_queue", "description": "Events that failed to be validated or processed and are sent to the DLQ", "type": "object", "properties": { "id": { "description": "uuid for the submission", "type": "string" }, "event_uuid": { "description": "uuid for the event", "type": "string" }, "event": { "description": "event type", "type": "string" }, "properties": { "description": "String representation of the properties json object", "type": "string" }, "distinct_id": { "description": "PostHog distinct_id", "type": "string" }, "team_id": { "description": "team_id (maps to the project under the organization)", "type": "number" }, "elements_chain": { "description": "Used for autocapture. DOM element hierarchy", "type": "string" }, "created_at": { "description": "Used for autocapture. 
DOM element hierarchy", "type": "number" }, "ip": { "description": "IP Address of the associated with the event", "type": "string" }, "site_url": { "description": "Site URL associated with the event the event", "type": "string" }, "now": { "description": "Timestamp of the DLQ event", "type": "number" }, "raw_payload": { "description": "Raw payload of the event that failed to be consumed", "type": "string" }, "error_timestamp": { "description": "Timestamp that the error of ingestion occurred", "type": "number" }, "error_location": { "description": "Source of error if known", "type": "string" }, "error": { "description": "Error if known", "type": "string" }, "tags": { "description": "Tags associated with the error or event", "type": "array", "items": { "type": "string" } } }, "required": ["raw_payload"] } - type: bind source: ./idl/events_json.json target: /idl/events_json.json content: | { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "file://posthog/idl/events_json.json", "title": "events_json", "description": "Event schema that is destined for ClickHouse", "type": "object", "properties": { "uuid": { "description": "uuid for the event", "type": "string" }, "event": { "description": "event type", "type": "string" }, "properties": { "description": "String representation of the properties json object", "type": "string" }, "timestamp": { "description": "Timestamp that the event occurred", "type": "number" }, "team_id": { "description": "team_id (maps to the project under the organization)", "type": "number" }, "distinct_id": { "description": "PostHog distinct_id", "type": "string" }, "elements_chain": { "description": "Used for autocapture. 
DOM element hierarchy", "type": "string" }, "created_at": { "description": "Timestamp when event was created", "type": "number" }, "person_id": { "description": "UUID for the associated person if available", "type": "string" }, "person_created_at": { "description": "Timestamp for when the associated person was created", "type": "number" }, "person_properties": { "description": "String representation of the person JSON object", "type": "string" }, "group0_properties": { "description": "String representation of a group's properties", "type": "string" }, "group1_properties": { "description": "String representation of a group's properties", "type": "string" }, "group2_properties": { "description": "String representation of a group's properties", "type": "string" }, "group3_properties": { "description": "String representation of a group's properties", "type": "string" }, "group4_properties": { "description": "String representation of a group's properties", "type": "string" }, "group0_created_at": { "description": "Group's creation timestamp", "type": "number" }, "group1_created_at": { "description": "Group's creation timestamp", "type": "number" }, "group2_created_at": { "description": "Group's creation timestamp", "type": "number" }, "group3_created_at": { "description": "Group's creation timestamp", "type": "number" }, "group4_created_at": { "description": "Group's creation timestamp", "type": "number" } }, "required": ["uuid", "event", "properties", "timestamp", "team_id"] } - type: bind source: ./idl/groups.json target: /idl/groups.json content: | { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "file://posthog/idl/groups.json", "title": "groups", "description": "Groups schema that is destined for ClickHouse", "type": "object", "properties": { "group_type_index": { "description": "Group type index", "type": "number" }, "group_key": { "description": "Group Key", "type": "string" }, "created_at": { "description": "Group creation timestamp", "type": 
"number" }, "team_id": { "description": "Team ID associated with group", "type": "number" }, "group_properties": { "description": "String representation of group JSON properties object", "type": "string" } }, "required": ["group_type_index", "group_key", "created_at", "team_id", "group_properties"] } - type: bind source: ./idl/idl.md target: /idl/idl.md content: | # IDL - Interface Definition Language This directory is responsible for defining the schemas of the data between services. Primarily this will be between services and ClickHouse, but can be really any thing at the boundry of services. The reason why we do this is because it makes generating code, validating data, and understanding the system a whole lot easier. We've had a few customers request this of us for engineering a deeper integration with us. - type: bind source: ./idl/person.json target: /idl/person.json content: | { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "file://posthog/idl/person.json", "title": "person", "description": "Person schema that is destined for ClickHouse", "type": "object", "properties": { "id": { "description": "UUID for the person", "type": "string" }, "created_at": { "description": "Person creation timestamp", "type": "number" }, "team_id": { "description": "Team ID associated with person", "type": "number" }, "properties": { "description": "String representation of person JSON properties object", "type": "string" }, "is_identified": { "description": "Boolean is the person identified?", "type": "boolean" }, "is_deleted": { "description": "Boolean is the person deleted?", "type": "boolean" }, "version": { "description": "Version field for collapsing later (psuedo-tombstone)", "type": "number" } }, "required": ["id", "created_at", "team_id", "properties", "is_identified", "is_deleted", "version"] } - type: bind source: ./idl/person_distinct_id.json target: /idl/person_distinct_id.json content: | { "$schema": "https://json-schema.org/draft/2020-12/schema", 
"$id": "file://posthog/idl/person_distinct_id.json", "title": "person_distinct_id", "description": "Person distinct id schema that is destined for ClickHouse", "type": "object", "properties": { "distinct_id": { "description": "User provided ID for the distinct user", "type": "string" }, "person_id": { "description": "UUID of the person", "type": "string" }, "team_id": { "description": "Team ID associated with person_distinct_id", "type": "number" }, "_sign": { "description": "Used for collapsing later different versions of a distinct id (psuedo-tombstone)", "type": "number" }, "is_deleted": { "description": "Boolean is the person distinct_id deleted?", "type": "boolean" } }, "required": ["distinct_id", "person_id", "team_id", "_sign", "is_deleted"] } - type: bind source: ./idl/person_distinct_id2.json target: /idl/person_distinct_id2.json content: | { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "file://posthog/idl/person_distinct_id2.json", "title": "person_distinct_id2", "description": "Person distinct id2 schema that is destined for ClickHouse", "type": "object", "properties": { "distinct_id": { "description": "User provided ID for the distinct user", "type": "string" }, "person_id": { "description": "UUID of the person", "type": "string" }, "team_id": { "description": "Team ID associated with person_distinct_id", "type": "number" }, "version": { "description": "Used for collapsing later different versions of a distinct id (psuedo-tombstone)", "type": "number" }, "is_deleted": { "description": "Boolean is the person distinct_id deleted?", "type": "boolean" } }, "required": ["distinct_id", "person_id", "team_id", "version", "is_deleted"] } - type: bind source: ./idl/plugin_log_entries.json target: /idl/plugin_log_entries.json content: | { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "file://posthog/idl/plugin_log_entries.json", "title": "plugin_log_entries", "description": "Plugin log entries that are destined for 
ClickHouse", "type": "object", "properties": { "id": { "description": "UUID for the log entry", "type": "string" }, "team_id": { "description": "Team ID associated with person_distinct_id", "type": "number" }, "plugin_id": { "description": "Plugin ID associated with the log entry", "type": "number" }, "plugin_config_id": { "description": "Plugin Config ID associated with the log entry", "type": "number" }, "timestamp": { "description": "Timestamp for when the log entry was created", "type": "number" }, "source": { "description": "Source of the log entry", "type": "string" }, "type": { "description": "Log entry type", "type": "string" }, "message": { "description": "Log entry body", "type": "string" }, "instance_id": { "description": "UUID of the instance that generated the log entry", "type": "string" } }, "required": [ "id", "team_id", "plugin_id", "plugin_config_id", "timestamp", "source", "type", "message", "instance_id" ] } - type: bind source: ./docker/clickhouse/docker-entrypoint-initdb.d/init-db.sh target: /docker-entrypoint-initdb.d/init-db.sh content: | #!/bin/bash set -e cp -r /idl/* /var/lib/clickhouse/format_schemas/ - type: bind source: ./docker/clickhouse/config.xml target: /etc/clickhouse-server/config.xml content: | trace /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.err.log 1000M 10 8123 9000 9004 9005 8443 9440 9009 4096 3 false /path/to/ssl_cert_file /path/to/ssl_key_file false /path/to/ssl_ca_cert_file none 0 -1 -1 false /etc/clickhouse-server/server.crt /etc/clickhouse-server/server.key /etc/clickhouse-server/dhparam.pem none true true sslv2,sslv3 true true true sslv2,sslv3 true RejectCertificateHandler 100 0 10000 0.9 4194304 0 8589934592 5368709120 1000 134217728 10000 /var/lib/clickhouse/ /var/lib/clickhouse/tmp/ /var/lib/clickhouse/user_files/ users.xml /var/lib/clickhouse/access/ default default true false ' | sed -e 's|.*>\(.*\)<.*|\1|') wget 
https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb clickhouse-jdbc-bridge & * [CentOS/RHEL] export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/clickhouse/clickhouse-jdbc-bridge export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '' | sed -e 's|.*>\(.*\)<.*|\1|') wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm clickhouse-jdbc-bridge & Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information. ]]> localhost 9000 .* zookeeper 2181 01 ch1 3600 3600 60 system query_log
toYYYYMM(event_date) 7500
system trace_log
toYYYYMM(event_date) 7500
system query_thread_log
toYYYYMM(event_date) 7500
system query_views_log
toYYYYMM(event_date) 7500
system part_log
toYYYYMM(event_date) 7500
system metric_log
7500 1000
system asynchronous_metric_log
7000
engine MergeTree partition by toYYYYMM(finish_date) order by (finish_date, finish_time_us, trace_id) system opentelemetry_span_log
7500
system crash_log
1000
system session_log
toYYYYMM(event_date) 7500
*_dictionary.xml *_function.xml /clickhouse/task_queue/ddl click_cost any 0 3600 86400 60 max 0 60 3600 300 86400 3600 /var/lib/clickhouse/format_schemas/ hide encrypt/decrypt arguments ((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\) \1(???) false false https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277
- type: bind source: ./docker/clickhouse/users.xml target: /etc/clickhouse-server/users.xml content: | 10000000000 random 1 1 ::/0 default default 3600 0 0 0 0 0 - clickhouse-data:/var/lib/clickhouse depends_on: - kafka - zookeeper zookeeper: image: zookeeper:3.7.0 volumes: - zookeeper-datalog:/datalog - zookeeper-data:/data - zookeeper-logs:/logs kafka: image: ghcr.io/posthog/kafka-container:v2.8.2 depends_on: - zookeeper environment: - KAFKA_BROKER_ID=1001 - KAFKA_CFG_RESERVED_BROKER_MAX_ID=1001 - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092 - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092 - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181 - ALLOW_PLAINTEXT_LISTENER=yes object_storage: image: minio/minio:RELEASE.2022-06-25T15-50-16Z environment: - MINIO_ROOT_USER=$SERVICE_USER_MINIO - MINIO_ROOT_PASSWORD=$SERVICE_PASSWORD_MINIO entrypoint: sh command: -c 'mkdir -p /data/posthog && minio server --address ":19000" --console-address ":19001" /data' volumes: - object_storage:/data maildev: image: maildev/maildev:2.0.5 flower: image: mher/flower:2.0.0 environment: FLOWER_PORT: 5555 CELERY_BROKER_URL: redis://redis:6379 web: image: posthog/posthog:latest command: /compose/start volumes: - type: bind source: ./compose/start target: /compose/start content: | #!/bin/bash /compose/wait ./bin/migrate ./bin/docker-server - type: bind source: ./compose/wait target: /compose/wait content: | #!/usr/bin/env python3 import socket import time def loop(): print("Waiting for ClickHouse and Postgres to be ready") try: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.connect(('clickhouse', 9000)) print("Clickhouse is ready") with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.connect(('db', 5432)) print("Postgres is ready") except ConnectionRefusedError as e: time.sleep(5) loop() loop() environment: - SERVICE_FQDN_WEB_8000 - OPT_OUT_CAPTURING=true - DISABLE_SECURE_SSL_REDIRECT=true - IS_BEHIND_PROXY=true - TRUST_ALL_PROXIES=true - 
DATABASE_URL=postgres://posthog:$SERVICE_PASSWORD_POSTGRES@db:5432/posthog - CLICKHOUSE_HOST=clickhouse - CLICKHOUSE_DATABASE=posthog - CLICKHOUSE_SECURE=false - CLICKHOUSE_VERIFY=false - KAFKA_HOSTS=kafka - REDIS_URL=redis://redis:6379/ - PGHOST=db - PGUSER=posthog - PGPASSWORD=$SERVICE_PASSWORD_POSTGRES - DEPLOYMENT=hobby - SITE_URL=$SERVICE_FQDN_WEB - SECRET_KEY=$SERVICE_BASE64_64_SECRETKEY depends_on: - db - redis - clickhouse - kafka - object_storage worker: image: posthog/posthog:latest command: ./bin/docker-worker-celery --with-scheduler environment: - OPT_OUT_CAPTURING=true - DISABLE_SECURE_SSL_REDIRECT=true - IS_BEHIND_PROXY=true - TRUST_ALL_PROXIES=true - DATABASE_URL=postgres://posthog:$SERVICE_PASSWORD_POSTGRES@db:5432/posthog - CLICKHOUSE_HOST=clickhouse - CLICKHOUSE_DATABASE=posthog - CLICKHOUSE_SECURE=false - CLICKHOUSE_VERIFY=false - KAFKA_HOSTS=kafka - REDIS_URL=redis://redis:6379/ - PGHOST=db - PGUSER=posthog - PGPASSWORD=$SERVICE_PASSWORD_POSTGRES - DEPLOYMENT=hobby - SITE_URL=$SERVICE_FQDN_WEB - SECRET_KEY=$SERVICE_BASE64_64_SECRETKEY depends_on: - db - redis - clickhouse - kafka - object_storage # capture: # image: ghcr.io/posthog/capture:main # environment: # ADDRESS: "0.0.0.0:3000" # KAFKA_TOPIC: "events_plugin_ingestion" # KAFKA_HOSTS: "kafka:9092" # REDIS_URL: "redis://redis:6379/" # depends_on: # - redis # - kafka plugins: image: posthog/posthog:latest command: ./bin/plugin-server --no-restart-loop environment: - DATABASE_URL=postgres://posthog:$SERVICE_PASSWORD_POSTGRES@db:5432/posthog - KAFKA_HOSTS=kafka:9092 - REDIS_URL=redis://redis:6379/ - CLICKHOUSE_HOST=clickhouse - CLICKHOUSE_DATABASE=posthog - CLICKHOUSE_SECURE=false - CLICKHOUSE_VERIFY=false - SITE_URL=$SERVICE_FQDN_WEB - SECRET_KEY=$SERVICE_BASE64_64_SECRETKEY depends_on: - db - redis - clickhouse - kafka - object_storage # migrate: # image: posthog/posthog:latest # restart: "no" # command: sh -c "python manage.py migrate && python manage.py migrate_clickhouse && python 
# FIX(review): the temporal service on this line declared ENABLE_ES twice
# (=true, then =false). With Compose list-form `environment` entries the last
# occurrence takes precedence, so the effective value was "false"; the earlier
# redundant ENABLE_ES=true entry has been removed to make that intent explicit.
# ES_SEEDS / ES_VERSION are left in place but are inert while ENABLE_ES=false.
manage.py run_async_migrations" # environment: # - DISABLE_SECURE_SSL_REDIRECT=true # - IS_BEHIND_PROXY=true # - TRUST_ALL_PROXIES=true # - DATABASE_URL=postgres://posthog:$SERVICE_PASSWORD_POSTGRES@db:5432/posthog # - CLICKHOUSE_HOST=clickhouse # - CLICKHOUSE_DATABASE=posthog # - CLICKHOUSE_SECURE=false # - CLICKHOUSE_VERIFY=false # - KAFKA_HOSTS=kafka # - REDIS_URL=redis://redis:6379/ # - PGHOST=db # - PGUSER=posthog # - PGPASSWORD=$SERVICE_PASSWORD_POSTGRES # - DEPLOYMENT=hobby # - SITE_URL=$SERVICE_FQDN_WEB # - SECRET_KEY=$SERVICE_BASE64_64_SECRETKEY # depends_on: # - db # - redis # - clickhouse # - kafka # - object_storage # Temporal containers elasticsearch: image: elasticsearch:7.16.2 environment: - cluster.routing.allocation.disk.threshold_enabled=true - cluster.routing.allocation.disk.watermark.low=512mb - cluster.routing.allocation.disk.watermark.high=256mb - cluster.routing.allocation.disk.watermark.flood_stage=128mb - discovery.type=single-node - ES_JAVA_OPTS=-Xms256m -Xmx256m - xpack.security.enabled=false volumes: - elasticsearch-data:/var/lib/elasticsearch/data temporal: image: temporalio/auto-setup:1.20.0 environment: - DB=postgresql - DB_PORT=5432 - POSTGRES_USER=posthog - POSTGRES_PWD=$SERVICE_PASSWORD_POSTGRES - POSTGRES_SEEDS=db - DYNAMIC_CONFIG_FILE_PATH=config/dynamicconfig/development-sql.yaml - ES_SEEDS=elasticsearch - ES_VERSION=v7 - ENABLE_ES=false depends_on: db: condition: service_healthy volumes: - type: bind source: ./docker/temporal/dynamicconfig/development-sql.yaml target: /etc/temporal/config/dynamicconfig/development-sql.yaml content: | limit.maxIDLength: - value: 255 constraints: {} system.forceSearchAttributesCacheRefreshOnRead: - value: false constraints: {} temporal-admin-tools: image: temporalio/admin-tools:1.20.0 depends_on: - temporal environment: - TEMPORAL_CLI_ADDRESS=temporal:7233 stdin_open: true tty: true temporal-ui: image: temporalio/ui:2.10.3 depends_on: - temporal environment: - 
TEMPORAL_ADDRESS=temporal:7233 - TEMPORAL_CORS_ORIGINS=http://localhost:3000 temporal-django-worker: image: posthog/posthog:latest command: ./bin/temporal-django-worker environment: - DISABLE_SECURE_SSL_REDIRECT=true - IS_BEHIND_PROXY=true - TRUST_ALL_PROXIES=true - DATABASE_URL=postgres://posthog:$SERVICE_PASSWORD_POSTGRES@db:5432/posthog - CLICKHOUSE_HOST=clickhouse - CLICKHOUSE_DATABASE=posthog - CLICKHOUSE_SECURE=false - CLICKHOUSE_VERIFY=false - KAFKA_HOSTS=kafka - REDIS_URL=redis://redis:6379/ - PGHOST=db - PGUSER=posthog - PGPASSWORD=$SERVICE_PASSWORD_POSTGRES - DEPLOYMENT=hobby - SITE_URL=$SERVICE_FQDN_WEB - SECRET_KEY=$SERVICE_BASE64_64_SECRETKEY - TEMPORAL_HOST=temporal depends_on: - db - redis - clickhouse - kafka - object_storage - temporal