"""Provision the beta9 control plane on Fly.io.

The script copies ``template/`` to ``state/``, launches one Postgres app and
three Redis apps, creates two Fly storage buckets, renders the gateway
configuration from ``state/gateway/config.tpl.json`` and finally deploys the
gateway app.

All paths are relative to the current working directory, so the script must
be started from the directory that contains ``template/``.
"""

import json
import os
import secrets
import shutil
import subprocess
from dataclasses import dataclass
from uuid import uuid4

# Locations of the gateway config template and of the rendered result.
GATEWAY_TEMPLATE_PATH = "./state/gateway/config.tpl.json"
GATEWAY_CONFIG_PATH = "./state/gateway/config.json"

# Marker looked for in the ``fly storage create`` output -> StorageConfig field.
STORAGE_OUTPUT_FIELDS = {
    "AWS_ACCESS_KEY_ID": "access_key",
    "AWS_SECRET_ACCESS_KEY": "secret_key",
    "AWS_ENDPOINT_URL_S3": "endpoint",
    "BUCKET_NAME": "bucket",
}


@dataclass
class PostgresConfig:
    """Connection details of the Postgres app created by ``setup_postgres``."""

    app_name: str
    password: str
    user: str
    db: str


@dataclass
class RedisConfig:
    """Connection details of a Redis app created by ``setup_redis``."""

    app_name: str
    password: str


@dataclass
class StorageConfig:
    """Credentials of a bucket created by ``setup_storage``."""

    storage_name: str
    access_key: str
    secret_key: str
    endpoint: str
    bucket: str


@dataclass
class TailscaleConfig:
    """Tailscale settings read from the environment by ``setup_gateway``."""

    host: str
    auth_token: str


def generate_name(prefix: str) -> str:
    """Return ``prefix`` followed by a dash and a random UUID4."""
    return f"{prefix}-{uuid4()}"


def generate_password() -> str:
    """Return a random URL-safe password built from 24 random bytes."""
    return secrets.token_urlsafe(24)


def run_fly(args, cwd=None, capture_output=False):
    """Run ``fly <args>`` and fail loudly when the command fails.

    Args:
        args: Arguments placed after the ``fly`` executable.
        cwd: Directory to run in (the app directory holding ``fly.toml``).
        capture_output: Capture stdout/stderr as text instead of inheriting.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.

    Raises:
        subprocess.CalledProcessError: If ``fly`` exits with a non-zero status.
    """
    # check=True: a failed launch must not be followed by secrets/ip/deploy
    # calls against an app that was never created.
    return subprocess.run(
        ["fly", *args],
        cwd=cwd,
        env=os.environ,
        text=True,
        check=True,
        capture_output=capture_output,
    )


def deploy_fly_app(app_name: str, app_dir: str, secret_values: dict) -> None:
    """Launch, configure and deploy the Fly app whose files live in ``app_dir``.

    Runs, in order: ``fly launch --no-deploy``, ``fly secrets set``,
    ``fly ip allocate-v4``, ``fly ip allocate-v6`` and ``fly deploy``.

    Args:
        app_name: Name passed to ``fly launch --name``.
        app_dir: Working directory for every command.
        secret_values: Secret name -> value, set before the first deploy.

    Raises:
        subprocess.CalledProcessError: If any of the commands fails.
    """
    run_fly(
        ["launch", "--no-deploy", "--copy-config", "--name", app_name, "-y"],
        cwd=app_dir,
    )
    # NOTE: the secret values are passed as command-line arguments.
    assignments = [f"{key}={value}" for key, value in secret_values.items()]
    run_fly(["secrets", "set", *assignments], cwd=app_dir)
    run_fly(["ip", "allocate-v4", "-y"], cwd=app_dir)
    run_fly(["ip", "allocate-v6"], cwd=app_dir)
    run_fly(["deploy", "-y"], cwd=app_dir)


def setup_postgres() -> PostgresConfig:
    """Create and deploy the Postgres app from ``state/postgres``.

    Returns:
        The generated app name and credentials.
    """
    app_name = generate_name("control-plane-postgres")
    postgres_password = generate_password()
    postgres_user = "postgres"
    postgres_db = "main"

    # The same generated password is used for every role secret.
    deploy_fly_app(
        app_name,
        "state/postgres",
        {
            "POSTGRES_PASSWORD": postgres_password,
            "POSTGRES_USER": postgres_user,
            "POSTGRES_DB": postgres_db,
            "SU_PASSWORD": postgres_password,
            "OPERATOR_PASSWORD": postgres_password,
            "REPL_PASSWORD": postgres_password,
        },
    )

    return PostgresConfig(app_name, postgres_password, postgres_user, postgres_db)


def setup_redis(name: str) -> RedisConfig:
    """Create and deploy the Redis app from ``state/redis-<name>``.

    Args:
        name: Suffix selecting the app directory and used in the app name.

    Returns:
        The generated app name and password.
    """
    app_name = generate_name(f"redis-{name}")
    redis_password = generate_password()

    deploy_fly_app(app_name, "state/redis-" + name, {"REDIS_PASSWORD": redis_password})

    return RedisConfig(app_name, redis_password)


def parse_storage_credentials(output: str) -> dict:
    """Extract bucket credentials from the text printed by ``fly storage create``.

    Each relevant line is expected to look like ``MARKER: value``. Everything
    after the first colon is kept, so URL values such as ``https://host`` are
    preserved in full.

    Args:
        output: Captured stdout of ``fly storage create``.

    Returns:
        A dict with the keys ``access_key``, ``secret_key``, ``endpoint`` and
        ``bucket``.

    Raises:
        RuntimeError: If one of the expected values is absent or empty.
    """
    found = {}
    for line in output.splitlines():
        _, separator, value = line.partition(":")
        if not separator:
            continue
        for marker, field_name in STORAGE_OUTPUT_FIELDS.items():
            if marker in line:
                found[field_name] = value.strip()

    missing = [
        marker
        for marker, field_name in STORAGE_OUTPUT_FIELDS.items()
        if not found.get(field_name)
    ]
    if missing:
        raise RuntimeError(
            "fly storage create output is missing: " + ", ".join(missing)
        )
    return found


def setup_storage(name: str) -> StorageConfig:
    """Create a Fly storage bucket and return its credentials.

    Args:
        name: Suffix used in the generated storage name.

    Returns:
        The storage name together with the parsed credentials.

    Raises:
        subprocess.CalledProcessError: If ``fly storage create`` fails.
        RuntimeError: If the command output lacks an expected credential.
    """
    storage_name = generate_name(f"storage-{name}")

    res = run_fly(
        ["storage", "create", "-n", storage_name, "-y", "-o", "personal"],
        capture_output=True,
    )
    credentials = parse_storage_credentials(res.stdout)

    return StorageConfig(
        storage_name,
        credentials["access_key"],
        credentials["secret_key"],
        credentials["endpoint"],
        credentials["bucket"],
    )


def generate_config_file(
    gateway_app_name: str,
    tailscale_config: TailscaleConfig,
    postgres_config: PostgresConfig,
    cp_redis_config: RedisConfig,
    bc_redis_config: RedisConfig,
    juicefs_redis_config: RedisConfig,
    control_plane_storage_config: StorageConfig,
    images_storage_config: StorageConfig,
    template_path: str = GATEWAY_TEMPLATE_PATH,
) -> dict:
    """Render the gateway configuration from the JSON template.

    Args:
        gateway_app_name: Fly app name of the gateway.
        tailscale_config: Tailscale host name and auth token.
        postgres_config: Control-plane Postgres connection details.
        cp_redis_config: Control-plane Redis connection details.
        bc_redis_config: Blobcache metadata Redis connection details.
        juicefs_redis_config: JuiceFS metadata Redis connection details.
        control_plane_storage_config: Bucket backing the JuiceFS filesystem.
        images_storage_config: Bucket used as the image registry.
        template_path: JSON template to load.

    Returns:
        The template content with every deployment-specific value filled in.
    """
    with open(template_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    cp_redis_host = f"{cp_redis_config.app_name}.fly.dev"
    bc_redis_host = f"{bc_redis_config.app_name}.fly.dev"
    juicefs_redis_host = f"{juicefs_redis_config.app_name}.fly.dev"
    juicefs_redis_uri = f"redis://:{juicefs_redis_config.password}@{juicefs_redis_host}:6379"
    cp_bucket_url = f"https://fly.storage.tigris.dev/{control_plane_storage_config.bucket}"

    postgres = config["database"]["postgres"]
    postgres["host"] = f"{postgres_config.app_name}.fly.dev"
    postgres["password"] = postgres_config.password
    postgres["username"] = postgres_config.user
    postgres["name"] = postgres_config.db

    cp_redis = config["database"]["redis"]
    cp_redis["addrs"] = [f"{cp_redis_host}:6379"]
    cp_redis["password"] = cp_redis_config.password

    cp_storage = config["storage"]["juicefs"]
    cp_storage["redis_uri"] = juicefs_redis_uri
    cp_storage["aws_s3_bucket"] = cp_bucket_url
    cp_storage["aws_access_key"] = control_plane_storage_config.access_key
    cp_storage["aws_secret_key"] = control_plane_storage_config.secret_key

    config["gateway_service"]["external_host"] = f"{gateway_app_name}.fly.dev"

    s3_registry = config["image_service"]["registries"]["s3"]
    s3_registry["bucket_name"] = images_storage_config.bucket
    s3_registry["access_key"] = images_storage_config.access_key
    s3_registry["secret_key"] = images_storage_config.secret_key

    tailscale = config["tailscale"]
    tailscale["host_name"] = tailscale_config.host
    tailscale["auth_key"] = tailscale_config.auth_token

    blobcache = config["blobcache"]
    blobcache["tailscale"]["host_name"] = tailscale_config.host
    blobcache["tailscale"]["auth_key"] = tailscale_config.auth_token
    blobcache["metadata"]["redis_addr"] = f"{bc_redis_host}:6379"
    blobcache["metadata"]["redis_passwd"] = bc_redis_config.password

    # The whole source list is replaced, not merged with the template's list.
    blobcache["blobfs"]["sources"] = [
        {
            "mode": "juicefs",
            "fs_name": "beta9-fs",
            "fs_path": "/data",
            "juicefs": {
                "redis_uri": juicefs_redis_uri,
                "bucket": cp_bucket_url,
                "access_key": control_plane_storage_config.access_key,
                "secret_key": control_plane_storage_config.secret_key,
                "cache_size": 0,
                "block_size": 16384,
                "prefetch": 128,
                "buffer_size": 300,
            },
        },
        {
            "mode": "mountpoint",
            "fs_name": "beta9-images",
            "fs_path": "/images",
            "mountpoint": {
                "bucket_name": images_storage_config.bucket,
                "access_key": images_storage_config.access_key,
                "secret_key": images_storage_config.secret_key,
                "region": "auto",
                "endpoint_url": "https://fly.storage.tigris.dev",
            },
        },
    ]

    agent = config["agent"]
    agent["control_plane_redis"]["hostname"] = cp_redis_host
    # NOTE(review): template/gateway/config.tpl.json declares "redisURI" in
    # agent.juicefs_redis while this writes "hostname" -- confirm which key
    # the agent actually reads.
    agent["juicefs_redis"]["hostname"] = juicefs_redis_host
    agent["blobcache_redis"]["hostname"] = bc_redis_host

    return config


def setup_gateway(
    postgres_config: PostgresConfig,
    cp_redis_config: RedisConfig,
    bc_redis_config: RedisConfig,
    juicefs_redis_config: RedisConfig,
    control_plane_storage_config: StorageConfig,
    images_storage_config: StorageConfig,
):
    """Render the gateway config, store it and deploy the gateway app.

    The rendered config is written to ``state/gateway/config.json`` and also
    set as the ``CONFIG_JSON`` secret of the gateway app. Tailscale settings
    come from ``TAILSCALE_HOST`` and ``TAILSCALE_AUTH_TOKEN``; each defaults
    to an empty string when unset.

    Raises:
        subprocess.CalledProcessError: If any ``fly`` command fails.
    """
    tailscale_config = TailscaleConfig(
        host=os.getenv("TAILSCALE_HOST", ""),
        auth_token=os.getenv("TAILSCALE_AUTH_TOKEN", ""),
    )

    gateway_app_name = generate_name("control-plane-gateway")

    cfg = generate_config_file(
        gateway_app_name,
        tailscale_config,
        postgres_config,
        cp_redis_config,
        bc_redis_config,
        juicefs_redis_config,
        control_plane_storage_config,
        images_storage_config,
    )

    with open(GATEWAY_CONFIG_PATH, "w", encoding="utf-8") as f:
        json.dump(cfg, f, indent=2)

    deploy_fly_app(gateway_app_name, "state/gateway", {"CONFIG_JSON": json.dumps(cfg)})


def main() -> None:
    """Recreate ``state/`` from ``template/`` and provision every component."""
    # Start from a clean copy of the templates on every run.
    shutil.rmtree("state/", ignore_errors=True)
    shutil.copytree("template/", "state/", dirs_exist_ok=True)

    pg_cfg = setup_postgres()
    cp_redis_cfg = setup_redis(name="control-plane")
    bc_redis_cfg = setup_redis(name="blobcache")
    juicefs_redis_cfg = setup_redis(name="juicefs")
    cp_storage_cfg = setup_storage("control-plane")
    images_storage_cfg = setup_storage("images")

    setup_gateway(
        pg_cfg, cp_redis_cfg, bc_redis_cfg, juicefs_redis_cfg, cp_storage_cfg, images_storage_cfg
    )


if __name__ == "__main__":
    main()
"invoke_url_type": "path", + "grpc": { + "port": 1993, + "external_port": 1993, + "max_recv_msg_size": 1024, + "max_send_msg_size": 1024 + }, + "http": { + "port": 1994, + "enable_pretty_logs": true, + "cors": { + "allow_origins": "*", + "allow_headers": "*", + "allow_methods": "*" + } + }, + "shutdown_timeout": "180s", + "stub_limits": { + "memory": 32768, + "max_replicas": 10 + } + }, + "image_service": { + "local_cache_enabled": false, + "registry_store": "s3", + "registry_credential_provider": "s3", + "build_container_pool_selector": "build", + "registries": { + "docker": { + "username": "beamcloud", + "password": null + }, + "s3": { + "bucket_name": "", + "region": "auto", + "access_key": "", + "secret_key": "", + "endpoint": "https://fly.storage.tigris.dev" + } + }, + "runner": { + "base_image_name": "beta9-runner", + "base_image_registry": "public.ecr.aws/n4e0e1y0", + "tags": { + "python3.8": "py38-latest", + "python3.9": "py39-latest", + "python3.10": "py310-latest", + "python3.11": "py311-latest", + "python3.12": "py312-latest", + "micromamba3.8": "micromamba3.8-latest", + "micromamba3.9": "micromamba3.9-latest", + "micromamba3.10": "micromamba3.10-latest", + "micromamba3.11": "micromamba3.11-latest", + "micromamba3.12": "micromamba3.12-latest" + } + } + }, + "worker": { + "pools": { + "build": { + "mode": "external", + "requires_pool_selector": true, + "provider": "generic", + "pool_sizing": { + "default_worker_cpu": "8000m", + "default_worker_gpu_type": "", + "default_worker_memory": "32Gi", + "min_free_cpu": "0m", + "min_free_gpu": 0, + "min_free_memory": "0" + } + }, + "a10g": { + "gpu_type": "A10G", + "mode": "external", + "provider": "generic", + "runtime": "nvidia", + "pool_sizing": { + "default_worker_cpu": "8000m", + "default_worker_gpu_type": "A10G", + "default_worker_memory": "32Gi", + "min_free_cpu": "0m", + "min_free_gpu": 0, + "min_free_memory": "0" + } + } + }, + "use_host_resolv_conf": false, + "host_network": true, + 
"use_gateway_service_hostname": false, + "image_tag": "latest", + "image_name": "beta9-worker", + "image_registry": "public.ecr.aws/n4e0e1y0", + "image_pull_secrets": [], + "namespace": "beta9", + "service_account_name": "default", + "image_pvc_name": "beta9-images", + "job_resources_enforced": true, + "runc_resources_enforced": false, + "default_worker_cpu_request": 2000, + "default_worker_memory_request": 1024, + "termination_grace_period": 30, + "add_worker_timeout": "10m", + "blob_cache_enabled": true + }, + "providers": { + "generic": { + "agent": {} + } + }, + "tailscale": { + "control_url": "", + "user": "", + "auth_key": "", + "enabled": true, + "debug": false, + "host_name": "" + }, + "proxy": {}, + "blobcache": { + "debug_mode": true, + "port": 2049, + "tls_enabled": false, + "rtt_threshold_milliseconds": 1, + "max_cache_pct": 60, + "page_size_bytes": 2000000, + "grpc_dial_timeout_s": 1, + "grpc_message_size_bytes": 10000000000, + "discovery_interval_s": 5, + "tailscale": { + "control_url": "", + "user": "", + "auth_key": "", + "enabled": true, + "debug": false, + "host_name": "", + "ephemeral": true, + "state_dir": "/tmp", + "dial_timeout_s": 1 + }, + "metadata": { + "redis_addr": "", + "redis_passwd": "", + "redis_tls_enabled": false + }, + "blobfs": { + "max_background_tasks": 12, + "enabled": false, + "mount_point": "/cache", + "sources": [ + { + "mode": "juicefs", + "fs_name": "beta9-fs", + "fs_path": "/data", + "juicefs": { + "redis_uri": "", + "bucket": "", + "access_key": "", + "secret_key": "", + "cache_size": 0, + "block_size": 16384, + "prefetch": 128, + "buffer_size": 300 + } + }, + { + "mode": "mountpoint", + "fs_name": "beta9-images", + "fs_path": "/images", + "mountpoint": { + "bucket_name": "", + "access_key": "", + "secret_key": "", + "region": "auto", + "endpoint_url": "https://fly.storage.tigris.dev" + } + } + ] + } + }, + "monitoring": { + "container_metrics_interval": "3s", + "metrics_collector": "openmeter", + "prometheus": { + 
"scrape_workers": true, + "port": 9090 + }, + "telemetry": { + "enabled": false, + "endpoint": "http://tempo.monitoring:4318", + "meter_interval": "3s", + "trace_interval": "3s", + "trace_sample_ratio": 1 + }, + "fluentbit": { + "events": { + "endpoint": "http://fluent-bit.monitoring:9880", + "max_conns": 0, + "max_idle_conns": 30, + "idle_conn_timeout": "10s", + "dial_timeout": "2s", + "keep_alive": "30s" + } + }, + "openmeter": { + "server_url": "http://some-metering-server:8080", + "api_key": "http://some-metering-server:8080" + } + }, + "agent": { + "control_plane_redis": { + "hostname": "" + }, + "blobcache_redis": { + "hostname": "" + }, + "juicefs_redis": { + "redisURI": "" + } + } +} diff --git a/manifests/fly/template/gateway/fly.toml b/manifests/fly/template/gateway/fly.toml new file mode 100644 index 000000000..b00c6d3cb --- /dev/null +++ b/manifests/fly/template/gateway/fly.toml @@ -0,0 +1,37 @@ +app = 'control-plane-gateway' +primary_region = 'lax' + +[build] + image = 'public.ecr.aws/n4e0e1y0/beta9-gateway:0.1.277' + +[processes] + gateway = '/usr/local/bin/gateway' + +[[services]] + internal_port = 1994 + protocol = 'tcp' + processes = ["gateway"] + + [[services.ports]] + handlers = ['http', 'tls'] + tls_options = { "alpn" = ["http/1.1"] } + port = 443 + +[[services]] + internal_port = 1993 + protocol = 'tcp' + processes = ["gateway"] + + [[services.ports]] + handlers = ['tls'] + tls_options = { "alpn" = ["h2"] } + port = 1993 + + [services.ports.http_options] + h2_backend = true + +[[vm]] + memory = '1gb' + cpu_kind = 'shared' + cpus = 1 + \ No newline at end of file diff --git a/manifests/fly/template/headscale/Dockerfile b/manifests/fly/template/headscale/Dockerfile new file mode 100644 index 000000000..edc958614 --- /dev/null +++ b/manifests/fly/template/headscale/Dockerfile @@ -0,0 +1,6 @@ +FROM headscale/headscale:latest + +WORKDIR /app + +COPY config.yaml /etc/headscale/config.yaml +COPY acl.json /etc/headscale/acl_policy.json \ No newline at 
end of file diff --git a/manifests/fly/template/headscale/acl.json b/manifests/fly/template/headscale/acl.json new file mode 100644 index 000000000..321a9da7d --- /dev/null +++ b/manifests/fly/template/headscale/acl.json @@ -0,0 +1,9 @@ +{ + "acls": [ + { + "action": "accept", + "src": ["*"], + "dst": ["*:*"] + } + ] +} diff --git a/manifests/fly/template/headscale/fly.toml b/manifests/fly/template/headscale/fly.toml new file mode 100644 index 000000000..c2d534df8 --- /dev/null +++ b/manifests/fly/template/headscale/fly.toml @@ -0,0 +1,37 @@ +# fly.toml app configuration file generated for beam-headscale-test on 2024-01-30T10:03:21-05:00 +# +# See https://fly.io/docs/reference/configuration/ for information about how to use this file. +# + +app = 'control-plane-headscale' +primary_region = 'ewr' +kill_signal = 'SIGINT' +kill_timeout = '10s' + +[build] + +[processes] + headscale = 'serve' + +[http_service] + internal_port = 8080 + force_https = true + auto_stop_machines = true + auto_start_machines = true + min_machines_running = 1 + processes = ['headscale'] + +[[services]] + protocol = 'tcp' + internal_port = 8080 + processes = ['headscale'] + +[[services.ports]] + port = 443 + handlers = ['tls', 'http'] + +[[vm]] + cpu_kind = 'shared' + cpus = 8 + memory_mb = 2048 + processes = ['headscale'] diff --git a/manifests/fly/template/postgres/fly.toml b/manifests/fly/template/postgres/fly.toml new file mode 100644 index 000000000..8ef85a079 --- /dev/null +++ b/manifests/fly/template/postgres/fly.toml @@ -0,0 +1,49 @@ +# fly.toml app configuration file generated for control-plane-postgres on 2024-11-19T13:50:01-07:00 +# +# See https://fly.io/docs/reference/configuration/ for information about how to use this file. 
+# + +app = 'control-plane-postgres' +primary_region = 'lax' + +[build] + image = 'postgres' + +[env] + PRIMARY_REGION = 'lax' + +[[mounts]] + source = 'pg_data' + destination = '/bitnami/postgresql' + +[[services]] + protocol = 'tcp' + internal_port = 5432 + auto_start_machines = false + + [[services.ports]] + port = 5432 + handlers = ['pg_tls'] + + [services.concurrency] + type = 'connections' + hard_limit = 1000 + soft_limit = 1000 + +[[files]] + guest_path = '/opt/bitnami/postgresql/conf/pg_hba.conf' + local_path = './pg_hba.conf' + +[[files]] + guest_path = '/opt/bitnami/postgresql/conf/postgresql.conf' + local_path = './postgresql.conf' + +[[vm]] + memory = '1gb' + cpu_kind = 'shared' + cpus = 1 + +[[metrics]] + port = 9187 + path = '/metrics' + https = false diff --git a/manifests/fly/template/postgres/pg_hba.conf b/manifests/fly/template/postgres/pg_hba.conf new file mode 100644 index 000000000..72b26b844 --- /dev/null +++ b/manifests/fly/template/postgres/pg_hba.conf @@ -0,0 +1,5 @@ +host all all 0.0.0.0/0 md5 +host all all ::/0 md5 +local all all md5 +host all all 127.0.0.1/32 md5 +host all all ::1/128 md5 \ No newline at end of file diff --git a/manifests/fly/template/postgres/postgresql.conf b/manifests/fly/template/postgres/postgresql.conf new file mode 100644 index 000000000..aaa2d569b --- /dev/null +++ b/manifests/fly/template/postgres/postgresql.conf @@ -0,0 +1,822 @@ +# ----------------------------- +# PostgreSQL configuration file +# ----------------------------- +# +# This file consists of lines of the form: +# +# name = value +# +# (The "=" is optional.) Whitespace may be used. Comments are introduced with +# "#" anywhere on a line. The complete list of parameter names and allowed +# values can be found in the PostgreSQL documentation. +# +# The commented-out settings shown in this file represent the default values. +# Re-commenting a setting is NOT sufficient to revert it to the default value; +# you need to reload the server. 
+# +# This file is read on server startup and when the server receives a SIGHUP +# signal. If you edit the file on a running system, you have to SIGHUP the +# server for the changes to take effect, run "pg_ctl reload", or execute +# "SELECT pg_reload_conf()". Some parameters, which are marked below, +# require a server shutdown and restart to take effect. +# +# Any parameter can also be given as a command-line option to the server, e.g., +# "postgres -c log_connections=on". Some parameters can be changed at run time +# with the "SET" SQL command. +# +# Memory units: B = bytes Time units: us = microseconds +# kB = kilobytes ms = milliseconds +# MB = megabytes s = seconds +# GB = gigabytes min = minutes +# TB = terabytes h = hours +# d = days + + +#------------------------------------------------------------------------------ +# FILE LOCATIONS +#------------------------------------------------------------------------------ + +# The default values of these variables are driven from the -D command-line +# option or PGDATA environment variable, represented here as ConfigDir. + +#data_directory = 'ConfigDir' # use data in another directory + # (change requires restart) +#hba_file = 'ConfigDir/pg_hba.conf' # host-based authentication file + # (change requires restart) +#ident_file = 'ConfigDir/pg_ident.conf' # ident configuration file + # (change requires restart) + +# If external_pid_file is not explicitly set, no extra PID file is written. 
+#external_pid_file = '' # write an extra PID file + # (change requires restart) + + +#------------------------------------------------------------------------------ +# CONNECTIONS AND AUTHENTICATION +#------------------------------------------------------------------------------ + +# - Connection Settings - + +listen_addresses = '*' + # comma-separated list of addresses; + # defaults to 'localhost'; use '*' for all + # (change requires restart) +port = '5432' +#max_connections = 100 # (change requires restart) +#reserved_connections = 0 # (change requires restart) +#superuser_reserved_connections = 3 # (change requires restart) +#unix_socket_directories = '/tmp' # comma-separated list of directories + # (change requires restart) +#unix_socket_group = '' # (change requires restart) +#unix_socket_permissions = 0777 # begin with 0 to use octal notation + # (change requires restart) +#bonjour = off # advertise server via Bonjour + # (change requires restart) +#bonjour_name = '' # defaults to the computer name + # (change requires restart) + +# - TCP settings - +# see "man tcp" for details + +#tcp_keepalives_idle = 0 # TCP_KEEPIDLE, in seconds; + # 0 selects the system default +#tcp_keepalives_interval = 0 # TCP_KEEPINTVL, in seconds; + # 0 selects the system default +#tcp_keepalives_count = 0 # TCP_KEEPCNT; + # 0 selects the system default +#tcp_user_timeout = 0 # TCP_USER_TIMEOUT, in milliseconds; + # 0 selects the system default + +#client_connection_check_interval = 0 # time between checks for client + # disconnection while running queries; + # 0 for never + +# - Authentication - + +#authentication_timeout = 1min # 1s-600s +#password_encryption = scram-sha-256 # scram-sha-256 or md5 +#scram_iterations = 4096 +#db_user_namespace = off + +# GSSAPI using Kerberos +#krb_server_keyfile = 'FILE:${sysconfdir}/krb5.keytab' +#krb_caseins_users = off +#gss_accept_delegation = off + +# - SSL - + +#ssl = off +#ssl_ca_file = '' +#ssl_cert_file = 'server.crt' +#ssl_crl_file = '' 
+#ssl_crl_dir = '' +#ssl_key_file = 'server.key' +#ssl_ciphers = 'HIGH:MEDIUM:+3DES:!aNULL' # allowed SSL ciphers +#ssl_prefer_server_ciphers = on +#ssl_ecdh_curve = 'prime256v1' +#ssl_min_protocol_version = 'TLSv1.2' +#ssl_max_protocol_version = '' +#ssl_dh_params_file = '' +#ssl_passphrase_command = '' +#ssl_passphrase_command_supports_reload = off + + +#------------------------------------------------------------------------------ +# RESOURCE USAGE (except WAL) +#------------------------------------------------------------------------------ + +# - Memory - + +#shared_buffers = 128MB # min 128kB + # (change requires restart) +#huge_pages = try # on, off, or try + # (change requires restart) +#huge_page_size = 0 # zero for system default + # (change requires restart) +#temp_buffers = 8MB # min 800kB +#max_prepared_transactions = 0 # zero disables the feature + # (change requires restart) +# Caution: it is not advisable to set max_prepared_transactions nonzero unless +# you actively intend to use prepared transactions. 
+#work_mem = 4MB # min 64kB +#hash_mem_multiplier = 2.0 # 1-1000.0 multiplier on hash table work_mem +#maintenance_work_mem = 64MB # min 1MB +#autovacuum_work_mem = -1 # min 1MB, or -1 to use maintenance_work_mem +#logical_decoding_work_mem = 64MB # min 64kB +#max_stack_depth = 2MB # min 100kB +#shared_memory_type = mmap # the default is the first option + # supported by the operating system: + # mmap + # sysv + # windows + # (change requires restart) +#dynamic_shared_memory_type = posix # the default is usually the first option + # supported by the operating system: + # posix + # sysv + # windows + # mmap + # (change requires restart) +#min_dynamic_shared_memory = 0MB # (change requires restart) +#vacuum_buffer_usage_limit = 256kB # size of vacuum and analyze buffer access strategy ring; + # 0 to disable vacuum buffer access strategy; + # range 128kB to 16GB + +# - Disk - + +#temp_file_limit = -1 # limits per-process temp file space + # in kilobytes, or -1 for no limit + +# - Kernel Resources - + +#max_files_per_process = 1000 # min 64 + # (change requires restart) + +# - Cost-Based Vacuum Delay - + +#vacuum_cost_delay = 0 # 0-100 milliseconds (0 disables) +#vacuum_cost_page_hit = 1 # 0-10000 credits +#vacuum_cost_page_miss = 2 # 0-10000 credits +#vacuum_cost_page_dirty = 20 # 0-10000 credits +#vacuum_cost_limit = 200 # 1-10000 credits + +# - Background Writer - + +#bgwriter_delay = 200ms # 10-10000ms between rounds +#bgwriter_lru_maxpages = 100 # max buffers written/round, 0 disables +#bgwriter_lru_multiplier = 2.0 # 0-10.0 multiplier on buffers scanned/round +#bgwriter_flush_after = 0 # measured in pages, 0 disables + +# - Asynchronous Behavior - + +#backend_flush_after = 0 # measured in pages, 0 disables +#effective_io_concurrency = 1 # 1-1000; 0 disables prefetching +#maintenance_io_concurrency = 10 # 1-1000; 0 disables prefetching +#max_worker_processes = 8 # (change requires restart) +#max_parallel_workers_per_gather = 2 # taken from max_parallel_workers 
+#max_parallel_maintenance_workers = 2 # taken from max_parallel_workers +#max_parallel_workers = 8 # maximum number of max_worker_processes that + # can be used in parallel operations +#parallel_leader_participation = on +#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate + # (change requires restart) + + +#------------------------------------------------------------------------------ +# WRITE-AHEAD LOG +#------------------------------------------------------------------------------ + +# - Settings - + +wal_level = 'replica' + # (change requires restart) +fsync = 'on' + # (turning this off can cause + # unrecoverable data corruption) +#synchronous_commit = on # synchronization level; + # off, local, remote_write, remote_apply, or on +#wal_sync_method = fsync # the default is the first option + # supported by the operating system: + # open_datasync + # fdatasync (default on Linux and FreeBSD) + # fsync + # fsync_writethrough + # open_sync +#full_page_writes = on # recover from partial page writes +#wal_log_hints = off # also do full page writes of non-critical updates + # (change requires restart) +#wal_compression = off # enables compression of full-page writes; + # off, pglz, lz4, zstd, or on +#wal_init_zero = on # zero-fill new WAL files +#wal_recycle = on # recycle WAL files +#wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers + # (change requires restart) +#wal_writer_delay = 200ms # 1-10000 milliseconds +#wal_writer_flush_after = 1MB # measured in pages, 0 disables +#wal_skip_threshold = 2MB + +#commit_delay = 0 # range 0-100000, in microseconds +#commit_siblings = 5 # range 1-1000 + +# - Checkpoints - + +#checkpoint_timeout = 5min # range 30s-1d +#checkpoint_completion_target = 0.9 # checkpoint target duration, 0.0 - 1.0 +#checkpoint_flush_after = 0 # measured in pages, 0 disables +#checkpoint_warning = 30s # 0 disables +max_wal_size = '400MB' +#min_wal_size = 80MB + +# - Prefetching during recovery - + +#recovery_prefetch = try # 
prefetch pages referenced in the WAL? +#wal_decode_buffer_size = 512kB # lookahead window used for prefetching + # (change requires restart) + +# - Archiving - + +#archive_mode = off # enables archiving; off, on, or always + # (change requires restart) +#archive_library = '' # library to use to archive a WAL file + # (empty string indicates archive_command should + # be used) +#archive_command = '' # command to use to archive a WAL file + # placeholders: %p = path of file to archive + # %f = file name only + # e.g. 'test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f' +#archive_timeout = 0 # force a WAL file switch after this + # number of seconds; 0 disables + +# - Archive Recovery - + +# These are only used in recovery mode. + +#restore_command = '' # command to use to restore an archived WAL file + # placeholders: %p = path of file to restore + # %f = file name only + # e.g. 'cp /mnt/server/archivedir/%f %p' +#archive_cleanup_command = '' # command to execute at every restartpoint +#recovery_end_command = '' # command to execute at completion of recovery + +# - Recovery Target - + +# Set these only when performing a targeted recovery. 
+ +#recovery_target = '' # 'immediate' to end recovery as soon as a + # consistent state is reached + # (change requires restart) +#recovery_target_name = '' # the named restore point to which recovery will proceed + # (change requires restart) +#recovery_target_time = '' # the time stamp up to which recovery will proceed + # (change requires restart) +#recovery_target_xid = '' # the transaction ID up to which recovery will proceed + # (change requires restart) +#recovery_target_lsn = '' # the WAL LSN up to which recovery will proceed + # (change requires restart) +#recovery_target_inclusive = on # Specifies whether to stop: + # just after the specified recovery target (on) + # just before the recovery target (off) + # (change requires restart) +#recovery_target_timeline = 'latest' # 'current', 'latest', or timeline ID + # (change requires restart) +#recovery_target_action = 'pause' # 'pause', 'promote', 'shutdown' + # (change requires restart) + + +#------------------------------------------------------------------------------ +# REPLICATION +#------------------------------------------------------------------------------ + +# - Sending Servers - + +# Set these on the primary and on any standby that will send replication data. + +max_wal_senders = '16' + # (change requires restart) +#max_replication_slots = 10 # max number of replication slots + # (change requires restart) +wal_keep_size = '128MB' +#max_slot_wal_keep_size = -1 # in megabytes; -1 disables +#wal_sender_timeout = 60s # in milliseconds; 0 disables +#track_commit_timestamp = off # collect timestamp of transaction commit + # (change requires restart) + +# - Primary Server - + +# These settings are ignored on a standby server. 
+ +#synchronous_standby_names = '' # standby servers that provide sync rep + # method to choose sync standbys, number of sync standbys, + # and comma-separated list of application_name + # from standby(s); '*' = all + +# - Standby Servers - + +# These settings are ignored on a primary server. + +#primary_conninfo = '' # connection string to sending server +#primary_slot_name = '' # replication slot on sending server +hot_standby = 'on' + # (change requires restart) +#max_standby_archive_delay = 30s # max delay before canceling queries + # when reading WAL from archive; + # -1 allows indefinite delay +#max_standby_streaming_delay = 30s # max delay before canceling queries + # when reading streaming WAL; + # -1 allows indefinite delay +#wal_receiver_create_temp_slot = off # create temp slot if primary_slot_name + # is not set +#wal_receiver_status_interval = 10s # send replies at least this often + # 0 disables +#hot_standby_feedback = off # send info from standby to prevent + # query conflicts +#wal_receiver_timeout = 60s # time that receiver waits for + # communication from primary + # in milliseconds; 0 disables +#wal_retrieve_retry_interval = 5s # time to wait before retrying to + # retrieve WAL after a failed attempt +#recovery_min_apply_delay = 0 # minimum delay for applying changes during recovery + +# - Subscribers - + +# These settings are ignored on a publisher. 
+ +#max_logical_replication_workers = 4 # taken from max_worker_processes + # (change requires restart) +#max_sync_workers_per_subscription = 2 # taken from max_logical_replication_workers +#max_parallel_apply_workers_per_subscription = 2 # taken from max_logical_replication_workers + + +#------------------------------------------------------------------------------ +# QUERY TUNING +#------------------------------------------------------------------------------ + +# - Planner Method Configuration - + +#enable_async_append = on +#enable_bitmapscan = on +#enable_gathermerge = on +#enable_hashagg = on +#enable_hashjoin = on +#enable_incremental_sort = on +#enable_indexscan = on +#enable_indexonlyscan = on +#enable_material = on +#enable_memoize = on +#enable_mergejoin = on +#enable_nestloop = on +#enable_parallel_append = on +#enable_parallel_hash = on +#enable_partition_pruning = on +#enable_partitionwise_join = off +#enable_partitionwise_aggregate = off +#enable_presorted_aggregate = on +#enable_seqscan = on +#enable_sort = on +#enable_tidscan = on + +# - Planner Cost Constants - + +#seq_page_cost = 1.0 # measured on an arbitrary scale +#random_page_cost = 4.0 # same scale as above +#cpu_tuple_cost = 0.01 # same scale as above +#cpu_index_tuple_cost = 0.005 # same scale as above +#cpu_operator_cost = 0.0025 # same scale as above +#parallel_setup_cost = 1000.0 # same scale as above +#parallel_tuple_cost = 0.1 # same scale as above +#min_parallel_table_scan_size = 8MB +#min_parallel_index_scan_size = 512kB +#effective_cache_size = 4GB + +#jit_above_cost = 100000 # perform JIT compilation if available + # and query more expensive than this; + # -1 disables +#jit_inline_above_cost = 500000 # inline small functions if query is + # more expensive than this; -1 disables +#jit_optimize_above_cost = 500000 # use expensive JIT optimizations if + # query is more expensive than this; + # -1 disables + +# - Genetic Query Optimizer - + +#geqo = on +#geqo_threshold = 12 
+#geqo_effort = 5 # range 1-10 +#geqo_pool_size = 0 # selects default based on effort +#geqo_generations = 0 # selects default based on effort +#geqo_selection_bias = 2.0 # range 1.5-2.0 +#geqo_seed = 0.0 # range 0.0-1.0 + +# - Other Planner Options - + +#default_statistics_target = 100 # range 1-10000 +#constraint_exclusion = partition # on, off, or partition +#cursor_tuple_fraction = 0.1 # range 0.0-1.0 +#from_collapse_limit = 8 +#jit = on # allow JIT compilation +#join_collapse_limit = 8 # 1 disables collapsing of explicit + # JOIN clauses +#plan_cache_mode = auto # auto, force_generic_plan or + # force_custom_plan +#recursive_worktable_factor = 10.0 # range 0.001-1000000 + + +#------------------------------------------------------------------------------ +# REPORTING AND LOGGING +#------------------------------------------------------------------------------ + +# - Where to Log - + +#log_destination = 'stderr' # Valid values are combinations of + # stderr, csvlog, jsonlog, syslog, and + # eventlog, depending on platform. + # csvlog and jsonlog require + # logging_collector to be on. + +# This is used when logging to stderr: +#logging_collector = off # Enable capturing of stderr, jsonlog, + # and csvlog into log files. Required + # to be on for csvlogs and jsonlogs. + # (change requires restart) + +# These are only used if logging_collector is on: +#log_directory = 'log' # directory where log files are written, + # can be absolute or relative to PGDATA +#log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log' # log file name pattern, + # can include strftime() escapes +#log_file_mode = 0600 # creation mode for log files, + # begin with 0 to use octal notation +#log_rotation_age = 1d # Automatic rotation of logfiles will + # happen after that time. 0 disables. +#log_rotation_size = 10MB # Automatic rotation of logfiles will + # happen after that much log output. + # 0 disables. 
+#log_truncate_on_rotation = off # If on, an existing log file with the + # same name as the new log file will be + # truncated rather than appended to. + # But such truncation only occurs on + # time-driven rotation, not on restarts + # or size-driven rotation. Default is + # off, meaning append to existing files + # in all cases. + +# These are relevant when logging to syslog: +#syslog_facility = 'LOCAL0' +#syslog_ident = 'postgres' +#syslog_sequence_numbers = on +#syslog_split_messages = on + +# This is only relevant when logging to eventlog (Windows): +# (change requires restart) +#event_source = 'PostgreSQL' + +# - When to Log - + +#log_min_messages = warning # values in order of decreasing detail: + # debug5 + # debug4 + # debug3 + # debug2 + # debug1 + # info + # notice + # warning + # error + # log + # fatal + # panic + +#log_min_error_statement = error # values in order of decreasing detail: + # debug5 + # debug4 + # debug3 + # debug2 + # debug1 + # info + # notice + # warning + # error + # log + # fatal + # panic (effectively off) + +#log_min_duration_statement = -1 # -1 is disabled, 0 logs all statements + # and their durations, > 0 logs only + # statements running at least this number + # of milliseconds + +#log_min_duration_sample = -1 # -1 is disabled, 0 logs a sample of statements + # and their durations, > 0 logs only a sample of + # statements running at least this number + # of milliseconds; + # sample fraction is determined by log_statement_sample_rate + +#log_statement_sample_rate = 1.0 # fraction of logged statements exceeding + # log_min_duration_sample to be logged; + # 1.0 logs all such statements, 0.0 never logs + + +#log_transaction_sample_rate = 0.0 # fraction of transactions whose statements + # are logged regardless of their duration; 1.0 logs all + # statements from all transactions, 0.0 never logs + +#log_startup_progress_interval = 10s # Time between progress updates for + # long-running startup operations. 
+ # 0 disables the feature, > 0 indicates + # the interval in milliseconds. + +# - What to Log - + +#debug_print_parse = off +#debug_print_rewritten = off +#debug_print_plan = off +#debug_pretty_print = on +#log_autovacuum_min_duration = 10min # log autovacuum activity; + # -1 disables, 0 logs all actions and + # their durations, > 0 logs only + # actions running at least this number + # of milliseconds. +#log_checkpoints = on +log_connections = 'false' +log_disconnections = 'false' +#log_duration = off +#log_error_verbosity = default # terse, default, or verbose messages +log_hostname = 'false' +#log_line_prefix = '%m [%p] ' # special values: + # %a = application name + # %u = user name + # %d = database name + # %r = remote host and port + # %h = remote host + # %b = backend type + # %p = process ID + # %P = process ID of parallel group leader + # %t = timestamp without milliseconds + # %m = timestamp with milliseconds + # %n = timestamp with milliseconds (as a Unix epoch) + # %Q = query ID (0 if none or not computed) + # %i = command tag + # %e = SQL state + # %c = session ID + # %l = session line number + # %s = session start timestamp + # %v = virtual transaction ID + # %x = transaction ID (0 if none) + # %q = stop here in non-session + # processes + # %% = '%' + # e.g. 
'<%u%%%d> ' +#log_lock_waits = off # log lock waits >= deadlock_timeout +#log_recovery_conflict_waits = off # log standby recovery conflict waits + # >= deadlock_timeout +#log_parameter_max_length = -1 # when logging statements, limit logged + # bind-parameter values to N bytes; + # -1 means print in full, 0 disables +#log_parameter_max_length_on_error = 0 # when logging an error, limit logged + # bind-parameter values to N bytes; + # -1 means print in full, 0 disables +#log_statement = 'none' # none, ddl, mod, all +#log_replication_commands = off +#log_temp_files = -1 # log temporary files equal or larger + # than the specified size in kilobytes; + # -1 disables, 0 logs all temp files +#log_timezone = 'GMT' + +# - Process Title - + +#cluster_name = '' # added to process titles if nonempty + # (change requires restart) +#update_process_title = on + + +#------------------------------------------------------------------------------ +# STATISTICS +#------------------------------------------------------------------------------ + +# - Cumulative Query and Index Statistics - + +#track_activities = on +#track_activity_query_size = 1024 # (change requires restart) +#track_counts = on +#track_io_timing = off +#track_wal_io_timing = off +#track_functions = none # none, pl, all +#stats_fetch_consistency = cache # cache, none, snapshot + + +# - Monitoring - + +#compute_query_id = auto +#log_statement_stats = off +#log_parser_stats = off +#log_planner_stats = off +#log_executor_stats = off + + +#------------------------------------------------------------------------------ +# AUTOVACUUM +#------------------------------------------------------------------------------ + +#autovacuum = on # Enable autovacuum subprocess? 'on' + # requires track_counts to also be on. 
+#autovacuum_max_workers = 3 # max number of autovacuum subprocesses + # (change requires restart) +#autovacuum_naptime = 1min # time between autovacuum runs +#autovacuum_vacuum_threshold = 50 # min number of row updates before + # vacuum +#autovacuum_vacuum_insert_threshold = 1000 # min number of row inserts + # before vacuum; -1 disables insert + # vacuums +#autovacuum_analyze_threshold = 50 # min number of row updates before + # analyze +#autovacuum_vacuum_scale_factor = 0.2 # fraction of table size before vacuum +#autovacuum_vacuum_insert_scale_factor = 0.2 # fraction of inserts over table + # size before insert vacuum +#autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze +#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum + # (change requires restart) +#autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age + # before forced vacuum + # (change requires restart) +#autovacuum_vacuum_cost_delay = 2ms # default vacuum cost delay for + # autovacuum, in milliseconds; + # -1 means use vacuum_cost_delay +#autovacuum_vacuum_cost_limit = -1 # default vacuum cost limit for + # autovacuum, -1 means use + # vacuum_cost_limit + + +#------------------------------------------------------------------------------ +# CLIENT CONNECTION DEFAULTS +#------------------------------------------------------------------------------ + +# - Statement Behavior - + +client_min_messages = 'error' + # debug5 + # debug4 + # debug3 + # debug2 + # debug1 + # log + # notice + # warning + # error +#search_path = '"$user", public' # schema names +#row_security = on +#default_table_access_method = 'heap' +#default_tablespace = '' # a tablespace name, '' uses the default +#default_toast_compression = 'pglz' # 'pglz' or 'lz4' +#temp_tablespaces = '' # a list of tablespace names, '' uses + # only default tablespace +#check_function_bodies = on +#default_transaction_isolation = 'read committed' +#default_transaction_read_only = off 
+#default_transaction_deferrable = off +#session_replication_role = 'origin' +#statement_timeout = 0 # in milliseconds, 0 is disabled +#lock_timeout = 0 # in milliseconds, 0 is disabled +#idle_in_transaction_session_timeout = 0 # in milliseconds, 0 is disabled +#idle_session_timeout = 0 # in milliseconds, 0 is disabled +#vacuum_freeze_table_age = 150000000 +#vacuum_freeze_min_age = 50000000 +#vacuum_failsafe_age = 1600000000 +#vacuum_multixact_freeze_table_age = 150000000 +#vacuum_multixact_freeze_min_age = 5000000 +#vacuum_multixact_failsafe_age = 1600000000 +#bytea_output = 'hex' # hex, escape +#xmlbinary = 'base64' +#xmloption = 'content' +#gin_pending_list_limit = 4MB +#createrole_self_grant = '' # set and/or inherit + +# - Locale and Formatting - + +#datestyle = 'iso, mdy' +#intervalstyle = 'postgres' +#timezone = 'GMT' +#timezone_abbreviations = 'Default' # Select the set of available time zone + # abbreviations. Currently, there are + # Default + # Australia (historical usage) + # India + # You can create your own file in + # share/timezonesets/. +#extra_float_digits = 1 # min -15, max 3; any value >0 actually + # selects precise output mode +#client_encoding = sql_ascii # actually, defaults to database + # encoding + +# These settings are initialized by initdb, but they can be changed. 
+#lc_messages = 'C' # locale for system error message + # strings +#lc_monetary = 'C' # locale for monetary formatting +#lc_numeric = 'C' # locale for number formatting +#lc_time = 'C' # locale for time formatting + +#icu_validation_level = warning # report ICU locale validation + # errors at the given level + +# default configuration for text search +#default_text_search_config = 'pg_catalog.simple' + +# - Shared Library Preloading - + +#local_preload_libraries = '' +#session_preload_libraries = '' +shared_preload_libraries = 'pg_cron' +#jit_provider = 'llvmjit' # JIT library to use + +# - Other Defaults - + +#dynamic_library_path = '$libdir' +#gin_fuzzy_search_limit = 0 + + +#------------------------------------------------------------------------------ +# LOCK MANAGEMENT +#------------------------------------------------------------------------------ + +#deadlock_timeout = 1s +#max_locks_per_transaction = 64 # min 10 + # (change requires restart) +#max_pred_locks_per_transaction = 64 # min 10 + # (change requires restart) +#max_pred_locks_per_relation = -2 # negative values mean + # (max_pred_locks_per_transaction + # / -max_pred_locks_per_relation) - 1 +#max_pred_locks_per_page = 2 # min 0 + + +#------------------------------------------------------------------------------ +# VERSION AND PLATFORM COMPATIBILITY +#------------------------------------------------------------------------------ + +# - Previous PostgreSQL Versions - + +#array_nulls = on +#backslash_quote = safe_encoding # on, off, or safe_encoding +#escape_string_warning = on +#lo_compat_privileges = off +#quote_all_identifiers = off +#standard_conforming_strings = on +#synchronize_seqscans = on + +# - Other Platforms and Clients - + +#transform_null_equals = off + + +#------------------------------------------------------------------------------ +# ERROR HANDLING +#------------------------------------------------------------------------------ + +#exit_on_error = off # terminate session on any error? 
+#restart_after_crash = on # reinitialize after backend crash? +#data_sync_retry = off # retry or panic on failure to fsync + # data? + # (change requires restart) +#recovery_init_sync_method = fsync # fsync, syncfs (Linux 5.8+) + + +#------------------------------------------------------------------------------ +# CONFIG FILE INCLUDES +#------------------------------------------------------------------------------ + +# These options allow settings to be loaded from files other than the +# default postgresql.conf. Note that these are directives, not variable +# assignments, so they can usefully be given more than once. + +include_dir = 'conf.d' +#include_dir = '...' # include files ending in '.conf' from + # a directory, e.g., 'conf.d' +#include_if_exists = '...' # include file only if it exists +#include = '...' # include file + + +#------------------------------------------------------------------------------ +# CUSTOMIZED OPTIONS +#------------------------------------------------------------------------------ + +# Add settings for extensions here +pgaudit.log_catalog = 'off' \ No newline at end of file diff --git a/manifests/fly/template/redis-blobcache/fly.toml b/manifests/fly/template/redis-blobcache/fly.toml new file mode 100644 index 000000000..0bcabd3e4 --- /dev/null +++ b/manifests/fly/template/redis-blobcache/fly.toml @@ -0,0 +1,37 @@ +# fly.toml app configuration file generated for blobcache-redis on 2024-11-18T16:43:08-07:00 +# +# See https://fly.io/docs/reference/configuration/ for information about how to use this file. 
+# + +app = 'blobcache-redis' +primary_region = 'ewr' + +[build] + image = 'redis' + +[[mounts]] + source = 'redis_server' + destination = '/data' + +[[services]] + protocol = 'tcp' + internal_port = 6379 + auto_start_machines = true + min_machines_running = 1 + + [[services.ports]] + port = 6379 + + [[services.tcp_checks]] + interval = '10s' + timeout = '2s' + +[[vm]] + memory = '1gb' + cpu_kind = 'shared' + cpus = 1 + +[[metrics]] + port = 9091 + path = '/metrics' + https = false diff --git a/manifests/fly/template/redis-control-plane/fly.toml b/manifests/fly/template/redis-control-plane/fly.toml new file mode 100644 index 000000000..a6b50c6b0 --- /dev/null +++ b/manifests/fly/template/redis-control-plane/fly.toml @@ -0,0 +1,37 @@ +# fly.toml app configuration file generated for control-plane-redis-white-voice-9219 on 2024-11-18T16:42:23-07:00 +# +# See https://fly.io/docs/reference/configuration/ for information about how to use this file. +# + +app = 'control-plane-redis-white-voice-9219' +primary_region = 'ewr' + +[build] + image = 'redis' + +[[mounts]] + source = 'redis_server' + destination = '/data' + +[[services]] + protocol = 'tcp' + internal_port = 6379 + auto_start_machines = true + min_machines_running = 1 + + [[services.ports]] + port = 6379 + + [[services.tcp_checks]] + interval = '10s' + timeout = '2s' + +[[vm]] + memory = '1gb' + cpu_kind = 'shared' + cpus = 1 + +[[metrics]] + port = 9091 + path = '/metrics' + https = false diff --git a/manifests/fly/template/redis-juicefs/fly.toml b/manifests/fly/template/redis-juicefs/fly.toml new file mode 100644 index 000000000..0aa7d81c5 --- /dev/null +++ b/manifests/fly/template/redis-juicefs/fly.toml @@ -0,0 +1,37 @@ +# fly.toml app configuration file generated for juicefs-redis-proud-water-2981 on 2024-11-18T16:43:48-07:00 +# +# See https://fly.io/docs/reference/configuration/ for information about how to use this file. 
+# + +app = 'juicefs-redis-proud-water-2981' +primary_region = 'ewr' + +[build] + image = 'redis' + +[[mounts]] + source = 'redis_server' + destination = '/data' + +[[services]] + protocol = 'tcp' + internal_port = 6379 + auto_start_machines = true + min_machines_running = 1 + + [[services.ports]] + port = 6379 + + [[services.tcp_checks]] + interval = '10s' + timeout = '2s' + +[[vm]] + memory = '1gb' + cpu_kind = 'shared' + cpus = 1 + +[[metrics]] + port = 9091 + path = '/metrics' + https = false diff --git a/pkg/common/config.default.yaml b/pkg/common/config.default.yaml index e6e6d9f3e..f68c95081 100644 --- a/pkg/common/config.default.yaml +++ b/pkg/common/config.default.yaml @@ -235,6 +235,8 @@ monitoring: openmeter: serverUrl: "" apiKey: "" +agent: + dynamicServiceHosts: false abstractions: bot: systemPrompt: "" diff --git a/pkg/providers/remote_config.go b/pkg/providers/remote_config.go index 2116ff7f6..e48bf498c 100644 --- a/pkg/providers/remote_config.go +++ b/pkg/providers/remote_config.go @@ -26,34 +26,41 @@ func GetRemoteConfig(baseConfig types.AppConfig, tailscale *network.Tailscale) ( return nil, err } - redisHostname, err := tailscale.ResolveService("control-plane-redis", connectTimeout) - if err != nil { - return nil, err - } - remoteConfig.Database.Redis.Addrs[0] = fmt.Sprintf("%s:%d", redisHostname, 6379) - remoteConfig.Database.Redis.InsecureSkipVerify = true - - if baseConfig.Storage.Mode == storage.StorageModeJuiceFS { - juiceFsRedisHostname, err := tailscale.ResolveService("juicefs-redis", connectTimeout) + var redisHostname string + if remoteConfig.Agent.DynamicServiceHosts { + redisHostname, err = tailscale.ResolveService("control-plane-redis", connectTimeout) if err != nil { return nil, err } + remoteConfig.Database.Redis.Addrs[0] = fmt.Sprintf("%s:%d", redisHostname, 6379) + } + remoteConfig.Database.Redis.InsecureSkipVerify = true - juiceFsRedisHostname = fmt.Sprintf("%s:%d", juiceFsRedisHostname, 6379) - + if baseConfig.Storage.Mode == 
storage.StorageModeJuiceFS { parsedUrl, err := url.Parse(remoteConfig.Storage.JuiceFS.RedisURI) if err != nil { return nil, err } juicefsRedisPassword, _ := parsedUrl.User.Password() - remoteConfig.Storage.JuiceFS.RedisURI = fmt.Sprintf("rediss://:%s@%s/0", juicefsRedisPassword, juiceFsRedisHostname) + scheme := parsedUrl.Scheme + + if remoteConfig.Agent.DynamicServiceHosts { + juiceFsRedisHostname, err := tailscale.ResolveService("juicefs-redis", connectTimeout) + if err != nil { + return nil, err + } + remoteConfig.Storage.JuiceFS.RedisURI = fmt.Sprintf("%s://:%s@%s/0", scheme, juicefsRedisPassword, juiceFsRedisHostname) + } } if baseConfig.Worker.BlobCacheEnabled { - blobcacheRedisHostname, err := tailscale.ResolveService("blobcache-redis", connectTimeout) - if err != nil { - return nil, err + var blobcacheRedisHostname string + if remoteConfig.Agent.DynamicServiceHosts { + blobcacheRedisHostname, err = tailscale.ResolveService("blobcache-redis", connectTimeout) + if err != nil { + return nil, err + } } remoteConfig.BlobCache.Metadata.RedisAddr = fmt.Sprintf("%s:%d", blobcacheRedisHostname, 6379) diff --git a/pkg/scheduler/pool_external.go b/pkg/scheduler/pool_external.go index 4f108217e..c0437b86a 100644 --- a/pkg/scheduler/pool_external.go +++ b/pkg/scheduler/pool_external.go @@ -405,7 +405,7 @@ func (wpc *ExternalWorkerPoolController) getWorkerEnvironment(workerId, machineI }, { Name: "BETA9_GATEWAY_PORT", - Value: "443", + Value: strconv.Itoa(wpc.config.GatewayService.GRPC.ExternalPort), }, { Name: "POD_HOSTNAME", diff --git a/pkg/types/config.go b/pkg/types/config.go index d01c32415..a58e9d81c 100644 --- a/pkg/types/config.go +++ b/pkg/types/config.go @@ -21,6 +21,7 @@ type AppConfig struct { Proxy ProxyConfig `key:"proxy" json:"proxy"` Monitoring MonitoringConfig `key:"monitoring" json:"monitoring"` BlobCache blobcache.BlobCacheConfig `key:"blobcache" json:"blobcache"` + Agent AgentConfig `key:"agent" json:"agent"` Abstractions AbstractionConfig 
`key:"abstractions" json:"abstractions"` } @@ -69,6 +70,7 @@ type PostgresConfig struct { type GRPCConfig struct { Port int `key:"port" json:"port"` + ExternalPort int `key:"externalPort" json:"external_port"` MaxRecvMsgSize int `key:"maxRecvMsgSize" json:"max_recv_msg_size"` MaxSendMsgSize int `key:"maxSendMsgSize" json:"max_send_msg_size"` } @@ -292,6 +294,14 @@ type EC2ProviderConfig struct { Agent ProviderAgentConfig `key:"agent" json:"agent"` } +type AgentRedisConfig struct { + Hostname string `key:"hostname" json:"hostname"` +} + +type AgentConfig struct { + DynamicServiceHosts bool `key:"dynamicServiceHosts" json:"dynamic_service_hosts"` +} + type OCIProviderConfig struct { Tenancy string `key:"tenancy" json:"tenancy"` UserId string `key:"userId" json:"user_id"` diff --git a/sdk/src/beta9/channel.py b/sdk/src/beta9/channel.py index d5193ecc4..b8caf7508 100644 --- a/sdk/src/beta9/channel.py +++ b/sdk/src/beta9/channel.py @@ -31,10 +31,11 @@ def __init__( addr: str, token: Optional[str] = None, credentials: Optional[ChannelCredentials] = None, + tls: bool = False, ): if credentials is not None: channel = grpc.secure_channel(addr, credentials) - elif addr.endswith("443"): + elif addr.endswith("443") or tls: channel = grpc.secure_channel(addr, grpc.ssl_channel_credentials()) else: channel = grpc.insecure_channel(addr) @@ -136,6 +137,7 @@ def get_channel(context: Optional[ConfigContext] = None) -> Channel: return Channel( addr=f"{context.gateway_host}:{context.gateway_port}", token=context.token, + tls=context.tls, ) @@ -152,6 +154,7 @@ def prompt_first_auth(settings: SDKSettings) -> None: channel = Channel( addr=f"{context.gateway_host}:{context.gateway_port}", token=context.token, + tls=context.tls, ) terminal.header("Authorizing with gateway") diff --git a/sdk/src/beta9/cli/machine.py b/sdk/src/beta9/cli/machine.py index ecb5411e9..caea18fdc 100644 --- a/sdk/src/beta9/cli/machine.py +++ b/sdk/src/beta9/cli/machine.py @@ -170,8 +170,7 @@ def 
create_machine(service: ServiceClient, pool: str): f"Created machine with ID: '{res.machine.id}'. Use the following command to setup the node:" ) - text = textwrap.dedent( - f"""\ + cmd_text = f"""\ # -- Agent setup sudo curl -L -o agent https://release.beam.cloud/agent/agent && \\ sudo chmod +x agent && \\ @@ -180,9 +179,14 @@ def create_machine(service: ServiceClient, pool: str): --tailscale-url "{res.machine.tailscale_url}" \\ --tailscale-auth "{res.machine.tailscale_auth}" \\ --pool-name "{res.machine.pool_name}" \\ - --provider-name "{res.machine.provider_name}" + --provider-name "{res.machine.provider_name}\"""" + + if not service._config.gateway_host.endswith("beam.cloud"): + cmd_text += f""" \\ + --gateway-url "{service._config.gateway_host}" """ - ) + + text = textwrap.dedent(cmd_text + "\n") if res.machine.user_data: text = f"""# -- User data\n{res.machine.user_data}\n{text}""" diff --git a/sdk/src/beta9/config.py b/sdk/src/beta9/config.py index e401c8d18..d95b26a2e 100644 --- a/sdk/src/beta9/config.py +++ b/sdk/src/beta9/config.py @@ -4,6 +4,7 @@ import ipaddress import os import socket +import ssl from dataclasses import asdict, dataclass from pathlib import Path from typing import Any, Mapping, MutableMapping, Optional, Tuple, Union @@ -55,6 +56,7 @@ class ConfigContext: token: Optional[str] = None gateway_host: Optional[str] = None gateway_port: Optional[int] = None + tls: bool = False @classmethod def from_dict(cls, data: Mapping[str, Any]) -> "ConfigContext": @@ -144,12 +146,14 @@ def get_config_context(name: str = DEFAULT_CONTEXT_NAME) -> ConfigContext: gateway_host = os.getenv("BETA9_GATEWAY_HOST", None) gateway_port = os.getenv("BETA9_GATEWAY_PORT", None) token = os.getenv("BETA9_TOKEN", None) + tls = is_tls_enabled(gateway_host, gateway_port) if gateway_host and gateway_port and token: return ConfigContext( token=token, gateway_host=gateway_host, gateway_port=gateway_port, + tls=tls, ) terminal.header(f"Context '{name}' does not exist. 
def is_tls_enabled(
    host: Optional[str],
    port: Union[int, str, None],
    *,
    timeout: float = 5.0,
) -> bool:
    """Report whether ``host:port`` completes a verified TLS handshake.

    A TCP connection is opened to the endpoint and a TLS handshake is
    attempted using ``ssl.create_default_context()``. Any failure along the
    way (missing/invalid arguments, DNS failure, refused connection, timeout,
    handshake or certificate error) is reported as ``False`` rather than
    raised, because callers use the result only to choose between a secure
    and an insecure channel.

    Args:
        host: Gateway hostname or IP address. ``None`` or an empty string
            yields ``False`` without touching the network.
        port: Gateway port as an int or a numeric string (callers pass the
            raw value of an environment variable or a prompt). ``None``,
            non-numeric, or out-of-range values yield ``False`` without
            touching the network.
        timeout: Seconds to wait for the TCP connect and the handshake.

    Returns:
        ``True`` only if the TLS handshake succeeded; ``False`` otherwise.
    """
    # Callers may invoke this before they have validated their inputs
    # (e.g. with unset environment variables), so reject unusable values
    # up front instead of probing an unintended address.
    if not host or port is None:
        return False

    try:
        port_number = int(port)
    except (TypeError, ValueError):
        return False
    if not 0 < port_number < 65536:
        return False

    try:
        context = ssl.create_default_context()
        with socket.create_connection((host, port_number), timeout=timeout) as sock:
            with context.wrap_socket(sock, server_hostname=host):
                return True
    except (OSError, ValueError):
        # ssl.SSLError, socket.gaierror and socket timeouts are all OSError
        # subclasses; ValueError covers malformed hostnames (UnicodeError
        # raised during IDNA encoding).
        return False