feat: sandbox provider DB-driven config, k8s sandbox namespace setup
Some checks failed
Stuffle/nebula-os/pipeline/head There was a failure building this commit

- services.py: read sandbox provider slug+config from installation_provider_config
  (same pattern as embeddings); env SANDBOX_PROVIDER is fallback only
- constellation.config.json: fix sandbox builtin_default to k8s-job
- setup_sandbox.sh: namespace=sandbox, SA=sandbox, NEBULA_SA=nebula, Harbor ci project
- db/migrations/036: seed installation_provider_config sandbox row (k8s-job)
- scripts/sandbox.Dockerfile: python:3.11-slim image for sandbox jobs
- db/db-verify.sh: schema drift verification script
This commit is contained in:
2026-04-19 14:39:09 +05:30
parent 0f039207fe
commit 4d174c6f65
6 changed files with 386 additions and 13 deletions

View File

@@ -157,7 +157,7 @@
"agent": false,
"task": false
},
"builtin_default": "nebula-sandbox",
"builtin_default": "k8s-job",
"setup_required": false,
"setup_step": null
},

287
db/db-verify.sh Executable file
View File

@@ -0,0 +1,287 @@
#!/usr/bin/env bash
# =============================================================================
# db-verify.sh — NebulaOS DB sync check (read-only)
#
# Compares the combined expected schema (bootstrap.sql + all migrations/*.sql)
# against the live database and reports any discrepancies.
# Makes NO changes to the database.
#
# Usage:
# ./db/db-verify.sh
# DB_PASSWORD=secret ./db/db-verify.sh # use local psql (port-fwd)
# KUBE_POD=my-pod ./db/db-verify.sh # override pod name
#
# Connection priority:
# 1. Local psql — if psql is on PATH and DB_PASSWORD is set
# 2. kubectl exec — into the postgres pod in namespace infra (no password needed)
#
# Env vars (all optional):
# DB_NAME default: nebulaos
# DB_USER default: postgres
# DB_HOST default: localhost (psql mode only)
# DB_PORT default: 5432 (psql mode only)
# DB_PASSWORD required for psql mode; not needed for kubectl mode
# KUBE_NS default: infra
# KUBE_POD auto-detected from label app=postgres if not set
# =============================================================================
# Strict mode: abort on errors, on unset variables, and on failures anywhere
# in a pipeline.
set -euo pipefail

# ANSI colour escapes; interpreted by `echo -e` in the helpers below.
RED='\033[0;31m'; YELLOW='\033[1;33m'; GREEN='\033[0;32m'
CYAN='\033[0;36m'; BOLD='\033[1m'; NC='\033[0m'

# Output helpers. Each status helper prints a coloured one-character marker
# followed by the message, so pass/fail/extra lines can be told apart even
# when colours are stripped (e.g. in CI logs):
#   ok      — green  ✓  item is present / check passed
#   fail    — red    ✗  item is missing / check failed
#   extra   — yellow ~  informational (present in DB but not in SQL files)
#   section — cyan bold section banner
#   info    — plain indented line
ok() { echo -e "${GREEN} ✓${NC} $*"; }
fail() { echo -e "${RED} ✗${NC} $*"; }
extra() { echo -e "${YELLOW} ~${NC} $*"; }
section() { echo -e "\n${CYAN}${BOLD}── $* ──${NC}"; }
info() { echo -e " $*"; }

# Accumulators filled by the comparison sections and read by the summary.
MISSING_TABLES=()
MISSING_COLS=()
EXTRA_COUNT=0
# ── Resolve paths ──────────────────────────────────────────────────────────────
# Everything is located relative to this script, so it can be invoked from
# any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
BOOTSTRAP_SQL="$SCRIPT_DIR/bootstrap.sql"
MIGRATIONS_DIR="$SCRIPT_DIR/migrations"

# ── Load environment ───────────────────────────────────────────────────────────
# Source .env then .env.local from the project root when they exist. `set -a`
# marks every variable they assign for export; later files win over earlier.
for dotenv in "$PROJECT_ROOT/.env" "$PROJECT_ROOT/.env.local"; do
  if [[ -f "$dotenv" ]]; then
    set -a
    source "$dotenv"
    set +a
  fi
done

# Fill in defaults for anything still unset or empty after loading the env.
: "${DB_HOST:=localhost}"
: "${DB_PORT:=5432}"
: "${DB_NAME:=nebulaos}"
: "${DB_USER:=postgres}"
: "${DB_PASSWORD:=}"
: "${KUBE_NS:=infra}"
: "${KUBE_POD:=}"
# ── Determine connection mode ──────────────────────────────────────────────────
# Priority: local psql (needs psql on PATH and DB_PASSWORD), then kubectl exec.
# MODE stays empty when neither is usable.
MODE=""
if command -v psql &>/dev/null && [[ -n "$DB_PASSWORD" ]]; then
  MODE="psql"
elif command -v kubectl &>/dev/null; then
  MODE="kubectl"
fi

# No usable client at all: explain how to run the script inside the pod.
if [[ -z "$MODE" ]]; then
  echo -e "${RED}[ERROR]${NC} Neither psql (with DB_PASSWORD) nor kubectl is available."
  echo " Copy this script into the postgres pod and run it there:"
  echo " kubectl cp $0 $KUBE_NS/<pod>:/tmp/db-verify.sh"
  echo " kubectl exec -n $KUBE_NS <pod> -- bash /tmp/db-verify.sh"
  exit 1
fi

# kubectl mode without an explicit pod: take the first pod labelled
# app=postgres in the namespace. A lookup failure is tolerated here and
# reported just below as "cannot locate".
if [[ "$MODE" == "kubectl" && -z "$KUBE_POD" ]]; then
  KUBE_POD=$(kubectl get pod -n "$KUBE_NS" -l app=postgres \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
  if [[ -z "$KUBE_POD" ]]; then
    echo -e "${RED}[ERROR]${NC} Cannot locate postgres pod in namespace '$KUBE_NS'."
    echo " Set KUBE_POD=<pod-name> or run with psql + DB_PASSWORD."
    exit 1
  fi
fi
# ── Query helper ───────────────────────────────────────────────────────────────
# run_sql <statement>
#   Runs one SQL statement against the target database using the transport
#   selected in MODE and writes the result to stdout as unaligned,
#   tuples-only rows (-A -t). stderr is discarded; the exit status is that of
#   the underlying psql / kubectl command.
run_sql() {
  local statement="$1"
  local -a output_flags=(-t -A -c "$statement")

  case "$MODE" in
    psql)
      PGPASSWORD="$DB_PASSWORD" psql \
        -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
        "${output_flags[@]}" 2>/dev/null
      ;;
    *)
      kubectl exec -n "$KUBE_NS" "$KUBE_POD" -- \
        psql -U "$DB_USER" -d "$DB_NAME" "${output_flags[@]}" 2>/dev/null
      ;;
  esac
}
# ── Header ─────────────────────────────────────────────────────────────────────
# Show which transport and target are about to be used.
echo -e "${BOLD}NebulaOS DB Sync Verification${NC}"
case "$MODE" in
  psql)
    info "Mode : local psql"
    info "Target : $DB_USER@$DB_HOST:$DB_PORT/$DB_NAME"
    ;;
  *)
    info "Mode : kubectl exec"
    info "Pod : $KUBE_NS/$KUBE_POD"
    info "DB : $DB_NAME (user: $DB_USER)"
    ;;
esac

# ── Connectivity ───────────────────────────────────────────────────────────────
# A trivial round-trip query proves the connection works before any real work.
section "Connectivity"
if run_sql "SELECT 1" > /dev/null; then
  ok "Connected to '$DB_NAME'"
else
  echo -e "${RED}[ERROR]${NC} Cannot connect to database '$DB_NAME'."
  exit 1
fi
# ── Parse SQL files → expected schema ─────────────────────────────────────────
section "Parsing SQL files"

# bootstrap.sql first, then every migration in lexical (i.e. numeric-prefix)
# order.
SQL_FILES=("$BOOTSTRAP_SQL")
while IFS= read -r f; do SQL_FILES+=("$f"); done \
  < <(find "$MIGRATIONS_DIR" -name "*.sql" 2>/dev/null | sort)
MIGRATION_COUNT=$(( ${#SQL_FILES[@]} - 1 ))
info "bootstrap.sql + $MIGRATION_COUNT migration file(s)"

# AWK: emit T:<table> for every created table and C:<table>.<col> for every
# column, across all SQL files.
# Handles:
#   CREATE TABLE IF NOT EXISTS <name> ( ... );
#   ALTER TABLE <name> ADD COLUMN IF NOT EXISTS <col> <type>;
# Column names always start with lowercase in our schema; SQL keywords
# (CONSTRAINT, PRIMARY, UNIQUE, etc.) are uppercase — used to skip them.
EXPECTED_RAW=$(awk '
  # Reset per-file parser state so one file cannot leak into the next.
  FNR == 1 { alter_tbl = ""; in_create = 0 }

  # Start of a CREATE TABLE: field 6 is the table name (quotes, "(" and
  # a "public." schema prefix are stripped).
  /^CREATE TABLE IF NOT EXISTS / {
    tbl = $6
    gsub(/"/, "", tbl); gsub(/\(/, "", tbl); gsub(/;/, "", tbl)
    gsub(/public\./, "", tbl)
    current_table = tbl
    in_create = 1
    print "T:" tbl
    next
  }

  # A line starting with ");" closes the CREATE TABLE body.
  in_create && /^\);/ { in_create = 0; next }

  # Indented line starting with a lowercase letter or underscore inside a
  # CREATE TABLE body: treat the first word as a column name unless it is a
  # lowercase constraint keyword.
  in_create && /^[[:space:]]+[a-z_]/ {
    col = $1
    gsub(/,/, "", col); gsub(/"/, "", col)
    if (col !~ /^(unique|check|constraint|primary|foreign|exclude)$/ && col != "")
      print "C:" current_table "." col
    next
  }

  # Remember the table of the most recent ALTER TABLE so ADD COLUMN clauses
  # on this or following lines can be attributed to it.
  /^ALTER TABLE / {
    alter_tbl = $3
    gsub(/"/, "", alter_tbl); gsub(/public\./, "", alter_tbl)
  }

  # ADD COLUMN IF NOT EXISTS <col> ...: the first word after the clause is
  # the column name.
  /ADD COLUMN IF NOT EXISTS / {
    n = split($0, a, "ADD COLUMN IF NOT EXISTS ")
    if (n < 2) next
    col = a[2]
    gsub(/^ +/, "", col)
    split(col, b, " ")
    col = b[1]
    gsub(/,/, "", col); gsub(/;/, "", col); gsub(/"/, "", col)
    if (alter_tbl != "" && col != "")
      print "C:" alter_tbl "." col
  }
' "${SQL_FILES[@]}" | sort -u)

# grep exits 1 when nothing matches; under `set -e` + `pipefail` that would
# abort the script silently, so tolerate it here and report it explicitly
# below.
EXPECTED_TABLES=$(grep '^T:' <<< "$EXPECTED_RAW" | sed 's/^T://' || true)
EXPECTED_COLS=$(grep '^C:' <<< "$EXPECTED_RAW" | sed 's/^C://' || true)

# `grep -c` already prints 0 on no match (while exiting 1); `|| true` keeps
# that single "0" instead of appending a second one.
ET=$(grep -c . <<< "$EXPECTED_TABLES" || true)
EC=$(grep -c . <<< "$EXPECTED_COLS" || true)
info "Expected tables : $ET"
info "Expected col refs: $EC"

# With nothing parsed, every live table would be reported as "unknown" and
# the run would look healthy — fail loudly instead.
if [[ "$ET" -eq 0 ]]; then
  echo -e "${RED}[ERROR]${NC} No CREATE TABLE IF NOT EXISTS statements found in bootstrap.sql or migrations."
  echo " Checked: $BOOTSTRAP_SQL and $MIGRATIONS_DIR/*.sql"
  exit 1
fi
# ── Query live DB ──────────────────────────────────────────────────────────────
section "Querying live DB"

# `|| true`: a failing query must not trip `set -e` here, otherwise the
# script dies without output and the explanatory error below is unreachable.
# An empty result is caught by the row-count checks that follow.
ACTUAL_TABLES=$(run_sql "
SELECT table_name FROM information_schema.tables
WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
ORDER BY table_name;
" || true)
ACTUAL_COLS=$(run_sql "
SELECT table_name || '.' || column_name
FROM information_schema.columns
WHERE table_schema = 'public'
ORDER BY table_name, ordinal_position;
" || true)

# Count non-empty lines; grep -c prints 0 (and exits 1) when there are none.
AT=$(grep -c . <<< "$ACTUAL_TABLES" || true)
AC=$(grep -c . <<< "$ACTUAL_COLS" || true)
info "Actual tables : $AT"
info "Actual columns : $AC"

# Sanity floor: fewer than 5 tables or 50 columns is treated as a failed or
# truncated query rather than a real schema, to avoid reporting the whole
# schema as missing.
if [[ -z "$ACTUAL_TABLES" || "$AT" -lt 5 ]]; then
  echo -e "${RED}[ERROR]${NC} DB tables query returned no/insufficient results ($AT rows)."
  echo " Possible transient kubectl/psql failure. Re-run the script."
  exit 1
fi
if [[ -z "$ACTUAL_COLS" || "$AC" -lt 50 ]]; then
  echo -e "${RED}[ERROR]${NC} DB columns query returned no/insufficient results ($AC rows)."
  echo " Possible transient kubectl/psql failure. Re-run the script."
  exit 1
fi
# ── Table comparison ───────────────────────────────────────────────────────────
section "Tables"

# Lookups use here-strings rather than `echo … | grep -q`: with `pipefail`,
# grep -q exiting on the first match can kill the writer with SIGPIPE on
# large inputs, which makes the pipeline "fail" and reports a present table
# as missing.

# Pass 1: every table the SQL files define must exist in the live DB.
while IFS= read -r tbl; do
  [[ -z "$tbl" ]] && continue
  if grep -Fxq -- "$tbl" <<< "$ACTUAL_TABLES"; then
    ok "$tbl"
  else
    fail "$tbl ← NOT IN DB"
    MISSING_TABLES+=("$tbl")
  fi
done <<< "$EXPECTED_TABLES"

# Pass 2: live tables that no SQL file defines (informational only; they do
# not count towards the out-of-sync total).
while IFS= read -r tbl; do
  [[ -z "$tbl" ]] && continue
  if ! grep -Fxq -- "$tbl" <<< "$EXPECTED_TABLES"; then
    extra "$tbl (in DB, not in any SQL file)"
    EXTRA_COUNT=$((EXTRA_COUNT + 1))
  fi
done <<< "$ACTUAL_TABLES"
# ── Column comparison ──────────────────────────────────────────────────────────
section "Missing columns (skipping tables already flagged above)"
FOUND_COL_ISSUES=0
while IFS= read -r col; do
  [[ -z "$col" ]] && continue
  tbl="${col%%.*}"

  # Skip columns whose parent table is itself missing (already reported).
  # The :-__none__ default keeps the expansion safe under `set -u` when the
  # array is empty.
  skip=false
  for mt in "${MISSING_TABLES[@]:-__none__}"; do
    if [[ "$mt" == "$tbl" ]]; then
      skip=true
      break
    fi
  done
  if [[ "$skip" == true ]]; then
    continue
  fi

  # Here-string instead of `echo … | grep -q`: under `pipefail` an early
  # grep -q exit can SIGPIPE the writer on large column lists and turn a
  # present column into a false "MISSING".
  if ! grep -Fxq -- "$col" <<< "$ACTUAL_COLS"; then
    fail "$col ← MISSING"
    MISSING_COLS+=("$col")
    FOUND_COL_ISSUES=$((FOUND_COL_ISSUES + 1))
  fi
done <<< "$EXPECTED_COLS"

if [[ $FOUND_COL_ISSUES -eq 0 ]]; then
  ok "All expected columns are present"
fi
# ── Summary ────────────────────────────────────────────────────────────────────
# Exit 0 when nothing is missing, 1 otherwise. Unknown (extra) tables are
# reported but never affect the exit status.
section "Summary"
MT=${#MISSING_TABLES[@]}
MC=${#MISSING_COLS[@]}
TOTAL=$((MT + MC))

if [[ $MT -gt 0 ]]; then
  echo -e "${RED} Missing tables : $MT${NC}"
fi
if [[ $MC -gt 0 ]]; then
  echo -e "${RED} Missing columns : $MC${NC}"
fi
if [[ $EXTRA_COUNT -gt 0 ]]; then
  echo -e "${YELLOW} Unknown tables : $EXTRA_COUNT (in DB, not in SQL files — informational)${NC}"
fi

if [[ $TOTAL -eq 0 ]]; then
  echo -e "\n${GREEN}${BOLD} ✓ DB is in sync with all SQL files${NC}"
  exit 0
else
  echo -e "\n${RED}${BOLD} ✗ DB is OUT OF SYNC — $TOTAL issue(s) found${NC}"
  if [[ $MT -gt 0 ]]; then
    echo ""
    echo " Tables to apply (check migrations dir for the source file):"
    for t in "${MISSING_TABLES[@]}"; do
      # First migration that creates the table; when none does, the table
      # is assumed to come from bootstrap.sql.
      src=$(grep -rl "CREATE TABLE IF NOT EXISTS.*\b${t}\b" "$MIGRATIONS_DIR" 2>/dev/null | head -1 || true)
      if [[ -n "$src" ]]; then
        # Same "<file> → <table>" shape as the bootstrap line below.
        echo " $(basename "$src") → $t"
      else
        echo " bootstrap.sql → $t"
      fi
    done
  fi
  echo ""
  echo " Fix options:"
  echo " ./db/db-migrate.sh --dry-run # full bootstrap preview"
  echo " ./db/db-migrate.sh # apply bootstrap + all migrations"
  exit 1
fi

View File

@@ -0,0 +1,26 @@
-- 036_sandbox_provider_config.sql
-- Seeds the active sandbox provider row in installation_provider_config.
-- config_encrypted is left NULL here — the admin UI (POST /setup/step/providers
-- or Admin Settings) is the correct place to store encrypted k8s config.
-- services.py reads provider_slug from this row; config falls back to env vars
-- (K8S_SANDBOX_NAMESPACE, K8S_SANDBOX_SERVICE_ACCOUNT, SANDBOX_IMAGE) until
-- an operator saves config via the UI.

-- Step 1: deactivate every currently active sandbox provider row so the row
-- inserted below becomes the only active one for category 'sandbox'.
-- NOTE(review): this also deactivates a row an operator configured through
-- the UI (including its config_encrypted) — confirm that is intended if this
-- migration can run against an already-configured installation.
UPDATE installation_provider_config
SET is_active = FALSE, updated_at = NOW()
WHERE category = 'sandbox' AND is_active = TRUE;

-- Step 2: insert the k8s-job provider as the active sandbox provider.
-- The id is 'pconf_' plus 20 hex characters of md5(random()), so it differs
-- on every execution.
-- NOTE(review): because the id is freshly generated each run, ON CONFLICT DO
-- NOTHING can only trigger on some other unique constraint (none is visible
-- in this file). If the migration runner re-applies files, each run
-- presumably deactivates the previous seed and inserts another row — verify
-- the runner applies this file once, or guard the INSERT with NOT EXISTS.
INSERT INTO installation_provider_config
(id, category, provider_slug, provider_name, config_encrypted,
is_active, notes, configured_by)
VALUES (
'pconf_' || lower(left(md5(random()::text), 20)),
'sandbox',
'k8s-job',
'K8s Job Sandbox',
NULL,
TRUE,
'Seeded by migration 036 — configure namespace/image/SA via Admin Settings',
'migration'
)
ON CONFLICT DO NOTHING;

View File

@@ -0,0 +1,17 @@
FROM python:3.11-slim

# Security: sandbox jobs run as uid 65534 with a non-login shell.
# The group/user creation is best-effort (`|| true`) — presumably because the
# Debian-based image already defines uid/gid 65534; TODO confirm.
# Each best-effort command is wrapped in { …; } so its `|| true` applies only
# to that command. Unbraced, `a && b && c || true && d` parses as
# `((a && b && c) || true) && d`, which would silently swallow an apt-get
# failure and produce an image without curl/ca-certificates.
RUN apt-get update -qq && \
    apt-get install -y --no-install-recommends curl ca-certificates && \
    rm -rf /var/lib/apt/lists/* && \
    { groupadd -g 65534 nobody-group 2>/dev/null || true; } && \
    { useradd -u 65534 -g 65534 -s /sbin/nologin -M nobody-nebula 2>/dev/null || true; }

# Pre-install common plugin deps — add your plugin requirements here.
# The version specifier is quoted so the shell never treats `*` as a glob.
RUN pip install --no-cache-dir requests httpx "pydantic==2.*" typing_extensions

# Drop privileges for everything that follows, including the smoke test.
USER 65534
WORKDIR /workspace

# Smoke test: verify Python works as the unprivileged user at build time.
RUN python3 -c "import sys; print('sandbox python ok:', sys.version)"

View File

@@ -5,14 +5,14 @@
# Prereqs: kubectl configured and pointing at your cluster, helm (for metrics only).
#
# Usage:
# ./scripts/setup_sandbox.sh [--harbor-host harbor.armco.dev] [--namespace nebula-sandbox]
# ./scripts/setup_sandbox.sh [--harbor-host harbor.armco.dev] [--namespace sandbox]
#
# Env overrides (or pass as --flag):
# HARBOR_HOST (default: harbor.armco.dev)
# SANDBOX_NAMESPACE (default: nebula-sandbox)
# SANDBOX_SA (default: nebula-sandbox)
# SANDBOX_NAMESPACE (default: sandbox)
# SANDBOX_SA (default: sandbox)
# NEBULA_NAMESPACE (default: products) — namespace where nebula-os backend runs
# NEBULA_SA (default: nebula-os) — ServiceAccount used by nebula-os pods
# NEBULA_SA (default: nebula) — ServiceAccount used by nebula-os pods
# SANDBOX_IMAGE_TAG (default: latest)
# SKIP_BUILD set to 1 to skip Docker build/push step
# SKIP_NETPOL set to 1 to skip NetworkPolicy (if CNI doesn't support it)
@@ -22,10 +22,10 @@ set -euo pipefail
# ── Defaults ──────────────────────────────────────────────────────────────────
HARBOR_HOST="${HARBOR_HOST:-harbor.armco.dev}"
SANDBOX_NAMESPACE="${SANDBOX_NAMESPACE:-nebula-sandbox}"
SANDBOX_SA="${SANDBOX_SA:-nebula-sandbox}"
SANDBOX_NAMESPACE="${SANDBOX_NAMESPACE:-sandbox}"
SANDBOX_SA="${SANDBOX_SA:-sandbox}"
NEBULA_NAMESPACE="${NEBULA_NAMESPACE:-products}"
NEBULA_SA="${NEBULA_SA:-nebula-os}"
NEBULA_SA="${NEBULA_SA:-nebula}"
SANDBOX_IMAGE_TAG="${SANDBOX_IMAGE_TAG:-latest}"
SKIP_BUILD="${SKIP_BUILD:-0}"
SKIP_NETPOL="${SKIP_NETPOL:-0}"
@@ -43,7 +43,7 @@ while [[ $# -gt 0 ]]; do
esac
done
SANDBOX_IMAGE="${HARBOR_HOST}/nebula/sandbox:${SANDBOX_IMAGE_TAG}"
SANDBOX_IMAGE="${HARBOR_HOST}/ci/nebula-sandbox:${SANDBOX_IMAGE_TAG}"
# ── Helpers ───────────────────────────────────────────────────────────────────
@@ -206,6 +206,7 @@ if [[ "${SKIP_NETPOL}" == "1" ]]; then
warn "Skipping NetworkPolicy (--skip-netpol set). Egress enforcement relies on application layer only."
else
info "Step 6/7 — NetworkPolicy (default deny-all egress from sandbox pods)..."
warn "Cluster CNI is Flannel VXLAN (k3s default) — NetworkPolicy objects will be created but are NOT enforced at the kernel level. Enforcement requires Calico/Cilium. Objects are applied now for future CNI upgrade readiness."
kubectl_apply "
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy

View File

@@ -514,13 +514,55 @@ async def build_container(config: Dict[str, Any]) -> ServiceContainer:
# ------------------------------------------------------------------
# 9c — Sandbox provider (via SandboxProviderFactory)
#
# SANDBOX_PROVIDER: "legacy" (default) | "docker" | "k8s-job"
# Factory handles slug normalisation, lifecycle, and fallback.
# Provider slug is read from installation_provider_config (DB) so
# admins can switch providers via the Settings UI without redeploying.
# DB value takes precedence; falls back to SANDBOX_PROVIDER env var,
# then "legacy". Per-provider config (namespace, image, SA) is read
# from config_encrypted when present, merged over env-var defaults.
# ------------------------------------------------------------------
from src.providers.sandbox_factory import SandboxProviderFactory
try:
_sb_row = await db.fetchrow(
"SELECT provider_slug, config_encrypted"
" FROM installation_provider_config"
" WHERE category = 'sandbox' AND is_active = true"
" LIMIT 1"
)
_db_sandbox_slug = _sb_row["provider_slug"] if _sb_row else None
_db_sandbox_cfg: Dict[str, Any] = {}
if _sb_row and _sb_row["config_encrypted"]:
try:
import json as _sbjson
from src.connectors.config_cipher import (
IntegrationConfigCipher as _SbCipher,
)
_raw_sb = _sb_row["config_encrypted"]
if isinstance(_raw_sb, str):
_raw_sb = _sbjson.loads(_raw_sb)
_db_sandbox_cfg = _SbCipher().decrypt(_raw_sb) or {}
except Exception:
_db_sandbox_cfg = {}
except Exception:
_db_sandbox_slug = None
_db_sandbox_cfg = {}
_sandbox_slug = _db_sandbox_slug or config.get("SANDBOX_PROVIDER", "legacy")
_sandbox_config = {**config, **_db_sandbox_cfg}
log.info("sandbox_provider_selected", {
"component": "core.services",
"operation": "build_container",
"entity_id": "system",
"metadata": {
"slug": _sandbox_slug,
"source": "db" if _db_sandbox_slug else "env",
},
})
sandbox_provider = await SandboxProviderFactory.build(
slug=config.get("SANDBOX_PROVIDER", "legacy"),
config=config,
slug=_sandbox_slug,
config=_sandbox_config,
event_bus=platform_event_bus,
network_control=network_control,
filesystem_jail=filesystem_jail,