fix: db-verify.sh bugs + bootstrap pgvector optional + migration idempotency
All checks were successful
Stuffle/nebula-os/pipeline/head This commit looks good
All checks were successful
Stuffle/nebula-os/pipeline/head This commit looks good
db-verify.sh:
- Fix `set -o pipefail` + `grep -q` SIGPIPE false negatives: write ACTUAL_COLS/ACTUAL_TABLES to temp files and grep the files directly instead of using `echo | grep`.
- Fix AWK stale alter_tbl bug: DO $$ blocks leaked table names into ADD COLUMN matches; the script now tracks in_do_block state and resets alter_tbl on new statements.

bootstrap.sql:
- Make the pgvector extension + vector(1536) columns fully optional via DO $$ guards so bootstrap applies cleanly on Postgres without pgvector installed.
- Add ALTER TABLE ADD COLUMN IF NOT EXISTS guards for tasks columns that CREATE TABLE IF NOT EXISTS silently skips on pre-existing tables.

034_marketplace_and_source_model.sql:
- Add ALTER TABLE guard for the is_malicious + scan_notes columns.
This commit is contained in:
@@ -7,7 +7,11 @@
|
||||
-- Enable required extensions
|
||||
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
||||
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
|
||||
CREATE EXTENSION IF NOT EXISTS "vector"; -- pgvector for semantic memory embeddings
|
||||
DO $$ BEGIN
|
||||
CREATE EXTENSION IF NOT EXISTS "vector";
|
||||
EXCEPTION WHEN OTHERS THEN
|
||||
RAISE NOTICE 'pgvector not available — vector columns will be skipped. Install pgvector for semantic search.';
|
||||
END $$; -- pgvector for semantic memory embeddings (optional)
|
||||
|
||||
-- ============================================================================
|
||||
-- SYSTEM LOGS TABLE
|
||||
@@ -243,8 +247,16 @@ CREATE INDEX IF NOT EXISTS idx_tasks_priority ON tasks(priority DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_tasks_created_at ON tasks(created_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_tasks_task_type ON tasks(task_type);
|
||||
|
||||
-- User attribution columns — allow quota deduction and identity surfacing without a join
|
||||
-- Ensure all tasks columns exist (safe to re-run — idempotent via IF NOT EXISTS)
|
||||
ALTER TABLE tasks
|
||||
ADD COLUMN IF NOT EXISTS task_type VARCHAR(100) NOT NULL DEFAULT 'workflow_step',
|
||||
ADD COLUMN IF NOT EXISTS input JSONB NOT NULL DEFAULT '{}',
|
||||
ADD COLUMN IF NOT EXISTS output JSONB,
|
||||
ADD COLUMN IF NOT EXISTS retry_count INTEGER NOT NULL DEFAULT 0,
|
||||
ADD COLUMN IF NOT EXISTS max_retries INTEGER NOT NULL DEFAULT 3,
|
||||
ADD COLUMN IF NOT EXISTS timeout_seconds INTEGER NOT NULL DEFAULT 300,
|
||||
ADD COLUMN IF NOT EXISTS idempotency_key VARCHAR(255),
|
||||
ADD COLUMN IF NOT EXISTS started_at TIMESTAMPTZ,
|
||||
ADD COLUMN IF NOT EXISTS created_by VARCHAR(255) DEFAULT NULL,
|
||||
ADD COLUMN IF NOT EXISTS user_email VARCHAR(255) DEFAULT NULL,
|
||||
ADD COLUMN IF NOT EXISTS user_display_name VARCHAR(255) DEFAULT NULL;
|
||||
@@ -1146,7 +1158,7 @@ CREATE TABLE IF NOT EXISTS agent_memories (
|
||||
is_promoted BOOLEAN NOT NULL DEFAULT false,
|
||||
promoted_at TIMESTAMPTZ,
|
||||
promoted_by TEXT,
|
||||
embedding vector(1536), -- text-embedding-3-small / ada-002 dimensions
|
||||
-- embedding column added conditionally below (requires pgvector)
|
||||
name TEXT,
|
||||
description TEXT,
|
||||
is_pristine BOOLEAN NOT NULL DEFAULT false,
|
||||
@@ -1216,6 +1228,14 @@ CREATE INDEX IF NOT EXISTS idx_agent_memories_parent
|
||||
ON agent_memories(parent_id)
|
||||
WHERE parent_id IS NOT NULL;
|
||||
|
||||
-- Add embedding column to agent_memories when pgvector is available
|
||||
DO $$ BEGIN
|
||||
IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN
|
||||
ALTER TABLE agent_memories ADD COLUMN IF NOT EXISTS embedding vector(1536);
|
||||
END IF;
|
||||
EXCEPTION WHEN OTHERS THEN NULL;
|
||||
END $$;
|
||||
|
||||
-- HNSW index for fast approximate nearest-neighbour semantic search
|
||||
-- Only created when pgvector is available (will silently skip if column missing)
|
||||
DO $$ BEGIN
|
||||
@@ -3276,7 +3296,7 @@ CREATE TABLE IF NOT EXISTS rag_chunks (
|
||||
corpus_id VARCHAR(80) NOT NULL REFERENCES rag_corpora(corpus_id) ON DELETE CASCADE,
|
||||
source_id VARCHAR(80) REFERENCES rag_sources(source_id) ON DELETE CASCADE,
|
||||
content TEXT NOT NULL,
|
||||
embedding vector(1536),
|
||||
-- embedding column added conditionally below (requires pgvector)
|
||||
chunk_index INTEGER NOT NULL DEFAULT 0,
|
||||
token_count INTEGER,
|
||||
metadata JSONB NOT NULL DEFAULT '{}',
|
||||
@@ -3287,6 +3307,14 @@ CREATE INDEX IF NOT EXISTS idx_rag_chunks_corpus ON rag_chunks(corpus_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_rag_chunks_source ON rag_chunks(source_id) WHERE source_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_rag_chunks_created_at ON rag_chunks(created_at DESC);
|
||||
|
||||
-- Add embedding column to rag_chunks when pgvector is available
|
||||
DO $$ BEGIN
|
||||
IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN
|
||||
ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS embedding vector(1536);
|
||||
END IF;
|
||||
EXCEPTION WHEN OTHERS THEN NULL;
|
||||
END $$;
|
||||
|
||||
-- ============================================================================
|
||||
-- CHAT SESSIONS & MESSAGES
|
||||
-- ============================================================================
|
||||
|
||||
@@ -130,9 +130,15 @@ info "bootstrap.sql + $MIGRATION_COUNT migration file(s)"
|
||||
# Column names always start with lowercase in our schema; SQL keywords
|
||||
# (CONSTRAINT, PRIMARY, UNIQUE, etc.) are uppercase — used to skip them.
|
||||
EXPECTED_RAW=$(awk '
|
||||
FNR == 1 { alter_tbl = ""; in_create = 0 }
|
||||
FNR == 1 { alter_tbl = ""; in_create = 0; in_do_block = 0 }
|
||||
|
||||
# Track DO $$ blocks — any ALTER TABLE inside them is procedural, not schema DDL
|
||||
/^DO \$\$/ || /^DO\$\$/ { in_do_block = 1 }
|
||||
in_do_block && /END \$\$/ { in_do_block = 0; alter_tbl = ""; next }
|
||||
in_do_block { next }
|
||||
|
||||
/^CREATE TABLE IF NOT EXISTS / {
|
||||
alter_tbl = "" # reset stale alter context
|
||||
tbl = $6
|
||||
gsub(/"/, "", tbl); gsub(/\(/, "", tbl); gsub(/;/, "", tbl)
|
||||
gsub(/public\./, "", tbl)
|
||||
@@ -152,12 +158,19 @@ EXPECTED_RAW=$(awk '
|
||||
next
|
||||
}
|
||||
|
||||
# Top-level statements that are not ALTER TABLE reset the alter context
|
||||
/^(CREATE|INSERT|UPDATE|DELETE|DROP|COMMENT|GRANT|REVOKE|SET|SELECT|WITH|DO) / {
|
||||
alter_tbl = ""
|
||||
}
|
||||
|
||||
/^ALTER TABLE / {
|
||||
alter_tbl = $3
|
||||
gsub(/"/, "", alter_tbl); gsub(/public\./, "", alter_tbl)
|
||||
}
|
||||
|
||||
# Only match ADD COLUMN when alter_tbl is set (i.e., inside a real ALTER TABLE)
|
||||
/ADD COLUMN IF NOT EXISTS / {
|
||||
if (alter_tbl == "") next
|
||||
n = split($0, a, "ADD COLUMN IF NOT EXISTS ")
|
||||
if (n < 2) next
|
||||
col = a[2]
|
||||
@@ -165,9 +178,12 @@ EXPECTED_RAW=$(awk '
|
||||
split(col, b, " ")
|
||||
col = b[1]
|
||||
gsub(/,/, "", col); gsub(/;/, "", col); gsub(/"/, "", col)
|
||||
if (alter_tbl != "" && col != "")
|
||||
if (col != "")
|
||||
print "C:" alter_tbl "." col
|
||||
}
|
||||
|
||||
# Reset alter_tbl at end of statement (line ends with ;)
|
||||
/;[[:space:]]*$/ && !/^[[:space:]]/ { if ($1 != "ALTER") alter_tbl = "" }
|
||||
' "${SQL_FILES[@]}" | sort -u)
|
||||
|
||||
EXPECTED_TABLES=$(echo "$EXPECTED_RAW" | grep '^T:' | sed 's/^T://')
|
||||
@@ -210,12 +226,19 @@ if [[ -z "$ACTUAL_COLS" || "$AC" -lt 50 ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Write to temp files to avoid set -o pipefail + grep -q SIGPIPE false negatives.
|
||||
# When grep -q finds a match and exits early, echo gets SIGPIPE (exit 141).
|
||||
# pipefail then returns 141 (non-zero) even though the match was found.
|
||||
_TMP_TABLES=$(mktemp) && echo "$ACTUAL_TABLES" > "$_TMP_TABLES"
|
||||
_TMP_COLS=$(mktemp) && echo "$ACTUAL_COLS" > "$_TMP_COLS"
|
||||
trap 'rm -f "$_TMP_TABLES" "$_TMP_COLS"' EXIT
|
||||
|
||||
# ── Table comparison ───────────────────────────────────────────────────────────
|
||||
section "Tables"
|
||||
|
||||
while IFS= read -r tbl; do
|
||||
[[ -z "$tbl" ]] && continue
|
||||
if echo "$ACTUAL_TABLES" | grep -Fxq "$tbl"; then
|
||||
if grep -Fxq "$tbl" "$_TMP_TABLES"; then
|
||||
ok "$tbl"
|
||||
else
|
||||
fail "$tbl ← NOT IN DB"
|
||||
@@ -225,7 +248,7 @@ done <<< "$EXPECTED_TABLES"
|
||||
|
||||
while IFS= read -r tbl; do
|
||||
[[ -z "$tbl" ]] && continue
|
||||
if ! echo "$EXPECTED_TABLES" | grep -Fxq "$tbl"; then
|
||||
if ! grep -Fxq "$tbl" <<< "$EXPECTED_TABLES"; then
|
||||
extra "$tbl (in DB, not in any SQL file)"
|
||||
((EXTRA_COUNT++)) || true
|
||||
fi
|
||||
@@ -245,7 +268,7 @@ while IFS= read -r col; do
|
||||
done
|
||||
$skip && continue
|
||||
|
||||
if ! echo "$ACTUAL_COLS" | grep -Fxq "$col"; then
|
||||
if ! grep -Fxq "$col" "$_TMP_COLS"; then
|
||||
fail "$col ← MISSING"
|
||||
MISSING_COLS+=("$col")
|
||||
((FOUND_COL_ISSUES++)) || true
|
||||
|
||||
@@ -106,3 +106,8 @@ CREATE TABLE IF NOT EXISTS benchmark_promotion_log (
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_benchmark_promotion_log_agent_id
|
||||
ON benchmark_promotion_log(agent_id);
|
||||
|
||||
-- Idempotent guards: add columns that CREATE TABLE IF NOT EXISTS skips on existing tables
|
||||
ALTER TABLE marketplace_agents
|
||||
ADD COLUMN IF NOT EXISTS is_malicious BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
ADD COLUMN IF NOT EXISTS scan_notes TEXT;
|
||||
|
||||
Reference in New Issue
Block a user