fix: db-verify.sh bugs + bootstrap pgvector optional + migration idempotency
All checks were successful
Stuffle/nebula-os/pipeline/head This commit looks good

db-verify.sh:
- Fix set -o pipefail + grep -q SIGPIPE false negatives: write ACTUAL_COLS/
  ACTUAL_TABLES to temp files, grep files directly instead of echo|grep
- Fix AWK stale alter_tbl bug: DO $$ blocks leaked table names into ADD COLUMN
  matches; now tracks in_do_block state and resets alter_tbl on new statements

bootstrap.sql:
- Make pgvector extension + vector(1536) columns fully optional via DO $$ guards
  so bootstrap applies cleanly on postgres without pgvector installed
- Add ALTER TABLE ADD COLUMN IF NOT EXISTS guards for tasks columns that
  CREATE TABLE IF NOT EXISTS silently skips on pre-existing tables

034_marketplace_and_source_model.sql:
- Add ALTER TABLE guard for is_malicious + scan_notes columns
This commit is contained in:
2026-04-21 09:37:45 +05:30
parent 4748e74311
commit fdf04efbad
3 changed files with 65 additions and 9 deletions

View File

@@ -7,7 +7,11 @@
-- Enable required extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
CREATE EXTENSION IF NOT EXISTS "vector"; -- pgvector for semantic memory embeddings
-- pgvector is optional. If the extension cannot be created, bootstrap continues
-- and the pgvector-dependent embedding columns are skipped by their own guards.
-- WHEN OTHERS is deliberate (the failure mode differs between installations),
-- but the caught SQLSTATE and message are echoed in the notice so that a cause
-- other than "pgvector is not installed" is not hidden.
DO $$ BEGIN
    CREATE EXTENSION IF NOT EXISTS "vector";
EXCEPTION WHEN OTHERS THEN
    RAISE NOTICE 'pgvector not available — vector columns will be skipped. Install pgvector for semantic search. (SQLSTATE %: %)', SQLSTATE, SQLERRM;
END $$; -- pgvector for semantic memory embeddings (optional)
-- ============================================================================
-- SYSTEM LOGS TABLE
@@ -243,8 +247,16 @@ CREATE INDEX IF NOT EXISTS idx_tasks_priority ON tasks(priority DESC);
CREATE INDEX IF NOT EXISTS idx_tasks_created_at ON tasks(created_at DESC);
-- Ensure all tasks columns exist (safe to re-run — idempotent via IF NOT EXISTS).
-- CREATE TABLE IF NOT EXISTS does not add columns to a pre-existing table, so
-- every column is re-asserted here. This guard must run BEFORE any index that
-- references one of these columns: on an older tasks table without task_type,
-- creating idx_tasks_task_type first would fail with "column does not exist".
-- NOTE(review): indexes defined earlier in the file may reference other guarded
-- columns — confirm none of them precede this statement.
-- The last three columns are user attribution columns — they allow quota
-- deduction and identity surfacing without a join.
ALTER TABLE tasks
    ADD COLUMN IF NOT EXISTS task_type VARCHAR(100) NOT NULL DEFAULT 'workflow_step',
    ADD COLUMN IF NOT EXISTS input JSONB NOT NULL DEFAULT '{}',
    ADD COLUMN IF NOT EXISTS output JSONB,
    ADD COLUMN IF NOT EXISTS retry_count INTEGER NOT NULL DEFAULT 0,
    ADD COLUMN IF NOT EXISTS max_retries INTEGER NOT NULL DEFAULT 3,
    ADD COLUMN IF NOT EXISTS timeout_seconds INTEGER NOT NULL DEFAULT 300,
    ADD COLUMN IF NOT EXISTS idempotency_key VARCHAR(255),
    ADD COLUMN IF NOT EXISTS started_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS created_by VARCHAR(255) DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS user_email VARCHAR(255) DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS user_display_name VARCHAR(255) DEFAULT NULL;
-- Created after the column guard so that task_type is guaranteed to exist.
CREATE INDEX IF NOT EXISTS idx_tasks_task_type ON tasks(task_type);
@@ -1146,7 +1158,7 @@ CREATE TABLE IF NOT EXISTS agent_memories (
is_promoted BOOLEAN NOT NULL DEFAULT false,
promoted_at TIMESTAMPTZ,
promoted_by TEXT,
embedding vector(1536), -- text-embedding-3-small / ada-002 dimensions
-- embedding column added conditionally below (requires pgvector)
name TEXT,
description TEXT,
is_pristine BOOLEAN NOT NULL DEFAULT false,
@@ -1216,6 +1228,14 @@ CREATE INDEX IF NOT EXISTS idx_agent_memories_parent
ON agent_memories(parent_id)
WHERE parent_id IS NOT NULL;
-- Add embedding column to agent_memories when pgvector is available.
-- The "pgvector missing" case is handled by the IF EXISTS check on pg_extension,
-- so the exception handler only fires for unexpected failures. Those are
-- reported as a WARNING (rather than swallowed) and bootstrap continues.
DO $$ BEGIN
    IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN
        ALTER TABLE agent_memories ADD COLUMN IF NOT EXISTS embedding vector(1536);
    END IF;
EXCEPTION WHEN OTHERS THEN
    RAISE WARNING 'could not add agent_memories.embedding (SQLSTATE %): %', SQLSTATE, SQLERRM;
END $$;
-- HNSW index for fast approximate nearest-neighbour semantic search
-- Only created when pgvector is available (will silently skip if column missing)
DO $$ BEGIN
@@ -3276,7 +3296,7 @@ CREATE TABLE IF NOT EXISTS rag_chunks (
corpus_id VARCHAR(80) NOT NULL REFERENCES rag_corpora(corpus_id) ON DELETE CASCADE,
source_id VARCHAR(80) REFERENCES rag_sources(source_id) ON DELETE CASCADE,
content TEXT NOT NULL,
embedding vector(1536),
-- embedding column added conditionally below (requires pgvector)
chunk_index INTEGER NOT NULL DEFAULT 0,
token_count INTEGER,
metadata JSONB NOT NULL DEFAULT '{}',
@@ -3287,6 +3307,14 @@ CREATE INDEX IF NOT EXISTS idx_rag_chunks_corpus ON rag_chunks(corpus_id);
CREATE INDEX IF NOT EXISTS idx_rag_chunks_source ON rag_chunks(source_id) WHERE source_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_rag_chunks_created_at ON rag_chunks(created_at DESC);
-- Add embedding column to rag_chunks when pgvector is available.
-- The "pgvector missing" case is handled by the IF EXISTS check on pg_extension,
-- so the exception handler only fires for unexpected failures. Those are
-- reported as a WARNING (rather than swallowed) and bootstrap continues.
DO $$ BEGIN
    IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN
        ALTER TABLE rag_chunks ADD COLUMN IF NOT EXISTS embedding vector(1536);
    END IF;
EXCEPTION WHEN OTHERS THEN
    RAISE WARNING 'could not add rag_chunks.embedding (SQLSTATE %): %', SQLSTATE, SQLERRM;
END $$;
-- ============================================================================
-- CHAT SESSIONS & MESSAGES
-- ============================================================================

View File

@@ -130,9 +130,15 @@ info "bootstrap.sql + $MIGRATION_COUNT migration file(s)"
# Column names always start with lowercase in our schema; SQL keywords
# (CONSTRAINT, PRIMARY, UNIQUE, etc.) are uppercase — used to skip them.
EXPECTED_RAW=$(awk '
FNR == 1 { alter_tbl = ""; in_create = 0 }
FNR == 1 { alter_tbl = ""; in_create = 0; in_do_block = 0 }
# Track DO $$ blocks — any ALTER TABLE inside them is procedural, not schema DDL
/^DO \$\$/ || /^DO\$\$/ { in_do_block = 1 }
in_do_block && /END \$\$/ { in_do_block = 0; alter_tbl = ""; next }
in_do_block { next }
/^CREATE TABLE IF NOT EXISTS / {
alter_tbl = "" # reset stale alter context
tbl = $6
gsub(/"/, "", tbl); gsub(/\(/, "", tbl); gsub(/;/, "", tbl)
gsub(/public\./, "", tbl)
@@ -152,12 +158,19 @@ EXPECTED_RAW=$(awk '
next
}
# Top-level statements that are not ALTER TABLE reset the alter context
/^(CREATE|INSERT|UPDATE|DELETE|DROP|COMMENT|GRANT|REVOKE|SET|SELECT|WITH|DO) / {
alter_tbl = ""
}
/^ALTER TABLE / {
alter_tbl = $3
gsub(/"/, "", alter_tbl); gsub(/public\./, "", alter_tbl)
}
# Only match ADD COLUMN when alter_tbl is set (i.e., inside a real ALTER TABLE)
/ADD COLUMN IF NOT EXISTS / {
if (alter_tbl == "") next
n = split($0, a, "ADD COLUMN IF NOT EXISTS ")
if (n < 2) next
col = a[2]
@@ -165,9 +178,12 @@ EXPECTED_RAW=$(awk '
split(col, b, " ")
col = b[1]
gsub(/,/, "", col); gsub(/;/, "", col); gsub(/"/, "", col)
if (alter_tbl != "" && col != "")
if (col != "")
print "C:" alter_tbl "." col
}
# Reset alter_tbl at end of statement (line ends with ;)
/;[[:space:]]*$/ && !/^[[:space:]]/ { if ($1 != "ALTER") alter_tbl = "" }
' "${SQL_FILES[@]}" | sort -u)
EXPECTED_TABLES=$(echo "$EXPECTED_RAW" | grep '^T:' | sed 's/^T://')
@@ -210,12 +226,19 @@ if [[ -z "$ACTUAL_COLS" || "$AC" -lt 50 ]]; then
exit 1
fi
# Write to temp files to avoid set -o pipefail + grep -q SIGPIPE false negatives.
# When grep -q finds a match and exits early, echo gets SIGPIPE (exit 141).
# pipefail then returns 141 (non-zero) even though the match was found.
#
# The cleanup trap is installed before the files are created so nothing leaks
# if the second mktemp fails. The variables start empty so the trap is safe to
# run at any point (rm -f ignores operands that do not exist).
_TMP_TABLES=""
_TMP_COLS=""
trap 'rm -f -- "$_TMP_TABLES" "$_TMP_COLS"' EXIT
# mktemp failures are handled explicitly: in an `a=$(mktemp) && b` list a failed
# mktemp is not caught even under `set -e`, and an empty path would make every
# later grep fail and report each table/column as missing.
_TMP_TABLES=$(mktemp) || { echo "db-verify: mktemp failed (tables)" >&2; exit 1; }
_TMP_COLS=$(mktemp) || { echo "db-verify: mktemp failed (columns)" >&2; exit 1; }
# printf instead of echo: the data is written verbatim regardless of its content.
printf '%s\n' "$ACTUAL_TABLES" > "$_TMP_TABLES"
printf '%s\n' "$ACTUAL_COLS" > "$_TMP_COLS"
# ── Table comparison ───────────────────────────────────────────────────────────
section "Tables"
while IFS= read -r tbl; do
[[ -z "$tbl" ]] && continue
if echo "$ACTUAL_TABLES" | grep -Fxq "$tbl"; then
if grep -Fxq "$tbl" "$_TMP_TABLES"; then
ok "$tbl"
else
fail "$tbl ← NOT IN DB"
@@ -225,7 +248,7 @@ done <<< "$EXPECTED_TABLES"
while IFS= read -r tbl; do
[[ -z "$tbl" ]] && continue
if ! echo "$EXPECTED_TABLES" | grep -Fxq "$tbl"; then
if ! grep -Fxq "$tbl" <<< "$EXPECTED_TABLES"; then
extra "$tbl (in DB, not in any SQL file)"
((EXTRA_COUNT++)) || true
fi
@@ -245,7 +268,7 @@ while IFS= read -r col; do
done
$skip && continue
if ! echo "$ACTUAL_COLS" | grep -Fxq "$col"; then
if ! grep -Fxq "$col" "$_TMP_COLS"; then
fail "$col ← MISSING"
MISSING_COLS+=("$col")
((FOUND_COL_ISSUES++)) || true

View File

@@ -106,3 +106,8 @@ CREATE TABLE IF NOT EXISTS benchmark_promotion_log (
CREATE INDEX IF NOT EXISTS idx_benchmark_promotion_log_agent_id
ON benchmark_promotion_log(agent_id);
-- Idempotent guards: add columns that CREATE TABLE IF NOT EXISTS skips on existing tables
-- Both columns are added by one ALTER TABLE statement; IF NOT EXISTS turns each
-- clause into a no-op when the column is already present, so re-running is safe.
-- is_malicious is NOT NULL with DEFAULT FALSE, so pre-existing rows read FALSE;
-- scan_notes is nullable with no default, so pre-existing rows read NULL.
ALTER TABLE marketplace_agents
ADD COLUMN IF NOT EXISTS is_malicious BOOLEAN NOT NULL DEFAULT FALSE,
ADD COLUMN IF NOT EXISTS scan_notes TEXT;