fix: bridge-first model resolution — bypass 503 when extension connected

Backend (chat.py):
- Add bridge_mode field to ChatRequest
- Add bridge_required + bridge_messages fields to ChatResponse
- When bridge_mode=true (or model set + no backend provider): skip LLM call, return assembled RAG+system_prompt messages
- Backend never calls localhost — LLM call happens in browser

Frontend (ChatWorkspace.tsx):
- isBridgeActive = bridge.status === 'connected' (not model-gated)
- effectiveModel chain: chatModel ?? bridge.selectedModel ?? bridgeModels[0].id
- Send bridge_mode:true when bridge connected
- On bridge_required response: call localBridgeFetch with bridge_messages

Resolution order (user-facing chat):
1. Browser bridge (extension installed + local model) — zero backend config
2. Offered AI / free tier (NEBULA_LLM_API_KEY in prod env) — TODO: set in prod
3. BYOAI (user key in Settings)
4. 503

client.ts:
- ApiChatRequest.bridge_mode?: boolean
- ApiChatResponse.bridge_required?: boolean
- ApiChatResponse.bridge_messages?: {role, content}[]
2026-04-18 17:33:25 +05:30
parent 4d174c6f65
commit 92bc4dbcc2
3 changed files with 85 additions and 8 deletions
--- a/src/api/routers/chat.py
+++ b/src/api/routers/chat.py
@@ -440,6 +440,14 @@ class ChatRequest(BaseModel):
        default=None,
        description="Override model. Defaults to platform default.",
    )
+    bridge_mode: bool = Field(
+        default=False,
+        description=(
+            "When True the backend assembles RAG context + system prompt "
+            "and returns bridge_messages for the client to call the local bridge. "
+            "Automatically enabled when model is set and no backend provider is configured."
+        ),
+    )


 class ToolActionResult(BaseModel):
@@ -477,6 +485,9 @@ class ChatResponse(BaseModel):
    latency_ms: int
    correlation_id: str
    tool_actions: List[ToolAction] = Field(default_factory=list)
+    # Bridge mode: when set, frontend should call local bridge with these messages
+    bridge_required: bool = Field(default=False)
+    bridge_messages: Optional[List[Dict[str, Any]]] = Field(default=None)


 # ── Message persistence ───────────────────────────────────────────────────────
@@ -1880,7 +1891,12 @@ async def chat(
        db, body.session_id, "user", body.message, mode="nebula"
    )

-    if model_provider is None:
+    # Bridge mode: when a client model override is set but no backend provider
+    # is configured, fall through to RAG+context assembly and return the
+    # assembled messages for the client (browser extension) to call locally.
+    use_bridge_mode = body.bridge_mode or (body.model is not None and model_provider is None)
+
+    if model_provider is None and not use_bridge_mode:
        log.error("chat_no_model_provider", {
            "component": "api.chat",
            "operation": "chat",
@@ -1945,6 +1961,33 @@ async def chat(

    messages.append({"role": "user", "content": body.message})

+    # ── 3a. Bridge mode — return assembled messages to client ────────────────
+    if use_bridge_mode:
+        latency_ms = int((time.monotonic() - start_ms) * 1000)
+        model_name = body.model or "local"
+        log.info("chat_bridge_mode", {
+            "component": "api.chat",
+            "operation": "chat",
+            "entity_id": "nebula-assistant",
+            "correlation_id": cid,
+            "metadata": {
+                "model": model_name,
+                "chunks_used": chunks_used,
+                "message_count": len(messages),
+            },
+        })
+        return {
+            "reply": "",
+            "corpus_id": corpus_id,
+            "chunks_used": chunks_used,
+            "model_used": model_name,
+            "latency_ms": latency_ms,
+            "correlation_id": cid,
+            "tool_actions": [],
+            "bridge_required": True,
+            "bridge_messages": messages,
+        }
+
    # ── 3. LLM call (with tool-calling enabled) ─────────────────────────────
    model = body.model or os.getenv("NEBULA_LLM_DEFAULT_MODEL", "gpt-4o-mini")
    try:
--- a/webapp/src/api/client.ts
+++ b/webapp/src/api/client.ts
@@ -662,12 +662,14 @@ export const policiesApi = {
    page_size?: number
    policy_type?: string
    enabled_only?: boolean
+    agent_id?: string | null
  }) => {
    const qs = new URLSearchParams()
    if (params?.page)         qs.set('page',         String(params.page))
    if (params?.page_size)    qs.set('page_size',    String(params.page_size))
    if (params?.policy_type)  qs.set('policy_type',  params.policy_type)
    if (params?.enabled_only) qs.set('enabled_only', 'true')
+    if (params?.agent_id)     qs.set('agent_id',     params.agent_id)
    const q = qs.toString()
    return get<ApiPolicyList>(`/policies${q ? `?${q}` : ''}`)
  },
@@ -2548,6 +2550,7 @@ export interface ApiChatRequest {
  corpus_id?: string
  top_k?: number
  model?: string
+  bridge_mode?: boolean
 }

 export interface ApiChatToolAction {
@@ -2583,6 +2586,8 @@ export interface ApiChatResponse {
  latency_ms: number
  correlation_id: string
  tool_actions?: ApiChatToolAction[]
+  bridge_required?: boolean
+  bridge_messages?: Array<{ role: string; content: string }>
 }

 export interface ApiChatCorpus {
--- a/webapp/src/components/layout/ChatWorkspace.tsx
+++ b/webapp/src/components/layout/ChatWorkspace.tsx
@@ -17,7 +17,7 @@ import {
 } from '@/api/client'
 import { useWebSocket } from '@/hooks/useWebSocket'
 import { useLocalBridge } from '@/hooks/useLocalBridge'
-import { localBridgeListModelsDetailed, type BridgeModelInfo } from '@/lib/localBridge'
+import { localBridgeListModelsDetailed, localBridgeFetch, type BridgeModelInfo } from '@/lib/localBridge'
 import { formatRelativeTime } from '@/lib/utils'
 import { useShell } from '@/context/ShellContext'

@@ -966,9 +966,16 @@ export function ChatWorkspace() {
  const { activeSessionId, setActiveSessionId } = useShell()
  const bridge = useLocalBridge()
  const [bridgeModels, setBridgeModels] = useState<BridgeModelInfo[]>([])
-  // chatModel: null = use bridge.selectedModel or backend default; string = user override
+  // chatModel: null = use bridge.selectedModel or first available model
  const [chatModel, setChatModel] = useState<string | null>(null)
-  const effectiveModel = chatModel ?? (bridge.status === 'connected' ? bridge.selectedModel : null)
+
+  // Resolve model: explicit selection → bridge popup selection → first fetched model
+  const effectiveModel = chatModel
+    ?? (bridge.status === 'connected' ? bridge.selectedModel : null)
+    ?? (bridge.status === 'connected' && bridgeModels.length > 0 ? bridgeModels[0].id : null)
+
+  // Bridge is active when extension is connected — model auto-resolved above
+  const isBridgeActive = bridge.status === 'connected'

  // ── Fetch local models for chat model picker (via bridge, not raw fetch) ──────

@@ -1063,14 +1070,36 @@ export function ChatWorkspace() {
  }, [sessionId, qc, setActiveSessionId])

  const nebulaMut = useMutation({
-    mutationFn: ({ message, sid }: { message: string; sid?: string }) =>
-      chatApi.send({
+    mutationFn: async ({ message, sid }: { message: string; sid?: string }) => {
+      const resp = await chatApi.send({
        message,
        session_id: sid ?? undefined,
        history: history.slice(-10),
-        // Pass model override when a local model is selected
        model: effectiveModel ?? undefined,
-      }),
+        // Tell backend to return assembled messages instead of calling LLM
+        // when bridge is connected (backend has no access to localhost)
+        bridge_mode: isBridgeActive,
+      })
+
+      // When backend returns assembled RAG+context messages, call local bridge
+      if (resp.bridge_required && resp.bridge_messages && effectiveModel) {
+        const bridgeResult = await localBridgeFetch({
+          model: effectiveModel,
+          messages: resp.bridge_messages,
+          temperature: 0.4,
+          max_tokens: 2048,
+        })
+        return {
+          ...resp,
+          reply: bridgeResult.content,
+          model_used: bridgeResult.model || effectiveModel,
+          bridge_required: false,
+          bridge_messages: undefined,
+        }
+      }
+
+      return resp
+    },
    onSuccess: (resp, { message }) => {
      // Extract tryItData from tool_actions if present
      const tryItAction = resp.tool_actions?.find(ta => ta.tool === 'try_it_out')