diff --git a/src/api/routers/chat.py b/src/api/routers/chat.py
index ae4a7dd4..af70b5b3 100644
--- a/src/api/routers/chat.py
+++ b/src/api/routers/chat.py
@@ -440,6 +440,14 @@ class ChatRequest(BaseModel):
         default=None,
         description="Override model. Defaults to platform default.",
     )
+    bridge_mode: bool = Field(
+        default=False,
+        description=(
+            "When True the backend assembles RAG context + system prompt "
+            "and returns bridge_messages for the client to call the local bridge. "
+            "Automatically enabled when model is set and no backend provider is configured."
+        ),
+    )
 
 
 class ToolActionResult(BaseModel):
@@ -477,6 +485,9 @@ class ChatResponse(BaseModel):
     latency_ms: int
     correlation_id: str
     tool_actions: List[ToolAction] = Field(default_factory=list)
+    # Bridge mode: when bridge_required is True, the frontend should call the local bridge with bridge_messages
+    bridge_required: bool = Field(default=False)
+    bridge_messages: Optional[List[Dict[str, Any]]] = Field(default=None)
 
 
 # ── Message persistence ───────────────────────────────────────────────────────
@@ -1880,7 +1891,12 @@ async def chat(
         db, body.session_id, "user", body.message, mode="nebula"
     )
 
-    if model_provider is None:
+    # Bridge mode: when a client model override is set but no backend provider
+    # is configured, fall through to RAG+context assembly and return the
+    # assembled messages for the client (browser extension) to call locally.
+    use_bridge_mode = body.bridge_mode or (body.model is not None and model_provider is None)
+
+    if model_provider is None and not use_bridge_mode:
         log.error("chat_no_model_provider", {
             "component": "api.chat",
             "operation": "chat",
@@ -1945,6 +1961,33 @@ async def chat(
 
     messages.append({"role": "user", "content": body.message})
 
+    # ── 3a. Bridge mode — return assembled messages to client ────────────────
+    if use_bridge_mode:
+        latency_ms = int((time.monotonic() - start_ms) * 1000)
+        model_name = body.model or "local"
+        log.info("chat_bridge_mode", {
+            "component": "api.chat",
+            "operation": "chat",
+            "entity_id": "nebula-assistant",
+            "correlation_id": cid,
+            "metadata": {
+                "model": model_name,
+                "chunks_used": chunks_used,
+                "message_count": len(messages),
+            },
+        })
+        return {
+            "reply": "",
+            "corpus_id": corpus_id,
+            "chunks_used": chunks_used,
+            "model_used": model_name,
+            "latency_ms": latency_ms,
+            "correlation_id": cid,
+            "tool_actions": [],
+            "bridge_required": True,
+            "bridge_messages": messages,
+        }
+
     # ── 3. LLM call (with tool-calling enabled) ─────────────────────────────
     model = body.model or os.getenv("NEBULA_LLM_DEFAULT_MODEL", "gpt-4o-mini")
     try:
diff --git a/webapp/src/api/client.ts b/webapp/src/api/client.ts
index a5c6b6bf..df712e7b 100644
--- a/webapp/src/api/client.ts
+++ b/webapp/src/api/client.ts
@@ -662,12 +662,14 @@ export const policiesApi = {
     page_size?: number
     policy_type?: string
     enabled_only?: boolean
+    agent_id?: string | null
   }) => {
     const qs = new URLSearchParams()
     if (params?.page)         qs.set('page',         String(params.page))
     if (params?.page_size)    qs.set('page_size',    String(params.page_size))
     if (params?.policy_type)  qs.set('policy_type',  params.policy_type)
     if (params?.enabled_only) qs.set('enabled_only', 'true')
+    if (params?.agent_id)     qs.set('agent_id',     params.agent_id)
     const q = qs.toString()
     return get<ApiPolicyList>(`/policies${q ? `?${q}` : ''}`)
   },
@@ -2548,6 +2550,7 @@ export interface ApiChatRequest {
   corpus_id?: string
   top_k?: number
   model?: string
+  bridge_mode?: boolean
 }
 
 export interface ApiChatToolAction {
@@ -2583,6 +2586,8 @@ export interface ApiChatResponse {
   latency_ms: number
   correlation_id: string
   tool_actions?: ApiChatToolAction[]
+  bridge_required?: boolean
+  bridge_messages?: Array<{ role: string; content: string }>
 }
 
 export interface ApiChatCorpus {
diff --git a/webapp/src/components/layout/ChatWorkspace.tsx b/webapp/src/components/layout/ChatWorkspace.tsx
index 6ccd09d4..e1084174 100644
--- a/webapp/src/components/layout/ChatWorkspace.tsx
+++ b/webapp/src/components/layout/ChatWorkspace.tsx
@@ -17,7 +17,7 @@ import {
 } from '@/api/client'
 import { useWebSocket } from '@/hooks/useWebSocket'
 import { useLocalBridge } from '@/hooks/useLocalBridge'
-import { localBridgeListModelsDetailed, type BridgeModelInfo } from '@/lib/localBridge'
+import { localBridgeListModelsDetailed, localBridgeFetch, type BridgeModelInfo } from '@/lib/localBridge'
 import { formatRelativeTime } from '@/lib/utils'
 import { useShell } from '@/context/ShellContext'
 
@@ -966,9 +966,16 @@ export function ChatWorkspace() {
   const { activeSessionId, setActiveSessionId } = useShell()
   const bridge = useLocalBridge()
   const [bridgeModels, setBridgeModels] = useState<BridgeModelInfo[]>([])
-  // chatModel: null = use bridge.selectedModel or backend default; string = user override
+  // chatModel: null = use bridge.selectedModel or first available model
   const [chatModel, setChatModel] = useState<string | null>(null)
-  const effectiveModel = chatModel ?? (bridge.status === 'connected' ? bridge.selectedModel : null)
+
+  // Resolve model: explicit selection → bridge popup selection → first fetched model
+  const effectiveModel = chatModel
+    ?? (bridge.status === 'connected' ? bridge.selectedModel : null)
+    ?? (bridge.status === 'connected' && bridgeModels.length > 0 ? bridgeModels[0].id : null)
+
+  // Bridge is active when extension is connected — model auto-resolved above
+  const isBridgeActive = bridge.status === 'connected'
 
   // ── Fetch local models for chat model picker (via bridge, not raw fetch) ──────
 
@@ -1063,14 +1070,44 @@ export function ChatWorkspace() {
   }, [sessionId, qc, setActiveSessionId])
 
   const nebulaMut = useMutation({
-    mutationFn: ({ message, sid }: { message: string; sid?: string }) =>
-      chatApi.send({
+    mutationFn: async ({ message, sid }: { message: string; sid?: string }) => {
+      // Request bridge mode only when a local model has been resolved; otherwise
+      // the backend would hand back an empty reply that nothing here can complete.
+      const useBridge = isBridgeActive && effectiveModel != null
+      const resp = await chatApi.send({
         message,
         session_id: sid ?? undefined,
         history: history.slice(-10),
-        // Pass model override when a local model is selected
         model: effectiveModel ?? undefined,
-      }),
+        // Tell backend to return assembled messages instead of calling LLM
+        // when bridge is connected (backend has no access to localhost)
+        bridge_mode: useBridge,
+      })
+
+      // Backend answered normally: nothing to do on the client side
+      if (!resp.bridge_required) return resp
+
+      // bridge_required responses carry an empty reply; fail loudly rather
+      // than resolving the mutation with that empty reply
+      if (!effectiveModel || !resp.bridge_messages?.length) {
+        throw new Error('Bridge mode requested but no local model or messages are available')
+      }
+
+      // Backend returned assembled RAG+context messages: run them on the local bridge
+      const bridgeResult = await localBridgeFetch({
+        model: effectiveModel,
+        messages: resp.bridge_messages,
+        temperature: 0.4,
+        max_tokens: 2048,
+      })
+      return {
+        ...resp,
+        reply: bridgeResult.content,
+        model_used: bridgeResult.model || effectiveModel,
+        bridge_required: false,
+        bridge_messages: undefined,
+      }
+    },
     onSuccess: (resp, { message }) => {
       // Extract tryItData from tool_actions if present
       const tryItAction = resp.tool_actions?.find(ta => ta.tool === 'try_it_out')