diff --git a/src/api/routers/chat.py b/src/api/routers/chat.py index ae4a7dd4..af70b5b3 100644 --- a/src/api/routers/chat.py +++ b/src/api/routers/chat.py @@ -440,6 +440,14 @@ class ChatRequest(BaseModel): default=None, description="Override model. Defaults to platform default.", ) + bridge_mode: bool = Field( + default=False, + description=( + "When True the backend assembles RAG context + system prompt " + "and returns bridge_messages for the client to call the local bridge. " + "Automatically enabled when model is set and no backend provider is configured." + ), + ) class ToolActionResult(BaseModel): @@ -477,6 +485,9 @@ class ChatResponse(BaseModel): latency_ms: int correlation_id: str tool_actions: List[ToolAction] = Field(default_factory=list) + # Bridge mode: when set, frontend should call local bridge with these messages + bridge_required: bool = Field(default=False) + bridge_messages: Optional[List[Dict[str, Any]]] = Field(default=None) # ── Message persistence ─────────────────────────────────────────────────────── @@ -1880,7 +1891,12 @@ async def chat( db, body.session_id, "user", body.message, mode="nebula" ) - if model_provider is None: + # Bridge mode: when a client model override is set but no backend provider + # is configured, fall through to RAG+context assembly and return the + # assembled messages for the client (browser extension) to call locally. + use_bridge_mode = body.bridge_mode or (body.model is not None and model_provider is None) + + if model_provider is None and not use_bridge_mode: log.error("chat_no_model_provider", { "component": "api.chat", "operation": "chat", @@ -1945,6 +1961,33 @@ async def chat( messages.append({"role": "user", "content": body.message}) + # ── 3a. 
Bridge mode — return assembled messages to client ──────────────── + if use_bridge_mode: + latency_ms = int((time.monotonic() - start_ms) * 1000) + model_name = body.model or "local" + log.info("chat_bridge_mode", { + "component": "api.chat", + "operation": "chat", + "entity_id": "nebula-assistant", + "correlation_id": cid, + "metadata": { + "model": model_name, + "chunks_used": chunks_used, + "message_count": len(messages), + }, + }) + return { + "reply": "", + "corpus_id": corpus_id, + "chunks_used": chunks_used, + "model_used": model_name, + "latency_ms": latency_ms, + "correlation_id": cid, + "tool_actions": [], + "bridge_required": True, + "bridge_messages": messages, + } + # ── 3. LLM call (with tool-calling enabled) ───────────────────────────── model = body.model or os.getenv("NEBULA_LLM_DEFAULT_MODEL", "gpt-4o-mini") try: diff --git a/webapp/src/api/client.ts b/webapp/src/api/client.ts index a5c6b6bf..df712e7b 100644 --- a/webapp/src/api/client.ts +++ b/webapp/src/api/client.ts @@ -662,12 +662,14 @@ export const policiesApi = { page_size?: number policy_type?: string enabled_only?: boolean + agent_id?: string | null }) => { const qs = new URLSearchParams() if (params?.page) qs.set('page', String(params.page)) if (params?.page_size) qs.set('page_size', String(params.page_size)) if (params?.policy_type) qs.set('policy_type', params.policy_type) if (params?.enabled_only) qs.set('enabled_only', 'true') + if (params?.agent_id) qs.set('agent_id', params.agent_id) const q = qs.toString() return get(`/policies${q ? 
`?${q}` : ''}`) }, @@ -2548,6 +2550,7 @@ export interface ApiChatRequest { corpus_id?: string top_k?: number model?: string + bridge_mode?: boolean } export interface ApiChatToolAction { @@ -2583,6 +2586,8 @@ export interface ApiChatResponse { latency_ms: number correlation_id: string tool_actions?: ApiChatToolAction[] + bridge_required?: boolean + bridge_messages?: Array<{ role: string; content: string }> } export interface ApiChatCorpus { diff --git a/webapp/src/components/layout/ChatWorkspace.tsx b/webapp/src/components/layout/ChatWorkspace.tsx index 6ccd09d4..e1084174 100644 --- a/webapp/src/components/layout/ChatWorkspace.tsx +++ b/webapp/src/components/layout/ChatWorkspace.tsx @@ -17,7 +17,7 @@ import { } from '@/api/client' import { useWebSocket } from '@/hooks/useWebSocket' import { useLocalBridge } from '@/hooks/useLocalBridge' -import { localBridgeListModelsDetailed, type BridgeModelInfo } from '@/lib/localBridge' +import { localBridgeListModelsDetailed, localBridgeFetch, type BridgeModelInfo } from '@/lib/localBridge' import { formatRelativeTime } from '@/lib/utils' import { useShell } from '@/context/ShellContext' @@ -966,9 +966,16 @@ export function ChatWorkspace() { const { activeSessionId, setActiveSessionId } = useShell() const bridge = useLocalBridge() const [bridgeModels, setBridgeModels] = useState([]) - // chatModel: null = use bridge.selectedModel or backend default; string = user override + // chatModel: null = use bridge.selectedModel or first available model const [chatModel, setChatModel] = useState(null) - const effectiveModel = chatModel ?? (bridge.status === 'connected' ? bridge.selectedModel : null) + + // Resolve model: explicit selection → bridge popup selection → first fetched model + const effectiveModel = chatModel + ?? (bridge.status === 'connected' ? bridge.selectedModel : null) + ?? (bridge.status === 'connected' && bridgeModels.length > 0 ? 
bridgeModels[0].id : null) + + // Bridge is active when extension is connected — model auto-resolved above + const isBridgeActive = bridge.status === 'connected' // ── Fetch local models for chat model picker (via bridge, not raw fetch) ────── @@ -1063,14 +1070,36 @@ export function ChatWorkspace() { }, [sessionId, qc, setActiveSessionId]) const nebulaMut = useMutation({ - mutationFn: ({ message, sid }: { message: string; sid?: string }) => - chatApi.send({ + mutationFn: async ({ message, sid }: { message: string; sid?: string }) => { + const resp = await chatApi.send({ message, session_id: sid ?? undefined, history: history.slice(-10), - // Pass model override when a local model is selected model: effectiveModel ?? undefined, - }), + // Ask the backend for assembled messages (it cannot reach localhost) only + // when the bridge is connected AND a model is resolved to run them locally + bridge_mode: isBridgeActive && !!effectiveModel, + }) + + // When backend returns assembled RAG+context messages, call local bridge + if (resp.bridge_required && resp.bridge_messages && effectiveModel) { + const bridgeResult = await localBridgeFetch({ + model: effectiveModel, + messages: resp.bridge_messages, + temperature: 0.4, + max_tokens: 2048, + }) + return { + ...resp, + reply: bridgeResult.content, + model_used: bridgeResult.model || effectiveModel, + bridge_required: false, + bridge_messages: undefined, + } + } + + return resp + }, onSuccess: (resp, { message }) => { // Extract tryItData from tool_actions if present const tryItAction = resp.tool_actions?.find(ta => ta.tool === 'try_it_out')