fix: bridge-first model resolution — bypass 503 when extension connected

Backend (chat.py):
- Add bridge_mode field to ChatRequest
- Add bridge_required + bridge_messages fields to ChatResponse
- When bridge_mode=true (or model set + no backend provider):
  skip LLM call, return assembled RAG+system_prompt messages
- Backend never calls localhost — LLM call happens in browser

Frontend (ChatWorkspace.tsx):
- isBridgeActive = bridge.status === 'connected' (not model-gated)
- effectiveModel chain: chatModel ?? bridge.selectedModel ?? bridgeModels[0].id (bridge fallbacks apply only while the bridge is connected)
- Send bridge_mode:true when bridge connected
- On bridge_required response: call localBridgeFetch with bridge_messages

Resolution order (user-facing chat):
  1. Browser bridge (extension installed + local model) — zero backend config
  2. Offered AI / free tier (NEBULA_LLM_API_KEY in prod env) — TODO: set in prod
  3. BYOAI (user key in Settings)
  4. 503

client.ts:
- ApiChatRequest.bridge_mode?: boolean
- ApiChatResponse.bridge_required?: boolean
- ApiChatResponse.bridge_messages?: Array<{ role: string; content: string }>
This commit is contained in:
2026-04-18 17:33:25 +05:30
parent 4d174c6f65
commit 92bc4dbcc2
3 changed files with 85 additions and 8 deletions

View File

@@ -440,6 +440,14 @@ class ChatRequest(BaseModel):
default=None,
description="Override model. Defaults to platform default.",
)
bridge_mode: bool = Field(
default=False,
description=(
"When True the backend assembles RAG context + system prompt "
"and returns bridge_messages for the client to call the local bridge. "
"Automatically enabled when model is set and no backend provider is configured."
),
)
class ToolActionResult(BaseModel):
@@ -477,6 +485,9 @@ class ChatResponse(BaseModel):
latency_ms: int
correlation_id: str
tool_actions: List[ToolAction] = Field(default_factory=list)
# Bridge mode: when set, frontend should call local bridge with these messages
bridge_required: bool = Field(default=False)
bridge_messages: Optional[List[Dict[str, Any]]] = Field(default=None)
# ── Message persistence ───────────────────────────────────────────────────────
@@ -1880,7 +1891,12 @@ async def chat(
db, body.session_id, "user", body.message, mode="nebula"
)
if model_provider is None:
# Bridge mode: when a client model override is set but no backend provider
# is configured, fall through to RAG+context assembly and return the
# assembled messages for the client (browser extension) to call locally.
use_bridge_mode = body.bridge_mode or (body.model is not None and model_provider is None)
if model_provider is None and not use_bridge_mode:
log.error("chat_no_model_provider", {
"component": "api.chat",
"operation": "chat",
@@ -1945,6 +1961,33 @@ async def chat(
messages.append({"role": "user", "content": body.message})
# ── 3a. Bridge mode — return assembled messages to client ────────────────
if use_bridge_mode:
latency_ms = int((time.monotonic() - start_ms) * 1000)
model_name = body.model or "local"
log.info("chat_bridge_mode", {
"component": "api.chat",
"operation": "chat",
"entity_id": "nebula-assistant",
"correlation_id": cid,
"metadata": {
"model": model_name,
"chunks_used": chunks_used,
"message_count": len(messages),
},
})
return {
"reply": "",
"corpus_id": corpus_id,
"chunks_used": chunks_used,
"model_used": model_name,
"latency_ms": latency_ms,
"correlation_id": cid,
"tool_actions": [],
"bridge_required": True,
"bridge_messages": messages,
}
# ── 3. LLM call (with tool-calling enabled) ─────────────────────────────
model = body.model or os.getenv("NEBULA_LLM_DEFAULT_MODEL", "gpt-4o-mini")
try:

View File

@@ -662,12 +662,14 @@ export const policiesApi = {
page_size?: number
policy_type?: string
enabled_only?: boolean
agent_id?: string | null
}) => {
const qs = new URLSearchParams()
if (params?.page) qs.set('page', String(params.page))
if (params?.page_size) qs.set('page_size', String(params.page_size))
if (params?.policy_type) qs.set('policy_type', params.policy_type)
if (params?.enabled_only) qs.set('enabled_only', 'true')
if (params?.agent_id) qs.set('agent_id', params.agent_id)
const q = qs.toString()
return get<ApiPolicyList>(`/policies${q ? `?${q}` : ''}`)
},
@@ -2548,6 +2550,7 @@ export interface ApiChatRequest {
corpus_id?: string
top_k?: number
model?: string
bridge_mode?: boolean
}
export interface ApiChatToolAction {
@@ -2583,6 +2586,8 @@ export interface ApiChatResponse {
latency_ms: number
correlation_id: string
tool_actions?: ApiChatToolAction[]
bridge_required?: boolean
bridge_messages?: Array<{ role: string; content: string }>
}
export interface ApiChatCorpus {

View File

@@ -17,7 +17,7 @@ import {
} from '@/api/client'
import { useWebSocket } from '@/hooks/useWebSocket'
import { useLocalBridge } from '@/hooks/useLocalBridge'
import { localBridgeListModelsDetailed, type BridgeModelInfo } from '@/lib/localBridge'
import { localBridgeListModelsDetailed, localBridgeFetch, type BridgeModelInfo } from '@/lib/localBridge'
import { formatRelativeTime } from '@/lib/utils'
import { useShell } from '@/context/ShellContext'
@@ -966,9 +966,16 @@ export function ChatWorkspace() {
const { activeSessionId, setActiveSessionId } = useShell()
const bridge = useLocalBridge()
const [bridgeModels, setBridgeModels] = useState<BridgeModelInfo[]>([])
// chatModel: null = use bridge.selectedModel or backend default; string = user override
// chatModel: null = use bridge.selectedModel or first available model
const [chatModel, setChatModel] = useState<string | null>(null)
const effectiveModel = chatModel ?? (bridge.status === 'connected' ? bridge.selectedModel : null)
// Resolve model: explicit selection → bridge popup selection → first fetched model
const effectiveModel = chatModel
?? (bridge.status === 'connected' ? bridge.selectedModel : null)
?? (bridge.status === 'connected' && bridgeModels.length > 0 ? bridgeModels[0].id : null)
// Bridge is active when extension is connected — model auto-resolved above
const isBridgeActive = bridge.status === 'connected'
// ── Fetch local models for chat model picker (via bridge, not raw fetch) ──────
@@ -1063,14 +1070,36 @@ export function ChatWorkspace() {
}, [sessionId, qc, setActiveSessionId])
const nebulaMut = useMutation({
mutationFn: ({ message, sid }: { message: string; sid?: string }) =>
chatApi.send({
mutationFn: async ({ message, sid }: { message: string; sid?: string }) => {
const resp = await chatApi.send({
message,
session_id: sid ?? undefined,
history: history.slice(-10),
// Pass model override when a local model is selected
model: effectiveModel ?? undefined,
}),
// Tell backend to return assembled messages instead of calling LLM
// when bridge is connected (backend has no access to localhost)
bridge_mode: isBridgeActive,
})
// When backend returns assembled RAG+context messages, call local bridge
if (resp.bridge_required && resp.bridge_messages && effectiveModel) {
const bridgeResult = await localBridgeFetch({
model: effectiveModel,
messages: resp.bridge_messages,
temperature: 0.4,
max_tokens: 2048,
})
return {
...resp,
reply: bridgeResult.content,
model_used: bridgeResult.model || effectiveModel,
bridge_required: false,
bridge_messages: undefined,
}
}
return resp
},
onSuccess: (resp, { message }) => {
// Extract tryItData from tool_actions if present
const tryItAction = resp.tool_actions?.find(ta => ta.tool === 'try_it_out')