fix: bridge-first model resolution — bypass 503 when extension connected
Backend (chat.py):
- Add bridge_mode field to ChatRequest
- Add bridge_required + bridge_messages fields to ChatResponse
- When bridge_mode=true (or a model is set and no backend provider is configured):
  skip the LLM call and return the assembled RAG + system-prompt messages
- Backend never calls localhost — LLM call happens in browser
Frontend (ChatWorkspace.tsx):
- isBridgeActive = bridge.status === 'connected' (not model-gated)
- effectiveModel chain: chatModel ?? bridge.selectedModel ?? bridgeModels[0].id (bridge values are used only while the bridge is connected)
- Send bridge_mode:true when bridge connected
- On bridge_required response: call localBridgeFetch with bridge_messages
Resolution order (user-facing chat):
1. Browser bridge (extension installed + local model) — zero backend config
2. Offered AI / free tier (NEBULA_LLM_API_KEY in prod env) — TODO: set in prod
3. BYOAI (user key in Settings)
4. 503
client.ts:
- ApiChatRequest.bridge_mode?: boolean
- ApiChatResponse.bridge_required?: boolean
- ApiChatResponse.bridge_messages?: Array<{ role: string; content: string }>
This commit is contained in:
@@ -440,6 +440,14 @@ class ChatRequest(BaseModel):
|
||||
default=None,
|
||||
description="Override model. Defaults to platform default.",
|
||||
)
|
||||
bridge_mode: bool = Field(
|
||||
default=False,
|
||||
description=(
|
||||
"When True the backend assembles RAG context + system prompt "
|
||||
"and returns bridge_messages for the client to call the local bridge. "
|
||||
"Automatically enabled when model is set and no backend provider is configured."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class ToolActionResult(BaseModel):
|
||||
@@ -477,6 +485,9 @@ class ChatResponse(BaseModel):
|
||||
latency_ms: int
|
||||
correlation_id: str
|
||||
tool_actions: List[ToolAction] = Field(default_factory=list)
|
||||
# Bridge mode: when set, frontend should call local bridge with these messages
|
||||
bridge_required: bool = Field(default=False)
|
||||
bridge_messages: Optional[List[Dict[str, Any]]] = Field(default=None)
|
||||
|
||||
|
||||
# ── Message persistence ───────────────────────────────────────────────────────
|
||||
@@ -1880,7 +1891,12 @@ async def chat(
|
||||
db, body.session_id, "user", body.message, mode="nebula"
|
||||
)
|
||||
|
||||
if model_provider is None:
|
||||
# Bridge mode: when a client model override is set but no backend provider
|
||||
# is configured, fall through to RAG+context assembly and return the
|
||||
# assembled messages for the client (browser extension) to call locally.
|
||||
use_bridge_mode = body.bridge_mode or (body.model is not None and model_provider is None)
|
||||
|
||||
if model_provider is None and not use_bridge_mode:
|
||||
log.error("chat_no_model_provider", {
|
||||
"component": "api.chat",
|
||||
"operation": "chat",
|
||||
@@ -1945,6 +1961,33 @@ async def chat(
|
||||
|
||||
messages.append({"role": "user", "content": body.message})
|
||||
|
||||
# ── 3a. Bridge mode — return assembled messages to client ────────────────
|
||||
if use_bridge_mode:
|
||||
latency_ms = int((time.monotonic() - start_ms) * 1000)
|
||||
model_name = body.model or "local"
|
||||
log.info("chat_bridge_mode", {
|
||||
"component": "api.chat",
|
||||
"operation": "chat",
|
||||
"entity_id": "nebula-assistant",
|
||||
"correlation_id": cid,
|
||||
"metadata": {
|
||||
"model": model_name,
|
||||
"chunks_used": chunks_used,
|
||||
"message_count": len(messages),
|
||||
},
|
||||
})
|
||||
return {
|
||||
"reply": "",
|
||||
"corpus_id": corpus_id,
|
||||
"chunks_used": chunks_used,
|
||||
"model_used": model_name,
|
||||
"latency_ms": latency_ms,
|
||||
"correlation_id": cid,
|
||||
"tool_actions": [],
|
||||
"bridge_required": True,
|
||||
"bridge_messages": messages,
|
||||
}
|
||||
|
||||
# ── 3. LLM call (with tool-calling enabled) ─────────────────────────────
|
||||
model = body.model or os.getenv("NEBULA_LLM_DEFAULT_MODEL", "gpt-4o-mini")
|
||||
try:
|
||||
|
||||
@@ -662,12 +662,14 @@ export const policiesApi = {
|
||||
page_size?: number
|
||||
policy_type?: string
|
||||
enabled_only?: boolean
|
||||
agent_id?: string | null
|
||||
}) => {
|
||||
const qs = new URLSearchParams()
|
||||
if (params?.page) qs.set('page', String(params.page))
|
||||
if (params?.page_size) qs.set('page_size', String(params.page_size))
|
||||
if (params?.policy_type) qs.set('policy_type', params.policy_type)
|
||||
if (params?.enabled_only) qs.set('enabled_only', 'true')
|
||||
if (params?.agent_id) qs.set('agent_id', params.agent_id)
|
||||
const q = qs.toString()
|
||||
return get<ApiPolicyList>(`/policies${q ? `?${q}` : ''}`)
|
||||
},
|
||||
@@ -2548,6 +2550,7 @@ export interface ApiChatRequest {
|
||||
corpus_id?: string
|
||||
top_k?: number
|
||||
model?: string
|
||||
bridge_mode?: boolean
|
||||
}
|
||||
|
||||
export interface ApiChatToolAction {
|
||||
@@ -2583,6 +2586,8 @@ export interface ApiChatResponse {
|
||||
latency_ms: number
|
||||
correlation_id: string
|
||||
tool_actions?: ApiChatToolAction[]
|
||||
bridge_required?: boolean
|
||||
bridge_messages?: Array<{ role: string; content: string }>
|
||||
}
|
||||
|
||||
export interface ApiChatCorpus {
|
||||
|
||||
@@ -17,7 +17,7 @@ import {
|
||||
} from '@/api/client'
|
||||
import { useWebSocket } from '@/hooks/useWebSocket'
|
||||
import { useLocalBridge } from '@/hooks/useLocalBridge'
|
||||
import { localBridgeListModelsDetailed, type BridgeModelInfo } from '@/lib/localBridge'
|
||||
import { localBridgeListModelsDetailed, localBridgeFetch, type BridgeModelInfo } from '@/lib/localBridge'
|
||||
import { formatRelativeTime } from '@/lib/utils'
|
||||
import { useShell } from '@/context/ShellContext'
|
||||
|
||||
@@ -966,9 +966,16 @@ export function ChatWorkspace() {
|
||||
const { activeSessionId, setActiveSessionId } = useShell()
|
||||
const bridge = useLocalBridge()
|
||||
const [bridgeModels, setBridgeModels] = useState<BridgeModelInfo[]>([])
|
||||
// chatModel: null = use bridge.selectedModel or backend default; string = user override
|
||||
// chatModel: null = use bridge.selectedModel or first available model
|
||||
const [chatModel, setChatModel] = useState<string | null>(null)
|
||||
const effectiveModel = chatModel ?? (bridge.status === 'connected' ? bridge.selectedModel : null)
|
||||
|
||||
// Resolve model: explicit selection → bridge popup selection → first fetched model
|
||||
const effectiveModel = chatModel
|
||||
?? (bridge.status === 'connected' ? bridge.selectedModel : null)
|
||||
?? (bridge.status === 'connected' && bridgeModels.length > 0 ? bridgeModels[0].id : null)
|
||||
|
||||
// Bridge is active when extension is connected — model auto-resolved above
|
||||
const isBridgeActive = bridge.status === 'connected'
|
||||
|
||||
// ── Fetch local models for chat model picker (via bridge, not raw fetch) ──────
|
||||
|
||||
@@ -1063,14 +1070,36 @@ export function ChatWorkspace() {
|
||||
}, [sessionId, qc, setActiveSessionId])
|
||||
|
||||
const nebulaMut = useMutation({
|
||||
mutationFn: ({ message, sid }: { message: string; sid?: string }) =>
|
||||
chatApi.send({
|
||||
mutationFn: async ({ message, sid }: { message: string; sid?: string }) => {
|
||||
const resp = await chatApi.send({
|
||||
message,
|
||||
session_id: sid ?? undefined,
|
||||
history: history.slice(-10),
|
||||
// Pass model override when a local model is selected
|
||||
model: effectiveModel ?? undefined,
|
||||
}),
|
||||
// Tell backend to return assembled messages instead of calling LLM
|
||||
// when bridge is connected (backend has no access to localhost)
|
||||
bridge_mode: isBridgeActive,
|
||||
})
|
||||
|
||||
// When backend returns assembled RAG+context messages, call local bridge
|
||||
if (resp.bridge_required && resp.bridge_messages && effectiveModel) {
|
||||
const bridgeResult = await localBridgeFetch({
|
||||
model: effectiveModel,
|
||||
messages: resp.bridge_messages,
|
||||
temperature: 0.4,
|
||||
max_tokens: 2048,
|
||||
})
|
||||
return {
|
||||
...resp,
|
||||
reply: bridgeResult.content,
|
||||
model_used: bridgeResult.model || effectiveModel,
|
||||
bridge_required: false,
|
||||
bridge_messages: undefined,
|
||||
}
|
||||
}
|
||||
|
||||
return resp
|
||||
},
|
||||
onSuccess: (resp, { message }) => {
|
||||
// Extract tryItData from tool_actions if present
|
||||
const tryItAction = resp.tool_actions?.find(ta => ta.tool === 'try_it_out')
|
||||
|
||||
Reference in New Issue
Block a user