Persist embedding dimensions

This commit is contained in:
phact 2025-10-10 14:16:58 -04:00
parent 77f558e690
commit 6266e5c18d
10 changed files with 70 additions and 6 deletions

View file

@ -29,6 +29,8 @@ export interface ChunkResult {
owner_email?: string; owner_email?: string;
file_size?: number; file_size?: number;
connector_type?: string; connector_type?: string;
embedding_model?: string;
embedding_dimensions?: number;
index?: number; index?: number;
} }
@ -43,6 +45,8 @@ export interface File {
owner_email?: string; owner_email?: string;
size: number; size: number;
connector_type: string; connector_type: string;
embedding_model?: string;
embedding_dimensions?: number;
status?: status?:
| "processing" | "processing"
| "active" | "active"
@ -134,6 +138,8 @@ export const useGetSearchQuery = (
owner_email?: string; owner_email?: string;
file_size?: number; file_size?: number;
connector_type?: string; connector_type?: string;
embedding_model?: string;
embedding_dimensions?: number;
} }
>(); >();
@ -142,6 +148,15 @@ export const useGetSearchQuery = (
if (existing) { if (existing) {
existing.chunks.push(chunk); existing.chunks.push(chunk);
existing.totalScore += chunk.score; existing.totalScore += chunk.score;
if (!existing.embedding_model && chunk.embedding_model) {
existing.embedding_model = chunk.embedding_model;
}
if (
existing.embedding_dimensions == null &&
typeof chunk.embedding_dimensions === "number"
) {
existing.embedding_dimensions = chunk.embedding_dimensions;
}
} else { } else {
fileMap.set(chunk.filename, { fileMap.set(chunk.filename, {
filename: chunk.filename, filename: chunk.filename,
@ -154,6 +169,8 @@ export const useGetSearchQuery = (
owner_email: chunk.owner_email, owner_email: chunk.owner_email,
file_size: chunk.file_size, file_size: chunk.file_size,
connector_type: chunk.connector_type, connector_type: chunk.connector_type,
embedding_model: chunk.embedding_model,
embedding_dimensions: chunk.embedding_dimensions,
}); });
} }
}); });
@ -169,6 +186,8 @@ export const useGetSearchQuery = (
owner_email: file.owner_email || "", owner_email: file.owner_email || "",
size: file.file_size || 0, size: file.file_size || 0,
connector_type: file.connector_type || "local", connector_type: file.connector_type || "local",
embedding_model: file.embedding_model,
embedding_dimensions: file.embedding_dimensions,
chunks: file.chunks, chunks: file.chunks,
})); }));

View file

@ -19,6 +19,8 @@ export interface TaskFileEntry {
updated_at?: string; updated_at?: string;
duration_seconds?: number; duration_seconds?: number;
filename?: string; filename?: string;
embedding_model?: string;
embedding_dimensions?: number;
[key: string]: unknown; [key: string]: unknown;
} }

View file

@ -168,7 +168,7 @@
} }
.header-notifications { .header-notifications {
@apply absolute right-[0px] top-[-4px] h-1 w-1 rounded-full bg-destructive; @apply absolute right-1 top-1 h-2 w-2 rounded-full bg-destructive;
} }
.header-menu-bar { .header-menu-bar {

View file

@ -86,6 +86,8 @@ function SearchPage() {
connector_type: taskFile.connector_type, connector_type: taskFile.connector_type,
status: taskFile.status, status: taskFile.status,
error: taskFile.error, error: taskFile.error,
embedding_model: taskFile.embedding_model,
embedding_dimensions: taskFile.embedding_dimensions,
}; };
}); });
@ -124,7 +126,7 @@ function SearchPage() {
const gridRef = useRef<AgGridReact>(null); const gridRef = useRef<AgGridReact>(null);
const columnDefs = [ const columnDefs: ColDef<File>[] = [
{ {
field: "filename", field: "filename",
headerName: "Source", headerName: "Source",
@ -200,6 +202,28 @@ function SearchPage() {
); );
}, },
}, },
{
field: "embedding_model",
headerName: "Embedding model",
minWidth: 200,
cellRenderer: ({ data }: CustomCellRendererProps<File>) => (
<span className="text-xs text-muted-foreground">
{data?.embedding_model || "—"}
</span>
),
},
{
field: "embedding_dimensions",
headerName: "Dimensions",
width: 110,
cellRenderer: ({ data }: CustomCellRendererProps<File>) => (
<span className="text-xs text-muted-foreground">
{typeof data?.embedding_dimensions === "number"
? data.embedding_dimensions.toString()
: "—"}
</span>
),
},
{ {
field: "status", field: "status",
headerName: "Status", headerName: "Status",

View file

@ -129,7 +129,7 @@ export function LayoutWrapper({ children }: { children: React.ReactNode }) {
{/* Task Notification Bell */} {/* Task Notification Bell */}
<button <button
onClick={toggleMenu} onClick={toggleMenu}
className="h-8 w-8 hover:bg-muted rounded-lg flex items-center justify-center" className="relative h-8 w-8 hover:bg-muted rounded-lg flex items-center justify-center"
> >
<Bell size={16} className="text-muted-foreground" /> <Bell size={16} className="text-muted-foreground" />
{activeTasks.length > 0 && ( {activeTasks.length > 0 && (

View file

@ -33,6 +33,8 @@ export interface TaskFile {
created_at: string; created_at: string;
updated_at: string; updated_at: string;
error?: string; error?: string;
embedding_model?: string;
embedding_dimensions?: number;
} }
interface TaskContextType { interface TaskContextType {
tasks: Task[]; tasks: Task[];
@ -108,6 +110,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
created_at: now, created_at: now,
updated_at: now, updated_at: now,
error: file.error, error: file.error,
embedding_model: file.embedding_model,
embedding_dimensions: file.embedding_dimensions,
})); }));
setFiles((prevFiles) => [...prevFiles, ...filesToAdd]); setFiles((prevFiles) => [...prevFiles, ...filesToAdd]);
@ -214,6 +218,14 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
? fileInfoEntry.updated_at ? fileInfoEntry.updated_at
: now, : now,
error: fileError, error: fileError,
embedding_model:
typeof fileInfoEntry.embedding_model === "string"
? fileInfoEntry.embedding_model
: undefined,
embedding_dimensions:
typeof fileInfoEntry.embedding_dimensions === "number"
? fileInfoEntry.embedding_dimensions
: undefined,
}; };
if (existingFileIndex >= 0) { if (existingFileIndex >= 0) {

View file

@ -226,6 +226,7 @@ class TaskProcessor:
embedding_field_name: vect, embedding_field_name: vect,
# Track which model was used # Track which model was used
"embedding_model": embedding_model, "embedding_model": embedding_model,
"embedding_dimensions": len(vect),
"file_size": file_size, "file_size": file_size,
"connector_type": connector_type, "connector_type": connector_type,
"indexed_time": datetime.datetime.now().isoformat(), "indexed_time": datetime.datetime.now().isoformat(),

View file

@ -282,6 +282,7 @@ class SearchService:
"file_size", "file_size",
"connector_type", "connector_type",
"embedding_model", # Include embedding model in results "embedding_model", # Include embedding model in results
"embedding_dimensions",
"allowed_users", "allowed_users",
"allowed_groups", "allowed_groups",
], ],
@ -333,6 +334,7 @@ class SearchService:
"file_size": hit["_source"].get("file_size"), "file_size": hit["_source"].get("file_size"),
"connector_type": hit["_source"].get("connector_type"), "connector_type": hit["_source"].get("connector_type"),
"embedding_model": hit["_source"].get("embedding_model"), # Include in results "embedding_model": hit["_source"].get("embedding_model"), # Include in results
"embedding_dimensions": hit["_source"].get("embedding_dimensions"),
} }
) )

View file

@ -116,7 +116,10 @@ async def ensure_embedding_field_exists(
# Also ensure the embedding_model tracking field exists as keyword # Also ensure the embedding_model tracking field exists as keyword
"embedding_model": { "embedding_model": {
"type": "keyword" "type": "keyword"
} },
"embedding_dimensions": {
"type": "integer"
},
} }
} }

View file

@ -54,6 +54,7 @@ def create_dynamic_index_body(embedding_model: str) -> dict:
}, },
# Track which embedding model was used for this chunk # Track which embedding model was used for this chunk
"embedding_model": {"type": "keyword"}, "embedding_model": {"type": "keyword"},
"embedding_dimensions": {"type": "integer"},
"source_url": {"type": "keyword"}, "source_url": {"type": "keyword"},
"connector_type": {"type": "keyword"}, "connector_type": {"type": "keyword"},
"owner": {"type": "keyword"}, "owner": {"type": "keyword"},