Persist embedding dimensions

This commit is contained in:
phact 2025-10-10 14:16:58 -04:00
parent 77f558e690
commit 6266e5c18d
10 changed files with 70 additions and 6 deletions

View file

@ -29,6 +29,8 @@ export interface ChunkResult {
owner_email?: string;
file_size?: number;
connector_type?: string;
embedding_model?: string;
embedding_dimensions?: number;
index?: number;
}
@ -43,6 +45,8 @@ export interface File {
owner_email?: string;
size: number;
connector_type: string;
embedding_model?: string;
embedding_dimensions?: number;
status?:
| "processing"
| "active"
@ -134,6 +138,8 @@ export const useGetSearchQuery = (
owner_email?: string;
file_size?: number;
connector_type?: string;
embedding_model?: string;
embedding_dimensions?: number;
}
>();
@ -142,6 +148,15 @@ export const useGetSearchQuery = (
if (existing) {
existing.chunks.push(chunk);
existing.totalScore += chunk.score;
if (!existing.embedding_model && chunk.embedding_model) {
existing.embedding_model = chunk.embedding_model;
}
if (
existing.embedding_dimensions == null &&
typeof chunk.embedding_dimensions === "number"
) {
existing.embedding_dimensions = chunk.embedding_dimensions;
}
} else {
fileMap.set(chunk.filename, {
filename: chunk.filename,
@ -154,6 +169,8 @@ export const useGetSearchQuery = (
owner_email: chunk.owner_email,
file_size: chunk.file_size,
connector_type: chunk.connector_type,
embedding_model: chunk.embedding_model,
embedding_dimensions: chunk.embedding_dimensions,
});
}
});
@ -169,6 +186,8 @@ export const useGetSearchQuery = (
owner_email: file.owner_email || "",
size: file.file_size || 0,
connector_type: file.connector_type || "local",
embedding_model: file.embedding_model,
embedding_dimensions: file.embedding_dimensions,
chunks: file.chunks,
}));

View file

@ -19,6 +19,8 @@ export interface TaskFileEntry {
updated_at?: string;
duration_seconds?: number;
filename?: string;
embedding_model?: string;
embedding_dimensions?: number;
[key: string]: unknown;
}

View file

@ -168,7 +168,7 @@
}
.header-notifications {
@apply absolute right-[0px] top-[-4px] h-1 w-1 rounded-full bg-destructive;
@apply absolute right-1 top-1 h-2 w-2 rounded-full bg-destructive;
}
.header-menu-bar {

View file

@ -86,6 +86,8 @@ function SearchPage() {
connector_type: taskFile.connector_type,
status: taskFile.status,
error: taskFile.error,
embedding_model: taskFile.embedding_model,
embedding_dimensions: taskFile.embedding_dimensions,
};
});
@ -124,7 +126,7 @@ function SearchPage() {
const gridRef = useRef<AgGridReact>(null);
const columnDefs = [
const columnDefs: ColDef<File>[] = [
{
field: "filename",
headerName: "Source",
@ -200,6 +202,28 @@ function SearchPage() {
);
},
},
{
field: "embedding_model",
headerName: "Embedding model",
minWidth: 200,
cellRenderer: ({ data }: CustomCellRendererProps<File>) => (
<span className="text-xs text-muted-foreground">
{data?.embedding_model || "—"}
</span>
),
},
{
field: "embedding_dimensions",
headerName: "Dimensions",
width: 110,
cellRenderer: ({ data }: CustomCellRendererProps<File>) => (
<span className="text-xs text-muted-foreground">
{typeof data?.embedding_dimensions === "number"
? data.embedding_dimensions.toString()
: "—"}
</span>
),
},
{
field: "status",
headerName: "Status",

View file

@ -129,7 +129,7 @@ export function LayoutWrapper({ children }: { children: React.ReactNode }) {
{/* Task Notification Bell */}
<button
onClick={toggleMenu}
className="h-8 w-8 hover:bg-muted rounded-lg flex items-center justify-center"
className="relative h-8 w-8 hover:bg-muted rounded-lg flex items-center justify-center"
>
<Bell size={16} className="text-muted-foreground" />
{activeTasks.length > 0 && (

View file

@ -33,6 +33,8 @@ export interface TaskFile {
created_at: string;
updated_at: string;
error?: string;
embedding_model?: string;
embedding_dimensions?: number;
}
interface TaskContextType {
tasks: Task[];
@ -108,6 +110,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
created_at: now,
updated_at: now,
error: file.error,
embedding_model: file.embedding_model,
embedding_dimensions: file.embedding_dimensions,
}));
setFiles((prevFiles) => [...prevFiles, ...filesToAdd]);
@ -214,6 +218,14 @@ export function TaskProvider({ children }: { children: React.ReactNode }) {
? fileInfoEntry.updated_at
: now,
error: fileError,
embedding_model:
typeof fileInfoEntry.embedding_model === "string"
? fileInfoEntry.embedding_model
: undefined,
embedding_dimensions:
typeof fileInfoEntry.embedding_dimensions === "number"
? fileInfoEntry.embedding_dimensions
: undefined,
};
if (existingFileIndex >= 0) {

View file

@ -226,6 +226,7 @@ class TaskProcessor:
embedding_field_name: vect,
# Track which model was used and the length of the vector it produced
"embedding_model": embedding_model,
"embedding_dimensions": len(vect),
"file_size": file_size,
"connector_type": connector_type,
"indexed_time": datetime.datetime.now().isoformat(),
@ -763,4 +764,4 @@ class LangflowFileProcessor(TaskProcessor):
file_task.error_message = str(e)
file_task.updated_at = time.time()
upload_task.failed_files += 1
raise
raise

View file

@ -282,6 +282,7 @@ class SearchService:
"file_size",
"connector_type",
"embedding_model", # Include embedding model in results
"embedding_dimensions",
"allowed_users",
"allowed_groups",
],
@ -333,6 +334,7 @@ class SearchService:
"file_size": hit["_source"].get("file_size"),
"connector_type": hit["_source"].get("connector_type"),
"embedding_model": hit["_source"].get("embedding_model"), # Include in results
"embedding_dimensions": hit["_source"].get("embedding_dimensions"),
}
)

View file

@ -116,7 +116,10 @@ async def ensure_embedding_field_exists(
# Also ensure the tracking fields exist: embedding_model (keyword) and embedding_dimensions (integer)
"embedding_model": {
"type": "keyword"
}
},
"embedding_dimensions": {
"type": "integer"
},
}
}

View file

@ -54,6 +54,7 @@ def create_dynamic_index_body(embedding_model: str) -> dict:
},
# Track which embedding model was used for this chunk, and its vector dimensions
"embedding_model": {"type": "keyword"},
"embedding_dimensions": {"type": "integer"},
"source_url": {"type": "keyword"},
"connector_type": {"type": "keyword"},
"owner": {"type": "keyword"},
@ -67,4 +68,4 @@ def create_dynamic_index_body(embedding_model: str) -> dict:
"metadata": {"type": "object"},
}
},
}
}