Put huqie.txt in RAGFlow image
This commit is contained in:
parent
27b0550876
commit
cfc63d3c4a
4 changed files with 15 additions and 555632 deletions
|
|
@ -10,11 +10,14 @@ WORKDIR /ragflow
|
|||
# Copy models downloaded via download_deps.py
|
||||
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
|
||||
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
|
||||
cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
|
||||
tar --exclude='.*' -cf - \
|
||||
/huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
|
||||
/huggingface.co/InfiniFlow/deepdoc \
|
||||
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
|
||||
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
|
||||
|
||||
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/rag,target=/rag \
|
||||
mkdir -p /usr/share/infinity/resource/rag/ && \
|
||||
cp /rag/huqie.txt /usr/share/infinity/resource/rag/
|
||||
|
||||
# https://github.com/chrismattmann/tika-python
|
||||
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
|
||||
|
|
|
|||
|
|
@ -8,3 +8,5 @@ COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base
|
|||
COPY nltk_data /nltk_data
|
||||
|
||||
COPY huggingface.co /huggingface.co
|
||||
|
||||
COPY resource/rag/ /rag
|
||||
|
|
|
|||
|
|
@ -5,11 +5,14 @@
|
|||
# requires-python = ">=3.10"
|
||||
# dependencies = [
|
||||
# "nltk",
|
||||
# "huggingface_hub",
|
||||
# "gitpython"
|
||||
# ]
|
||||
# ///
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import git
|
||||
import urllib.request
|
||||
from typing import Union
|
||||
|
||||
|
|
@ -43,7 +46,6 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
|
|||
repos = [
|
||||
"InfiniFlow/text_concat_xgb_v1.0",
|
||||
"InfiniFlow/deepdoc",
|
||||
"InfiniFlow/huqie",
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -75,3 +77,8 @@ if __name__ == "__main__":
|
|||
for repo_id in repos:
|
||||
print(f"Downloading huggingface repo {repo_id}...")
|
||||
download_model(repo_id)
|
||||
|
||||
repo_url = "https://github.com/infiniflow/resource.git"
|
||||
clone_dir = os.path.abspath("resource")
|
||||
print(f"Cloning GitHub repo {repo_url}...")
|
||||
repo = git.Repo.clone_from(repo_url, clone_dir)
|
||||
|
|
|
|||
555629
rag/res/huqie.txt
555629
rag/res/huqie.txt
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue