Put huqie.txt in RAGFlow image
This commit is contained in:
parent
27b0550876
commit
cfc63d3c4a
4 changed files with 15 additions and 555632 deletions
|
|
@ -10,11 +10,14 @@ WORKDIR /ragflow
|
||||||
# Copy models downloaded via download_deps.py
|
# Copy models downloaded via download_deps.py
|
||||||
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
|
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
|
||||||
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
|
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
|
||||||
cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
|
|
||||||
tar --exclude='.*' -cf - \
|
tar --exclude='.*' -cf - \
|
||||||
/huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
|
/huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
|
||||||
/huggingface.co/InfiniFlow/deepdoc \
|
/huggingface.co/InfiniFlow/deepdoc \
|
||||||
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
|
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
|
||||||
|
|
||||||
|
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/rag,target=/rag \
|
||||||
|
mkdir -p /usr/share/infinity/resource/rag/ && \
|
||||||
|
cp /rag/huqie.txt /usr/share/infinity/resource/rag/
|
||||||
|
|
||||||
# https://github.com/chrismattmann/tika-python
|
# https://github.com/chrismattmann/tika-python
|
||||||
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
|
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
|
||||||
|
|
|
||||||
|
|
@ -8,3 +8,5 @@ COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base
|
||||||
COPY nltk_data /nltk_data
|
COPY nltk_data /nltk_data
|
||||||
|
|
||||||
COPY huggingface.co /huggingface.co
|
COPY huggingface.co /huggingface.co
|
||||||
|
|
||||||
|
COPY resource/rag/ /rag
|
||||||
|
|
|
||||||
|
|
@ -5,11 +5,14 @@
|
||||||
# requires-python = ">=3.10"
|
# requires-python = ">=3.10"
|
||||||
# dependencies = [
|
# dependencies = [
|
||||||
# "nltk",
|
# "nltk",
|
||||||
|
# "huggingface_hub",
|
||||||
|
# "gitpython"
|
||||||
# ]
|
# ]
|
||||||
# ///
|
# ///
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import os
|
import os
|
||||||
|
import git
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
|
|
@ -43,7 +46,6 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
|
||||||
repos = [
|
repos = [
|
||||||
"InfiniFlow/text_concat_xgb_v1.0",
|
"InfiniFlow/text_concat_xgb_v1.0",
|
||||||
"InfiniFlow/deepdoc",
|
"InfiniFlow/deepdoc",
|
||||||
"InfiniFlow/huqie",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -75,3 +77,8 @@ if __name__ == "__main__":
|
||||||
for repo_id in repos:
|
for repo_id in repos:
|
||||||
print(f"Downloading huggingface repo {repo_id}...")
|
print(f"Downloading huggingface repo {repo_id}...")
|
||||||
download_model(repo_id)
|
download_model(repo_id)
|
||||||
|
|
||||||
|
repo_url = "https://github.com/infiniflow/resource.git"
|
||||||
|
clone_dir = os.path.abspath("resource")
|
||||||
|
print(f"Cloning GitHub repo {repo_url}...")
|
||||||
|
repo = git.Repo.clone_from(repo_url, clone_dir)
|
||||||
|
|
|
||||||
555629
rag/res/huqie.txt
555629
rag/res/huqie.txt
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue