diff --git a/Dockerfile b/Dockerfile
index 4b2c922c3..d102c1c57 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -44,7 +44,8 @@ ENV DEBIAN_FRONTEND=noninteractive
 # Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
 RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
     if [ "$NEED_MIRROR" == "1" ]; then \
-        sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
+        sed -i 's|http://ports.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
+        sed -i 's|http://archive.ubuntu.com|http://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
     fi; \
     rm -f /etc/apt/apt.conf.d/docker-clean && \
     echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
diff --git a/download_deps.py b/download_deps.py
index baf65303a..3ada5be2c 100644
--- a/download_deps.py
+++ b/download_deps.py
@@ -13,16 +13,38 @@ from huggingface_hub import snapshot_download
 import nltk
 import os
 import urllib.request
+import argparse
 
-urls = [
-    "http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb",
-    "http://ports.ubuntu.com/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb",
-    "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar",
-    "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar.md5",
-    "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
-    "https://bit.ly/chrome-linux64-121-0-6167-85",
-    "https://bit.ly/chromedriver-linux64-121-0-6167-85",
-]
+def get_urls(use_china_mirrors=False):
+    """Return the list of dependency URLs to download.
+
+    When use_china_mirrors is true, the Ubuntu packages come from
+    mirrors.tuna.tsinghua.edu.cn, the Tika jar from repo.huaweicloud.com,
+    and the Chrome archives from storage.googleapis.com instead of bit.ly.
+
+    NOTE(review): callers that name files after the last URL path segment
+    get different Chrome filenames in the two modes; confirm consumers.
+    """
+    if use_china_mirrors:
+        return [
+            "http://mirrors.tuna.tsinghua.edu.cn/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb",
+            "http://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb",
+            "https://repo.huaweicloud.com/repository/maven/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar",
+            "https://repo.huaweicloud.com/repository/maven/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar.md5",
+            "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
+            "https://storage.googleapis.com/chrome-for-testing-public/121.0.6167.85/linux64/chrome-linux64.zip",
+            "https://storage.googleapis.com/chrome-for-testing-public/121.0.6167.85/linux64/chromedriver-linux64.zip",
+        ]
+    else:
+        return [
+            "http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb",
+            "http://ports.ubuntu.com/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb",
+            "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar",
+            "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar.md5",
+            "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
+            "https://bit.ly/chrome-linux64-121-0-6167-85",
+            "https://bit.ly/chromedriver-linux64-121-0-6167-85",
+        ]
 
 repos = [
     "InfiniFlow/text_concat_xgb_v1.0",
@@ -39,6 +61,12 @@ def download_model(repo_id):
 
 
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Download dependencies with optional China mirror support')
+    parser.add_argument('--china-mirrors', action='store_true', help='Use China-accessible mirrors for downloads')
+    args = parser.parse_args()
+
+    urls = get_urls(args.china_mirrors)
+
     for url in urls:
         filename = url.split("/")[-1]
         print(f"Downloading {url}...")