- Fix uv pip install syntax to use --python flag instead of incorrect venv activation - Add proper CPU-only PyTorch installation in main and mineru environments - Update entrypoint scripts to check for pre-installed packages first - Ensure proper fallback to runtime installation when needed The previous commit only included documentation files, this commit adds the actual implementation.
124 lines
3.6 KiB
Python
124 lines
3.6 KiB
Python
#
|
|
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
import base64
|
|
import hashlib
|
|
import uuid
|
|
import requests
|
|
import threading
|
|
import subprocess
|
|
import sys
|
|
import os
|
|
import logging
|
|
|
|
def get_uuid():
    """Return a freshly generated version-1 UUID as a 32-character hex string."""
    fresh_id = uuid.uuid1()
    return fresh_id.hex
|
|
|
|
|
|
def download_img(url, *, timeout=30):
    """
    Fetch a resource over HTTP and return it encoded as a base64 data URI.

    Args:
        url: Address to download. Any falsy value (``None``, ``""``) skips
            the request entirely.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``
            argument (seconds). Pass ``None`` to wait without limit, which
            was the behaviour before this parameter existed.

    Returns:
        str: ``""`` when ``url`` is falsy; otherwise
        ``"data:<content-type>;base64,<payload>"``, where the content type is
        taken from the response's ``Content-Type`` header and falls back to
        ``image/jpg`` when the header is absent.

    Raises:
        Any exception raised by ``requests.get`` (connection failure,
        timeout, ...) propagates unchanged.

    Note:
        The HTTP status code is not inspected, so the body of an error
        response is encoded just like a successful one.
    """
    if not url:
        return ""

    # Without an explicit timeout, requests blocks indefinitely on a server
    # that accepts the connection but never answers.
    response = requests.get(url, timeout=timeout)

    content_type = response.headers.get('Content-Type', 'image/jpg')
    payload = base64.b64encode(response.content).decode("utf-8")
    return f"data:{content_type};base64,{payload}"
|
|
|
|
|
|
def hash_str2int(line: str, mod: int = 10 ** 8) -> int:
    """
    Map a string to a deterministic integer using SHA-1.

    The UTF-8 encoding of ``line`` is hashed with SHA-1, the 20-byte digest is
    read as one big-endian unsigned integer, and that integer is reduced
    modulo ``mod``.
    """
    raw_digest = hashlib.sha1(line.encode("utf-8")).digest()
    as_integer = int.from_bytes(raw_digest, "big")
    return as_integer % mod
|
|
|
|
def convert_bytes(size_in_bytes: int) -> str:
    """
    Render a byte count as a short human-readable string.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (PB) is reached. The number of decimals depends on the magnitude of
    the scaled value: none for plain bytes or values of at least 100, one for
    values of at least 10, and two otherwise.
    """
    if size_in_bytes == 0:
        return "0 B"

    suffixes = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
    largest = len(suffixes) - 1

    value = float(size_in_bytes)
    idx = 0
    while idx < largest and value >= 1024:
        value /= 1024
        idx += 1

    # Pick the precision first, then format once.
    if idx == 0 or value >= 100:
        decimals = 0
    elif value >= 10:
        decimals = 1
    else:
        decimals = 2

    return f"{value:.{decimals}f} {suffixes[idx]}"
|
|
|
|
|
|
def once(func):
    """
    Thread-safe decorator that runs the decorated function at most once.

    The first call invokes ``func`` and stores its return value; every later
    call returns that stored value without invoking ``func`` again, whatever
    arguments are passed. A lock is held around the check and the call, so
    concurrent first calls cannot both execute ``func``.

    The "already executed" flag is set *before* ``func`` is invoked. If that
    first invocation raises, the exception propagates to the caller, ``func``
    is not retried, and subsequent calls return ``None``.

    The lock is a plain ``threading.Lock`` (not reentrant), so ``func`` must
    not call its own wrapper.

    Args:
        func (callable): The function to be executed only once.

    Returns:
        callable: A wrapper that executes ``func`` on the first call and
        returns the cached result thereafter. The wrapper carries ``func``'s
        name, docstring and other metadata.

    Example:
        @once
        def compute_expensive_value():
            print("Computing...")
            return 42

        # First call: executes and prints
        # Subsequent calls: return 42 without executing
    """
    # Imported here so the decorator does not rely on the module-level
    # import list being extended.
    import functools

    executed = False
    result = None
    lock = threading.Lock()

    @functools.wraps(func)  # keep __name__/__doc__/__wrapped__ of the original
    def wrapper(*args, **kwargs):
        nonlocal executed, result
        with lock:
            if not executed:
                # Flag first: a failing call still counts as "the one run".
                executed = True
                result = func(*args, **kwargs)
        return result

    return wrapper
|
|
|
|
@once
def pip_install_torch():
    """
    Install PyTorch with pip into the running interpreter when required.

    Environment variables read:
        DEVICE: defaults to "cpu". When it equals "cpu" nothing is installed
            and the function returns immediately.
        GPU_PYTORCH: defaults to "false". When it equals "true"
            (case-insensitive) ``torch>=2.5.0,<3.0.0`` is installed from the
            default package index; otherwise ``torch`` and ``torchvision``
            are installed from the PyTorch CPU wheel index.

    pip is invoked as ``sys.executable -m pip install ...`` through
    ``subprocess.check_call``, so a non-zero pip exit status raises
    ``subprocess.CalledProcessError``.
    """
    if os.getenv("DEVICE", "cpu") == "cpu":
        return

    logging.info("Installing pytorch")

    pip_install = [sys.executable, "-m", "pip", "install"]
    torch_spec = "torch>=2.5.0,<3.0.0"

    # GPU wheels are opt-in; anything other than "true" selects the CPU build.
    wants_gpu = os.getenv("GPU_PYTORCH", "false").lower() == "true"

    if not wants_gpu:
        # CPU-only wheels come from a dedicated index and pull no CUDA packages.
        logging.info("Installing CPU-only PyTorch to avoid CUDA dependencies")
        subprocess.check_call(
            pip_install
            + [torch_spec, "torchvision"]
            + ["--index-url", "https://download.pytorch.org/whl/cpu"]
        )
        return

    logging.info("Installing GPU PyTorch (large download with CUDA dependencies)")
    subprocess.check_call(pip_install + [torch_spec])
|