added automation and test fixes
parent 1b30be1a77, commit 5440fa766c

5 changed files with 812 additions and 30 deletions

.github/workflows/e2e_tests.yml (vendored): 2 changed lines

@@ -297,7 +297,7 @@ jobs:
       - name: Run Knowledge Graph Quality Test
         env:
           ENV: 'dev'
-          # Model selection is handled by the test with fallback priority
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
           LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
           LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
           LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}

.github/workflows/publish_pypi.yml (vendored, new file): 174 additions

@@ -0,0 +1,174 @@
name: Publish to PyPI

on:
  release:
    types: [published]
  workflow_dispatch:
    inputs:
      test_pypi:
        description: 'Publish to Test PyPI instead of PyPI'
        required: false
        type: boolean
        default: false

permissions:
  contents: read
  id-token: write  # Required for trusted publishing and attestations
  attestations: write  # Required for package attestations

jobs:
  security-scan:
    name: Security Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install uv
          uv sync --dev

      - name: Run safety check for known vulnerabilities
        run: |
          pip install safety
          safety check --json > safety-report.json || true

      - name: Run bandit security linter
        run: |
          pip install bandit
          bandit -r cognee/ -f json -o bandit-report.json || true

      - name: Upload security reports as artifacts
        uses: actions/upload-artifact@v4
        with:
          name: security-reports
          path: |
            safety-report.json
            bandit-report.json

      - name: Check for high-severity vulnerabilities
        run: |
          # Fail if high-severity vulnerabilities are found
          if [ -f safety-report.json ]; then
            python -c "
          import json
          import sys
          try:
              with open('safety-report.json', 'r') as f:
                  data = json.load(f)
              if isinstance(data, list) and len(data) > 0:
                  high_severity = [v for v in data if v.get('severity', '').lower() in ['high', 'critical']]
                  if high_severity:
                      print('HIGH SEVERITY VULNERABILITIES FOUND:')
                      for vuln in high_severity:
                          print(f' - {vuln.get(\"vulnerability\", \"Unknown\")} in {vuln.get(\"package\", \"Unknown\")}')
                      sys.exit(1)
          except Exception as e:
              print(f'Error parsing safety report: {e}')
              pass
          "
          fi

  build-and-publish:
    name: Build and publish to PyPI
    needs: security-scan
    runs-on: ubuntu-latest
    environment:
      name: ${{ github.event.inputs.test_pypi == 'true' && 'testpypi' || 'pypi' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build twine hatchling

      - name: Build package
        run: |
          python -m build

      - name: Generate package hashes
        run: |
          cd dist
          sha256sum * > SHA256SUMS
          sha512sum * > SHA512SUMS
          echo "Generated checksums:"
          cat SHA256SUMS
          cat SHA512SUMS

      - name: Verify package integrity
        run: |
          cd dist
          sha256sum -c SHA256SUMS
          sha512sum -c SHA512SUMS
          echo "Package integrity verified"

      - name: Check package with twine
        run: |
          twine check dist/*

      - name: Generate SBOM (Software Bill of Materials)
        run: |
          pip install cyclonedx-bom
          cyclonedx-py requirements -o cognee-sbom.json

      - name: Upload build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist-files
          path: |
            dist/
            cognee-sbom.json

      - name: Generate attestations for built packages
        uses: actions/attest-build-provenance@v1
        with:
          subject-path: 'dist/*'

      - name: Publish to Test PyPI
        if: github.event.inputs.test_pypi == 'true'
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
          attestations: true

      - name: Publish to PyPI
        if: github.event.inputs.test_pypi != 'true'
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          attestations: true

      - name: Create release with hashes
        if: github.event_name == 'release'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Upload hash files to the release
          gh release upload ${{ github.event.release.tag_name }} \
            dist/SHA256SUMS \
            dist/SHA512SUMS \
            cognee-sbom.json \
            --clobber

      - name: Security notice
        run: |
          echo "::notice::Package published successfully with security attestations"
          echo "::notice::Checksums and SBOM uploaded to release assets"
          echo "::notice::Users can verify package integrity using the provided checksums"
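
Note on the last step above: the published SHA256SUMS release asset lets end users check a downloaded artifact themselves. A minimal sketch of such a check is shown below; the file names are illustrative, not taken from an actual release, and scripts/verify_package.py in this commit automates the same idea.

import hashlib

def sha256_of(path: str) -> str:
    # Stream the file so large wheels/sdists do not have to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            h.update(chunk)
    return h.hexdigest()

# Parse the SHA256SUMS file downloaded from the GitHub release assets.
expected = {}
with open("SHA256SUMS") as f:
    for line in f:
        digest, name = line.split(maxsplit=1)
        expected[name.strip().lstrip("*")] = digest  # "*" marks binary-mode entries

artifact = "cognee-0.2.1-py3-none-any.whl"  # hypothetical file name
print("integrity OK" if sha256_of(artifact) == expected.get(artifact) else "hash mismatch")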

.github/workflows/security_verification.yml (vendored, new file): 290 additions

@@ -0,0 +1,290 @@
name: Security Verification

on:
  push:
    branches: [ main, dev ]
  pull_request:
    branches: [ main, dev ]
  schedule:
    - cron: '0 2 * * 0'  # Weekly security scan on Sundays at 2 AM UTC
  workflow_dispatch:

permissions:
  contents: read
  security-events: write
  actions: read

jobs:
  dependency-scan:
    name: Dependency Vulnerability Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install uv
          uv sync --dev

      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          scan-type: 'fs'
          scan-ref: '.'
          format: 'sarif'
          output: 'trivy-results.sarif'

      - name: Upload Trivy scan results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@v3
        if: always()
        with:
          sarif_file: 'trivy-results.sarif'

      - name: Run pip-audit for Python vulnerabilities
        run: |
          pip install pip-audit
          pip-audit --format=json --output=pip-audit-results.json || true

      - name: Check for critical vulnerabilities
        run: |
          python -c "
          import json
          import sys
          try:
              with open('pip-audit-results.json', 'r') as f:
                  data = json.load(f)
              vulns = data.get('vulnerabilities', [])
              critical_vulns = [v for v in vulns if v.get('aliases', []) and any('CVE' in alias for alias in v['aliases'])]
              if critical_vulns:
                  print('CRITICAL VULNERABILITIES FOUND:')
                  for vuln in critical_vulns:
                      print(f' - {vuln.get(\"id\", \"Unknown\")} in {vuln.get(\"package\", \"Unknown\")}')
                  sys.exit(1)
          except (FileNotFoundError, json.JSONDecodeError):
              print('No vulnerabilities file found or invalid format')
              pass
          "

      - name: Upload vulnerability reports
        uses: actions/upload-artifact@v4
        with:
          name: vulnerability-reports
          path: |
            trivy-results.sarif
            pip-audit-results.json

  code-quality-scan:
    name: Code Quality & Security Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install analysis tools
        run: |
          python -m pip install --upgrade pip
          pip install bandit[toml] semgrep safety

      - name: Run Bandit security linter
        run: |
          bandit -r cognee/ -f json -o bandit-report.json || true
          bandit -r cognee/ -f txt || true

      - name: Run Semgrep security analysis
        run: |
          semgrep --config=auto --json --output=semgrep-results.json cognee/ || true

      - name: Run Safety check
        run: |
          safety check --json --output safety-results.json || true

      - name: Upload security scan results
        uses: actions/upload-artifact@v4
        with:
          name: security-analysis
          path: |
            bandit-report.json
            semgrep-results.json
            safety-results.json

  package-integrity:
    name: Package Integrity & Signing
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build twine hatchling

      - name: Build package
        run: |
          python -m build

      - name: Generate package hashes
        run: |
          cd dist
          sha256sum * > SHA256SUMS
          sha512sum * > SHA512SUMS
          md5sum * > MD5SUMS
          echo "Generated checksums:"
          cat SHA256SUMS

      - name: Import GPG key
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        env:
          GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }}
          GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
        run: |
          if [ -n "$GPG_PRIVATE_KEY" ]; then
            echo "$GPG_PRIVATE_KEY" | gpg --batch --import
            echo "GPG key imported successfully"
            # List imported keys for verification
            gpg --list-secret-keys --keyid-format LONG
          else
            echo "GPG_PRIVATE_KEY not set, skipping GPG signing"
          fi

      - name: Sign packages with GPG
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        env:
          GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
        run: |
          if [ -n "$GPG_PASSPHRASE" ]; then
            cd dist
            for file in *; do
              if [ -f "$file" ]; then
                echo "Signing $file..."
                gpg --batch --yes --passphrase "$GPG_PASSPHRASE" --detach-sign --armor "$file"
                echo "Created signature: $file.asc"
              fi
            done
            # Sign the checksum files
            gpg --batch --yes --passphrase "$GPG_PASSPHRASE" --detach-sign --armor SHA256SUMS
            gpg --batch --yes --passphrase "$GPG_PASSPHRASE" --detach-sign --armor SHA512SUMS
            echo "All files signed successfully"
          else
            echo "GPG_PASSPHRASE not set, skipping signing"
          fi

      - name: Verify signatures
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        run: |
          cd dist
          for sig_file in *.asc; do
            if [ -f "$sig_file" ]; then
              echo "Verifying signature: $sig_file"
              gpg --verify "$sig_file"
            fi
          done

      - name: Upload signed packages
        uses: actions/upload-artifact@v4
        with:
          name: signed-packages
          path: |
            dist/
          retention-days: 30

  security-policy-check:
    name: Security Policy Compliance
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Check for security policy files
        run: |
          echo "Checking for security policy files..."

          # Check for SECURITY.md
          if [ -f "SECURITY.md" ]; then
            echo "✓ SECURITY.md found"
          else
            echo "✗ SECURITY.md not found"
            exit 1
          fi

          # Check for CODE_OF_CONDUCT.md
          if [ -f "CODE_OF_CONDUCT.md" ]; then
            echo "✓ CODE_OF_CONDUCT.md found"
          else
            echo "✗ CODE_OF_CONDUCT.md not found"
            exit 1
          fi

          # Check for LICENSE file
          if [ -f "LICENSE" ] || [ -f "LICENSE.md" ] || [ -f "LICENSE.txt" ]; then
            echo "✓ LICENSE file found"
          else
            echo "✗ LICENSE file not found"
            exit 1
          fi

      - name: Validate Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install uv
          uv sync --dev

          # Check for pinned dependencies in production
          echo "Checking for properly pinned dependencies..."
          python -c "
          import tomllib
          with open('pyproject.toml', 'rb') as f:
              data = tomllib.load(f)

          deps = data.get('project', {}).get('dependencies', [])
          unpinned = []
          for dep in deps:
              if '>=' in dep and '<' not in dep:
                  unpinned.append(dep)

          if unpinned:
              print('WARNING: Unpinned dependencies found:')
              for dep in unpinned:
                  print(f' - {dep}')
          else:
              print('✓ All dependencies properly version-constrained')
          "

      - name: Check for secrets in code
        run: |
          pip install detect-secrets
          detect-secrets scan --all-files --baseline .secrets.baseline || true

          # Basic regex checks for common secrets
          echo "Checking for potential secrets..."
          if grep -r "password\s*=" . --include="*.py" --include="*.yml" --include="*.yaml" | grep -v ".git" | grep -v "example" | grep -v "test"; then
            echo "WARNING: Potential hardcoded passwords found"
          fi

          if grep -r "api_key\s*=" . --include="*.py" --include="*.yml" --include="*.yaml" | grep -v ".git" | grep -v "example" | grep -v "test"; then
            echo "WARNING: Potential hardcoded API keys found"
          fi

@@ -12,40 +12,21 @@ logger = get_logger()
 async def test_knowledge_graph_quality_with_gpt4o():
     """
     Test that verifies all main concepts and entities from a specific document are found
-    in the knowledge graph using GPT-4o model for high-quality entity extraction.
+    in the knowledge graph using the configured LLM model for entity extraction.
 
     This test addresses the issue where HotPotQA questions may not reflect diminishing
     quality of knowledge graph creation after data model changes.
-    """
 
-    # Configure model with fallback for better availability
-    preferred_models = ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "gpt-3.5-turbo"]
-    selected_model = None
+    The model is configured via the LLM_MODEL environment variable.
+    """
 
     # Ensure we have API key
     if not os.environ.get("LLM_API_KEY"):
         raise ValueError("LLM_API_KEY must be set for this test")
 
-    # Try to find an available model by testing actual availability
-    from cognee.infrastructure.llm.utils import test_llm_connection
-
-    for model in preferred_models:
-        try:
-            os.environ["LLM_MODEL"] = model
-            cognee.config.set_llm_model(model)
-
-            # Test the model availability
-            await test_llm_connection()
-
-            selected_model = model
-            print(f"Successfully using model: {model}")
-            break
-        except Exception as e:
-            print(f"Model {model} not available: {e}")
-            continue
-
-    if not selected_model:
-        raise ValueError("No suitable model available from: " + ", ".join(preferred_models))
+    # Get model from environment variable
+    current_model = os.environ.get("LLM_MODEL", "gpt-4o")
+    print(f"Using model from environment: {current_model}")
 
     # Set up test directories
     data_directory_path = str(
@@ -253,17 +234,17 @@ async def test_knowledge_graph_quality_with_gpt4o():
 
     print("QUALITY ASSESSMENT:")
     print("-" * 40)
-    print(f"Model used: {selected_model}")
+    print(f"Model used: {current_model}")
     print()
 
     # Adjust quality thresholds based on model capability
-    if selected_model == "gpt-4o":
+    if current_model == "gpt-4o":
         min_entity_coverage = 0.70  # At least 70% of entities should be found
         min_concept_coverage = 0.60  # At least 60% of concepts should be found
-    elif selected_model == "gpt-4o-mini":
+    elif current_model == "gpt-4o-mini":
         min_entity_coverage = 0.65  # Slightly lower for mini model
         min_concept_coverage = 0.55  # Slightly lower for mini model
-    elif selected_model == "gpt-4-turbo":
+    elif current_model == "gpt-4-turbo":
         min_entity_coverage = 0.68  # Good performance expected
         min_concept_coverage = 0.58  # Good performance expected
     else:  # gpt-3.5-turbo or other models

scripts/verify_package.py (new file): 337 additions

@@ -0,0 +1,337 @@
#!/usr/bin/env python3
"""
Cognee Package Verification Script

This script helps users verify the integrity and authenticity of Cognee packages
by checking hashes, GPG signatures, and package metadata.

Usage:
    python verify_package.py [package_file] [--check-all] [--verbose]

Examples:
    python verify_package.py cognee-0.2.1.tar.gz
    python verify_package.py --check-all --verbose
    python verify_package.py cognee-0.2.1-py3-none-any.whl --verify-signature
"""

import os
import sys
import hashlib
import json
import argparse
import subprocess
import tempfile
import urllib.request
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import zipfile
import tarfile


class PackageVerifier:
    """Handles package verification operations."""

    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        self.github_api_base = "https://api.github.com/repos/topoteretes/cognee"
        self.github_releases_base = "https://github.com/topoteretes/cognee/releases"

    def log(self, message: str, level: str = "INFO"):
        """Log messages with different levels."""
        if self.verbose or level in ["ERROR", "WARNING"]:
            print(f"[{level}] {message}")

    def calculate_hash(self, file_path: str, algorithm: str = "sha256") -> str:
        """Calculate hash of a file."""
        hash_obj = hashlib.new(algorithm)

        try:
            with open(file_path, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    hash_obj.update(chunk)
            return hash_obj.hexdigest()
        except Exception as e:
            self.log(f"Error calculating {algorithm} hash: {e}", "ERROR")
            return ""

    def verify_hash(self, file_path: str, expected_hash: str, algorithm: str = "sha256") -> bool:
        """Verify file hash against expected value."""
        calculated_hash = self.calculate_hash(file_path, algorithm)

        if not calculated_hash:
            return False

        match = calculated_hash.lower() == expected_hash.lower()

        if match:
            self.log(f"✓ {algorithm.upper()} hash verification PASSED", "INFO")
        else:
            self.log(f"✗ {algorithm.upper()} hash verification FAILED", "ERROR")
            self.log(f"  Expected: {expected_hash}", "ERROR")
            self.log(f"  Calculated: {calculated_hash}", "ERROR")

        return match

    def verify_gpg_signature(self, file_path: str, signature_path: str) -> bool:
        """Verify GPG signature of a file."""
        try:
            # Check if gpg is available
            subprocess.run(
                ["gpg", "--version"],
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except (subprocess.CalledProcessError, FileNotFoundError):
            self.log("GPG not found. Please install GPG to verify signatures.", "WARNING")
            return False

        if not os.path.exists(signature_path):
            self.log(f"Signature file not found: {signature_path}", "WARNING")
            return False

        try:
            result = subprocess.run(
                ["gpg", "--verify", signature_path, file_path], capture_output=True, text=True
            )

            if result.returncode == 0:
                self.log("✓ GPG signature verification PASSED", "INFO")
                return True
            else:
                self.log("✗ GPG signature verification FAILED", "ERROR")
                self.log(f"GPG error: {result.stderr}", "ERROR")
                return False
        except Exception as e:
            self.log(f"Error verifying GPG signature: {e}", "ERROR")
            return False

    def get_latest_release_info(self) -> Optional[Dict]:
        """Get latest release information from GitHub API."""
        try:
            url = f"{self.github_api_base}/releases/latest"
            with urllib.request.urlopen(url) as response:
                return json.loads(response.read())
        except Exception as e:
            self.log(f"Error fetching release info: {e}", "ERROR")
            return None

    def download_checksum_file(
        self, release_info: Dict, checksum_type: str = "SHA256SUMS"
    ) -> Optional[str]:
        """Download checksum file from GitHub release."""
        for asset in release_info.get("assets", []):
            if asset["name"] == checksum_type:
                try:
                    with tempfile.NamedTemporaryFile(
                        mode="w+", delete=False, suffix=f".{checksum_type}"
                    ) as tmp:
                        with urllib.request.urlopen(asset["browser_download_url"]) as response:
                            tmp.write(response.read().decode("utf-8"))
                        return tmp.name
                except Exception as e:
                    self.log(f"Error downloading {checksum_type}: {e}", "ERROR")
                    return None
        return None

    def parse_checksum_file(self, checksum_file: str) -> Dict[str, str]:
        """Parse checksum file and return filename -> hash mapping."""
        checksums = {}
        try:
            with open(checksum_file, "r") as f:
                for line in f:
                    line = line.strip()
                    if line and not line.startswith("#"):
                        parts = line.split()
                        if len(parts) >= 2:
                            hash_value = parts[0]
                            filename = parts[1].lstrip("*")  # Remove binary mode indicator
                            checksums[filename] = hash_value
        except Exception as e:
            self.log(f"Error parsing checksum file: {e}", "ERROR")
        return checksums

    def verify_package_metadata(self, package_path: str) -> bool:
        """Verify package metadata and structure."""
        self.log(f"Verifying package metadata for: {package_path}")

        if package_path.endswith(".whl"):
            return self._verify_wheel_metadata(package_path)
        elif package_path.endswith(".tar.gz"):
            return self._verify_tarball_metadata(package_path)
        else:
            self.log(f"Unsupported package format: {package_path}", "WARNING")
            return False

    def _verify_wheel_metadata(self, wheel_path: str) -> bool:
        """Verify wheel package metadata."""
        try:
            with zipfile.ZipFile(wheel_path, "r") as wheel:
                # Check for required metadata files
                required_files = ["METADATA", "WHEEL"]
                metadata_files = [
                    f for f in wheel.namelist() if any(req in f for req in required_files)
                ]

                if not metadata_files:
                    self.log("✗ Required metadata files not found in wheel", "ERROR")
                    return False

                # Read and validate METADATA
                metadata_content = None
                for file in wheel.namelist():
                    if file.endswith("METADATA"):
                        metadata_content = wheel.read(file).decode("utf-8")
                        break

                if metadata_content:
                    if "Name: cognee" in metadata_content:
                        self.log("✓ Package metadata verification PASSED", "INFO")
                        return True
                    else:
                        self.log("✗ Package name verification FAILED", "ERROR")
                        return False

        except Exception as e:
            self.log(f"Error verifying wheel metadata: {e}", "ERROR")
            return False

        return False

    def _verify_tarball_metadata(self, tarball_path: str) -> bool:
        """Verify tarball package metadata."""
        try:
            with tarfile.open(tarball_path, "r:gz") as tar:
                # Look for PKG-INFO or pyproject.toml
                metadata_files = [
                    f for f in tar.getnames() if "PKG-INFO" in f or "pyproject.toml" in f
                ]

                if not metadata_files:
                    self.log("✗ No metadata files found in tarball", "ERROR")
                    return False

                # Check PKG-INFO if available
                for file in metadata_files:
                    if "PKG-INFO" in file:
                        member = tar.getmember(file)
                        content = tar.extractfile(member).read().decode("utf-8")
                        if "Name: cognee" in content:
                            self.log("✓ Package metadata verification PASSED", "INFO")
                            return True

        except Exception as e:
            self.log(f"Error verifying tarball metadata: {e}", "ERROR")
            return False

        return False

    def verify_package(self, package_path: str, verify_signature: bool = False) -> bool:
        """Comprehensive package verification."""
        if not os.path.exists(package_path):
            self.log(f"Package file not found: {package_path}", "ERROR")
            return False

        self.log(f"Starting verification of: {package_path}")
        verification_results = []

        # 1. Verify package metadata
        metadata_ok = self.verify_package_metadata(package_path)
        verification_results.append(metadata_ok)

        # 2. Get release info and checksums
        release_info = self.get_latest_release_info()
        if not release_info:
            self.log("Could not fetch release information", "WARNING")
            return all(verification_results)

        # 3. Download and verify checksums
        checksum_file = self.download_checksum_file(release_info, "SHA256SUMS")
        if checksum_file:
            checksums = self.parse_checksum_file(checksum_file)
            filename = os.path.basename(package_path)

            if filename in checksums:
                hash_ok = self.verify_hash(package_path, checksums[filename], "sha256")
                verification_results.append(hash_ok)
            else:
                self.log(f"No checksum found for {filename}", "WARNING")

            os.unlink(checksum_file)  # Clean up temp file

        # 4. Verify GPG signature if requested
        if verify_signature:
            signature_path = f"{package_path}.asc"
            if os.path.exists(signature_path):
                sig_ok = self.verify_gpg_signature(package_path, signature_path)
                verification_results.append(sig_ok)
            else:
                self.log(f"Signature file not found: {signature_path}", "WARNING")

        # Overall result
        all_passed = all(verification_results)
        if all_passed:
            self.log("🎉 Package verification PASSED", "INFO")
        else:
            self.log("❌ Package verification FAILED", "ERROR")

        return all_passed

    def verify_all_packages(self, directory: str = ".", verify_signature: bool = False) -> bool:
        """Verify all Cognee packages in a directory."""
        package_files = []

        for file in os.listdir(directory):
            if file.startswith("cognee") and (file.endswith(".whl") or file.endswith(".tar.gz")):
                package_files.append(os.path.join(directory, file))

        if not package_files:
            self.log("No Cognee packages found in directory", "WARNING")
            return False

        all_results = []
        for package_file in package_files:
            self.log(f"\n{'=' * 60}")
            result = self.verify_package(package_file, verify_signature)
            all_results.append(result)

        return all(all_results)


def main():
    parser = argparse.ArgumentParser(description="Verify Cognee package integrity and authenticity")
    parser.add_argument("package", nargs="?", help="Path to package file to verify")
    parser.add_argument(
        "--check-all", action="store_true", help="Verify all packages in current directory"
    )
    parser.add_argument(
        "--verify-signature", action="store_true", help="Also verify GPG signatures"
    )
    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")

    args = parser.parse_args()

    if not args.package and not args.check_all:
        parser.print_help()
        sys.exit(1)

    verifier = PackageVerifier(verbose=args.verbose)

    try:
        if args.check_all:
            success = verifier.verify_all_packages(".", args.verify_signature)
        else:
            success = verifier.verify_package(args.package, args.verify_signature)

        sys.exit(0 if success else 1)

    except KeyboardInterrupt:
        print("\nVerification interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
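
For illustration only: besides the CLI shown in the script's docstring, the PackageVerifier class above could be driven programmatically, for example from a release-automation helper. A minimal sketch, assuming the module is importable from the scripts/ directory and that the package file exists locally; the file name is hypothetical.

from verify_package import PackageVerifier

verifier = PackageVerifier(verbose=True)
print(verifier.calculate_hash("cognee-0.2.1.tar.gz"))  # local SHA-256 of the artifact
ok = verifier.verify_package("cognee-0.2.1.tar.gz", verify_signature=True)
raise SystemExit(0 if ok else 1)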