diff --git a/.github/workflows/daily_pypi_download_stats.yaml b/.github/workflows/daily_pypi_download_stats.yaml
new file mode 100644
index 000000000..ab4c8051d
--- /dev/null
+++ b/.github/workflows/daily_pypi_download_stats.yaml
@@ -0,0 +1,33 @@
+name: Update Cognee Stats Daily
+
+on:
+  schedule:
+    - cron: '0 1 * * *' # Runs every day at 01:00 UTC
+  workflow_dispatch: # Allows manual triggering, consistent with the other stats workflows
+
+jobs:
+  update_stats:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Install Dependencies
+        run: |
+          pip install requests posthog
+
+      - name: Run Update Script
+        env:
+          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
+        run: |
+          cd tools # Change to the 'tools' directory
+          python daily_pypi_downloads.py # Run the script
diff --git a/.github/workflows/daily_twitter_stats.yaml b/.github/workflows/daily_twitter_stats.yaml
new file mode 100644
index 000000000..7100d2b4d
--- /dev/null
+++ b/.github/workflows/daily_twitter_stats.yaml
@@ -0,0 +1,36 @@
+name: Send Twitter Followers to Segment
+
+on:
+  schedule:
+    - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed.
+  workflow_dispatch: # Allows manual triggering of the workflow
+
+jobs:
+  send-followers:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Install dependencies
+        run: |
+          pip install tweepy requests
+
+      - name: Send Twitter Followers to Segment
+        env:
+          TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }}
+          TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }}
+          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
+          TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
+          SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }}
+          TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }}
+        run: |
+          cd tools # script is expected to read the credentials from the env block above
+          python daily_twitter_stats.py
+
diff --git a/.github/workflows/historical_pypi_download_stats.yaml b/.github/workflows/historical_pypi_download_stats.yaml
new file mode 100644
index 000000000..9c0da5357
--- /dev/null
+++ b/.github/workflows/historical_pypi_download_stats.yaml
@@ -0,0 +1,29 @@
+name: Historical Import of Cognee Stats
+
+on:
+  workflow_dispatch: # manual trigger only — one-off historical backfill
+
+jobs:
+  import_stats:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Install Dependencies
+        run: |
+          pip install requests posthog
+
+      - name: Run Historical Import Script
+        env:
+          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
+        run: |
+          cd tools # Change to the 'tools' directory
+          python historical_import_cognee_stats.py
diff --git a/tools/daily_pypi_downloads.py b/tools/daily_pypi_downloads.py
new file mode 100644
index 000000000..d57092c3b
--- /dev/null
+++ b/tools/daily_pypi_downloads.py
@@ -0,0 +1,77 @@
+import uuid
+
+import requests
+import posthog
+import os
+from datetime import datetime, timedelta
+
+# Replace with your PostHog Project API Key
+POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')
+POSTHOG_API_HOST = os.getenv('POSTHOG_API_HOST', 'https://eu.i.posthog.com')  # workflow passes this secret; fall back to EU cloud
+
+# Initialize PostHog client
+posthog.project_api_key = POSTHOG_API_KEY
+posthog.host = POSTHOG_API_HOST
+
+# Read last processed date from file. NOTE(review): the Actions runner checks out a fresh workspace each run, so this file may never persist — confirm.
+state_file = 'last_processed_date.txt'
+if os.path.exists(state_file):
+    with open(state_file, 'r') as f:
+        last_processed_date = f.read().strip()
+    last_processed_date = datetime.strptime(last_processed_date, '%Y-%m-%d')
+else:
+    # If no state file, start from 2 days ago
+    last_processed_date = datetime.utcnow() - timedelta(days=2)
+
+# Calculate the next date to process
+next_date = last_processed_date + timedelta(days=1)
+today = datetime.utcnow().date()  # naive UTC; utcnow() is deprecated on newer Pythons
+
+if next_date.date() >= today:
+    print("No new data to process.")
+    exit(0)
+
+date_str = next_date.strftime('%Y-%m-%d')
+
+# Fetch download data for the date
+package = 'cognee'
+url = f'https://pypistats.org/api/packages/{package}/overall'
+
+response = requests.get(url)
+if response.status_code != 200:
+    print(f"Failed to fetch data: {response.status_code}")
+    exit(1)
+
+data = response.json()
+
+# Find the entry for the date we want
+downloads = None
+for entry in data['data']:
+    if entry['date'] == date_str:
+        downloads = entry['downloads']
+        break
+
+if downloads is None:
+    print(f"No data available for date {date_str}")
+    exit(1)
+
+# Create a unique message_id
+message_id = f"cognee_downloads_{date_str}"  # NOTE(review): unused — posthog.capture below takes no message_id
+
+distinct_id = str(uuid.uuid4())
+
+# Send an event to PostHog
+posthog.capture(
+    distinct_id=distinct_id,
+    event='cognee_downloads',
+    properties={
+        'date': date_str,
+        'downloads': downloads,
+    }
+)
+
+print(f"Data for {date_str} updated in PostHog successfully.")
+
+# Update the state file
+with open(state_file, 'w') as f:
+    f.write(date_str)
diff --git a/tools/daily_twitter_stats.py b/tools/daily_twitter_stats.py
new file mode 100644
index 000000000..43bedda7b
--- /dev/null
+++ b/tools/daily_twitter_stats.py
@@ -0,0 +1,66 @@
+import os
+import tweepy
+import requests
+import json
+from datetime import datetime
+
+# Twitter API credentials, read from the environment (exported by the workflow's env block)
+API_KEY = os.getenv('TWITTER_API_KEY')
+API_SECRET = os.getenv('TWITTER_API_SECRET')
+ACCESS_TOKEN = os.getenv('TWITTER_ACCESS_TOKEN')
+ACCESS_SECRET = os.getenv('TWITTER_ACCESS_SECRET')
+USERNAME = os.getenv('TWITTER_USERNAME')
+SEGMENT_WRITE_KEY = os.getenv('SEGMENT_WRITE_KEY')
+
+# Initialize Tweepy API
+auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
+twitter_api = tweepy.API(auth)
+
+# Segment endpoint
+SEGMENT_ENDPOINT = 'https://api.segment.io/v1/track'
+
+
+def get_follower_count(username):
+    try:
+        user = twitter_api.get_user(screen_name=username)  # v1.1 endpoint; NOTE(review): may require elevated API access — confirm
+        return user.followers_count
+    except tweepy.TweepyException as e:  # tweepy.TweepError was removed in tweepy 4.x
+        print(f'Error fetching follower count: {e}')
+        return None
+
+
+def send_data_to_segment(username, follower_count):
+    current_time = datetime.now().isoformat()
+
+    data = {
+        'userId': username,
+        'event': 'Follower Count Update',
+        'properties': {
+            'username': username,
+            'follower_count': follower_count,
+            'timestamp': current_time
+        },
+        'timestamp': current_time
+    }
+
+    headers = {
+        'Content-Type': 'application/json'
+    }
+
+    try:
+        # Segment expects HTTP Basic auth: write key as username, empty password (requests base64-encodes it).
+        response = requests.post(SEGMENT_ENDPOINT, headers=headers, data=json.dumps(data), auth=(SEGMENT_WRITE_KEY, ''))
+        if response.status_code == 200:
+            print(f'Successfully sent data to Segment for {username}')
+        else:
+            print(f'Failed to send data to Segment. Status code: {response.status_code}, Response: {response.text}')
+    except requests.exceptions.RequestException as e:
+        print(f'Error sending data to Segment: {e}')
+
+
+follower_count = get_follower_count(USERNAME)
+if follower_count is not None:
+    send_data_to_segment(USERNAME, follower_count)
+else:
+    print('Failed to retrieve follower count.')
diff --git a/tools/historical_import_cognee_stats.py b/tools/historical_import_cognee_stats.py
new file mode 100644
index 000000000..68f864e8a
--- /dev/null
+++ b/tools/historical_import_cognee_stats.py
@@ -0,0 +1,58 @@
+import uuid
+
+import requests
+import posthog
+import os
+from datetime import datetime, timedelta
+
+# Replace with your PostHog Project API Key
+POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')
+POSTHOG_API_HOST = os.getenv('POSTHOG_API_HOST', 'https://eu.i.posthog.com')  # workflow passes this secret; fall back to EU cloud
+
+# Initialize PostHog client
+posthog.project_api_key = POSTHOG_API_KEY
+posthog.host = POSTHOG_API_HOST
+
+# Fetch historical download data for the last 180 days
+package = 'cognee'
+url = f'https://pypistats.org/api/packages/{package}/overall'
+
+response = requests.get(url)
+if response.status_code != 200:
+    print(f"Failed to fetch data: {response.status_code}")
+    exit(1)
+
+data = response.json()
+
+# Exclude today and yesterday
+today = datetime.utcnow().date()
+yesterday = today - timedelta(days=1)
+
+# Process and send data to PostHog
+for entry in data['data']:
+    date_str = entry['date']
+    date_obj = datetime.strptime(date_str, '%Y-%m-%d').date()
+    downloads = entry['downloads']
+
+    # Skip today and yesterday
+    if date_obj >= yesterday:
+        continue
+
+    # Create a unique message_id
+    message_id = f"cognee_downloads_{date_str}"  # NOTE(review): unused — posthog.capture below takes no message_id
+
+    distinct_id = str(uuid.uuid4())
+
+    # Send an event to PostHog
+    posthog.capture(
+        distinct_id=distinct_id,
+        event='cognee_downloads',
+        properties={
+            'date': date_str,
+            'downloads': downloads,
+        }
+    )
+
+    print(f"Data for {date_str} imported successfully.")
+
+print("Historical data import completed.")