diff --git a/.github/workflows/daily_pypi_download_stats.yaml b/.github/workflows/daily_pypi_download_stats.yaml
new file mode 100644
index 000000000..5192eb960
--- /dev/null
+++ b/.github/workflows/daily_pypi_download_stats.yaml
@@ -0,0 +1,36 @@
+name: analytics | Update Cognee Stats Daily
+
+on:
+  schedule:
+    - cron: '0 1 * * *'  # Runs every day at 01:00 UTC
+
+jobs:
+  update_stats:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Install Dependencies
+        run: |
+          pip install requests posthog
+
+      - name: Run Update Script
+        env:
+          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
+        run: |
+          cd tools  # Change to the 'tools' directory
+          echo "Current working directory after changing to tools:"
+          pwd  # Print the working directory again
+          echo "List of folders in the tools directory:"
+          ls -la  # List all files and folders in the 'tools' directory
+          python daily_pypi_downloads.py  # Run the script
\ No newline at end of file
diff --git a/.github/workflows/daily_twitter_stats.yaml b/.github/workflows/daily_twitter_stats.yaml
new file mode 100644
index 000000000..e6e6e9b93
--- /dev/null
+++ b/.github/workflows/daily_twitter_stats.yaml
@@ -0,0 +1,38 @@
+#name: analytics | Send Twitter Followers to Segment
+#
+#on: pull_request
+#
+##on:
+##  schedule:
+##    - cron: '0 0 * * *'  # Runs daily at midnight UTC. Adjust as needed.
+##  workflow_dispatch:  # Allows manual triggering of the workflow
+#
+#jobs:
+#  send-followers:
+#    runs-on: ubuntu-latest
+#
+#    steps:
+#      - name: Checkout repository
+#        uses: actions/checkout@v3
+#
+#      - name: Set up Python
+#        uses: actions/setup-python@v4
+#        with:
+#          python-version: '3.x'
+#
+#      - name: Install dependencies
+#        run: |
+#          pip install tweepy requests
+#
+#      - name: Send Twitter Followers to Segment
+#        env:
+#          TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }}
+#          TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }}
+#          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
+#          TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
+#          SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }}
+#          TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }}
+#        run: |
+#          cd tools
+#          python daily_twitter_stats.py
+#
diff --git a/.github/workflows/posthog_pipeline.yaml b/.github/workflows/posthog_pipeline.yaml
new file mode 100644
index 000000000..1e34ee8cf
--- /dev/null
+++ b/.github/workflows/posthog_pipeline.yaml
@@ -0,0 +1,44 @@
+name: analytics | Push GitHub Data to PostHog
+
+on:
+  schedule:
+    - cron: '0 0 * * *'  # Runs every day at midnight
+  workflow_dispatch:
+
+jobs:
+  push-data:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests posthog
+
+      - name: Print working directory, list folders, and run script
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+        run: |
+          echo "Current working directory:"
+          pwd  # Print the current working directory
+          echo "List of folders in the current directory:"
+          ls -la  # List all files and folders in the current directory
+          echo "Changing to tools directory..."
+          cd tools  # Change to the 'tools' directory
+          echo "Current working directory after changing to tools:"
+          pwd  # Print the working directory again
+          echo "List of folders in the tools directory:"
+          ls -la  # List all files and folders in the 'tools' directory
+          python push_to_posthog.py  # Run the script
+
diff --git a/.github/workflows/release_discord_action.yml b/.github/workflows/release_discord_action.yml
index faedf42ca..472687214 100644
--- a/.github/workflows/release_discord_action.yml
+++ b/.github/workflows/release_discord_action.yml
@@ -1,4 +1,4 @@
-name: Send Release to Discord
+name: automation | Send Release to Discord
 
 on:
   release:
diff --git a/tools/daily_pypi_downloads.py b/tools/daily_pypi_downloads.py
new file mode 100644
--- /dev/null
+++ b/tools/daily_pypi_downloads.py
@@ -0,0 +1,89 @@
+"""Report the daily PyPI download count of cognee to PostHog.
+
+Each run handles a single day: the day after the date stored in the state
+file or, when there is no state file, yesterday (UTC).
+"""
+import os
+import sys
+import uuid
+from datetime import datetime, timedelta, timezone
+
+import posthog
+import requests
+
+PACKAGE = 'cognee'
+STATE_FILE = 'last_processed_date.txt'
+DATE_FORMAT = '%Y-%m-%d'
+CATEGORY = 'without_mirrors'
+EVENT_NAME = 'cognee_lib_downloads_without_mirrors'
+REQUEST_TIMEOUT = 30  # seconds; fail instead of hanging the CI job
+
+# Both values come from the workflow environment; an unset or empty host
+# falls back to the EU PostHog Cloud endpoint.
+posthog.project_api_key = os.getenv('POSTHOG_API_KEY')
+posthog.host = os.getenv('POSTHOG_API_HOST') or 'https://eu.i.posthog.com'
+
+
+def read_last_processed_date(today):
+    """Return the date stored in the state file, or two days before *today*."""
+    if os.path.exists(STATE_FILE):
+        with open(STATE_FILE, 'r') as f:
+            return datetime.strptime(f.read().strip(), DATE_FORMAT).date()
+    return today - timedelta(days=2)
+
+
+def find_downloads(entries, date_str, category=CATEGORY):
+    """Return the download count for *date_str* and *category*, or None."""
+    # Entries carry a category ('with_mirrors' / 'without_mirrors'), so the
+    # date alone does not identify the row that should be reported.
+    for entry in entries:
+        if entry['date'] == date_str and entry.get('category') == category:
+            return entry['downloads']
+    return None
+
+
+def main():
+    """Fetch one day of download stats and send it to PostHog.
+
+    Returns the process exit code: 0 on success or when there is nothing to
+    do, 1 when the stats could not be fetched or are not available yet.
+    """
+    today = datetime.now(timezone.utc).date()
+    next_date = read_last_processed_date(today) + timedelta(days=1)
+    if next_date >= today:
+        print("No new data to process.")
+        return 0
+
+    date_str = next_date.strftime(DATE_FORMAT)
+    url = f'https://pypistats.org/api/packages/{PACKAGE}/overall'
+    response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    if response.status_code != 200:
+        print(f"Failed to fetch data: {response.status_code}")
+        return 1
+
+    downloads = find_downloads(response.json()['data'], date_str)
+    if downloads is None:
+        print(f"No data available for date {date_str}")
+        return 1
+
+    posthog.capture(
+        distinct_id=str(uuid.uuid4()),
+        event=EVENT_NAME,
+        properties={
+            'category': CATEGORY,
+            'date': date_str,
+            'downloads': downloads,
+        },
+    )
+    # Flush queued events before the process exits.
+    posthog.shutdown()
+    print(f"Data for {date_str} updated in PostHog successfully. Downloads is {downloads}")
+
+    # Record the date only after the event has been handed to PostHog.
+    with open(STATE_FILE, 'w') as f:
+        f.write(date_str)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/daily_twitter_stats.py b/tools/daily_twitter_stats.py
new file mode 100644
--- /dev/null
+++ b/tools/daily_twitter_stats.py
@@ -0,0 +1,83 @@
+"""Send the current Twitter follower count to Segment as a track event."""
+import os
+from datetime import datetime, timezone
+
+import requests
+import tweepy
+
+# Credentials are read from the environment set by the workflow step.
+# GitHub `${{ secrets.* }}` expressions are not expanded in Python files.
+API_KEY = os.getenv('TWITTER_API_KEY')
+API_SECRET = os.getenv('TWITTER_API_SECRET')
+ACCESS_TOKEN = os.getenv('TWITTER_ACCESS_TOKEN')
+ACCESS_SECRET = os.getenv('TWITTER_ACCESS_SECRET')
+USERNAME = os.getenv('TWITTER_USERNAME')
+SEGMENT_WRITE_KEY = os.getenv('SEGMENT_WRITE_KEY')
+
+# Segment endpoint
+SEGMENT_ENDPOINT = 'https://api.segment.io/v1/track'
+REQUEST_TIMEOUT = 30  # seconds
+
+# Tweepy 4 renamed TweepError to TweepyException; support both.
+TWEEPY_ERROR = getattr(tweepy, 'TweepyException', None) or tweepy.TweepError
+
+# Initialize Tweepy API
+auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
+auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
+twitter_api = tweepy.API(auth)
+
+
+def get_follower_count(username):
+    """Return the follower count of *username*, or None if the lookup fails."""
+    try:
+        user = twitter_api.get_user(screen_name=username)
+        return user.followers_count
+    except TWEEPY_ERROR as e:
+        print(f'Error fetching follower count: {e}')
+        return None
+
+
+def send_data_to_segment(username, follower_count):
+    """Post a 'Follower Count Update' track event for *username* to Segment."""
+    current_time = datetime.now(timezone.utc).isoformat()
+
+    data = {
+        'userId': username,
+        'event': 'Follower Count Update',
+        'properties': {
+            'username': username,
+            'follower_count': follower_count,
+            'timestamp': current_time
+        },
+        'timestamp': current_time
+    }
+
+    try:
+        # Segment expects HTTP Basic auth with the write key as the user name
+        # and an empty password; requests does the base64 encoding.
+        response = requests.post(
+            SEGMENT_ENDPOINT,
+            json=data,
+            auth=(SEGMENT_WRITE_KEY, ''),
+            timeout=REQUEST_TIMEOUT,
+        )
+
+        if response.status_code == 200:
+            print(f'Successfully sent data to Segment for {username}')
+        else:
+            print(f'Failed to send data to Segment. Status code: {response.status_code}, Response: {response.text}')
+    except requests.exceptions.RequestException as e:
+        print(f'Error sending data to Segment: {e}')
+
+
+def main():
+    """Fetch the follower count and forward it to Segment."""
+    follower_count = get_follower_count(USERNAME)
+    if follower_count is not None:
+        send_data_to_segment(USERNAME, follower_count)
+    else:
+        print('Failed to retrieve follower count.')
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/push_to_posthog.py b/tools/push_to_posthog.py
new file mode 100644
--- /dev/null
+++ b/tools/push_to_posthog.py
@@ -0,0 +1,80 @@
+"""Push GitHub repository statistics to PostHog."""
+import os
+import uuid
+
+import requests
+from posthog import Posthog
+
+# Get environment variables
+GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
+REPO = os.getenv('GITHUB_REPOSITORY')
+POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')  # Your PostHog Project API Key
+# Host provided by the workflow; unset or empty falls back to the EU cloud.
+POSTHOG_HOST = os.getenv('POSTHOG_HOST') or 'https://eu.i.posthog.com'
+REQUEST_TIMEOUT = 30  # seconds; fail instead of hanging the CI job
+
+headers = {
+    "Authorization": f"token {GITHUB_TOKEN}",
+    "Accept": "application/vnd.github.v3+json"
+}
+
+# Initialize PostHog client. The key is passed positionally so the call does
+# not depend on the name of the constructor's first parameter.
+posthog = Posthog(POSTHOG_API_KEY, host=POSTHOG_HOST)
+
+posthog.debug = True
+
+
+def get_repo_info():
+    """Return the GitHub API description of REPO as a dict, or None on error."""
+    url = f"https://api.github.com/repos/{REPO}"
+    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
+    if response.status_code == 200:
+        return response.json()
+    print(f"Error fetching repo info: {response.status_code}")
+    return None
+
+
+def main():
+    """Collect repository statistics and send them to PostHog as one event."""
+    try:
+        repo_info = get_repo_info()
+        if not repo_info:
+            print("Failed to retrieve repository information.")
+            return
+
+        # Prepare data to send to PostHog
+        properties = {
+            'repo_name': repo_info.get('full_name'),
+            'stars': repo_info.get('stargazers_count'),
+            'forks': repo_info.get('forks_count'),
+            'open_issues': repo_info.get('open_issues_count'),
+            'watchers': repo_info.get('subscribers_count'),
+            'created_at': repo_info.get('created_at'),
+            'updated_at': repo_info.get('updated_at'),
+            'pushed_at': repo_info.get('pushed_at'),
+            'language': repo_info.get('language'),
+            # A missing or empty 'license' value yields None.
+            'license': (repo_info.get('license') or {}).get('name'),
+            'topics': repo_info.get('topics')
+        }
+
+        print("Repository information: ", properties)
+
+        # Send event to PostHog under a fresh random identifier
+        result = posthog.capture(
+            distinct_id=str(uuid.uuid4()),
+            event='cognee_lib_github_repo_stats',
+            properties=properties
+        )
+
+        print("PostHog response: ", result)
+
+        print("Data sent to PostHog successfully.")
+    finally:
+        # Flush queued events and close the client even if a step failed.
+        posthog.shutdown()
+
+
+if __name__ == "__main__":
+    main()