From e815a20dd3a450e6d4dfa2925db47f2ed75efe46 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:38:50 +0200 Subject: [PATCH 01/18] Update the demo --- .github/workflows/posthog_pipeline.yaml | 33 ++++++++++++ tools/push_to_posthogh.py | 67 +++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 .github/workflows/posthog_pipeline.yaml create mode 100644 tools/push_to_posthogh.py diff --git a/.github/workflows/posthog_pipeline.yaml b/.github/workflows/posthog_pipeline.yaml new file mode 100644 index 000000000..c8d3ab140 --- /dev/null +++ b/.github/workflows/posthog_pipeline.yaml @@ -0,0 +1,33 @@ +name: Push GitHub Data to PostHog + +on: + schedule: + - cron: '0 0 * * *' # Runs every day at midnight + workflow_dispatch: + +jobs: + push-data: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install requests posthog + + - name: Run data extraction and push script + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} + POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: | + python tools/push_to_posthog.py diff --git a/tools/push_to_posthogh.py b/tools/push_to_posthogh.py new file mode 100644 index 000000000..b044b7a31 --- /dev/null +++ b/tools/push_to_posthogh.py @@ -0,0 +1,67 @@ +# extract_and_push_github_data.py + +import requests +import os +from posthog import Posthog + +# Get environment variables +GITHUB_TOKEN = os.getenv('GITHUB_TOKEN') +REPO = os.getenv('GITHUB_REPOSITORY') +POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY') # Your PostHog Project API Key +POSTHOG_HOST = os.getenv('POSTHOG_HOST', 'https://app.posthog.com') # Default PostHog Cloud + +headers = { + "Authorization": f"token {GITHUB_TOKEN}", + "Accept": "application/vnd.github.v3+json" +} + +# Initialize PostHog client +posthog = Posthog( + api_key=POSTHOG_API_KEY, + host=POSTHOG_HOST +) + +def get_repo_info(): + url = f"https://api.github.com/repos/{REPO}" + response = requests.get(url, headers=headers) + if response.status_code == 200: + return response.json() + else: + print(f"Error fetching repo info: {response.status_code}") + return None + +def main(): + repo_info = get_repo_info() + + if repo_info: + # Prepare data to send to PostHog + properties = { + 'repo_name': repo_info.get('full_name'), + 'stars': repo_info.get('stargazers_count'), + 'forks': repo_info.get('forks_count'), + 'open_issues': repo_info.get('open_issues_count'), + 'watchers': repo_info.get('subscribers_count'), + 'created_at': repo_info.get('created_at'), + 'updated_at': repo_info.get('updated_at'), + 'pushed_at': repo_info.get('pushed_at'), + 'language': repo_info.get('language'), + 'license': repo_info.get('license').get('name') if repo_info.get('license') else None, + 'topics': repo_info.get('topics') + } + + # Send event to PostHog + posthog.capture( + distinct_id='github_repo', # You can customize this identifier + event='GitHub Repo Stats', + properties=properties + ) + + print("Data sent to PostHog successfully.") + else: + print("Failed to retrieve repository information.") + + # Close PostHog client + posthog.shutdown() + +if __name__ == "__main__": + main() From 8d2c10a8551732f5509b9bd582fc3ccde8b7aa62 Mon Sep 17 00:00:00 2001 From: Vasilije 
<8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:09:10 +0200 Subject: [PATCH 02/18] Add posthog test task --- .github/workflows/posthog_pipeline.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/posthog_pipeline.yaml b/.github/workflows/posthog_pipeline.yaml index c8d3ab140..07f03e718 100644 --- a/.github/workflows/posthog_pipeline.yaml +++ b/.github/workflows/posthog_pipeline.yaml @@ -1,8 +1,11 @@ name: Push GitHub Data to PostHog on: - schedule: - - cron: '0 0 * * *' # Runs every day at midnight + pull_request: + branches: + - main +# schedule: +# - cron: '0 0 * * *' # Runs every day at midnight workflow_dispatch: jobs: From 86654e1775923b73b19a143dcdf29101697bf104 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:19:30 +0200 Subject: [PATCH 03/18] Add posthog test task --- tools/{push_to_posthogh.py => push_to_posthog.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tools/{push_to_posthogh.py => push_to_posthog.py} (100%) diff --git a/tools/push_to_posthogh.py b/tools/push_to_posthog.py similarity index 100% rename from tools/push_to_posthogh.py rename to tools/push_to_posthog.py From c1442d9b9019b330a9d4e9405f8e0cb0c36f90b7 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:24:03 +0200 Subject: [PATCH 04/18] Add posthog test task --- .github/workflows/posthog_pipeline.yaml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/posthog_pipeline.yaml b/.github/workflows/posthog_pipeline.yaml index 07f03e718..7a9e430d8 100644 --- a/.github/workflows/posthog_pipeline.yaml +++ b/.github/workflows/posthog_pipeline.yaml @@ -26,11 +26,22 @@ jobs: python -m pip install --upgrade pip pip install requests posthog - - name: Run data extraction and push script + - name: Print working directory, list folders, and run script env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} GITHUB_REPOSITORY: ${{ github.repository }} run: | - python tools/push_to_posthog.py + echo "Current working directory:" + pwd # Print the current working directory + echo "List of folders in the current directory:" + ls -la # List all files and folders in the current directory + echo "Changing to tools directory..." 
+ cd tools # Change to the 'tools' directory + echo "Current working directory after changing to tools:" + pwd # Print the working directory again + echo "List of folders in the tools directory:" + ls -la # List all files and folders in the 'tools' directory + python push_to_posthog.py # Run the script + From 8fe6f12cd89dd25f771ae59b7843bb306da090c9 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:34:57 +0200 Subject: [PATCH 05/18] Add posthog test task --- tools/push_to_posthog.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/push_to_posthog.py b/tools/push_to_posthog.py index b044b7a31..14fbce173 100644 --- a/tools/push_to_posthog.py +++ b/tools/push_to_posthog.py @@ -49,6 +49,8 @@ def main(): 'topics': repo_info.get('topics') } + print("Repository information: ", properties) + # Send event to PostHog posthog.capture( distinct_id='github_repo', # You can customize this identifier From 24c1b46b4e167f5c736c33514b3e333a39e12899 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:41:28 +0200 Subject: [PATCH 06/18] Add posthog test task --- tools/push_to_posthog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/push_to_posthog.py b/tools/push_to_posthog.py index 14fbce173..30f4ddf40 100644 --- a/tools/push_to_posthog.py +++ b/tools/push_to_posthog.py @@ -8,7 +8,7 @@ from posthog import Posthog GITHUB_TOKEN = os.getenv('GITHUB_TOKEN') REPO = os.getenv('GITHUB_REPOSITORY') POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY') # Your PostHog Project API Key -POSTHOG_HOST = os.getenv('POSTHOG_HOST', 'https://app.posthog.com') # Default PostHog Cloud +POSTHOG_HOST = os.getenv('POSTHOG_HOST', 'https://eu.i.posthog.com') # Default PostHog Cloud headers = { "Authorization": f"token {GITHUB_TOKEN}", From 96e234192e1754c48cc643848b0459f423d49dbe Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:29:58 +0200 Subject: [PATCH 07/18] Add posthog test task --- tools/push_to_posthog.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/push_to_posthog.py b/tools/push_to_posthog.py index 30f4ddf40..8eb9cb693 100644 --- a/tools/push_to_posthog.py +++ b/tools/push_to_posthog.py @@ -21,6 +21,8 @@ posthog = Posthog( host=POSTHOG_HOST ) +posthog.debug = True + def get_repo_info(): url = f"https://api.github.com/repos/{REPO}" response = requests.get(url, headers=headers) From 5f03c92c47e711eb2646c6020edbc57cff05f15d Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:35:12 +0200 Subject: [PATCH 08/18] Add posthog test task --- tools/push_to_posthog.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/push_to_posthog.py b/tools/push_to_posthog.py index 8eb9cb693..59e61f550 100644 --- a/tools/push_to_posthog.py +++ b/tools/push_to_posthog.py @@ -54,12 +54,14 @@ def main(): print("Repository information: ", properties) # Send event to PostHog - posthog.capture( + result = posthog.capture( distinct_id='github_repo', # You can customize this identifier event='GitHub Repo Stats', properties=properties ) + print("PostHog response: ", result) + print("Data sent to PostHog successfully.") else: print("Failed to retrieve repository information.") From a257b864d58e62d318d3447ce38b4d8f12785f81 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:43:14 +0200 
Subject: [PATCH 09/18] Add posthog test task --- tools/push_to_posthog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/push_to_posthog.py b/tools/push_to_posthog.py index 59e61f550..66355b729 100644 --- a/tools/push_to_posthog.py +++ b/tools/push_to_posthog.py @@ -8,7 +8,7 @@ from posthog import Posthog GITHUB_TOKEN = os.getenv('GITHUB_TOKEN') REPO = os.getenv('GITHUB_REPOSITORY') POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY') # Your PostHog Project API Key -POSTHOG_HOST = os.getenv('POSTHOG_HOST', 'https://eu.i.posthog.com') # Default PostHog Cloud +POSTHOG_HOST = 'https://eu.i.posthog.com' # Default PostHog Cloud headers = { "Authorization": f"token {GITHUB_TOKEN}", From 550d53caf7bb865bfdc228394d8f3b4686971349 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:45:22 +0200 Subject: [PATCH 10/18] Add posthog test task --- tools/push_to_posthog.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/push_to_posthog.py b/tools/push_to_posthog.py index 66355b729..0b9af1c94 100644 --- a/tools/push_to_posthog.py +++ b/tools/push_to_posthog.py @@ -1,4 +1,5 @@ # extract_and_push_github_data.py +import uuid import requests import os @@ -53,9 +54,11 @@ def main(): print("Repository information: ", properties) + distinct_id = str(uuid.uuid4()) + # Send event to PostHog result = posthog.capture( - distinct_id='github_repo', # You can customize this identifier + distinct_id=distinct_id, # You can customize this identifier event='GitHub Repo Stats', properties=properties ) From b436b4af4c5bd0496d62661f3f90c311e4c778d7 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Wed, 25 Sep 2024 18:47:11 +0200 Subject: [PATCH 11/18] Add posthog test task --- .github/workflows/posthog_pipeline.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/posthog_pipeline.yaml b/.github/workflows/posthog_pipeline.yaml index 7a9e430d8..f979b362b 100644 --- a/.github/workflows/posthog_pipeline.yaml +++ b/.github/workflows/posthog_pipeline.yaml @@ -1,11 +1,8 @@ name: Push GitHub Data to PostHog on: - pull_request: - branches: - - main -# schedule: -# - cron: '0 0 * * *' # Runs every day at midnight + schedule: + - cron: '0 0 * * *' # Runs every day at midnight workflow_dispatch: jobs: From 168b4d96a10defebca9fb5c4143896909868d3af Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Sun, 29 Sep 2024 21:22:55 +0200 Subject: [PATCH 12/18] Add tasks for segment sync and posthog sync --- .../workflows/daily_pypi_download_stats.yaml | 36 +++++++++ .github/workflows/daily_twitter_stats.yaml | 36 +++++++++ .../historical_pypi_download_stats.yaml | 29 +++++++ tools/daily_pypi_downloads.py | 77 +++++++++++++++++++ tools/daily_twitter_stats.py | 66 ++++++++++++++++ tools/historical_import_cognee_stats.py | 58 ++++++++++++++ 6 files changed, 302 insertions(+) create mode 100644 .github/workflows/daily_pypi_download_stats.yaml create mode 100644 .github/workflows/daily_twitter_stats.yaml create mode 100644 .github/workflows/historical_pypi_download_stats.yaml create mode 100644 tools/daily_pypi_downloads.py create mode 100644 tools/daily_twitter_stats.py create mode 100644 tools/historical_import_cognee_stats.py diff --git a/.github/workflows/daily_pypi_download_stats.yaml b/.github/workflows/daily_pypi_download_stats.yaml new file mode 100644 index 000000000..ab4c8051d --- /dev/null +++ 
b/.github/workflows/daily_pypi_download_stats.yaml @@ -0,0 +1,36 @@ +name: Update Cognee Stats Daily + +on: + schedule: + - cron: '0 1 * * *' # Runs every day at 01:00 UTC + +jobs: + update_stats: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v3 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install Dependencies + run: | + pip install requests posthog + + - name: Run Update Script + env: + POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} + POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }} + run: | + cd tools # Change to the 'tools' directory + echo "Current working directory after changing to tools:" + pwd # Print the working directory again + echo "List of folders in the tools directory:" + ls -la # List all files and folders in the 'tools' directory + python daily_pypi_downloads.py # Run the script \ No newline at end of file diff --git a/.github/workflows/daily_twitter_stats.yaml b/.github/workflows/daily_twitter_stats.yaml new file mode 100644 index 000000000..7100d2b4d --- /dev/null +++ b/.github/workflows/daily_twitter_stats.yaml @@ -0,0 +1,36 @@ +name: Send Twitter Followers to Segment + +on: + schedule: + - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed. + workflow_dispatch: # Allows manual triggering of the workflow + +jobs: + send-followers: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + pip install tweepy requests + + - name: Send Twitter Followers to Segment + env: + TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }} + TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }} + SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }} + TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }} + run: | + cd tools + python daily_twitter_stats.py + diff --git a/.github/workflows/historical_pypi_download_stats.yaml b/.github/workflows/historical_pypi_download_stats.yaml new file mode 100644 index 000000000..9c0da5357 --- /dev/null +++ b/.github/workflows/historical_pypi_download_stats.yaml @@ -0,0 +1,29 @@ +name: Historical Import of Cognee Stats + +on: + workflow_dispatch: + +jobs: + import_stats: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install Dependencies + run: | + pip install requests posthog + + - name: Run Historical Import Script + env: + POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} + POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }} + run: | + cd tools # Change to the 'tools' directory + python historical_import_cognee_stats.py diff --git a/tools/daily_pypi_downloads.py b/tools/daily_pypi_downloads.py new file mode 100644 index 000000000..d57092c3b --- /dev/null +++ b/tools/daily_pypi_downloads.py @@ -0,0 +1,77 @@ +import uuid + +import requests +import posthog +import os +from datetime import datetime, timedelta + +# Replace with your PostHog Project API Key +POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY') +POSTHOG_API_HOST = 'https://eu.i.posthog.com' + +# Initialize PostHog client +posthog.project_api_key = POSTHOG_API_KEY +posthog.host = POSTHOG_API_HOST + +# Read last processed date 
from file +state_file = 'last_processed_date.txt' +if os.path.exists(state_file): + with open(state_file, 'r') as f: + last_processed_date = f.read().strip() + last_processed_date = datetime.strptime(last_processed_date, '%Y-%m-%d') +else: + # If no state file, start from 2 days ago + last_processed_date = datetime.utcnow() - timedelta(days=2) + +# Calculate the next date to process +next_date = last_processed_date + timedelta(days=1) +today = datetime.utcnow().date() + +if next_date.date() >= today: + print("No new data to process.") + exit(0) + +date_str = next_date.strftime('%Y-%m-%d') + +# Fetch download data for the date +package = 'cognee' +url = f'https://pypistats.org/api/packages/{package}/overall' + +response = requests.get(url) +if response.status_code != 200: + print(f"Failed to fetch data: {response.status_code}") + exit(1) + +data = response.json() + +# Find the entry for the date we want +downloads = None +for entry in data['data']: + if entry['date'] == date_str: + downloads = entry['downloads'] + break + +if downloads is None: + print(f"No data available for date {date_str}") + exit(1) + +# Create a unique message_id +message_id = f"cognee_downloads_{date_str}" + +distinct_id = str(uuid.uuid4()) + +# Send an event to PostHog +posthog.capture( + distinct_id=distinct_id, + event='cognee_downloads', + properties={ + 'date': date_str, + 'downloads': downloads, + } +) + +print(f"Data for {date_str} updated in PostHog successfully.") + +# Update the state file +with open(state_file, 'w') as f: + f.write(date_str) diff --git a/tools/daily_twitter_stats.py b/tools/daily_twitter_stats.py new file mode 100644 index 000000000..43bedda7b --- /dev/null +++ b/tools/daily_twitter_stats.py @@ -0,0 +1,66 @@ +import tweepy +import requests +import json +from datetime import datetime + +# Twitter API credentials from GitHub Secrets +API_KEY = '${{ secrets.TWITTER_API_KEY }}' +API_SECRET = '${{ secrets.TWITTER_API_SECRET }}' +ACCESS_TOKEN = '${{ secrets.TWITTER_ACCESS_TOKEN }}' +ACCESS_SECRET = '${{ secrets.TWITTER_ACCESS_SECRET }}' +USERNAME = '${{ secrets.TWITTER_USERNAME }}' +SEGMENT_WRITE_KEY = '${{ secrets.SEGMENT_WRITE_KEY }}' + +# Initialize Tweepy API +auth = tweepy.OAuthHandler(API_KEY, API_SECRET) +auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET) +twitter_api = tweepy.API(auth) + +# Segment endpoint +SEGMENT_ENDPOINT = 'https://api.segment.io/v1/track' + + +def get_follower_count(username): + try: + user = twitter_api.get_user(screen_name=username) + return user.followers_count + except tweepy.TweepError as e: + print(f'Error fetching follower count: {e}') + return None + + +def send_data_to_segment(username, follower_count): + current_time = datetime.now().isoformat() + + data = { + 'userId': username, + 'event': 'Follower Count Update', + 'properties': { + 'username': username, + 'follower_count': follower_count, + 'timestamp': current_time + }, + 'timestamp': current_time + } + + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Basic {SEGMENT_WRITE_KEY.encode("utf-8").decode("utf-8")}' + } + + try: + response = requests.post(SEGMENT_ENDPOINT, headers=headers, data=json.dumps(data)) + + if response.status_code == 200: + print(f'Successfully sent data to Segment for {username}') + else: + print(f'Failed to send data to Segment. 
Status code: {response.status_code}, Response: {response.text}') + except requests.exceptions.RequestException as e: + print(f'Error sending data to Segment: {e}') + + +follower_count = get_follower_count(USERNAME) +if follower_count is not None: + send_data_to_segment(USERNAME, follower_count) +else: + print('Failed to retrieve follower count.') diff --git a/tools/historical_import_cognee_stats.py b/tools/historical_import_cognee_stats.py new file mode 100644 index 000000000..68f864e8a --- /dev/null +++ b/tools/historical_import_cognee_stats.py @@ -0,0 +1,58 @@ +import uuid + +import requests +import posthog +import os +from datetime import datetime, timedelta + +# Replace with your PostHog Project API Key +POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY') +POSTHOG_API_HOST = 'https://eu.i.posthog.com' + +# Initialize PostHog client +posthog.project_api_key = POSTHOG_API_KEY +posthog.host = POSTHOG_API_HOST + +# Fetch historical download data for the last 180 days +package = 'cognee' +url = f'https://pypistats.org/api/packages/{package}/overall' + +response = requests.get(url) +if response.status_code != 200: + print(f"Failed to fetch data: {response.status_code}") + exit(1) + +data = response.json() + +# Exclude today and yesterday +today = datetime.utcnow().date() +yesterday = today - timedelta(days=1) + +# Process and send data to PostHog +for entry in data['data']: + date_str = entry['date'] + date_obj = datetime.strptime(date_str, '%Y-%m-%d').date() + downloads = entry['downloads'] + + # Skip today and yesterday + if date_obj >= yesterday: + continue + + # Create a unique message_id + message_id = f"cognee_downloads_{date_str}" + + distinct_id = str(uuid.uuid4()) + + # Send an event to PostHog + posthog.capture( + distinct_id=distinct_id, + event='cognee_downloads', + properties={ + 'date': date_str, + 'downloads': downloads, + } + ) + + print(f"Data for {date_str} imported successfully.") + +print("Historical data import completed.") From 354c8b1f61cd0b9b6248123a4baf4a591c8009b3 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:44:32 +0200 Subject: [PATCH 13/18] Add tasks for segment sync and posthog sync --- .github/workflows/daily_pypi_download_stats.yaml | 2 +- .github/workflows/daily_twitter_stats.yaml | 2 +- .github/workflows/historical_pypi_download_stats.yaml | 2 +- .github/workflows/posthog_pipeline.yaml | 2 +- .github/workflows/release_discord_action.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/daily_pypi_download_stats.yaml b/.github/workflows/daily_pypi_download_stats.yaml index ab4c8051d..5192eb960 100644 --- a/.github/workflows/daily_pypi_download_stats.yaml +++ b/.github/workflows/daily_pypi_download_stats.yaml @@ -1,4 +1,4 @@ -name: Update Cognee Stats Daily +name: analytics | Update Cognee Stats Daily on: schedule: diff --git a/.github/workflows/daily_twitter_stats.yaml b/.github/workflows/daily_twitter_stats.yaml index 7100d2b4d..1893f86eb 100644 --- a/.github/workflows/daily_twitter_stats.yaml +++ b/.github/workflows/daily_twitter_stats.yaml @@ -1,4 +1,4 @@ -name: Send Twitter Followers to Segment +name: analytics | Send Twitter Followers to Segment on: schedule: diff --git a/.github/workflows/historical_pypi_download_stats.yaml b/.github/workflows/historical_pypi_download_stats.yaml index 9c0da5357..15622a7b6 100644 --- a/.github/workflows/historical_pypi_download_stats.yaml +++ b/.github/workflows/historical_pypi_download_stats.yaml @@ -1,4 +1,4 @@ -name: 
Historical Import of Cognee Stats +name: analytics | Historical Import of Cognee Stats on: workflow_dispatch: diff --git a/.github/workflows/posthog_pipeline.yaml b/.github/workflows/posthog_pipeline.yaml index f979b362b..1e34ee8cf 100644 --- a/.github/workflows/posthog_pipeline.yaml +++ b/.github/workflows/posthog_pipeline.yaml @@ -1,4 +1,4 @@ -name: Push GitHub Data to PostHog +name: analytics | Push GitHub Data to PostHog on: schedule: diff --git a/.github/workflows/release_discord_action.yml b/.github/workflows/release_discord_action.yml index faedf42ca..472687214 100644 --- a/.github/workflows/release_discord_action.yml +++ b/.github/workflows/release_discord_action.yml @@ -1,4 +1,4 @@ -name: Send Release to Discord +name: automation | Send Release to Discord on: release: From 9dd955524b44df5c412e23bd9a973aac1dd518e7 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Mon, 30 Sep 2024 15:12:48 +0200 Subject: [PATCH 14/18] Add tasks for segment sync and posthog sync --- .github/workflows/daily_pypi_download_stats.yaml | 8 ++++---- .github/workflows/daily_twitter_stats.yaml | 10 ++++++---- .github/workflows/historical_pypi_download_stats.yaml | 6 ++++-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/daily_pypi_download_stats.yaml b/.github/workflows/daily_pypi_download_stats.yaml index 5192eb960..71181f005 100644 --- a/.github/workflows/daily_pypi_download_stats.yaml +++ b/.github/workflows/daily_pypi_download_stats.yaml @@ -1,8 +1,8 @@ name: analytics | Update Cognee Stats Daily - -on: - schedule: - - cron: '0 1 * * *' # Runs every day at 01:00 UTC +on: pull_request +#on: +# schedule: +# - cron: '0 1 * * *' # Runs every day at 01:00 UTC jobs: update_stats: diff --git a/.github/workflows/daily_twitter_stats.yaml b/.github/workflows/daily_twitter_stats.yaml index 1893f86eb..747d936a8 100644 --- a/.github/workflows/daily_twitter_stats.yaml +++ b/.github/workflows/daily_twitter_stats.yaml @@ -1,9 +1,11 @@ name: analytics | Send Twitter Followers to Segment -on: - schedule: - - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed. - workflow_dispatch: # Allows manual triggering of the workflow +on: pull_request + +#on: +# schedule: +# - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed. 
+# workflow_dispatch: # Allows manual triggering of the workflow jobs: send-followers: diff --git a/.github/workflows/historical_pypi_download_stats.yaml b/.github/workflows/historical_pypi_download_stats.yaml index 15622a7b6..70b970e46 100644 --- a/.github/workflows/historical_pypi_download_stats.yaml +++ b/.github/workflows/historical_pypi_download_stats.yaml @@ -1,7 +1,9 @@ name: analytics | Historical Import of Cognee Stats -on: - workflow_dispatch: +on: pull_request + +#on: +# workflow_dispatch: jobs: import_stats: From cce2e386be7acd62bf928a54cced8ee3f982685a Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:34:23 +0200 Subject: [PATCH 15/18] Add tasks for segment sync and posthog sync --- .../workflows/daily_pypi_download_stats.yaml | 8 +-- .../historical_pypi_download_stats.yaml | 31 ---------- tools/historical_import_cognee_stats.py | 58 ------------------- 3 files changed, 4 insertions(+), 93 deletions(-) delete mode 100644 .github/workflows/historical_pypi_download_stats.yaml delete mode 100644 tools/historical_import_cognee_stats.py diff --git a/.github/workflows/daily_pypi_download_stats.yaml b/.github/workflows/daily_pypi_download_stats.yaml index 71181f005..5192eb960 100644 --- a/.github/workflows/daily_pypi_download_stats.yaml +++ b/.github/workflows/daily_pypi_download_stats.yaml @@ -1,8 +1,8 @@ name: analytics | Update Cognee Stats Daily -on: pull_request -#on: -# schedule: -# - cron: '0 1 * * *' # Runs every day at 01:00 UTC + +on: + schedule: + - cron: '0 1 * * *' # Runs every day at 01:00 UTC jobs: update_stats: diff --git a/.github/workflows/historical_pypi_download_stats.yaml b/.github/workflows/historical_pypi_download_stats.yaml deleted file mode 100644 index 70b970e46..000000000 --- a/.github/workflows/historical_pypi_download_stats.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: analytics | Historical Import of Cognee Stats - -on: pull_request - -#on: -# workflow_dispatch: - -jobs: - import_stats: - runs-on: ubuntu-latest - - steps: - - name: Checkout Repository - uses: actions/checkout@v3 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - - name: Install Dependencies - run: | - pip install requests posthog - - - name: Run Historical Import Script - env: - POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }} - POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }} - run: | - cd tools # Change to the 'tools' directory - python historical_import_cognee_stats.py diff --git a/tools/historical_import_cognee_stats.py b/tools/historical_import_cognee_stats.py deleted file mode 100644 index 68f864e8a..000000000 --- a/tools/historical_import_cognee_stats.py +++ /dev/null @@ -1,58 +0,0 @@ -import uuid - -import requests -import posthog -import os -from datetime import datetime, timedelta - -# Replace with your PostHog Project API Key -POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY') -POSTHOG_API_HOST = 'https://eu.i.posthog.com' - -# Initialize PostHog client -posthog.project_api_key = POSTHOG_API_KEY -posthog.host = POSTHOG_API_HOST - -# Fetch historical download data for the last 180 days -package = 'cognee' -url = f'https://pypistats.org/api/packages/{package}/overall' - -response = requests.get(url) -if response.status_code != 200: - print(f"Failed to fetch data: {response.status_code}") - exit(1) - -data = response.json() - -# Exclude today and yesterday -today = datetime.utcnow().date() -yesterday = today - timedelta(days=1) - -# Process and send data to PostHog -for entry in 
data['data']: - date_str = entry['date'] - date_obj = datetime.strptime(date_str, '%Y-%m-%d').date() - downloads = entry['downloads'] - - # Skip today and yesterday - if date_obj >= yesterday: - continue - - # Create a unique message_id - message_id = f"cognee_downloads_{date_str}" - - distinct_id = str(uuid.uuid4()) - - # Send an event to PostHog - posthog.capture( - distinct_id=distinct_id, - event='cognee_downloads', - properties={ - 'date': date_str, - 'downloads': downloads, - } - ) - - print(f"Data for {date_str} imported successfully.") - -print("Historical data import completed.") From 1d4e06fdf382562ef834cb843639bed01f2eb2a4 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Mon, 30 Sep 2024 18:30:39 +0200 Subject: [PATCH 16/18] Add tasks for segment sync and posthog sync --- tools/daily_pypi_downloads.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tools/daily_pypi_downloads.py b/tools/daily_pypi_downloads.py index d57092c3b..64a0956ed 100644 --- a/tools/daily_pypi_downloads.py +++ b/tools/daily_pypi_downloads.py @@ -49,6 +49,7 @@ downloads = None for entry in data['data']: if entry['date'] == date_str: downloads = entry['downloads'] + category = entry.get('category') break if downloads is None: @@ -61,16 +62,19 @@ message_id = f"cognee_downloads_{date_str}" distinct_id = str(uuid.uuid4()) # Send an event to PostHog -posthog.capture( - distinct_id=distinct_id, - event='cognee_downloads', - properties={ - 'date': date_str, - 'downloads': downloads, - } -) +event_name = 'cognee_lib_downloads_with_mirrors' if category == 'with_mirrors' else 'cognee_lib_downloads_without_mirrors' -print(f"Data for {date_str} updated in PostHog successfully.") +if event_name == 'cognee_lib_downloads_without_mirrors': + posthog.capture( + distinct_id=str(uuid.uuid4()), + event=event_name, + properties={ + 'category': category, + 'date': date_str, + 'downloads': downloads, + } + ) +print(f"Data for {date_str} updated in PostHog successfully. 
Downloads is {downloads}") # Update the state file with open(state_file, 'w') as f: From d2954de4c79f888631347877409bdc1671cddcb8 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Mon, 30 Sep 2024 19:09:05 +0200 Subject: [PATCH 17/18] Add tasks for segment sync and posthog sync --- tools/push_to_posthog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/push_to_posthog.py b/tools/push_to_posthog.py index 0b9af1c94..15cf05df6 100644 --- a/tools/push_to_posthog.py +++ b/tools/push_to_posthog.py @@ -59,7 +59,7 @@ def main(): # Send event to PostHog result = posthog.capture( distinct_id=distinct_id, # You can customize this identifier - event='GitHub Repo Stats', + event='cognee_lib_github_repo_stats', properties=properties ) From 8eeaafcabae772a11c110df3b7300742f5d81062 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Mon, 30 Sep 2024 19:10:23 +0200 Subject: [PATCH 18/18] Add tasks for segment sync and posthog sync --- .github/workflows/daily_twitter_stats.yaml | 76 +++++++++++----------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/.github/workflows/daily_twitter_stats.yaml b/.github/workflows/daily_twitter_stats.yaml index 747d936a8..e6e6e9b93 100644 --- a/.github/workflows/daily_twitter_stats.yaml +++ b/.github/workflows/daily_twitter_stats.yaml @@ -1,38 +1,38 @@ -name: analytics | Send Twitter Followers to Segment - -on: pull_request - -#on: -# schedule: -# - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed. -# workflow_dispatch: # Allows manual triggering of the workflow - -jobs: - send-followers: - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' - - - name: Install dependencies - run: | - pip install tweepy requests - - - name: Send Twitter Followers to Segment - env: - TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }} - TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }} - TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} - TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }} - SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }} - TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }} - run: | - cd tools - python daily_twitter_stats.py - +#name: analytics | Send Twitter Followers to Segment +# +#on: pull_request +# +##on: +## schedule: +## - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed. +## workflow_dispatch: # Allows manual triggering of the workflow +# +#jobs: +# send-followers: +# runs-on: ubuntu-latest +# +# steps: +# - name: Checkout repository +# uses: actions/checkout@v3 +# +# - name: Set up Python +# uses: actions/setup-python@v4 +# with: +# python-version: '3.x' +# +# - name: Install dependencies +# run: | +# pip install tweepy requests +# +# - name: Send Twitter Followers to Segment +# env: +# TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }} +# TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }} +# TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} +# TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }} +# SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }} +# TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }} +# run: | +# cd tools +# python daily_twitter_stats.py +#
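
A note on tools/daily_twitter_stats.py as introduced in PATCH 12/18 (its workflow is disabled again in PATCH 18/18): the script assigns literal '${{ secrets.* }}' strings to its credential variables, but GitHub Actions only expands that syntax inside workflow files, so Python receives the placeholder text verbatim; the workflow already exports the secrets as environment variables, which the script could read with os.getenv. The Segment request also puts the raw write key into the Authorization header, while Segment's HTTP tracking API expects Basic auth with the base64 encoding of "<write_key>:" (the .encode("utf-8").decode("utf-8") round trip is a no-op). Below is a minimal sketch of the Segment-facing half under those assumptions; the helper name, the timeout value, and the timezone-aware timestamp are illustrative choices, not part of the patches. With Tweepy 4.x, the Twitter half would additionally need tweepy.errors.TweepyException in place of the removed tweepy.TweepError.

# Illustrative sketch, not part of the patch series: read credentials from the
# environment variables the workflow exports, and base64-encode the Segment
# write key for HTTP Basic auth.
import base64
import json
import os
from datetime import datetime, timezone

import requests

SEGMENT_WRITE_KEY = os.getenv('SEGMENT_WRITE_KEY', '')
SEGMENT_ENDPOINT = 'https://api.segment.io/v1/track'


def send_follower_count(username, follower_count):
    # Segment expects Basic auth: base64("<write_key>:") with an empty password.
    token = base64.b64encode(f"{SEGMENT_WRITE_KEY}:".encode("utf-8")).decode("ascii")
    payload = {
        'userId': username,
        'event': 'Follower Count Update',
        'properties': {
            'username': username,
            'follower_count': follower_count,
        },
        'timestamp': datetime.now(timezone.utc).isoformat(),
    }
    response = requests.post(
        SEGMENT_ENDPOINT,
        headers={'Content-Type': 'application/json', 'Authorization': f'Basic {token}'},
        data=json.dumps(payload),
        timeout=30,
    )
    print(f'Segment responded with {response.status_code}: {response.text}')

The Twitter half of the script would then call send_follower_count(os.getenv('TWITTER_USERNAME'), follower_count) once the follower count has been fetched.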
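
A note on tools/daily_pypi_downloads.py as it stands after PATCH 16/18: the lookup loop breaks on the first entry whose date matches, and — on the assumption that pypistats' "overall" endpoint reports each date once per category ('with_mirrors' and 'without_mirrors') — that first hit can be the 'with_mirrors' row; the final capture is then skipped because event_name is not 'cognee_lib_downloads_without_mirrors', yet the state file still advances, so that day's count is never sent. A hedged sketch that selects the 'without_mirrors' row explicitly; the helper names are illustrative and the response shape is assumed, not verified here:

# Illustrative sketch, not part of the patch series. Assumes each element of
# data['data'] carries 'date', 'downloads', and a 'category' of either
# 'with_mirrors' or 'without_mirrors'.
import uuid

import posthog


def downloads_without_mirrors(data, date_str):
    """Return the 'without_mirrors' download count for date_str, or None."""
    for entry in data.get('data', []):
        if entry.get('date') == date_str and entry.get('category') == 'without_mirrors':
            return entry['downloads']
    return None


def capture_daily_downloads(data, date_str):
    downloads = downloads_without_mirrors(data, date_str)
    if downloads is None:
        print(f"No 'without_mirrors' data available for {date_str}")
        return False
    # Same module-level client configuration as the script (project_api_key / host).
    posthog.capture(
        distinct_id=str(uuid.uuid4()),
        event='cognee_lib_downloads_without_mirrors',
        properties={'category': 'without_mirrors', 'date': date_str, 'downloads': downloads},
    )
    return True

Writing last_processed_date.txt only when capture_daily_downloads returns True would also keep the state file from skipping past a day whose event was never sent.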