Add tasks for segment sync and posthog sync

parent: b436b4af4c
commit: 168b4d96a1

6 changed files with 302 additions and 0 deletions
.github/workflows/daily_pypi_download_stats.yaml (vendored, new file, 36 lines added)
@@ -0,0 +1,36 @@
name: Update Cognee Stats Daily

on:
  schedule:
    - cron: '0 1 * * *' # Runs every day at 01:00 UTC

jobs:
  update_stats:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
        with:
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          pip install requests posthog

      - name: Run Update Script
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
        run: |
          cd tools # Change to the 'tools' directory
          echo "Current working directory after changing to tools:"
          pwd # Print the working directory
          echo "List of files in the tools directory:"
          ls -la # List all files and folders in the 'tools' directory
          python daily_pypi_downloads.py # Run the script
.github/workflows/daily_twitter_stats.yaml (vendored, new file, 36 lines added)
@@ -0,0 +1,36 @@
name: Send Twitter Followers to Segment

on:
  schedule:
    - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed.
  workflow_dispatch: # Allows manual triggering of the workflow

jobs:
  send-followers:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          pip install tweepy requests

      - name: Send Twitter Followers to Segment
        env:
          TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }}
          TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }}
          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
          TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
          SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }}
          TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }}
        run: |
          cd tools
          python daily_twitter_stats.py
.github/workflows/historical_pypi_download_stats.yaml (vendored, new file, 29 lines added)
@@ -0,0 +1,29 @@
name: Historical Import of Cognee Stats

on:
  workflow_dispatch:

jobs:
  import_stats:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          pip install requests posthog

      - name: Run Historical Import Script
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
        run: |
          cd tools # Change to the 'tools' directory
          python historical_import_cognee_stats.py
tools/daily_pypi_downloads.py (new file, 77 lines added)
@@ -0,0 +1,77 @@
import os
import uuid
from datetime import datetime, timedelta

import requests
import posthog

# PostHog credentials are read from the environment (set by the workflow)
POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')
POSTHOG_API_HOST = os.getenv('POSTHOG_API_HOST', 'https://eu.i.posthog.com')

# Initialize PostHog client
posthog.project_api_key = POSTHOG_API_KEY
posthog.host = POSTHOG_API_HOST

# Read last processed date from file.
# Note: on ephemeral CI runners this file does not persist between runs
# unless it is cached or committed back to the repository.
state_file = 'last_processed_date.txt'
if os.path.exists(state_file):
    with open(state_file, 'r') as f:
        last_processed_date = f.read().strip()
    last_processed_date = datetime.strptime(last_processed_date, '%Y-%m-%d')
else:
    # If no state file, start from 2 days ago
    last_processed_date = datetime.utcnow() - timedelta(days=2)

# Calculate the next date to process
next_date = last_processed_date + timedelta(days=1)
today = datetime.utcnow().date()

if next_date.date() >= today:
    print("No new data to process.")
    exit(0)

date_str = next_date.strftime('%Y-%m-%d')

# Fetch download data for the date
package = 'cognee'
url = f'https://pypistats.org/api/packages/{package}/overall'

response = requests.get(url)
if response.status_code != 200:
    print(f"Failed to fetch data: {response.status_code}")
    exit(1)

data = response.json()

# Find the entry for the date we want
downloads = None
for entry in data['data']:
    if entry['date'] == date_str:
        downloads = entry['downloads']
        break

if downloads is None:
    print(f"No data available for date {date_str}")
    exit(1)

# Create a unique message_id (not currently passed to PostHog)
message_id = f"cognee_downloads_{date_str}"

distinct_id = str(uuid.uuid4())

# Send an event to PostHog
posthog.capture(
    distinct_id=distinct_id,
    event='cognee_downloads',
    properties={
        'date': date_str,
        'downloads': downloads,
    }
)

# The posthog client batches events in the background; flush before exiting
posthog.flush()

print(f"Data for {date_str} updated in PostHog successfully.")

# Update the state file
with open(state_file, 'w') as f:
    f.write(date_str)
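Note: for reference, here is a hedged sketch of the response shape the two PyPI scripts parse, based on the public pypistats.org API. The values are illustrative and the per-date with_mirrors/without_mirrors category split is an assumption to verify against a live response; if it holds, matching on date alone (as above) picks whichever category happens to come first.

# Hypothetical shape of the pypistats.org "overall" payload; values are
# illustrative and the category split is an assumption to verify.
sample_response = {
    "data": [
        {"category": "with_mirrors", "date": "2024-09-01", "downloads": 1480},
        {"category": "without_mirrors", "date": "2024-09-01", "downloads": 1210},
    ],
    "package": "cognee",
    "type": "overall_downloads",
}

# Matching only on date, as daily_pypi_downloads.py does, takes the first
# entry for that date regardless of category:
downloads = next(
    (e["downloads"] for e in sample_response["data"] if e["date"] == "2024-09-01"),
    None,
)
print(downloads)  # 1480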
tools/daily_twitter_stats.py (new file, 66 lines added)
@@ -0,0 +1,66 @@
import os
import json
from datetime import datetime

import requests
import tweepy

# Twitter API credentials and the Segment write key are read from the
# environment (exported by the GitHub Actions workflow). Note that
# '${{ secrets.* }}' expressions are only expanded inside workflow files,
# never inside Python source, so they must not appear here as literals.
API_KEY = os.getenv('TWITTER_API_KEY')
API_SECRET = os.getenv('TWITTER_API_SECRET')
ACCESS_TOKEN = os.getenv('TWITTER_ACCESS_TOKEN')
ACCESS_SECRET = os.getenv('TWITTER_ACCESS_SECRET')
USERNAME = os.getenv('TWITTER_USERNAME')
SEGMENT_WRITE_KEY = os.getenv('SEGMENT_WRITE_KEY')

# Initialize Tweepy API (v1.1)
auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
twitter_api = tweepy.API(auth)

# Segment HTTP tracking endpoint
SEGMENT_ENDPOINT = 'https://api.segment.io/v1/track'


def get_follower_count(username):
    try:
        user = twitter_api.get_user(screen_name=username)
        return user.followers_count
    except tweepy.TweepyException as e:  # TweepError was removed in Tweepy v4
        print(f'Error fetching follower count: {e}')
        return None


def send_data_to_segment(username, follower_count):
    current_time = datetime.now().isoformat()

    data = {
        'userId': username,
        'event': 'Follower Count Update',
        'properties': {
            'username': username,
            'follower_count': follower_count,
            'timestamp': current_time
        },
        'timestamp': current_time
    }

    headers = {'Content-Type': 'application/json'}

    try:
        # Segment authenticates with HTTP Basic auth: the write key as the
        # username and an empty password.
        response = requests.post(
            SEGMENT_ENDPOINT,
            headers=headers,
            data=json.dumps(data),
            auth=(SEGMENT_WRITE_KEY, ''),
        )

        if response.status_code == 200:
            print(f'Successfully sent data to Segment for {username}')
        else:
            print(f'Failed to send data to Segment. Status code: {response.status_code}, Response: {response.text}')
    except requests.exceptions.RequestException as e:
        print(f'Error sending data to Segment: {e}')


follower_count = get_follower_count(USERNAME)
if follower_count is not None:
    send_data_to_segment(USERNAME, follower_count)
else:
    print('Failed to retrieve follower count.')
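Note: the original file built the Authorization header from the raw write key (a no-op encode/decode, which Segment rejects); `auth=(SEGMENT_WRITE_KEY, '')` above lets requests build the correct Basic auth header. A minimal sketch of the equivalent explicit header, assuming the standard base64 encoding of "WRITE_KEY:" with an empty password:

import base64

# Equivalent explicit header for requests' auth=(write_key, '')
write_key = "SEGMENT_WRITE_KEY_VALUE"  # placeholder, not a real key
token = base64.b64encode(f"{write_key}:".encode("utf-8")).decode("ascii")
headers = {"Authorization": f"Basic {token}", "Content-Type": "application/json"}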
tools/historical_import_cognee_stats.py (new file, 58 lines added)
@@ -0,0 +1,58 @@
import os
import uuid
from datetime import datetime, timedelta

import requests
import posthog

# PostHog credentials are read from the environment (set by the workflow)
POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')
POSTHOG_API_HOST = os.getenv('POSTHOG_API_HOST', 'https://eu.i.posthog.com')

# Initialize PostHog client
posthog.project_api_key = POSTHOG_API_KEY
posthog.host = POSTHOG_API_HOST

# Fetch historical download data (pypistats covers roughly the last 180 days)
package = 'cognee'
url = f'https://pypistats.org/api/packages/{package}/overall'

response = requests.get(url)
if response.status_code != 200:
    print(f"Failed to fetch data: {response.status_code}")
    exit(1)

data = response.json()

# Exclude today and yesterday, whose counts may still be incomplete
today = datetime.utcnow().date()
yesterday = today - timedelta(days=1)

# Process and send data to PostHog
for entry in data['data']:
    date_str = entry['date']
    date_obj = datetime.strptime(date_str, '%Y-%m-%d').date()
    downloads = entry['downloads']

    # Skip today and yesterday
    if date_obj >= yesterday:
        continue

    # Create a unique message_id (not currently passed to PostHog)
    message_id = f"cognee_downloads_{date_str}"

    distinct_id = str(uuid.uuid4())

    # Send an event to PostHog
    posthog.capture(
        distinct_id=distinct_id,
        event='cognee_downloads',
        properties={
            'date': date_str,
            'downloads': downloads,
        }
    )

    print(f"Data for {date_str} imported successfully.")

# The posthog client batches events in the background; flush before exiting
posthog.flush()

print("Historical data import completed.")
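Note: both PostHog scripts build a message_id but never send it, so re-running the historical import will record duplicate events. One possible mitigation, assuming the posthog-python client's uuid parameter on capture(): derive a deterministic event UUID from message_id so repeated runs emit the same identifier. Whether the server deduplicates on that UUID depends on PostHog's ingestion behavior, so verify before relying on it.

import uuid
import posthog

# Hypothetical sketch: derive a stable event UUID from the otherwise-unused
# message_id; uuid5 is deterministic for the same namespace and name.
message_id = "cognee_downloads_2024-09-01"  # example value
event_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, message_id))

posthog.capture(
    distinct_id=str(uuid.uuid4()),
    event="cognee_downloads",
    properties={"date": "2024-09-01", "downloads": 1480},
    uuid=event_uuid,  # assumed posthog-python parameter; verify before relying on it
)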