Add tasks for segment sync and posthog sync
This commit is contained in:
parent
b436b4af4c
commit
168b4d96a1
6 changed files with 302 additions and 0 deletions
36
.github/workflows/daily_pypi_download_stats.yaml
vendored
Normal file
36
.github/workflows/daily_pypi_download_stats.yaml
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
# Daily job: fetch one day's PyPI download count for the `cognee` package
# and push it to PostHog via tools/daily_pypi_downloads.py.
name: Update Cognee Stats Daily

on:
  schedule:
    - cron: '0 1 * * *' # Runs every day at 01:00 UTC

jobs:
  update_stats:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
        with:
          # No push-back is performed, so the checkout token is not kept.
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          pip install requests posthog

      # NOTE(review): the script persists its progress to
      # tools/last_processed_date.txt, but no step commits that file back to
      # the repository — each CI run starts from the script's fallback date.
      # Confirm this is intended.
      - name: Run Update Script
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
        run: |
          cd tools # Change to the 'tools' directory
          echo "Current working directory after changing to tools:"
          pwd # Print the working directory again
          echo "List of folders in the tools directory:"
          ls -la # List all files and folders in the 'tools' directory
          python daily_pypi_downloads.py # Run the script
||||||
36
.github/workflows/daily_twitter_stats.yaml
vendored
Normal file
36
.github/workflows/daily_twitter_stats.yaml
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
# Daily job: read the Twitter follower count for the configured account and
# forward it to Segment via tools/daily_twitter_stats.py.
name: Send Twitter Followers to Segment

on:
  schedule:
    - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed.
  workflow_dispatch: # Allows manual triggering of the workflow

jobs:
  send-followers:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          pip install tweepy requests

      # All credentials are exported as environment variables; the script is
      # expected to read them from os.environ at runtime.
      - name: Send Twitter Followers to Segment
        env:
          TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }}
          TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }}
          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
          TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
          SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }}
          TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }}
        run: |
          cd tools
          python daily_twitter_stats.py
||||||
|
|
||||||
29
.github/workflows/historical_pypi_download_stats.yaml
vendored
Normal file
29
.github/workflows/historical_pypi_download_stats.yaml
vendored
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
# One-shot, manually triggered job: backfill historical PyPI download stats
# for `cognee` into PostHog via tools/historical_import_cognee_stats.py.
name: Historical Import of Cognee Stats

on:
  workflow_dispatch: # Manual trigger only — this is a one-time backfill.

jobs:
  import_stats:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          pip install requests posthog

      - name: Run Historical Import Script
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
        run: |
          cd tools # Change to the 'tools' directory
          python historical_import_cognee_stats.py
||||||
77
tools/daily_pypi_downloads.py
Normal file
77
tools/daily_pypi_downloads.py
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
"""Fetch one day's PyPI download count for `cognee` and send it to PostHog.

Runs as a daily GitHub Actions job. Progress is tracked in
`last_processed_date.txt` so consecutive runs advance one day at a time.
Exits 0 when there is nothing new to process, 1 on fetch failures.
"""
import os
import uuid
from datetime import datetime, timedelta

import requests
import posthog

# Credentials come from the workflow environment. The host falls back to the
# EU ingestion endpoint when the POSTHOG_API_HOST secret is not set.
# (Previously the host was hardcoded, silently ignoring the env var the
# workflow exports.)
POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')
POSTHOG_API_HOST = os.getenv('POSTHOG_API_HOST', 'https://eu.i.posthog.com')

# Initialize the module-level PostHog client.
posthog.project_api_key = POSTHOG_API_KEY
posthog.host = POSTHOG_API_HOST

# Read last processed date from the state file.
# NOTE(review): the CI workflow never commits this file back, so on CI every
# run starts from the 2-days-ago fallback — confirm this is intended.
state_file = 'last_processed_date.txt'
if os.path.exists(state_file):
    with open(state_file, 'r') as f:
        last_processed_date = datetime.strptime(f.read().strip(), '%Y-%m-%d')
else:
    # If no state file, start from 2 days ago.
    last_processed_date = datetime.utcnow() - timedelta(days=2)

# Calculate the next date to process. Never process today: its count on
# pypistats would still be incomplete.
next_date = last_processed_date + timedelta(days=1)
today = datetime.utcnow().date()

if next_date.date() >= today:
    print("No new data to process.")
    exit(0)

date_str = next_date.strftime('%Y-%m-%d')

# Fetch the overall download series for the package.
package = 'cognee'
url = f'https://pypistats.org/api/packages/{package}/overall'

response = requests.get(url, timeout=30)  # timeout so the CI job cannot hang
if response.status_code != 200:
    print(f"Failed to fetch data: {response.status_code}")
    exit(1)

data = response.json()

# Find the entry for the date we want.
downloads = None
for entry in data['data']:
    if entry['date'] == date_str:
        downloads = entry['downloads']
        break

if downloads is None:
    print(f"No data available for date {date_str}")
    exit(1)

# Send an event to PostHog. A random distinct_id keeps each day's event
# separate; the date itself travels in the event properties.
posthog.capture(
    distinct_id=str(uuid.uuid4()),
    event='cognee_downloads',
    properties={
        'date': date_str,
        'downloads': downloads,
    },
)

print(f"Data for {date_str} updated in PostHog successfully.")

# Persist progress only after a successful send.
with open(state_file, 'w') as f:
    f.write(date_str)
||||||
66
tools/daily_twitter_stats.py
Normal file
66
tools/daily_twitter_stats.py
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
"""Read the Twitter follower count for an account and send it to Segment.

Runs as a daily GitHub Actions job; all credentials are supplied through
environment variables exported by the workflow.
"""
import base64
import json
import os
from datetime import datetime

import requests
import tweepy

# Credentials are injected as environment variables by the workflow.
# (The previous version embedded literal '${{ secrets.* }}' placeholder
# strings — GitHub Actions does not substitute those inside Python files,
# so every credential was a bogus literal.)
API_KEY = os.getenv('TWITTER_API_KEY')
API_SECRET = os.getenv('TWITTER_API_SECRET')
ACCESS_TOKEN = os.getenv('TWITTER_ACCESS_TOKEN')
ACCESS_SECRET = os.getenv('TWITTER_ACCESS_SECRET')
USERNAME = os.getenv('TWITTER_USERNAME')
SEGMENT_WRITE_KEY = os.getenv('SEGMENT_WRITE_KEY')

# Initialize Tweepy API (OAuth 1.0a user context).
auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
twitter_api = tweepy.API(auth)

# Segment HTTP tracking endpoint.
SEGMENT_ENDPOINT = 'https://api.segment.io/v1/track'

# tweepy v4 (what `pip install tweepy` resolves to) removed TweepError in
# favor of TweepyException; support both so either version works.
_TWEEPY_ERROR = getattr(tweepy, 'TweepyException', None) or getattr(tweepy, 'TweepError', Exception)


def get_follower_count(username):
    """Return the follower count for *username*, or None on Twitter API errors."""
    try:
        user = twitter_api.get_user(screen_name=username)
        return user.followers_count
    except _TWEEPY_ERROR as e:
        print(f'Error fetching follower count: {e}')
        return None


def send_data_to_segment(username, follower_count):
    """POST a 'Follower Count Update' track event for *username* to Segment."""
    current_time = datetime.now().isoformat()

    data = {
        'userId': username,
        'event': 'Follower Count Update',
        'properties': {
            'username': username,
            'follower_count': follower_count,
            'timestamp': current_time
        },
        'timestamp': current_time
    }

    # Segment uses HTTP Basic auth: the write key as username with an empty
    # password, base64-encoded. (The previous encode().decode() round-trip
    # was a no-op and produced an invalid Authorization header.)
    token = base64.b64encode(f'{SEGMENT_WRITE_KEY}:'.encode('utf-8')).decode('ascii')
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Basic {token}'
    }

    try:
        # timeout so the CI job cannot hang on a stalled connection
        response = requests.post(SEGMENT_ENDPOINT, headers=headers, data=json.dumps(data), timeout=30)

        if response.status_code == 200:
            print(f'Successfully sent data to Segment for {username}')
        else:
            print(f'Failed to send data to Segment. Status code: {response.status_code}, Response: {response.text}')
    except requests.exceptions.RequestException as e:
        print(f'Error sending data to Segment: {e}')


follower_count = get_follower_count(USERNAME)
if follower_count is not None:
    send_data_to_segment(USERNAME, follower_count)
else:
    print('Failed to retrieve follower count.')
||||||
58
tools/historical_import_cognee_stats.py
Normal file
58
tools/historical_import_cognee_stats.py
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
"""One-shot historical import of `cognee` PyPI download stats into PostHog.

Triggered manually via workflow_dispatch: pulls the full 'overall' series
from pypistats and emits one PostHog event per complete day.
"""
import os
import uuid
from datetime import datetime, timedelta

import requests
import posthog

# Credentials come from the workflow environment. The host falls back to the
# EU ingestion endpoint when the POSTHOG_API_HOST secret is not set.
# (Previously the host was hardcoded, silently ignoring the env var the
# workflow exports.)
POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')
POSTHOG_API_HOST = os.getenv('POSTHOG_API_HOST', 'https://eu.i.posthog.com')

# Initialize the module-level PostHog client.
posthog.project_api_key = POSTHOG_API_KEY
posthog.host = POSTHOG_API_HOST

# Fetch the full historical download series for the package.
package = 'cognee'
url = f'https://pypistats.org/api/packages/{package}/overall'

response = requests.get(url, timeout=30)  # timeout so the CI job cannot hang
if response.status_code != 200:
    print(f"Failed to fetch data: {response.status_code}")
    exit(1)

data = response.json()

# Exclude today and yesterday: their counts may still be incomplete.
today = datetime.utcnow().date()
yesterday = today - timedelta(days=1)

# Process and send one event per complete day.
for entry in data['data']:
    date_str = entry['date']
    date_obj = datetime.strptime(date_str, '%Y-%m-%d').date()
    downloads = entry['downloads']

    # Skip today and yesterday.
    if date_obj >= yesterday:
        continue

    # A random distinct_id keeps each day's event separate; the date itself
    # travels in the event properties.
    posthog.capture(
        distinct_id=str(uuid.uuid4()),
        event='cognee_downloads',
        properties={
            'date': date_str,
            'downloads': downloads,
        },
    )

    print(f"Data for {date_str} imported successfully.")

print("Historical data import completed.")
||||||
Loading…
Add table
Reference in a new issue