Add tasks for segment sync and posthog sync

This commit is contained in:
Vasilije 2024-09-29 21:22:55 +02:00
parent b436b4af4c
commit 168b4d96a1
6 changed files with 302 additions and 0 deletions

View file

@ -0,0 +1,36 @@
---
name: Update Cognee Stats Daily

on:
  schedule:
    - cron: '0 1 * * *'  # Runs every day at 01:00 UTC

jobs:
  update_stats:
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): persist-credentials is disabled, but the script writes a
      # last_processed_date.txt state file that would need to be committed back
      # to persist between runs — confirm how state is meant to survive.
      - name: Checkout Repository
        uses: actions/checkout@v3
        with:
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: pip install requests posthog

      - name: Run Update Script
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
        # Debug echo/pwd/ls scaffolding removed; the script lives in tools/.
        run: |
          cd tools
          python daily_pypi_downloads.py

View file

@ -0,0 +1,36 @@
---
name: Send Twitter Followers to Segment

on:
  schedule:
    - cron: '0 0 * * *'  # Runs daily at midnight UTC. Adjust as needed.
  workflow_dispatch:  # Allows manual triggering of the workflow

jobs:
  send-followers:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: pip install tweepy requests

      # Secrets are exported as env vars for the script to read.
      - name: Send Twitter Followers to Segment
        env:
          TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }}
          TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }}
          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
          TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
          SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }}
          TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }}
        run: |
          cd tools
          python daily_twitter_stats.py

View file

@ -0,0 +1,29 @@
---
name: Historical Import of Cognee Stats

on:
  workflow_dispatch:  # Manual one-shot backfill; no schedule.

jobs:
  import_stats:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: pip install requests posthog

      - name: Run Historical Import Script
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
        run: |
          cd tools
          python historical_import_cognee_stats.py

View file

@ -0,0 +1,77 @@
"""Send yesterday's PyPI download count for `cognee` to PostHog.

Runs daily from CI. Processes exactly one day per run (the day after the last
processed date) and records that date in a state file.
"""
import os
import uuid
from datetime import datetime, timedelta

import requests
import posthog

# PostHog project API key comes from the workflow environment.
POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')
# Honor POSTHOG_API_HOST from the environment (the workflow exports it as a
# secret); fall back to the EU ingestion host previously hard-coded here.
POSTHOG_API_HOST = os.getenv('POSTHOG_API_HOST', 'https://eu.i.posthog.com')

# Initialize PostHog client.
posthog.project_api_key = POSTHOG_API_KEY
posthog.host = POSTHOG_API_HOST

# File recording the last date already sent to PostHog.
# NOTE(review): the CI workspace is recreated on every run, so this file does
# not persist between runs unless it is committed back to the repository.
STATE_FILE = 'last_processed_date.txt'


def read_last_processed_date(state_file=STATE_FILE):
    """Return the last processed date; defaults to 2 days ago if no state file."""
    if os.path.exists(state_file):
        with open(state_file, 'r') as f:
            return datetime.strptime(f.read().strip(), '%Y-%m-%d')
    return datetime.utcnow() - timedelta(days=2)


def find_downloads(data, date_str):
    """Return the download count for `date_str` in a pypistats 'overall'
    payload, or None when that date is absent."""
    for entry in data.get('data', []):
        if entry['date'] == date_str:
            return entry['downloads']
    return None


def main():
    last_processed_date = read_last_processed_date()

    # Process the day after the last processed one; never process today,
    # since pypistats data for the current day is still incomplete.
    next_date = last_processed_date + timedelta(days=1)
    today = datetime.utcnow().date()
    if next_date.date() >= today:
        print("No new data to process.")
        raise SystemExit(0)

    date_str = next_date.strftime('%Y-%m-%d')

    # Fetch overall download data for the package.
    package = 'cognee'
    url = f'https://pypistats.org/api/packages/{package}/overall'
    # Timeout added so a hung request cannot stall the CI job indefinitely.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        raise SystemExit(1)

    downloads = find_downloads(response.json(), date_str)
    if downloads is None:
        print(f"No data available for date {date_str}")
        raise SystemExit(1)

    # Random distinct_id per event, as in the original. (The original also
    # built a `message_id` string that was never passed to capture(); that
    # dead variable is removed here.)
    distinct_id = str(uuid.uuid4())

    posthog.capture(
        distinct_id=distinct_id,
        event='cognee_downloads',
        properties={
            'date': date_str,
            'downloads': downloads,
        }
    )
    print(f"Data for {date_str} updated in PostHog successfully.")

    # Record the date we just processed.
    with open(STATE_FILE, 'w') as f:
        f.write(date_str)


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,66 @@
"""Fetch the Twitter follower count and forward it to Segment as a track event."""
import base64
import json
import os
from datetime import datetime

import requests
import tweepy

# Credentials are read from the environment: the workflow exports each secret
# as an env var. GitHub's `${{ secrets.* }}` syntax is only expanded inside
# workflow files — in a Python source it is just a literal string, so the
# original hard-coded placeholders could never authenticate.
API_KEY = os.getenv('TWITTER_API_KEY')
API_SECRET = os.getenv('TWITTER_API_SECRET')
ACCESS_TOKEN = os.getenv('TWITTER_ACCESS_TOKEN')
ACCESS_SECRET = os.getenv('TWITTER_ACCESS_SECRET')
USERNAME = os.getenv('TWITTER_USERNAME')
SEGMENT_WRITE_KEY = os.getenv('SEGMENT_WRITE_KEY')

# Initialize Tweepy API (OAuth 1.0a user context, Twitter API v1.1).
auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
twitter_api = tweepy.API(auth)

# Segment HTTP Tracking API endpoint.
SEGMENT_ENDPOINT = 'https://api.segment.io/v1/track'


def get_follower_count(username):
    """Return the follower count for `username`, or None on any Twitter API error."""
    try:
        user = twitter_api.get_user(screen_name=username)
        return user.followers_count
    except tweepy.errors.TweepyException as e:
        # tweepy.TweepError was removed in tweepy v4; TweepyException is the
        # current base class for all tweepy errors.
        print(f'Error fetching follower count: {e}')
        return None


def send_data_to_segment(username, follower_count):
    """POST a 'Follower Count Update' track event for `username` to Segment."""
    current_time = datetime.now().isoformat()
    data = {
        'userId': username,
        'event': 'Follower Count Update',
        'properties': {
            'username': username,
            'follower_count': follower_count,
            'timestamp': current_time
        },
        'timestamp': current_time
    }
    # Segment authenticates with HTTP Basic using the write key as the
    # username and an empty password, i.e. base64("<write_key>:"). The
    # original `.encode("utf-8").decode("utf-8")` was a no-op and never
    # produced a valid credential.
    token = base64.b64encode(f'{SEGMENT_WRITE_KEY}:'.encode('utf-8')).decode('ascii')
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Basic {token}'
    }
    try:
        # Timeout added so a hung request cannot stall the CI job indefinitely.
        response = requests.post(
            SEGMENT_ENDPOINT, headers=headers, data=json.dumps(data), timeout=30
        )
        if response.status_code == 200:
            print(f'Successfully sent data to Segment for {username}')
        else:
            print(f'Failed to send data to Segment. Status code: {response.status_code}, Response: {response.text}')
    except requests.exceptions.RequestException as e:
        print(f'Error sending data to Segment: {e}')


def main():
    follower_count = get_follower_count(USERNAME)
    if follower_count is not None:
        send_data_to_segment(USERNAME, follower_count)
    else:
        print('Failed to retrieve follower count.')


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,58 @@
"""One-shot backfill: import historical PyPI download stats for `cognee`
into PostHog, skipping today and yesterday (counts still incomplete)."""
import os
import uuid
from datetime import datetime, timedelta

import requests
import posthog

# PostHog project API key comes from the workflow environment.
POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')
# Honor POSTHOG_API_HOST from the environment (the workflow exports it as a
# secret); fall back to the EU ingestion host previously hard-coded here.
POSTHOG_API_HOST = os.getenv('POSTHOG_API_HOST', 'https://eu.i.posthog.com')

# Initialize PostHog client.
posthog.project_api_key = POSTHOG_API_KEY
posthog.host = POSTHOG_API_HOST


def fetch_overall_stats(package):
    """Fetch the pypistats 'overall' payload for `package`; exit(1) on failure."""
    url = f'https://pypistats.org/api/packages/{package}/overall'
    # Timeout added so a hung request cannot stall the CI job indefinitely.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code}")
        raise SystemExit(1)
    return response.json()


def main():
    data = fetch_overall_stats('cognee')

    # Skip today and yesterday: their counts are still incomplete, and the
    # daily sync job will pick them up once final.
    today = datetime.utcnow().date()
    yesterday = today - timedelta(days=1)

    for entry in data['data']:
        date_str = entry['date']
        date_obj = datetime.strptime(date_str, '%Y-%m-%d').date()
        downloads = entry['downloads']

        if date_obj >= yesterday:
            continue

        # Random distinct_id per event, as in the original. (The original
        # also built a `message_id` string that was never passed to
        # capture(); that dead variable is removed here.)
        distinct_id = str(uuid.uuid4())

        posthog.capture(
            distinct_id=distinct_id,
            event='cognee_downloads',
            properties={
                'date': date_str,
                'downloads': downloads,
            }
        )
        print(f"Data for {date_str} imported successfully.")

    print("Historical data import completed.")


if __name__ == '__main__':
    main()