Merge pull request #139 from topoteretes/COG-332

Update the demo
This commit is contained in:
Vasilije 2024-09-30 19:10:43 +02:00 committed by GitHub
commit b94c757d77
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 342 additions and 1 deletions

View file

@ -0,0 +1,36 @@
# Sends the previous day's PyPI download count for cognee to PostHog by
# running tools/daily_pypi_downloads.py once a day.
name: analytics | Update Cognee Stats Daily

on:
  schedule:
    - cron: '0 1 * * *' # Runs every day at 01:00 UTC
  workflow_dispatch: # Allows manual triggering of the workflow

jobs:
  update_stats:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
        with:
          # The job never pushes, so do not leave the token in the git config.
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          pip install requests posthog

      # NOTE(review): the script writes last_processed_date.txt into tools/ and
      # no step here saves that file, so presumably every run starts without
      # prior state -- confirm this is intended.
      - name: Run Update Script
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_API_HOST: ${{ secrets.POSTHOG_API_HOST }}
        run: |
          cd tools  # Change to the 'tools' directory
          echo "Current working directory after changing to tools:"
          pwd  # Print the working directory again
          echo "List of folders in the tools directory:"
          ls -la  # List all files and folders in the 'tools' directory
          python daily_pypi_downloads.py  # Run the script

View file

@ -0,0 +1,38 @@
#name: analytics | Send Twitter Followers to Segment
#
#on: pull_request
#
##on:
## schedule:
## - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed.
## workflow_dispatch: # Allows manual triggering of the workflow
#
#jobs:
# send-followers:
# runs-on: ubuntu-latest
#
# steps:
# - name: Checkout repository
# uses: actions/checkout@v3
#
# - name: Set up Python
# uses: actions/setup-python@v4
# with:
# python-version: '3.x'
#
# - name: Install dependencies
# run: |
# pip install tweepy requests
#
# - name: Send Twitter Followers to Segment
# env:
# TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }}
# TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }}
# TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
# TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
# SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }}
# TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }}
# run: |
# cd tools
# python daily_twitter_stats.py
#

44
.github/workflows/posthog_pipeline.yaml vendored Normal file
View file

@ -0,0 +1,44 @@
# Collects repository statistics from the GitHub API and pushes them to
# PostHog by running tools/push_to_posthog.py once a day.
name: analytics | Push GitHub Data to PostHog

on:
  schedule:
    - cron: '0 0 * * *' # Runs every day at midnight
  workflow_dispatch:

# The job only checks out the code and reads repository metadata, so the
# GITHUB_TOKEN handed to the script is restricted to read access.
permissions:
  contents: read

jobs:
  push-data:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests posthog

      - name: Print working directory, list folders, and run script
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          echo "Current working directory:"
          pwd  # Print the current working directory
          echo "List of folders in the current directory:"
          ls -la  # List all files and folders in the current directory
          echo "Changing to tools directory..."
          cd tools  # Change to the 'tools' directory
          echo "Current working directory after changing to tools:"
          pwd  # Print the working directory again
          echo "List of folders in the tools directory:"
          ls -la  # List all files and folders in the 'tools' directory
          python push_to_posthog.py  # Run the script

View file

@ -1,4 +1,4 @@
name: Send Release to Discord
name: automation | Send Release to Discord
on:
release:

View file

@ -0,0 +1,81 @@
import uuid
import requests
import posthog
import os
from datetime import datetime, timedelta
from datetime import timezone  # for a timezone-aware "today" below

# PostHog project API key, provided through the environment.
POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')
# Use POSTHOG_API_HOST when it is set and non-empty; otherwise fall back to
# the EU cloud endpoint that was previously hard-coded.
POSTHOG_API_HOST = os.getenv('POSTHOG_API_HOST') or 'https://eu.i.posthog.com'

# Initialize PostHog client
posthog.project_api_key = POSTHOG_API_KEY
posthog.host = POSTHOG_API_HOST

# Work out which day to report. The last processed day is read from a state
# file in the current directory; without one we start from two days ago, so
# the first day processed is yesterday (UTC).
state_file = 'last_processed_date.txt'
today = datetime.now(timezone.utc).date()
if os.path.exists(state_file):
    with open(state_file, 'r') as f:
        last_processed_date = datetime.strptime(f.read().strip(), '%Y-%m-%d').date()
else:
    last_processed_date = today - timedelta(days=2)

# Only completed days are reported: stop if the next day is today or later.
next_date = last_processed_date + timedelta(days=1)
if next_date >= today:
    print("No new data to process.")
    raise SystemExit(0)
date_str = next_date.strftime('%Y-%m-%d')

# Fetch download data for the package
package = 'cognee'
url = f'https://pypistats.org/api/packages/{package}/overall'
try:
    # A timeout keeps the scheduled job from hanging on a stalled connection.
    response = requests.get(url, timeout=30)
except requests.RequestException as exc:
    print(f"Failed to fetch data: {exc}")
    raise SystemExit(1)
if response.status_code != 200:
    print(f"Failed to fetch data: {response.status_code}")
    raise SystemExit(1)
data = response.json()

# Rows are distinguished by a 'category' field ('with_mirrors' or
# 'without_mirrors'). Only the 'without_mirrors' figure is reported, so select
# that row explicitly instead of taking whichever row for the date comes
# first; otherwise a 'with_mirrors' row could be picked, no event sent, and
# the day still marked as processed.
category = 'without_mirrors'
downloads = next(
    (
        entry['downloads']
        for entry in data['data']
        if entry['date'] == date_str and entry.get('category') == category
    ),
    None,
)
if downloads is None:
    print(f"No data available for date {date_str}")
    raise SystemExit(1)

# Send the event to PostHog under a fresh random distinct_id.
posthog.capture(
    distinct_id=str(uuid.uuid4()),
    event='cognee_lib_downloads_without_mirrors',
    properties={
        'category': category,
        'date': date_str,
        'downloads': downloads,
    }
)
print(f"Data for {date_str} updated in PostHog successfully. Downloads is {downloads}")

# Record the processed day so a later run in the same directory continues
# from the following day.
with open(state_file, 'w') as f:
    f.write(date_str)

View file

@ -0,0 +1,66 @@
import tweepy
import requests
import json
from datetime import datetime
import os

# Twitter and Segment credentials are read from environment variables.
# '${{ secrets.X }}' expressions are only expanded inside workflow files, not
# inside a Python source file, so they must not be used as literals here.
API_KEY = os.getenv('TWITTER_API_KEY')
API_SECRET = os.getenv('TWITTER_API_SECRET')
ACCESS_TOKEN = os.getenv('TWITTER_ACCESS_TOKEN')
ACCESS_SECRET = os.getenv('TWITTER_ACCESS_SECRET')
USERNAME = os.getenv('TWITTER_USERNAME')
SEGMENT_WRITE_KEY = os.getenv('SEGMENT_WRITE_KEY')

# Initialize Tweepy API
auth = tweepy.OAuthHandler(API_KEY, API_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
twitter_api = tweepy.API(auth)

# Segment endpoint
SEGMENT_ENDPOINT = 'https://api.segment.io/v1/track'
def get_follower_count(username):
    """Return the follower count of the Twitter user *username*.

    Returns None (after printing the error) when the Twitter API call fails.
    """
    # Tweepy 4 renamed its base exception from TweepError to TweepyException;
    # resolve whichever one the installed version provides so the handler
    # itself cannot fail with AttributeError.
    tweepy_error = getattr(tweepy, 'TweepyException', None) or tweepy.TweepError
    try:
        user = twitter_api.get_user(screen_name=username)
    except tweepy_error as e:
        print(f'Error fetching follower count: {e}')
        return None
    return user.followers_count
def send_data_to_segment(username, follower_count):
    """Send a 'Follower Count Update' track event to Segment.

    Args:
        username: Twitter handle; used as the Segment userId and as a property.
        follower_count: Follower count to report.

    The outcome is printed; request errors are caught and printed, not raised.
    """
    from datetime import timezone

    # Timezone-aware UTC timestamp, so the value is unambiguous regardless of
    # the machine's local time zone.
    current_time = datetime.now(timezone.utc).isoformat()
    payload = {
        'userId': username,
        'event': 'Follower Count Update',
        'properties': {
            'username': username,
            'follower_count': follower_count,
            'timestamp': current_time
        },
        'timestamp': current_time
    }
    try:
        # HTTP Basic auth with the write key as user name and an empty
        # password; requests performs the base64 encoding that the previous
        # hand-built "Basic <raw key>" header was missing. json= also sets the
        # Content-Type header to application/json.
        response = requests.post(
            SEGMENT_ENDPOINT,
            json=payload,
            auth=(SEGMENT_WRITE_KEY or '', ''),
            timeout=30,
        )
    except requests.exceptions.RequestException as e:
        print(f'Error sending data to Segment: {e}')
        return
    if response.status_code == 200:
        print(f'Successfully sent data to Segment for {username}')
    else:
        print(f'Failed to send data to Segment. Status code: {response.status_code}, Response: {response.text}')
# Script body: look up the follower count and forward it to Segment, or report
# that the lookup failed.
follower_count = get_follower_count(USERNAME)
if follower_count is None:
    print('Failed to retrieve follower count.')
else:
    send_data_to_segment(USERNAME, follower_count)

76
tools/push_to_posthog.py Normal file
View file

@ -0,0 +1,76 @@
# push_to_posthog.py: fetch GitHub repository statistics and push them to PostHog
import uuid
import requests
import os
from posthog import Posthog
# Get environment variables
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
REPO = os.getenv('GITHUB_REPOSITORY')
POSTHOG_API_KEY = os.getenv('POSTHOG_API_KEY')  # Your PostHog Project API Key
# Use POSTHOG_HOST when it is set and non-empty; otherwise fall back to the
# PostHog EU cloud endpoint.
POSTHOG_HOST = os.getenv('POSTHOG_HOST') or 'https://eu.i.posthog.com'

# Headers for GitHub API requests. The Authorization header is added only
# when a token is available, so a missing token results in an unauthenticated
# request instead of the invalid credential "token None".
headers = {"Accept": "application/vnd.github.v3+json"}
if GITHUB_TOKEN:
    headers["Authorization"] = f"token {GITHUB_TOKEN}"

# Initialize PostHog client
posthog = Posthog(
    api_key=POSTHOG_API_KEY,
    host=POSTHOG_HOST
)
posthog.debug = True
def get_repo_info():
    """Fetch the repository's metadata from the GitHub REST API.

    Returns:
        The decoded JSON response as a dict, or None when the request fails
        or the API answers with a status other than 200.
    """
    url = f"https://api.github.com/repos/{REPO}"
    try:
        # A timeout keeps the scheduled job from hanging on a stalled
        # connection; network errors are reported the same way as HTTP errors.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching repo info: {exc}")
        return None
    if response.status_code != 200:
        print(f"Error fetching repo info: {response.status_code}")
        return None
    return response.json()
def main():
    """Send the repository's current GitHub statistics to PostHog.

    Raises:
        SystemExit: with status 1 when the repository information could not
            be retrieved, so that the calling job is marked as failed.
    """
    try:
        repo_info = get_repo_info()
        if not repo_info:
            print("Failed to retrieve repository information.")
            raise SystemExit(1)

        # 'license' may be absent or null in the API response.
        license_info = repo_info.get('license') or {}

        # Prepare data to send to PostHog
        properties = {
            'repo_name': repo_info.get('full_name'),
            'stars': repo_info.get('stargazers_count'),
            'forks': repo_info.get('forks_count'),
            'open_issues': repo_info.get('open_issues_count'),
            'watchers': repo_info.get('subscribers_count'),
            'created_at': repo_info.get('created_at'),
            'updated_at': repo_info.get('updated_at'),
            'pushed_at': repo_info.get('pushed_at'),
            'language': repo_info.get('language'),
            'license': license_info.get('name'),
            'topics': repo_info.get('topics')
        }
        print("Repository information: ", properties)

        # Each run reports under a fresh random identifier.
        distinct_id = str(uuid.uuid4())

        # Send event to PostHog
        result = posthog.capture(
            distinct_id=distinct_id,
            event='cognee_lib_github_repo_stats',
            properties=properties
        )
        print("PostHog response: ", result)
        print("Data sent to PostHog successfully.")
    finally:
        # Always close the PostHog client, including on the error paths above.
        posthog.shutdown()


if __name__ == "__main__":
    main()