# Analyse how roster changes relate to win rate for the five most active
# teams over the last year, then plot the result.
#
# NOTE(review): every pipeline below looks only at the ``team1`` side of a
# match (``team1.name`` / ``players.team1``). Matches where one of these teams
# is stored on the other side are not counted — confirm this is intended.

# Define the date range for the last year.
end_date = datetime.now()
start_date = end_date - timedelta(days=365)

# The queries compare ``date`` against epoch-millisecond integers; compute the
# bounds once so every pipeline uses exactly the same window.
start_ms = int(start_date.timestamp() * 1000)
end_ms = int(end_date.timestamp() * 1000)
date_range = {"$gte": start_ms, "$lte": end_ms}

# Top 5 teams by number of matches in the window.
# No ``$unwind`` on the player array here: unwinding before counting would
# add one to the total per *player* rather than per match.
top_teams_pipeline = [
    {"$match": {"date": date_range}},
    {"$group": {"_id": "$team1.name", "totalMatches": {"$sum": 1}}},
    # Secondary key keeps the top 5 deterministic when match counts tie.
    {"$sort": {"totalMatches": -1, "_id": 1}},
    {"$limit": 5},
]

top_teams_results = list(collection.aggregate(top_teams_pipeline))
top_teams = [team["_id"] for team in top_teams_results]

# Roster snapshots: for each (team, date), the set of player names fielded,
# then collected into one list of snapshots per team.
roster_changes_pipeline = [
    {"$match": {"date": date_range, "team1.name": {"$in": top_teams}}},
    {"$unwind": "$players.team1"},
    {
        "$group": {
            "_id": {"team": "$team1.name", "date": "$date"},
            "players": {"$addToSet": "$players.team1.name"},
        }
    },
    {
        "$group": {
            "_id": "$_id.team",
            "rosterChanges": {
                "$push": {"date": "$_id.date", "players": "$players"}
            },
        }
    },
]

roster_changes_results = list(collection.aggregate(roster_changes_pipeline))

# Flag, for every snapshot after a team's first, whether the roster differs
# from the previous one.
roster_changes = []
for team in roster_changes_results:
    # The aggregation gives no ordering guarantee for the pushed snapshots,
    # so order them chronologically before comparing neighbours.
    snapshots = sorted(team["rosterChanges"], key=lambda snap: snap["date"])
    for previous, current in zip(snapshots, snapshots[1:]):
        roster_changes.append(
            {
                "team": team["_id"],
                "date": current["date"],
                # ``$addToSet`` returns its array in unspecified order, so
                # compare as sets: a reordering is not a roster change.
                "roster_change": set(current["players"])
                != set(previous["players"]),
            }
        )

# Explicit columns keep the merge keys present even when there are no rows.
roster_changes_df = pd.DataFrame(
    roster_changes, columns=["team", "date", "roster_change"]
)

# Win rate per (team, date) for the same teams and window.
# NOTE(review): grouping is by the exact ``date`` value; if each match has a
# unique timestamp every group holds one match and winRate is 0 or 100 —
# confirm whether bucketing by day/week was intended.
performance_pipeline = [
    {"$match": {"date": date_range, "team1.name": {"$in": top_teams}}},
    {
        "$group": {
            "_id": {"team": "$team1.name", "date": "$date"},
            "totalMatches": {"$sum": 1},
            "wins": {
                "$sum": {
                    "$cond": [
                        {"$eq": ["$winnerTeam.name", "$team1.name"]},
                        1,
                        0,
                    ]
                }
            },
        }
    },
    {
        "$project": {
            "_id": 0,
            "team": "$_id.team",
            "date": "$_id.date",
            "totalMatches": 1,
            "wins": 1,
            "winRate": {
                "$multiply": [{"$divide": ["$wins", "$totalMatches"]}, 100]
            },
        }
    },
]

performance_results = list(collection.aggregate(performance_pipeline))
performance_df = pd.DataFrame(
    performance_results,
    columns=["team", "date", "totalMatches", "wins", "winRate"],
)

# Merge the roster changes DataFrame with the performance DataFrame.
merged_df = pd.merge(roster_changes_df, performance_df, on=["team", "date"])

if merged_df.empty:
    # Nothing matched in the window (or no team had two snapshots to compare);
    # an empty plot would be meaningless, so report and stop here.
    print("No roster/performance data found for the last year; skipping plot.")
else:
    # Convert timestamp to datetime for plotting.
    merged_df["date"] = pd.to_datetime(merged_df["date"], unit="ms")

    # Plot the impact of roster changes on team performance.
    plt.figure(figsize=(12, 8))
    sns.lineplot(
        x="date",
        y="winRate",
        hue="roster_change",
        style="team",
        data=merged_df,
        markers=True,
    )
    plt.title(
        "Impact of Roster Changes on Team Dynamics (Top 5 Teams, Last Year)"
    )
    plt.xlabel("Date")
    plt.ylabel("Win Rate (%)")
    plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left", title="Roster Change")
    plt.tight_layout()
    plt.show()