
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# =============================================================================
# Load Cleaned Data
# =============================================================================
df = pd.read_csv('temp_files/cleaned.csv', parse_dates=['sys_created_on', 'closed_at'])
df['priority_num'] = df['priority'].str.extract(r'(\d+)').astype(int)

print("=" * 70)
print("PHASE 3A: DEEP STATISTICAL ANALYSIS")
print("=" * 70)

# =============================================================================
# 1. MTTR Distribution Modeling with Percentile Benchmarking
# =============================================================================
print("\\n--- 1. MTTR Distribution Modeling & Percentile Benchmarking ---")

resolved_df = df[df['is_resolved'] == True].copy()

# Percentile analysis per Tower
tower_mttr = resolved_df.groupby('Tower')['mttr_hours'].agg([
    ('count', 'count'),
    ('mean', 'mean'),
    ('median', 'median'),
    ('p25', lambda x: x.quantile(0.25)),
    ('p50', lambda x: x.quantile(0.50)),
    ('p75', lambda x: x.quantile(0.75)),
    ('p90', lambda x: x.quantile(0.90)),
    ('p95', lambda x: x.quantile(0.95)),
    ('p99', lambda x: x.quantile(0.99))
]).reset_index()

tower_mttr = tower_mttr.sort_values('median', ascending=False)
tower_mttr.to_csv('temp_files/mttr_tower_benchmarks.csv', index=False)
print(f"Tower MTTR benchmarks saved. Top 5 slowest towers by median:")
print(tower_mttr[['Tower', 'count', 'median', 'p90', 'p95']].head())

# Percentile analysis per Domain
domain_mttr = resolved_df.groupby('Domain')['mttr_hours'].agg([
    ('count', 'count'),
    ('mean', 'mean'),
    ('median', 'median'),
    ('p25', lambda x: x.quantile(0.25)),
    ('p50', lambda x: x.quantile(0.50)),
    ('p75', lambda x: x.quantile(0.75)),
    ('p90', lambda x: x.quantile(0.90)),
    ('p95', lambda x: x.quantile(0.95)),
    ('p99', lambda x: x.quantile(0.99))
]).reset_index()

domain_mttr = domain_mttr.sort_values('median', ascending=False)
domain_mttr.to_csv('temp_files/mttr_domain_benchmarks.csv', index=False)
print(f"\\nDomain MTTR benchmarks saved. Results:")
print(domain_mttr[['Domain', 'count', 'median', 'p90', 'p95']])

# Plot: MTTR Percentile Benchmarking by Tower (Top 15)
top_towers = tower_mttr.head(15).copy()
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=top_towers['Tower'], y=top_towers['p50'], mode='lines+markers', name='P50 (Median)', line=dict(color='#2E86AB', width=3)))
fig1.add_trace(go.Scatter(x=top_towers['Tower'], y=top_towers['p75'], mode='lines+markers', name='P75', line=dict(color='#F18F01', width=2)))
fig1.add_trace(go.Scatter(x=top_towers['Tower'], y=top_towers['p90'], mode='lines+markers', name='P90', line=dict(color='#C73E1D', width=2)))
fig1.add_trace(go.Scatter(x=top_towers['Tower'], y=top_towers['p95'], mode='lines+markers', name='P95', line=dict(color='#8B0000', width=2, dash='dash')))

fig1.update_layout(
    title='MTTR Percentile Benchmarking by Tower (Top 15 Slowest)',
    xaxis_title='Tower', yaxis_title='MTTR (Hours)',
    xaxis_tickangle=-45,
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1),
    template='plotly_white'
)
fig1.write_html('assets/images/html/mttr_tower_benchmarks.html')
fig1.write_image('assets/images/png/mttr_tower_benchmarks.png', width=1200, height=650, scale=2)
print("Saved: mttr_tower_benchmarks")

# Plot: MTTR Percentile Benchmarking by Domain
fig2 = go.Figure()
fig2.add_trace(go.Bar(x=domain_mttr['Domain'], y=domain_mttr['p50'], name='P50 (Median)', marker_color='#2E86AB'))
fig2.add_trace(go.Bar(x=domain_mttr['Domain'], y=domain_mttr['p75'], name='P75', marker_color='#F18F01'))
fig2.add_trace(go.Bar(x=domain_mttr['Domain'], y=domain_mttr['p90'], name='P90', marker_color='#C73E1D'))

fig2.update_layout(
    title='MTTR Percentile Benchmarking by Domain',
    xaxis_title='Domain', yaxis_title='MTTR (Hours)',
    xaxis_tickangle=-30,
    barmode='group',
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1),
    template='plotly_white'
)
fig2.write_html('assets/images/html/mttr_domain_benchmarks.html')
fig2.write_image('assets/images/png/mttr_domain_benchmarks.png', width=1100, height=600, scale=2)
print("Saved: mttr_domain_benchmarks")

# Survival-style analysis: Percentage of tickets resolved within X hours
time_thresholds = [4, 8, 24, 48, 72, 168, 336, 720]
survival_data = []

for tower in resolved_df['Tower'].unique():
    tower_data = resolved_df[resolved_df['Tower'] == tower]['mttr_hours']
    n = len(tower_data)
    row = {'Tower': tower, 'count': n}
    for t in time_thresholds:
        pct = (tower_data <= t).mean() * 100
        row[f'within_{t}h'] = pct
    survival_data.append(row)

survival_df = pd.DataFrame(survival_data)
survival_df.to_csv('temp_files/mttr_survival_analysis.csv', index=False)

# Plot survival curves for top 10 towers by volume
top10_towers = resolved_df['Tower'].value_counts().head(10).index.tolist()
survival_top10 = survival_df[survival_df['Tower'].isin(top10_towers)].copy()

fig3 = go.Figure()
colors = px.colors.qualitative.Set3
for i, tower in enumerate(top10_towers):
    tower_row = survival_top10[survival_top10['Tower'] == tower].iloc[0]
    y_vals = [tower_row[f'within_{t}h'] for t in time_thresholds]
    fig3.add_trace(go.Scatter(x=time_thresholds, y=y_vals, mode='lines+markers', name=tower, line=dict(color=colors[i % len(colors)], width=2)))

fig3.update_layout(
    title='Resolution Survival Curves - Top 10 Towers by Volume',
    xaxis_title='Time (Hours)', yaxis_title='% Tickets Resolved Within Threshold',
    xaxis_type='log',
    legend=dict(orientation='v', yanchor='top', y=1, xanchor='left', x=1.02),
    template='plotly_white',
    height=650, width=1100
)
fig3.write_html('assets/images/html/mttr_survival_curves.html')
fig3.write_image('assets/images/png/mttr_survival_curves.png', width=1100, height=650, scale=2)
print("Saved: mttr_survival_curves")

# =============================================================================
# 2. Open Ticket Aging and Backlog Trajectory Analysis
# =============================================================================
print("\\n--- 2. Open Ticket Aging & Backlog Trajectory Analysis ---")

open_df = df[df['is_resolved'] == False].copy()
open_df['age_days'] = open_df['ticket_age_hours'] / 24

# Age cohort analysis
age_cohorts = [0, 30, 60, 90, 180, 270, 365, 500, 1000]
age_labels = ['<30d', '30-60d', '60-90d', '90-180d', '180-270d', '270-365d', '365-500d', '500d+']
open_df['age_cohort'] = pd.cut(open_df['age_days'], bins=age_cohorts, labels=age_labels, include_lowest=True)

cohort_summary = open_df['age_cohort'].value_counts().sort_index().reset_index()
cohort_summary.columns = ['age_cohort', 'count']
cohort_summary['pct'] = cohort_summary['count'] / cohort_summary['count'].sum() * 100
print(f"\\nOpen Ticket Age Cohorts:")
print(cohort_summary)

fig4 = px.bar(cohort_summary, x='age_cohort', y='count', color='count',
              title='Open Ticket Backlog by Age Cohort',
              color_continuous_scale='Reds',
              text=cohort_summary['pct'].apply(lambda x: f'{x:.1f}%'))
fig4.update_layout(xaxis_title='Age Cohort', yaxis_title='Ticket Count', template='plotly_white')
fig4.write_html('assets/images/html/backlog_age_cohorts.html')
fig4.write_image('assets/images/png/backlog_age_cohorts.png', width=1000, height=600, scale=2)
print("Saved: backlog_age_cohorts")

# Backlog by assignment group (top 20)
backlog_by_group = open_df.groupby('assignment_group').agg(
    open_count=('Tkt #', 'count'),
    median_age=('age_days', 'median'),
    max_age=('age_days', 'max')
).reset_index().sort_values('open_count', ascending=False).head(20)

fig5 = px.scatter(backlog_by_group, x='median_age', y='open_count', size='max_age',
                  hover_data=['assignment_group'],
                  title='Open Ticket Backlog: Volume vs Median Age by Assignment Group (Top 20)',
                  color='open_count', color_continuous_scale='Reds')
fig5.update_layout(xaxis_title='Median Age (Days)', yaxis_title='Open Ticket Count', template='plotly_white')
fig5.write_html('assets/images/html/backlog_group_scatter.html')
fig5.write_image('assets/images/png/backlog_group_scatter.png', width=1000, height=600, scale=2)
print("Saved: backlog_group_scatter")

# Backlog trajectory by Tower
backlog_tower = open_df.groupby('Tower').agg(
    open_count=('Tkt #', 'count'),
    median_age=('age_days', 'median')
).reset_index().sort_values('open_count', ascending=False)

fig6 = px.bar(backlog_tower, x='Tower', y='open_count', color='median_age',
              title='Open Ticket Backlog by Tower (Color = Median Age in Days)',
              color_continuous_scale='RdYlBu_r')
fig6.update_layout(xaxis_title='Tower', yaxis_title='Open Ticket Count', xaxis_tickangle=-45, template='plotly_white')
fig6.write_html('assets/images/html/backlog_tower.html')
fig6.write_image('assets/images/png/backlog_tower.png', width=1100, height=600, scale=2)
print("Saved: backlog_tower")

# Monthly creation vs closure trajectory
df['created_month'] = df['sys_created_on'].dt.to_period('M')
df['closed_month'] = df['closed_at'].dt.to_period('M')

monthly_created = df.groupby('created_month').size().reset_index(name='created')
monthly_created['created_month_str'] = monthly_created['created_month'].astype(str)
monthly_closed = df[df['is_resolved']].groupby('closed_month').size().reset_index(name='closed')
monthly_closed['closed_month_str'] = monthly_closed['closed_month'].astype(str)

# Align months
all_months = pd.period_range(start=df['sys_created_on'].min(), end=df['sys_created_on'].max(), freq='M')
trajectory = pd.DataFrame({'month': all_months})
trajectory['month_str'] = trajectory['month'].astype(str)
trajectory = trajectory.merge(monthly_created, left_on='month', right_on='created_month', how='left')
trajectory = trajectory.merge(monthly_closed, left_on='month', right_on='closed_month', how='left')
trajectory['created'] = trajectory['created'].fillna(0).astype(int)
trajectory['closed'] = trajectory['closed'].fillna(0).astype(int)
trajectory['net_backlog_change'] = trajectory['created'] - trajectory['closed']
trajectory['cumulative_backlog'] = trajectory['net_backlog_change'].cumsum()

print(f"\\nBacklog Trajectory (last 6 months):")
print(trajectory[['month_str', 'created', 'closed', 'net_backlog_change', 'cumulative_backlog']].tail(6))

fig7 = make_subplots(specs=[[{"secondary_y": True}]])
fig7.add_trace(go.Bar(x=trajectory['month_str'], y=trajectory['created'], name='Created', marker_color='#2E86AB'), secondary_y=False)
fig7.add_trace(go.Bar(x=trajectory['month_str'], y=trajectory['closed'], name='Closed', marker_color='#4CAF50'), secondary_y=False)
fig7.add_trace(go.Scatter(x=trajectory['month_str'], y=trajectory['cumulative_backlog'], name='Cumulative Backlog', mode='lines+markers', line=dict(color='#C73E1D', width=3)), secondary_y=True)

fig7.update_layout(
    title='Monthly Incident Creation vs Closure & Cumulative Backlog Trajectory',
    xaxis_title='Month', template='plotly_white',
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1),
    barmode='group'
)
fig7.update_yaxes(title_text='Ticket Count', secondary_y=False)
fig7.update_yaxes(title_text='Cumulative Backlog', secondary_y=True)
fig7.write_html('assets/images/html/backlog_trajectory.html')
fig7.write_image('assets/images/png/backlog_trajectory.png', width=1100, height=600, scale=2)
print("Saved: backlog_trajectory")

# =============================================================================
# 3. Incident State Transition Latency Analysis
# =============================================================================
print("\\n--- 3. Incident State Transition Latency Analysis ---")

# Since we don't have full transition history, we analyze:
# a) Distribution of tickets by state and their age
# b) State dwell time inference from creation/closure vs current state

state_analysis = df.groupby('incident_state').agg(
    count=('Tkt #', 'count'),
    resolved_count=('is_resolved', 'sum'),
    median_mttr=('mttr_hours', lambda x: x[x.notna()].median()),
    median_age=('ticket_age_hours', lambda x: x[x.notna()].median() / 24)
).reset_index()

# For open tickets, compute median age per state
open_state_analysis = open_df.groupby('incident_state').agg(
    open_count=('Tkt #', 'count'),
    median_age_days=('age_days', 'median'),
    p75_age_days=('age_days', lambda x: x.quantile(0.75)),
    p90_age_days=('age_days', lambda x: x.quantile(0.90))
).reset_index()

state_analysis = state_analysis.merge(open_state_analysis, on='incident_state', how='left')
state_analysis.to_csv('temp_files/state_latency_analysis.csv', index=False)

print(f"\\nState Latency Analysis:")
print(state_analysis[['incident_state', 'count', 'median_mttr', 'median_age_days', 'p90_age_days']])

# Plot: State dwell times (median age for open tickets, median MTTR for closed)
fig8 = make_subplots(rows=1, cols=2, subplot_titles=('Open Tickets: Median Age by State', 'Resolved Tickets: Median MTTR by State'))

# Left: Open ticket ages
open_states = state_analysis[state_analysis['open_count'].notna()].copy()
fig8.add_trace(go.Bar(x=open_states['incident_state'], y=open_states['median_age_days'], name='Median Age (Days)', marker_color='#E94F37'), row=1, col=1)

# Right: Resolved MTTR
resolved_states = state_analysis[state_analysis['median_mttr'].notna()].copy()
fig8.add_trace(go.Bar(x=resolved_states['incident_state'], y=resolved_states['median_mttr'], name='Median MTTR (Hours)', marker_color='#2E86AB'), row=1, col=2)

fig8.update_layout(height=550, width=1200, title_text='Incident State Transition Latency Analysis', template='plotly_white', showlegend=False)
fig8.write_html('assets/images/html/state_latency_analysis.html')
fig8.write_image('assets/images/png/state_latency_analysis.png', width=1200, height=550, scale=2)
print("Saved: state_latency_analysis")

# State Machine Friction Analysis: Tickets stuck in intermediate states
intermediate_states = ['Assigned', 'Work in Progress', 'Pending']
friction_df = df[df['incident_state'].isin(intermediate_states)].copy()
friction_summary = friction_df.groupby('incident_state').agg(
    count=('Tkt #', 'count'),
    median_age=('ticket_age_hours', lambda x: x[x.notna()].median() / 24),
    p90_age=('ticket_age_hours', lambda x: x[x.notna()].quantile(0.90) / 24),
    pct_of_total=('Tkt #', lambda x: len(x) / len(df) * 100)
).reset_index()

print(f"\\nState Machine Friction (Intermediate States):")
print(friction_summary)

fig9 = px.bar(friction_summary, x='incident_state', y='count', color='median_age',
              title='State Machine Friction: Tickets Stuck in Intermediate States',
              color_continuous_scale='RdYlBu_r', text=friction_summary['pct_of_total'].apply(lambda x: f'{x:.1f}%'))
fig9.update_layout(xaxis_title='Incident State', yaxis_title='Ticket Count', template='plotly_white')
fig9.write_html('assets/images/html/state_machine_friction.html')
fig9.write_image('assets/images/png/state_machine_friction.png', width=900, height=550, scale=2)
print("Saved: state_machine_friction")

# Resolution velocity by state: How long to go from New -> Closed
# Analyze resolved tickets: time from creation to closure by their final state
resolved_by_final_state = resolved_df.groupby('incident_state')['mttr_hours'].agg([
    ('count', 'count'),
    ('median', 'median'),
    ('mean', 'mean'),
    ('p75', lambda x: x.quantile(0.75)),
    ('p90', lambda x: x.quantile(0.90))
]).reset_index()

print(f"\\nResolution Velocity by Final State:")
print(resolved_by_final_state)

fig10 = px.bar(resolved_by_final_state, x='incident_state', y='median', color='count',
               title='Resolution Velocity by Final State (Median MTTR)',
               color_continuous_scale='Blues')
fig10.update_layout(xaxis_title='Final Incident State', yaxis_title='Median MTTR (Hours)', template='plotly_white')
fig10.write_html('assets/images/html/resolution_velocity_state.html')
fig10.write_image('assets/images/png/resolution_velocity_state.png', width=900, height=550, scale=2)
print("Saved: resolution_velocity_state")

print("\\n" + "=" * 70)
print("DEEP STATISTICAL ANALYSIS COMPLETE")
print("=" * 70)
