# Files
# ETL_Datamanagement/dashboard_update.py
# Philipp Jacoby 2384a33be2 init
# 2026-01-10 16:18:54 +01:00
#
# 559 lines
# 22 KiB
# Python

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import networkx as nx
from pathlib import Path
# ---------------------------------------------------------------------------
# Page setup
# ---------------------------------------------------------------------------
st.set_page_config(layout="wide", page_title="Hetionet Dashboard")

# Stylesheet injected into the page: spaces out the tab strip, sizes the
# individual tabs, and defines the grey "stat-box" card class.
_custom_css = """
<style>
.stTabs [data-baseweb="tab-list"] {
gap: 24px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
padding-left: 20px;
padding-right: 20px;
}
.stat-box {
background: grey;
padding: 20px;
border-radius: 10px;
color: white;
text-align: center;
margin: 10px 0;
}
</style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)

st.title("Hetionet Drug Analysis Dashboard")

# ---------------------------------------------------------------------------
# Sidebar: global search inputs, then page navigation
# ---------------------------------------------------------------------------
_sidebar = st.sidebar

_sidebar.header("Global Search")
_search_kinds = ["Gene", "Disease", "Drug"]
search_type = _sidebar.selectbox("Search for:", _search_kinds)
# The prompt of the text box follows whichever entity kind is selected.
search_term = _sidebar.text_input("Enter {} name:".format(search_type))

_sidebar.markdown("---")

_sidebar.header("Navigation")
_analysis_pages = [
    "Overview",
    "Hotspot Genes",
    "Drug Repurposing",
    "Polypharmacy Risk",
    "Symptom Triangle",
    "Super Drugs",
    "Drug Conflicts",
    "Network Graph",
    "Compare Drugs",
]
page = _sidebar.radio("Select Analysis:", _analysis_pages)
# Data loading, sidebar search results and page rendering.
#
# Each page lives in its own private function; the try/except at the bottom
# loads the CSV exports and dispatches on the sidebar selection.
#
# NOTE(review): the nesting of the result tables / download buttons relative
# to the column layouts is assumed to be "below the columns, full width" on
# the Hotspot Genes, Polypharmacy Risk, Super Drugs and Drug Conflicts pages.
# Confirm against the intended layout.

# Plotly toolbar options shared by every chart: keep the mode bar visible
# and hide the Plotly logo.
_PLOTLY_CONFIG = {'displayModeBar': True, 'displaylogo': False}


def _read_optional_csv(path):
    """Return the CSV at *path* as a DataFrame, or None if it cannot be read.

    Used for analysis files that may not have been generated yet. Only
    I/O failures (OSError, e.g. a missing file) and parse/decoding failures
    (pandas parser errors and UnicodeDecodeError are ValueError subclasses)
    are treated as "not available"; anything else propagates to the caller.
    """
    try:
        return pd.read_csv(path)
    except (OSError, ValueError):
        return None


def _show_chart(fig):
    """Render *fig* at container width with the shared toolbar options."""
    st.plotly_chart(fig, use_container_width=True, config=_PLOTLY_CONFIG)


def _offer_csv_download(label, frame, file_name):
    """Add a download button serving *frame* as UTF-8 encoded CSV."""
    payload = frame.to_csv(index=False).encode('utf-8')
    st.download_button(label, payload, file_name, "text/csv")


def _stat_box(title, value, caption):
    """Render one "stat-box" card showing *value* with one decimal place."""
    st.markdown(
        '\n<div class="stat-box">\n'
        f'<h3>{title}</h3>\n'
        f'<h1>{value:.1f}</h1>\n'
        f'<p>{caption}</p>\n'
        '</div>\n',
        unsafe_allow_html=True,
    )


def _render_search_results(search_type, search_term, genes, diseases, super_drugs):
    """Show up to five name matches for *search_term* in the sidebar.

    The term is matched as a case-insensitive literal substring, so
    characters such as "(" or "+" in the user's input cannot raise a
    regular-expression error.
    """
    st.sidebar.markdown("---")
    st.sidebar.subheader("Search Results")

    # search kind -> (table to search, plural noun, per-row detail text)
    targets = {
        "Gene": (genes, "genes", lambda row: f"{row['num_diseases']} diseases"),
        "Disease": (diseases, "diseases", lambda row: f"{row['num_symptoms']} symptoms"),
        "Drug": (super_drugs, "drugs", lambda row: f"Score {row['super_score']:.2f}"),
    }
    if search_type not in targets:
        return
    frame, noun, describe = targets[search_type]

    hits = frame[frame['name'].str.contains(search_term, case=False, na=False, regex=False)]
    if hits.empty:
        st.sidebar.warning(f"No {noun} found")
        return

    st.sidebar.success(f"Found {len(hits)} {noun}")
    for _, row in hits.head(5).iterrows():
        st.sidebar.write(f"**{row['name']}**: {describe(row)}")


def _render_overview(genes, diseases, repurposing, polypharmacy, super_drugs):
    """Overview page: headline counts, three averages, two top-10 bar charts."""
    st.header("Dataset Overview")
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Total Genes", f"{len(genes):,}")
    col2.metric("Total Diseases", f"{len(diseases):,}")
    col3.metric("Repurposing Opportunities", f"{len(repurposing):,}")
    col4.metric("Analyzed Drugs", f"{len(super_drugs):,}")

    # Statistics boxes
    st.markdown("---")
    st.subheader("Key Statistics")
    col1, col2, col3 = st.columns(3)
    with col1:
        # Average only over genes that have at least one disease.
        avg_diseases_per_gene = genes[genes['num_diseases'] > 0]['num_diseases'].mean()
        _stat_box("avg. Diseases per Gene", avg_diseases_per_gene,
                  "For genes with disease associations")
    with col2:
        # Average only over diseases that have at least one symptom.
        avg_symptoms_per_disease = diseases[diseases['num_symptoms'] > 0]['num_symptoms'].mean()
        _stat_box("avg. symptoms per disease", avg_symptoms_per_disease,
                  "For diseases with documented symptoms")
    with col3:
        avg_side_effects = polypharmacy['num_side_effects'].mean()
        _stat_box("avg. side effect per drug", avg_side_effects,
                  "Across all analyzed compounds")

    st.markdown("---")
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Top Genes by Disease Count")
        top_genes = genes.nlargest(10, 'num_diseases')[['name', 'num_diseases']]
        fig = px.bar(top_genes, x='name', y='num_diseases', color='num_diseases')
        fig.update_layout(
            showlegend=False,
            xaxis_title="Gene",
            yaxis_title="Number of Diseases",
        )
        _show_chart(fig)
        _offer_csv_download("Download Data", top_genes, "top_genes.csv")
    with col2:
        st.subheader("Top Diseases by Symptom Count")
        top_diseases = diseases.nlargest(10, 'num_symptoms')[['name', 'num_symptoms']]
        fig = px.bar(top_diseases, x='name', y='num_symptoms', color='num_symptoms',
                     color_continuous_scale='Reds')
        fig.update_layout(
            showlegend=False,
            xaxis_title="Disease",
            yaxis_title="Number of Symptoms",
        )
        _show_chart(fig)
        _offer_csv_download("Download Data", top_diseases, "top_diseases.csv")


def _render_hotspot_genes(genes):
    """Hotspot Genes page: genes ranked by number of associated diseases."""
    st.header("🧬 Hotspot Genes - Most Disease Associations")
    col1, col2 = st.columns([3, 1])
    # The sliders are created first because the chart depends on their values.
    with col2:
        n_genes = st.slider("Number of genes:", 10, 50, 20)
        min_diseases = st.slider("Min diseases:", 0, 50, 0)
    with col1:
        filtered_genes = genes[genes['num_diseases'] >= min_diseases].nlargest(n_genes, 'num_diseases')
        fig = px.bar(
            filtered_genes,
            x='name',
            y='num_diseases',
            title=f'Top {n_genes} Genes (min {min_diseases} diseases)',
            color='num_diseases',
            color_continuous_scale='Viridis',
        )
        fig.update_layout(height=600, showlegend=False)
        _show_chart(fig)

    st.dataframe(filtered_genes[['name', 'num_diseases']].reset_index(drop=True),
                 use_container_width=True)
    _offer_csv_download("Download Filtered Data", filtered_genes, "hotspot_genes.csv")


def _render_drug_repurposing(repurposing):
    """Drug Repurposing page: candidates ranked by shared gene count."""
    st.header("Drug Repurposing Opportunities")
    col1, col2 = st.columns([2, 1])
    with col1:
        top_n = st.slider("Show top N opportunities:", 10, 50, 20)
        top_repurpose = repurposing.nlargest(top_n, 'shared_genes')
        fig = px.scatter(
            top_repurpose,
            x='disease',
            y='candidate_drug',
            size='shared_genes',
            color='shared_genes',
            title='Drug Repurposing Candidates',
            color_continuous_scale='Reds',
            height=600,
        )
        fig.update_xaxes(tickangle=-45)
        _show_chart(fig)
    with col2:
        st.subheader("Filter by Disease")
        # Drop missing names first: sorting NaN together with strings raises.
        diseases_list = sorted(repurposing['disease'].dropna().unique())
        selected = st.selectbox("Select disease:", diseases_list)
        filtered = repurposing[repurposing['disease'] == selected].nlargest(10, 'shared_genes')
        st.dataframe(filtered[['candidate_drug', 'shared_genes']].reset_index(drop=True), height=400)
        _offer_csv_download("Download", filtered, f"repurposing_{selected}.csv")


def _render_polypharmacy(polypharmacy):
    """Polypharmacy Risk page: side effects versus diseases treated."""
    st.header("Polypharmacy Risk Analysis")
    col1, col2 = st.columns([3, 1])
    with col2:
        top_n = st.slider("Number of drugs:", 10, 30, 20)
        min_risk = st.slider("Min risk score:", 0, 100, 0)
    with col1:
        filtered_risk = polypharmacy[polypharmacy['risk_score'] >= min_risk].nlargest(top_n, 'num_side_effects')
        fig = px.scatter(
            filtered_risk,
            x='num_diseases_treated',
            y='num_side_effects',
            size='risk_score',
            color='risk_score',
            hover_data=['name'],
            title='Drugs: Side Effects vs Diseases Treated',
            color_continuous_scale='Reds',
        )
        fig.update_layout(height=600)
        _show_chart(fig)

    st.dataframe(
        filtered_risk[['name', 'num_diseases_treated', 'num_side_effects', 'risk_score']].reset_index(drop=True),
        use_container_width=True,
    )
    _offer_csv_download("Download Risk Data", filtered_risk, "polypharmacy_risk.csv")


def _render_symptom_triangle(symptom_triangle):
    """Symptom Triangle page: symptoms ranked by impact score."""
    st.header("Symptom-Disease-Drug Connections")
    top_n = st.slider("Number of symptoms:", 10, 30, 20)
    top_symptoms = symptom_triangle.nlargest(top_n, 'impact_score')
    fig = px.scatter(
        top_symptoms,
        x='num_diseases',
        y='num_treating_drugs',
        size='impact_score',
        color='drugs_with_side_effects',
        hover_data=['symptom'],
        title='Symptom Impact Analysis',
        color_continuous_scale='RdYlGn_r',
    )
    fig.update_layout(height=600)
    _show_chart(fig)
    st.dataframe(
        top_symptoms[['symptom', 'num_diseases', 'num_treating_drugs',
                      'drugs_with_side_effects', 'impact_score']].reset_index(drop=True),
        use_container_width=True,
    )
    _offer_csv_download("Download Symptom Data", top_symptoms, "symptom_triangle.csv")


def _render_super_drugs(super_drugs):
    """Super Drugs page: drugs ranked by super score."""
    st.header("Super-Drug Score (Best Benefit/Risk Ratio)")
    col1, col2 = st.columns([3, 1])
    with col2:
        top_n = st.slider("Number of drugs:", 10, 30, 20)
        min_score = st.slider("Min super score:", 0.0, 5.0, 0.0, 0.1)
    with col1:
        filtered_super = super_drugs[super_drugs['super_score'] >= min_score].nlargest(top_n, 'super_score')
        fig = px.scatter(
            filtered_super,
            x='num_diseases_treated',
            y='num_side_effects',
            size='super_score',
            color='super_score',
            hover_data=['name'],
            title='Super Drugs Analysis',
            color_continuous_scale='Viridis_r',
        )
        fig.update_layout(height=600)
        _show_chart(fig)

    st.dataframe(
        filtered_super[['name', 'num_diseases_treated', 'num_side_effects', 'super_score']].reset_index(drop=True),
        use_container_width=True,
    )
    # Counted over the whole table, not just the filtered selection.
    perfect = super_drugs[(super_drugs['num_side_effects'] == 0) & (super_drugs['num_diseases_treated'] > 0)]
    st.info(f"💎 Found {len(perfect)} drugs with ZERO documented side effects!")
    _offer_csv_download("Download Super Drugs", filtered_super, "super_drugs.csv")


def _render_drug_conflicts(drug_conflicts):
    """Drug Conflicts page: drug pairs ranked by shared side effects.

    *drug_conflicts* may be None (file not available) or empty; both cases
    show a warning instead of the analysis.
    """
    st.header("Drug Conflicts - Overlapping Side Effects")
    if drug_conflicts is None or len(drug_conflicts) == 0:
        st.warning("Drug conflicts data not available. Run the ETL script to generate this analysis.")
        return

    col1, col2 = st.columns([3, 1])
    with col2:
        top_n = st.slider("Number of conflicts:", 10, 50, 20)
        min_overlap = st.slider("Min shared side effects:", 0, 100, 10)
    with col1:
        filtered_conflicts = drug_conflicts[
            drug_conflicts['shared_side_effects'] >= min_overlap
        ].nlargest(top_n, 'shared_side_effects')
        fig = px.scatter(
            filtered_conflicts,
            x='drug1_total_se',
            y='drug2_total_se',
            size='shared_side_effects',
            color='overlap_percentage',
            hover_data=['drug1', 'drug2', 'shared_side_effects'],
            title='Drug Pairs with Overlapping Side Effects',
            labels={'drug1_total_se': 'Drug 1 Total SE', 'drug2_total_se': 'Drug 2 Total SE'},
            color_continuous_scale='Reds',
        )
        fig.update_layout(height=600)
        _show_chart(fig)

    st.warning("These drug combinations may have compounded side effects!")
    st.dataframe(
        filtered_conflicts[['drug1', 'drug2', 'shared_side_effects', 'overlap_percentage']].reset_index(drop=True),
        use_container_width=True,
    )
    _offer_csv_download("Download Conflicts", filtered_conflicts, "drug_conflicts.csv")


def _render_network_graph(network_nodes, network_edges):
    """Network Graph page: force-directed plot of the node/edge tables.

    Either argument may be None (file not available), in which case a
    warning is shown instead of the graph.
    """
    st.header("Disease-Gene-Drug Network")
    if network_nodes is None or network_edges is None:
        st.warning("Network data not available. Run the ETL script to generate this visualization.")
        return

    st.info("Interactive network visualization showing connections between diseases, genes, and drugs")

    graph = nx.Graph()
    for _, row in network_nodes.iterrows():
        graph.add_node(row['id'], label=row['label'], type=row['type'])
    graph.add_edges_from(zip(network_edges['source'], network_edges['target']))

    # A fixed seed keeps the layout identical across Streamlit reruns;
    # without it the graph reshuffles on every widget interaction.
    pos = nx.spring_layout(graph, k=0.5, iterations=50, seed=42)

    # All edges go into a single trace; None separates the line segments.
    edge_x = []
    edge_y = []
    for source, target in graph.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines',
    )

    color_map = {
        'Disease': '#ff4444',
        'Gene': '#4444ff',
        'Compound': '#44ff44',
    }
    # Group node ids by type. ``.get`` is used because nodes that only appear
    # in the edge table have no attributes at all; they match no type and
    # are therefore not drawn as markers.
    nodes_by_type = {
        node_type: [node for node, attrs in graph.nodes(data=True)
                    if attrs.get('type') == node_type]
        for node_type in color_map
    }

    # One trace per node type so that each type gets its own legend entry.
    node_traces = []
    for node_type, color in color_map.items():
        members = nodes_by_type[node_type]
        if not members:
            continue
        node_traces.append(go.Scatter(
            x=[pos[node][0] for node in members],
            y=[pos[node][1] for node in members],
            mode='markers',
            name=node_type,
            hoverinfo='text',
            text=[f"{node_type}: {graph.nodes[node]['label']}" for node in members],
            marker=dict(
                color=color,
                size=12,
                line=dict(width=2, color='white'),
            ),
        ))

    fig = go.Figure(
        data=[edge_trace] + node_traces,
        layout=go.Layout(
            title='Disease-Gene-Drug Network',
            showlegend=True,
            hovermode='closest',
            margin=dict(b=0, l=0, r=0, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            height=700,
        ),
    )
    _show_chart(fig)

    col1, col2, col3 = st.columns(3)
    col1.metric("🔴 Diseases", len(nodes_by_type['Disease']))
    col2.metric("🔵 Genes", len(nodes_by_type['Gene']))
    col3.metric("🟢 Drugs", len(nodes_by_type['Compound']))


def _render_compare_drugs(super_drugs):
    """Compare Drugs page: two drugs side by side plus a recommendation."""
    st.header("⚖️ Compare Drugs Side-by-Side")
    # Drop missing names first: sorting NaN together with strings raises.
    drug_names = sorted(super_drugs['name'].dropna().unique())
    col1, col2 = st.columns(2)
    with col1:
        drug1 = st.selectbox("Select Drug 1:", drug_names, key='drug1')
    with col2:
        # Start on the second entry so the page does not open comparing a
        # drug with itself.
        drug2 = st.selectbox("Select Drug 2:", drug_names,
                             index=1 if len(drug_names) > 1 else 0, key='drug2')
    if not (drug1 and drug2):
        return

    drug1_data = super_drugs[super_drugs['name'] == drug1].iloc[0]
    drug2_data = super_drugs[super_drugs['name'] == drug2].iloc[0]

    st.markdown("---")
    col1, col2 = st.columns(2)
    for column, name, data in ((col1, drug1, drug1_data), (col2, drug2, drug2_data)):
        with column:
            st.subheader(f"{name}")
            st.metric("Diseases Treated", int(data['num_diseases_treated']))
            st.metric("Side Effects", int(data['num_side_effects']))
            st.metric("Super Score", f"{data['super_score']:.2f}")

    # Comparison chart, built in long form (one row per drug and metric).
    # A wide table keyed by drug name would collapse to a single column when
    # the same drug is selected twice, so the series labels are made distinct
    # in that case.
    if drug1 == drug2:
        series_labels = (f"{drug1} (1)", f"{drug2} (2)")
    else:
        series_labels = (drug1, drug2)
    metric_columns = [
        ('Diseases Treated', 'num_diseases_treated'),
        ('Side Effects', 'num_side_effects'),
        ('Super Score', 'super_score'),
    ]
    comparison_df = pd.DataFrame([
        {'Metric': metric, 'Drug': label, 'Value': data[column_name]}
        for label, data in zip(series_labels, (drug1_data, drug2_data))
        for metric, column_name in metric_columns
    ])
    fig = px.bar(
        comparison_df,
        x='Metric',
        y='Value',
        color='Drug',
        barmode='group',
        title='Side-by-Side Comparison',
    )
    _show_chart(fig)

    # Winner determination
    st.markdown("---")
    st.subheader("🏆 Recommendation")
    if drug1_data['super_score'] > drug2_data['super_score']:
        st.success(f"**{drug1}** has a better benefit/risk ratio (Super Score: {drug1_data['super_score']:.2f})")
    elif drug2_data['super_score'] > drug1_data['super_score']:
        st.success(f"**{drug2}** has a better benefit/risk ratio (Super Score: {drug2_data['super_score']:.2f})")
    else:
        st.info("Both drugs have the same Super Score")


try:
    # Load data
    data_dir = Path("neo4j_csv")
    with st.spinner("Loading data..."):
        genes = pd.read_csv(data_dir / "nodes_Gene.csv")
        diseases = pd.read_csv(data_dir / "nodes_Disease.csv")
        repurposing = pd.read_csv(data_dir / "analysis_drug_repurposing.csv")
        polypharmacy = pd.read_csv(data_dir / "analysis_polypharmacy_risk.csv")
        symptom_triangle = pd.read_csv(data_dir / "analysis_symptom_triangle.csv")
        super_drugs = pd.read_csv(data_dir / "analysis_super_drugs.csv")
        # Newer, optional files: each is loaded independently so that one
        # missing file does not disable the pages backed by the others.
        drug_conflicts = _read_optional_csv(data_dir / "analysis_drug_conflicts.csv")
        network_nodes = _read_optional_csv(data_dir / "network_nodes.csv")
        network_edges = _read_optional_csv(data_dir / "network_edges.csv")

    # Global search results
    if search_term:
        _render_search_results(search_type, search_term, genes, diseases, super_drugs)

    # Page dispatch: every string must match a navigation option exactly.
    if page == "Overview":
        _render_overview(genes, diseases, repurposing, polypharmacy, super_drugs)
    elif page == "Hotspot Genes":
        _render_hotspot_genes(genes)
    elif page == "Drug Repurposing":
        _render_drug_repurposing(repurposing)
    elif page == "Polypharmacy Risk":
        _render_polypharmacy(polypharmacy)
    elif page == "Symptom Triangle":
        _render_symptom_triangle(symptom_triangle)
    elif page == "Super Drugs":
        _render_super_drugs(super_drugs)
    elif page == "Drug Conflicts":
        _render_drug_conflicts(drug_conflicts)
    elif page == "Network Graph":
        _render_network_graph(network_nodes, network_edges)
    elif page == "Compare Drugs":
        _render_compare_drugs(super_drugs)
except FileNotFoundError:
    # One of the required CSV files is missing.
    st.error("Could not find data files")
    st.write("Please ensure you're running the dashboard from the correct directory with access to `neo4j_csv/` folder")
except Exception as e:
    # Top-level boundary: show the failure in the page, with traceback,
    # instead of a blank dashboard.
    st.error(f"Error: {e}")
    st.exception(e)