# Listing metadata: 559 lines, 22 KiB, Python
from pathlib import Path

import networkx as nx
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# Page configuration must be the first Streamlit command executed by the script.
st.set_page_config(page_title="Hetionet Dashboard", layout="wide")

# Custom CSS: spacing for the tab bar and the look of the ".stat-box" cards
# rendered on the Overview page.
st.markdown("""
<style>
    .stTabs [data-baseweb="tab-list"] {
        gap: 24px;
    }
    .stTabs [data-baseweb="tab"] {
        height: 50px;
        padding-left: 20px;
        padding-right: 20px;
    }
    .stat-box {
        background: grey;
        padding: 20px;
        border-radius: 10px;
        color: white;
        text-align: center;
        margin: 10px 0;
    }
</style>
""", unsafe_allow_html=True)

st.title("Hetionet Drug Analysis Dashboard")

# Sidebar: global search controls. The text input label follows the selected
# entity type; the results are rendered further down once the data is loaded.
st.sidebar.header("Global Search")
search_type = st.sidebar.selectbox("Search for:", ["Gene", "Disease", "Drug"])
search_term = st.sidebar.text_input(f"Enter {search_type} name:")

# Sidebar: page navigation. The selected label decides which analysis page
# is rendered in the main area.
st.sidebar.markdown("---")
st.sidebar.header("Navigation")
page = st.sidebar.radio("Select Analysis:", [
    "Overview",
    "Hotspot Genes",
    "Drug Repurposing",
    "Polypharmacy Risk",
    "Symptom Triangle",
    "Super Drugs",
    "Drug Conflicts",
    "Network Graph",
    "Compare Drugs",
])

# Load data

# Shared Plotly config: keep the mode bar (enables chart export), hide the logo.
PLOTLY_CONFIG = {'displayModeBar': True, 'displaylogo': False}

# Marker colour per node type on the Network Graph page.
NODE_COLORS = {
    'Disease': '#ff4444',
    'Gene': '#4444ff',
    'Compound': '#44ff44',
}

# NOTE(review): the original indentation was lost in this listing, so whether
# each page's table/download button sat inside a `with colN:` block or below
# the columns is inferred (tables using use_container_width are placed at full
# width below the columns) - confirm against the rendered layout.


def read_optional_csv(path):
    """Return the CSV at *path* as a DataFrame, or None if it cannot be read.

    Used for datasets the dashboard can run without. Only I/O failures
    (OSError: missing or unreadable file) and pandas parse failures
    (EmptyDataError / ParserError, both ValueError subclasses) are treated
    as "not available"; anything else propagates.
    """
    try:
        return pd.read_csv(path)
    except (OSError, ValueError):
        return None


def load_data(data_dir):
    """Read every CSV the dashboard uses from *data_dir* into a dict.

    The six core files are required and raise (e.g. FileNotFoundError) when
    absent. The conflict and network files are optional and load
    independently, so one missing file does not disable the others; a missing
    optional dataset is stored as None.
    """
    with st.spinner("Loading data..."):
        return {
            "genes": pd.read_csv(data_dir / "nodes_Gene.csv"),
            "diseases": pd.read_csv(data_dir / "nodes_Disease.csv"),
            "repurposing": pd.read_csv(data_dir / "analysis_drug_repurposing.csv"),
            "polypharmacy": pd.read_csv(data_dir / "analysis_polypharmacy_risk.csv"),
            "symptom_triangle": pd.read_csv(data_dir / "analysis_symptom_triangle.csv"),
            "super_drugs": pd.read_csv(data_dir / "analysis_super_drugs.csv"),
            "drug_conflicts": read_optional_csv(data_dir / "analysis_drug_conflicts.csv"),
            "network_nodes": read_optional_csv(data_dir / "network_nodes.csv"),
            "network_edges": read_optional_csv(data_dir / "network_edges.csv"),
        }


def search_by_name(frame, term):
    """Return the rows of *frame* whose 'name' column contains *term*.

    Matching is case-insensitive and literal (regex=False): user input such
    as "(" or "*" is searched for as text instead of being compiled as a
    regular expression. Missing names never match.
    """
    return frame[frame['name'].str.contains(term, case=False, na=False, regex=False)]


def show_chart(fig):
    """Render a Plotly figure at container width with the shared config."""
    st.plotly_chart(fig, use_container_width=True, config=PLOTLY_CONFIG)


def download_csv(label, frame, file_name):
    """Offer *frame* as a UTF-8 CSV download named *file_name*."""
    st.download_button(label, frame.to_csv(index=False).encode('utf-8'), file_name, "text/csv")


def stat_box(title, value, caption):
    """Render one ".stat-box" card showing *value* with one decimal place."""
    st.markdown(
        '<div class="stat-box">'
        f"<h3>{title}</h3>"
        f"<h1>{value:.1f}</h1>"
        f"<p>{caption}</p>"
        "</div>",
        unsafe_allow_html=True,
    )


# Global Search Results
def render_search_results(kind, term, data):
    """Show up to five name matches for *term* in the sidebar.

    *kind* is the sidebar choice ("Gene", "Disease" or "Drug"). Nothing is
    rendered when *term* is empty.
    """
    if not term:
        return

    st.sidebar.markdown("---")
    st.sidebar.subheader("Search Results")

    # kind -> (frame to search, plural noun for messages, per-row summary)
    targets = {
        "Gene": (data["genes"], "genes",
                 lambda row: f"{row['num_diseases']} diseases"),
        "Disease": (data["diseases"], "diseases",
                    lambda row: f"{row['num_symptoms']} symptoms"),
        "Drug": (data["super_drugs"], "drugs",
                 lambda row: f"Score {row['super_score']:.2f}"),
    }
    if kind not in targets:
        return
    frame, noun, describe = targets[kind]

    matches = search_by_name(frame, term)
    if matches.empty:
        st.sidebar.warning(f"No {noun} found")
        return

    st.sidebar.success(f"Found {len(matches)} {noun}")
    for _, row in matches.head(5).iterrows():
        st.sidebar.write(f"**{row['name']}**: {describe(row)}")


# OVERVIEW PAGE
def render_overview(data):
    """Headline counts, three average-statistic cards and two top-10 charts."""
    genes = data["genes"]
    diseases = data["diseases"]

    st.header("Dataset Overview")

    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Total Genes", f"{len(genes):,}")
    col2.metric("Total Diseases", f"{len(diseases):,}")
    col3.metric("Repurposing Opportunities", f"{len(data['repurposing']):,}")
    col4.metric("Analyzed Drugs", f"{len(data['super_drugs']):,}")

    # STATISTICS BOXES
    st.markdown("---")
    st.subheader("Key Statistics")

    col1, col2, col3 = st.columns(3)
    with col1:
        # Average only over genes that have at least one disease association.
        stat_box(
            "avg. Diseases per Gene",
            genes[genes['num_diseases'] > 0]['num_diseases'].mean(),
            "For genes with disease associations",
        )
    with col2:
        # Average only over diseases that have at least one symptom.
        stat_box(
            "avg. symptoms per disease",
            diseases[diseases['num_symptoms'] > 0]['num_symptoms'].mean(),
            "For diseases with documented symptoms",
        )
    with col3:
        stat_box(
            "avg. side effect per drug",
            data["polypharmacy"]['num_side_effects'].mean(),
            "Across all analyzed compounds",
        )

    st.markdown("---")

    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Top Genes by Disease Count")
        top_genes = genes.nlargest(10, 'num_diseases')[['name', 'num_diseases']]
        fig = px.bar(top_genes, x='name', y='num_diseases', color='num_diseases')
        fig.update_layout(
            showlegend=False,
            xaxis_title="Gene",
            yaxis_title="Number of Diseases",
        )
        show_chart(fig)
        download_csv("Download Data", top_genes, "top_genes.csv")

    with col2:
        st.subheader("Top Diseases by Symptom Count")
        top_diseases = diseases.nlargest(10, 'num_symptoms')[['name', 'num_symptoms']]
        fig = px.bar(top_diseases, x='name', y='num_symptoms', color='num_symptoms',
                     color_continuous_scale='Reds')
        fig.update_layout(
            showlegend=False,
            xaxis_title="Disease",
            yaxis_title="Number of Symptoms",
        )
        show_chart(fig)
        download_csv("Download Data", top_diseases, "top_diseases.csv")


# HOTSPOT GENES PAGE
def render_hotspot_genes(data):
    """Bar chart and table of the genes with the most disease associations."""
    genes = data["genes"]

    st.header("Hotspot Genes - Most Disease Associations")

    col1, col2 = st.columns([3, 1])
    with col2:
        n_genes = st.slider("Number of genes:", 10, 50, 20)
        min_diseases = st.slider("Min diseases:", 0, 50, 0)

    with col1:
        filtered_genes = genes[genes['num_diseases'] >= min_diseases].nlargest(n_genes, 'num_diseases')
        fig = px.bar(
            filtered_genes,
            x='name',
            y='num_diseases',
            title=f'Top {n_genes} Genes (min {min_diseases} diseases)',
            color='num_diseases',
            color_continuous_scale='Viridis',
        )
        fig.update_layout(height=600, showlegend=False)
        show_chart(fig)

    st.dataframe(filtered_genes[['name', 'num_diseases']].reset_index(drop=True),
                 use_container_width=True)
    download_csv("Download Filtered Data", filtered_genes, "hotspot_genes.csv")


# DRUG REPURPOSING PAGE
def render_drug_repurposing(data):
    """Scatter of top repurposing candidates plus a per-disease top-10 table."""
    repurposing = data["repurposing"]

    st.header("Drug Repurposing Opportunities")

    col1, col2 = st.columns([2, 1])
    with col1:
        top_n = st.slider("Show top N opportunities:", 10, 50, 20)
        top_repurpose = repurposing.nlargest(top_n, 'shared_genes')
        fig = px.scatter(
            top_repurpose,
            x='disease',
            y='candidate_drug',
            size='shared_genes',
            color='shared_genes',
            title='Drug Repurposing Candidates',
            color_continuous_scale='Reds',
            height=600,
        )
        fig.update_xaxes(tickangle=-45)
        show_chart(fig)

    with col2:
        st.subheader("Filter by Disease")
        # dropna: sorting a mix of NaN and str values would raise TypeError.
        diseases_list = sorted(repurposing['disease'].dropna().unique())
        selected = st.selectbox("Select disease:", diseases_list)

        filtered = repurposing[repurposing['disease'] == selected].nlargest(10, 'shared_genes')
        st.dataframe(filtered[['candidate_drug', 'shared_genes']].reset_index(drop=True), height=400)
        download_csv("Download", filtered, f"repurposing_{selected}.csv")


# POLYPHARMACY RISK PAGE
def render_polypharmacy_risk(data):
    """Side effects vs. diseases treated for the drugs above a risk threshold."""
    polypharmacy = data["polypharmacy"]

    st.header("Polypharmacy Risk Analysis")

    col1, col2 = st.columns([3, 1])
    with col2:
        top_n = st.slider("Number of drugs:", 10, 30, 20)
        min_risk = st.slider("Min risk score:", 0, 100, 0)

    with col1:
        filtered_risk = polypharmacy[polypharmacy['risk_score'] >= min_risk].nlargest(top_n, 'num_side_effects')
        fig = px.scatter(
            filtered_risk,
            x='num_diseases_treated',
            y='num_side_effects',
            size='risk_score',
            color='risk_score',
            hover_data=['name'],
            title='Drugs: Side Effects vs Diseases Treated',
            color_continuous_scale='Reds',
        )
        fig.update_layout(height=600)
        show_chart(fig)

    st.dataframe(
        filtered_risk[['name', 'num_diseases_treated', 'num_side_effects', 'risk_score']].reset_index(drop=True),
        use_container_width=True,
    )
    download_csv("Download Risk Data", filtered_risk, "polypharmacy_risk.csv")


# SYMPTOM TRIANGLE PAGE
def render_symptom_triangle(data):
    """Scatter and table of the symptoms with the highest impact score."""
    st.header("Symptom-Disease-Drug Connections")

    top_n = st.slider("Number of symptoms:", 10, 30, 20)
    top_symptoms = data["symptom_triangle"].nlargest(top_n, 'impact_score')

    fig = px.scatter(
        top_symptoms,
        x='num_diseases',
        y='num_treating_drugs',
        size='impact_score',
        color='drugs_with_side_effects',
        hover_data=['symptom'],
        title='Symptom Impact Analysis',
        color_continuous_scale='RdYlGn_r',
    )
    fig.update_layout(height=600)
    show_chart(fig)

    st.dataframe(
        top_symptoms[['symptom', 'num_diseases', 'num_treating_drugs',
                      'drugs_with_side_effects', 'impact_score']].reset_index(drop=True),
        use_container_width=True,
    )
    download_csv("Download Symptom Data", top_symptoms, "symptom_triangle.csv")


# SUPER DRUGS PAGE
def render_super_drugs(data):
    """Drugs ranked by super score, plus a count of zero-side-effect drugs."""
    super_drugs = data["super_drugs"]

    st.header("Super-Drug Score (Best Benefit/Risk Ratio)")

    col1, col2 = st.columns([3, 1])
    with col2:
        top_n = st.slider("Number of drugs:", 10, 30, 20)
        min_score = st.slider("Min super score:", 0.0, 5.0, 0.0, 0.1)

    with col1:
        filtered_super = super_drugs[super_drugs['super_score'] >= min_score].nlargest(top_n, 'super_score')
        fig = px.scatter(
            filtered_super,
            x='num_diseases_treated',
            y='num_side_effects',
            size='super_score',
            color='super_score',
            hover_data=['name'],
            title='Super Drugs Analysis',
            color_continuous_scale='Viridis_r',
        )
        fig.update_layout(height=600)
        show_chart(fig)

    st.dataframe(
        filtered_super[['name', 'num_diseases_treated', 'num_side_effects', 'super_score']].reset_index(drop=True),
        use_container_width=True,
    )

    # Counted over the whole dataset, not just the filtered selection.
    perfect = super_drugs[(super_drugs['num_side_effects'] == 0) & (super_drugs['num_diseases_treated'] > 0)]
    st.info(f"Found {len(perfect)} drugs with ZERO documented side effects!")

    download_csv("Download Super Drugs", filtered_super, "super_drugs.csv")


# DRUG CONFLICTS PAGE
def render_drug_conflicts(data):
    """Drug pairs ranked by shared side effects (optional dataset)."""
    drug_conflicts = data["drug_conflicts"]

    st.header("Drug Conflicts - Overlapping Side Effects")

    if drug_conflicts is None or drug_conflicts.empty:
        st.warning("Drug conflicts data not available. Run the ETL script to generate this analysis.")
        return

    col1, col2 = st.columns([3, 1])
    with col2:
        top_n = st.slider("Number of conflicts:", 10, 50, 20)
        min_overlap = st.slider("Min shared side effects:", 0, 100, 10)

    with col1:
        filtered_conflicts = drug_conflicts[
            drug_conflicts['shared_side_effects'] >= min_overlap
        ].nlargest(top_n, 'shared_side_effects')

        fig = px.scatter(
            filtered_conflicts,
            x='drug1_total_se',
            y='drug2_total_se',
            size='shared_side_effects',
            color='overlap_percentage',
            hover_data=['drug1', 'drug2', 'shared_side_effects'],
            title='Drug Pairs with Overlapping Side Effects',
            labels={'drug1_total_se': 'Drug 1 Total SE', 'drug2_total_se': 'Drug 2 Total SE'},
            color_continuous_scale='Reds',
        )
        fig.update_layout(height=600)
        show_chart(fig)

    st.warning("These drug combinations may have compounded side effects!")
    st.dataframe(
        filtered_conflicts[['drug1', 'drug2', 'shared_side_effects', 'overlap_percentage']].reset_index(drop=True),
        use_container_width=True,
    )
    download_csv("Download Conflicts", filtered_conflicts, "drug_conflicts.csv")


# NETWORK GRAPH PAGE
def render_network_graph(data):
    """Force-directed plot of the disease/gene/compound network (optional dataset)."""
    network_nodes = data["network_nodes"]
    network_edges = data["network_edges"]

    st.header("Disease-Gene-Drug Network")

    if network_nodes is None or network_edges is None:
        st.warning("Network data not available. Run the ETL script to generate this visualization.")
        return

    st.info("Interactive network visualization showing connections between diseases, genes, and drugs")

    # Create networkx graph
    graph = nx.Graph()
    for _, row in network_nodes.iterrows():
        graph.add_node(row['id'], label=row['label'], type=row['type'])
    for _, row in network_edges.iterrows():
        graph.add_edge(row['source'], row['target'])

    # Fixed seed: Streamlit re-runs the script on every interaction, and an
    # unseeded layout would reshuffle the node positions each time.
    pos = nx.spring_layout(graph, k=0.5, iterations=50, seed=42)

    # Edge trace: one polyline, with None separating the individual segments.
    edge_x, edge_y = [], []
    for source, target in graph.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines',
    )

    # Group nodes by type once. Nodes that only appear in the edge list have
    # no 'type' attribute; .get() files them under None instead of raising.
    nodes_by_type = {}
    for node, attrs in graph.nodes(data=True):
        nodes_by_type.setdefault(attrs.get('type'), []).append(node)

    # One trace per known node type so each gets its own legend entry.
    node_traces = []
    for node_type, color in NODE_COLORS.items():
        members = nodes_by_type.get(node_type, [])
        if not members:
            continue
        node_traces.append(go.Scatter(
            x=[pos[node][0] for node in members],
            y=[pos[node][1] for node in members],
            mode='markers',
            name=node_type,
            hoverinfo='text',
            text=[f"{node_type}: {graph.nodes[node].get('label', node)}" for node in members],
            marker=dict(
                color=color,
                size=12,
                line=dict(width=2, color='white'),
            ),
        ))

    fig = go.Figure(
        data=[edge_trace] + node_traces,
        layout=go.Layout(
            title='Disease-Gene-Drug Network',
            showlegend=True,
            hovermode='closest',
            margin=dict(b=0, l=0, r=0, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            height=700,
        ),
    )
    show_chart(fig)

    col1, col2, col3 = st.columns(3)
    col1.metric("🔴 Diseases", len(nodes_by_type.get('Disease', [])))
    col2.metric("🔵 Genes", len(nodes_by_type.get('Gene', [])))
    col3.metric("🟢 Drugs", len(nodes_by_type.get('Compound', [])))


# COMPARE DRUGS PAGE
def render_compare_drugs(data):
    """Side-by-side metrics, grouped bar chart and recommendation for two drugs."""
    super_drugs = data["super_drugs"]

    st.header("⚖️ Compare Drugs Side-by-Side")

    # dropna: sorting a mix of NaN and str values would raise TypeError.
    drug_names = sorted(super_drugs['name'].dropna().unique())

    col1, col2 = st.columns(2)
    with col1:
        drug1 = st.selectbox("Select Drug 1:", drug_names, key='drug1')
    with col2:
        # Start on the second entry so the page does not open comparing a
        # drug with itself.
        drug2 = st.selectbox("Select Drug 2:", drug_names,
                             index=1 if len(drug_names) > 1 else 0, key='drug2')

    if not (drug1 and drug2):
        return

    drug1_data = super_drugs[super_drugs['name'] == drug1].iloc[0]
    drug2_data = super_drugs[super_drugs['name'] == drug2].iloc[0]

    st.markdown("---")

    col1, col2 = st.columns(2)
    for column, name, row in ((col1, drug1, drug1_data), (col2, drug2, drug2_data)):
        with column:
            st.subheader(f"{name}")
            st.metric("Diseases Treated", int(row['num_diseases_treated']))
            st.metric("Side Effects", int(row['num_side_effects']))
            st.metric("Super Score", f"{row['super_score']:.2f}")

    # Comparison chart
    metric_columns = ['num_diseases_treated', 'num_side_effects', 'super_score']
    comparison_df = pd.DataFrame({
        'Metric': ['Diseases Treated', 'Side Effects', 'Super Score'],
        drug1: [drug1_data[column] for column in metric_columns],
        drug2: [drug2_data[column] for column in metric_columns],
    })

    fig = px.bar(
        comparison_df,
        x='Metric',
        y=[drug1, drug2],
        barmode='group',
        title='Side-by-Side Comparison',
    )
    show_chart(fig)

    # Winner determination
    st.markdown("---")
    st.subheader("Recommendation")

    if drug1_data['super_score'] > drug2_data['super_score']:
        st.success(f"**{drug1}** has a better benefit/risk ratio (Super Score: {drug1_data['super_score']:.2f})")
    elif drug2_data['super_score'] > drug1_data['super_score']:
        st.success(f"**{drug2}** has a better benefit/risk ratio (Super Score: {drug2_data['super_score']:.2f})")
    else:
        st.info("Both drugs have the same Super Score")


# Sidebar navigation label -> function that renders that page.
PAGES = {
    "Overview": render_overview,
    "Hotspot Genes": render_hotspot_genes,
    "Drug Repurposing": render_drug_repurposing,
    "Polypharmacy Risk": render_polypharmacy_risk,
    "Symptom Triangle": render_symptom_triangle,
    "Super Drugs": render_super_drugs,
    "Drug Conflicts": render_drug_conflicts,
    "Network Graph": render_network_graph,
    "Compare Drugs": render_compare_drugs,
}


def main():
    """Load the data, then render the sidebar search results and the selected page.

    Reads the module-level sidebar selections (search_type, search_term,
    page). Any failure is reported in the page instead of crashing the app.
    """
    try:
        data = load_data(Path("neo4j_csv"))
        render_search_results(search_type, search_term, data)
        render_page = PAGES.get(page)
        if render_page is not None:
            render_page(data)
    except FileNotFoundError as e:
        st.error(f"Could not find data files: {e}")
        st.write("Please ensure you're running the dashboard from the correct directory with access to `neo4j_csv/` folder")
    except Exception as e:
        # Top-level boundary: surface the error and its traceback in the UI.
        st.error(f"Error: {e}")
        st.exception(e)


# `streamlit run` executes the script as the __main__ module, so this guard
# still runs the dashboard while keeping the helpers importable.
if __name__ == "__main__":
    main()