import streamlit as st import pandas as pd import plotly.express as px import plotly.graph_objects as go import networkx as nx from pathlib import Path st.set_page_config(page_title="Hetionet Dashboard", layout="wide") # Custom CSS st.markdown(""" """, unsafe_allow_html=True) st.title("Hetionet Drug Analysis Dashboard") # Sidebar st.sidebar.header("Global Search") search_type = st.sidebar.selectbox("Search for:", ["Gene", "Disease", "Drug"]) search_term = st.sidebar.text_input(f"Enter {search_type} name:") st.sidebar.markdown("---") st.sidebar.header("Navigation") page = st.sidebar.radio("Select Analysis:", [ "Overview", "Hotspot Genes", "Drug Repurposing", "Polypharmacy Risk", "Symptom Triangle", "Super Drugs", "Drug Conflicts", "Network Graph", "Compare Drugs" ]) # Load data try: data_dir = Path("neo4j_csv") with st.spinner("Loading data..."): genes = pd.read_csv(data_dir / "nodes_Gene.csv") diseases = pd.read_csv(data_dir / "nodes_Disease.csv") repurposing = pd.read_csv(data_dir / "analysis_drug_repurposing.csv") polypharmacy = pd.read_csv(data_dir / "analysis_polypharmacy_risk.csv") symptom_triangle = pd.read_csv(data_dir / "analysis_symptom_triangle.csv") super_drugs = pd.read_csv(data_dir / "analysis_super_drugs.csv") # Try loading new files try: drug_conflicts = pd.read_csv(data_dir / "analysis_drug_conflicts.csv") network_nodes = pd.read_csv(data_dir / "network_nodes.csv") network_edges = pd.read_csv(data_dir / "network_edges.csv") except: drug_conflicts = None network_nodes = None network_edges = None # Global Search Results if search_term: st.sidebar.markdown("---") st.sidebar.subheader("Search Results") if search_type == "Gene": results = genes[genes['name'].str.contains(search_term, case=False, na=False)] if len(results) > 0: st.sidebar.success(f"Found {len(results)} genes") for _, row in results.head(5).iterrows(): st.sidebar.write(f"**{row['name']}**: {row['num_diseases']} diseases") else: st.sidebar.warning("No genes found") elif search_type == "Disease": results = diseases[diseases['name'].str.contains(search_term, case=False, na=False)] if len(results) > 0: st.sidebar.success(f"Found {len(results)} diseases") for _, row in results.head(5).iterrows(): st.sidebar.write(f"**{row['name']}**: {row['num_symptoms']} symptoms") else: st.sidebar.warning("No diseases found") elif search_type == "Drug": results = super_drugs[super_drugs['name'].str.contains(search_term, case=False, na=False)] if len(results) > 0: st.sidebar.success(f"Found {len(results)} drugs") for _, row in results.head(5).iterrows(): st.sidebar.write(f"**{row['name']}**: Score {row['super_score']:.2f}") else: st.sidebar.warning("No drugs found") # overview page if page == "Overview": st.header("Dataset Overview") col1, col2, col3, col4 = st.columns(4) col1.metric("Total Genes", f"{len(genes):,}") col2.metric("Total Diseases", f"{len(diseases):,}") col3.metric("Repurposing Opportunities", f"{len(repurposing):,}") col4.metric("Analyzed Drugs", f"{len(super_drugs):,}") # statistics boxes st.markdown("---") st.subheader("Key Statistics") col1, col2, col3 = st.columns(3) with col1: avg_diseases_per_gene = genes[genes['num_diseases'] > 0]['num_diseases'].mean() st.markdown(f"""

avg. Diseases per Gene

{avg_diseases_per_gene:.1f}

For genes with disease associations

""", unsafe_allow_html=True) with col2: avg_symptoms_per_disease = diseases[diseases['num_symptoms'] > 0]['num_symptoms'].mean() st.markdown(f"""

avg. symptoms per disease

{avg_symptoms_per_disease:.1f}

For diseases with documented symptoms

""", unsafe_allow_html=True) with col3: avg_side_effects = polypharmacy['num_side_effects'].mean() st.markdown(f"""

avg. side effect per drug

{avg_side_effects:.1f}

Across all analyzed compounds

""", unsafe_allow_html=True) st.markdown("---") col1, col2 = st.columns(2) with col1: st.subheader("Top Genes by Disease Count") top_genes = genes.nlargest(10, 'num_diseases')[['name', 'num_diseases']] fig = px.bar(top_genes, x='name', y='num_diseases', color='num_diseases') fig.update_layout( showlegend=False, xaxis_title="Gene", yaxis_title="Number of Diseases" ) # enable chart export config = {'displayModeBar': True, 'displaylogo': False} st.plotly_chart(fig, use_container_width=True, config=config) csv = top_genes.to_csv(index=False).encode('utf-8') st.download_button("Download Data", csv, "top_genes.csv", "text/csv") with col2: st.subheader("Top Diseases by Symptom Count") top_diseases = diseases.nlargest(10, 'num_symptoms')[['name', 'num_symptoms']] fig = px.bar(top_diseases, x='name', y='num_symptoms', color='num_symptoms', color_continuous_scale='Reds') fig.update_layout( showlegend=False, xaxis_title="Disease", yaxis_title="Number of Symptoms" ) config = {'displayModeBar': True, 'displaylogo': False} st.plotly_chart(fig, use_container_width=True, config=config) csv = top_diseases.to_csv(index=False).encode('utf-8') st.download_button("Download Data", csv, "top_diseases.csv", "text/csv") # hotspot gene page elif page == "Hotspot Genes": st.header("Hotspot Genes - Most Disease Associations") col1, col2 = st.columns([3, 1]) with col2: n_genes = st.slider("Number of genes:", 10, 50, 20) min_diseases = st.slider("Min diseases:", 0, 50, 0) with col1: filtered_genes = genes[genes['num_diseases'] >= min_diseases].nlargest(n_genes, 'num_diseases') fig = px.bar( filtered_genes, x='name', y='num_diseases', title=f'Top {n_genes} Genes (min {min_diseases} diseases)', color='num_diseases', color_continuous_scale='Viridis' ) fig.update_layout(height=600, showlegend=False) config = {'displayModeBar': True, 'displaylogo': False} st.plotly_chart(fig, use_container_width=True, config=config) st.dataframe(filtered_genes[['name', 'num_diseases']].reset_index(drop=True), use_container_width=True) csv = filtered_genes.to_csv(index=False).encode('utf-8') st.download_button("Download Filtered Data", csv, "hotspot_genes.csv", "text/csv") # drug repurposing page elif page == "Drug Repurposing": st.header("Drug Repurposing Opportunities") col1, col2 = st.columns([2, 1]) with col1: top_n = st.slider("Show top N opportunities:", 10, 50, 20) top_repurpose = repurposing.nlargest(top_n, 'shared_genes') fig = px.scatter( top_repurpose, x='disease', y='candidate_drug', size='shared_genes', color='shared_genes', title='Drug Repurposing Candidates', color_continuous_scale='Reds', height=600 ) fig.update_xaxes(tickangle=-45) config = {'displayModeBar': True, 'displaylogo': False} st.plotly_chart(fig, use_container_width=True, config=config) with col2: st.subheader("Filter by Disease") diseases_list = sorted(repurposing['disease'].unique()) selected = st.selectbox("Select disease:", diseases_list) filtered = repurposing[repurposing['disease'] == selected].nlargest(10, 'shared_genes') st.dataframe(filtered[['candidate_drug', 'shared_genes']].reset_index(drop=True), height=400) csv = filtered.to_csv(index=False).encode('utf-8') st.download_button("Download", csv, f"repurposing_{selected}.csv", "text/csv") # polypharmacy risk page elif page == "Polypharmacy Risk": st.header("Polypharmacy Risk Analysis") col1, col2 = st.columns([3, 1]) with col2: top_n = st.slider("Number of drugs:", 10, 30, 20) min_risk = st.slider("Min risk score:", 0, 100, 0) with col1: filtered_risk = polypharmacy[polypharmacy['risk_score'] >= min_risk].nlargest(top_n, 'num_side_effects') fig = px.scatter( filtered_risk, x='num_diseases_treated', y='num_side_effects', size='risk_score', color='risk_score', hover_data=['name'], title='Drugs: Side Effects vs Diseases Treated', color_continuous_scale='Reds' ) fig.update_layout(height=600) config = {'displayModeBar': True, 'displaylogo': False} st.plotly_chart(fig, use_container_width=True, config=config) st.dataframe( filtered_risk[['name', 'num_diseases_treated', 'num_side_effects', 'risk_score']].reset_index(drop=True), use_container_width=True ) csv = filtered_risk.to_csv(index=False).encode('utf-8') st.download_button("Download Risk Data", csv, "polypharmacy_risk.csv", "text/csv") # symptop triangle page elif page == "Symptom Triangle": st.header("Symptom-Disease-Drug Connections") top_n = st.slider("Number of symptoms:", 10, 30, 20) top_symptoms = symptom_triangle.nlargest(top_n, 'impact_score') fig = px.scatter( top_symptoms, x='num_diseases', y='num_treating_drugs', size='impact_score', color='drugs_with_side_effects', hover_data=['symptom'], title='Symptom Impact Analysis', color_continuous_scale='RdYlGn_r' ) fig.update_layout(height=600) config = {'displayModeBar': True, 'displaylogo': False} st.plotly_chart(fig, use_container_width=True, config=config) st.dataframe( top_symptoms[['symptom', 'num_diseases', 'num_treating_drugs', 'drugs_with_side_effects', 'impact_score']].reset_index(drop=True), use_container_width=True ) csv = top_symptoms.to_csv(index=False).encode('utf-8') st.download_button("Download Symptom Data", csv, "symptom_triangle.csv", "text/csv") # super drugs page elif page == "Super Drugs": st.header("Super-Drug Score (Best Benefit/Risk Ratio)") col1, col2 = st.columns([3, 1]) with col2: top_n = st.slider("Number of drugs:", 10, 30, 20) min_score = st.slider("Min super score:", 0.0, 5.0, 0.0, 0.1) with col1: filtered_super = super_drugs[super_drugs['super_score'] >= min_score].nlargest(top_n, 'super_score') fig = px.scatter( filtered_super, x='num_diseases_treated', y='num_side_effects', size='super_score', color='super_score', hover_data=['name'], title='Super Drugs Analysis', color_continuous_scale='Viridis_r' ) fig.update_layout(height=600) config = {'displayModeBar': True, 'displaylogo': False} st.plotly_chart(fig, use_container_width=True, config=config) st.dataframe( filtered_super[['name', 'num_diseases_treated', 'num_side_effects', 'super_score']].reset_index(drop=True), use_container_width=True ) perfect = super_drugs[(super_drugs['num_side_effects'] == 0) & (super_drugs['num_diseases_treated'] > 0)] st.info(f"Found {len(perfect)} drugs with ZERO documented side effects!") csv = filtered_super.to_csv(index=False).encode('utf-8') st.download_button("Download Super Drugs", csv, "super_drugs.csv", "text/csv") # drug conflicts page elif page == "Drug Conflicts": st.header("Drug Conflicts - Overlapping Side Effects") if drug_conflicts is not None and len(drug_conflicts) > 0: col1, col2 = st.columns([3, 1]) with col2: top_n = st.slider("Number of conflicts:", 10, 50, 20) min_overlap = st.slider("Min shared side effects:", 0, 100, 10) with col1: filtered_conflicts = drug_conflicts[ drug_conflicts['shared_side_effects'] >= min_overlap ].nlargest(top_n, 'shared_side_effects') fig = px.scatter( filtered_conflicts, x='drug1_total_se', y='drug2_total_se', size='shared_side_effects', color='overlap_percentage', hover_data=['drug1', 'drug2', 'shared_side_effects'], title='Drug Pairs with Overlapping Side Effects', labels={'drug1_total_se': 'Drug 1 Total SE', 'drug2_total_se': 'Drug 2 Total SE'}, color_continuous_scale='Reds' ) fig.update_layout(height=600) config = {'displayModeBar': True, 'displaylogo': False} st.plotly_chart(fig, use_container_width=True, config=config) st.warning("These drug combinations may have compounded side effects!") st.dataframe( filtered_conflicts[['drug1', 'drug2', 'shared_side_effects', 'overlap_percentage']].reset_index(drop=True), use_container_width=True ) csv = filtered_conflicts.to_csv(index=False).encode('utf-8') st.download_button("Download Conflicts", csv, "drug_conflicts.csv", "text/csv") else: st.warning("Drug conflicts data not available. Run the ETL script to generate this analysis.") # network graph page elif page == "Network Graph": st.header("Disease-Gene-Drug Network") if network_nodes is not None and network_edges is not None: st.info("Interactive network visualization showing connections between diseases, genes, and drugs") # create networkx graph G = nx.Graph() # add nodes for _, row in network_nodes.iterrows(): G.add_node(row['id'], label=row['label'], type=row['type']) # add edges for _, row in network_edges.iterrows(): G.add_edge(row['source'], row['target']) # create layout pos = nx.spring_layout(G, k=0.5, iterations=50) # create edge trace edge_x = [] edge_y = [] for edge in G.edges(): x0, y0 = pos[edge[0]] x1, y1 = pos[edge[1]] edge_x.extend([x0, x1, None]) edge_y.extend([y0, y1, None]) edge_trace = go.Scatter( x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines' ) # create node traces (separate by type for legend) node_traces = [] color_map = { 'Disease': '#ff4444', 'Gene': '#4444ff', 'Compound': '#44ff44' } for node_type, color in color_map.items(): node_x = [] node_y = [] node_text = [] for node in G.nodes(): if G.nodes[node]['type'] == node_type: x, y = pos[node] node_x.append(x) node_y.append(y) node_text.append(f"{node_type}: {G.nodes[node]['label']}") if node_x: node_trace = go.Scatter( x=node_x, y=node_y, mode='markers', name=node_type, hoverinfo='text', text=node_text, marker=dict( color=color, size=12, line=dict(width=2, color='white') ) ) node_traces.append(node_trace) # create figure fig = go.Figure(data=[edge_trace] + node_traces, layout=go.Layout( title='Disease-Gene-Drug Network', showlegend=True, hovermode='closest', margin=dict(b=0,l=0,r=0,t=40), xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False), height=700 )) config = {'displayModeBar': True, 'displaylogo': False} st.plotly_chart(fig, use_container_width=True, config=config) col1, col2, col3 = st.columns(3) col1.metric("🔴 Diseases", len([n for n in G.nodes() if G.nodes[n]['type'] == 'Disease'])) col2.metric("🔵 Genes", len([n for n in G.nodes() if G.nodes[n]['type'] == 'Gene'])) col3.metric("🟢 Drugs", len([n for n in G.nodes() if G.nodes[n]['type'] == 'Compound'])) else: st.warning("Network data not available. Run the ETL script to generate this visualization.") # compare drugs page elif page == "Compare Drugs": st.header("⚖️ Compare Drugs Side-by-Side") drug_names = sorted(super_drugs['name'].unique()) col1, col2 = st.columns(2) with col1: drug1 = st.selectbox("Select Drug 1:", drug_names, key='drug1') with col2: drug2 = st.selectbox("Select Drug 2:", drug_names, key='drug2') if drug1 and drug2: drug1_data = super_drugs[super_drugs['name'] == drug1].iloc[0] drug2_data = super_drugs[super_drugs['name'] == drug2].iloc[0] st.markdown("---") col1, col2 = st.columns(2) with col1: st.subheader(f"{drug1}") st.metric("Diseases Treated", int(drug1_data['num_diseases_treated'])) st.metric("Side Effects", int(drug1_data['num_side_effects'])) st.metric("Super Score", f"{drug1_data['super_score']:.2f}") with col2: st.subheader(f"{drug2}") st.metric("Diseases Treated", int(drug2_data['num_diseases_treated'])) st.metric("Side Effects", int(drug2_data['num_side_effects'])) st.metric("Super Score", f"{drug2_data['super_score']:.2f}") # comparison chart comparison_df = pd.DataFrame({ 'Metric': ['Diseases Treated', 'Side Effects', 'Super Score'], drug1: [drug1_data['num_diseases_treated'], drug1_data['num_side_effects'], drug1_data['super_score']], drug2: [drug2_data['num_diseases_treated'], drug2_data['num_side_effects'], drug2_data['super_score']] }) fig = px.bar( comparison_df, x='Metric', y=[drug1, drug2], barmode='group', title='Side-by-Side Comparison' ) config = {'displayModeBar': True, 'displaylogo': False} st.plotly_chart(fig, use_container_width=True, config=config) # winner determination st.markdown("---") st.subheader("Recommendation") if drug1_data['super_score'] > drug2_data['super_score']: st.success(f"**{drug1}** has a better benefit/risk ratio (Super Score: {drug1_data['super_score']:.2f})") elif drug2_data['super_score'] > drug1_data['super_score']: st.success(f"**{drug2}** has a better benefit/risk ratio (Super Score: {drug2_data['super_score']:.2f})") else: st.info("Both drugs have the same Super Score") except FileNotFoundError as e: st.error(f"Could not find data files") st.write("Please ensure you're running the dashboard from the correct directory with access to `neo4j_csv/` folder") except Exception as e: st.error(f"Error: {str(e)}") st.exception(e)