fix comments etc.

This commit is contained in:
Philipp Jacoby
2026-02-10 17:57:43 +01:00
parent 3003310be0
commit 8965b04a61
5 changed files with 59 additions and 60 deletions

View File

@@ -473,19 +473,20 @@ Follow these steps exactly in the order provided:
python neo4j_etl.py python neo4j_etl.py
``` ```
**Eingabe:** Das Skript wird Sie nacheinander nach Ihrem **Datenbank-Usernamen** (Standard: `neo4j`) und Ihrem **Passwort** fragen. **Input:** The script will ask you for your **database-username** (default: `neo4j`) and your **password**.
**Verarbeitung:** Das Skript liest automatisch alle Abfragen aus dem Verzeichnis `neo4jqueries/analysis_queries` aus. **Processing:** The script automatically reads and executes cypher queries in the following directory `neo4jqueries/analysis_queries`.
**Ausgabe:** Die Ergebnisse der Analyse-Queries werden direkt in der Konsole ausgegeben. **Output:** Results of the analysis will be displayed on the terminal.
--- ---
### Projektstruktur ### Structure
| Verzeichnis / Datei | Funktion | | Directory / file | Functionality |
| :---------------------------------- | :--------------------------------------------------------- | | :---------------------------------- | :----------------------------------------------------- |
| `neo4j_etl.py`                       | Das Python-Skript zur Ausführung der Analyse-Queries.      | | `neo4j_etl.py`                       | Python-Script for executing analysis queries.           |
| `neo4jqueries/loadingQueriesNeo4j/` | Enthält alle Cypher-Dateien für den initialen Datenimport. | | `neo4jqueries/loadingQueriesNeo4j/` | Contains all Cypher files for the initial data import. |
| `neo4jqueries/analysis_queries/` | Enthält Cypher-Dateien für die statistische Auswertung. | | `neo4jqueries/analysis_queries/` | Includes Cypher files for analysis. |
| | |
--- ---

View File

@@ -104,7 +104,7 @@ try:
else: else:
st.sidebar.warning("No drugs found") st.sidebar.warning("No drugs found")
# OVERVIEW PAGE # overview page
if page == "Overview": if page == "Overview":
st.header("Dataset Overview") st.header("Dataset Overview")
@@ -114,7 +114,7 @@ try:
col3.metric("Repurposing Opportunities", f"{len(repurposing):,}") col3.metric("Repurposing Opportunities", f"{len(repurposing):,}")
col4.metric("Analyzed Drugs", f"{len(super_drugs):,}") col4.metric("Analyzed Drugs", f"{len(super_drugs):,}")
# STATISTICS BOXES # statistics boxes
st.markdown("---") st.markdown("---")
st.subheader("Key Statistics") st.subheader("Key Statistics")
@@ -163,7 +163,7 @@ try:
xaxis_title="Gene", xaxis_title="Gene",
yaxis_title="Number of Diseases" yaxis_title="Number of Diseases"
) )
# Enable chart export # enable chart export
config = {'displayModeBar': True, 'displaylogo': False} config = {'displayModeBar': True, 'displaylogo': False}
st.plotly_chart(fig, use_container_width=True, config=config) st.plotly_chart(fig, use_container_width=True, config=config)
@@ -185,7 +185,7 @@ try:
csv = top_diseases.to_csv(index=False).encode('utf-8') csv = top_diseases.to_csv(index=False).encode('utf-8')
st.download_button("Download Data", csv, "top_diseases.csv", "text/csv") st.download_button("Download Data", csv, "top_diseases.csv", "text/csv")
# HOTSPOT GENES PAGE # hotspot gene page
elif page == "Hotspot Genes": elif page == "Hotspot Genes":
st.header("Hotspot Genes - Most Disease Associations") st.header("Hotspot Genes - Most Disease Associations")
@@ -215,7 +215,7 @@ try:
csv = filtered_genes.to_csv(index=False).encode('utf-8') csv = filtered_genes.to_csv(index=False).encode('utf-8')
st.download_button("Download Filtered Data", csv, "hotspot_genes.csv", "text/csv") st.download_button("Download Filtered Data", csv, "hotspot_genes.csv", "text/csv")
# DRUG REPURPOSING PAGE # drug repurposing page
elif page == "Drug Repurposing": elif page == "Drug Repurposing":
st.header("Drug Repurposing Opportunities") st.header("Drug Repurposing Opportunities")
@@ -250,7 +250,7 @@ try:
csv = filtered.to_csv(index=False).encode('utf-8') csv = filtered.to_csv(index=False).encode('utf-8')
st.download_button("Download", csv, f"repurposing_{selected}.csv", "text/csv") st.download_button("Download", csv, f"repurposing_{selected}.csv", "text/csv")
# POLYPHARMACY RISK PAGE # polypharmacy risk page
elif page == "Polypharmacy Risk": elif page == "Polypharmacy Risk":
st.header("Polypharmacy Risk Analysis") st.header("Polypharmacy Risk Analysis")
@@ -285,7 +285,7 @@ try:
csv = filtered_risk.to_csv(index=False).encode('utf-8') csv = filtered_risk.to_csv(index=False).encode('utf-8')
st.download_button("Download Risk Data", csv, "polypharmacy_risk.csv", "text/csv") st.download_button("Download Risk Data", csv, "polypharmacy_risk.csv", "text/csv")
# SYMPTOM TRIANGLE PAGE # symptom triangle page
elif page == "Symptom Triangle": elif page == "Symptom Triangle":
st.header("Symptom-Disease-Drug Connections") st.header("Symptom-Disease-Drug Connections")
@@ -314,7 +314,7 @@ try:
csv = top_symptoms.to_csv(index=False).encode('utf-8') csv = top_symptoms.to_csv(index=False).encode('utf-8')
st.download_button("Download Symptom Data", csv, "symptom_triangle.csv", "text/csv") st.download_button("Download Symptom Data", csv, "symptom_triangle.csv", "text/csv")
# SUPER DRUGS PAGE # super drugs page
elif page == "Super Drugs": elif page == "Super Drugs":
st.header("Super-Drug Score (Best Benefit/Risk Ratio)") st.header("Super-Drug Score (Best Benefit/Risk Ratio)")
@@ -352,7 +352,7 @@ try:
csv = filtered_super.to_csv(index=False).encode('utf-8') csv = filtered_super.to_csv(index=False).encode('utf-8')
st.download_button("Download Super Drugs", csv, "super_drugs.csv", "text/csv") st.download_button("Download Super Drugs", csv, "super_drugs.csv", "text/csv")
# DRUG CONFLICTS PAGE # drug conflicts page
elif page == "Drug Conflicts": elif page == "Drug Conflicts":
st.header("Drug Conflicts - Overlapping Side Effects") st.header("Drug Conflicts - Overlapping Side Effects")
@@ -394,28 +394,28 @@ try:
else: else:
st.warning("Drug conflicts data not available. Run the ETL script to generate this analysis.") st.warning("Drug conflicts data not available. Run the ETL script to generate this analysis.")
# NETWORK GRAPH PAGE # network graph page
elif page == "Network Graph": elif page == "Network Graph":
st.header("Disease-Gene-Drug Network") st.header("Disease-Gene-Drug Network")
if network_nodes is not None and network_edges is not None: if network_nodes is not None and network_edges is not None:
st.info("Interactive network visualization showing connections between diseases, genes, and drugs") st.info("Interactive network visualization showing connections between diseases, genes, and drugs")
# Create networkx graph # create networkx graph
G = nx.Graph() G = nx.Graph()
# Add nodes # add nodes
for _, row in network_nodes.iterrows(): for _, row in network_nodes.iterrows():
G.add_node(row['id'], label=row['label'], type=row['type']) G.add_node(row['id'], label=row['label'], type=row['type'])
# Add edges # add edges
for _, row in network_edges.iterrows(): for _, row in network_edges.iterrows():
G.add_edge(row['source'], row['target']) G.add_edge(row['source'], row['target'])
# Create layout # create layout
pos = nx.spring_layout(G, k=0.5, iterations=50) pos = nx.spring_layout(G, k=0.5, iterations=50)
# Create edge trace # create edge trace
edge_x = [] edge_x = []
edge_y = [] edge_y = []
for edge in G.edges(): for edge in G.edges():
@@ -431,7 +431,7 @@ try:
mode='lines' mode='lines'
) )
# Create node traces (separate by type for legend) # create node traces (separate by type for legend)
node_traces = [] node_traces = []
color_map = { color_map = {
'Disease': '#ff4444', 'Disease': '#ff4444',
@@ -466,7 +466,7 @@ try:
) )
node_traces.append(node_trace) node_traces.append(node_trace)
# Create figure # create figure
fig = go.Figure(data=[edge_trace] + node_traces, fig = go.Figure(data=[edge_trace] + node_traces,
layout=go.Layout( layout=go.Layout(
title='Disease-Gene-Drug Network', title='Disease-Gene-Drug Network',
@@ -488,7 +488,7 @@ try:
else: else:
st.warning("Network data not available. Run the ETL script to generate this visualization.") st.warning("Network data not available. Run the ETL script to generate this visualization.")
# COMPARE DRUGS PAGE # compare drugs page
elif page == "Compare Drugs": elif page == "Compare Drugs":
st.header("⚖️ Compare Drugs Side-by-Side") st.header("⚖️ Compare Drugs Side-by-Side")
@@ -522,7 +522,7 @@ try:
st.metric("Side Effects", int(drug2_data['num_side_effects'])) st.metric("Side Effects", int(drug2_data['num_side_effects']))
st.metric("Super Score", f"{drug2_data['super_score']:.2f}") st.metric("Super Score", f"{drug2_data['super_score']:.2f}")
# Comparison chart # comparison chart
comparison_df = pd.DataFrame({ comparison_df = pd.DataFrame({
'Metric': ['Diseases Treated', 'Side Effects', 'Super Score'], 'Metric': ['Diseases Treated', 'Side Effects', 'Super Score'],
drug1: [drug1_data['num_diseases_treated'], drug1_data['num_side_effects'], drug1_data['super_score']], drug1: [drug1_data['num_diseases_treated'], drug1_data['num_side_effects'], drug1_data['super_score']],
@@ -539,7 +539,7 @@ try:
config = {'displayModeBar': True, 'displaylogo': False} config = {'displayModeBar': True, 'displaylogo': False}
st.plotly_chart(fig, use_container_width=True, config=config) st.plotly_chart(fig, use_container_width=True, config=config)
# Winner determination # winner determination
st.markdown("---") st.markdown("---")
st.subheader("Recommendation") st.subheader("Recommendation")

50
etl.py
View File

@@ -4,7 +4,7 @@ from pathlib import Path
from collections import defaultdict from collections import defaultdict
# KONFIGURATION # config
INPUT_JSON = "hetionet-v1.0.json" INPUT_JSON = "hetionet-v1.0.json"
OUTPUT_DIR = Path("neo4j_csv") OUTPUT_DIR = Path("neo4j_csv")
@@ -14,7 +14,7 @@ print("="*60)
print("HETIONET ETL PIPELINE (OPTIMIZED + SPLIT EDGES)") print("HETIONET ETL PIPELINE (OPTIMIZED + SPLIT EDGES)")
print("="*60) print("="*60)
# EXTRACT # extract
print("\nPHASE 1: EXTRACTION") print("\nPHASE 1: EXTRACTION")
print("-"*60) print("-"*60)
@@ -29,7 +29,7 @@ edges_raw = data["edges"]
print(f"Nodes loaded: {len(nodes_raw):,}") print(f"Nodes loaded: {len(nodes_raw):,}")
print(f"Edges loaded: {len(edges_raw):,}") print(f"Edges loaded: {len(edges_raw):,}")
# TRANSFORM NODES # transform nodes
print("\nPHASE 2: TRANSFORM NODES") print("\nPHASE 2: TRANSFORM NODES")
print("-"*60) print("-"*60)
@@ -48,7 +48,7 @@ for node in nodes_raw:
nodes_df = pd.DataFrame(nodes_flat) nodes_df = pd.DataFrame(nodes_flat)
# Spaltennamen Neo4j-sicher machen # make column names neo4j safe
nodes_df.columns = ( nodes_df.columns = (
nodes_df.columns nodes_df.columns
.str.replace(" ", "_") .str.replace(" ", "_")
@@ -59,11 +59,11 @@ nodes_df.columns = (
print(f"Processed {len(nodes_df):,} nodes") print(f"Processed {len(nodes_df):,} nodes")
print(f" Columns: {', '.join(nodes_df.columns[:5])}...") print(f" Columns: {', '.join(nodes_df.columns[:5])}...")
# Create lookup dictionaries # create lookup dictionaries
node_id_to_kind = dict(zip(nodes_df['id'], nodes_df['kind'])) node_id_to_kind = dict(zip(nodes_df['id'], nodes_df['kind']))
node_id_to_name = dict(zip(nodes_df['id'], nodes_df['name'])) node_id_to_name = dict(zip(nodes_df['id'], nodes_df['name']))
# Create sets for fast membership testing # create sets for fast membership testing
gene_ids = set(nodes_df[nodes_df['kind'] == 'Gene']['id']) gene_ids = set(nodes_df[nodes_df['kind'] == 'Gene']['id'])
disease_ids = set(nodes_df[nodes_df['kind'] == 'Disease']['id']) disease_ids = set(nodes_df[nodes_df['kind'] == 'Disease']['id'])
symptom_ids = set(nodes_df[nodes_df['kind'] == 'Symptom']['id']) symptom_ids = set(nodes_df[nodes_df['kind'] == 'Symptom']['id'])
@@ -77,7 +77,7 @@ print(f" - Symptoms: {len(symptom_ids):,}")
print(f" - Compounds: {len(compound_ids):,}") print(f" - Compounds: {len(compound_ids):,}")
print(f" - Side Effects: {len(sideeffect_ids):,}") print(f" - Side Effects: {len(sideeffect_ids):,}")
# Export nodes by type # export nodes by type
print("\nExporting node files...") print("\nExporting node files...")
for kind in nodes_df["kind"].unique(): for kind in nodes_df["kind"].unique():
df_kind = ( df_kind = (
@@ -89,7 +89,7 @@ for kind in nodes_df["kind"].unique():
df_kind.to_csv(filename, index=False) df_kind.to_csv(filename, index=False)
print(f" {filename.name} ({len(df_kind):,} rows)") print(f" {filename.name} ({len(df_kind):,} rows)")
# TRANSFORM EDGES # transform edges
print("\nPHASE 3: TRANSFORM EDGES") print("\nPHASE 3: TRANSFORM EDGES")
print("-"*60) print("-"*60)
@@ -107,7 +107,7 @@ for i, edge in enumerate(edges_raw):
edges_df = pd.DataFrame(edges) edges_df = pd.DataFrame(edges)
# Relationship-Typen Neo4j-sicher machen # make relationship types neo4j safe
edges_df["type"] = edges_df["type"].str.replace(" ", "_").str.replace("-", "_") edges_df["type"] = edges_df["type"].str.replace(" ", "_").str.replace("-", "_")
# split edges into separate files # split edges into separate files
@@ -120,13 +120,13 @@ for edge_type in sorted(edge_types):
edges_subset = edges_df[edges_df['type'] == edge_type] edges_subset = edges_df[edges_df['type'] == edge_type]
filename = OUTPUT_DIR / f"edges_{edge_type}.csv" filename = OUTPUT_DIR / f"edges_{edge_type}.csv"
# Only export source and target (type is in filename) # only export source and target (type is in filename)
edges_subset[['source', 'target']].to_csv(filename, index=False) edges_subset[['source', 'target']].to_csv(filename, index=False)
size_mb = filename.stat().st_size / (1024*1024) size_mb = filename.stat().st_size / (1024*1024)
print(f" ✓ edges_{edge_type:20s}.csv ({len(edges_subset):>10,} rows, {size_mb:>6.2f} MB)") print(f" ✓ edges_{edge_type:20s}.csv ({len(edges_subset):>10,} rows, {size_mb:>6.2f} MB)")
# Also keep the combined file for backward compatibility # also keep the combined file for backward compatibility
edges_file = OUTPUT_DIR / "edges_all.csv" edges_file = OUTPUT_DIR / "edges_all.csv"
edges_df.to_csv(edges_file, index=False) edges_df.to_csv(edges_file, index=False)
print(f"\n ✓ edges_all.csv (combined) ({len(edges_df):,} rows)") print(f"\n ✓ edges_all.csv (combined) ({len(edges_df):,} rows)")
@@ -136,7 +136,7 @@ print(f" Total edges: {len(edges_df):,}")
print(f" Split into {len(edge_types)} separate CSV files") print(f" Split into {len(edge_types)} separate CSV files")
print(f" Each file can be loaded independently!") print(f" Each file can be loaded independently!")
# Pre-filter edges by type for analysis # pre-filter edges by type for analysis
print("\nEdge type distribution:") print("\nEdge type distribution:")
edges_by_type = {} edges_by_type = {}
for edge_type in sorted(edge_types): for edge_type in sorted(edge_types):
@@ -145,14 +145,12 @@ for edge_type in sorted(edge_types):
pct = 100 * count / len(edges_df) pct = 100 * count / len(edges_df)
print(f" - {edge_type:20s}: {count:>10,} ({pct:>5.1f}%)") print(f" - {edge_type:20s}: {count:>10,} ({pct:>5.1f}%)")
# [ANALYSES - keeping all the existing analysis code...]
# (Keeping the same analysis code as before)
print("\n" + "="*60) print("\n" + "="*60)
print("PHASE 4: ANALYSES") print("PHASE 4: ANALYSES")
print("="*60) print("="*60)
# ANALYSIS 1: HOTSPOT GENES # analysis 1: hotspot genes
print("\nAnalysis 1: Hotspot Genes") print("\nAnalysis 1: Hotspot Genes")
print("-"*60) print("-"*60)
@@ -183,7 +181,7 @@ genes_df_sorted.to_csv(OUTPUT_DIR / "nodes_Gene.csv", index=False)
print(f"Top gene: {genes_df_sorted.iloc[0]['name']} ({int(genes_df_sorted.iloc[0]['num_diseases'])} diseases)") print(f"Top gene: {genes_df_sorted.iloc[0]['name']} ({int(genes_df_sorted.iloc[0]['num_diseases'])} diseases)")
# ANALYSIS 2: DISEASE SYMPTOM DIVERSITY # analysis 2: disease symptom diversity
print("\nAnalysis 2: Disease Symptom Diversity") print("\nAnalysis 2: Disease Symptom Diversity")
print("-"*60) print("-"*60)
@@ -208,7 +206,7 @@ disease_df_sorted.to_csv(OUTPUT_DIR / "nodes_Disease.csv", index=False)
print(f"Top disease: {disease_df_sorted.iloc[0]['name']} ({int(disease_df_sorted.iloc[0]['num_symptoms'])} symptoms)") print(f"Top disease: {disease_df_sorted.iloc[0]['name']} ({int(disease_df_sorted.iloc[0]['num_symptoms'])} symptoms)")
# Build indices for drug analyses # build indices for drug analyses
print("\nBuilding indices for drug analyses...") print("\nBuilding indices for drug analyses...")
disease_to_genes = defaultdict(set) disease_to_genes = defaultdict(set)
gene_to_diseases = defaultdict(set) gene_to_diseases = defaultdict(set)
@@ -238,7 +236,7 @@ print(f"\n💡 For faster Neo4j loading, use the split edge files:")
print(f" edges_associates.csv, edges_treats.csv, etc.") print(f" edges_associates.csv, edges_treats.csv, etc.")
print(f" Instead of the combined edges_all.csv") print(f" Instead of the combined edges_all.csv")
# ANALYSIS 3: DRUG REPURPOSING # analysis 3: drug repurposing
print("\nAnalysis 3: Drug Repurposing Opportunities") print("\nAnalysis 3: Drug Repurposing Opportunities")
print("-"*60) print("-"*60)
@@ -273,7 +271,7 @@ if len(repurposing_df) > 0:
repurposing_df.to_csv(OUTPUT_DIR / "analysis_drug_repurposing.csv", index=False) repurposing_df.to_csv(OUTPUT_DIR / "analysis_drug_repurposing.csv", index=False)
print(f"Found {len(repurposing_df):,} repurposing opportunities") print(f"Found {len(repurposing_df):,} repurposing opportunities")
# ANALYSIS 4: POLYPHARMACY RISK # analysis 4: polypharmacy risk
print("\nAnalysis 4: Polypharmacy Risk") print("\nAnalysis 4: Polypharmacy Risk")
print("-"*60) print("-"*60)
@@ -294,7 +292,7 @@ if len(drug_sideeffects) > 0:
drug_risk_sorted.to_csv(OUTPUT_DIR / "analysis_polypharmacy_risk.csv", index=False) drug_risk_sorted.to_csv(OUTPUT_DIR / "analysis_polypharmacy_risk.csv", index=False)
print(f"Analyzed {len(drug_risk_sorted):,} drugs for side effects") print(f"Analyzed {len(drug_risk_sorted):,} drugs for side effects")
# ANALYSIS 5: SYMPTOM TRIANGLE # analysis 5: symptom triangle
print("\nAnalysis 5: Symptom-Disease-Drug Triangle") print("\nAnalysis 5: Symptom-Disease-Drug Triangle")
print("-"*60) print("-"*60)
@@ -326,7 +324,7 @@ if len(symptom_triangle_df) > 0:
symptom_triangle_df.to_csv(OUTPUT_DIR / "analysis_symptom_triangle.csv", index=False) symptom_triangle_df.to_csv(OUTPUT_DIR / "analysis_symptom_triangle.csv", index=False)
print(f"Analyzed {len(symptom_triangle_df):,} symptoms") print(f"Analyzed {len(symptom_triangle_df):,} symptoms")
# ANALYSIS 6: SUPER DRUGS # analysis 6: super drugs
print("\nAnalysis 6: Super-Drug Score") print("\nAnalysis 6: Super-Drug Score")
print("-"*60) print("-"*60)
@@ -352,7 +350,7 @@ if len(super_drugs_df) > 0:
super_drugs_df.to_csv(OUTPUT_DIR / "analysis_super_drugs.csv", index=False) super_drugs_df.to_csv(OUTPUT_DIR / "analysis_super_drugs.csv", index=False)
print(f"Analyzed {len(super_drugs_df):,} drugs") print(f"Analyzed {len(super_drugs_df):,} drugs")
# ANALYSIS 7: DRUG CONFLICTS # analysis 7: drug conflicts
print("\nAnalysis 7: Drug Conflicts") print("\nAnalysis 7: Drug Conflicts")
print("-"*60) print("-"*60)
@@ -390,7 +388,7 @@ if len(drug_conflicts_df) > 0:
drug_conflicts_df.to_csv(OUTPUT_DIR / "analysis_drug_conflicts.csv", index=False) drug_conflicts_df.to_csv(OUTPUT_DIR / "analysis_drug_conflicts.csv", index=False)
print(f"Found {len(drug_conflicts_df):,} drug conflict pairs") print(f"Found {len(drug_conflicts_df):,} drug conflict pairs")
# ANALYSIS 8: NETWORK DATA # analysis 8: network data
print("\nAnalysis 8: Network Visualization Data") print("\nAnalysis 8: Network Visualization Data")
print("-"*60) print("-"*60)
@@ -401,7 +399,7 @@ network_edges = []
node_id_counter = 0 node_id_counter = 0
id_mapping = {} id_mapping = {}
# Add disease nodes # add disease nodes
for disease_id in top_diseases: for disease_id in top_diseases:
node_id = f"d_{node_id_counter}" node_id = f"d_{node_id_counter}"
id_mapping[disease_id] = node_id id_mapping[disease_id] = node_id
@@ -413,7 +411,7 @@ for disease_id in top_diseases:
}) })
node_id_counter += 1 node_id_counter += 1
# Add genes # add genes
disease_genes = gene_disease_edges[ disease_genes = gene_disease_edges[
gene_disease_edges['source'].isin(top_diseases) gene_disease_edges['source'].isin(top_diseases)
].head(150) ].head(150)
@@ -437,7 +435,7 @@ for _, row in disease_genes.iterrows():
'type': 'associates' 'type': 'associates'
}) })
# Add drugs # add drugs
drug_treatments = treats_edges[treats_edges['target'].isin(top_diseases)].head(50) drug_treatments = treats_edges[treats_edges['target'].isin(top_diseases)].head(50)
for _, row in drug_treatments.iterrows(): for _, row in drug_treatments.iterrows():

View File

@@ -1,4 +1,4 @@
LOAD CSV WITH HEADERS FROM 'file:///edges_treats.csv' AS row LOAD CSV WITH HEADERS FROM 'file:///edges_associates.csv' AS row
MATCH (source {id: row.source}) MATCH (source {id: row.source})
MATCH (target {id: row.target}) MATCH (target {id: row.target})
CREATE (source)-[:TREATS]->(target); CREATE (source)-[:ASSOCIATES]->(target);

View File

@@ -1,4 +1,4 @@
LOAD CSV WITH HEADERS FROM 'file:///edges_causes.csv' AS row LOAD CSV WITH HEADERS FROM 'file:///edges_upregulates.csv' AS row
MATCH (source {id: row.source}) MATCH (source {id: row.source})
MATCH (target {id: row.target}) MATCH (target {id: row.target})
CREATE (source)-[:CAUSES]->(target); CREATE (source)-[:UPREGULATES]->(target);