# Excerpt of schnellerereETL.py, reconstructed from the diff 6963bff..61a8ed5.
# Only the regions covered by the two diff hunks are shown; the rest of the
# script lies between the "lines not shown" markers below.
import atexit
import json
import os
from collections import defaultdict
from pathlib import Path

import pandas as pd
from neo4j import GraphDatabase

# Neo4j connection settings. The previously hard-coded values remain the
# defaults; each one can be overridden by an environment variable of the same
# name so credentials do not have to live in the source file.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")

driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USER, NEO4J_PASSWORD),
)
# The driver is shared by the whole script, so release it at interpreter exit
# instead of guessing where its last use is.
atexit.register(driver.close)


def _quote_identifier(name):
    """Return *name* as a backtick-quoted Cypher identifier.

    Labels and relationship types cannot be passed as query parameters, so
    they have to be spliced into the query text. Quoting them (and doubling
    any embedded backtick) keeps an unusual name from altering the query.

    Raises:
        ValueError: if *name* is empty.
    """
    text = str(name)
    if not text:
        raise ValueError("Cypher identifier must not be empty")
    return "`" + text.replace("`", "``") + "`"


def _batched(rows, size):
    """Yield consecutive slices of the list *rows* with at most *size* items.

    Raises:
        ValueError: if *size* is smaller than 1.
    """
    if size < 1:
        raise ValueError("batch size must be at least 1")
    for start in range(0, len(rows), size):
        yield rows[start:start + size]


def load_nodes(df, label, batch_size=1000):
    """Upsert every row of *df* as a node carrying *label*.

    Nodes are merged on their ``id`` property, so running the load again
    updates existing nodes instead of duplicating them. All other non-null
    columns of a row are written as node properties.

    Args:
        df: DataFrame with an ``id`` column; every other column becomes a
            node property unless its value is missing for that row.
        label: Node label to merge on.
        batch_size: Number of rows sent to the database per query.

    Raises:
        ValueError: if *label* is empty or *batch_size* is smaller than 1.
    """
    rows = [
        {"id": row["id"], "props": row.drop("id").dropna().to_dict()}
        for _, row in df.iterrows()
    ]
    if not rows:
        return

    # One UNWIND query per batch replaces one round trip per row.
    query = f"""
        UNWIND $rows AS row
        MERGE (n:{_quote_identifier(label)} {{id: row.id}})
        SET n += row.props
        """
    with driver.session() as session:
        for chunk in _batched(rows, batch_size):
            # consume() forces the query to finish so errors surface here.
            session.run(query, rows=chunk).consume()


def load_edges(edges_df, batch_size=1000):
    """Upsert one relationship per row of *edges_df*.

    Each row connects the node whose ``id`` equals ``source`` to the node
    whose ``id`` equals ``target``; the relationship type is the upper-cased
    ``type`` value. Relationships are merged, matching the MERGE semantics of
    ``load_nodes``, so re-running the load does not duplicate edges. Rows
    whose endpoints are not found create nothing.

    Args:
        edges_df: DataFrame with ``source``, ``target`` and ``type`` columns.
        batch_size: Number of rows sent to the database per query.

    Raises:
        ValueError: if a relationship type is empty or *batch_size* is
            smaller than 1.
    """
    # A relationship type cannot be a query parameter, so rows are grouped by
    # type and each group gets its own query text.
    endpoints_by_type = defaultdict(list)
    for source, target, rel_type in zip(
        edges_df["source"], edges_df["target"], edges_df["type"]
    ):
        endpoints_by_type[rel_type.upper()].append(
            {"source": source, "target": target}
        )

    with driver.session() as session:
        for rel_type, endpoints in endpoints_by_type.items():
            # NOTE(review): endpoints are matched on id alone, without a
            # label, exactly as before; this probably prevents index use.
            # Consider matching on a label if the edge data can supply one.
            query = f"""
                UNWIND $rows AS row
                MATCH (s {{id: row.source}})
                MATCH (t {{id: row.target}})
                MERGE (s)-[:{_quote_identifier(rel_type)}]->(t)
                """
            for chunk in _batched(endpoints, batch_size):
                session.run(query, rows=chunk).consume()


# CONFIGURATION

# ... lines of the script outside the diff hunks are not shown ...

network_edges_df = pd.DataFrame(network_edges)

network_nodes_df.to_csv(OUTPUT_DIR / "network_nodes.csv", index=False)
network_edges_df.to_csv(OUTPUT_DIR / "network_edges.csv", index=False)

# Load every node kind under its Neo4j label first, so the endpoints exist
# before the edges that reference them are loaded.
for _node_kind, _node_label in (
    ("Gene", "Gene"),
    ("Disease", "Disease"),
    ("Compound", "Compound"),
    ("Symptom", "Symptom"),
    ("Side Effect", "SideEffect"),
):
    load_nodes(nodes_df[nodes_df["kind"] == _node_kind], _node_label)

load_edges(edges_df)

print(f"Network: {len(network_nodes_df)} nodes, {len(network_edges_df)} edges")