adjust comments

This commit is contained in:
Ianatrix
2026-02-08 15:11:57 +01:00
parent d744904523
commit 23efc52da9

View File

@@ -3,9 +3,9 @@ import pandas as pd
from pathlib import Path
from collections import defaultdict
# ==============================
# CONFIGURATION
# ==============================
# File name of the JSON input (presumably the file loaded in the extraction
# phase -- the load call is outside this excerpt; confirm).
INPUT_JSON = "hetionet-v1.0.json"
# Directory that receives the CSV files written by this script.
OUTPUT_DIR = Path("neo4j_csv")
# Create the output directory up front; exist_ok=True keeps re-runs from
# raising FileExistsError when the directory is already present.
OUTPUT_DIR.mkdir(exist_ok=True)
@@ -14,9 +14,8 @@ print("="*60)
# Pipeline title followed by a 60-character "=" rule.
print("HETIONET ETL PIPELINE")
print("="*60)
# ==============================
# EXTRACT
# ==============================
# Phase header: blank line, phase title, then a 60-character "-" rule.
print("\nPHASE 1: EXTRACTION")
print("-"*60)
# Progress message emitted before the JSON input is read.
print("Loading JSON data...")
@@ -30,9 +29,8 @@ edges_raw = data["edges"]
# Report how many raw node and edge records were loaded; the ":," format
# spec inserts thousands separators.
print(f"Nodes loaded: {len(nodes_raw):,}")
print(f"Edges loaded: {len(edges_raw):,}")
# ==============================
# TRANSFORM NODES
# ==============================
# NOTE(review): this header has a space after the newline, unlike the
# "PHASE 1" and "PHASE 3" headers -- confirm whether that is intentional.
print("\n PHASE 2: TRANSFORM NODES")
print("-"*60)
@@ -91,9 +89,8 @@ for kind in nodes_df["kind"].unique():
df_kind.to_csv(filename, index=False)
print(f" {filename.name} ({len(df_kind):,} rows)")
# ==============================
# TRANSFORM EDGES
# ==============================
# Phase header: blank line, phase title, then a 60-character "-" rule.
print("\nPHASE 3: TRANSFORM EDGES")
print("-"*60)
@@ -127,9 +124,9 @@ for edge_type in ['associates', 'treats', 'presents', 'causes', 'regulates', 'up
if len(edges_by_type[edge_type]) > 0:
print(f" - {edge_type}: {len(edges_by_type[edge_type]):,}")
# ==============================
# ANALYSES
# ==============================
# Unlike the "-" rules used for phases 1-3, this phase title is framed by
# 60-character "=" rules above and below.
print("\n" + "="*60)
print("PHASE 4: ANALYSES")
print("="*60)
@@ -438,9 +435,7 @@ network_edges_df.to_csv(OUTPUT_DIR / "network_edges.csv", index=False)
# Report the row counts of the network node and edge DataFrames (printed
# without thousands separators).
print(f"Network: {len(network_nodes_df)} nodes, {len(network_edges_df)} edges")
# ==============================
# SUMMARY
# ==============================
# Closing banner framed by 60-character "=" rules.
print("\n" + "="*60)
print("ETL PIPELINE COMPLETED SUCCESSFULLY")
print("="*60)