adjust comments
This commit is contained in:
@@ -3,9 +3,9 @@ import pandas as pd
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
# ==============================
|
||||
|
||||
# CONFIGURATION
|
||||
# ==============================
|
||||
|
||||
INPUT_JSON = "hetionet-v1.0.json"
|
||||
OUTPUT_DIR = Path("neo4j_csv")
|
||||
OUTPUT_DIR.mkdir(exist_ok=True)
|
||||
@@ -14,9 +14,8 @@ print("="*60)
|
||||
print("HETIONET ETL PIPELINE")
|
||||
print("="*60)
|
||||
|
||||
# ==============================
|
||||
# EXTRACT
|
||||
# ==============================
|
||||
|
||||
print("\nPHASE 1: EXTRACTION")
|
||||
print("-"*60)
|
||||
print("Loading JSON data...")
|
||||
@@ -30,9 +29,8 @@ edges_raw = data["edges"]
|
||||
print(f"Nodes loaded: {len(nodes_raw):,}")
|
||||
print(f"Edges loaded: {len(edges_raw):,}")
|
||||
|
||||
# ==============================
|
||||
# TRANSFORM – NODES
|
||||
# ==============================
|
||||
|
||||
print("\n PHASE 2: TRANSFORM NODES")
|
||||
print("-"*60)
|
||||
|
||||
@@ -91,9 +89,8 @@ for kind in nodes_df["kind"].unique():
|
||||
df_kind.to_csv(filename, index=False)
|
||||
print(f" {filename.name} ({len(df_kind):,} rows)")
|
||||
|
||||
# ==============================
|
||||
# TRANSFORM – EDGES
|
||||
# ==============================
|
||||
|
||||
print("\nPHASE 3: TRANSFORM EDGES")
|
||||
print("-"*60)
|
||||
|
||||
@@ -127,9 +124,9 @@ for edge_type in ['associates', 'treats', 'presents', 'causes', 'regulates', 'up
|
||||
if len(edges_by_type[edge_type]) > 0:
|
||||
print(f" - {edge_type}: {len(edges_by_type[edge_type]):,}")
|
||||
|
||||
# ==============================
|
||||
|
||||
# ANALYSES
|
||||
# ==============================
|
||||
|
||||
print("\n" + "="*60)
|
||||
print("PHASE 4: ANALYSES")
|
||||
print("="*60)
|
||||
@@ -438,9 +435,7 @@ network_edges_df.to_csv(OUTPUT_DIR / "network_edges.csv", index=False)
|
||||
|
||||
print(f"Network: {len(network_nodes_df)} nodes, {len(network_edges_df)} edges")
|
||||
|
||||
# ==============================
|
||||
# SUMMARY
|
||||
# ==============================
|
||||
|
||||
print("\n" + "="*60)
|
||||
print("ETL PIPELINE COMPLETED SUCCESSFULLY")
|
||||
print("="*60)
|
||||
|
||||
Reference in New Issue
Block a user