finishes setup
This commit is contained in:
81
neo4j_etl.py
Normal file
81
neo4j_etl.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from neo4j import GraphDatabase
|
||||
import pandas as pd
|
||||
import os
|
||||
import glob
|
||||
import getpass
|
||||
|
||||
|
||||
# Neo4j Connection
|
||||
|
||||
# Bolt endpoint of the Neo4j instance this script talks to.
NEO4J_URI = "bolt://localhost:7687"

# Credentials are requested interactively at start-up; the password is read
# through getpass rather than input().
NEO4J_USER = input("Neo4j username: ")
NEO4J_PASSWORD = getpass.getpass("Neo4j password: ")

# Module-level driver shared by test_connection() and run_query().
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
|
||||
|
||||
|
||||
# Helper Functions
|
||||
|
||||
|
||||
def test_connection():
    """Probe the database with a trivial query and print the outcome.

    Opens a session on the module-level ``driver`` and runs ``RETURN 1``.

    Returns:
        bool: ``True`` when the probe query yields a record; ``False`` when
        it yields none or when any exception is raised along the way.
    """
    try:
        with driver.session() as db_session:
            probe_row = db_session.run("RETURN 1").single()
            reachable = bool(probe_row)
            status = "✓ Connection successful" if reachable else "✗ Error connecting"
            print(status)
            return reachable
    except Exception as err:
        # Any failure (driver, network, auth, ...) is reported and treated
        # as "not connected" instead of propagating.
        print(f"✗ Error with the connection: {err}")
        return False
|
||||
|
||||
def run_query(query, parameters=None):
    """Execute a Cypher statement and collect its records into a DataFrame.

    Args:
        query: Cypher text handed to ``session.run``.
        parameters: Optional query parameters, forwarded unchanged to
            ``session.run``.

    Returns:
        pandas.DataFrame: One row per returned record, built from
        ``record.data()``; empty when the query returns no records.
    """
    rows = []
    with driver.session() as db_session:
        # Records are read while the session is still open.
        for record in db_session.run(query, parameters):
            rows.append(record.data())
    return pd.DataFrame(rows)
|
||||
|
||||
|
||||
# Check Neo4j connection
|
||||
|
||||
if not test_connection():
    # Nothing below can work without a database, so stop here.
    print("Cannot connect to Neo4j")
    # Release the driver before leaving; the normal shutdown path at the end
    # of the script is never reached from here.
    driver.close()
    # `exit()` is a helper installed by the `site` module for interactive
    # sessions and may be missing (e.g. `python -S`); SystemExit is a builtin
    # and produces the same exit status 1.
    raise SystemExit(1)
|
||||
|
||||
|
||||
# Folder for results
|
||||
|
||||
# CSV exports are written into this directory; it is created when missing
# and reused when it already exists.
output_dir = "query_results"
os.makedirs(name=output_dir, exist_ok=True)
|
||||
|
||||
|
||||
# Run all .cypher files in 'analysis_queries/' folder
|
||||
|
||||
# Query files are processed in sorted (lexicographic) path order.
cypher_files = sorted(glob.glob("analysis_queries/*.cypher"))

if not cypher_files:
    # Make an empty/missing query folder visible instead of silently
    # reporting that everything ran.
    print("⚠ No .cypher files found in 'analysis_queries/'")

try:
    for file in cypher_files:
        print(f"\nRunning {file}")
        try:
            # Reading happens inside the try so that one unreadable file is
            # reported and skipped instead of aborting the whole batch.
            with open(file, "r", encoding="utf-8") as f:
                query = f.read()

            df = run_query(query)
            if df.empty:
                print("⚠ No results returned")
            else:
                print(df.head(5))  # show top 5 rows
                # Name the CSV after the query file, minus directory and extension.
                safe_name = os.path.splitext(os.path.basename(file))[0]
                csv_path = os.path.join(output_dir, f"{safe_name}.csv")
                df.to_csv(csv_path, index=False, encoding="utf-8-sig")
                print(f"✓ Saved to {csv_path}")
        except Exception as e:
            # Best-effort batch: report the failure and continue with the
            # next query file.
            print(f"✗ Error running query '{file}': {e}")
finally:
    # Always release the driver, even if the loop is interrupted
    # (e.g. KeyboardInterrupt) or fails unexpectedly.
    driver.close()

print("\nAll queries executed.")
|
||||
Reference in New Issue
Block a user