icm/database.py

66 lines
1.6 KiB
Python

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
# Database connection URL, hard-coded for local development for now.
# TODO: make this configurable.
SQLALCHEMY_DATABASE_URL = "sqlite:///./db_dev.sqlite3"
# SQLALCHEMY_DATABASE_URL = "postgresql://user:password@postgresserver/db"

# ``check_same_thread`` is an option of the SQLite driver only. Other drivers
# (such as the PostgreSQL URL shown above) do not understand it, so it is
# supplied only when the URL selects SQLite.
engine = create_engine(
    SQLALCHEMY_DATABASE_URL,
    connect_args=(
        {"check_same_thread": False}
        if SQLALCHEMY_DATABASE_URL.startswith("sqlite")
        else {}
    ),
)

# Session factory bound to the engine; flushing is not automatic.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class created for the ORM models.
Base = declarative_base()
def create_db():
    """Run ``create_all`` for the metadata of ``models.Base``.

    The statements are issued through the module-level ``engine``.
    """
    # Imported inside the function rather than at module top; presumably this
    # avoids a circular import with ``models`` -- confirm.
    import models

    metadata = models.Base.metadata
    metadata.create_all(bind=engine)
def _json_quotes(s):
return s.replace("'", '"')
def adapt_movie_data(data_in: dict):
    """Return an adapted deep copy of one raw movie row.

    The input mapping is left untouched. In the returned copy:

    * ``"genres"`` is replaced by the list of ``"name"`` values taken from
      the serialized genre list, and
    * ``"description"`` is added with the value of ``"overview"``.

    Args:
        data_in: Row mapping with at least the keys ``"genres"`` (text holding
            a serialized list of mappings that each have a ``"name"`` key)
            and ``"overview"``.

    Returns:
        The adapted copy of ``data_in``.

    Raises:
        KeyError: If ``"genres"`` or ``"overview"`` is missing.
        json.JSONDecodeError: If the genres text can be parsed neither as a
            Python literal nor as JSON.
    """
    import ast
    import copy
    import json

    data_out = copy.deepcopy(data_in)

    # A blank or missing value means "no genres" rather than a parse error.
    raw_genres = (data_in["genres"] or "").strip()
    if not raw_genres:
        genres = []
    else:
        try:
            # Parse the single-quoted, Python-literal form directly. Blindly
            # swapping quotes corrupts values that contain an apostrophe
            # (e.g. "Children's") or an escaped quote.
            genres = ast.literal_eval(raw_genres)
        except (ValueError, SyntaxError):
            # Not a Python literal (e.g. JSON ``true``/``null``): keep the
            # previous quote-swapping JSON behaviour as a fallback.
            genres = json.loads(raw_genres.replace("'", '"'))

    data_out["genres"] = [genre["name"] for genre in genres]
    data_out["description"] = data_in["overview"]
    return data_out
def fill_db(
    db=None,
    movie_input_file: str = "input_data/movies_metadata.csv",
    page_size: int = 100,
):
    """Load movies from a CSV file into the database.

    Every row is passed through ``adapt_movie_data``. Rows whose ``"title"``
    is empty are printed and skipped; all other rows are handed to
    ``crud.create_movie``. The session is committed every ``page_size`` rows
    and once more after the last row.

    Args:
        db: Session to write through. When ``None`` (the default) a new
            session is created from ``SessionLocal`` for this call and closed
            before returning; a session passed in by the caller is left open.
        movie_input_file: Path of the CSV file to import (read as UTF-8).
        page_size: Number of rows read between intermediate commits.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    import crud
    import csv

    if page_size < 1:
        raise ValueError("page_size must be a positive integer")

    # Create the session per call. A ``SessionLocal()`` default argument would
    # be evaluated once at definition time, opening a session on import that
    # every call shares and nobody closes.
    owns_session = db is None
    if owns_session:
        db = SessionLocal()

    try:
        # ``newline=""`` is required by the csv module so that quoted fields
        # containing line breaks are read back correctly.
        with open(movie_input_file, newline="", encoding="utf-8") as csvfile:
            reader = csv.DictReader(csvfile)
            for count, movie_data in enumerate(reader, start=1):
                # Intermediate commit for the rows added so far.
                if count % page_size == 0:
                    db.commit()
                adapted_data = adapt_movie_data(movie_data)
                if not adapted_data["title"]:
                    # Report the row so the input data can be repaired.
                    print(count, "should be fixed")
                    print(adapted_data)
                    continue
                # NOTE(review): ``batch_mode=True`` presumably leaves the
                # commit to this function -- confirm against ``crud``.
                crud.create_movie(db, batch_mode=True, **adapted_data)
        # Commit whatever is left after the last full page.
        db.commit()
    finally:
        if owns_session:
            db.close()
if __name__ == "__main__":
    # Script entry point: run create_db first, then fill_db with its defaults.
    for step in (create_db, fill_db):
        step()