diff --git a/.gitignore b/.gitignore index dabdbca..8f5ae89 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ __pycache__/ *.py[o|c] **_build/ geckodriver.log -sql_app.db +*.sqlite3 diff --git a/Makefile b/Makefile index 403e1e2..80ea42f 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,7 @@ coverage_opt=--cov --cov-report=term-missing:skip-covered --durations=10 +target=dev + clean: pipenv --rm @@ -48,12 +50,25 @@ venv_install: venv_test: make -f MakefileVenv test -db_clean: - rm sql_app.db && touch sql_app.db +db_clean: db_delete + touch db_$(target).sqlite3 db_fill: pipenv run python database.py +db_delete: + ([ -f db_$(target).sqlite3 ] && rm db_$(target).sqlite3) || echo "No file db_$(target).sqlite3 to drop" + + +db_save: + cp -f db_$(target).sqlite3 backup_$(USER)_$(target).sqlite3 + +db_restore: + cp backup_$(USER)_$(target).sqlite3 db_$(target).sqlite3 + + + + db_reset: db_clean db_fill diff --git a/database.py b/database.py index d93f256..a1fc403 100644 --- a/database.py +++ b/database.py @@ -2,7 +2,7 @@ from sqlalchemy import create_engine from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker -SQLALCHEMY_DATABASE_URL = "sqlite:///./sql_app.db" +SQLALCHEMY_DATABASE_URL = "sqlite:///./db_dev.sqlite3" # to be configure # SQLALCHEMY_DATABASE_URL = "postgresql://user:password@postgresserver/db" engine = create_engine( @@ -46,11 +46,17 @@ def fill_db( page_size = 1_00 with open(movie_input_file) as csvfile: for count, movie_data in enumerate(csv.DictReader(csvfile), start=1): - adapted_data = adapt_movie_data(movie_data) - crud.create_movie(db, batch_mode=True, **adapted_data) if count % page_size == 0: db.commit() + adapted_data = adapt_movie_data(movie_data) + + if not adapted_data["title"]: + print(count, "should be fixed") + print(adapted_data) + continue + + crud.create_movie(db, batch_mode=True, **adapted_data) db.commit() diff --git a/utests/test_sql_database.py b/utests/test_sql_database.py index 6a1f474..46c5c42 100644 --- a/utests/test_sql_database.py +++ b/utests/test_sql_database.py @@ -143,4 +143,25 @@ def test_sample_import_toy_story(): assert "Andy" in toy_story["description"] - # non regression + +def test_title_is_taken_form_original_title_is_missing(): + """ + t0113002,en,Midnight Man + 19763 Midnight Man (among others) has an unescaped \n that makes import fail + + in the csv the movie 'Avalanche Sharks' @ line 35587 + has no tiltle, we fix this here to get quicker but we need a better solution + """ + movie_title = "Midnight Man" + + file_path = "utests/movie_error_missing_title.csv" + file_path = "input_data/movies_metadata.csv" + movies = client.get("movies") + movies_by_title = {m["title"]: m for m in movies.json()} + + assert movie_title not in movies_by_title, "The movie should not be pre existing" + with db_context() as db: + fill_db(db, file_path) + + movies = client.get("movies") + movies_by_title = {m["title"]: m for m in movies.json()}