From 353facc893630b8c77ab99495b6ecc50435fb703 Mon Sep 17 00:00:00 2001 From: kosiakkatrina <54268893+kosiakkatrina@users.noreply.github.com> Date: Mon, 15 May 2023 14:44:25 +0100 Subject: [PATCH] CLDC-2304 Deduplicate sales logs in bulk upload (#1612) * Deduplicate 2022 sales logs * Don't run deduplication on staging * Do not create duplicate logs * Add deduplication to the 23/24 sales --- app/services/bulk_upload/sales/validator.rb | 5 ++ .../bulk_upload/sales/year2022/row_parser.rb | 42 ++++++++++-- .../bulk_upload/sales/year2023/row_parser.rb | 42 ++++++++++-- .../sales/year2022/row_parser_spec.rb | 65 +++++++++++++++++++ .../sales/year2023/row_parser_spec.rb | 65 +++++++++++++++++++ 5 files changed, 209 insertions(+), 10 deletions(-) diff --git a/app/services/bulk_upload/sales/validator.rb b/app/services/bulk_upload/sales/validator.rb index 54831d36e..a72e3bbcf 100644 --- a/app/services/bulk_upload/sales/validator.rb +++ b/app/services/bulk_upload/sales/validator.rb @@ -36,6 +36,7 @@ class BulkUpload::Sales::Validator def create_logs? return false if any_setup_errors? return false if row_parsers.any?(&:block_log_creation?) + return false if any_logs_already_exist? && FeatureToggle.bulk_upload_duplicate_log_check_enabled? row_parsers.each do |row_parser| row_parser.log.blank_invalid_non_setup_fields! @@ -54,6 +55,10 @@ class BulkUpload::Sales::Validator .positive? end + def any_logs_already_exist? + row_parsers.any?(&:log_already_exists?) + end + private def any_logs_invalid? diff --git a/app/services/bulk_upload/sales/year2022/row_parser.rb b/app/services/bulk_upload/sales/year2022/row_parser.rb index 851ed4bfd..c55500eb9 100644 --- a/app/services/bulk_upload/sales/year2022/row_parser.rb +++ b/app/services/bulk_upload/sales/year2022/row_parser.rb @@ -285,6 +285,7 @@ class BulkUpload::Sales::Year2022::RowParser validate :validate_created_by_related, on: :after_log validate :validate_relevant_collection_window, on: :after_log validate :validate_incomplete_soft_validations, on: :after_log + validate :validate_if_log_already_exists, on: :after_log, if: -> { FeatureToggle.bulk_upload_duplicate_log_check_enabled? } def self.question_for_field(field) QUESTIONS[field] @@ -337,6 +338,12 @@ class BulkUpload::Sales::Year2022::RowParser block_log_creation end + def log_already_exists? + @log_already_exists ||= SalesLog + .where(status: %w[not_started in_progress completed]) + .exists?(duplicate_check_fields.index_with { |field| log.public_send(field) }) + end + private def buyer_not_interviewed? @@ -591,7 +598,7 @@ private attributes["othtype"] = field_85 - attributes["owning_organisation_id"] = owning_organisation_id + attributes["owning_organisation"] = owning_organisation attributes["created_by"] = created_by || bulk_upload.user attributes["hhregres"] = hhregres attributes["hhregresstill"] = hhregresstill @@ -803,10 +810,6 @@ private Organisation.find_by_id_on_multiple_fields(field_92) end - def owning_organisation_id - owning_organisation&.id - end - def created_by @created_by ||= User.find_by(email: field_93) end @@ -848,6 +851,18 @@ private @questions ||= log.form.subsections.flat_map { |ss| ss.applicable_questions(log) } end + def duplicate_check_fields + %w[ + saledate + age1 + sex1 + ecstat1 + owning_organisation + postcode_full + purchid + ] + end + def validate_owning_org_data_given if field_92.blank? block_log_creation! @@ -985,4 +1000,21 @@ private end end end + + def validate_if_log_already_exists + if log_already_exists? + error_message = "This is a duplicate log" + + errors.add(:field_92, error_message) # Owning org + errors.add(:field_2, error_message) # Sale completion date + errors.add(:field_3, error_message) # Sale completion date + errors.add(:field_4, error_message) # Sale completion date + errors.add(:field_41, error_message) # Postcode + errors.add(:field_42, error_message) # Postcode + errors.add(:field_7, error_message) # Buyer 1 age + errors.add(:field_13, error_message) # Buyer 1 gender + errors.add(:field_24, error_message) # Buyer 1 working situation + errors.add(:field_1, error_message) # Purchaser code + end + end end diff --git a/app/services/bulk_upload/sales/year2023/row_parser.rb b/app/services/bulk_upload/sales/year2023/row_parser.rb index 67cf9a284..11cd9f9a7 100644 --- a/app/services/bulk_upload/sales/year2023/row_parser.rb +++ b/app/services/bulk_upload/sales/year2023/row_parser.rb @@ -410,6 +410,7 @@ class BulkUpload::Sales::Year2023::RowParser validate :validate_address_line_1, on: :after_log validate :validate_town_or_city, on: :after_log validate :validate_postcode, on: :after_log + validate :validate_if_log_already_exists, on: :after_log, if: -> { FeatureToggle.bulk_upload_duplicate_log_check_enabled? } def self.question_for_field(field) QUESTIONS[field] @@ -466,6 +467,12 @@ class BulkUpload::Sales::Year2023::RowParser "#" end + def log_already_exists? + @log_already_exists ||= SalesLog + .where(status: %w[not_started in_progress completed]) + .exists?(duplicate_check_fields.index_with { |field| log.public_send(field) }) + end + private def prevtenbuy2 @@ -786,7 +793,7 @@ private attributes["othtype"] = field_11 - attributes["owning_organisation_id"] = owning_organisation_id + attributes["owning_organisation"] = owning_organisation attributes["created_by"] = created_by || bulk_upload.user attributes["hhregres"] = hhregres attributes["hhregresstill"] = hhregresstill @@ -1000,10 +1007,6 @@ private Organisation.find_by_id_on_multiple_fields(field_1) end - def owning_organisation_id - owning_organisation&.id - end - def created_by @created_by ||= User.find_by(email: field_2) end @@ -1035,6 +1038,18 @@ private @questions ||= log.form.subsections.flat_map { |ss| ss.applicable_questions(log) } end + def duplicate_check_fields + %w[ + saledate + age1 + sex1 + ecstat1 + owning_organisation + postcode_full + purchid + ] + end + def validate_owning_org_data_given if field_1.blank? block_log_creation! @@ -1156,4 +1171,21 @@ private errors.add(:field_5, I18n.t("validations.date.outside_collection_window", year_combo: bulk_upload.year_combo, start_year: bulk_upload.year, end_year: bulk_upload.end_year), category: :setup) end end + + def validate_if_log_already_exists + if log_already_exists? + error_message = "This is a duplicate log" + + errors.add(:field_1, error_message) # Owning org + errors.add(:field_3, error_message) # Sale completion date + errors.add(:field_4, error_message) # Sale completion date + errors.add(:field_5, error_message) # Sale completion date + errors.add(:field_24, error_message) # Postcode + errors.add(:field_25, error_message) # Postcode + errors.add(:field_30, error_message) # Buyer 1 age + errors.add(:field_31, error_message) # Buyer 1 gender + errors.add(:field_35, error_message) # Buyer 1 working situation + errors.add(:field_6, error_message) # Purchaser code + end + end end diff --git a/spec/services/bulk_upload/sales/year2022/row_parser_spec.rb b/spec/services/bulk_upload/sales/year2022/row_parser_spec.rb index 4d37280dc..1fc85ebb5 100644 --- a/spec/services/bulk_upload/sales/year2022/row_parser_spec.rb +++ b/spec/services/bulk_upload/sales/year2022/row_parser_spec.rb @@ -539,6 +539,71 @@ RSpec.describe BulkUpload::Sales::Year2022::RowParser do end end end + + context "when the log already exists in the db" do + let(:attributes) { valid_attributes } + + before do + parser.log.save! + parser.instance_variable_set(:@valid, nil) + end + + it "is not a valid row" do + expect(parser).not_to be_valid + end + + it "adds an error to all (and only) the fields used to determine duplicates" do + parser.valid? + + error_message = "This is a duplicate log" + + [ + :field_92, # Owning org + :field_2, # Sale completion date + :field_3, # Sale completion date + :field_4, # Sale completion date + :field_41, # Postcode + :field_42, # Postcode + :field_7, # Buyer 1 age + :field_13, # Buyer 1 gender + :field_24, # Buyer 1 working situation + :field_1, # Purchaser code + ].each do |field| + expect(parser.errors[field]).to include(error_message) + end + end + end + + context "when a hidden log already exists in db" do + before do + parser.log.status = "pending" + parser.log.skip_update_status = true + parser.log.save! + end + + it "is a valid row" do + expect(parser).to be_valid + end + + it "does not add duplicate errors" do + parser.valid? + + [ + :field_92, # Owning org + :field_2, # Sale completion date + :field_3, # Sale completion date + :field_4, # Sale completion date + :field_41, # Postcode + :field_42, # Postcode + :field_7, # Buyer 1 age + :field_13, # Buyer 1 gender + :field_24, # Buyer 1 working situation + :field_1, # Purchaser code + ].each do |field| + expect(parser.errors[field]).to be_blank + end + end + end end describe "inferences" do diff --git a/spec/services/bulk_upload/sales/year2023/row_parser_spec.rb b/spec/services/bulk_upload/sales/year2023/row_parser_spec.rb index e49e47db7..8c5b899b9 100644 --- a/spec/services/bulk_upload/sales/year2023/row_parser_spec.rb +++ b/spec/services/bulk_upload/sales/year2023/row_parser_spec.rb @@ -467,6 +467,71 @@ RSpec.describe BulkUpload::Sales::Year2023::RowParser do end end + context "when the log already exists in the db" do + let(:attributes) { valid_attributes } + + before do + parser.log.save! + parser.instance_variable_set(:@valid, nil) + end + + it "is not a valid row" do + expect(parser).not_to be_valid + end + + it "adds an error to all (and only) the fields used to determine duplicates" do + parser.valid? + + error_message = "This is a duplicate log" + + [ + :field_1, # Owning org + :field_3, # Sale completion date + :field_4, # Sale completion date + :field_5, # Sale completion date + :field_24, # Postcode + :field_25, # Postcode + :field_30, # Buyer 1 age + :field_31, # Buyer 1 gender + :field_35, # Buyer 1 working situation + :field_6, # Purchaser code + ].each do |field| + expect(parser.errors[field]).to include(error_message) + end + end + end + + context "when a hidden log already exists in db" do + before do + parser.log.status = "pending" + parser.log.skip_update_status = true + parser.log.save! + end + + it "is a valid row" do + expect(parser).to be_valid + end + + it "does not add duplicate errors" do + parser.valid? + + [ + :field_1, # Owning org + :field_3, # Sale completion date + :field_4, # Sale completion date + :field_5, # Sale completion date + :field_24, # Postcode + :field_25, # Postcode + :field_30, # Buyer 1 age + :field_31, # Buyer 1 gender + :field_35, # Buyer 1 working situation + :field_6, # Purchaser code + ].each do |field| + expect(parser.errors[field]).to be_blank + end + end + end + describe "#field_19" do # UPRN context "when UPRN known" do let(:attributes) { setup_section_params.merge({ field_19: "100023336956" }) }