diff --git a/lib/tasks/count_duplicates.rake b/lib/tasks/count_duplicates.rake
index 1ce82975a..138dc09f9 100644
--- a/lib/tasks/count_duplicates.rake
+++ b/lib/tasks/count_duplicates.rake
@@ -11,13 +11,45 @@ namespace :count_duplicates do
       end
     end
 
-    BYTE_ORDER_MARK = "\uFEFF".freeze
-    EXPIRATION_TIME = 72.hours.to_i
     filename = "scheme-duplicates-#{Time.zone.now}.csv"
     storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["BULK_UPLOAD_BUCKET"])
-    storage_service.write_file(filename, BYTE_ORDER_MARK + duplicates_csv)
+    # Prefix the CSV with a UTF-8 byte order mark, written as an explicit
+    # escape so the otherwise invisible character is visible in the source.
+    storage_service.write_file(filename, "\uFEFF#{duplicates_csv}")
 
-    url = storage_service.get_presigned_url(filename, EXPIRATION_TIME)
+    url = storage_service.get_presigned_url(filename, 72.hours.to_i)
+    Rails.logger.info("Download URL: #{url}")
+  end
+
+  # Builds a CSV with one row per visible organisation that has at least one duplicate set among the
+  # locations of its owned schemes, uploads it and logs a presigned download URL (72 hour expiry).
+  desc "Count the number of duplicate locations per organisation"
+  task location_duplicates_per_org: :environment do
+    duplicates_csv = CSV.generate(headers: true) do |csv|
+      csv << ["Organisation id", "Number of duplicate sets", "Total duplicate locations"]
+
+      Organisation.visible.each do |organisation|
+        duplicate_sets_count = 0
+        total_duplicate_locations = 0
+        organisation.owned_schemes.each do |scheme|
+          # Look the duplicate sets up once per scheme and reuse them for both totals.
+          duplicate_sets = scheme.locations.duplicate_sets
+          duplicate_sets_count += duplicate_sets.count
+          total_duplicate_locations += duplicate_sets.sum(&:size)
+        end
+
+        if duplicate_sets_count.positive?
+          csv << [organisation.id, duplicate_sets_count, total_duplicate_locations]
+        end
+      end
+    end
+
+    filename = "location-duplicates-#{Time.zone.now}.csv"
+    storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["BULK_UPLOAD_BUCKET"])
+    # Same byte order mark prefix as the scheme export; the spec expects the payload to start with it.
+    storage_service.write_file(filename, "\uFEFF#{duplicates_csv}")
+
+    url = storage_service.get_presigned_url(filename, 72.hours.to_i)
     Rails.logger.info("Download URL: #{url}")
   end
 end
diff --git a/spec/lib/tasks/count_duplicates_spec.rb b/spec/lib/tasks/count_duplicates_spec.rb
index b939c05e4..e25d8db92 100644
--- a/spec/lib/tasks/count_duplicates_spec.rb
+++ b/spec/lib/tasks/count_duplicates_spec.rb
@@ -46,4 +46,53 @@ RSpec.describe "count_duplicates" do
       end
     end
   end
+
+  describe "count_duplicates:location_duplicates_per_org", type: :task do
+    subject(:task) { Rake::Task["count_duplicates:location_duplicates_per_org"] }
+
+    let(:storage_service) { instance_double(Storage::S3Service) }
+    let(:test_url) { "test_url" }
+
+    before do
+      Rake.application.rake_require("tasks/count_duplicates")
+      Rake::Task.define_task(:environment)
+      task.reenable
+      allow(Storage::S3Service).to receive(:new).and_return(storage_service)
+      allow(storage_service).to receive(:write_file)
+      allow(storage_service).to receive(:get_presigned_url).and_return(test_url)
+    end
+
+    context "when the rake task is run" do
+      context "and there are no duplicate locations" do
+        let!(:organisation) { create(:organisation) }
+
+        it "creates a csv with headers only" do
+          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate locations\n")
+          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
+          task.invoke
+        end
+      end
+
+      context "and there are duplicate locations" do
+        let(:organisation) { create(:organisation) }
+        let(:scheme) { create(:scheme, owning_organisation: organisation) }
+        let(:organisation2) { create(:organisation) }
+        let(:scheme2) { create(:scheme, owning_organisation: organisation2) }
+        let(:scheme3) { create(:scheme, owning_organisation: organisation2) }
+
+        before do
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme:)
+          create_list(:location, 3, postcode: "A1 1AB", mobility_type: "A", scheme:)
+          create_list(:location, 5, postcode: "A1 1AB", mobility_type: "M", scheme: scheme2)
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme3)
+        end
+
+        it "creates a csv with correct duplicate numbers" do
+          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate locations\n#{organisation.id},2,5\n#{organisation2.id},2,7\n")
+          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
+          task.invoke
+        end
+      end
+    end
+  end
 end