diff --git a/app/models/location.rb b/app/models/location.rb
index 6f2963e39..728caf49d 100644
--- a/app/models/location.rb
+++ b/app/models/location.rb
@@ -133,7 +133,23 @@ class Location < ApplicationRecord
     scope.pluck("ARRAY_AGG(id)")
   }
 
-  DUPLICATE_LOCATION_ATTRIBUTES = %w[scheme_id postcode mobility_type].freeze
+  # Groups visible locations by DUPLICATE_LOCATION_ATTRIBUTES (minus scheme_id) and returns
+  # one array of location ids for every group that contains more than one row.
+  # NOTE(review): `where(scheme_id: nil)` only matches locations without a scheme, and the
+  # count_duplicates rake task does not call this scope - confirm the filter is intended.
+  scope :duplicate_sets_within_duplicate_schemes, lambda {
+    scope = visible
+    .group(*DUPLICATE_LOCATION_ATTRIBUTES - %w[scheme_id])
+    .where(scheme_id: nil)
+    .where.not(postcode: nil)
+    .where.not(mobility_type: nil)
+    .having(
+      "COUNT(*) > 1",
+    )
+    scope.pluck("ARRAY_AGG(id)")
+  }
+
+  DUPLICATE_LOCATION_ATTRIBUTES = %w[postcode mobility_type].freeze
   LOCAL_AUTHORITIES = LocalAuthority.all.map { |la| [la.name, la.code] }.to_h
 
   enum local_authorities: LOCAL_AUTHORITIES
diff --git a/lib/tasks/count_duplicates.rake b/lib/tasks/count_duplicates.rake
index 138dc09f9..ef4f53ec2 100644
--- a/lib/tasks/count_duplicates.rake
+++ b/lib/tasks/count_duplicates.rake
@@ -22,18 +22,26 @@ namespace :count_duplicates do
   desc "Count the number of duplicate locations per organisation"
   task location_duplicates_per_org: :environment do
     duplicates_csv = CSV.generate(headers: true) do |csv|
-      csv << ["Organisation id", "Number of duplicate sets", "Total duplicate locations"]
+      csv << ["Organisation id", "Duplicate sets within individual schemes", "Duplicate locations within individual schemes", "All duplicate sets", "All duplicates"]
       Organisation.visible.each do |organisation|
-        duplicate_sets_count = 0
-        total_duplicate_locations = 0
-        organisation.owned_schemes.each do |scheme|
-          duplicate_sets_count += scheme.locations.duplicate_sets.count
-          total_duplicate_locations += scheme.locations.duplicate_sets.sum(&:size)
-        end
+        # Duplicate sets found when each owned scheme is inspected on its own.
+        duplicate_sets_within_individual_schemes = organisation.owned_schemes.flat_map { |scheme| scheme.locations.duplicate_sets }
+        duplicate_locations_within_individual_schemes = duplicate_sets_within_individual_schemes.flatten
+
+        # Duplicate sets found across the locations of each set of duplicate schemes (queried once).
+        duplicate_sets_within_duplicate_schemes = organisation.owned_schemes.duplicate_sets.flat_map do |duplicate_set|
+          Location.where(scheme_id: duplicate_set).duplicate_sets
+        end
+        duplicate_locations_within_duplicate_schemes = duplicate_sets_within_duplicate_schemes.flatten
+
+        # A per-scheme set sharing any location with a cross-scheme set is already counted there.
+        duplicate_sets_only_within_individual_schemes = duplicate_sets_within_individual_schemes.reject { |set| (set & duplicate_locations_within_duplicate_schemes).any? }
+        all_duplicate_sets_count = duplicate_sets_only_within_individual_schemes.count + duplicate_sets_within_duplicate_schemes.count
+        all_duplicate_locations_count = (duplicate_locations_within_duplicate_schemes | duplicate_locations_within_individual_schemes).count
 
-        if duplicate_sets_count.positive?
-          csv << [organisation.id, duplicate_sets_count, total_duplicate_locations]
+        if all_duplicate_locations_count.positive?
+          csv << [organisation.id, duplicate_sets_within_individual_schemes.count, duplicate_locations_within_individual_schemes.count, all_duplicate_sets_count, all_duplicate_locations_count]
         end
       end
     end
 
diff --git a/spec/lib/tasks/count_duplicates_spec.rb b/spec/lib/tasks/count_duplicates_spec.rb
index be1b0351e..99da5b2fb 100644
--- a/spec/lib/tasks/count_duplicates_spec.rb
+++ b/spec/lib/tasks/count_duplicates_spec.rb
@@ -71,7 +71,7 @@ RSpec.describe "count_duplicates" do
         end
 
         it "creates a csv with headers only" do
-          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate locations\n")
+          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n")
           expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
           task.invoke
         end
@@ -79,20 +79,29 @@ RSpec.describe "count_duplicates" do
 
       context "and there are duplicate locations" do
         let(:organisation) { create(:organisation) }
-        let(:scheme) { create(:scheme, owning_organisation: organisation) }
+        let(:scheme_a) { create(:scheme, :duplicate, owning_organisation: organisation) }
+        let(:scheme_b) { create(:scheme, :duplicate, owning_organisation: organisation) }
+        let(:scheme_c) { create(:scheme, owning_organisation: organisation) }
         let(:organisation2) { create(:organisation) }
         let(:scheme2) { create(:scheme, owning_organisation: organisation2) }
         let(:scheme3) { create(:scheme, owning_organisation: organisation2) }
 
         before do
-          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme:)
-          create_list(:location, 3, postcode: "A1 1AB", mobility_type: "A", scheme:)
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_a) # Location A
+          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_a) # Location B
+
+          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_b) # Location A
+          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_b) # Location B
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "N", scheme: scheme_b) # Location C
+
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_c) # Location B
+
           create_list(:location, 5, postcode: "A1 1AB", mobility_type: "M", scheme: scheme2)
           create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme3)
         end
 
         it "creates a csv with correct duplicate numbers" do
-          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate locations\n#{organisation.id},2,5\n#{organisation2.id},2,7\n")
+          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n#{organisation.id},3,6,4,9\n#{organisation2.id},2,7,2,7\n")
           expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
           task.invoke
         end