Browse Source

Update location duplicate count

pull/2645/head
Kat 2 years ago committed by kosiakkatrina
parent
commit
94a0ee267f
  1. 14
      app/models/location.rb
  2. 29
      lib/tasks/count_duplicates.rake
  3. 19
      spec/lib/tasks/count_duplicates_spec.rb

14
app/models/location.rb

@ -133,7 +133,19 @@ class Location < ApplicationRecord
scope.pluck("ARRAY_AGG(id)") scope.pluck("ARRAY_AGG(id)")
} }
DUPLICATE_LOCATION_ATTRIBUTES = %w[scheme_id postcode mobility_type].freeze scope :duplicate_sets_within_duplicate_schemes, lambda {
scope = visible
.group(*DUPLICATE_LOCATION_ATTRIBUTES - %w[scheme_id])
.where(scheme_id: nil)
.where.not(postcode: nil)
.where.not(mobility_type: nil)
.having(
"COUNT(*) > 1",
)
scope.pluck("ARRAY_AGG(id)")
}
DUPLICATE_LOCATION_ATTRIBUTES = %w[postcode mobility_type].freeze
LOCAL_AUTHORITIES = LocalAuthority.all.map { |la| [la.name, la.code] }.to_h LOCAL_AUTHORITIES = LocalAuthority.all.map { |la| [la.name, la.code] }.to_h
enum local_authorities: LOCAL_AUTHORITIES enum local_authorities: LOCAL_AUTHORITIES

29
lib/tasks/count_duplicates.rake

@ -22,18 +22,33 @@ namespace :count_duplicates do
desc "Count the number of duplicate locations per organisation" desc "Count the number of duplicate locations per organisation"
task location_duplicates_per_org: :environment do task location_duplicates_per_org: :environment do
duplicates_csv = CSV.generate(headers: true) do |csv| duplicates_csv = CSV.generate(headers: true) do |csv|
csv << ["Organisation id", "Number of duplicate sets", "Total duplicate locations"] csv << ["Organisation id", "Duplicate sets within individual schemes", "Duplicate locations within individual schemes", "All duplicate sets", "All duplicates"]
Organisation.visible.each do |organisation| Organisation.visible.each do |organisation|
duplicate_sets_count = 0 duplicate_sets_within_individual_schemes = []
total_duplicate_locations = 0
organisation.owned_schemes.each do |scheme| organisation.owned_schemes.each do |scheme|
duplicate_sets_count += scheme.locations.duplicate_sets.count duplicate_sets_within_individual_schemes += scheme.locations.duplicate_sets
total_duplicate_locations += scheme.locations.duplicate_sets.sum(&:size) end
duplicate_locations_within_individual_schemes = duplicate_sets_within_individual_schemes.flatten
duplicate_sets_within_duplicate_schemes = []
if organisation.owned_schemes.duplicate_sets.count.positive?
organisation.owned_schemes.duplicate_sets.each do |duplicate_set|
duplicate_sets_within_duplicate_schemes += Location.where(scheme_id: duplicate_set).duplicate_sets
end
duplicate_locations_within_duplicate_schemes_ids = duplicate_sets_within_duplicate_schemes.flatten
duplicate_sets_within_individual_schemes_without_intersecting_sets = duplicate_sets_within_individual_schemes.reject { |set| set.any? { |id| duplicate_sets_within_duplicate_schemes.any? { |duplicate_set| duplicate_set.include?(id) } } }
all_duplicate_sets_count = (duplicate_sets_within_individual_schemes_without_intersecting_sets + duplicate_sets_within_duplicate_schemes).count
all_duplicate_locations_count = (duplicate_locations_within_duplicate_schemes_ids + duplicate_locations_within_individual_schemes).uniq.count
else
all_duplicate_sets_count = duplicate_sets_within_individual_schemes.count
all_duplicate_locations_count = duplicate_locations_within_individual_schemes.count
end end
if duplicate_sets_count.positive? if all_duplicate_locations_count.positive?
csv << [organisation.id, duplicate_sets_count, total_duplicate_locations] csv << [organisation.id, duplicate_sets_within_individual_schemes.count, duplicate_locations_within_individual_schemes.count, all_duplicate_sets_count, all_duplicate_locations_count]
end end
end end
end end

19
spec/lib/tasks/count_duplicates_spec.rb

@ -71,7 +71,7 @@ RSpec.describe "count_duplicates" do
end end
it "creates a csv with headers only" do it "creates a csv with headers only" do
expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate locations\n") expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n")
expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}") expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
task.invoke task.invoke
end end
@ -79,20 +79,29 @@ RSpec.describe "count_duplicates" do
context "and there are duplicate locations" do context "and there are duplicate locations" do
let(:organisation) { create(:organisation) } let(:organisation) { create(:organisation) }
let(:scheme) { create(:scheme, owning_organisation: organisation) } let(:scheme_a) { create(:scheme, :duplicate, owning_organisation: organisation) }
let(:scheme_b) { create(:scheme, :duplicate, owning_organisation: organisation) }
let(:scheme_c) { create(:scheme, owning_organisation: organisation) }
let(:organisation2) { create(:organisation) } let(:organisation2) { create(:organisation) }
let(:scheme2) { create(:scheme, owning_organisation: organisation2) } let(:scheme2) { create(:scheme, owning_organisation: organisation2) }
let(:scheme3) { create(:scheme, owning_organisation: organisation2) } let(:scheme3) { create(:scheme, owning_organisation: organisation2) }
before do before do
create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme:) create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_a) # Location A
create_list(:location, 3, postcode: "A1 1AB", mobility_type: "A", scheme:) create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_a) # Location B
create_list(:location, 1, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_b) # Location A
create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_b) # Location B
create_list(:location, 2, postcode: "A1 1AB", mobility_type: "N", scheme: scheme_b) # Location C
create_list(:location, 2, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_c) # Location B
create_list(:location, 5, postcode: "A1 1AB", mobility_type: "M", scheme: scheme2) create_list(:location, 5, postcode: "A1 1AB", mobility_type: "M", scheme: scheme2)
create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme3) create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme3)
end end
it "creates a csv with correct duplicate numbers" do it "creates a csv with correct duplicate numbers" do
expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate locations\n#{organisation.id},2,5\n#{organisation2.id},2,7\n") expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n#{organisation.id},3,6,4,9\n#{organisation2.id},2,7,2,7\n")
expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}") expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
task.invoke task.invoke
end end

Loading…
Cancel
Save