diff --git a/app/models/location.rb b/app/models/location.rb
index 19cf5e211..03af24a94 100644
--- a/app/models/location.rb
+++ b/app/models/location.rb
@@ -121,6 +121,36 @@ class Location < ApplicationRecord
 
   scope :visible, -> { where(discarded_at: nil) }
 
+  # Sets of duplicate locations: visible locations that share the same scheme,
+  # postcode and mobility type. Returns an Array with one array of location ids
+  # per set (the result of pluck), not a relation, so it cannot be chained further.
+  scope :duplicate_sets, lambda {
+    scope = visible
+      .group(*DUPLICATE_LOCATION_ATTRIBUTES)
+      .where.not(scheme_id: nil)
+      .where.not(postcode: nil)
+      .where.not(mobility_type: nil)
+      .having(
+        "COUNT(*) > 1",
+      )
+    scope.pluck("ARRAY_AGG(id)")
+  }
+
+  # Like duplicate_sets, but scheme_id is left out of the grouping, so locations
+  # from different schemes in the receiving relation can land in the same set.
+  scope :duplicate_sets_within_given_schemes, lambda {
+    scope = visible
+      .group(*DUPLICATE_LOCATION_ATTRIBUTES - %w[scheme_id])
+      .where.not(postcode: nil)
+      .where.not(mobility_type: nil)
+      .having(
+        "COUNT(*) > 1",
+      )
+    scope.pluck("ARRAY_AGG(id)")
+  }
+
+  # Columns that must all match for two locations to be treated as duplicates.
+  DUPLICATE_LOCATION_ATTRIBUTES = %w[scheme_id postcode mobility_type].freeze
   LOCAL_AUTHORITIES = LocalAuthority.all.map { |la| [la.name, la.code] }.to_h
   enum local_authorities: LOCAL_AUTHORITIES
 
diff --git a/app/models/scheme.rb b/app/models/scheme.rb
index 6d3524723..2c73acc06 100644
--- a/app/models/scheme.rb
+++ b/app/models/scheme.rb
@@ -103,6 +103,29 @@ class Scheme < ApplicationRecord
 
   scope :visible, -> { where(discarded_at: nil) }
 
+  # Sets of duplicate schemes: visible schemes whose DUPLICATE_SCHEME_ATTRIBUTES
+  # all match. Returns an Array with one array of scheme ids per set (the result
+  # of pluck), not a relation.
+  scope :duplicate_sets, lambda {
+    # A nil secondary_client_group is only accepted when has_other_client_group
+    # is 0. This OR is built first, on its own, so that it is ANDed with every
+    # other filter below instead of offering a way around them.
+    secondary_client_group_complete = where.not(secondary_client_group: nil).or(where(has_other_client_group: 0))
+    scope = secondary_client_group_complete
+      .visible
+      .group(*DUPLICATE_SCHEME_ATTRIBUTES)
+      .where.not(scheme_type: nil)
+      .where.not(registered_under_care_act: nil)
+      .where.not(primary_client_group: nil)
+      .where.not(has_other_client_group: nil)
+      .where.not(support_type: nil)
+      .where.not(intended_stay: nil)
+      .having(
+        "COUNT(*) > 1",
+      )
+    scope.pluck("ARRAY_AGG(id)")
+  }
+
   validate :validate_confirmed
   validate :validate_owning_organisation
 
@@ -192,6 +215,9 @@ class Scheme < ApplicationRecord
     "Missing": "X",
   }.freeze
 
+  # Columns that must all match for two schemes to be treated as duplicates.
+  DUPLICATE_SCHEME_ATTRIBUTES = %w[scheme_type registered_under_care_act primary_client_group secondary_client_group has_other_client_group support_type intended_stay].freeze
+
   enum arrangement_type: ARRANGEMENT_TYPE, _suffix: true
 
   def self.find_by_id_on_multiple_fields(scheme_id, location_id)
diff --git a/lib/tasks/count_duplicates.rake b/lib/tasks/count_duplicates.rake
new file mode 100644
index 000000000..e65688b4d
--- /dev/null
+++ b/lib/tasks/count_duplicates.rake
@@ -0,0 +1,63 @@
+namespace :count_duplicates do
+  desc "Count the number of duplicate schemes per organisation"
+  task scheme_duplicates_per_org: :environment do
+    duplicates_csv = CSV.generate(headers: true) do |csv|
+      csv << ["Organisation id", "Number of duplicate sets", "Total duplicate schemes"]
+
+      Organisation.visible.each do |organisation|
+        # duplicate_sets ends in a pluck, so every call runs a query: fetch it once.
+        duplicate_sets = organisation.owned_schemes.duplicate_sets
+        next if duplicate_sets.empty?
+
+        csv << [organisation.id, duplicate_sets.count, duplicate_sets.sum(&:size)]
+      end
+    end
+
+    filename = "scheme-duplicates-#{Time.zone.now}.csv"
+    storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["BULK_UPLOAD_BUCKET"])
+    # Start the file with a byte order mark (U+FEFF); the spec for this task expects it.
+    storage_service.write_file(filename, "\uFEFF#{duplicates_csv}")
+
+    url = storage_service.get_presigned_url(filename, 72.hours.to_i)
+    Rails.logger.info("Download URL: #{url}")
+  end
+
+  desc "Count the number of duplicate locations per organisation"
+  task location_duplicates_per_org: :environment do
+    duplicates_csv = CSV.generate(headers: true) do |csv|
+      csv << ["Organisation id", "Duplicate sets within individual schemes", "Duplicate locations within individual schemes", "All duplicate sets", "All duplicates"]
+
+      Organisation.visible.each do |organisation|
+        # Sets of location ids that duplicate each other inside a single scheme.
+        duplicate_sets_within_individual_schemes = organisation.owned_schemes.flat_map { |scheme| scheme.locations.duplicate_sets }
+        duplicate_locations_within_individual_schemes = duplicate_sets_within_individual_schemes.flatten
+
+        # Sets of location ids that duplicate each other across schemes that are
+        # themselves duplicates; empty when the organisation has no duplicate schemes.
+        duplicate_sets_within_duplicate_schemes = organisation.owned_schemes.duplicate_sets.flat_map do |duplicate_scheme_ids|
+          Location.where(scheme_id: duplicate_scheme_ids).duplicate_sets_within_given_schemes
+        end
+        duplicate_locations_within_duplicate_schemes = duplicate_sets_within_duplicate_schemes.flatten
+
+        # Leave out per-scheme sets that overlap a cross-scheme set so that the
+        # same locations are not counted as two separate sets.
+        duplicate_sets_within_individual_schemes_without_intersecting_sets = duplicate_sets_within_individual_schemes.reject do |set|
+          (set & duplicate_locations_within_duplicate_schemes).any?
+        end
+        all_duplicate_sets_count = (duplicate_sets_within_individual_schemes_without_intersecting_sets + duplicate_sets_within_duplicate_schemes).count
+        all_duplicate_locations_count = (duplicate_locations_within_duplicate_schemes + duplicate_locations_within_individual_schemes).uniq.count
+        next unless all_duplicate_locations_count.positive?
+
+        csv << [organisation.id, duplicate_sets_within_individual_schemes.count, duplicate_locations_within_individual_schemes.count, all_duplicate_sets_count, all_duplicate_locations_count]
+      end
+    end
+
+    filename = "location-duplicates-#{Time.zone.now}.csv"
+    storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["BULK_UPLOAD_BUCKET"])
+    # Start the file with a byte order mark (U+FEFF); the spec for this task expects it.
+    storage_service.write_file(filename, "\uFEFF#{duplicates_csv}")
+
+    url = storage_service.get_presigned_url(filename, 72.hours.to_i)
+    Rails.logger.info("Download URL: #{url}")
+  end
+end
diff --git a/spec/factories/scheme.rb b/spec/factories/scheme.rb
index 7983bfa0a..5f4ad30bc 100644
--- a/spec/factories/scheme.rb
+++ b/spec/factories/scheme.rb
@@ -32,5 +32,15 @@ FactoryBot.define do
       confirmed { false }
       support_type { nil }
     end
+    # Fixes every attribute listed in Scheme::DUPLICATE_SCHEME_ATTRIBUTES so that schemes built with this trait match each other.
+    trait :duplicate do
+      scheme_type { 4 }
+      registered_under_care_act { 1 }
+      primary_client_group { "O" }
+      secondary_client_group { "H" }
+      has_other_client_group { 1 }
+      support_type { 2 }
+      intended_stay { "M" }
+    end
   end
 end
diff --git a/spec/lib/tasks/count_duplicates_spec.rb b/spec/lib/tasks/count_duplicates_spec.rb
new file mode 100644
index 000000000..99da5b2fb
--- /dev/null
+++ b/spec/lib/tasks/count_duplicates_spec.rb
@@ -0,0 +1,111 @@
+require "rails_helper"
+require "rake"
+
+RSpec.describe "count_duplicates" do
+  before do
+    allow(Storage::S3Service).to receive(:new).and_return(storage_service)
+    allow(storage_service).to receive(:write_file)
+    allow(storage_service).to receive(:get_presigned_url).and_return(test_url)
+  end
+
+  describe "count_duplicates:scheme_duplicates_per_org", type: :task do
+    subject(:task) { Rake::Task["count_duplicates:scheme_duplicates_per_org"] }
+
+    let(:storage_service) { instance_double(Storage::S3Service) }
+    let(:test_url) { "test_url" }
+
+    before do
+      Rake.application.rake_require("tasks/count_duplicates")
+      Rake::Task.define_task(:environment)
+      task.reenable
+    end
+
+    context "when the rake task is run" do
+      context "and there are no duplicate schemes" do
+        before do
+          create(:organisation)
+        end
+
+        it "creates a csv with headers only" do
+          expect(storage_service).to receive(:write_file).with(/scheme-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate schemes\n")
+          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
+          task.invoke
+        end
+      end
+
+      context "and there are duplicate schemes" do
+        let(:organisation) { create(:organisation) }
+        let(:organisation2) { create(:organisation) }
+
+        before do
+          create_list(:scheme, 2, :duplicate, owning_organisation: organisation)
+          create_list(:scheme, 3, :duplicate, primary_client_group: "I", owning_organisation: organisation)
+          create_list(:scheme, 5, :duplicate, owning_organisation: organisation2)
+        end
+
+        it "creates a csv with correct duplicate numbers" do
+          expect(storage_service).to receive(:write_file).with(/scheme-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate schemes\n#{organisation.id},2,5\n#{organisation2.id},1,5\n")
+          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
+          task.invoke
+        end
+      end
+    end
+  end
+
+  describe "count_duplicates:location_duplicates_per_org", type: :task do
+    subject(:task) { Rake::Task["count_duplicates:location_duplicates_per_org"] }
+
+    let(:storage_service) { instance_double(Storage::S3Service) }
+    let(:test_url) { "test_url" }
+
+    before do
+      Rake.application.rake_require("tasks/count_duplicates")
+      Rake::Task.define_task(:environment)
+      task.reenable
+    end
+
+    context "when the rake task is run" do
+      context "and there are no duplicate locations" do
+        before do
+          create(:organisation)
+        end
+
+        it "creates a csv with headers only" do
+          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n")
+          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
+          task.invoke
+        end
+      end
+
+      context "and there are duplicate locations" do
+        let(:organisation) { create(:organisation) }
+        let(:scheme_a) { create(:scheme, :duplicate, owning_organisation: organisation) }
+        let(:scheme_b) { create(:scheme, :duplicate, owning_organisation: organisation) }
+        let(:scheme_c) { create(:scheme, owning_organisation: organisation) }
+        let(:organisation2) { create(:organisation) }
+        let(:scheme2) { create(:scheme, owning_organisation: organisation2) }
+        let(:scheme3) { create(:scheme, owning_organisation: organisation2) }
+
+        before do
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_a) # Location A
+          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_a) # Location B
+
+          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_b) # Location A
+          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_b) # Location B
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "N", scheme: scheme_b) # Location C
+
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_c) # Location B
+
+          create_list(:location, 5, postcode: "A1 1AB", mobility_type: "M", scheme: scheme2)
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme3)
+        end
+
+        it "creates a csv with correct duplicate numbers" do
+          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n#{organisation.id},3,6,4,9\n#{organisation2.id},2,7,2,7\n")
+          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
+          task.invoke
+        end
+      end
+    end
+  end
+end
diff --git a/spec/models/location_spec.rb b/spec/models/location_spec.rb
index 4856c5662..79265d361 100644
--- a/spec/models/location_spec.rb
+++ b/spec/models/location_spec.rb
@@ -831,6 +831,76 @@ RSpec.describe Location, type: :model do
         expect(described_class.active.count).to eq(2)
       end
     end
+
+    context "when getting list of duplicate locations" do
+      let!(:scheme) { create(:scheme) }
+      let!(:location) { create(:location, postcode: "AB1 2CD", mobility_type: "M", scheme:) }
+      let!(:duplicate_location) { create(:location, postcode: "AB1 2CD", mobility_type: "M", scheme:) }
+      let(:duplicate_sets) { described_class.duplicate_sets }
+
+      it "returns a list of duplicates for the same scheme" do
+        expect(duplicate_sets.count).to eq(1)
+        expect(duplicate_sets.first).to contain_exactly(location.id, duplicate_location.id)
+      end
+
+      context "when there is a deleted duplicate location" do
+        before do
+          create(:location, postcode: "AB1 2CD", mobility_type: "M", discarded_at: Time.zone.now, scheme:)
+        end
+
+        it "does not return the deleted location as a duplicate" do
+          expect(duplicate_sets.count).to eq(1)
+          expect(duplicate_sets.first).to contain_exactly(location.id, duplicate_location.id)
+        end
+      end
+
+      context "when there is a location with a different postcode" do
+        before do
+          create(:location, postcode: "A1 1AB", mobility_type: "M", scheme:)
+        end
+
+        it "does not return a location with a different postcode as a duplicate" do
+          expect(duplicate_sets.count).to eq(1)
+          expect(duplicate_sets.first).to contain_exactly(location.id, duplicate_location.id)
+        end
+      end
+
+      context "when there is a location with a different mobility_type" do
+        before do
+          create(:location, postcode: "AB1 2CD", mobility_type: "A", scheme:)
+        end
+
+        it "does not return a location with a different mobility_type as a duplicate" do
+          expect(duplicate_sets.count).to eq(1)
+          expect(duplicate_sets.first).to contain_exactly(location.id, duplicate_location.id)
+        end
+      end
+
+      context "when there is a location with a different scheme" do
+        before do
+          create(:location, postcode: "AB1 2CD", mobility_type: "M")
+        end
+
+        it "does not return a location with a different scheme as a duplicate" do
+          expect(duplicate_sets.count).to eq(1)
+          expect(duplicate_sets.first).to contain_exactly(location.id, duplicate_location.id)
+        end
+      end
+
+      context "when there is a location with nil values for duplicate check fields" do
+        before do
+          [location, duplicate_location].each do |l|
+            l.postcode = nil
+            l.mobility_type = nil
+            l.save!(validate: false)
+          end
+        end
+
+        it "does not return a location with nil values as a duplicate" do
+          expect(duplicate_sets).to be_empty
+        end
+      end
+    end
   end
 
   describe "status" do
diff --git a/spec/models/scheme_spec.rb b/spec/models/scheme_spec.rb
index 9b3db15a4..5ca529d3e 100644
--- a/spec/models/scheme_spec.rb
+++ b/spec/models/scheme_spec.rb
@@ -208,6 +208,146 @@ RSpec.describe Scheme, type: :model do
           end
         end
       end
+
+      context "when getting list of duplicate schemes" do
+        let(:organisation) { create(:organisation) }
+        let!(:scheme) { create(:scheme, :duplicate, owning_organisation: organisation) }
+        let!(:duplicate_scheme) { create(:scheme, :duplicate, owning_organisation: organisation) }
+        let(:duplicate_sets) { described_class.duplicate_sets }
+
+        it "returns a list of duplicates in the same organisation" do
+          expect(duplicate_sets.count).to eq(1)
+          expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+        end
+
+        context "when there is a deleted duplicate scheme" do
+          before do
+            create(:scheme, :duplicate, discarded_at: Time.zone.now)
+          end
+
+          it "does not return the deleted scheme as a duplicate" do
+            expect(duplicate_sets.count).to eq(1)
+            expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+          end
+        end
+
+        context "when there is a scheme with a different scheme_type" do
+          before do
+            create(:scheme, :duplicate, scheme_type: 7)
+          end
+
+          it "does not return a scheme with a different scheme_type as a duplicate" do
+            expect(duplicate_sets.count).to eq(1)
+            expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+          end
+        end
+
+        context "when there is a scheme with a different registered_under_care_act" do
+          before do
+            create(:scheme, :duplicate, registered_under_care_act: 2)
+          end
+
+          it "does not return a scheme with a different registered_under_care_act as a duplicate" do
+            expect(duplicate_sets.count).to eq(1)
+            expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+          end
+        end
+
+        context "when there is a scheme with a different primary_client_group" do
+          before do
+            create(:scheme, :duplicate, primary_client_group: "H")
+          end
+
+          it "does not return a scheme with a different primary_client_group as a duplicate" do
+            expect(duplicate_sets.count).to eq(1)
+            expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+          end
+        end
+
+        context "when there is a scheme with a different secondary_client_group" do
+          before do
+            create(:scheme, :duplicate, secondary_client_group: "O")
+          end
+
+          it "does not return a scheme with a different secondary_client_group as a duplicate" do
+            expect(duplicate_sets.count).to eq(1)
+            expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+          end
+        end
+
+        context "when there is a scheme with a different has_other_client_group" do
+          before do
+            create(:scheme, :duplicate, has_other_client_group: 0)
+          end
+
+          it "does not return a scheme with a different has_other_client_group as a duplicate" do
+            expect(duplicate_sets.count).to eq(1)
+            expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+          end
+        end
+
+        context "when there is a scheme with a different support_type" do
+          before do
+            create(:scheme, :duplicate, support_type: 4)
+          end
+
+          it "does not return a scheme with a different support_type as a duplicate" do
+            expect(duplicate_sets.count).to eq(1)
+            expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+          end
+        end
+
+        context "when there is a scheme with a different intended_stay" do
+          before do
+            create(:scheme, :duplicate, intended_stay: "P")
+          end
+
+          it "does not return a scheme with a different intended_stay as a duplicate" do
+            expect(duplicate_sets.count).to eq(1)
+            expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+          end
+        end
+
+        context "when there is a scheme with nil values for duplicate check fields" do
+          before do
+            [scheme, duplicate_scheme].each do |s|
+              s.scheme_type = nil
+              s.registered_under_care_act = nil
+              s.primary_client_group = nil
+              s.secondary_client_group = nil
+              s.has_other_client_group = nil
+              s.support_type = nil
+              s.intended_stay = nil
+              s.save!(validate: false)
+            end
+          end
+
+          it "does not return a scheme with nil values as a duplicate" do
+            expect(duplicate_sets).to be_empty
+          end
+        end
+
+        context "when there are duplicate schemes without secondary client group" do
+          let!(:scheme) { create(:scheme, :duplicate, owning_organisation: organisation, secondary_client_group: nil, has_other_client_group: 0) }
+          let!(:duplicate_scheme) { create(:scheme, :duplicate, owning_organisation: organisation, secondary_client_group: nil, has_other_client_group: 0) }
+
+          it "returns the duplicates" do
+            expect(duplicate_sets.count).to eq(1)
+            expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+          end
+
+          context "and there is a deleted scheme without secondary client group" do
+            before do
+              create(:scheme, :duplicate, owning_organisation: organisation, secondary_client_group: nil, has_other_client_group: 0, discarded_at: Time.zone.now)
+            end
+
+            it "does not return the deleted scheme as a duplicate" do
+              expect(duplicate_sets.count).to eq(1)
+              expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
+            end
+          end
+        end
+      end
     end
   end
 