From 9f0d9da9fd51e6bdca26cc3d20f403f857f83d94 Mon Sep 17 00:00:00 2001
From: Kat
Date: Fri, 2 Aug 2024 10:26:05 +0100
Subject: [PATCH] Add users export service

---
 app/services/exports/export_service.rb        |   1 -
 app/services/exports/user_export_constants.rb |  21 ++
 app/services/exports/user_export_service.rb   | 176 ++++++++++++++
 spec/fixtures/exports/user.xml                |  17 ++
 .../lettings_log_export_service_spec.rb       |   6 +-
 .../exports/user_export_service_spec.rb       | 219 ++++++++++++++++++
 6 files changed, 436 insertions(+), 4 deletions(-)
 create mode 100644 app/services/exports/user_export_constants.rb
 create mode 100644 app/services/exports/user_export_service.rb
 create mode 100644 spec/fixtures/exports/user.xml
 create mode 100644 spec/services/exports/user_export_service_spec.rb

diff --git a/app/services/exports/export_service.rb b/app/services/exports/export_service.rb
index ddb6ea58c..095a812f8 100644
--- a/app/services/exports/export_service.rb
+++ b/app/services/exports/export_service.rb
@@ -1,6 +1,5 @@
 module Exports
   class ExportService
-    include Exports::LettingsLogExportConstants
     include CollectionTimeHelper
 
     def initialize(storage_service, logger = Rails.logger)
diff --git a/app/services/exports/user_export_constants.rb b/app/services/exports/user_export_constants.rb
new file mode 100644
index 000000000..9ce5840d9
--- /dev/null
+++ b/app/services/exports/user_export_constants.rb
@@ -0,0 +1,21 @@
+# Constants shared by the users XML export.
+module Exports::UserExportConstants
+  # Maximum number of user records written to a single XML part file.
+  MAX_XML_RECORDS = 10_000
+
+  # Attribute names that are written to the export; every other attribute is skipped.
+  EXPORT_FIELDS = Set[
+    "id",
+    "email",
+    "name",
+    "phone",
+    "organisation_id",
+    "organisation_name",
+    "role",
+    "is_dpo",
+    "is_key_contact",
+    "active",
+    "sign_in_count",
+    "last_sign_in_at",
+  ].freeze
+end
diff --git a/app/services/exports/user_export_service.rb b/app/services/exports/user_export_service.rb
new file mode 100644
index 000000000..d9f06451e
--- /dev/null
+++ b/app/services/exports/user_export_service.rb
@@ -0,0 +1,176 @@
+module Exports
+  # Exports users as XML part files bundled into a ZIP archive and records each run as a LogsExport.
+  class UserExportService
+    include Exports::UserExportConstants
+    include CollectionTimeHelper
+
+    # @param storage_service [#write_file] receives the archive file name and the ZIP IO
+    # @param start_time [Time] stored as the run's started_at and used as the upper bound on users' updated_at
+    # @param logger [#info] defaults to the Rails logger
+    def initialize(storage_service, start_time, logger = Rails.logger)
+      @storage_service = storage_service
+      @logger = logger
+      @start_time = start_time
+    end
+
+    # Builds a new export run, writes its archive (when any users qualify) and saves the run.
+    #
+    # @param full_update [Boolean] when true, ignore the previous export and include every user updated up to start_time
+    # @return [Hash{String => Time}] archive name mapped to the current time once written; empty when nothing was exported
+    def export_xml_users(full_update: false)
+      # NOTE(review): this picks the latest export of any collection, not only "users" - confirm that is intended.
+      recent_export = LogsExport.order("started_at").last
+
+      collection = "users"
+      base_number = LogsExport.where(empty_export: false, collection:).maximum(:base_number) || 1
+      export = build_export_run(collection, base_number, full_update)
+      archives_for_manifest = write_export_archive(export, collection, recent_export, full_update)
+
+      export.empty_export = archives_for_manifest.empty?
+      export.save!
+
+      archives_for_manifest
+    end
+
+  private
+
+    # Instantiates (without saving) the LogsExport for this run, working out its base and increment numbers.
+    def build_export_run(collection, base_number, full_update)
+      @logger.info("Building export run for #{collection}")
+      previous_exports_with_data = LogsExport.where(collection:, empty_export: false)
+
+      increment_number = previous_exports_with_data.where(base_number:).maximum(:increment_number) || 1
+
+      if full_update
+        base_number += 1 if LogsExport.any? # Only increment when it's not the first run
+        increment_number = 1
+      else
+        increment_number += 1
+      end
+
+      if previous_exports_with_data.empty?
+        return LogsExport.new(collection:, base_number:, started_at: @start_time)
+      end
+
+      LogsExport.new(collection:, started_at: @start_time, base_number:, increment_number:)
+    end
+
+    # Archive name without extension, e.g. core_users_2022_2023_apr_mar_f0001_inc0001; nil when collection is nil.
+    def get_archive_name(collection, base_number, increment)
+      return unless collection
+
+      base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
+      increment_str = "inc#{increment.to_s.rjust(4, '0')}"
+      "core_#{collection}_#{current_collection_start_year}_#{current_collection_start_year + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
+    end
+
+    # Writes the users in slices of MAX_XML_RECORDS, one XML part per slice, plus a manifest,
+    # then hands the finished ZIP to the storage service. Returns {} when there are no users to export.
+    def write_export_archive(export, collection, recent_export, full_update)
+      archive = get_archive_name(collection, export.base_number, export.increment_number)
+
+      initial_users_count = retrieve_users(recent_export, full_update).count
+      @logger.info("Creating #{archive} - #{initial_users_count} users")
+      return {} if initial_users_count.zero?
+
+      zip_file = Zip::File.open_buffer(StringIO.new)
+
+      part_number = 1
+      last_processed_user = nil
+      users_count_after_export = 0
+
+      loop do
+        users_slice = next_users_slice(recent_export, full_update, last_processed_user)
+        break if users_slice.empty?
+
+        data_xml = build_export_xml(users_slice)
+        part_number_str = "pt#{part_number.to_s.rjust(3, '0')}"
+        zip_file.add("#{archive}_#{part_number_str}.xml", data_xml)
+        part_number += 1
+        last_processed_user = users_slice.last
+        users_count_after_export += users_slice.count
+        @logger.info("Added #{archive}_#{part_number_str}.xml")
+      end
+
+      manifest_xml = build_manifest_xml(users_count_after_export)
+      zip_file.add("manifest.xml", manifest_xml)
+
+      # Required by S3 to avoid Aws::S3::Errors::BadDigest
+      zip_io = zip_file.write_buffer
+      zip_io.rewind
+      @logger.info("Writing #{archive}.zip")
+      @storage_service.write_file("#{archive}.zip", zip_io)
+      { archive => Time.zone.now }
+    end
+
+    # Next slice of users after last_processed_user (or the first slice when it is nil).
+    # Ordering and the marker both use (created_at, id) so that users sharing a created_at
+    # with the last row of a slice are not skipped.
+    def next_users_slice(recent_export, full_update, last_processed_user)
+      users = retrieve_users(recent_export, full_update)
+      if last_processed_user
+        marker = { created_at: last_processed_user.created_at, id: last_processed_user.id }
+        users = users.where("(created_at > :created_at OR (created_at = :created_at AND id > :id))", marker)
+      end
+      users.order(:created_at, :id).limit(MAX_XML_RECORDS).to_a
+    end
+
+    # Users updated up to start_time; for a partial update with a recent export, only those updated since it started.
+    def retrieve_users(recent_export, full_update)
+      if !full_update && recent_export
+        params = { from: recent_export.started_at, to: @start_time }
+        User.where("(updated_at >= :from AND updated_at <= :to)", params)
+      else
+        params = { to: @start_time }
+        User.where("updated_at <= :to", params)
+      end
+    end
+
+    # Serialises the document as UTF-8 into a Tempfile and returns it rewound.
+    def xml_doc_to_temp_file(xml_doc)
+      file = Tempfile.new
+      xml_doc.write_xml_to(file, encoding: "UTF-8")
+      file.rewind
+      file
+    end
+
+    # Manifest document reporting how many records the archive contains.
+    def build_manifest_xml(record_number)
+      # The document needs a <report> root, otherwise doc.at("report") below returns nil.
+      doc = Nokogiri::XML("<?xml version=\"1.0\" encoding=\"UTF-8\"?><report/>")
+      doc.at("report") << doc.create_element("form-data-summary")
+      doc.at("form-data-summary") << doc.create_element("records")
+      doc.at("records") << doc.create_element("count-of-records", record_number)
+
+      xml_doc_to_temp_file(doc)
+    end
+
+    # Raw attributes of the user with role, organisation_name and active filled in from the model.
+    def apply_cds_transformation(user)
+      attribute_hash = user.attributes_before_type_cast
+      attribute_hash["role"] = user.role
+      attribute_hash["organisation_name"] = user.organisation.name
+      attribute_hash["active"] = user.active?
+      attribute_hash
+    end
+
+    # One <form> element per user under a <forms> root, containing only the EXPORT_FIELDS attributes.
+    def build_export_xml(users)
+      # The document needs a <forms> root, otherwise doc.at("forms") below returns nil.
+      doc = Nokogiri::XML("<?xml version=\"1.0\" encoding=\"UTF-8\"?><forms/>")
+
+      users.each do |user|
+        attribute_hash = apply_cds_transformation(user)
+        form = doc.create_element("form")
+        doc.at("forms") << form
+        attribute_hash.each do |key, value|
+          next unless EXPORT_FIELDS.include?(key)
+
+          form << doc.create_element(key, value)
+        end
+      end
+
+      xml_doc_to_temp_file(doc)
+    end
+  end
+end
diff --git a/spec/fixtures/exports/user.xml b/spec/fixtures/exports/user.xml
new file mode 100644
index 000000000..98226c556
--- /dev/null
+++ b/spec/fixtures/exports/user.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<forms>
+  <form>
+    <id>{id}</id>
+    <email>{email}</email>
+    <name>Danny Rojas</name>
+    <organisation_id>{organisation_id}</organisation_id>
+    <sign_in_count>5</sign_in_count>
+    <last_sign_in_at/>
+    <role>data_provider</role>
+    <phone>1234512345123</phone>
+    <is_dpo>false</is_dpo>
+    <is_key_contact>false</is_key_contact>
+    <active>true</active>
+    <organisation_name>MHCLG</organisation_name>
+  </form>
+</forms>
diff --git a/spec/services/exports/lettings_log_export_service_spec.rb b/spec/services/exports/lettings_log_export_service_spec.rb index 75192a1ec..58b0442f4 100644 --- a/spec/services/exports/lettings_log_export_service_spec.rb +++ b/spec/services/exports/lettings_log_export_service_spec.rb @@ -11,8 +11,6 @@ RSpec.describe Exports::LettingsLogExportService do let(:real_2021_2022_form) { Form.new("config/forms/2021_2022.json") } let(:real_2022_2023_form) { Form.new("config/forms/2022_2023.json") } - let(:expected_master_manifest_filename) { "Manifest_2022_05_01_0001.csv" } - let(:expected_master_manifest_rerun) { "Manifest_2022_05_01_0002.csv" } let(:expected_zip_filename) { "core_2021_2022_apr_mar_f0001_inc0001.zip" } let(:expected_data_filename) { "core_2021_2022_apr_mar_f0001_inc0001_pt001.xml" } let(:expected_manifest_filename) { "manifest.xml" } @@ -50,6 +48,7 @@ RSpec.describe Exports::LettingsLogExportService do context "when exporting daily lettings logs in XML" do context "and no lettings logs is available for export" do it "returns an empty archives list" do + expect(storage_service).not_to receive(:write_file) expect(export_service.export_xml_lettings_logs).to eq({}) end end @@ -74,7 +73,7 @@ RSpec.describe Exports::LettingsLogExportService do end it "returns empty archives list for archives manifest" do - export_service.export_xml_lettings_logs + expect(storage_service).not_to receive(:write_file) expect(export_service.export_xml_lettings_logs).to eq({}) end end @@ -333,6 +332,7 @@ RSpec.describe Exports::LettingsLogExportService do end it "does not add any entry for the master manifest (no lettings logs)" do + expect(storage_service).not_to receive(:write_file) expect(export_service.export_xml_lettings_logs).to eq({}) end end diff --git a/spec/services/exports/user_export_service_spec.rb b/spec/services/exports/user_export_service_spec.rb new file mode 100644 index 000000000..b3a19e0f7 --- /dev/null +++ 
b/spec/services/exports/user_export_service_spec.rb
@@ -0,0 +1,219 @@
+require "rails_helper"
+
+RSpec.describe Exports::UserExportService do
+  subject(:export_service) { described_class.new(storage_service, start_time) }
+
+  let(:storage_service) { instance_double(Storage::S3Service) }
+
+  let(:xml_export_file) { File.open("spec/fixtures/exports/user.xml", "r:UTF-8") }
+  let(:local_manifest_file) { File.open("spec/fixtures/exports/manifest.xml", "r:UTF-8") }
+
+  let(:expected_zip_filename) { "core_users_2022_2023_apr_mar_f0001_inc0001.zip" }
+  let(:expected_data_filename) { "core_users_2022_2023_apr_mar_f0001_inc0001_pt001.xml" }
+  let(:expected_manifest_filename) { "manifest.xml" }
+  let(:start_time) { Time.zone.local(2022, 5, 1) }
+  let(:organisation) { create(:organisation, with_dsa: false) }
+
+  # Returns the template with {id}, {organisation_id} and {email} filled in (unchanged if a placeholder is absent).
+  def replace_entity_ids(user, export_template)
+    %w[id organisation_id email].reduce(export_template) do |template, field|
+      template.sub("{#{field}}", user[field].to_s)
+    end
+  end
+
+  # Returns the template with {recno} filled in; non-mutating sub so a missing placeholder never yields nil.
+  def replace_record_number(export_template, record_number)
+    export_template.sub(/\{recno\}/, record_number.to_s)
+  end
+
+  before do
+    Timecop.freeze(start_time)
+    Singleton.__init__(FormHandler)
+    allow(storage_service).to receive(:write_file)
+  end
+
+  after do
+    Timecop.return
+  end
+
+  context "when exporting daily users in XML" do
+    context "and no users are available for export" do
+      it "returns an empty archives list" do
+        expect(export_service.export_xml_users).to eq({})
+      end
+    end
+
+    context "and one user is available for export" do
+      let!(:user) { create(:user, organisation:) }
+
+      it "generates a ZIP export file with the expected filename" do
+        expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
+        export_service.export_xml_users
+      end
+
+      it "generates an XML export file with the expected filename within the ZIP file" do
+        expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+          entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
+          expect(entry).not_to be_nil
+          expect(entry.name).to eq(expected_data_filename)
+        end
+        export_service.export_xml_users
+      end
+
+      it "generates an XML manifest file with the expected content within the ZIP file" do
+        expected_content = replace_record_number(local_manifest_file.read, 1)
+        expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+          entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
+          expect(entry).not_to be_nil
+          expect(entry.get_input_stream.read).to eq(expected_content)
+        end
+
+        export_service.export_xml_users
+      end
+
+      it "generates an XML export file with the expected content within the ZIP file" do
+        expected_content = replace_entity_ids(user, xml_export_file.read)
+        expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+          entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
+          expect(entry).not_to be_nil
+          expect(entry.get_input_stream.read).to eq(expected_content)
+        end
+
+        export_service.export_xml_users
+      end
+
+      it "returns the list with correct archive" do
+        expect(export_service.export_xml_users).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
+      end
+    end
+
+    context "and multiple users are available for export" do
+      before do
+        create(:user, organisation:)
+        create(:user, organisation:)
+      end
+
+      it "generates an XML manifest file with the expected content within the ZIP file" do
+        expected_content = replace_record_number(local_manifest_file.read, 2)
+        expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+          entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
+          expect(entry).not_to be_nil
+          expect(entry.get_input_stream.read).to eq(expected_content)
+        end
+
+        export_service.export_xml_users
+      end
+
+      it "creates an export record in a database with correct time" do
+        expect { export_service.export_xml_users }
+          .to change(LogsExport, :count).by(1)
+        expect(LogsExport.last.started_at).to be_within(2.seconds).of(start_time)
+      end
+
+      context "when this is the first export (full)" do
+        it "returns a ZIP archive for the master manifest (existing lettings logs)" do
+          expect(export_service.export_xml_users).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
+        end
+      end
+
+      context "and underlying data changes between getting the logs and writting the manifest" do
+        # Empty XML document in a rewound Tempfile, used in place of the files the service would build.
+        def empty_xml_file
+          file = Tempfile.new
+          doc = Nokogiri::XML("")
+          doc.write_xml_to(file, encoding: "UTF-8")
+          file.rewind
+          file
+        end
+
+        # Destroys the users that are being exported, then returns a placeholder data file.
+        def remove_users(users)
+          users.each(&:destroy)
+          empty_xml_file
+        end
+
+        it "maintains the same record number" do
+          # rubocop:disable RSpec/SubjectStub
+          allow(export_service).to receive(:build_export_xml) do |users|
+            remove_users(users)
+          end
+          allow(export_service).to receive(:build_manifest_xml) do
+            empty_xml_file
+          end
+
+          expect(export_service).to receive(:build_manifest_xml).with(2)
+          # rubocop:enable RSpec/SubjectStub
+          export_service.export_xml_users
+        end
+      end
+
+      context "when this is a second export (partial)" do
+        before do
+          start_time = Time.zone.local(2022, 6, 1)
+          LogsExport.new(started_at: start_time).save! # this should be user export
+        end
+
+        it "does not add any entry for the master manifest (no users)" do
+          expect(export_service.export_xml_users).to eq({})
+        end
+      end
+    end
+
+    context "and a previous export has run the same day having users" do
+      before do
+        create(:user, organisation:)
+        export_service.export_xml_users
+      end
+
+      context "and we trigger another full update" do
+        it "increments the base number" do
+          export_service.export_xml_users(full_update: true)
+          expect(LogsExport.last.base_number).to eq(2)
+        end
+
+        it "resets the increment number" do
+          export_service.export_xml_users(full_update: true)
+          expect(LogsExport.last.increment_number).to eq(1)
+        end
+
+        it "returns a correct archives list for manifest file" do
+          expect(export_service.export_xml_users(full_update: true)).to eq({ "core_users_2022_2023_apr_mar_f0002_inc0001" => start_time })
+        end
+
+        it "generates a ZIP export file with the expected filename" do
+          expect(storage_service).to receive(:write_file).with("core_users_2022_2023_apr_mar_f0002_inc0001.zip", any_args)
+          export_service.export_xml_users(full_update: true)
+        end
+      end
+    end
+
+    context "and a previous export has run having no users" do
+      before { export_service.export_xml_users }
+
+      it "doesn't increment the manifest number by 1" do
+        export_service.export_xml_users
+
+        expect(LogsExport.last.increment_number).to eq(1)
+      end
+    end
+
+    context "and a user has been migrated since the previous partial export" do
+      before do
+        create(:user, updated_at: Time.zone.local(2022, 4, 27), organisation:)
+        create(:user, updated_at: Time.zone.local(2022, 4, 27), organisation:)
+        LogsExport.create!(started_at: Time.zone.local(2022, 4, 26), base_number: 1, increment_number: 1)
+      end
+
+      it "generates an XML manifest file with the expected content within the ZIP file" do
+        expected_content = replace_record_number(local_manifest_file.read, 2)
+        expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+          entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
+          expect(entry).not_to be_nil
+          expect(entry.get_input_stream.read).to eq(expected_content)
+        end
+
+        expect(export_service.export_xml_users).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
+      end
+    end
+  end
+end