diff --git a/app/services/exports/export_service.rb b/app/services/exports/export_service.rb
index ddb6ea58c..095a812f8 100644
--- a/app/services/exports/export_service.rb
+++ b/app/services/exports/export_service.rb
@@ -1,6 +1,5 @@
module Exports
class ExportService
- include Exports::LettingsLogExportConstants
include CollectionTimeHelper
def initialize(storage_service, logger = Rails.logger)
diff --git a/app/services/exports/user_export_constants.rb b/app/services/exports/user_export_constants.rb
new file mode 100644
index 000000000..9ce5840d9
--- /dev/null
+++ b/app/services/exports/user_export_constants.rb
@@ -0,0 +1,18 @@
+module Exports::UserExportConstants
+ MAX_XML_RECORDS = 10_000
+
+ EXPORT_FIELDS = Set[
+ "id",
+ "email",
+ "name",
+ "phone",
+ "organisation_id",
+ "organisation_name",
+ "role",
+ "is_dpo",
+ "is_key_contact",
+ "active",
+ "sign_in_count",
+ "last_sign_in_at",
+ ]
+end
diff --git a/app/services/exports/user_export_service.rb b/app/services/exports/user_export_service.rb
new file mode 100644
index 000000000..d9f06451e
--- /dev/null
+++ b/app/services/exports/user_export_service.rb
@@ -0,0 +1,156 @@
+module Exports
+ class UserExportService
+ include Exports::UserExportConstants
+ include CollectionTimeHelper
+
+ def initialize(storage_service, start_time, logger = Rails.logger)
+ @storage_service = storage_service
+ @logger = logger
+ @start_time = start_time
+ end
+
+ def export_xml_users(full_update: false)
+ recent_export = LogsExport.order("started_at").last
+
+ collection = "users"
+ base_number = LogsExport.where(empty_export: false, collection:).maximum(:base_number) || 1
+ export = build_export_run(collection, base_number, full_update)
+ archives_for_manifest = write_export_archive(export, collection, recent_export, full_update)
+
+ export.empty_export = archives_for_manifest.empty?
+ export.save!
+
+ archives_for_manifest
+ end
+
+ private
+
+ def build_export_run(collection, base_number, full_update)
+ @logger.info("Building export run for #{collection}")
+ previous_exports_with_data = LogsExport.where(collection:, empty_export: false)
+
+ increment_number = previous_exports_with_data.where(base_number:).maximum(:increment_number) || 1
+
+ if full_update
+ base_number += 1 if LogsExport.any? # Only increment when it's not the first run
+ increment_number = 1
+ else
+ increment_number += 1
+ end
+
+ if previous_exports_with_data.empty?
+ return LogsExport.new(collection:, base_number:, started_at: @start_time)
+ end
+
+ LogsExport.new(collection:, started_at: @start_time, base_number:, increment_number:)
+ end
+
+ def get_archive_name(collection, base_number, increment)
+ return unless collection
+
+ base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
+ increment_str = "inc#{increment.to_s.rjust(4, '0')}"
+ "core_#{collection}_#{current_collection_start_year}_#{current_collection_start_year + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
+ end
+
+ def write_export_archive(export, collection, recent_export, full_update)
+ archive = get_archive_name(collection, export.base_number, export.increment_number)
+
+ initial_users_count = retrieve_users(recent_export, full_update).count
+ @logger.info("Creating #{archive} - #{initial_users_count} users")
+ return {} if initial_users_count.zero?
+
+ zip_file = Zip::File.open_buffer(StringIO.new)
+
+ part_number = 1
+ last_processed_marker = nil
+ users_count_after_export = 0
+
+ loop do
+ users_slice = if last_processed_marker.present?
+ retrieve_users(recent_export, full_update)
+ .where("created_at > ?", last_processed_marker)
+ .order(:created_at)
+ .limit(MAX_XML_RECORDS).to_a
+ else
+ retrieve_users(recent_export, full_update)
+ .order(:created_at)
+ .limit(MAX_XML_RECORDS).to_a
+ end
+
+ break if users_slice.empty?
+
+ data_xml = build_export_xml(users_slice)
+ part_number_str = "pt#{part_number.to_s.rjust(3, '0')}"
+ zip_file.add("#{archive}_#{part_number_str}.xml", data_xml)
+ part_number += 1
+ last_processed_marker = users_slice.last.created_at
+ users_count_after_export += users_slice.count
+ @logger.info("Added #{archive}_#{part_number_str}.xml")
+ end
+
+ manifest_xml = build_manifest_xml(users_count_after_export)
+ zip_file.add("manifest.xml", manifest_xml)
+
+ # Required by S3 to avoid Aws::S3::Errors::BadDigest
+ zip_io = zip_file.write_buffer
+ zip_io.rewind
+ @logger.info("Writing #{archive}.zip")
+ @storage_service.write_file("#{archive}.zip", zip_io)
+ { archive => Time.zone.now }
+ end
+
+ def retrieve_users(recent_export, full_update)
+ if !full_update && recent_export
+ params = { from: recent_export.started_at, to: @start_time }
+ User.where("(updated_at >= :from AND updated_at <= :to)", params)
+ else
+ params = { to: @start_time }
+ User.where("updated_at <= :to", params)
+ end
+ end
+
+ def xml_doc_to_temp_file(xml_doc)
+ file = Tempfile.new
+ xml_doc.write_xml_to(file, encoding: "UTF-8")
+ file.rewind
+ file
+ end
+
+ def build_manifest_xml(record_number)
+ doc = Nokogiri::XML("<report/>")
+ doc.at("report") << doc.create_element("form-data-summary")
+ doc.at("form-data-summary") << doc.create_element("records")
+ doc.at("records") << doc.create_element("count-of-records", record_number)
+
+ xml_doc_to_temp_file(doc)
+ end
+
+ def apply_cds_transformation(user)
+ attribute_hash = user.attributes_before_type_cast
+ attribute_hash["role"] = user.role
+ attribute_hash["organisation_name"] = user.organisation.name
+ attribute_hash["active"] = user.active?
+ attribute_hash
+ end
+
+ def build_export_xml(users)
+ doc = Nokogiri::XML("<forms/>")
+
+ users.each do |user|
+ attribute_hash = apply_cds_transformation(user)
+ form = doc.create_element("form")
+ doc.at("forms") << form
+ attribute_hash.each do |key, value|
+ if !EXPORT_FIELDS.include?(key)
+ next
+ else
+ form << doc.create_element(key, value)
+ end
+ end
+ end
+
+ xml_doc_to_temp_file(doc)
+ end
+ end
+end
diff --git a/spec/fixtures/exports/user.xml b/spec/fixtures/exports/user.xml
new file mode 100644
index 000000000..98226c556
--- /dev/null
+++ b/spec/fixtures/exports/user.xml
@@ -0,0 +1,17 @@
+
+
+
+
diff --git a/spec/services/exports/lettings_log_export_service_spec.rb b/spec/services/exports/lettings_log_export_service_spec.rb
index 75192a1ec..58b0442f4 100644
--- a/spec/services/exports/lettings_log_export_service_spec.rb
+++ b/spec/services/exports/lettings_log_export_service_spec.rb
@@ -11,8 +11,6 @@ RSpec.describe Exports::LettingsLogExportService do
let(:real_2021_2022_form) { Form.new("config/forms/2021_2022.json") }
let(:real_2022_2023_form) { Form.new("config/forms/2022_2023.json") }
- let(:expected_master_manifest_filename) { "Manifest_2022_05_01_0001.csv" }
- let(:expected_master_manifest_rerun) { "Manifest_2022_05_01_0002.csv" }
let(:expected_zip_filename) { "core_2021_2022_apr_mar_f0001_inc0001.zip" }
let(:expected_data_filename) { "core_2021_2022_apr_mar_f0001_inc0001_pt001.xml" }
let(:expected_manifest_filename) { "manifest.xml" }
@@ -50,6 +48,7 @@ RSpec.describe Exports::LettingsLogExportService do
context "when exporting daily lettings logs in XML" do
context "and no lettings logs is available for export" do
it "returns an empty archives list" do
+ expect(storage_service).not_to receive(:write_file)
expect(export_service.export_xml_lettings_logs).to eq({})
end
end
@@ -74,7 +73,7 @@ RSpec.describe Exports::LettingsLogExportService do
end
it "returns empty archives list for archives manifest" do
- export_service.export_xml_lettings_logs
+ expect(storage_service).not_to receive(:write_file)
expect(export_service.export_xml_lettings_logs).to eq({})
end
end
@@ -333,6 +332,7 @@ RSpec.describe Exports::LettingsLogExportService do
end
it "does not add any entry for the master manifest (no lettings logs)" do
+ expect(storage_service).not_to receive(:write_file)
expect(export_service.export_xml_lettings_logs).to eq({})
end
end
diff --git a/spec/services/exports/user_export_service_spec.rb b/spec/services/exports/user_export_service_spec.rb
new file mode 100644
index 000000000..b3a19e0f7
--- /dev/null
+++ b/spec/services/exports/user_export_service_spec.rb
@@ -0,0 +1,219 @@
+require "rails_helper"
+
+RSpec.describe Exports::UserExportService do
+ subject(:export_service) { described_class.new(storage_service, start_time) }
+
+ let(:storage_service) { instance_double(Storage::S3Service) }
+
+ let(:xml_export_file) { File.open("spec/fixtures/exports/user.xml", "r:UTF-8") }
+ let(:local_manifest_file) { File.open("spec/fixtures/exports/manifest.xml", "r:UTF-8") }
+
+ let(:expected_zip_filename) { "core_users_2022_2023_apr_mar_f0001_inc0001.zip" }
+ let(:expected_data_filename) { "core_users_2022_2023_apr_mar_f0001_inc0001_pt001.xml" }
+ let(:expected_manifest_filename) { "manifest.xml" }
+ let(:start_time) { Time.zone.local(2022, 5, 1) }
+ let(:organisation) { create(:organisation, with_dsa: false) }
+
+ def replace_entity_ids(user, export_template)
+ export_template.sub!(/\{id\}/, user["id"].to_s)
+ export_template.sub!(/\{organisation_id\}/, user["organisation_id"].to_s)
+ export_template.sub!(/\{email\}/, user["email"].to_s)
+ end
+
+ def replace_record_number(export_template, record_number)
+ export_template.sub!(/\{recno\}/, record_number.to_s)
+ end
+
+ before do
+ Timecop.freeze(start_time)
+ Singleton.__init__(FormHandler)
+ allow(storage_service).to receive(:write_file)
+ end
+
+ after do
+ Timecop.return
+ end
+
+ context "when exporting daily users in XML" do
+ context "and no users are available for export" do
+ it "returns an empty archives list" do
+ expect(export_service.export_xml_users).to eq({})
+ end
+ end
+
+ context "and one user is available for export" do
+ let!(:user) { create(:user, organisation:) }
+
+ it "generates a ZIP export file with the expected filename" do
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
+ export_service.export_xml_users
+ end
+
+ it "generates an XML export file with the expected filename within the ZIP file" do
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+ entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
+ expect(entry).not_to be_nil
+ expect(entry.name).to eq(expected_data_filename)
+ end
+ export_service.export_xml_users
+ end
+
+ it "generates an XML manifest file with the expected content within the ZIP file" do
+ expected_content = replace_record_number(local_manifest_file.read, 1)
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+ entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
+ expect(entry).not_to be_nil
+ expect(entry.get_input_stream.read).to eq(expected_content)
+ end
+
+ export_service.export_xml_users
+ end
+
+ it "generates an XML export file with the expected content within the ZIP file" do
+ expected_content = replace_entity_ids(user, xml_export_file.read)
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+ entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
+ expect(entry).not_to be_nil
+ expect(entry.get_input_stream.read).to eq(expected_content)
+ end
+
+ export_service.export_xml_users
+ end
+
+ it "returns the list with correct archive" do
+ expect(export_service.export_xml_users).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
+ end
+ end
+
+ context "and multiple users are available for export" do
+ before do
+ create(:user, organisation:)
+ create(:user, organisation:)
+ end
+
+ it "generates an XML manifest file with the expected content within the ZIP file" do
+ expected_content = replace_record_number(local_manifest_file.read, 2)
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+ entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
+ expect(entry).not_to be_nil
+ expect(entry.get_input_stream.read).to eq(expected_content)
+ end
+
+ export_service.export_xml_users
+ end
+
+ it "creates an export record in a database with correct time" do
+ expect { export_service.export_xml_users }
+ .to change(LogsExport, :count).by(1)
+ expect(LogsExport.last.started_at).to be_within(2.seconds).of(start_time)
+ end
+
+ context "when this is the first export (full)" do
+ it "returns a ZIP archive for the master manifest (existing users)" do
+ expect(export_service.export_xml_users).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
+ end
+ end
+
+ context "and underlying data changes between getting the logs and writing the manifest" do
+ def remove_users(users)
+ users.each(&:destroy)
+ file = Tempfile.new
+ doc = Nokogiri::XML("<forms/>")
+ doc.write_xml_to(file, encoding: "UTF-8")
+ file.rewind
+ file
+ end
+
+ def create_fake_manifest
+ file = Tempfile.new
+ doc = Nokogiri::XML("<report/>")
+ doc.write_xml_to(file, encoding: "UTF-8")
+ file.rewind
+ file
+ end
+
+ it "maintains the same record number" do
+ # rubocop:disable RSpec/SubjectStub
+ allow(export_service).to receive(:build_export_xml) do |users|
+ remove_users(users)
+ end
+ allow(export_service).to receive(:build_manifest_xml) do
+ create_fake_manifest
+ end
+
+ expect(export_service).to receive(:build_manifest_xml).with(2)
+ # rubocop:enable RSpec/SubjectStub
+ export_service.export_xml_users
+ end
+ end
+
+ context "when this is a second export (partial)" do
+ before do
+ start_time = Time.zone.local(2022, 6, 1)
+ LogsExport.new(started_at: start_time).save! # this should be user export
+ end
+
+ it "does not add any entry for the master manifest (no users)" do
+ expect(export_service.export_xml_users).to eq({})
+ end
+ end
+ end
+
+ context "and a previous export has run the same day having users" do
+ before do
+ create(:user, organisation:)
+ export_service.export_xml_users
+ end
+
+ context "and we trigger another full update" do
+ it "increments the base number" do
+ export_service.export_xml_users(full_update: true)
+ expect(LogsExport.last.base_number).to eq(2)
+ end
+
+ it "resets the increment number" do
+ export_service.export_xml_users(full_update: true)
+ expect(LogsExport.last.increment_number).to eq(1)
+ end
+
+ it "returns a correct archives list for manifest file" do
+ expect(export_service.export_xml_users(full_update: true)).to eq({ "core_users_2022_2023_apr_mar_f0002_inc0001" => start_time })
+ end
+
+ it "generates a ZIP export file with the expected filename" do
+ expect(storage_service).to receive(:write_file).with("core_users_2022_2023_apr_mar_f0002_inc0001.zip", any_args)
+ export_service.export_xml_users(full_update: true)
+ end
+ end
+ end
+
+ context "and a previous export has run having no users" do
+ before { export_service.export_xml_users }
+
+ it "doesn't increment the manifest number by 1" do
+ export_service.export_xml_users
+
+ expect(LogsExport.last.increment_number).to eq(1)
+ end
+ end
+
+ context "and a user has been migrated since the previous partial export" do
+ before do
+ create(:user, updated_at: Time.zone.local(2022, 4, 27), organisation:)
+ create(:user, updated_at: Time.zone.local(2022, 4, 27), organisation:)
+ LogsExport.create!(started_at: Time.zone.local(2022, 4, 26), base_number: 1, increment_number: 1)
+ end
+
+ it "generates an XML manifest file with the expected content within the ZIP file" do
+ expected_content = replace_record_number(local_manifest_file.read, 2)
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+ entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
+ expect(entry).not_to be_nil
+ expect(entry.get_input_stream.read).to eq(expected_content)
+ end
+
+ expect(export_service.export_xml_users).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
+ end
+ end
+ end
+end