Browse Source

Add users export service

pull/2652/head
Kat 2 years ago committed by kosiakkatrina
parent
commit
9f0d9da9fd
  1. 1
      app/services/exports/export_service.rb
  2. 18
      app/services/exports/user_export_constants.rb
  3. 156
      app/services/exports/user_export_service.rb
  4. 17
      spec/fixtures/exports/user.xml
  5. 6
      spec/services/exports/lettings_log_export_service_spec.rb
  6. 219
      spec/services/exports/user_export_service_spec.rb

1
app/services/exports/export_service.rb

@ -1,6 +1,5 @@
module Exports module Exports
class ExportService class ExportService
include Exports::LettingsLogExportConstants
include CollectionTimeHelper include CollectionTimeHelper
def initialize(storage_service, logger = Rails.logger) def initialize(storage_service, logger = Rails.logger)

18
app/services/exports/user_export_constants.rb

@ -0,0 +1,18 @@
# Constants shared by the users XML export.
module Exports
  module UserExportConstants
    # Maximum number of user records read per slice, i.e. per XML part file.
    MAX_XML_RECORDS = 10_000

    # Names of the user attributes that are written to the export; any other
    # attribute is left out. Frozen so the shared constant cannot be mutated.
    EXPORT_FIELDS = Set[
      "id",
      "email",
      "name",
      "phone",
      "organisation_id",
      "organisation_name",
      "role",
      "is_dpo",
      "is_key_contact",
      "active",
      "sign_in_count",
      "last_sign_in_at",
    ].freeze
  end
end

156
app/services/exports/user_export_service.rb

@ -0,0 +1,156 @@
module Exports
class UserExportService
include Exports::UserExportConstants
include CollectionTimeHelper
# Sets up a users export run.
#
# @param storage_service object responding to +write_file+; receives the finished ZIP
# @param start_time upper bound of the export window, also stored as the run's started_at
# @param logger object responding to +info+; defaults to Rails.logger
def initialize(storage_service, start_time, logger = Rails.logger)
  @storage_service = storage_service
  @start_time = start_time
  @logger = logger
end
# Exports users as a zipped XML archive and records the run as a LogsExport row.
#
# The run is saved with empty_export set when no archive was produced.
#
# @param full_update [Boolean] when true, the previous export is ignored and every
#   user updated up to the start time is exported
# @return [Hash] archive name => time written; empty when there was nothing to export
def export_xml_users(full_update: false)
  collection = "users"
  # LogsExport rows carry a collection, so only a previous *users* export may set the
  # lower bound of this run's window; a run for another collection must not move it.
  recent_export = LogsExport.where(collection:).order("started_at").last
  base_number = LogsExport.where(empty_export: false, collection:).maximum(:base_number) || 1

  export = build_export_run(collection, base_number, full_update)
  archives_for_manifest = write_export_archive(export, collection, recent_export, full_update)

  export.empty_export = archives_for_manifest.empty?
  export.save!

  archives_for_manifest
end
private
# Builds (without saving) the LogsExport record for this run and picks its numbering.
#
# A full update uses increment 1 and bumps the base number when any LogsExport row
# exists; otherwise the increment is one more than the highest recorded for
# +base_number+ (or 2 when none is recorded). When no earlier export of this
# collection contained data, increment_number is not passed to the new record.
#
# NOTE(review): LogsExport.any? is not scoped to +collection+ - confirm that is intended.
def build_export_run(collection, base_number, full_update)
  @logger.info("Building export run for #{collection}")

  exports_with_data = LogsExport.where(collection:, empty_export: false)
  latest_increment = exports_with_data.where(base_number:).maximum(:increment_number) || 1

  if full_update
    base_number += 1 if LogsExport.any? # Only increment when it's not the first run
    next_increment = 1
  else
    next_increment = latest_increment + 1
  end

  attributes = { collection:, base_number:, started_at: @start_time }
  attributes[:increment_number] = next_increment unless exports_with_data.empty?
  LogsExport.new(attributes)
end
# Builds the archive base name (no file extension) for an export run.
#
# @param collection name of the exported collection, e.g. "users"
# @param base_number base sequence number, left-padded with zeros to 4 characters
# @param increment increment number, left-padded with zeros to 4 characters
# @return [String, nil] lower-cased archive name, or nil when +collection+ is nil/false
def get_archive_name(collection, base_number, increment)
  return unless collection

  pad = ->(number) { number.to_s.rjust(4, "0") }
  start_year = current_collection_start_year
  name = "core_#{collection}_#{start_year}_#{start_year + 1}_apr_mar_f#{pad.call(base_number)}_inc#{pad.call(increment)}"
  name.downcase
end
# Writes the users selected for this run into a ZIP archive and passes it to the
# storage service.
#
# Users are read in slices of at most MAX_XML_RECORDS; each slice becomes one numbered
# XML part, and a manifest.xml holding the number of exported records is added last.
#
# @param export run record supplying base_number and increment_number for the name
# @param collection collection name used in the archive name
# @param recent_export previous run bounding the export window, or nil
# @param full_update [Boolean] when true, +recent_export+ is ignored
# @return [Hash] { archive name => Time.zone.now }, or {} when no users matched
def write_export_archive(export, collection, recent_export, full_update)
  archive = get_archive_name(collection, export.base_number, export.increment_number)
  users_to_export = retrieve_users(recent_export, full_update)

  initial_users_count = users_to_export.count
  @logger.info("Creating #{archive} - #{initial_users_count} users")
  return {} if initial_users_count.zero?

  temp_files = []
  zip_file = Zip::File.open_buffer(StringIO.new)

  part_number = 1
  last_exported_user = nil
  users_count_after_export = 0

  loop do
    users_slice = next_users_slice(users_to_export, last_exported_user)
    break if users_slice.empty?

    data_xml = build_export_xml(users_slice)
    temp_files << data_xml
    part_number_str = "pt#{part_number.to_s.rjust(3, '0')}"
    zip_file.add("#{archive}_#{part_number_str}.xml", data_xml)

    part_number += 1
    last_exported_user = users_slice.last
    users_count_after_export += users_slice.count
    @logger.info("Added #{archive}_#{part_number_str}.xml")
  end

  # Count what was actually written, which may differ from the initial count if the
  # data changed while the slices were being read.
  manifest_xml = build_manifest_xml(users_count_after_export)
  temp_files << manifest_xml
  zip_file.add("manifest.xml", manifest_xml)

  # Required by S3 to avoid Aws::S3::Errors::BadDigest
  zip_io = zip_file.write_buffer
  zip_io.rewind

  @logger.info("Writing #{archive}.zip")
  @storage_service.write_file("#{archive}.zip", zip_io)

  { archive => Time.zone.now }
ensure
  # The XML temp files are only needed until the ZIP buffer has been built.
  temp_files&.each(&:close!)
end

# Loads the next slice of +users+ in (created_at, id) order, resuming after
# +previous_user+ (or from the beginning when it is nil).
def next_users_slice(users, previous_user)
  slice = users.order(:created_at, :id).limit(MAX_XML_RECORDS)

  if previous_user
    # id breaks ties, so users sharing a created_at with the last exported user are
    # not skipped at a slice boundary.
    slice = slice.where(
      "created_at > :created_at OR (created_at = :created_at AND id > :id)",
      created_at: previous_user.created_at,
      id: previous_user.id,
    )
  end

  slice.to_a
end
# Selects the users that belong in this run.
#
# For a partial run with a previous export, users updated between that export's
# started_at and this run's start time (both inclusive) are selected; otherwise every
# user updated up to the start time is selected.
#
# @param recent_export previous run, or nil when there is none
# @param full_update [Boolean] when true, +recent_export+ is ignored
# @return the result of User.where for the chosen window
def retrieve_users(recent_export, full_update)
  if full_update || !recent_export
    window = { to: @start_time }
    return User.where("updated_at <= :to", window)
  end

  window = { from: recent_export.started_at, to: @start_time }
  User.where("(updated_at >= :from AND updated_at <= :to)", window)
end
# Writes +xml_doc+ as UTF-8 into a new temp file and rewinds it so it can be read
# from the start.
#
# @param xml_doc object responding to +write_xml_to(io, encoding:)+
# @return [Tempfile] the rewound temp file
def xml_doc_to_temp_file(xml_doc)
  Tempfile.new.tap do |temp_file|
    xml_doc.write_xml_to(temp_file, encoding: "UTF-8")
    temp_file.rewind
  end
end
# Builds the manifest document holding the record count, nested as
# report > form-data-summary > records > count-of-records, and returns it as a
# temp file.
#
# @param record_number number of records written to the archive
def build_manifest_xml(record_number)
  manifest = Nokogiri::XML("<report/>")

  summary = manifest.at("report").add_child(manifest.create_element("form-data-summary"))
  records = summary.add_child(manifest.create_element("records"))
  records.add_child(manifest.create_element("count-of-records", record_number))

  xml_doc_to_temp_file(manifest)
end
# Returns the user's pre-type-cast attribute hash with "role" and "active" replaced
# by the values of user.role / user.active?, and "organisation_name" set to the
# name of the user's organisation.
def apply_cds_transformation(user)
  user.attributes_before_type_cast.merge!(
    "role" => user.role,
    "organisation_name" => user.organisation.name,
    "active" => user.active?,
  )
end
# Renders +users+ as a <forms> document with one <form> element per user and returns
# it as a temp file. Only attributes named in EXPORT_FIELDS are written, in the order
# they appear in the user's transformed attribute hash.
def build_export_xml(users)
  doc = Nokogiri::XML("<forms/>")
  forms = doc.at("forms")

  users.each do |user|
    exported = apply_cds_transformation(user).select { |field, _value| EXPORT_FIELDS.include?(field) }

    form = doc.create_element("form")
    forms << form
    exported.each { |field, value| form << doc.create_element(field, value) }
  end

  xml_doc_to_temp_file(doc)
end
end
end

17
spec/fixtures/exports/user.xml vendored

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<forms>
<form>
<id>{id}</id>
<email>{email}</email>
<name>Danny Rojas</name>
<organisation_id>{organisation_id}</organisation_id>
<sign_in_count>5</sign_in_count>
<last_sign_in_at/>
<role>data_provider</role>
<phone>1234512345123</phone>
<is_dpo>false</is_dpo>
<is_key_contact>false</is_key_contact>
<active>true</active>
<organisation_name>MHCLG</organisation_name>
</form>
</forms>

6
spec/services/exports/lettings_log_export_service_spec.rb

@ -11,8 +11,6 @@ RSpec.describe Exports::LettingsLogExportService do
let(:real_2021_2022_form) { Form.new("config/forms/2021_2022.json") } let(:real_2021_2022_form) { Form.new("config/forms/2021_2022.json") }
let(:real_2022_2023_form) { Form.new("config/forms/2022_2023.json") } let(:real_2022_2023_form) { Form.new("config/forms/2022_2023.json") }
let(:expected_master_manifest_filename) { "Manifest_2022_05_01_0001.csv" }
let(:expected_master_manifest_rerun) { "Manifest_2022_05_01_0002.csv" }
let(:expected_zip_filename) { "core_2021_2022_apr_mar_f0001_inc0001.zip" } let(:expected_zip_filename) { "core_2021_2022_apr_mar_f0001_inc0001.zip" }
let(:expected_data_filename) { "core_2021_2022_apr_mar_f0001_inc0001_pt001.xml" } let(:expected_data_filename) { "core_2021_2022_apr_mar_f0001_inc0001_pt001.xml" }
let(:expected_manifest_filename) { "manifest.xml" } let(:expected_manifest_filename) { "manifest.xml" }
@ -50,6 +48,7 @@ RSpec.describe Exports::LettingsLogExportService do
context "when exporting daily lettings logs in XML" do context "when exporting daily lettings logs in XML" do
context "and no lettings logs is available for export" do context "and no lettings logs is available for export" do
it "returns an empty archives list" do it "returns an empty archives list" do
expect(storage_service).not_to receive(:write_file)
expect(export_service.export_xml_lettings_logs).to eq({}) expect(export_service.export_xml_lettings_logs).to eq({})
end end
end end
@ -74,7 +73,7 @@ RSpec.describe Exports::LettingsLogExportService do
end end
it "returns empty archives list for archives manifest" do it "returns empty archives list for archives manifest" do
export_service.export_xml_lettings_logs expect(storage_service).not_to receive(:write_file)
expect(export_service.export_xml_lettings_logs).to eq({}) expect(export_service.export_xml_lettings_logs).to eq({})
end end
end end
@ -333,6 +332,7 @@ RSpec.describe Exports::LettingsLogExportService do
end end
it "does not add any entry for the master manifest (no lettings logs)" do it "does not add any entry for the master manifest (no lettings logs)" do
expect(storage_service).not_to receive(:write_file)
expect(export_service.export_xml_lettings_logs).to eq({}) expect(export_service.export_xml_lettings_logs).to eq({})
end end
end end

219
spec/services/exports/user_export_service_spec.rb

@ -0,0 +1,219 @@
require "rails_helper"
RSpec.describe Exports::UserExportService do
subject(:export_service) { described_class.new(storage_service, start_time) }
let(:storage_service) { instance_double(Storage::S3Service) }
let(:xml_export_file) { File.open("spec/fixtures/exports/user.xml", "r:UTF-8") }
let(:local_manifest_file) { File.open("spec/fixtures/exports/manifest.xml", "r:UTF-8") }
let(:expected_zip_filename) { "core_users_2022_2023_apr_mar_f0001_inc0001.zip" }
let(:expected_data_filename) { "core_users_2022_2023_apr_mar_f0001_inc0001_pt001.xml" }
let(:expected_manifest_filename) { "manifest.xml" }
let(:start_time) { Time.zone.local(2022, 5, 1) }
let(:organisation) { create(:organisation, with_dsa: false) }
# Fills the first {id}, {organisation_id} and {email} placeholders of the fixture
# template with the user's values. The template is modified in place and returned.
def replace_entity_ids(user, export_template)
  export_template.sub!(/\{id\}/, user["id"].to_s)
  export_template.sub!(/\{organisation_id\}/, user["organisation_id"].to_s)
  export_template.sub!(/\{email\}/, user["email"].to_s)
  # sub! returns nil when nothing was replaced, so return the template explicitly.
  export_template
end
# Fills the first {recno} placeholder of the manifest template with +record_number+.
# The template is modified in place and returned.
def replace_record_number(export_template, record_number)
  export_template.sub!(/\{recno\}/, record_number.to_s)
  # sub! returns nil when nothing was replaced, so return the template explicitly.
  export_template
end
before do
Timecop.freeze(start_time)
Singleton.__init__(FormHandler)
allow(storage_service).to receive(:write_file)
end
after do
Timecop.return
end
context "when exporting daily users in XML" do
context "and no users are available for export" do
it "returns an empty archives list" do
expect(export_service.export_xml_users).to eq({})
end
end
context "and one user is available for export" do
let!(:user) { create(:user, organisation:) }
it "generates a ZIP export file with the expected filename" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
export_service.export_xml_users
end
it "generates an XML export file with the expected filename within the ZIP file" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
expect(entry).not_to be_nil
expect(entry.name).to eq(expected_data_filename)
end
export_service.export_xml_users
end
it "generates an XML manifest file with the expected content within the ZIP file" do
expected_content = replace_record_number(local_manifest_file.read, 1)
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
expect(entry).not_to be_nil
expect(entry.get_input_stream.read).to eq(expected_content)
end
export_service.export_xml_users
end
it "generates an XML export file with the expected content within the ZIP file" do
expected_content = replace_entity_ids(user, xml_export_file.read)
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
expect(entry).not_to be_nil
expect(entry.get_input_stream.read).to eq(expected_content)
end
export_service.export_xml_users
end
it "returns the list with correct archive" do
expect(export_service.export_xml_users).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
end
end
context "and multiple users are available for export" do
before do
create(:user, organisation:)
create(:user, organisation:)
end
it "generates an XML manifest file with the expected content within the ZIP file" do
expected_content = replace_record_number(local_manifest_file.read, 2)
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
expect(entry).not_to be_nil
expect(entry.get_input_stream.read).to eq(expected_content)
end
export_service.export_xml_users
end
it "creates an export record in a database with correct time" do
expect { export_service.export_xml_users }
.to change(LogsExport, :count).by(1)
expect(LogsExport.last.started_at).to be_within(2.seconds).of(start_time)
end
context "when this is the first export (full)" do
  it "returns a ZIP archive for the master manifest (existing users)" do
    expect(export_service.export_xml_users).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
  end
end
context "and underlying data changes between getting the users and writing the manifest" do
  # Stand-in for a generated XML file: an empty <forms/> document in a temp file.
  def create_fake_manifest
    file = Tempfile.new
    doc = Nokogiri::XML("<forms/>")
    doc.write_xml_to(file, encoding: "UTF-8")
    file.rewind
    file
  end

  # Destroys the users while the export is in progress, then returns a stand-in data file.
  def remove_users(users)
    users.each(&:destroy)
    create_fake_manifest
  end

  it "maintains the same record number" do
    # rubocop:disable RSpec/SubjectStub
    allow(export_service).to receive(:build_export_xml) do |users|
      remove_users(users)
    end
    allow(export_service).to receive(:build_manifest_xml) do
      create_fake_manifest
    end

    expect(export_service).to receive(:build_manifest_xml).with(2)
    # rubocop:enable RSpec/SubjectStub

    export_service.export_xml_users
  end
end
context "when this is a second export (partial)" do
  before do
    # A previous users export that started after this run's start time, so no user
    # falls inside the partial export window.
    previous_export_time = Time.zone.local(2022, 6, 1)
    LogsExport.create!(started_at: previous_export_time, collection: "users")
  end

  it "does not add any entry for the master manifest (no users)" do
    expect(export_service.export_xml_users).to eq({})
  end
end
end
context "and a previous export has run the same day having users" do
before do
create(:user, organisation:)
export_service.export_xml_users
end
context "and we trigger another full update" do
it "increments the base number" do
export_service.export_xml_users(full_update: true)
expect(LogsExport.last.base_number).to eq(2)
end
it "resets the increment number" do
export_service.export_xml_users(full_update: true)
expect(LogsExport.last.increment_number).to eq(1)
end
it "returns a correct archives list for manifest file" do
expect(export_service.export_xml_users(full_update: true)).to eq({ "core_users_2022_2023_apr_mar_f0002_inc0001" => start_time })
end
it "generates a ZIP export file with the expected filename" do
expect(storage_service).to receive(:write_file).with("core_users_2022_2023_apr_mar_f0002_inc0001.zip", any_args)
export_service.export_xml_users(full_update: true)
end
end
end
context "and a previous export has run having no users" do
before { export_service.export_xml_users }
it "doesn't increment the manifest number by 1" do
export_service.export_xml_users
expect(LogsExport.last.increment_number).to eq(1)
end
end
context "and a user has been migrated since the previous partial export" do
before do
create(:user, updated_at: Time.zone.local(2022, 4, 27), organisation:)
create(:user, updated_at: Time.zone.local(2022, 4, 27), organisation:)
LogsExport.create!(started_at: Time.zone.local(2022, 4, 26), base_number: 1, increment_number: 1)
end
it "generates an XML manifest file with the expected content within the ZIP file" do
expected_content = replace_record_number(local_manifest_file.read, 2)
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
expect(entry).not_to be_nil
expect(entry.get_input_stream.read).to eq(expected_content)
end
expect(export_service.export_xml_users).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
end
end
end
end
Loading…
Cancel
Save