6 changed files with 413 additions and 4 deletions
@ -0,0 +1,18 @@ |
|||||||
|
module Exports
  # Constants shared by the user XML export.
  module UserExportConstants
    # Maximum number of user records written to a single XML part file.
    MAX_XML_RECORDS = 10_000

    # Attribute names that are emitted as XML elements for each exported user.
    # Any attribute not listed here is left out of the export. Frozen so the
    # shared constant cannot be mutated at runtime.
    EXPORT_FIELDS = Set[
      "id",
      "email",
      "name",
      "phone",
      "organisation_id",
      "organisation_name",
      "role",
      "is_dpo",
      "is_key_contact",
      "active",
      "sign_in_count",
      "last_sign_in_at",
    ].freeze
  end
end
||||||
@ -0,0 +1,156 @@ |
|||||||
|
module Exports
  # Exports users as XML part files bundled, together with a manifest, into a
  # single ZIP archive that is handed to the injected storage service. Each run
  # is recorded as a LogsExport row for the "users" collection.
  class UserExportService
    include Exports::UserExportConstants
    include CollectionTimeHelper

    # @param storage_service [#write_file] called with the ZIP file name and the ZIP IO
    # @param start_time [Time] inclusive upper bound on users' updated_at; also saved as the run's started_at
    # @param logger [#info] defaults to Rails.logger
    def initialize(storage_service, start_time, logger = Rails.logger)
      @storage_service = storage_service
      @logger = logger
      @start_time = start_time
    end

    # Runs one export of the "users" collection and saves the LogsExport record.
    #
    # @param full_update [Boolean] when true every user updated up to the start
    #   time is exported; otherwise only users updated since the latest export
    # @return [Hash{String => Time}] archive name (without ".zip") mapped to the
    #   time it was written; empty when there were no users to export
    def export_xml_users(full_update: false)
      # NOTE(review): the latest export is looked up across all collections, not
      # only "users" - confirm this is intended before scoping it.
      recent_export = LogsExport.order("started_at").last

      collection = "users"
      base_number = LogsExport.where(empty_export: false, collection:).maximum(:base_number) || 1
      export = build_export_run(collection, base_number, full_update)
      archives_for_manifest = write_export_archive(export, collection, recent_export, full_update)

      export.empty_export = archives_for_manifest.empty?
      export.save!

      archives_for_manifest
    end

  private

    # Builds (without saving) the LogsExport record for this run, working out
    # the base and increment numbers from previous non-empty exports.
    def build_export_run(collection, base_number, full_update)
      @logger.info("Building export run for #{collection}")
      previous_exports_with_data = LogsExport.where(collection:, empty_export: false)

      # Looked up with the base number as passed in, i.e. before any bump below.
      increment_number = previous_exports_with_data.where(base_number:).maximum(:increment_number) || 1

      if full_update
        # NOTE(review): counts exports of any collection - confirm this is intended.
        base_number += 1 if LogsExport.any? # Only increment when it's not the first run
        increment_number = 1
      else
        increment_number += 1
      end

      # With no earlier export containing data, the increment number is left to
      # the model/database default instead of the value computed above.
      if previous_exports_with_data.empty?
        return LogsExport.new(collection:, base_number:, started_at: @start_time)
      end

      LogsExport.new(collection:, started_at: @start_time, base_number:, increment_number:)
    end

    # Builds the lower-cased archive name from the collection, the collection
    # year span and the zero-padded base/increment numbers.
    # `current_collection_start_year` is not defined in this class; presumably it
    # comes from CollectionTimeHelper.
    #
    # @return [String, nil] nil when no collection is given
    def get_archive_name(collection, base_number, increment)
      return unless collection

      base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
      increment_str = "inc#{increment.to_s.rjust(4, '0')}"
      "core_#{collection}_#{current_collection_start_year}_#{current_collection_start_year + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
    end

    # Writes the users in scope into XML part files of at most MAX_XML_RECORDS
    # users each, adds a manifest holding the number of users actually written,
    # and passes the resulting ZIP to the storage service.
    #
    # @return [Hash{String => Time}] empty when no users are in scope
    def write_export_archive(export, collection, recent_export, full_update)
      archive = get_archive_name(collection, export.base_number, export.increment_number)

      initial_users_count = retrieve_users(recent_export, full_update).count
      @logger.info("Creating #{archive} - #{initial_users_count} users")
      return {} if initial_users_count.zero?

      zip_file = Zip::File.open_buffer(StringIO.new)
      temp_files = []

      part_number = 1
      last_user = nil
      users_count_after_export = 0

      loop do
        users_slice = next_users_slice(recent_export, full_update, last_user)
        break if users_slice.empty?

        data_xml = build_export_xml(users_slice)
        temp_files << data_xml
        part_file_name = "#{archive}_pt#{part_number.to_s.rjust(3, '0')}.xml"
        zip_file.add(part_file_name, data_xml)
        part_number += 1
        last_user = users_slice.last
        users_count_after_export += users_slice.count
        @logger.info("Added #{part_file_name}")
      end

      # The manifest reports what was written, not the initial count, because
      # the underlying data can change while the slices are being read.
      manifest_xml = build_manifest_xml(users_count_after_export)
      temp_files << manifest_xml
      zip_file.add("manifest.xml", manifest_xml)

      # Required by S3 to avoid Aws::S3::Errors::BadDigest
      zip_io = zip_file.write_buffer
      zip_io.rewind
      @logger.info("Writing #{archive}.zip")
      @storage_service.write_file("#{archive}.zip", zip_io)
      { archive => Time.zone.now }
    ensure
      # The ZIP content lives in zip_io once written, so the temporary XML files
      # can be closed and unlinked. temp_files is nil on the early return above.
      temp_files&.each { |file| file.close! if file.respond_to?(:close!) }
    end

    # Loads the next slice of at most MAX_XML_RECORDS users, ordered by
    # (created_at, id). Paging on both columns keeps users that share a
    # created_at value with the last user of the previous slice from being
    # skipped, and makes the order deterministic.
    #
    # @param last_user [User, nil] last user of the previous slice, nil for the first slice
    # @return [Array<User>]
    def next_users_slice(recent_export, full_update, last_user)
      scope = retrieve_users(recent_export, full_update)

      if last_user
        scope = scope.where(
          "created_at > :created_at OR (created_at = :created_at AND id > :id)",
          created_at: last_user.created_at,
          id: last_user.id
        )
      end

      # Organisations are preloaded because every exported user reads its
      # organisation's name.
      scope.preload(:organisation).order(:created_at, :id).limit(MAX_XML_RECORDS).to_a
    end

    # Users in scope for this run: for an incremental run with a previous
    # export, those updated between that export's started_at and the start time
    # (both inclusive); otherwise every user updated up to the start time.
    def retrieve_users(recent_export, full_update)
      if !full_update && recent_export
        params = { from: recent_export.started_at, to: @start_time }
        User.where("(updated_at >= :from AND updated_at <= :to)", params)
      else
        params = { to: @start_time }
        User.where("updated_at <= :to", params)
      end
    end

    # Serialises the XML document as UTF-8 into a new Tempfile and returns it
    # rewound. The caller is responsible for closing the file.
    def xml_doc_to_temp_file(xml_doc)
      file = Tempfile.new
      xml_doc.write_xml_to(file, encoding: "UTF-8")
      file.rewind
      file
    end

    # Builds the manifest document
    # <report><form-data-summary><records><count-of-records>N</count-of-records>...
    # and returns it as a Tempfile.
    def build_manifest_xml(record_number)
      doc = Nokogiri::XML("<report/>")
      doc.at("report") << doc.create_element("form-data-summary")
      doc.at("form-data-summary") << doc.create_element("records")
      doc.at("records") << doc.create_element("count-of-records", record_number)

      xml_doc_to_temp_file(doc)
    end

    # Returns the user's raw attributes with "role", "organisation_name" and
    # "active" replaced or added from the model's own methods.
    def apply_cds_transformation(user)
      attribute_hash = user.attributes_before_type_cast
      attribute_hash["role"] = user.role
      # Safe navigation so a user without an organisation exports an empty
      # element instead of raising.
      attribute_hash["organisation_name"] = user.organisation&.name
      attribute_hash["active"] = user.active?
      attribute_hash
    end

    # Builds a <forms> document with one <form> per user, containing one
    # element for each attribute listed in EXPORT_FIELDS, and returns it as a
    # Tempfile. Elements appear in the order of the attribute hash.
    def build_export_xml(users)
      doc = Nokogiri::XML("<forms/>")

      users.each do |user|
        attribute_hash = apply_cds_transformation(user)
        form = doc.create_element("form")
        doc.at("forms") << form
        attribute_hash.each do |key, value|
          next unless EXPORT_FIELDS.include?(key)

          form << doc.create_element(key, value)
        end
      end

      xml_doc_to_temp_file(doc)
    end
  end
end
||||||
@ -0,0 +1,17 @@ |
|||||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||||
|
<forms> |
||||||
|
<form> |
||||||
|
<id>{id}</id> |
||||||
|
<email>{email}</email> |
||||||
|
<name>Danny Rojas</name> |
||||||
|
<organisation_id>{organisation_id}</organisation_id> |
||||||
|
<sign_in_count>5</sign_in_count> |
||||||
|
<last_sign_in_at/> |
||||||
|
<role>data_provider</role> |
||||||
|
<phone>1234512345123</phone> |
||||||
|
<is_dpo>false</is_dpo> |
||||||
|
<is_key_contact>false</is_key_contact> |
||||||
|
<active>true</active> |
||||||
|
<organisation_name>MHCLG</organisation_name> |
||||||
|
</form> |
||||||
|
</forms> |
||||||
@ -0,0 +1,219 @@ |
|||||||
|
require "rails_helper" |
||||||
|
|
||||||
|
RSpec.describe Exports::UserExportService do
  subject(:export_service) { described_class.new(storage_service, start_time) }

  let(:storage_service) { instance_double(Storage::S3Service) }

  # Fixture templates are read straight into strings so no file handle stays open.
  let(:xml_export_template) { File.read("spec/fixtures/exports/user.xml", encoding: "UTF-8") }
  let(:manifest_template) { File.read("spec/fixtures/exports/manifest.xml", encoding: "UTF-8") }

  let(:expected_archive_name) { "core_users_2022_2023_apr_mar_f0001_inc0001" }
  let(:expected_zip_filename) { "#{expected_archive_name}.zip" }
  let(:expected_data_filename) { "#{expected_archive_name}_pt001.xml" }
  let(:expected_manifest_filename) { "manifest.xml" }
  let(:start_time) { Time.zone.local(2022, 5, 1) }
  let(:organisation) { create(:organisation, with_dsa: false) }

  # Fills the {id}, {organisation_id} and {email} placeholders of the user
  # fixture. Uses non-mutating `sub` so the template is always returned, even
  # when a placeholder is missing (`sub!` would return nil in that case).
  def replace_entity_ids(user, export_template)
    export_template
      .sub(/\{id\}/, user["id"].to_s)
      .sub(/\{organisation_id\}/, user["organisation_id"].to_s)
      .sub(/\{email\}/, user["email"].to_s)
  end

  # Fills the {recno} placeholder of the manifest fixture.
  def replace_record_number(export_template, record_number)
    export_template.sub(/\{recno\}/, record_number.to_s)
  end

  # Expects the expected ZIP to be written and to contain `entry_name` with
  # exactly `expected_content`.
  def expect_zip_entry_content(entry_name, expected_content)
    expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
      entry = Zip::File.open_buffer(content).find_entry(entry_name)
      expect(entry).not_to be_nil
      expect(entry.get_input_stream.read).to eq(expected_content)
    end
  end

  before do
    Timecop.freeze(start_time)
    Singleton.__init__(FormHandler)
    allow(storage_service).to receive(:write_file)
  end

  after do
    Timecop.return
  end

  context "when exporting daily users in XML" do
    context "and no users are available for export" do
      it "returns an empty archives list" do
        expect(export_service.export_xml_users).to eq({})
      end
    end

    context "and one user is available for export" do
      let!(:user) { create(:user, organisation:) }

      it "generates a ZIP export file with the expected filename" do
        expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
        export_service.export_xml_users
      end

      it "generates an XML export file with the expected filename within the ZIP file" do
        expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
          entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
          expect(entry).not_to be_nil
          expect(entry.name).to eq(expected_data_filename)
        end
        export_service.export_xml_users
      end

      it "generates an XML manifest file with the expected content within the ZIP file" do
        expect_zip_entry_content(expected_manifest_filename, replace_record_number(manifest_template, 1))

        export_service.export_xml_users
      end

      it "generates an XML export file with the expected content within the ZIP file" do
        expect_zip_entry_content(expected_data_filename, replace_entity_ids(user, xml_export_template))

        export_service.export_xml_users
      end

      it "returns the list with correct archive" do
        expect(export_service.export_xml_users).to eq({ expected_archive_name => start_time })
      end
    end

    context "and multiple users are available for export" do
      before do
        create(:user, organisation:)
        create(:user, organisation:)
      end

      it "generates an XML manifest file with the expected content within the ZIP file" do
        expect_zip_entry_content(expected_manifest_filename, replace_record_number(manifest_template, 2))

        export_service.export_xml_users
      end

      it "creates an export record in a database with correct time" do
        expect { export_service.export_xml_users }
          .to change(LogsExport, :count).by(1)
        expect(LogsExport.last.started_at).to be_within(2.seconds).of(start_time)
      end

      context "when this is the first export (full)" do
        it "returns a ZIP archive for the master manifest (existing users)" do
          expect(export_service.export_xml_users).to eq({ expected_archive_name => start_time })
        end
      end

      context "and underlying data changes between getting the users and writing the manifest" do
        # An empty <forms/> document in a rewound Tempfile, standing in for the
        # XML files the service would normally build.
        def empty_forms_tempfile
          file = Tempfile.new
          Nokogiri::XML("<forms/>").write_xml_to(file, encoding: "UTF-8")
          file.rewind
          file
        end

        # Simulates users disappearing while the export is in progress.
        def remove_users(users)
          users.each(&:destroy)
          empty_forms_tempfile
        end

        it "maintains the same record number" do
          # rubocop:disable RSpec/SubjectStub
          allow(export_service).to receive(:build_export_xml) do |users|
            remove_users(users)
          end
          allow(export_service).to receive(:build_manifest_xml) do
            empty_forms_tempfile
          end

          expect(export_service).to receive(:build_manifest_xml).with(2)
          # rubocop:enable RSpec/SubjectStub
          export_service.export_xml_users
        end
      end

      context "when this is a second export (partial)" do
        before do
          # The previous export starts after this run's start time, so no user
          # falls inside the incremental window.
          LogsExport.create!(started_at: Time.zone.local(2022, 6, 1)) # this should be user export
        end

        it "does not add any entry for the master manifest (no users)" do
          expect(export_service.export_xml_users).to eq({})
        end
      end
    end

    context "and a previous export has run the same day having users" do
      before do
        create(:user, organisation:)
        export_service.export_xml_users
      end

      context "and we trigger another full update" do
        it "increments the base number" do
          export_service.export_xml_users(full_update: true)
          expect(LogsExport.last.base_number).to eq(2)
        end

        it "resets the increment number" do
          export_service.export_xml_users(full_update: true)
          expect(LogsExport.last.increment_number).to eq(1)
        end

        it "returns a correct archives list for manifest file" do
          expect(export_service.export_xml_users(full_update: true)).to eq({ "core_users_2022_2023_apr_mar_f0002_inc0001" => start_time })
        end

        it "generates a ZIP export file with the expected filename" do
          expect(storage_service).to receive(:write_file).with("core_users_2022_2023_apr_mar_f0002_inc0001.zip", any_args)
          export_service.export_xml_users(full_update: true)
        end
      end
    end

    context "and a previous export has run having no users" do
      before { export_service.export_xml_users }

      it "doesn't increment the manifest number by 1" do
        export_service.export_xml_users

        expect(LogsExport.last.increment_number).to eq(1)
      end
    end

    context "and a user has been migrated since the previous partial export" do
      before do
        create(:user, updated_at: Time.zone.local(2022, 4, 27), organisation:)
        create(:user, updated_at: Time.zone.local(2022, 4, 27), organisation:)
        LogsExport.create!(started_at: Time.zone.local(2022, 4, 26), base_number: 1, increment_number: 1)
      end

      it "generates an XML manifest file with the expected content within the ZIP file" do
        expect_zip_entry_content(expected_manifest_filename, replace_record_number(manifest_template, 2))

        expect(export_service.export_xml_users).to eq({ expected_archive_name => start_time })
      end
    end
  end
end
||||||
Loading…
Reference in new issue