diff --git a/app/services/exports/lettings_log_export_service.rb b/app/services/exports/lettings_log_export_service.rb index 07ed4425b..bf0212782 100644 --- a/app/services/exports/lettings_log_export_service.rb +++ b/app/services/exports/lettings_log_export_service.rb @@ -1,5 +1,5 @@ module Exports - class LettingsLogExportService + class LettingsLogExportService < Exports::XmlExportService include Exports::LettingsLogExportConstants include CollectionTimeHelper @@ -28,26 +28,6 @@ module Exports private - def build_export_run(collection, base_number, full_update) - @logger.info("Building export run for #{collection}") - previous_exports_with_data = Export.where(collection:, empty_export: false) - - increment_number = previous_exports_with_data.where(base_number:).maximum(:increment_number) || 1 - - if full_update - base_number += 1 if Export.any? # Only increment when it's not the first run - increment_number = 1 - else - increment_number += 1 - end - - if previous_exports_with_data.empty? - return Export.new(collection:, base_number:, started_at: @start_time) - end - - Export.new(collection:, started_at: @start_time, base_number:, increment_number:) - end - def get_archive_name(collection, base_number, increment) return unless collection @@ -56,79 +36,16 @@ module Exports "core_#{collection}_#{collection + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase end - def write_export_archive(export, collection, recent_export, full_update) - archive = get_archive_name(collection, export.base_number, export.increment_number) # archive name would be the same for all logs because they're already filtered by year (?) - - initial_logs_count = retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection).count - @logger.info("Creating #{archive} - #{initial_logs_count} logs") - return {} if initial_logs_count.zero? 
- - zip_file = Zip::File.open_buffer(StringIO.new) - - part_number = 1 - last_processed_marker = nil - logs_count_after_export = 0 - - loop do - lettings_logs_slice = if last_processed_marker.present? - retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection) - .where("created_at > ?", last_processed_marker) - .order(:created_at) - .limit(MAX_XML_RECORDS).to_a - else - retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection) - .order(:created_at) - .limit(MAX_XML_RECORDS).to_a - end - - break if lettings_logs_slice.empty? - - data_xml = build_export_xml(lettings_logs_slice) - part_number_str = "pt#{part_number.to_s.rjust(3, '0')}" - zip_file.add("#{archive}_#{part_number_str}.xml", data_xml) - part_number += 1 - last_processed_marker = lettings_logs_slice.last.created_at - logs_count_after_export += lettings_logs_slice.count - @logger.info("Added #{archive}_#{part_number_str}.xml") - end - - manifest_xml = build_manifest_xml(logs_count_after_export) - zip_file.add("manifest.xml", manifest_xml) - - # Required by S3 to avoid Aws::S3::Errors::BadDigest - zip_io = zip_file.write_buffer - zip_io.rewind - @logger.info("Writing #{archive}.zip") - @storage_service.write_file("#{archive}.zip", zip_io) - { archive => Time.zone.now } - end - - def retrieve_lettings_logs(recent_export, full_update) + def retrieve_resources(recent_export, full_update, collection) if !full_update && recent_export params = { from: recent_export.started_at, to: @start_time } - LettingsLog.exportable.where("(updated_at >= :from AND updated_at <= :to) OR (values_updated_at IS NOT NULL AND values_updated_at >= :from AND values_updated_at <= :to)", params) + LettingsLog.exportable.where("(updated_at >= :from AND updated_at <= :to) OR (values_updated_at IS NOT NULL AND values_updated_at >= :from AND values_updated_at <= :to)", params).filter_by_year(collection) else params = { to: @start_time } - LettingsLog.exportable.where("updated_at <= :to", params) + 
LettingsLog.exportable.where("updated_at <= :to", params).filter_by_year(collection) end end - def xml_doc_to_temp_file(xml_doc) - file = Tempfile.new - xml_doc.write_xml_to(file, encoding: "UTF-8") - file.rewind - file - end - - def build_manifest_xml(record_number) - doc = Nokogiri::XML("") - doc.at("report") << doc.create_element("form-data-summary") - doc.at("form-data-summary") << doc.create_element("records") - doc.at("records") << doc.create_element("count-of-records", record_number) - - xml_doc_to_temp_file(doc) - end - def apply_cds_transformation(lettings_log, export_mode) attribute_hash = lettings_log.attributes_before_type_cast attribute_hash["formid"] = attribute_hash["old_form_id"] || (attribute_hash["id"] + LOG_ID_OFFSET) @@ -258,11 +175,5 @@ module Exports xml_doc_to_temp_file(doc) end - - def collection_years_to_export(collection_year) - return [collection_year] if collection_year.present? - - FormHandler.instance.lettings_forms.values.map { |f| f.start_date.year }.uniq - end end end diff --git a/app/services/exports/user_export_service.rb b/app/services/exports/user_export_service.rb index 7d97b48b4..e5f5e91a5 100644 --- a/app/services/exports/user_export_service.rb +++ b/app/services/exports/user_export_service.rb @@ -1,5 +1,5 @@ module Exports - class UserExportService + class UserExportService < Exports::XmlExportService include Exports::UserExportConstants include CollectionTimeHelper @@ -25,26 +25,6 @@ module Exports private - def build_export_run(collection, base_number, full_update) - @logger.info("Building export run for #{collection}") - previous_exports_with_data = Export.where(collection:, empty_export: false) - - increment_number = previous_exports_with_data.where(base_number:).maximum(:increment_number) || 1 - - if full_update - base_number += 1 if Export.any? # Only increment when it's not the first run - increment_number = 1 - else - increment_number += 1 - end - - if previous_exports_with_data.empty? 
- return Export.new(collection:, base_number:, started_at: @start_time) - end - - Export.new(collection:, started_at: @start_time, base_number:, increment_number:) - end - def get_archive_name(collection, base_number, increment) return unless collection @@ -53,54 +33,7 @@ module Exports "core_#{collection}_#{base_number_str}_#{increment_str}".downcase end - def write_export_archive(export, collection, recent_export, full_update) - archive = get_archive_name(collection, export.base_number, export.increment_number) - - initial_users_count = retrieve_users(recent_export, full_update).count - @logger.info("Creating #{archive} - #{initial_users_count} users") - return {} if initial_users_count.zero? - - zip_file = Zip::File.open_buffer(StringIO.new) - - part_number = 1 - last_processed_marker = nil - users_count_after_export = 0 - - loop do - users_slice = if last_processed_marker.present? - retrieve_users(recent_export, full_update) - .where("created_at > ?", last_processed_marker) - .order(:created_at) - .limit(MAX_XML_RECORDS).to_a - else - retrieve_users(recent_export, full_update) - .order(:created_at) - .limit(MAX_XML_RECORDS).to_a - end - - break if users_slice.empty? 
- - data_xml = build_export_xml(users_slice) - part_number_str = "pt#{part_number.to_s.rjust(3, '0')}" - zip_file.add("#{archive}_#{part_number_str}.xml", data_xml) - part_number += 1 - last_processed_marker = users_slice.last.created_at - users_count_after_export += users_slice.count - @logger.info("Added #{archive}_#{part_number_str}.xml") - end - - manifest_xml = build_manifest_xml(users_count_after_export) - zip_file.add("manifest.xml", manifest_xml) - - # Required by S3 to avoid Aws::S3::Errors::BadDigest - zip_io = zip_file.write_buffer - zip_io.rewind - @logger.info("Writing #{archive}.zip") - @storage_service.write_file("#{archive}.zip", zip_io) - { archive => Time.zone.now } - end - - def retrieve_users(recent_export, full_update) + def retrieve_resources(recent_export, full_update, _collection) if !full_update && recent_export params = { from: recent_export.started_at, to: @start_time } User.where("(updated_at >= :from AND updated_at <= :to)", params) @@ -110,30 +43,6 @@ module Exports end end - def xml_doc_to_temp_file(xml_doc) - file = Tempfile.new - xml_doc.write_xml_to(file, encoding: "UTF-8") - file.rewind - file - end - - def build_manifest_xml(record_number) - doc = Nokogiri::XML("") - doc.at("report") << doc.create_element("form-data-summary") - doc.at("form-data-summary") << doc.create_element("records") - doc.at("records") << doc.create_element("count-of-records", record_number) - - xml_doc_to_temp_file(doc) - end - - def apply_cds_transformation(user) - attribute_hash = user.attributes_before_type_cast - attribute_hash["role"] = user.role - attribute_hash["organisation_name"] = user.organisation.name - attribute_hash["active"] = user.active? 
- attribute_hash - end - def build_export_xml(users) doc = Nokogiri::XML("") @@ -152,5 +61,13 @@ module Exports xml_doc_to_temp_file(doc) end + + def apply_cds_transformation(user) + attribute_hash = user.attributes_before_type_cast + attribute_hash["role"] = user.role + attribute_hash["organisation_name"] = user.organisation.name + attribute_hash["active"] = user.active? + attribute_hash + end end end diff --git a/app/services/exports/xml_export_service.rb b/app/services/exports/xml_export_service.rb new file mode 100644 index 000000000..c9bad5ff9 --- /dev/null +++ b/app/services/exports/xml_export_service.rb @@ -0,0 +1,120 @@ +module Exports + class XmlExportService + include Exports::LettingsLogExportConstants + include CollectionTimeHelper + + def initialize(storage_service, start_time, logger = Rails.logger) + @storage_service = storage_service + @logger = logger + @start_time = start_time + end + + def export_xml_lettings_logs(full_update: false, collection_year: nil) + archives_for_manifest = {} + recent_export = Export.order("started_at").last + collection_years_to_export(collection_year).each do |collection| + base_number = Export.where(empty_export: false, collection:).maximum(:base_number) || 1 + export = build_export_run(collection, base_number, full_update) + archives = write_export_archive(export, collection, recent_export, full_update) + + archives_for_manifest.merge!(archives) + + export.empty_export = archives.empty? + export.save! + end + + archives_for_manifest + end + + private + + def build_export_run(collection, base_number, full_update) + @logger.info("Building export run for #{collection}") + previous_exports_with_data = Export.where(collection:, empty_export: false) + + increment_number = previous_exports_with_data.where(base_number:).maximum(:increment_number) || 1 + + if full_update + base_number += 1 if Export.any? 
# Only increment when it's not the first run + increment_number = 1 + else + increment_number += 1 + end + + if previous_exports_with_data.empty? + return Export.new(collection:, base_number:, started_at: @start_time) + end + + Export.new(collection:, started_at: @start_time, base_number:, increment_number:) + end + + def write_export_archive(export, collection, recent_export, full_update) + archive = get_archive_name(collection, export.base_number, export.increment_number) # archive name would be the same for all logs because they're already filtered by year (?) + + initial_count = retrieve_resources(recent_export, full_update, collection).count + @logger.info("Creating #{archive} - #{initial_count} resources") + return {} if initial_count.zero? + + zip_file = Zip::File.open_buffer(StringIO.new) + + part_number = 1 + last_processed_marker = nil + count_after_export = 0 + + loop do + slice = if last_processed_marker.present? + retrieve_resources(recent_export, full_update, collection) + .where("created_at > ?", last_processed_marker) + .order(:created_at) + .limit(MAX_XML_RECORDS).to_a + else + retrieve_resources(recent_export, full_update, collection) + .order(:created_at) + .limit(MAX_XML_RECORDS).to_a + end + + break if slice.empty? 
+ + data_xml = build_export_xml(slice) + part_number_str = "pt#{part_number.to_s.rjust(3, '0')}" + zip_file.add("#{archive}_#{part_number_str}.xml", data_xml) + part_number += 1 + last_processed_marker = slice.last.created_at + count_after_export += slice.count + @logger.info("Added #{archive}_#{part_number_str}.xml") + end + + manifest_xml = build_manifest_xml(count_after_export) + zip_file.add("manifest.xml", manifest_xml) + + # Required by S3 to avoid Aws::S3::Errors::BadDigest + zip_io = zip_file.write_buffer + zip_io.rewind + @logger.info("Writing #{archive}.zip") + @storage_service.write_file("#{archive}.zip", zip_io) + { archive => Time.zone.now } + end + + def xml_doc_to_temp_file(xml_doc) + file = Tempfile.new + xml_doc.write_xml_to(file, encoding: "UTF-8") + file.rewind + file + end + + def build_manifest_xml(record_number) + doc = Nokogiri::XML("") + doc.at("report") << doc.create_element("form-data-summary") + doc.at("form-data-summary") << doc.create_element("records") + doc.at("records") << doc.create_element("count-of-records", record_number) + + xml_doc_to_temp_file(doc) + end + + def collection_years_to_export(collection_year) + return [collection_year] if collection_year.present? 
+ + FormHandler.instance.lettings_forms.values.map { |f| f.start_date.year }.uniq + end + end +end diff --git a/spec/services/exports/lettings_log_export_service_spec.rb b/spec/services/exports/lettings_log_export_service_spec.rb index 6006f7e81..fd4df5538 100644 --- a/spec/services/exports/lettings_log_export_service_spec.rb +++ b/spec/services/exports/lettings_log_export_service_spec.rb @@ -207,15 +207,15 @@ RSpec.describe Exports::LettingsLogExportService do expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args) expect(Rails.logger).to receive(:info).with("Building export run for 2021") - expect(Rails.logger).to receive(:info).with("Creating core_2021_2022_apr_mar_f0001_inc0001 - 1 logs") + expect(Rails.logger).to receive(:info).with("Creating core_2021_2022_apr_mar_f0001_inc0001 - 1 resources") expect(Rails.logger).to receive(:info).with("Added core_2021_2022_apr_mar_f0001_inc0001_pt001.xml") expect(Rails.logger).to receive(:info).with("Writing core_2021_2022_apr_mar_f0001_inc0001.zip") expect(Rails.logger).to receive(:info).with("Building export run for 2022") - expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 logs") + expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 resources") expect(Rails.logger).to receive(:info).with("Added core_2022_2023_apr_mar_f0001_inc0001_pt001.xml") expect(Rails.logger).to receive(:info).with("Writing core_2022_2023_apr_mar_f0001_inc0001.zip") expect(Rails.logger).to receive(:info).with("Building export run for 2023") - expect(Rails.logger).to receive(:info).with("Creating core_2023_2024_apr_mar_f0001_inc0001 - 0 logs") + expect(Rails.logger).to receive(:info).with("Creating core_2023_2024_apr_mar_f0001_inc0001 - 0 resources") export_service.export_xml_lettings_logs end @@ -223,7 +223,7 @@ RSpec.describe 
Exports::LettingsLogExportService do it "generates zip export files only for specified year" do expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args) expect(Rails.logger).to receive(:info).with("Building export run for 2022") - expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 logs") + expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 resources") expect(Rails.logger).to receive(:info).with("Added core_2022_2023_apr_mar_f0001_inc0001_pt001.xml") expect(Rails.logger).to receive(:info).with("Writing core_2022_2023_apr_mar_f0001_inc0001.zip") @@ -242,15 +242,15 @@ RSpec.describe Exports::LettingsLogExportService do expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args) expect(Rails.logger).to receive(:info).with("Building export run for 2021") - expect(Rails.logger).to receive(:info).with("Creating core_2021_2022_apr_mar_f0007_inc0004 - 1 logs") + expect(Rails.logger).to receive(:info).with("Creating core_2021_2022_apr_mar_f0007_inc0004 - 1 resources") expect(Rails.logger).to receive(:info).with("Added core_2021_2022_apr_mar_f0007_inc0004_pt001.xml") expect(Rails.logger).to receive(:info).with("Writing core_2021_2022_apr_mar_f0007_inc0004.zip") expect(Rails.logger).to receive(:info).with("Building export run for 2022") - expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 logs") + expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 resources") expect(Rails.logger).to receive(:info).with("Added core_2022_2023_apr_mar_f0001_inc0001_pt001.xml") expect(Rails.logger).to receive(:info).with("Writing core_2022_2023_apr_mar_f0001_inc0001.zip") expect(Rails.logger).to receive(:info).with("Building export run for 2023") - expect(Rails.logger).to 
receive(:info).with("Creating core_2023_2024_apr_mar_f0001_inc0001 - 0 logs") + expect(Rails.logger).to receive(:info).with("Creating core_2023_2024_apr_mar_f0001_inc0001 - 0 resources") export_service.export_xml_lettings_logs end diff --git a/spec/services/exports/user_export_service_spec.rb b/spec/services/exports/user_export_service_spec.rb index d033e2f81..1f0406d01 100644 --- a/spec/services/exports/user_export_service_spec.rb +++ b/spec/services/exports/user_export_service_spec.rb @@ -114,7 +114,7 @@ RSpec.describe Exports::UserExportService do end end - context "and underlying data changes between getting the logs and writting the manifest" do + context "and underlying data changes between getting the users and writing the manifest" do def remove_users(users) users.each(&:destroy) file = Tempfile.new