diff --git a/app/jobs/data_export_xml_job.rb b/app/jobs/data_export_xml_job.rb
index 8b825f6df..76ce32ec1 100644
--- a/app/jobs/data_export_xml_job.rb
+++ b/app/jobs/data_export_xml_job.rb
@@ -3,8 +3,8 @@ class DataExportXmlJob < ApplicationJob
 
   def perform(full_update: false)
     storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["EXPORT_BUCKET"])
-    export_service = Exports::LettingsLogExportService.new(storage_service)
+    export_service = Exports::ExportService.new(storage_service)
 
-    export_service.export_xml_lettings_logs(full_update:)
+    export_service.export_xml(full_update:)
   end
 end
diff --git a/app/services/exports/export_service.rb b/app/services/exports/export_service.rb
new file mode 100644
index 000000000..ddb6ea58c
--- /dev/null
+++ b/app/services/exports/export_service.rb
@@ -0,0 +1,62 @@
+module Exports
+  # Entry point for a complete XML export run: delegates archive creation to
+  # Exports::LettingsLogExportService, then writes a master manifest CSV that
+  # lists every archive the run produced.
+  class ExportService
+    include Exports::LettingsLogExportConstants
+    include CollectionTimeHelper
+
+    # storage_service - receives the manifest through #write_file.
+    # logger          - handed on to the lettings log export (default: Rails.logger).
+    def initialize(storage_service, logger = Rails.logger)
+      @storage_service = storage_service
+      @logger = logger
+    end
+
+    # Runs the lettings log export and writes the master manifest for the run.
+    # Both keyword arguments are passed through unchanged to
+    # LettingsLogExportService#export_xml_lettings_logs.
+    def export_xml(full_update: false, collection_year: nil)
+      start_time = Time.zone.now
+      # Taken before exporting: the export saves LogsExport records, which would
+      # otherwise be counted as an earlier run.
+      daily_run_number = get_daily_run_number
+
+      # Forward the injected logger; without it a caller-supplied logger is ignored.
+      export_service = Exports::LettingsLogExportService.new(@storage_service, start_time, @logger)
+      archives_for_manifest = export_service.export_xml_lettings_logs(full_update:, collection_year:)
+
+      write_master_manifest(daily_run_number, archives_for_manifest)
+    end
+
+    private
+
+    # One more than the number of distinct started_at values among LogsExport
+    # records created today.
+    def get_daily_run_number
+      today = Time.zone.today
+      LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).select(:started_at).distinct.count + 1
+    end
+
+    # Writes the manifest as Manifest_<yyyy>_<mm>_<dd>_<run number padded to 4>.csv.
+    def write_master_manifest(daily_run, archive_datetimes)
+      date_stamp = Time.zone.today.strftime("%Y_%m_%d")
+      run_stamp = daily_run.to_s.rjust(4, "0")
+      file_path = "Manifest_#{date_stamp}_#{run_stamp}.csv"
+      @storage_service.write_file(file_path, build_manifest_csv_io(archive_datetimes))
+    end
+
+    # Returns a StringIO holding the manifest CSV: the header row plus one row
+    # per (archive name, datetime) pair in archive_datetimes.
+    def build_manifest_csv_io(archive_datetimes)
+      headers = ["zip-name", "date-time zipped folder generated", "zip-file-uri"]
+      csv_string = CSV.generate do |csv|
+        csv << headers
+        archive_datetimes.each do |(archive, datetime)|
+          csv << [archive, datetime, "#{archive}.zip"]
+        end
+      end
+      StringIO.new(csv_string)
+    end
+  end
+end
diff --git a/app/services/exports/lettings_log_export_service.rb b/app/services/exports/lettings_log_export_service.rb
index 0c3b6eec4..a8ffa2f51 100644
--- a/app/services/exports/lettings_log_export_service.rb
+++ b/app/services/exports/lettings_log_export_service.rb
@@ -3,20 +3,19 @@ module Exports
     include Exports::LettingsLogExportConstants
     include CollectionTimeHelper
 
-    def initialize(storage_service, logger = Rails.logger)
+    def initialize(storage_service, start_time, logger = Rails.logger)
       @storage_service = storage_service
       @logger = logger
+      @start_time = start_time
     end
 
     def export_xml_lettings_logs(full_update: false, collection_year: nil)
-      start_time = Time.zone.now
-      daily_run_number = get_daily_run_number
       archives_for_manifest = {}
       recent_export = LogsExport.order("started_at").last
       collection_years_to_export(collection_year).each do |collection|
         base_number = LogsExport.where(empty_export: false, collection:).maximum(:base_number) || 1
-        export = build_export_run(collection, start_time, base_number, full_update)
-        archives = write_export_archive(export, collection, start_time, recent_export, full_update)
+        export = build_export_run(collection, base_number, full_update)
+        archives = write_export_archive(export, collection, recent_export, full_update)
 
         archives_for_manifest.merge!(archives)
 
@@ -24,17 +23,12 @@ module Exports
         export.save!
end - write_master_manifest(daily_run_number, archives_for_manifest) + archives_for_manifest end private - def get_daily_run_number - today = Time.zone.today - LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).select(:started_at).distinct.count + 1 - end - - def build_export_run(collection, current_time, base_number, full_update) + def build_export_run(collection, base_number, full_update) @logger.info("Building export run for #{collection}") previous_exports_with_data = LogsExport.where(collection:, empty_export: false) @@ -48,20 +42,10 @@ module Exports end if previous_exports_with_data.empty? - return LogsExport.new(collection:, base_number:, started_at: current_time) + return LogsExport.new(collection:, base_number:, started_at: @start_time) end - LogsExport.new(collection:, started_at: current_time, base_number:, increment_number:) - end - - def write_master_manifest(daily_run, archive_datetimes) - today = Time.zone.today - increment_number = daily_run.to_s.rjust(4, "0") - month = today.month.to_s.rjust(2, "0") - day = today.day.to_s.rjust(2, "0") - file_path = "Manifest_#{today.year}_#{month}_#{day}_#{increment_number}.csv" - string_io = build_manifest_csv_io(archive_datetimes) - @storage_service.write_file(file_path, string_io) + LogsExport.new(collection:, started_at: @start_time, base_number:, increment_number:) end def get_archive_name(collection, base_number, increment) @@ -72,10 +56,10 @@ module Exports "core_#{collection}_#{collection + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase end - def write_export_archive(export, collection, start_time, recent_export, full_update) + def write_export_archive(export, collection, recent_export, full_update) archive = get_archive_name(collection, export.base_number, export.increment_number) # archive name would be the same for all logs because they're already filtered by year (?) 
-      initial_logs_count = retrieve_lettings_logs(start_time, recent_export, full_update).filter_by_year(collection).count
+      initial_logs_count = retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection).count
       @logger.info("Creating #{archive} - #{initial_logs_count} logs")
       return {} if initial_logs_count.zero?
 
@@ -87,12 +71,12 @@ module Exports
 
       loop do
         lettings_logs_slice = if last_processed_marker.present?
-                                retrieve_lettings_logs(start_time, recent_export, full_update).filter_by_year(collection)
+                                retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection)
                                 .where("created_at > ?", last_processed_marker)
                                 .order(:created_at)
                                 .limit(MAX_XML_RECORDS).to_a
                               else
-                                retrieve_lettings_logs(start_time, recent_export, full_update).filter_by_year(collection)
+                                retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection)
                                 .order(:created_at)
                                 .limit(MAX_XML_RECORDS).to_a
                               end
@@ -119,27 +103,16 @@ module Exports
       { archive => Time.zone.now }
     end
 
-    def retrieve_lettings_logs(start_time, recent_export, full_update)
+    def retrieve_lettings_logs(recent_export, full_update)
       if !full_update && recent_export
-        params = { from: recent_export.started_at, to: start_time }
+        params = { from: recent_export.started_at, to: @start_time }
         LettingsLog.exportable.where("(updated_at >= :from AND updated_at <= :to) OR (values_updated_at IS NOT NULL AND values_updated_at >= :from AND values_updated_at <= :to)", params)
       else
-        params = { to: start_time }
+        params = { to: @start_time }
         LettingsLog.exportable.where("updated_at <= :to", params)
       end
     end
 
-    def build_manifest_csv_io(archive_datetimes)
-      headers = ["zip-name", "date-time zipped folder generated", "zip-file-uri"]
-      csv_string = CSV.generate do |csv|
-        csv << headers
-        archive_datetimes.each do |(archive, datetime)|
-          csv << [archive, datetime, "#{archive}.zip"]
-        end
-      end
-      StringIO.new(csv_string)
-    end
-
     def xml_doc_to_temp_file(xml_doc)
       file = Tempfile.new
       xml_doc.write_xml_to(file, encoding: "UTF-8")
diff --git a/lib/tasks/data_export.rake b/lib/tasks/data_export.rake
index 719462cb4..e2445638b 100644
--- a/lib/tasks/data_export.rake
+++ b/lib/tasks/data_export.rake
@@ -10,8 +10,8 @@ namespace :core do
   task :full_data_export_xml, %i[year] => :environment do |_task, args|
     collection_year = args[:year].present? ? args[:year].to_i : nil
     storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["EXPORT_BUCKET"])
-    export_service = Exports::LettingsLogExportService.new(storage_service)
+    export_service = Exports::ExportService.new(storage_service)
 
-    export_service.export_xml_lettings_logs(full_update: true, collection_year:)
+    export_service.export_xml(full_update: true, collection_year:)
   end
 end
diff --git a/spec/lib/tasks/data_export_spec.rb b/spec/lib/tasks/data_export_spec.rb
index afb99f872..4e383e864 100644
--- a/spec/lib/tasks/data_export_spec.rb
+++ b/spec/lib/tasks/data_export_spec.rb
@@ -4,7 +4,7 @@ require "rake"
 describe "rake core:data_export", type: task do
   let(:export_bucket) { "export_bucket" }
   let(:storage_service) { instance_double(Storage::S3Service) }
-  let(:export_service) { instance_double(Exports::LettingsLogExportService) }
+  let(:export_service) { instance_double(Exports::ExportService) }
 
   before do
     Rake.application.rake_require("tasks/data_export")
@@ -12,7 +12,7 @@ describe "rake core:data_export", type: task do
     task.reenable
 
     allow(Storage::S3Service).to receive(:new).and_return(storage_service)
-    allow(Exports::LettingsLogExportService).to receive(:new).and_return(export_service)
+    allow(Exports::ExportService).to receive(:new).and_return(export_service)
     allow(ENV).to receive(:[])
     allow(ENV).to receive(:[]).with("EXPORT_BUCKET").and_return(export_bucket)
   end
@@ -30,7 +30,7 @@ describe "rake core:data_export", type: task do
 
   context "with all available years" do
     it "calls the export service" do
-      expect(export_service).to receive(:export_xml_lettings_logs).with(full_update: true, collection_year: nil)
+      expect(export_service).to receive(:export_xml).with(full_update: true, collection_year: nil)
 
       task.invoke
     end
@@ -38,7 +38,7 @@ describe "rake core:data_export", type: task do
 
   context "with a specific year" do
     it "calls the export service" do
-      expect(export_service).to receive(:export_xml_lettings_logs).with(full_update: true, collection_year: 2022)
+      expect(export_service).to receive(:export_xml).with(full_update: true, collection_year: 2022)
 
       task.invoke("2022")
     end
diff --git a/spec/services/exports/export_service_spec.rb b/spec/services/exports/export_service_spec.rb
new file mode 100644
index 000000000..3e270ddf7
--- /dev/null
+++ b/spec/services/exports/export_service_spec.rb
@@ -0,0 +1,76 @@
+require "rails_helper"
+
+RSpec.describe Exports::ExportService do
+  subject(:export_service) { described_class.new(storage_service) }
+
+  let(:storage_service) { instance_double(Storage::S3Service) }
+  let(:expected_master_manifest_filename) { "Manifest_2022_05_01_0001.csv" }
+  let(:start_time) { Time.zone.local(2022, 5, 1) }
+
+  before do
+    Timecop.freeze(start_time)
+    Singleton.__init__(FormHandler)
+    allow(storage_service).to receive(:write_file)
+    allow(Exports::LettingsLogExportService).to receive(:new).and_return(lettings_logs_export_service)
+  end
+
+  after do
+    Timecop.return
+  end
+
+  context "when exporting daily XMLs" do
+    context "and no lettings archives get created in lettings logs export" do
+      let(:lettings_logs_export_service) { instance_double("Exports::LettingsLogExportService", export_xml_lettings_logs: {}) }
+
+      it "generates a master manifest with the correct name" do
+        expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
+        export_service.export_xml
+      end
+
+      it "generates a master manifest with CSV headers but no data" do
+        actual_content = nil
+        expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\n"
+        allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
+
+        export_service.export_xml
+        expect(actual_content).to eq(expected_content)
+      end
+    end
+
+    context "and one lettings archive gets created in lettings logs export" do
+      let(:lettings_logs_export_service) { instance_double("Exports::LettingsLogExportService", export_xml_lettings_logs: { "some_file_base_name" => start_time }) }
+
+      it "generates a master manifest with the correct name" do
+        expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
+        export_service.export_xml
+      end
+
+      it "generates a master manifest with CSV headers and correct data" do
+        actual_content = nil
+        expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\nsome_file_base_name,2022-05-01 00:00:00 +0100,some_file_base_name.zip\n"
+        allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
+
+        export_service.export_xml
+        expect(actual_content).to eq(expected_content)
+      end
+    end
+
+    context "and multiple lettings archives get created in lettings logs export" do
+      let(:lettings_logs_export_service) { instance_double("Exports::LettingsLogExportService", export_xml_lettings_logs: { "some_file_base_name" => start_time, "second_file_base_name" => start_time }) }
+
+      it "generates a master manifest with the correct name" do
+        expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
+        export_service.export_xml
+      end
+
+      it "generates a master manifest with CSV headers and correct data" do
+        actual_content = nil
+        expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\nsome_file_base_name,2022-05-01 00:00:00 +0100,some_file_base_name.zip\nsecond_file_base_name,2022-05-01 00:00:00 +0100,second_file_base_name.zip\n"
+        allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
+
+        export_service.export_xml
+        expect(actual_content).to eq(expected_content)
+      end
+    end
+  end
+end
diff --git a/spec/services/exports/lettings_log_export_service_spec.rb b/spec/services/exports/lettings_log_export_service_spec.rb
index b3f33f24f..75192a1ec 100644
--- a/spec/services/exports/lettings_log_export_service_spec.rb
+++ b/spec/services/exports/lettings_log_export_service_spec.rb
@@ -1,7 +1,7 @@
 require "rails_helper"
 
 RSpec.describe Exports::LettingsLogExportService do
-  subject(:export_service) { described_class.new(storage_service) }
+  subject(:export_service) { described_class.new(storage_service, start_time) }
 
   let(:storage_service) { instance_double(Storage::S3Service) }
 
@@ -49,18 +49,8 @@ RSpec.describe Exports::LettingsLogExportService do
 
   context "when exporting daily lettings logs in XML" do
     context "and no lettings logs is available for export" do
-      it "generates a master manifest with the correct name" do
-        expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
-        export_service.export_xml_lettings_logs
-      end
-
-      it "generates a master manifest with CSV headers but no data" do
-        actual_content = nil
-        expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\n"
-        allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
-
-        export_service.export_xml_lettings_logs
-        expect(actual_content).to eq(expected_content)
+      it "returns an empty archives list" do
+        expect(export_service.export_xml_lettings_logs).to eq({})
       end
     end
 
@@ -83,13 +73,9 @@ RSpec.describe Exports::LettingsLogExportService do
         )
       end
 
-      it "generates a master manifest with CSV headers but no data" do
-        actual_content = nil
-        expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\n"
-        allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
-
+      it "returns empty archives list for archives manifest" do
         export_service.export_xml_lettings_logs
-        expect(actual_content).to eq(expected_content)
+        expect(export_service.export_xml_lettings_logs).to eq({})
       end
     end
 
@@ -101,15 +87,6 @@ RSpec.describe Exports::LettingsLogExportService do
         export_service.export_xml_lettings_logs
       end
 
-      it "generates an XML manifest file with the expected filename within the ZIP file" do
-        expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
-          entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
-          expect(entry).not_to be_nil
-          expect(entry.name).to eq(expected_manifest_filename)
-        end
-        export_service.export_xml_lettings_logs
-      end
-
       it "generates an XML export file with the expected filename within the ZIP file" do
         expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
           entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
@@ -141,13 +118,8 @@ RSpec.describe Exports::LettingsLogExportService do
         export_service.export_xml_lettings_logs
       end
 
-      it "generates a master manifest with CSV headers" do
-        actual_content = nil
-        expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\ncore_2021_2022_apr_mar_f0001_inc0001,2022-05-01 00:00:00 +0100,#{expected_zip_filename}\n"
-        allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
-
-        export_service.export_xml_lettings_logs
-        expect(actual_content).to eq(expected_content)
+      it "returns the list with correct archive" do
+        expect(export_service.export_xml_lettings_logs).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
       end
     end
 
@@ -178,8 +150,10 @@ RSpec.describe Exports::LettingsLogExportService do
     end
 
context "with 23/24 collection period" do + let(:start_time) { Time.zone.local(2023, 4, 3) } + before do - Timecop.freeze(Time.zone.local(2023, 4, 3)) + Timecop.freeze(start_time) Singleton.__init__(FormHandler) stub_request(:get, "https://api.os.uk/search/places/v1/uprn?dataset=DPA,LPI&key=OS_DATA_KEY&uprn=100023336956") .to_return(status: 200, body: '{"status":200,"results":[{"DPA":{ @@ -309,13 +283,8 @@ RSpec.describe Exports::LettingsLogExportService do end context "when this is the first export (full)" do - it "records a ZIP archive in the master manifest (existing lettings logs)" do - expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) do |_, csv_content| - csv = CSV.parse(csv_content, headers: true) - expect(csv&.count).to be > 0 - end - - export_service.export_xml_lettings_logs + it "returns a ZIP archive for the master manifest (existing lettings logs)" do + expect(export_service.export_xml_lettings_logs).to eq({ expected_zip_filename.gsub(".zip", "").gsub(".zip", "") => start_time }) end end @@ -363,12 +332,8 @@ RSpec.describe Exports::LettingsLogExportService do LogsExport.new(started_at: start_time).save! 
end - it "does not add any entry in the master manifest (no lettings logs)" do - expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content| - csv = CSV.parse(csv_content, headers: true) - expect(csv&.count).to eq(0) - end - export_service.export_xml_lettings_logs + it "does not add any entry for the master manifest (no lettings logs)" do + expect(export_service.export_xml_lettings_logs).to eq({}) end end end @@ -379,11 +344,6 @@ RSpec.describe Exports::LettingsLogExportService do export_service.export_xml_lettings_logs end - it "increments the master manifest number by 1" do - expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) - export_service.export_xml_lettings_logs - end - context "and we trigger another full update" do it "increments the base number" do export_service.export_xml_lettings_logs(full_update: true) @@ -395,12 +355,8 @@ RSpec.describe Exports::LettingsLogExportService do expect(LogsExport.last.increment_number).to eq(1) end - it "records a ZIP archive in the master manifest (existing lettings logs)" do - expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content| - csv = CSV.parse(csv_content, headers: true) - expect(csv&.count).to be > 0 - end - export_service.export_xml_lettings_logs(full_update: true) + it "returns a correct archives list for manifest file" do + expect(export_service.export_xml_lettings_logs(full_update: true)).to eq({ "core_2021_2022_apr_mar_f0002_inc0001" => start_time }) end it "generates a ZIP export file with the expected filename" do @@ -429,14 +385,13 @@ RSpec.describe Exports::LettingsLogExportService do it "generates an XML manifest file with the expected content within the ZIP file" do expected_content = replace_record_number(local_manifest_file.read, 2) - expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) 
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content| entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename) expect(entry).not_to be_nil expect(entry.get_input_stream.read).to eq(expected_content) end - export_service.export_xml_lettings_logs + expect(export_service.export_xml_lettings_logs).to eq({ expected_zip_filename.gsub(".zip", "") => start_time }) end end @@ -461,8 +416,10 @@ RSpec.describe Exports::LettingsLogExportService do end context "with 24/25 collection period" do + let(:start_time) { Time.zone.local(2024, 4, 3) } + before do - Timecop.freeze(Time.zone.local(2024, 4, 3)) + Timecop.freeze(start_time) Singleton.__init__(FormHandler) end