Browse Source

Refactor manifest creation into a separate job

pull/2652/head
Kat 2 years ago committed by kosiakkatrina
parent
commit
dfe331f614
  1. 4
      app/jobs/data_export_xml_job.rb
  2. 49
      app/services/exports/export_service.rb
  3. 57
      app/services/exports/lettings_log_export_service.rb
  4. 4
      lib/tasks/data_export.rake
  5. 8
      spec/lib/tasks/data_export_spec.rb
  6. 77
      spec/services/exports/export_service_spec.rb
  7. 83
      spec/services/exports/lettings_log_export_service_spec.rb

4
app/jobs/data_export_xml_job.rb

@ -3,8 +3,8 @@ class DataExportXmlJob < ApplicationJob
def perform(full_update: false)
storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["EXPORT_BUCKET"])
export_service = Exports::LettingsLogExportService.new(storage_service)
export_service = Exports::ExportService.new(storage_service)
export_service.export_xml_lettings_logs(full_update:)
export_service.export_xml(full_update:)
end
end

49
app/services/exports/export_service.rb

@ -0,0 +1,49 @@
module Exports
class ExportService
include Exports::LettingsLogExportConstants
include CollectionTimeHelper
def initialize(storage_service, logger = Rails.logger)
@storage_service = storage_service
@logger = logger
end
def export_xml(full_update: false, collection_year: nil)
start_time = Time.zone.now
daily_run_number = get_daily_run_number
export_service = Exports::LettingsLogExportService.new(@storage_service, start_time)
archives_for_manifest = export_service.export_xml_lettings_logs(full_update:, collection_year:)
write_master_manifest(daily_run_number, archives_for_manifest)
end
private
def get_daily_run_number
today = Time.zone.today
LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).select(:started_at).distinct.count + 1
end
def write_master_manifest(daily_run, archive_datetimes)
today = Time.zone.today
increment_number = daily_run.to_s.rjust(4, "0")
month = today.month.to_s.rjust(2, "0")
day = today.day.to_s.rjust(2, "0")
file_path = "Manifest_#{today.year}_#{month}_#{day}_#{increment_number}.csv"
string_io = build_manifest_csv_io(archive_datetimes)
@storage_service.write_file(file_path, string_io)
end
def build_manifest_csv_io(archive_datetimes)
headers = ["zip-name", "date-time zipped folder generated", "zip-file-uri"]
csv_string = CSV.generate do |csv|
csv << headers
archive_datetimes.each do |(archive, datetime)|
csv << [archive, datetime, "#{archive}.zip"]
end
end
StringIO.new(csv_string)
end
end
end

57
app/services/exports/lettings_log_export_service.rb

@ -3,20 +3,19 @@ module Exports
include Exports::LettingsLogExportConstants
include CollectionTimeHelper
def initialize(storage_service, logger = Rails.logger)
def initialize(storage_service, start_time, logger = Rails.logger)
@storage_service = storage_service
@logger = logger
@start_time = start_time
end
def export_xml_lettings_logs(full_update: false, collection_year: nil)
start_time = Time.zone.now
daily_run_number = get_daily_run_number
archives_for_manifest = {}
recent_export = LogsExport.order("started_at").last
collection_years_to_export(collection_year).each do |collection|
base_number = LogsExport.where(empty_export: false, collection:).maximum(:base_number) || 1
export = build_export_run(collection, start_time, base_number, full_update)
archives = write_export_archive(export, collection, start_time, recent_export, full_update)
export = build_export_run(collection, base_number, full_update)
archives = write_export_archive(export, collection, recent_export, full_update)
archives_for_manifest.merge!(archives)
@ -24,17 +23,12 @@ module Exports
export.save!
end
write_master_manifest(daily_run_number, archives_for_manifest)
archives_for_manifest
end
private
def get_daily_run_number
today = Time.zone.today
LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).select(:started_at).distinct.count + 1
end
def build_export_run(collection, current_time, base_number, full_update)
def build_export_run(collection, base_number, full_update)
@logger.info("Building export run for #{collection}")
previous_exports_with_data = LogsExport.where(collection:, empty_export: false)
@ -48,20 +42,10 @@ module Exports
end
if previous_exports_with_data.empty?
return LogsExport.new(collection:, base_number:, started_at: current_time)
return LogsExport.new(collection:, base_number:, started_at: @start_time)
end
LogsExport.new(collection:, started_at: current_time, base_number:, increment_number:)
end
def write_master_manifest(daily_run, archive_datetimes)
today = Time.zone.today
increment_number = daily_run.to_s.rjust(4, "0")
month = today.month.to_s.rjust(2, "0")
day = today.day.to_s.rjust(2, "0")
file_path = "Manifest_#{today.year}_#{month}_#{day}_#{increment_number}.csv"
string_io = build_manifest_csv_io(archive_datetimes)
@storage_service.write_file(file_path, string_io)
LogsExport.new(collection:, started_at: @start_time, base_number:, increment_number:)
end
def get_archive_name(collection, base_number, increment)
@ -72,10 +56,10 @@ module Exports
"core_#{collection}_#{collection + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
end
def write_export_archive(export, collection, start_time, recent_export, full_update)
def write_export_archive(export, collection, recent_export, full_update)
archive = get_archive_name(collection, export.base_number, export.increment_number) # archive name would be the same for all logs because they're already filtered by year (?)
initial_logs_count = retrieve_lettings_logs(start_time, recent_export, full_update).filter_by_year(collection).count
initial_logs_count = retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection).count
@logger.info("Creating #{archive} - #{initial_logs_count} logs")
return {} if initial_logs_count.zero?
@ -87,12 +71,12 @@ module Exports
loop do
lettings_logs_slice = if last_processed_marker.present?
retrieve_lettings_logs(start_time, recent_export, full_update).filter_by_year(collection)
retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection)
.where("created_at > ?", last_processed_marker)
.order(:created_at)
.limit(MAX_XML_RECORDS).to_a
else
retrieve_lettings_logs(start_time, recent_export, full_update).filter_by_year(collection)
retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection)
.order(:created_at)
.limit(MAX_XML_RECORDS).to_a
end
@ -119,27 +103,16 @@ module Exports
{ archive => Time.zone.now }
end
def retrieve_lettings_logs(start_time, recent_export, full_update)
def retrieve_lettings_logs(recent_export, full_update)
if !full_update && recent_export
params = { from: recent_export.started_at, to: start_time }
params = { from: recent_export.started_at, to: @start_time }
LettingsLog.exportable.where("(updated_at >= :from AND updated_at <= :to) OR (values_updated_at IS NOT NULL AND values_updated_at >= :from AND values_updated_at <= :to)", params)
else
params = { to: start_time }
params = { to: @start_time }
LettingsLog.exportable.where("updated_at <= :to", params)
end
end
def build_manifest_csv_io(archive_datetimes)
headers = ["zip-name", "date-time zipped folder generated", "zip-file-uri"]
csv_string = CSV.generate do |csv|
csv << headers
archive_datetimes.each do |(archive, datetime)|
csv << [archive, datetime, "#{archive}.zip"]
end
end
StringIO.new(csv_string)
end
def xml_doc_to_temp_file(xml_doc)
file = Tempfile.new
xml_doc.write_xml_to(file, encoding: "UTF-8")

4
lib/tasks/data_export.rake

@ -10,8 +10,8 @@ namespace :core do
task :full_data_export_xml, %i[year] => :environment do |_task, args|
collection_year = args[:year].present? ? args[:year].to_i : nil
storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["EXPORT_BUCKET"])
export_service = Exports::LettingsLogExportService.new(storage_service)
export_service = Exports::ExportService.new(storage_service)
export_service.export_xml_lettings_logs(full_update: true, collection_year:)
export_service.export_xml(full_update: true, collection_year:)
end
end

8
spec/lib/tasks/data_export_spec.rb

@ -4,7 +4,7 @@ require "rake"
describe "rake core:data_export", type: task do
let(:export_bucket) { "export_bucket" }
let(:storage_service) { instance_double(Storage::S3Service) }
let(:export_service) { instance_double(Exports::LettingsLogExportService) }
let(:export_service) { instance_double(Exports::ExportService) }
before do
Rake.application.rake_require("tasks/data_export")
@ -12,7 +12,7 @@ describe "rake core:data_export", type: task do
task.reenable
allow(Storage::S3Service).to receive(:new).and_return(storage_service)
allow(Exports::LettingsLogExportService).to receive(:new).and_return(export_service)
allow(Exports::ExportService).to receive(:new).and_return(export_service)
allow(ENV).to receive(:[])
allow(ENV).to receive(:[]).with("EXPORT_BUCKET").and_return(export_bucket)
end
@ -30,7 +30,7 @@ describe "rake core:data_export", type: task do
context "with all available years" do
it "calls the export service" do
expect(export_service).to receive(:export_xml_lettings_logs).with(full_update: true, collection_year: nil)
expect(export_service).to receive(:export_xml).with(full_update: true, collection_year: nil)
task.invoke
end
@ -38,7 +38,7 @@ describe "rake core:data_export", type: task do
context "with a specific year" do
it "calls the export service" do
expect(export_service).to receive(:export_xml_lettings_logs).with(full_update: true, collection_year: 2022)
expect(export_service).to receive(:export_xml).with(full_update: true, collection_year: 2022)
task.invoke("2022")
end

77
spec/services/exports/export_service_spec.rb

@ -0,0 +1,77 @@
require "rails_helper"
RSpec.describe Exports::ExportService do
subject(:export_service) { described_class.new(storage_service) }
let(:storage_service) { instance_double(Storage::S3Service) }
let(:expected_master_manifest_filename) { "Manifest_2022_05_01_0001.csv" }
let(:start_time) { Time.zone.local(2022, 5, 1) }
let(:user) { FactoryBot.create(:user, email: "test1@example.com") }
before do
Timecop.freeze(start_time)
Singleton.__init__(FormHandler)
allow(storage_service).to receive(:write_file)
allow(Exports::LettingsLogExportService).to receive(:new).and_return(lettings_logs_export_service)
end
after do
Timecop.return
end
context "when exporting daily XMLs" do
context "and no lettings archives get created in lettings logs export" do
let(:lettings_logs_export_service) { instance_double("Exports::LettingsLogExportService", export_xml_lettings_logs: {}) }
it "generates a master manifest with the correct name" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
export_service.export_xml
end
it "generates a master manifest with CSV headers but no data" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml
expect(actual_content).to eq(expected_content)
end
end
context "and one lettings archive gets created in lettings logs export" do
let(:lettings_logs_export_service) { instance_double("Exports::LettingsLogExportService", export_xml_lettings_logs: { "some_file_base_name" => start_time }) }
it "generates a master manifest with the correct name" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
export_service.export_xml
end
it "generates a master manifest with CSV headers and correct data" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\nsome_file_base_name,2022-05-01 00:00:00 +0100,some_file_base_name.zip\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml
expect(actual_content).to eq(expected_content)
end
end
context "and multiple lettings archives get created in lettings logs export" do
let(:lettings_logs_export_service) { instance_double("Exports::LettingsLogExportService", export_xml_lettings_logs: { "some_file_base_name" => start_time, "second_file_base_name" => start_time }) }
it "generates a master manifest with the correct name" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
export_service.export_xml
end
it "generates a master manifest with CSV headers and correct data" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\nsome_file_base_name,2022-05-01 00:00:00 +0100,some_file_base_name.zip\nsecond_file_base_name,2022-05-01 00:00:00 +0100,second_file_base_name.zip\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml
expect(actual_content).to eq(expected_content)
end
end
end
end

83
spec/services/exports/lettings_log_export_service_spec.rb

@ -1,7 +1,7 @@
require "rails_helper"
RSpec.describe Exports::LettingsLogExportService do
subject(:export_service) { described_class.new(storage_service) }
subject(:export_service) { described_class.new(storage_service, start_time) }
let(:storage_service) { instance_double(Storage::S3Service) }
@ -49,18 +49,8 @@ RSpec.describe Exports::LettingsLogExportService do
context "when exporting daily lettings logs in XML" do
context "and no lettings logs is available for export" do
it "generates a master manifest with the correct name" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
export_service.export_xml_lettings_logs
end
it "generates a master manifest with CSV headers but no data" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml_lettings_logs
expect(actual_content).to eq(expected_content)
it "returns an empty archives list" do
expect(export_service.export_xml_lettings_logs).to eq({})
end
end
@ -83,13 +73,9 @@ RSpec.describe Exports::LettingsLogExportService do
)
end
it "generates a master manifest with CSV headers but no data" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
it "returns empty archives list for archives manifest" do
export_service.export_xml_lettings_logs
expect(actual_content).to eq(expected_content)
expect(export_service.export_xml_lettings_logs).to eq({})
end
end
@ -101,15 +87,6 @@ RSpec.describe Exports::LettingsLogExportService do
export_service.export_xml_lettings_logs
end
it "generates an XML manifest file with the expected filename within the ZIP file" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
expect(entry).not_to be_nil
expect(entry.name).to eq(expected_manifest_filename)
end
export_service.export_xml_lettings_logs
end
it "generates an XML export file with the expected filename within the ZIP file" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
@ -141,13 +118,8 @@ RSpec.describe Exports::LettingsLogExportService do
export_service.export_xml_lettings_logs
end
it "generates a master manifest with CSV headers" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\ncore_2021_2022_apr_mar_f0001_inc0001,2022-05-01 00:00:00 +0100,#{expected_zip_filename}\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml_lettings_logs
expect(actual_content).to eq(expected_content)
it "returns the list with correct archive" do
expect(export_service.export_xml_lettings_logs).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
end
end
@ -178,8 +150,10 @@ RSpec.describe Exports::LettingsLogExportService do
end
context "with 23/24 collection period" do
let(:start_time) { Time.zone.local(2023, 4, 3) }
before do
Timecop.freeze(Time.zone.local(2023, 4, 3))
Timecop.freeze(start_time)
Singleton.__init__(FormHandler)
stub_request(:get, "https://api.os.uk/search/places/v1/uprn?dataset=DPA,LPI&key=OS_DATA_KEY&uprn=100023336956")
.to_return(status: 200, body: '{"status":200,"results":[{"DPA":{
@ -309,13 +283,8 @@ RSpec.describe Exports::LettingsLogExportService do
end
context "when this is the first export (full)" do
it "records a ZIP archive in the master manifest (existing lettings logs)" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) do |_, csv_content|
csv = CSV.parse(csv_content, headers: true)
expect(csv&.count).to be > 0
end
export_service.export_xml_lettings_logs
it "returns a ZIP archive for the master manifest (existing lettings logs)" do
expect(export_service.export_xml_lettings_logs).to eq({ expected_zip_filename.gsub(".zip", "").gsub(".zip", "") => start_time })
end
end
@ -363,12 +332,8 @@ RSpec.describe Exports::LettingsLogExportService do
LogsExport.new(started_at: start_time).save!
end
it "does not add any entry in the master manifest (no lettings logs)" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content|
csv = CSV.parse(csv_content, headers: true)
expect(csv&.count).to eq(0)
end
export_service.export_xml_lettings_logs
it "does not add any entry for the master manifest (no lettings logs)" do
expect(export_service.export_xml_lettings_logs).to eq({})
end
end
end
@ -379,11 +344,6 @@ RSpec.describe Exports::LettingsLogExportService do
export_service.export_xml_lettings_logs
end
it "increments the master manifest number by 1" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args)
export_service.export_xml_lettings_logs
end
context "and we trigger another full update" do
it "increments the base number" do
export_service.export_xml_lettings_logs(full_update: true)
@ -395,12 +355,8 @@ RSpec.describe Exports::LettingsLogExportService do
expect(LogsExport.last.increment_number).to eq(1)
end
it "records a ZIP archive in the master manifest (existing lettings logs)" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content|
csv = CSV.parse(csv_content, headers: true)
expect(csv&.count).to be > 0
end
export_service.export_xml_lettings_logs(full_update: true)
it "returns a correct archives list for manifest file" do
expect(export_service.export_xml_lettings_logs(full_update: true)).to eq({ "core_2021_2022_apr_mar_f0002_inc0001" => start_time })
end
it "generates a ZIP export file with the expected filename" do
@ -429,14 +385,13 @@ RSpec.describe Exports::LettingsLogExportService do
it "generates an XML manifest file with the expected content within the ZIP file" do
expected_content = replace_record_number(local_manifest_file.read, 2)
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args)
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
expect(entry).not_to be_nil
expect(entry.get_input_stream.read).to eq(expected_content)
end
export_service.export_xml_lettings_logs
expect(export_service.export_xml_lettings_logs).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
end
end
@ -461,8 +416,10 @@ RSpec.describe Exports::LettingsLogExportService do
end
context "with 24/25 collection period" do
let(:start_time) { Time.zone.local(2024, 4, 3) }
before do
Timecop.freeze(Time.zone.local(2024, 4, 3))
Timecop.freeze(start_time)
Singleton.__init__(FormHandler)
end

Loading…
Cancel
Save