Browse Source

Refactor manifest creation into a separate job

pull/2652/head
Kat 2 years ago committed by kosiakkatrina
parent
commit
dfe331f614
  1. 4
      app/jobs/data_export_xml_job.rb
  2. 49
      app/services/exports/export_service.rb
  3. 57
      app/services/exports/lettings_log_export_service.rb
  4. 4
      lib/tasks/data_export.rake
  5. 8
      spec/lib/tasks/data_export_spec.rb
  6. 77
      spec/services/exports/export_service_spec.rb
  7. 83
      spec/services/exports/lettings_log_export_service_spec.rb

4
app/jobs/data_export_xml_job.rb

@ -3,8 +3,8 @@ class DataExportXmlJob < ApplicationJob
def perform(full_update: false) def perform(full_update: false)
storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["EXPORT_BUCKET"]) storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["EXPORT_BUCKET"])
export_service = Exports::LettingsLogExportService.new(storage_service) export_service = Exports::ExportService.new(storage_service)
export_service.export_xml_lettings_logs(full_update:) export_service.export_xml(full_update:)
end end
end end

49
app/services/exports/export_service.rb

@ -0,0 +1,49 @@
module Exports
class ExportService
include Exports::LettingsLogExportConstants
include CollectionTimeHelper
def initialize(storage_service, logger = Rails.logger)
@storage_service = storage_service
@logger = logger
end
def export_xml(full_update: false, collection_year: nil)
start_time = Time.zone.now
daily_run_number = get_daily_run_number
export_service = Exports::LettingsLogExportService.new(@storage_service, start_time)
archives_for_manifest = export_service.export_xml_lettings_logs(full_update:, collection_year:)
write_master_manifest(daily_run_number, archives_for_manifest)
end
private
def get_daily_run_number
today = Time.zone.today
LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).select(:started_at).distinct.count + 1
end
def write_master_manifest(daily_run, archive_datetimes)
today = Time.zone.today
increment_number = daily_run.to_s.rjust(4, "0")
month = today.month.to_s.rjust(2, "0")
day = today.day.to_s.rjust(2, "0")
file_path = "Manifest_#{today.year}_#{month}_#{day}_#{increment_number}.csv"
string_io = build_manifest_csv_io(archive_datetimes)
@storage_service.write_file(file_path, string_io)
end
def build_manifest_csv_io(archive_datetimes)
headers = ["zip-name", "date-time zipped folder generated", "zip-file-uri"]
csv_string = CSV.generate do |csv|
csv << headers
archive_datetimes.each do |(archive, datetime)|
csv << [archive, datetime, "#{archive}.zip"]
end
end
StringIO.new(csv_string)
end
end
end

57
app/services/exports/lettings_log_export_service.rb

@ -3,20 +3,19 @@ module Exports
include Exports::LettingsLogExportConstants include Exports::LettingsLogExportConstants
include CollectionTimeHelper include CollectionTimeHelper
def initialize(storage_service, logger = Rails.logger) def initialize(storage_service, start_time, logger = Rails.logger)
@storage_service = storage_service @storage_service = storage_service
@logger = logger @logger = logger
@start_time = start_time
end end
def export_xml_lettings_logs(full_update: false, collection_year: nil) def export_xml_lettings_logs(full_update: false, collection_year: nil)
start_time = Time.zone.now
daily_run_number = get_daily_run_number
archives_for_manifest = {} archives_for_manifest = {}
recent_export = LogsExport.order("started_at").last recent_export = LogsExport.order("started_at").last
collection_years_to_export(collection_year).each do |collection| collection_years_to_export(collection_year).each do |collection|
base_number = LogsExport.where(empty_export: false, collection:).maximum(:base_number) || 1 base_number = LogsExport.where(empty_export: false, collection:).maximum(:base_number) || 1
export = build_export_run(collection, start_time, base_number, full_update) export = build_export_run(collection, base_number, full_update)
archives = write_export_archive(export, collection, start_time, recent_export, full_update) archives = write_export_archive(export, collection, recent_export, full_update)
archives_for_manifest.merge!(archives) archives_for_manifest.merge!(archives)
@ -24,17 +23,12 @@ module Exports
export.save! export.save!
end end
write_master_manifest(daily_run_number, archives_for_manifest) archives_for_manifest
end end
private private
def get_daily_run_number def build_export_run(collection, base_number, full_update)
today = Time.zone.today
LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).select(:started_at).distinct.count + 1
end
def build_export_run(collection, current_time, base_number, full_update)
@logger.info("Building export run for #{collection}") @logger.info("Building export run for #{collection}")
previous_exports_with_data = LogsExport.where(collection:, empty_export: false) previous_exports_with_data = LogsExport.where(collection:, empty_export: false)
@ -48,20 +42,10 @@ module Exports
end end
if previous_exports_with_data.empty? if previous_exports_with_data.empty?
return LogsExport.new(collection:, base_number:, started_at: current_time) return LogsExport.new(collection:, base_number:, started_at: @start_time)
end end
LogsExport.new(collection:, started_at: current_time, base_number:, increment_number:) LogsExport.new(collection:, started_at: @start_time, base_number:, increment_number:)
end
def write_master_manifest(daily_run, archive_datetimes)
today = Time.zone.today
increment_number = daily_run.to_s.rjust(4, "0")
month = today.month.to_s.rjust(2, "0")
day = today.day.to_s.rjust(2, "0")
file_path = "Manifest_#{today.year}_#{month}_#{day}_#{increment_number}.csv"
string_io = build_manifest_csv_io(archive_datetimes)
@storage_service.write_file(file_path, string_io)
end end
def get_archive_name(collection, base_number, increment) def get_archive_name(collection, base_number, increment)
@ -72,10 +56,10 @@ module Exports
"core_#{collection}_#{collection + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase "core_#{collection}_#{collection + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
end end
def write_export_archive(export, collection, start_time, recent_export, full_update) def write_export_archive(export, collection, recent_export, full_update)
archive = get_archive_name(collection, export.base_number, export.increment_number) # archive name would be the same for all logs because they're already filtered by year (?) archive = get_archive_name(collection, export.base_number, export.increment_number) # archive name would be the same for all logs because they're already filtered by year (?)
initial_logs_count = retrieve_lettings_logs(start_time, recent_export, full_update).filter_by_year(collection).count initial_logs_count = retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection).count
@logger.info("Creating #{archive} - #{initial_logs_count} logs") @logger.info("Creating #{archive} - #{initial_logs_count} logs")
return {} if initial_logs_count.zero? return {} if initial_logs_count.zero?
@ -87,12 +71,12 @@ module Exports
loop do loop do
lettings_logs_slice = if last_processed_marker.present? lettings_logs_slice = if last_processed_marker.present?
retrieve_lettings_logs(start_time, recent_export, full_update).filter_by_year(collection) retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection)
.where("created_at > ?", last_processed_marker) .where("created_at > ?", last_processed_marker)
.order(:created_at) .order(:created_at)
.limit(MAX_XML_RECORDS).to_a .limit(MAX_XML_RECORDS).to_a
else else
retrieve_lettings_logs(start_time, recent_export, full_update).filter_by_year(collection) retrieve_lettings_logs(recent_export, full_update).filter_by_year(collection)
.order(:created_at) .order(:created_at)
.limit(MAX_XML_RECORDS).to_a .limit(MAX_XML_RECORDS).to_a
end end
@ -119,27 +103,16 @@ module Exports
{ archive => Time.zone.now } { archive => Time.zone.now }
end end
def retrieve_lettings_logs(start_time, recent_export, full_update) def retrieve_lettings_logs(recent_export, full_update)
if !full_update && recent_export if !full_update && recent_export
params = { from: recent_export.started_at, to: start_time } params = { from: recent_export.started_at, to: @start_time }
LettingsLog.exportable.where("(updated_at >= :from AND updated_at <= :to) OR (values_updated_at IS NOT NULL AND values_updated_at >= :from AND values_updated_at <= :to)", params) LettingsLog.exportable.where("(updated_at >= :from AND updated_at <= :to) OR (values_updated_at IS NOT NULL AND values_updated_at >= :from AND values_updated_at <= :to)", params)
else else
params = { to: start_time } params = { to: @start_time }
LettingsLog.exportable.where("updated_at <= :to", params) LettingsLog.exportable.where("updated_at <= :to", params)
end end
end end
def build_manifest_csv_io(archive_datetimes)
headers = ["zip-name", "date-time zipped folder generated", "zip-file-uri"]
csv_string = CSV.generate do |csv|
csv << headers
archive_datetimes.each do |(archive, datetime)|
csv << [archive, datetime, "#{archive}.zip"]
end
end
StringIO.new(csv_string)
end
def xml_doc_to_temp_file(xml_doc) def xml_doc_to_temp_file(xml_doc)
file = Tempfile.new file = Tempfile.new
xml_doc.write_xml_to(file, encoding: "UTF-8") xml_doc.write_xml_to(file, encoding: "UTF-8")

4
lib/tasks/data_export.rake

@ -10,8 +10,8 @@ namespace :core do
task :full_data_export_xml, %i[year] => :environment do |_task, args| task :full_data_export_xml, %i[year] => :environment do |_task, args|
collection_year = args[:year].present? ? args[:year].to_i : nil collection_year = args[:year].present? ? args[:year].to_i : nil
storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["EXPORT_BUCKET"]) storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["EXPORT_BUCKET"])
export_service = Exports::LettingsLogExportService.new(storage_service) export_service = Exports::ExportService.new(storage_service)
export_service.export_xml_lettings_logs(full_update: true, collection_year:) export_service.export_xml(full_update: true, collection_year:)
end end
end end

8
spec/lib/tasks/data_export_spec.rb

@ -4,7 +4,7 @@ require "rake"
describe "rake core:data_export", type: task do describe "rake core:data_export", type: task do
let(:export_bucket) { "export_bucket" } let(:export_bucket) { "export_bucket" }
let(:storage_service) { instance_double(Storage::S3Service) } let(:storage_service) { instance_double(Storage::S3Service) }
let(:export_service) { instance_double(Exports::LettingsLogExportService) } let(:export_service) { instance_double(Exports::ExportService) }
before do before do
Rake.application.rake_require("tasks/data_export") Rake.application.rake_require("tasks/data_export")
@ -12,7 +12,7 @@ describe "rake core:data_export", type: task do
task.reenable task.reenable
allow(Storage::S3Service).to receive(:new).and_return(storage_service) allow(Storage::S3Service).to receive(:new).and_return(storage_service)
allow(Exports::LettingsLogExportService).to receive(:new).and_return(export_service) allow(Exports::ExportService).to receive(:new).and_return(export_service)
allow(ENV).to receive(:[]) allow(ENV).to receive(:[])
allow(ENV).to receive(:[]).with("EXPORT_BUCKET").and_return(export_bucket) allow(ENV).to receive(:[]).with("EXPORT_BUCKET").and_return(export_bucket)
end end
@ -30,7 +30,7 @@ describe "rake core:data_export", type: task do
context "with all available years" do context "with all available years" do
it "calls the export service" do it "calls the export service" do
expect(export_service).to receive(:export_xml_lettings_logs).with(full_update: true, collection_year: nil) expect(export_service).to receive(:export_xml).with(full_update: true, collection_year: nil)
task.invoke task.invoke
end end
@ -38,7 +38,7 @@ describe "rake core:data_export", type: task do
context "with a specific year" do context "with a specific year" do
it "calls the export service" do it "calls the export service" do
expect(export_service).to receive(:export_xml_lettings_logs).with(full_update: true, collection_year: 2022) expect(export_service).to receive(:export_xml).with(full_update: true, collection_year: 2022)
task.invoke("2022") task.invoke("2022")
end end

77
spec/services/exports/export_service_spec.rb

@ -0,0 +1,77 @@
require "rails_helper"
RSpec.describe Exports::ExportService do
subject(:export_service) { described_class.new(storage_service) }
let(:storage_service) { instance_double(Storage::S3Service) }
let(:expected_master_manifest_filename) { "Manifest_2022_05_01_0001.csv" }
let(:start_time) { Time.zone.local(2022, 5, 1) }
let(:user) { FactoryBot.create(:user, email: "test1@example.com") }
before do
Timecop.freeze(start_time)
Singleton.__init__(FormHandler)
allow(storage_service).to receive(:write_file)
allow(Exports::LettingsLogExportService).to receive(:new).and_return(lettings_logs_export_service)
end
after do
Timecop.return
end
context "when exporting daily XMLs" do
context "and no lettings archives get created in lettings logs export" do
let(:lettings_logs_export_service) { instance_double("Exports::LettingsLogExportService", export_xml_lettings_logs: {}) }
it "generates a master manifest with the correct name" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
export_service.export_xml
end
it "generates a master manifest with CSV headers but no data" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml
expect(actual_content).to eq(expected_content)
end
end
context "and one lettings archive gets created in lettings logs export" do
let(:lettings_logs_export_service) { instance_double("Exports::LettingsLogExportService", export_xml_lettings_logs: { "some_file_base_name" => start_time }) }
it "generates a master manifest with the correct name" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
export_service.export_xml
end
it "generates a master manifest with CSV headers and correct data" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\nsome_file_base_name,2022-05-01 00:00:00 +0100,some_file_base_name.zip\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml
expect(actual_content).to eq(expected_content)
end
end
context "and multiple lettings archives get created in lettings logs export" do
let(:lettings_logs_export_service) { instance_double("Exports::LettingsLogExportService", export_xml_lettings_logs: { "some_file_base_name" => start_time, "second_file_base_name" => start_time }) }
it "generates a master manifest with the correct name" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args)
export_service.export_xml
end
it "generates a master manifest with CSV headers and correct data" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\nsome_file_base_name,2022-05-01 00:00:00 +0100,some_file_base_name.zip\nsecond_file_base_name,2022-05-01 00:00:00 +0100,second_file_base_name.zip\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml
expect(actual_content).to eq(expected_content)
end
end
end
end

83
spec/services/exports/lettings_log_export_service_spec.rb

@ -1,7 +1,7 @@
require "rails_helper" require "rails_helper"
RSpec.describe Exports::LettingsLogExportService do RSpec.describe Exports::LettingsLogExportService do
subject(:export_service) { described_class.new(storage_service) } subject(:export_service) { described_class.new(storage_service, start_time) }
let(:storage_service) { instance_double(Storage::S3Service) } let(:storage_service) { instance_double(Storage::S3Service) }
@ -49,18 +49,8 @@ RSpec.describe Exports::LettingsLogExportService do
context "when exporting daily lettings logs in XML" do context "when exporting daily lettings logs in XML" do
context "and no lettings logs is available for export" do context "and no lettings logs is available for export" do
it "generates a master manifest with the correct name" do it "returns an empty archives list" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) expect(export_service.export_xml_lettings_logs).to eq({})
export_service.export_xml_lettings_logs
end
it "generates a master manifest with CSV headers but no data" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml_lettings_logs
expect(actual_content).to eq(expected_content)
end end
end end
@ -83,13 +73,9 @@ RSpec.describe Exports::LettingsLogExportService do
) )
end end
it "generates a master manifest with CSV headers but no data" do it "returns empty archives list for archives manifest" do
actual_content = nil
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml_lettings_logs export_service.export_xml_lettings_logs
expect(actual_content).to eq(expected_content) expect(export_service.export_xml_lettings_logs).to eq({})
end end
end end
@ -101,15 +87,6 @@ RSpec.describe Exports::LettingsLogExportService do
export_service.export_xml_lettings_logs export_service.export_xml_lettings_logs
end end
it "generates an XML manifest file with the expected filename within the ZIP file" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
expect(entry).not_to be_nil
expect(entry.name).to eq(expected_manifest_filename)
end
export_service.export_xml_lettings_logs
end
it "generates an XML export file with the expected filename within the ZIP file" do it "generates an XML export file with the expected filename within the ZIP file" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content| expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_data_filename) entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
@ -141,13 +118,8 @@ RSpec.describe Exports::LettingsLogExportService do
export_service.export_xml_lettings_logs export_service.export_xml_lettings_logs
end end
it "generates a master manifest with CSV headers" do it "returns the list with correct archive" do
actual_content = nil expect(export_service.export_xml_lettings_logs).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
expected_content = "zip-name,date-time zipped folder generated,zip-file-uri\ncore_2021_2022_apr_mar_f0001_inc0001,2022-05-01 00:00:00 +0100,#{expected_zip_filename}\n"
allow(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) { |_, arg2| actual_content = arg2&.string }
export_service.export_xml_lettings_logs
expect(actual_content).to eq(expected_content)
end end
end end
@ -178,8 +150,10 @@ RSpec.describe Exports::LettingsLogExportService do
end end
context "with 23/24 collection period" do context "with 23/24 collection period" do
let(:start_time) { Time.zone.local(2023, 4, 3) }
before do before do
Timecop.freeze(Time.zone.local(2023, 4, 3)) Timecop.freeze(start_time)
Singleton.__init__(FormHandler) Singleton.__init__(FormHandler)
stub_request(:get, "https://api.os.uk/search/places/v1/uprn?dataset=DPA,LPI&key=OS_DATA_KEY&uprn=100023336956") stub_request(:get, "https://api.os.uk/search/places/v1/uprn?dataset=DPA,LPI&key=OS_DATA_KEY&uprn=100023336956")
.to_return(status: 200, body: '{"status":200,"results":[{"DPA":{ .to_return(status: 200, body: '{"status":200,"results":[{"DPA":{
@ -309,13 +283,8 @@ RSpec.describe Exports::LettingsLogExportService do
end end
context "when this is the first export (full)" do context "when this is the first export (full)" do
it "records a ZIP archive in the master manifest (existing lettings logs)" do it "returns a ZIP archive for the master manifest (existing lettings logs)" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) do |_, csv_content| expect(export_service.export_xml_lettings_logs).to eq({ expected_zip_filename.gsub(".zip", "").gsub(".zip", "") => start_time })
csv = CSV.parse(csv_content, headers: true)
expect(csv&.count).to be > 0
end
export_service.export_xml_lettings_logs
end end
end end
@ -363,12 +332,8 @@ RSpec.describe Exports::LettingsLogExportService do
LogsExport.new(started_at: start_time).save! LogsExport.new(started_at: start_time).save!
end end
it "does not add any entry in the master manifest (no lettings logs)" do it "does not add any entry for the master manifest (no lettings logs)" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content| expect(export_service.export_xml_lettings_logs).to eq({})
csv = CSV.parse(csv_content, headers: true)
expect(csv&.count).to eq(0)
end
export_service.export_xml_lettings_logs
end end
end end
end end
@ -379,11 +344,6 @@ RSpec.describe Exports::LettingsLogExportService do
export_service.export_xml_lettings_logs export_service.export_xml_lettings_logs
end end
it "increments the master manifest number by 1" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args)
export_service.export_xml_lettings_logs
end
context "and we trigger another full update" do context "and we trigger another full update" do
it "increments the base number" do it "increments the base number" do
export_service.export_xml_lettings_logs(full_update: true) export_service.export_xml_lettings_logs(full_update: true)
@ -395,12 +355,8 @@ RSpec.describe Exports::LettingsLogExportService do
expect(LogsExport.last.increment_number).to eq(1) expect(LogsExport.last.increment_number).to eq(1)
end end
it "records a ZIP archive in the master manifest (existing lettings logs)" do it "returns a correct archives list for manifest file" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content| expect(export_service.export_xml_lettings_logs(full_update: true)).to eq({ "core_2021_2022_apr_mar_f0002_inc0001" => start_time })
csv = CSV.parse(csv_content, headers: true)
expect(csv&.count).to be > 0
end
export_service.export_xml_lettings_logs(full_update: true)
end end
it "generates a ZIP export file with the expected filename" do it "generates a ZIP export file with the expected filename" do
@ -429,14 +385,13 @@ RSpec.describe Exports::LettingsLogExportService do
it "generates an XML manifest file with the expected content within the ZIP file" do it "generates an XML manifest file with the expected content within the ZIP file" do
expected_content = replace_record_number(local_manifest_file.read, 2) expected_content = replace_record_number(local_manifest_file.read, 2)
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args)
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content| expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename) entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
expect(entry).not_to be_nil expect(entry).not_to be_nil
expect(entry.get_input_stream.read).to eq(expected_content) expect(entry.get_input_stream.read).to eq(expected_content)
end end
export_service.export_xml_lettings_logs expect(export_service.export_xml_lettings_logs).to eq({ expected_zip_filename.gsub(".zip", "") => start_time })
end end
end end
@ -461,8 +416,10 @@ RSpec.describe Exports::LettingsLogExportService do
end end
context "with 24/25 collection period" do context "with 24/25 collection period" do
let(:start_time) { Time.zone.local(2024, 4, 3) }
before do before do
Timecop.freeze(Time.zone.local(2024, 4, 3)) Timecop.freeze(start_time)
Singleton.__init__(FormHandler) Singleton.__init__(FormHandler)
end end

Loading…
Cancel
Save