Browse Source

refactor write_export_archive to only hold MAX_XML_RECORDS in memory at a time

pull/1859/head
Kat 3 years ago
parent
commit
3b0b824404
  1. 81
      app/services/exports/lettings_log_export_service.rb

81
app/services/exports/lettings_log_export_service.rb

@ -63,53 +63,58 @@ module Exports
@storage_service.write_file(file_path, string_io) @storage_service.write_file(file_path, string_io)
end end
# Builds the base file name (without extension) for an export archive.
#
# The name has the shape
#   core_<year>_<year + 1>_apr_mar_f<base_number>_inc<increment>
# where the base and increment numbers are left-padded with zeros to at
# least four digits.
#
# @param collection [Integer, String, nil] collection start year
# @param base_number [#to_s] base export number
# @param increment [#to_s] increment number within the base export
# @return [String, nil] lower-cased archive name, or nil when no collection
#   is given
def get_archive_name(collection, base_number, increment)
  return unless collection

  # Coerce so that a year supplied as a numeric string still yields the
  # following year instead of raising on `+ 1`.
  start_year = Integer(collection)
  base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
  increment_str = "inc#{increment.to_s.rjust(4, '0')}"
  "core_#{start_year}_#{start_year + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
end
# Writes one zip archive containing a manifest plus the lettings logs of a
# single collection year, split into XML parts of at most MAX_XML_RECORDS
# logs each, and hands the zip to the storage service.
#
# Logs are fetched one slice at a time so that no more than MAX_XML_RECORDS
# records are loaded at once.
#
# @param export [#base_number, #increment_number] export the archive belongs to
# @param collection [Integer] collection year, passed to `filter_by_year`
# @param start_time passed through to `retrieve_lettings_logs`
# @param full_update passed through to `retrieve_lettings_logs`
# @return [Hash] `{ archive_name => time_written }`, or `{}` when there are
#   no logs to export
def write_export_archive(export, collection, start_time, full_update)
  @logger.info("Writing export archives")
  # The archive name is the same for every log because the logs are already
  # filtered by collection year.
  archive = get_archive_name(collection, export.base_number, export.increment_number)

  logs_count = retrieve_lettings_logs(start_time, full_update).filter_by_year(collection).count
  @logger.info("Writing #{archive} - #{logs_count} logs")
  # Bail out before building the manifest or the zip when nothing matches.
  return {} if logs_count.zero?

  zip_file = Zip::File.open_buffer(StringIO.new)
  zip_file.add("manifest.xml", build_manifest_xml(logs_count))

  part_number = 1
  last_exported = nil

  loop do
    scope = retrieve_lettings_logs(start_time, full_update).filter_by_year(collection)
    if last_exported
      # Resume strictly after the last exported row. `id` breaks ties: a bare
      # `created_at > ?` would skip every remaining row that shares its
      # timestamp with the final row of the previous slice.
      scope = scope.where(
        "created_at > :created_at OR (created_at = :created_at AND id > :id)",
        created_at: last_exported.created_at,
        id: last_exported.id,
      )
    end

    # Load the slice once; the emptiness check, the XML builder and the
    # cursor update below all reuse the same in-memory records.
    lettings_logs_slice = scope.order(:created_at, :id).limit(MAX_XML_RECORDS).to_a
    break if lettings_logs_slice.empty?

    data_xml = build_export_xml(lettings_logs_slice)
    part_number_str = "pt#{part_number.to_s.rjust(3, '0')}"
    @logger.info("Adding #{archive}_#{part_number_str}.xml")
    zip_file.add("#{archive}_#{part_number_str}.xml", data_xml)
    part_number += 1
    last_exported = lettings_logs_slice.last
  end

  # Required by S3 to avoid Aws::S3::Errors::BadDigest
  zip_io = zip_file.write_buffer
  zip_io.rewind
  # Log text (sic) kept verbatim in case anything matches on it.
  @logger.info("Writting #{archive}.zip")
  @storage_service.write_file("#{archive}.zip", zip_io)
  { archive => Time.zone.now }
end
def retrieve_lettings_logs(start_time, full_update) def retrieve_lettings_logs(start_time, full_update)

Loading…
Cancel
Save