diff --git a/app/services/bulk_upload/sales/year2022/csv_parser.rb b/app/services/bulk_upload/sales/year2022/csv_parser.rb
new file mode 100644
index 000000000..e0ae500fd
--- /dev/null
+++ b/app/services/bulk_upload/sales/year2022/csv_parser.rb
@@ -0,0 +1,86 @@
+require "csv"
+
+# Parses a 2022 sales bulk upload CSV into rows and row parsers.
+#
+# The first cell decides the layout: when it contains a non-digit character
+# the file is treated as having 5 leading header rows and 1 leading column,
+# otherwise data is assumed to start at the first row and column.
+class BulkUpload::Sales::Year2022::CsvParser
+  MIN_COLUMNS = 125
+  MAX_COLUMNS = 126
+
+  attr_reader :path
+
+  # @param path [String, Pathname] location of the CSV file to read
+  def initialize(path:)
+    @path = path
+  end
+
+  # Number of leading rows skipped before the data rows.
+  def row_offset
+    with_headers? ? 5 : 0
+  end
+
+  # Number of leading columns skipped in every row.
+  def col_offset
+    with_headers? ? 1 : 0
+  end
+
+  # Spreadsheet column letters, "A" through "DV".
+  def cols
+    @cols ||= ("A".."DV").to_a
+  end
+
+  # One row parser per data row, keyed "field_1".."field_125" by position.
+  def row_parsers
+    @row_parsers ||= body_rows.map do |row|
+      stripped_row = row[col_offset..]
+      hash = headers.zip(stripped_row).to_h
+
+      BulkUpload::Sales::Year2022::RowParser.new(hash)
+    end
+  end
+
+  # Rows remaining once the header rows have been dropped.
+  def body_rows
+    rows[row_offset..]
+  end
+
+  # Every row of the file as an array of cell values.
+  def rows
+    @rows ||= CSV.parse(normalised_string, row_sep:)
+  end
+
+  # Spreadsheet column letter holding +field+ (e.g. "field_1").
+  # Raises NoMethodError when +field+ is not one of the known headers.
+  def column_for_field(field)
+    cols[headers.find_index(field) + col_offset]
+  end
+
+private
+
+  def headers
+    @headers ||= ("field_1".."field_125").to_a
+  end
+
+  # Falsy for an empty file instead of raising on a missing first row.
+  def with_headers?
+    rows.dig(0, 0)&.match?(/\D+/)
+  end
+
+  def row_sep
+    "\n"
+  end
+
+  # File contents with any BOM removed, CRLF converted to LF and invalid
+  # bytes dropped.
+  def normalised_string
+    return @normalised_string if @normalised_string
+
+    @normalised_string = File.read(path, encoding: "bom|utf-8")
+    @normalised_string.gsub!("\r\n", "\n")
+    @normalised_string.scrub!("")
+
+    @normalised_string
+  end
+end
diff --git a/spec/services/bulk_upload/sales/year2022/csv_parser_spec.rb b/spec/services/bulk_upload/sales/year2022/csv_parser_spec.rb
new file mode 100644
index 000000000..4b16291e9
--- /dev/null
+++ b/spec/services/bulk_upload/sales/year2022/csv_parser_spec.rb
@@ -0,0 +1,97 @@
+require "rails_helper"
+
+RSpec.describe BulkUpload::Sales::Year2022::CsvParser do
+  subject(:service) { described_class.new(path:) }
+
+  let(:path) { file_fixture("2022_23_sales_bulk_upload.csv") }
+
+  context "when parsing csv with headers" do
+    it "returns correct offsets" do
+      expect(service.row_offset).to eq(5)
+      expect(service.col_offset).to eq(1)
+    end
+
+    it "parses csv correctly" do
+      expect(service.row_parsers[0].field_7.to_i).to eq(30)
+    end
+  end
+
+  context "when parsing csv without headers" do
+    let(:file) { Tempfile.new }
+    let(:path) { file.path }
+    let(:log) { build(:sales_log, :completed) }
+
+    before do
+      file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2022_sales_csv_row)
+      file.rewind
+    end
+
+    it "returns correct offsets" do
+      expect(service.row_offset).to eq(0)
+      expect(service.col_offset).to eq(0)
+    end
+
+    it "parses csv correctly" do
+      expect(service.row_parsers[0].field_7.to_i).to eql(log.age1)
+    end
+  end
+
+  context "when parsing with BOM aka byte order mark" do
+    let(:file) { Tempfile.new }
+    let(:path) { file.path }
+    let(:log) { build(:sales_log, :completed) }
+    let(:bom) { "\uFEFF" }
+
+    before do
+      file.write(bom)
+      file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2022_sales_csv_row)
+      file.close
+    end
+
+    it "parses csv correctly" do
+      expect(service.row_parsers[0].field_7.to_i).to eql(log.age1)
+    end
+  end
+
+  context "when an invalid byte sequence" do
+    let(:file) { Tempfile.new }
+    let(:path) { file.path }
+    let(:log) { build(:sales_log, :completed) }
+    let(:invalid_sequence) { "\x81" }
+
+    before do
+      file.write(invalid_sequence)
+      file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2022_sales_csv_row)
+      file.close
+    end
+
+    it "parses csv correctly" do
+      expect(service.row_parsers[0].field_7.to_i).to eql(log.age1)
+    end
+  end
+
+  describe "#column_for_field", aggregate_failures: true do
+    context "when headers present" do
+      it "returns correct column" do
+        expect(service.column_for_field("field_1")).to eql("B")
+        expect(service.column_for_field("field_125")).to eql("DV")
+      end
+    end
+
+    context "when no headers" do
+      let(:file) { Tempfile.new }
+      let(:path) { file.path }
+      let(:log) { build(:sales_log, :completed) }
+
+      before do
+        file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2022_sales_csv_row)
+        file.rewind
+      end
+
+      it "returns correct column" do
+        expect(service.column_for_field("field_1")).to eql("A")
+        expect(service.column_for_field("field_125")).to eql("DU")
+      end
+    end
+  end
+end
diff --git a/spec/support/bulk_upload/log_to_csv.rb b/spec/support/bulk_upload/log_to_csv.rb
index 55a199e7f..f281c460d 100644
--- a/spec/support/bulk_upload/log_to_csv.rb
+++ b/spec/support/bulk_upload/log_to_csv.rb
@@ -16,6 +16,11 @@ class BulkUpload::LogToCsv
     (row_prefix + to_2022_row).flatten.join(",") + line_ending
   end
 
+  # One CSV line for a 2022 sales log: the row prefix, then the sales fields.
+  def to_2022_sales_csv_row
+    (row_prefix + to_2022_sales_row).flatten.join(",") + line_ending
+  end
+
   def to_2023_csv_row(seed: nil)
     if seed
       row = to_2023_row.shuffle(random: Random.new(seed))
@@ -198,6 +203,155 @@ class BulkUpload::LogToCsv
     ]
   end
 
+  # Values for fields 1 to 125 of the 2022 sales upload, in column order.
+  # The trailing numbers mark every tenth field position.
+  def to_2022_sales_row
+    [
+      log.purchid, # 1
+      log.saledate&.day,
+      log.saledate&.month,
+      log.saledate&.strftime("%y"),
+      nil,
+      log.noint,
+      log.age1,
+      log.age2,
+      log.age3,
+      log.age4,
+      log.age5,
+      log.age6,
+
+      log.sex1,
+      log.sex2,
+      log.sex3,
+      log.sex4,
+      log.sex5,
+      log.sex6,
+
+      log.relat2,
+      log.relat3, # 20
+      log.relat4,
+      log.relat5,
+      log.relat6,
+
+      log.ecstat1,
+      log.ecstat2,
+      log.ecstat3,
+      log.ecstat4,
+      log.ecstat5,
+      log.ecstat6,
+
+      log.ethnic, # 30
+      log.national,
+      log.income1,
+      log.income2,
+      log.inc1mort,
+      log.inc2mort,
+      log.savings,
+      log.prevown,
+      nil,
+
+      log.prevten,
+      log.prevloc, # 40
+      log.ppostcode_full.to_s.split(" ").first,
+      log.ppostcode_full.to_s.split(" ").last,
+      previous_postcode_known,
+
+      log.pregyrha,
+      log.pregla,
+      log.pregghb,
+      log.pregother,
+
+      log.disabled,
+      log.wheel,
+      log.beds, # 50
+      log.proptype,
+      log.builtype,
+      log.la,
+      log.postcode_full.to_s.split(" ").first,
+      log.postcode_full.to_s.split(" ").last,
+      log.wchair,
+
+      log.type, # shared ownership
+      log.resale,
+      log.hodate&.day,
+      log.hodate&.month, # 60
+      log.hodate&.strftime("%y"),
+      log.exdate&.day,
+      log.exdate&.month,
+      log.exdate&.strftime("%y"),
+      log.lanomagr,
+
+      log.frombeds,
+      log.fromprop,
+
+      log.value,
+      log.equity,
+      log.mortgage, # 70
+      log.extrabor,
+      log.deposit,
+      log.cashdis,
+
+      log.mrent,
+      log.mscharge,
+
+      log.type, # discounted ownership
+      log.value,
+      log.grant,
+      log.discount,
+      log.mortgage, # 80
+      log.extrabor,
+      log.deposit,
+      log.mscharge,
+
+      log.type, # outright sale
+      log.othtype,
+      nil,
+
+      log.value,
+      log.mortgage,
+      log.extrabor,
+      log.deposit, # 90
+      log.mscharge,
+
+      log.owning_organisation&.old_visible_id,
+      log.created_by&.email,
+      nil,
+      hhregres,
+      nil,
+      log.armedforcesspouse,
+      log.mortgagelender, # shared ownership
+      log.mortgagelenderother,
+      log.mortgagelender, # discounted ownership 100
+      log.mortgagelenderother,
+      log.mortgagelender, # outright ownership
+      log.mortgagelenderother,
+
+      log.hb,
+      log.mortlen, # shared ownership
+      log.mortlen, # discounted ownership
+      log.mortlen, # outright ownership
+
+      log.proplen, # discounted ownership
+      log.jointmore,
+      log.proplen, # shared ownership 110
+      log.staircase,
+      log.privacynotice,
+      log.ownershipsch,
+      log.companybuy, # outright sale
+      log.buylivein,
+      log.jointpur,
+      log.buy1livein,
+      log.buy2livein,
+      log.hholdcount,
+      log.stairbought, # 120
+      log.stairowned,
+      log.socprevten,
+      log.mortgageused, # shared ownership
+      log.mortgageused, # discounted ownership
+      log.mortgageused, # outright ownership
+    ]
+  end
+
 private
 
   def renewal
@@ -279,4 +433,13 @@ private
       1
     end
   end
+
+  # Reports hhregresstill when hhregres is 1, otherwise hhregres itself.
+  def hhregres
+    if log.hhregres == 1
+      log.hhregresstill
+    else
+      log.hhregres
+    end
+  end
 end