diff --git a/app/services/bulk_upload/lettings/year2023/csv_parser.rb b/app/services/bulk_upload/lettings/year2023/csv_parser.rb
new file mode 100644
index 000000000..6b441ab7b
--- /dev/null
+++ b/app/services/bulk_upload/lettings/year2023/csv_parser.rb
@@ -0,0 +1,88 @@
+require "csv"
+
+# Parses a 2023 lettings bulk upload CSV into one RowParser per data row.
+#
+# Two layouts are handled: the full template, which has descriptive header
+# rows and a leading label column (detected by "Question" in the first cell),
+# and a bare file that contains data rows only.
+class BulkUpload::Lettings::Year2023::CsvParser
+  attr_reader :path
+
+  # @param path [String] location of the CSV file to parse
+  def initialize(path:)
+    @path = path
+  end
+
+  # Number of leading rows to skip before the first data row.
+  def row_offset
+    with_headers? ? 7 : 0
+  end
+
+  # Number of leading columns to skip before the first data column.
+  def col_offset
+    with_headers? ? 1 : 0
+  end
+
+  # Spreadsheet-style column letters, "A" through "EL".
+  def cols
+    @cols ||= ("A".."EL").to_a
+  end
+
+  # Builds one RowParser per data row, keyed by "field_N" attribute names.
+  def row_parsers
+    @row_parsers ||= body_rows.map do |row|
+      # `drop` never returns nil (unlike `row[col_offset..]` on a blank line).
+      attributes = field_numbers.zip(row.drop(col_offset)).to_h
+
+      BulkUpload::Lettings::Year2023::RowParser.new(attributes)
+    end
+  end
+
+  # Data rows only; empty when the file is shorter than its header block.
+  def body_rows
+    rows[row_offset..] || []
+  end
+
+  # Every parsed row of the file, header rows included.
+  def rows
+    @rows ||= CSV.parse(normalised_string, row_sep:)
+  end
+
+private
+
+  # Field numbers in default template column order, used for headerless files.
+  def default_field_numbers
+    to_field_names([5, nil, nil, 15, 16, nil, 13, 40, 41, 42, 43, 46, 52, 56, 60, 64, 68, 72, 76, 47, 53, 57, 61, 65, 69, 73, 77, 51, 55, 59, 63, 67, 71, 75, 50, 54, 58, 62, 66, 70, 74, 78, 48, 49, 79, 81, 82, 123, 124, 122, 120, 102, 103, nil, 83, 84, 85, 86, 87, 88, 104, 109, 107, 108, 106, 100, 101, 105, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 126, 128, 129, 130, 131, 132, 127, 125, 133, 134, 33, 34, 35, 36, 37, 38, nil, 7, 8, 9, 28, 14, 32, 29, 30, 31, 26, 27, 25, 23, 24, nil, 1, 3, 2, 80, nil, 121, 44, 89, 98, 92, 95, 90, 91, 93, 94, 97, 96, 99, 10, 11, 12, 45, 39, 6, 4, 17, 18, 19, 20, 21, 22])
+  end
+
+  # Attribute name for each data column: taken from the field number row
+  # directly above the data when headers are present, otherwise the default
+  # template order is assumed.
+  def field_numbers
+    @field_numbers ||= with_headers? ? header_field_numbers : default_field_numbers
+  end
+
+  def header_field_numbers
+    to_field_names(Array(rows[row_offset - 1]).drop(col_offset))
+  end
+
+  # Blank cells all map to the shared "field_blank" attribute.
+  def to_field_names(numbers)
+    numbers.map { |number| number.present? ? "field_#{number}" : "field_blank" }
+  end
+
+  # `dig` and `to_s` keep this safe for an empty file or empty first cell.
+  def with_headers?
+    rows.dig(0, 0).to_s.match?(/Question/)
+  end
+
+  def row_sep
+    "\n"
+  end
+
+  # File contents as UTF-8: byte order mark stripped, invalid bytes removed
+  # (before line endings are touched) and "\r\n" normalised to "\n".
+  def normalised_string
+    @normalised_string ||= File.read(path, encoding: "bom|utf-8").scrub("").gsub("\r\n", "\n")
+  end
+end
diff --git a/app/services/bulk_upload/lettings/year2023/row_parser.rb b/app/services/bulk_upload/lettings/year2023/row_parser.rb
index ecc094926..c3ab05c24 100644
--- a/app/services/bulk_upload/lettings/year2023/row_parser.rb
+++ b/app/services/bulk_upload/lettings/year2023/row_parser.rb
@@ -142,6 +142,8 @@ class BulkUpload::Lettings::Year2023::RowParser
   attribute :bulk_upload
   attribute :block_log_creation, :boolean, default: -> { false }
 
+  attribute :field_blank
+
   attribute :field_1, :string
   attribute :field_2, :string
   attribute :field_3, :string
@@ -268,14 +270,14 @@ class BulkUpload::Lettings::Year2023::RowParser
   attribute :field_124, :integer
   attribute :field_125, :integer
   attribute :field_126, :integer
-  attribute :field_127, :double
-  attribute :field_128, :double
-  attribute :field_129, :double
-  attribute :field_130, :double
-  attribute :field_131, :double
-  attribute :field_132, :double
+  attribute :field_127, :decimal
+  attribute :field_128, :decimal
+  attribute :field_129, :decimal
+  attribute :field_130, :decimal
+  attribute :field_131, :decimal
+  attribute :field_132, :decimal
   attribute :field_133, :integer
-  attribute :field_134, :double
+  attribute :field_134, :decimal
 
   def self.question_for_field(field)
     QUESTIONS[field]
diff --git a/spec/services/bulk_upload/lettings/year2023/csv_parser_spec.rb b/spec/services/bulk_upload/lettings/year2023/csv_parser_spec.rb
new file mode 100644
index 000000000..af063e7bf
--- /dev/null
+++ b/spec/services/bulk_upload/lettings/year2023/csv_parser_spec.rb
@@ -0,0 +1,103 @@
+require "rails_helper"
+
+RSpec.describe BulkUpload::Lettings::Year2023::CsvParser do
+  subject(:service) { described_class.new(path:) }
+
+  let(:file) { Tempfile.new }
+  let(:path) { file.path }
+  let(:log) { build(:lettings_log, :completed) }
+
+  context "when parsing csv with headers" do
+    before do
+      file.write("Question\n")
+      file.write("Additional info\n")
+      file.write("Values\n")
+      file.write("Can be empty?\n")
+      file.write("Type of letting the question applies to\n")
+      file.write("Duplicate check field?\n")
+      file.write(BulkUpload::LogToCsv.new(log:).default_2023_field_numbers_row)
+      file.write(BulkUpload::LogToCsv.new(log:).to_2023_csv_row)
+      file.rewind
+    end
+
+    it "returns correct offsets" do
+      expect(service.row_offset).to eq(7)
+      expect(service.col_offset).to eq(1)
+    end
+
+    it "parses csv correctly" do
+      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
+    end
+  end
+
+  context "when parsing csv with headers in arbitrary order" do
+    # An Integer seed is required: Random.new truncates a Float from `rand`
+    # to 0, which would make every run use the same column order.
+    let(:seed) { Random.new_seed }
+
+    before do
+      file.write("Question\n")
+      file.write("Additional info\n")
+      file.write("Values\n")
+      file.write("Can be empty?\n")
+      file.write("Type of letting the question applies to\n")
+      file.write("Duplicate check field?\n")
+      file.write(BulkUpload::LogToCsv.new(log:).default_2023_field_numbers_row(seed:))
+      file.write(BulkUpload::LogToCsv.new(log:).to_2023_csv_row(seed:))
+      file.rewind
+    end
+
+    it "returns correct offsets" do
+      expect(service.row_offset).to eq(7)
+      expect(service.col_offset).to eq(1)
+    end
+
+    it "parses csv correctly" do
+      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
+    end
+  end
+
+  context "when parsing csv without headers" do
+    before do
+      file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2023_csv_row)
+      file.rewind
+    end
+
+    it "returns correct offsets" do
+      expect(service.row_offset).to eq(0)
+      expect(service.col_offset).to eq(0)
+    end
+
+    it "parses csv correctly" do
+      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
+    end
+  end
+
+  context "when parsing with BOM aka byte order mark" do
+    let(:bom) { "\uFEFF" }
+
+    before do
+      file.write(bom)
+      file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2023_csv_row)
+      file.close
+    end
+
+    it "parses csv correctly" do
+      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
+    end
+  end
+
+  context "when an invalid byte sequence" do
+    let(:invalid_sequence) { "\x81" }
+
+    before do
+      file.write(invalid_sequence)
+      file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2023_csv_row)
+      file.close
+    end
+
+    it "parses csv correctly" do
+      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
+    end
+  end
+end
diff --git a/spec/support/bulk_upload/log_to_csv.rb b/spec/support/bulk_upload/log_to_csv.rb
index f11adea3d..4ad8f4c98 100644
--- a/spec/support/bulk_upload/log_to_csv.rb
+++ b/spec/support/bulk_upload/log_to_csv.rb
@@ -8,29 +8,50 @@ class BulkUpload::LogToCsv
     @overrides = overrides
   end
 
+  # Empty leading cells that shift the data right by `col_offset` columns.
+  def prefix_offset
+    [nil] * col_offset
+  end
+
   def to_2022_csv_row
-    (to_2022_row + [line_ending]).flatten.join(",")
+    (prefix_offset + to_2022_row).flatten.join(",") + line_ending
   end
 
-  def to_2023_csv_row
-    (to_2023_row + [line_ending]).flatten.join(",")
+  # Pass `seed` to shuffle the columns. The same seed given to
+  # `default_2023_field_numbers_row` shuffles an equally long row identically.
+  def to_2023_csv_row(seed: nil)
+    row = to_2023_row
+    row = row.shuffle(random: Random.new(seed)) if seed
+
+    (prefix_offset + row).flatten.join(",") + line_ending
   end
 
   def to_2023_row
     to_2022_row + [
-      # needstype,
-      # location,
-      # uprn,
-      # address_line_1,
-      # address_line_2,
-      # town_or_city,
-      # county,
+      nil, # needstype,
+      nil, # location,
+      nil, # uprn,
+      nil, # address_line_1,
+      nil, # address_line_2,
+      nil, # town_or_city,
+      nil, # county,
     ]
   end
 
+  # Header row of field numbers; the first cell is the row's label.
+  def default_2023_field_numbers_row(seed: nil)
+    numbers = default_2023_field_numbers
+    numbers = numbers.shuffle(random: Random.new(seed)) if seed
+
+    (["Bulk upload field number"] + numbers).join(",") + line_ending
+  end
+
+  def default_2023_field_numbers
+    [5, nil, nil, 15, 16, nil, 13, 40, 41, 42, 43, 46, 52, 56, 60, 64, 68, 72, 76, 47, 53, 57, 61, 65, 69, 73, 77, 51, 55, 59, 63, 67, 71, 75, 50, 54, 58, 62, 66, 70, 74, 78, 48, 49, 79, 81, 82, 123, 124, 122, 120, 102, 103, nil, 83, 84, 85, 86, 87, 88, 104, 109, 107, 108, 106, 100, 101, 105, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 126, 128, 129, 130, 131, 132, 127, 125, 133, 134, 33, 34, 35, 36, 37, 38, nil, 7, 8, 9, 28, 14, 32, 29, 30, 31, 26, 27, 25, 23, 24, nil, 1, 3, 2, 80, nil, 121, 44, 89, 98, 92, 95, 90, 91, 93, 94, 97, 96, 99, 10, 11, 12, 45, 39, 6, 4, 17, 18, 19, 20, 21, 22]
+  end
+
   def to_2022_row
     [
-      [nil] * col_offset, # 0
       log.renttype, # 1
       nil,
       nil,