Browse Source

Add sales csv parser

pull/1574/head
Kat 3 years ago
parent
commit
aec4ee41d1
  1. 70
      app/services/bulk_upload/sales/year2022/csv_parser.rb
  2. 97
      spec/services/bulk_upload/sales/year2022/csv_parser_spec.rb
  3. 161
      spec/support/bulk_upload/log_to_csv.rb

70
app/services/bulk_upload/sales/year2022/csv_parser.rb

@ -0,0 +1,70 @@
require "csv"
# Reads a 2022 sales bulk upload CSV from disk and exposes its data rows as
# row parser objects.
#
# The file is treated as "with headers" when the first cell contains any
# non-digit character; in that case the first 5 rows and the first column are
# skipped. Otherwise every row and column is treated as data.
class BulkUpload::Sales::Year2022::CsvParser
  # Column-count bounds. Not referenced inside this class.
  # NOTE(review): presumably consumed by a validator elsewhere — confirm.
  MIN_COLUMNS = 125
  MAX_COLUMNS = 126

  attr_reader :path

  # @param path [String, Pathname] location of the CSV file to read
  def initialize(path:)
    @path = path
  end

  # @return [Integer] number of leading rows that are not data rows
  def row_offset
    with_headers? ? 5 : 0
  end

  # @return [Integer] number of leading columns that are not data columns
  def col_offset
    with_headers? ? 1 : 0
  end

  # @return [Array<String>] spreadsheet column letters "A" through "DV"
  def cols
    @cols ||= ("A".."DV").to_a
  end

  # Builds one row parser per data row, keyed by "field_1".."field_125".
  #
  # NOTE(review): this instantiates the *Lettings* row parser from inside the
  # Sales namespace, which looks like a copy-paste. Left unchanged because no
  # Sales row parser is visible from this file — confirm and switch if one
  # exists.
  #
  # @return [Array] row parser instances, memoized
  def row_parsers
    @row_parsers ||= body_rows.map do |row|
      # `drop` (unlike `row[col_offset..]`) yields [] rather than nil for a
      # blank line, so zipping never receives nil.
      data_cells = row.drop(col_offset)
      BulkUpload::Lettings::Year2022::RowParser.new(headers.zip(data_cells).to_h)
    end
  end

  # @return [Array<Array>] parsed rows after the header rows; empty when the
  #   file has fewer rows than `row_offset`
  def body_rows
    rows.drop(row_offset)
  end

  # @return [Array<Array>] every parsed row of the file, memoized
  def rows
    @rows ||= CSV.parse(normalised_string, row_sep:)
  end

  # Maps a field name to the spreadsheet column letter it occupies.
  #
  # @param field [String] a name such as "field_7"
  # @return [String, nil] the column letter, or nil when the field is unknown
  def column_for_field(field)
    index = headers.index(field)
    return if index.nil?

    cols[index + col_offset]
  end

private

  def headers
    @headers ||= ("field_1".."field_125").to_a
  end

  # True when the first cell of the first row contains a non-digit character.
  # `dig` keeps this safe for an empty file, where there is no first row.
  def with_headers?
    rows.dig(0, 0).to_s.match?(/\D/)
  end

  def row_sep
    "\n"
  end

  # File contents as UTF-8 with any BOM stripped, invalid byte sequences
  # removed and Windows line endings converted to "\n". Scrubbing happens
  # first so the line-ending replacement only ever sees valid text.
  def normalised_string
    @normalised_string ||= File.read(path, encoding: "bom|utf-8").scrub("").gsub("\r\n", "\n")
  end
end

97
spec/services/bulk_upload/sales/year2022/csv_parser_spec.rb

@ -0,0 +1,97 @@
require "rails_helper"
# Specs for the 2022 sales CSV parser: offset detection, row parsing, BOM and
# invalid-byte handling, and field-to-column lookup.
RSpec.describe BulkUpload::Sales::Year2022::CsvParser do
  subject(:service) { described_class.new(path:) }

  # Default input is the fixture file; the examples below expect it to be
  # detected as having header rows and a leading column.
  let(:path) { file_fixture("2022_23_sales_bulk_upload.csv") }

  # Writes `file_prefix` followed by a single headerless data row for a
  # completed sales log into a tempfile and points `path` at it. The file is
  # closed so the content is flushed before the parser reads it, and removed
  # after each example.
  shared_context "with a generated csv without headers" do
    let(:file) { Tempfile.new }
    let(:path) { file.path }
    let(:log) { build(:sales_log, :completed) }
    let(:file_prefix) { "" }

    before do
      file.write(file_prefix)
      file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2022_sales_csv_row)
      file.close
    end

    after { file.unlink }
  end

  context "when parsing csv with headers" do
    it "returns correct offsets" do
      expect(service.row_offset).to eq(5)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_7.to_i).to eq(30)
    end
  end

  context "when parsing csv without headers" do
    include_context "with a generated csv without headers"

    it "returns correct offsets" do
      expect(service.row_offset).to eq(0)
      expect(service.col_offset).to eq(0)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_7.to_i).to eql(log.age1)
    end
  end

  context "when parsing with BOM aka byte order mark" do
    include_context "with a generated csv without headers"

    # Overrides the shared context's empty prefix; must come after the include.
    let(:file_prefix) { "\uFEFF" }

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_7.to_i).to eql(log.age1)
    end
  end

  context "when an invalid byte sequence" do
    include_context "with a generated csv without headers"

    # Overrides the shared context's empty prefix; must come after the include.
    let(:file_prefix) { "\x81" }

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_7.to_i).to eql(log.age1)
    end
  end

  describe "#column_for_field", aggregate_failures: true do
    context "when headers present" do
      it "returns correct column" do
        expect(service.column_for_field("field_1")).to eql("B")
        expect(service.column_for_field("field_125")).to eql("DV")
      end
    end

    context "when no headers" do
      include_context "with a generated csv without headers"

      it "returns correct column" do
        expect(service.column_for_field("field_1")).to eql("A")
        expect(service.column_for_field("field_125")).to eql("DU")
      end
    end
  end
end

161
spec/support/bulk_upload/log_to_csv.rb

@ -16,6 +16,10 @@ class BulkUpload::LogToCsv
(row_prefix + to_2022_row).flatten.join(",") + line_ending (row_prefix + to_2022_row).flatten.join(",") + line_ending
end end
# Renders the 2022 sales row as a single comma-joined line: the row prefix
# cells come first, nested arrays are flattened, and the configured line
# ending is appended.
def to_2022_sales_csv_row
  cells = (row_prefix + to_2022_sales_row).flatten
  joined = cells.join(",")
  joined + line_ending
end
def to_2023_csv_row(seed: nil) def to_2023_csv_row(seed: nil)
if seed if seed
row = to_2023_row.shuffle(random: Random.new(seed)) row = to_2023_row.shuffle(random: Random.new(seed))
@ -198,6 +202,155 @@ class BulkUpload::LogToCsv
] ]
end end
# Builds the ordered cell values for one 2022 sales bulk upload row, one
# entry per field (field_1 through field_125). Trailing numeric comments give
# the 1-based field number; other trailing comments name the ownership
# section the repeated attribute belongs to.
#
# Each of `log.grant` and `log.extrabor` appears exactly once in the
# discounted ownership section; listing either twice shifts every later
# column and breaks the 125-field layout.
#
# @return [Array] 125 values read from `log` and this class's helpers
def to_2022_sales_row
  # Postcodes are emitted as two cells (the parts before and after the
  # space). A missing postcode splits to [], so both cells are nil.
  previous_postcode_parts = (log.ppostcode_full || "").split(" ")
  postcode_parts = (log.postcode_full || "").split(" ")

  [
    log.purchid, # 1
    log.saledate&.day,
    log.saledate&.month,
    log.saledate&.strftime("%y"),
    nil,
    log.noint,
    log.age1,
    log.age2,
    log.age3,
    log.age4, # 10
    log.age5,
    log.age6,
    log.sex1,
    log.sex2,
    log.sex3,
    log.sex4,
    log.sex5,
    log.sex6,
    log.relat2,
    log.relat3, # 20
    log.relat4,
    log.relat5,
    log.relat6,
    log.ecstat1,
    log.ecstat2,
    log.ecstat3,
    log.ecstat4,
    log.ecstat5,
    log.ecstat6,
    log.ethnic, # 30
    log.national,
    log.income1,
    log.income2,
    log.inc1mort,
    log.inc2mort,
    log.savings,
    log.prevown,
    nil,
    log.prevten,
    log.prevloc, # 40
    previous_postcode_parts.first,
    previous_postcode_parts.last,
    previous_postcode_known,
    log.pregyrha,
    log.pregla,
    log.pregghb,
    log.pregother,
    log.disabled,
    log.wheel,
    log.beds, # 50
    log.proptype,
    log.builtype,
    log.la,
    postcode_parts.first,
    postcode_parts.last,
    log.wchair,
    log.type, # shared ownership
    log.resale,
    log.hodate&.day,
    log.hodate&.month, # 60
    log.hodate&.strftime("%y"),
    log.exdate&.day,
    log.exdate&.month,
    log.exdate&.strftime("%y"),
    log.lanomagr,
    log.frombeds,
    log.fromprop,
    log.value,
    log.equity,
    log.mortgage, # 70
    log.extrabor,
    log.deposit,
    log.cashdis,
    log.mrent,
    log.mscharge,
    log.type, # discounted ownership
    log.value,
    log.grant,
    log.discount,
    log.mortgage, # 80
    log.extrabor,
    log.deposit,
    log.mscharge,
    log.type, # outright sale
    log.othtype,
    nil,
    log.value,
    log.mortgage,
    log.extrabor,
    log.deposit, # 90
    log.mscharge,
    log.owning_organisation&.old_visible_id,
    log.created_by&.email,
    nil,
    hhregres,
    nil,
    log.armedforcesspouse,
    log.mortgagelender, # shared ownership
    log.mortgagelenderother,
    log.mortgagelender, # discounted ownership 100
    log.mortgagelenderother,
    log.mortgagelender, # outright ownership
    log.mortgagelenderother,
    log.hb,
    log.mortlen, # shared ownership
    log.mortlen, # discounted ownership
    log.mortlen, # outright ownership
    log.proplen, # discounted ownership
    log.jointmore,
    log.proplen, # shared ownership 110
    log.staircase,
    log.privacynotice,
    log.ownershipsch,
    log.companybuy, # outright sale
    log.buylivein,
    log.jointpur,
    log.buy1livein,
    log.buy2livein,
    log.hholdcount,
    log.stairbought, # 120
    log.stairowned,
    log.socprevten,
    log.mortgageused, # shared ownership
    log.mortgageused, # discounted ownership
    log.mortgageused, # outright ownership
  ]
end
private private
def renewal def renewal
@ -279,4 +432,12 @@ private
1 1
end end
end end
# Value for the hhregres cell: when the log's hhregres is 1 the log's
# hhregresstill value is used in its place; any other hhregres value
# (including nil) is passed through unchanged.
def hhregres
  return log.hhregresstill if log.hhregres == 1

  log.hhregres
end
end end

Loading…
Cancel
Save