From 68faee2efd0dd8aa72096f18330cdbf1c9b313c4 Mon Sep 17 00:00:00 2001 From: James Rose Date: Wed, 8 Feb 2023 14:01:13 +0000 Subject: [PATCH] Schedule data export tasks in sidekiq-cron (#952) * Schedule data export tasks in sidekiq-cron - Split combined XML and CSV data jobs into seperate background jobs - Add sidekiq-cron to the project - Schedule new XML and CSV jobs to run every day at 5am - Adjust data export Rake tasks to use new job * Add specs for DataExport{Xml,Csv}Job * x * Run bundle install * Change review app export bucket name --------- Co-authored-by: Kat --- .github/workflows/review_pipeline.yml | 2 +- Gemfile | 3 ++- Gemfile.lock | 17 ++++++++++++----- Procfile.dev | 1 + app/jobs/data_export_csv_job.rb | 10 ++++++++++ app/jobs/data_export_xml_job.rb | 10 ++++++++++ config/initializers/sidekiq.rb | 10 ++++++++++ config/sidekiq_cron_schedule.yml | 8 ++++++++ lib/tasks/data_export.rake | 17 +++++++---------- spec/jobs/data_export_csv_job_spec.rb | 19 +++++++++++++++++++ spec/jobs/data_export_xml_job_spec.rb | 27 +++++++++++++++++++++++++++ spec/lib/tasks/data_export_spec.rb | 18 ++++++------------ 12 files changed, 113 insertions(+), 29 deletions(-) create mode 100644 app/jobs/data_export_csv_job.rb create mode 100644 app/jobs/data_export_xml_job.rb create mode 100644 config/sidekiq_cron_schedule.yml create mode 100644 spec/jobs/data_export_csv_job_spec.rb create mode 100644 spec/jobs/data_export_xml_job_spec.rb diff --git a/.github/workflows/review_pipeline.yml b/.github/workflows/review_pipeline.yml index 1c7beff53..1e7ae0efb 100644 --- a/.github/workflows/review_pipeline.yml +++ b/.github/workflows/review_pipeline.yml @@ -118,7 +118,7 @@ jobs: cf set-env $APP_NAME GOVUK_NOTIFY_API_KEY $GOVUK_NOTIFY_API_KEY cf set-env $APP_NAME RAILS_MASTER_KEY $RAILS_MASTER_KEY cf set-env $APP_NAME IMPORT_PAAS_INSTANCE $IMPORT_PAAS_INSTANCE - cf set-env $APP_NAME EXPORT_PAAS_INSTANCE $EXPORT_PAAS_INSTANCE + cf set-env $APP_NAME EXPORT_PAAS_INSTANCE "dluhc-core-review-export-bucket" cf set-env $APP_NAME S3_CONFIG $S3_CONFIG cf set-env $APP_NAME CSV_DOWNLOAD_PAAS_INSTANCE "dluhc-core-review-csv-bucket" cf set-env $APP_NAME SENTRY_DSN $SENTRY_DSN diff --git a/Gemfile b/Gemfile index 02bcdf3b7..b20b7fe97 100644 --- a/Gemfile +++ b/Gemfile @@ -50,7 +50,7 @@ gem "paper_trail" gem "paper_trail-globalid" # Request rate limiting gem "rack-attack" -gem "redis" +gem "redis", "~> 4.8" # Receive exceptions and configure alerts gem "sentry-rails" gem "sentry-ruby" @@ -60,6 +60,7 @@ gem "possessive" gem "auto_strip_attributes" # Use sidekiq for background processing gem "sidekiq" +gem "sidekiq-cron" group :development, :test do # Check gems for known vulnerabilities diff --git a/Gemfile.lock b/Gemfile.lock index cb46ee64a..76ab1021c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -158,6 +158,8 @@ GEM rubocop smart_properties erubi (1.12.0) + et-orbi (1.2.7) + tzinfo excon (0.92.5) factory_bot (6.2.1) activesupport (>= 5.0.0) @@ -167,6 +169,9 @@ GEM faker (2.23.0) i18n (>= 1.8.11, < 2) ffi (1.15.5) + fugit (1.8.1) + et-orbi (~> 1, >= 1.2.7) + raabro (~> 1.4) globalid (1.0.1) activesupport (>= 5.0) govuk-components (3.2.1) @@ -267,6 +272,7 @@ GEM public_suffix (5.0.0) puma (5.6.5) nio4r (~> 2.0) + raabro (1.4.0) racc (1.6.2) rack (2.2.6.2) rack-attack (6.6.1) @@ -308,10 +314,7 @@ GEM rb-inotify (0.10.1) ffi (~> 1.0) redcarpet (3.5.1) - redis (5.0.5) - redis-client (>= 0.9.0) - redis-client (0.9.0) - connection_pool + redis (4.8.0) regexp_parser (2.5.0) request_store (1.5.1) rack (>= 1.4) @@ -385,6 +388,9 @@ GEM connection_pool (>= 2.2.2) rack (~> 2.0) redis (>= 4.5.0) + sidekiq-cron (1.8.0) + fugit (~> 1) + sidekiq (>= 4.2.1) simplecov (0.21.2) docile (~> 1.1) simplecov-html (~> 0.11) @@ -467,7 +473,7 @@ DEPENDENCIES rack-attack rack-mini-profiler (~> 2.0) rails (~> 7.0.2) - redis + redis (~> 4.8) roo rspec-rails rubocop-govuk (= 4.3.0) @@ -477,6 +483,7 @@ DEPENDENCIES sentry-rails sentry-ruby sidekiq + sidekiq-cron simplecov stimulus-rails timecop (~> 0.9.4) diff --git a/Procfile.dev b/Procfile.dev index 03c54b1d7..59b48dfe2 100644 --- a/Procfile.dev +++ b/Procfile.dev @@ -1,2 +1,3 @@ web: bin/rails server -p 3000 +redis: redis-server js: yarn build --watch diff --git a/app/jobs/data_export_csv_job.rb b/app/jobs/data_export_csv_job.rb new file mode 100644 index 000000000..8db19a202 --- /dev/null +++ b/app/jobs/data_export_csv_job.rb @@ -0,0 +1,10 @@ +class DataExportCsvJob < ApplicationJob + queue_as :default + + def perform + storage_service = Storage::S3Service.new(Configuration::PaasConfigurationService.new, ENV["EXPORT_PAAS_INSTANCE"]) + export_service = Exports::LettingsLogExportService.new(storage_service) + + export_service.export_csv_lettings_logs + end +end diff --git a/app/jobs/data_export_xml_job.rb b/app/jobs/data_export_xml_job.rb new file mode 100644 index 000000000..b26b65364 --- /dev/null +++ b/app/jobs/data_export_xml_job.rb @@ -0,0 +1,10 @@ +class DataExportXmlJob < ApplicationJob + queue_as :default + + def perform(full_update: false) + storage_service = Storage::S3Service.new(Configuration::PaasConfigurationService.new, ENV["EXPORT_PAAS_INSTANCE"]) + export_service = Exports::LettingsLogExportService.new(storage_service) + + export_service.export_xml_lettings_logs(full_update:) + end +end diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb index 59e0064d8..597de6400 100644 --- a/config/initializers/sidekiq.rb +++ b/config/initializers/sidekiq.rb @@ -1,4 +1,5 @@ require "sidekiq/web" +require "sidekiq/cron/web" if Rails.env.staging? || Rails.env.production? redis_url = Configuration::PaasConfigurationService.new.redis_uris[:"dluhc-core-#{Rails.env}-redis"] @@ -23,3 +24,12 @@ if Rails.env.review? config.redis = { url: redis_url } end end + +# Until https://github.com/sidekiq-cron/sidekiq-cron/issues/357 is fixed. +Redis.silence_deprecations = true + +Sidekiq.configure_server do |config| + config.on(:startup) do + Sidekiq::Cron::Job.load_from_hash YAML.load_file("config/sidekiq_cron_schedule.yml") + end +end diff --git a/config/sidekiq_cron_schedule.yml b/config/sidekiq_cron_schedule.yml new file mode 100644 index 000000000..46cb10ff2 --- /dev/null +++ b/config/sidekiq_cron_schedule.yml @@ -0,0 +1,8 @@ +data_export_csv: + cron: "every day at 5am" + class: "DataExportCsvJob" + queue: default +data_export_xml: + cron: "every day at 5am" + class: "DataExportXmlJob" + queue: default diff --git a/lib/tasks/data_export.rake b/lib/tasks/data_export.rake index 5bf9649af..17f258293 100644 --- a/lib/tasks/data_export.rake +++ b/lib/tasks/data_export.rake @@ -1,17 +1,14 @@ namespace :core do + desc "Export data CSVs for import into Central Data System (CDS)" + task data_export_csv: :environment do |_task, _args| + DataExportCsvJob.perform_later + end + desc "Export data XMLs for import into Central Data System (CDS)" - task :data_export, %i[format full_update] => :environment do |_task, args| - format = args[:format] + task :data_export_xml, %i[full_update] => :environment do |_task, args| full_update = args[:full_update].present? && args[:full_update] == "true" - storage_service = Storage::S3Service.new(Configuration::PaasConfigurationService.new, ENV["EXPORT_PAAS_INSTANCE"]) - export_service = Exports::LettingsLogExportService.new(storage_service) - - if format.present? && format == "CSV" - export_service.export_csv_lettings_logs - else - export_service.export_xml_lettings_logs(full_update:) - end + DataExportXmlJob.perform_later(full_update:) end end diff --git a/spec/jobs/data_export_csv_job_spec.rb b/spec/jobs/data_export_csv_job_spec.rb new file mode 100644 index 000000000..cf06752c7 --- /dev/null +++ b/spec/jobs/data_export_csv_job_spec.rb @@ -0,0 +1,19 @@ +require "rails_helper" + +describe DataExportCsvJob do + let(:storage_service) { instance_double(Storage::S3Service) } + let(:paas_config_service) { instance_double(Configuration::PaasConfigurationService) } + let(:export_service) { instance_double(Exports::LettingsLogExportService) } + + before do + allow(Storage::S3Service).to receive(:new).and_return(storage_service) + allow(Configuration::PaasConfigurationService).to receive(:new).and_return(paas_config_service) + allow(Exports::LettingsLogExportService).to receive(:new).and_return(export_service) + end + + it "calls the export service" do + expect(export_service).to receive(:export_csv_lettings_logs) + + described_class.perform_now + end +end diff --git a/spec/jobs/data_export_xml_job_spec.rb b/spec/jobs/data_export_xml_job_spec.rb new file mode 100644 index 000000000..748c8201f --- /dev/null +++ b/spec/jobs/data_export_xml_job_spec.rb @@ -0,0 +1,27 @@ +require "rails_helper" + +describe DataExportXmlJob do + let(:storage_service) { instance_double(Storage::S3Service) } + let(:paas_config_service) { instance_double(Configuration::PaasConfigurationService) } + let(:export_service) { instance_double(Exports::LettingsLogExportService) } + + before do + allow(Storage::S3Service).to receive(:new).and_return(storage_service) + allow(Configuration::PaasConfigurationService).to receive(:new).and_return(paas_config_service) + allow(Exports::LettingsLogExportService).to receive(:new).and_return(export_service) + end + + it "calls the export service" do + expect(export_service).to receive(:export_xml_lettings_logs) + + described_class.perform_now + end + + context "with full update enabled" do + it "calls the export service" do + expect(export_service).to receive(:export_xml_lettings_logs).with(full_update: true) + + described_class.perform_now(full_update: true) + end + end +end diff --git a/spec/lib/tasks/data_export_spec.rb b/spec/lib/tasks/data_export_spec.rb index cca6e2dcd..e1242e6b6 100644 --- a/spec/lib/tasks/data_export_spec.rb +++ b/spec/lib/tasks/data_export_spec.rb @@ -2,8 +2,6 @@ require "rails_helper" require "rake" describe "rake core:data_export", type: task do - subject(:task) { Rake::Task["core:data_export"] } - let(:paas_instance) { "paas_export_instance" } let(:storage_service) { instance_double(Storage::S3Service) } let(:paas_config_service) { instance_double(Configuration::PaasConfigurationService) } @@ -22,22 +20,18 @@ describe "rake core:data_export", type: task do end context "when exporting lettings logs with no parameters" do - it "starts the XML export process" do - expect(Storage::S3Service).to receive(:new).with(paas_config_service, paas_instance) - expect(Exports::LettingsLogExportService).to receive(:new).with(storage_service) - expect(export_service).to receive(:export_xml_lettings_logs) + let(:task) { Rake::Task["core:data_export_xml"] } - task.invoke + it "starts the XML export process" do + expect { task.invoke }.to enqueue_job(DataExportXmlJob) end end context "when exporting lettings logs with CSV format" do - it "starts the CSV export process" do - expect(Storage::S3Service).to receive(:new).with(paas_config_service, paas_instance) - expect(Exports::LettingsLogExportService).to receive(:new).with(storage_service) - expect(export_service).to receive(:export_csv_lettings_logs) + let(:task) { Rake::Task["core:data_export_csv"] } - task.invoke("CSV", "false") + it "starts the CSV export process" do + expect { task.invoke }.to enqueue_job(DataExportCsvJob) end end end