From 5b9761a92c934916ac9b5bf27ba32ed1e1ee92b7 Mon Sep 17 00:00:00 2001 From: Gregor Schmidt Date: Mon, 2 May 2016 12:44:27 +0200 Subject: [PATCH 3/3] Mulit-pass CSV import If required by the subclass, the import now parses the CSV file in multiple passes. This is used by the CSV import to add parent relations to issues created at a later stage during the import. This makes it easier to produce the CSV, since the order within the file is no longer relevant. --- app/models/import.rb | 96 +++++++++++++++------ app/models/import_item.rb | 3 + app/models/issue_import.rb | 57 +++++++++---- ...6125940_add_completed_passes_to_import_items.rb | 9 ++ test/unit/issue_import_test.rb | 98 ++++++++++++++++++++++ 5 files changed, 222 insertions(+), 41 deletions(-) create mode 100644 db/migrate/20160426125940_add_completed_passes_to_import_items.rb diff --git a/app/models/import.rb b/app/models/import.rb index 22b90b6..2cfb389 100644 --- a/app/models/import.rb +++ b/app/models/import.rb @@ -139,44 +139,74 @@ class Import < ActiveRecord::Base end # Imports items and returns the position of the last processed item - def run(options={}) + def run(options = {}, current_pass = completed_passes + 1) + if current_pass > required_passes + # Abort recursion + + update_attribute :finished, true + remove_file + + return total_items + end + max_items = options[:max_items] max_time = options[:max_time] - current = 0 imported = 0 - resume_after = items.maximum(:position) || 0 interrupted = false started_on = Time.now - read_items do |row, position| - if (max_items && imported >= max_items) || (max_time && Time.now >= started_on + max_time) + current = run_pass(options, current_pass) do |position| + if (max_items && imported >= max_items) || (max_time && Time.now - started_on >= max_time) + # interrupt import interrupted = true - break + false + else + # continue importing item + imported += 1 + true end - if position > resume_after - item = items.build - item.position = position - - if object = build_object(row) - if object.save - item.obj_id = object.id - else - item.message = object.errors.full_messages.join("\n") - end - end + end - item.save! - imported += 1 + if !interrupted + update_attribute(:total_items, current) if total_items.nil? + + if max_items + options = options.merge(:max_items => max_items - imported) end - current = position + if max_time + options = options.merge(:max_time => max_time - (Time.now - started_on)) + end + + run(options, current_pass + 1) + else + current end + end + + def run_pass(options={}, current_pass) + resume_after = items.where(:completed_passes => current_pass).maximum(:position) || 0 - if imported == 0 || interrupted == false - if total_items.nil? - update_attribute :total_items, current + current = 0 + + read_items do |row, position| + next unless position > resume_after + + break unless yield(position) + + current = position + + item = items.where(:position => position).first_or_initialize + + if object = build_object(row, item, current_pass) + if object.save + item.obj_id = object.id + else + item.message = object.errors.full_messages.join("\n") + end end - update_attribute :finished, true - remove_file + + item.completed_passes = current_pass + item.save! end current @@ -190,6 +220,19 @@ class Import < ActiveRecord::Base items.where("obj_id IS NOT NULL") end + # Should be overridden in sub class to implement multi-pass import + def required_passes + 1 + end + + def completed_passes + if total_items.present? && items.count == total_items + items.minimum(:completed_passes) + else + 0 + end + end + private def read_rows @@ -223,7 +266,8 @@ class Import < ActiveRecord::Base # Builds a record for the given row and returns it # To be implemented by subclasses - def build_object(row) + def build_object(row, item, pass) + raise NotImplementedError, "Subclass responsibility" end # Generates a filename used to store the import file diff --git a/app/models/import_item.rb b/app/models/import_item.rb index c02c0bc..d8ab382 100644 --- a/app/models/import_item.rb +++ b/app/models/import_item.rb @@ -19,4 +19,7 @@ class ImportItem < ActiveRecord::Base belongs_to :import validates_presence_of :import_id, :position + + validates_numericality_of :completed_passes, :only_integer => true, + :greater_than_or_equal_to => 0 end diff --git a/app/models/issue_import.rb b/app/models/issue_import.rb index b6b20a1..dc822ef 100644 --- a/app/models/issue_import.rb +++ b/app/models/issue_import.rb @@ -57,13 +57,32 @@ class IssueImport < Import mapping['create_versions'] == '1' end + def required_passes + if mapping['parent_issue_id'] + 2 + else + 1 + end + end + private - def build_object(row) - issue = Issue.new - issue.author = user + def build_object(row, item, pass) + if (pass == 1) + issue = Issue.new + issue.author = user + build_issue(row, issue) + else + issue = Issue.find(item.obj_id) + build_relations(row, issue) + end issue.notify = false + issue + end + + + def build_issue(row, issue) attributes = { 'project_id' => mapping['project_id'], 'tracker_id' => mapping['tracker_id'], @@ -110,18 +129,6 @@ class IssueImport < Import attributes['is_private'] = '1' end end - if parent_issue_id = row_value(row, 'parent_issue_id') - if parent_issue_id =~ /\A(#)?(\d+)\z/ - parent_issue_id = $2 - if $1 - attributes['parent_issue_id'] = parent_issue_id - elsif issue_id = items.where(:position => parent_issue_id).first.try(:obj_id) - attributes['parent_issue_id'] = issue_id - end - else - attributes['parent_issue_id'] = parent_issue_id - end - end if start_date = row_date(row, 'start_date') attributes['start_date'] = start_date end @@ -151,4 +158,24 @@ class IssueImport < Import issue.send :safe_attributes=, attributes, user issue end + + def build_relations(row, issue) + attributes = {} + + if parent_issue_id = row_value(row, 'parent_issue_id') + if parent_issue_id =~ /\A(#)?(\d+)\z/ + parent_issue_id = $2 + if $1 + attributes['parent_issue_id'] = parent_issue_id + elsif issue_id = items.where(:position => parent_issue_id).first.try(:obj_id) + attributes['parent_issue_id'] = issue_id + end + else + attributes['parent_issue_id'] = parent_issue_id + end + end + + issue.send :safe_attributes=, attributes, user + issue + end end diff --git a/db/migrate/20160426125940_add_completed_passes_to_import_items.rb b/db/migrate/20160426125940_add_completed_passes_to_import_items.rb new file mode 100644 index 0000000..aa30d8d --- /dev/null +++ b/db/migrate/20160426125940_add_completed_passes_to_import_items.rb @@ -0,0 +1,9 @@ +class AddCompletedPassesToImportItems < ActiveRecord::Migration + def self.up + add_column :import_items, :completed_passes, :integer, :default => 0, :null => false + end + + def self.donw + remove_column :imports_item, :completed_passes + end +end diff --git a/test/unit/issue_import_test.rb b/test/unit/issue_import_test.rb index 3c1bd99..8e72fdb 100644 --- a/test/unit/issue_import_test.rb +++ b/test/unit/issue_import_test.rb @@ -130,4 +130,102 @@ class IssueImportTest < ActiveSupport::TestCase import.run assert !File.exists?(file_path) end + + def test_multi_step_run + # max_items < total_items + import = generate_import_with_mapping + + assert_difference 'Issue.count', 5 do + assert_equal 2, import.run(:max_items => 2) + assert_not import.finished + assert_equal 4, import.run(:max_items => 2) + assert_not import.finished + assert_equal 5, import.run(:max_items => 2) + assert import.finished + end + + + # max_items > total_items + import = generate_import_with_mapping + + assert_difference 'Issue.count', 5 do + assert_equal 5, import.run(:max_items => 6) + assert import.finished + end + + + # max_items == total_items + import = generate_import_with_mapping + + assert_difference 'Issue.count', 5 do + assert_equal 5, import.run(:max_items => 5) + assert import.finished + end + end + + def test_multi_step_multi_pass_run + # max_items < total_items + import = generate_import_with_mapping + import.mapping.merge!('parent_issue_id' => '5') + + assert_difference 'Issue.count', 5 do + assert_equal 2, import.run(:max_items => 2) + assert_not import.finished + + assert_equal 4, import.run(:max_items => 2) + assert_not import.finished + + assert_equal 1, import.run(:max_items => 2) + assert_not import.finished + + assert_equal 3, import.run(:max_items => 2) + assert_not import.finished + + assert_equal 5, import.run(:max_items => 2) + assert import.finished + end + + + # max_items > total_items + import = generate_import_with_mapping + import.mapping.merge!('parent_issue_id' => '5') + + assert_difference 'Issue.count', 5 do + assert_equal 1, import.run(:max_items => 6) + assert_not import.finished + + assert_equal 5, import.run(:max_items => 6) + assert import.finished + end + + + # max_items == total_items + import = generate_import_with_mapping + import.mapping.merge!('parent_issue_id' => '5') + + assert_difference 'Issue.count', 5 do + assert_equal 0, import.run(:max_items => 5) + assert_not import.finished + + assert_equal 5, import.run(:max_items => 5) + assert import.finished + end + end + + def test_required_passes + # Imports w/o relation mappings need just a single pass + import = generate_import_with_mapping + + assert_equal 1, import.required_passes + import.run + assert_equal 1, import.completed_passes + + # Imports w/ references to other rows need 2 passes + import = generate_import_with_mapping + import.mapping.merge!('parent_issue_id' => '5') + + assert_equal 2, import.required_passes + import.run + assert_equal 2, import.completed_passes + end end -- 2.8.0