From 25d5c0572f55924277a81e1061c9f78a7f1b9501 Mon Sep 17 00:00:00 2001 From: Jens Kraemer Date: Wed, 14 Jun 2017 13:10:13 +0800 Subject: [PATCH 3/5] store fulltext in the attachment model and make it searchable - introduces a configuration option to disable the feature if desired (enabled by default) - adds the fulltext text column to attachments table and populates it after an attachment is created (if activated) - adds attachments.fulltext to the columns searched when attachments are searched (if activated) - testcase --- app/models/attachment.rb | 20 ++++++++++++++++++++ config/configuration.yml.example | 9 +++++++++ .../20170613064930_add_fulltext_to_attachments.rb | 5 +++++ .../acts_as_searchable/lib/acts_as_searchable.rb | 7 ++++++- lib/redmine/configuration.rb | 1 + lib/tasks/redmine.rake | 5 +++++ test/unit/attachment_test.rb | 10 ++++++++++ 7 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20170613064930_add_fulltext_to_attachments.rb diff --git a/app/models/attachment.rb b/app/models/attachment.rb index e2bc570..0ee8637 100644 --- a/app/models/attachment.rb +++ b/app/models/attachment.rb @@ -57,6 +57,7 @@ class Attachment < ActiveRecord::Base after_rollback :delete_from_disk, :on => :create after_commit :delete_from_disk, :on => :destroy after_commit :reuse_existing_file_if_possible, :on => :create + after_commit :extract_fulltext, :on => :create safe_attributes 'filename', 'content_type', 'description' @@ -414,6 +415,15 @@ class Attachment < ActiveRecord::Base digest.size < 64 ? "MD5" : "SHA256" if digest.present? end + def extract_fulltext + if Redmine::Configuration['enable_fulltext_search'] and + readable? 
and + text = Redmine::TextExtractor.new(self).text + + update_column :fulltext, text + end + end + private def reuse_existing_file_if_possible @@ -472,6 +482,16 @@ class Attachment < ActiveRecord::Base time.strftime("%Y/%m") end + def self.extract_fulltext + if Redmine::Configuration['enable_fulltext_search'] + Attachment.where(fulltext: nil).find_in_batches do |group| + group.each{|a| a.extract_fulltext} + end + else + logger.info "fulltext search is disabled, check configuration.yml" + end + end + # Returns an ASCII or hashed filename that do not # exists yet in the given subdirectory def self.disk_filename(filename, directory=nil) diff --git a/config/configuration.yml.example b/config/configuration.yml.example index aac978a..0974983 100644 --- a/config/configuration.yml.example +++ b/config/configuration.yml.example @@ -212,6 +212,15 @@ default: # allowed values: :memory, :file, :memcache #openid_authentication_store: :memory + + # Enable fulltext extraction and fulltext search in attachments. + # To make existing attachments fulltext searchable, run + # rake redmine:attachments:extract_fulltext + # + # Enabled by default. + # + # enable_fulltext_search: false + # Text extraction helper programs. # # commands should write the resulting plain text to STDOUT. Use __FILE__ as diff --git a/db/migrate/20170613064930_add_fulltext_to_attachments.rb b/db/migrate/20170613064930_add_fulltext_to_attachments.rb new file mode 100644 index 0000000..c3d9ca5 --- /dev/null +++ b/db/migrate/20170613064930_add_fulltext_to_attachments.rb @@ -0,0 +1,5 @@ +class AddFulltextToAttachments < ActiveRecord::Migration + def change + add_column :attachments, :fulltext, :text, :limit => 4.megabytes # room for at least 1 million characters / approx. 
80 pages of English text + end +end diff --git a/lib/plugins/acts_as_searchable/lib/acts_as_searchable.rb b/lib/plugins/acts_as_searchable/lib/acts_as_searchable.rb index 3975169..a318ea6 100644 --- a/lib/plugins/acts_as_searchable/lib/acts_as_searchable.rb +++ b/lib/plugins/acts_as_searchable/lib/acts_as_searchable.rb @@ -132,10 +132,15 @@ module Redmine end if searchable_options[:search_attachments] && (options[:titles_only] ? options[:attachments] == 'only' : options[:attachments] != '0') + attachment_columns = ["#{Attachment.table_name}.filename", "#{Attachment.table_name}.description"] + if Redmine::Configuration['enable_fulltext_search'] + attachment_columns << "#{Attachment.table_name}.fulltext" + end + r |= fetch_ranks_and_ids( search_scope(user, projects, options). joins(:attachments). - where(search_tokens_condition(["#{Attachment.table_name}.filename", "#{Attachment.table_name}.description"], tokens, options[:all_words])), + where(search_tokens_condition(attachment_columns, tokens, options[:all_words])), options[:limit] ) queries += 1 diff --git a/lib/redmine/configuration.rb b/lib/redmine/configuration.rb index 9ab008e..5c203de 100644 --- a/lib/redmine/configuration.rb +++ b/lib/redmine/configuration.rb @@ -21,6 +21,7 @@ module Redmine # Configuration default values @defaults = { 'email_delivery' => nil, + 'enable_fulltext_search' => true, 'max_concurrent_ajax_uploads' => 2 } diff --git a/lib/tasks/redmine.rake b/lib/tasks/redmine.rake index 734cad0..774f1ea 100644 --- a/lib/tasks/redmine.rake +++ b/lib/tasks/redmine.rake @@ -31,6 +31,11 @@ namespace :redmine do task :update_digests => :environment do Attachment.update_digests_to_sha256 end + + desc 'Makes existing attachments fulltext searchable' + task :extract_fulltext => :environment do + Attachment.extract_fulltext + end end namespace :tokens do diff --git a/test/unit/attachment_test.rb b/test/unit/attachment_test.rb index acecd2b..6c41c08 100644 --- a/test/unit/attachment_test.rb +++ 
b/test/unit/attachment_test.rb @@ -447,4 +447,14 @@ class AttachmentTest < ActiveSupport::TestCase puts '(ImageMagick convert not available)' end + def test_should_extract_fulltext + a = Attachment.create( + :container => Issue.find(1), + :file => uploaded_test_file("testfile.txt", "text/plain"), + :author => User.find(1), + :content_type => 'text/plain') + a.reload + assert a.fulltext.include?("this is a text file for upload tests\r\nwith multiple lines") + end + end -- 2.1.4