From 315f82fd95c982c5d1faa0b5162c75dfdfee1481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marius=20B=C4=82LTEANU?= Date: Sun, 12 Oct 2025 11:57:47 +0300 Subject: [PATCH] Replacing html-pipeline with Loofah for HTML Filtering (#42737). diff --git a/Gemfile b/Gemfile index 41ecfab30..577a8497d 100644 --- a/Gemfile +++ b/Gemfile @@ -37,9 +37,8 @@ gem 'tzinfo-data', platforms: [:mingw, :x64_mingw, :mswin] gem 'rotp', '>= 5.0.0' gem 'rqrcode' -# HTML pipeline and sanitization -gem "html-pipeline", "~> 2.13.2" -gem "sanitize", "~> 6.0" +# HTML sanitization +gem "sanitize", "~> 7.0" # Triggering of Webhooks gem "rest-client", "~> 2.1" diff --git a/lib/redmine/wiki_formatting/common_mark/alerts_icons_filter.rb b/lib/redmine/wiki_formatting/common_mark/alerts_icons_filter.rb index 27429d778..6709cca3d 100644 --- a/lib/redmine/wiki_formatting/common_mark/alerts_icons_filter.rb +++ b/lib/redmine/wiki_formatting/common_mark/alerts_icons_filter.rb @@ -30,8 +30,8 @@ module Redmine 'important' => 'message-report', }.freeze - class AlertsIconsFilter < HTML::Pipeline::Filter - def call + class AlertsIconsFilter < Loofah::Scrubber + def scrub(doc) doc.search("p.markdown-alert-title").each do |node| parent_node = node.parent parent_class_attr = parent_node['class'] # e.g., "markdown-alert markdown-alert-note" diff --git a/lib/redmine/wiki_formatting/common_mark/external_links_filter.rb b/lib/redmine/wiki_formatting/common_mark/external_links_filter.rb index 5fd31091b..8447c2ddc 100644 --- a/lib/redmine/wiki_formatting/common_mark/external_links_filter.rb +++ b/lib/redmine/wiki_formatting/common_mark/external_links_filter.rb @@ -24,19 +24,19 @@ module Redmine module CommonMark # adds class="external" to external links, and class="email" to mailto # links - class ExternalLinksFilter < HTML::Pipeline::Filter - def call - doc.search("a").each do |node| + class ExternalLinksFilter < Loofah::Scrubber + def scrub(node) + if node.name == 'a' url = node["href"] - next unless url - next if url.starts_with?("/") || url.starts_with?("#") || !url.include?(':') + return unless url + return if url.starts_with?("/") || url.starts_with?("#") || !url.include?(':') scheme = begin URI.parse(url).scheme rescue nil end - next if scheme.blank? + return if scheme.blank? klass = node["class"].presence node["class"] = [ @@ -50,7 +50,6 @@ module Redmine node["rel"] = rel.join(" ") end end - doc end end end diff --git a/lib/redmine/wiki_formatting/common_mark/fixup_auto_links_filter.rb b/lib/redmine/wiki_formatting/common_mark/fixup_auto_links_filter.rb index 7a5a5978a..818cee45f 100644 --- a/lib/redmine/wiki_formatting/common_mark/fixup_auto_links_filter.rb +++ b/lib/redmine/wiki_formatting/common_mark/fixup_auto_links_filter.rb @@ -26,15 +26,13 @@ module Redmine # @user@example.org # - autolinked hi res image names that look like email addresses: # printscreen@2x.png - class FixupAutoLinksFilter < HTML::Pipeline::Filter + class FixupAutoLinksFilter < Loofah::Scrubber USER_LINK_PREFIX = /(@|user:)\z/ HIRES_IMAGE = /.+@\dx\.(bmp|gif|jpg|jpe|jpeg|png)\z/ - def call - doc.search("a").each do |node| - unless (url = node['href']) && url.starts_with?('mailto:') - next - end + def scrub(node) + if node.name == 'a' + return unless (url = node['href']) && url.starts_with?('mailto:') if ((p = node.previous) && p.text? && p.text =~(USER_LINK_PREFIX)) || @@ -43,7 +41,6 @@ module Redmine node.replace node.text end end - doc end end end diff --git a/lib/redmine/wiki_formatting/common_mark/formatter.rb b/lib/redmine/wiki_formatting/common_mark/formatter.rb index 2b5f3f163..7b3f0ae49 100644 --- a/lib/redmine/wiki_formatting/common_mark/formatter.rb +++ b/lib/redmine/wiki_formatting/common_mark/formatter.rb @@ -17,8 +17,6 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -require 'html/pipeline' - module Redmine module WikiFormatting module CommonMark @@ -54,14 +52,13 @@ module Redmine }.freeze, }.freeze - MarkdownPipeline = HTML::Pipeline.new [ - MarkdownFilter, - SanitizationFilter, - SyntaxHighlightFilter, - FixupAutoLinksFilter, - ExternalLinksFilter, - AlertsIconsFilter - ], PIPELINE_CONFIG + SANITIZER = SanitizationFilter.new + SCRUBBERS = [ + SyntaxHighlightFilter.new, + FixupAutoLinksFilter.new, + ExternalLinksFilter.new, + AlertsIconsFilter.new + ] class Formatter include Redmine::WikiFormatting::SectionHelper @@ -71,8 +68,13 @@ module Redmine end def to_html(*args) - result = MarkdownPipeline.call @text - result[:output].to_s + html = MarkdownFilter.new(@text, PIPELINE_CONFIG).call + fragment = Redmine::WikiFormatting::HtmlParser.parse(html) + SANITIZER.call(fragment) + SCRUBBERS.each do |scrubber| + fragment.scrub!(scrubber) + end + fragment.to_s end end end diff --git a/lib/redmine/wiki_formatting/common_mark/markdown_filter.rb b/lib/redmine/wiki_formatting/common_mark/markdown_filter.rb index b52a20f1c..3e19ae48b 100644 --- a/lib/redmine/wiki_formatting/common_mark/markdown_filter.rb +++ b/lib/redmine/wiki_formatting/common_mark/markdown_filter.rb @@ -25,10 +25,12 @@ module Redmine # We do not use the stock HTML::Pipeline::MarkdownFilter because this # does not allow for straightforward configuration of render and parsing # options - class MarkdownFilter < HTML::Pipeline::TextFilter - def initialize(text, context = nil, result = nil) - super - @text = @text.delete "\r" + class MarkdownFilter + attr_reader :context + + def initialize(text, context = nil) + @text = text.delete "\r" + @context = context end def call diff --git a/lib/redmine/wiki_formatting/common_mark/sanitization_filter.rb b/lib/redmine/wiki_formatting/common_mark/sanitization_filter.rb index af72adc32..24d8eaefd 100644 --- a/lib/redmine/wiki_formatting/common_mark/sanitization_filter.rb +++ b/lib/redmine/wiki_formatting/common_mark/sanitization_filter.rb @@ -21,44 +21,132 @@ module Redmine module WikiFormatting module CommonMark # sanitizes rendered HTML using the Sanitize gem - class SanitizationFilter < HTML::Pipeline::SanitizationFilter + class SanitizationFilter include Redmine::Helpers::URL + + attr_accessor :allowlist + + LISTS = Set.new(%w[ul ol].freeze) + LIST_ITEM = 'li' + + # List of table child elements. These must be contained by a element + # or they are not allowed through. Otherwise they can be used to break out + # of places we're using tables to contain formatted user content (like pull + # request review comments). + TABLE_ITEMS = Set.new(%w[tr td th].freeze) + TABLE = 'table' + TABLE_SECTIONS = Set.new(%w[thead tbody tfoot].freeze) + + # The main sanitization allowlist. Only these elements and attributes are + # allowed through by default. + ALLOWLIST = { + :elements => %w[ + h1 h2 h3 h4 h5 h6 br b i strong em a pre code img input tt u + div ins del sup sub p ol ul table thead tbody tfoot blockquote + dl dt dd kbd q samp var hr ruby rt rp li tr td th s strike summary + details caption figure figcaption + abbr bdo cite dfn mark small span time wbr + ].freeze, + :remove_contents => ['script'].freeze, + :attributes => { + 'a' => %w[href id name].freeze, + 'img' => %w[src longdesc].freeze, + 'code' => ['class'].freeze, + 'div' => %w[class itemscope itemtype].freeze, + 'li' => %w[id class].freeze, + 'input' => %w[class type].freeze, + 'p' => ['class'].freeze, + 'ul' => ['class'].freeze, + 'blockquote' => ['cite'].freeze, + 'del' => ['cite'].freeze, + 'ins' => ['cite'].freeze, + 'q' => ['cite'].freeze, + :all => %w[ + abbr accept accept-charset + accesskey action align alt + aria-describedby aria-hidden aria-label aria-labelledby + axis border cellpadding cellspacing char + charoff charset checked + clear cols colspan color + compact coords datetime dir + disabled enctype for frame + headers height hreflang + hspace ismap label lang + maxlength media method + multiple nohref noshade + nowrap open progress prompt readonly rel rev + role rows rowspan rules scope + selected shape size span + start style summary tabindex target + title type usemap valign value + vspace width itemprop + ].freeze + }.freeze, + :protocols => { + 'blockquote' => { 'cite' => ['http', 'https', :relative].freeze }, + 'del' => { 'cite' => ['http', 'https', :relative].freeze }, + 'ins' => { 'cite' => ['http', 'https', :relative].freeze }, + 'q' => { 'cite' => ['http', 'https', :relative].freeze }, + 'img' => { + 'src' => ['http', 'https', :relative].freeze, + 'longdesc' => ['http', 'https', :relative].freeze + }.freeze + }, + :transformers => [ + # Top-level
  • elements are removed because they can break out of + # containing markup. + lambda { |env| + name = env[:node_name] + node = env[:node] + if name == LIST_ITEM && node.ancestors.none? { |n| LISTS.include?(n.name) } + node.replace(node.children) + end + }, + + # Table child elements that are not contained by a
  • are removed. + lambda { |env| + name = env[:node_name] + node = env[:node] + if (TABLE_SECTIONS.include?(name) || TABLE_ITEMS.include?(name)) && node.ancestors.none? { |n| n.name == TABLE } + node.replace(node.children) + end + } + ].freeze, + :css => { + :properties => %w[ + color background-color + width min-width max-width + height min-height max-height + padding padding-left padding-right padding-top padding-bottom + margin margin-left margin-right margin-top margin-bottom + border border-left border-right border-top border-bottom border-radius border-style border-collapse border-spacing + font font-style font-variant font-weight font-stretch font-size line-height font-family + text-align + float + ].freeze + } + }.freeze + RELAXED_PROTOCOL_ATTRS = { "a" => %w(href).freeze, }.freeze - ALLOWED_CSS_PROPERTIES = %w[ - color background-color - width min-width max-width - height min-height max-height - padding padding-left padding-right padding-top padding-bottom - margin margin-left margin-right margin-top margin-bottom - border border-left border-right border-top border-bottom border-radius border-style border-collapse border-spacing - font font-style font-variant font-weight font-stretch font-size line-height font-family - text-align - float - ].freeze - - def allowlist - @allowlist ||= customize_allowlist(super.deep_dup) + def initialize + @allowlist = default_allowlist + add_transformers end - private - - # customizes the allowlist defined in - # https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/sanitization_filter.rb - def customize_allowlist(allowlist) - # Disallow `name` attribute globally, allow on `a` - allowlist[:attributes][:all].delete("name") - allowlist[:attributes]["a"].push("name") + def call(doc) + # Sanitize is applied to the whole document, so the API is different from loofeh's scrubber. + Sanitize.clean_node!(doc, allowlist) + end - allowlist[:attributes][:all].push("style") - allowlist[:css] = { properties: ALLOWED_CSS_PROPERTIES } + private + def add_transformers # allow class on code tags (this holds the language info from fenced # code bocks and has the format language-foo) - allowlist[:attributes]["code"] = %w(class) - allowlist[:transformers].push lambda{|env| + allowlist[:transformers].push lambda {|env| node = env[:node] return unless node.name == "code" return unless node.has_attribute?("class") @@ -70,9 +158,7 @@ module Redmine # Allow class on div and p tags only for alert blocks # (must be exactly: "markdown-alert markdown-alert-*" for div, and "markdown-alert-title" for p) - (allowlist[:attributes]["div"] ||= []) << "class" - (allowlist[:attributes]["p"] ||= []) << "class" - allowlist[:transformers].push lambda{|env| + allowlist[:transformers].push lambda {|env| node = env[:node] return unless node.element? @@ -98,10 +184,8 @@ module Redmine # allowlist[:attributes]["td"] = %w(style) # allowlist[:css] = { properties: ["text-align"] } - # Allow `id` in a elements for footnotes - allowlist[:attributes]["a"].push "id" # Remove any `id` property not matching for footnotes - allowlist[:transformers].push lambda{|env| + allowlist[:transformers].push lambda {|env| node = env[:node] return unless node.name == "a" return unless node.has_attribute?("id") @@ -112,8 +196,7 @@ module Redmine # allow `id` in li element for footnotes # allow `class` in li element for task list items - allowlist[:attributes]["li"] = %w(id class) - allowlist[:transformers].push lambda{|env| + allowlist[:transformers].push lambda {|env| node = env[:node] return unless node.name == "li" @@ -128,11 +211,8 @@ module Redmine # allow input type = "checkbox" with class "task-list-item-checkbox" # for task list items - allowlist[:elements].push('input') - allowlist[:attributes]["input"] = %w(class type) - allowlist[:transformers].push lambda{|env| + allowlist[:transformers].push lambda {|env| node = env[:node] - return unless node.name == "input" return if node['type'] == "checkbox" && node['class'] == "task-list-item-checkbox" @@ -140,10 +220,8 @@ module Redmine } # allow class "contains-task-list" on ul for task list items - allowlist[:attributes]["ul"] = %w(class) - allowlist[:transformers].push lambda{|env| + allowlist[:transformers].push lambda {|env| node = env[:node] - return unless node.name == "ul" return if node["class"] == "contains-task-list" @@ -151,8 +229,7 @@ module Redmine } # https://github.com/rgrove/sanitize/issues/209 - allowlist[:protocols].delete("a") - allowlist[:transformers].push lambda{|env| + allowlist[:transformers].push lambda {|env| node = env[:node] return if node.type != Nokogiri::XML::Node::ELEMENT_NODE @@ -168,11 +245,12 @@ module Redmine end end } + end - # Allow `u` element to enable underline - allowlist[:elements].push('u') - - allowlist + # The allowlist to use when sanitizing. This can be passed in the context + # hash to the filter but defaults to ALLOWLIST constant value above. + def default_allowlist + ALLOWLIST.deep_dup end end end diff --git a/lib/redmine/wiki_formatting/common_mark/syntax_highlight_filter.rb b/lib/redmine/wiki_formatting/common_mark/syntax_highlight_filter.rb index 196b938c2..72bed2f5a 100644 --- a/lib/redmine/wiki_formatting/common_mark/syntax_highlight_filter.rb +++ b/lib/redmine/wiki_formatting/common_mark/syntax_highlight_filter.rb @@ -22,11 +22,11 @@ module Redmine module CommonMark # Redmine Syntax highlighting for
    
           # blocks as generated by commonmarker
    -      class SyntaxHighlightFilter < HTML::Pipeline::Filter
    -        def call
    -          doc.search("pre > code").each do |node|
    -            next unless lang = node["class"].presence
    -            next unless lang =~ /\Alanguage-(\S+)\z/
    +      class SyntaxHighlightFilter < Loofah::Scrubber
    +        def scrub(node)
    +          if node.matches?("pre > code")
    +            return unless lang = node["class"].presence
    +            return unless lang =~ /\Alanguage-(\S+)\z/
     
                 lang = $1
                 text = node.inner_text
    @@ -36,7 +36,7 @@ module Redmine
     
                 if Redmine::SyntaxHighlighting.language_supported?(lang)
                   html = Redmine::SyntaxHighlighting.highlight_by_language(text, lang)
    -              next if html.nil?
    +              return if html.nil?
     
                   node.inner_html = html
                   node["class"] = "#{lang} syntaxhl"
    @@ -45,7 +45,6 @@ module Redmine
                   node.remove_attribute("class")
                 end
               end
    -          doc
             end
           end
         end
    diff --git a/lib/redmine/wiki_formatting/html_parser.rb b/lib/redmine/wiki_formatting/html_parser.rb
    index ff40cbb6b..2987cf238 100644
    --- a/lib/redmine/wiki_formatting/html_parser.rb
    +++ b/lib/redmine/wiki_formatting/html_parser.rb
    @@ -28,6 +28,10 @@ module Redmine
             'style' => ''
           }
     
    +      def self.parse(html)
    +        Loofah.html5_fragment(html)
    +      end
    +
           def self.to_text(html)
             html = html.gsub(/[\n\r]/, ' ')
     
    diff --git a/lib/redmine/wiki_formatting/html_sanitizer.rb b/lib/redmine/wiki_formatting/html_sanitizer.rb
    index 3cd44bae2..ddd6ca1dd 100644
    --- a/lib/redmine/wiki_formatting/html_sanitizer.rb
    +++ b/lib/redmine/wiki_formatting/html_sanitizer.rb
    @@ -21,15 +21,16 @@ module Redmine
       module WikiFormatting
         # Combination of SanitizationFilter and ExternalLinksFilter
         class HtmlSanitizer
    -      Pipeline = HTML::Pipeline.new(
    -        [
    -          Redmine::WikiFormatting::CommonMark::SanitizationFilter,
    -          Redmine::WikiFormatting::CommonMark::ExternalLinksFilter,
    -        ], {})
    +      SANITIZER = Redmine::WikiFormatting::CommonMark::SanitizationFilter.new
    +      SCRUBBERS = [Redmine::WikiFormatting::CommonMark::ExternalLinksFilter.new]
     
           def self.call(html)
    -        result = Pipeline.call html
    -        result[:output].to_s
    +        fragment = HtmlParser.parse(html)
    +        SANITIZER.call(fragment)
    +        SCRUBBERS.each do |scrubber|
    +          fragment.scrub!(scrubber)
    +        end
    +        fragment.to_s
           end
         end
       end
    diff --git a/test/unit/lib/redmine/wiki_formatting/common_mark/external_links_filter_test.rb b/test/unit/lib/redmine/wiki_formatting/common_mark/external_links_filter_test.rb
    index cde6381b8..78952700d 100644
    --- a/test/unit/lib/redmine/wiki_formatting/common_mark/external_links_filter_test.rb
    +++ b/test/unit/lib/redmine/wiki_formatting/common_mark/external_links_filter_test.rb
    @@ -20,15 +20,13 @@
     require_relative '../../../../../test_helper'
     
     if Object.const_defined?(:Commonmarker)
    -  require 'redmine/wiki_formatting/common_mark/external_links_filter'
     
    -  class Redmine::WikiFormatting::CommonMark::ExternalLinksFilterTest < ActiveSupport::TestCase
    +  class Redmine::WikiFormatting::CommonMark::ExternalFilterTest < ActiveSupport::TestCase
         def filter(html)
    -      Redmine::WikiFormatting::CommonMark::ExternalLinksFilter.to_html(html, @options)
    -    end
    -
    -    def setup
    -      @options = { }
    +      fragment = Redmine::WikiFormatting::HtmlParser.parse(html)
    +      scrubber = Redmine::WikiFormatting::CommonMark::ExternalLinksFilter.new
    +      fragment.scrub!(scrubber)
    +      fragment.to_s
         end
     
         def test_external_links_should_have_external_css_class
    diff --git a/test/unit/lib/redmine/wiki_formatting/common_mark/fixup_auto_links_filter_test.rb b/test/unit/lib/redmine/wiki_formatting/common_mark/fixup_auto_links_filter_test.rb
    index 1b093d718..5972b3d8a 100644
    --- a/test/unit/lib/redmine/wiki_formatting/common_mark/fixup_auto_links_filter_test.rb
    +++ b/test/unit/lib/redmine/wiki_formatting/common_mark/fixup_auto_links_filter_test.rb
    @@ -20,19 +20,17 @@
     require_relative '../../../../../test_helper'
     
     if Object.const_defined?(:Commonmarker)
    -  require 'redmine/wiki_formatting/common_mark/fixup_auto_links_filter'
     
    -  class Redmine::WikiFormatting::CommonMark::FixupAutoLinksFilterTest < ActiveSupport::TestCase
    +  class Redmine::WikiFormatting::CommonMark::FixupAutoLinksScrubberTest < ActiveSupport::TestCase
         def filter(html)
    -      Redmine::WikiFormatting::CommonMark::FixupAutoLinksFilter.to_html(html, @options)
    +      fragment = Redmine::WikiFormatting::HtmlParser.parse(html)
    +      scrubber = Redmine::WikiFormatting::CommonMark::FixupAutoLinksFilter.new
    +      fragment.scrub!(scrubber)
    +      fragment.to_s
         end
     
         def format(markdown)
    -      Redmine::WikiFormatting::CommonMark::MarkdownFilter.to_html(markdown, Redmine::WikiFormatting::CommonMark::PIPELINE_CONFIG)
    -    end
    -
    -    def setup
    -      @options = { }
    +      Redmine::WikiFormatting::CommonMark::MarkdownFilter.new(markdown, Redmine::WikiFormatting::CommonMark::PIPELINE_CONFIG).call
         end
     
         def test_should_fixup_autolinked_user_references
    diff --git a/test/unit/lib/redmine/wiki_formatting/common_mark/formatter_test.rb b/test/unit/lib/redmine/wiki_formatting/common_mark/formatter_test.rb
    index f2f95ad8a..1fd728b90 100644
    --- a/test/unit/lib/redmine/wiki_formatting/common_mark/formatter_test.rb
    +++ b/test/unit/lib/redmine/wiki_formatting/common_mark/formatter_test.rb
    @@ -255,7 +255,7 @@ class Redmine::WikiFormatting::CommonMark::FormatterTest < ActionView::TestCase
     
         def test_should_support_html_tables
           text = '
    Cell
    ' - assert_equal '
    Cell
    ', to_html(text) + assert_equal '
    Cell
    ', to_html(text) end def test_should_remove_unsafe_uris @@ -289,10 +289,10 @@ class Redmine::WikiFormatting::CommonMark::FormatterTest < ActionView::TestCase

    Task list:

    EXPECTED diff --git a/test/unit/lib/redmine/wiki_formatting/common_mark/markdown_filter_test.rb b/test/unit/lib/redmine/wiki_formatting/common_mark/markdown_filter_test.rb index d5e416d2c..55bd260fb 100644 --- a/test/unit/lib/redmine/wiki_formatting/common_mark/markdown_filter_test.rb +++ b/test/unit/lib/redmine/wiki_formatting/common_mark/markdown_filter_test.rb @@ -24,7 +24,10 @@ if Object.const_defined?(:Commonmarker) class Redmine::WikiFormatting::CommonMark::MarkdownFilterTest < ActiveSupport::TestCase def filter(markdown) - Redmine::WikiFormatting::CommonMark::MarkdownFilter.to_html(markdown) + filter = Redmine::WikiFormatting::CommonMark::MarkdownFilter.new( + markdown, + Redmine::WikiFormatting::CommonMark::PIPELINE_CONFIG) + filter.call end # just a basic sanity test. more formatting tests in the formatter_test diff --git a/test/unit/lib/redmine/wiki_formatting/common_mark/sanitization_filter_test.rb b/test/unit/lib/redmine/wiki_formatting/common_mark/sanitization_filter_test.rb index b2d19eab9..0e428a3de 100644 --- a/test/unit/lib/redmine/wiki_formatting/common_mark/sanitization_filter_test.rb +++ b/test/unit/lib/redmine/wiki_formatting/common_mark/sanitization_filter_test.rb @@ -20,15 +20,13 @@ require_relative '../../../../../test_helper' if Object.const_defined?(:Commonmarker) - require 'redmine/wiki_formatting/common_mark/sanitization_filter' class Redmine::WikiFormatting::CommonMark::SanitizationFilterTest < ActiveSupport::TestCase def filter(html) - Redmine::WikiFormatting::CommonMark::SanitizationFilter.to_html(html, @options) - end - - def setup - @options = { } + fragment = Redmine::WikiFormatting::HtmlParser.parse(html) + sanitizer = Redmine::WikiFormatting::CommonMark::SanitizationFilter.new + sanitizer.call(fragment) + fragment.to_s end def test_should_filter_tags @@ -137,7 +135,7 @@ if Object.const_defined?(:Commonmarker) ], [ 'Lorem dolor sit
    amet