0001-Performance-improvements-for-git-repo-parsing.patch

Gergely Fábián, 2012-03-06 18:26

Download (5.57 KB)

View differences:

app/models/repository/git.rb
144 144
      merge_extra_info(h)
145 145
      self.save
146 146
    end
147
    # Remember what revisions we already processed (in any branches)
148
    all_revisions = []
147 149
    scm_brs.each do |br1|
148 150
      br = br1.to_s
149 151
      from_scmid = nil
150 152
      from_scmid = h["branches"][br]["last_scmid"] if h["branches"][br]
151 153
      h["branches"][br] ||= {}
152
      scm.revisions('', from_scmid, br, {:reverse => true}) do |rev|
153
        db_rev = find_changeset_by_name(rev.revision)
154

  
155
      revisions = scm.revisions('', from_scmid, br, {:reverse => true})
156
      next if revisions.nil? || revisions.empty?
157

  
158
      # Remember the last commit id here, before we start removing revisions from the array.
159
      # We do that as an optimization, but it also means that the array may end up with no revisions at all.
160
      last_revision  = revisions.last
161

  
162
      # remove revisions that we have already processed (possibly in other branches)
163
      revisions.reject!{|r| all_revisions.include?(r.scmid)}
164
      # add revisions that we are to parse now to 'all processed revisions'
165
      # (this is equivalent to a union, because the already processed revisions were removed above)
166
      all_revisions += revisions.map{|r| r.scmid}
167

  
168
      # Search the database for existing revisions in a more efficient manner.
169
      #  This replaces the one-by-one queries.
170
      #  Find all revisions, that are in the database, and then remove them from the revision array.
171
      #  Then later we won't need any conditions for db existence.
172
      # Query for several revisions at once, and remove them from the revisions array, if they are there.
173
      # Do this in chunks, to avoid eventual memory problems (in case of tens of thousands of commits).
174
      # If there are no revisions (because the original code's algorithm filtered them out), then this part will be skipped.
175
      #  We make queries, just if there is any revision.
176
      limit = 100
177
      offset = 0
178
      revisions_copy = revisions.clone # revisions will change
179
      while offset < revisions_copy.size
180
        recent_changesets_slice = changesets.find(:all, :conditions => ['scmid IN (?)', revisions_copy.slice(offset, limit).map{|x| x.scmid}])
181
        # Subtract revisions that redmine already knows about
182
        recent_revisions = recent_changesets_slice.map{|c| c.scmid}
183
        revisions.reject!{|r| recent_revisions.include?(r.scmid)}
184
        offset += limit
185
      end
186

  
187
      revisions.each do |rev|
154 188
        transaction do
155
          if db_rev.nil?
156
            db_saved_rev = save_revision(rev)
157
            parents = {}
158
            parents[db_saved_rev] = rev.parents unless rev.parents.nil?
159
            parents.each do |ch, chparents|
160
              ch.parents = chparents.collect{|rp| find_changeset_by_name(rp)}.compact
161
            end
189
          # No db lookup is needed for this revision, because the filtering above ensured that it is not in the db.
190
          db_saved_rev = save_revision(rev)
191
          parents = {}
192
          parents[db_saved_rev] = rev.parents unless rev.parents.nil?
193
          parents.each do |ch, chparents|
194
            ch.parents = chparents.collect{|rp| find_changeset_by_name(rp)}.compact
162 195
          end
163
          h["branches"][br]["last_scmid"] = rev.scmid
164
          merge_extra_info(h)
165
          self.save
196
          # Saving the last scmid was moved out of this block, because this block is not reached if the revision was already added for another branch.
166 197
        end
167 198
      end
199

  
200
      # save the data about the last revision for this branch
201
      if last_revision
202
        h["branches"][br]["last_scmid"] = last_revision.scmid
203
        merge_extra_info(h)
204
        self.save
205
      end
168 206
    end
169 207
  end
170 208

  
script/repository_fetch_changesets
1
#!/usr/bin/env ruby
# Fetches the changesets of a single repository and prints how long it took.
#
# Usage:
#   script/repository_fetch_changesets <repository id>
#   script/repository_fetch_changesets last
#
# With 'last', the repository with the highest id is used.
# On a missing argument or an unknown repository the script prints the
# message to stderr and exits with a non-zero status.

# Validate the arguments before loading the environment, so that a bad
# invocation fails immediately instead of after the application has booted.
abort "No argument given" if ARGV.empty?

require File.expand_path('../../config/environment',  __FILE__)

rep =
  if ARGV[0] == 'last'
    Repository.find(:first, :order => 'id DESC')
  else
    # find_by_id returns nil for an unknown id, whereas find(id) raises
    # ActiveRecord::RecordNotFound and would never reach the check below.
    Repository.find_by_id(ARGV[0])
  end

abort "No repository found" unless rep

p rep
time = Time.now
rep.fetch_changesets
puts "Run in #{Time.now - time} seconds."
0
-