From 31788560d89a27485ecec568a1445a533387f8f7 Mon Sep 17 00:00:00 2001 From: Matthew Draper Date: Sat, 5 Jan 2019 03:55:34 +1030 Subject: [PATCH] Use GitHub API to check mirror freshness when available If people have git references to a number of public GitHub repositories, this turns some multi-round-trip git communications into single HTTP requests. If the repository isn't public, this wastes an HTTP request because we still need to continue and do the mirror update -- but it remembers the failure and doesn't try again, so it's only a one-off cost. --- lib/paperback/environment.rb | 4 +- lib/paperback/git_depot.rb | 120 +++++++++++++++++++++++++++++++---- test/resolve_test.rb | 1 + 3 files changed, 112 insertions(+), 13 deletions(-) diff --git a/lib/paperback/environment.rb b/lib/paperback/environment.rb index f8647ed3..0f63e1fc 100644 --- a/lib/paperback/environment.rb +++ b/lib/paperback/environment.rb @@ -80,7 +80,7 @@ def self.lockfile_name(gemfile = self.gemfile.filename) "Gemfile.lock" end - def self.lock(output: nil, gemfile: Paperback::Environment.load_gemfile, lockfile: Paperback::Environment.lockfile_name, catalog_options: {}) + def self.lock(output: nil, gemfile: Paperback::Environment.load_gemfile, lockfile: Paperback::Environment.lockfile_name, catalog_options: {}, git_options: {}) if lockfile && File.exist?(lockfile) loader = Paperback::LockLoader.new(lockfile, gemfile) # TODO @@ -110,7 +110,7 @@ def self.lock(output: nil, gemfile: Paperback::Environment.load_gemfile, lockfil path_sources = gemfile.gems.map { |_, _, o| o[:path] }.compact require_relative "git_depot" - git_depot = Paperback::GitDepot.new(Paperback::Environment.store) + git_depot = Paperback::GitDepot.new(Paperback::Environment.store, **git_options) require_relative "path_catalog" require_relative "git_catalog" diff --git a/lib/paperback/git_depot.rb b/lib/paperback/git_depot.rb index 86e42475..1df8b45b 100644 --- a/lib/paperback/git_depot.rb +++ b/lib/paperback/git_depot.rb @@ -7,9 +7,11 @@ class Paperback::GitDepot Logger = ::Logger.new($stderr) Logger.level = $DEBUG ? ::Logger::DEBUG : ::Logger::WARN - def initialize(store, mirror_root = "~/.cache/paperback/git") + def initialize(store, cache: "~/.cache/paperback") @store = store - @mirror_root = File.expand_path(mirror_root) + @mirror_root = File.expand_path("#{cache}/git") + + @github_disabled = false end def git_path(remote, revision) @@ -39,16 +41,44 @@ def remote(remote) cache_dir end + def current_revision(mirror, remote, ref) + read_git(remote, "rev-parse", ref, chdir: mirror) + end + + def get_config(dir, name, type: nil, default: nil) + arguments = [] + arguments.append("--type", type) if type + arguments.append("--default", default) if default + + read_git(dir, "config", *arguments, "--get", name, chdir: dir) + end + + def set_config(dir, name, value, type: nil) + arguments = [] + arguments.append("--type", type) if type + + status = git(dir, "config", *arguments, "--replace-all", name, value, chdir: dir) + raise "git config failed" unless status.success? + end + def resolve(remote, ref) - mirror = remote(remote) { false } # always update mirror + ref ||= "HEAD" - r, w = IO.pipe - status = git(remote, "rev-parse", ref || "HEAD", chdir: mirror, out: w) - raise "git rev-parse failed" unless status.success? + mirror = remote(remote) do |cache_dir| + if github?(remote) + current = current_revision(cache_dir, remote, ref) + + # If we can, it's faster to ask GitHub's API whether our mirror + # is up to date + return current if github_current?(remote, ref, current, cache_dir) + end - w.close + # Other than that special case, we never consider the mirror up to + # date (i.e., we always do an update) + false + end - r.read.chomp + current_revision(mirror, remote, ref) end def resolve_and_checkout(remote, ref) @@ -77,17 +107,85 @@ def checkout(remote, revision) private - def git(remote, *arguments, **kwargs) + def github?(remote) + return false if @github_disabled + + case remote + when /\A(?:git|https?):\/\/github\.com\/([^\/]+\/[^\/]+)/ + $1.sub(/\.git$/, "") + when /\Agit@github\.com:([^\/]+\/[^\/]+)/ + $1.sub(/\.git$/, "") + end + end + + def github_current?(remote, ref, known_revision, cache_dir) + if org_and_repo = github?(remote) + allowed = get_config(cache_dir, "paperback.github", default: "true", type: "bool") + return false unless allowed == "true" + + uri = URI("https://api.github.com/repos/#{org_and_repo}/commits/#{ref}") + + request = Net::HTTP::Get.new(uri) + request["Accept"] = "application/vnd.github.v3.sha" + request["If-None-Match"] = "\"#{known_revision}\"" + + response = httpool.request(uri, request) + case response + when Net::HTTPOK + response.body + when Net::HTTPNotModified + known_revision + when Net::HTTPNotFound + if response["X-GitHub-Media-Type"] == "github.v3, param=sha" + # We're talking to the API we intended to; this 404 suggests + # it's not a public repo. We'll remember that and not bother + # trying this again. (But we won't set @github_disabled, + # because we still want to try other repos.) + set_config cache_dir, "paperback.github", "false", type: "bool" + + nil + else + @github_disabled = true + nil + end + else + @github_disabled = true + nil + end + end + end + + def httpool + @httpool ||= Paperback::Httpool.new + end + + def read_git(label, command, *arguments, **kwargs) + output = nil + + r, w = IO.pipe + status = git(label, command, *arguments, **kwargs, out: w) do + w.close + output = r.read + end + + raise "git #{command} failed" unless status.success? + + output.chomp + end + + def git(label, *arguments, **kwargs) kwargs[:in] ||= IO::NULL kwargs[:out] ||= IO::NULL kwargs[:err] ||= IO::NULL t = Time.now pid = spawn("git", *arguments, **kwargs) - logger.debug { "#{remote} [#{pid}] #{command_for_log("git", *arguments)}" } + logger.debug { "#{label} [#{pid}] #{command_for_log("git", *arguments)}" } + + yield if block_given? _, status = Process.waitpid2(pid) - logger.debug { "#{remote} [#{pid}] process exited #{status.exitstatus} (#{status.success? ? "success" : "failure"}) after #{Time.now - t}s" } + logger.debug { "#{label} [#{pid}] process exited #{status.exitstatus} (#{status.success? ? "success" : "failure"}) after #{Time.now - t}s" } status end diff --git a/test/resolve_test.rb b/test/resolve_test.rb index 87534a5e..29416b25 100644 --- a/test/resolve_test.rb +++ b/test/resolve_test.rb @@ -636,6 +636,7 @@ def lockfile_for_gemfile(gemfile) gemfile: Paperback::GemfileParser.parse(gemfile), lockfile: nil, catalog_options: { cache: cache_dir }, + git_options: { cache: cache_dir }, ) assert_match(/\AFetching sources\.\.\.+\nResolving dependencies\.\.\.+\n\z/, output.string)