Adding option to download remote files as part of the gem

- Adding minitar as dependency
- Adding vendor file to handle the tasks
- Adding vendor hook to install and update actions

The vendor.json file should be placed at the root of the gem.
This file defines all the remote sources that should be downloaded.
The content of it is an array of hashes.
Each hash has the following settings:
- url
- sha1
- files

url is the URL of the remote source
sha1 is the SHA1 fingerprint used to validate the downloaded file
files can have different settings and is only used for .tar.gz / .tgz files
not setting it extracts the whole tar.gz file
An array of file names allows extracting specific files
A Regexp allows extracting only the files whose names match the pattern

Fixes #2029
This commit is contained in:
Richard Pijnenburg 2014-11-05 23:04:02 +00:00 committed by Jordan Sissel
parent 8b509cdf7d
commit 571ca87163
4 changed files with 206 additions and 0 deletions

View file

@ -3,6 +3,7 @@ require 'logstash/namespace'
require 'logstash/environment'
require 'logstash/pluginmanager'
require 'logstash/pluginmanager/util'
require 'logstash/pluginmanager/vendor'
require 'rubygems/dependency_installer'
require 'rubygems/uninstaller'
require 'jar-dependencies'
@ -52,6 +53,7 @@ class LogStash::PluginManager::Install < Clamp::Command
end
::Gem.configuration.verbose = false
LogStash::PluginManager::Vendor.new().setup_hook
options = {}
options[:document] = []
inst = Gem::DependencyInstaller.new(options)

View file

@ -2,6 +2,7 @@ require 'clamp'
require 'logstash/namespace'
require 'logstash/pluginmanager'
require 'logstash/pluginmanager/util'
require 'logstash/pluginmanager/vendor'
require 'rubygems/dependency_installer'
require 'rubygems/uninstaller'
require 'jar-dependencies'
@ -62,6 +63,7 @@ class LogStash::PluginManager::Update < Clamp::Command
end
::Gem.configuration.verbose = false
LogStash::PluginManager::Vendor.new().setup_hook
options = {}
options[:document] = []
inst = Gem::DependencyInstaller.new(options)

View file

@ -0,0 +1,201 @@
require "net/http"
require "uri"
require "digest/sha1"
require 'fileutils'
require 'logstash/json'
class LogStash::PluginManager::Vendor
  # Downloads remote sources declared in a gem's vendor.json and unpacks
  # them into the gem's vendor/ directory. Intended to be registered as a
  # RubyGems post-install hook (see #setup_hook).

  # Download +url+ into +output+ and fail unless the SHA1 of the
  # downloaded content matches the expected +sha1+ fingerprint.
  def fetch(url, sha1, output)
    puts "Downloading #{url}"
    actual_sha1 = download(url, output)
    if actual_sha1 != sha1
      fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
    end
  end # def fetch

  # Fetch +url+ into the +target+ directory unless a local copy with the
  # expected +sha1+ already exists. Returns the path to the local file.
  def file_fetch(url, sha1, target)
    filename = File.basename(URI(url).path)
    # BUGFIX: the output path previously omitted the filename entirely,
    # so every download collided on the same (broken) path.
    output = "#{target}/#{filename}"
    begin
      actual_sha1 = file_sha1(output)
      # Cached copy exists but fingerprint differs: refetch.
      fetch(url, sha1, output) if actual_sha1 != sha1
    rescue Errno::ENOENT
      # No cached copy yet: fetch it.
      fetch(url, sha1, output)
    end
    return output
  end

  # Compute the SHA1 hex digest of the file at +path+, reading in 16KB
  # chunks. Raises Errno::ENOENT when the file does not exist (callers
  # rely on this to detect a missing cached download).
  def file_sha1(path)
    digest = Digest::SHA1.new
    fd = File.new(path, "r")
    while true
      begin
        digest << fd.sysread(16384)
      rescue EOFError
        break
      end
    end
    return digest.hexdigest
  ensure
    fd.close if fd
  end

  # Stream +url+ into +output+ (via an "#{output}.tmp" file that is
  # renamed on success and removed on failure), printing a progress
  # percentage when attached to a tty. Returns the SHA1 hex digest of the
  # downloaded bytes.
  def download(url, output)
    uri = URI(url)
    digest = Digest::SHA1.new
    tmp = "#{output}.tmp"
    Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
      request = Net::HTTP::Get.new(uri.path)
      http.request(request) do |response|
        # BUGFIX: the original check was inverted ("fail if success") and
        # compared Integers against Net::HTTPResponse#code, which is a
        # String — so it could never fire and errors went undetected.
        unless [200, 301].include?(response.code.to_i)
          fail "HTTP fetch failed for #{url}. #{response}"
        end
        size = (response["content-length"].to_i || -1).to_f
        count = 0
        # Binary mode so archive bytes are not mangled on Windows.
        File.open(tmp, "wb") do |fd|
          response.read_body do |chunk|
            fd.write(chunk)
            digest << chunk
            if size > 0 && $stdout.tty?
              count += chunk.bytesize
              $stdout.write(sprintf("\r%0.2f%%", count / size * 100))
            end
          end
        end
        # Erase the progress indicator.
        $stdout.write("\r \r") if $stdout.tty?
      end
    end
    File.rename(tmp, output)
    return digest.hexdigest
  rescue SocketError => e
    puts "Failure while downloading #{url}: #{e}"
    raise
  ensure
    # Never leave a partial .tmp file behind.
    File.unlink(tmp) if File.exist?(tmp)
  end # def download

  # Unpack the gzipped tarball at +tarball+. The block is called with each
  # tar entry and must return the destination path for it, or nil to skip
  # the entry. Existing files with matching size and mode are not
  # rewritten. The tarball is deleted once extraction finishes.
  def untar(tarball, &block)
    require "archive/tar/minitar"
    tgz = Zlib::GzipReader.new(File.open(tarball))
    tar = Archive::Tar::Minitar::Input.open(tgz)
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)
      FileUtils.mkdir_p(parent) unless File.directory?(parent)
      if entry.directory?
        FileUtils.mkdir_p(path) unless File.directory?(path)
      else
        # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
        # expose headers in the entry; until then reach into its ivars.
        entry_mode = entry.instance_eval { @mode } & 0777
        # BUGFIX: File.exists? is deprecated; use File.exist?.
        if File.exist?(path)
          stat = File.stat(path)
          entry_size = entry.instance_eval { @size }
          # If file size and mode are the same, skip writing.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        # Binary mode: tarballs frequently carry binary payloads (jars).
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API
          # provided by Archive::Tar::Minitar::Reader::EntryStream only
          # mostly acts like an IO object. Something about empty files in
          # this EntryStream causes IO.copy_stream to throw "can't convert
          # nil into String" on JRuby.
          # TODO(sissel): File a bug about this.
          while !entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
        end
        File.chmod(entry_mode, path)
      end
    end
    tar.close
    File.unlink(tarball) if File.file?(tarball)
  end # def untar

  # Decompress a plain .gz file next to itself (foo.gz -> foo), deleting
  # the compressed original on success and the partial output on failure.
  def do_ungz(file)
    outpath = file.gsub('.gz', '')
    tgz = Zlib::GzipReader.new(File.open(file))
    begin
      File.open(outpath, "wb") do |out|
        IO::copy_stream(tgz, out)
      end
      File.unlink(file)
    rescue
      File.unlink(outpath) if File.file?(outpath)
      raise
    end
    tgz.close
  end

  # Decide where a tar +entry+ should be extracted to, relative to the
  # vendor directory, based on the 'files' setting from vendor.json:
  # - nil: extract everything, stripping the archive's +prefix+ directory
  # - Array: extract only the listed paths, flattened to their basename
  # - String (treated as a Regexp source) or Regexp: extract matching
  #   entries, flattened to their basename
  # Returns false/nil when the entry should be skipped.
  def eval_file(entry, files, prefix)
    # PaxHeaders entries are tar metadata, never real files.
    return false if entry.full_name =~ /PaxHeaders/
    if !files.nil?
      if files.is_a?(Array)
        return false unless files.include?(entry.full_name.gsub(prefix, ''))
        entry.full_name.split("/").last
      elsif files.is_a?(Regexp)
        # Generalization: accept a real Regexp, not only its String form.
        return false unless entry.full_name =~ files
        entry.full_name.split("/").last
      elsif files.is_a?(String)
        return false unless entry.full_name =~ Regexp.new(files)
        entry.full_name.split("/").last
      end
    else
      entry.full_name.gsub(prefix, '')
    end
  end

  # Download and unpack every source description in +files+ into +target+.
  # Each description is a hash with 'url', 'sha1' and an optional 'files'
  # key (see #eval_file for its semantics).
  def process_downloads(files, target = '')
    FileUtils.mkdir_p(target) unless File.directory?(target)
    files.each do |file|
      download = file_fetch(file['url'], file['sha1'], target)
      # BUGFIX: extensions are now escaped and anchored; the old /.gz/
      # style patterns matched almost any filename.
      case download
      when /\.tar\.gz$/, /\.tgz$/
        extension = download.end_with?('.tar.gz') ? '.tar.gz' : '.tgz'
        prefix = download.gsub(extension, '').gsub("#{target}/", '')
        untar(download) do |entry|
          next unless out = eval_file(entry, file['files'], prefix)
          File.join(target, out)
        end
      when /\.gz$/
        # BUGFIX: previously called undefined `ungz`, raising
        # NoMethodError for every plain .gz download.
        do_ungz(download)
      end
    end
  end

  # Register a RubyGems post-install hook: after a gem installs, read its
  # vendor.json (if present) and download the declared sources into the
  # gem's vendor/ directory. Skipped entirely when VENDOR_SKIP=true.
  def setup_hook
    Gem.post_install do |gem_installer|
      unless ENV['VENDOR_SKIP'] == 'true'
        vendor_file = ::File.join(gem_installer.gem_dir, 'vendor.json')
        if ::File.exist?(vendor_file)
          vendor_file_content = IO.read(vendor_file)
          file_list = LogStash::Json.load(vendor_file_content)
          process_downloads(file_list, ::File.join(gem_installer.gem_dir, 'vendor'))
        end
      end
    end
  end
end
#bla = [ {'url' => 'https://collectd.org/files/collectd-5.4.0.tar.gz', 'sha1' => 'a90fe6cc53b76b7bdd56dc57950d90787cb9c96e', 'files' => [ '/src/types.db']} ]
#bla = [ { 'url' => 'http://www.mirrorservice.org/sites/ftp.apache.org/kafka/0.8.1.1/kafka_2.10-0.8.1.1.tgz', 'sha1' => '7288a43bb0732789ff0d76d820fc7f40553854fe', 'files' => /\.jar$/ }]
#process_downloads(bla, 'bla')

View file

@ -39,6 +39,7 @@ Gem::Specification.new do |gem|
# gem.add_runtime_dependency "jar-dependencies", [">= 0.1.2"] #(MIT license)
gem.add_runtime_dependency "ruby-maven" #(EPL license)
gem.add_runtime_dependency "minitar"
if RUBY_PLATFORM == 'java'
gem.platform = RUBY_PLATFORM