Add tool to generate dependency details in csv

Fixes #8725

Rough goal is to present license information for all dependencies

Fixes #8881
This commit is contained in:
Jordan Sissel 2017-11-27 15:02:20 -08:00 committed by Andrew Cholakian
parent b7af32bf6e
commit 5786dcd47a
4 changed files with 216 additions and 0 deletions

36
COPYING.csv Normal file
View file

@ -0,0 +1,36 @@
jruby,9.1.14.0,https://jruby.org/,EPL-1.0|GPL-2.0|LGPL-2.1
bytelist,,http://github.com/jruby/bytelist,EPL-1.0|GPL-2.0|LGPL-2.1
jnr-posix,,https://github.com/jnr/jnr-posix,EPL-1.0|GPL-2.0|LGPL-2.1
jruby-openssl,,https://github.com/jruby/jruby-openssl,EPL-1.0|GPL-2.0|LGPL-2.1
jruby-readline,,https://github.com/jruby/jruby-readline,EPL-1.0|GPL-2.0|LGPL-2.1
psych,,https://github.com/ruby/psych),MIT
jay-yydebug,,http://svn.codehaus.org/jruby/trunk/jay/yydebug,EPL-1.0|GPL-2.0|LGPL-2.0
control.js,,,MIT
dragdrop.js,,,MIT
effect.js,,,MIT
prototype.js,,,MIT
asm,,http://asm.objectweb.org,BSD
jline2,,https://github.com/jline/jline2,BSD
jzlib,,https://www.jcraft.com/jzlib/,BSD-3-Clause
rake,,https://github.com/ruby/rake,MIT
jcodings,,http://github.com/jruby/jcodings,MIT
joni,,http://github.com/jruby/joni,MIT
bouncycastle,,http://www.bouncycastle.org,MIT
jnr-x86asm,,https://github.com/jnr/jnr-x86asm,MIT
invokebinder,,https://github.com/headius/invokebinder,Apache-2.0
jffi,,https://github.com/jnr/jffi,Apache-2.0
jitescript,,https://github.com/qmx/jitescript,Apache-2.0
jnr-constants,,http://github.com/jnr/jnr-constants,Apache-2.0
jnr-enxio,,https://github.com/jnr/jnr-enxio,Apache-2.0
jnr-ffi,,https://github.com/jnr/jnr-jffi,Apache-2.0
jnr-netdb,,http://github.com/jnr/jnr-netdb,Apache-2.0
jnr-unixsocket,,https://github.com/jnr/jnr-unixsocket,Apache-2.0
joda-time,,http://joda-time.sourceforge.net,Apache-2.0
maven,,http://maven.apache.org/,Apache-2.0
nailgun,,http://martiansoftware.com/nailgun,Apache-2.0
options,,https://github.com/headius/options,Apache-2.0
snakeyaml,,https://github.com/asomov/snakeyaml,Apache-2.0
unsafe-fences,,https://github.com/headius/unsafe-fences,Apache-2.0
racc,,https://github.com/tenderlove/racc,Ruby
json-generator,,https://github.com/flori/json,Ruby
json-parser,,https://github.com/flori/json,Ruby
1 jruby 9.1.14.0 https://jruby.org/ EPL-1.0|GPL-2.0|LGPL-2.1
2 bytelist http://github.com/jruby/bytelist EPL-1.0|GPL-2.0|LGPL-2.1
3 jnr-posix https://github.com/jnr/jnr-posix EPL-1.0|GPL-2.0|LGPL-2.1
4 jruby-openssl https://github.com/jruby/jruby-openssl EPL-1.0|GPL-2.0|LGPL-2.1
5 jruby-readline https://github.com/jruby/jruby-readline EPL-1.0|GPL-2.0|LGPL-2.1
6 psych https://github.com/ruby/psych) MIT
7 jay-yydebug http://svn.codehaus.org/jruby/trunk/jay/yydebug EPL-1.0|GPL-2.0|LGPL-2.0
8 control.js MIT
9 dragdrop.js MIT
10 effect.js MIT
11 prototype.js MIT
12 asm http://asm.objectweb.org BSD
13 jline2 https://github.com/jline/jline2 BSD
14 jzlib https://www.jcraft.com/jzlib/ BSD-3-Clause
15 rake https://github.com/ruby/rake MIT
16 jcodings http://github.com/jruby/jcodings MIT
17 joni http://github.com/jruby/joni MIT
18 bouncycastle http://www.bouncycastle.org MIT
19 jnr-x86asm https://github.com/jnr/jnr-x86asm MIT
20 invokebinder https://github.com/headius/invokebinder Apache-2.0
21 jffi https://github.com/jnr/jffi Apache-2.0
22 jitescript https://github.com/qmx/jitescript Apache-2.0
23 jnr-constants http://github.com/jnr/jnr-constants Apache-2.0
24 jnr-enxio https://github.com/jnr/jnr-enxio Apache-2.0
25 jnr-ffi https://github.com/jnr/jnr-jffi Apache-2.0
26 jnr-netdb http://github.com/jnr/jnr-netdb Apache-2.0
27 jnr-unixsocket https://github.com/jnr/jnr-unixsocket Apache-2.0
28 joda-time http://joda-time.sourceforge.net Apache-2.0
29 maven http://maven.apache.org/ Apache-2.0
30 nailgun http://martiansoftware.com/nailgun Apache-2.0
31 options https://github.com/headius/options Apache-2.0
32 snakeyaml https://github.com/asomov/snakeyaml Apache-2.0
33 unsafe-fences https://github.com/headius/unsafe-fences Apache-2.0
34 racc https://github.com/tenderlove/racc Ruby
35 json-generator https://github.com/flori/json Ruby
36 json-parser https://github.com/flori/json Ruby

32
bin/dependencies-report Executable file
View file

@ -0,0 +1,32 @@
#!/bin/sh
# Generate a dependency report.
unset CDPATH
# This unwieldy bit of scripting is to try to catch instances where Logstash
# was launched from a symlink, rather than a full path to the Logstash binary
if [ -L "$0" ]; then
# Launched from a symlink
# --Test for the readlink binary
RL="$(which readlink)"
if [ $? -eq 0 ]; then
# readlink exists
SOURCEPATH="$(${RL} $0)"
else
# readlink not found, attempt to parse the output of stat
SOURCEPATH="$(stat -c %N $0 | awk '{print $3}' | sed -e 's/\//' -e 's/\//')"
if [ $? -ne 0 ]; then
# Failed to execute or parse stat
echo "Failed to find source library at path $(cd `dirname $0`/..; pwd)/bin/logstash.lib.sh"
echo "You may need to launch Logstash with a full path instead of a symlink."
exit 1
fi
fi
else
# Not a symlink
SOURCEPATH="$0"
fi
. "$(cd `dirname ${SOURCEPATH}`/..; pwd)/bin/logstash.lib.sh"
setup
ruby_exec "logstash-core/lib/logstash/dependency_report_runner.rb" "$@"

View file

@ -0,0 +1,131 @@
# encoding: utf-8
Thread.abort_on_exception = true
Encoding.default_external = Encoding::UTF_8
$DEBUGLIST = (ENV["DEBUG"] || "").split(",")
require "clamp"
require "logstash/namespace"
require "rubygems"
require "jars/gemspec_artifacts"
class LogStash::DependencyReport < Clamp::Command
option [ "--csv" ], "OUTPUT_PATH", "The path to write the dependency report in csv format.",
:required => true, :attribute_name => :output_path
def execute
require "csv"
CSV.open(output_path, "wb", :headers => [ "name", "version", "url", "license" ], :write_headers => true) do |csv|
puts "Finding gem dependencies"
gems.each { |d| csv << d }
puts "Finding java/jar dependencies"
jars.each { |d| csv << d }
end
# Copy in COPYING.csv which is a best-effort, hand-maintained file of dependency license information.
File.open(output_path, "a+") do |file|
extra = File.join(File.dirname(__FILE__), "..", "..", "..", "COPYING.csv")
file.write(IO.read(extra))
end
nil
end
def gems
# @mgreau requested `logstash-*` dependencies be removed from this list:
# https://github.com/elastic/logstash/pull/8837#issuecomment-351859433
Gem::Specification.reject { |g| g.name =~ /^logstash-/ }.collect do |gem|
licenses = ("UNKNOWN" if gem.licenses.empty?) || (gem.licenses.map { |l| SPDX.map(l) }.join("|") if !gem.licenses.empty?)
[gem.name, gem.version.to_s, gem.homepage, licenses]
end
end
def jars
jars = []
# For any gems with jar dependencies,
# Look at META-INF/MANIFEST.MF for any jars in each gem
# Note any important details.
Gem::Specification.select { |g| g.requirements && g.requirements.any? { |r| r =~ /^jar / } }.collect do |gem|
# Where is the gem installed
root = gem.full_gem_path
Dir.glob(File.join(root, "**", "*.jar")).collect do |path|
jar = java.util.jar.JarFile.new(path)
manifest = jar.getManifest
pom_entries = jar.entries.select { |t| t.getName.start_with?("META-INF/maven/") && t.getName.end_with?("/pom.properties") }
# Some jar files have multiple maven pom.properties files. It is unclear how to know what is correct?
# TODO(sissel): Maybe we should use all pom.properties files? None of the pom.properties/pom.xml files have license information, though.
# TODO(sissel): In some cases, there are META-INF/COPYING and
# META-INF/NOTICE.txt files? Can we use these somehow? There is no
# common syntax for parsing these files, though...
pom_map = if pom_entries.count == 1
pom_in = jar.getInputStream(pom_entries.first)
pom_content = pom_in.available.times.collect { pom_in.read }.pack("C*")
# Split non-comment lines by `key=val` into a map { key => val }
Hash[pom_content.split(/\r?\n/).grep(/^[^#]/).map { |line| line.split("=", 2) }]
else
{}
end
next if manifest.nil?
# convert manifest attributes to a map w/ keys .to_s
# without this, the attribute keys will be `Object#inspect` values
# like #<Java::JavaUtilJar::Attributes::Name0xabcdef0>
attributes = Hash[manifest.getMainAttributes.map { |k,v| [k.to_s, v] }]
begin
# Prefer the maven/pom groupId when it is available.
artifact = pom_map.fetch("artifactId", attributes.fetch("Implementation-Title"))
group = pom_map.fetch("groupId", attributes.fetch("Implementation-Vendor-Id"))
jars << [
group + ":" + artifact,
attributes.fetch("Bundle-Version"),
attributes.fetch("Bundle-DocURL"),
SPDX.map(attributes.fetch("Bundle-License")),
]
rescue KeyError => e
# The jar is missing a required manifest field, it may not have any useful manifest data.
# Ignore it and move on.
end
end
end
jars.uniq.sort
end
module SPDX
# This is a non-exhaustive, best effort list of licenses as they map to SPDX identifiers.
ALIASES = {
"Apache-2.0" => [
"Apache 2",
"apache-2.0",
"Apache 2.0",
"Apache License (2.0)",
"Apache License 2.0",
"https://www.apache.org/licenses/LICENSE-2.0.txt",
"http://www.apache.org/licenses/LICENSE-2.0.txt",
],
"Artistic-2.0" => [
"Artistic 2.0"
],
"BSD-2-Clause" => [
"2-clause BSDL",
"2-clause"
],
"GPL-2.0" => [
"GPL-2"
]
}
# Get a map of name => spdx
MAP_APACHE2 = Hash[ALIASES.map { |spdx,aliases| aliases.map { |value| [value, spdx] } }[0]]
MAP_ARTISTIC2 = Hash[ALIASES.map { |spdx,aliases| aliases.map { |value| [value, spdx] } }[1]]
MAP_BSD = Hash[ALIASES.map { |spdx,aliases| aliases.map { |value| [value, spdx] } }[2]]
MAP_GPL2 = Hash[ALIASES.map { |spdx,aliases| aliases.map { |value| [value, spdx] } }[3]]
module_function
def map(value)
MAP_APACHE2[value] || MAP_ARTISTIC2[value] || MAP_BSD[value] || MAP_GPL2[value] || value
end
end
end

View file

@ -0,0 +1,17 @@
require_relative "../../../lib/bootstrap/environment"
if $0 == __FILE__
begin
LogStash::Bundler.setup!({:without => [:build, :development]})
rescue => Bundler::GemfileNotFound
$stderr.puts("No Gemfile found. Maybe you need to run `rake artifact:tar`?")
raise
end
require "logstash/namespace"
require_relative "../../../lib/bootstrap/patches/jar_dependencies"
require "logstash/dependency_report"
exit_status = LogStash::DependencyReport.run
exit(exit_status || 0)
end