From 4c81f2525c88205adfe5b9581eafe04b0c75f3de Mon Sep 17 00:00:00 2001 From: Will Fleming Date: Wed, 18 Nov 2015 14:31:41 -0500 Subject: [PATCH 1/5] engine_config: allow configuring different cutofs per check type --- lib/cc/engine/analyzers/engine_config.rb | 19 +++++++++- .../cc/engine/analyzers/engine_config_spec.rb | 35 ++++++++++++++++--- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index 17d01bac..eaea1d10 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -18,7 +18,7 @@ def concurrency config.fetch("concurrency", 2) end - def mass_threshold_for(language) + def identical_mass_threshold_for(language) threshold = fetch_language(language).fetch("mass_threshold", nil) if threshold @@ -26,6 +26,14 @@ def mass_threshold_for(language) end end + def identical_mass_threshold_for(language) + mass_threshold_with_fallback(language, "identical_mass_threshold") + end + + def similar_mass_threshold_for(language) + mass_threshold_with_fallback(language, "similar_mass_threshold") + end + def paths_for(language) fetch_language(language).fetch("paths", nil) end @@ -66,6 +74,15 @@ def build_language_config(languages) {} end end + + def mass_threshold_with_fallback(language, key) + language_hash = fetch_language(language) + threshold = language_hash.fetch(key) do |key| + language_hash.fetch("mass_threshold", nil) + end + + threshold.to_i if threshold + end end end end diff --git a/spec/cc/engine/analyzers/engine_config_spec.rb b/spec/cc/engine/analyzers/engine_config_spec.rb index 2cf7eed3..1f1c94ad 100644 --- a/spec/cc/engine/analyzers/engine_config_spec.rb +++ b/spec/cc/engine/analyzers/engine_config_spec.rb @@ -70,13 +70,26 @@ } } }) - expect(engine_config.paths_for("elixir")).to be_nil end end - describe "mass_threshold_for" do + shared_examples "mass_threshold examples" do it "returns configured mass threshold as integer" do + engine_config = CC::Engine::Analyzers::EngineConfig.new({ + "config" => { + "languages" => { + "EliXiR" => { + specific_mass_threshold_key => "13" + } + } + } + }) + + expect(engine_config.send(tested_method, "elixir")).to eq(13) + end + + it "returns fallback mass threshold as integer" do engine_config = CC::Engine::Analyzers::EngineConfig.new({ "config" => { "languages" => { @@ -87,7 +100,7 @@ } }) - expect(engine_config.mass_threshold_for("elixir")).to eq(13) + expect(engine_config.send(tested_method, "elixir")).to eq(13) end it "returns nil when language is empty" do @@ -99,7 +112,21 @@ } }) - expect(engine_config.mass_threshold_for("ruby")).to be_nil + expect(engine_config.send(tested_method, "ruby")).to be_nil + end + end + + describe "similar_mass_threshold_for" do + include_examples "mass_threshold examples" do + let(:specific_mass_threshold_key) { "similar_mass_threshold" } + let(:tested_method) { :similar_mass_threshold_for } + end + end + + describe "identical_mass_threshold_for" do + include_examples "mass_threshold examples" do + let(:specific_mass_threshold_key) { "identical_mass_threshold" } + let(:tested_method) { :identical_mass_threshold_for } end end From 7fee16c600e307167bac5ce92d64b8ec3b79b6ad Mon Sep 17 00:00:00 2001 From: Will Fleming Date: Wed, 18 Nov 2015 15:06:11 -0500 Subject: [PATCH 2/5] fixup! engine_config: allow configuring different cutofs per check type --- lib/cc/engine/analyzers/analyzer_base.rb | 11 ++++- lib/cc/engine/analyzers/javascript/main.rb | 2 +- lib/cc/engine/analyzers/php/main.rb | 2 +- lib/cc/engine/analyzers/python/main.rb | 2 +- lib/cc/engine/analyzers/reporter.rb | 15 ++++++- lib/cc/engine/analyzers/ruby/main.rb | 2 +- .../engine/analyzers/javascript/main_spec.rb | 12 ++--- spec/cc/engine/analyzers/php/main_spec.rb | 12 ++--- spec/cc/engine/analyzers/python/main_spec.rb | 12 ++--- spec/cc/engine/analyzers/ruby/main_spec.rb | 44 ++++++++++++++++++- spec/cc/engine/analyzers/sexp_lines_spec.rb | 2 +- spec/support/helpers/analyzer_spec_helpers.rb | 10 +++++ 12 files changed, 90 insertions(+), 36 deletions(-) diff --git a/lib/cc/engine/analyzers/analyzer_base.rb b/lib/cc/engine/analyzers/analyzer_base.rb index 7e111348..74938529 100644 --- a/lib/cc/engine/analyzers/analyzer_base.rb +++ b/lib/cc/engine/analyzers/analyzer_base.rb @@ -17,8 +17,15 @@ def files file_list.files end - def mass_threshold - engine_config.mass_threshold_for(self.class::LANGUAGE) || self.class::DEFAULT_MASS_THRESHOLD + def mass_threshold_for_check(type) + case type + when :identical + engine_config.identical_mass_threshold_for(self.class::LANGUAGE) || self.class::DEFAULT_MASS_THRESHOLDS.fetch(type) + when :similar + engine_config.similar_mass_threshold_for(self.class::LANGUAGE) || self.class::DEFAULT_MASS_THRESHOLDS.fetch(type) + else + raise ArgumentError.new("#{type} is not a valid check type") + end end def base_points diff --git a/lib/cc/engine/analyzers/javascript/main.rb b/lib/cc/engine/analyzers/javascript/main.rb index 59c1a900..dcaeca42 100644 --- a/lib/cc/engine/analyzers/javascript/main.rb +++ b/lib/cc/engine/analyzers/javascript/main.rb @@ -15,7 +15,7 @@ class Main < CC::Engine::Analyzers::Base "**/*.jsx" ] LANGUAGE = "javascript" - DEFAULT_MASS_THRESHOLD = 40 + DEFAULT_MASS_THRESHOLDS = {identical: 40, similar: 80}.freeze BASE_POINTS = 3000 private diff --git a/lib/cc/engine/analyzers/php/main.rb b/lib/cc/engine/analyzers/php/main.rb index ddd7d638..04718d95 100644 --- a/lib/cc/engine/analyzers/php/main.rb +++ b/lib/cc/engine/analyzers/php/main.rb @@ -14,7 +14,7 @@ class Main < CC::Engine::Analyzers::Base "**/*.inc", "**/*.module" ] - DEFAULT_MASS_THRESHOLD = 10 + DEFAULT_MASS_THRESHOLDS = {identical: 10, similar: 20}.freeze BASE_POINTS = 4_000 private diff --git a/lib/cc/engine/analyzers/python/main.rb b/lib/cc/engine/analyzers/python/main.rb index 6ec2ca50..98423ce7 100644 --- a/lib/cc/engine/analyzers/python/main.rb +++ b/lib/cc/engine/analyzers/python/main.rb @@ -12,7 +12,7 @@ module Python class Main < CC::Engine::Analyzers::Base LANGUAGE = "python" DEFAULT_PATHS = ["**/*.py"] - DEFAULT_MASS_THRESHOLD = 40 + DEFAULT_MASS_THRESHOLDS = {identical: 40, similar: 80}.freeze BASE_POINTS = 1000 private diff --git a/lib/cc/engine/analyzers/reporter.rb b/lib/cc/engine/analyzers/reporter.rb index a21a4604..f460d1d8 100644 --- a/lib/cc/engine/analyzers/reporter.rb +++ b/lib/cc/engine/analyzers/reporter.rb @@ -36,6 +36,7 @@ def process_files def report flay.report(StringIO.new).each do |issue| + next unless issue_above_threshold?(issue) violation = new_violation(issue) unless reports.include?(violation.report_name) @@ -61,7 +62,19 @@ def flay attr_reader :engine_config, :language_strategy, :io def mass_threshold - @mass_threshold ||= language_strategy.mass_threshold + @mass_threshold ||= [ + language_strategy.mass_threshold_for_check(:identical), + language_strategy.mass_threshold_for_check(:similar), + ].min + end + + def issue_above_threshold?(issue) + base_mass = issue.mass / issue.locations.count + if issue.identical? + base_mass >= language_strategy.mass_threshold_for_check(:identical) + else + base_mass >= language_strategy.mass_threshold_for_check(:similar) + end end def new_violation(issue) diff --git a/lib/cc/engine/analyzers/ruby/main.rb b/lib/cc/engine/analyzers/ruby/main.rb index 3c6846bc..5faa7535 100644 --- a/lib/cc/engine/analyzers/ruby/main.rb +++ b/lib/cc/engine/analyzers/ruby/main.rb @@ -17,7 +17,7 @@ class Main < CC::Engine::Analyzers::Base "**/*.gemspec" ] - DEFAULT_MASS_THRESHOLD = 18 + DEFAULT_MASS_THRESHOLDS = {identical: 18, similar: 36}.freeze BASE_POINTS = 10_000 TIMEOUT = 10 diff --git a/spec/cc/engine/analyzers/javascript/main_spec.rb b/spec/cc/engine/analyzers/javascript/main_spec.rb index 7e79c1c7..415c4bc5 100644 --- a/spec/cc/engine/analyzers/javascript/main_spec.rb +++ b/spec/cc/engine/analyzers/javascript/main_spec.rb @@ -61,14 +61,8 @@ end def engine_conf - CC::Engine::Analyzers::EngineConfig.new({ - 'config' => { - 'languages' => { - 'javascript' => { - 'mass_threshold' => 1 - } - } - } - }) + CC::Engine::Analyzers::EngineConfig.new(engine_config_for_language({ + "mass_threshold" => 1, + })) end end diff --git a/spec/cc/engine/analyzers/php/main_spec.rb b/spec/cc/engine/analyzers/php/main_spec.rb index 33e06340..871748e1 100644 --- a/spec/cc/engine/analyzers/php/main_spec.rb +++ b/spec/cc/engine/analyzers/php/main_spec.rb @@ -56,14 +56,8 @@ def printed_issue end def engine_conf - CC::Engine::Analyzers::EngineConfig.new({ - 'config' => { - 'languages' => { - 'php' => { - 'mass_threshold' => 5 - } - } - } - }) + CC::Engine::Analyzers::EngineConfig.new(engine_config_for_language({ + "mass_threshold" => 5, + })) end end diff --git a/spec/cc/engine/analyzers/python/main_spec.rb b/spec/cc/engine/analyzers/python/main_spec.rb index 2919bf65..d30845bb 100644 --- a/spec/cc/engine/analyzers/python/main_spec.rb +++ b/spec/cc/engine/analyzers/python/main_spec.rb @@ -39,14 +39,8 @@ end def engine_conf - CC::Engine::Analyzers::EngineConfig.new({ - "config" => { - "languages" => { - "python" => { - "mass_threshold" => 4 - } - } - } - }) + CC::Engine::Analyzers::EngineConfig.new(engine_config_for_language({ + "mass_threshold" => 4, + })) end end diff --git a/spec/cc/engine/analyzers/ruby/main_spec.rb b/spec/cc/engine/analyzers/ruby/main_spec.rb index 4f6bfe60..83b42e2c 100644 --- a/spec/cc/engine/analyzers/ruby/main_spec.rb +++ b/spec/cc/engine/analyzers/ruby/main_spec.rb @@ -55,9 +55,51 @@ expect(run_engine(engine_conf)).to eq("") }.to output(/Skipping file/).to_stderr end + + it "respects different check type thresholds" do + create_source_file("foo.rb", <<-EORUBY) + def identical + puts "identical \#{thing.bar} \#{other.fun} \#{moo ? "moo" : "cluck"}" + puts "identical \#{thing.bar} \#{other.fun} \#{moo ? "moo" : "cluck"}" + puts "identical \#{thing.bar} \#{other.fun} \#{moo ? "moo" : "cluck"}" + end + + describe 'similar1' do + before { subject.type = 'js' } + + it 'returns true' do + expect(subject.ruby?).to be true + end + end + + describe 'similar2' do + before { subject.type = 'js' } + + it 'returns true' do + expect(subject.js?).to be true + end + end + EORUBY + + config = CC::Engine::Analyzers::EngineConfig.new(engine_config_for_language({ + "identical_mass_threshold" => 5, + "similar_mass_threshold" => 20, + })) + result = run_engine(config).strip + puts "result is #{result}\n\n\n" + json = JSON.parse(result) + + expect(json["check_name"]).to eq "Identical code" + expect(json["location"]).to eq({ + "path" => "foo.rb", + "lines" => { "begin" => 2, "end" => 2 }, + }) + end end def engine_conf - CC::Engine::Analyzers::EngineConfig.new({}) + CC::Engine::Analyzers::EngineConfig.new(engine_config_for_language({ + "mass_threshold" => described_class::DEFAULT_MASS_THRESHOLDS.fetch(:identical), + })) end end diff --git a/spec/cc/engine/analyzers/sexp_lines_spec.rb b/spec/cc/engine/analyzers/sexp_lines_spec.rb index ef7fa0db..7a2bd2c3 100644 --- a/spec/cc/engine/analyzers/sexp_lines_spec.rb +++ b/spec/cc/engine/analyzers/sexp_lines_spec.rb @@ -16,7 +16,7 @@ module CC::Engine::Analyzers SOURCE flay = Flay.new({ diff: false, - mass: CC::Engine::Analyzers::Ruby::Main::DEFAULT_MASS_THRESHOLD, + mass: CC::Engine::Analyzers::Ruby::Main::DEFAULT_MASS_THRESHOLDS.fetch(:identical), summary: false, verbose: false, number: true, diff --git a/spec/support/helpers/analyzer_spec_helpers.rb b/spec/support/helpers/analyzer_spec_helpers.rb index 59d21d4b..4bf6081f 100644 --- a/spec/support/helpers/analyzer_spec_helpers.rb +++ b/spec/support/helpers/analyzer_spec_helpers.rb @@ -3,6 +3,16 @@ def create_source_file(path, content) File.write(File.join(@code, path), content) end + def engine_config_for_language(config = {}) + { + "config" => { + "languages" => { + described_class::LANGUAGE => config, + }, + }, + } + end + def run_engine(config = nil) io = StringIO.new From 279d4dc626c0d36529d2ee0171a0761867f2c9fe Mon Sep 17 00:00:00 2001 From: Will Fleming Date: Wed, 18 Nov 2015 15:47:37 -0500 Subject: [PATCH 3/5] fixup! engine_config: allow configuring different cutofs per check type --- lib/cc/engine/analyzers/engine_config.rb | 8 -------- spec/cc/engine/analyzers/ruby/main_spec.rb | 1 - 2 files changed, 9 deletions(-) diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index eaea1d10..c6ede12a 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -18,14 +18,6 @@ def concurrency config.fetch("concurrency", 2) end - def identical_mass_threshold_for(language) - threshold = fetch_language(language).fetch("mass_threshold", nil) - - if threshold - threshold.to_i - end - end - def identical_mass_threshold_for(language) mass_threshold_with_fallback(language, "identical_mass_threshold") end diff --git a/spec/cc/engine/analyzers/ruby/main_spec.rb b/spec/cc/engine/analyzers/ruby/main_spec.rb index 83b42e2c..3545786d 100644 --- a/spec/cc/engine/analyzers/ruby/main_spec.rb +++ b/spec/cc/engine/analyzers/ruby/main_spec.rb @@ -86,7 +86,6 @@ def identical "similar_mass_threshold" => 20, })) result = run_engine(config).strip - puts "result is #{result}\n\n\n" json = JSON.parse(result) expect(json["check_name"]).to eq "Identical code" From 72a63530adeaf6bbac6ffc6b2639ac37d597542c Mon Sep 17 00:00:00 2001 From: Will Fleming Date: Wed, 18 Nov 2015 15:49:17 -0500 Subject: [PATCH 4/5] fixup! engine_config: allow configuring different cutofs per check type --- lib/cc/engine/analyzers/engine_config.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index c6ede12a..b365c236 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -69,7 +69,7 @@ def build_language_config(languages) def mass_threshold_with_fallback(language, key) language_hash = fetch_language(language) - threshold = language_hash.fetch(key) do |key| + threshold = language_hash.fetch(key) do language_hash.fetch("mass_threshold", nil) end From f17dc4acf15d84d7d3a8287c962bd8f77011ca17 Mon Sep 17 00:00:00 2001 From: Will Fleming Date: Wed, 18 Nov 2015 15:54:45 -0500 Subject: [PATCH 5/5] update README --- README.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 472c5a80..a6e53767 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,12 @@ duplication, try raising the threshold. If you suspect that the engine isn't catching enough duplication, try lowering the threshold. The best setting tends to differ from language to language. -To adjust this setting, add a `mass_threshold` key with your preferred value for +You can set thresholds for the two different types of duplication this engine +reports: blocks that are identical to each other, and blocks that are +structurally similar but differ in content. + +To adjust these thresholds, you can add `identical_mass_threshold` and +`similar_mass_threshold` keys with your preferred value for an enabled language: ```yaml @@ -54,11 +59,15 @@ engines: config: languages: ruby: - mass_threshold: 20 + identical_mass_threshold: 20 + similar_mass_threshold: 30 javascript: ``` -Note that you have the update the YAML structure under the `langauges` key to +If you would like to use the same threshold for both identical & similar issues, +you can just set the `mass_threshold` key. + +Note that you have to update the YAML structure under the `langauges` key to the Hash type to support extra configuration. [codeclimate]: https://codeclimate.com/dashboard