From fcb1a6dce085260ac1a30b8f3ddeb6fc19cf0c5d Mon Sep 17 00:00:00 2001 From: ABaldwinHunter Date: Tue, 19 Jan 2016 15:00:35 -0500 Subject: [PATCH] Tune duplication - 3 commits 1. FIX - Use node size as mass, instead of flay score 2. FIX - Report issue for each instance of duplicated code, not just first sexp. 3. UPDATE: Tune Python Remediation Points - Reduce AST threshold from 40 to 32 (classic is 28) - update point formula to match classic computation - don't penalize extra for identical duplication Change from remediations_points = x * score to remediation_points = x + (score-threshold) * y Since remediation points are a function of effort required to fix an issue, we're making a behavioral change to not penalize extra for identical duplication. --- config/contents/duplicated_code.md.erb | 2 +- lib/cc/engine/analyzers/analyzer_base.rb | 4 +- lib/cc/engine/analyzers/issue.rb | 89 ++++++++++++++++++ lib/cc/engine/analyzers/python/main.rb | 13 ++- lib/cc/engine/analyzers/reporter.rb | 8 +- lib/cc/engine/analyzers/ruby/main.rb | 8 +- lib/cc/engine/analyzers/violation.rb | 92 ++----------------- lib/cc/engine/analyzers/violation_read_up.rb | 6 +- .../engine/analyzers/javascript/main_spec.rb | 27 +++--- spec/cc/engine/analyzers/php/main_spec.rb | 21 ++--- spec/cc/engine/analyzers/python/main_spec.rb | 26 +++--- spec/cc/engine/analyzers/ruby/main_spec.rb | 71 ++++++++++---- spec/spec_helper.rb | 2 + 13 files changed, 217 insertions(+), 152 deletions(-) create mode 100644 lib/cc/engine/analyzers/issue.rb diff --git a/config/contents/duplicated_code.md.erb b/config/contents/duplicated_code.md.erb index 1a6fe052..9151aff0 100644 --- a/config/contents/duplicated_code.md.erb +++ b/config/contents/duplicated_code.md.erb @@ -9,7 +9,7 @@ When you violate DRY, bugs and maintenance problems are sure to follow. Duplicat ## Issue Mass Duplicated code has a calculated mass, which can be thought of as a measure of how much logic has been duplicated. -This issue has a mass of `<%= issue.mass %>`: if you would like to change the minimum mass that will be reported as an issue, please see the details in [`codeclimate-duplication`'s documentation](https://github.com/codeclimate/codeclimate-duplication). +This issue has a mass of `<%= mass %>`: if you would like to change the minimum mass that will be reported as an issue, please see the details in [`codeclimate-duplication`'s documentation](https://github.com/codeclimate/codeclimate-duplication). ## Refactorings diff --git a/lib/cc/engine/analyzers/analyzer_base.rb b/lib/cc/engine/analyzers/analyzer_base.rb index b90a32c3..f2d204bb 100644 --- a/lib/cc/engine/analyzers/analyzer_base.rb +++ b/lib/cc/engine/analyzers/analyzer_base.rb @@ -36,8 +36,8 @@ def mass_threshold engine_config.mass_threshold_for(self.class::LANGUAGE) || self.class::DEFAULT_MASS_THRESHOLD end - def calculate_points(issue) - self.class::BASE_POINTS * issue.mass + def calculate_points(mass) + self.class::BASE_POINTS * mass end private diff --git a/lib/cc/engine/analyzers/issue.rb b/lib/cc/engine/analyzers/issue.rb new file mode 100644 index 00000000..a7b64a2f --- /dev/null +++ b/lib/cc/engine/analyzers/issue.rb @@ -0,0 +1,89 @@ +require "cc/engine/analyzers/sexp_lines" +require "cc/engine/analyzers/violation_read_up" +require "digest" + +module CC + module Engine + module Analyzers + class Issue + def initialize(language_strategy:, check_name:, current_sexp:, other_sexps:) + @language_strategy = language_strategy + @check_name = check_name + @current_sexp = current_sexp + @other_sexps = other_sexps + end + + def format # rubocop:disable Metrics/MethodLength + { + "type": "issue", + "check_name": check_name, + "description": description, + "categories": ["Duplication"], + "location": format_location, + "remediation_points": calculate_points, + "other_locations": format_other_locations, + "content": content_body, + "fingerprint": fingerprint, + } + end # rubocop:enable Metrics/MethodLength + + def report_name + "#{current_sexp.file}-#{current_sexp.line}" + end + + def mass + current_sexp.mass + end + + private + + attr_reader :language_strategy, :check_name, :other_sexps, :current_sexp + + def calculate_points + language_strategy.calculate_points(mass) + end + + def format_location + format_sexp(current_sexp) + end + + def format_other_locations + other_sexps.map do |sexp| + format_sexp(sexp) + end + end + + def format_sexp(sexp) + lines = SexpLines.new(sexp) + { + "path": sexp.file.gsub(%r{^./}, ""), + "lines": { + "begin": lines.begin_line, + "end": lines.end_line, + }, + } + end + + def content_body + { "body": ViolationReadUp.new(mass).contents } + end + + def fingerprint + digest = Digest::MD5.new + digest << current_sexp.file + digest << "-" + digest << current_sexp.mass.to_s + digest << "-" + digest << check_name + digest.to_s + end + + def description + description = "#{check_name} found in #{(other_sexps.length)} other location" + description += "s" if other_sexps.length > 1 + description + end + end + end + end +end diff --git a/lib/cc/engine/analyzers/python/main.rb b/lib/cc/engine/analyzers/python/main.rb index 6ec2ca50..fb4b8013 100644 --- a/lib/cc/engine/analyzers/python/main.rb +++ b/lib/cc/engine/analyzers/python/main.rb @@ -12,11 +12,20 @@ module Python class Main < CC::Engine::Analyzers::Base LANGUAGE = "python" DEFAULT_PATHS = ["**/*.py"] - DEFAULT_MASS_THRESHOLD = 40 - BASE_POINTS = 1000 + DEFAULT_MASS_THRESHOLD = 32 + BASE_POINTS = 1_500_000 + POINTS_PER_OVERAGE = 50_000 + + def calculate_points(mass) + BASE_POINTS + (overage(mass) * POINTS_PER_OVERAGE) + end private + def overage(mass) + mass - mass_threshold + end + def process_file(path) Node.new(::CC::Engine::Analyzers::Python::Parser.new(File.binread(path), path).parse.syntax_tree, path).format end diff --git a/lib/cc/engine/analyzers/reporter.rb b/lib/cc/engine/analyzers/reporter.rb index bbe9f83d..580df613 100644 --- a/lib/cc/engine/analyzers/reporter.rb +++ b/lib/cc/engine/analyzers/reporter.rb @@ -38,9 +38,11 @@ def report flay.report(StringIO.new).each do |issue| violation = new_violation(issue) - unless reports.include?(violation.report_name) - reports.add(violation.report_name) - io.puts "#{violation.format.to_json}\0" + violation.occurrences.each do |occurrence| + unless reports.include?(occurrence.report_name) + reports.add(occurrence.report_name) + io.puts "#{occurrence.format.to_json}\0" + end end end end diff --git a/lib/cc/engine/analyzers/ruby/main.rb b/lib/cc/engine/analyzers/ruby/main.rb index d7258bab..b96b66d7 100644 --- a/lib/cc/engine/analyzers/ruby/main.rb +++ b/lib/cc/engine/analyzers/ruby/main.rb @@ -22,14 +22,14 @@ class Main < CC::Engine::Analyzers::Base POINTS_PER_OVERAGE = 100_000 TIMEOUT = 300 - def calculate_points(issue) - BASE_POINTS + (overage(issue) * POINTS_PER_OVERAGE) + def calculate_points(mass) + BASE_POINTS + (overage(mass) * POINTS_PER_OVERAGE) end private - def overage(issue) - issue.mass - mass_threshold + def overage(mass) + mass - mass_threshold end def process_file(file) diff --git a/lib/cc/engine/analyzers/violation.rb b/lib/cc/engine/analyzers/violation.rb index 4f39fbf1..f2409129 100644 --- a/lib/cc/engine/analyzers/violation.rb +++ b/lib/cc/engine/analyzers/violation.rb @@ -1,52 +1,24 @@ -require "cc/engine/analyzers/sexp_lines" -require "cc/engine/analyzers/violation_read_up" -require "digest" +require "cc/engine/analyzers/issue" module CC module Engine module Analyzers class Violation - attr_reader :issue - def initialize(language_strategy, issue, hashes) @language_strategy = language_strategy - @issue = issue @hashes = hashes + @issue = issue end - def format - { - "type": "issue", - "check_name": check_name, - "description": description, - "categories": ["Duplication"], - "location": format_location, - "remediation_points": calculate_points, - "other_locations": format_other_locations, - "content": content_body, - "fingerprint": fingerprint - } - end - - def report_name - "#{current_sexp.file}-#{current_sexp.line}" + def occurrences + hashes.map.with_index do |sexp, i| + Issue.new(language_strategy: language_strategy, check_name: check_name, current_sexp: sexp, other_sexps: other_sexps(hashes.dup, i)) + end end private - attr_reader :language_strategy, :hashes - - def current_sexp - @location ||= sorted_hashes.first - end - - def sorted_hashes - @_sorted_hashes ||= hashes.sort_by(&:file) - end - - def other_sexps - @other_locations ||= sorted_hashes.drop(1) - end + attr_reader :language_strategy, :hashes, :issue def check_name if issue.identical? @@ -56,53 +28,9 @@ def check_name end end - def calculate_points - language_strategy.calculate_points(issue) - end - - def format_location - format_sexp(current_sexp) - end - - def format_other_locations - other_sexps.map do |sexp| - format_sexp(sexp) - end - end - - def format_sexp(sexp) - lines = SexpLines.new(sexp) - { - "path": sexp.file.gsub(%r(^./), ""), - "lines": { - "begin": lines.begin_line, - "end": lines.end_line, - }, - } - end - - def content_body - @_content_body ||= { "body": ViolationReadUp.new(issue).contents } - end - - def fingerprint - digest = Digest::MD5.new - digest << current_sexp.file - digest << "-" - digest << current_sexp.mass.to_s - digest << "-" - digest << occurrences.to_s - digest.to_s - end - - def description - description = "#{check_name} found in #{occurrences} other location" - description += "s" if occurrences > 1 - description - end - - def occurrences - other_sexps.count + def other_sexps(members, i) + members.delete_at(i) + members.sort_by(&:file) end end end diff --git a/lib/cc/engine/analyzers/violation_read_up.rb b/lib/cc/engine/analyzers/violation_read_up.rb index 27d9367a..b9598899 100644 --- a/lib/cc/engine/analyzers/violation_read_up.rb +++ b/lib/cc/engine/analyzers/violation_read_up.rb @@ -4,8 +4,8 @@ module CC module Engine module Analyzers class ViolationReadUp - def initialize(issue) - @issue = issue + def initialize(mass) + @mass = mass end def contents @@ -14,7 +14,7 @@ def contents private - attr_reader :issue + attr_reader :mass TEMPLATE_REL_PATH = "../../../../config/contents/duplicated_code.md.erb" diff --git a/spec/cc/engine/analyzers/javascript/main_spec.rb b/spec/cc/engine/analyzers/javascript/main_spec.rb index 3fa0517b..4d464f38 100644 --- a/spec/cc/engine/analyzers/javascript/main_spec.rb +++ b/spec/cc/engine/analyzers/javascript/main_spec.rb @@ -1,9 +1,8 @@ +require 'spec_helper' require 'cc/engine/analyzers/javascript/main' require 'cc/engine/analyzers/reporter' require 'cc/engine/analyzers/engine_config' require 'cc/engine/analyzers/file_list' -require 'flay' -require 'tmpdir' RSpec.describe CC::Engine::Analyzers::Javascript::Main, in_tmpdir: true do include AnalyzerSpecHelpers @@ -16,7 +15,9 @@ console.log("hello JS!"); EOJS - result = run_engine(engine_conf).strip + issues = run_engine(engine_conf).strip.split("\0") + result = issues.first.strip + json = JSON.parse(result) expect(json["type"]).to eq("issue") @@ -27,13 +28,13 @@ "path" => "foo.js", "lines" => { "begin" => 1, "end" => 1 }, }) - expect(json["remediation_points"]).to eq(297000) + expect(json["remediation_points"]).to eq(33_000) expect(json["other_locations"]).to eq([ {"path" => "foo.js", "lines" => { "begin" => 2, "end" => 2} }, {"path" => "foo.js", "lines" => { "begin" => 3, "end" => 3} } ]) - expect(json["content"]["body"]).to match /This issue has a mass of `99`/ - expect(json["fingerprint"]).to eq("55ae5d0990647ef496e9e0d315f9727d") + expect(json["content"]["body"]).to match /This issue has a mass of `11`/ + expect(json["fingerprint"]).to eq("c4d29200c20d02297c6f550ad2c87c15") end it "prints an issue for similar code" do @@ -43,7 +44,9 @@ console.log("helllllllllllllllllo JS!"); EOJS - result = run_engine(engine_conf).strip + issues = run_engine(engine_conf).strip.split("\0") + result = issues.first.strip + json = JSON.parse(result) expect(json["type"]).to eq("issue") @@ -54,13 +57,13 @@ "path" => "foo.js", "lines" => { "begin" => 1, "end" => 1 }, }) - expect(json["remediation_points"]).to eq(99000) + expect(json["remediation_points"]).to eq(33_000) expect(json["other_locations"]).to eq([ {"path" => "foo.js", "lines" => { "begin" => 2, "end" => 2} }, {"path" => "foo.js", "lines" => { "begin" => 3, "end" => 3} } ]) - expect(json["content"]["body"]).to match /This issue has a mass of `33`/ - expect(json["fingerprint"]).to eq("55ae5d0990647ef496e9e0d315f9727d") + expect(json["content"]["body"]).to match /This issue has a mass of `11`/ + expect(json["fingerprint"]).to eq("d9dab8e4607e2a74da3b9eefb49eacec") end it "skips unparsable files" do @@ -91,8 +94,8 @@ Login EOJSX - result = run_engine(engine_conf).strip - issues = result.split("\0") + issues = run_engine(engine_conf).strip.split("\0") + expect(issues.length).to eq 1 end diff --git a/spec/cc/engine/analyzers/php/main_spec.rb b/spec/cc/engine/analyzers/php/main_spec.rb index c6898473..209a8106 100644 --- a/spec/cc/engine/analyzers/php/main_spec.rb +++ b/spec/cc/engine/analyzers/php/main_spec.rb @@ -1,9 +1,8 @@ +require 'spec_helper' require 'cc/engine/analyzers/php/main' require 'cc/engine/analyzers/reporter' require 'cc/engine/analyzers/engine_config' require 'cc/engine/analyzers/file_list' -require 'flay' -require 'tmpdir' RSpec.describe CC::Engine::Analyzers::Php::Main, in_tmpdir: true do include AnalyzerSpecHelpers @@ -29,7 +28,9 @@ } EOPHP - result = run_engine(engine_conf).strip + issues = run_engine(engine_conf).strip.split("\0") + result = issues.first.strip + json = JSON.parse(result) expect(json["type"]).to eq("issue") @@ -40,17 +41,18 @@ "path" => "foo.php", "lines" => { "begin" => 2, "end" => 6 }, }) - expect(json["remediation_points"]).to eq(176000) + expect(json["remediation_points"]).to eq(44_000) expect(json["other_locations"]).to eq([ {"path" => "foo.php", "lines" => { "begin" => 10, "end" => 14} }, ]) - expect(json["content"]["body"]).to match /This issue has a mass of `44`/ - expect(json["fingerprint"]).to eq("667da0e2bab866aa2fe9d014a65d57d9") + expect(json["content"]["body"]).to match /This issue has a mass of `11`/ + expect(json["fingerprint"]).to eq("8234e10d96fd6ef608085c22c91c9ab1") end it "runs against complex files" do FileUtils.cp(fixture_path("symfony_configuration.php"), File.join(@code, "configuration.php")) - result = run_engine(engine_conf).strip + issues = run_engine(engine_conf).strip.split("\0") + result = issues.first.strip expect(result).to match "\"type\":\"issue\"" end @@ -91,11 +93,6 @@ end end - def printed_issue - issue = {"type":"issue","check_name":"Identical code","description":"Similar code found in 1 other location","categories":["Duplication"],"location":{"path":"foo.php","lines":{"begin":2,"end":6}},"remediation_points":176000,"other_locations":[{"path":"foo.php","lines":{"begin":10,"end":14}}],"content":{"body": read_up}} - issue.to_json + "\0\n" - end - def engine_conf CC::Engine::Analyzers::EngineConfig.new({ 'config' => { diff --git a/spec/cc/engine/analyzers/python/main_spec.rb b/spec/cc/engine/analyzers/python/main_spec.rb index 9c3da701..f4514678 100644 --- a/spec/cc/engine/analyzers/python/main_spec.rb +++ b/spec/cc/engine/analyzers/python/main_spec.rb @@ -1,9 +1,7 @@ -require "spec_helper" -require "cc/engine/analyzers/python/main" +require 'spec_helper' +require 'cc/engine/analyzers/python/main' require 'cc/engine/analyzers/engine_config' require 'cc/engine/analyzers/file_list' -require "flay" -require "tmpdir" RSpec.describe CC::Engine::Analyzers::Python::Main, in_tmpdir: true do include AnalyzerSpecHelpers @@ -16,7 +14,9 @@ print("Hello", "python") EOJS - result = run_engine(engine_conf).strip + issues = run_engine(engine_conf).strip.split("\0") + result = issues.first.strip + json = JSON.parse(result) expect(json["type"]).to eq("issue") @@ -27,13 +27,13 @@ "path" => "foo.py", "lines" => { "begin" => 1, "end" => 1 }, }) - expect(json["remediation_points"]).to eq(54000) + expect(json["remediation_points"]).to eq(1_600_000) expect(json["other_locations"]).to eq([ {"path" => "foo.py", "lines" => { "begin" => 2, "end" => 2} }, {"path" => "foo.py", "lines" => { "begin" => 3, "end" => 3} } ]) - expect(json["content"]["body"]).to match /This issue has a mass of `54`/ - expect(json["fingerprint"]).to eq("42b832387c997f54a2012efb2159aefc") + expect(json["content"]["body"]).to match /This issue has a mass of `6`/ + expect(json["fingerprint"]).to eq("3f3d34361bcaef98839d9da6ca9fcee4") end it "prints an issue for similar code" do @@ -43,7 +43,9 @@ print("Hello from the other side", "python") EOJS - result = run_engine(engine_conf).strip + issues = run_engine(engine_conf).strip.split("\0") + result = issues.first.strip + json = JSON.parse(result) expect(json["type"]).to eq("issue") @@ -54,13 +56,13 @@ "path" => "foo.py", "lines" => { "begin" => 1, "end" => 1 }, }) - expect(json["remediation_points"]).to eq(18000) + expect(json["remediation_points"]).to eq(1_600_000) expect(json["other_locations"]).to eq([ {"path" => "foo.py", "lines" => { "begin" => 2, "end" => 2} }, {"path" => "foo.py", "lines" => { "begin" => 3, "end" => 3} } ]) - expect(json["content"]["body"]).to match /This issue has a mass of `18`/ - expect(json["fingerprint"]).to eq("42b832387c997f54a2012efb2159aefc") + expect(json["content"]["body"]).to match /This issue has a mass of `6`/ + expect(json["fingerprint"]).to eq("019118ceed60bf40b35aad581aae1b02") end diff --git a/spec/cc/engine/analyzers/ruby/main_spec.rb b/spec/cc/engine/analyzers/ruby/main_spec.rb index b3af3bac..18685c94 100644 --- a/spec/cc/engine/analyzers/ruby/main_spec.rb +++ b/spec/cc/engine/analyzers/ruby/main_spec.rb @@ -1,8 +1,7 @@ +require 'spec_helper' require 'cc/engine/analyzers/ruby/main' require 'cc/engine/analyzers/engine_config' require 'cc/engine/analyzers/file_list' -require 'flay' -require 'tmpdir' module CC::Engine::Analyzers RSpec.describe Ruby::Main, in_tmpdir: true do @@ -28,7 +27,9 @@ module CC::Engine::Analyzers end EORUBY - result = run_engine(engine_conf).strip + issues = run_engine(engine_conf).strip.split("\0") + result = issues.first.strip + json = JSON.parse(result) expect(json["type"]).to eq("issue") @@ -39,12 +40,44 @@ module CC::Engine::Analyzers "path" => "foo.rb", "lines" => { "begin" => 1, "end" => 5 }, }) - expect(json["remediation_points"]).to eq(3300000) + expect(json["remediation_points"]).to eq(1_500_000) expect(json["other_locations"]).to eq([ {"path" => "foo.rb", "lines" => { "begin" => 9, "end" => 13} }, ]) - expect(json["content"]["body"]).to match /This issue has a mass of `36`/ - expect(json["fingerprint"]).to eq("f21b75bbd135ec3ae6638364d5c73762") + expect(json["content"]["body"]).to match /This issue has a mass of `18`/ + expect(json["fingerprint"]).to eq("b7e46d8f5164922678e48942e26100f2") + end + + it "creates an issue for each occurrence of the duplicated code" do + create_source_file("foo.rb", <<-EORUBY) + describe '#ruby?' do + before { subject.type = 'ruby' } + + it 'returns true' do + expect(subject.ruby?).to be true + end + end + + describe '#js?' do + before { subject.type = 'js' } + + it 'returns true' do + expect(subject.js?).to be true + end + end + + describe '#whaddup?' do + before { subject.type = 'js' } + + it 'returns true' do + expect(subject.js?).to be true + end + end + EORUBY + + issues = run_engine(engine_conf).strip.split("\0") + + expect(issues.length).to eq(3) end it "skips unparsable files" do @@ -58,43 +91,43 @@ module CC::Engine::Analyzers end end - describe "#calculate_points(issue)" do + describe "#calculate_points(mass)" do let(:analyzer) { Ruby::Main.new(engine_config: engine_conf) } let(:base_points) { Ruby::Main::BASE_POINTS } let(:points_per) { Ruby::Main::POINTS_PER_OVERAGE } let(:threshold) { Ruby::Main::DEFAULT_MASS_THRESHOLD } - context "when issue mass exceeds threshold" do + context "when mass exceeds threshold" do it "calculates mass overage points" do - issue = double(:issue, mass: threshold + 10) - overage = issue.mass - threshold + mass = threshold + 10 + overage = mass - threshold expected_points = base_points + overage * points_per - points = analyzer.calculate_points(issue) + points = analyzer.calculate_points(mass) expect(points).to eq(expected_points) end end - context "when issue mass is less than threshold" do + context "when mass is less than threshold" do it "calculates mass overage points" do - issue = double(:issue, mass: threshold - 5) - overage = issue.mass - threshold + mass = threshold - 5 + overage = mass - threshold expected_points = base_points + overage * points_per - points = analyzer.calculate_points(issue) + points = analyzer.calculate_points(mass) expect(points).to eq(expected_points) end end - context "when issue mass equals threshold" do + context "when mass equals threshold" do it "calculates mass overage points" do - issue = double(:issue, mass: threshold) - overage = issue.mass - threshold + mass = threshold + overage = mass - threshold expected_points = base_points + overage * points_per - points = analyzer.calculate_points(issue) + points = analyzer.calculate_points(mass) expect(points).to eq(expected_points) end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 17c03b37..65f7c130 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,6 +1,8 @@ require 'bundler/setup' +require 'flay' require 'tmpdir' + Dir[File.dirname(__FILE__) + "/support/**/*.rb"].each {|f| require f } RSpec.configure do |config|