From 09e76147b96480418977b0bf1963baf91487b4d3 Mon Sep 17 00:00:00 2001 From: Jonathan Hefner Date: Sat, 21 Oct 2023 13:10:20 -0500 Subject: [PATCH] Exclude downcased acronym bigrams This prevents some unexpected results. For example, matching `ActionCable::Connection::StreamEventLoop` when searching for "select" due to "S"tream"E"vent"L"oop. --- lib/sdoc/search_index.rb | 4 ++-- spec/search_index_spec.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/sdoc/search_index.rb b/lib/sdoc/search_index.rb index 0ca0f193..1b3063f6 100644 --- a/lib/sdoc/search_index.rb +++ b/lib/sdoc/search_index.rb @@ -36,14 +36,14 @@ def derive_bigrams(name) # Example: "ActiveSupport::Cache::Store" => ":ActiveSupport:Cache:Store" strings = [":#{name}".gsub("::", ":")] + # Example: ":ActiveModel:API" => ":activemodel:api" + strings.concat(strings.map(&:downcase)) # Example: ":ActiveSupport:HashWithIndifferentAccess" => ":AS:HWIA" strings.concat(strings.map { |string| string.gsub(/([A-Z])[a-z]+/, '\1') }) # Example: ":AbstractController:Base#action_name" => " AbstractController Base action_name" strings.concat(strings.map { |string| string.tr(":#", " ") }) # Example: ":AbstractController:Base#action_name" => ":AbstractController:Base#actionname" strings.concat(strings.map { |string| string.tr("_", "") }) - # Example: ":ActiveModel:API" => ":activemodel:api" - strings.concat(strings.map(&:downcase)) # Example: ":ActiveModel:Name#<=>" => [":ActiveModel", ":Name", "#<=>"] strings.map! { |string| string.split(/(?=[ :#])/) }.flatten! diff --git a/spec/search_index_spec.rb b/spec/search_index_spec.rb index c7934bae..d240d71f 100644 --- a/spec/search_index_spec.rb +++ b/spec/search_index_spec.rb @@ -99,10 +99,10 @@ def hoge_fuga; end _(bigrams).wont_include "GR" end - it "includes downcased bigrams" do + it "includes downcased bigrams except for acronym bigrams" do bigrams = SDoc::SearchIndex.derive_bigrams("AbcDefGhi::RstUvwXyz") - bigrams.grep(/[A-Z]/).each do |uppercase| + bigrams.grep(/[A-Z]/).grep_v(/[A-Z]{2}/).each do |uppercase| _(bigrams).must_include uppercase.downcase end end