test(indexer): Adding tests for the dedicated index

This commit is contained in:
Pixelastic 2018-04-03 19:12:48 +02:00
parent 0da7131818
commit 40c113d587
4 changed files with 809 additions and 332 deletions

View File

@ -49,7 +49,7 @@ module Jekyll
invalid_credentials invalid_credentials
record_too_big record_too_big
too_many_records too_many_records
unknown_settings unknown_setting
invalid_index_name invalid_index_name
] ]
@ -135,6 +135,24 @@ module Jekyll
hash hash
end end
# Public: Returns a string explaining which attributes are the largest in
# the record
#
# record - The record hash to analyze
#
# This will be used on the `record_too_big` error, to guide users in
# finding which record is causing trouble
def self.readable_largest_record_keys(record)
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
output = []
largest_keys.each do |key, size|
size = Filesize.from("#{size} B").to_s('Kb')
output << "#{key} (#{size})"
end
output.join(', ')
end
# Public: Check if the application id is available # Public: Check if the application id is available
# #
# _context - Not used # _context - Not used
@ -162,6 +180,7 @@ module Jekyll
# Application ID and API key submitted don't match any credentials known # Application ID and API key submitted don't match any credentials known
def self.invalid_credentials?(error, _context = {}) def self.invalid_credentials?(error, _context = {})
details = error_hash(error.message) details = error_hash(error.message)
return false if details == false
if details['message'] != 'Invalid Application-ID or API key' if details['message'] != 'Invalid Application-ID or API key'
return false return false
@ -172,24 +191,6 @@ module Jekyll
} }
end end
# Public: Returns a string explaining which attributes are the largest in
# the record
#
# record - The record hash to analyze
#
# This will be used on the `record_too_big` error, to guide users in
# finding which record is causing trouble
def self.readable_largest_record_keys(record)
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
output = []
largest_keys.each do |key, size|
size = Filesize.from("#{size} B").to_s('Kb')
output << "#{key} (#{size})"
end
output.join(', ')
end
# Public: Check if the sent records are not too big # Public: Check if the sent records are not too big
# #
# context[:records] - list of records sent in the batch # context[:records] - list of records sent in the batch
@ -199,6 +200,7 @@ module Jekyll
# informations about it so the user can debug it. # informations about it so the user can debug it.
def self.record_too_big?(error, context = {}) def self.record_too_big?(error, context = {})
details = error_hash(error.message) details = error_hash(error.message)
return false if details == false
message = details['message'] message = details['message']
return false if message !~ /^Record .* is too big .*/ return false if message !~ /^Record .* is too big .*/
@ -208,8 +210,11 @@ module Jekyll
size = Filesize.from("#{size} B").to_s('Kb') size = Filesize.from("#{size} B").to_s('Kb')
object_id = details['objectID'] object_id = details['objectID']
# Getting record details # Finding the record in all the operations
record = Utils.find_by_key(context[:records], :objectID, object_id) operation = context[:operations].find do |o|
o[:action] == 'addObject' && o[:body][:objectID] == object_id
end
record = operation[:body]
probable_wrong_keys = readable_largest_record_keys(record) probable_wrong_keys = readable_largest_record_keys(record)
# Writing the full record to disk for inspection # Writing the full record to disk for inspection
@ -237,8 +242,9 @@ module Jekyll
# The API will block any call that tries to update a setting value that is # The API will block any call that tries to update a setting value that is
# not available. We'll tell the user which one so they can fix their # not available. We'll tell the user which one so they can fix their
# issue. # issue.
def self.unknown_settings?(error, context = {}) def self.unknown_setting?(error, context = {})
details = error_hash(error.message) details = error_hash(error.message)
return false if details == false
message = details['message'] message = details['message']
return false if message !~ /^Invalid object attributes.*/ return false if message !~ /^Invalid object attributes.*/
@ -259,6 +265,7 @@ module Jekyll
# Some characters are forbidden in index names # Some characters are forbidden in index names
def self.invalid_index_name?(error, _context = {}) def self.invalid_index_name?(error, _context = {})
details = error_hash(error.message) details = error_hash(error.message)
return false if details == false
message = details['message'] message = details['message']
return false if message !~ /^indexName is not valid.*/ return false if message !~ /^indexName is not valid.*/
@ -273,6 +280,7 @@ module Jekyll
# We're trying to push too many records and it goes over quota # We're trying to push too many records and it goes over quota
def self.too_many_records?(error, _context = {}) def self.too_many_records?(error, _context = {})
details = error_hash(error.message) details = error_hash(error.message)
return false if details == false
message = details['message'] message = details['message']
return false if message !~ /^Record quota exceeded.*/ return false if message !~ /^Record quota exceeded.*/

View File

@ -31,7 +31,6 @@ module Jekyll
end end
# Public: Returns the Algolia index used to store object ids # Public: Returns the Algolia index used to store object ids
# TOTEST
def self.index_object_ids def self.index_object_ids
@index_object_ids @index_object_ids
end end
@ -42,7 +41,6 @@ module Jekyll
# #
# Note: there is no API endpoint to do that, so we try to get the settings # Note: there is no API endpoint to do that, so we try to get the settings
# instead, which will fail if the index does not exist # instead, which will fail if the index does not exist
# TOTEST
def self.index_exist?(index) def self.index_exist?(index)
index.get_settings index.get_settings
true true
@ -50,6 +48,22 @@ module Jekyll
false false
end end
# Public: Get the number of records in an index
#
# index - Index to check
#
# Note: We'll do an empty query search, to match everything, but we'll
# only return the objectID and one element, to get the shortest response
# possible. It will still contain the nbHits
def self.record_count(index)
index.search(
'',
attributesToRetrieve: 'objectID',
distinct: false,
hitsPerPage: 1
)['nbHits']
end
# Public: Set the User-Agent to send to the API # Public: Set the User-Agent to send to the API
# #
# Every integrations should follow the "YYY Integration" pattern, and # Every integrations should follow the "YYY Integration" pattern, and
@ -67,55 +81,65 @@ module Jekyll
::Algolia.set_extra_header('User-Agent', user_agent) ::Algolia.set_extra_header('User-Agent', user_agent)
end end
# Public: Get an array of all object IDs stored in the main index
#
# Note: As this will be slow (grabbing them 1000 at a time), we display
# a progress bar.
def self.remote_object_ids_from_main_index
Logger.verbose("I:Inspecting existing records in index #{index.name}")
list = []
# As it might take some time, we display a progress bar
progress_bar = ProgressBar.create(
total: record_count(index),
format: 'Inspecting existing records (%j%%) |%B|'
)
begin
index.browse(
attributesToRetrieve: 'objectID',
hitsPerPage: 1000
) do |hit|
list << hit['objectID']
progress_bar.increment
end
end
list.sort
end
# Public: Get an array of all the object ids, stored in the dedicated
# index
#
# Note: This will be very fast. Each record contain 100 object id, so it
# will fit in one call each time.
def self.remote_object_ids_from_dedicated_index
list = []
begin
index_object_ids.browse(
attributesToRetrieve: 'content',
hitsPerPage: 1000
) do |hit|
list += hit['content']
end
end
list.sort
end
# Public: Returns an array of all the objectIDs in the index # Public: Returns an array of all the objectIDs in the index
# #
# Note: We use a dedicated index to store the objectIDs for faster # Note: We use a dedicated index to store the objectIDs for faster
# browsing, but if the index does not exist we read the main index. # browsing, but if the index does not exist we read the main index.
def self.remote_object_ids def self.remote_object_ids
Logger.log('I:Getting list of existing records') Logger.log('I:Getting list of existing records')
list = []
hits_per_page = 1000
# Fast version, using the dedicated index
has_dedicated_index = index_exist?(index_object_ids) has_dedicated_index = index_exist?(index_object_ids)
if has_dedicated_index return remote_object_ids_from_dedicated_index if has_dedicated_index
begin
index_object_ids.browse(
attributesToRetrieve: 'content',
hitsPerPage: hits_per_page
) do |hit|
list += hit['content']
end
end
else
# Slow versio, browsing the full index
Logger.verbose(
"I:Inspecting existing records in index #{index.name}..."
)
# As it might take some time, we display a progress bar # Slow version, browsing the full index
max_hits = index.search( remote_object_ids_from_main_index
'',
attributesToRetrieve: 'objectID',
distinct: false,
hitsPerPage: 1
)['nbHits']
progress_bar = ProgressBar.create(
total: max_hits,
format: 'Inspecting existing records (%j%%) |%B|'
)
begin
index.browse(
attributesToRetrieve: 'objectID',
hitsPerPage: hits_per_page
) do |hit|
list << hit['objectID']
progress_bar.increment
end
end
end
list.sort
end end
# Public: Returns an array of the local objectIDs # Public: Returns an array of the local objectIDs
@ -137,41 +161,43 @@ module Jekyll
remote_ids = remote_object_ids remote_ids = remote_object_ids
local_ids = local_object_ids(records) local_ids = local_object_ids(records)
# We need the list of objectIDs for deletion # Making a diff, to see what to add and what to delete
old_records_ids = remote_ids - local_ids ids_to_delete = remote_ids - local_ids
ids_to_add = local_ids - remote_ids
# For addition, we need the full records
# We build a first hash of all the records we have, with access by
new_records_ids = local_ids - remote_ids
cache = Hash[records.map { |record| [record[:objectID], record] }]
new_records = new_records_ids.map { |id| cache[id] }
# Stop if nothing to change # Stop if nothing to change
if old_records_ids.empty? && new_records.empty? if ids_to_delete.empty? && ids_to_add.empty?
Logger.log('I:Content is already up to date.') Logger.log('I:Content is already up to date.')
return return
end end
Logger.log("I:Updating records in index #{index.name}...") Logger.log("I:Updating records in index #{index.name}...")
Logger.log("I:Records to delete: #{old_records_ids.length}") Logger.log("I:Records to delete: #{ids_to_delete.length}")
Logger.log("I:Records to add: #{new_records.length}") Logger.log("I:Records to add: #{ids_to_add.length}")
return if Configurator.dry_run? return if Configurator.dry_run?
# We group delete and add operations into the same batch. Delete # Transforming ids into real records to add
# operations should still come first, to avoid hitting an overquota too records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
# soon records_to_add = ids_to_add.map { |id| records_by_id[id] }
# We group all operations into one batch
operations = [] operations = []
old_records_ids.each do |object_id|
# Deletion operations come first, to avoid hitting an overquota too soon
# if it can be avoided
ids_to_delete.each do |object_id|
operations << { operations << {
action: 'deleteObject', indexName: index.name, action: 'deleteObject', indexName: index.name,
body: { objectID: object_id } body: { objectID: object_id }
} }
end end
operations += new_records.map do |new_record| # Then we add the new records
operations += records_to_add.map do |new_record|
{ action: 'addObject', indexName: index.name, body: new_record } { action: 'addObject', indexName: index.name, body: new_record }
end end
# We batch as well the update to the index holding the object ids # We also clear the dedicated index holding the object ids and push the
# new list of ids
operations << { action: 'clear', indexName: index_object_ids.name } operations << { action: 'clear', indexName: index_object_ids.name }
local_ids.each_slice(100).each do |ids| local_ids.each_slice(100).each do |ids|
operations << { operations << {
@ -183,6 +209,12 @@ module Jekyll
execute_operations(operations) execute_operations(operations)
end end
# Public: Execute a serie of operations in a batch
#
# operations - Operations to batch
#
# Note: Will split the batch in several calls if too big, and will display
# a progress bar if this happens
def self.execute_operations(operations) def self.execute_operations(operations)
# Run the batches in slices if they are too large # Run the batches in slices if they are too large
batch_size = Configurator.algolia('indexing_batch_size') batch_size = Configurator.algolia('indexing_batch_size')
@ -202,10 +234,7 @@ module Jekyll
progress_bar.increment if should_have_progress_bar progress_bar.increment if should_have_progress_bar
rescue StandardError => error rescue StandardError => error
records = slice.map do |record| ErrorHandler.stop(error, operations: slice)
record[:body]
end
ErrorHandler.stop(error, records: records)
end end
end end
end end

View File

@ -7,6 +7,7 @@ describe(Jekyll::Algolia::ErrorHandler) do
let(:current) { Jekyll::Algolia::ErrorHandler } let(:current) { Jekyll::Algolia::ErrorHandler }
let(:configurator) { Jekyll::Algolia::Configurator } let(:configurator) { Jekyll::Algolia::Configurator }
let(:logger) { Jekyll::Algolia::Logger } let(:logger) { Jekyll::Algolia::Logger }
let(:utils) { Jekyll::Algolia::Utils }
describe '.stop' do describe '.stop' do
subject { -> { current.stop(error) } } subject { -> { current.stop(error) } }
@ -37,6 +38,64 @@ describe(Jekyll::Algolia::ErrorHandler) do
end end
end end
describe '.identify' do
let(:error) { double('Error') }
let(:context) { 'context' }
subject { current.identify(error, context) }
before do
allow(current).to receive(:unknown_application_id?).and_return(false)
allow(current).to receive(:invalid_credentials?).and_return(false)
allow(current).to receive(:record_too_big?).and_return(false)
allow(current).to receive(:too_many_records?).and_return(false)
allow(current).to receive(:unknown_setting?).and_return(false)
allow(current).to receive(:invalid_index_name?).and_return(false)
end
it 'should return false if nothing matches' do
should eq false
end
describe 'should call all methods with error and context' do
before do
current.identify(error, context)
end
it do
expect(current)
.to have_received(:unknown_application_id?)
.with(error, context)
expect(current)
.to have_received(:invalid_credentials?)
.with(error, context)
expect(current)
.to have_received(:record_too_big?)
.with(error, context)
expect(current)
.to have_received(:too_many_records?)
.with(error, context)
expect(current)
.to have_received(:unknown_setting?)
.with(error, context)
expect(current)
.to have_received(:invalid_index_name?)
.with(error, context)
end
end
describe 'should return the result of one if matches' do
before do
allow(current)
.to receive(:too_many_records?)
.and_return('foo')
end
it do
should eq(name: 'too_many_records', details: 'foo')
end
end
end
describe '.error_hash' do describe '.error_hash' do
subject { current.error_hash(message) } subject { current.error_hash(message) }
@ -105,28 +164,31 @@ describe(Jekyll::Algolia::ErrorHandler) do
it { should eq 'baz (100.00 Kb), bar (10.00 Kb), foo (1.00 Kb)' } it { should eq 'baz (100.00 Kb), bar (10.00 Kb), foo (1.00 Kb)' }
end end
describe '.identify' do describe '.unknown_application_id?' do
subject { current.identify(error, context) }
let(:error) { double('Error', message: message) } let(:error) { double('Error', message: message) }
let(:context) { {} }
context 'with unknown application_id' do subject { current.unknown_application_id?(error) }
describe 'not matching' do
let(:message) { 'foo bar' }
it { should eq false }
end
describe 'matching' do
let(:message) do let(:message) do
# rubocop:disable Metrics/LineLength # rubocop:disable Metrics/LineLength
'Cannot reach any host: '\ 'Cannot reach any host: '\
'getaddrinfo: Name or service not known (MY_APP_ID-dsn.algolia.net:443), '\ 'getaddrinfo: Name or service not known (MY_APP_ID-dsn.algolia.net:443), '\
'getaddrinfo: No address associated with hostname (MY_APP_ID-3.algolianet.com:443), '\ 'getaddrinfo: No address associated with hostname (MY_APP_ID-3.algolianet.com:443), '\
'getaddrinfo: No address associated with hostname (MY_APP_ID-1.algolianet.com:443), '\ 'getaddrinfo: No address associated with hostname (MY_APP_ID-1.algolianet.com:443), '\
'getaddrinfo: No address associated with hostname (MY_APP_ID-2.algolianet.com:443)' 'getaddrinfo: No address associated with hostname (MY_APP_ID-2.algolianet.com:443)'
# rubocop:enable Metrics/LineLength # rubocop:enable Metrics/LineLength
end end
it { should include(name: 'unknown_application_id') } it { should eq('application_id' => 'MY_APP_ID') }
it { should include(details: { 'application_id' => 'MY_APP_ID' }) }
end end
context 'with unknown application_id and no DSN' do describe 'matching with a DSN' do
let(:message) do let(:message) do
# rubocop:disable Metrics/LineLength # rubocop:disable Metrics/LineLength
'Cannot reach any host: '\ 'Cannot reach any host: '\
@ -137,138 +199,224 @@ describe(Jekyll::Algolia::ErrorHandler) do
# rubocop:enable Metrics/LineLength # rubocop:enable Metrics/LineLength
end end
it { should include(name: 'unknown_application_id') } it { should eq('application_id' => 'MY_APP_ID') }
it { should include(details: { 'application_id' => 'MY_APP_ID' }) } end
end
describe '.invalid_credentials?' do
let(:error) { double('Error').as_null_object }
subject { current.invalid_credentials?(error) }
before do
allow(current).to receive(:error_hash).and_return(error_hash)
end
describe 'not matching' do
let(:error_hash) { false }
it { should eq false }
end end
context 'with wrong API key' do context 'with wrong API key' do
before do let(:error_hash) do
allow(configurator) {
.to receive(:index_name) 'message' => 'Invalid Application-ID or API key',
.and_return('my_index') 'application_id' => 'MY_APP_ID'
}
end end
let(:message) do it { should eq('application_id' => 'MY_APP_ID') }
'Cannot POST to '\ end
'https://MY_APP_ID.algolia.net/1/indexes/my_index/batch: '\ end
'{"message":"Invalid Application-ID or API key","status":403}'\
"\n (403)" describe '.record_too_big?' do
let(:error) { double('Error').as_null_object }
let(:error_hash) do
{
'message' => 'Record at the position 3 '\
'objectID=deadbeef is too big size=109196 bytes. '\
'Contact us if you need an extended quota',
'objectID' => 'object_id'
}
end
let(:context) do
{
operations: [
{
action: 'deleteObject',
body: { objectID: 'object_to_delete' }
},
{
action: 'addObject',
body: { objectID: 'object_id', title: 'foo', url: 'url' }
},
{
action: 'clear'
},
{
action: 'addObject',
body: { content: %w[object_id1 object_id2] }
}
]
}
end
subject { current.record_too_big?(error, context) }
before do
allow(current).to receive(:error_hash).and_return(error_hash)
allow(utils).to receive(:find_by_key).and_return({})
allow(current).to receive(:readable_largest_record_keys)
allow(logger).to receive(:write_to_file)
end
describe 'wrongly formatted message' do
let(:error_hash) { false }
it { should eq false }
end
describe 'not matching' do
let(:error_hash) { { 'message' => 'foo bar' } }
it { should eq false }
end
it 'should get information from message' do
should include('object_id' => 'object_id')
should include('size' => '109.20 Kb')
should include('size_limit' => '10 Kb')
end
describe 'includes the nodes to index' do
before do
allow(configurator).to receive(:algolia).and_return('nodes')
end end
it { should include(name: 'invalid_credentials') }
it do it do
should include(details: { should include('nodes_to_index' => 'nodes')
'application_id' => 'MY_APP_ID'
})
end end
end end
context 'with a record too big' do describe 'includes information about the bad record' do
let(:message) do
'400: Cannot POST to '\
'https://MY_APP_ID.algolia.net/1/indexes/*/batch: '\
'{"message":"Record at the position 3 '\
'objectID=deadbeef is too big size=109196 bytes. '\
'Contact us if you need an extended quota","position":3,'\
'"objectID":"deadbeef","status":400} (400)'
end
let(:context) do
{ records: [
{
objectID: 'deadbeef',
title: 'Page title',
url: '/path/to/file.ext',
# rubocop:disable Metrics/LineLength
content: 'A very long text that is obviously too long to fit in one record, but that would be too long to actually display in the error message as well so we will cut it at 100 characters.'
# rubocop:enable Metrics/LineLength
},
{ objectID: 'foo' }
] }
end
let(:record_log_path) { '/source/output.log' }
let(:probable_wrong_keys) { 'foo, bar, baz' }
before do before do
allow(configurator) allow(current)
.to receive(:algolia) .to receive(:readable_largest_record_keys)
.with('nodes_to_index') .and_return('wrong_keys')
.and_return('p,li,foo') end
it do
should include('object_title' => 'foo')
should include('object_url' => 'url')
should include('probable_wrong_keys' => 'wrong_keys')
end
end
describe 'save log file' do
before do
expect(::JSON)
.to receive(:pretty_generate)
.with(objectID: 'object_id', title: 'foo', url: 'url')
.and_return('{json}')
expect(logger) expect(logger)
.to receive(:write_to_file) .to receive(:write_to_file)
.and_return(record_log_path) .with(
expect(current) 'jekyll-algolia-record-too-big-object_id.log',
.to receive(:readable_largest_record_keys) '{json}'
.and_return(probable_wrong_keys) )
.and_return('/path/to/file.log')
end end
it { should include(name: 'record_too_big') } it 'should return the path of the log file in the output' do
should include('record_log_path' => '/path/to/file.log')
it do
details = subject[:details]
expect(details).to include('object_id' => 'deadbeef')
expect(details).to include('object_title' => 'Page title')
expect(details).to include('object_url' => '/path/to/file.ext')
expect(details).to include('record_log_path' => record_log_path)
expect(details).to include('probable_wrong_keys' => probable_wrong_keys)
expect(details).to include('size' => '109.20 Kb')
expect(details).to include('size_limit' => '10 Kb')
expect(details).to include('nodes_to_index' => 'p,li,foo')
end end
end end
end
context 'with an unknown setting' do describe '.unknown_setting?' do
let(:message) do let(:error) { double('Error').as_null_object }
# rubocop:disable Metrics/LineLength let(:context) do
'400: Cannot PUT to '\ {
'https://MY_APP_ID.algolia.net/1/indexes/my_index/settings: '\ settings: {
'{"message":"Invalid object attributes: deadbeef near line:1 column:456",'\ 'iDontExist' => 'foo'
'"status":400} (400)' }
# rubocop:enable Metrics/LineLength }
end
let(:context) do
{ settings:
{
'searchableAttributes' => %w[foo bar],
'deadbeef' => 'foofoo'
} }
end
it { should include(name: 'unknown_settings') }
it do
details = subject[:details]
expect(details).to include('setting_name' => 'deadbeef')
expect(details).to include('setting_value' => 'foofoo')
end
end end
context 'with an invalid index name' do subject { current.unknown_setting?(error, context) }
before do
allow(configurator)
.to receive(:index_name)
.and_return('invalid_index_name')
end
let(:message) do
# rubocop:disable Metrics/LineLength
'400: Cannot GET to '\
'https://MY_APP_ID-dsn.algolia.net/1/indexes/invalid_index_name/settings?getVersion=2: '\
'{"message":"indexName is not valid","status":400} (400)'
# rubocop:enable Metrics/LineLength
end
it { should include(name: 'invalid_index_name') } before do
it do allow(current).to receive(:error_hash).and_return(error_hash)
details = subject[:details]
expect(details).to include('index_name' => 'invalid_index_name')
end
end end
context 'with too many record' do describe 'not matching' do
let(:message) do let(:error_hash) { false }
'403: Cannot POST to '\ it { should eq false }
'https://MY_APP_ID.algolia.net/1/indexes/*/batch: '\ end
'{"message":"Record quota exceeded, change plan or delete records.",'\
'"status":403} (403)'
end
it { should include(name: 'too_many_records') } context 'with non-existent setting' do
let(:error_hash) do
{
'message' => 'Invalid object attributes: iDontExist '\
'near line:1 column:456'
}
end
it do
should include('setting_name' => 'iDontExist')
should include('setting_value' => 'foo')
end
end
end
describe '.invalid_index_name?' do
let(:error) { double('Error').as_null_object }
subject { current.invalid_index_name?(error) }
before do
allow(current).to receive(:error_hash).and_return(error_hash)
allow(configurator).to receive(:index_name).and_return('my_index')
end
describe 'not matching' do
let(:error_hash) { false }
it { should eq false }
end
context 'with invalid index name' do
let(:error_hash) do
{
'message' => 'indexName is not valid'
}
end
it do
should include('index_name' => 'my_index')
end
end
end
describe '.too_many_records?' do
let(:error) { double('Error').as_null_object }
subject { current.too_many_records?(error) }
before do
allow(current).to receive(:error_hash).and_return(error_hash)
end
describe 'not matching' do
let(:error_hash) { false }
it { should eq false }
end
context 'with quota exceeded' do
let(:error_hash) do
{
'message' => 'Record quota exceeded, change plan or delete records.'
}
end
it do
should eq({})
end
end end
end end
end end

View File

@ -20,27 +20,67 @@ describe(Jekyll::Algolia::Indexer) do
allow(configurator).to receive(:application_id).and_return('app_id') allow(configurator).to receive(:application_id).and_return('app_id')
allow(configurator).to receive(:api_key).and_return('api_key') allow(configurator).to receive(:api_key).and_return('api_key')
allow(::Algolia).to receive(:init) allow(::Algolia).to receive(:init)
allow(::Algolia::Index) allow(::Algolia::Index).to receive(:new)
.to receive(:new)
.and_return(double('Algolia::Index', name: 'index_name'))
allow(current).to receive(:set_user_agent) allow(current).to receive(:set_user_agent)
end end
before { current.init } describe 'should instanciate Algolia with application id and api_key' do
before { current.init }
it 'should instanciate Algolia search with application id and api_key' do it do
expect(::Algolia) expect(::Algolia)
.to have_received(:init) .to have_received(:init)
.with(hash_including( .with(hash_including(
application_id: 'app_id', application_id: 'app_id',
api_key: 'api_key' api_key: 'api_key'
)) ))
end
end end
it 'should set the user agent' do
expect(current).to have_received(:set_user_agent) describe 'should set the user agent' do
before { current.init }
it do
expect(current).to have_received(:set_user_agent)
end
end end
it 'should make the index accessible' do
expect(current.index.name).to eq 'index_name' describe 'should make the index accessible' do
let(:index) { double('Algolia::Index') }
before do
allow(configurator)
.to receive(:index_name)
.and_return('index_name')
allow(::Algolia::Index)
.to receive(:new)
.with('index_name')
.and_return(index)
current.init
end
it do
expect(current.index).to eq index
end
end
describe 'should make the index for object ids accessible' do
let(:index_object_ids) { double('Algolia::Index') }
before do
allow(configurator)
.to receive(:index_name)
.and_return('index_name')
allow(::Algolia::Index)
.to receive(:new)
.with('index_name_object_ids')
.and_return(index_object_ids)
current.init
end
it do
expect(current.index_object_ids).to eq index_object_ids
end
end end
end end
@ -70,24 +110,165 @@ describe(Jekyll::Algolia::Indexer) do
end end
end end
describe '.remote_object_ids' do describe '.index_exist?' do
subject { current.remote_object_ids } let(:index) { double('Algolia::Index') }
describe 'when getting settings correctly' do
subject { current.index_exist?(index) }
before do
allow(index).to receive(:get_settings).and_return({})
end
it { should eq true }
end
describe 'when throwing an error on settings' do
subject { current.index_exist?(index) }
before do
allow(index).to receive(:get_settings).and_raise
end
it { should eq false }
end
end
describe '.record_count' do
let(:index) { double('Algolia::Index') }
let(:nb_hits) { 12 }
subject { current.record_count(index) }
before do
expect(index)
.to receive(:search)
.with(
'',
hash_including(
distinct: false, # To get the correct number of records
hitsPerPage: 1, # To get a short response
attributesToRetrieve: 'objectID', # To get a short response
)
)
.and_return('nbHits' => nb_hits)
end
it { should eq 12 }
end
describe '.remote_object_ids_from_main_index' do
let(:index) { double('Algolia::Index').as_null_object } let(:index) { double('Algolia::Index').as_null_object }
let(:progress_bar_instance) { double('ProgressBarInstance') }
subject { current.remote_object_ids_from_main_index }
before do before do
allow(current).to receive(:index).and_return(index) allow(current).to receive(:index).and_return(index)
expect(index) allow(current).to receive(:record_count)
allow(progress_bar).to receive(:create).and_return(progress_bar_instance)
allow(progress_bar_instance).to receive(:increment)
allow(index)
.to receive(:browse) .to receive(:browse)
.with(attributesToRetrieve: 'objectID')
.and_yield('objectID' => 'foo') .and_yield('objectID' => 'foo')
.and_yield('objectID' => 'bar') .and_yield('objectID' => 'bar')
end end
it { should include('foo') } it 'should return all objectID sorted' do
it { should include('bar') } should eq %w[bar foo]
# Should be ordered end
it { should eq %w[bar foo] }
describe 'should grab as many ids as possible' do
before do
current.remote_object_ids_from_main_index
end
it do
expect(index)
.to have_received(:browse)
.with(
attributesToRetrieve: 'objectID',
hitsPerPage: 1000
)
end
end
describe 'should display a progress bar' do
before do
allow(current).to receive(:record_count).and_return(12)
current.remote_object_ids_from_main_index
end
it do
expect(progress_bar)
.to have_received(:create)
.with(hash_including(
total: 12
))
expect(progress_bar_instance).to have_received(:increment).twice
end
end
end
describe '.remote_object_ids_from_dedicated_index' do
let(:index) { double('Algolia::Index') }
subject { current.remote_object_ids_from_dedicated_index }
before do
allow(current).to receive(:index_object_ids).and_return(index)
allow(index)
.to receive(:browse)
.and_yield('content' => %w[foo baz])
.and_yield('content' => ['bar'])
end
it 'should return all objectID sorted' do
should eq %w[bar baz foo]
end
describe 'should grab as many ids as possible' do
before do
current.remote_object_ids_from_dedicated_index
end
it do
expect(index)
.to have_received(:browse)
.with(
attributesToRetrieve: 'content',
hitsPerPage: 1000
)
end
end
end
describe '.remote_object_ids' do
subject { current.remote_object_ids }
before do
allow(current)
.to receive(:remote_object_ids_from_dedicated_index)
.and_return('dedicated')
allow(current)
.to receive(:remote_object_ids_from_main_index)
.and_return('main')
allow(current).to receive(:index_object_ids)
allow(current).to receive(:index_exist?).and_return(dedicated_index_exist)
end
describe 'from the main index' do
let(:dedicated_index_exist) { false }
it { should eq 'main' }
end
describe 'from the dedicated index' do
let(:dedicated_index_exist) { true }
it { should eq 'dedicated' }
end
end end
describe '.local_object_ids' do describe '.local_object_ids' do
@ -115,84 +296,168 @@ describe(Jekyll::Algolia::Indexer) do
# NOTE(review): the first lines of this block were garbled by a
# side-by-side diff render (old and new columns merged onto the same
# lines); they are reconstructed here from the post-commit side.
#
# .update_records should compute a single list of batch operations:
# delete stale records, add new ones, clear the dedicated objectID
# index, then push the fresh list of ids to it.
describe '.update_records' do
  let(:index) { double('Algolia::Index', name: 'my_index') }
  let(:index_object_ids) do
    double('Algolia::Index', name: 'my_index_object_ids')
  end
  let(:remote_ids) { %w[bar baz] }
  let(:records) do
    [
      { objectID: 'foo', name: 'foo' },
      { objectID: 'bar', name: 'bar' }
    ]
  end
  before do
    allow(current).to receive(:index).and_return(index)
    allow(current).to receive(:index_object_ids).and_return(index_object_ids)
    allow(current).to receive(:remote_object_ids).and_return(remote_ids)
    allow(current).to receive(:execute_operations)
  end
  context 'when nothing to update' do
    let(:remote_ids) { [] }
    before do
      allow(current).to receive(:local_object_ids).and_return([])
      current.update_records(records)
    end
    it do
      expect(current)
        .to_not have_received(:execute_operations)
    end
  end
  context 'when running a dry run' do
    let(:dry_run) { true }
    before do
      current.update_records(records)
    end
    it do
      expect(current)
        .to_not have_received(:execute_operations)
    end
  end
  context 'batch operations' do
    before do
      current.update_records(records)
    end
    it 'should start with deleting old record' do
      expected = {
        action: 'deleteObject',
        indexName: 'my_index',
        body: { objectID: 'baz' }
      }
      expect(current)
        .to have_received(:execute_operations) do |operations|
        expect(operations[0]).to eq expected
      end
    end
    it 'should add new items after deleting old ones' do
      expected = {
        action: 'addObject',
        indexName: 'my_index',
        body: { objectID: 'foo', name: 'foo' }
      }
      expect(current)
        .to have_received(:execute_operations) do |operations|
        expect(operations[1]).to eq expected
      end
    end
    it 'should clear the object id index after updating the record' do
      expected = {
        action: 'clear',
        indexName: 'my_index_object_ids'
      }
      expect(current)
        .to have_received(:execute_operations) do |operations|
        expect(operations[2]).to eq expected
      end
    end
    it 'should add new objectIDs to the dedicated index' do
      expected = {
        action: 'addObject',
        indexName: 'my_index_object_ids',
        body: { content: %w[bar foo] }
      }
      expect(current)
        .to have_received(:execute_operations) do |operations|
        expect(operations[3]).to eq expected
      end
    end
  end
  context 'storing ids by group of 100' do
    let(:records) do
      records = []
      150.times { |i| records << { objectID: "foo-#{i}" } }
      records
    end
    before do
      current.update_records(records)
    end
    it 'should create two records for storing the object IDs' do
      # 150 ids split in slices of 100 => 2 addObject operations on the
      # dedicated index
      expect(current)
        .to have_received(:execute_operations) do |operations|
        dedicated_index_operations = operations.select do |operation|
          operation[:indexName] == 'my_index_object_ids' &&
            operation[:action] == 'addObject'
        end
        expect(dedicated_index_operations.length).to eq 2
      end
    end
  end
end
describe '.execute_operations' do
let(:indexing_batch_size) { 1000 } let(:indexing_batch_size) { 1000 }
let(:operations) { %w[foo bar] }
let(:progress_bar_instance) { double('ProgressBarInstance') } let(:progress_bar_instance) { double('ProgressBarInstance') }
before do before do
allow(::Algolia).to receive(:batch!) allow(::Algolia).to receive(:batch!)
allow(progress_bar).to receive(:create).and_return(progress_bar_instance) allow(progress_bar).to receive(:create).and_return(progress_bar_instance)
allow(progress_bar_instance).to receive(:increment) allow(progress_bar_instance).to receive(:increment)
allow(current).to receive(:index).and_return(index)
allow(configurator) allow(configurator)
.to receive(:algolia) .to receive(:algolia)
.with('indexing_batch_size') .with('indexing_batch_size')
.and_return(indexing_batch_size) .and_return(indexing_batch_size)
end end
before { current.update_records(old_records_ids, new_records) }
context 'when running a dry run' do
let(:dry_run) { true }
it do
expect(::Algolia)
.to_not have_received(:batch!)
end
end
context 'when nothing to update' do
let(:old_records_ids) { [] }
let(:new_records) { [] }
it do
expect(::Algolia)
.to_not have_received(:batch!)
end
end
it 'should batch all operations (deletions first)' do
expect(::Algolia)
.to have_received(:batch!)
.with([
{
action: 'deleteObject',
indexName: 'my_index',
body: { objectID: 'abc' }
},
{
action: 'addObject',
indexName: 'my_index',
body: { 'objectID' => 'def' }
}
])
end
# NOTE(review): reconstructed (post-commit side) from a merged
# side-by-side diff render.
# When more operations than indexing_batch_size are queued, they should
# be sent to the API in several consecutive, ordered batch! calls.
context 'split in smaller batches if too many operations' do
  let(:indexing_batch_size) { 1 }
  before { current.execute_operations(operations) }
  it do
    expect(::Algolia)
      .to have_received(:batch!)
      .ordered
      .with(['foo'])
    expect(::Algolia)
      .to have_received(:batch!)
      .ordered
      .with(['bar'])
  end
end
context 'progress bar' do context 'progress bar' do
before { current.execute_operations(operations) }
describe 'should not create it if only one batch' do describe 'should not create it if only one batch' do
it do it do
expect(progress_bar).to_not have_received(:create) expect(progress_bar).to_not have_received(:create)
@ -207,62 +472,32 @@ describe(Jekyll::Algolia::Indexer) do
end end
end end
end end
end
describe '.run' do
let(:records) { [{ objectID: 'foo' }, { objectID: 'bar' }] }
let(:remote_ids) { %w[foo baz] }
let(:index_name) { 'my_index' }
let(:index) { double('Algolia::Index', name: index_name) }
before do
allow(current).to receive(:init)
allow(current).to receive(:index).and_return(index)
allow(current).to receive(:update_settings)
allow(current).to receive(:remote_object_ids).and_return(remote_ids)
allow(current).to receive(:update_records)
end
context 'with records' do
before { current.run(records) }
it { expect(current).to have_received(:init) }
it do
expect(current)
.to have_received(:update_settings)
end
it do
expect(current)
.to have_received(:update_records)
.with(['baz'], [{ objectID: 'bar' }])
end
end
context 'with empty results' do
subject { -> { current.run(records) } }
let(:records) { [] }
# NOTE(review): reconstructed (post-commit side) from a merged
# side-by-side diff render.
# When the Algolia API raises during a batch, the error handler should
# be stopped with the exact slice of operations that failed.
context 'dispatch the error to the error handler' do
  before do
    allow(::Algolia).to receive(:batch!).and_raise
    allow(error_handler).to receive(:stop)

    current.execute_operations(operations)
  end
  describe 'when only one slice' do
    it do
      expect(error_handler)
        .to have_received(:stop)
        .with(RuntimeError, operations: operations)
    end
  end
  describe 'when split in several slices' do
    let(:indexing_batch_size) { 1 }
    let(:operations) { %w[foo bar] }
    it do
      # Only the first failing slice is reported, not the full queue
      expect(error_handler)
        .to have_received(:stop)
        .with(RuntimeError, operations: ['foo'])
    end
  end
end
end end
@ -500,5 +735,62 @@ describe(Jekyll::Algolia::Indexer) do
) )
end end
end end
# End-to-end orchestration: .run should init the indexer, push the
# settings, and forward the full list of records to update_records
describe '.run' do
  let(:index_name) { 'my_index' }
  let(:index) { double('Algolia::Index', name: index_name) }
  let(:records) { [{ objectID: 'foo' }, { objectID: 'bar' }] }
  let(:remote_ids) { %w[foo baz] }

  before do
    allow(current).to receive(:init)
    allow(current).to receive(:index).and_return(index)
    allow(current).to receive(:update_settings)
    allow(current).to receive(:remote_object_ids).and_return(remote_ids)
    allow(current).to receive(:update_records)
  end

  context 'with records' do
    before { current.run(records) }

    it { expect(current).to have_received(:init) }
    it { expect(current).to have_received(:update_settings) }
    it { expect(current).to have_received(:update_records).with(records) }
  end

  context 'with empty results' do
    subject { -> { current.run(records) } }

    let(:records) { [] }

    before do
      # Running with no records should warn with enough configuration
      # context for the user to debug their exclusion/indexing setup
      expect(configurator)
        .to receive(:algolia)
        .with('files_to_exclude')
        .and_return(%w[foo.html bar.md])
      expect(configurator)
        .to receive(:algolia)
        .with('nodes_to_index')
        .and_return('p,li')
      expect(logger)
        .to receive(:known_message)
        .with(
          'no_records_found',
          hash_including(
            'files_to_exclude' => 'foo.html, bar.md',
            'nodes_to_index' => 'p,li'
          )
        )
    end

    it { is_expected.to raise_error SystemExit }
  end
end
end end
# rubocop:enable Metrics/BlockLength