test(indexer): Adding tests for the dedicated index

This commit is contained in:
Pixelastic 2018-04-03 19:12:48 +02:00
parent 0da7131818
commit 40c113d587
4 changed files with 809 additions and 332 deletions

View File

@ -49,7 +49,7 @@ module Jekyll
invalid_credentials
record_too_big
too_many_records
unknown_settings
unknown_setting
invalid_index_name
]
@ -135,6 +135,24 @@ module Jekyll
hash
end
# Public: Returns a string explaining which attributes are the largest in
# the record
#
# record - The record hash to analyze
#
# This will be used on the `record_too_big` error, to guide users in
# finding which record is causing trouble
def self.readable_largest_record_keys(record)
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
output = []
largest_keys.each do |key, size|
size = Filesize.from("#{size} B").to_s('Kb')
output << "#{key} (#{size})"
end
output.join(', ')
end
# Public: Check if the application id is available
#
# _context - Not used
@ -162,6 +180,7 @@ module Jekyll
# Application ID and API key submitted don't match any credentials known
def self.invalid_credentials?(error, _context = {})
details = error_hash(error.message)
return false if details == false
if details['message'] != 'Invalid Application-ID or API key'
return false
@ -172,24 +191,6 @@ module Jekyll
}
end
# Public: Returns a string explaining which attributes are the largest in
# the record
#
# record - The record hash to analyze
#
# This will be used on the `record_too_big` error, to guide users in
# finding which record is causing trouble
def self.readable_largest_record_keys(record)
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
output = []
largest_keys.each do |key, size|
size = Filesize.from("#{size} B").to_s('Kb')
output << "#{key} (#{size})"
end
output.join(', ')
end
# Public: Check if the sent records are not too big
#
# context[:records] - list of records sent in the batch
@ -199,6 +200,7 @@ module Jekyll
# information about it so the user can debug it.
def self.record_too_big?(error, context = {})
details = error_hash(error.message)
return false if details == false
message = details['message']
return false if message !~ /^Record .* is too big .*/
@ -208,8 +210,11 @@ module Jekyll
size = Filesize.from("#{size} B").to_s('Kb')
object_id = details['objectID']
# Getting record details
record = Utils.find_by_key(context[:records], :objectID, object_id)
# Finding the record in all the operations
operation = context[:operations].find do |o|
o[:action] == 'addObject' && o[:body][:objectID] == object_id
end
record = operation[:body]
probable_wrong_keys = readable_largest_record_keys(record)
# Writing the full record to disk for inspection
@ -237,8 +242,9 @@ module Jekyll
# The API will block any call that tries to update a setting value that is
# not available. We'll tell the user which one so they can fix their
# issue.
def self.unknown_settings?(error, context = {})
def self.unknown_setting?(error, context = {})
details = error_hash(error.message)
return false if details == false
message = details['message']
return false if message !~ /^Invalid object attributes.*/
@ -259,6 +265,7 @@ module Jekyll
# Some characters are forbidden in index names
def self.invalid_index_name?(error, _context = {})
details = error_hash(error.message)
return false if details == false
message = details['message']
return false if message !~ /^indexName is not valid.*/
@ -273,6 +280,7 @@ module Jekyll
# We're trying to push too many records and it goes over quota
def self.too_many_records?(error, _context = {})
details = error_hash(error.message)
return false if details == false
message = details['message']
return false if message !~ /^Record quota exceeded.*/

View File

@ -31,7 +31,6 @@ module Jekyll
end
# Public: Returns the Algolia index used to store object ids
# TOTEST
def self.index_object_ids
@index_object_ids
end
@ -42,7 +41,6 @@ module Jekyll
#
# Note: there is no API endpoint to do that, so we try to get the settings
# instead, which will fail if the index does not exist
# TOTEST
def self.index_exist?(index)
index.get_settings
true
@ -50,6 +48,22 @@ module Jekyll
false
end
# Public: Get the number of records in an index
#
# index - Index to check
#
# Note: We'll do an empty query search, to match everything, but we'll
# only return the objectID and one element, to get the shortest response
# possible. It will still contain the nbHits
def self.record_count(index)
index.search(
'',
attributesToRetrieve: 'objectID',
distinct: false,
hitsPerPage: 1
)['nbHits']
end
# Public: Set the User-Agent to send to the API
#
# Every integration should follow the "YYY Integration" pattern, and
@ -67,55 +81,65 @@ module Jekyll
::Algolia.set_extra_header('User-Agent', user_agent)
end
# Public: Get an array of all object IDs stored in the main index
#
# Note: As this will be slow (grabbing them 1000 at a time), we display
# a progress bar.
def self.remote_object_ids_from_main_index
Logger.verbose("I:Inspecting existing records in index #{index.name}")
list = []
# As it might take some time, we display a progress bar
progress_bar = ProgressBar.create(
total: record_count(index),
format: 'Inspecting existing records (%j%%) |%B|'
)
begin
index.browse(
attributesToRetrieve: 'objectID',
hitsPerPage: 1000
) do |hit|
list << hit['objectID']
progress_bar.increment
end
end
list.sort
end
# Public: Get an array of all the object ids, stored in the dedicated
# index
#
# Note: This will be very fast. Each record contains 100 object ids, so it
# will fit in one call each time.
def self.remote_object_ids_from_dedicated_index
list = []
begin
index_object_ids.browse(
attributesToRetrieve: 'content',
hitsPerPage: 1000
) do |hit|
list += hit['content']
end
end
list.sort
end
# Public: Returns an array of all the objectIDs in the index
#
# Note: We use a dedicated index to store the objectIDs for faster
# browsing, but if the index does not exist we read the main index.
def self.remote_object_ids
Logger.log('I:Getting list of existing records')
list = []
hits_per_page = 1000
# Fast version, using the dedicated index
has_dedicated_index = index_exist?(index_object_ids)
if has_dedicated_index
begin
index_object_ids.browse(
attributesToRetrieve: 'content',
hitsPerPage: hits_per_page
) do |hit|
list += hit['content']
end
end
else
# Slow version, browsing the full index
Logger.verbose(
"I:Inspecting existing records in index #{index.name}..."
)
return remote_object_ids_from_dedicated_index if has_dedicated_index
# As it might take some time, we display a progress bar
max_hits = index.search(
'',
attributesToRetrieve: 'objectID',
distinct: false,
hitsPerPage: 1
)['nbHits']
progress_bar = ProgressBar.create(
total: max_hits,
format: 'Inspecting existing records (%j%%) |%B|'
)
begin
index.browse(
attributesToRetrieve: 'objectID',
hitsPerPage: hits_per_page
) do |hit|
list << hit['objectID']
progress_bar.increment
end
end
end
list.sort
# Slow version, browsing the full index
remote_object_ids_from_main_index
end
# Public: Returns an array of the local objectIDs
@ -137,41 +161,43 @@ module Jekyll
remote_ids = remote_object_ids
local_ids = local_object_ids(records)
# We need the list of objectIDs for deletion
old_records_ids = remote_ids - local_ids
# For addition, we need the full records
# We build a first hash of all the records we have, with access by
new_records_ids = local_ids - remote_ids
cache = Hash[records.map { |record| [record[:objectID], record] }]
new_records = new_records_ids.map { |id| cache[id] }
# Making a diff, to see what to add and what to delete
ids_to_delete = remote_ids - local_ids
ids_to_add = local_ids - remote_ids
# Stop if nothing to change
if old_records_ids.empty? && new_records.empty?
if ids_to_delete.empty? && ids_to_add.empty?
Logger.log('I:Content is already up to date.')
return
end
Logger.log("I:Updating records in index #{index.name}...")
Logger.log("I:Records to delete: #{old_records_ids.length}")
Logger.log("I:Records to add: #{new_records.length}")
Logger.log("I:Records to delete: #{ids_to_delete.length}")
Logger.log("I:Records to add: #{ids_to_add.length}")
return if Configurator.dry_run?
# We group delete and add operations into the same batch. Delete
# operations should still come first, to avoid hitting an overquota too
# soon
# Transforming ids into real records to add
records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
records_to_add = ids_to_add.map { |id| records_by_id[id] }
# We group all operations into one batch
operations = []
old_records_ids.each do |object_id|
# Deletion operations come first, to avoid hitting an overquota too soon
# if it can be avoided
ids_to_delete.each do |object_id|
operations << {
action: 'deleteObject', indexName: index.name,
body: { objectID: object_id }
}
end
operations += new_records.map do |new_record|
# Then we add the new records
operations += records_to_add.map do |new_record|
{ action: 'addObject', indexName: index.name, body: new_record }
end
# We batch as well the update to the index holding the object ids
# We also clear the dedicated index holding the object ids and push the
# new list of ids
operations << { action: 'clear', indexName: index_object_ids.name }
local_ids.each_slice(100).each do |ids|
operations << {
@ -183,6 +209,12 @@ module Jekyll
execute_operations(operations)
end
# Public: Execute a series of operations in a batch
#
# operations - Operations to batch
#
# Note: Will split the batch into several calls if too big, and will display
# a progress bar if this happens
def self.execute_operations(operations)
# Run the batches in slices if they are too large
batch_size = Configurator.algolia('indexing_batch_size')
@ -202,10 +234,7 @@ module Jekyll
progress_bar.increment if should_have_progress_bar
rescue StandardError => error
records = slice.map do |record|
record[:body]
end
ErrorHandler.stop(error, records: records)
ErrorHandler.stop(error, operations: slice)
end
end
end

View File

@ -7,6 +7,7 @@ describe(Jekyll::Algolia::ErrorHandler) do
let(:current) { Jekyll::Algolia::ErrorHandler }
let(:configurator) { Jekyll::Algolia::Configurator }
let(:logger) { Jekyll::Algolia::Logger }
let(:utils) { Jekyll::Algolia::Utils }
describe '.stop' do
subject { -> { current.stop(error) } }
@ -37,6 +38,64 @@ describe(Jekyll::Algolia::ErrorHandler) do
end
end
describe '.identify' do
let(:error) { double('Error') }
let(:context) { 'context' }
subject { current.identify(error, context) }
before do
allow(current).to receive(:unknown_application_id?).and_return(false)
allow(current).to receive(:invalid_credentials?).and_return(false)
allow(current).to receive(:record_too_big?).and_return(false)
allow(current).to receive(:too_many_records?).and_return(false)
allow(current).to receive(:unknown_setting?).and_return(false)
allow(current).to receive(:invalid_index_name?).and_return(false)
end
it 'should return false if nothing matches' do
should eq false
end
describe 'should call all methods with error and context' do
before do
current.identify(error, context)
end
it do
expect(current)
.to have_received(:unknown_application_id?)
.with(error, context)
expect(current)
.to have_received(:invalid_credentials?)
.with(error, context)
expect(current)
.to have_received(:record_too_big?)
.with(error, context)
expect(current)
.to have_received(:too_many_records?)
.with(error, context)
expect(current)
.to have_received(:unknown_setting?)
.with(error, context)
expect(current)
.to have_received(:invalid_index_name?)
.with(error, context)
end
end
describe 'should return the result of one if matches' do
before do
allow(current)
.to receive(:too_many_records?)
.and_return('foo')
end
it do
should eq(name: 'too_many_records', details: 'foo')
end
end
end
describe '.error_hash' do
subject { current.error_hash(message) }
@ -105,28 +164,31 @@ describe(Jekyll::Algolia::ErrorHandler) do
it { should eq 'baz (100.00 Kb), bar (10.00 Kb), foo (1.00 Kb)' }
end
describe '.identify' do
subject { current.identify(error, context) }
describe '.unknown_application_id?' do
let(:error) { double('Error', message: message) }
let(:context) { {} }
context 'with unknown application_id' do
subject { current.unknown_application_id?(error) }
describe 'not matching' do
let(:message) { 'foo bar' }
it { should eq false }
end
describe 'matching' do
let(:message) do
# rubocop:disable Metrics/LineLength
'Cannot reach any host: '\
'getaddrinfo: Name or service not known (MY_APP_ID-dsn.algolia.net:443), '\
'getaddrinfo: No address associated with hostname (MY_APP_ID-3.algolianet.com:443), '\
'getaddrinfo: No address associated with hostname (MY_APP_ID-1.algolianet.com:443), '\
'getaddrinfo: No address associated with hostname (MY_APP_ID-2.algolianet.com:443)'
'getaddrinfo: Name or service not known (MY_APP_ID-dsn.algolia.net:443), '\
'getaddrinfo: No address associated with hostname (MY_APP_ID-3.algolianet.com:443), '\
'getaddrinfo: No address associated with hostname (MY_APP_ID-1.algolianet.com:443), '\
'getaddrinfo: No address associated with hostname (MY_APP_ID-2.algolianet.com:443)'
# rubocop:enable Metrics/LineLength
end
it { should include(name: 'unknown_application_id') }
it { should include(details: { 'application_id' => 'MY_APP_ID' }) }
it { should eq('application_id' => 'MY_APP_ID') }
end
context 'with unknown application_id and no DSN' do
describe 'matching with a DSN' do
let(:message) do
# rubocop:disable Metrics/LineLength
'Cannot reach any host: '\
@ -137,138 +199,224 @@ describe(Jekyll::Algolia::ErrorHandler) do
# rubocop:enable Metrics/LineLength
end
it { should include(name: 'unknown_application_id') }
it { should include(details: { 'application_id' => 'MY_APP_ID' }) }
it { should eq('application_id' => 'MY_APP_ID') }
end
end
describe '.invalid_credentials?' do
let(:error) { double('Error').as_null_object }
subject { current.invalid_credentials?(error) }
before do
allow(current).to receive(:error_hash).and_return(error_hash)
end
describe 'not matching' do
let(:error_hash) { false }
it { should eq false }
end
context 'with wrong API key' do
before do
allow(configurator)
.to receive(:index_name)
.and_return('my_index')
let(:error_hash) do
{
'message' => 'Invalid Application-ID or API key',
'application_id' => 'MY_APP_ID'
}
end
let(:message) do
'Cannot POST to '\
'https://MY_APP_ID.algolia.net/1/indexes/my_index/batch: '\
'{"message":"Invalid Application-ID or API key","status":403}'\
"\n (403)"
it { should eq('application_id' => 'MY_APP_ID') }
end
end
describe '.record_too_big?' do
let(:error) { double('Error').as_null_object }
let(:error_hash) do
{
'message' => 'Record at the position 3 '\
'objectID=deadbeef is too big size=109196 bytes. '\
'Contact us if you need an extended quota',
'objectID' => 'object_id'
}
end
let(:context) do
{
operations: [
{
action: 'deleteObject',
body: { objectID: 'object_to_delete' }
},
{
action: 'addObject',
body: { objectID: 'object_id', title: 'foo', url: 'url' }
},
{
action: 'clear'
},
{
action: 'addObject',
body: { content: %w[object_id1 object_id2] }
}
]
}
end
subject { current.record_too_big?(error, context) }
before do
allow(current).to receive(:error_hash).and_return(error_hash)
allow(utils).to receive(:find_by_key).and_return({})
allow(current).to receive(:readable_largest_record_keys)
allow(logger).to receive(:write_to_file)
end
describe 'wrongly formatted message' do
let(:error_hash) { false }
it { should eq false }
end
describe 'not matching' do
let(:error_hash) { { 'message' => 'foo bar' } }
it { should eq false }
end
it 'should get information from message' do
should include('object_id' => 'object_id')
should include('size' => '109.20 Kb')
should include('size_limit' => '10 Kb')
end
describe 'includes the nodes to index' do
before do
allow(configurator).to receive(:algolia).and_return('nodes')
end
it { should include(name: 'invalid_credentials') }
it do
should include(details: {
'application_id' => 'MY_APP_ID'
})
should include('nodes_to_index' => 'nodes')
end
end
context 'with a record too big' do
let(:message) do
'400: Cannot POST to '\
'https://MY_APP_ID.algolia.net/1/indexes/*/batch: '\
'{"message":"Record at the position 3 '\
'objectID=deadbeef is too big size=109196 bytes. '\
'Contact us if you need an extended quota","position":3,'\
'"objectID":"deadbeef","status":400} (400)'
end
let(:context) do
{ records: [
{
objectID: 'deadbeef',
title: 'Page title',
url: '/path/to/file.ext',
# rubocop:disable Metrics/LineLength
content: 'A very long text that is obviously too long to fit in one record, but that would be too long to actually display in the error message as well so we will cut it at 100 characters.'
# rubocop:enable Metrics/LineLength
},
{ objectID: 'foo' }
] }
end
let(:record_log_path) { '/source/output.log' }
let(:probable_wrong_keys) { 'foo, bar, baz' }
describe 'includes information about the bad record' do
before do
allow(configurator)
.to receive(:algolia)
.with('nodes_to_index')
.and_return('p,li,foo')
allow(current)
.to receive(:readable_largest_record_keys)
.and_return('wrong_keys')
end
it do
should include('object_title' => 'foo')
should include('object_url' => 'url')
should include('probable_wrong_keys' => 'wrong_keys')
end
end
describe 'save log file' do
before do
expect(::JSON)
.to receive(:pretty_generate)
.with(objectID: 'object_id', title: 'foo', url: 'url')
.and_return('{json}')
expect(logger)
.to receive(:write_to_file)
.and_return(record_log_path)
expect(current)
.to receive(:readable_largest_record_keys)
.and_return(probable_wrong_keys)
.with(
'jekyll-algolia-record-too-big-object_id.log',
'{json}'
)
.and_return('/path/to/file.log')
end
it { should include(name: 'record_too_big') }
it do
details = subject[:details]
expect(details).to include('object_id' => 'deadbeef')
expect(details).to include('object_title' => 'Page title')
expect(details).to include('object_url' => '/path/to/file.ext')
expect(details).to include('record_log_path' => record_log_path)
expect(details).to include('probable_wrong_keys' => probable_wrong_keys)
expect(details).to include('size' => '109.20 Kb')
expect(details).to include('size_limit' => '10 Kb')
expect(details).to include('nodes_to_index' => 'p,li,foo')
it 'should return the path of the log file in the output' do
should include('record_log_path' => '/path/to/file.log')
end
end
end
context 'with an unknown setting' do
let(:message) do
# rubocop:disable Metrics/LineLength
'400: Cannot PUT to '\
'https://MY_APP_ID.algolia.net/1/indexes/my_index/settings: '\
'{"message":"Invalid object attributes: deadbeef near line:1 column:456",'\
'"status":400} (400)'
# rubocop:enable Metrics/LineLength
end
let(:context) do
{ settings:
{
'searchableAttributes' => %w[foo bar],
'deadbeef' => 'foofoo'
} }
end
it { should include(name: 'unknown_settings') }
it do
details = subject[:details]
expect(details).to include('setting_name' => 'deadbeef')
expect(details).to include('setting_value' => 'foofoo')
end
describe '.unknown_setting?' do
let(:error) { double('Error').as_null_object }
let(:context) do
{
settings: {
'iDontExist' => 'foo'
}
}
end
context 'with an invalid index name' do
before do
allow(configurator)
.to receive(:index_name)
.and_return('invalid_index_name')
end
let(:message) do
# rubocop:disable Metrics/LineLength
'400: Cannot GET to '\
'https://MY_APP_ID-dsn.algolia.net/1/indexes/invalid_index_name/settings?getVersion=2: '\
'{"message":"indexName is not valid","status":400} (400)'
# rubocop:enable Metrics/LineLength
end
subject { current.unknown_setting?(error, context) }
it { should include(name: 'invalid_index_name') }
it do
details = subject[:details]
expect(details).to include('index_name' => 'invalid_index_name')
end
before do
allow(current).to receive(:error_hash).and_return(error_hash)
end
context 'with too many record' do
let(:message) do
'403: Cannot POST to '\
'https://MY_APP_ID.algolia.net/1/indexes/*/batch: '\
'{"message":"Record quota exceeded, change plan or delete records.",'\
'"status":403} (403)'
end
describe 'not matching' do
let(:error_hash) { false }
it { should eq false }
end
it { should include(name: 'too_many_records') }
context 'with non-existent setting' do
let(:error_hash) do
{
'message' => 'Invalid object attributes: iDontExist '\
'near line:1 column:456'
}
end
it do
should include('setting_name' => 'iDontExist')
should include('setting_value' => 'foo')
end
end
end
describe '.invalid_index_name?' do
let(:error) { double('Error').as_null_object }
subject { current.invalid_index_name?(error) }
before do
allow(current).to receive(:error_hash).and_return(error_hash)
allow(configurator).to receive(:index_name).and_return('my_index')
end
describe 'not matching' do
let(:error_hash) { false }
it { should eq false }
end
context 'with invalid index name' do
let(:error_hash) do
{
'message' => 'indexName is not valid'
}
end
it do
should include('index_name' => 'my_index')
end
end
end
describe '.too_many_records?' do
let(:error) { double('Error').as_null_object }
subject { current.too_many_records?(error) }
before do
allow(current).to receive(:error_hash).and_return(error_hash)
end
describe 'not matching' do
let(:error_hash) { false }
it { should eq false }
end
context 'with quota exceeded' do
let(:error_hash) do
{
'message' => 'Record quota exceeded, change plan or delete records.'
}
end
it do
should eq({})
end
end
end
end

View File

@ -20,27 +20,67 @@ describe(Jekyll::Algolia::Indexer) do
allow(configurator).to receive(:application_id).and_return('app_id')
allow(configurator).to receive(:api_key).and_return('api_key')
allow(::Algolia).to receive(:init)
allow(::Algolia::Index)
.to receive(:new)
.and_return(double('Algolia::Index', name: 'index_name'))
allow(::Algolia::Index).to receive(:new)
allow(current).to receive(:set_user_agent)
end
before { current.init }
describe 'should instanciate Algolia with application id and api_key' do
before { current.init }
it 'should instanciate Algolia search with application id and api_key' do
expect(::Algolia)
.to have_received(:init)
.with(hash_including(
application_id: 'app_id',
api_key: 'api_key'
))
it do
expect(::Algolia)
.to have_received(:init)
.with(hash_including(
application_id: 'app_id',
api_key: 'api_key'
))
end
end
it 'should set the user agent' do
expect(current).to have_received(:set_user_agent)
describe 'should set the user agent' do
before { current.init }
it do
expect(current).to have_received(:set_user_agent)
end
end
it 'should make the index accessible' do
expect(current.index.name).to eq 'index_name'
describe 'should make the index accessible' do
let(:index) { double('Algolia::Index') }
before do
allow(configurator)
.to receive(:index_name)
.and_return('index_name')
allow(::Algolia::Index)
.to receive(:new)
.with('index_name')
.and_return(index)
current.init
end
it do
expect(current.index).to eq index
end
end
describe 'should make the index for object ids accessible' do
let(:index_object_ids) { double('Algolia::Index') }
before do
allow(configurator)
.to receive(:index_name)
.and_return('index_name')
allow(::Algolia::Index)
.to receive(:new)
.with('index_name_object_ids')
.and_return(index_object_ids)
current.init
end
it do
expect(current.index_object_ids).to eq index_object_ids
end
end
end
@ -70,24 +110,165 @@ describe(Jekyll::Algolia::Indexer) do
end
end
describe '.remote_object_ids' do
subject { current.remote_object_ids }
describe '.index_exist?' do
let(:index) { double('Algolia::Index') }
describe 'when getting settings correctly' do
subject { current.index_exist?(index) }
before do
allow(index).to receive(:get_settings).and_return({})
end
it { should eq true }
end
describe 'when throwing an error on settings' do
subject { current.index_exist?(index) }
before do
allow(index).to receive(:get_settings).and_raise
end
it { should eq false }
end
end
describe '.record_count' do
let(:index) { double('Algolia::Index') }
let(:nb_hits) { 12 }
subject { current.record_count(index) }
before do
expect(index)
.to receive(:search)
.with(
'',
hash_including(
distinct: false, # To get the correct number of records
hitsPerPage: 1, # To get a short response
attributesToRetrieve: 'objectID', # To get a short response
)
)
.and_return('nbHits' => nb_hits)
end
it { should eq 12 }
end
describe '.remote_object_ids_from_main_index' do
let(:index) { double('Algolia::Index').as_null_object }
let(:progress_bar_instance) { double('ProgressBarInstance') }
subject { current.remote_object_ids_from_main_index }
before do
allow(current).to receive(:index).and_return(index)
expect(index)
allow(current).to receive(:record_count)
allow(progress_bar).to receive(:create).and_return(progress_bar_instance)
allow(progress_bar_instance).to receive(:increment)
allow(index)
.to receive(:browse)
.with(attributesToRetrieve: 'objectID')
.and_yield('objectID' => 'foo')
.and_yield('objectID' => 'bar')
end
it { should include('foo') }
it { should include('bar') }
# Should be ordered
it { should eq %w[bar foo] }
it 'should return all objectID sorted' do
should eq %w[bar foo]
end
describe 'should grab as many ids as possible' do
before do
current.remote_object_ids_from_main_index
end
it do
expect(index)
.to have_received(:browse)
.with(
attributesToRetrieve: 'objectID',
hitsPerPage: 1000
)
end
end
describe 'should display a progress bar' do
before do
allow(current).to receive(:record_count).and_return(12)
current.remote_object_ids_from_main_index
end
it do
expect(progress_bar)
.to have_received(:create)
.with(hash_including(
total: 12
))
expect(progress_bar_instance).to have_received(:increment).twice
end
end
end
describe '.remote_object_ids_from_dedicated_index' do
let(:index) { double('Algolia::Index') }
subject { current.remote_object_ids_from_dedicated_index }
before do
allow(current).to receive(:index_object_ids).and_return(index)
allow(index)
.to receive(:browse)
.and_yield('content' => %w[foo baz])
.and_yield('content' => ['bar'])
end
it 'should return all objectID sorted' do
should eq %w[bar baz foo]
end
describe 'should grab as many ids as possible' do
before do
current.remote_object_ids_from_dedicated_index
end
it do
expect(index)
.to have_received(:browse)
.with(
attributesToRetrieve: 'content',
hitsPerPage: 1000
)
end
end
end
describe '.remote_object_ids' do
subject { current.remote_object_ids }
before do
allow(current)
.to receive(:remote_object_ids_from_dedicated_index)
.and_return('dedicated')
allow(current)
.to receive(:remote_object_ids_from_main_index)
.and_return('main')
allow(current).to receive(:index_object_ids)
allow(current).to receive(:index_exist?).and_return(dedicated_index_exist)
end
describe 'from the main index' do
let(:dedicated_index_exist) { false }
it { should eq 'main' }
end
describe 'from the dedicated index' do
let(:dedicated_index_exist) { true }
it { should eq 'dedicated' }
end
end
describe '.local_object_ids' do
@ -115,84 +296,168 @@ describe(Jekyll::Algolia::Indexer) do
describe '.update_records' do
let(:index) { double('Algolia::Index', name: 'my_index') }
let(:old_records_ids) { %w[abc] }
let(:new_records) { [{ 'objectID' => 'def' }] }
let(:index_object_ids) do
double('Algolia::Index', name: 'my_index_object_ids')
end
let(:remote_ids) { %w[bar baz] }
let(:records) do
[
{ objectID: 'foo', name: 'foo' },
{ objectID: 'bar', name: 'bar' }
]
end
before do
allow(current).to receive(:index).and_return(index)
allow(current).to receive(:index_object_ids).and_return(index_object_ids)
allow(current).to receive(:remote_object_ids).and_return(remote_ids)
allow(current).to receive(:execute_operations)
end
context 'when nothing to update' do
let(:remote_ids) { [] }
before do
allow(current).to receive(:local_object_ids).and_return([])
current.update_records(records)
end
it do
expect(current)
.to_not have_received(:execute_operations)
end
end
context 'when running a dry run' do
let(:dry_run) { true }
before do
current.update_records(records)
end
it do
expect(current)
.to_not have_received(:execute_operations)
end
end
context 'batch operations' do
before do
current.update_records(records)
end
it 'should start with deleting old record' do
expected = {
action: 'deleteObject',
indexName: 'my_index',
body: { objectID: 'baz' }
}
expect(current)
.to have_received(:execute_operations) do |operations|
expect(operations[0]).to eq expected
end
end
it 'should add new items after deleting old ones' do
expected = {
action: 'addObject',
indexName: 'my_index',
body: { objectID: 'foo', name: 'foo' }
}
expect(current)
.to have_received(:execute_operations) do |operations|
expect(operations[1]).to eq expected
end
end
it 'should clear the object id index after updating the record' do
expected = {
action: 'clear',
indexName: 'my_index_object_ids'
}
expect(current)
.to have_received(:execute_operations) do |operations|
expect(operations[2]).to eq expected
end
end
it 'should add new objectIDs to the dedicated index' do
expected = {
action: 'addObject',
indexName: 'my_index_object_ids',
body: { content: %w[bar foo] }
}
expect(current)
.to have_received(:execute_operations) do |operations|
expect(operations[3]).to eq expected
end
end
end
context 'storing ids by group of 100' do
let(:records) do
records = []
150.times { |i| records << { objectID: "foo-#{i}" } }
records
end
before do
current.update_records(records)
end
it 'should create two records for storing the object IDs' do
expect(current)
.to have_received(:execute_operations) do |operations|
dedicated_index_operations = operations.select do |operation|
operation[:indexName] == 'my_index_object_ids' &&
operation[:action] == 'addObject'
end
expect(dedicated_index_operations.length).to eq 2
end
end
end
end
describe '.execute_operations' do
let(:indexing_batch_size) { 1000 }
let(:operations) { %w[foo bar] }
let(:progress_bar_instance) { double('ProgressBarInstance') }
before do
allow(::Algolia).to receive(:batch!)
allow(progress_bar).to receive(:create).and_return(progress_bar_instance)
allow(progress_bar_instance).to receive(:increment)
allow(current).to receive(:index).and_return(index)
allow(configurator)
.to receive(:algolia)
.with('indexing_batch_size')
.and_return(indexing_batch_size)
end
before { current.update_records(old_records_ids, new_records) }
context 'when running a dry run' do
let(:dry_run) { true }
it do
expect(::Algolia)
.to_not have_received(:batch!)
end
end
context 'when nothing to update' do
let(:old_records_ids) { [] }
let(:new_records) { [] }
it do
expect(::Algolia)
.to_not have_received(:batch!)
end
end
it 'should batch all operations (deletions first)' do
expect(::Algolia)
.to have_received(:batch!)
.with([
{
action: 'deleteObject',
indexName: 'my_index',
body: { objectID: 'abc' }
},
{
action: 'addObject',
indexName: 'my_index',
body: { 'objectID' => 'def' }
}
])
end
context 'split in smaller batches if too many operations' do
let(:indexing_batch_size) { 1 }
before { current.execute_operations(operations) }
it do
# expect(::Algolia).to have_received(:batch!).twice
expect(::Algolia)
.to have_received(:batch!)
.ordered
.with([
{
action: 'deleteObject',
indexName: 'my_index',
body: { objectID: 'abc' }
}
])
.with(['foo'])
expect(::Algolia)
.to have_received(:batch!)
.ordered
.with([
{
action: 'addObject',
indexName: 'my_index',
body: { 'objectID' => 'def' }
}
])
.with(['bar'])
end
end
context 'progress bar' do
before { current.execute_operations(operations) }
describe 'should not create it if only one batch' do
it do
expect(progress_bar).to_not have_received(:create)
@ -207,62 +472,32 @@ describe(Jekyll::Algolia::Indexer) do
end
end
end
end
describe '.run' do
let(:records) { [{ objectID: 'foo' }, { objectID: 'bar' }] }
let(:remote_ids) { %w[foo baz] }
let(:index_name) { 'my_index' }
let(:index) { double('Algolia::Index', name: index_name) }
before do
allow(current).to receive(:init)
allow(current).to receive(:index).and_return(index)
allow(current).to receive(:update_settings)
allow(current).to receive(:remote_object_ids).and_return(remote_ids)
allow(current).to receive(:update_records)
end
context 'with records' do
before { current.run(records) }
it { expect(current).to have_received(:init) }
it do
expect(current)
.to have_received(:update_settings)
end
it do
expect(current)
.to have_received(:update_records)
.with(['baz'], [{ objectID: 'bar' }])
end
end
context 'with empty results' do
subject { -> { current.run(records) } }
let(:records) { [] }
context 'dispatch the error to the error handler' do
before do
expect(configurator)
.to receive(:algolia)
.with('files_to_exclude')
.and_return(%w[foo.html bar.md])
expect(configurator)
.to receive(:algolia)
.with('nodes_to_index')
.and_return('p,li')
expect(logger)
.to receive(:known_message)
.with(
'no_records_found',
hash_including(
'files_to_exclude' => 'foo.html, bar.md',
'nodes_to_index' => 'p,li'
)
)
allow(::Algolia).to receive(:batch!).and_raise
allow(error_handler).to receive(:stop)
current.execute_operations(operations)
end
it { is_expected.to raise_error SystemExit }
describe 'when only one slice' do
it do
expect(error_handler)
.to have_received(:stop)
.with(RuntimeError, operations: operations)
end
end
describe 'when split in several slices' do
let(:indexing_batch_size) { 1 }
let(:operations) { %w[foo bar] }
it do
expect(error_handler)
.to have_received(:stop)
.with(RuntimeError, operations: ['foo'])
end
end
end
end
@ -500,5 +735,62 @@ describe(Jekyll::Algolia::Indexer) do
)
end
end
describe '.run' do
let(:records) { [{ objectID: 'foo' }, { objectID: 'bar' }] }
let(:remote_ids) { %w[foo baz] }
let(:index_name) { 'my_index' }
let(:index) { double('Algolia::Index', name: index_name) }
before do
allow(current).to receive(:init)
allow(current).to receive(:index).and_return(index)
allow(current).to receive(:update_settings)
allow(current).to receive(:remote_object_ids).and_return(remote_ids)
allow(current).to receive(:update_records)
end
context 'with records' do
before { current.run(records) }
it { expect(current).to have_received(:init) }
it do
expect(current)
.to have_received(:update_settings)
end
it do
expect(current)
.to have_received(:update_records)
.with(records)
end
end
context 'with empty results' do
subject { -> { current.run(records) } }
let(:records) { [] }
before do
expect(configurator)
.to receive(:algolia)
.with('files_to_exclude')
.and_return(%w[foo.html bar.md])
expect(configurator)
.to receive(:algolia)
.with('nodes_to_index')
.and_return('p,li')
expect(logger)
.to receive(:known_message)
.with(
'no_records_found',
hash_including(
'files_to_exclude' => 'foo.html, bar.md',
'nodes_to_index' => 'p,li'
)
)
end
it { is_expected.to raise_error SystemExit }
end
end
end
# rubocop:enable Metrics/BlockLength