test(indexer): Add integration tests for the dedicated index

This commit is contained in:
Pixelastic 2018-04-04 13:44:21 +02:00
parent 40c113d587
commit 66f084fe2b
9 changed files with 318 additions and 76 deletions

View File

@ -86,8 +86,10 @@ namespace 'test' do
task.pattern = [
# Check that the default build has the expected results
'spec/integration/main_spec.rb',
# Now check various config and its impact on the settings
'spec/integration/settings_spec.rb'
# Check various config and its impact on the settings
'spec/integration/settings_spec.rb',
# Check that object ids are stored in dedicated index
'spec/integration/object_ids_spec.rb'
]
end
# Live-reloading integration tests

View File

@ -3,6 +3,9 @@ E:
E:The jekyll-algolia plugin could not connect to your application ID using the API key you provided.
W:
W:Make sure your API key has access to your {application_id} application.
W:It should also have the rights to push to the following indices:
W:   - {index_name}
W:   - {index_object_ids_name}
I:
I:You can find your API key in your Algolia dashboard here:
I:   https://www.algolia.com/licensing

View File

@ -148,6 +148,11 @@ module Jekyll
ENV['ALGOLIA_INDEX_NAME'] || algolia('index_name')
end
# Public: Name of the dedicated index that stores the object ids
#
# The name is the main index name followed by the "_object_ids" suffix.
#
# Returns the dedicated index name as a String
def self.index_object_ids_name
  main_index_name = index_name
  format('%<name>s_object_ids', name: main_index_name)
end
# Public: Get the index settings
#
# This will be a merge of default settings and the one defined in the

View File

@ -187,7 +187,9 @@ module Jekyll
end
{
'application_id' => details['application_id']
'application_id' => details['application_id'],
'index_name' => Configurator.index_name,
'index_object_ids_name' => Configurator.index_object_ids_name
}
end

View File

@ -4,6 +4,7 @@ require 'algoliasearch'
require 'yaml'
require 'algolia_html_extractor'
# rubocop:disable Metrics/ModuleLength
module Jekyll
module Algolia
# Module to push records to Algolia and configure the index
@ -18,7 +19,8 @@ module Jekyll
)
index_name = Configurator.index_name
@index = ::Algolia::Index.new(index_name)
@index_object_ids = ::Algolia::Index.new("#{index_name}_object_ids")
index_object_ids_name = Configurator.index_object_ids_name
@index_object_ids = ::Algolia::Index.new(index_object_ids_name)
set_user_agent
@ -62,6 +64,8 @@ module Jekyll
distinct: false,
hitsPerPage: 1
)['nbHits']
rescue StandardError
0
end
# Public: Set the User-Agent to send to the API
@ -103,6 +107,8 @@ module Jekyll
list << hit['objectID']
progress_bar.increment
end
rescue StandardError
return []
end
list.sort
@ -122,6 +128,8 @@ module Jekyll
) do |hit|
list += hit['content']
end
rescue StandardError
return []
end
list.sort
@ -165,45 +173,55 @@ module Jekyll
ids_to_delete = remote_ids - local_ids
ids_to_add = local_ids - remote_ids
# What changes should we do to the indexes?
has_records_to_update = !ids_to_delete.empty? || !ids_to_add.empty?
has_dedicated_index = index_exist?(index_object_ids)
# Stop if nothing to change
if ids_to_delete.empty? && ids_to_add.empty?
if !has_records_to_update && has_dedicated_index
Logger.log('I:Content is already up to date.')
return
end
Logger.log("I:Updating records in index #{index.name}...")
Logger.log("I:Records to delete: #{ids_to_delete.length}")
Logger.log("I:Records to add: #{ids_to_add.length}")
return if Configurator.dry_run?
# Transforming ids into real records to add
records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
records_to_add = ids_to_add.map { |id| records_by_id[id] }
# We group all operations into one batch
operations = []
# Deletion operations come first, to avoid hitting an overquota too soon
# if it can be avoided
ids_to_delete.each do |object_id|
operations << {
action: 'deleteObject', indexName: index.name,
body: { objectID: object_id }
}
end
# Then we add the new records
operations += records_to_add.map do |new_record|
{ action: 'addObject', indexName: index.name, body: new_record }
# We update records only if there are records to update
if has_records_to_update
Logger.log("I:Updating records in index #{index.name}...")
Logger.log("I:Records to delete: #{ids_to_delete.length}")
Logger.log("I:Records to add: #{ids_to_add.length}")
# Transforming ids into real records to add
records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
records_to_add = ids_to_add.map { |id| records_by_id[id] }
# Deletion operations come first, to avoid hitting an overquota too
# soon if it can be avoided
ids_to_delete.each do |object_id|
operations << {
action: 'deleteObject', indexName: index.name,
body: { objectID: object_id }
}
end
# Then we add the new records
operations += records_to_add.map do |new_record|
{ action: 'addObject', indexName: index.name, body: new_record }
end
end
# We also clear the dedicated index holding the object ids and push the
# new list of ids
operations << { action: 'clear', indexName: index_object_ids.name }
local_ids.each_slice(100).each do |ids|
operations << {
action: 'addObject', indexName: index_object_ids.name,
body: { content: ids }
}
# We update the dedicated index everytime we update records, but we also
# create it if it does not exist
should_update_dedicated_index = has_records_to_update ||
!has_dedicated_index
if should_update_dedicated_index
operations << { action: 'clear', indexName: index_object_ids.name }
local_ids.each_slice(100).each do |ids|
operations << {
action: 'addObject', indexName: index_object_ids.name,
body: { content: ids }
}
end
end
execute_operations(operations)
@ -216,6 +234,9 @@ module Jekyll
# Note: Will split the batch in several calls if too big, and will display
# a progress bar if this happens
def self.execute_operations(operations)
return if Configurator.dry_run?
return if operations.empty?
# Run the batches in slices if they are too large
batch_size = Configurator.algolia('indexing_batch_size')
slices = operations.each_slice(batch_size).to_a
@ -364,3 +385,4 @@ module Jekyll
end
end
end
# rubocop:enable Metrics/ModuleLength

View File

@ -0,0 +1,87 @@
# frozen_string_literal: true
require_relative './spec_helper'
# Note: These tests will create and delete records and indexes several times.
# rubocop:disable Metrics/BlockLength
describe('storing object ids') do
  let(:logger) { Jekyll::Algolia::Logger }
  let(:configurator) { Jekyll::Algolia::Configurator }
  let(:indexer) { Jekyll::Algolia::Indexer.init }
  let(:progress_bar) { Jekyll::Algolia::ProgressBar }
  let(:progress_bar_instance) { double('ProgressBar').as_null_object }
  let(:records) do
    [
      { objectID: 'foo', name: 'foo' },
      { objectID: 'bar', name: 'bar' },
      { objectID: 'baz', name: 'baz' }
    ]
  end

  before do
    # Every config option goes through the real implementation, except the
    # credentials and index name, which are read from the environment
    allow(configurator).to receive(:algolia).and_call_original
    {
      'application_id' => 'ALGOLIA_APPLICATION_ID',
      'api_key' => 'ALGOLIA_API_KEY',
      'index_name' => 'ALGOLIA_INDEX_NAME'
    }.each do |option, env_key|
      allow(configurator)
        .to receive(:algolia)
        .with(option)
        .and_return(ENV[env_key])
    end

    # Keep the test output quiet
    allow(logger).to receive(:log)
    allow(progress_bar).to receive(:create).and_return(progress_bar_instance)

    # Each example starts without the main nor the dedicated index
    indexer.index.delete_index!
    indexer.index_object_ids.delete_index!
  end

  # Expectations common to every scenario. The including group must set
  # @index to the dedicated index in a before hook.
  shared_examples 'a populated dedicated index' do
    it 'should create a dedicated index' do
      expect(indexer.index_exist?(@index)).to eq true
    end

    it 'should contain all object ids' do
      hits = @index.search('')['hits']

      expect(hits.length).to eq 1
      expect(hits[0]['content']).to include('foo', 'bar', 'baz')
    end
  end

  describe 'initial push should store ids in dedicated index' do
    before do
      indexer.update_records(records)
      @index = indexer.index_object_ids
    end

    include_examples 'a populated dedicated index'
  end

  describe 'dedicated index should be created if does not exist' do
    before do
      # Push once, drop only the dedicated index, then push the same records
      indexer.update_records(records)
      indexer.index_object_ids.delete_index!
      indexer.update_records(records)
      @index = indexer.index_object_ids
    end

    include_examples 'a populated dedicated index'
  end
end
# rubocop:enable Metrics/BlockLength

View File

@ -159,6 +159,18 @@ describe(Jekyll::Algolia::Configurator) do
end
end
# With the main index name stubbed to 'my_index', the dedicated index name
# is expected to be 'my_index_object_ids'
describe '.index_object_ids_name' do
  subject { current.index_object_ids_name }

  before { allow(current).to receive(:index_name).and_return('my_index') }

  describe 'should get the name from the main index' do
    it { should eq 'my_index_object_ids' }
  end
end
describe '.application_id' do
subject { current.application_id }

View File

@ -224,7 +224,17 @@ describe(Jekyll::Algolia::ErrorHandler) do
'application_id' => 'MY_APP_ID'
}
end
it { should eq('application_id' => 'MY_APP_ID') }
before do
allow(configurator)
.to receive(:index_name)
.and_return('foo')
allow(configurator)
.to receive(:index_object_ids_name)
.and_return('foo_object_ids')
end
it { should include('application_id' => 'MY_APP_ID') }
it { should include('index_name' => 'foo') }
it { should include('index_object_ids_name' => 'foo_object_ids') }
end
end

View File

@ -69,11 +69,11 @@ describe(Jekyll::Algolia::Indexer) do
before do
allow(configurator)
.to receive(:index_name)
.and_return('index_name')
.to receive(:index_object_ids_name)
.and_return('foo')
allow(::Algolia::Index)
.to receive(:new)
.with('index_name_object_ids')
.with('foo')
.and_return(index_object_ids)
current.init
@ -140,21 +140,31 @@ describe(Jekyll::Algolia::Indexer) do
subject { current.record_count(index) }
before do
expect(index)
.to receive(:search)
.with(
'',
hash_including(
distinct: false, # To get the correct number of records
hitsPerPage: 1, # To get a short response
attributesToRetrieve: 'objectID', # To get a short response
describe 'when index exists' do
before do
expect(index)
.to receive(:search)
.with(
'',
hash_including(
distinct: false, # To get the correct number of records
hitsPerPage: 1, # To get a short response
attributesToRetrieve: 'objectID', # To get a short response
)
)
)
.and_return('nbHits' => nb_hits)
.and_return('nbHits' => nb_hits)
end
it { should eq 12 }
end
it { should eq 12 }
# A missing index is simulated by making the search call raise; the subject
# is then expected to be 0
describe 'when index does not exist' do
  before { allow(index).to receive(:search).and_raise }

  it { should eq 0 }
end
end
describe '.remote_object_ids_from_main_index' do
@ -209,6 +219,16 @@ describe(Jekyll::Algolia::Indexer) do
expect(progress_bar_instance).to have_received(:increment).twice
end
end
# A missing index is simulated by making the browse call raise; the subject
# is then expected to be an empty list
context 'when no index' do
  before { allow(index).to receive(:browse).and_raise }

  it { should eq [] }
end
end
describe '.remote_object_ids_from_dedicated_index' do
@ -242,6 +262,16 @@ describe(Jekyll::Algolia::Indexer) do
)
end
end
# A missing index is simulated by making the browse call raise; the subject
# is then expected to be an empty list
context 'when no index' do
  before { allow(index).to receive(:browse).and_raise }

  it { should eq [] }
end
end
describe '.remote_object_ids' do
@ -295,10 +325,9 @@ describe(Jekyll::Algolia::Indexer) do
end
describe '.update_records' do
let(:index) { double('Algolia::Index', name: 'my_index') }
let(:index_object_ids) do
double('Algolia::Index', name: 'my_index_object_ids')
end
let(:index) { double('Algolia::Index', name: 'main') }
let(:index_object_ids) { double('Algolia::Index', name: 'dedicated') }
let(:has_dedicated_index) { false }
let(:remote_ids) { %w[bar baz] }
let(:records) do
[
@ -312,32 +341,53 @@ describe(Jekyll::Algolia::Indexer) do
allow(current).to receive(:index_object_ids).and_return(index_object_ids)
allow(current).to receive(:remote_object_ids).and_return(remote_ids)
allow(current).to receive(:execute_operations)
allow(current)
.to receive(:index_exist?)
.with(index_object_ids)
.and_return(has_dedicated_index)
end
context 'when nothing to update' do
let(:remote_ids) { [] }
before do
allow(current).to receive(:local_object_ids).and_return([])
allow(current).to receive(:local_object_ids).and_return(local_ids)
current.update_records(records)
end
it do
expect(current)
.to_not have_received(:execute_operations)
context 'when records to update and no dedicated index' do
let(:local_ids) { ['foo'] }
let(:remote_ids) { [] }
let(:has_dedicated_index) { false }
it do
expect(current)
.to have_received(:execute_operations)
end
end
end
context 'when running a dry run' do
let(:dry_run) { true }
before do
current.update_records(records)
context 'when records to update and a dedicated index exist' do
let(:local_ids) { ['foo'] }
let(:remote_ids) { [] }
let(:has_dedicated_index) { true }
it do
expect(current)
.to have_received(:execute_operations)
end
end
it do
expect(current)
.to_not have_received(:execute_operations)
context 'when no records to update and no dedicated index' do
let(:local_ids) { [] }
let(:remote_ids) { [] }
let(:has_dedicated_index) { false }
it do
expect(current)
.to have_received(:execute_operations)
end
end
context 'when no records to update but a dedicated index exist' do
let(:local_ids) { [] }
let(:remote_ids) { [] }
let(:has_dedicated_index) { true }
it do
expect(current)
.to_not have_received(:execute_operations)
end
end
end
@ -349,7 +399,7 @@ describe(Jekyll::Algolia::Indexer) do
it 'should start with deleting old record' do
expected = {
action: 'deleteObject',
indexName: 'my_index',
indexName: 'main',
body: { objectID: 'baz' }
}
@ -362,7 +412,7 @@ describe(Jekyll::Algolia::Indexer) do
it 'should add new items after deleting old ones' do
expected = {
action: 'addObject',
indexName: 'my_index',
indexName: 'main',
body: { objectID: 'foo', name: 'foo' }
}
@ -375,7 +425,7 @@ describe(Jekyll::Algolia::Indexer) do
it 'should clear the object id index after updating the record' do
expected = {
action: 'clear',
indexName: 'my_index_object_ids'
indexName: 'dedicated'
}
expect(current)
@ -387,7 +437,7 @@ describe(Jekyll::Algolia::Indexer) do
it 'should add new objectIDs to the dedicated index' do
expected = {
action: 'addObject',
indexName: 'my_index_object_ids',
indexName: 'dedicated',
body: { content: %w[bar foo] }
}
@ -398,6 +448,36 @@ describe(Jekyll::Algolia::Indexer) do
end
end
# Local and remote ids are identical here, so the only variable is whether
# the dedicated index already exists
context 'when no update to the records' do
  let(:local_ids) { %w[foo bar] }
  let(:remote_ids) { %w[foo bar] }

  before do
    allow(current).to receive(:local_object_ids).and_return(local_ids)
    current.update_records(records)
  end

  context 'do not update the dedicated index if already exist' do
    let(:has_dedicated_index) { true }

    it { expect(current).to_not have_received(:execute_operations) }
  end

  context 'create the dedicated index if does not yet exist' do
    let(:has_dedicated_index) { false }

    it do
      expect(current).to have_received(:execute_operations) do |operations|
        clear_operation, add_operation = operations

        # The dedicated index is cleared first, then filled with the ids
        expect(clear_operation)
          .to include(action: 'clear', indexName: 'dedicated')
        expect(add_operation)
          .to include(action: 'addObject', body: { content: %w[foo bar] })
      end
    end
  end
end
context 'storing ids by group of 100' do
let(:records) do
records = []
@ -413,7 +493,7 @@ describe(Jekyll::Algolia::Indexer) do
expect(current)
.to have_received(:execute_operations) do |operations|
dedicated_index_operations = operations.select do |operation|
operation[:indexName] == 'my_index_object_ids' &&
operation[:indexName] == 'dedicated' &&
operation[:action] == 'addObject'
end
expect(dedicated_index_operations.length).to eq 2
@ -437,13 +517,32 @@ describe(Jekyll::Algolia::Indexer) do
.and_return(indexing_batch_size)
end
# With dry_run set to true, no batch call should be sent to Algolia
context 'when running in dry run mode' do
  let(:dry_run) { true }

  before do
    current.execute_operations(operations)
  end

  it { expect(::Algolia).to_not have_received(:batch!) }
end
# With an empty list of operations, no batch call should be sent to Algolia
context 'when running an empty set of operations' do
  let(:operations) { [] }

  before do
    current.execute_operations(operations)
  end

  it { expect(::Algolia).to_not have_received(:batch!) }
end
context 'split in smaller batches if too many operations' do
let(:indexing_batch_size) { 1 }
before { current.execute_operations(operations) }
it do
# expect(::Algolia).to have_received(:batch!).twice
expect(::Algolia)
.to have_received(:batch!)
.ordered