test(indexer): Add integration tests for the dedicated index

This commit is contained in:
Pixelastic 2018-04-04 13:44:21 +02:00
parent 40c113d587
commit 66f084fe2b
9 changed files with 318 additions and 76 deletions

View File

@ -86,8 +86,10 @@ namespace 'test' do
task.pattern = [ task.pattern = [
# Check that the default build has the expected results # Check that the default build has the expected results
'spec/integration/main_spec.rb', 'spec/integration/main_spec.rb',
# Now check various config and its impact on the settings # Check various config and its impact on the settings
'spec/integration/settings_spec.rb' 'spec/integration/settings_spec.rb',
# Check that object ids are stored in dedicated index
'spec/integration/object_ids_spec.rb'
] ]
end end
# Live-reloading integration tests # Live-reloading integration tests

View File

@ -3,6 +3,9 @@ E:
E:The jekyll-algolia plugin could not connect to your application ID using the API key you provided. E:The jekyll-algolia plugin could not connect to your application ID using the API key you provided.
W: W:
W:Make sure your API key has access to your {application_id} application. W:Make sure your API key has access to your {application_id} application.
W:It should also have the rights to push to the following indices:
W:   - {index_name}
W:   - {index_object_ids_name}
I: I:
I:You can find your API key in your Algolia dashboard here: I:You can find your API key in your Algolia dashboard here:
I:   https://www.algolia.com/licensing I:   https://www.algolia.com/licensing

View File

@ -148,6 +148,11 @@ module Jekyll
ENV['ALGOLIA_INDEX_NAME'] || algolia('index_name') ENV['ALGOLIA_INDEX_NAME'] || algolia('index_name')
end end
# Public: Name of the dedicated index that stores the list of object ids
#
# The name is the main index name (as returned by index_name) followed by
# the "_object_ids" suffix.
#
# Returns a String
def self.index_object_ids_name
  suffix = '_object_ids'
  index_name.to_s + suffix
end
# Public: Get the index settings # Public: Get the index settings
# #
# This will be a merge of default settings and the one defined in the # This will be a merge of default settings and the one defined in the

View File

@ -187,7 +187,9 @@ module Jekyll
end end
{ {
'application_id' => details['application_id'] 'application_id' => details['application_id'],
'index_name' => Configurator.index_name,
'index_object_ids_name' => Configurator.index_object_ids_name
} }
end end

View File

@ -4,6 +4,7 @@ require 'algoliasearch'
require 'yaml' require 'yaml'
require 'algolia_html_extractor' require 'algolia_html_extractor'
# rubocop:disable Metrics/ModuleLength
module Jekyll module Jekyll
module Algolia module Algolia
# Module to push records to Algolia and configure the index # Module to push records to Algolia and configure the index
@ -18,7 +19,8 @@ module Jekyll
) )
index_name = Configurator.index_name index_name = Configurator.index_name
@index = ::Algolia::Index.new(index_name) @index = ::Algolia::Index.new(index_name)
@index_object_ids = ::Algolia::Index.new("#{index_name}_object_ids") index_object_ids_name = Configurator.index_object_ids_name
@index_object_ids = ::Algolia::Index.new(index_object_ids_name)
set_user_agent set_user_agent
@ -62,6 +64,8 @@ module Jekyll
distinct: false, distinct: false,
hitsPerPage: 1 hitsPerPage: 1
)['nbHits'] )['nbHits']
rescue StandardError
0
end end
# Public: Set the User-Agent to send to the API # Public: Set the User-Agent to send to the API
@ -103,6 +107,8 @@ module Jekyll
list << hit['objectID'] list << hit['objectID']
progress_bar.increment progress_bar.increment
end end
rescue StandardError
return []
end end
list.sort list.sort
@ -122,6 +128,8 @@ module Jekyll
) do |hit| ) do |hit|
list += hit['content'] list += hit['content']
end end
rescue StandardError
return []
end end
list.sort list.sort
@ -165,26 +173,31 @@ module Jekyll
ids_to_delete = remote_ids - local_ids ids_to_delete = remote_ids - local_ids
ids_to_add = local_ids - remote_ids ids_to_add = local_ids - remote_ids
# What changes should we do to the indexes?
has_records_to_update = !ids_to_delete.empty? || !ids_to_add.empty?
has_dedicated_index = index_exist?(index_object_ids)
# Stop if nothing to change # Stop if nothing to change
if ids_to_delete.empty? && ids_to_add.empty? if !has_records_to_update && has_dedicated_index
Logger.log('I:Content is already up to date.') Logger.log('I:Content is already up to date.')
return return
end end
# We group all operations into one batch
operations = []
# We update records only if there are records to update
if has_records_to_update
Logger.log("I:Updating records in index #{index.name}...") Logger.log("I:Updating records in index #{index.name}...")
Logger.log("I:Records to delete: #{ids_to_delete.length}") Logger.log("I:Records to delete: #{ids_to_delete.length}")
Logger.log("I:Records to add: #{ids_to_add.length}") Logger.log("I:Records to add: #{ids_to_add.length}")
return if Configurator.dry_run?
# Transforming ids into real records to add # Transforming ids into real records to add
records_by_id = Hash[records.map { |r| [r[:objectID], r] }] records_by_id = Hash[records.map { |r| [r[:objectID], r] }]
records_to_add = ids_to_add.map { |id| records_by_id[id] } records_to_add = ids_to_add.map { |id| records_by_id[id] }
# We group all operations into one batch # Deletion operations come first, to avoid hitting an overquota too
operations = [] # soon if it can be avoided
# Deletion operations come first, to avoid hitting an overquota too soon
# if it can be avoided
ids_to_delete.each do |object_id| ids_to_delete.each do |object_id|
operations << { operations << {
action: 'deleteObject', indexName: index.name, action: 'deleteObject', indexName: index.name,
@ -195,9 +208,13 @@ module Jekyll
operations += records_to_add.map do |new_record| operations += records_to_add.map do |new_record|
{ action: 'addObject', indexName: index.name, body: new_record } { action: 'addObject', indexName: index.name, body: new_record }
end end
end
# We also clear the dedicated index holding the object ids and push the # We update the dedicated index every time we update records, but we also
# new list of ids # create it if it does not exist
should_update_dedicated_index = has_records_to_update ||
!has_dedicated_index
if should_update_dedicated_index
operations << { action: 'clear', indexName: index_object_ids.name } operations << { action: 'clear', indexName: index_object_ids.name }
local_ids.each_slice(100).each do |ids| local_ids.each_slice(100).each do |ids|
operations << { operations << {
@ -205,6 +222,7 @@ module Jekyll
body: { content: ids } body: { content: ids }
} }
end end
end
execute_operations(operations) execute_operations(operations)
end end
@ -216,6 +234,9 @@ module Jekyll
# Note: Will split the batch in several calls if too big, and will display # Note: Will split the batch in several calls if too big, and will display
# a progress bar if this happens # a progress bar if this happens
def self.execute_operations(operations) def self.execute_operations(operations)
return if Configurator.dry_run?
return if operations.empty?
# Run the batches in slices if they are too large # Run the batches in slices if they are too large
batch_size = Configurator.algolia('indexing_batch_size') batch_size = Configurator.algolia('indexing_batch_size')
slices = operations.each_slice(batch_size).to_a slices = operations.each_slice(batch_size).to_a
@ -364,3 +385,4 @@ module Jekyll
end end
end end
end end
# rubocop:enable Metrics/ModuleLength

View File

@ -0,0 +1,87 @@
# frozen_string_literal: true

require_relative './spec_helper'

# Integration tests checking that object ids are stored in the dedicated index.
# Note: These tests create and delete records and indexes several times.
# rubocop:disable Metrics/BlockLength
describe('storing object ids') do
  let(:logger) { Jekyll::Algolia::Logger }
  let(:configurator) { Jekyll::Algolia::Configurator }
  let(:indexer) { Jekyll::Algolia::Indexer.init }
  let(:progress_bar) { Jekyll::Algolia::ProgressBar }
  let(:progress_bar_instance) { double('ProgressBar').as_null_object }
  let(:records) do
    %w[foo bar baz].map { |name| { objectID: name, name: name } }
  end

  before do
    # Any option not stubbed below falls through to the real configuration
    allow(configurator).to receive(:algolia).and_call_original

    # Credentials and index name are taken from the environment
    {
      'application_id' => 'ALGOLIA_APPLICATION_ID',
      'api_key' => 'ALGOLIA_API_KEY',
      'index_name' => 'ALGOLIA_INDEX_NAME'
    }.each do |option, env_key|
      allow(configurator)
        .to receive(:algolia)
        .with(option)
        .and_return(ENV[env_key])
    end

    # Stub out logging and progress bar creation
    allow(logger).to receive(:log)
    allow(progress_bar).to receive(:create).and_return(progress_bar_instance)

    # Every example starts with both indexes deleted
    indexer.index.delete_index!
    indexer.index_object_ids.delete_index!
  end

  describe 'initial push should store ids in dedicated index' do
    before do
      indexer.update_records(records)
      @index = indexer.index_object_ids
    end

    it 'should create a dedicated index' do
      expect(indexer.index_exist?(@index)).to eq true
    end

    it 'should contain all object ids' do
      hits = @index.search('')['hits']
      expect(hits.length).to eq 1
      %w[foo bar baz].each do |object_id|
        expect(hits.first['content']).to include(object_id)
      end
    end
  end

  describe 'dedicated index should be created if does not exist' do
    before do
      # Push once, drop only the dedicated index, then push again
      indexer.update_records(records)
      indexer.index_object_ids.delete_index!
      indexer.update_records(records)
      @index = indexer.index_object_ids
    end

    it 'should create a dedicated index' do
      expect(indexer.index_exist?(@index)).to eq true
    end

    it 'should contain all object ids' do
      hits = @index.search('')['hits']
      expect(hits.length).to eq 1
      %w[foo bar baz].each do |object_id|
        expect(hits.first['content']).to include(object_id)
      end
    end
  end
end
# rubocop:enable Metrics/BlockLength

View File

@ -159,6 +159,18 @@ describe(Jekyll::Algolia::Configurator) do
end end
end end
describe '.index_object_ids_name' do
  subject { current.index_object_ids_name }

  # Pin the main index name so the expected value is deterministic
  before { allow(current).to receive(:index_name).and_return('my_index') }

  describe 'should get the name from the main index' do
    it { should eq 'my_index_object_ids' }
  end
end
describe '.application_id' do describe '.application_id' do
subject { current.application_id } subject { current.application_id }

View File

@ -224,7 +224,17 @@ describe(Jekyll::Algolia::ErrorHandler) do
'application_id' => 'MY_APP_ID' 'application_id' => 'MY_APP_ID'
} }
end end
it { should eq('application_id' => 'MY_APP_ID') } before do
allow(configurator)
.to receive(:index_name)
.and_return('foo')
allow(configurator)
.to receive(:index_object_ids_name)
.and_return('foo_object_ids')
end
it { should include('application_id' => 'MY_APP_ID') }
it { should include('index_name' => 'foo') }
it { should include('index_object_ids_name' => 'foo_object_ids') }
end end
end end

View File

@ -69,11 +69,11 @@ describe(Jekyll::Algolia::Indexer) do
before do before do
allow(configurator) allow(configurator)
.to receive(:index_name) .to receive(:index_object_ids_name)
.and_return('index_name') .and_return('foo')
allow(::Algolia::Index) allow(::Algolia::Index)
.to receive(:new) .to receive(:new)
.with('index_name_object_ids') .with('foo')
.and_return(index_object_ids) .and_return(index_object_ids)
current.init current.init
@ -140,6 +140,7 @@ describe(Jekyll::Algolia::Indexer) do
subject { current.record_count(index) } subject { current.record_count(index) }
describe 'when index exists' do
before do before do
expect(index) expect(index)
.to receive(:search) .to receive(:search)
@ -157,6 +158,15 @@ describe(Jekyll::Algolia::Indexer) do
it { should eq 12 } it { should eq 12 }
end end
describe 'when index does not exist' do
  # A search that raises stands in for a missing index
  before { allow(index).to receive(:search).and_raise }

  it { should eq 0 }
end
end
describe '.remote_object_ids_from_main_index' do describe '.remote_object_ids_from_main_index' do
let(:index) { double('Algolia::Index').as_null_object } let(:index) { double('Algolia::Index').as_null_object }
let(:progress_bar_instance) { double('ProgressBarInstance') } let(:progress_bar_instance) { double('ProgressBarInstance') }
@ -209,6 +219,16 @@ describe(Jekyll::Algolia::Indexer) do
expect(progress_bar_instance).to have_received(:increment).twice expect(progress_bar_instance).to have_received(:increment).twice
end end
end end
context 'when no index' do
  # A browse that raises stands in for a missing index
  before { allow(index).to receive(:browse).and_raise }

  it { should eq [] }
end
end end
describe '.remote_object_ids_from_dedicated_index' do describe '.remote_object_ids_from_dedicated_index' do
@ -242,6 +262,16 @@ describe(Jekyll::Algolia::Indexer) do
) )
end end
end end
context 'when no index' do
  # A browse that raises stands in for a missing index
  before { allow(index).to receive(:browse).and_raise }

  it { should eq [] }
end
end end
describe '.remote_object_ids' do describe '.remote_object_ids' do
@ -295,10 +325,9 @@ describe(Jekyll::Algolia::Indexer) do
end end
describe '.update_records' do describe '.update_records' do
let(:index) { double('Algolia::Index', name: 'my_index') } let(:index) { double('Algolia::Index', name: 'main') }
let(:index_object_ids) do let(:index_object_ids) { double('Algolia::Index', name: 'dedicated') }
double('Algolia::Index', name: 'my_index_object_ids') let(:has_dedicated_index) { false }
end
let(:remote_ids) { %w[bar baz] } let(:remote_ids) { %w[bar baz] }
let(:records) do let(:records) do
[ [
@ -312,33 +341,54 @@ describe(Jekyll::Algolia::Indexer) do
allow(current).to receive(:index_object_ids).and_return(index_object_ids) allow(current).to receive(:index_object_ids).and_return(index_object_ids)
allow(current).to receive(:remote_object_ids).and_return(remote_ids) allow(current).to receive(:remote_object_ids).and_return(remote_ids)
allow(current).to receive(:execute_operations) allow(current).to receive(:execute_operations)
allow(current)
.to receive(:index_exist?)
.with(index_object_ids)
.and_return(has_dedicated_index)
end end
context 'when nothing to update' do context 'when nothing to update' do
before do
allow(current).to receive(:local_object_ids).and_return(local_ids)
current.update_records(records)
end
context 'when records to update and no dedicated index' do
let(:local_ids) { ['foo'] }
let(:remote_ids) { [] } let(:remote_ids) { [] }
before do let(:has_dedicated_index) { false }
allow(current).to receive(:local_object_ids).and_return([]) it do
expect(current)
current.update_records(records) .to have_received(:execute_operations)
end end
end
context 'when records to update and a dedicated index exist' do
let(:local_ids) { ['foo'] }
let(:remote_ids) { [] }
let(:has_dedicated_index) { true }
it do
expect(current)
.to have_received(:execute_operations)
end
end
context 'when no records to update and no dedicated index' do
let(:local_ids) { [] }
let(:remote_ids) { [] }
let(:has_dedicated_index) { false }
it do
expect(current)
.to have_received(:execute_operations)
end
end
context 'when no records to update but a dedicated index exist' do
let(:local_ids) { [] }
let(:remote_ids) { [] }
let(:has_dedicated_index) { true }
it do it do
expect(current) expect(current)
.to_not have_received(:execute_operations) .to_not have_received(:execute_operations)
end end
end end
context 'when running a dry run' do
let(:dry_run) { true }
before do
current.update_records(records)
end
it do
expect(current)
.to_not have_received(:execute_operations)
end
end end
context 'batch operations' do context 'batch operations' do
@ -349,7 +399,7 @@ describe(Jekyll::Algolia::Indexer) do
it 'should start with deleting old record' do it 'should start with deleting old record' do
expected = { expected = {
action: 'deleteObject', action: 'deleteObject',
indexName: 'my_index', indexName: 'main',
body: { objectID: 'baz' } body: { objectID: 'baz' }
} }
@ -362,7 +412,7 @@ describe(Jekyll::Algolia::Indexer) do
it 'should add new items after deleting old ones' do it 'should add new items after deleting old ones' do
expected = { expected = {
action: 'addObject', action: 'addObject',
indexName: 'my_index', indexName: 'main',
body: { objectID: 'foo', name: 'foo' } body: { objectID: 'foo', name: 'foo' }
} }
@ -375,7 +425,7 @@ describe(Jekyll::Algolia::Indexer) do
it 'should clear the object id index after updating the record' do it 'should clear the object id index after updating the record' do
expected = { expected = {
action: 'clear', action: 'clear',
indexName: 'my_index_object_ids' indexName: 'dedicated'
} }
expect(current) expect(current)
@ -387,7 +437,7 @@ describe(Jekyll::Algolia::Indexer) do
it 'should add new objectIDs to the dedicated index' do it 'should add new objectIDs to the dedicated index' do
expected = { expected = {
action: 'addObject', action: 'addObject',
indexName: 'my_index_object_ids', indexName: 'dedicated',
body: { content: %w[bar foo] } body: { content: %w[bar foo] }
} }
@ -398,6 +448,36 @@ describe(Jekyll::Algolia::Indexer) do
end end
end end
context 'when no update to the records' do
  # Local and remote ids are identical: no record needs adding or deleting
  let(:local_ids) { %w[foo bar] }
  let(:remote_ids) { %w[foo bar] }

  before do
    allow(current).to receive(:local_object_ids).and_return(local_ids)
    current.update_records(records)
  end

  context 'do not update the dedicated index if already exist' do
    let(:has_dedicated_index) { true }

    it { expect(current).to_not have_received(:execute_operations) }
  end

  context 'create the dedicated index if does not yet exist' do
    let(:has_dedicated_index) { false }

    it do
      expect(current)
        .to have_received(:execute_operations) do |operations|
          # First the dedicated index is cleared, then the ids are pushed
          clear_operation, add_operation = operations
          expect(clear_operation).to include(action: 'clear')
          expect(clear_operation).to include(indexName: 'dedicated')
          expect(add_operation).to include(action: 'addObject')
          expect(add_operation).to include(body: { content: %w[foo bar] })
        end
    end
  end
end
context 'storing ids by group of 100' do context 'storing ids by group of 100' do
let(:records) do let(:records) do
records = [] records = []
@ -413,7 +493,7 @@ describe(Jekyll::Algolia::Indexer) do
expect(current) expect(current)
.to have_received(:execute_operations) do |operations| .to have_received(:execute_operations) do |operations|
dedicated_index_operations = operations.select do |operation| dedicated_index_operations = operations.select do |operation|
operation[:indexName] == 'my_index_object_ids' && operation[:indexName] == 'dedicated' &&
operation[:action] == 'addObject' operation[:action] == 'addObject'
end end
expect(dedicated_index_operations.length).to eq 2 expect(dedicated_index_operations.length).to eq 2
@ -437,13 +517,32 @@ describe(Jekyll::Algolia::Indexer) do
.and_return(indexing_batch_size) .and_return(indexing_batch_size)
end end
context 'when running in dry run mode' do
  let(:dry_run) { true }

  before do
    current.execute_operations(operations)
  end

  # No batch call is expected in dry run mode
  it { expect(::Algolia).to_not have_received(:batch!) }
end
context 'when running an empty set of operations' do
  let(:operations) { [] }

  before do
    current.execute_operations(operations)
  end

  # No batch call is expected when there is nothing to send
  it { expect(::Algolia).to_not have_received(:batch!) }
end
context 'split in smaller batches if too many operations' do context 'split in smaller batches if too many operations' do
let(:indexing_batch_size) { 1 } let(:indexing_batch_size) { 1 }
before { current.execute_operations(operations) } before { current.execute_operations(operations) }
it do it do
# expect(::Algolia).to have_received(:batch!).twice
expect(::Algolia) expect(::Algolia)
.to have_received(:batch!) .to have_received(:batch!)
.ordered .ordered