Adding diff indexing mode + tests

This commit is contained in:
Pixelastic 2017-11-14 16:47:26 +01:00
parent 40740ccc6f
commit 02adfc3fc1
3 changed files with 224 additions and 12 deletions

View File

@ -9,6 +9,7 @@ module Jekyll
'extensions_to_index' => nil,
'files_to_exclude' => nil,
'nodes_to_index' => 'p',
'indexing_batch_size' => 1000,
'settings' => {
'distinct' => true,
'attributeForDistinct' => 'url',

View File

@ -5,32 +5,58 @@ module Jekyll
# Module to push records to Algolia and configure the index
module Indexer
include Jekyll::Algolia
@index = nil
# Public: Init the module
#
# This call will instantiate the Algolia API client, set the custom
# User Agent and give easy access to the main index
def self.init
  # Read both credentials from the Configurator, then hand them to the
  # Algolia API client
  app_id = Configurator.application_id
  key = Configurator.api_key
  ::Algolia.init(application_id: app_id, api_key: key)

  # Keep a module-level handle on the index named in the configuration
  @index = index(Configurator.index_name)

  set_user_agent
end
def self.set_user_agent
  # Intentionally empty for now: this method has no body and returns nil.
end
# Public: Returns an Algolia Index object from an index name
#
# index_name - String name of the index
def self.index(index_name)
# Builds a new ::Algolia::Index for the given name and returns it as-is
::Algolia::Index.new(index_name)
end
# Public: Update records of the specified index
#
# index - Algolia Index to update
# records - Array of records to update
#
# New records will be automatically added. Technically existing records
# should be updated but this case should never happen as changing a record
# content will change its objectID as well.
def self.update_records(index, records)
  # Records are pushed in slices of the configured size, so each
  # add_objects! call carries at most `batch_size` records and every record
  # is sent exactly once. An empty array results in no call at all.
  batch_size = Configurator.algolia('indexing_batch_size')
  records.each_slice(batch_size) do |batch|
    index.add_objects!(batch)
  end
end
# Public: Delete records whose objectIDs are passed
#
# index - Algolia Index to target
# ids - Array of objectIDs to delete
def self.delete_records_by_id(index, ids)
  # Nothing to delete: skip the API call entirely instead of sending an
  # empty list of objectIDs.
  return if ids.nil? || ids.empty?

  index.delete_objects!(ids)
end
# Public: Returns an array of all the objectIDs in the index
#
# index - Algolia Index to target
#
# The returned array is sorted. It won't have any impact on the way it is
# processed, but makes debugging easier when comparing arrays is needed.
def self.remote_object_ids(index)
list = []
index.browse(attributesToRetrieve: 'objectID') do |hit|
@ -39,36 +65,53 @@ module Jekyll
list.sort
end
def self.indexing_diff(records)
# Public: Returns an array of the local objectIDs
#
# records - Array of all local records
def self.local_object_ids(records)
  # Pull the :objectID out of every record, then hand the list back sorted
  object_ids = records.map do |entry|
    entry[:objectID]
  end
  object_ids.sort
end
# Public: Index content following the "diff" indexing mode
#
# records - Array of local records
#
# The "diff" indexing mode will only push new content to the index and
# remove old content from it. It won't touch records that haven't been
# updated. It will be a bit slower as it will first need to get the list
# of all records in the index, but it will consume fewer operations than
# the "atomic" indexing mode.
def self.run_diff_mode(records)
  # Every step below works on the same index handle, resolved once from the
  # configured index name.
  target = index(Configurator.index_name)

  # Getting list of objectID in remote and locally
  remote_ids = remote_object_ids(target)
  local_ids = local_object_ids(records)

  # Delete remote records that are no longer available locally
  delete_records_by_id(target, remote_ids - local_ids)

  # Add only records that are not yet already in the remote. The new ids are
  # stored as hash keys so that each record is checked with a single lookup
  # instead of a scan of the whole id list.
  new_ids = {}
  (local_ids - remote_ids).each { |id| new_ids[id] = true }
  new_records = records.select do |record|
    new_ids.key?(record[:objectID])
  end
  update_records(target, new_records)

  # Update settings; the result of this call is the method's return value
  update_settings(target, Configurator.settings)
end
def self.update_settings(index, settings)
# Forwards the given settings to the index through its set_settings call
# and returns whatever that call returns
index.set_settings(settings)
end
# Public: Push all records to Algolia and configure the index
#
# records - Records to push
def self.run(records)
init
indexing_diff(records)
run_diff_mode(records)
# checker = AlgoliaSearchCredentialChecker.new(@config)
# checker.assert_valid

View File

@ -0,0 +1,168 @@
# rubocop:disable Metrics/BlockLength
require 'spec_helper'
# Specs for the Indexer module. The Algolia client, the Configurator and the
# index objects are replaced by stubs or doubles in the contexts below.
describe(Jekyll::Algolia::Indexer) do
# Shorthands for the module under test and for the Configurator it reads from
let(:current) { Jekyll::Algolia::Indexer }
let(:configurator) { Jekyll::Algolia::Configurator }
# init: credentials come from the Configurator and are passed to ::Algolia.init
context '.init' do
before do
allow(configurator).to receive(:application_id).and_return('app_id')
allow(configurator).to receive(:api_key).and_return('api_key')
allow(::Algolia).to receive(:init)
allow(current).to receive(:set_user_agent)
end
# Runs after the stubs above are in place
before { current.init }
it 'should instanciate Algolia search with application id and api_key' do
expect(::Algolia)
.to have_received(:init)
.with(hash_including(
application_id: 'app_id',
api_key: 'api_key'
))
end
it 'should set the user agent' do
expect(current).to have_received(:set_user_agent)
end
end
# index: the name is handed to ::Algolia::Index.new and its result returned
context '.index' do
subject { current.index(input) }
let(:input) { 'index_name' }
# This hook both stubs ::Algolia::Index.new and asserts that it is called
# with the index name
before do
expect(::Algolia::Index)
.to receive(:new)
.with('index_name')
.and_return('custom_index')
end
it { should eq 'custom_index' }
end
# update_records: records are sent through add_objects! in batches
context 'update_records' do
let(:index) { double('Algolia::Index', add_objects!: nil) }
# 10 records: a single add_objects! call carrying all of them
context 'with a small number of records' do
let(:records) { Array.new(10, foo: 'bar') }
before { current.update_records(index, records) }
it do
expect(index)
.to have_received(:add_objects!)
.with(records)
.once
end
end
# 2500 records are expected in 3 calls.
# NOTE(review): the Configurator is not stubbed in this context, so the
# count presumably relies on the configured batch size being 1000 - confirm.
context 'with a large number of records' do
let(:records) { Array.new(2500, foo: 'bar') }
before { current.update_records(index, records) }
it do
expect(index)
.to have_received(:add_objects!)
.exactly(3).times
end
end
# Batch size stubbed to 500: 2500 records are expected in 5 calls
context 'with a custom batch size' do
let(:records) { Array.new(2500, foo: 'bar') }
before do
allow(configurator)
.to receive(:algolia)
.with('indexing_batch_size')
.and_return(500)
end
before { current.update_records(index, records) }
it do
expect(index)
.to have_received(:add_objects!)
.exactly(5).times
end
end
end
# delete_records_by_id: the ids are forwarded to delete_objects!
context 'delete_records_by_id' do
let(:index) { double('Algolia::Index', delete_objects!: nil) }
let(:ids) { %w[foo bar baz] }
before { current.delete_records_by_id(index, ids) }
it do
expect(index)
.to have_received(:delete_objects!)
.with(ids)
end
end
# remote_object_ids: browse yields two hits (foo, then bar); the returned
# list must contain both objectIDs in sorted order
context '.remote_object_ids' do
subject { current.remote_object_ids(index) }
let(:index) { double('Algolia::Index').as_null_object }
# Stubs browse and asserts it is called with only the objectID attribute
before do
expect(index)
.to receive(:browse)
.with(attributesToRetrieve: 'objectID')
.and_yield('objectID' => 'foo')
.and_yield('objectID' => 'bar')
end
it { should include('foo') }
it { should include('bar') }
# Should be ordered
it { should eq %w[bar foo] }
end
# local_object_ids: objectIDs are read from the records and returned sorted
context '.local_object_ids' do
subject { current.local_object_ids(records) }
let(:records) { [{ objectID: 'foo' }, { objectID: 'bar' }] }
it { should include('foo') }
it { should include('bar') }
# Should be ordered
it { should eq %w[bar foo] }
end
# run_diff_mode: local has foo and bar, remote has foo and baz, so baz is
# expected to be deleted and bar to be pushed
context '.run_diff_mode' do
let(:local_records) do
[
{ objectID: 'foo' },
{ objectID: 'bar' }
]
end
let(:remote_ids) { %w[foo baz] }
# Every collaborator of run_diff_mode is stubbed; only the arguments it
# passes along are checked (the index argument is matched with `anything`)
before do
allow(current)
.to receive(:index)
.and_return(double('Algolia::Index', new: 'my_index'))
allow(current).to receive(:remote_object_ids).and_return(remote_ids)
allow(current).to receive(:delete_records_by_id)
allow(current).to receive(:update_records)
allow(current).to receive(:update_settings)
allow(configurator).to receive(:settings).and_return('my_settings')
end
before { current.run_diff_mode(local_records) }
it do
expect(current)
.to have_received(:delete_records_by_id)
.with(anything, ['baz'])
expect(current)
.to have_received(:update_records)
.with(anything, [{ objectID: 'bar' }])
expect(current)
.to have_received(:update_settings)
.with(anything, 'my_settings')
end
end
# update_settings: the settings hash is forwarded to set_settings
context '.update_settings' do
let(:index) { double('Algolia::Index', set_settings: nil) }
let(:settings) { { 'foo' => 'bar' } }
before { current.update_settings(index, settings) }
it do
expect(index).to have_received(:set_settings).with(settings)
end
end
end