Adding diff indexing mode + tests
This commit is contained in:
parent
40740ccc6f
commit
02adfc3fc1
@ -9,6 +9,7 @@ module Jekyll
|
||||
'extensions_to_index' => nil,
|
||||
'files_to_exclude' => nil,
|
||||
'nodes_to_index' => 'p',
|
||||
'indexing_batch_size' => 1000,
|
||||
'settings' => {
|
||||
'distinct' => true,
|
||||
'attributeForDistinct' => 'url',
|
||||
|
||||
@ -5,32 +5,58 @@ module Jekyll
|
||||
# Module to push records to Algolia and configure the index
|
||||
module Indexer
|
||||
include Jekyll::Algolia
|
||||
@index = nil
|
||||
|
||||
# Public: Init the module
|
||||
#
|
||||
# This call will instantiate the Algolia API client, set the custom
|
||||
# User Agent and give an easy access to the main index
|
||||
def self.init
  # Credentials are read from the Configurator and handed to the Algolia
  # client as keyword options
  credentials = {
    application_id: Configurator.application_id,
    api_key: Configurator.api_key
  }
  ::Algolia.init(**credentials)

  # Keep a handle on the configured index on the module itself
  @index = index(Configurator.index_name)

  set_user_agent
end
|
||||
|
||||
def self.set_user_agent
  # No-op: the body is empty, so calling this currently changes nothing
end
|
||||
|
||||
# Public: Returns an Algolia Index object from an index name
|
||||
#
|
||||
# index_name - String name of the index
|
||||
def self.index(index_name)
  # A new Algolia::Index object is built on every call; nothing is cached here
  ::Algolia::Index.new(index_name)
end
|
||||
|
||||
# Public: Update records of the specified index
|
||||
#
|
||||
# index - Algolia Index to update
|
||||
# records - Array of records to update
|
||||
#
|
||||
# New records will be automatically added. Technically existing records
|
||||
# should be updated but this case should never happen as changing a record's
|
||||
# content will change its objectID as well.
|
||||
def self.update_records(index, records)
  # Records are only ever sent in slices of the configured size. Pushing the
  # whole array in a single call beforehand would index every record twice
  # and defeat the purpose of batching.
  batch_size = Configurator.algolia('indexing_batch_size')

  # An empty records array yields no slice, so no call is made at all
  records.each_slice(batch_size) do |batch|
    index.add_objects!(batch)
  end
end
|
||||
|
||||
# Public: Delete records whose objectIDs are passed
|
||||
#
|
||||
# index - Algolia Index to target
|
||||
# ids - Array of objectIDs to delete
|
||||
def self.delete_records_by_id(index, ids)
  # Nothing to delete (e.g. remote and local are already in sync): do not
  # issue a delete call for an empty list. Returns nil in that case.
  return if ids.empty?

  index.delete_objects!(ids)
end
|
||||
|
||||
# Public: Returns an array of all the objectIDs in the index
|
||||
#
|
||||
# index - Algolia Index to target
|
||||
#
|
||||
# The returned array is sorted. It won't have any impact on the way it is
|
||||
# processed, but makes debugging easier when comparing arrays is needed.
|
||||
def self.remote_object_ids(index)
|
||||
list = []
|
||||
index.browse(attributesToRetrieve: 'objectID') do |hit|
|
||||
@ -39,36 +65,53 @@ module Jekyll
|
||||
list.sort
|
||||
end
|
||||
|
||||
# Public: Returns an array of the local objectIDs
#
# records - Array of all local records; each one is read through its
#           :objectID key
#
# The returned array is sorted.
def self.local_object_ids(records)
  records.map { |record| record[:objectID] }.sort
end
|
||||
|
||||
# Public: Index content following the "diff" indexing mode
|
||||
#
|
||||
# records - Array of local records
|
||||
#
|
||||
# The "diff" indexing mode will only push new content to the index and
|
||||
# remove old content from it. It won't touch records that haven't been
|
||||
# updated. It will be a bit slower as it will first need to get the list
|
||||
# of all records in the index, but it will consume fewer operations than
|
||||
# the "atomic" indexing mode.
|
||||
def self.run_diff_mode(records)
  # Every step below targets this single index object, exactly once
  target = index(Configurator.index_name)

  # Getting list of objectIDs in remote and locally
  remote_ids = remote_object_ids(target)
  local_ids = local_object_ids(records)

  # Delete remote records that are no longer available locally
  delete_records_by_id(target, remote_ids - local_ids)

  # Add only records that are not yet in the remote. The ids to push are put
  # in a Hash so that each record is checked with a key lookup instead of
  # rescanning an array of ids.
  missing = (local_ids - remote_ids).each_with_object({}) do |id, lookup|
    lookup[id] = true
  end
  new_records = records.select { |record| missing.key?(record[:objectID]) }
  update_records(target, new_records)

  # Update settings
  update_settings(target, Configurator.settings)
end
|
||||
|
||||
# Public: Push settings to the specified index
#
# index    - Algolia Index to configure
# settings - Settings to apply; handed to the index's set_settings as is
def self.update_settings(index, settings)
  index.set_settings(settings)
end
|
||||
|
||||
|
||||
# Public: Push all records to Algolia and configure the index
|
||||
#
|
||||
# records - Records to push
|
||||
def self.run(records)
|
||||
init
|
||||
|
||||
indexing_diff(records)
|
||||
run_diff_mode(records)
|
||||
|
||||
# checker = AlgoliaSearchCredentialChecker.new(@config)
|
||||
# checker.assert_valid
|
||||
|
||||
168
spec/jekyll/algolia/indexer_spec.rb
Normal file
168
spec/jekyll/algolia/indexer_spec.rb
Normal file
@ -0,0 +1,168 @@
|
||||
# rubocop:disable Metrics/BlockLength
|
||||
require 'spec_helper'
|
||||
|
||||
describe(Jekyll::Algolia::Indexer) do
  let(:current) { Jekyll::Algolia::Indexer }
  let(:configurator) { Jekyll::Algolia::Configurator }

  context '.init' do
    # The Algolia client and the user agent hook are stubbed so that only the
    # calls made by .init are observed
    before do
      allow(configurator).to receive(:application_id).and_return('app_id')
      allow(configurator).to receive(:api_key).and_return('api_key')
      allow(::Algolia).to receive(:init)
      allow(current).to receive(:set_user_agent)
    end

    before { current.init }

    it 'should instanciate Algolia search with application id and api_key' do
      expect(::Algolia)
        .to have_received(:init)
        .with(hash_including(
                application_id: 'app_id',
                api_key: 'api_key'
        ))
    end
    it 'should set the user agent' do
      expect(current).to have_received(:set_user_agent)
    end
  end

  context '.index' do
    subject { current.index(input) }

    let(:input) { 'index_name' }
    before do
      expect(::Algolia::Index)
        .to receive(:new)
        .with('index_name')
        .and_return('custom_index')
    end

    it { should eq 'custom_index' }
  end

  context 'update_records' do
    let(:index) { double('Algolia::Index', add_objects!: nil) }

    context 'with a small number of records' do
      let(:records) { Array.new(10, foo: 'bar') }
      before { current.update_records(index, records) }
      # Everything fits in one batch: a single call with all the records
      it do
        expect(index)
          .to have_received(:add_objects!)
          .with(records)
          .once
      end
    end
    context 'with a large number of records' do
      let(:records) { Array.new(2500, foo: 'bar') }
      before { current.update_records(index, records) }
      # Relies on the configured batch size (not stubbed here): 2500 records
      # are expected to be split into 3 calls
      it do
        expect(index)
          .to have_received(:add_objects!)
          .exactly(3).times
      end
    end
    context 'with a custom batch size' do
      let(:records) { Array.new(2500, foo: 'bar') }
      before do
        allow(configurator)
          .to receive(:algolia)
          .with('indexing_batch_size')
          .and_return(500)
      end
      before { current.update_records(index, records) }
      # 2500 records in batches of 500
      it do
        expect(index)
          .to have_received(:add_objects!)
          .exactly(5).times
      end
    end
  end

  context 'delete_records_by_id' do
    let(:index) { double('Algolia::Index', delete_objects!: nil) }
    let(:ids) { %w[foo bar baz] }
    before { current.delete_records_by_id(index, ids) }
    it do
      expect(index)
        .to have_received(:delete_objects!)
        .with(ids)
    end
  end

  context '.remote_object_ids' do
    subject { current.remote_object_ids(index) }

    let(:index) { double('Algolia::Index').as_null_object }

    # browse yields one hit per remote record
    before do
      expect(index)
        .to receive(:browse)
        .with(attributesToRetrieve: 'objectID')
        .and_yield('objectID' => 'foo')
        .and_yield('objectID' => 'bar')
    end

    it { should include('foo') }
    it { should include('bar') }
    # Should be ordered
    it { should eq %w[bar foo] }
  end

  context '.local_object_ids' do
    subject { current.local_object_ids(records) }

    let(:records) { [{ objectID: 'foo' }, { objectID: 'bar' }] }

    it { should include('foo') }
    it { should include('bar') }
    # Should be ordered
    it { should eq %w[bar foo] }
  end

  context '.run_diff_mode' do
    # A dedicated double stands for the index returned by .index, so that the
    # examples can check that every step targets that very object
    let(:index) { double('Algolia::Index') }
    let(:local_records) do
      [
        { objectID: 'foo' },
        { objectID: 'bar' }
      ]
    end
    # 'foo' exists on both sides, 'baz' only remotely, 'bar' only locally
    let(:remote_ids) { %w[foo baz] }
    before do
      allow(configurator).to receive(:index_name).and_return('my_index')
      allow(configurator).to receive(:settings).and_return('my_settings')
      allow(current).to receive(:index).with('my_index').and_return(index)
      allow(current).to receive(:remote_object_ids).and_return(remote_ids)
      allow(current).to receive(:delete_records_by_id)
      allow(current).to receive(:update_records)
      allow(current).to receive(:update_settings)
    end

    before { current.run_diff_mode(local_records) }

    it 'should delete remote records that no longer exist locally' do
      expect(current)
        .to have_received(:delete_records_by_id)
        .with(index, ['baz'])
    end
    it 'should only push records missing from the remote index' do
      expect(current)
        .to have_received(:update_records)
        .with(index, [{ objectID: 'bar' }])
    end
    it 'should update the settings of the index' do
      expect(current)
        .to have_received(:update_settings)
        .with(index, 'my_settings')
    end
  end

  context '.update_settings' do
    let(:index) { double('Algolia::Index', set_settings: nil) }
    let(:settings) { { 'foo' => 'bar' } }
    before { current.update_settings(index, settings) }

    it do
      expect(index).to have_received(:set_settings).with(settings)
    end
  end
end
|
||||
Loading…
x
Reference in New Issue
Block a user