# Pasted-source metadata (commented out so the file parses as Ruby):
# 2017-11-16 17:08:28 +01:00 — 229 lines, 7.5 KiB, Ruby

require 'algoliasearch'
module Jekyll
  module Algolia
    # Module to push records to Algolia and configure the index
    module Indexer
      include Jekyll::Algolia

      # Public: Init the module
      #
      # This call will instantiate the Algolia API client, set the custom
      # User-Agent and give an easy access to the main index
      def self.init
        ::Algolia.init(
          application_id: Configurator.application_id,
          api_key: Configurator.api_key
        )
        set_user_agent
      end

      # Public: Set the User-Agent to send to the API
      #
      # Every integration should follow the "YYY Integration" pattern, and
      # every API client should follow the "Algolia for YYY" pattern. Even if
      # each integration version is pinned to a specific API client version, we
      # are explicit in defining it to help debug from the dashboard.
      def self.set_user_agent
        user_agent = [
          "Jekyll Integration (#{VERSION})",
          "Algolia for Ruby (#{::Algolia::VERSION})",
          "Jekyll (#{::Jekyll::VERSION})",
          "Ruby (#{RUBY_VERSION})"
        ].join('; ')

        ::Algolia.set_extra_header('User-Agent', user_agent)
      end

      # Public: Returns an Algolia Index object from an index name
      #
      # index_name - String name of the index
      def self.index(index_name)
        ::Algolia::Index.new(index_name)
      end

      # Public: Update records of the specified index
      #
      # index - Algolia Index to update
      # records - Array of records to update
      #
      # New records will be automatically added. Technically existing records
      # should be updated but this case should never happen as changing a record
      # content will change its objectID as well.
      #
      # Does nothing in dry run mode
      def self.update_records(index, records)
        # Push in slices so each API call stays within payload limits
        batch_size = Configurator.algolia('indexing_batch_size')
        records.each_slice(batch_size) do |batch|
          Logger.log("I:Pushing #{batch.size} records")
          next if Configurator.dry_run?
          begin
            index.add_objects!(batch)
          rescue StandardError => error
            ErrorHandler.stop(error, records: records)
          end
        end
      end

      # Public: Delete records whose objectIDs are passed
      #
      # index - Algolia Index to target
      # ids - Array of objectIDs to delete
      #
      # Does nothing in dry run mode
      def self.delete_records_by_id(index, ids)
        Logger.log("I:Deleting #{ids.length} records")
        return if Configurator.dry_run?

        begin
          index.delete_objects!(ids)
        rescue StandardError => error
          ErrorHandler.stop(error)
        end
      end

      # Public: Returns an array of all the objectIDs in the index
      #
      # index - Algolia Index to target
      #
      # The returned array is sorted. It won't have any impact on the way it is
      # processed, but makes debugging easier when comparing arrays is needed.
      def self.remote_object_ids(index)
        list = []
        # Browse iterates over every record of the index, retrieving only the
        # objectID attribute to keep the payload small
        index.browse(attributesToRetrieve: 'objectID') do |hit|
          list << hit['objectID']
        end
        list.sort
      end

      # Public: Returns an array of the local objectIDs
      #
      # records - Array of all local records
      def self.local_object_ids(records)
        records.map { |record| record[:objectID] }.sort
      end

      # Public: Update settings of the index
      #
      # index - The Algolia Index
      # settings - The hash of settings to pass to the index
      #
      # Does nothing in dry run mode
      def self.update_settings(index, settings)
        Logger.verbose('I:Updating settings')
        return if Configurator.dry_run?

        begin
          index.set_settings(settings)
        rescue StandardError => error
          ErrorHandler.stop(error, settings: settings)
        end
      end

      # Public: Index content following the `diff` indexing mode
      #
      # records - Array of local records
      #
      # The `diff` indexing mode will only push new content to the index and
      # remove old content from it. It won't touch records that haven't been
      # updated. It will be a bit slower as it will first need to get the list
      # of all records in the index, but it will consume less operations.
      def self.run_diff_mode(records)
        index = index(Configurator.index_name)

        # Update settings
        update_settings(index, Configurator.settings)

        # Getting list of objectID in remote and locally
        remote_ids = remote_object_ids(index)
        local_ids = local_object_ids(records)

        # Delete remote records that are no longer available locally
        delete_records_by_id(index, remote_ids - local_ids)

        # Add only records that are not yet already in the remote.
        # A hash lookup keeps this O(n) instead of an O(n) Array#include?
        # scan per record (O(n^2) overall on large sites).
        new_records_ids = local_ids - remote_ids
        is_new = {}
        new_records_ids.each { |id| is_new[id] = true }
        new_records = records.select do |record|
          is_new.key?(record[:objectID])
        end
        update_records(index, new_records)
      end

      # Public: Get the settings of the remote index
      #
      # index - The Algolia Index
      def self.remote_settings(index)
        index.get_settings
      rescue StandardError => error
        ErrorHandler.stop(error)
      end

      # Public: Rename an index
      #
      # old_name - Current name of the index
      # new_name - New name of the index
      #
      # Does nothing in dry run mode
      def self.rename_index(old_name, new_name)
        Logger.verbose("I:Renaming `#{old_name}` to `#{new_name}`")
        return if Configurator.dry_run?

        begin
          ::Algolia.move_index(old_name, new_name)
        rescue StandardError => error
          ErrorHandler.stop(error, new_name: new_name)
        end
      end

      # Public: Index content following the `atomic` indexing mode
      #
      # records - Array of records to push
      #
      # The `atomic` indexing mode will push all records to a brand new index,
      # configure it, and then overwrite the previous index with this new one.
      # For the end-user, it will make all the changes in one go, making sure
      # people are always searching into a fully configured index. It will
      # consume more operations, but will never leave the index in a transient
      # state.
      def self.run_atomic_mode(records)
        index_name = Configurator.index_name
        index = index(index_name)
        index_tmp_name = "#{Configurator.index_name}_tmp"
        index_tmp = index(index_tmp_name)
        Logger.verbose("I:Using `#{index_tmp_name}` as temporary index")

        # Copying original settings to the new index
        remote_settings = remote_settings(index)
        new_settings = remote_settings.merge(Configurator.settings)
        update_settings(index_tmp, new_settings)

        # Pushing everything to a brand new index
        update_records(index_tmp, records)

        # Renaming the new index in place of the old
        rename_index(index_tmp_name, index_name)
      end

      # Public: Push all records to Algolia and configure the index
      #
      # records - Records to push
      def self.run(records)
        init
        Logger.verbose("I:Extracted #{records.length} records")

        indexing_mode = Configurator.indexing_mode
        Logger.verbose("I:Indexing mode: #{indexing_mode}")
        case indexing_mode
        when 'diff'
          run_diff_mode(records)
        when 'atomic'
          run_atomic_mode(records)
        end

        Logger.log('I:✔ Indexing complete')
      end
    end
  end
end