jekyll-algolia/lib/jekyll/algolia/error_handler.rb

293 lines
9.2 KiB
Ruby

# frozen_string_literal: true
require 'verbal_expressions'
require 'filesize'
require 'cgi'
module Jekyll
module Algolia
# Catch API errors and display messages
module ErrorHandler
include Jekyll::Algolia
# Public: Stop the execution of the plugin and display if possible
# a human-readable error message
#
# error - The caught error
# context - A hash of values that will be passed from where the error
# happened to the display
def self.stop(error, context = {})
Logger.verbose("E:[jekyll-algolia] Raw error: #{error}")
identified_error = identify(error, context)
if identified_error == false
Logger.log('E:[jekyll-algolia] Error:')
Logger.log("E:#{error}")
else
Logger.known_message(
identified_error[:name],
identified_error[:details]
)
end
exit 1
end
# Public: Will identify the error and return its internal name
#
# error - The caught error
# context - A hash of additional information that can be passed from the
# code intercepting the user
#
# It will parse in order all potential known issues until it finds one
# that matches. Returns false if no match, or a hash of :name and :details
# further identifying the issue.
def self.identify(error, context = {})
known_errors = %w[
unknown_application_id
invalid_credentials
record_too_big
too_many_records
unknown_setting
invalid_index_name
]
# Checking the errors against our known list
known_errors.each do |potential_error|
error_check = send("#{potential_error}?", error, context)
next if error_check == false
return {
name: potential_error,
details: error_check
}
end
false
end
# Public: Parses an Algolia error message into a hash of its content
#
# message - The raw message as returned by the API
#
# Returns a hash of all parts of the message, to be more easily consumed
# by our error matchers
def self.error_hash(message)
message = message.delete("\n")
# Ex: Cannot PUT to https://appid.algolia.net/1/indexes/index_name/settings:
# {"message":"Invalid Application-ID or API key","status":403} (403)
regex = VerEx.new do
find 'Cannot '
capture('verb') { word }
find ' to '
capture('scheme') { word }
find '://'
capture('application_id') { word }
anything_but '/'
find '/'
capture('api_version') { digit }
find '/'
capture('api_section') { word }
find '/'
capture('index_name') do
anything_but('/')
end
find '/'
capture do
capture('api_action') { word }
maybe '?'
capture('query_parameters') do
anything_but(':')
end
end
find ': '
capture('json') do
find '{'
anything_but('}')
find '}'
end
find ' ('
capture('http_error') { word }
find ')'
end
matches = regex.match(message)
return false unless matches
# Convert matches to a hash
hash = {}
matches.names.each do |name|
hash[name] = matches[name]
end
hash['api_version'] = hash['api_version'].to_i
hash['http_error'] = hash['http_error'].to_i
# Merging the JSON key directly in the answer
hash = hash.merge(JSON.parse(hash['json']))
hash.delete('json')
# Merging the query parameters in the answer
CGI.parse(hash['query_parameters']).each do |key, values|
hash[key] = values[0]
end
hash.delete('query_parameters')
hash
end
# Public: Returns a string explaining which attributes are the largest in
# the record
#
# record - The record hash to analyze
#
# This will be used on the `record_too_big` error, to guide users in
# finding which record is causing trouble
def self.readable_largest_record_keys(record)
keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
output = []
largest_keys.each do |key, size|
size = Filesize.from("#{size} B").to_s('Kb')
output << "#{key} (#{size})"
end
output.join(', ')
end
# Public: Check if the application id is available
#
# _context - Not used
#
# If the call to the cluster fails, chances are that the application ID
# is invalid. As we cannot actually contact the server, the error is raw
# and does not follow our error spec
def self.unknown_application_id?(error, _context = {})
message = error.message
return false if message !~ /^Cannot reach any host/
matches = /.*\((.*)\.algolia.net.*/.match(message)
# The API will browse on APP_ID-dsn, but push/delete on APP_ID only
# We need to catch both potential errors
app_id = matches[1].gsub(/-dsn$/, '')
{ 'application_id' => app_id }
end
# Public: Check if the credentials are working
#
# _context - Not used
#
# Application ID and API key submitted don't match any credentials known
def self.invalid_credentials?(error, _context = {})
details = error_hash(error.message)
return false if details == false
if details['message'] != 'Invalid Application-ID or API key'
return false
end
{
'application_id' => details['application_id']
}
end
# Public: Check if the sent records are not too big
#
# context[:records] - list of records sent in the batch
#
# Records cannot weight more that 10Kb. If we're getting this error it
# means that one of the records is too big, so we'll try to give
# informations about it so the user can debug it.
def self.record_too_big?(error, context = {})
details = error_hash(error.message)
return false if details == false
message = details['message']
return false if message !~ /^Record .* is too big .*/
# Getting the record size
size, = /.*size=(.*) bytes.*/.match(message).captures
size = Filesize.from("#{size} B").to_s('Kb')
object_id = details['objectID']
# Finding the record in all the operations
operation = context[:operations].find do |o|
o[:action] == 'addObject' && o[:body][:objectID] == object_id
end
record = operation[:body]
probable_wrong_keys = readable_largest_record_keys(record)
# Writing the full record to disk for inspection
record_log_path = Logger.write_to_file(
"jekyll-algolia-record-too-big-#{object_id}.log",
JSON.pretty_generate(record)
)
{
'object_id' => object_id,
'object_title' => record[:title],
'object_url' => record[:url],
'probable_wrong_keys' => probable_wrong_keys,
'record_log_path' => record_log_path,
'nodes_to_index' => Configurator.algolia('nodes_to_index'),
'size' => size,
'size_limit' => '10 Kb'
}
end
# Public: Check if one of the index settings is invalid
#
# context[:settings] - The settings passed to update the index
#
# The API will block any call that tries to update a setting value that is
# not available. We'll tell the user which one so they can fix their
# issue.
def self.unknown_setting?(error, context = {})
details = error_hash(error.message)
return false if details == false
message = details['message']
return false if message !~ /^Invalid object attributes.*/
# Getting the unknown setting name
regex = /^Invalid object attributes: (.*) near line.*/
setting_name, = regex.match(message).captures
setting_value = context[:settings][setting_name]
{
'setting_name' => setting_name,
'setting_value' => setting_value
}
end
# Public: Check if the index name is invalid
#
# Some characters are forbidden in index names
def self.invalid_index_name?(error, _context = {})
details = error_hash(error.message)
return false if details == false
message = details['message']
return false if message !~ /^indexName is not valid.*/
{
'index_name' => Configurator.index_name
}
end
# Public: Check if the application has too many records
#
# We're trying to push too many records and it goes over quota
def self.too_many_records?(error, _context = {})
details = error_hash(error.message)
return false if details == false
message = details['message']
return false if message !~ /^Record quota exceeded.*/
{}
end
end
end
end