fix(record_too_big): Better debug of records too big

This commit is contained in:
Pixelastic 2018-02-12 17:42:08 +01:00
parent 61bb7314e4
commit 918524009d
6 changed files with 99 additions and 19 deletions

View File

@ -9,16 +9,21 @@ W: should not weigh more than {size_limit}. One of your records weighs {size}
W: and has been rejected.
W:
W: Here is more information about the rejected record:
W: {
W: "objectID": "{object_id}",
W: "title": "{object_title}",
W: "url": "{object_url}",
W: "text": "{object_hint}…",
W: […]
W: }
W:
I: This issue is sometimes caused by malformed HTML preventing the parser from
I: correctly grabbing the content of the nodes.
W: objectID: {object_id}
W: title: {object_title}
W: url: {object_url}
W:
W: Complete log of the record has been extracted to:
W: {record_log_path}
W:
W: Most probable keys causing the issue:
W: {object_hint}
W:
I: This issue can be caused by malformed HTML preventing the parser from
I: correctly grabbing the content of the nodes. Double check that the page actually renders
I: correctly with a regular `jekyll build`.
I:
I:
I: If you're having trouble solving this issue, feel free to file a bug on
I: GitHub, ideally with a link to a repository where we can reproduce the issue.

View File

@ -18,7 +18,6 @@ module Jekyll
# happened to the display
def self.stop(error, context = {})
Logger.verbose("E:[jekyll-algolia] Raw error: #{error}")
Logger.verbose("E:[jekyll-algolia] Context: #{context}")
identified_error = identify(error, context)
@ -172,9 +171,27 @@ module Jekyll
}
end
# Public: Build a human-readable summary of the largest attributes of
# a record
#
# record - The record hash to analyze
#
# Returns a comma-separated string listing the three heaviest keys with
# their sizes in Kb. Displayed by the `record_too_big` error so users can
# see which attributes are inflating the record.
def self.readable_largest_record_keys(record)
  # Measure each attribute by the length of its string representation
  bytes_per_key = record.map { |key, value| [key, value.to_s.length] }
  heaviest = bytes_per_key.sort_by { |_, bytes| bytes }.reverse.first(3)
  heaviest
    .map { |key, bytes| "#{key} (#{Filesize.from("#{bytes} B").to_s('Kb')})" }
    .join(', ')
end
# Public: Check if the sent records are not too big
#
# context[:records] - list of records to push
# context[:records] - list of records sent in the batch
#
# Records cannot weigh more than 10Kb. If we're getting this error it
# means that one of the records is too big, so we'll try to give
@ -187,17 +204,25 @@ module Jekyll
# Getting the record size
size, = /.*size=(.*) bytes.*/.match(message).captures
size = Filesize.from("#{size} B").pretty
size = Filesize.from("#{size} B").to_s('Kb')
object_id = details['objectID']
# Getting record details
record = Utils.find_by_key(context[:records], :objectID, object_id)
probable_wrong_keys = readable_largest_record_keys(record)
# Writing the full record to disk for inspection
record_log_path = Logger.write_to_file(
"jekyll-algolia-record-too-big-#{object_id}.log",
JSON.pretty_generate(record)
)
{
'object_id' => object_id,
'object_title' => record[:title],
'object_url' => record[:url],
'object_hint' => record[:content][0..100],
'probable_wrong_keys' => probable_wrong_keys,
'record_log_path' => record_log_path,
'nodes_to_index' => Configurator.algolia('nodes_to_index'),
'size' => size,
'size_limit' => '10 Kb'

View File

@ -151,7 +151,10 @@ module Jekyll
begin
::Algolia.batch!(slice)
rescue StandardError => error
ErrorHandler.stop(error)
records = slice.map do |record|
record[:body]
end
ErrorHandler.stop(error, records: records)
end
end
end

View File

@ -52,6 +52,16 @@ module Jekyll
log(line)
end
# Public: Write the specified content to a file in the source directory
#
# filename - the file basename
# content - the actual content of the file
#
# Returns the full path of the file that was written.
def self.write_to_file(filename, content)
  # Resolve the destination inside the configured source directory,
  # write the content, and hand the path back to the caller.
  File.join(Configurator.get('source'), filename).tap do |destination|
    File.write(destination, content)
  end
end
# Public: Displays a helpful error message for one of the known errors
#
# message_id: A string identifying a known message

View File

@ -94,6 +94,17 @@ describe(Jekyll::Algolia::ErrorHandler) do
end
end
# Spec: the three largest keys should be listed biggest-first with their
# sizes formatted in Kb; the fourth (small) key must be excluded.
describe '.readable_largest_record_keys' do
let(:record) { { foo: foo, bar: bar, baz: baz, small: 'xxx' } }
let(:foo) { 'x' * 1000 }
let(:bar) { 'x' * 10_000 }
let(:baz) { 'x' * 100_000 }
subject { current.readable_largest_record_keys(record) }
it { should eq 'baz (100.00 Kb), bar (10.00 Kb), foo (1.00 Kb)' }
end
describe '.identify' do
subject { current.identify(error, context) }
@ -156,7 +167,7 @@ describe(Jekyll::Algolia::ErrorHandler) do
'400: Cannot POST to '\
'https://MY_APP_ID.algolia.net/1/indexes/my_index/batch: '\
'{"message":"Record at the position 3 '\
'objectID=deadbeef is too big size=1091966 bytes. '\
'objectID=deadbeef is too big size=109196 bytes. '\
'Contact us if you need an extended quota","position":3,'\
'"objectID":"deadbeef","status":400} (400)'
end
@ -167,28 +178,37 @@ describe(Jekyll::Algolia::ErrorHandler) do
title: 'Page title',
url: '/path/to/file.ext',
# rubocop:disable Metrics/LineLength
content: 'A very long text that is obviously too long to fit in one record, but that would be too long to actually display in the error message as wel so we will cut it at 100 characters.'
content: 'A very long text that is obviously too long to fit in one record, but that would be too long to actually display in the error message as well so we will cut it at 100 characters.'
# rubocop:enable Metrics/LineLength
},
{ objectID: 'foo' }
] }
end
let(:record_log_path) { '/source/output.log' }
let(:probable_wrong_keys) { 'foo, bar, baz' }
before do
allow(configurator)
.to receive(:algolia)
.with('nodes_to_index')
.and_return('p,li,foo')
expect(logger)
.to receive(:write_to_file)
.and_return(record_log_path)
expect(current)
.to receive(:readable_largest_record_keys)
.and_return(probable_wrong_keys)
end
it { should include(name: 'record_too_big') }
it do
details = subject[:details]
expect(details).to include('object_id' => 'deadbeef')
expect(details).to include('object_title' => 'Page title')
expect(details).to include('object_url' => '/path/to/file.ext')
expect(details['object_hint']).to match(/^A very long text/)
expect(details['object_hint']).to match(/.{0,100}/)
expect(details).to include('size' => '1.04 MiB')
expect(details).to include('record_log_path' => record_log_path)
expect(details).to include('probable_wrong_keys' => probable_wrong_keys)
expect(details).to include('size' => '109.20 Kb')
expect(details).to include('size_limit' => '10 Kb')
expect(details).to include('nodes_to_index' => 'p,li,foo')
end

View File

@ -93,5 +93,22 @@ describe(Jekyll::Algolia::Logger) do
end
end
end
# Spec: the file should be written under the configured source directory
# and the method should return the resulting full path.
describe '.write_to_file' do
let(:filename) { 'output.log' }
let(:content) { 'content' }
let(:source) { '/source/' }
before do
expect(configurator)
.to receive(:get).with('source').and_return(source)
# Stub File.write so the spec does not touch the filesystem
expect(File).to receive(:write)
.with('/source/output.log', content)
end
subject { current.write_to_file(filename, content) }
it { should eq '/source/output.log' }
end
end
# rubocop:enable Metrics/BlockLength