fix(record_too_big): Better debug of records too big

Pixelastic 2018-02-12 17:42:08 +01:00
parent 61bb7314e4
commit 918524009d
6 changed files with 99 additions and 19 deletions

View File

@@ -9,16 +9,21 @@ W: should not weight more than {size_limit}. One of your records weights {size}
 W: and has been rejected.
 W:
 W: Here are more information about the rejected record:
-W: {
-W: "objectID": "{object_id}",
-W: "title": "{object_title}",
-W: "url": "{object_url}",
-W: "text": "{object_hint}…",
-W: […]
-W: }
 W:
-I: This issue is sometimes caused by malformed HTML preventing the parser to
-I: correctly grab the content of the nodes.
+W: objectID: {object_id}
+W: title: {object_title}
+W: url: {object_url}
+W:
+W: Complete log of the record has been extracted to:
+W: {record_log_path}
+W:
+W: Most probable keys causing the issue:
+W: {object_hint}
+W:
+I: This issue can be caused by malformed HTML preventing the parser to correctly
+I: grab the content of the nodes. Double check that the page actually renders
+I: correctly with a regular `jekyll build`.
+I:
 I:
 I: If you're having trouble solving this issue, feel free to file a bug on
 I: GitHub, ideally with a link to a repository where we can reproduce the issue.
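
For reference, here is roughly how the reworked warning reads once the placeholders are filled in; every value below is invented for illustration, not taken from a real run:

    W: Here are more information about the rejected record:
    W:
    W: objectID: deadbeef
    W: title: Page title
    W: url: /path/to/file.ext
    W:
    W: Complete log of the record has been extracted to:
    W: /my/site/jekyll-algolia-record-too-big-deadbeef.log
    W:
    W: Most probable keys causing the issue:
    W: content (95.20 Kb), excerpt (10.50 Kb), title (0.01 Kb)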

View File

@@ -18,7 +18,6 @@ module Jekyll
       # happened to the display
       def self.stop(error, context = {})
         Logger.verbose("E:[jekyll-algolia] Raw error: #{error}")
-        Logger.verbose("E:[jekyll-algolia] Context: #{context}")
         identified_error = identify(error, context)
@@ -172,9 +171,27 @@ module Jekyll
         }
       end

+      # Public: Returns a string explaining which attributes are the largest in
+      # the record
+      #
+      # record - The record hash to analyze
+      #
+      # This will be used on the `record_too_big` error, to guide users in
+      # finding which record is causing trouble
+      def self.readable_largest_record_keys(record)
+        keys = Hash[record.map { |key, value| [key, value.to_s.length] }]
+        largest_keys = keys.sort_by { |_, value| value }.reverse[0..2]
+        output = []
+        largest_keys.each do |key, size|
+          size = Filesize.from("#{size} B").to_s('Kb')
+          output << "#{key} (#{size})"
+        end
+        output.join(', ')
+      end
+
       # Public: Check if the sent records are not too big
       #
-      # context[:records] - list of records to push
+      # context[:records] - list of records sent in the batch
       #
       # Records cannot weight more that 10Kb. If we're getting this error it
       # means that one of the records is too big, so we'll try to give
@@ -187,17 +204,25 @@
         # Getting the record size
         size, = /.*size=(.*) bytes.*/.match(message).captures
-        size = Filesize.from("#{size} B").pretty
+        size = Filesize.from("#{size} B").to_s('Kb')
         object_id = details['objectID']

         # Getting record details
         record = Utils.find_by_key(context[:records], :objectID, object_id)
+        probable_wrong_keys = readable_largest_record_keys(record)
+
+        # Writing the full record to disk for inspection
+        record_log_path = Logger.write_to_file(
+          "jekyll-algolia-record-too-big-#{object_id}.log",
+          JSON.pretty_generate(record)
+        )

         {
           'object_id' => object_id,
           'object_title' => record[:title],
           'object_url' => record[:url],
-          'object_hint' => record[:content][0..100],
+          'probable_wrong_keys' => probable_wrong_keys,
+          'record_log_path' => record_log_path,
           'nodes_to_index' => Configurator.algolia('nodes_to_index'),
           'size' => size,
           'size_limit' => '10 Kb'
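
Taken together, a hedged sketch of what the reworked `record_too_big` path computes. The message and record below are invented (shaped like the ones used in the specs further down), the calls mirror the diff above, and it assumes the plugin and the `filesize` gem it relies on are loaded:

    require 'filesize'

    # Invented inputs, shaped like the fixtures in the specs below
    message = 'Record at the position 3 objectID=deadbeef is too big ' \
              'size=109196 bytes. Contact us if you need an extended quota'
    record  = { objectID: 'deadbeef', title: 'Page title',
                content: 'x' * 100_000, excerpt: 'x' * 10_000 }

    # Size extraction and formatting, as in the hunk above
    size, = /.*size=(.*) bytes.*/.match(message).captures
    Filesize.from("#{size} B").to_s('Kb')
    # => "109.20 Kb"

    # Largest keys, as computed by the new helper
    Jekyll::Algolia::ErrorHandler.readable_largest_record_keys(record)
    # => something like "content (100.00 Kb), excerpt (10.00 Kb), title (0.01 Kb)"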

View File

@@ -151,7 +151,10 @@
           begin
             ::Algolia.batch!(slice)
           rescue StandardError => error
-            ErrorHandler.stop(error)
+            records = slice.map do |record|
+              record[:body]
+            end
+            ErrorHandler.stop(error, records: records)
           end
         end
       end
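
Why map over `slice` with `record[:body]`? The assumption sketched below is that each element of the slice is an Algolia batch operation wrapping the actual record, so the bodies have to be unwrapped before the error handler can look the failing record up by objectID (the operation shape here is illustrative, not copied from the plugin source):

    # Illustrative batch operation
    operation = {
      action: 'addObject',
      body: { objectID: 'deadbeef', title: 'Page title', content: '…' }
    }

    # Same transformation as in the rescue branch above
    records = [operation].map { |record| record[:body] }
    records.first[:objectID] # => "deadbeef", usable by Utils.find_by_key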

View File

@@ -52,6 +52,16 @@ module Jekyll
         log(line)
       end

+      # Public: Write the specified content to a file in the source directory
+      #
+      # filename - the file basename
+      # content - the actual content of the file
+      def self.write_to_file(filename, content)
+        filepath = File.join(Configurator.get('source'), filename)
+        File.write(filepath, content)
+        filepath
+      end
+
       # Public: Displays a helpful error message for one of the knows errors
       #
       # message_id: A string identifying a know message
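
A hedged usage sketch of the new `write_to_file` helper, mirroring how the error handler calls it during a Jekyll build; the `/my/site` source path is an assumption for illustration:

    require 'json'

    record = { objectID: 'deadbeef', title: 'Page title' }

    # Assuming Configurator.get('source') returns '/my/site', the full record
    # is dumped next to the Jekyll sources and the path is returned for display
    path = Jekyll::Algolia::Logger.write_to_file(
      "jekyll-algolia-record-too-big-#{record[:objectID]}.log",
      JSON.pretty_generate(record)
    )
    path # => "/my/site/jekyll-algolia-record-too-big-deadbeef.log"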

View File

@@ -94,6 +94,17 @@ describe(Jekyll::Algolia::ErrorHandler) do
     end
   end

+  describe '.readable_largest_record_keys' do
+    let(:record) { { foo: foo, bar: bar, baz: baz, small: 'xxx' } }
+    let(:foo) { 'x' * 1000 }
+    let(:bar) { 'x' * 10_000 }
+    let(:baz) { 'x' * 100_000 }
+
+    subject { current.readable_largest_record_keys(record) }
+
+    it { should eq 'baz (100.00 Kb), bar (10.00 Kb), foo (1.00 Kb)' }
+  end
+
   describe '.identify' do
     subject { current.identify(error, context) }
@@ -156,7 +167,7 @@ describe(Jekyll::Algolia::ErrorHandler) do
          '400: Cannot POST to '\
          'https://MY_APP_ID.algolia.net/1/indexes/my_index/batch: '\
          '{"message":"Record at the position 3 '\
-         'objectID=deadbeef is too big size=1091966 bytes. '\
+         'objectID=deadbeef is too big size=109196 bytes. '\
          'Contact us if you need an extended quota","position":3,'\
          '"objectID":"deadbeef","status":400} (400)'
       end
@@ -167,28 +178,37 @@ describe(Jekyll::Algolia::ErrorHandler) do
           title: 'Page title',
           url: '/path/to/file.ext',
           # rubocop:disable Metrics/LineLength
-          content: 'A very long text that is obviously too long to fit in one record, but that would be too long to actually display in the error message as wel so we will cut it at 100 characters.'
+          content: 'A very long text that is obviously too long to fit in one record, but that would be too long to actually display in the error message as well so we will cut it at 100 characters.'
           # rubocop:enable Metrics/LineLength
         },
         { objectID: 'foo' }
       ] }
     end
+    let(:record_log_path) { '/source/output.log' }
+    let(:probable_wrong_keys) { 'foo, bar, baz' }

     before do
       allow(configurator)
         .to receive(:algolia)
         .with('nodes_to_index')
         .and_return('p,li,foo')
+      expect(logger)
+        .to receive(:write_to_file)
+        .and_return(record_log_path)
+      expect(current)
+        .to receive(:readable_largest_record_keys)
+        .and_return(probable_wrong_keys)
     end

     it { should include(name: 'record_too_big') }
     it do
       details = subject[:details]
       expect(details).to include('object_id' => 'deadbeef')
       expect(details).to include('object_title' => 'Page title')
       expect(details).to include('object_url' => '/path/to/file.ext')
-      expect(details['object_hint']).to match(/^A very long text/)
-      expect(details['object_hint']).to match(/.{0,100}/)
-      expect(details).to include('size' => '1.04 MiB')
+      expect(details).to include('record_log_path' => record_log_path)
+      expect(details).to include('probable_wrong_keys' => probable_wrong_keys)
+      expect(details).to include('size' => '109.20 Kb')
       expect(details).to include('size_limit' => '10 Kb')
       expect(details).to include('nodes_to_index' => 'p,li,foo')
     end

View File

@@ -93,5 +93,22 @@ describe(Jekyll::Algolia::Logger) do
       end
     end
   end
+
+  describe '.write_to_file' do
+    let(:filename) { 'output.log' }
+    let(:content) { 'content' }
+    let(:source) { '/source/' }
+
+    before do
+      expect(configurator)
+        .to receive(:get).with('source').and_return(source)
+      expect(File).to receive(:write)
+        .with('/source/output.log', content)
+    end
+
+    subject { current.write_to_file(filename, content) }
+    it { should eq '/source/output.log' }
+  end
 end
 # rubocop:enable Metrics/BlockLength