fix(record_too_big): Better debug of records too big
This commit is contained in:
parent
61bb7314e4
commit
918524009d
@ -9,16 +9,21 @@ W: should not weigh more than {size_limit}. One of your records weighs {size}
|
||||
W: and has been rejected.
|
||||
W:
|
||||
W: Here are more information about the rejected record:
|
||||
W: {
|
||||
W: "objectID": "{object_id}",
|
||||
W: "title": "{object_title}",
|
||||
W: "url": "{object_url}",
|
||||
W: "text": "{object_hint}…",
|
||||
W: […]
|
||||
W: }
|
||||
W:
|
||||
I: This issue is sometimes caused by malformed HTML preventing the parser to
|
||||
I: correctly grab the content of the nodes.
|
||||
W: objectID: {object_id}
|
||||
W: title: {object_title}
|
||||
W: url: {object_url}
|
||||
W:
|
||||
W: Complete log of the record has been extracted to:
|
||||
W: {record_log_path}
|
||||
W:
|
||||
W: Most probable keys causing the issue:
|
||||
W: {object_hint}
|
||||
W:
|
||||
I: This issue can be caused by malformed HTML preventing the parser from correctly
|
||||
I: grabbing the content of the nodes. Double check that the page actually renders
|
||||
I: correctly with a regular `jekyll build`.
|
||||
I:
|
||||
I:
|
||||
I: If you're having trouble solving this issue, feel free to file a bug on
|
||||
I: GitHub, ideally with a link to a repository where we can reproduce the issue.
|
||||
|
||||
@ -18,7 +18,6 @@ module Jekyll
|
||||
# happened to the display
|
||||
def self.stop(error, context = {})
|
||||
Logger.verbose("E:[jekyll-algolia] Raw error: #{error}")
|
||||
Logger.verbose("E:[jekyll-algolia] Context: #{context}")
|
||||
|
||||
identified_error = identify(error, context)
|
||||
|
||||
@ -172,9 +171,27 @@ module Jekyll
|
||||
}
|
||||
end
|
||||
|
||||
# Public: Returns a string explaining which attributes are the largest in
|
||||
# the record
|
||||
#
|
||||
# record - The record hash to analyze
|
||||
#
|
||||
# This will be used on the `record_too_big` error, to guide users in
|
||||
# finding which record is causing trouble
|
||||
def self.readable_largest_record_keys(record)
  # Nothing to report when no record was handed over; answer with an empty
  # string instead of raising from inside the error reporting path.
  return '' if record.nil?

  # Measure every value in bytes, not characters: the number is passed to
  # Filesize as "<n> B", so multibyte content has to count for its real
  # weight or the wrong keys may be pointed at.
  sizes = record.map { |key, value| [key, value.to_s.bytesize] }

  # Keep only the three heaviest keys, largest first
  heaviest = sizes.sort_by { |_, bytes| -bytes }.first(3)

  # Render each entry as "key (size)" and join them in one readable line
  heaviest
    .map { |key, bytes| "#{key} (#{Filesize.from("#{bytes} B").to_s('Kb')})" }
    .join(', ')
end
|
||||
|
||||
# Public: Check if the sent records are not too big
|
||||
#
|
||||
# context[:records] - list of records to push
|
||||
# context[:records] - list of records sent in the batch
|
||||
#
|
||||
# Records cannot weigh more than 10Kb. If we're getting this error it
|
||||
# means that one of the records is too big, so we'll try to give
|
||||
@ -187,17 +204,25 @@ module Jekyll
|
||||
|
||||
# Getting the record size
|
||||
size, = /.*size=(.*) bytes.*/.match(message).captures
|
||||
size = Filesize.from("#{size} B").pretty
|
||||
size = Filesize.from("#{size} B").to_s('Kb')
|
||||
object_id = details['objectID']
|
||||
|
||||
# Getting record details
|
||||
record = Utils.find_by_key(context[:records], :objectID, object_id)
|
||||
probable_wrong_keys = readable_largest_record_keys(record)
|
||||
|
||||
# Writing the full record to disk for inspection
|
||||
record_log_path = Logger.write_to_file(
|
||||
"jekyll-algolia-record-too-big-#{object_id}.log",
|
||||
JSON.pretty_generate(record)
|
||||
)
|
||||
|
||||
{
|
||||
'object_id' => object_id,
|
||||
'object_title' => record[:title],
|
||||
'object_url' => record[:url],
|
||||
'object_hint' => record[:content][0..100],
|
||||
'probable_wrong_keys' => probable_wrong_keys,
|
||||
'record_log_path' => record_log_path,
|
||||
'nodes_to_index' => Configurator.algolia('nodes_to_index'),
|
||||
'size' => size,
|
||||
'size_limit' => '10 Kb'
|
||||
|
||||
@ -151,7 +151,10 @@ module Jekyll
|
||||
begin
|
||||
::Algolia.batch!(slice)
|
||||
rescue StandardError => error
|
||||
ErrorHandler.stop(error)
|
||||
records = slice.map do |record|
|
||||
record[:body]
|
||||
end
|
||||
ErrorHandler.stop(error, records: records)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@ -52,6 +52,16 @@ module Jekyll
|
||||
log(line)
|
||||
end
|
||||
|
||||
# Public: Write the specified content to a file in the source directory
|
||||
#
|
||||
# filename - the file basename
|
||||
# content - the actual content of the file
|
||||
def self.write_to_file(filename, content)
  # Build the destination inside the configured `source` directory, write
  # the content there, and hand the resulting path back to the caller
  File.join(Configurator.get('source'), filename).tap do |destination|
    File.write(destination, content)
  end
end
|
||||
|
||||
# Public: Displays a helpful error message for one of the known errors
|
||||
#
|
||||
# message_id: A string identifying a known message
|
||||
|
||||
@ -94,6 +94,17 @@ describe(Jekyll::Algolia::ErrorHandler) do
|
||||
end
|
||||
end
|
||||
|
||||
# Only the three largest attributes are reported, biggest first; the tiny
# `small` key is expected to be left out of the summary.
describe '.readable_largest_record_keys' do
  subject { current.readable_largest_record_keys(record) }

  let(:record) do
    {
      foo: 'x' * 1000,
      bar: 'x' * 10_000,
      baz: 'x' * 100_000,
      small: 'xxx'
    }
  end

  it { should eq 'baz (100.00 Kb), bar (10.00 Kb), foo (1.00 Kb)' }
end
|
||||
|
||||
describe '.identify' do
|
||||
subject { current.identify(error, context) }
|
||||
|
||||
@ -156,7 +167,7 @@ describe(Jekyll::Algolia::ErrorHandler) do
|
||||
'400: Cannot POST to '\
|
||||
'https://MY_APP_ID.algolia.net/1/indexes/my_index/batch: '\
|
||||
'{"message":"Record at the position 3 '\
|
||||
'objectID=deadbeef is too big size=1091966 bytes. '\
|
||||
'objectID=deadbeef is too big size=109196 bytes. '\
|
||||
'Contact us if you need an extended quota","position":3,'\
|
||||
'"objectID":"deadbeef","status":400} (400)'
|
||||
end
|
||||
@ -167,28 +178,37 @@ describe(Jekyll::Algolia::ErrorHandler) do
|
||||
title: 'Page title',
|
||||
url: '/path/to/file.ext',
|
||||
# rubocop:disable Metrics/LineLength
|
||||
content: 'A very long text that is obviously too long to fit in one record, but that would be too long to actually display in the error message as wel so we will cut it at 100 characters.'
|
||||
content: 'A very long text that is obviously too long to fit in one record, but that would be too long to actually display in the error message as well so we will cut it at 100 characters.'
|
||||
# rubocop:enable Metrics/LineLength
|
||||
},
|
||||
{ objectID: 'foo' }
|
||||
] }
|
||||
end
|
||||
let(:record_log_path) { '/source/output.log' }
|
||||
let(:probable_wrong_keys) { 'foo, bar, baz' }
|
||||
before do
|
||||
allow(configurator)
|
||||
.to receive(:algolia)
|
||||
.with('nodes_to_index')
|
||||
.and_return('p,li,foo')
|
||||
expect(logger)
|
||||
.to receive(:write_to_file)
|
||||
.and_return(record_log_path)
|
||||
expect(current)
|
||||
.to receive(:readable_largest_record_keys)
|
||||
.and_return(probable_wrong_keys)
|
||||
end
|
||||
|
||||
it { should include(name: 'record_too_big') }
|
||||
|
||||
it do
|
||||
details = subject[:details]
|
||||
expect(details).to include('object_id' => 'deadbeef')
|
||||
expect(details).to include('object_title' => 'Page title')
|
||||
expect(details).to include('object_url' => '/path/to/file.ext')
|
||||
expect(details['object_hint']).to match(/^A very long text/)
|
||||
expect(details['object_hint']).to match(/.{0,100}/)
|
||||
expect(details).to include('size' => '1.04 MiB')
|
||||
expect(details).to include('record_log_path' => record_log_path)
|
||||
expect(details).to include('probable_wrong_keys' => probable_wrong_keys)
|
||||
expect(details).to include('size' => '109.20 Kb')
|
||||
expect(details).to include('size_limit' => '10 Kb')
|
||||
expect(details).to include('nodes_to_index' => 'p,li,foo')
|
||||
end
|
||||
|
||||
@ -93,5 +93,22 @@ describe(Jekyll::Algolia::Logger) do
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# The file is written under the configured source directory and the full
# path of the written file is returned.
describe '.write_to_file' do
  subject { current.write_to_file(filename, content) }

  let(:source) { '/source/' }
  let(:filename) { 'output.log' }
  let(:content) { 'content' }

  before do
    # The destination directory is read from the `source` configuration
    expect(configurator)
      .to receive(:get)
      .with('source')
      .and_return(source)
    # No real file is written; only the call itself is verified
    expect(File)
      .to receive(:write)
      .with('/source/output.log', content)
  end

  it { should eq '/source/output.log' }
end
|
||||
end
|
||||
# rubocop:enable Metrics/BlockLength
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user