diff --git a/docs-src/config.js b/docs-src/config.js index a1606bc..d4ab31b 100644 --- a/docs-src/config.js +++ b/docs-src/config.js @@ -35,7 +35,6 @@ const sidebarMenu = [ { title: 'Options', url: 'options.html' }, { title: 'Commandline', url: 'commandline.html' }, { title: 'Hooks', url: 'hooks.html' }, - { title: 'Indexing modes', url: 'indexing-modes.html' }, ], }, { @@ -48,11 +47,7 @@ const sidebarMenu = [ }, { title: 'Tutorials', - items: [ - { title: 'Blog', url: 'blog.html' }, - // { title: 'Dropdown menu', url: 'autocomplete.html' }, - // { title: 'Collection search', url: 'collections.html' }, - ], + items: [{ title: 'Blog', url: 'blog.html' }], }, ]; diff --git a/docs-src/src/getting-started.md b/docs-src/src/getting-started.md index b4766b1..4ed2f4c 100644 --- a/docs-src/src/getting-started.md +++ b/docs-src/src/getting-started.md @@ -75,7 +75,7 @@ want to keep this key secret and not commit it to your versioning system. ![jekyll algolia command example][6] -_Note that in the animation I simplified the method call to `jekyll algolia` by using an +_Note that in the animation we simplified the method call to `jekyll algolia` by using an [alternative way][7] of loading the API key and using [rubygems-bundler][8] to remove the need to add `bundle exec`._ diff --git a/docs-src/src/how-it-works.md b/docs-src/src/how-it-works.md index 7c6245c..133af98 100644 --- a/docs-src/src/how-it-works.md +++ b/docs-src/src/how-it-works.md @@ -5,6 +5,12 @@ layout: content-with-menu.pug # How does this work? +This page will give you a bit more insight about how the internals of the plugin +are working. This should give you more context to better understand the various +options you can configure. + +## Extracting data + The plugin will work like a `jekyll build` run, but instead of writing `.html` files to disk, it will push content to Algolia. It will go through each file Jekyll would have processed in a regular build: pages, posts and collections. 
@@ -53,16 +59,26 @@ front-matter). Specific data is the paragraph content, and information about its position in the page (where its situated in the hierarchy of headings in the page). -Once displayed, results are grouped so only the best matching paragraph of each -page is returned for a specific query. This greatly improves the perceived -relevance of the search results. +Using the [distinct setting][1] of the Algolia API, only the best matching +paragraph of each page is returned for a specific query. This greatly improves +the perceived relevance of the search results as you can highlight specifically +the part that was matching. -Because the plugin is splitting each page into smaller chunks, it can be hard to get -an estimate of how many records will actually be pushed. The plugin tries to be -smart and consume as less operations as possible, but you can always run it in -`--dry-run` mode to better understand what it would do. +## Pushing data -![jekyll algolia dry run example][1] +The plugin tries to be smart by using as few operations as possible, to be +mindful of your Algolia quota. Whenever you run `jekyll algolia`, only records +that changed since your last push will be updated. -[1]: ./assets/images/dry-run.gif +This is made possible because each record is attributed a unique `objectID`, +computed as a hash of the actual content of the record. Whenever the content of +the record changes, its `objectID` will change as well. This allows us to compare +what is currently available in your index and what is about to be pushed, to only +update what actually changed. +Previous outdated records will be deleted, and new updated records will be added +instead. All those operations are grouped into a batch call, making sure that +the changes are done atomically: your index will never be in an inconsistent +state where records are only partially updated. 
+ +[1]: https://www.algolia.com/doc/guides/ranking/distinct/?language=ruby#distinct-to-index-large-records diff --git a/docs-src/src/indexing-modes.md b/docs-src/src/indexing-modes.md deleted file mode 100644 index 320a625..0000000 --- a/docs-src/src/indexing-modes.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: Indexing modes -layout: content-with-menu.pug ---- - -# Indexing modes - -Algolia's pricing model is based on the number of records you have in your index -as well as the number of add/edit/delete operations you operate on your index -per month. - -By default, the plugin tries to be mindful of your quota and act in a smart way -by default: only updating records that changed between two runs. - -It does so by attributing a unique `objectID` to each record, generated from the -actual content of this record. If the content changes, then the `objectID` will -change as well. - -Because of this mechanism, the plugin can know which records changed between two -runs and will delete the records that are no longer needed and push the new ones -instead. Doing so only consumes a small number of operations (instead of pushing -everything each time). - -When using the default `indexing_mode` value (`diff`), all those changes are -batched into one call to the API. They will be executed atomically (the index -will be updated with all the changes in one go, instead of one record at -a time). This allow users of the website to always search into the most -up-to-date version of the data. - -This should work for 99% of the use-cases and you shouldn't need to change the -value of the `indexing_mode`. - - -## `diff` (default) - -Using the default `diff` mode, the plugin will try to be smart when pushing -content to your index: it will only add/edit/delete what changed. All -records that didn't change will stay untouched. - -To do so, it first grabs the list of all records in your index, then compares -them with the records generated locally. 
It then deletes the old records that no -longer exists, and add the newly created ones. - -There is no notion of "updating" a record here because as soon as the content of -a record changes, it will be considered as a new record (thus, the old version -will be deleted and the new one will be added). - -### Cons - -All operations will be done on the same index, sequentially. Old records will -first be discarded, then new ones will be added. Users doing a search on your -website during the update will have inconsistent or incomplete results. - -## `atomic` - -The `atomic` mode solves the inconsistency issue of the `diff` mode. Instead of -doing all changes in sequence on the same index, the updates will be done on -a temporary index in the background. - -The plugin will start by making a copy of the existing data, and will then apply -the `diff` method to it: it will remove old records and add new ones to this -index. While those changes are applied, your current index is still serving -search queries by your users. Once all changes are applied, the plugin will -replace the current public index with the temporary one, all in one atomic move. - -### Cons - -As this method will need to create a copy of your current index during indexing, -it means you will need an Algolia plan that can hold at least **twice** the -number of records. diff --git a/docs-src/src/migration-guide.md b/docs-src/src/migration-guide.md index dded5d5..f17e2a9 100644 --- a/docs-src/src/migration-guide.md +++ b/docs-src/src/migration-guide.md @@ -46,17 +46,16 @@ been changed: [extensions_to_index][3]. Note that for the last one, it now expects a comma-separated list of extensions. -The `lazy_update` option has renamed to [indexing_mode][4]. The default indexing -mode ([diff][5]), is equivalent to `lazy_update: true`. This means that by -default the plugin will now be smart enough to only update records that actually -changed since the last run. 
You can still get the old behavior of re-pushing -everything every time by using the [atomic][6] indexing mode. +The `lazy_update` option does not exist anymore. The new indexing mode is +equal to `lazy_update: true`. Only records that changed between the current +build and the previous one will be updated, and it will even be done in an +atomic way (all in one go). ## Hooks All three hooks (`custom_hook_excluded_file?`, `custom_hook_each` and `custom_hook_all`) are still here, but they have been renamed to -[should_be_excluded?][7], [before_indexing_each][8] and [before_indexing_all][9]. +[should_be_excluded?][4], [before_indexing_each][5] and [before_indexing_all][6]. They all have the same behavior and expect the same arguments as before, but should now extend the `Jekyll::Algolia::Hooks` module. It means that the file @@ -72,7 +71,7 @@ module Jekyll end ``` -You can find the complete documentation on the [dedicated page][10]. +You can find the complete documentation on the [dedicated page][7]. ## Records @@ -114,17 +113,14 @@ Here is an example of a record extracted by the plugin: ## Need more help? If you need more help migrating from the previous plugin to this new version, -you can [file an issue][11] on the GitHub repo and we'll do our best to help you. +you can [file an issue][8] on the GitHub repo and we'll do our best to help you. 
[1]: ./options.html#files-to-exclude [2]: ./options.html#nodes-to-index [3]: ./options.html#extensions-to-index -[4]: ./options.html#indexing-mode -[5]: ./indexing-modes.html#diff-default -[6]: ./indexing-modes.html#atomic -[7]: ./hooks.html#should-be-excluded -[8]: ./hooks.html#before-indexing-each -[9]: ./hooks.html#before-indexing-all -[10]: ./hooks.html -[11]: https://github.com/algolia/jekyll-algolia/issues +[4]: ./hooks.html#should-be-excluded +[5]: ./hooks.html#before-indexing-each +[6]: ./hooks.html#before-indexing-all +[7]: ./hooks.html +[8]: https://github.com/algolia/jekyll-algolia/issues diff --git a/docs-src/src/options.md b/docs-src/src/options.md index b7f714f..1fd6f8b 100644 --- a/docs-src/src/options.md +++ b/docs-src/src/options.md @@ -62,27 +62,6 @@ algolia: _Note that some files (pagination pages, static assets, etc) will **always** be excluded and you don't have to specify them._ -## `indexing_batch_size` - -The Algolia API allows you to send batches of changes to add or update several -records at once, instead of doing one HTTP call per record. The plugin will -batch updates by groups of 1000 records by default. - -If you are on an unstable internet connection, you might want to decrease the -value. You will send more batches, but each will be smaller in size. - -```yml -algolia: - # Send fewer records per batch - indexing_batch_size: 500 -``` - -## `indexing_mode` - -This option will let you choose the strategy used to sync your data with your -Algolia index. The default value should work for most cases, but feel free to -[read the pros and cons][4] of each and pick the one best suited for your needs. - ## `nodes_to_index` This options defines how each page is split into chunks. It expects @@ -108,7 +87,7 @@ This option let you pass specific settings to your Algolia index. By default the plugin will configure your Algolia index with settings tailored to the format of the extracted records. 
You are of course free to overwrite them or configure them as best suits your needs. Every option passed to the -`settings` entry will be set as [settings to your index][5]. +`settings` entry will be set as [settings to your index][4]. For example if you want to change the HTML tag used for the highlighting, you can overwrite it like this: @@ -120,9 +99,26 @@ algolia: highlightPostTag: '' ``` +## `indexing_batch_size` + +This option defines the number of operations that will be grouped as part of one +updating batch. All operations of one batch are applied atomically. The default +value is `1000`. + +You might want to increase this value if you are doing a lot of updates on each +run and still want to have your changes done atomically. + +You might want to decrease this value if you're using an unstable internet +connection. Smaller batches are easier to send than large ones. + +```yml +algolia: + # Send fewer records per batch + indexing_batch_size: 500 +``` + [1]: ./how-it-works.html [2]: http://www.methods.co.nz/asciidoc/ [3]: https://github.com/textile -[4]: ./indexing-modes.html -[5]: https://www.algolia.com/doc/api-reference/api-methods/set-settings/?language=ruby#set-settings +[4]: https://www.algolia.com/doc/api-reference/api-methods/set-settings/?language=ruby#set-settings