Compare commits

..

No commits in common. "caa5bb43c7071cb7946b1a42467f4ba0d02ecbed" and "880583ab825f0c0415ea9efb23d5ffe944ae7e83" have entirely different histories.

10 changed files with 35 additions and 372 deletions

View File

@ -37,11 +37,7 @@ group :test do
platform :mri do
if RUBY_VERSION >= "2.3.0"
gem "google-protobuf", "< 3.19.2" if RUBY_VERSION < "2.5.0"
if RUBY_VERSION <= "2.6.0"
gem "grpc", "< 1.49.0"
else
gem "grpc"
end
gem "grpc"
gem "logging"
gem "marcel", require: false
gem "mimemagic", require: false

View File

@ -1,12 +0,0 @@
# 0.21.1
## Bugfixes
* fix: protecting tcp connect phase against low-level syscall errors
* such as network unreachable, which can happen if connectivity is lost meanwhile.
* native resolver: fix for nameserver switch not happening in case of DNS timeout.
* when more than one nameserver was advertised by the system.
## Chore
* Removing usage of deprecated `Random::DEFAULT.rand` (using `Random.rand` instead).

View File

@ -13,14 +13,14 @@ module HTTPX
**Resolv::DNS::Config.default_config_hash,
packet_size: 512,
timeouts: Resolver::RESOLVE_TIMEOUT,
}
}.freeze
else
{
nameserver: nil,
**Resolv::DNS::Config.default_config_hash,
packet_size: 512,
timeouts: Resolver::RESOLVE_TIMEOUT,
}
}.freeze
end
# nameservers for ipv6 are misconfigured in certain systems;
@ -35,8 +35,6 @@ module HTTPX
end
end if DEFAULTS[:nameserver]
DEFAULTS.freeze
DNS_PORT = 53
def_delegator :@connections, :empty?
@ -154,21 +152,10 @@ module HTTPX
host = connection.origin.host
timeout = (@timeouts[host][0] -= loop_time)
return unless timeout <= 0
return unless timeout.negative?
@timeouts[host].shift
if !@timeouts[host].empty?
log { "resolver: timeout after #{timeout}s, retry(#{@timeouts[host].first}) #{host}..." }
resolve(connection)
elsif @ns_index + 1 < @nameserver.size
# try on the next nameserver
@ns_index += 1
log { "resolver: failed resolving #{host} on nameserver #{@nameserver[@ns_index - 1]} (timeout error)" }
transition(:idle)
resolve(connection)
else
if @timeouts[host].empty?
@timeouts.delete(host)
@queries.delete(h)
@ -178,6 +165,9 @@ module HTTPX
# This loop_time passed to the exception is bogus. Ideally we would pass the total
# resolve timeout, including from the previous retries.
raise ResolveTimeoutError.new(loop_time, "Timed out while resolving #{connection.origin.host}")
else
log { "resolver: timeout after #{timeout}s, retry(#{@timeouts[host].first}) #{host}..." }
resolve(connection)
end
end

View File

@ -1,5 +1,5 @@
# frozen_string_literal: true
module HTTPX
VERSION = "0.21.1"
VERSION = "0.21.0"
end

View File

@ -99,21 +99,17 @@ module ResponseHelpers
File.join("test", "support", "fixtures", fixture_file_name)
end
def start_test_servlet(servlet_class, *args)
server = servlet_class.new(*args)
def start_test_servlet(servlet_class)
server = servlet_class.new
th = Thread.new { server.start }
begin
yield server
ensure
if server.respond_to?(:shutdown)
server.shutdown
server.shutdown
begin
Timeout.timeout(3) { th.join }
rescue Timeout::Error
th.kill
end
else
begin
Timeout.timeout(3) { th.join }
rescue Timeout::Error
th.kill
end
end

View File

@ -6,54 +6,54 @@ module Requests
native: { cache: false },
system: { cache: false },
https: { uri: ENV["HTTPX_RESOLVER_URI"], cache: false },
}.each do |resolver_type, options|
define_method :"test_resolver_#{resolver_type}_multiple_errors" do
}.each do |resolver, options|
define_method :"test_resolver_#{resolver}_multiple_errors" do
2.times do |i|
session = HTTPX.plugin(SessionWithPool)
unknown_uri = "http://www.sfjewjfwigiewpgwwg-native-#{i}.com"
response = session.get(unknown_uri, resolver_class: resolver_type, resolver_options: options)
response = session.get(unknown_uri, resolver_class: resolver, resolver_options: options)
verify_error_response(response, HTTPX::ResolveError)
end
end
define_method :"test_resolver_#{resolver_type}_request" do
define_method :"test_resolver_#{resolver}_request" do
session = HTTPX.plugin(SessionWithPool)
uri = build_uri("/get")
response = session.head(uri, resolver_class: resolver_type, resolver_options: options)
response = session.head(uri, resolver_class: resolver, resolver_options: options)
verify_status(response, 200)
response.close
end
define_method :"test_resolver_#{resolver_type}_alias_request" do
define_method :"test_resolver_#{resolver}_alias_request" do
session = HTTPX.plugin(SessionWithPool)
uri = URI(build_uri("/get"))
# this google host will resolve to a CNAME
uri.host = "lh3.googleusercontent.com"
response = session.head(uri, resolver_class: resolver_type, resolver_options: options)
response = session.head(uri, resolver_class: resolver, resolver_options: options)
assert !response.is_a?(HTTPX::ErrorResponse), "response was an error (#{response})"
assert response.status < 500, "unexpected HTTP error (#{response})"
response.close
end
case resolver_type
case resolver
when :https
define_method :"test_resolver_#{resolver_type}_get_request" do
define_method :"test_resolver_#{resolver}_get_request" do
session = HTTPX.plugin(SessionWithPool)
uri = build_uri("/get")
response = session.head(uri, resolver_class: resolver_type, resolver_options: options.merge(use_get: true))
response = session.head(uri, resolver_class: resolver, resolver_options: options.merge(use_get: true))
verify_status(response, 200)
response.close
end
define_method :"test_resolver_#{resolver_type}_unresolvable_servername" do
define_method :"test_resolver_#{resolver}_unresolvable_servername" do
session = HTTPX.plugin(SessionWithPool)
uri = build_uri("/get")
response = session.head(uri, resolver_class: resolver_type, resolver_options: options.merge(uri: "https://unexisting-doh/dns-query"))
response = session.head(uri, resolver_class: resolver, resolver_options: options.merge(uri: "https://unexisting-doh/dns-query"))
verify_error_response(response, HTTPX::ResolveError)
end
define_method :"test_resolver_#{resolver_type}_server_error" do
define_method :"test_resolver_#{resolver}_server_error" do
session = HTTPX.plugin(SessionWithPool)
uri = URI(build_uri("/get"))
resolver_class = Class.new(HTTPX::Resolver::HTTPS) do
@ -65,7 +65,7 @@ module Requests
verify_error_response(response, HTTPX::ResolveError)
end
define_method :"test_resolver_#{resolver_type}_decoding_error" do
define_method :"test_resolver_#{resolver}_decoding_error" do
session = HTTPX.plugin(SessionWithPool)
uri = URI(build_uri("/get"))
resolver_class = Class.new(HTTPX::Resolver::HTTPS) do
@ -79,7 +79,7 @@ module Requests
when :native
# this test mocks an unresponsive DNS server which doesn't return a DNS answer back.
define_method :"test_resolver_#{resolver_type}_timeout" do
define_method :"test_resolver_#{resolver}_timeout" do
session = HTTPX.plugin(SessionWithPool)
uri = URI(build_uri("/get"))
# absolute URL, just to shorten the impact of resolv.conf search.
@ -104,36 +104,16 @@ module Requests
end
# this test mocks the case where there's no nameserver set to send the DNS queries to.
define_method :"test_resolver_#{resolver_type}_no_nameserver" do
define_method :"test_resolver_#{resolver}_no_nameserver" do
session = HTTPX.plugin(SessionWithPool)
uri = build_uri("/get")
response = session.head(uri, resolver_class: resolver_type, resolver_options: options.merge(nameserver: nil))
response = session.head(uri, resolver_class: resolver, resolver_options: options.merge(nameserver: nil))
verify_error_response(response, HTTPX::ResolveError)
end
define_method :"test_resolver_#{resolver_type}_slow_nameserver" do
start_test_servlet(SlowDNSServer, 6) do |slow_dns_server|
start_test_servlet(SlowDNSServer, 1) do |not_so_slow_dns_server|
nameservers = [slow_dns_server.nameserver, not_so_slow_dns_server.nameserver]
resolver_opts = options.merge(nameserver: nameservers, timeouts: [3])
HTTPX.plugin(SessionWithPool).wrap do |session|
uri = build_uri("/get")
response = session.get(uri, resolver_class: resolver_type, resolver_options: resolver_opts)
verify_status(response, 200)
resolver = session.pool.resolver.resolvers[0]
assert resolver.instance_variable_get(:@ns_index) == 1
end
end
end
end
# this test mocks a DNS server sending invalid messages back
define_method :"test_resolver_#{resolver_type}_decoding_error" do
define_method :"test_resolver_#{resolver}_decoding_error" do
session = HTTPX.plugin(SessionWithPool)
uri = URI(build_uri("/get"))
resolver_class = Class.new(HTTPX::Resolver::Native) do
@ -146,7 +126,7 @@ module Requests
end
# this test mocks a DNS server breaking the socket with Errno::EHOSTUNREACH
define_method :"test_resolver_#{resolver_type}_unreachable" do
define_method :"test_resolver_#{resolver}_unreachable" do
session = HTTPX.plugin(SessionWithPool)
uri = URI(build_uri("/get"))
resolver_class = Class.new(HTTPX::Resolver::Native) do

View File

@ -1,98 +0,0 @@
# frozen_string_literal: true
require "resolv"
require "socket"
# from https://gist.github.com/peterc/1425383
# Minimal UDP DNS server used by the test suite to emulate a slow nameserver:
# every answer is delayed by a fixed number of seconds before being sent back.
class SlowDNSServer
  # Bits of the third header byte holding the 4-bit Opcode field (0 = standard query).
  OPCODE_MASK = 0b0111_1000
  # Size in bytes of the fixed DNS message header; the question section follows it.
  HEADER_SIZE = 12

  # Counters of received queries and of sent answers.
  attr_reader :queries, :answers

  # @param timeout [Numeric] seconds to sleep before replying to each query.
  def initialize(timeout)
    @port = next_available_port
    @can_log = ENV.key?("HTTPX_DEBUG")
    @timeout = timeout
    @queries = 0
    @answers = 0
  end

  # @return [Array(String, Integer)] the loopback address and port this server was assigned.
  def nameserver
    ["127.0.0.1", @port]
  end

  # Runs the UDP server loop on the assigned port: each query is counted,
  # delayed by the configured timeout, answered, and then counted as answered.
  def start
    Socket.udp_server_loop(@port) do |query, src|
      @queries += 1
      sleep(@timeout)
      src.reply(dns_response(query))
      @answers += 1
    end
  end

  private

  # Reads the QNAME of the first question out of a raw DNS query.
  #
  # @param data [String] raw DNS message, as received from the socket.
  # @return [String] dot-terminated domain name ("example.com."), or an empty
  #   string when the Opcode is not a standard query.
  def extract_domain(data)
    domain = +""

    # Check "Opcode" of question header for valid question
    return domain unless (data.getbyte(2) & OPCODE_MASK).zero?

    # Labels are encoded as a length byte followed by that many bytes of text,
    # repeated until a zero length byte. Byte accessors are used so that parsing
    # does not depend on the encoding the string happens to be tagged with.
    idx = HEADER_SIZE
    len = data.getbyte(idx)

    until len.zero?
      domain << "#{data.byteslice(idx + 1, len)}."
      idx += len + 1
      len = data.getbyte(idx)
    end

    domain
  end

  # Builds the raw answer for a raw DNS query, resolving the queried name via Resolv.
  #
  # @param query [String] raw DNS query message.
  # @return [String] raw (binary) DNS response message.
  # @raise [Resolv::ResolvError] when the queried name cannot be resolved.
  def dns_response(query)
    domain = extract_domain(query)
    ip = Resolv.getaddress(domain)
    # NOTE(review): anything containing a lowercase letter is answered as a CNAME;
    # this would also match IPv6 addresses with hex letters - confirm only IPv4 is expected here.
    cname = ip =~ /[a-z]/

    # Valid response header. It is assembled exclusively from binary strings:
    # interpolating the query bytes into a UTF-8 literal containing "\x81" mixes
    # incompatible encodings as soon as the query ID has a byte >= 0x80.
    response = query.byteslice(0, 2).b        # ID, echoed back
    response << "\x81\x00".b                  # flags: response, recursion desired, no error
    response << (query.byteslice(4, 2) * 2).b # QDCOUNT echoed, ANCOUNT set to the same value
    response << "\x00\x00\x00\x00".b          # NSCOUNT and ARCOUNT

    # Append original question section
    response << query[HEADER_SIZE..-1].b

    # Use pointer to refer to domain name in question section
    response << "\xc0\x0c".b

    # Set response type accordingly (CNAME or A)
    response << (cname ? "\x00\x05".b : "\x00\x01".b)

    # Set response class (IN)
    response << "\x00\x01".b

    # TTL in seconds
    response << [120].pack("N").b

    # Calculate RDATA - we need its length in advance
    rdata = if cname
      # length-prefixed labels, terminated by a zero length byte
      ip.split(".").map { |a| a.length.chr + a }.join << "\x00"
    else
      # IP address as four 8 bit unsigned bytes
      ip.split(".").map(&:to_i).pack("C*")
    end

    # RDLENGTH (4 for an A record, variable for a CNAME), followed by RDATA
    response << [rdata.length].pack("n").b
    response << rdata.b

    response
  end

  # Asks the OS for a free UDP port on the loopback interface.
  # The probing socket is closed before returning, so the port is only likely
  # (not guaranteed) to still be free when the server starts.
  #
  # @return [Integer] the port number.
  def next_available_port
    udp = UDPSocket.new
    udp.bind("127.0.0.1", 0)
    udp.addr[1]
  ensure
    # udp is nil when UDPSocket.new itself raised; don't mask that error.
    udp&.close
  end
end

View File

@ -2,7 +2,7 @@
module SessionWithPool
ConnectionPool = Class.new(HTTPX::Pool) do
attr_reader :resolver, :connections, :selector
attr_reader :connections, :selector
attr_reader :connection_count
attr_reader :ping_count
@ -21,11 +21,6 @@ module SessionWithPool
def selectable_count
@selector.instance_variable_get(:@selectables).size
end
def find_resolver_for(*args, &blk)
@resolver = super(*args, &blk)
@resolver
end
end
module InstanceMethods

View File

@ -1,7 +1,4 @@
-
-
name: "0.21.1"
path: "0_21_1_md.html"
-
name: "0.21.0"
path: "0_21_0_md.html"

View File

@ -1,181 +0,0 @@
---
layout: post
title: How to "bundle install" in deployment mode, using bundler in docker
keywords: ruby, docker, bundler, gems, rubygems
---
**tl;dr**: `BUNDLE_PATH=$GEM_HOME`.
I was recently setting up the deployment of a `ruby` service, in my employer's production environment, which uses [EKS on AWS](https://aws.amazon.com/pt/eks/) and [docker](https://docs.docker.com/get-docker/) containers. This time though, I wanted to see how hard it would be to generate a production image, as well as the dev/test one we use in CI, from the same [Dockerfile](https://docs.docker.com/engine/reference/builder/).
I figured that it was just a matter of juggling the right combination of [ARG](https://docs.docker.com/engine/reference/builder/) and [ENV](https://docs.docker.com/compose/environment-variables/) declarations. And while I was right, I thought the outcome was worth documenting in a blog post, in order to spare the next rubyist some suffering when going down the same path. And while I can still appreciate `bundler`'s role and leadership in the `ruby` community, and its array of features and configurability, its defaults and user/permissions handling leave something to be desired.
## Development setup
The initial Dockerfile used for development looked roughly like this:
```Dockerfile
FROM ruby:3.1.2-bullseye
LABEL maintainer=me
RUN adduser --disabled-password --gecos '' app \
&& mkdir -p /home/service \
&& chown app:app /home/service
USER app:app
WORKDIR /home/service
COPY --chown=app:app Gemfile Gemfile.lock /home/service
RUN bundle install
COPY --chown=app:app . .
CMD ["bundle", "exec", "start-it-up"]
```
The Gemfile was very simple, with a test group:
```ruby
# Gemfile
source "https://rubygems.org"
gem "rake"
gem "zeitwerk"
gem "sentry-ruby"
# ...
group :test do
gem "minitest"
gem "standard"
gem "debug"
# ...
end
```
This was all tied up locally using [Docker Compose](https://docs.docker.com/get-started/08_using_compose/), where the service declaration looked like this:
```yaml
# docker-compose.yml
services:
foo:
env_file: .env
volumes:
- ./:/home/service
```
This setup worked well locally, and was reused to run the tests in CI (we use [Gitlab CI docker executors](https://docs.gitlab.com/runner/executors/docker.html)).
It was ready to go to production.
## bundler in production
[Bundler how to deploy page](https://bundler.io/guides/deploying.html) gives you simple advice: `bundle install --deployment` and you're good to go. My use-case wasn't as simple though, as I wanted to follow some best practices from the get-go, rather than retrofitting them when it's too costly to do so.
For one, I didn't want to install test dependencies in the final production image (benefit: leaner production image, less exposure to vulnerabilities I don't need in servers). I also didn't want to use command-line options, as dealing with the development/production options would make my single Dockerfile harder to read. Fortunately, [bundler covers that by supporting environment variables for configuration](https://bundler.io/man/bundle-config.1.html):
```Dockerfile
# Dockerfile
FROM ruby:3.1.2-bullseye
# to declare which bundler groups to ignore, aka bundle install --without
ARG BUNDLE_WITHOUT
```
```yaml
# .gitlab-ci.yml
Build Production Image:
variables:
DOCKER_BUILD_ARGS: "BUNDLE_DEPLOYMENT=1 BUNDLE_WITHOUT=test"
script:
- docker build ${DOCKER_BUILD_ARGS} ...
```
```yml
# kubernetes service.yml
env:
BUNDLE_WITHOUT:
value: "test"
BUNDLE_DEPLOYMENT:
value: 1
```
Simple, right? So I thought, so I deployed. And the service didn't boot. Looking at the logs, I was seeing a variation of the following error:
```log
Could not find rake-13.0.6, zeitwerk-2.6.0, ...(the rest) in any of the sources (Bundler::GemNotFound)
```
I couldn't figure it out. It worked on my machine. And I vaguely remembered doing similar work in the past. So I started googling for "ruby dockerfile setup", only to find similar dockerfiles. I initialized a pod, and quickly checked `GEM_PATH`, which pointed to `/usr/local/bundle`, and in fact nothing was there.
I then spent the next two days, playing with several other bundler flags, adding, removing, editing them, trying to get to a positive outcome, and in the process almost giving up the idea altogether.
But this post is not about the journey. It's about the solution. Which eventually became clear.
## Root, non-root, bundler, and rubygems
The main difference between my dockerfile, and most of the "ruby docker" examples on the web: I wasn't running the process as root.
The [ruby base image](https://github.com/docker-library/ruby/blob/master/3.1/bullseye/Dockerfile) sets up some variables, some of them involving `bundler` and `rubygems` (both ship with ruby as "bundled gems"):
```dockerfile
# from ruby 3.1.2 bullseye dockerfile
# don't create ".bundle" in all our apps
ENV GEM_HOME /usr/local/bundle
ENV BUNDLE_SILENCE_ROOT_WARNING=1 \
BUNDLE_APP_CONFIG="$GEM_HOME"
ENV PATH $GEM_HOME/bin:$PATH
# adjust permissions of a few directories for running "gem install" as an arbitrary user
RUN mkdir -p "$GEM_HOME" && chmod 777 "$GEM_HOME"
```
This means that:
* gems are installed in `$GEM_HOME`;
* gem-installed binstubs are accessible in the `$PATH`;
* `bundler` configs can be found under `$GEM_HOME`;
When I switch to a non-privileged user, as the initial Dockerfile shows, and run `bundle install`, gems are installed under `$GEM_HOME/gems`; executables are under `$GEM_HOME/bin`. It works on my machine.
But when I do it with `BUNDLE_DEPLOYMENT=1`? Gems still get installed in the same place. Executables too. But running `bundle exec` breaks. That's because, in deployment mode, `bundler` sets its internal bundle path, used for dependency resolution and lookup, [to `"vendor/bundle"`](https://github.com/rubygems/rubygems/blob/def27af571af48f7375cc0bdc58b845122dcb5b4/bundler/lib/bundler/settings.rb#L4).
```ruby
# from lib/bundler/settings.rb
def path
configs.each do |_level, settings|
path = value_for("path", settings)
path = "vendor/bundle" if value_for("deployment", settings) && path.nil?
# ...
```
But there's nothing there, because as it was mentioned, gems were installed under `$GEM_HOME`.
So the solution is right in the line above: just set the bundle path. The most straightforward way to do this in this setup was via `BUNDLE_PATH`:
```dockerfile
# Dockerfile
ENV BUNDLE_PATH $GEM_HOME
# and now, you can bundle exec
```
That's it. Annoying, but simple to fix.
## Conclusion
While the solution was very straightforward (patch this environment variable and you're good to go), it took me some time and a lot of trial and error to get there, due to a combination of factors.
First one is docker defaults and best practices; while it's been known for some time in the security realm that ["thou shalt not run containers as root"](https://stackoverflow.com/questions/68155641/should-i-run-things-inside-a-docker-container-as-non-root-for-safety), if I type "dockerfile ruby" in google, from the [first](https://lipanski.com/posts/dockerfile-ruby-best-practices) [5](https://semaphoreci.com/community/tutorials/dockerizing-a-ruby-on-rails-application) [relevant](https://www.cloudbees.com/blog/build-minimal-docker-container-ruby-apps) [results](https://www.digitalocean.com/community/tutorials/containerizing-a-ruby-on-rails-application-for-development-with-docker-compose) [I](https://docs.docker.com/samples/rails/) get (the last one being docker official recommendation for using `compose` and `rails`), only one of them sets a non-privileged user for running the container. And that single example does it **after** running `bundle install`.
Why is it important to run `bundle install` as non-root? You can read the details in [this Snyk blog post](https://snyk.io/blog/ruby-gem-installation-lockfile-injection-attacks/), but the tl;dr is, if the gem requires compiling C extensions, a [post-install callback can be invoked](https://blog.costan.us/2008/11/post-install-post-update-scripts-for.html) which allows arbitrary code to run with the privileges of the user invoking `bundle install`, which becomes a privilege escalation attack when exploited.
Why does `bundler` default to setting `"vendor/bundle"` as the default gems lookup dir, which is different than the default gem install dir, when deployment-mode is activated? I have no idea. I'd say it looks like a bug, as [the docs do say that gems are installed to "vendor/bundle" in deployment mode](https://github.com/rubygems/rubygems/blob/def27af571af48f7375cc0bdc58b845122dcb5b4/bundler/lib/bundler/man/bundle-install.1.ronn#deployment-mode), and ruby docker defaults overriding `GEM_HOME` causes `bundler` to use it to install gems, but then it gets ignored for path lookups? But somehow works when user can `sudo`? Do `bundler` and `rubygems` still have a few misalignments to work out? `bundler` defaults don't seem to be the sanest, as [this blog post puts it, whether you agree with the tone or not](https://felipec.wordpress.com/2022/08/25/fixing-ruby-gems-installation/), it can definitely do better.
But don't get me wrong, as it's still better than dealing with the absolute scorched earth equivalent in `python` or `nodejs`.
No bundler options were deprecated while performing these reproductions.