mirror of
https://github.com/HoneyryderChuck/httpx.git
synced 2025-10-15 00:00:39 -04:00
Merge branch 'issue-124' into 'master'
idnx and IDN improvements Closes #124 See merge request honeyryderchuck/httpx!138
This commit is contained in:
commit
c1828a8213
@ -19,6 +19,7 @@ AllCops:
|
|||||||
- 'vendor/**/*'
|
- 'vendor/**/*'
|
||||||
- 'www/**/*'
|
- 'www/**/*'
|
||||||
- 'lib/httpx/extensions.rb'
|
- 'lib/httpx/extensions.rb'
|
||||||
|
- 'lib/httpx/punycode.rb'
|
||||||
# Do not lint ffi block, for openssl parity
|
# Do not lint ffi block, for openssl parity
|
||||||
- 'lib/httpx/io/tls/*.rb'
|
- 'lib/httpx/io/tls/*.rb'
|
||||||
|
|
||||||
|
@ -10,4 +10,5 @@ SimpleCov.start do
|
|||||||
add_filter "/lib/httpx/plugins/multipart/mime_type_detector.rb"
|
add_filter "/lib/httpx/plugins/multipart/mime_type_detector.rb"
|
||||||
add_filter "/lib/httpx/io/tls/"
|
add_filter "/lib/httpx/io/tls/"
|
||||||
add_filter "/lib/httpx/io/tls.rb"
|
add_filter "/lib/httpx/io/tls.rb"
|
||||||
|
add_filter "/lib/httpx/punycode.rb"
|
||||||
end
|
end
|
||||||
|
1
Gemfile
1
Gemfile
@ -66,6 +66,7 @@ group :test do
|
|||||||
|
|
||||||
gem "aws-sdk-s3"
|
gem "aws-sdk-s3"
|
||||||
gem "faraday"
|
gem "faraday"
|
||||||
|
gem "idnx" if RUBY_VERSION >= "2.4.0"
|
||||||
gem "oga"
|
gem "oga"
|
||||||
|
|
||||||
if RUBY_VERSION >= "3.0.0"
|
if RUBY_VERSION >= "3.0.0"
|
||||||
|
@ -6,6 +6,7 @@ require "httpx/extensions"
|
|||||||
|
|
||||||
require "httpx/errors"
|
require "httpx/errors"
|
||||||
require "httpx/utils"
|
require "httpx/utils"
|
||||||
|
require "httpx/punycode"
|
||||||
require "httpx/domain_name"
|
require "httpx/domain_name"
|
||||||
require "httpx/altsvc"
|
require "httpx/altsvc"
|
||||||
require "httpx/callbacks"
|
require "httpx/callbacks"
|
||||||
|
@ -51,7 +51,7 @@ module HTTPX
|
|||||||
def initialize(type, uri, options)
|
def initialize(type, uri, options)
|
||||||
@type = type
|
@type = type
|
||||||
@origins = [uri.origin]
|
@origins = [uri.origin]
|
||||||
@origin = Utils.uri(uri.origin)
|
@origin = Utils.to_uri(uri.origin)
|
||||||
@options = Options.new(options)
|
@options = Options.new(options)
|
||||||
@window_size = @options.window_size
|
@window_size = @options.window_size
|
||||||
@read_buffer = Buffer.new(BUFFER_SIZE)
|
@read_buffer = Buffer.new(BUFFER_SIZE)
|
||||||
|
@ -144,295 +144,5 @@ module HTTPX
|
|||||||
1
|
1
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# :nocov:
|
|
||||||
# rubocop:disable all
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
#--
|
|
||||||
# punycode.rb - PunyCode encoder for the Domain Name library
|
|
||||||
#
|
|
||||||
# Copyright (C) 2011-2017 Akinori MUSHA, All rights reserved.
|
|
||||||
#
|
|
||||||
# Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN
|
|
||||||
# Library.
|
|
||||||
#
|
|
||||||
# Copyright (C) 2000-2002 Verisign Inc., All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or
|
|
||||||
# without modification, are permitted provided that the following
|
|
||||||
# conditions are met:
|
|
||||||
#
|
|
||||||
# 1) Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
#
|
|
||||||
# 2) Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in
|
|
||||||
# the documentation and/or other materials provided with the
|
|
||||||
# distribution.
|
|
||||||
#
|
|
||||||
# 3) Neither the name of the VeriSign Inc. nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived
|
|
||||||
# from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
|
||||||
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
|
||||||
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
# This software is licensed under the BSD open source license. For more
|
|
||||||
# information visit www.opensource.org.
|
|
||||||
#
|
|
||||||
# Authors:
|
|
||||||
# John Colosi (VeriSign)
|
|
||||||
# Srikanth Veeramachaneni (VeriSign)
|
|
||||||
# Nagesh Chigurupati (Verisign)
|
|
||||||
# Praveen Srinivasan(Verisign)
|
|
||||||
#++
|
|
||||||
module Punycode
|
|
||||||
BASE = 36
|
|
||||||
TMIN = 1
|
|
||||||
TMAX = 26
|
|
||||||
SKEW = 38
|
|
||||||
DAMP = 700
|
|
||||||
INITIAL_BIAS = 72
|
|
||||||
INITIAL_N = 0x80
|
|
||||||
DELIMITER = "-"
|
|
||||||
|
|
||||||
MAXINT = (1 << 32) - 1
|
|
||||||
|
|
||||||
LOBASE = BASE - TMIN
|
|
||||||
CUTOFF = LOBASE * TMAX / 2
|
|
||||||
|
|
||||||
RE_NONBASIC = /[^\x00-\x7f]/.freeze
|
|
||||||
|
|
||||||
# Returns the numeric value of a basic code point (for use in
|
|
||||||
# representing integers) in the range 0 to base-1, or nil if cp
|
|
||||||
# does not represent a value.
|
|
||||||
DECODE_DIGIT = {}.tap do |map|
|
|
||||||
# ASCII A..Z map to 0..25
|
|
||||||
# ASCII a..z map to 0..25
|
|
||||||
(0..25).each { |i| map[65 + i] = map[97 + i] = i }
|
|
||||||
# ASCII 0..9 map to 26..35
|
|
||||||
(26..35).each { |i| map[22 + i] = i }
|
|
||||||
end
|
|
||||||
|
|
||||||
# Returns the basic code point whose value (when used for
|
|
||||||
# representing integers) is d, which must be in the range 0 to
|
|
||||||
# BASE-1. The lowercase form is used unless flag is true, in
|
|
||||||
# which case the uppercase form is used. The behavior is
|
|
||||||
# undefined if flag is nonzero and digit d has no uppercase
|
|
||||||
# form.
|
|
||||||
ENCODE_DIGIT = proc { |d, flag|
|
|
||||||
(d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
|
|
||||||
# 0..25 map to ASCII a..z or A..Z
|
|
||||||
# 26..35 map to ASCII 0..9
|
|
||||||
}
|
|
||||||
|
|
||||||
DOT = "."
|
|
||||||
PREFIX = "xn--"
|
|
||||||
|
|
||||||
# Most errors we raise are basically kind of ArgumentError.
|
|
||||||
class ArgumentError < ::ArgumentError; end
|
|
||||||
class BufferOverflowError < ArgumentError; end
|
|
||||||
|
|
||||||
class << self
|
|
||||||
# Encode a +string+ in Punycode
|
|
||||||
def encode(string)
|
|
||||||
input = string.unpack("U*")
|
|
||||||
output = +""
|
|
||||||
|
|
||||||
# Initialize the state
|
|
||||||
n = INITIAL_N
|
|
||||||
delta = 0
|
|
||||||
bias = INITIAL_BIAS
|
|
||||||
|
|
||||||
# Handle the basic code points
|
|
||||||
input.each { |cp| output << cp.chr if cp < 0x80 }
|
|
||||||
|
|
||||||
h = b = output.length
|
|
||||||
|
|
||||||
# h is the number of code points that have been handled, b is the
|
|
||||||
# number of basic code points, and out is the number of characters
|
|
||||||
# that have been output.
|
|
||||||
|
|
||||||
output << DELIMITER if b > 0
|
|
||||||
|
|
||||||
# Main encoding loop
|
|
||||||
|
|
||||||
while h < input.length
|
|
||||||
# All non-basic code points < n have been handled already. Find
|
|
||||||
# the next larger one
|
|
||||||
|
|
||||||
m = MAXINT
|
|
||||||
input.each do |cp|
|
|
||||||
m = cp if (n...m) === cp
|
|
||||||
end
|
|
||||||
|
|
||||||
# Increase delta enough to advance the decoder's <n,i> state to
|
|
||||||
# <m,0>, but guard against overflow
|
|
||||||
|
|
||||||
delta += (m - n) * (h + 1)
|
|
||||||
raise BufferOverflowError if delta > MAXINT
|
|
||||||
|
|
||||||
n = m
|
|
||||||
|
|
||||||
input.each do |cp|
|
|
||||||
# AMC-ACE-Z can use this simplified version instead
|
|
||||||
if cp < n
|
|
||||||
delta += 1
|
|
||||||
raise BufferOverflowError if delta > MAXINT
|
|
||||||
elsif cp == n
|
|
||||||
# Represent delta as a generalized variable-length integer
|
|
||||||
q = delta
|
|
||||||
k = BASE
|
|
||||||
loop do
|
|
||||||
t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
|
|
||||||
break if q < t
|
|
||||||
|
|
||||||
q, r = (q - t).divmod(BASE - t)
|
|
||||||
output << ENCODE_DIGIT[t + r, false]
|
|
||||||
k += BASE
|
|
||||||
end
|
|
||||||
|
|
||||||
output << ENCODE_DIGIT[q, false]
|
|
||||||
|
|
||||||
# Adapt the bias
|
|
||||||
delta = h == b ? delta / DAMP : delta >> 1
|
|
||||||
delta += delta / (h + 1)
|
|
||||||
bias = 0
|
|
||||||
while delta > CUTOFF
|
|
||||||
delta /= LOBASE
|
|
||||||
bias += BASE
|
|
||||||
end
|
|
||||||
bias += (LOBASE + 1) * delta / (delta + SKEW)
|
|
||||||
|
|
||||||
delta = 0
|
|
||||||
h += 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
delta += 1
|
|
||||||
n += 1
|
|
||||||
end
|
|
||||||
|
|
||||||
output
|
|
||||||
end
|
|
||||||
|
|
||||||
# Encode a hostname using IDN/Punycode algorithms
|
|
||||||
def encode_hostname(hostname)
|
|
||||||
hostname.match(RE_NONBASIC) || (return hostname)
|
|
||||||
|
|
||||||
hostname.split(DOT).map do |name|
|
|
||||||
if name.match(RE_NONBASIC)
|
|
||||||
PREFIX + encode(name)
|
|
||||||
else
|
|
||||||
name
|
|
||||||
end
|
|
||||||
end.join(DOT)
|
|
||||||
end
|
|
||||||
|
|
||||||
# Decode a +string+ encoded in Punycode
|
|
||||||
def decode(string)
|
|
||||||
# Initialize the state
|
|
||||||
n = INITIAL_N
|
|
||||||
i = 0
|
|
||||||
bias = INITIAL_BIAS
|
|
||||||
|
|
||||||
if j = string.rindex(DELIMITER)
|
|
||||||
b = string[0...j]
|
|
||||||
|
|
||||||
b.match(RE_NONBASIC) &&
|
|
||||||
raise(ArgumentError, "Illegal character is found in basic part: #{string.inspect}")
|
|
||||||
|
|
||||||
# Handle the basic code points
|
|
||||||
|
|
||||||
output = b.unpack("U*")
|
|
||||||
u = string[(j + 1)..-1]
|
|
||||||
else
|
|
||||||
output = []
|
|
||||||
u = string
|
|
||||||
end
|
|
||||||
|
|
||||||
# Main decoding loop: Start just after the last delimiter if any
|
|
||||||
# basic code points were copied; start at the beginning
|
|
||||||
# otherwise.
|
|
||||||
|
|
||||||
input = u.unpack("C*")
|
|
||||||
input_length = input.length
|
|
||||||
h = 0
|
|
||||||
out = output.length
|
|
||||||
|
|
||||||
while h < input_length
|
|
||||||
# Decode a generalized variable-length integer into delta,
|
|
||||||
# which gets added to i. The overflow checking is easier
|
|
||||||
# if we increase i as we go, then subtract off its starting
|
|
||||||
# value at the end to obtain delta.
|
|
||||||
|
|
||||||
oldi = i
|
|
||||||
w = 1
|
|
||||||
k = BASE
|
|
||||||
|
|
||||||
loop do
|
|
||||||
(digit = DECODE_DIGIT[input[h]]) ||
|
|
||||||
raise(ArgumentError, "Illegal character is found in non-basic part: #{string.inspect}")
|
|
||||||
h += 1
|
|
||||||
i += digit * w
|
|
||||||
raise BufferOverflowError if i > MAXINT
|
|
||||||
|
|
||||||
t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
|
|
||||||
break if digit < t
|
|
||||||
|
|
||||||
w *= BASE - t
|
|
||||||
raise BufferOverflowError if w > MAXINT
|
|
||||||
|
|
||||||
k += BASE
|
|
||||||
(h < input_length) || raise(ArgumentError, "Malformed input given: #{string.inspect}")
|
|
||||||
end
|
|
||||||
|
|
||||||
# Adapt the bias
|
|
||||||
delta = oldi == 0 ? i / DAMP : (i - oldi) >> 1
|
|
||||||
delta += delta / (out + 1)
|
|
||||||
bias = 0
|
|
||||||
while delta > CUTOFF
|
|
||||||
delta /= LOBASE
|
|
||||||
bias += BASE
|
|
||||||
end
|
|
||||||
bias += (LOBASE + 1) * delta / (delta + SKEW)
|
|
||||||
|
|
||||||
# i was supposed to wrap around from out+1 to 0, incrementing
|
|
||||||
# n each time, so we'll fix that now:
|
|
||||||
|
|
||||||
q, i = i.divmod(out + 1)
|
|
||||||
n += q
|
|
||||||
raise BufferOverflowError if n > MAXINT
|
|
||||||
|
|
||||||
# Insert n at position i of the output:
|
|
||||||
|
|
||||||
output[i, 0] = n
|
|
||||||
|
|
||||||
out += 1
|
|
||||||
i += 1
|
|
||||||
end
|
|
||||||
output.pack("U*")
|
|
||||||
end
|
|
||||||
|
|
||||||
# Decode a hostname using IDN/Punycode algorithms
|
|
||||||
def decode_hostname(hostname)
|
|
||||||
hostname.gsub(/(\A|#{Regexp.quote(DOT)})#{Regexp.quote(PREFIX)}([^#{Regexp.quote(DOT)}]*)/o) do
|
|
||||||
Regexp.last_match(1) << decode(Regexp.last_match(2))
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
# rubocop:enable all
|
|
||||||
# :nocov:
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -78,7 +78,7 @@ module HTTPX
|
|||||||
|
|
||||||
def authority
|
def authority
|
||||||
port_string = port == default_port ? nil : ":#{port}"
|
port_string = port == default_port ? nil : ":#{port}"
|
||||||
"#{@non_ascii_hostname || host}#{port_string}"
|
"#{host}#{port_string}"
|
||||||
end
|
end
|
||||||
|
|
||||||
def origin
|
def origin
|
||||||
|
304
lib/httpx/punycode.rb
Normal file
304
lib/httpx/punycode.rb
Normal file
@ -0,0 +1,304 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module HTTPX
|
||||||
|
begin
|
||||||
|
require "idnx"
|
||||||
|
|
||||||
|
module Punycode
|
||||||
|
module_function
|
||||||
|
|
||||||
|
def encode_hostname(hostname)
|
||||||
|
Idnx.to_punycode(hostname)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
rescue LoadError
|
||||||
|
# :nocov:
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#--
|
||||||
|
# punycode.rb - PunyCode encoder for the Domain Name library
|
||||||
|
#
|
||||||
|
# Copyright (C) 2011-2017 Akinori MUSHA, All rights reserved.
|
||||||
|
#
|
||||||
|
# Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN
|
||||||
|
# Library.
|
||||||
|
#
|
||||||
|
# Copyright (C) 2000-2002 Verisign Inc., All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or
|
||||||
|
# without modification, are permitted provided that the following
|
||||||
|
# conditions are met:
|
||||||
|
#
|
||||||
|
# 1) Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# 2) Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
#
|
||||||
|
# 3) Neither the name of the VeriSign Inc. nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||||
|
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||||
|
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
||||||
|
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||||
|
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
#
|
||||||
|
# This software is licensed under the BSD open source license. For more
|
||||||
|
# information visit www.opensource.org.
|
||||||
|
#
|
||||||
|
# Authors:
|
||||||
|
# John Colosi (VeriSign)
|
||||||
|
# Srikanth Veeramachaneni (VeriSign)
|
||||||
|
# Nagesh Chigurupati (Verisign)
|
||||||
|
# Praveen Srinivasan(Verisign)
|
||||||
|
#++
|
||||||
|
module Punycode
|
||||||
|
BASE = 36
|
||||||
|
TMIN = 1
|
||||||
|
TMAX = 26
|
||||||
|
SKEW = 38
|
||||||
|
DAMP = 700
|
||||||
|
INITIAL_BIAS = 72
|
||||||
|
INITIAL_N = 0x80
|
||||||
|
DELIMITER = "-"
|
||||||
|
|
||||||
|
MAXINT = (1 << 32) - 1
|
||||||
|
|
||||||
|
LOBASE = BASE - TMIN
|
||||||
|
CUTOFF = LOBASE * TMAX / 2
|
||||||
|
|
||||||
|
RE_NONBASIC = /[^\x00-\x7f]/.freeze
|
||||||
|
|
||||||
|
# Returns the numeric value of a basic code point (for use in
|
||||||
|
# representing integers) in the range 0 to base-1, or nil if cp
|
||||||
|
# does not represent a value.
|
||||||
|
DECODE_DIGIT = {}.tap do |map|
|
||||||
|
# ASCII A..Z map to 0..25
|
||||||
|
# ASCII a..z map to 0..25
|
||||||
|
(0..25).each { |i| map[65 + i] = map[97 + i] = i }
|
||||||
|
# ASCII 0..9 map to 26..35
|
||||||
|
(26..35).each { |i| map[22 + i] = i }
|
||||||
|
end
|
||||||
|
|
||||||
|
# Returns the basic code point whose value (when used for
|
||||||
|
# representing integers) is d, which must be in the range 0 to
|
||||||
|
# BASE-1. The lowercase form is used unless flag is true, in
|
||||||
|
# which case the uppercase form is used. The behavior is
|
||||||
|
# undefined if flag is nonzero and digit d has no uppercase
|
||||||
|
# form.
|
||||||
|
ENCODE_DIGIT = proc { |d, flag|
|
||||||
|
(d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
|
||||||
|
# 0..25 map to ASCII a..z or A..Z
|
||||||
|
# 26..35 map to ASCII 0..9
|
||||||
|
}
|
||||||
|
|
||||||
|
DOT = "."
|
||||||
|
PREFIX = "xn--"
|
||||||
|
|
||||||
|
# Most errors we raise are basically kind of ArgumentError.
|
||||||
|
class ArgumentError < ::ArgumentError; end
|
||||||
|
class BufferOverflowError < ArgumentError; end
|
||||||
|
|
||||||
|
module_function
|
||||||
|
|
||||||
|
# Encode a +string+ in Punycode
|
||||||
|
def encode(string)
|
||||||
|
input = string.unpack("U*")
|
||||||
|
output = +""
|
||||||
|
|
||||||
|
# Initialize the state
|
||||||
|
n = INITIAL_N
|
||||||
|
delta = 0
|
||||||
|
bias = INITIAL_BIAS
|
||||||
|
|
||||||
|
# Handle the basic code points
|
||||||
|
input.each { |cp| output << cp.chr if cp < 0x80 }
|
||||||
|
|
||||||
|
h = b = output.length
|
||||||
|
|
||||||
|
# h is the number of code points that have been handled, b is the
|
||||||
|
# number of basic code points, and out is the number of characters
|
||||||
|
# that have been output.
|
||||||
|
|
||||||
|
output << DELIMITER if b > 0
|
||||||
|
|
||||||
|
# Main encoding loop
|
||||||
|
|
||||||
|
while h < input.length
|
||||||
|
# All non-basic code points < n have been handled already. Find
|
||||||
|
# the next larger one
|
||||||
|
|
||||||
|
m = MAXINT
|
||||||
|
input.each do |cp|
|
||||||
|
m = cp if (n...m) === cp
|
||||||
|
end
|
||||||
|
|
||||||
|
# Increase delta enough to advance the decoder's <n,i> state to
|
||||||
|
# <m,0>, but guard against overflow
|
||||||
|
|
||||||
|
delta += (m - n) * (h + 1)
|
||||||
|
raise BufferOverflowError if delta > MAXINT
|
||||||
|
|
||||||
|
n = m
|
||||||
|
|
||||||
|
input.each do |cp|
|
||||||
|
# AMC-ACE-Z can use this simplified version instead
|
||||||
|
if cp < n
|
||||||
|
delta += 1
|
||||||
|
raise BufferOverflowError if delta > MAXINT
|
||||||
|
elsif cp == n
|
||||||
|
# Represent delta as a generalized variable-length integer
|
||||||
|
q = delta
|
||||||
|
k = BASE
|
||||||
|
loop do
|
||||||
|
t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
|
||||||
|
break if q < t
|
||||||
|
|
||||||
|
q, r = (q - t).divmod(BASE - t)
|
||||||
|
output << ENCODE_DIGIT[t + r, false]
|
||||||
|
k += BASE
|
||||||
|
end
|
||||||
|
|
||||||
|
output << ENCODE_DIGIT[q, false]
|
||||||
|
|
||||||
|
# Adapt the bias
|
||||||
|
delta = h == b ? delta / DAMP : delta >> 1
|
||||||
|
delta += delta / (h + 1)
|
||||||
|
bias = 0
|
||||||
|
while delta > CUTOFF
|
||||||
|
delta /= LOBASE
|
||||||
|
bias += BASE
|
||||||
|
end
|
||||||
|
bias += (LOBASE + 1) * delta / (delta + SKEW)
|
||||||
|
|
||||||
|
delta = 0
|
||||||
|
h += 1
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
delta += 1
|
||||||
|
n += 1
|
||||||
|
end
|
||||||
|
|
||||||
|
output
|
||||||
|
end
|
||||||
|
|
||||||
|
# Encode a hostname using IDN/Punycode algorithms
|
||||||
|
def encode_hostname(hostname)
|
||||||
|
hostname.match(RE_NONBASIC) || (return hostname)
|
||||||
|
|
||||||
|
hostname.split(DOT).map do |name|
|
||||||
|
if name.match(RE_NONBASIC)
|
||||||
|
PREFIX + encode(name)
|
||||||
|
else
|
||||||
|
name
|
||||||
|
end
|
||||||
|
end.join(DOT)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Decode a +string+ encoded in Punycode
|
||||||
|
def decode(string)
|
||||||
|
# Initialize the state
|
||||||
|
n = INITIAL_N
|
||||||
|
i = 0
|
||||||
|
bias = INITIAL_BIAS
|
||||||
|
|
||||||
|
if j = string.rindex(DELIMITER)
|
||||||
|
b = string[0...j]
|
||||||
|
|
||||||
|
b.match(RE_NONBASIC) &&
|
||||||
|
raise(ArgumentError, "Illegal character is found in basic part: #{string.inspect}")
|
||||||
|
|
||||||
|
# Handle the basic code points
|
||||||
|
|
||||||
|
output = b.unpack("U*")
|
||||||
|
u = string[(j + 1)..-1]
|
||||||
|
else
|
||||||
|
output = []
|
||||||
|
u = string
|
||||||
|
end
|
||||||
|
|
||||||
|
# Main decoding loop: Start just after the last delimiter if any
|
||||||
|
# basic code points were copied; start at the beginning
|
||||||
|
# otherwise.
|
||||||
|
|
||||||
|
input = u.unpack("C*")
|
||||||
|
input_length = input.length
|
||||||
|
h = 0
|
||||||
|
out = output.length
|
||||||
|
|
||||||
|
while h < input_length
|
||||||
|
# Decode a generalized variable-length integer into delta,
|
||||||
|
# which gets added to i. The overflow checking is easier
|
||||||
|
# if we increase i as we go, then subtract off its starting
|
||||||
|
# value at the end to obtain delta.
|
||||||
|
|
||||||
|
oldi = i
|
||||||
|
w = 1
|
||||||
|
k = BASE
|
||||||
|
|
||||||
|
loop do
|
||||||
|
(digit = DECODE_DIGIT[input[h]]) ||
|
||||||
|
raise(ArgumentError, "Illegal character is found in non-basic part: #{string.inspect}")
|
||||||
|
h += 1
|
||||||
|
i += digit * w
|
||||||
|
raise BufferOverflowError if i > MAXINT
|
||||||
|
|
||||||
|
t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
|
||||||
|
break if digit < t
|
||||||
|
|
||||||
|
w *= BASE - t
|
||||||
|
raise BufferOverflowError if w > MAXINT
|
||||||
|
|
||||||
|
k += BASE
|
||||||
|
(h < input_length) || raise(ArgumentError, "Malformed input given: #{string.inspect}")
|
||||||
|
end
|
||||||
|
|
||||||
|
# Adapt the bias
|
||||||
|
delta = oldi == 0 ? i / DAMP : (i - oldi) >> 1
|
||||||
|
delta += delta / (out + 1)
|
||||||
|
bias = 0
|
||||||
|
while delta > CUTOFF
|
||||||
|
delta /= LOBASE
|
||||||
|
bias += BASE
|
||||||
|
end
|
||||||
|
bias += (LOBASE + 1) * delta / (delta + SKEW)
|
||||||
|
|
||||||
|
# i was supposed to wrap around from out+1 to 0, incrementing
|
||||||
|
# n each time, so we'll fix that now:
|
||||||
|
|
||||||
|
q, i = i.divmod(out + 1)
|
||||||
|
n += q
|
||||||
|
raise BufferOverflowError if n > MAXINT
|
||||||
|
|
||||||
|
# Insert n at position i of the output:
|
||||||
|
|
||||||
|
output[i, 0] = n
|
||||||
|
|
||||||
|
out += 1
|
||||||
|
i += 1
|
||||||
|
end
|
||||||
|
output.pack("U*")
|
||||||
|
end
|
||||||
|
|
||||||
|
# Decode a hostname using IDN/Punycode algorithms
|
||||||
|
def decode_hostname(hostname)
|
||||||
|
hostname.gsub(/(\A|#{Regexp.quote(DOT)})#{Regexp.quote(PREFIX)}([^#{Regexp.quote(DOT)}]*)/o) do
|
||||||
|
Regexp.last_match(1) << decode(Regexp.last_match(2))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
# :nocov:
|
||||||
|
end
|
||||||
|
end
|
@ -45,7 +45,7 @@ module HTTPX
|
|||||||
def initialize(verb, uri, options = {})
|
def initialize(verb, uri, options = {})
|
||||||
@verb = verb.to_s.downcase.to_sym
|
@verb = verb.to_s.downcase.to_sym
|
||||||
@options = Options.new(options)
|
@options = Options.new(options)
|
||||||
@uri = Utils.uri(uri)
|
@uri = Utils.to_uri(uri)
|
||||||
if @uri.relative?
|
if @uri.relative?
|
||||||
raise(Error, "invalid URI: #{@uri}") unless @options.origin
|
raise(Error, "invalid URI: #{@uri}") unless @options.origin
|
||||||
|
|
||||||
|
@ -18,14 +18,16 @@ module HTTPX
|
|||||||
end
|
end
|
||||||
|
|
||||||
if RUBY_VERSION < "2.3"
|
if RUBY_VERSION < "2.3"
|
||||||
def uri(*args)
|
|
||||||
URI(*args)
|
def to_uri(uri)
|
||||||
|
URI(uri)
|
||||||
end
|
end
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
||||||
URIParser = URI::RFC2396_Parser.new
|
URIParser = URI::RFC2396_Parser.new
|
||||||
|
|
||||||
def uri(uri)
|
def to_uri(uri)
|
||||||
return Kernel.URI(uri) unless uri.is_a?(String) && !uri.ascii_only?
|
return Kernel.URI(uri) unless uri.is_a?(String) && !uri.ascii_only?
|
||||||
|
|
||||||
uri = Kernel.URI(URIParser.escape(uri))
|
uri = Kernel.URI(URIParser.escape(uri))
|
||||||
@ -34,7 +36,7 @@ module HTTPX
|
|||||||
|
|
||||||
non_ascii_hostname.force_encoding(Encoding::UTF_8)
|
non_ascii_hostname.force_encoding(Encoding::UTF_8)
|
||||||
|
|
||||||
idna_hostname = DomainName.new(non_ascii_hostname).hostname
|
idna_hostname = Punycode.encode_hostname(non_ascii_hostname)
|
||||||
|
|
||||||
uri.host = idna_hostname
|
uri.host = idna_hostname
|
||||||
uri.non_ascii_hostname = non_ascii_hostname
|
uri.non_ascii_hostname = non_ascii_hostname
|
||||||
|
7
sig/utils.rbs
Normal file
7
sig/utils.rbs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
module HTTPX
|
||||||
|
module Utils
|
||||||
|
def self?.parse_retry_after: (String) -> Numeric
|
||||||
|
|
||||||
|
def self?.to_uri: (generic_uri uri) -> URI::Generic
|
||||||
|
end
|
||||||
|
end
|
@ -17,12 +17,12 @@ module ResponseHelpers
|
|||||||
if value.respond_to?(:start_with?)
|
if value.respond_to?(:start_with?)
|
||||||
assert value.start_with?(expect), "#{meth} assertion failed: \#{key}=\#{value} (expected: \#{expect}})"
|
assert value.start_with?(expect), "#{meth} assertion failed: \#{key}=\#{value} (expected: \#{expect}})"
|
||||||
else
|
else
|
||||||
assert value == expect, "#{meth} assertion failed: \#{key}=\#{value.to_s} (expected: \#{expect.to_s})"
|
assert value == expect, "#{meth} assertion failed: \#{key}=\#{value.inspect} (expected: \#{expect.to_s})"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def verify_no_#{meth}(#{meth}s, key)
|
def verify_no_#{meth}(#{meth}s, key)
|
||||||
assert !#{meth}s.key?(key), "#{meth}s contains the given key (" + key + ": \#{#{meth}s[key]})"
|
assert !#{meth}s.key?(key), "#{meth}s contains the given key (" + key + ": \#{#{meth}s[key].inspect})"
|
||||||
end
|
end
|
||||||
DEFINE
|
DEFINE
|
||||||
end
|
end
|
||||||
|
@ -6,12 +6,12 @@ RUBY_PLATFORM=`ruby -e 'puts RUBY_PLATFORM'`
|
|||||||
RUBY_ENGINE=`ruby -e 'puts RUBY_ENGINE'`
|
RUBY_ENGINE=`ruby -e 'puts RUBY_ENGINE'`
|
||||||
|
|
||||||
if [[ "$RUBY_ENGINE" = "truffleruby" ]]; then
|
if [[ "$RUBY_ENGINE" = "truffleruby" ]]; then
|
||||||
microdnf install -y iptables iproute which file
|
microdnf install -y iptables iproute which file idn2
|
||||||
elif [[ "$RUBY_PLATFORM" = "java" ]]; then
|
elif [[ "$RUBY_PLATFORM" = "java" ]]; then
|
||||||
echo "
|
echo "
|
||||||
deb http://deb.debian.org/debian sid main contrib non-free
|
deb http://deb.debian.org/debian sid main contrib non-free
|
||||||
deb-src http://deb.debian.org/debian sid main contrib non-free" >> /etc/apt/sources.list
|
deb-src http://deb.debian.org/debian sid main contrib non-free" >> /etc/apt/sources.list
|
||||||
apt-get update && apt-get install -y iptables openssl libssl-dev ca-certificates file
|
apt-get update && apt-get install -y iptables openssl libssl-dev ca-certificates file idn2
|
||||||
update-ca-certificates
|
update-ca-certificates
|
||||||
elif [[ ${RUBY_VERSION:0:3} = "2.1" ]]; then
|
elif [[ ${RUBY_VERSION:0:3} = "2.1" ]]; then
|
||||||
apt-get update && apt-get install -y libsodium-dev iptables
|
apt-get update && apt-get install -y libsodium-dev iptables
|
||||||
@ -23,7 +23,7 @@ elif [[ ${RUBY_VERSION:0:3} = "2.3" ]]; then
|
|||||||
wget http://deb.debian.org/debian/pool/main/o/openssl1.0/libssl1.0-dev_1.0.2u-1~deb9u1_amd64.deb
|
wget http://deb.debian.org/debian/pool/main/o/openssl1.0/libssl1.0-dev_1.0.2u-1~deb9u1_amd64.deb
|
||||||
dpkg -i libssl1.0-dev_1.0.2u-1~deb9u1_amd64.deb
|
dpkg -i libssl1.0-dev_1.0.2u-1~deb9u1_amd64.deb
|
||||||
else
|
else
|
||||||
apt-get update && apt-get install -y iptables
|
apt-get update && apt-get install -y iptables idn2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# use port 9090 to test connection timeouts
|
# use port 9090 to test connection timeouts
|
||||||
|
@ -94,11 +94,17 @@ module Requests
|
|||||||
response.close
|
response.close
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_get_non_ascii
|
def test_get_idn
|
||||||
response = HTTPX.get("http://bücher.ch")
|
response = HTTPX.get("http://bücher.ch")
|
||||||
verify_status(response, 200)
|
verify_status(response, 301)
|
||||||
|
verify_header(response.headers, "location", "https://www.buecher.de")
|
||||||
|
|
||||||
response.close
|
response.close
|
||||||
|
|
||||||
|
assert response.instance_variable_get(:@request).authority == "xn--bcher-kva.ch"
|
||||||
|
end unless RUBY_VERSION < "2.3"
|
||||||
|
|
||||||
|
def test_get_non_ascii
|
||||||
response = HTTPX.get(build_uri("/get?q=ã"))
|
response = HTTPX.get(build_uri("/get?q=ã"))
|
||||||
verify_status(response, 200)
|
verify_status(response, 200)
|
||||||
response.close
|
response.close
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
description: API and support for NTLM Authentication.
|
description: API and support for NTLM Authentication.
|
||||||
-
|
-
|
||||||
name: AwsSigV4Authentication
|
name: AwsSigV4Authentication
|
||||||
path: AWS-SigV4.html
|
path: AWS-SigV4
|
||||||
description: API and support for AWS SigV4 Authentication.
|
description: API and support for AWS SigV4 Authentication.
|
||||||
-
|
-
|
||||||
name: Compression
|
name: Compression
|
||||||
|
44
www/_posts/2021-06-11-introducing-idnx.md
Normal file
44
www/_posts/2021-06-11-introducing-idnx.md
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
---
|
||||||
|
layout: post
|
||||||
|
title: Introducing idnx
|
||||||
|
---
|
||||||
|
|
||||||
|
|
||||||
|
I've just published the first version of [idnx](https://github.com/HoneyryderChuck/idnx) to Rubygems. `idnx` is a ruby gem which converts Internationalized Domain Names into Punycode. The gist of it is:
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
require "idnx"
|
||||||
|
|
||||||
|
Idnx.to_punycode("bücher.de") #=> "xn--bcher-kva.de"
|
||||||
|
```
|
||||||
|
|
||||||
|
That's it! That's the announcement!
|
||||||
|
|
||||||
|
## Why yet another idn gem?
|
||||||
|
|
||||||
|
Let me spare you the work: here's the [ruby toolbox link](https://www.ruby-toolbox.com/search?q=idn). Yes, there have been many IDN-related gems over the years. Why yet another one?
|
||||||
|
|
||||||
|
While researching how to better support IDN domain names for `httpx`, I asked myself, "what does cURL do?". After a session of "look at the source", I found out that cURL uses [libidn2](https://github.com/libidn/libidn2) in Unix environments, while it uses [the winAPI IdnToAscii](https://docs.microsoft.com/en-us/windows/win32/api/winnls/nf-winnls-idntoascii) on Windows.
|
||||||
|
|
||||||
|
After that, I searched for a ruby library that would support at least one of the above. To my surprise, I didn't find any. In fact, I found out that most of the idn-related gems from that ruby toolbox list haven't received much attention for years, and most of them use [libidn](https://www.gnu.org/software/libidn/), the predecessor of `libidn2`, which does not support the IDNA 2008 Punycode protocol. Also, none of them supports Windows.
|
||||||
|
|
||||||
|
So I decided to roll my own, the cURL way: provide bindings for `libidn2`, while using Windows APIs for Windows, all via FFI, so that it'd transparently work with JRuby.
|
||||||
|
|
||||||
|
## Why no punycode-to-idn translation?
|
||||||
|
|
||||||
|
The short answer is: because I don't need it. If you do though, I'll welcome a Pull Request introducing it.
|
||||||
|
|
||||||
|
## Why doesn't ruby provide this?
|
||||||
|
|
||||||
|
I've previously [discussed in the ruby bugs board](https://bugs.ruby-lang.org/issues/17309) the lack of support for punycode, and how that breaks the "principle of least astonishment" when using standard library APIs like `uri` or `resolv`. I understand that supporting it would require `ruby` to depend on `libidn2` (at least in Linux/BSD systems), and the core team has been pretty resistant when it comes to adding more dependencies to the runtime. I understand that this'll never happen, unless someone makes a convincing argument that satisfies the core team.
|
||||||
|
|
||||||
|
Until then, you can use this gem, which, should that day come, can hopefully work as a template.
|
||||||
|
|
||||||
|
## Will I need idnx to use httpx?
|
||||||
|
|
||||||
|
No. `idnx` will be a "weak" dependency, i.e. you'll have to install it yourself, and `httpx` will hook on it if available. It'll otherwise fall back to a [pure ruby punycode implementation imported from another ruby gem](https://gitlab.com/honeyryderchuck/httpx/-/blob/master/lib/httpx/punycode.rb) (it doesn't support IDNA2008 however, so make sure to use `idnx` if you require it).
|
||||||
|
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
That's it. Happy hacking!
|
Loading…
x
Reference in New Issue
Block a user