From 048501e9400d4a3ec3a34cec1d3c316217390f0f Mon Sep 17 00:00:00 2001 From: HoneyryderChuck Date: Mon, 2 Nov 2020 10:29:46 +0000 Subject: [PATCH] adding and integrating the new cookie modules for: store, jar, cookie, and domain name --- lib/httpx/plugins/cookies.rb | 76 +-- lib/httpx/plugins/cookies/cookie.rb | 177 +++++++ lib/httpx/plugins/cookies/domain_name.rb | 440 ++++++++++++++++++ lib/httpx/plugins/cookies/jar.rb | 74 +++ .../plugins/cookies/set_cookie_parser.rb | 154 ++++++ sig/plugins/cookies.rbs | 26 +- sig/plugins/cookies/cookie.rbs | 50 ++ sig/plugins/cookies/domain_name.rbs | 19 + sig/plugins/cookies/jar.rbs | 27 ++ test/support/requests/plugins/cookies.rb | 126 ++++- 10 files changed, 1087 insertions(+), 82 deletions(-) create mode 100644 lib/httpx/plugins/cookies/cookie.rb create mode 100644 lib/httpx/plugins/cookies/domain_name.rb create mode 100644 lib/httpx/plugins/cookies/jar.rb create mode 100644 lib/httpx/plugins/cookies/set_cookie_parser.rb create mode 100644 sig/plugins/cookies/cookie.rbs create mode 100644 sig/plugins/cookies/domain_name.rbs create mode 100644 sig/plugins/cookies/jar.rbs diff --git a/lib/httpx/plugins/cookies.rb b/lib/httpx/plugins/cookies.rb index 935f0b1e..201328fe 100644 --- a/lib/httpx/plugins/cookies.rb +++ b/lib/httpx/plugins/cookies.rb @@ -13,67 +13,19 @@ module HTTPX # module Cookies def self.load_dependencies(*) - require "httpx/plugins/cookies/store" require "httpx/plugins/cookies/jar" require "httpx/plugins/cookies/cookie" require "httpx/plugins/cookies/domain_name" require "httpx/plugins/cookies/set_cookie_parser" end - class Store - def self.new(cookies = nil) - return cookies if cookies.is_a?(self) - - super - end - - def initialize(cookies = nil) - @store = Hash.new { |hash, origin| hash[origin] = HTTP::CookieJar.new } - - return unless cookies - - @default_cookies = cookies.enum_for(:each).map do |*args| - if args.size == 1 && args.first.is_a?(HTTP::Cookie) - args.first - else - HTTP::Cookie.new(*args) - end - end - end - - def set(origin, cookies) - return unless cookies - - @store[origin].parse(cookies, origin) - end - - def [](uri) - store = @store[uri.origin] - @default_cookies.each do |cookie| - c = cookie.dup - c.domain ||= uri.authority - c.path ||= uri.path - store.add(c) - end if @default_cookies - store - end - - def ==(other) - @store == other.instance_variable_get(:@store) - end - end - - def self.load_dependencies(*) - require "http/cookie" - end - def self.extra_options(options) Class.new(options.class) do def_option(:cookies) do |cookies| - if cookies.is_a?(Store) + if cookies.is_a?(Jar) cookies else - Store.new(cookies) + Jar.new(cookies) end end end.new(options) @@ -85,26 +37,31 @@ module HTTPX def_delegator :@options, :cookies def initialize(options = {}, &blk) - super({ cookies: Store.new }.merge(options), &blk) + super({ cookies: Jar.new }.merge(options), &blk) end def wrap return super unless block_given? super do |session| - old_cookies_store = @options.cookies.dup + old_cookies_jar = @options.cookies.dup begin yield session ensure - @options = @options.with(cookies: old_cookies_store) + @options = @options.merge(cookies: old_cookies_jar) end end end private - def on_response(request, response) - @options.cookies.set(request.origin, response.headers["set-cookie"]) if response.respond_to?(:headers) + def on_response(reuest, response) + if response && response.respond_to?(:headers) && (set_cookie = response.headers["set-cookie"]) + + log { "cookies: set-cookie is over #{Cookie::MAX_LENGTH}" } if set_cookie.bytesize > Cookie::MAX_LENGTH + + @options.cookies.parse(set_cookie) + end super end @@ -117,13 +74,12 @@ module HTTPX end module HeadersMethods - def set_cookie(jar) - return unless jar + def set_cookie(cookies) + return if cookies.empty? - cookie_value = HTTP::Cookie.cookie_value(jar.cookies) - return if cookie_value.empty? + header_value = cookies.sort.join("; ") - add("cookie", cookie_value) + add("cookie", header_value) end end end diff --git a/lib/httpx/plugins/cookies/cookie.rb b/lib/httpx/plugins/cookies/cookie.rb new file mode 100644 index 00000000..4da32ce9 --- /dev/null +++ b/lib/httpx/plugins/cookies/cookie.rb @@ -0,0 +1,177 @@ +# frozen_string_literal: true + +module HTTPX::Plugins::Cookies + # The HTTP Cookie. + # + # Contains the single cookie info: name, value and attributes. + class Cookie + include Comparable + # Maximum number of bytes per cookie (RFC 6265 6.1 requires 4096 at + # least) + MAX_LENGTH = 4096 + + attr_reader :domain + + attr_reader :path + + attr_reader :name, :value + + attr_reader :created_at + + def path=(path) + path = String(path) + @path = path.start_with?("/") ? path : "/" + end + + # See #domain. + def domain=(domain) + domain = String(domain) + + if domain.start_with?(".") + @for_domain = true + domain = domain[1..-1] + end + + return if domain.empty? + + @domain_name = DomainName.new(domain) + # RFC 6265 5.3 5. + @for_domain = false if @domain_name.domain.nil? # a public suffix or IP address + + @domain = @domain_name.hostname + end + + # Compares the cookie with another. When there are many cookies with + # the same name for a URL, the value of the smallest must be used. + def <=>(other) + # RFC 6265 5.4 + # Precedence: 1. longer path 2. older creation + (@name <=> other.name).nonzero? || + (other.path.length <=> @path.length).nonzero? || + (@created_at <=> other.created_at).nonzero? || + @value <=> other.value + end + + class << self + def new(cookie, *args) + return cookie if cookie.is_a?(self) + + super + end + + # Tests if +target_path+ is under +base_path+ as described in RFC + # 6265 5.1.4. +base_path+ must be an absolute path. + # +target_path+ may be empty, in which case it is treated as the + # root path. + # + # e.g. + # + # path_match?('/admin/', '/admin/index') == true + # path_match?('/admin/', '/Admin/index') == false + # path_match?('/admin/', '/admin/') == true + # path_match?('/admin/', '/admin') == false + # + # path_match?('/admin', '/admin') == true + # path_match?('/admin', '/Admin') == false + # path_match?('/admin', '/admins') == false + # path_match?('/admin', '/admin/') == true + # path_match?('/admin', '/admin/index') == true + def path_match?(base_path, target_path) + base_path.start_with?("/") || (return false) + # RFC 6265 5.1.4 + bsize = base_path.size + tsize = target_path.size + return bsize == 1 if tsize.zero? # treat empty target_path as "/" + return false unless target_path.start_with?(base_path) + return true if bsize == tsize || base_path.end_with?("/") + + target_path[bsize] == "/" + end + end + + def initialize(arg, *attrs) + @created_at = Time.now + + if attrs.empty? + attr_hash = Hash.try_convert(arg) + else + @name = arg + @value, attr_hash = attrs + attr_hash = Hash.try_convert(attr_hash) + end + + attr_hash.each do |key, val| + key = key.downcase.tr("-", "_").to_sym unless key.is_a?(Symbol) + + case key + when :domain, :path + __send__(:"#{key}=", val) + else + instance_variable_set(:"@#{key}", val) + end + end if attr_hash + + @path ||= "/" + raise ArgumentError, "name must be specified" if @name.nil? + end + + def expires + @expires || (@created_at && @max_age ? @created_at + @max_age : nil) + end + + def expired?(time = Time.now) + return false unless expires + + expires <= time + end + + # Returns a string for use in the Cookie header, i.e. `name=value` + # or `name="value"`. + def cookie_value + "#{@name}=#{Scanner.quote(@value)}" + end + alias_method :to_s, :cookie_value + + # Tests if it is OK to send this cookie to a given `uri`. A + # RuntimeError is raised if the cookie's domain is unknown. + def valid_for_uri?(uri) + uri = URI(uri) + # RFC 6265 5.4 + + return false if @secure && uri.scheme != "https" + + acceptable_from_uri?(uri) && Cookie.path_match?(@path, uri.path) + end + + private + + # Tests if it is OK to accept this cookie if it is sent from a given + # URI/URL, `uri`. + def acceptable_from_uri?(uri) + uri = URI(uri) + + host = DomainName.new(uri.host) + + # RFC 6265 5.3 + if host.hostname == @domain + true + elsif @for_domain # !host-only-flag + host.cookie_domain?(@domain_name) + else + @domain.nil? + end + end + + module Scanner + RE_BAD_CHAR = /([\x00-\x20\x7F",;\\])/.freeze + + module_function + + def quote(s) + return s unless s.match(RE_BAD_CHAR) + + "\"#{s.gsub(/([\\"])/, "\\\\\\1")}\"" + end + end + end +end diff --git a/lib/httpx/plugins/cookies/domain_name.rb b/lib/httpx/plugins/cookies/domain_name.rb new file mode 100644 index 00000000..74b495a0 --- /dev/null +++ b/lib/httpx/plugins/cookies/domain_name.rb @@ -0,0 +1,440 @@ +# frozen_string_literal: true + +# +# domain_name.rb - Domain Name manipulation library for Ruby +# +# Copyright (C) 2011-2017 Akinori MUSHA, All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +require "ipaddr" + +module HTTPX::Plugins::Cookies + # Represents a domain name ready for extracting its registered domain + # and TLD. + class DomainName + include Comparable + + # The full host name normalized, ASCII-ized and downcased using the + # Unicode NFC rules and the Punycode algorithm. If initialized with + # an IP address, the string representation of the IP address + # suitable for opening a connection to. + attr_reader :hostname + + # The Unicode representation of the #hostname property. + # + # :attr_reader: hostname_idn + + # The least "universally original" domain part of this domain name. + # For example, "example.co.uk" for "www.sub.example.co.uk". This + # may be nil if the hostname does not have one, like when it is an + # IP address, an effective TLD or higher itself, or of a + # non-canonical domain. + attr_reader :domain + + DOT = "." # :nodoc: + + class << self + def new(domain) + return domain if domain.is_a?(self) + + super(domain) + end + + # Normalizes a _domain_ using the Punycode algorithm as necessary. + # The result will be a downcased, ASCII-only string. + def normalize(domain) + domain = domain.ascii_only? ? domain : domain.chomp(DOT).unicode_normaliza(:nfc) + Punycode.encode_hostname(domain).downcase + end + end + + # Parses _hostname_ into a DomainName object. An IP address is also + # accepted. An IPv6 address may be enclosed in square brackets. + def initialize(hostname) + hostname = String(hostname) + + raise ArgumentError, "domain name must not start with a dot: #{hostname}" if hostname.start_with?(DOT) + + begin + @ipaddr = IPAddr.new(hostname) + @hostname = @ipaddr.to_s + return + rescue IPAddr::Error + nil + end + + @hostname = DomainName.normalize(hostname) + tld = if (last_dot = @hostname.rindex(DOT)) + @hostname[(last_dot + 1)..-1] + else + @hostname + end + + # unknown/local TLD + @domain = if last_dot + # fallback - accept cookies down to second level + # cf. http://www.dkim-reputation.org/regdom-libs/ + if (penultimate_dot = @hostname.rindex(DOT, last_dot - 1)) + @hostname[(penultimate_dot + 1)..-1] + else + @hostname + end + else + # no domain part - must be a local hostname + tld + end + end + + # Checks if the server represented by this domain is qualified to + # send and receive cookies with a domain attribute value of + # _domain_. A true value given as the second argument represents + # cookies without a domain attribute value, in which case only + # hostname equality is checked. + def cookie_domain?(domain, host_only = false) + # RFC 6265 #5.3 + # When the user agent "receives a cookie": + return self == @domain if host_only + + domain = DomainName.new(domain) + + # RFC 6265 #5.1.3 + # Do not perform subdomain matching against IP addresses. + @hostname == domain.hostname if @ipaddr + + # RFC 6265 #4.1.1 + # Domain-value must be a subdomain. + @domain && self <= domain && domain <= @domain ? true : false + end + + # def ==(other) + # other = DomainName.new(other) + # other.hostname == @hostname + # end + + def <=>(other) + other = DomainName.new(other) + othername = other.hostname + if othername == @hostname + 0 + elsif @hostname.end_with?(othername) && @hostname[-othername.size - 1, 1] == DOT + # The other is higher + -1 + elsif othername.end_with?(@hostname) && othername[-@hostname.size - 1, 1] == DOT + # The other is lower + 1 + else + 1 + end + end + + # :nocov: + # rubocop:disable all + # -*- coding: utf-8 -*- + #-- + # punycode.rb - PunyCode encoder for the Domain Name library + # + # Copyright (C) 2011-2017 Akinori MUSHA, All rights reserved. + # + # Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN + # Library. + # + # Copyright (C) 2000-2002 Verisign Inc., All rights reserved. + # + # Redistribution and use in source and binary forms, with or + # without modification, are permitted provided that the following + # conditions are met: + # + # 1) Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # + # 2) Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions and the following disclaimer in + # the documentation and/or other materials provided with the + # distribution. + # + # 3) Neither the name of the VeriSign Inc. nor the names of its + # contributors may be used to endorse or promote products derived + # from this software without specific prior written permission. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + # POSSIBILITY OF SUCH DAMAGE. + # + # This software is licensed under the BSD open source license. For more + # information visit www.opensource.org. + # + # Authors: + # John Colosi (VeriSign) + # Srikanth Veeramachaneni (VeriSign) + # Nagesh Chigurupati (Verisign) + # Praveen Srinivasan(Verisign) + #++ + module Punycode + BASE = 36 + TMIN = 1 + TMAX = 26 + SKEW = 38 + DAMP = 700 + INITIAL_BIAS = 72 + INITIAL_N = 0x80 + DELIMITER = "-" + + MAXINT = (1 << 32) - 1 + + LOBASE = BASE - TMIN + CUTOFF = LOBASE * TMAX / 2 + + RE_NONBASIC = /[^\x00-\x7f]/.freeze + + # Returns the numeric value of a basic code point (for use in + # representing integers) in the range 0 to base-1, or nil if cp + # is does not represent a value. + DECODE_DIGIT = {}.tap do |map| + # ASCII A..Z map to 0..25 + # ASCII a..z map to 0..25 + (0..25).each { |i| map[65 + i] = map[97 + i] = i } + # ASCII 0..9 map to 26..35 + (26..35).each { |i| map[22 + i] = i } + end + + # Returns the basic code point whose value (when used for + # representing integers) is d, which must be in the range 0 to + # BASE-1. The lowercase form is used unless flag is true, in + # which case the uppercase form is used. The behavior is + # undefined if flag is nonzero and digit d has no uppercase + # form. + ENCODE_DIGIT = proc { |d, flag| + (d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr + # 0..25 map to ASCII a..z or A..Z + # 26..35 map to ASCII 0..9 + } + + DOT = "." + PREFIX = "xn--" + + # Most errors we raise are basically kind of ArgumentError. + class ArgumentError < ::ArgumentError; end + class BufferOverflowError < ArgumentError; end + + class << self + # Encode a +string+ in Punycode + def encode(string) + input = string.unpack("U*") + output = "" + + # Initialize the state + n = INITIAL_N + delta = 0 + bias = INITIAL_BIAS + + # Handle the basic code points + input.each { |cp| output << cp.chr if cp < 0x80 } + + h = b = output.length + + # h is the number of code points that have been handled, b is the + # number of basic code points, and out is the number of characters + # that have been output. + + output << DELIMITER if b > 0 + + # Main encoding loop + + while h < input.length + # All non-basic code points < n have been handled already. Find + # the next larger one + + m = MAXINT + input.each do |cp| + m = cp if (n...m) === cp + end + + # Increase delta enough to advance the decoder's state to + # , but guard against overflow + + delta += (m - n) * (h + 1) + raise BufferOverflowError if delta > MAXINT + + n = m + + input.each do |cp| + # AMC-ACE-Z can use this simplified version instead + if cp < n + delta += 1 + raise BufferOverflowError if delta > MAXINT + elsif cp == n + # Represent delta as a generalized variable-length integer + q = delta + k = BASE + loop do + t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias + break if q < t + + q, r = (q - t).divmod(BASE - t) + output << ENCODE_DIGIT[t + r, false] + k += BASE + end + + output << ENCODE_DIGIT[q, false] + + # Adapt the bias + delta = h == b ? delta / DAMP : delta >> 1 + delta += delta / (h + 1) + bias = 0 + while delta > CUTOFF + delta /= LOBASE + bias += BASE + end + bias += (LOBASE + 1) * delta / (delta + SKEW) + + delta = 0 + h += 1 + end + end + + delta += 1 + n += 1 + end + + output + end + + # Encode a hostname using IDN/Punycode algorithms + def encode_hostname(hostname) + hostname.match(RE_NONBASIC) || (return hostname) + + hostname.split(DOT).map do |name| + if name.match(RE_NONBASIC) + PREFIX + encode(name) + else + name + end + end.join(DOT) + end + + # Decode a +string+ encoded in Punycode + def decode(string) + # Initialize the state + n = INITIAL_N + i = 0 + bias = INITIAL_BIAS + + if j = string.rindex(DELIMITER) + b = string[0...j] + + b.match(RE_NONBASIC) && + raise(ArgumentError, "Illegal character is found in basic part: #{string.inspect}") + + # Handle the basic code points + + output = b.unpack("U*") + u = string[(j + 1)..-1] + else + output = [] + u = string + end + + # Main decoding loop: Start just after the last delimiter if any + # basic code points were copied; start at the beginning + # otherwise. + + input = u.unpack("C*") + input_length = input.length + h = 0 + out = output.length + + while h < input_length + # Decode a generalized variable-length integer into delta, + # which gets added to i. The overflow checking is easier + # if we increase i as we go, then subtract off its starting + # value at the end to obtain delta. + + oldi = i + w = 1 + k = BASE + + loop do + (digit = DECODE_DIGIT[input[h]]) || + raise(ArgumentError, "Illegal character is found in non-basic part: #{string.inspect}") + h += 1 + i += digit * w + raise BufferOverflowError if i > MAXINT + + t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias + break if digit < t + + w *= BASE - t + raise BufferOverflowError if w > MAXINT + + k += BASE + (h < input_length) || raise(ArgumentError, "Malformed input given: #{string.inspect}") + end + + # Adapt the bias + delta = oldi == 0 ? i / DAMP : (i - oldi) >> 1 + delta += delta / (out + 1) + bias = 0 + while delta > CUTOFF + delta /= LOBASE + bias += BASE + end + bias += (LOBASE + 1) * delta / (delta + SKEW) + + # i was supposed to wrap around from out+1 to 0, incrementing + # n each time, so we'll fix that now: + + q, i = i.divmod(out + 1) + n += q + raise BufferOverflowError if n > MAXINT + + # Insert n at position i of the output: + + output[i, 0] = n + + out += 1 + i += 1 + end + output.pack("U*") + end + + # Decode a hostname using IDN/Punycode algorithms + def decode_hostname(hostname) + hostname.gsub(/(\A|#{Regexp.quote(DOT)})#{Regexp.quote(PREFIX)}([^#{Regexp.quote(DOT)}]*)/o) do + Regexp.last_match(1) << decode(Regexp.last_match(2)) + end + end + end + # rubocop:enable all + # :nocov: + end + end +end diff --git a/lib/httpx/plugins/cookies/jar.rb b/lib/httpx/plugins/cookies/jar.rb new file mode 100644 index 00000000..219d3f52 --- /dev/null +++ b/lib/httpx/plugins/cookies/jar.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module HTTPX + module Plugins::Cookies + # The Cookie Jar + # + # It holds a bunch of cookies. + class Jar + using URIExtensions + + include Enumerable + + def initialize_dup(orig) + super + @cookies = orig.instance_variable_get(:@cookies).dup + end + + def initialize(cookies = nil) + @cookies = [] + + cookies.each do |elem| + cookie = case elem + when Cookie + elem + when Array + Cookie.new(*elem) + else + Cookie.new(elem) + end + + @cookies << cookie + end if cookies + end + + def parse(set_cookie) + SetCookieParser.call(set_cookie) do |name, value, attrs| + add(Cookie.new(name, value, attrs)) + end + end + + def add(cookie, path = nil) + c = cookie.dup + + c.path = path if path && c.path == "/" + + @cookies << c + end + + def [](uri) + each(uri).sort + end + + def each(uri = nil, &blk) + return enum_for(__method__, uri) unless block_given? + + return @store.each(&blk) unless uri + + uri = URI(uri) + + now = Time.now + tpath = uri.path + + @cookies.delete_if do |cookie| + if cookie.expired?(now) + true + else + yield cookie if cookie.valid_for_uri?(uri) && Cookie.path_match?(cookie.path, tpath) + false + end + end + end + end + end +end diff --git a/lib/httpx/plugins/cookies/set_cookie_parser.rb b/lib/httpx/plugins/cookies/set_cookie_parser.rb new file mode 100644 index 00000000..9a91296d --- /dev/null +++ b/lib/httpx/plugins/cookies/set_cookie_parser.rb @@ -0,0 +1,154 @@ +# frozen_string_literal: true + +require "strscan" +require "time" + +module HTTPX::Plugins::Cookies + module SetCookieParser + unless Regexp.method_defined?(:match?) + # If you wonder why this is there: the oauth feature uses a refinement to enhance the + # Regexp class locally with #match? , but this is never tested, because ActiveSupport + # monkey-patches the same method... Please ActiveSupport, stop being so intrusive! + # :nocov: + module RegexpExtensions + refine(Regexp) do + def match?(*args) + !match(*args).nil? + end + end + end + using(RegexpExtensions) + # :nocov: + end + + # Whitespace. + RE_WSP = /[ \t]+/.freeze + + # A pattern that matches a cookie name or attribute name which may + # be empty, capturing trailing whitespace. + RE_NAME = /(?!#{RE_WSP})[^,;\\"=]*/.freeze + + RE_BAD_CHAR = /([\x00-\x20\x7F",;\\])/.freeze + + # A pattern that matches the comma in a (typically date) value. + RE_COOKIE_COMMA = /,(?=#{RE_WSP}?#{RE_NAME}=)/.freeze + + module_function + + def scan_dquoted(scanner) + s = +"" + + until scanner.eos? + break if scanner.skip(/"/) + + if scanner.skip(/\\/) + s << scanner.getch + elsif scanner.scan(/[^"\\]+/) + s << scanner.matched + end + end + + s + end + + def scan_value(scanner, comma_as_separator = false) + value = +"" + + until scanner.eos? + if scanner.scan(/[^,;"]+/) + value << scanner.matched + elsif scanner.skip(/"/) + # RFC 6265 2.2 + # A cookie-value may be DQUOTE'd. + value << scan_dquoted(scanner) + elsif scanner.check(/;/) + break + elsif comma_as_separator && scanner.check(RE_COOKIE_COMMA) + break + else + value << scanner.getch + end + end + + value.rstrip! + value + end + + def scan_name_value(scanner, comma_as_separator = false) + name = scanner.scan(RE_NAME) + name.rstrip! if name + + if scanner.skip(/\=/) + value = scan_value(scanner, comma_as_separator) + else + scan_value(scanner, comma_as_separator) + value = nil + end + [name, value] + end + + def call(set_cookie) + scanner = StringScanner.new(set_cookie) + + # RFC 6265 4.1.1 & 5.2 + until scanner.eos? + start = scanner.pos + len = nil + + scanner.skip(RE_WSP) + + name, value = scan_name_value(scanner, true) + value = nil if name.empty? + + attrs = {} + + until scanner.eos? + if scanner.skip(/,/) + # The comma is used as separator for concatenating multiple + # values of a header. + len = (scanner.pos - 1) - start + break + elsif scanner.skip(/;/) + scanner.skip(RE_WSP) + + aname, avalue = scan_name_value(scanner, true) + + next if aname.empty? || value.nil? + + aname.downcase! + + case aname + when "expires" + # RFC 6265 5.2.1 + (avalue &&= Time.httpdate(avalue)) || next + when "max-age" + # RFC 6265 5.2.2 + next unless /\A-?\d+\z/.match?(avalue) + + avalue = Integer(avalue) + when "domain" + # RFC 6265 5.2.3 + # An empty value SHOULD be ignored. + next if avalue.nil? || avalue.empty? + when "path" + # RFC 6265 5.2.4 + # A relative path must be ignored rather than normalizing it + # to "/". + next unless avalue.start_with?("/") + when "secure", "httponly" + # RFC 6265 5.2.5, 5.2.6 + avalue = true + end + attrs[aname] = avalue + end + end + + len ||= scanner.pos - start + + next if len > Cookie::MAX_LENGTH + + yield(name, value, attrs) if name && !name.empty? && value + end + end + end +end diff --git a/sig/plugins/cookies.rbs b/sig/plugins/cookies.rbs index 81369bc0..51fca338 100644 --- a/sig/plugins/cookies.rbs +++ b/sig/plugins/cookies.rbs @@ -1,36 +1,22 @@ module HTTPX module Plugins module Cookies - type store_value = Array[HTTP::Cookie] | _Each[_ToS, _ToS] | _Each[Hash[_ToS, _ToS]] - type cookies_value = Store | store_value + type jar = Jar | _Each[cookies] interface _CookieOptions - def cookies: () -> Store? - def cookies=: (cookies_value) -> Store - def with_cookies: (cookies_value) -> instance + def cookies: () -> Jar? + def cookies=: (jar) -> Jar + def with_cookies: (jar) -> instance end def self.extra_options: (Options) -> (Options & _CookieOptions) module InstanceMethods - def cookies: () -> Store + def cookies: () -> Jar end module HeadersMethods - def set_cookie: (HTTP::CookieJar?) -> void - end - - class Store - @store: Hash[string, HTTP::CookieJar] - @default_cookies: Array[HTTP::Cookie]? - - def set: (string origin, string? cookies) -> void - # def []: (URI::HTTP | URI::HTTPS) -> HTTP::CookieJar - def ==: (untyped other) -> bool - - private - - def initialize: (store_value?) -> untyped + def set_cookie: (Array[Cookie]) -> void end end diff --git a/sig/plugins/cookies/cookie.rbs b/sig/plugins/cookies/cookie.rbs new file mode 100644 index 00000000..7568d8d3 --- /dev/null +++ b/sig/plugins/cookies/cookie.rbs @@ -0,0 +1,50 @@ +module HTTPX + module Plugins::Cookies + type cookie_attributes = Hash[Symbol | String, top] + + class Cookie + include Comparable + + MAX_LENGTH: Integer + + attr_reader domain: DomainName? + + attr_reader path: String + + attr_reader name: String + + attr_reader value: String? + + attr_reader created_at: Time + + def path=: (string) -> void + + def domain=: (string) -> void + + def expires: () -> Time? + + def expired?: () -> bool + | (Time) -> bool + + def cookie_value: () -> String + alias to_s cookie_value + + def valid_for_uri?: (uri) -> bool + + def self.new: (Cookie) -> untyped + | (cookie_attributes) -> untyped + | (String, String) -> untyped + | (String, String, cookie_attributes) -> untyped + + def self.path_match?: (String, String) -> bool + + private + + def initialize: (cookie_attributes) -> untyped + | (String, String) -> untyped + | (String, String, cookie_attributes?) -> untyped + + def acceptable_from_uri?: (uri) -> bool + end + end +end \ No newline at end of file diff --git a/sig/plugins/cookies/domain_name.rbs b/sig/plugins/cookies/domain_name.rbs new file mode 100644 index 00000000..3dd48157 --- /dev/null +++ b/sig/plugins/cookies/domain_name.rbs @@ -0,0 +1,19 @@ +module HTTPX + module Plugins::Cookies + class DomainName + type domain = string | DomainName + + include Comparable + + def normalize: (String) -> String + + def cookie_domain?: (domain, ?bool?) -> bool + + def self.new: (domain) -> untyped + + private + + def initialize: (string) -> untyped + end + end +end \ No newline at end of file diff --git a/sig/plugins/cookies/jar.rbs b/sig/plugins/cookies/jar.rbs new file mode 100644 index 00000000..226126cc --- /dev/null +++ b/sig/plugins/cookies/jar.rbs @@ -0,0 +1,27 @@ +module HTTPX + module Plugins::Cookies + class Jar + type cookie = Cookie | Array[String, String, cookie_attributes] | Array[String, String] | cookie_attributes + + include Enumerable[Cookie, void] + + @cookies: Array[Cookie] + + def parse: (String) -> void + + def add: (Cookie) -> void + | (Cookie, String) -> void + + def []: (uri) -> Array[Cookie] + + def each: (uri) { (Cookie) -> void } -> void + | (uri) -> Enumerable[Cookie, void] + | () { (Cookie) -> void } -> void + | () -> Enumerable[Cookie, void] + private + + def initialize: () -> void + | (_Each[cookie]) -> untyped + end + end +end \ No newline at end of file diff --git a/test/support/requests/plugins/cookies.rb b/test/support/requests/plugins/cookies.rb index 1db7410a..5d3c04a4 100644 --- a/test/support/requests/plugins/cookies.rb +++ b/test/support/requests/plugins/cookies.rb @@ -24,13 +24,21 @@ module Requests assert body["cookies"]["abc"] == "def", "abc wasn't properly set" end + def test_plugin_cookies_get_with_cookie + session = HTTPX.plugin(:cookies) + session_response = session.with_cookies([HTTPX::Plugins::Cookies::Cookie.new("abc", "def")]).get(cookies_uri) + body = json_body(session_response) + assert body.key?("cookies") + assert body["cookies"]["abc"] == "def", "abc wasn't properly set" + end + def test_plugin_cookies_set session = HTTPX.plugin(:cookies) session_cookies = { "a" => "b", "c" => "d" } session_uri = cookies_set_uri(session_cookies) session_response = session.get(session_uri) verify_status(session_response, 302) - verify_cookies(session.cookies[URI(session_uri)], session_cookies) + verify_cookies(session.cookies[session_uri], session_cookies) # first request sets the session response = session.get(cookies_uri) @@ -50,6 +58,36 @@ module Requests assert !other_origin_response.headers.key?("set-cookie"), "cookies should not transition to next origin" end + def test_cookies_wrap + session = HTTPX.plugin(:cookies).with_cookies("abc" => "def") + + session.wrap do |_http| + set_cookie_uri = cookies_set_uri("123" => "456") + session_response = session.get(set_cookie_uri) + verify_status(session_response, 302) + + session_response = session.get(cookies_uri) + body = json_body(session_response) + assert body.key?("cookies") + assert body["cookies"]["abc"] == "def", "abc wasn't properly set" + assert body["cookies"]["123"] == "456", "123 wasn't properly set" + + set_cookie_uri = cookies_set_uri("abc" => "123") + session_response = session.get(set_cookie_uri) + verify_status(session_response, 302) + + session_response = session.get(cookies_uri) + body = json_body(session_response) + assert body.key?("cookies") + assert body["cookies"]["abc"] == "123", "abc wasn't properly set" + end + + session_response = session.get(cookies_uri) + body = json_body(session_response) + assert body.key?("cookies") + assert body["cookies"]["abc"] == "def", "abc wasn't properly set" + end + def test_plugin_cookies_follow session = HTTPX.plugin(:follow_redirects).plugin(:cookies) session_cookies = { "a" => "b", "c" => "d" } @@ -63,6 +101,90 @@ module Requests verify_cookies(body["cookies"], session_cookies) end + def test_plugin_cookies_jar + HTTPX.plugin(:cookies) # force loading the modules + + # Test special cases + special_jar = HTTPX::Plugins::Cookies::Jar.new + special_jar.parse(%(a="b"; Path=/, c=d; Path=/, e="f\\\"; \\\"g")) + cookies = special_jar[cookies_uri] + assert(cookies.one? { |cookie| cookie.name == "a" && cookie.value == "b" }) + assert(cookies.one? { |cookie| cookie.name == "c" && cookie.value == "d" }) + assert(cookies.one? { |cookie| cookie.name == "e" && cookie.value == "f\"; \"g" }) + + # Test secure parameter + secure_jar = HTTPX::Plugins::Cookies::Jar.new + secure_jar.parse(%(a=b; Path=/; Secure)) + cookies = secure_jar[cookies_uri] + if URI(cookies_uri).scheme == "https" + assert !cookies.empty?, "cookie jar should contain the secure cookie" + else + assert cookies.empty?, "cookie jar should not contain the secure cookie" + end + + # Test path parameter + path_jar = HTTPX::Plugins::Cookies::Jar.new + path_jar.parse(%(a=b; Path=/cookies)) + assert path_jar[build_uri("/")].empty? + assert !path_jar[build_uri("/cookies")].empty? + assert !path_jar[build_uri("/cookies/set")].empty? + + # Test expires + expires_jar = HTTPX::Plugins::Cookies::Jar.new + expires_jar.parse(%(a=b; Path=/; Max-Age=2)) + assert !expires_jar[cookies_uri].empty? + sleep 2 + assert expires_jar[cookies_uri].empty? + + maxage_jar = HTTPX::Plugins::Cookies::Jar.new + maxage_jar.parse(%(a=b; Path=/; Expires=Sat, 02 Nov 2019 15:24:00 GMT)) + assert maxage_jar[cookies_uri].empty? + + # Test domain + domain_jar = HTTPX::Plugins::Cookies::Jar.new + domain_jar.parse(%(a=b; Path=/; Domain=.google.com)) + assert domain_jar[cookies_uri].empty? + assert !domain_jar["http://www.google.com/"].empty? + + ipv4_domain_jar = HTTPX::Plugins::Cookies::Jar.new + ipv4_domain_jar.parse(%(a=b; Path=/; Domain=137.1.0.12)) + assert ipv4_domain_jar["http://www.google.com/"].empty? + assert !ipv4_domain_jar["http://137.1.0.12/"].empty? + + ipv6_domain_jar = HTTPX::Plugins::Cookies::Jar.new + ipv6_domain_jar.parse(%(a=b; Path=/; Domain=[fe80::1])) + assert ipv6_domain_jar["http://www.google.com/"].empty? + assert !ipv6_domain_jar["http://[fe80::1]/"].empty? + end + + def test_cookies_cookie + HTTPX.plugin(:cookies) # force loading the modules + + # match against uris + acc_c1 = HTTPX::Plugins::Cookies::Cookie.new("a", "b") + assert acc_c1.send(:acceptable_from_uri?, "https://www.google.com") + acc_c2 = HTTPX::Plugins::Cookies::Cookie.new("a", "b", domain: ".google.com") + assert acc_c2.send(:acceptable_from_uri?, "https://www.google.com") + assert !acc_c2.send(:acceptable_from_uri?, "https://nghttp2.org") + acc_c3 = HTTPX::Plugins::Cookies::Cookie.new("a", "b", domain: "google.com") + assert !acc_c3.send(:acceptable_from_uri?, "https://www.google.com") + + # quoting funny characters + sch_cookie = HTTPX::Plugins::Cookies::Cookie.new("Bar", "value\"4") + assert sch_cookie.cookie_value == %(Bar="value\\\"4") + + # sorting + c1 = HTTPX::Plugins::Cookies::Cookie.new("a", "b") + c2 = HTTPX::Plugins::Cookies::Cookie.new("a", "bc") + assert [c2, c1].sort == [c1, c2] + + c3 = HTTPX::Plugins::Cookies::Cookie.new("a", "b", path: "/cookies") + assert [c3, c2, c1].sort == [c3, c1, c2] + + c4 = HTTPX::Plugins::Cookies::Cookie.new("a", "b", created_at: (Time.now - 60 * 60 * 24)) + assert [c4, c3, c2, c1].sort == [c3, c4, c1, c2] + end + private def cookies_uri @@ -77,7 +199,7 @@ module Requests assert !jar.nil? && !jar.empty?, "there should be cookies in the response" assert jar.all? { |cookie| case cookie - when HTTP::Cookie + when HTTPX::Plugins::Cookies::Cookie cookies.one? { |k, v| k == cookie.name && v == cookie.value } else cookie_name, cookie_value = cookie