mirror of
https://github.com/HoneyryderChuck/httpx.git
synced 2025-10-08 00:02:42 -04:00
adapt the hackernews pages script to be also ran in async
This commit is contained in:
parent
1c5bf34fe5
commit
d5fed640fe
@ -1,42 +1,88 @@
|
|||||||
require "httpx"
|
require "httpx"
|
||||||
require "oga"
|
require "oga"
|
||||||
|
|
||||||
http = HTTPX.plugin(:persistent).with(timeout: { request_timeout: 5 })
|
HTTP = HTTPX.plugin(:persistent).with(timeout: { request_timeout: 5 })
|
||||||
|
|
||||||
PAGES = (ARGV.first || 10).to_i
|
def get_pages(pages, mode)
|
||||||
pages = PAGES.times.map do |page|
|
case mode
|
||||||
"https://news.ycombinator.com/?p=#{page+1}"
|
when "async"
|
||||||
|
responses = Array.new(pages.size)
|
||||||
|
|
||||||
|
Async do
|
||||||
|
pages.each_with_index do |page, i|
|
||||||
|
Async do
|
||||||
|
responses[i] = HTTP.get(page)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
responses
|
||||||
|
else
|
||||||
|
Array(HTTP.get(*pages))
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
links = Array.new(PAGES) { [] }
|
def fetch_pages(pages, mode)
|
||||||
Array(http.get(*pages)).each_with_index.map do |response, i|
|
links = Array.new(pages.size) { [] }
|
||||||
if response.is_a?(HTTPX::ErrorResponse)
|
|
||||||
puts "error: #{response.error}"
|
|
||||||
next
|
|
||||||
end
|
|
||||||
html = Oga.parse_html(response.to_s)
|
|
||||||
# binding.irb
|
|
||||||
page_links = html.css('.athing .title a').map{|link| link.get('href') }.select { |link| URI(link).absolute? }
|
|
||||||
puts "page(#{i+1}): #{page_links.size}"
|
|
||||||
if page_links.size == 0
|
|
||||||
puts "error(#{response.status}) on page #{i+1}"
|
|
||||||
next
|
|
||||||
end
|
|
||||||
# page_links.each do |link|
|
|
||||||
# puts "link: #{link}"
|
|
||||||
# links[i] << http.get(link)
|
|
||||||
# end
|
|
||||||
links[i].concat(http.get(*page_links))
|
|
||||||
end
|
|
||||||
|
|
||||||
links = links.each_with_index do |pages, i|
|
get_pages(pages, mode).each_with_index.map do |response, i|
|
||||||
puts "Page: #{i+1}\t Links: #{pages.size}"
|
if response.is_a?(HTTPX::ErrorResponse)
|
||||||
pages.each do |page|
|
puts "error: #{response.error}"
|
||||||
case page
|
next
|
||||||
in status:
|
end
|
||||||
puts "URL: #{page.uri} (#{status})"
|
html = Oga.parse_html(response.to_s)
|
||||||
in error:
|
# binding.irb
|
||||||
puts "URL: #{page.uri} (#{error.message})"
|
page_links = html.css('.athing .title a').map{|link| link.get('href') }.select { |link| URI(link).absolute? }
|
||||||
|
puts "page(#{i+1}): #{page_links.size}"
|
||||||
|
if page_links.size == 0
|
||||||
|
puts "error(#{response.status}) on page #{i+1}"
|
||||||
|
next
|
||||||
|
end
|
||||||
|
# page_links.each do |link|
|
||||||
|
# puts "link: #{link}"
|
||||||
|
# links[i] << http.get(link)
|
||||||
|
# end
|
||||||
|
links[i].concat(get_pages(page_links, mode))
|
||||||
|
end
|
||||||
|
|
||||||
|
links = links.each_with_index do |pages, i|
|
||||||
|
puts "Page: #{i+1}\t Links: #{pages.size}"
|
||||||
|
pages.each do |page|
|
||||||
|
case page
|
||||||
|
in status:
|
||||||
|
puts "URL: #{page.uri} (#{status})"
|
||||||
|
in error:
|
||||||
|
puts "URL: #{page.uri} (#{error.message})"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if __FILE__ == $0
|
||||||
|
pages, mode = ARGV
|
||||||
|
|
||||||
|
pages = (pages || 10).to_i
|
||||||
|
mode ||= "normal"
|
||||||
|
|
||||||
|
page_urls = pages.times.map do |page|
|
||||||
|
"https://news.ycombinator.com/?p=#{page+1}"
|
||||||
|
end
|
||||||
|
|
||||||
|
case mode
|
||||||
|
when "async"
|
||||||
|
require "async"
|
||||||
|
fetch_pages(page_urls, mode)
|
||||||
|
when "bench"
|
||||||
|
require "benchmark"
|
||||||
|
require "async"
|
||||||
|
|
||||||
|
Benchmark.bm do |x|
|
||||||
|
x.report("normal") {fetch_pages(page_urls, "normal")}
|
||||||
|
x.report("async"){fetch_pages(page_urls, "async")}
|
||||||
|
end
|
||||||
|
else
|
||||||
|
|
||||||
|
fetch_pages(page_urls, mode)
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
Loading…
x
Reference in New Issue
Block a user