Compare commits

..

11 Commits

Author SHA1 Message Date
Kanezoh
d4f7054a88
Merge pull request #27 from mamantoha/set-proxy
add proxy support
2024-04-27 08:00:58 +09:00
Anton Maminov
64da39eb9b add proxy support 2023-01-08 19:45:22 +02:00
Kanezoh
48001b0404
Merge pull request #26 from mamantoha/develop
bump kemal
2022-11-07 14:28:23 +09:00
Anton Maminov
c783485d9a cosmetic changes 2022-11-04 15:19:48 +02:00
Anton Maminov
cc6f42b796 fix redundant use of Object#to_s in interpolation 2022-11-04 14:34:42 +02:00
Anton Maminov
230deb68dc bump kemal 2022-11-04 14:24:40 +02:00
Kanezoh
be4d845921
Update README.md 2022-01-24 21:25:58 +09:00
Kanezoh
e72fae8516 improve comment 2022-01-15 12:32:31 +09:00
Kanezoh
cb3b03873a delete an unnecessary file 2022-01-15 12:32:21 +09:00
Kanezoh
f44ed2eea6
Merge pull request #24 from Kanezoh/fix-query-parameter-with-path
fix unexpected behaviour when get uri with path and query parameter
2022-01-10 18:56:25 +09:00
Kanezoh
2c73aa2bcf fix unexpected behaviour when get uri with path and query parameter 2022-01-10 10:44:27 +09:00
15 changed files with 95 additions and 32 deletions

View File

@ -4,7 +4,8 @@
This project is inspired by Ruby's [mechanize](https://github.com/sparklemotion/mechanize). This project is inspired by Ruby's [mechanize](https://github.com/sparklemotion/mechanize).
The purpose is to cover all the features of original one. The purpose is to cover all the features of original one.
Now, mechanize.cr can automatically store and send cookies, fill and submit forms.
[API Documentation](https://kanezoh.github.io/mechanize.cr/)
## Installation ## Installation
@ -20,7 +21,7 @@ Now, mechanize.cr can automatically store and send cookies, fill and submit form
## Usage ## Usage
### simple GET request ### GET request
```crystal ```crystal
require "mechanize" require "mechanize"
@ -95,6 +96,17 @@ For activation, simply setup the log to `:debug` level
Log.setup("mechanize", :debug) Log.setup("mechanize", :debug)
``` ```
### Basic auth
You can access a page protected by basic auth by setting a username and password for its URL.
```crystal
agent = Mechanize.new
agent.add_auth("http://example.com", "username", "password")
agent.get("http://example.com")
```
## Contributing ## Contributing
1. Fork it (<https://github.com/Kanezoh/mechanize.cr/fork>) 1. Fork it (<https://github.com/Kanezoh/mechanize.cr/fork>)

13
main.cr
View File

@ -1,13 +0,0 @@
# Scratch script for trying the library by hand: builds an agent, sets a custom
# request header, and issues one GET to example.com with a params hash.
require "./src/mechanize.cr"
agent = Mechanize.new
# Header assigned on the agent before the request below is made.
agent.request_headers = HTTP::Headers{"Foo" => "Bar"}
params = {"hoge" => "hoge"}
page = agent.get("http://example.com/", params: params)
# Further experiments (form lookup, POST, page inspection), left disabled:
# form = page.forms[0]
# query = {"foo" => "foo_value", "bar" => "bar_value"}
# page = agent.post("http://example.com/", query: query)
# puts page.code
# puts page.body
# puts page.css("h1").first.inner_text
# puts page.title

View File

@ -11,6 +11,8 @@ license: MIT
dependencies: dependencies:
lexbor: lexbor:
github: kostya/lexbor github: kostya/lexbor
http_proxy:
github: mamantoha/http_proxy
development_dependencies: development_dependencies:
webmock: webmock:
@ -18,6 +20,6 @@ development_dependencies:
branch: master branch: master
kemal: kemal:
github: kemalcr/kemal github: kemalcr/kemal
version: ~> 1.0.0 version: ~> 1.3.0
kemal-basic-auth: kemal-basic-auth:
github: kemalcr/kemal-basic-auth github: kemalcr/kemal-basic-auth

View File

@ -1,5 +1,6 @@
require "./spec_helper" require "./spec_helper"
WebMock.stub(:get, "http://example.com/?foo=bar&foo1=bar2") WebMock.stub(:get, "http://example.com/?foo=bar&foo1=bar2")
WebMock.stub(:get, "http://example.com/path?foo=bar&foo1=bar2")
WebMock.stub(:post, "http://example.com/post") WebMock.stub(:post, "http://example.com/post")
.with(body: "email=foobar", headers: {"Content-Type" => "application/x-www-form-urlencoded"}) .with(body: "email=foobar", headers: {"Content-Type" => "application/x-www-form-urlencoded"})
.to_return(body: "success") .to_return(body: "success")
@ -40,6 +41,14 @@ describe "Mechanize HTTP test" do
page.uri.to_s.should eq uri page.uri.to_s.should eq uri
end end
# Regression check: a URL that has both a path and a query string must be
# fetched successfully and reported back unchanged by the page.
it "GET with query parameter with path" do
  target = "http://example.com/path?foo=bar&foo1=bar2"
  page = Mechanize.new.get(target)
  page.code.should eq 200
  page.uri.to_s.should eq target
end
it "simple POST" do it "simple POST" do
agent = Mechanize.new agent = Mechanize.new
query = {"email" => "foobar"} query = {"email" => "foobar"}

17
spec/proxy_spec.cr Normal file
View File

@ -0,0 +1,17 @@
require "./spec_helper"
WebMock.stub(:get, "http://example.com/with_proxy").to_return(body: "success")
# Checks that a GET still succeeds after a proxy has been configured on the agent.
describe "Mechanize proxy test" do
  it "set proxy" do
    with_proxy_server do |host, port, wants_close|
      agent = Mechanize.new
      # Use the address the helper actually started the server on instead of
      # repeating its default literals, so the two cannot drift apart.
      agent.set_proxy(host, port)
      page = agent.get("http://example.com/with_proxy")
      page.body.should eq("success")
      page.code.should eq(200)
    ensure
      # Always signal shutdown, even when an expectation above failed;
      # the helper only closes the server after receiving on this channel.
      wants_close.send(nil)
    end
  end
end

View File

@ -1,5 +1,6 @@
require "spec" require "spec"
require "webmock" require "webmock"
require "http_proxy"
require "../src/mechanize" require "../src/mechanize"
WebMock.stub(:get, "example.com") WebMock.stub(:get, "example.com")
@ -46,3 +47,22 @@ WebMock.stub(:post, "example.com/post_path")
WebMock.stub(:post, "example.com/post_path") WebMock.stub(:post, "example.com/post_path")
.with(body: "name=foo&email=bar&commit=submit", headers: {"Content-Type" => "application/x-www-form-urlencoded"}) .with(body: "name=foo&email=bar&commit=submit", headers: {"Content-Type" => "application/x-www-form-urlencoded"})
.to_return(body: "success with button") .to_return(body: "success with button")
# Spec helper: creates an `HTTP::Proxy::Server`, starts it on `host`:`port` in a
# background fiber, then yields `host`, `port` and a `Channel(Nil)` to the block.
# The server is closed only after something is sent on that channel, so the
# caller must send to it when finished (e.g. from an `ensure`).
def with_proxy_server(host = "127.0.0.1", port = 8080, &)
wants_close = Channel(Nil).new
server = HTTP::Proxy::Server.new
# Fiber 1: bind and serve. Run in its own fiber, presumably because `listen` blocks (TODO confirm).
spawn do
server.bind_tcp(host, port)
server.listen
end
# Fiber 2: wait for the shutdown signal, then close the server.
spawn do
wants_close.receive
server.close
end
# Give the spawned fibers a chance to run before the block does;
# presumably this is what gets the server bound first (TODO confirm).
Fiber.yield
yield host, port, wants_close
end

View File

@ -2,6 +2,7 @@ require "log"
require "uri" require "uri"
require "http/client" require "http/client"
require "lexbor" require "lexbor"
require "http_proxy"
require "./mechanize/http/agent" require "./mechanize/http/agent"
require "./mechanize/form" require "./mechanize/form"
require "./mechanize/node" require "./mechanize/node"
@ -298,13 +299,18 @@ class Mechanize
# set basic auth credentials. # set basic auth credentials.
# ``` # ```
# # make download.html whose content is http://example.com's html. # # set an auth credential with a specific url.
# agent.add_auth("http://example.com", "username", "password") # agent.add_auth("http://example.com", "username", "password")
# ``` # ```
def add_auth(uri : String, user : String, pass : String) def add_auth(uri : String, user : String, pass : String)
@agent.add_auth(uri, user, pass) @agent.add_auth(uri, user, pass)
end end
# Sets the proxy `address` at `port` with an optional `user` and `password`.
# The arguments are forwarded unchanged to the underlying agent.
# ```
# agent.set_proxy("127.0.0.1", 8080)
# agent.set_proxy("127.0.0.1", 8080, "username", "password")
# ```
def set_proxy(address : String, port : Int32, user : String? = nil, password : String? = nil)
@agent.set_proxy(address, port, user, password)
end
# Runs given block, then resets the page history as it was before. # Runs given block, then resets the page history as it was before.
private def transact private def transact
# save the previous history status. # save the previous history status.

View File

@ -193,7 +193,6 @@ class Mechanize
# submitted with multiple buttons, pass each button to this method. # submitted with multiple buttons, pass each button to this method.
def add_button_to_query(button) def add_button_to_query(button)
unless button.form_node == @node unless button.form_node == @node
message = ""
"#{button.inspect} does not belong to the same page as " \ "#{button.inspect} does not belong to the same page as " \
"the form #{@name.inspect} in #{@page.try &.uri}" "the form #{@name.inspect} in #{@page.try &.uri}"
message = "not a valid button" message = "not a valid button"

View File

@ -23,10 +23,14 @@ class Mechanize::FormContent::SelectList < Mechanize::FormContent::MultiSelectLi
end end
def value=(new_value) def value=(new_value)
values = new_value @values = new_value
end end
def query_value def query_value
value ? [[name, value.not_nil!]] : nil if v = value
[[name, v]]
else
nil
end
end end
end end

View File

@ -30,7 +30,7 @@ class Mechanize
if size == 0 if size == 0
# TODO: raise error # TODO: raise error
end end
page = @array.pop @array.pop
end end
end end
end end

View File

@ -16,6 +16,8 @@ class Mechanize
getter authenticate_methods : Hash(URI, Hash(String, Array(AuthRealm))) getter authenticate_methods : Hash(URI, Hash(String, Array(AuthRealm)))
getter authenticate_parser : WWWAuthenticateParser getter authenticate_parser : WWWAuthenticateParser
@proxy : ::HTTP::Proxy::Client?
def initialize(@context : Mechanize? = nil) def initialize(@context : Mechanize? = nil)
@history = History.new @history = History.new
@request_headers = ::HTTP::Headers.new @request_headers = ::HTTP::Headers.new
@ -43,6 +45,7 @@ class Mechanize
uri, params = resolve_parameters(uri, method, params) uri, params = resolve_parameters(uri, method, params)
client = ::HTTP::Client.new(uri) client = ::HTTP::Client.new(uri)
request_auth client, uri request_auth client, uri
client.set_proxy(@proxy) if @proxy
response = http_request(client, uri, method, params, body) response = http_request(client, uri, method, params, body)
body = response.not_nil!.body body = response.not_nil!.body
page = response_parse(response, body, uri) page = response_parse(response, body, uri)
@ -66,7 +69,7 @@ class Mechanize
redirect_url = response.headers["location"] redirect_url = response.headers["location"]
uri = resolve_url(redirect_url, referer) uri = resolve_url(redirect_url, referer)
Log.debug { "follow redirect to: #{uri.to_s}" } Log.debug { "follow redirect to: #{uri}" }
# Make sure we are not copying over the POST headers from the original request # Make sure we are not copying over the POST headers from the original request
headers.delete("Content-MD5") headers.delete("Content-MD5")
@ -84,6 +87,7 @@ class Mechanize
body : String?) : ::HTTP::Client::Response? body : String?) : ::HTTP::Client::Response?
request_log(uri, method) request_log(uri, method)
path = uri.path path = uri.path
path += "?#{uri.query.not_nil!}" if uri.query
case uri.scheme.not_nil!.downcase case uri.scheme.not_nil!.downcase
when "http", "https" when "http", "https"
@ -162,13 +166,17 @@ class Mechanize
# set basic auth credentials. # set basic auth credentials.
# ``` # ```
# # make download.html whose content is http://example.com's html. # # set an auth credential with a specific url.
# agent.add_auth("http://example.com", "username", "password") # agent.add_auth("http://example.com", "username", "password")
# ``` # ```
def add_auth(uri : String, user : String, pass : String) def add_auth(uri : String, user : String, pass : String)
@auth_store.add_auth(uri, user, pass) @auth_store.add_auth(uri, user, pass)
end end
# Builds an `::HTTP::Proxy::Client` for `address`:`port`, passing `user` and
# `password` as its optional credentials, and stores it in `@proxy`.
def set_proxy(address : String, port : Int32, user : String? = nil, password : String? = nil)
@proxy = ::HTTP::Proxy::Client.new(address, port, username: user, password: password)
end
private def set_request_headers(uri, headers) private def set_request_headers(uri, headers)
reset_request_header_cookies reset_request_header_cookies
headers.each do |k, v| headers.each do |k, v|
@ -317,7 +325,6 @@ class Mechanize
# extract valid cookies according to URI # extract valid cookies according to URI
private def valid_cookies(uri) private def valid_cookies(uri)
host = uri.host
valid_cookies = ::HTTP::Cookies.new valid_cookies = ::HTTP::Cookies.new
request_cookies.each do |cookie| request_cookies.each do |cookie|
valid_cookies << cookie if cookie.valid_cookie?(uri) valid_cookies << cookie if cookie.valid_cookie?(uri)
@ -326,7 +333,7 @@ class Mechanize
end end
private def request_log(uri, method) private def request_log(uri, method)
Log.debug { "#{method.to_s.upcase}: #{uri.to_s}" } Log.debug { "#{method.to_s.upcase}: #{uri}" }
request_headers.each do |key, values| request_headers.each do |key, values|
value = values.size == 1 ? values.first : values value = values.size == 1 ? values.first : values

View File

@ -8,6 +8,7 @@ class Mechanize
@auth_accounts = Hash(URI, Hash(String, Array(String))).new @auth_accounts = Hash(URI, Hash(String, Array(String))).new
end end
# set an auth credential with a specific url.
def add_auth(uri : String | URI, user : String, pass : String, realm : String? = nil, domain : String? = nil) def add_auth(uri : String | URI, user : String, pass : String, realm : String? = nil, domain : String? = nil)
target_uri = uri.dup target_uri = uri.dup
unless uri.is_a?(URI) unless uri.is_a?(URI)
@ -28,7 +29,6 @@ class Mechanize
end end
end end
##
# Returns true if credentials exist for the +challenges+ from the server at # Returns true if credentials exist for the +challenges+ from the server at
# +uri+. # +uri+.

View File

@ -127,7 +127,7 @@ class Mechanize
return nil unless value return nil unless value
return [name, value] [name, value]
end end
## ##

View File

@ -43,11 +43,11 @@ class Mechanize
# page.forms # => Array(Mechanize::Form) # page.forms # => Array(Mechanize::Form)
# ``` # ```
def forms : Array(Form) def forms : Array(Form)
forms = css("form").map do |html_form| css("form").map do |html_form|
form = Form.new(html_form, self) form = Form.new(html_form, self)
form.action ||= @uri.to_s form.action ||= @uri.to_s
form form
end.to_a end
end end
# return all links(`Mechanize::PageContent::Link`) in the page. # return all links(`Mechanize::PageContent::Link`) in the page.
@ -55,11 +55,11 @@ class Mechanize
# page.links # => Array(Mechanize::PageContent::Link) # page.links # => Array(Mechanize::PageContent::Link)
# ``` # ```
def links : Array(PageContent::Link) def links : Array(PageContent::Link)
links = %w{a area}.map do |tag| %w{a area}.flat_map do |tag|
css(tag).map do |node| css(tag).map do |node|
PageContent::Link.new(node, @mech, self) PageContent::Link.new(node, @mech, self)
end end
end.flatten end
end end
elements_with "form" elements_with "form"