Compare commits
11 Commits
64bccad495
...
d4f7054a88
Author | SHA1 | Date | |
---|---|---|---|
|
d4f7054a88 | ||
|
64da39eb9b | ||
|
48001b0404 | ||
|
c783485d9a | ||
|
cc6f42b796 | ||
|
230deb68dc | ||
|
be4d845921 | ||
|
e72fae8516 | ||
|
cb3b03873a | ||
|
f44ed2eea6 | ||
|
2c73aa2bcf |
16
README.md
16
README.md
@ -4,7 +4,8 @@
|
|||||||
|
|
||||||
This project is inspired by Ruby's [mechanize](https://github.com/sparklemotion/mechanize).
|
This project is inspired by Ruby's [mechanize](https://github.com/sparklemotion/mechanize).
|
||||||
The purpose is to cover all the features of original one.
|
The purpose is to cover all the features of original one.
|
||||||
Now, mechanize.cr can automatically store and send cookies, fill and submit forms.
|
|
||||||
|
[API Documentation](https://kanezoh.github.io/mechanize.cr/)
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
@ -20,7 +21,7 @@ Now, mechanize.cr can automatically store and send cookies, fill and submit form
|
|||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
### simple GET request
|
### GET request
|
||||||
|
|
||||||
```crystal
|
```crystal
|
||||||
require "mechanize"
|
require "mechanize"
|
||||||
@ -95,6 +96,17 @@ For activation, simply setup the log to `:debug` level
|
|||||||
Log.setup("mechanize", :debug)
|
Log.setup("mechanize", :debug)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Basic auth
|
||||||
|
|
||||||
|
You can access a page which is protected by basic auth, setting username and password for the url.
|
||||||
|
|
||||||
|
```crystal
|
||||||
|
agent = Mechanize.new
|
||||||
|
agent.add_auth("http://example.com", "username", "password")
|
||||||
|
agent.get("http://example.com")
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
1. Fork it (<https://github.com/Kanezoh/mechanize.cr/fork>)
|
1. Fork it (<https://github.com/Kanezoh/mechanize.cr/fork>)
|
||||||
|
13
main.cr
13
main.cr
@ -1,13 +0,0 @@
|
|||||||
require "./src/mechanize.cr"
|
|
||||||
|
|
||||||
agent = Mechanize.new
|
|
||||||
agent.request_headers = HTTP::Headers{"Foo" => "Bar"}
|
|
||||||
params = {"hoge" => "hoge"}
|
|
||||||
page = agent.get("http://example.com/", params: params)
|
|
||||||
# form = page.forms[0]
|
|
||||||
# query = {"foo" => "foo_value", "bar" => "bar_value"}
|
|
||||||
# page = agent.post("http://example.com/", query: query)
|
|
||||||
# puts page.code
|
|
||||||
# puts page.body
|
|
||||||
# puts page.css("h1").first.inner_text
|
|
||||||
# puts page.title
|
|
@ -11,6 +11,8 @@ license: MIT
|
|||||||
dependencies:
|
dependencies:
|
||||||
lexbor:
|
lexbor:
|
||||||
github: kostya/lexbor
|
github: kostya/lexbor
|
||||||
|
http_proxy:
|
||||||
|
github: mamantoha/http_proxy
|
||||||
|
|
||||||
development_dependencies:
|
development_dependencies:
|
||||||
webmock:
|
webmock:
|
||||||
@ -18,6 +20,6 @@ development_dependencies:
|
|||||||
branch: master
|
branch: master
|
||||||
kemal:
|
kemal:
|
||||||
github: kemalcr/kemal
|
github: kemalcr/kemal
|
||||||
version: ~> 1.0.0
|
version: ~> 1.3.0
|
||||||
kemal-basic-auth:
|
kemal-basic-auth:
|
||||||
github: kemalcr/kemal-basic-auth
|
github: kemalcr/kemal-basic-auth
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
require "./spec_helper"
|
require "./spec_helper"
|
||||||
WebMock.stub(:get, "http://example.com/?foo=bar&foo1=bar2")
|
WebMock.stub(:get, "http://example.com/?foo=bar&foo1=bar2")
|
||||||
|
WebMock.stub(:get, "http://example.com/path?foo=bar&foo1=bar2")
|
||||||
WebMock.stub(:post, "http://example.com/post")
|
WebMock.stub(:post, "http://example.com/post")
|
||||||
.with(body: "email=foobar", headers: {"Content-Type" => "application/x-www-form-urlencoded"})
|
.with(body: "email=foobar", headers: {"Content-Type" => "application/x-www-form-urlencoded"})
|
||||||
.to_return(body: "success")
|
.to_return(body: "success")
|
||||||
@ -40,6 +41,14 @@ describe "Mechanize HTTP test" do
|
|||||||
page.uri.to_s.should eq uri
|
page.uri.to_s.should eq uri
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "GET with query parameter with path" do
|
||||||
|
agent = Mechanize.new
|
||||||
|
uri = "http://example.com/path?foo=bar&foo1=bar2"
|
||||||
|
page = agent.get(uri)
|
||||||
|
page.code.should eq 200
|
||||||
|
page.uri.to_s.should eq uri
|
||||||
|
end
|
||||||
|
|
||||||
it "simple POST" do
|
it "simple POST" do
|
||||||
agent = Mechanize.new
|
agent = Mechanize.new
|
||||||
query = {"email" => "foobar"}
|
query = {"email" => "foobar"}
|
||||||
|
17
spec/proxy_spec.cr
Normal file
17
spec/proxy_spec.cr
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
require "./spec_helper"
|
||||||
|
|
||||||
|
WebMock.stub(:get, "http://example.com/with_proxy").to_return(body: "success")
|
||||||
|
|
||||||
|
describe "Mechanize proxy test" do
|
||||||
|
it "set proxy" do
|
||||||
|
with_proxy_server do |host, port, wants_close|
|
||||||
|
agent = Mechanize.new
|
||||||
|
agent.set_proxy("127.0.0.1", 8080)
|
||||||
|
page = agent.get("http://example.com/with_proxy")
|
||||||
|
page.body.should eq("success")
|
||||||
|
page.code.should eq(200)
|
||||||
|
ensure
|
||||||
|
wants_close.send(nil)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
@ -1,5 +1,6 @@
|
|||||||
require "spec"
|
require "spec"
|
||||||
require "webmock"
|
require "webmock"
|
||||||
|
require "http_proxy"
|
||||||
require "../src/mechanize"
|
require "../src/mechanize"
|
||||||
|
|
||||||
WebMock.stub(:get, "example.com")
|
WebMock.stub(:get, "example.com")
|
||||||
@ -46,3 +47,22 @@ WebMock.stub(:post, "example.com/post_path")
|
|||||||
WebMock.stub(:post, "example.com/post_path")
|
WebMock.stub(:post, "example.com/post_path")
|
||||||
.with(body: "name=foo&email=bar&commit=submit", headers: {"Content-Type" => "application/x-www-form-urlencoded"})
|
.with(body: "name=foo&email=bar&commit=submit", headers: {"Content-Type" => "application/x-www-form-urlencoded"})
|
||||||
.to_return(body: "success with button")
|
.to_return(body: "success with button")
|
||||||
|
|
||||||
|
def with_proxy_server(host = "127.0.0.1", port = 8080, &)
|
||||||
|
wants_close = Channel(Nil).new
|
||||||
|
server = HTTP::Proxy::Server.new
|
||||||
|
|
||||||
|
spawn do
|
||||||
|
server.bind_tcp(host, port)
|
||||||
|
server.listen
|
||||||
|
end
|
||||||
|
|
||||||
|
spawn do
|
||||||
|
wants_close.receive
|
||||||
|
server.close
|
||||||
|
end
|
||||||
|
|
||||||
|
Fiber.yield
|
||||||
|
|
||||||
|
yield host, port, wants_close
|
||||||
|
end
|
||||||
|
@ -2,6 +2,7 @@ require "log"
|
|||||||
require "uri"
|
require "uri"
|
||||||
require "http/client"
|
require "http/client"
|
||||||
require "lexbor"
|
require "lexbor"
|
||||||
|
require "http_proxy"
|
||||||
require "./mechanize/http/agent"
|
require "./mechanize/http/agent"
|
||||||
require "./mechanize/form"
|
require "./mechanize/form"
|
||||||
require "./mechanize/node"
|
require "./mechanize/node"
|
||||||
@ -298,13 +299,18 @@ class Mechanize
|
|||||||
|
|
||||||
# set basic auth credentials.
|
# set basic auth credentials.
|
||||||
# ```
|
# ```
|
||||||
# # make download.html whose content is http://example.com's html.
|
# # set an auth credential with a specific url.
|
||||||
# agent.add_auth("http://example.com", "username", "password")
|
# agent.add_auth("http://example.com", "username", "password")
|
||||||
# ```
|
# ```
|
||||||
def add_auth(uri : String, user : String, pass : String)
|
def add_auth(uri : String, user : String, pass : String)
|
||||||
@agent.add_auth(uri, user, pass)
|
@agent.add_auth(uri, user, pass)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Sets the proxy +address+ at +port+ with an optional +user+ and +password+
|
||||||
|
def set_proxy(address : String, port : Int32, user : String? = nil, password : String? = nil)
|
||||||
|
@agent.set_proxy(address, port, user, password)
|
||||||
|
end
|
||||||
|
|
||||||
# Runs given block, then resets the page history as it was before.
|
# Runs given block, then resets the page history as it was before.
|
||||||
private def transact
|
private def transact
|
||||||
# save the previous history status.
|
# save the previous history status.
|
||||||
|
@ -193,7 +193,6 @@ class Mechanize
|
|||||||
# submitted with multiple buttons, pass each button to this method.
|
# submitted with multiple buttons, pass each button to this method.
|
||||||
def add_button_to_query(button)
|
def add_button_to_query(button)
|
||||||
unless button.form_node == @node
|
unless button.form_node == @node
|
||||||
message = ""
|
|
||||||
"#{button.inspect} does not belong to the same page as " \
|
"#{button.inspect} does not belong to the same page as " \
|
||||||
"the form #{@name.inspect} in #{@page.try &.uri}"
|
"the form #{@name.inspect} in #{@page.try &.uri}"
|
||||||
message = "not a valid button"
|
message = "not a valid button"
|
||||||
|
@ -23,10 +23,14 @@ class Mechanize::FormContent::SelectList < Mechanize::FormContent::MultiSelectLi
|
|||||||
end
|
end
|
||||||
|
|
||||||
def value=(new_value)
|
def value=(new_value)
|
||||||
values = new_value
|
@values = new_value
|
||||||
end
|
end
|
||||||
|
|
||||||
def query_value
|
def query_value
|
||||||
value ? [[name, value.not_nil!]] : nil
|
if v = value
|
||||||
|
[[name, v]]
|
||||||
|
else
|
||||||
|
nil
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -30,7 +30,7 @@ class Mechanize
|
|||||||
if size == 0
|
if size == 0
|
||||||
# TODO: raise error
|
# TODO: raise error
|
||||||
end
|
end
|
||||||
page = @array.pop
|
@array.pop
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -16,6 +16,8 @@ class Mechanize
|
|||||||
getter authenticate_methods : Hash(URI, Hash(String, Array(AuthRealm)))
|
getter authenticate_methods : Hash(URI, Hash(String, Array(AuthRealm)))
|
||||||
getter authenticate_parser : WWWAuthenticateParser
|
getter authenticate_parser : WWWAuthenticateParser
|
||||||
|
|
||||||
|
@proxy : ::HTTP::Proxy::Client?
|
||||||
|
|
||||||
def initialize(@context : Mechanize? = nil)
|
def initialize(@context : Mechanize? = nil)
|
||||||
@history = History.new
|
@history = History.new
|
||||||
@request_headers = ::HTTP::Headers.new
|
@request_headers = ::HTTP::Headers.new
|
||||||
@ -43,6 +45,7 @@ class Mechanize
|
|||||||
uri, params = resolve_parameters(uri, method, params)
|
uri, params = resolve_parameters(uri, method, params)
|
||||||
client = ::HTTP::Client.new(uri)
|
client = ::HTTP::Client.new(uri)
|
||||||
request_auth client, uri
|
request_auth client, uri
|
||||||
|
client.set_proxy(@proxy) if @proxy
|
||||||
response = http_request(client, uri, method, params, body)
|
response = http_request(client, uri, method, params, body)
|
||||||
body = response.not_nil!.body
|
body = response.not_nil!.body
|
||||||
page = response_parse(response, body, uri)
|
page = response_parse(response, body, uri)
|
||||||
@ -66,7 +69,7 @@ class Mechanize
|
|||||||
redirect_url = response.headers["location"]
|
redirect_url = response.headers["location"]
|
||||||
uri = resolve_url(redirect_url, referer)
|
uri = resolve_url(redirect_url, referer)
|
||||||
|
|
||||||
Log.debug { "follow redirect to: #{uri.to_s}" }
|
Log.debug { "follow redirect to: #{uri}" }
|
||||||
|
|
||||||
# Make sure we are not copying over the POST headers from the original request
|
# Make sure we are not copying over the POST headers from the original request
|
||||||
headers.delete("Content-MD5")
|
headers.delete("Content-MD5")
|
||||||
@ -84,6 +87,7 @@ class Mechanize
|
|||||||
body : String?) : ::HTTP::Client::Response?
|
body : String?) : ::HTTP::Client::Response?
|
||||||
request_log(uri, method)
|
request_log(uri, method)
|
||||||
path = uri.path
|
path = uri.path
|
||||||
|
path += "?#{uri.query.not_nil!}" if uri.query
|
||||||
|
|
||||||
case uri.scheme.not_nil!.downcase
|
case uri.scheme.not_nil!.downcase
|
||||||
when "http", "https"
|
when "http", "https"
|
||||||
@ -162,13 +166,17 @@ class Mechanize
|
|||||||
|
|
||||||
# set basic auth credentials.
|
# set basic auth credentials.
|
||||||
# ```
|
# ```
|
||||||
# # make download.html whose content is http://example.com's html.
|
# # set an auth credential with a specific url.
|
||||||
# agent.add_auth("http://example.com", "username", "password")
|
# agent.add_auth("http://example.com", "username", "password")
|
||||||
# ```
|
# ```
|
||||||
def add_auth(uri : String, user : String, pass : String)
|
def add_auth(uri : String, user : String, pass : String)
|
||||||
@auth_store.add_auth(uri, user, pass)
|
@auth_store.add_auth(uri, user, pass)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def set_proxy(address : String, port : Int32, user : String? = nil, password : String? = nil)
|
||||||
|
@proxy = ::HTTP::Proxy::Client.new(address, port, username: user, password: password)
|
||||||
|
end
|
||||||
|
|
||||||
private def set_request_headers(uri, headers)
|
private def set_request_headers(uri, headers)
|
||||||
reset_request_header_cookies
|
reset_request_header_cookies
|
||||||
headers.each do |k, v|
|
headers.each do |k, v|
|
||||||
@ -317,7 +325,6 @@ class Mechanize
|
|||||||
|
|
||||||
# extract valid cookies according to URI
|
# extract valid cookies according to URI
|
||||||
private def valid_cookies(uri)
|
private def valid_cookies(uri)
|
||||||
host = uri.host
|
|
||||||
valid_cookies = ::HTTP::Cookies.new
|
valid_cookies = ::HTTP::Cookies.new
|
||||||
request_cookies.each do |cookie|
|
request_cookies.each do |cookie|
|
||||||
valid_cookies << cookie if cookie.valid_cookie?(uri)
|
valid_cookies << cookie if cookie.valid_cookie?(uri)
|
||||||
@ -326,7 +333,7 @@ class Mechanize
|
|||||||
end
|
end
|
||||||
|
|
||||||
private def request_log(uri, method)
|
private def request_log(uri, method)
|
||||||
Log.debug { "#{method.to_s.upcase}: #{uri.to_s}" }
|
Log.debug { "#{method.to_s.upcase}: #{uri}" }
|
||||||
|
|
||||||
request_headers.each do |key, values|
|
request_headers.each do |key, values|
|
||||||
value = values.size == 1 ? values.first : values
|
value = values.size == 1 ? values.first : values
|
||||||
|
@ -8,6 +8,7 @@ class Mechanize
|
|||||||
@auth_accounts = Hash(URI, Hash(String, Array(String))).new
|
@auth_accounts = Hash(URI, Hash(String, Array(String))).new
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# set an auth credential with a specific url.
|
||||||
def add_auth(uri : String | URI, user : String, pass : String, realm : String? = nil, domain : String? = nil)
|
def add_auth(uri : String | URI, user : String, pass : String, realm : String? = nil, domain : String? = nil)
|
||||||
target_uri = uri.dup
|
target_uri = uri.dup
|
||||||
unless uri.is_a?(URI)
|
unless uri.is_a?(URI)
|
||||||
@ -28,7 +29,6 @@ class Mechanize
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
|
||||||
# Returns true if credentials exist for the +challenges+ from the server at
|
# Returns true if credentials exist for the +challenges+ from the server at
|
||||||
# +uri+.
|
# +uri+.
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ class Mechanize
|
|||||||
|
|
||||||
return nil unless value
|
return nil unless value
|
||||||
|
|
||||||
return [name, value]
|
[name, value]
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
##
|
||||||
|
@ -43,11 +43,11 @@ class Mechanize
|
|||||||
# page.forms # => Array(Mechanize::Form)
|
# page.forms # => Array(Mechanize::Form)
|
||||||
# ```
|
# ```
|
||||||
def forms : Array(Form)
|
def forms : Array(Form)
|
||||||
forms = css("form").map do |html_form|
|
css("form").map do |html_form|
|
||||||
form = Form.new(html_form, self)
|
form = Form.new(html_form, self)
|
||||||
form.action ||= @uri.to_s
|
form.action ||= @uri.to_s
|
||||||
form
|
form
|
||||||
end.to_a
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# return all links(`Mechanize::PageContent::Link`) in the page.
|
# return all links(`Mechanize::PageContent::Link`) in the page.
|
||||||
@ -55,11 +55,11 @@ class Mechanize
|
|||||||
# page.links # => Array(Mechanize::PageContent::Link)
|
# page.links # => Array(Mechanize::PageContent::Link)
|
||||||
# ```
|
# ```
|
||||||
def links : Array(PageContent::Link)
|
def links : Array(PageContent::Link)
|
||||||
links = %w{a area}.map do |tag|
|
%w{a area}.flat_map do |tag|
|
||||||
css(tag).map do |node|
|
css(tag).map do |node|
|
||||||
PageContent::Link.new(node, @mech, self)
|
PageContent::Link.new(node, @mech, self)
|
||||||
end
|
end
|
||||||
end.flatten
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
elements_with "form"
|
elements_with "form"
|
||||||
|
Loading…
Reference in New Issue
Block a user