diff --git a/Cargo.toml b/Cargo.toml index b12da15..3b77d0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,4 +13,5 @@ url = "2.1" http = "0.2" nipper = "0.1.8" serde_json = "1.0" -serde = "1.0" \ No newline at end of file +serde = "1.0" +cssifier = "0.1.2" \ No newline at end of file diff --git a/src/selector.rs b/src/selector.rs index 989718f..e6c33d7 100644 --- a/src/selector.rs +++ b/src/selector.rs @@ -3,21 +3,39 @@ pub trait SelectorBase { fn html(&self) -> String; - fn css(&self, css_selector: &'static str) -> Vec { + // fn css<'a, T: SelectorBase>(&self, css_selector: &'a str) -> Vec { + fn css<'a>(&self, css_selector: &'a str) -> Vec { let html = nipper::Document::from(self.html().as_str()); let mut output = vec![]; for item in html.select(css_selector).iter() { - output.push(T::from_html(item.html().to_string())) + output.push(Selector::from_html(item.html().to_string())) } output } - fn css_once(&self, css_selector: &'static str) -> Option { + fn css_once<'a>(&self, css_selector: &'a str) -> Option { self.css(css_selector).pop() } + fn xpath(&self, xpath: &'static str) -> Vec { + match cssifier::cssifier(xpath) { + Some(css_selector) => { + if css_selector == "" { + Vec::default() + } else { + self.css(css_selector.as_str()) + } + } + None => Vec::default(), + } + } + + fn xpath_once(&self, xpath: &'static str) -> Option { + self.xpath(xpath).pop() + } + fn content(&self) -> String { let html = nipper::Document::from(self.html().as_str()); html.select("body > *") diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 1ee8304..16f3c43 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -3,6 +3,7 @@ use http::StatusCode; use raspa::request::{Request, RequestBase}; use raspa::selector::{Selector, SelectorBase}; use serde_json::Value; +// use std::collections::HashMap; #[test] fn plain_text_selector() { @@ -17,12 +18,9 @@ fn plain_text_selector() { " .to_string(); let sel = Selector::from_html(html); - assert_eq!(sel.css::("h1")[0].html(), "

hello world

"); - assert_eq!(sel.css::("#text")[0].content(), "good bye"); - assert_eq!( - sel.css_once::("body > a").unwrap().content(), - "simple text" - ); + assert_eq!(sel.css("h1")[0].html(), "

hello world

"); + assert_eq!(sel.css("#text")[0].content(), "good bye"); + assert_eq!(sel.css_once("body > a").unwrap().content(), "simple text"); } #[test] @@ -30,7 +28,7 @@ fn simple_request() { let req: Request = RequestBase::new("https://httpbin.org/").unwrap(); let resp = req.launch(); assert_eq!(resp.status_code, StatusCode::OK); - assert!(resp.css::("h2")[0].html().contains("httpbin.org")); + assert!(resp.css("h2")[0].html().contains("httpbin.org")); } #[test] @@ -55,29 +53,57 @@ fn complex_selectors() { " .to_string(); let sel = Selector::from_html(html); - assert_eq!( - sel.css_once::("p").unwrap().attr("id").unwrap(), - "text" - ); - assert_eq!( - sel.css::("a")[0].attr("href").unwrap(), - "http://google.com" - ); - for node in sel.css::("ul li").iter() { + assert_eq!(sel.css_once("p").unwrap().attr("id").unwrap(), "text"); + assert_eq!(sel.css("a")[0].attr("href").unwrap(), "http://google.com"); + for node in sel.css("ul li").iter() { let text = node.content(); assert_eq!(node.attr("class").unwrap(), "item"); assert!(node.attr("id").unwrap().contains(&text)); } - let div = sel.css_once::("div").unwrap(); - for node in div.css::("a").iter() { + let div = sel.css_once("div").unwrap(); + for node in div.css("a").iter() { if node.attr("href").unwrap() == "#" { assert_eq!(node.content(), "non link"); } else { assert_eq!(node.content(), "link"); } } - assert!(sel.css_once::("h1").is_none()); + assert!(sel.css_once("h1").is_none()); +} + +#[test] +fn xpath_test() { + let html = " + + +

good bye

+ simple text + + + +" + .to_string(); + let sel = Selector::from_html(html); + assert_eq!( + sel.xpath_once("//div/a[1]").unwrap().content(), + "first text" + ); + assert_eq!(sel.xpath("//*[@id='text']")[0].content(), "good bye"); + assert_eq!( + sel.xpath("//a[contains(@href, 'localhost')]")[0].content(), + "link" + ); + assert_eq!( + sel.xpath_once("//div[@class='container']/a[3]") + .unwrap() + .content(), + "non link" + ); } #[test] @@ -90,8 +116,22 @@ fn simple_json_test() { #[test] fn simple_post_request() { - let mut req = Request::new("https://httpbin.org/post").unwrap(); - req.method(Method::POST); - let resp: Value = req.launch().to_json().expect("cannot parse json"); + let resp: Value = Request::new("https://httpbin.org/post") + .unwrap() + .method(Method::POST) + .launch() + .to_json() + .expect("cannot parse json"); assert_eq!(resp["url"].as_str().unwrap(), "https://httpbin.org/post"); } + +// #[test] +// fn complex_post_request() { +// let form = HashMap::new(); +// let attrs = HashMap::new(); +// +// let resp = Request::new("https://httpbin.org/post") +// .unwrap() +// .method(Method::POST) +// .add_attrs() +// }