raspa/src/selector.rs

71 lines
1.8 KiB
Rust
Raw Normal View History

2021-01-18 02:01:06 +00:00
pub trait SelectorBase {
fn from_html(html: String) -> Self;
2021-01-18 02:01:06 +00:00
fn html(&self) -> String;
fn css<S: AsRef<str>>(&self, css_selector: S) -> Vec<Selector> {
let html = nipper::Document::from(self.html().as_str());
let mut output = vec![];
for item in html.select(css_selector.as_ref()).iter() {
2021-03-15 01:38:41 +00:00
output.push(Selector::from_html(item.html().to_string()))
}
output
}
fn css_once<S: AsRef<str>>(&self, css_selector: S) -> Option<Selector> {
self.css(css_selector.as_ref()).pop()
2021-01-18 02:01:06 +00:00
}
fn xpath<S: AsRef<str>>(&self, xpath: S) -> Vec<Selector> {
match cssifier::cssifier(xpath.as_ref()) {
2021-03-15 01:38:41 +00:00
Some(css_selector) => {
if css_selector.is_empty() {
2021-03-15 01:38:41 +00:00
Vec::default()
} else {
self.css(css_selector.as_str())
}
}
None => Vec::default(),
}
}
fn xpath_once<S: AsRef<str>>(&self, xpath: S) -> Option<Selector> {
self.xpath(xpath.as_ref()).pop()
2021-03-15 01:38:41 +00:00
}
2021-01-18 02:01:06 +00:00
fn content(&self) -> String {
let html = nipper::Document::from(self.html().as_str());
2021-01-31 03:11:54 +00:00
html.select("body > *")
2021-01-18 02:01:06 +00:00
.iter()
.map(|element| element.text().to_string())
.last()
.unwrap()
}
2021-01-31 03:11:54 +00:00
2021-03-17 03:20:50 +00:00
fn attr<'a>(&self, attribute: &'a str) -> Option<String> {
2021-01-31 03:11:54 +00:00
let html = nipper::Document::from(self.html().as_str());
html.select(" body > *")
.attr(attribute)
.map(|text| text.to_string())
2021-01-31 03:11:54 +00:00
}
2021-01-18 02:01:06 +00:00
}
/// An owned HTML fragment.
///
/// `Clone`, `PartialEq` and `Eq` are derived so values can be duplicated and
/// compared directly (two selectors are equal when their HTML text is equal).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Selector {
    /// Raw HTML text of the fragment.
    text: String,
}
impl SelectorBase for Selector {
fn from_html(html: String) -> Self {
2021-01-18 02:01:06 +00:00
Selector {
text: html.to_string(),
}
}
fn html(&self) -> String {
2021-03-17 03:20:50 +00:00
self.text.clone()
}
2021-01-18 02:01:06 +00:00
}