// scraper/element_ref/mod.rs
use std::ops::Deref;
use ego_tree::iter::{Edge, Traverse};
use ego_tree::NodeRef;
use html5ever::serialize::{serialize, SerializeOpts, TraversalScope};
use crate::node::Element;
use crate::{Node, Selector};
/// A cheap, copyable handle to a tree node that is expected to be an element.
///
/// `ElementRef::wrap` is the checked way to obtain one: it only succeeds when
/// the node's value reports itself as an element.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ElementRef<'a> {
    /// The underlying tree node reference.
    node: NodeRef<'a, Node>,
}
impl<'a> ElementRef<'a> {
    /// Builds an `ElementRef` without checking what kind of node it is.
    ///
    /// Kept private: callers are expected to have verified that `node`
    /// holds an element (see `wrap`).
    fn new(node: NodeRef<'a, Node>) -> Self {
        ElementRef { node }
    }

    /// Wraps `node` if, and only if, its value is an element.
    ///
    /// Returns `None` for every other kind of node.
    pub fn wrap(node: NodeRef<'a, Node>) -> Option<Self> {
        if !node.value().is_element() {
            return None;
        }
        Some(ElementRef::new(node))
    }

    /// Returns the `Element` stored in the referenced node.
    ///
    /// # Panics
    ///
    /// Panics if the node's value is not an element. Values produced by
    /// `wrap` have already passed that check.
    pub fn value(&self) -> &'a Element {
        let node_value = self.node.value();
        node_value.as_element().unwrap()
    }

    /// Returns an iterator over the elements below this one that match
    /// `selector`, using this element as the selector scope.
    pub fn select<'b>(&self, selector: &'b Selector) -> Select<'a, 'b> {
        let mut edges = self.node.traverse();
        // Drop the first traversal edge (the opening of this very element)
        // so the element is never yielded as a match for itself.
        edges.next();
        Select {
            scope: *self,
            inner: edges,
            selector,
        }
    }

    /// Serializes this element to a `String` with the given traversal scope.
    ///
    /// # Panics
    ///
    /// Panics if serialization fails or if the produced bytes are not
    /// valid UTF-8.
    fn serialize(&self, traversal_scope: TraversalScope) -> String {
        let mut bytes = Vec::new();
        serialize(
            &mut bytes,
            self,
            SerializeOpts {
                scripting_enabled: false,
                traversal_scope,
                create_missing_parent: false,
            },
        )
        .unwrap();
        String::from_utf8(bytes).unwrap()
    }

    /// Returns the HTML of this element, serialized with
    /// `TraversalScope::IncludeNode`.
    pub fn html(&self) -> String {
        self.serialize(TraversalScope::IncludeNode)
    }

    /// Returns the inner HTML of this element, serialized with
    /// `TraversalScope::ChildrenOnly(None)`.
    pub fn inner_html(&self) -> String {
        self.serialize(TraversalScope::ChildrenOnly(None))
    }

    /// Returns an iterator over the text found while traversing this
    /// element's subtree.
    pub fn text(&self) -> Text<'a> {
        Text {
            inner: self.node.traverse(),
        }
    }
}
/// Lets an `ElementRef` be used wherever the wrapped `NodeRef` is expected,
/// exposing the node's own methods directly.
impl<'a> Deref for ElementRef<'a> {
    type Target = NodeRef<'a, Node>;

    /// Borrows the wrapped node reference.
    fn deref(&self) -> &Self::Target {
        &self.node
    }
}
/// Iterator over the elements that match a selector, relative to a scope
/// element.
#[derive(Clone, Debug)]
pub struct Select<'a, 'b> {
    /// Element passed to the selector as its scope.
    scope: ElementRef<'a>,
    /// Remaining traversal edges to examine.
    inner: Traverse<'a, Node>,
    /// Selector every candidate element is tested against.
    selector: &'b Selector,
}
impl<'a, 'b> Iterator for Select<'a, 'b> {
    type Item = ElementRef<'a>;

    /// Advances the traversal until an opened node is an element that
    /// matches the selector within the stored scope, and returns it.
    ///
    /// Returns `None` once the traversal has no more edges.
    fn next(&mut self) -> Option<ElementRef<'a>> {
        // Copy the shared handles out first so the closure does not need to
        // borrow `self` while `self.inner` is mutably borrowed.
        let selector = self.selector;
        let scope = self.scope;
        self.inner.by_ref().find_map(|edge| match edge {
            // Only opening edges are considered, so each node is tested once.
            Edge::Open(node) => ElementRef::wrap(node)
                .filter(|candidate| selector.matches_with_scope(candidate, Some(scope))),
            _ => None,
        })
    }
}
/// Iterator over the string contents of the text nodes met during a
/// traversal.
#[derive(Clone, Debug)]
pub struct Text<'a> {
    /// Remaining traversal edges to examine.
    inner: Traverse<'a, Node>,
}
impl<'a> Iterator for Text<'a> {
    type Item = &'a str;

    /// Advances the traversal to the next opened `Node::Text` and returns
    /// its contents as a string slice.
    ///
    /// Returns `None` once the traversal has no more edges.
    fn next(&mut self) -> Option<&'a str> {
        self.inner.by_ref().find_map(|edge| match edge {
            // Only opening edges are considered, so each node is seen once.
            Edge::Open(node) => match node.value() {
                Node::Text(text) => Some(&**text),
                _ => None,
            },
            _ => None,
        })
    }
}
// Child modules of this module; their bodies live in separate files that are
// not visible from here.
// NOTE(review): presumably `element` provides the selector-matching trait
// impl for `ElementRef` that `Select` depends on — confirm.
mod element;
// NOTE(review): presumably `serializable` provides the html5ever `Serialize`
// impl that `ElementRef::serialize` passes `self` to — confirm.
mod serializable;
#[cfg(test)]
mod tests {
    use crate::html::Html;
    use crate::selector::Selector;

    /// `:scope` must refer to the element `select` is called on: from the
    /// outer `<span>`, `:scope > b` has to pick its direct child `<b>3</b>`
    /// and not the nested `<b>2</b>`.
    #[test]
    fn test_scope() {
        // The raw string is kept flush-left so its bytes stay unchanged.
        let markup = r"
<div>
<b>1</b>
<span>
<span><b>2</b></span>
<b>3</b>
</span>
</div>
";
        let document = Html::parse_fragment(markup);
        let span_selector = Selector::parse("div > span").unwrap();
        let scoped_b_selector = Selector::parse(":scope > b").unwrap();

        let outer_span = document.select(&span_selector).next().unwrap();
        let direct_b = outer_span.select(&scoped_b_selector).next().unwrap();

        assert_eq!(direct_b.inner_html(), "3");
    }
}