scraper/lib.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
//! HTML parsing and querying with CSS selectors.
//!
//! `scraper` is on [Crates.io][crate] and [GitHub][github].
//!
//! [crate]: https://crates.io/crates/scraper
//! [github]: https://github.com/programble/scraper
//!
//! Scraper provides an interface to Servo's `html5ever` and `selectors` crates, for browser-grade
//! parsing and querying.
//!
//! # Examples
//!
//! ## Parsing a document
//!
//! ```
//! use scraper::Html;
//!
//! let html = r#"
//! <!DOCTYPE html>
//! <meta charset="utf-8">
//! <title>Hello, world!</title>
//! <h1 class="foo">Hello, <i>world!</i></h1>
//! "#;
//!
//! let document = Html::parse_document(html);
//! ```
//!
//! ## Parsing a fragment
//!
//! ```
//! use scraper::Html;
//! let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
//! ```
//!
//! ## Parsing a selector
//!
//! ```
//! use scraper::Selector;
//! let selector = Selector::parse("h1.foo").unwrap();
//! ```
//!
//! ## Selecting elements
//!
//! ```
//! use scraper::{Html, Selector};
//!
//! let html = r#"
//! <ul>
//! <li>Foo</li>
//! <li>Bar</li>
//! <li>Baz</li>
//! </ul>
//! "#;
//!
//! let fragment = Html::parse_fragment(html);
//! let selector = Selector::parse("li").unwrap();
//!
//! for element in fragment.select(&selector) {
//! assert_eq!("li", element.value().name());
//! }
//! ```
//!
//! ## Selecting descendent elements
//!
//! ```
//! use scraper::{Html, Selector};
//!
//! let html = r#"
//! <ul>
//! <li>Foo</li>
//! <li>Bar</li>
//! <li>Baz</li>
//! </ul>
//! "#;
//!
//! let fragment = Html::parse_fragment(html);
//! let ul_selector = Selector::parse("ul").unwrap();
//! let li_selector = Selector::parse("li").unwrap();
//!
//! let ul = fragment.select(&ul_selector).next().unwrap();
//! for element in ul.select(&li_selector) {
//! assert_eq!("li", element.value().name());
//! }
//! ```
//!
//! ## Accessing element attributes
//!
//! ```
//! use scraper::{Html, Selector};
//!
//! let fragment = Html::parse_fragment(r#"<input name="foo" value="bar">"#);
//! let selector = Selector::parse(r#"input[name="foo"]"#).unwrap();
//!
//! let input = fragment.select(&selector).next().unwrap();
//! assert_eq!(Some("bar"), input.value().attr("value"));
//! ```
//!
//! ## Serializing HTML and inner HTML
//!
//! ```
//! use scraper::{Html, Selector};
//!
//! let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
//! let selector = Selector::parse("h1").unwrap();
//!
//! let h1 = fragment.select(&selector).next().unwrap();
//!
//! assert_eq!("<h1>Hello, <i>world!</i></h1>", h1.html());
//! assert_eq!("Hello, <i>world!</i>", h1.inner_html());
//! ```
//!
//! ## Accessing descendent text
//!
//! ```
//! use scraper::{Html, Selector};
//!
//! let fragment = Html::parse_fragment("<h1>Hello, <i>world!</i></h1>");
//! let selector = Selector::parse("h1").unwrap();
//!
//! let h1 = fragment.select(&selector).next().unwrap();
//! let text = h1.text().collect::<Vec<_>>();
//!
//! assert_eq!(vec!["Hello, ", "world!"], text);
//! ```
#![warn(
missing_docs,
missing_debug_implementations,
missing_copy_implementations,
trivial_casts,
trivial_numeric_casts,
unused_extern_crates,
unused_import_braces,
unused_qualifications,
variant_size_differences
)]
#[macro_use]
extern crate html5ever;
pub use crate::element_ref::ElementRef;
pub use crate::html::Html;
pub use crate::node::Node;
pub use crate::selector::Selector;
pub use selectors::{attr::CaseSensitivity, Element};
pub mod element_ref;
pub mod error;
pub mod html;
pub mod node;
pub mod selector;
#[cfg(feature = "atomic")]
pub(crate) mod tendril_util {
use html5ever::tendril;
/// Atomic equivalent to the default `StrTendril` type.
pub type StrTendril = tendril::Tendril<tendril::fmt::UTF8, tendril::Atomic>;
/// Convert a standard tendril into an atomic one.
pub fn make(s: tendril::StrTendril) -> StrTendril {
s.into_send().into()
}
}
#[cfg(not(feature = "atomic"))]
pub(crate) mod tendril_util {
use html5ever::tendril;
/// Primary string tendril type.
pub type StrTendril = tendril::StrTendril;
/// Return unaltered.
pub fn make(s: StrTendril) -> StrTendril {
s
}
}
pub use tendril_util::StrTendril;
#[cfg(test)]
mod test;