diff --git a/snipped.html b/snipped.html
index bd0aa35..91a7c6c 100644
--- a/snipped.html
+++ b/snipped.html
@@ -74,3 +74,115 @@ x1 →
x1
( "Schnelle Formation/Verdunstung", 0,08 sek./Stück)
+
+ -
+
+ Chlor
+ x1 →
+
+ Salz
+ x2 ("Salzproduktion", 0,24 sek./Stück)
+
+ -
+
+ Diwasserstoff
+ x1 +
+
+ Sauerstoff
+ x1 →
+ Salz
+ x1 ("Schnelle Formation/Verdunstung", 0,08 sek./Stück)
+
+
diff --git a/src/main.rs b/src/main.rs
index f473b14..bd40d20 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,10 @@
-use std::{fs::File, io::{Read}};
+mod types;
+
+use std::{fs::File, io::Read, net::Incoming, ops::Deref};
-use scraper::{ElementRef, Node};
use ego_tree::NodeRef;
+use scraper::{Element, ElementRef, Node};
+use types::Icon;
fn main() -> Result<(), Box> {
env_logger::init();
@@ -10,7 +13,6 @@ fn main() -> Result<(), Box> {
Ok(())
}
-
fn _download_file(url: &str, _path: &str) -> Result> {
// Some simple CLI args requirements...
@@ -22,13 +24,12 @@ fn _download_file(url: &str, _path: &str) -> Result Result {
- let mut file = File::open(path)?;
+ let mut file = File::open(path)?;
let mut contents = String::new();
file.read_to_string(&mut contents)?;
Ok(contents)
@@ -41,8 +42,8 @@ fn parse(html: &str) -> Result> {
let selector_verwendung = scraper::Selector::parse("#Verwendung").unwrap();
let selector_li = scraper::Selector::parse("li").unwrap();
- let elt_quelle= document.select(&selector_quelle).next();
-
+ let elt_quelle = document.select(&selector_quelle).next();
+
if elt_quelle.is_none() {
eprintln!("No element found with the selector '#Quelle'");
return Ok(false);
@@ -55,14 +56,13 @@ fn parse(html: &str) -> Result> {
let mut c = 0;
let mut elt_ul = None;
- while elt_quelle. next_sibling().is_some() {
-
+ while elt_quelle.next_sibling().is_some() {
elt_quelle = elt_quelle.next_sibling().unwrap();
-
- if !elt_quelle.value().is_element(){
+
+ if !elt_quelle.value().is_element() {
continue;
}
-
+
let elt = elt_quelle.value().as_element().unwrap();
if elt.name() == "ul" {
c += 1;
@@ -84,22 +84,21 @@ fn parse(html: &str) -> Result> {
return Ok(false);
}
let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap();
- let li = elt_ul.select(&selector_li);
+ let li = elt_ul.select(&selector_li);
- for item in li
- {
+ for item in li {
parse_li_to_resource(item);
}
- let elt_verwendung= document.select(&selector_verwendung).next();
-
+ let elt_verwendung = document.select(&selector_verwendung).next();
+
if elt_verwendung.is_none() {
eprintln!("No element found with the selector '#Verwendung'");
return Ok(false);
}
let elt_verwendung = elt_verwendung.unwrap();
-
+
Ok(true)
}
@@ -109,29 +108,97 @@ fn parse_li_to_resource(item: ElementRef<'_>) {
return;
}
- let mut resource_items = Vec::new();
+ let mut resource_items: Vec> = Vec::new();
+ let selector = scraper::Selector::parse("span,a,img,small").unwrap();
+ let iter = item.select(&selector);
- let first_child = item.first_child().unwrap();
-
- resource_items.push(first_child);
+ for child in iter {
+ println!(
+ "name: {}, text: {}, text1: {}",
+ child.value().name(),
+ get_text(child.deref()),
+ get_text1(child.deref())
+ );
+ }
+ println!("======================");
+}
- let iter = first_child.next_siblings();
+/// Extracts display text for `node`.
+///
+/// Resolution order, as implemented below:
+/// 1. If `node` is a text node, its trimmed content is returned.
+/// 2. If `node` is an element, `get_text` is applied to every child and the results are
+///    concatenated; a non-empty concatenation is returned.
+/// 3. Otherwise, if the node's immediate next sibling is a text node, that sibling's trimmed
+///    content is returned.
+/// 4. In every remaining case an empty string is returned.
+///
+/// NOTE(review): during step 2 a child element that yields no text of its own falls through to
+/// step 3 and returns its following text sibling; the loop then visits that sibling as well, so
+/// the same text may be appended twice. Confirm whether this is intended.
+fn get_text(node: &NodeRef<'_, Node>) -> String {
+ if node.value().is_text() {
+ return node.value().as_text().unwrap().text.trim().to_string();
+ }
- for next in iter {
- if next.value().is_text() {
- resource_items.push(next);
+ if node.value().is_element() {
+ let mut text = String::new();
+
+ // Concatenate the text of all children (recursively), without any separator.
+ for child in node.children() {
+ text.push_str(&get_text(&child));
}
- else if next.value().is_element() {
- if next.value().as_element().unwrap().name() == "span" {
- parse_resource (resource_items);
- resource_items = Vec::new();
- }
- resource_items.push(next);
+ if !text.is_empty() {
+ return text;
}
}
- println!("======================");
+ // Fallback: use the text node that directly follows `node`, if there is one.
+ let next = node.next_sibling();
+ if !next.is_some() {
+ return String::new();
+ }
+
+ let next = next.unwrap();
+
+ if next.value().is_text() {
+ return next.value().as_text().unwrap().text.trim().to_string();
+ }
+
+ String::new()
+}
+
+/// Returns the trimmed text directly associated with `node`, without descending into children.
+///
+/// If `node` is itself a text node, its trimmed content is returned. Otherwise the node's
+/// immediate next sibling is consulted and, when that sibling is a text node, its trimmed
+/// content is returned. In every other case the result is an empty string.
+fn get_text1(node: &NodeRef<'_, Node>) -> String {
+    // `as_text()` is `None` for anything that is not a text node, so no separate `is_text()`
+    // check followed by `unwrap()` is needed.
+    if let Some(own) = node.value().as_text() {
+        return own.text.trim().to_string();
+    }
+
+    // Fall back to the text node that directly follows `node`, if any.
+    node.next_sibling()
+        .and_then(|next| {
+            next.value()
+                .as_text()
+                .map(|following| following.text.trim().to_string())
+        })
+        .unwrap_or_default()
+}
+
+/// Collects the nodes of interest found at or below `child` into `resource_items`.
+///
+/// - A text node is pushed as-is.
+/// - For an element node: the element itself is pushed when its tag name is `a`, and every
+///   element returned by `select("a,img,small")` on it is pushed afterwards.
+/// - Any other kind of node is ignored.
+///
+/// Returns `true` only when `child` is a `span` element, otherwise `false`.
+///
+/// NOTE(review): the element type of `resource_items` is truncated in this copy of the patch
+/// (`&mut Vec>`); judging by the `push(child)` calls it is presumably
+/// `&mut Vec<NodeRef<'a, Node>>` — confirm against the real file.
+/// NOTE(review): whether `select` can also yield `child` itself is not visible here; if it can,
+/// an `a` element would be pushed twice — verify.
+fn add_all_children<'a>(
+ child: NodeRef<'a, Node>,
+ resource_items: &mut Vec>,
+) -> bool {
+ let mut result = false;
+
+ if child.value().is_text() {
+ resource_items.push(child);
+ } else if child.value().is_element() {
+ let selector = scraper::Selector::parse("a,img,small").unwrap();
+ if child.value().as_element().unwrap().name() == "span" {
+ result = true;
+ }
+
+ if child.value().as_element().unwrap().name() == "a" {
+ resource_items.push(child);
+ }
+
+ let items = ElementRef::wrap(child).unwrap().select(&selector);
+
+ for item in items {
+ // Dereference the selected element to its underlying node and push a copy of it.
+ let x = item.deref();
+ resource_items.push(*x);
+ }
+ }
+
+ result
}
fn parse_resource(resource_items: Vec>) {
@@ -140,16 +207,37 @@ fn parse_resource(resource_items: Vec>) {
return;
}
+ let mut url: Option<&str> = Option::None;
+ let mut title: Option<&str> = Option::None;
+ let mut icon: Option = Option::None;
+
println!("Parsing resource items...");
for item in resource_items {
if item.value().is_text() {
- println!("Text: {}", item.value().as_text().unwrap().text.trim_ascii());
+ println!(
+ "Text: {}",
+ item.value().as_text().unwrap().text.trim_ascii()
+ );
continue;
}
- println!("Resource: {:?}", item.value());
+ check_item(&item, &mut url, &mut title, &mut icon);
+ println!("Resource: {:?}", url);
}
println!("------------------");
-
-}
\ No newline at end of file
+}
+
+/// Inspects one non-text node of a resource and records what it finds in the out-parameters.
+///
+/// Currently a stub: it prints the element's tag name and attributes and then unconditionally
+/// sets `*url` to `Some("test")`. `title` and `icon` are accepted but neither read nor written.
+///
+/// # Panics
+///
+/// Panics if `item` is not an element node, because `as_element()` is unwrapped.
+///
+/// NOTE(review): the type of `icon` is truncated in this copy of the patch (`&mut Option`);
+/// presumably `&mut Option<Icon>` — confirm against the real file.
+fn check_item(
+ item: &NodeRef<'_, Node>,
+ url: &mut Option<&str>,
+ title: &mut Option<&str>,
+ icon: &mut Option,
+) {
+ println!(
+ "Checking item: {} {:?}",
+ item.value().as_element().unwrap().name(),
+ item.value().as_element().unwrap().attrs
+ );
+ // Placeholder value; no real URL is extracted from `item` yet.
+ *url = Some("test");
+}
diff --git a/src/types/types.rs b/src/types/types.rs
index e69de29..38374cc 100644
--- a/src/types/types.rs
+++ b/src/types/types.rs
@@ -0,0 +1,20 @@
+/// Metadata describing an icon image.
+///
+/// `Clone` is derived rather than hand-written: the derived implementation clones every field,
+/// which is exactly what the previous manual `impl Clone` did.
+///
+/// NOTE(review): nothing here shows how the fields are populated; `width`/`height` are
+/// presumably pixel dimensions and `content_type` presumably a MIME type — confirm.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Icon {
+    /// Name of the icon.
+    pub name: String,
+    /// URL of the icon.
+    pub url: String,
+    /// Width of the icon.
+    pub width: u32,
+    /// Height of the icon.
+    pub height: u32,
+    /// Content type of the icon.
+    pub content_type: String,
+}
\ No newline at end of file