From d2589affe5ce54c2130152165d94e6a9e4aeb100 Mon Sep 17 00:00:00 2001 From: Stefan Menner Date: Sun, 1 Jun 2025 21:32:06 +0200 Subject: [PATCH] Parser update --- .vscode/settings.json | 3 + Cargo.toml | 1 + src/main.rs | 367 +--- src/parse/mod.rs | 798 +++++++++ src/types/types.rs | 41 +- test_mordit.html | 3501 +++++++++++++++++++++++++++++++++++++ test_vc.html | 3877 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 8218 insertions(+), 370 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 src/parse/mod.rs create mode 100644 test_mordit.html create mode 100644 test_vc.html diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..89e15c6 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "cSpell.words": ["Quelle", "Verwendung"] +} diff --git a/Cargo.toml b/Cargo.toml index 55883db..9fbf986 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ merlin_env_helper = { version = "0.2.0", registry = "merlin" } regex = "1.11.1" reqwest = {version="0.12.15", features=["blocking"]} scraper = "0.23.1" +select = "0.6.1" diff --git a/src/main.rs b/src/main.rs index 48efd9c..5c31315 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,16 +1,14 @@ +mod parse; mod types; -use std::{fs::File, io::Read, net::Incoming, ops::Deref, time::Duration}; +use std::{fs::File, io::Read}; -use ego_tree::NodeRef; -use regex::Regex; -use scraper::{Element, ElementRef, Node}; -use types::{Icon, ParseType}; +use parse::parse; fn main() -> Result<(), Box> { env_logger::init(); - let html = read("test.html")?; - parse(&html)?; + let html = read("test_vc.html")?; + parse(&html); Ok(()) } @@ -35,358 +33,3 @@ fn read(path: &str) -> Result { file.read_to_string(&mut contents)?; Ok(contents) } - -fn parse(html: &str) -> Result> { - // Parse the HTML content - let document = scraper::Html::parse_document(html); - let selector_quelle = scraper::Selector::parse("#Quelle").unwrap(); - let selector_verwendung = scraper::Selector::parse("#Verwendung").unwrap(); - let selector_li = scraper::Selector::parse("li").unwrap(); - - let elt_quelle = document.select(&selector_quelle).next(); - - if elt_quelle.is_none() { - eprintln!("No element found with the selector '#Quelle'"); - return Ok(false); - } - - let elt_quelle = elt_quelle.unwrap(); - - let mut elt_quelle = elt_quelle.parent().unwrap(); - - let mut c = 0; - let mut elt_ul = None; - - while elt_quelle.next_sibling().is_some() { - elt_quelle = elt_quelle.next_sibling().unwrap(); - - if !elt_quelle.value().is_element() { - continue; - } - - let elt = elt_quelle.value().as_element().unwrap(); - if elt.name() == "ul" { - c += 1; - - if c > 1 { - elt_ul = Some(elt_quelle); - break; - } - } - - if elt.name() == "h2" { - eprintln!("Found 'h2' element, stopping search for 'ul'"); - break; - } - } - - if elt_ul.is_none() { - eprintln!("No second 'ul' element found after '#Quelle'"); - return Ok(false); - } - let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap(); - let li = elt_ul.select(&selector_li); - - for item in li { - parse_li_to_resource(item); - } - - let elt_verwendung = document.select(&selector_verwendung).next(); - - if elt_verwendung.is_none() { - eprintln!("No element found with the selector '#Verwendung'"); - return Ok(false); - } - - let elt_verwendung = elt_verwendung.unwrap(); - let mut elt_verwendung = elt_verwendung.parent().unwrap(); - - let mut elt_ul = None; - - while elt_verwendung.next_sibling().is_some() { - elt_verwendung = elt_verwendung.next_sibling().unwrap(); - - if !elt_verwendung.value().is_element() { - continue; - } - - let elt = elt_verwendung.value().as_element().unwrap(); - if elt.name() == "ul" { - elt_ul = Some(elt_verwendung); - break; - } - - if elt.name() == "h2" { - eprintln!("Found 'h2' element, stopping search for 'ul'"); - break; - } - } - - if elt_ul.is_none() { - eprintln!("No second 'ul' element found after '#Verwendung'"); - return Ok(false); - } - let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap(); - let li = elt_ul.select(&selector_li); - - for item in li { - parse_li_to_resource(item); - } - Ok(true) -} - -fn parse_li_to_resource(item: ElementRef<'_>) { - if !item.has_children() { - println!("Item has no children, skipping."); - return; - } - - let mut resource_items: Vec = Vec::new(); - - let selector = scraper::Selector::parse("span,a,img,small").unwrap(); - let iter = item.select(&selector); - - for child in iter { - let elem = child.value(); - let name = elem.name(); - - if name == "a" - && elem.attrs().find(|attr| attr.0 == "href").is_some() - && elem.attrs().find(|attr| attr.0 == "title").is_some() - { - resource_items.push(ParseType::Link { - url: elem - .attrs() - .find(|attr| attr.0 == "href") - .unwrap() - .1 - .to_string(), - title: elem - .attrs() - .find(|attr| attr.0 == "title") - .unwrap() - .1 - .to_string(), - }); - - let txt = get_text_next(&child); - - if !txt.is_empty() { - parse_text(&txt, &mut resource_items); - } - } else if name == "img" - && elem.attrs().find(|attr| attr.0 == "data-src").is_some() - && elem.attrs().find(|attr| attr.0 == "width").is_some() - && elem.attrs().find(|attr| attr.0 == "height").is_some() - && elem - .attrs() - .find(|attr| attr.0 == "data-image-name") - .is_some() - { - let url = elem.attrs().find(|attr| attr.0 == "data-src").unwrap().1; - let name = elem - .attrs() - .find(|attr| attr.0 == "data-image-name") - .unwrap() - .1; - let width = elem - .attrs() - .find(|attr| attr.0 == "width") - .unwrap() - .1 - .parse() - .unwrap_or(0); - let height = elem - .attrs() - .find(|attr| attr.0 == "height") - .unwrap() - .1 - .parse() - .unwrap_or(0); - - resource_items.push(ParseType::Img(Icon { - name: name.to_string(), - url: url.to_string(), - width, - height, - content_type: "image/png".to_string(), // Assuming PNG, adjust as needed - })); - } else if name == "small" { - let txt = get_text(&child); - parse_text(&txt, &mut resource_items); - } - } - for item in resource_items.iter() { - match item { - ParseType::Link { url, title } => { - println!("Link: {} - {}", url, title); - } - ParseType::Img(icon) => { - println!( - "Image: {} ({}x{}) - {}", - icon.name, icon.width, icon.height, icon.url - ); - } - ParseType::Count(count) => { - println!("Count: {}", count); - } - ParseType::ResourceAdd => { - println!("ResourceAdd"); - } - ParseType::ResourceLast => { - println!("ResourceLast"); - } - ParseType::Duration { duration, unit } => { - println!("Duration: {} {}", duration, unit); - } - } - } - println!("======================"); -} - -fn parse_text(text: &str, resource_items: &mut Vec) { - let reg1 = Regex::new(r"^\s*x(?\d+)\s+(?[→+])\s*$").unwrap(); - let reg2 = Regex::new(r"^\s*\(.*(?\d+,\d+)\ssek\./(?\w+)\s*\)$").unwrap(); - - if let Some(res) = reg1.captures(text) { - let count = res.name("count").unwrap().as_str().parse().unwrap_or(0); - - resource_items.push(ParseType::Count(count)); - - let end = res.name("end").unwrap().as_str().to_string(); - - if end == "+" { - resource_items.push(ParseType::ResourceAdd); - } else { - resource_items.push(ParseType::ResourceLast); - } - } - - if let Some(res) = reg2.captures(text) { - let duration_str = res.name("duration").unwrap().as_str(); - let duration: f64 = duration_str.replace(',', ".").parse().unwrap_or(0.0); - let unit = res.name("unit").unwrap().as_str().to_string(); - let duration: u64 = (duration * 1000.0) as u64; // Convert to milliseconds - - resource_items.push(ParseType::Duration { duration, unit }); - } -} - -fn get_text(node: &NodeRef<'_, Node>) -> String { - if node.value().is_text() { - return node.value().as_text().unwrap().text.trim().to_string(); - } - - if node.value().is_element() { - let mut text = String::new(); - - for child in node.children() { - text.push_str(&get_text(&child)); - } - - if !text.is_empty() { - return text; - } - } - - let next = node.next_sibling(); - if !next.is_some() { - return String::new(); - } - - let next = next.unwrap(); - - if next.value().is_text() { - return next.value().as_text().unwrap().text.trim().to_string(); - } - - String::new() -} - -fn get_text_next(node: &NodeRef<'_, Node>) -> String { - if node.value().is_text() { - return node.value().as_text().unwrap().text.trim().to_string(); - } - let next = node.next_sibling(); - if !next.is_some() { - return String::new(); - } - - let next = next.unwrap(); - - if next.value().is_text() { - return next.value().as_text().unwrap().text.trim().to_string(); - } - - String::new() -} - -fn add_all_children<'a>( - child: NodeRef<'a, Node>, - resource_items: &mut Vec>, -) -> bool { - let mut result = false; - - if child.value().is_text() { - resource_items.push(child); - } else if child.value().is_element() { - let selector = scraper::Selector::parse("a,img,small").unwrap(); - if child.value().as_element().unwrap().name() == "span" { - result = true; - } - - if child.value().as_element().unwrap().name() == "a" { - resource_items.push(child); - } - - let items = ElementRef::wrap(child).unwrap().select(&selector); - - for item in items { - let x = item.deref(); - resource_items.push(*x); - } - } - - result -} - -fn parse_resource(resource_items: Vec>) { - if resource_items.is_empty() { - println!("No resource items to parse."); - return; - } - - let mut url: Option<&str> = Option::None; - let mut title: Option<&str> = Option::None; - let mut icon: Option = Option::None; - - println!("Parsing resource items..."); - - for item in resource_items { - if item.value().is_text() { - println!( - "Text: {}", - item.value().as_text().unwrap().text.trim_ascii() - ); - continue; - } - check_item(&item, &mut url, &mut title, &mut icon); - println!("Resource: {:?}", url); - } - - println!("------------------"); -} - -fn check_item( - item: &NodeRef<'_, Node>, - url: &mut Option<&str>, - title: &mut Option<&str>, - icon: &mut Option, -) { - println!( - "Checking item: {} {:?}", - item.value().as_element().unwrap().name(), - item.value().as_element().unwrap().attrs - ); - *url = Some("test"); -} diff --git a/src/parse/mod.rs b/src/parse/mod.rs new file mode 100644 index 0000000..dd2412a --- /dev/null +++ b/src/parse/mod.rs @@ -0,0 +1,798 @@ +use std::{collections::HashMap, hash::Hash}; + +use regex::Regex; +use select::{ + document::Document, + node::Node, + predicate::{Attr, Name, Or}, +}; + +use crate::types::{Icon, Ingredient, Recipe, RecipeType, Resource, ResourceState}; + +#[derive(Debug, PartialEq, Eq)] +enum ParseType { + Link { url: String, title: String }, + Img(Icon), + Count(u32), + Resource(String), + ResourceAdd, + ResourceLast, + Duration { duration: u64, unit: String }, +} + +fn first(iter: &mut impl Iterator) -> Option { + iter.next() +} + +fn first_child_element(node: Node<'_>) -> Option> { + if (node.children().next().is_none()) { + return None; + } + + let mut next = node.children().next(); + + while next.is_some() { + let child = next.unwrap(); + + if child.name().is_some() { + return Some(child); + } + + next = child.next(); + } + + None +} + +pub fn parse(html: &str) { + let document = Document::from(html); + + parse_source(&document); + parse_dst(&document, "Herstellung", RecipeType::Production); + parse_dst(&document, "Raffination", RecipeType::Refining); + parse_dst(&document, "Raffination_2", RecipeType::Refining); + parse_dst(&document, "Kochen", RecipeType::Cooking); +} + +fn parse_dst(document: &Document, id: &str, recipe_type: RecipeType) -> bool { + let mut dest = document.find(Attr("id", id)); + let dest = first(&mut dest); + + if dest.is_none() { + eprintln!("No element found with the selector '#{}'", id); + return false; + } + + let dst = dest.unwrap(); + let mut dst = dst.parent().unwrap(); + + let mut elt_ul = None; + + while dst.next().is_some() { + dst = dst.next().unwrap(); + + if let Some(name) = dst.name() { + if name == "ul" { + elt_ul = Some(dst); + break; + } + + if name == "h2" { + break; + } + + let first_child = first_child_element(dst); + if name == "h3" + && first_child.is_some() + && first_child.unwrap().name().unwrap() == "span" + && first_child.unwrap().attr("id").is_some() + { + break; + } + } + } + + if elt_ul.is_none() { + return false; + } + + let elt_ul = elt_ul.unwrap(); + let li = elt_ul.find(Name("li")); + + for item in li { + parse_source_li_to_resource(&item, recipe_type.clone()); + } + + return true; +} + +fn parse_source(document: &Document) -> bool { + let mut source = document.find(Attr("id", "Quelle")); + let source = first(&mut source); + + if source.is_none() { + return false; + } + + let source = source.unwrap(); + let mut source = source.parent().unwrap(); + + let mut c = 0; + let mut elt_ul = None; + + while source.next().is_some() { + source = source.next().unwrap(); + + if let Some(name) = source.name() { + if name == "ul" { + c += 1; + + if c > 1 { + elt_ul = Some(source); + break; + } + } + + if name == "h2" { + break; + } + } + } + + if elt_ul.is_none() { + return false; + } + let elt_ul = elt_ul.unwrap(); + let li = elt_ul.find(Name("li")); + + let mut recipes: Vec = Vec::new(); + + for item in li { + if let Some(recipe) = parse_source_li_to_resource(&item, RecipeType::Refining) { + recipes.push(recipe); + } + } + + for recipe in recipes { + let name = recipe.resource.resource.name.clone(); + let title = recipe.resource.resource.title.clone(); + let url = recipe.resource.resource.url.clone(); + let icon = recipe.resource.resource.icon.clone(); + + println!("Resource: {} - {} - {:?} - {:?}", name, title, url, icon); + } + + return true; +} + +fn parse_source_li_to_resource(item: &Node<'_>, recipe_type: RecipeType) -> Option { + if item.children().next().is_none() { + return None; + } + + let mut resource_items: Vec = Vec::new(); + + let selector = item.find(Or( + Or(Name("span"), Name("a")), + Or(Name("img"), Name("small")), + )); + + for child in selector { + let name = child.name().unwrap(); + + if name == "a" && child.attr("href").is_some() && child.attr("title").is_some() { + resource_items.push(ParseType::Link { + url: child.attr("href").unwrap().to_string(), + title: child.attr("title").unwrap().to_string(), + }); + + let txt = get_text_next(&child); + + if !txt.is_empty() { + parse_text(&txt, &mut resource_items); + } + } else if name == "img" + && child.attr("data-src").is_some() + && child.attr("width").is_some() + && child.attr("height").is_some() + && child.attr("data-image-name").is_some() + { + let url = child.attr("data-src").unwrap(); + let name = child.attr("data-image-name").unwrap(); + let width = child.attr("width").unwrap().parse().unwrap_or(0); + let height = child.attr("height").unwrap().parse().unwrap_or(0); + + resource_items.push(ParseType::Img(Icon { + name: name.to_string(), + url: url.to_string(), + width, + height, + content_type: "image/png".to_string(), // Assuming PNG, adjust as needed + })); + } else if name == "span" + && !child.text().is_empty() + && child.parent().unwrap().name().unwrap() == "strong" + { + let txt = child.text().trim().to_string(); + resource_items.push(ParseType::Resource(txt)); + let txt = get_text_next(&child.parent().unwrap()); + parse_text(&txt, &mut resource_items); + } else if name == "strong" && !child.text().is_empty() { + } else if name == "small" { + let txt = get_text(&child); + parse_text(&txt, &mut resource_items); + } + } + + let mut tmp_resource: ( + Option, // name + Option, // title + Option, + Option, // count + Option, // duration + ) = (None, None, None, None, None); + + let mut map_resource: HashMap = HashMap::new(); + let mut ingredient_to_add: Vec = Vec::new(); + + for item in resource_items.iter() { + match item { + ParseType::Link { url, title } => { + if tmp_resource.0.is_none() { + tmp_resource.0 = Some(title.to_string()); + } + if tmp_resource.1.is_none() { + tmp_resource.1 = Some(url.to_string()); + } + + // println!("Link: {} - {}", url, title); + } + ParseType::Img(icon) => { + // println!( + // "Image: {} ({}x{}) - {}", + // icon.name, icon.width, icon.height, icon.url + // ); + + if (tmp_resource.2.is_none()) { + tmp_resource.2 = Some(icon.clone()); + } + } + ParseType::Count(count) => { + // println!("Count: {}", count); + + if (tmp_resource.3.is_none()) { + tmp_resource.3 = Some(*count); + } + } + ParseType::Resource(resource) => { + // println!("Resource: {}", resource); + if (tmp_resource.0.is_none()) { + tmp_resource.0 = Some(resource.to_string()); + } + } + ParseType::ResourceAdd => { + add(&tmp_resource, &mut map_resource, &mut ingredient_to_add); + tmp_resource = (None, None, None, None, None); // Reset for next resource + // println!("ResourceAdd"); + } + ParseType::ResourceLast => { + add(&tmp_resource, &mut map_resource, &mut ingredient_to_add); + tmp_resource = (None, None, None, None, None); // Reset for next resource + // println!("ResourceLast"); + } + ParseType::Duration { duration, unit } => { + // println!(">>> Duration: {} {}", duration, unit); + + if tmp_resource.4.is_none() { + tmp_resource.4 = Some(*duration); + } + } + } + } + + let (_, ingredient) = create_resource_and_ingredient(&tmp_resource, &mut map_resource); + + if !ingredient_to_add.is_empty() { + let recipe = crate::types::Recipe { + recipe_type: recipe_type, + resource: ingredient, + duration: tmp_resource.4.unwrap_or(0), + ingredients: ingredient_to_add, + }; + + return Some(recipe); + } + + None +} + +fn to_name(name: &str) -> String { + normalize_text(name).replace(" ", "_").to_lowercase() +} +fn create_resource_and_ingredient( + tmp_resource: &( + Option, + Option, + Option, + Option, + Option, + ), + map_resource: &mut HashMap, +) -> (Resource, Ingredient) { + let title = tmp_resource.0.as_ref().unwrap().clone(); + let name = to_name(&title); + let url = tmp_resource.1.clone(); + let icon = tmp_resource.2.clone(); + let count = tmp_resource.3.unwrap_or(1); + + let mut resource = Resource { + name: name.clone(), + title, + url, + icon, + state: ResourceState::Unparsed, + }; + + if map_resource.contains_key(&name) { + let res = map_resource.get_mut(&name).unwrap(); + + if res.url.is_some() { + resource.url = res.url.clone(); + } + if res.icon.is_some() { + resource.icon = res.icon.clone(); + } + } + + let ingredient = Ingredient { + resource: resource.clone(), + quantity: count, + }; + + (resource, ingredient) +} +fn add( + tmp_resource: &( + Option, + Option, + Option, + Option, + Option, + ), + map_resource: &mut HashMap, + ingredient_to_add: &mut Vec, +) { + if tmp_resource.0.is_none() { + return; + } + + let (resource, ingredient) = create_resource_and_ingredient(tmp_resource, map_resource); + + map_resource.insert(resource.name.clone(), resource); + + ingredient_to_add.push(ingredient); +} + +fn normalize_text(text: &str) -> String { + let mut text = text + .trim() + .replace('\n', " ") + .replace('\r', " ") + .replace('\t', " "); + + while text.contains(" ") { + text = text.replace(" ", " "); + } + + text +} + +fn get_text(node: &Node<'_>) -> String { + let mut text = String::new(); + + if node.as_text().is_some() { + text.push_str(node.as_text().unwrap().to_string().as_str()); + } + + if node.children().next().is_some() { + for child in node.descendants() { + if child.as_text().is_some() { + text.push_str(child.as_text().unwrap().to_string().as_str()); + } + } + } + + if node.next().is_some() { + let mut next = node.next(); + + while next.is_some() { + let next_node = next.unwrap(); + + if next_node.as_text().is_some() { + text.push_str(next_node.as_text().unwrap()); + } else { + break; + } + + next = next_node.next(); + } + } + + return normalize_text(&text); +} + +fn get_text_next(node: &Node<'_>) -> String { + if node.as_text().is_some() { + return normalize_text(&node.as_text().unwrap()); + } + + let next = node.next(); + + if !next.is_some() { + return String::new(); + } + + let next = next.unwrap(); + + if next.as_text().is_some() { + let mut text = next.as_text().unwrap().trim().to_string(); + let mut next = next.next(); + + while next.is_some() { + let node = next.unwrap(); + next = node.next(); + + if node.as_text().is_some() { + text.push_str(node.as_text().unwrap().trim()); + } else { + break; + } + } + + return normalize_text(&text); + } + + String::new() +} + +fn parse_text(text: &str, resource_items: &mut Vec) { + let reg1 = Regex::new(r"^\s*x(?\d+)\s+(?[→+])\s*$").unwrap(); + let reg2 = Regex::new(r"^\s*\(.*(?\d+,\d+)\ssek\./(?\w+)\s*\)$").unwrap(); + + if let Some(res) = reg1.captures(text) { + let count = res.name("count").unwrap().as_str().parse().unwrap_or(0); + + resource_items.push(ParseType::Count(count)); + + let end = res.name("end").unwrap().as_str().to_string(); + + if end == "+" { + resource_items.push(ParseType::ResourceAdd); + } else { + resource_items.push(ParseType::ResourceLast); + } + + return; + } + + if let Some(res) = reg2.captures(text) { + let duration_str = res.name("duration").unwrap().as_str(); + let duration: f64 = duration_str.replace(',', ".").parse().unwrap_or(0.0); + let unit = res.name("unit").unwrap().as_str().to_string(); + let duration: u64 = (duration * 1000.0) as u64; // Convert to milliseconds + + resource_items.push(ParseType::Duration { duration, unit }); + } +} + +// pub fn parse1(html: &str) -> Result> { +// // Parse the HTML content +// let document = scraper::Html::parse_document(html); +// let selector_usage = scraper::Selector::parse("#Verwendung").unwrap(); +// let selector_li = scraper::Selector::parse("li").unwrap(); + +// parse_source(&document); + +// let elt_usage = document.select(&selector_usage).next(); + +// if elt_usage.is_none() { +// eprintln!("No element found with the selector '#Verwendung'"); +// return Ok(false); +// } + +// let elt_usage = elt_usage.unwrap(); +// let mut elt_usage = elt_usage.parent().unwrap(); + +// let mut elt_ul = None; + +// while elt_usage.next_sibling().is_some() { +// elt_usage = elt_usage.next_sibling().unwrap(); + +// if !elt_usage.value().is_element() { +// continue; +// } + +// let elt = elt_usage.value().as_element().unwrap(); +// if elt.name() == "ul" { +// elt_ul = Some(elt_usage); +// break; +// } + +// if elt.name() == "h2" { +// eprintln!("Found 'h2' element, stopping search for 'ul'"); +// break; +// } +// } + +// if elt_ul.is_none() { +// eprintln!("No second 'ul' element found after '#Verwendung'"); +// return Ok(false); +// } +// let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap(); +// let li = elt_ul.select(&selector_li); + +// for item in li { +// parse_source_li_to_resource(item); +// } +// Ok(true) +// } + +// fn parse_source(document: &Html) -> bool { +// let selector_source = scraper::Selector::parse("#Quelle").unwrap(); +// let selector_li = scraper::Selector::parse("li").unwrap(); +// let elt_source = document.select(&selector_source).next(); + +// if elt_source.is_none() { +// eprintln!("No element found with the selector '#Quelle'"); +// return false; +// } + +// let elt_source = elt_source.unwrap(); + +// let mut elt_source = elt_source.parent().unwrap(); + +// let mut c = 0; +// let mut elt_ul = None; + +// while elt_source.next_sibling().is_some() { +// elt_source = elt_source.next_sibling().unwrap(); + +// if !elt_source.value().is_element() { +// continue; +// } + +// let elt = elt_source.value().as_element().unwrap(); +// if elt.name() == "ul" { +// c += 1; + +// if c > 1 { +// elt_ul = Some(elt_source); +// break; +// } +// } + +// if elt.name() == "h2" { +// eprintln!("Found 'h2' element, stopping search for 'ul'"); +// break; +// } +// } + +// if elt_ul.is_none() { +// eprintln!("No second 'ul' element found after '#Quelle'"); +// return false; +// } +// let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap(); +// let li = elt_ul.select(&selector_li); + +// for item in li { +// parse_source_li_to_resource(item); +// } + +// return true; +// } + +// fn parse_source_li_to_resource(item: ElementRef<'_>) { +// if !item.has_children() { +// println!("Item has no children, skipping."); +// return; +// } + +// let mut resource_items: Vec = Vec::new(); + +// let selector = scraper::Selector::parse("span,a,img,small").unwrap(); +// let iter = item.select(&selector); + +// for child in iter { +// let elem = child.value(); +// let name = elem.name(); + +// if name == "a" +// && elem.attrs().find(|attr| attr.0 == "href").is_some() +// && elem.attrs().find(|attr| attr.0 == "title").is_some() +// { +// resource_items.push(ParseType::Link { +// url: elem +// .attrs() +// .find(|attr| attr.0 == "href") +// .unwrap() +// .1 +// .to_string(), +// title: elem +// .attrs() +// .find(|attr| attr.0 == "title") +// .unwrap() +// .1 +// .to_string(), +// }); + +// let txt = get_text_next(&child); + +// if !txt.is_empty() { +// parse_text(&txt, &mut resource_items); +// } +// } else if name == "img" +// && elem.attrs().find(|attr| attr.0 == "data-src").is_some() +// && elem.attrs().find(|attr| attr.0 == "width").is_some() +// && elem.attrs().find(|attr| attr.0 == "height").is_some() +// && elem +// .attrs() +// .find(|attr| attr.0 == "data-image-name") +// .is_some() +// { +// let url = elem.attrs().find(|attr| attr.0 == "data-src").unwrap().1; +// let name = elem +// .attrs() +// .find(|attr| attr.0 == "data-image-name") +// .unwrap() +// .1; +// let width = elem +// .attrs() +// .find(|attr| attr.0 == "width") +// .unwrap() +// .1 +// .parse() +// .unwrap_or(0); +// let height = elem +// .attrs() +// .find(|attr| attr.0 == "height") +// .unwrap() +// .1 +// .parse() +// .unwrap_or(0); + +// resource_items.push(ParseType::Img(Icon { +// name: name.to_string(), +// url: url.to_string(), +// width, +// height, +// content_type: "image/png".to_string(), // Assuming PNG, adjust as needed +// })); +// } else if name == "small" { +// let txt = get_text(&child); +// parse_text(&txt, &mut resource_items); +// } +// } +// for item in resource_items.iter() { +// match item { +// ParseType::Link { url, title } => { +// println!("Link: {} - {}", url, title); +// } +// ParseType::Img(icon) => { +// println!( +// "Image: {} ({}x{}) - {}", +// icon.name, icon.width, icon.height, icon.url +// ); +// } +// ParseType::Count(count) => { +// println!("Count: {}", count); +// } +// ParseType::ResourceAdd => { +// println!("ResourceAdd") +// } +// ParseType::ResourceLast => { +// println!("ResourceLast") +// } +// ParseType::Duration { duration, unit } => { +// println!(">>> Duration: {} {}", duration, unit) +// } +// } +// } +// println!("======================"); +// } + +// fn parse_text(text: &str, resource_items: &mut Vec) { +// let reg1 = Regex::new(r"^\s*x(?\d+)\s+(?[→+])\s*$").unwrap(); +// let reg2 = Regex::new(r"^\s*\(.*(?\d+,\d+)\ssek\./(?\w+)\s*\)$").unwrap(); + +// if let Some(res) = reg1.captures(text) { +// let count = res.name("count").unwrap().as_str().parse().unwrap_or(0); + +// resource_items.push(ParseType::Count(count)); + +// let end = res.name("end").unwrap().as_str().to_string(); + +// if end == "+" { +// resource_items.push(ParseType::ResourceAdd); +// } else { +// resource_items.push(ParseType::ResourceLast); +// } + +// return; +// } + +// if let Some(res) = reg2.captures(text) { +// let duration_str = res.name("duration").unwrap().as_str(); +// let duration: f64 = duration_str.replace(',', ".").parse().unwrap_or(0.0); +// let unit = res.name("unit").unwrap().as_str().to_string(); +// let duration: u64 = (duration * 1000.0) as u64; // Convert to milliseconds + +// resource_items.push(ParseType::Duration { duration, unit }); +// } +// } + +// fn get_text(node: &NodeRef<'_, Node>) -> String { +// let mut text = String::new(); + +// println!("{:?}", node.value()); + +// if node.value().is_text() { +// text.push_str(node.value().as_text().unwrap().to_string().as_str()); +// } + +// if node.has_children() { +// for child in node.descendants() { +// if (child.value().is_text()) { +// text.push_str(child.value().as_text().unwrap().to_string().as_str()); +// } +// } +// } + +// if node.has_siblings() { +// let mut next = node.next_sibling(); + +// while next.is_some() { +// let next_node = next.unwrap(); +// if next_node.value().is_text() { +// text.push_str(next_node.value().as_text().unwrap()); +// } else { +// break; +// } +// next = next_node.next_sibling(); +// } +// } + +// return text; +// } + +// fn get_text_next(node: &NodeRef<'_, Node>) -> String { +// if node.value().is_text() { +// return node.value().as_text().unwrap().text.trim().to_string(); +// } +// let next = node.next_sibling(); + +// if !next.is_some() { +// return String::new(); +// } + +// let next = next.unwrap(); + +// if next.value().is_text() { +// let mut text = next.value().as_text().unwrap().text.trim().to_string(); +// let mut next = next.next_sibling(); + +// while (next.is_some()) { +// let node = next.unwrap(); +// next = node.next_sibling(); + +// if node.value().is_text() { +// text.push_str(node.value().as_text().unwrap().text.trim()); +// } else { +// break; +// } +// } + +// return text; +// } + +// String::new() +// } diff --git a/src/types/types.rs b/src/types/types.rs index a171394..abfd90f 100644 --- a/src/types/types.rs +++ b/src/types/types.rs @@ -1,3 +1,8 @@ +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum ResourceState { + Parsed(bool), + Unparsed, +} #[derive(Debug, PartialEq, Eq)] pub struct Icon { pub name: String, @@ -18,12 +23,32 @@ impl Clone for Icon { } } } -#[derive(Debug, PartialEq, Eq)] -pub enum ParseType { - Link { url: String, title: String }, - Img(Icon), - Count(u32), - ResourceAdd, - ResourceLast, - Duration { duration: u64, unit: String }, + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Resource { + pub name: String, + pub title: String, + pub url: Option, + pub icon: Option, + pub state: ResourceState, +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Ingredient { + pub resource: Resource, + pub quantity: u32, +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum RecipeType { + Production, + Refining, + Cooking, +} +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Recipe { + pub recipe_type: RecipeType, + pub resource: Ingredient, + pub duration: u64, + pub ingredients: Vec, } diff --git a/test_mordit.html b/test_mordit.html new file mode 100644 index 0000000..33e9a79 --- /dev/null +++ b/test_mordit.html @@ -0,0 +1,3501 @@ + + + + + + Mordit – No Man's Sky Wiki + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+ + No Man's Sky Wiki + +
+ + +
+
+
+ +
+
+
+
+
+
+ +
+
+
+ + +
+
+
+
+ + + + + +
+
+ +
+ +
+ + + + + + + +
+
+ + +
+
+ + + + + + + + +
+
Das Thema dieses Artikels ist + aus dem Endurance + Update.
Die Informationen aus diesem Artikel sind auf dem Stand vom + 6. Oktober 2022.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Mordit
Mordit +
Kategorie Erde
Typ Abgeerntete Substanz
Seltenheit Häufig
Bauplan Wert 40,0 +
Verwendet für Bauen, Herstellen, Aufladung, Verbessern, Veredeln
Symbol Mo +
Aktualisiert Endurance +
+ +

Mordit ist eine Ressource.

+ +

Zusammenfassung[ + + ]

+

Mordit (Mo) ist eine Ressource und eines der Erdelemente.

+

Spielbeschreibung[ + + ]

+

Trotz zahlreicher Studien ist über diese unheimliche kristalline Verbindung nur + wenig bekannt.

+

Sie wird von den Kadavern sterbender Kreaturen freigesetzt, kann aber auch von + der Morditwurzel + Pflanze gewonnen und speziell kultiviert werden.

+

Quelle[ + + ]

+
    +
  • Es kann durch das Töten von Fauna + im Spiel gewonnen werden.
  • +
  • Es kann aus Morditwurzel + geerntet werden (ergibt 39 pro Zyklus).
  • +
  • Es kann auch in „Organischem“ + Gestein gefunden werden.
  • +
  • Es kann in Mineralienvorkommen auf Sumpf-Planeten + gefunden werden.
  • +
+

Mordit kann mit den folgenden Bestandteilen in einer Raffinerie raffiniert werden:

+
    +
  • + Faecium + x3  →   + Mordit + x2  ("Organischer Wiederaufbau", 0,72 + sek./Stück)
  • +
  • + Kohlenstoff + x1  +   + Pugneum + x1  →   + Mordit + x1  ("Anomalie-Organisch-Hybridisierung", 0,36 + sek./Stück)
  • +
  • + Verdichteter Kohlenstoff + x1  +   + Pugneum + x1  →   + Mordit + x2  ("Anomalie-Organisch-Hybridisierung", 0,36 + sek./Stück)
  • +
  • + Diwasserstoff + x1  +   + Verdichteter Kohlenstoff + x1  →   + Mordit + x1  ("Organischer Wiederaufbau", 0,08 + sek./Stück)
  • +
+

Verwendung[ + + ]

+

Herstellung[]

+

Mordit wird als Bestandteil für die Herstellung der folgenden Produkte verwendet:

+ +

Raffination[]

+

Mordit wird als Bestandteil in einer Raffinerie zur Raffination der folgenden Produkte verwendet:

+ +

Kochen[]

+

Mordit wird als Zutat in einem Nährstoffprozessor zum Kochen der folgenden Produkte + verwendet:

+ +

Zusätzliche Informationen[ + + ]

+

Versionsgeschichte[ + + ]

+
    +
  • Foundation - Als + Ressource hinzugefügt.
  • +
  • NEXT - Kann + jetzt sowohl raffiniert als auch wiederhergestellt werden. Symbol geändert. +
  • +
  • Visions - + Versteckte Änderungen: Die Informationstafel hat ein zusätzliches + Symbol, das den Zweck anzeigt.
  • +
  • Beyond - Es + wird nicht mehr benötigt, um Vy‘keen-Wörter von NPCs zu lernen. Veredelung zur + Liste "Verwendet für" hinzugefügt.
  • +
  • Beyond + 2.09.1 - Es wurde ein Problem behoben, das dazu führte, dass + Tiere beim Tod kein Mordit fallen ließen.
  • +
+

Galerie[ + + ]

+ +

Videos[ + + ]

+
+
+
+

+

+
+
+
+

Externe Links[ + + ]

+ + + + + + + + + + + +
+
+
+ +
+ +
+
+
+ +
+
+ +
+ + + + + + + + + \ No newline at end of file diff --git a/test_vc.html b/test_vc.html new file mode 100644 index 0000000..ea24833 --- /dev/null +++ b/test_vc.html @@ -0,0 +1,3877 @@ + + + + + + Verdichteter Kohlenstoff – No Man's Sky Wiki + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+ + No Man's Sky Wiki + +
+ + +
+
+
+ +
+
+
+
+
+
+ +
+
+
+ + +
+
+
+
+ + + + + +
+
+ +
+ +
+ + + + + + + +
+
+ + +
+
+ + + + + + + + +
+
Das Thema dieses Artikels ist + aus dem Endurance + Update.
Die Informationen aus diesem Artikel sind auf dem Stand vom + 6. Oktober 2022.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Verdichteter Kohlenstoff
Verdichteter Kohlenstoff +
Kategorie Treibstoff
Typ Raffiniertes organisches Element
Seltenheit Ungewöhnlich
Bauplan Wert 24,0 +
Verwendet für Bauen, Herstellen, Aufladung, Verbessern
Symbol C+ +
Aktualisiert Endurance +
+ +

Verdichteter Kohlenstoff ist eine Ressource.

+ +

Zusammenfassung[ + + ]

+

Verdichteter Kohlenstoff (C+) ist eine Ressource und eines der Treibstoff-Elemente.

+

Spielbeschreibung[ + + ]

+

Fortgeschrittenes Material, mit einer Raffinerie aus gewöhnlichem Kohlenstoff gewonnen. +

+

Die erhöhte Molekulardichte führt zu einem erheblich verbesserten + Energie-Gewicht-Verhältnis.

+

Wird gelegentlich in Kristallform gefunden. Das Abernten von großen Kristallen + könnte einen fortgeschrittenen Minenlaser erfordern.

+

Raffination[ + + ]

+

Verdichteter Kohlenstoff kann mit den folgenden Bestandteilen in einer Raffinerie raffiniert werden:

+
    +
  • + Kohlenstoff + x2  →   + Verdichteter Kohlenstoff + x1  ("Kohlenstoff verdichten", 0,18 + sek./Stück)
  • +
  • + Kohlenstoff + x2  +   + Sauerstoff + x2  →   + Verdichteter Kohlenstoff + x5  ("Kohlenstoff oxygenieren", 0,36 + sek./Stück)
  • +
  • + Verdichteter Kohlenstoff + x1  +   + Sauerstoff + x2  →   + Verdichteter Kohlenstoff + x6  ("Kohlenstoff oxygenieren", 0,36 + sek./Stück)
  • +
  • + Faecium + x1  +   + Mordit + x1  →   + Verdichteter Kohlenstoff + x2  ("Mikroben nähren", 0,36 + sek./Stück)
  • +
  • + Faecium + x1  +   + Kohlenstoff + x2  →   + Verdichteter Kohlenstoff + x2  ("Mikroben nähren", 0,36 + sek./Stück)
  • +
  • + Faecium + x1  +   + Verdichteter Kohlenstoff + x1  →   + Verdichteter Kohlenstoff + x3  ("Mikroben nähren", 0,36 + sek./Stück)
  • +
  • + Zytophosphat + x1  →   + Verdichteter Kohlenstoff + x1  ("Algenverarbeitung", 0,24 + sek./Stück)
  • +
  • + Uran + x1  +   + Diwasserstoff + x1  →   + Verdichteter Kohlenstoff + x2  ("Energie schöpfen", 0,04 + sek./Stück)
  • +
+

Verwendung[ + + ]

+

Herstellung[]

+

Verdichteter Kohlenstoff wird als Bestandteil für die Herstellung der + folgenden Produkte verwendet:

+ +

Raffination[]

+

Verdichteter Kohlenstoff wird als Bestandteil in einer Raffinerie zur Raffination der folgenden Produkte verwendet:

+ +

Kochen[]

+

Verdichteter Kohlenstoff wird nicht als Zutat in einem Nährstoffprozessor zum Kochen verwendet.

+

Zusätzliche Informationen[ + + ]

+
    +
  • Wenn ein Reisender-NPC nach Plutonium + fragt, besteht die Möglichkeit, einen "Plutoniumersatz" zu geben, im + Normalfall verdichteten Kohlenstoff.
  • +
+

Versionsgeschichte[ + + ]

+
    +
  • NEXT - Als + Ressource hinzugefügt.
  • +
  • Visions - + Versteckte Änderungen: Die Informationstafel hat ein zusätzliches + Symbol, das den Zweck anzeigt.
  • +
+

Galerie[ + + ]

+ + + + + + + + + + + +
+
+
+ +
+ +
+
+
+ +
+
+ +
+ + + + + + + + + \ No newline at end of file