Parser update

This commit is contained in:
2025-06-01 21:32:06 +02:00
parent 4f55658b87
commit d2589affe5
7 changed files with 8218 additions and 370 deletions

3
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"cSpell.words": ["Quelle", "Verwendung"]
}

View File

@@ -15,3 +15,4 @@ merlin_env_helper = { version = "0.2.0", registry = "merlin" }
regex = "1.11.1"
reqwest = {version="0.12.15", features=["blocking"]}
scraper = "0.23.1"
select = "0.6.1"

View File

@@ -1,16 +1,14 @@
mod parse;
mod types;
use std::{fs::File, io::Read, net::Incoming, ops::Deref, time::Duration};
use std::{fs::File, io::Read};
use ego_tree::NodeRef;
use regex::Regex;
use scraper::{Element, ElementRef, Node};
use types::{Icon, ParseType};
use parse::parse;
/// Program entry point: initialises `env_logger`, loads an HTML fixture from
/// disk with `read` and hands its contents to `parse`.
///
/// Errors returned by `read` are propagated to the caller through `?`.
// NOTE(review): two fixture reads appear back to back here; this looks like the
// removed and added lines of a diff shown together — confirm which pair is live.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();
    let html = read("test.html")?;
    parse(&html)?;
    let html = read("test_vc.html")?;
    parse(&html);
    Ok(())
}
@@ -35,358 +33,3 @@ fn read(path: &str) -> Result<String, std::io::Error> {
file.read_to_string(&mut contents)?;
Ok(contents)
}
fn parse(html: &str) -> Result<bool, Box<dyn std::error::Error>> {
// Parse the HTML content
let document = scraper::Html::parse_document(html);
let selector_quelle = scraper::Selector::parse("#Quelle").unwrap();
let selector_verwendung = scraper::Selector::parse("#Verwendung").unwrap();
let selector_li = scraper::Selector::parse("li").unwrap();
let elt_quelle = document.select(&selector_quelle).next();
if elt_quelle.is_none() {
eprintln!("No element found with the selector '#Quelle'");
return Ok(false);
}
let elt_quelle = elt_quelle.unwrap();
let mut elt_quelle = elt_quelle.parent().unwrap();
let mut c = 0;
let mut elt_ul = None;
while elt_quelle.next_sibling().is_some() {
elt_quelle = elt_quelle.next_sibling().unwrap();
if !elt_quelle.value().is_element() {
continue;
}
let elt = elt_quelle.value().as_element().unwrap();
if elt.name() == "ul" {
c += 1;
if c > 1 {
elt_ul = Some(elt_quelle);
break;
}
}
if elt.name() == "h2" {
eprintln!("Found 'h2' element, stopping search for 'ul'");
break;
}
}
if elt_ul.is_none() {
eprintln!("No second 'ul' element found after '#Quelle'");
return Ok(false);
}
let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap();
let li = elt_ul.select(&selector_li);
for item in li {
parse_li_to_resource(item);
}
let elt_verwendung = document.select(&selector_verwendung).next();
if elt_verwendung.is_none() {
eprintln!("No element found with the selector '#Verwendung'");
return Ok(false);
}
let elt_verwendung = elt_verwendung.unwrap();
let mut elt_verwendung = elt_verwendung.parent().unwrap();
let mut elt_ul = None;
while elt_verwendung.next_sibling().is_some() {
elt_verwendung = elt_verwendung.next_sibling().unwrap();
if !elt_verwendung.value().is_element() {
continue;
}
let elt = elt_verwendung.value().as_element().unwrap();
if elt.name() == "ul" {
elt_ul = Some(elt_verwendung);
break;
}
if elt.name() == "h2" {
eprintln!("Found 'h2' element, stopping search for 'ul'");
break;
}
}
if elt_ul.is_none() {
eprintln!("No second 'ul' element found after '#Verwendung'");
return Ok(false);
}
let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap();
let li = elt_ul.select(&selector_li);
for item in li {
parse_li_to_resource(item);
}
Ok(true)
}
/// Tokenises one `<li>` entry into `ParseType` values and prints them.
///
/// The `span`, `a`, `img` and `small` descendants of `item` are visited in
/// selector order:
/// * an `a` with `href` and `title` becomes a `Link`, and the text directly
///   following it is fed to `parse_text`;
/// * an `img` with `data-src`, `width`, `height` and `data-image-name`
///   becomes an `Img` (unparsable dimensions fall back to `0`);
/// * the text of a `small` is fed to `parse_text`.
///
/// Entries without children are skipped with a message on stdout.
fn parse_li_to_resource(item: ElementRef<'_>) {
    if !item.has_children() {
        println!("Item has no children, skipping.");
        return;
    }

    let wanted = scraper::Selector::parse("span,a,img,small").unwrap();
    let mut tokens: Vec<ParseType> = Vec::new();

    for node in item.select(&wanted) {
        let element = node.value();
        // Value of the first attribute whose local name equals `key`, if any.
        let attr = |key: &str| {
            element
                .attrs()
                .find(|(name, _)| *name == key)
                .map(|(_, value)| value)
        };

        match element.name() {
            "a" => {
                if let (Some(href), Some(title)) = (attr("href"), attr("title")) {
                    tokens.push(ParseType::Link {
                        url: href.to_string(),
                        title: title.to_string(),
                    });
                    let trailing = get_text_next(&node);
                    if !trailing.is_empty() {
                        parse_text(&trailing, &mut tokens);
                    }
                }
            }
            "img" => {
                let fields = (
                    attr("data-src"),
                    attr("width"),
                    attr("height"),
                    attr("data-image-name"),
                );
                if let (Some(src), Some(raw_width), Some(raw_height), Some(image_name)) = fields {
                    tokens.push(ParseType::Img(Icon {
                        name: image_name.to_string(),
                        url: src.to_string(),
                        width: raw_width.parse().unwrap_or(0),
                        height: raw_height.parse().unwrap_or(0),
                        content_type: "image/png".to_string(), // Assuming PNG, adjust as needed
                    }));
                }
            }
            "small" => {
                let note = get_text(&node);
                parse_text(&note, &mut tokens);
            }
            _ => {}
        }
    }

    // Dump the token stream for inspection.
    for token in &tokens {
        match token {
            ParseType::Link { url, title } => println!("Link: {} - {}", url, title),
            ParseType::Img(icon) => println!(
                "Image: {} ({}x{}) - {}",
                icon.name, icon.width, icon.height, icon.url
            ),
            ParseType::Count(count) => println!("Count: {}", count),
            ParseType::ResourceAdd => println!("ResourceAdd"),
            ParseType::ResourceLast => println!("ResourceLast"),
            ParseType::Duration { duration, unit } => {
                println!("Duration: {} {}", duration, unit)
            }
        }
    }
    println!("======================");
}
fn parse_text(text: &str, resource_items: &mut Vec<ParseType>) {
let reg1 = Regex::new(r"^\s*x(?<count>\d+)\s+(?<end>[→+])\s*$").unwrap();
let reg2 = Regex::new(r"^\s*\(.*(?<duration>\d+,\d+)\ssek\./(?<unit>\w+)\s*\)$").unwrap();
if let Some(res) = reg1.captures(text) {
let count = res.name("count").unwrap().as_str().parse().unwrap_or(0);
resource_items.push(ParseType::Count(count));
let end = res.name("end").unwrap().as_str().to_string();
if end == "+" {
resource_items.push(ParseType::ResourceAdd);
} else {
resource_items.push(ParseType::ResourceLast);
}
}
if let Some(res) = reg2.captures(text) {
let duration_str = res.name("duration").unwrap().as_str();
let duration: f64 = duration_str.replace(',', ".").parse().unwrap_or(0.0);
let unit = res.name("unit").unwrap().as_str().to_string();
let duration: u64 = (duration * 1000.0) as u64; // Convert to milliseconds
resource_items.push(ParseType::Duration { duration, unit });
}
}
/// Extracts text associated with `node`.
///
/// * A text node yields its own trimmed content.
/// * An element yields the concatenation of `get_text` over its children,
///   provided that concatenation is non-empty.
/// * Otherwise the trimmed content of the immediately following sibling is
///   returned when that sibling is a text node, else an empty string.
fn get_text(node: &NodeRef<'_, Node>) -> String {
    // Trimmed content of `n` when it is a text node.
    let trimmed = |n: &NodeRef<'_, Node>| n.value().as_text().map(|t| t.text.trim().to_string());

    if let Some(own) = trimmed(node) {
        return own;
    }

    if node.value().is_element() {
        let joined: String = node.children().map(|child| get_text(&child)).collect();
        if !joined.is_empty() {
            return joined;
        }
    }

    node.next_sibling()
        .and_then(|sibling| trimmed(&sibling))
        .unwrap_or_default()
}
/// Returns the trimmed text of `node` itself when it is a text node, otherwise
/// the trimmed text of its immediately following sibling when that sibling is
/// a text node. Yields an empty string in every other case.
fn get_text_next(node: &NodeRef<'_, Node>) -> String {
    // Trimmed content of `n` when it is a text node.
    let trimmed = |n: &NodeRef<'_, Node>| n.value().as_text().map(|t| t.text.trim().to_string());

    trimmed(node)
        .or_else(|| node.next_sibling().and_then(|sibling| trimmed(&sibling)))
        .unwrap_or_default()
}
/// Collects the nodes of interest below `child` into `resource_items`.
///
/// * A text node is pushed as is.
/// * For an element, the element itself is pushed when it is an `a`, followed
///   by every node matched by the `a,img,small` selector within it.
///
/// Returns `true` only when `child` is a `span` element.
fn add_all_children<'a>(
    child: NodeRef<'a, Node>,
    resource_items: &mut Vec<NodeRef<'a, Node>>,
) -> bool {
    if child.value().is_text() {
        resource_items.push(child);
        return false;
    }

    // Anything that is neither text nor an element contributes nothing.
    let Some(element) = ElementRef::wrap(child) else {
        return false;
    };

    let nested = scraper::Selector::parse("a,img,small").unwrap();
    let tag = element.value().name();
    if tag == "a" {
        resource_items.push(child);
    }
    resource_items.extend(element.select(&nested).map(|hit| *hit));

    tag == "span"
}
/// Debug walk over a collected node list: text nodes are printed, every other
/// node is handed to `check_item`, after which the current `url` is printed.
fn parse_resource(resource_items: Vec<NodeRef<'_, Node>>) {
    if resource_items.is_empty() {
        println!("No resource items to parse.");
        return;
    }

    let (mut url, mut title, mut icon): (Option<&str>, Option<&str>, Option<Icon>) =
        (None, None, None);

    println!("Parsing resource items...");
    for item in resource_items {
        match item.value().as_text() {
            Some(text) => println!("Text: {}", text.text.trim_ascii()),
            None => {
                check_item(&item, &mut url, &mut title, &mut icon);
                println!("Resource: {:?}", url);
            }
        }
    }
    println!("------------------");
}
/// Placeholder inspection of a single element node: prints its tag name and
/// attributes, then sets `url` to the fixed value `"test"`.
///
/// `title` and `icon` are accepted but not touched.
///
/// # Panics
///
/// Panics when `item` is not an element node.
fn check_item(
    item: &NodeRef<'_, Node>,
    url: &mut Option<&str>,
    title: &mut Option<&str>,
    icon: &mut Option<Icon>,
) {
    let element = item.value().as_element().unwrap();
    println!("Checking item: {} {:?}", element.name(), element.attrs);
    *url = Some("test");
}

798
src/parse/mod.rs Normal file
View File

@@ -0,0 +1,798 @@
use std::{collections::HashMap, hash::Hash};
use regex::Regex;
use select::{
document::Document,
node::Node,
predicate::{Attr, Name, Or},
};
use crate::types::{Icon, Ingredient, Recipe, RecipeType, Resource, ResourceState};
/// Intermediate token produced while scanning one `<li>` recipe entry, before
/// the tokens are folded into resources and ingredients.
#[derive(Debug, PartialEq, Eq)]
enum ParseType {
    /// An `<a>` element that carries both an `href` (`url`) and a `title`.
    Link { url: String, title: String },
    /// An `<img>` element described as an [`Icon`].
    Img(Icon),
    /// The number parsed from an `x<digits>` fragment.
    Count(u32),
    /// Trimmed text of a `<span>` whose parent is a `<strong>` element.
    Resource(String),
    /// Emitted after a count that is followed by `+`.
    ResourceAdd,
    /// Emitted after a count that is followed by `→`.
    ResourceLast,
    /// A duration in milliseconds together with the word captured after `sek./`.
    Duration { duration: u64, unit: String },
}
/// Advances `iter` by one step and returns the item it yielded, or `None`
/// when the iterator is already exhausted.
fn first<T>(iter: &mut impl Iterator<Item = T>) -> Option<T> {
    Iterator::next(iter)
}
fn first_child_element(node: Node<'_>) -> Option<Node<'_>> {
if (node.children().next().is_none()) {
return None;
}
let mut next = node.children().next();
while next.is_some() {
let child = next.unwrap();
if child.name().is_some() {
return Some(child);
}
next = child.next();
}
None
}
/// Parses a page: first the `Quelle` section via `parse_source`, then each
/// recipe section via `parse_dst`, in the fixed order listed below.
pub fn parse(html: &str) {
    let document = Document::from(html);
    parse_source(&document);

    // (element id of the section anchor, recipe type assigned to its entries)
    let sections = [
        ("Herstellung", RecipeType::Production),
        ("Raffination", RecipeType::Refining),
        ("Raffination_2", RecipeType::Refining),
        ("Kochen", RecipeType::Cooking),
    ];
    for (anchor_id, recipe_type) in sections {
        parse_dst(&document, anchor_id, recipe_type);
    }
}
fn parse_dst(document: &Document, id: &str, recipe_type: RecipeType) -> bool {
let mut dest = document.find(Attr("id", id));
let dest = first(&mut dest);
if dest.is_none() {
eprintln!("No element found with the selector '#{}'", id);
return false;
}
let dst = dest.unwrap();
let mut dst = dst.parent().unwrap();
let mut elt_ul = None;
while dst.next().is_some() {
dst = dst.next().unwrap();
if let Some(name) = dst.name() {
if name == "ul" {
elt_ul = Some(dst);
break;
}
if name == "h2" {
break;
}
let first_child = first_child_element(dst);
if name == "h3"
&& first_child.is_some()
&& first_child.unwrap().name().unwrap() == "span"
&& first_child.unwrap().attr("id").is_some()
{
break;
}
}
}
if elt_ul.is_none() {
return false;
}
let elt_ul = elt_ul.unwrap();
let li = elt_ul.find(Name("li"));
for item in li {
parse_source_li_to_resource(&item, recipe_type.clone());
}
return true;
}
fn parse_source(document: &Document) -> bool {
let mut source = document.find(Attr("id", "Quelle"));
let source = first(&mut source);
if source.is_none() {
return false;
}
let source = source.unwrap();
let mut source = source.parent().unwrap();
let mut c = 0;
let mut elt_ul = None;
while source.next().is_some() {
source = source.next().unwrap();
if let Some(name) = source.name() {
if name == "ul" {
c += 1;
if c > 1 {
elt_ul = Some(source);
break;
}
}
if name == "h2" {
break;
}
}
}
if elt_ul.is_none() {
return false;
}
let elt_ul = elt_ul.unwrap();
let li = elt_ul.find(Name("li"));
let mut recipes: Vec<Recipe> = Vec::new();
for item in li {
if let Some(recipe) = parse_source_li_to_resource(&item, RecipeType::Refining) {
recipes.push(recipe);
}
}
for recipe in recipes {
let name = recipe.resource.resource.name.clone();
let title = recipe.resource.resource.title.clone();
let url = recipe.resource.resource.url.clone();
let icon = recipe.resource.resource.icon.clone();
println!("Resource: {} - {} - {:?} - {:?}", name, title, url, icon);
}
return true;
}
fn parse_source_li_to_resource(item: &Node<'_>, recipe_type: RecipeType) -> Option<Recipe> {
if item.children().next().is_none() {
return None;
}
let mut resource_items: Vec<ParseType> = Vec::new();
let selector = item.find(Or(
Or(Name("span"), Name("a")),
Or(Name("img"), Name("small")),
));
for child in selector {
let name = child.name().unwrap();
if name == "a" && child.attr("href").is_some() && child.attr("title").is_some() {
resource_items.push(ParseType::Link {
url: child.attr("href").unwrap().to_string(),
title: child.attr("title").unwrap().to_string(),
});
let txt = get_text_next(&child);
if !txt.is_empty() {
parse_text(&txt, &mut resource_items);
}
} else if name == "img"
&& child.attr("data-src").is_some()
&& child.attr("width").is_some()
&& child.attr("height").is_some()
&& child.attr("data-image-name").is_some()
{
let url = child.attr("data-src").unwrap();
let name = child.attr("data-image-name").unwrap();
let width = child.attr("width").unwrap().parse().unwrap_or(0);
let height = child.attr("height").unwrap().parse().unwrap_or(0);
resource_items.push(ParseType::Img(Icon {
name: name.to_string(),
url: url.to_string(),
width,
height,
content_type: "image/png".to_string(), // Assuming PNG, adjust as needed
}));
} else if name == "span"
&& !child.text().is_empty()
&& child.parent().unwrap().name().unwrap() == "strong"
{
let txt = child.text().trim().to_string();
resource_items.push(ParseType::Resource(txt));
let txt = get_text_next(&child.parent().unwrap());
parse_text(&txt, &mut resource_items);
} else if name == "strong" && !child.text().is_empty() {
} else if name == "small" {
let txt = get_text(&child);
parse_text(&txt, &mut resource_items);
}
}
let mut tmp_resource: (
Option<String>, // name
Option<String>, // title
Option<Icon>,
Option<u32>, // count
Option<u64>, // duration
) = (None, None, None, None, None);
let mut map_resource: HashMap<String, Resource> = HashMap::new();
let mut ingredient_to_add: Vec<Ingredient> = Vec::new();
for item in resource_items.iter() {
match item {
ParseType::Link { url, title } => {
if tmp_resource.0.is_none() {
tmp_resource.0 = Some(title.to_string());
}
if tmp_resource.1.is_none() {
tmp_resource.1 = Some(url.to_string());
}
// println!("Link: {} - {}", url, title);
}
ParseType::Img(icon) => {
// println!(
// "Image: {} ({}x{}) - {}",
// icon.name, icon.width, icon.height, icon.url
// );
if (tmp_resource.2.is_none()) {
tmp_resource.2 = Some(icon.clone());
}
}
ParseType::Count(count) => {
// println!("Count: {}", count);
if (tmp_resource.3.is_none()) {
tmp_resource.3 = Some(*count);
}
}
ParseType::Resource(resource) => {
// println!("Resource: {}", resource);
if (tmp_resource.0.is_none()) {
tmp_resource.0 = Some(resource.to_string());
}
}
ParseType::ResourceAdd => {
add(&tmp_resource, &mut map_resource, &mut ingredient_to_add);
tmp_resource = (None, None, None, None, None); // Reset for next resource
// println!("ResourceAdd");
}
ParseType::ResourceLast => {
add(&tmp_resource, &mut map_resource, &mut ingredient_to_add);
tmp_resource = (None, None, None, None, None); // Reset for next resource
// println!("ResourceLast");
}
ParseType::Duration { duration, unit } => {
// println!(">>> Duration: {} {}", duration, unit);
if tmp_resource.4.is_none() {
tmp_resource.4 = Some(*duration);
}
}
}
}
let (_, ingredient) = create_resource_and_ingredient(&tmp_resource, &mut map_resource);
if !ingredient_to_add.is_empty() {
let recipe = crate::types::Recipe {
recipe_type: recipe_type,
resource: ingredient,
duration: tmp_resource.4.unwrap_or(0),
ingredients: ingredient_to_add,
};
return Some(recipe);
}
None
}
/// Derives an identifier from a display name: the text is normalised with
/// `normalize_text`, spaces become underscores and the result is lowercased.
fn to_name(name: &str) -> String {
    let cleaned = normalize_text(name);
    let underscored = cleaned.replace(" ", "_");
    underscored.to_lowercase()
}
/// Builds a [`Resource`] and the matching [`Ingredient`] from the pending
/// tuple `(title, url, icon, count, duration)`.
///
/// The resource name is derived from the title via `to_name`. When
/// `map_resource` already holds a resource with that name, its `url` and
/// `icon` take precedence over the pending ones wherever they are set. The
/// quantity defaults to `1` and the state is always `Unparsed`. The map is
/// only read, never modified.
///
/// # Panics
///
/// Panics when the title slot of `tmp_resource` is `None`.
fn create_resource_and_ingredient(
    tmp_resource: &(
        Option<String>,
        Option<String>,
        Option<Icon>,
        Option<u32>,
        Option<u64>,
    ),
    map_resource: &mut HashMap<String, Resource>,
) -> (Resource, Ingredient) {
    let (pending_title, pending_url, pending_icon, pending_count, _) = tmp_resource;

    let title = pending_title.clone().unwrap();
    let name = to_name(&title);

    // Values already known for this resource win over freshly scraped ones.
    let known = map_resource.get(&name);
    let url = known
        .and_then(|existing| existing.url.clone())
        .or_else(|| pending_url.clone());
    let icon = known
        .and_then(|existing| existing.icon.clone())
        .or_else(|| pending_icon.clone());

    let resource = Resource {
        name,
        title,
        url,
        icon,
        state: ResourceState::Unparsed,
    };
    let ingredient = Ingredient {
        resource: resource.clone(),
        quantity: pending_count.unwrap_or(1),
    };
    (resource, ingredient)
}
/// Turns the pending tuple into an ingredient, provided it has a title.
///
/// The ingredient is appended to `ingredient_to_add` and the resource is
/// stored in `map_resource` under its name, replacing any previous entry.
/// A pending tuple without a title is ignored.
fn add(
    tmp_resource: &(
        Option<String>,
        Option<String>,
        Option<Icon>,
        Option<u32>,
        Option<u64>,
    ),
    map_resource: &mut HashMap<String, Resource>,
    ingredient_to_add: &mut Vec<Ingredient>,
) {
    if tmp_resource.0.is_some() {
        let (resource, ingredient) = create_resource_and_ingredient(tmp_resource, map_resource);
        ingredient_to_add.push(ingredient);
        map_resource.insert(resource.name.clone(), resource);
    }
}
/// Normalises whitespace in `text`.
///
/// Leading and trailing whitespace is trimmed, every newline, carriage return
/// and tab is turned into a space, and each run of spaces is collapsed into a
/// single space.
///
/// This is done in one pass; the previous replace-until-stable loop never
/// terminated for input that contained a space.
fn normalize_text(text: &str) -> String {
    let mut normalized = String::with_capacity(text.len());
    let mut previous_was_space = false;

    for ch in text.trim().chars() {
        let ch = if matches!(ch, '\n' | '\r' | '\t') { ' ' } else { ch };
        if ch == ' ' {
            // Emit only the first space of a run.
            if !previous_was_space {
                normalized.push(' ');
            }
            previous_was_space = true;
        } else {
            normalized.push(ch);
            previous_was_space = false;
        }
    }

    normalized
}
fn get_text(node: &Node<'_>) -> String {
let mut text = String::new();
if node.as_text().is_some() {
text.push_str(node.as_text().unwrap().to_string().as_str());
}
if node.children().next().is_some() {
for child in node.descendants() {
if child.as_text().is_some() {
text.push_str(child.as_text().unwrap().to_string().as_str());
}
}
}
if node.next().is_some() {
let mut next = node.next();
while next.is_some() {
let next_node = next.unwrap();
if next_node.as_text().is_some() {
text.push_str(next_node.as_text().unwrap());
} else {
break;
}
next = next_node.next();
}
}
return normalize_text(&text);
}
fn get_text_next(node: &Node<'_>) -> String {
if node.as_text().is_some() {
return normalize_text(&node.as_text().unwrap());
}
let next = node.next();
if !next.is_some() {
return String::new();
}
let next = next.unwrap();
if next.as_text().is_some() {
let mut text = next.as_text().unwrap().trim().to_string();
let mut next = next.next();
while next.is_some() {
let node = next.unwrap();
next = node.next();
if node.as_text().is_some() {
text.push_str(node.as_text().unwrap().trim());
} else {
break;
}
}
return normalize_text(&text);
}
String::new()
}
fn parse_text(text: &str, resource_items: &mut Vec<ParseType>) {
let reg1 = Regex::new(r"^\s*x(?<count>\d+)\s+(?<end>[→+])\s*$").unwrap();
let reg2 = Regex::new(r"^\s*\(.*(?<duration>\d+,\d+)\ssek\./(?<unit>\w+)\s*\)$").unwrap();
if let Some(res) = reg1.captures(text) {
let count = res.name("count").unwrap().as_str().parse().unwrap_or(0);
resource_items.push(ParseType::Count(count));
let end = res.name("end").unwrap().as_str().to_string();
if end == "+" {
resource_items.push(ParseType::ResourceAdd);
} else {
resource_items.push(ParseType::ResourceLast);
}
return;
}
if let Some(res) = reg2.captures(text) {
let duration_str = res.name("duration").unwrap().as_str();
let duration: f64 = duration_str.replace(',', ".").parse().unwrap_or(0.0);
let unit = res.name("unit").unwrap().as_str().to_string();
let duration: u64 = (duration * 1000.0) as u64; // Convert to milliseconds
resource_items.push(ParseType::Duration { duration, unit });
}
}
// pub fn parse1(html: &str) -> Result<bool, Box<dyn std::error::Error>> {
// // Parse the HTML content
// let document = scraper::Html::parse_document(html);
// let selector_usage = scraper::Selector::parse("#Verwendung").unwrap();
// let selector_li = scraper::Selector::parse("li").unwrap();
// parse_source(&document);
// let elt_usage = document.select(&selector_usage).next();
// if elt_usage.is_none() {
// eprintln!("No element found with the selector '#Verwendung'");
// return Ok(false);
// }
// let elt_usage = elt_usage.unwrap();
// let mut elt_usage = elt_usage.parent().unwrap();
// let mut elt_ul = None;
// while elt_usage.next_sibling().is_some() {
// elt_usage = elt_usage.next_sibling().unwrap();
// if !elt_usage.value().is_element() {
// continue;
// }
// let elt = elt_usage.value().as_element().unwrap();
// if elt.name() == "ul" {
// elt_ul = Some(elt_usage);
// break;
// }
// if elt.name() == "h2" {
// eprintln!("Found 'h2' element, stopping search for 'ul'");
// break;
// }
// }
// if elt_ul.is_none() {
// eprintln!("No second 'ul' element found after '#Verwendung'");
// return Ok(false);
// }
// let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap();
// let li = elt_ul.select(&selector_li);
// for item in li {
// parse_source_li_to_resource(item);
// }
// Ok(true)
// }
// fn parse_source(document: &Html) -> bool {
// let selector_source = scraper::Selector::parse("#Quelle").unwrap();
// let selector_li = scraper::Selector::parse("li").unwrap();
// let elt_source = document.select(&selector_source).next();
// if elt_source.is_none() {
// eprintln!("No element found with the selector '#Quelle'");
// return false;
// }
// let elt_source = elt_source.unwrap();
// let mut elt_source = elt_source.parent().unwrap();
// let mut c = 0;
// let mut elt_ul = None;
// while elt_source.next_sibling().is_some() {
// elt_source = elt_source.next_sibling().unwrap();
// if !elt_source.value().is_element() {
// continue;
// }
// let elt = elt_source.value().as_element().unwrap();
// if elt.name() == "ul" {
// c += 1;
// if c > 1 {
// elt_ul = Some(elt_source);
// break;
// }
// }
// if elt.name() == "h2" {
// eprintln!("Found 'h2' element, stopping search for 'ul'");
// break;
// }
// }
// if elt_ul.is_none() {
// eprintln!("No second 'ul' element found after '#Quelle'");
// return false;
// }
// let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap();
// let li = elt_ul.select(&selector_li);
// for item in li {
// parse_source_li_to_resource(item);
// }
// return true;
// }
// fn parse_source_li_to_resource(item: ElementRef<'_>) {
// if !item.has_children() {
// println!("Item has no children, skipping.");
// return;
// }
// let mut resource_items: Vec<ParseType> = Vec::new();
// let selector = scraper::Selector::parse("span,a,img,small").unwrap();
// let iter = item.select(&selector);
// for child in iter {
// let elem = child.value();
// let name = elem.name();
// if name == "a"
// && elem.attrs().find(|attr| attr.0 == "href").is_some()
// && elem.attrs().find(|attr| attr.0 == "title").is_some()
// {
// resource_items.push(ParseType::Link {
// url: elem
// .attrs()
// .find(|attr| attr.0 == "href")
// .unwrap()
// .1
// .to_string(),
// title: elem
// .attrs()
// .find(|attr| attr.0 == "title")
// .unwrap()
// .1
// .to_string(),
// });
// let txt = get_text_next(&child);
// if !txt.is_empty() {
// parse_text(&txt, &mut resource_items);
// }
// } else if name == "img"
// && elem.attrs().find(|attr| attr.0 == "data-src").is_some()
// && elem.attrs().find(|attr| attr.0 == "width").is_some()
// && elem.attrs().find(|attr| attr.0 == "height").is_some()
// && elem
// .attrs()
// .find(|attr| attr.0 == "data-image-name")
// .is_some()
// {
// let url = elem.attrs().find(|attr| attr.0 == "data-src").unwrap().1;
// let name = elem
// .attrs()
// .find(|attr| attr.0 == "data-image-name")
// .unwrap()
// .1;
// let width = elem
// .attrs()
// .find(|attr| attr.0 == "width")
// .unwrap()
// .1
// .parse()
// .unwrap_or(0);
// let height = elem
// .attrs()
// .find(|attr| attr.0 == "height")
// .unwrap()
// .1
// .parse()
// .unwrap_or(0);
// resource_items.push(ParseType::Img(Icon {
// name: name.to_string(),
// url: url.to_string(),
// width,
// height,
// content_type: "image/png".to_string(), // Assuming PNG, adjust as needed
// }));
// } else if name == "small" {
// let txt = get_text(&child);
// parse_text(&txt, &mut resource_items);
// }
// }
// for item in resource_items.iter() {
// match item {
// ParseType::Link { url, title } => {
// println!("Link: {} - {}", url, title);
// }
// ParseType::Img(icon) => {
// println!(
// "Image: {} ({}x{}) - {}",
// icon.name, icon.width, icon.height, icon.url
// );
// }
// ParseType::Count(count) => {
// println!("Count: {}", count);
// }
// ParseType::ResourceAdd => {
// println!("ResourceAdd")
// }
// ParseType::ResourceLast => {
// println!("ResourceLast")
// }
// ParseType::Duration { duration, unit } => {
// println!(">>> Duration: {} {}", duration, unit)
// }
// }
// }
// println!("======================");
// }
// fn parse_text(text: &str, resource_items: &mut Vec<ParseType>) {
// let reg1 = Regex::new(r"^\s*x(?<count>\d+)\s+(?<end>[→+])\s*$").unwrap();
// let reg2 = Regex::new(r"^\s*\(.*(?<duration>\d+,\d+)\ssek\./(?<unit>\w+)\s*\)$").unwrap();
// if let Some(res) = reg1.captures(text) {
// let count = res.name("count").unwrap().as_str().parse().unwrap_or(0);
// resource_items.push(ParseType::Count(count));
// let end = res.name("end").unwrap().as_str().to_string();
// if end == "+" {
// resource_items.push(ParseType::ResourceAdd);
// } else {
// resource_items.push(ParseType::ResourceLast);
// }
// return;
// }
// if let Some(res) = reg2.captures(text) {
// let duration_str = res.name("duration").unwrap().as_str();
// let duration: f64 = duration_str.replace(',', ".").parse().unwrap_or(0.0);
// let unit = res.name("unit").unwrap().as_str().to_string();
// let duration: u64 = (duration * 1000.0) as u64; // Convert to milliseconds
// resource_items.push(ParseType::Duration { duration, unit });
// }
// }
// fn get_text(node: &NodeRef<'_, Node>) -> String {
// let mut text = String::new();
// println!("{:?}", node.value());
// if node.value().is_text() {
// text.push_str(node.value().as_text().unwrap().to_string().as_str());
// }
// if node.has_children() {
// for child in node.descendants() {
// if (child.value().is_text()) {
// text.push_str(child.value().as_text().unwrap().to_string().as_str());
// }
// }
// }
// if node.has_siblings() {
// let mut next = node.next_sibling();
// while next.is_some() {
// let next_node = next.unwrap();
// if next_node.value().is_text() {
// text.push_str(next_node.value().as_text().unwrap());
// } else {
// break;
// }
// next = next_node.next_sibling();
// }
// }
// return text;
// }
// fn get_text_next(node: &NodeRef<'_, Node>) -> String {
// if node.value().is_text() {
// return node.value().as_text().unwrap().text.trim().to_string();
// }
// let next = node.next_sibling();
// if !next.is_some() {
// return String::new();
// }
// let next = next.unwrap();
// if next.value().is_text() {
// let mut text = next.value().as_text().unwrap().text.trim().to_string();
// let mut next = next.next_sibling();
// while (next.is_some()) {
// let node = next.unwrap();
// next = node.next_sibling();
// if node.value().is_text() {
// text.push_str(node.value().as_text().unwrap().text.trim());
// } else {
// break;
// }
// }
// return text;
// }
// String::new()
// }

View File

@@ -1,3 +1,8 @@
/// Processing state attached to a resource.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ResourceState {
    /// The resource has been parsed.
    // NOTE(review): the meaning of the flag is not visible here — presumably
    // whether parsing succeeded; confirm against the code that sets it.
    Parsed(bool),
    /// The resource has not been parsed yet.
    Unparsed,
}
#[derive(Debug, PartialEq, Eq)]
pub struct Icon {
pub name: String,
@@ -18,12 +23,32 @@ impl Clone for Icon {
}
}
}
#[derive(Debug, PartialEq, Eq)]
pub enum ParseType {
Link { url: String, title: String },
Img(Icon),
Count(u32),
ResourceAdd,
ResourceLast,
Duration { duration: u64, unit: String },
/// A single resource (item) referenced by a recipe.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Resource {
    /// Identifier of the resource.
    pub name: String,
    /// Display title of the resource.
    pub title: String,
    /// Link associated with the resource, when one is known.
    pub url: Option<String>,
    /// Icon of the resource, when one is known.
    pub icon: Option<Icon>,
    /// Processing state of the resource.
    pub state: ResourceState,
}
/// A resource paired with the quantity in which it takes part in a recipe.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Ingredient {
    /// The resource itself.
    pub resource: Resource,
    /// Number of units of the resource.
    pub quantity: u32,
}
/// Kind of process a recipe describes.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum RecipeType {
    /// The recipe produces the resource.
    Production,
    /// The recipe refines the resource.
    Refining,
    /// The recipe cooks the resource.
    Cooking,
}
/// A recipe: a set of ingredients that yields a resource.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Recipe {
    /// Kind of process this recipe describes.
    pub recipe_type: RecipeType,
    /// The resulting resource together with its quantity.
    pub resource: Ingredient,
    /// Time the recipe takes.
    // NOTE(review): presumably milliseconds — confirm against the code that fills this field.
    pub duration: u64,
    /// Inputs consumed by the recipe.
    pub ingredients: Vec<Ingredient>,
}

3501
test_mordit.html Normal file

File diff suppressed because one or more lines are too long

3877
test_vc.html Normal file

File diff suppressed because one or more lines are too long