Update parser
This commit is contained in:
@@ -7,7 +7,7 @@ use parse::parse;
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
env_logger::init();
|
||||
let html = read("test_vc.html")?;
|
||||
let html = read("test_mordit.html")?;
|
||||
parse(&html);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
673
src/parse/mod.rs
673
src/parse/mod.rs
@@ -1,4 +1,4 @@
|
||||
use std::{collections::HashMap, hash::Hash};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use regex::Regex;
|
||||
use select::{
|
||||
@@ -7,7 +7,7 @@ use select::{
|
||||
predicate::{Attr, Name, Or},
|
||||
};
|
||||
|
||||
use crate::types::{Icon, Ingredient, Recipe, RecipeType, Resource, ResourceState};
|
||||
use crate::types::{Duration, Icon, Ingredient, Recipe, RecipeType, Resource, ResourceState};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
enum ParseType {
|
||||
@@ -17,7 +17,61 @@ enum ParseType {
|
||||
Resource(String),
|
||||
ResourceAdd,
|
||||
ResourceLast,
|
||||
Duration { duration: u64, unit: String },
|
||||
DashDash,
|
||||
Duration(Duration),
|
||||
}
|
||||
|
||||
type ResourceTmp = (
|
||||
Option<String>, // name
|
||||
Option<String>, // title
|
||||
Option<Icon>, // icon
|
||||
Option<u32>, // count
|
||||
Option<Duration>, // duration
|
||||
);
|
||||
|
||||
pub fn parse(html: &str) {
|
||||
let document = Document::from(html);
|
||||
|
||||
let mut map_resource: HashMap<String, Resource> = HashMap::new();
|
||||
let mut recipes: Vec<Recipe> = Vec::new();
|
||||
|
||||
parse_source(&document, &mut map_resource, &mut recipes);
|
||||
|
||||
parse_dst(
|
||||
&document,
|
||||
"Raffination",
|
||||
RecipeType::Refining,
|
||||
&mut map_resource,
|
||||
&mut recipes,
|
||||
);
|
||||
|
||||
parse_dst(
|
||||
&document,
|
||||
"Herstellung",
|
||||
RecipeType::Production,
|
||||
&mut map_resource,
|
||||
&mut recipes,
|
||||
);
|
||||
|
||||
parse_dst(
|
||||
&document,
|
||||
"Raffination_2",
|
||||
RecipeType::Refining,
|
||||
&mut map_resource,
|
||||
&mut recipes,
|
||||
);
|
||||
|
||||
parse_dst(
|
||||
&document,
|
||||
"Kochen",
|
||||
RecipeType::Cooking,
|
||||
&mut map_resource,
|
||||
&mut recipes,
|
||||
);
|
||||
|
||||
print_recipe(&recipes, RecipeType::Refining);
|
||||
print_recipe(&recipes, RecipeType::Production);
|
||||
print_recipe(&recipes, RecipeType::Cooking);
|
||||
}
|
||||
|
||||
fn first<T>(iter: &mut impl Iterator<Item = T>) -> Option<T> {
|
||||
@@ -25,7 +79,7 @@ fn first<T>(iter: &mut impl Iterator<Item = T>) -> Option<T> {
|
||||
}
|
||||
|
||||
fn first_child_element(node: Node<'_>) -> Option<Node<'_>> {
|
||||
if (node.children().next().is_none()) {
|
||||
if node.children().next().is_none() {
|
||||
return None;
|
||||
}
|
||||
|
||||
@@ -44,17 +98,126 @@ fn first_child_element(node: Node<'_>) -> Option<Node<'_>> {
|
||||
None
|
||||
}
|
||||
|
||||
pub fn parse(html: &str) {
|
||||
let document = Document::from(html);
|
||||
fn parse_resource_items(
|
||||
resource_items: Vec<ParseType>,
|
||||
recipe_type: RecipeType,
|
||||
map_resource: &mut HashMap<String, Resource>,
|
||||
) -> Option<Recipe> {
|
||||
let mut tmp_resource: ResourceTmp = (None, None, None, None, None);
|
||||
|
||||
parse_source(&document);
|
||||
parse_dst(&document, "Herstellung", RecipeType::Production);
|
||||
parse_dst(&document, "Raffination", RecipeType::Refining);
|
||||
parse_dst(&document, "Raffination_2", RecipeType::Refining);
|
||||
parse_dst(&document, "Kochen", RecipeType::Cooking);
|
||||
let mut ingredient_to_add: Vec<Ingredient> = Vec::new();
|
||||
let mut not_add = false;
|
||||
|
||||
for item in resource_items.iter() {
|
||||
match item {
|
||||
ParseType::Link { url, title } => {
|
||||
if tmp_resource.0.is_none() {
|
||||
tmp_resource.0 = Some(title.to_string());
|
||||
}
|
||||
if tmp_resource.1.is_none() {
|
||||
tmp_resource.1 = Some(url.to_string());
|
||||
}
|
||||
|
||||
// println!("Link: {} - {}", url, title);
|
||||
}
|
||||
ParseType::Img(icon) => {
|
||||
if tmp_resource.2.is_none() {
|
||||
tmp_resource.2 = Some(icon.clone());
|
||||
}
|
||||
}
|
||||
ParseType::Count(count) => {
|
||||
if tmp_resource.3.is_none() {
|
||||
tmp_resource.3 = Some(*count);
|
||||
}
|
||||
}
|
||||
ParseType::Resource(resource) => {
|
||||
// println!("Resource: {}", resource);
|
||||
if tmp_resource.0.is_none() {
|
||||
tmp_resource.0 = Some(resource.to_string());
|
||||
}
|
||||
}
|
||||
ParseType::ResourceAdd => {
|
||||
if !not_add {
|
||||
add(&tmp_resource, map_resource, &mut ingredient_to_add);
|
||||
}
|
||||
|
||||
not_add = false;
|
||||
|
||||
tmp_resource = (None, None, None, None, None); // Reset for next resource
|
||||
// println!("ResourceAdd");
|
||||
}
|
||||
ParseType::ResourceLast => {
|
||||
if !not_add {
|
||||
add(&tmp_resource, map_resource, &mut ingredient_to_add);
|
||||
}
|
||||
|
||||
not_add = false;
|
||||
|
||||
tmp_resource = (None, None, None, None, None); // Reset for next resource
|
||||
// println!("ResourceLast");
|
||||
}
|
||||
ParseType::Duration(duration) => {
|
||||
// println!(">>> Duration: {} {}", duration, unit);
|
||||
|
||||
if tmp_resource.4.is_none() {
|
||||
tmp_resource.4 = Some(duration.clone());
|
||||
}
|
||||
}
|
||||
ParseType::DashDash => {
|
||||
not_add = false;
|
||||
tmp_resource = (None, None, None, None, None); // Reset for next resource
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (_, ingredient) = create_resource_and_ingredient(&tmp_resource, map_resource);
|
||||
|
||||
if !ingredient_to_add.is_empty() {
|
||||
let recipe = crate::types::Recipe {
|
||||
recipe_type: recipe_type,
|
||||
resource: ingredient,
|
||||
duration: tmp_resource.4.unwrap_or(Duration {
|
||||
millis: 0,
|
||||
unit: "Stück".to_string(),
|
||||
}),
|
||||
ingredients: ingredient_to_add,
|
||||
};
|
||||
|
||||
return Some(recipe);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn parse_dst(document: &Document, id: &str, recipe_type: RecipeType) -> bool {
|
||||
/// Prints every recipe of the requested type to stdout.
///
/// For each matching recipe this writes its type, the produced resource with
/// its quantity, the duration in milliseconds and one line per ingredient,
/// followed by a blank line. Recipes of other types are skipped.
///
/// Takes a slice so any contiguous collection of recipes can be passed;
/// existing `&Vec<Recipe>` arguments coerce automatically.
fn print_recipe(recipes: &[Recipe], recipe_type: RecipeType) {
    let matching = recipes
        .iter()
        .filter(|recipe| recipe.recipe_type == recipe_type);

    for recipe in matching {
        println!("Recipe Type: {:?}", recipe.recipe_type);
        println!(
            "Resource: {} ({})",
            recipe.resource.resource.name, recipe.resource.quantity
        );
        println!("Duration: {} ms", recipe.duration.millis);
        println!("Ingredients:");
        for ingredient in &recipe.ingredients {
            println!(
                "- {} ({} x {})",
                ingredient.resource.name, ingredient.quantity, ingredient.resource.title
            );
        }
        println!();
    }
}
|
||||
|
||||
fn parse_dst(
|
||||
document: &Document,
|
||||
id: &str,
|
||||
recipe_type: RecipeType,
|
||||
map_resource: &mut HashMap<String, Resource>,
|
||||
recipes: &mut Vec<Recipe>,
|
||||
) -> bool {
|
||||
let mut dest = document.find(Attr("id", id));
|
||||
let dest = first(&mut dest);
|
||||
|
||||
@@ -100,13 +263,19 @@ fn parse_dst(document: &Document, id: &str, recipe_type: RecipeType) -> bool {
|
||||
let li = elt_ul.find(Name("li"));
|
||||
|
||||
for item in li {
|
||||
parse_source_li_to_resource(&item, recipe_type.clone());
|
||||
if let Some(recipe) = parse_li_to_resource(&item, recipe_type.clone(), map_resource) {
|
||||
recipes.push(recipe);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
fn parse_source(document: &Document) -> bool {
|
||||
fn parse_source(
|
||||
document: &Document,
|
||||
map_resource: &mut HashMap<String, Resource>,
|
||||
recipes: &mut Vec<Recipe>,
|
||||
) -> bool {
|
||||
let mut source = document.find(Attr("id", "Quelle"));
|
||||
let source = first(&mut source);
|
||||
|
||||
@@ -145,27 +314,20 @@ fn parse_source(document: &Document) -> bool {
|
||||
let elt_ul = elt_ul.unwrap();
|
||||
let li = elt_ul.find(Name("li"));
|
||||
|
||||
let mut recipes: Vec<Recipe> = Vec::new();
|
||||
|
||||
for item in li {
|
||||
if let Some(recipe) = parse_source_li_to_resource(&item, RecipeType::Refining) {
|
||||
if let Some(recipe) = parse_li_to_resource(&item, RecipeType::Refining, map_resource) {
|
||||
recipes.push(recipe);
|
||||
}
|
||||
}
|
||||
|
||||
for recipe in recipes {
|
||||
let name = recipe.resource.resource.name.clone();
|
||||
let title = recipe.resource.resource.title.clone();
|
||||
let url = recipe.resource.resource.url.clone();
|
||||
let icon = recipe.resource.resource.icon.clone();
|
||||
|
||||
println!("Resource: {} - {} - {:?} - {:?}", name, title, url, icon);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
fn parse_source_li_to_resource(item: &Node<'_>, recipe_type: RecipeType) -> Option<Recipe> {
|
||||
fn parse_li_to_resource(
|
||||
item: &Node<'_>,
|
||||
recipe_type: RecipeType,
|
||||
map_resource: &mut HashMap<String, Resource>,
|
||||
) -> Option<Recipe> {
|
||||
if item.children().next().is_none() {
|
||||
return None;
|
||||
}
|
||||
@@ -173,24 +335,27 @@ fn parse_source_li_to_resource(item: &Node<'_>, recipe_type: RecipeType) -> Opti
|
||||
let mut resource_items: Vec<ParseType> = Vec::new();
|
||||
|
||||
let selector = item.find(Or(
|
||||
Or(Name("span"), Name("a")),
|
||||
Or(Name("img"), Name("small")),
|
||||
Name("strong"),
|
||||
Or(Or(Name("span"), Name("a")), Or(Name("img"), Name("small"))),
|
||||
));
|
||||
|
||||
for child in selector {
|
||||
let name = child.name().unwrap();
|
||||
|
||||
if name == "a" && child.attr("href").is_some() && child.attr("title").is_some() {
|
||||
let txt = get_text_next(&child);
|
||||
|
||||
resource_items.push(ParseType::Link {
|
||||
url: child.attr("href").unwrap().to_string(),
|
||||
title: child.attr("title").unwrap().to_string(),
|
||||
});
|
||||
|
||||
let txt = get_text_next(&child);
|
||||
|
||||
if !txt.is_empty() {
|
||||
parse_text(&txt, &mut resource_items);
|
||||
}
|
||||
if txt == "--" {
|
||||
resource_items.push(ParseType::DashDash);
|
||||
}
|
||||
} else if name == "img"
|
||||
&& child.attr("data-src").is_some()
|
||||
&& child.attr("width").is_some()
|
||||
@@ -211,112 +376,30 @@ fn parse_source_li_to_resource(item: &Node<'_>, recipe_type: RecipeType) -> Opti
|
||||
}));
|
||||
} else if name == "span"
|
||||
&& !child.text().is_empty()
|
||||
&& child.parent().unwrap().name().unwrap() == "strong"
|
||||
&& (child.parent().unwrap().name().unwrap() == "strong"
|
||||
|| child.parent().unwrap().name().unwrap() == "span")
|
||||
{
|
||||
let txt = child.text().trim().to_string();
|
||||
resource_items.push(ParseType::Resource(txt));
|
||||
let txt = get_text_next(&child.parent().unwrap());
|
||||
parse_text(&txt, &mut resource_items);
|
||||
} else if name == "strong" && !child.text().is_empty() {
|
||||
} else if name == "strong" {
|
||||
// let txt = get_text_next(&child);
|
||||
// parse_text(&txt, &mut resource_items);
|
||||
} else if name == "small" {
|
||||
let txt = get_text(&child);
|
||||
parse_text(&txt, &mut resource_items);
|
||||
}
|
||||
}
|
||||
|
||||
let mut tmp_resource: (
|
||||
Option<String>, // name
|
||||
Option<String>, // title
|
||||
Option<Icon>,
|
||||
Option<u32>, // count
|
||||
Option<u64>, // duration
|
||||
) = (None, None, None, None, None);
|
||||
|
||||
let mut map_resource: HashMap<String, Resource> = HashMap::new();
|
||||
let mut ingredient_to_add: Vec<Ingredient> = Vec::new();
|
||||
|
||||
for item in resource_items.iter() {
|
||||
match item {
|
||||
ParseType::Link { url, title } => {
|
||||
if tmp_resource.0.is_none() {
|
||||
tmp_resource.0 = Some(title.to_string());
|
||||
}
|
||||
if tmp_resource.1.is_none() {
|
||||
tmp_resource.1 = Some(url.to_string());
|
||||
}
|
||||
|
||||
// println!("Link: {} - {}", url, title);
|
||||
}
|
||||
ParseType::Img(icon) => {
|
||||
// println!(
|
||||
// "Image: {} ({}x{}) - {}",
|
||||
// icon.name, icon.width, icon.height, icon.url
|
||||
// );
|
||||
|
||||
if (tmp_resource.2.is_none()) {
|
||||
tmp_resource.2 = Some(icon.clone());
|
||||
}
|
||||
}
|
||||
ParseType::Count(count) => {
|
||||
// println!("Count: {}", count);
|
||||
|
||||
if (tmp_resource.3.is_none()) {
|
||||
tmp_resource.3 = Some(*count);
|
||||
}
|
||||
}
|
||||
ParseType::Resource(resource) => {
|
||||
// println!("Resource: {}", resource);
|
||||
if (tmp_resource.0.is_none()) {
|
||||
tmp_resource.0 = Some(resource.to_string());
|
||||
}
|
||||
}
|
||||
ParseType::ResourceAdd => {
|
||||
add(&tmp_resource, &mut map_resource, &mut ingredient_to_add);
|
||||
tmp_resource = (None, None, None, None, None); // Reset for next resource
|
||||
// println!("ResourceAdd");
|
||||
}
|
||||
ParseType::ResourceLast => {
|
||||
add(&tmp_resource, &mut map_resource, &mut ingredient_to_add);
|
||||
tmp_resource = (None, None, None, None, None); // Reset for next resource
|
||||
// println!("ResourceLast");
|
||||
}
|
||||
ParseType::Duration { duration, unit } => {
|
||||
// println!(">>> Duration: {} {}", duration, unit);
|
||||
|
||||
if tmp_resource.4.is_none() {
|
||||
tmp_resource.4 = Some(*duration);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (_, ingredient) = create_resource_and_ingredient(&tmp_resource, &mut map_resource);
|
||||
|
||||
if !ingredient_to_add.is_empty() {
|
||||
let recipe = crate::types::Recipe {
|
||||
recipe_type: recipe_type,
|
||||
resource: ingredient,
|
||||
duration: tmp_resource.4.unwrap_or(0),
|
||||
ingredients: ingredient_to_add,
|
||||
};
|
||||
|
||||
return Some(recipe);
|
||||
}
|
||||
|
||||
None
|
||||
parse_resource_items(resource_items, recipe_type, map_resource)
|
||||
}
|
||||
|
||||
fn to_name(name: &str) -> String {
|
||||
normalize_text(name).replace(" ", "_").to_lowercase()
|
||||
}
|
||||
fn create_resource_and_ingredient(
|
||||
tmp_resource: &(
|
||||
Option<String>,
|
||||
Option<String>,
|
||||
Option<Icon>,
|
||||
Option<u32>,
|
||||
Option<u64>,
|
||||
),
|
||||
tmp_resource: &ResourceTmp,
|
||||
map_resource: &mut HashMap<String, Resource>,
|
||||
) -> (Resource, Ingredient) {
|
||||
let title = tmp_resource.0.as_ref().unwrap().clone();
|
||||
@@ -351,14 +434,9 @@ fn create_resource_and_ingredient(
|
||||
|
||||
(resource, ingredient)
|
||||
}
|
||||
|
||||
fn add(
|
||||
tmp_resource: &(
|
||||
Option<String>,
|
||||
Option<String>,
|
||||
Option<Icon>,
|
||||
Option<u32>,
|
||||
Option<u64>,
|
||||
),
|
||||
tmp_resource: &ResourceTmp,
|
||||
map_resource: &mut HashMap<String, Resource>,
|
||||
ingredient_to_add: &mut Vec<Ingredient>,
|
||||
) {
|
||||
@@ -380,6 +458,12 @@ fn normalize_text(text: &str) -> String {
|
||||
.replace('\r', " ")
|
||||
.replace('\t', " ");
|
||||
|
||||
for c in text.clone().chars() {
|
||||
if c.is_control() || c.is_whitespace() {
|
||||
text = text.replace(c, " ");
|
||||
}
|
||||
}
|
||||
|
||||
while text.contains(" ") {
|
||||
text = text.replace(" ", " ");
|
||||
}
|
||||
@@ -444,6 +528,8 @@ fn get_text_next(node: &Node<'_>) -> String {
|
||||
|
||||
if node.as_text().is_some() {
|
||||
text.push_str(node.as_text().unwrap().trim());
|
||||
} else if node.name().is_some() && node.name().unwrap() == "i" {
|
||||
text.push_str(node.text().trim());
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
@@ -456,10 +542,12 @@ fn get_text_next(node: &Node<'_>) -> String {
|
||||
}
|
||||
|
||||
fn parse_text(text: &str, resource_items: &mut Vec<ParseType>) {
|
||||
let reg1 = Regex::new(r"^\s*x(?<count>\d+)\s+(?<end>[→+])\s*$").unwrap();
|
||||
let reg2 = Regex::new(r"^\s*\(.*(?<duration>\d+,\d+)\ssek\./(?<unit>\w+)\s*\)$").unwrap();
|
||||
let reg_count_next = Regex::new(r"^\s*x(?<count>\d+)\s+(?<end>[→+])\s*$").unwrap();
|
||||
let reg_count = Regex::new(r"^\s*x(?<count>\d+)\s*.*$").unwrap();
|
||||
let reg_duration =
|
||||
Regex::new(r"^.*\(.*(?<duration>\d+(|,\d+))\ssek\./(?<unit>\w+)\s*\)$").unwrap();
|
||||
|
||||
if let Some(res) = reg1.captures(text) {
|
||||
if let Some(res) = reg_count_next.captures(text) {
|
||||
let count = res.name("count").unwrap().as_str().parse().unwrap_or(0);
|
||||
|
||||
resource_items.push(ParseType::Count(count));
|
||||
@@ -475,324 +563,21 @@ fn parse_text(text: &str, resource_items: &mut Vec<ParseType>) {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(res) = reg2.captures(text) {
|
||||
if let Some(res) = reg_count.captures(text) {
|
||||
let count = res.name("count").unwrap().as_str().parse().unwrap_or(0);
|
||||
|
||||
resource_items.push(ParseType::Count(count));
|
||||
}
|
||||
|
||||
if let Some(res) = reg_duration.captures(text) {
|
||||
let duration_str = res.name("duration").unwrap().as_str();
|
||||
let duration: f64 = duration_str.replace(',', ".").parse().unwrap_or(0.0);
|
||||
let unit = res.name("unit").unwrap().as_str().to_string();
|
||||
let duration: u64 = (duration * 1000.0) as u64; // Convert to milliseconds
|
||||
|
||||
resource_items.push(ParseType::Duration { duration, unit });
|
||||
resource_items.push(ParseType::Duration(Duration {
|
||||
millis: duration,
|
||||
unit,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
// pub fn parse1(html: &str) -> Result<bool, Box<dyn std::error::Error>> {
|
||||
// // Parse the HTML content
|
||||
// let document = scraper::Html::parse_document(html);
|
||||
// let selector_usage = scraper::Selector::parse("#Verwendung").unwrap();
|
||||
// let selector_li = scraper::Selector::parse("li").unwrap();
|
||||
|
||||
// parse_source(&document);
|
||||
|
||||
// let elt_usage = document.select(&selector_usage).next();
|
||||
|
||||
// if elt_usage.is_none() {
|
||||
// eprintln!("No element found with the selector '#Verwendung'");
|
||||
// return Ok(false);
|
||||
// }
|
||||
|
||||
// let elt_usage = elt_usage.unwrap();
|
||||
// let mut elt_usage = elt_usage.parent().unwrap();
|
||||
|
||||
// let mut elt_ul = None;
|
||||
|
||||
// while elt_usage.next_sibling().is_some() {
|
||||
// elt_usage = elt_usage.next_sibling().unwrap();
|
||||
|
||||
// if !elt_usage.value().is_element() {
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// let elt = elt_usage.value().as_element().unwrap();
|
||||
// if elt.name() == "ul" {
|
||||
// elt_ul = Some(elt_usage);
|
||||
// break;
|
||||
// }
|
||||
|
||||
// if elt.name() == "h2" {
|
||||
// eprintln!("Found 'h2' element, stopping search for 'ul'");
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
// if elt_ul.is_none() {
|
||||
// eprintln!("No second 'ul' element found after '#Verwendung'");
|
||||
// return Ok(false);
|
||||
// }
|
||||
// let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap();
|
||||
// let li = elt_ul.select(&selector_li);
|
||||
|
||||
// for item in li {
|
||||
// parse_source_li_to_resource(item);
|
||||
// }
|
||||
// Ok(true)
|
||||
// }
|
||||
|
||||
// fn parse_source(document: &Html) -> bool {
|
||||
// let selector_source = scraper::Selector::parse("#Quelle").unwrap();
|
||||
// let selector_li = scraper::Selector::parse("li").unwrap();
|
||||
// let elt_source = document.select(&selector_source).next();
|
||||
|
||||
// if elt_source.is_none() {
|
||||
// eprintln!("No element found with the selector '#Quelle'");
|
||||
// return false;
|
||||
// }
|
||||
|
||||
// let elt_source = elt_source.unwrap();
|
||||
|
||||
// let mut elt_source = elt_source.parent().unwrap();
|
||||
|
||||
// let mut c = 0;
|
||||
// let mut elt_ul = None;
|
||||
|
||||
// while elt_source.next_sibling().is_some() {
|
||||
// elt_source = elt_source.next_sibling().unwrap();
|
||||
|
||||
// if !elt_source.value().is_element() {
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// let elt = elt_source.value().as_element().unwrap();
|
||||
// if elt.name() == "ul" {
|
||||
// c += 1;
|
||||
|
||||
// if c > 1 {
|
||||
// elt_ul = Some(elt_source);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
// if elt.name() == "h2" {
|
||||
// eprintln!("Found 'h2' element, stopping search for 'ul'");
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
// if elt_ul.is_none() {
|
||||
// eprintln!("No second 'ul' element found after '#Quelle'");
|
||||
// return false;
|
||||
// }
|
||||
// let elt_ul = ElementRef::wrap(elt_ul.unwrap()).unwrap();
|
||||
// let li = elt_ul.select(&selector_li);
|
||||
|
||||
// for item in li {
|
||||
// parse_source_li_to_resource(item);
|
||||
// }
|
||||
|
||||
// return true;
|
||||
// }
|
||||
|
||||
// fn parse_source_li_to_resource(item: ElementRef<'_>) {
|
||||
// if !item.has_children() {
|
||||
// println!("Item has no children, skipping.");
|
||||
// return;
|
||||
// }
|
||||
|
||||
// let mut resource_items: Vec<ParseType> = Vec::new();
|
||||
|
||||
// let selector = scraper::Selector::parse("span,a,img,small").unwrap();
|
||||
// let iter = item.select(&selector);
|
||||
|
||||
// for child in iter {
|
||||
// let elem = child.value();
|
||||
// let name = elem.name();
|
||||
|
||||
// if name == "a"
|
||||
// && elem.attrs().find(|attr| attr.0 == "href").is_some()
|
||||
// && elem.attrs().find(|attr| attr.0 == "title").is_some()
|
||||
// {
|
||||
// resource_items.push(ParseType::Link {
|
||||
// url: elem
|
||||
// .attrs()
|
||||
// .find(|attr| attr.0 == "href")
|
||||
// .unwrap()
|
||||
// .1
|
||||
// .to_string(),
|
||||
// title: elem
|
||||
// .attrs()
|
||||
// .find(|attr| attr.0 == "title")
|
||||
// .unwrap()
|
||||
// .1
|
||||
// .to_string(),
|
||||
// });
|
||||
|
||||
// let txt = get_text_next(&child);
|
||||
|
||||
// if !txt.is_empty() {
|
||||
// parse_text(&txt, &mut resource_items);
|
||||
// }
|
||||
// } else if name == "img"
|
||||
// && elem.attrs().find(|attr| attr.0 == "data-src").is_some()
|
||||
// && elem.attrs().find(|attr| attr.0 == "width").is_some()
|
||||
// && elem.attrs().find(|attr| attr.0 == "height").is_some()
|
||||
// && elem
|
||||
// .attrs()
|
||||
// .find(|attr| attr.0 == "data-image-name")
|
||||
// .is_some()
|
||||
// {
|
||||
// let url = elem.attrs().find(|attr| attr.0 == "data-src").unwrap().1;
|
||||
// let name = elem
|
||||
// .attrs()
|
||||
// .find(|attr| attr.0 == "data-image-name")
|
||||
// .unwrap()
|
||||
// .1;
|
||||
// let width = elem
|
||||
// .attrs()
|
||||
// .find(|attr| attr.0 == "width")
|
||||
// .unwrap()
|
||||
// .1
|
||||
// .parse()
|
||||
// .unwrap_or(0);
|
||||
// let height = elem
|
||||
// .attrs()
|
||||
// .find(|attr| attr.0 == "height")
|
||||
// .unwrap()
|
||||
// .1
|
||||
// .parse()
|
||||
// .unwrap_or(0);
|
||||
|
||||
// resource_items.push(ParseType::Img(Icon {
|
||||
// name: name.to_string(),
|
||||
// url: url.to_string(),
|
||||
// width,
|
||||
// height,
|
||||
// content_type: "image/png".to_string(), // Assuming PNG, adjust as needed
|
||||
// }));
|
||||
// } else if name == "small" {
|
||||
// let txt = get_text(&child);
|
||||
// parse_text(&txt, &mut resource_items);
|
||||
// }
|
||||
// }
|
||||
// for item in resource_items.iter() {
|
||||
// match item {
|
||||
// ParseType::Link { url, title } => {
|
||||
// println!("Link: {} - {}", url, title);
|
||||
// }
|
||||
// ParseType::Img(icon) => {
|
||||
// println!(
|
||||
// "Image: {} ({}x{}) - {}",
|
||||
// icon.name, icon.width, icon.height, icon.url
|
||||
// );
|
||||
// }
|
||||
// ParseType::Count(count) => {
|
||||
// println!("Count: {}", count);
|
||||
// }
|
||||
// ParseType::ResourceAdd => {
|
||||
// println!("ResourceAdd")
|
||||
// }
|
||||
// ParseType::ResourceLast => {
|
||||
// println!("ResourceLast")
|
||||
// }
|
||||
// ParseType::Duration { duration, unit } => {
|
||||
// println!(">>> Duration: {} {}", duration, unit)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// println!("======================");
|
||||
// }
|
||||
|
||||
// fn parse_text(text: &str, resource_items: &mut Vec<ParseType>) {
|
||||
// let reg1 = Regex::new(r"^\s*x(?<count>\d+)\s+(?<end>[→+])\s*$").unwrap();
|
||||
// let reg2 = Regex::new(r"^\s*\(.*(?<duration>\d+,\d+)\ssek\./(?<unit>\w+)\s*\)$").unwrap();
|
||||
|
||||
// if let Some(res) = reg1.captures(text) {
|
||||
// let count = res.name("count").unwrap().as_str().parse().unwrap_or(0);
|
||||
|
||||
// resource_items.push(ParseType::Count(count));
|
||||
|
||||
// let end = res.name("end").unwrap().as_str().to_string();
|
||||
|
||||
// if end == "+" {
|
||||
// resource_items.push(ParseType::ResourceAdd);
|
||||
// } else {
|
||||
// resource_items.push(ParseType::ResourceLast);
|
||||
// }
|
||||
|
||||
// return;
|
||||
// }
|
||||
|
||||
// if let Some(res) = reg2.captures(text) {
|
||||
// let duration_str = res.name("duration").unwrap().as_str();
|
||||
// let duration: f64 = duration_str.replace(',', ".").parse().unwrap_or(0.0);
|
||||
// let unit = res.name("unit").unwrap().as_str().to_string();
|
||||
// let duration: u64 = (duration * 1000.0) as u64; // Convert to milliseconds
|
||||
|
||||
// resource_items.push(ParseType::Duration { duration, unit });
|
||||
// }
|
||||
// }
|
||||
|
||||
// fn get_text(node: &NodeRef<'_, Node>) -> String {
|
||||
// let mut text = String::new();
|
||||
|
||||
// println!("{:?}", node.value());
|
||||
|
||||
// if node.value().is_text() {
|
||||
// text.push_str(node.value().as_text().unwrap().to_string().as_str());
|
||||
// }
|
||||
|
||||
// if node.has_children() {
|
||||
// for child in node.descendants() {
|
||||
// if (child.value().is_text()) {
|
||||
// text.push_str(child.value().as_text().unwrap().to_string().as_str());
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// if node.has_siblings() {
|
||||
// let mut next = node.next_sibling();
|
||||
|
||||
// while next.is_some() {
|
||||
// let next_node = next.unwrap();
|
||||
// if next_node.value().is_text() {
|
||||
// text.push_str(next_node.value().as_text().unwrap());
|
||||
// } else {
|
||||
// break;
|
||||
// }
|
||||
// next = next_node.next_sibling();
|
||||
// }
|
||||
// }
|
||||
|
||||
// return text;
|
||||
// }
|
||||
|
||||
// fn get_text_next(node: &NodeRef<'_, Node>) -> String {
|
||||
// if node.value().is_text() {
|
||||
// return node.value().as_text().unwrap().text.trim().to_string();
|
||||
// }
|
||||
// let next = node.next_sibling();
|
||||
|
||||
// if !next.is_some() {
|
||||
// return String::new();
|
||||
// }
|
||||
|
||||
// let next = next.unwrap();
|
||||
|
||||
// if next.value().is_text() {
|
||||
// let mut text = next.value().as_text().unwrap().text.trim().to_string();
|
||||
// let mut next = next.next_sibling();
|
||||
|
||||
// while (next.is_some()) {
|
||||
// let node = next.unwrap();
|
||||
// next = node.next_sibling();
|
||||
|
||||
// if node.value().is_text() {
|
||||
// text.push_str(node.value().as_text().unwrap().text.trim());
|
||||
// } else {
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
|
||||
// return text;
|
||||
// }
|
||||
|
||||
// String::new()
|
||||
// }
|
||||
|
||||
@@ -45,10 +45,17 @@ pub enum RecipeType {
|
||||
Refining,
|
||||
Cooking,
|
||||
}
|
||||
|
||||
/// A processing time together with the unit label it was quoted for.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Duration {
    /// Length of the interval in milliseconds.
    pub millis: u64,
    /// Unit label the time refers to, e.g. `"Stück"`.
    pub unit: String,
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct Recipe {
|
||||
pub recipe_type: RecipeType,
|
||||
pub resource: Ingredient,
|
||||
pub duration: u64,
|
||||
pub duration: Duration,
|
||||
pub ingredients: Vec<Ingredient>,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user