/// Address of the lib.rs landing page, requested as-is when the home page is scraped.
pub const HOME_URL: &str = "https://lib.rs/";

/// Address of the lib.rs search endpoint; the search text is sent as the `q` query parameter.
pub const CRITERIA_SEARCH_URL: &str = "https://lib.rs/search";

/// Prefix of per-crate pages; the crate name is appended after a `/` separator.
pub const CRATE_INFO_URL: &str = "https://lib.rs/crates";
Version, + pub versions: Vec, + pub readme: Option, + pub license: Option, + pub git_repo: Option, + pub api_reference: Option, + pub website: Option, + pub dependencies: Vec, +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..7c58e5d --- /dev/null +++ b/src/main.rs @@ -0,0 +1,88 @@ +mod category; +mod consts; +mod crate_info; +mod scraper; +mod search_item; +mod version; + +use scraper::{get_home_page, search_by_criteria, search_crate}; +use tera::Tera; +use tokio::sync::OnceCell; + +static TERA: OnceCell = OnceCell::const_new(); + +async fn render>(name: S, context: &tera::Context) -> Result { + let tera = TERA + .get_or_init(|| async { + let mut tera = Tera::default(); + tera.register_filter("separate_with_commas", tera_thousands::separate_with_commas); + for path in glob::glob("templates/*").unwrap().flatten() { + let raw_path = path.clone(); + let filename = raw_path + .file_name() + .unwrap() // This should be safe (? + .to_str() + .unwrap() // ._.? + .split('.') + .next(); + tera.add_template_file(path, filename).unwrap() + } + tera + }) + .await; + tera.render(name.as_ref(), context).map_err(|_| ()) +} +#[windmark::main] +async fn main() -> Result<(), Box> { + windmark::router::Router::new() + .set_private_key_file("key.pem") + .set_certificate_file("cert.pem") + .set_port(1971) + .enable_default_logger(true) + .set_fix_path(true) + .mount("/", move |_| async { + let mut context = tera::Context::new(); + let (total_crates_indexed, categories) = get_home_page(); + + context.insert("categories", &categories); + context.insert("total_crates_indexed", &total_crates_indexed); + windmark::response::Response::success(render("index", &context).await.unwrap()) + }) + .mount("/about", move |_| async { + let context = tera::Context::new(); + + windmark::response::Response::success(render("about", &context).await.unwrap()) + }) + .mount("/search/:criteria", move |request| async move { + let mut context = tera::Context::new(); + + let criteria 
= urlencoding::decode(request.parameters.get("criteria").unwrap()) + .unwrap() + .to_string(); + + let (error, items) = match search_by_criteria(criteria.to_string()) { + None => (Some("Nothing found :("), None), + Some(items) => (None, Some(items)), + }; + + context.insert("criteria", &criteria); + context.insert("error", &error); + context.insert("items", &items); + windmark::response::Response::success(render("search", &context).await.unwrap()) + }) + .mount("/crates/:crate_name", move |request| async move { + let mut context = tera::Context::new(); + + let crate_name = request.parameters.get("crate_name").unwrap(); + + let (error, crate_info) = match search_crate(crate_name.to_string()) { + None => (Some("Crate not found"), None), + Some(crate_info) => (None, Some(crate_info)), + }; + context.insert("error", &error); + context.insert("item", &crate_info); + windmark::response::Response::success(render("crate", &context).await.unwrap()) + }) + .run() + .await +} diff --git a/src/scraper.rs b/src/scraper.rs new file mode 100644 index 0000000..8b15e8e --- /dev/null +++ b/src/scraper.rs @@ -0,0 +1,218 @@ +use crate::category::Category; +use crate::crate_info::CrateInfo; +use crate::search_item::SearchItem; +use crate::{consts, version::Version}; +use cached::proc_macro::cached; + +use htmd::HtmlToMarkdown; +use raspa::{ + request::{Request, RequestBase}, + selector::SelectorBase, +}; + +#[cached(time = 62400)] +pub fn get_home_page() -> (u32, Vec) { + let resp = Request::new(consts::HOME_URL).unwrap().launch(); + + let raw_header_text = resp.css_once(".inner-col p").unwrap().content(); + let total_crates_indexed = raw_header_text + .split(' ') + .nth(2) + .unwrap() + .replace('.', "") + .replace(',', "") + .parse() + .unwrap(); + + let categoryes = resp + .css("ul.cat > li") + .iter() + .filter_map(|elem| { + if elem.css_once("h3").is_none() { + return None; + } + + let title = elem.css_once("h3").unwrap().content(); + let slug = elem + .css_once("a") + .unwrap() + 
.attr("href") + .unwrap() + .strip_prefix('/') + .unwrap() + .to_string(); + let desc = elem.css_once("span.desc").unwrap().content(); + let mut crates = elem + .css("ul.crates > li") + .iter() + .map(|e| e.css_once("a").unwrap().content()) + .collect::>(); + let more_count = crates + .pop() + .unwrap() + .split(" ") + .next() + .unwrap() + .parse() + .unwrap(); + Some(Category { + name: title, + slug, + description: desc, + crates, + more_count, + }) + }) + .collect::>(); + (total_crates_indexed, categoryes) +} + +#[cached(time = 3600)] +pub fn search_by_criteria(criteria: String) -> Option> { + let resp = Request::new(consts::CRITERIA_SEARCH_URL) + .unwrap() + .add_params(vec![("q", criteria)]) + .launch(); + if resp.css_once(".notfound").is_some() { + None + } else { + Some( + resp.css(".inner-col ol li") + .iter() + .map(|elem| { + let name = elem.css_once("h4").unwrap().content(); + let description = elem.css_once(".desc").unwrap().content().trim().into(); + let version = elem.css_once(".version").unwrap().content(); + let tags = elem + .css(".k") + .iter() + .map(|tag| tag.content()) + .collect::>(); + let search_item = SearchItem::new(name, description, version, tags); + search_item + }) + .collect::>(), + ) + } +} + +#[cached(time = 3600)] +pub fn search_crate(crate_name: String) -> Option { + let resp = Request::new(format!("{}/{}", consts::CRATE_INFO_URL, crate_name)) + .unwrap() + .launch(); + if resp.css_once(".notfound").is_some() { + None + } else { + let description = resp + .css_once(".desc") + .unwrap() + .content() + .trim() + .replace('\n', " ") + .into(); + let owner = resp + .css("a.owner") + .iter() + .next() + .unwrap() + .css_once("span") + .unwrap() + .content(); + let versions = resp + .css("#versions tr") + .iter() + .filter_map(|elem| { + if elem.css_once("del").is_some() { + None + } else if elem.css_once(".new").is_some() { + let version = elem + .css_once("a") + .unwrap() + .content() + .lines() + .nth(2) + .unwrap() + .trim() + 
.to_string(); + let date = elem + .css_once("*") + .unwrap() + .content() + .lines() + .nth(2) + .unwrap() + .trim() + .to_string(); + Some(Version::new(version, date)) + } else { + let raw_data = elem.css_once("*").unwrap().content(); + let mut elements = raw_data.lines(); + let mut version = elements.nth(1).unwrap().to_string(); + if version.contains("new") { + version = elements.nth(0).unwrap().to_string(); + } + let date = elements.nth(1).unwrap().to_string(); + Some(Version::new(version, date)) + } + }) + .collect::>(); + let latest_version = versions.first().unwrap().clone(); + + let readme = resp.css_once("#readme").map(|elem| { + let converter = HtmlToMarkdown::builder() + .skip_tags(vec![ + "h1", + "img", + "picture", + "figcaption", + "source", + "figure", + "svg", + ]) + .build(); + let md_readme = converter.convert(elem.html().as_ref()).unwrap(); + md2gemtext::convert(&md_readme) + }); + + let license = resp + .css_once("b[property='license']") + .map(|elem| elem.content()); + + let mut git_repo = None; + let mut api_reference = None; + let mut website = None; + for li in resp.css("header nav li") { + let a = li.css_once("a").unwrap(); + let href = a.attr("href"); + let content = a.html(); + if content.contains("Git") || content.contains("Repository") { + git_repo = href; + } else if content.contains("API reference") { + api_reference = href; + } else if content.contains("Home") { + website = href; + } + } + + let dependencies = resp + .css("#deps li[property='requirements'] a[title*='1']") + .iter() + .map(|elem| elem.content()) + .collect::>(); + + Some(CrateInfo { + name: crate_name, + description, + owner, + latest_version, + versions, + readme, + license, + git_repo, + api_reference, + website, + dependencies, + }) + } +} diff --git a/src/search_item.rs b/src/search_item.rs new file mode 100644 index 0000000..3448f24 --- /dev/null +++ b/src/search_item.rs @@ -0,0 +1,20 @@ +use serde::Serialize; + +#[derive(Serialize, Debug, Clone)] +pub struct 
SearchItem { + pub name: String, + pub description: String, + pub version: String, + pub tags: Vec, +} + +impl SearchItem { + pub fn new(name: String, description: String, version: String, tags: Vec) -> Self { + SearchItem { + name, + description, + version, + tags, + } + } +} diff --git a/src/version.rs b/src/version.rs new file mode 100644 index 0000000..5fd2469 --- /dev/null +++ b/src/version.rs @@ -0,0 +1,13 @@ +use serde::Serialize; + +#[derive(Serialize, Debug, Clone)] +pub struct Version { + pub version: String, + pub date: String, +} + +impl Version { + pub fn new(version: String, date: String) -> Self { + Version { version, date } + } +} diff --git a/templates/_base.gmi.tera b/templates/_base.gmi.tera new file mode 100644 index 0000000..4ca4767 --- /dev/null +++ b/templates/_base.gmi.tera @@ -0,0 +1,12 @@ +# lib.rs +=> / Home + +{% block content -%} +{%- endblock content %} + +This version of Lib.rs for the Gemini protocol was created by kirbylife, but all credit goes to kornelski, the original author of lib.rs. +Lib.rs is an unofficial list of Rust/Cargo crates. It contains data from multiple sources, including heuristics and manually curated data. The content of this page is not necessarily endorsed by the authors of the crate. This gemini capsule is not affiliated with nor endorsed by the Rust Project. + +contact me trough: +=> mailto:hola@kirbylife.dev hola(at)kirbylife.dev +@kirbylife:matrix.org diff --git a/templates/about.gmi.tera b/templates/about.gmi.tera new file mode 100644 index 0000000..71205ab --- /dev/null +++ b/templates/about.gmi.tera @@ -0,0 +1,21 @@ +# lib.rs +=> / Home + +(Sorry in advance, I always suck at writing about) +This unofficial port of lib.rs for the Gemini protocol pulls information directly from the official website but it has no connection whatsoever with the original project or it's creator kornelski. 
I created this project to check crate details from Lagrange on my Haiku laptop and decided to open the portal to the rest of the Gemini space. + +Although the data is fetched from lib.rs, I cache it for an hour to avoid putting stress on kornelski's server. This means that occasionally the data might not be 100% up to date, but in 99% of cases, it remains useful. + +If you find that any crate information is displayed incorrectly, feel free to contact me via email, Mastodon or through Matrix chat: +=> mailto:hola@kirbylife.dev hola@kirbylife.dev [E-mail] +=> https://mstdn.mx/@kirbylife @kirbylife@mstdn.mx [Mastodon] +@kirbylife:matrix.org [Matrix] + +Please don’t report badly rendered READMEs as an error. This happens because the content goes through the following transformations: +Markdown (md) -> HTML +on the lib.rs server, and then on my server: +HTML -> Markdown -> Gemtext. +Some information gets lost along the way, which is to be expected. + +This project is made with Rust using the Windmark framework and the code can be found in my Gitea instance: +=> https://git.kirbylife.dev/kirbylife/librs_gemini librs_gemini Source code diff --git a/templates/crate.gmi.tera b/templates/crate.gmi.tera new file mode 100644 index 0000000..542fdff --- /dev/null +++ b/templates/crate.gmi.tera @@ -0,0 +1,52 @@ +{% extends "_base" %} +{% block content -%} +{% if error -%} +{{ error }} + +{% endif -%} + +{%- if item -%} +## {{ item.name }} v{{ item.latest_version.version }} +> {{ item.description }} +by {{ item.owner }} {% if item.license %}[{{ item.license }}]{% endif %} + +### Install by: +Run this command in your project's directory: +``` +cargo add {{ item.name }} +``` +Or add the following line to your Cargo.toml +``` +[dependencies] +{{ item.name }} = "{{ item.latest_version.version }}" +``` + +{% if item.api_reference -%} +=> {{ item.api_reference }} API reference +{%- endif %} +{% if item.git_repo -%} +=> {{ item.git_repo }} Git repo +{%- endif %} +{% if item.website -%} 
+=> {{ item.website }} Website +{%- endif %} +### Latest versions: +{%- for version in item.versions %} +* {{ version.version }} {{ version.date }} +{%- endfor %} + +### Readme +---------------------------------------------- +{{ item.readme | trim }} +---------------------------------------------- + +### Dependencies: +{% if item.dependencies -%} +{%- for dep in item.dependencies %} +=> /crates/{{ dep }} {{ dep }} +{%- endfor -%} +{%- else -%} +No dependencies +{%- endif -%} +{%- endif -%} +{% endblock content %} diff --git a/templates/index.gmi.tera b/templates/index.gmi.tera new file mode 100644 index 0000000..ae88ec8 --- /dev/null +++ b/templates/index.gmi.tera @@ -0,0 +1,22 @@ +{% extends "_base" %} +{% block content -%} +Index of {{ total_crates_indexed | separate_with_commas }} Rust libraries and applications. Lightweight, opinionated, curated, unofficial alternative to crates.io. +=> /about About + +To search for a crate, use the following path: +/search/<criteria> + +or if you want to go directly to a crate info page, use: +/crates/<crate_name> + +## Categories +{% for category in categories %} +### {{ category.name }} +{{ category.description -}} +{% for crate in category.crates %} +=> /crates/{{ crate }} {{ crate }} +{%- endfor %} +=> /category/{{ category.slug }} {{ category.more_count }} more... +{% endfor -%} + +{% endblock %} diff --git a/templates/search.gmi.tera b/templates/search.gmi.tera new file mode 100644 index 0000000..e07bfa8 --- /dev/null +++ b/templates/search.gmi.tera @@ -0,0 +1,20 @@ +{% extends "_base" %} + +{% block content -%} +## Search {{ criteria }} + +{% if error -%} +{{ error }} + +{% endif -%} +{%- if items -%} +### {{ items | length }} crates found: + +{% for item in items -%} +=> /crates/{{ item.name}} {{ item.name }} {{ item.version }} +{{ item.description }} +tags: {{ item.tags | join(sep=", ") }} + +{% endfor -%} +{%- endif -%} +{% endblock content %}