Initial commit

main
kirbylife 2024-09-29 01:08:53 -06:00
commit 1a7108f9f9
14 changed files with 517 additions and 0 deletions

4
.gitignore vendored 100644
View File

@ -0,0 +1,4 @@
/target
Cargo.lock
*.pem

17
Cargo.toml 100644
View File

@ -0,0 +1,17 @@
[package]
name = "librs_gemini"
version = "0.1.0"
edition = "2021"
[dependencies]
cached = { version = "0.53.1", features = ["serde"] }
glob = "0.3.1"
htmd = "0.1.6"
raspa = { git = "https://git.kirbylife.dev/kirbylife/raspa" }
serde = { version = "1.0.210", features = ["derive"] }
tera = "1.20.0"
tokio = { version = "1.40.0", features = ["full"] }
windmark = { version = "0.3.11", features = ["logger", "response-macros"] }
md2gemtext = { git = "https://git.kirbylife.dev/kirbylife/md2gemtext" }
urlencoding = "2.1.3"
tera_thousands = "0.1.0"

10
src/category.rs 100644
View File

@ -0,0 +1,10 @@
use serde::Serialize;
/// One category entry of the lib.rs home page, serialized into the `index` template.
#[derive(Serialize, Debug, Clone)]
pub struct Category {
/// Human-readable category title.
pub name: String,
/// Path of the category on lib.rs without the leading `/`; the index template
/// links to `/category/<slug>`.
pub slug: String,
/// Short description displayed under the category title.
pub description: String,
/// Names of the crates highlighted for this category; each is linked as `/crates/<name>`.
pub crates: Vec<String>,
/// Count shown in the "`N` more..." link that closes the category listing.
pub more_count: u16,
}

3
src/consts.rs 100644
View File

@ -0,0 +1,3 @@
/// URL of the lib.rs home page.
pub const HOME_URL: &str = "https://lib.rs/";
/// Search endpoint of lib.rs; the search terms are sent as the `q` query parameter.
pub const CRITERIA_SEARCH_URL: &str = "https://lib.rs/search";
/// Base URL of crate pages; the crate name is appended as `/<crate name>`.
pub const CRATE_INFO_URL: &str = "https://lib.rs/crates";

17
src/crate_info.rs 100644
View File

@ -0,0 +1,17 @@
use crate::version::Version;
use serde::Serialize;
/// Everything shown on a crate detail page, serialized into the `crate` template as `item`.
#[derive(Serialize, Debug, Clone)]
pub struct CrateInfo {
/// Crate name; also used in the generated `cargo add` / `Cargo.toml` snippets.
pub name: String,
/// Short description of the crate.
pub description: String,
/// Owner displayed in the "by ..." line.
pub owner: String,
/// Version displayed in the page title and in the `Cargo.toml` snippet.
pub latest_version: Version,
/// Versions listed under "Latest versions".
pub versions: Vec<Version>,
/// README text, when the crate page has one.
pub readme: Option<String>,
/// License label, when the crate page declares one.
pub license: Option<String>,
/// Link to the source repository, when present.
pub git_repo: Option<String>,
/// Link to the API reference, when present.
pub api_reference: Option<String>,
/// Link to the project website, when present.
pub website: Option<String>,
/// Names of the dependencies; each is linked as `/crates/<name>`.
pub dependencies: Vec<String>,
}

88
src/main.rs 100644
View File

@ -0,0 +1,88 @@
mod category;
mod consts;
mod crate_info;
mod scraper;
mod search_item;
mod version;
use scraper::{get_home_page, search_by_criteria, search_crate};
use tera::Tera;
use tokio::sync::OnceCell;
/// Template engine shared by all requests; built on the first call to [`render`].
static TERA: OnceCell<Tera> = OnceCell::const_new();

/// Renders the template registered as `name` with the given `context`.
///
/// On first use every file matching `templates/*` is loaded and registered under
/// the part of its file name that precedes the first `.` (so `index.gmi` becomes
/// `index`), and the `separate_with_commas` filter is installed.
///
/// # Errors
///
/// Returns `Err(())` when Tera cannot render the template. The unit error type is
/// part of the signature, so the underlying cause is written to stderr instead of
/// being lost.
///
/// # Panics
///
/// Panics during the one-time initialisation if a template file has a name that
/// is not valid UTF-8 or cannot be loaded by Tera.
async fn render<S: AsRef<str>>(name: S, context: &tera::Context) -> Result<String, ()> {
    let tera = TERA
        .get_or_init(|| async {
            let mut tera = Tera::default();
            tera.register_filter("separate_with_commas", tera_thousands::separate_with_commas);
            let template_paths =
                glob::glob("templates/*").expect("the glob pattern is a valid literal");
            for path in template_paths.flatten() {
                // Borrow the path for the name so it does not have to be cloned
                // before being handed to Tera.
                let template_name = path
                    .file_name()
                    .expect("glob matches always end in a file name")
                    .to_str()
                    .expect("template file names must be valid UTF-8")
                    .split('.')
                    .next();
                tera.add_template_file(&path, template_name)
                    .unwrap_or_else(|err| {
                        panic!("failed to load template {}: {err:?}", path.display())
                    });
            }
            tera
        })
        .await;
    tera.render(name.as_ref(), context).map_err(|err| {
        eprintln!("failed to render template {:?}: {err:?}", name.as_ref());
    })
}
#[windmark::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
windmark::router::Router::new()
.set_private_key_file("key.pem")
.set_certificate_file("cert.pem")
.set_port(1971)
.enable_default_logger(true)
.set_fix_path(true)
.mount("/", move |_| async {
let mut context = tera::Context::new();
let (total_crates_indexed, categories) = get_home_page();
context.insert("categories", &categories);
context.insert("total_crates_indexed", &total_crates_indexed);
windmark::response::Response::success(render("index", &context).await.unwrap())
})
.mount("/about", move |_| async {
let context = tera::Context::new();
windmark::response::Response::success(render("about", &context).await.unwrap())
})
.mount("/search/:criteria", move |request| async move {
let mut context = tera::Context::new();
let criteria = urlencoding::decode(request.parameters.get("criteria").unwrap())
.unwrap()
.to_string();
let (error, items) = match search_by_criteria(criteria.to_string()) {
None => (Some("Nothing found :("), None),
Some(items) => (None, Some(items)),
};
context.insert("criteria", &criteria);
context.insert("error", &error);
context.insert("items", &items);
windmark::response::Response::success(render("search", &context).await.unwrap())
})
.mount("/crates/:crate_name", move |request| async move {
let mut context = tera::Context::new();
let crate_name = request.parameters.get("crate_name").unwrap();
let (error, crate_info) = match search_crate(crate_name.to_string()) {
None => (Some("Crate not found"), None),
Some(crate_info) => (None, Some(crate_info)),
};
context.insert("error", &error);
context.insert("item", &crate_info);
windmark::response::Response::success(render("crate", &context).await.unwrap())
})
.run()
.await
}

218
src/scraper.rs 100644
View File

@ -0,0 +1,218 @@
use crate::category::Category;
use crate::crate_info::CrateInfo;
use crate::search_item::SearchItem;
use crate::{consts, version::Version};
use cached::proc_macro::cached;
use htmd::HtmlToMarkdown;
use raspa::{
request::{Request, RequestBase},
selector::SelectorBase,
};
/// Scrapes the lib.rs home page.
///
/// Returns the total number of indexed crates together with the categories
/// listed on the page. The result is memoized by `#[cached]` with `time = 62400`.
///
/// # Panics
///
/// Panics when the request cannot be built or the page does not have the
/// expected structure (missing header paragraph, unparsable counters, a
/// category without link, description or crate list).
#[cached(time = 62400)]
pub fn get_home_page() -> (u32, Vec<Category>) {
    let page = Request::new(consts::HOME_URL).unwrap().launch();

    // The crate counter is the third space-separated word of the header
    // paragraph; thousands separators are removed before parsing.
    let header = page.css_once(".inner-col p").unwrap().content();
    let digits: String = header
        .split(' ')
        .nth(2)
        .unwrap()
        .chars()
        .filter(|&c| c != '.' && c != ',')
        .collect();
    let total_crates_indexed: u32 = digits.parse().unwrap();

    let list_items = page.css("ul.cat > li");
    let mut categories: Vec<Category> = Vec::new();
    for item in list_items.iter() {
        // List items without a heading are not categories.
        let Some(heading) = item.css_once("h3") else {
            continue;
        };
        let name = heading.content();

        let href = item.css_once("a").unwrap().attr("href").unwrap();
        let slug = href.strip_prefix('/').unwrap().to_string();

        let description = item.css_once("span.desc").unwrap().content();

        let mut crates: Vec<String> = item
            .css("ul.crates > li")
            .iter()
            .map(|entry| entry.css_once("a").unwrap().content())
            .collect();

        // The last entry of the crate list is not a crate: its first word is
        // the number of remaining crates in the category.
        let trailer = crates.pop().unwrap();
        let more_count: u16 = trailer.split(" ").next().unwrap().parse().unwrap();

        categories.push(Category {
            name,
            slug,
            description,
            crates,
            more_count,
        });
    }

    (total_crates_indexed, categories)
}
/// Searches lib.rs for `criteria`.
///
/// Returns `None` when the result page contains a `.notfound` element,
/// otherwise the list of results in page order. The result is memoized by
/// `#[cached]` with `time = 3600`.
///
/// # Panics
///
/// Panics when the request cannot be built or a result entry lacks its name,
/// description or version element.
#[cached(time = 3600)]
pub fn search_by_criteria(criteria: String) -> Option<Vec<SearchItem>> {
    let page = Request::new(consts::CRITERIA_SEARCH_URL)
        .unwrap()
        .add_params(vec![("q", criteria)])
        .launch();

    if page.css_once(".notfound").is_some() {
        return None;
    }

    let entries = page.css(".inner-col ol li");
    let mut found: Vec<SearchItem> = Vec::new();
    for entry in entries.iter() {
        let name = entry.css_once("h4").unwrap().content();
        let description = entry.css_once(".desc").unwrap().content().trim().to_string();
        let version = entry.css_once(".version").unwrap().content();
        let tags: Vec<String> = entry.css(".k").iter().map(|tag| tag.content()).collect();
        found.push(SearchItem::new(name, description, version, tags));
    }
    Some(found)
}
/// Scrapes the lib.rs page of `crate_name`.
///
/// Returns `None` when lib.rs reports the crate as not found, and also when the
/// page lacks the pieces every crate page is expected to have (description,
/// owner, at least one listed version), so an unexpected page layout no longer
/// panics. The result is memoized by `#[cached]` with `time = 3600`.
///
/// # Panics
///
/// Panics only when the request for the crate URL cannot be built.
#[cached(time = 3600)]
pub fn search_crate(crate_name: String) -> Option<CrateInfo> {
    let resp = Request::new(format!("{}/{}", consts::CRATE_INFO_URL, crate_name))
        .unwrap()
        .launch();
    if resp.css_once(".notfound").is_some() {
        return None;
    }

    let description = resp
        .css_once(".desc")?
        .content()
        .trim()
        .replace('\n', " ");
    let owner = resp
        .css("a.owner")
        .iter()
        .next()?
        .css_once("span")?
        .content();

    // Rows that do not match the expected layout are skipped instead of
    // aborting the whole page.
    let versions = resp
        .css("#versions tr")
        .iter()
        .filter_map(|elem| {
            if elem.css_once("del").is_some() {
                // Rows containing a `<del>` are left out of the listing.
                None
            } else if elem.css_once(".new").is_some() {
                let version = elem
                    .css_once("a")?
                    .content()
                    .lines()
                    .nth(2)?
                    .trim()
                    .to_string();
                let date = elem
                    .css_once("*")?
                    .content()
                    .lines()
                    .nth(2)?
                    .trim()
                    .to_string();
                Some(Version::new(version, date))
            } else {
                let raw_data = elem.css_once("*")?.content();
                // `elements` is consumed progressively: every `nth`/`next`
                // continues after the line returned by the previous call.
                let mut elements = raw_data.lines();
                let mut version = elements.nth(1)?.to_string();
                if version.contains("new") {
                    version = elements.next()?.to_string();
                }
                let date = elements.nth(1)?.to_string();
                Some(Version::new(version, date))
            }
        })
        .collect::<Vec<Version>>();
    // The first listed version is treated as the latest one.
    let latest_version = versions.first()?.clone();

    // A README that cannot be converted is treated as absent.
    let readme = resp.css_once("#readme").and_then(|elem| {
        let converter = HtmlToMarkdown::builder()
            .skip_tags(vec![
                "h1",
                "img",
                "picture",
                "figcaption",
                "source",
                "figure",
                "svg",
            ])
            .build();
        let md_readme = converter.convert(elem.html().as_ref()).ok()?;
        Some(md2gemtext::convert(&md_readme))
    });

    let license = resp
        .css_once("b[property='license']")
        .map(|elem| elem.content());

    let mut git_repo = None;
    let mut api_reference = None;
    let mut website = None;
    for li in resp.css("header nav li") {
        // Navigation entries without a link carry nothing to extract.
        let Some(a) = li.css_once("a") else {
            continue;
        };
        let href = a.attr("href");
        let content = a.html();
        if content.contains("Git") || content.contains("Repository") {
            git_repo = href;
        } else if content.contains("API reference") {
            api_reference = href;
        } else if content.contains("Home") {
            website = href;
        }
    }

    // NOTE(review): `[title*='1']` only keeps links whose title contains `1`;
    // the intent of that filter is not visible here — confirm against the page markup.
    let dependencies = resp
        .css("#deps li[property='requirements'] a[title*='1']")
        .iter()
        .map(|elem| elem.content())
        .collect::<Vec<String>>();

    Some(CrateInfo {
        name: crate_name,
        description,
        owner,
        latest_version,
        versions,
        readme,
        license,
        git_repo,
        api_reference,
        website,
        dependencies,
    })
}

20
src/search_item.rs 100644
View File

@ -0,0 +1,20 @@
use serde::Serialize;
/// One entry of a search result page, serialized into the `search` template.
#[derive(Serialize, Debug, Clone)]
pub struct SearchItem {
/// Crate name; the search template links it as `/crates/<name>`.
pub name: String,
/// Short description of the crate.
pub description: String,
/// Version label displayed next to the name.
pub version: String,
/// Tags of the crate; the search template joins them with `, `.
pub tags: Vec<String>,
}
impl SearchItem {
pub fn new(name: String, description: String, version: String, tags: Vec<String>) -> Self {
SearchItem {
name,
description,
version,
tags,
}
}
}

13
src/version.rs 100644
View File

@ -0,0 +1,13 @@
use serde::Serialize;
/// A released version of a crate together with its date, both kept as display text.
#[derive(Serialize, Debug, Clone)]
pub struct Version {
/// Version label.
pub version: String,
/// Release date text, printed next to the version by the crate template.
pub date: String,
}
impl Version {
pub fn new(version: String, date: String) -> Self {
Version { version, date }
}
}

View File

@ -0,0 +1,12 @@
# lib.rs
=> / Home
{% block content -%}
{%- endblock content %}
This version of Lib.rs for the Gemini protocol was created by kirbylife, but all credit goes to kornelski, the original author of lib.rs.
Lib.rs is an unofficial list of Rust/Cargo crates. It contains data from multiple sources, including heuristics and manually curated data. The content of this page is not necessarily endorsed by the authors of the crate. This gemini capsule is not affiliated with nor endorsed by the Rust Project.
Contact me through:
=> mailto:hola@kirbylife.dev hola(at)kirbylife.dev
@kirbylife:matrix.org

View File

@ -0,0 +1,21 @@
# lib.rs
=> / Home
(Sorry in advance, I always suck at writing "About" pages)
This unofficial port of lib.rs for the Gemini protocol pulls information directly from the official website, but it has no connection whatsoever with the original project or its creator kornelski. I created this project to check crate details from Lagrange on my Haiku laptop and decided to open the portal to the rest of the Gemini space.
Although the data is fetched from lib.rs, I cache it for an hour to avoid putting stress on kornelski's server. This means that occasionally the data might not be 100% up to date, but in 99% of cases, it remains useful.
If you find that any crate information is displayed incorrectly, feel free to contact me via email, Mastodon or through Matrix chat:
=> mailto:hola@kirbylife.dev hola@kirbylife.dev [E-mail]
=> https://mstdn.mx/@kirbylife @kirbylife@mstdn.mx [Mastodon]
@kirbylife:matrix.org [Matrix]
Please don't report badly rendered READMEs as errors. This happens because the content goes through the following transformations:
Markdown (md) -> HTML
on the lib.rs server, and then on my server:
HTML -> Markdown -> Gemtext.
Some information gets lost along the way, which is to be expected.
This project is made with Rust using the Windmark framework and the code can be found in my gitea instance:
=> https://git.kirbylife.dev/kirbylife/librs_gemini librs_gemini Source code

View File

@ -0,0 +1,52 @@
{% extends "_base" %}
{# Crate detail page. Expects `error` (optional message) and `item` (optional crate info). #}
{% block content -%}
{% if error -%}
{{ error }}
{% endif -%}
{%- if item -%}
## {{ item.name }} v{{ item.latest_version.version }}
> {{ item.description }}
by {{ item.owner }} {% if item.license %}[{{ item.license }}]{% endif %}
### Install by:
Run this command in your project's directory:
```
cargo add {{ item.name }}
```
Or add the following line to your Cargo.toml
```
[dependencies]
{{ item.name }} = "{{ item.latest_version.version }}"
```
{% if item.api_reference -%}
=> {{ item.api_reference }} API reference
{%- endif %}
{% if item.git_repo -%}
=> {{ item.git_repo }} Git repo
{%- endif %}
{% if item.website -%}
=> {{ item.website }} Website
{%- endif %}
### Latest versions:
{%- for version in item.versions %}
* {{ version.version }} {{ version.date }}
{%- endfor %}
{#- The readme is optional: `trim` only accepts strings, so the section is skipped when it is absent. #}
{% if item.readme -%}
### Readme
----------------------------------------------
{{ item.readme | trim }}
----------------------------------------------
{% endif -%}
### Dependencies:
{% if item.dependencies -%}
{%- for dep in item.dependencies %}
=> /crates/{{ dep }} {{ dep }}
{%- endfor -%}
{%- else -%}
No dependencies
{%- endif -%}
{%- endif -%}
{% endblock content %}

View File

@ -0,0 +1,22 @@
{% extends "_base" %}
{% block content -%}
Index of {{ total_crates_indexed | separate_with_commas }} Rust libraries and applications. Lightweight, opinionated, curated, unofficial alternative to crates.io.
=> /about About
To search for a crate, use the following path:
/search/<term to search>
or, if you want to go directly to a crate info page, use:
/crates/<crate name>
## Categories
{% for category in categories %}
### {{ category.name }}
{{ category.description -}}
{% for crate in category.crates %}
=> /crates/{{ crate }} {{ crate }}
{%- endfor %}
=> /category/{{ category.slug }} {{ category.more_count }} more...
{% endfor -%}
{% endblock %}

View File

@ -0,0 +1,20 @@
{% extends "_base" %}
{% block content -%}
## Search {{ criteria }}
{% if error -%}
{{ error }}
{% endif -%}
{%- if items -%}
### {{ items | length }} crates found:
{% for item in items -%}
=> /crates/{{ item.name}} {{ item.name }} {{ item.version }}
{{ item.description }}
tags: {{ item.tags | join(sep=", ") }}
{% endfor -%}
{%- endif -%}
{% endblock content %}