write the library
commit
33fb649a36
|
@ -0,0 +1,10 @@
|
||||||
|
image: alpine/latest
|
||||||
|
packages:
|
||||||
|
- rust
|
||||||
|
- cargo
|
||||||
|
sources:
|
||||||
|
- https://git.sr.ht/~boringcactus/md2gemtext
|
||||||
|
tasks:
|
||||||
|
- test: |
|
||||||
|
cd md2gemtext
|
||||||
|
cargo test
|
|
@ -0,0 +1 @@
|
||||||
|
/target
|
|
@ -0,0 +1,55 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
[[package]]
|
||||||
|
name = "bitflags"
|
||||||
|
version = "1.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "gemtext"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "git+https://tulpa.dev/boringcactus/maj.git?branch=make-gemtext-node-clone#c07d81077a46f618edb38f4c3b482802d377818c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "md2gemtext"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"gemtext 0.2.0 (git+https://tulpa.dev/boringcactus/maj.git?branch=make-gemtext-node-clone)",
|
||||||
|
"pulldown-cmark 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pulldown-cmark"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicase"
|
||||||
|
version = "2.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"version_check 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[metadata]
|
||||||
|
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||||
|
"checksum gemtext 0.2.0 (git+https://tulpa.dev/boringcactus/maj.git?branch=make-gemtext-node-clone)" = "<none>"
|
||||||
|
"checksum memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
|
||||||
|
"checksum pulldown-cmark 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8"
|
||||||
|
"checksum unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
|
||||||
|
"checksum version_check 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
|
|
@ -0,0 +1,11 @@
|
||||||
|
[package]
|
||||||
|
name = "md2gemtext"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Melody Horn <melody@boringcactus.com>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
gemtext = { version = "0.2.0", git = "https://tulpa.dev/boringcactus/maj.git", branch = "make-gemtext-node-clone" }
|
||||||
|
pulldown-cmark = { version = "0.8", default-features = false }
|
|
@ -0,0 +1,35 @@
|
||||||
|
# md2gemtext
|
||||||
|
|
||||||
|
[![builds.sr.ht status](https://builds.sr.ht/~boringcactus/md2gemtext.svg)](https://builds.sr.ht/~boringcactus/md2gemtext?)
|
||||||
|
|
||||||
|
for converting Markdown into [gemtext](https://gemini.circumlunar.space/docs/specification.html).
|
||||||
|
|
||||||
|
## standalone usage
|
||||||
|
|
||||||
|
```shell script
|
||||||
|
cargo install md2gemtext
|
||||||
|
md2gemtext /path/to/some/file.md /path/to/some/file.gmi
|
||||||
|
```
|
||||||
|
|
||||||
|
## library usage
|
||||||
|
|
||||||
|
```rust
|
||||||
|
let gemtext = md2gemtext::convert("some markdown");
|
||||||
|
```
|
||||||
|
|
||||||
|
## translation rules
|
||||||
|
|
||||||
|
- "thematic breaks" (hr tags) are translated to `-----` on a line by itself
|
||||||
|
- headings turn into headings, levels beyond 3 get capped at 3
|
||||||
|
- code blocks get turned into code blocks (info strings are discarded)
|
||||||
|
- something happens to HTML, i forget what
|
||||||
|
- paragraphs get empty lines between them, because i think that looks better
|
||||||
|
- block quotes get turned into quotes
|
||||||
|
- lists get turned into lists ("loose lists" probably misbehave, nested lists *definitely* misbehave, numbering is not preserved)
|
||||||
|
- `code spans` turn into `` `code spans` ``
|
||||||
|
- *italics* turn into `_italics_`
|
||||||
|
- **bold** turns into `**bold**`
|
||||||
|
- ~~strikethrough~~ turns into `~~strikethrough~~`
|
||||||
|
- `a [link](a://url) with context` turns into `a link with context` followed by `=> a://url link`
|
||||||
|
- `an ![inline](a://url) image` turns into `an [image: inline] image` followed by `=> a://url [image: inline]`
|
||||||
|
- if a link or image is its own paragraph, it becomes just the gemtext link, to not be redundant
|
|
@ -0,0 +1,215 @@
|
||||||
|
use pulldown_cmark as md;
|
||||||
|
|
||||||
|
/// Converts a given string of Markdown to semi-equivalent gemtext.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
///
|
||||||
|
/// Will panic if gemtext::render somehow produces invalid UTF-8.
|
||||||
|
/// Since gemtext::render only produces valid UTF-8, this should never happen.
|
||||||
|
pub fn convert(markdown_text: &str) -> String {
|
||||||
|
let mut options = md::Options::empty();
|
||||||
|
options.insert(md::Options::ENABLE_STRIKETHROUGH);
|
||||||
|
let parser = md::Parser::new_ext(markdown_text, options);
|
||||||
|
let mut state = State::new();
|
||||||
|
|
||||||
|
for event in parser {
|
||||||
|
println!("{:?}", event);
|
||||||
|
match event {
|
||||||
|
md::Event::Start(md::Tag::Paragraph) => (),
|
||||||
|
md::Event::Start(md::Tag::Heading(level)) => state.start_heading(level),
|
||||||
|
md::Event::Start(md::Tag::BlockQuote) => state.start_block_quote(),
|
||||||
|
md::Event::Start(md::Tag::CodeBlock(_)) => state.start_code_block(),
|
||||||
|
md::Event::Start(md::Tag::List(_)) => (),
|
||||||
|
md::Event::Start(md::Tag::Item) => state.start_list_item(),
|
||||||
|
md::Event::Start(md::Tag::FootnoteDefinition(_)) => unimplemented!("footnotes disabled"),
|
||||||
|
md::Event::Start(md::Tag::Table(_)) => unimplemented!("tables disabled"),
|
||||||
|
md::Event::Start(md::Tag::TableHead) => unimplemented!("tables disabled"),
|
||||||
|
md::Event::Start(md::Tag::TableRow) => unimplemented!("tables disabled"),
|
||||||
|
md::Event::Start(md::Tag::TableCell) => unimplemented!("tables disabled"),
|
||||||
|
md::Event::Start(md::Tag::Emphasis) => state.toggle_emphasis(),
|
||||||
|
md::Event::Start(md::Tag::Strong) => state.toggle_strong(),
|
||||||
|
md::Event::Start(md::Tag::Strikethrough) => state.toggle_strikethrough(),
|
||||||
|
md::Event::Start(md::Tag::Link(_, _, _)) => state.start_link(),
|
||||||
|
md::Event::Start(md::Tag::Image(_, _, _)) => state.start_image(),
|
||||||
|
|
||||||
|
md::Event::End(md::Tag::Paragraph) => state.finish_node(),
|
||||||
|
md::Event::End(md::Tag::Heading(_)) => state.finish_node(),
|
||||||
|
md::Event::End(md::Tag::BlockQuote) => state.finish_node(),
|
||||||
|
md::Event::End(md::Tag::CodeBlock(_)) => state.finish_node(),
|
||||||
|
md::Event::End(md::Tag::List(_)) => state.finish_list(),
|
||||||
|
md::Event::End(md::Tag::Item) => state.finish_node(),
|
||||||
|
md::Event::End(md::Tag::FootnoteDefinition(_)) => unimplemented!("footnotes disabled"),
|
||||||
|
md::Event::End(md::Tag::Table(_)) => unimplemented!("tables disabled"),
|
||||||
|
md::Event::End(md::Tag::TableHead) => unimplemented!("tables disabled"),
|
||||||
|
md::Event::End(md::Tag::TableRow) => unimplemented!("tables disabled"),
|
||||||
|
md::Event::End(md::Tag::TableCell) => unimplemented!("tables disabled"),
|
||||||
|
md::Event::End(md::Tag::Emphasis) => state.toggle_emphasis(),
|
||||||
|
md::Event::End(md::Tag::Strong) => state.toggle_strong(),
|
||||||
|
md::Event::End(md::Tag::Strikethrough) => state.toggle_strikethrough(),
|
||||||
|
md::Event::End(md::Tag::Link(_, href, _)) => state.end_link(&href),
|
||||||
|
md::Event::End(md::Tag::Image(_, src, _)) => state.end_image(&src),
|
||||||
|
|
||||||
|
md::Event::Text(text) => state.add_text(&text),
|
||||||
|
md::Event::Code(code) => state.add_inline_code(&code),
|
||||||
|
md::Event::Html(html) => state.add_text(&html),
|
||||||
|
md::Event::FootnoteReference(_) => unimplemented!("footnotes disabled"),
|
||||||
|
md::Event::SoftBreak => state.add_text(" "),
|
||||||
|
md::Event::HardBreak => state.finish_node(),
|
||||||
|
md::Event::Rule => state.add_rule(),
|
||||||
|
md::Event::TaskListMarker(_) => unimplemented!("task lists disabled"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let nodes = state.nodes
|
||||||
|
.into_iter()
|
||||||
|
.map(condense)
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(&gemtext::Node::blank());
|
||||||
|
let mut result: Vec<u8> = vec![];
|
||||||
|
gemtext::render(nodes, &mut result).expect("gemtext::render somehow failed");
|
||||||
|
String::from_utf8(result).expect("gemtext::render somehow produced invalid UTF-8")
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeCluster = Vec<gemtext::Node>;
|
||||||
|
|
||||||
|
fn condense(original: NodeCluster) -> NodeCluster {
|
||||||
|
match original.as_slice() {
|
||||||
|
[gemtext::Node::Text(text), gemtext::Node::Link { name: Some(name), .. }] if text == name => vec![original[1].clone()],
|
||||||
|
_ => original,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
enum NodeType {
|
||||||
|
Text,
|
||||||
|
Preformatted,
|
||||||
|
Heading { level: u8 },
|
||||||
|
ListItem,
|
||||||
|
Quote,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NodeType {
|
||||||
|
fn take(&mut self) -> Self {
|
||||||
|
std::mem::replace(self, NodeType::Text)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn construct(self, body: String) -> gemtext::Node {
|
||||||
|
use NodeType::*;
|
||||||
|
match self {
|
||||||
|
Text => gemtext::Node::Text(body),
|
||||||
|
Preformatted => gemtext::Node::Preformatted(body),
|
||||||
|
Heading { level } => gemtext::Node::Heading { level, body },
|
||||||
|
ListItem => gemtext::Node::ListItem(body),
|
||||||
|
Quote => gemtext::Node::Quote(body),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct State {
|
||||||
|
nodes: Vec<NodeCluster>,
|
||||||
|
pending_node_content: String,
|
||||||
|
pending_node_type: NodeType,
|
||||||
|
pending_other: Vec<gemtext::Node>,
|
||||||
|
link_text_stack: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl State {
|
||||||
|
fn new() -> Self {
|
||||||
|
State {
|
||||||
|
nodes: vec![],
|
||||||
|
pending_node_content: String::new(),
|
||||||
|
pending_node_type: NodeType::Text,
|
||||||
|
pending_other: vec![],
|
||||||
|
link_text_stack: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_heading(&mut self, level: u32) {
|
||||||
|
let level = match level {
|
||||||
|
1 => 1,
|
||||||
|
2 => 2,
|
||||||
|
_ => 3,
|
||||||
|
};
|
||||||
|
self.pending_node_type = NodeType::Heading { level };
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_block_quote(&mut self) {
|
||||||
|
self.pending_node_type = NodeType::Quote;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_code_block(&mut self) {
|
||||||
|
self.pending_node_type = NodeType::Preformatted;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_list_item(&mut self) {
|
||||||
|
self.pending_node_type = NodeType::ListItem;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn toggle_emphasis(&mut self) {
|
||||||
|
self.add_text("_");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn toggle_strong(&mut self) {
|
||||||
|
self.add_text("**");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn toggle_strikethrough(&mut self) {
|
||||||
|
self.add_text("~~");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_link(&mut self) {
|
||||||
|
self.link_text_stack.push(String::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_image(&mut self) {
|
||||||
|
self.link_text_stack.push(String::new());
|
||||||
|
self.pending_node_content += "[image: ";
|
||||||
|
}
|
||||||
|
|
||||||
|
fn finish_list(&mut self) {
|
||||||
|
self.nodes.push(vec![]);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_link(&mut self, href: &str) {
|
||||||
|
let text = self.link_text_stack.pop().unwrap_or_else(|| href.to_string());
|
||||||
|
self.pending_other.push(gemtext::Node::Link { to: href.to_string(), name: Some(text) });
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_image(&mut self, src: &str) {
|
||||||
|
let text = self.link_text_stack.pop().unwrap_or_else(|| src.to_string());
|
||||||
|
let text = format!("[image: {}]", text);
|
||||||
|
self.pending_other.push(gemtext::Node::Link { to: src.to_string(), name: Some(text) });
|
||||||
|
self.pending_node_content += "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
// will create an empty paragraph if pending_text is empty
|
||||||
|
fn finish_node(&mut self) {
|
||||||
|
match (&self.pending_node_type, self.nodes.last().and_then(|cluster| cluster.last())) {
|
||||||
|
(NodeType::ListItem, Some(gemtext::Node::ListItem(_))) => (),
|
||||||
|
_ => self.nodes.push(vec![]),
|
||||||
|
}
|
||||||
|
let node_text = self.pending_node_content.trim().to_string();
|
||||||
|
let new_node = self.pending_node_type.take().construct(node_text);
|
||||||
|
let last_cluster = self.nodes.last_mut().expect("empty cluster list??");
|
||||||
|
last_cluster.push(new_node);
|
||||||
|
last_cluster.extend(self.pending_other.drain(..));
|
||||||
|
|
||||||
|
self.pending_node_content = String::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_text(&mut self, text: &str) {
|
||||||
|
for link_text in &mut self.link_text_stack {
|
||||||
|
*link_text += text;
|
||||||
|
}
|
||||||
|
self.pending_node_content += text;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_inline_code(&mut self, code: &str) {
|
||||||
|
self.pending_node_content += "`";
|
||||||
|
self.pending_node_content += code;
|
||||||
|
self.pending_node_content += "`";
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_rule(&mut self) {
|
||||||
|
self.add_text("-----");
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
use std::env;
|
||||||
|
use std::fs;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let args = env::args_os().collect::<Vec<_>>();
|
||||||
|
match args.as_slice() {
|
||||||
|
[_, source_file, dest_file] => {
|
||||||
|
let source_text = fs::read_to_string(source_file).expect("couldn't read source file");
|
||||||
|
let result_text = md2gemtext::convert(&source_text);
|
||||||
|
fs::write(dest_file, result_text).expect("couldn't write dest file");
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
eprintln!("usage: md2gemtext <source (markdown) file path> <dest (gemtext) file path>")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue