From 33fb649a36dffe5bb52f213312e2ff9d3132275e Mon Sep 17 00:00:00 2001 From: Melody Horn Date: Mon, 5 Oct 2020 05:35:17 -0600 Subject: [PATCH] write the library --- .build.yml | 10 +++ .gitignore | 1 + Cargo.lock | 55 ++++++++++++++ Cargo.toml | 11 +++ README.md | 35 +++++++++ src/lib.rs | 215 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 16 ++++ 7 files changed, 343 insertions(+) create mode 100644 .build.yml create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 src/lib.rs create mode 100644 src/main.rs diff --git a/.build.yml b/.build.yml new file mode 100644 index 0000000..adc428e --- /dev/null +++ b/.build.yml @@ -0,0 +1,10 @@ +image: alpine/latest +packages: + - rust + - cargo +sources: + - https://git.sr.ht/~boringcactus/md2gemtext +tasks: + - test: | + cd md2gemtext + cargo test diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..c38dbe2 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,55 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "gemtext" +version = "0.2.0" +source = "git+https://tulpa.dev/boringcactus/maj.git?branch=make-gemtext-node-clone#c07d81077a46f618edb38f4c3b482802d377818c" + +[[package]] +name = "md2gemtext" +version = "0.1.0" +dependencies = [ + "gemtext 0.2.0 (git+https://tulpa.dev/boringcactus/maj.git?branch=make-gemtext-node-clone)", + "pulldown-cmark 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "memchr" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "pulldown-cmark" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unicase" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "version_check 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "version_check" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +"checksum gemtext 0.2.0 (git+https://tulpa.dev/boringcactus/maj.git?branch=make-gemtext-node-clone)" = "" +"checksum memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" +"checksum pulldown-cmark 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8" +"checksum unicase 2.6.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +"checksum version_check 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..73b7902 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "md2gemtext" +version = "0.1.0" +authors = ["Melody Horn "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +gemtext = { version = "0.2.0", git = "https://tulpa.dev/boringcactus/maj.git", branch = "make-gemtext-node-clone" } +pulldown-cmark = { version = "0.8", default-features = false } diff --git a/README.md b/README.md new file mode 100644 index 0000000..314fb10 --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +# md2gemtext + +[![builds.sr.ht status](https://builds.sr.ht/~boringcactus/md2gemtext.svg)](https://builds.sr.ht/~boringcactus/md2gemtext?) + +for converting Markdown into [gemtext](https://gemini.circumlunar.space/docs/specification.html). 
+ +## standalone usage + +```shell script +cargo install md2gemtext +md2gemtext /path/to/some/file.md /path/to/some/file.gmi +``` + +## library usage + +```rust +let gemtext = md2gemtext::convert("some markdown"); +``` + +## translation rules + +- "thematic breaks" (hr tags) are translated to `-----` on a line by itself +- headings turn into headings, levels beyond 3 get capped at 3 +- code blocks get turned into code blocks (info strings are discarded) +- HTML is passed through as plain text, tags and all +- paragraphs get empty lines between them, because i think that looks better +- block quotes get turned into quotes +- lists get turned into lists ("loose lists" probably misbehave, nested lists *definitely* misbehave, numbering is not preserved) +- `code spans` turn into `\`code spans\`` +- *italics* turn into `_italics_` +- **bold** turns into `**bold**` +- ~~strikethrough~~ turns into `~~strikethrough~~` +- `a [link](a://url) with context` turns into `a link with context` followed by `=> a://url link` +- `an ![inline](a://url) image` turns into `an [image: inline] image` followed by `=> a://url [image: inline]` +- if a link or image is its own paragraph, it becomes just the gemtext link, to not be redundant diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..f989cf0 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,215 @@ +use pulldown_cmark as md; + +/// Converts a given string of Markdown to semi-equivalent gemtext. +/// +/// # Panics +/// +/// Will panic if gemtext::render somehow produces invalid UTF-8. +/// Since gemtext::render only produces valid UTF-8, this should never happen. 
+pub fn convert(markdown_text: &str) -> String {
+    // Strikethrough is the only extension switched on. Footnotes, tables and task lists
+    // stay off, which is why their events are treated as unimplemented below.
+    let mut options = md::Options::empty();
+    options.insert(md::Options::ENABLE_STRIKETHROUGH);
+    let parser = md::Parser::new_ext(markdown_text, options);
+    let mut state = State::new();
+
+    for event in parser {
+        match event {
+            // Paragraphs and lists need no setup: their content is collected as text
+            // and flushed when the matching End event arrives.
+            md::Event::Start(md::Tag::Paragraph) => (),
+            md::Event::Start(md::Tag::Heading(level)) => state.start_heading(level),
+            md::Event::Start(md::Tag::BlockQuote) => state.start_block_quote(),
+            md::Event::Start(md::Tag::CodeBlock(_)) => state.start_code_block(),
+            md::Event::Start(md::Tag::List(_)) => (),
+            md::Event::Start(md::Tag::Item) => state.start_list_item(),
+            md::Event::Start(md::Tag::FootnoteDefinition(_)) => unimplemented!("footnotes disabled"),
+            md::Event::Start(md::Tag::Table(_)) => unimplemented!("tables disabled"),
+            md::Event::Start(md::Tag::TableHead) => unimplemented!("tables disabled"),
+            md::Event::Start(md::Tag::TableRow) => unimplemented!("tables disabled"),
+            md::Event::Start(md::Tag::TableCell) => unimplemented!("tables disabled"),
+            md::Event::Start(md::Tag::Emphasis) => state.toggle_emphasis(),
+            md::Event::Start(md::Tag::Strong) => state.toggle_strong(),
+            md::Event::Start(md::Tag::Strikethrough) => state.toggle_strikethrough(),
+            md::Event::Start(md::Tag::Link(_, _, _)) => state.start_link(),
+            md::Event::Start(md::Tag::Image(_, _, _)) => state.start_image(),
+
+            md::Event::End(md::Tag::Paragraph) => state.finish_node(),
+            md::Event::End(md::Tag::Heading(_)) => state.finish_node(),
+            md::Event::End(md::Tag::BlockQuote) => state.finish_node(),
+            md::Event::End(md::Tag::CodeBlock(_)) => state.finish_node(),
+            md::Event::End(md::Tag::List(_)) => state.finish_list(),
+            md::Event::End(md::Tag::Item) => state.finish_node(),
+            md::Event::End(md::Tag::FootnoteDefinition(_)) => unimplemented!("footnotes disabled"),
+            md::Event::End(md::Tag::Table(_)) => unimplemented!("tables disabled"),
+            md::Event::End(md::Tag::TableHead) => unimplemented!("tables disabled"),
+            md::Event::End(md::Tag::TableRow) => unimplemented!("tables disabled"),
+            md::Event::End(md::Tag::TableCell) => unimplemented!("tables disabled"),
+            // Inline markers are symmetric, so opening and closing emit the same text.
+            md::Event::End(md::Tag::Emphasis) => state.toggle_emphasis(),
+            md::Event::End(md::Tag::Strong) => state.toggle_strong(),
+            md::Event::End(md::Tag::Strikethrough) => state.toggle_strikethrough(),
+            md::Event::End(md::Tag::Link(_, href, _)) => state.end_link(&href),
+            md::Event::End(md::Tag::Image(_, src, _)) => state.end_image(&src),
+
+            md::Event::Text(text) => state.add_text(&text),
+            md::Event::Code(code) => state.add_inline_code(&code),
+            // HTML is not interpreted; it is carried over as ordinary text.
+            md::Event::Html(html) => state.add_text(&html),
+            md::Event::FootnoteReference(_) => unimplemented!("footnotes disabled"),
+            md::Event::SoftBreak => state.add_text(" "),
+            md::Event::HardBreak => state.finish_node(),
+            md::Event::Rule => state.add_rule(),
+            md::Event::TaskListMarker(_) => unimplemented!("task lists disabled"),
+        }
+    }
+
+    // Condense every cluster, then flatten them into one node list with
+    // `gemtext::Node::blank()` placed between neighbouring clusters.
+    let nodes = state
+        .nodes
+        .into_iter()
+        .map(condense)
+        .collect::<Vec<_>>()
+        .join(&gemtext::Node::blank());
+    let mut result: Vec<u8> = vec![];
+    gemtext::render(nodes, &mut result).expect("gemtext::render somehow failed");
+    String::from_utf8(result).expect("gemtext::render somehow produced invalid UTF-8")
+}
+
+/// The nodes produced for one block of Markdown: the block itself followed by
+/// the links that were collected while reading it.
+type NodeCluster = Vec<gemtext::Node>;
+
+/// Drops the redundant text node from a cluster that is exactly a text node
+/// followed by a link whose name equals that text, leaving only the link.
+/// Every other cluster is returned unchanged.
+fn condense(original: NodeCluster) -> NodeCluster {
+    match original.as_slice() {
+        [gemtext::Node::Text(text), gemtext::Node::Link { name: Some(name), .. }] if text == name => {
+            vec![original[1].clone()]
+        }
+        _ => original,
+    }
+}
+
+/// The kind of gemtext node the text currently being collected will become.
+enum NodeType {
+    Text,
+    Preformatted,
+    Heading { level: u8 },
+    ListItem,
+    Quote,
+}
+
+impl NodeType {
+    /// Returns the current value and resets `self` to `NodeType::Text`.
+    fn take(&mut self) -> Self {
+        std::mem::replace(self, NodeType::Text)
+    }
+
+    /// Builds the gemtext node of this kind with `body` as its content.
+    fn construct(self, body: String) -> gemtext::Node {
+        use NodeType::*;
+        match self {
+            Text => gemtext::Node::Text(body),
+            Preformatted => gemtext::Node::Preformatted(body),
+            Heading { level } => gemtext::Node::Heading { level, body },
+            ListItem => gemtext::Node::ListItem(body),
+            Quote => gemtext::Node::Quote(body),
+        }
+    }
+}
+
+/// Accumulator driven by the Markdown event loop in `convert`.
+struct State {
+    /// Finished clusters, in output order.
+    nodes: Vec<NodeCluster>,
+    /// Text collected for the node that is currently being built.
+    pending_node_content: String,
+    /// Kind of the node that is currently being built.
+    pending_node_type: NodeType,
+    /// Link nodes to append right after the node that is currently being built.
+    pending_other: Vec<gemtext::Node>,
+    /// Text collected so far for each link or image that is still open, outermost first.
+    link_text_stack: Vec<String>,
+}
+
+impl State {
+    /// Creates an empty state whose pending node is a plain text node.
+    fn new() -> Self {
+        State {
+            nodes: vec![],
+            pending_node_content: String::new(),
+            pending_node_type: NodeType::Text,
+            pending_other: vec![],
+            link_text_stack: vec![],
+        }
+    }
+
+    /// Marks the pending node as a heading; levels other than 1 and 2 become level 3.
+    fn start_heading(&mut self, level: u32) {
+        let level = match level {
+            1 => 1,
+            2 => 2,
+            _ => 3,
+        };
+        self.pending_node_type = NodeType::Heading { level };
+    }
+
+    /// Marks the pending node as a quote.
+    fn start_block_quote(&mut self) {
+        self.pending_node_type = NodeType::Quote;
+    }
+
+    /// Marks the pending node as preformatted text.
+    fn start_code_block(&mut self) {
+        self.pending_node_type = NodeType::Preformatted;
+    }
+
+    /// Marks the pending node as a list item.
+    fn start_list_item(&mut self) {
+        self.pending_node_type = NodeType::ListItem;
+    }
+
+    /// Emits the `_` marker that both opens and closes emphasis.
+    fn toggle_emphasis(&mut self) {
+        self.add_text("_");
+    }
+
+    /// Emits the `**` marker that both opens and closes strong text.
+    fn toggle_strong(&mut self) {
+        self.add_text("**");
+    }
+
+    /// Emits the `~~` marker that both opens and closes strikethrough.
+    fn toggle_strikethrough(&mut self) {
+        self.add_text("~~");
+    }
+
+    /// Starts collecting the text of a link.
+    fn start_link(&mut self) {
+        self.link_text_stack.push(String::new());
+    }
+
+    /// Starts collecting the alt text of an image and opens its `[image: ...]` wrapper.
+    fn start_image(&mut self) {
+        // Go through add_text *before* pushing the image's own entry, so that a link
+        // wrapping this image sees the same "[image: " prefix as the visible text does.
+        self.add_text("[image: ");
+        self.link_text_stack.push(String::new());
+    }
+
+    /// Ends a list by pushing an empty cluster, so that the items of a following
+    /// list cannot be appended to this list's cluster by `finish_node`.
+    fn finish_list(&mut self) {
+        self.nodes.push(vec![]);
+    }
+
+    /// Closes the innermost open link and queues a link node for `href`,
+    /// named after the text collected since `start_link`.
+    fn end_link(&mut self, href: &str) {
+        let text = self.link_text_stack.pop().unwrap_or_else(|| href.to_string());
+        self.pending_other.push(gemtext::Node::Link { to: href.to_string(), name: Some(text) });
+    }
+
+    /// Closes the innermost open image, queues a link node for `src` named
+    /// `[image: <alt text>]`, and closes the wrapper in the visible text.
+    fn end_image(&mut self, src: &str) {
+        let text = self.link_text_stack.pop().unwrap_or_else(|| src.to_string());
+        let text = format!("[image: {}]", text);
+        self.pending_other.push(gemtext::Node::Link { to: src.to_string(), name: Some(text) });
+        // The image's own entry is already popped, so this only reaches the visible
+        // text and any link that encloses the image.
+        self.add_text("]");
+    }
+
+    /// Turns the pending text into a node of the pending type, followed by any queued links.
+    ///
+    /// Consecutive list items share one cluster; everything else starts a new cluster.
+    /// The pending type is reset to plain text afterwards.
+    // will create an empty paragraph if pending_text is empty
+    fn finish_node(&mut self) {
+        match (&self.pending_node_type, self.nodes.last().and_then(|cluster| cluster.last())) {
+            (NodeType::ListItem, Some(gemtext::Node::ListItem(_))) => (),
+            _ => self.nodes.push(vec![]),
+        }
+        let node_text = self.pending_node_content.trim().to_string();
+        let new_node = self.pending_node_type.take().construct(node_text);
+        let last_cluster = self.nodes.last_mut().expect("empty cluster list??");
+        last_cluster.push(new_node);
+        last_cluster.extend(self.pending_other.drain(..));
+
+        self.pending_node_content.clear();
+    }
+
+    /// Appends `text` to the pending node and to the text of every open link or image.
+    fn add_text(&mut self, text: &str) {
+        for link_text in &mut self.link_text_stack {
+            *link_text += text;
+        }
+        self.pending_node_content += text;
+    }
+
+    /// Appends an inline code span, wrapped in backticks.
+    fn add_inline_code(&mut self, code: &str) {
+        // Routed through add_text so that a code span inside a link also ends up in
+        // the link's name instead of silently disappearing from it.
+        self.add_text("`");
+        self.add_text(code);
+        self.add_text("`");
+    }
+
+    /// Emits a thematic break as `-----` in a node of its own.
+    fn add_rule(&mut self) {
+        self.add_text("-----");
+        // A Rule event has no matching End event, so the node has to be finished here;
+        // otherwise the dashes would be glued onto the start of the next block.
+        self.finish_node();
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..a237a11
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,18 @@
+use std::env;
+use std::fs;
+
+fn main() {
+    let args = env::args_os().collect::<Vec<_>>();
+    match args.as_slice() {
+        [_, source_file, dest_file] => {
+            let source_text = fs::read_to_string(source_file).expect("couldn't read source file");
+            let result_text = md2gemtext::convert(&source_text);
+            fs::write(dest_file, result_text).expect("couldn't write dest file");
+        }
+        _ => {
+            eprintln!("usage: md2gemtext <source.md> <dest.gmi>");
+            // a usage error is a failure, so report it through the exit status too
+            std::process::exit(1);
+        }
+    }
+}