// md2gemtext/src/lib.rs
//
// 511 lines
// 14 KiB
// Rust

use comfy_table::Table;
use pulldown_cmark as md;
/// Translates a Markdown document into roughly equivalent gemtext.
///
/// The input is parsed with table support as the only enabled parser option.
/// Every parser event is forwarded to an internal `State`; the node clusters
/// it collects are then stripped of empty clusters, passed through
/// `condense`, joined with a blank node between clusters and rendered.
///
/// # Panics
///
/// Panics if `gemtext::render` returns an error or emits bytes that are not
/// valid UTF-8; neither is expected to happen. The arms for footnotes,
/// strikethrough and task lists call `unimplemented!`; they are expected to
/// be unreachable because those parser options are never switched on here.
pub fn convert(markdown_text: &str) -> String {
    let mut opts = md::Options::empty();
    opts.insert(md::Options::ENABLE_TABLES);

    let mut state = State::new();
    for event in md::Parser::new_ext(markdown_text, opts) {
        match event {
            // Block openers that select the kind of node being built.
            md::Event::Start(md::Tag::Heading(level)) => state.start_heading(level),
            md::Event::Start(md::Tag::BlockQuote) => state.start_block_quote(),
            md::Event::Start(md::Tag::CodeBlock(_)) => state.start_code_block(),
            md::Event::Start(md::Tag::List(_)) => state.start_list(),
            md::Event::Start(md::Tag::Item) => state.start_list_item(),

            // Tables: the header and every body row each open a new row.
            md::Event::Start(md::Tag::Table(_)) => state.start_table_building(),
            md::Event::Start(md::Tag::TableHead) | md::Event::Start(md::Tag::TableRow) => {
                state.add_table_row()
            }
            md::Event::Start(md::Tag::TableCell) => state.add_table_cell(),
            md::Event::End(md::Tag::Table(_)) => state.finish_table_building(),

            // Inline markup: the same marker is written on open and on close.
            md::Event::Start(md::Tag::Emphasis) | md::Event::End(md::Tag::Emphasis) => {
                state.toggle_emphasis()
            }
            md::Event::Start(md::Tag::Strong) | md::Event::End(md::Tag::Strong) => {
                state.toggle_strong()
            }
            md::Event::Start(md::Tag::Link(_, _, _)) => state.start_link(),
            md::Event::Start(md::Tag::Image(_, _, _)) => state.start_image(),
            md::Event::End(md::Tag::Link(_, href, _)) => state.end_link(&href),
            md::Event::End(md::Tag::Image(_, src, _)) => state.end_image(&src),

            // Leaf content; raw HTML is passed through as ordinary text.
            md::Event::Text(text) | md::Event::Html(text) => state.add_text(&text),
            md::Event::Code(code) => state.add_inline_code(&code),
            md::Event::SoftBreak => state.add_text(" "),
            md::Event::Rule => state.add_rule(),

            // Everything that closes the node under construction.
            md::Event::End(md::Tag::Paragraph)
            | md::Event::End(md::Tag::Heading(_))
            | md::Event::End(md::Tag::CodeBlock(_))
            | md::Event::HardBreak => state.finish_node(),
            md::Event::End(md::Tag::List(_)) => state.finish_list(),
            md::Event::End(md::Tag::Item) => state.finish_item(),

            // Events that need no action of their own.
            md::Event::Start(md::Tag::Paragraph)
            | md::Event::End(md::Tag::BlockQuote)
            | md::Event::End(md::Tag::TableHead)
            | md::Event::End(md::Tag::TableRow)
            | md::Event::End(md::Tag::TableCell) => (),

            // Events belonging to parser options that are not enabled above.
            md::Event::Start(md::Tag::FootnoteDefinition(_))
            | md::Event::End(md::Tag::FootnoteDefinition(_))
            | md::Event::FootnoteReference(_) => unimplemented!("footnotes disabled"),
            md::Event::Start(md::Tag::Strikethrough) | md::Event::End(md::Tag::Strikethrough) => {
                unimplemented!("strikethrough disabled")
            }
            md::Event::TaskListMarker(_) => unimplemented!("task lists disabled"),
        }
    }

    // Drop empty clusters, condense the rest, and separate them with blanks.
    let mut clusters: Vec<NodeCluster> = Vec::new();
    for cluster in state.nodes {
        if !cluster.is_empty() {
            clusters.push(condense(cluster));
        }
    }
    let nodes = clusters.join(&gemtext::Node::blank());

    let mut rendered: Vec<u8> = Vec::new();
    gemtext::render(nodes, &mut rendered).expect("gemtext::render somehow failed");
    String::from_utf8(rendered).expect("gemtext::render somehow produced invalid UTF-8")
}
/// A group of gemtext nodes that are rendered together; `convert` places a
/// blank node between consecutive clusters.
type NodeCluster = Vec<gemtext::Node>;
/// Collapses a cluster that consists of exactly a text node followed by a
/// link whose name equals that text into the link alone.
///
/// Any other cluster is returned unchanged.
fn condense(original: NodeCluster) -> NodeCluster {
    let text_duplicates_link = match original.as_slice() {
        [gemtext::Node::Text(text), gemtext::Node::Link {
            name: Some(name), ..
        }] => text == name,
        _ => false,
    };
    if text_duplicates_link {
        // Keep only the second node (the link); the duplicate text is dropped.
        original.into_iter().skip(1).collect()
    } else {
        original
    }
}
/// Kind of gemtext node currently being assembled by `State`.
///
/// `NodeType::construct` turns each variant into the `gemtext::Node` variant
/// of the same name.
enum NodeType {
/// Plain text line; also the value `NodeType::take` leaves behind.
Text,
/// Preformatted block; set for code blocks and for rendered tables.
Preformatted,
/// Heading with its level; `State::start_heading` only produces 1, 2 or 3.
Heading { level: u8 },
/// A single list entry.
ListItem,
/// A quoted line.
Quote,
}
impl NodeType {
fn take(&mut self) -> Self {
std::mem::replace(self, NodeType::Text)
}
fn construct(self, body: String) -> gemtext::Node {
use NodeType::*;
match self {
Text => gemtext::Node::Text(body),
Preformatted => gemtext::Node::Preformatted(body),
Heading { level } => gemtext::Node::Heading { level, body },
ListItem => gemtext::Node::ListItem(body),
Quote => gemtext::Node::Quote(body),
}
}
}
/// Mutable conversion state that `convert` feeds parser events into.
struct State {
// Finished nodes, grouped into clusters.
nodes: Vec<NodeCluster>,
// Text collected so far for the node under construction.
pending_node_content: String,
// Kind of the node under construction; reset to `Text` by `finish_node`.
pending_node_type: NodeType,
// Link nodes waiting to be appended after a finished node or list.
pending_other: Vec<gemtext::Node>,
// One label buffer per link/image opened by `start_link`/`start_image`.
link_text_stack: Vec<String>,
// Rows of cell texts for the table currently being built.
table: Vec<Vec<String>>,
// True between `start_table_building` and `finish_table_building`.
building_table: bool,
// `None` outside lists; `Some(0)` in a top-level list, +1 per nesting.
nested_list_level: Option<u8>,
// Item texts collected until the outermost list is finished.
list_items: Vec<String>,
}
impl State {
fn new() -> Self {
State {
nodes: vec![],
pending_node_content: String::new(),
pending_node_type: NodeType::Text,
pending_other: vec![],
link_text_stack: vec![],
table: vec![],
building_table: false,
nested_list_level: None,
list_items: vec![],
}
}
/// Makes the node under construction a heading.
///
/// Levels 1 and 2 are kept; every other level is mapped to 3.
fn start_heading(&mut self, level: u32) {
    let level: u8 = if level == 1 {
        1
    } else if level == 2 {
        2
    } else {
        3
    };
    self.pending_node_type = NodeType::Heading { level };
}
/// Marks that a table is being built. While the flag is set, `add_text` and
/// `add_inline_code` write into the current table cell (when not inside a
/// list) and the emphasis/strong markers are suppressed.
fn start_table_building(&mut self) {
self.building_table = true;
}
/// Appends a new, empty row to the table being built. `convert` calls this
/// for the table head as well as for every body row.
fn add_table_row(&mut self) {
self.table.push(vec![]);
}
/// Appends an empty cell to the last table row; does nothing when the table
/// has no row yet.
fn add_table_cell(&mut self) {
if let Some(last) = self.table.last_mut() {
last.push(String::new());
}
}
/// Makes the node under construction a quote.
fn start_block_quote(&mut self) {
self.pending_node_type = NodeType::Quote;
}
/// Makes the node under construction preformatted, unless a table is being
/// built, in which case nothing changes.
fn start_code_block(&mut self) {
    if self.building_table {
        return;
    }
    self.pending_node_type = NodeType::Preformatted;
}
/// Enters a list: the nesting level becomes 0 for a top-level list and is
/// incremented by one for each list opened inside another.
fn start_list(&mut self) {
    let depth = self.nested_list_level.map_or(0, |outer| outer + 1);
    self.nested_list_level = Some(depth);
}
/// Starts a new, empty list item; `add_text` and `add_inline_code` append to
/// it while a list is open.
fn start_list_item(&mut self) {
self.list_items.push(String::new());
}
/// Writes the emphasis marker `_`; called for both the start and the end of
/// an emphasis span. Nothing is written while a table is being built.
fn toggle_emphasis(&mut self) {
    if self.building_table {
        return;
    }
    self.add_text("_");
}
/// Writes the strong marker `**`; called for both the start and the end of a
/// strong span. Nothing is written while a table is being built.
fn toggle_strong(&mut self) {
    if self.building_table {
        return;
    }
    self.add_text("**");
}
/// Opens a link by pushing a fresh, empty label buffer; `add_text` appends to
/// every buffer on the stack when not inside a list or table.
fn start_link(&mut self) {
self.link_text_stack.push(String::new());
}
/// Opens an image: writes the `[image: ` prefix into the pending node text
/// and pushes a fresh label buffer that collects the image's text.
fn start_image(&mut self) {
    self.pending_node_content.push_str("[image: ");
    self.link_text_stack.push(String::new());
}
/// Finishes the pending node when a list item ends outside of any list.
///
/// While a list is open this does nothing: items are collected in
/// `list_items` and emitted by `finish_list`.
fn finish_item(&mut self) {
    if self.nested_list_level.is_some() {
        return;
    }
    self.finish_node();
}
/// Renders the collected rows as a text table and emits it as a
/// preformatted node.
///
/// The first collected row becomes the table header, the remaining rows the
/// body. Afterwards the table state is reset so a following table starts
/// from scratch.
fn finish_table_building(&mut self) {
    // Taking the rows resets `self.table` and frees `self` for later calls.
    let rows = std::mem::take(&mut self.table);
    let mut table = Table::new();
    if let Some(header) = rows.first() {
        table.set_header(header);
    }
    // `skip(1)` rather than `rows[1..]`: slicing panics when no row was
    // collected, whereas skipping simply yields nothing.
    table.add_rows(rows.iter().skip(1));
    self.building_table = false;
    self.pending_node_type = NodeType::Preformatted;
    self.pending_node_content += &table.to_string();
    self.finish_node();
}
/// Leaves the current list.
///
/// Closing a nested list only decrements the nesting level. Closing the
/// outermost list emits every collected item as a list-item node, appends the
/// links gathered while the list was open, and clears the nesting level.
///
/// # Panics
///
/// Panics if called while no list is open.
fn finish_list(&mut self) {
    let remaining = match self.nested_list_level {
        Some(0) => {
            // Move the items out instead of cloning them; this also leaves
            // `list_items` empty for the next list.
            for item in std::mem::take(&mut self.list_items) {
                self.pending_node_type = NodeType::ListItem;
                self.pending_node_content = item;
                self.finish_node();
            }
            // `finish_node` holds links back while a list is open, so they
            // are attached here, after the last item.
            self.force_links();
            None
        }
        Some(n) => Some(n - 1),
        None => unreachable!("How can you finish a list without level?"),
    };
    self.nested_list_level = remaining;
}
/// Closes the link opened by `start_link` and queues a link node to `href`.
///
/// Outside lists the link is named after the text collected since
/// `start_link`, falling back to `href` when no buffer is available. Inside a
/// list `add_text` writes to the list item rather than the label buffer, so
/// the link is named after `href` itself.
fn end_link(&mut self, href: &str) {
    // Always pop: `start_link` pushed a buffer unconditionally. Leaving it
    // behind for links inside lists would grow the stack with every such
    // link and make `add_text` keep appending to the stale buffers.
    let collected = self.link_text_stack.pop();
    let text = if self.nested_list_level.is_some() {
        href.to_string()
    } else {
        collected.unwrap_or_else(|| href.to_string())
    };
    self.pending_other.push(gemtext::Node::Link {
        to: href.to_string(),
        name: Some(text),
    });
}
/// Closes the image opened by `start_image`.
///
/// Writes the closing `]` into the pending node text and queues a link node
/// to `src` named `[image: <text>]`, where `<text>` is the collected label or
/// `src` itself when no label buffer is available.
fn end_image(&mut self, src: &str) {
    let label = match self.link_text_stack.pop() {
        Some(collected) => collected,
        None => src.to_string(),
    };
    self.pending_node_content.push(']');
    self.pending_other.push(gemtext::Node::Link {
        to: src.to_string(),
        name: Some(format!("[image: {}]", label)),
    });
}
/// Moves every queued link node to the end of the most recent cluster.
///
/// # Panics
///
/// Panics if no cluster exists yet.
fn force_links(&mut self) {
    self.nodes
        .last_mut()
        .expect("empty cluster list??")
        .append(&mut self.pending_other);
}
// Turns the pending content into a node and stores it. An empty node is still
// created when `pending_node_content` is empty.
//
// A list item that follows another list item joins that item's cluster; any
// other node opens a new cluster. Queued links are appended right after the
// node unless a list is open, in which case `finish_list` attaches them.
fn finish_node(&mut self) {
    let previous = self.nodes.last().and_then(|cluster| cluster.last());
    let continues_list = matches!(
        (&self.pending_node_type, previous),
        (NodeType::ListItem, Some(gemtext::Node::ListItem(_)))
    );
    if !continues_list {
        self.nodes.push(Vec::new());
    }

    let body = self.pending_node_content.trim_end().to_string();
    let node = self.pending_node_type.take().construct(body);
    let outside_list = self.nested_list_level.is_none();

    let cluster = self.nodes.last_mut().expect("empty cluster list??");
    cluster.push(node);
    if outside_list {
        cluster.extend(self.pending_other.drain(..));
    }
    self.pending_node_content.clear();
}
/// Appends `text` to whatever is currently being collected.
///
/// Destinations are checked in this order: the last list item while a list
/// is open; the last cell of the last table row while a table is being built
/// (with every `<br>` turned into a newline); otherwise the pending node text
/// and every open link/image label buffer.
fn add_text(&mut self, text: &str) {
    if self.nested_list_level.is_some() {
        if let Some(item) = self.list_items.last_mut() {
            item.push_str(text);
        }
        return;
    }

    if self.building_table {
        let cell = self.table.last_mut().and_then(|row| row.last_mut());
        if let Some(cell) = cell {
            cell.push_str(&text.replace("<br>", "\n"));
        }
        return;
    }

    for label in self.link_text_stack.iter_mut() {
        label.push_str(text);
    }
    self.pending_node_content.push_str(text);
}
/// Appends an inline code span.
///
/// While a table is being built the code goes into the current cell without
/// backticks, with `\_` unescaped to `_` and every `<br>` turned into a
/// newline. Otherwise it is wrapped in backticks and appended to the last
/// list item (when a list is open) or to the pending node text. Unlike
/// `add_text`, this does not append to the link label buffers.
fn add_inline_code(&mut self, code: &str) {
    if self.building_table {
        let cell = self.table.last_mut().and_then(|row| row.last_mut());
        if let Some(cell) = cell {
            cell.push_str(&code.replace("\\_", "_").replace("<br>", "\n"));
        }
        return;
    }

    let quoted = format!("`{}`", code);
    if self.nested_list_level.is_some() {
        if let Some(item) = self.list_items.last_mut() {
            item.push_str(&quoted);
        }
    } else {
        self.pending_node_content.push_str(&quoted);
    }
}
/// Emits a horizontal rule as the text `-----` and finishes the node.
fn add_rule(&mut self) {
self.add_text("-----");
self.finish_node();
}
}
/// Converts one Markdown sample containing headings, a rule, a code block, a
/// quote, an ordered list, inline code, emphasis, links and images, and
/// compares the result with the expected gemtext.
// NOTE(review): both fixtures are whitespace-sensitive and appear to have
// lost their blank lines in this copy — confirm against the upstream file.
#[cfg(test)]
#[test]
fn test_kitchen_sink() {
let markdown_demo = r#"
# h1
## h2
### h3
---
```
sample
text
```
> implying
1. don't pick up the phone
2. don't let him in
3. don't be his friend
some `code` and some `` fancy`code `` and *italics*
and __bold__ and ***semi-overlapping* bold *and* italics**
this [paragraph](http://example.com) has [several links](http://example.org)
and an ![inline image](a://url) in it
![this one's just an image](https://placekitten.com/200/300)
"#;
let gemtext_demo = r#"# h1
## h2
### h3
-----
```
sample
text
```
> implying
* don't pick up the phone
* don't let him in
* don't be his friend
some `code` and some `fancy`code` and _italics_ and **bold** and **_semi-overlapping_ bold _and_ italics**
this paragraph has several links and an [image: inline image] in it
=> http://example.com paragraph
=> http://example.org several links
=> a://url [image: inline image]
=> https://placekitten.com/200/300 [image: this one's just an image]
"#;
assert_eq!(convert(markdown_demo), gemtext_demo);
}
/// Checks that an ordered list directly after a block quote is emitted as a
/// separate group of `*` items, with a blank line after the quote.
#[cfg(test)]
#[test]
fn test_list_start() {
let markdown = "> hi\n\n1. uh\n2. ah\n";
let gemtext = "> hi\n\n* uh\n* ah\n";
assert_eq!(convert(markdown), gemtext);
}
/// Converts `../README.md` and compares the result with `../README.gmi`;
/// both files are embedded at compile time.
#[cfg(test)]
#[test]
fn test_readme() {
let markdown = include_str!("../README.md");
let gemtext = include_str!("../README.gmi");
assert_eq!(convert(markdown), gemtext);
}
/// Converts a single Markdown table and compares the result, after trimming
/// both sides, with the expected preformatted text table.
// NOTE(review): column padding inside the fixtures looks collapsed in this
// copy — confirm the exact spacing against the upstream file.
#[cfg(test)]
#[test]
fn test_single_table() {
let markdown = r#"
| Column 1 | Col 2 |
| -------- | ----- |
| ten | 10 |
| veinte | 20 |
"#;
let gemtext = r#"
```
+----------+-------+
| Column 1 | Col 2 |
+==================+
| ten | 10 |
|----------+-------|
| veinte | 20 |
+----------+-------+
```
"#;
assert_eq!(convert(markdown).trim(), gemtext.trim());
}
/// Converts Markdown holding two tables and expects two separate
/// preformatted text tables; both sides are trimmed before comparing.
// NOTE(review): the fixture presumably had a blank line between the two
// tables that is missing in this copy — confirm against the upstream file.
#[cfg(test)]
#[test]
fn test_multi_tables() {
let markdown = r#"
| Column 1 | Col 2 |
| -------- | ----- |
| ten | 10 |
| veinte | 20 |
| Column 1 | Col 2 |
| -------- | ----- |
| 30 | 40 |
| 50 | 60 |
"#;
let gemtext = r#"
```
+----------+-------+
| Column 1 | Col 2 |
+==================+
| ten | 10 |
|----------+-------|
| veinte | 20 |
+----------+-------+
```
```
+----------+-------+
| Column 1 | Col 2 |
+==================+
| 30 | 40 |
|----------+-------|
| 50 | 60 |
+----------+-------+
```
"#;
assert_eq!(convert(markdown).trim(), gemtext.trim());
}
/// Converts a list whose sub-items contain links and expects every item on
/// its own `*` line, followed by the links, each named after its URL; both
/// sides are trimmed before comparing.
// NOTE(review): the sub-items were presumably indented in the original
// fixture; the indentation is missing in this copy — confirm upstream.
#[cfg(test)]
#[test]
fn test_nested_list() {
let markdown = r#"
- item 1
- item 2
- subitem 2.1
- subitem [2.2](https://example.com)
- subitem [2.3](https://example.com)
- item 3
"#;
let gemtext = r#"
* item 1
* item 2
* subitem 2.1
* subitem 2.2
* subitem 2.3
* item 3
=> https://example.com https://example.com
=> https://example.com https://example.com
"#;
assert_eq!(convert(markdown).trim(), gemtext.trim());
}