markdown_encoder.rs 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. use crate::core::{AttributeKey, AttributeValue, Attributes, OperationIterator, Operations};
  2. use crate::text_delta::{is_block, TextAttributeKey};
  3. use std::collections::HashMap;
  4. use std::str::FromStr;
  5. use strum_macros::EnumString;
/// Code point of the line feed character (`'\n'`); `handle_line` compares each
/// `char as i32` of an operation's text against it to find line boundaries.
const LINEFEEDASCIICODE: i32 = 0x0A;
  7. #[cfg(test)]
  8. mod tests {
  9. use crate::codec::markdown::markdown_encoder::markdown_encoder;
  10. use crate::text_delta::TextDelta;
  11. #[test]
  12. fn markdown_encoder_header_1_test() {
  13. let json = r#"[{"insert":"header 1"},{"insert":"\n","attributes":{"header":1}}]"#;
  14. let delta = TextDelta::from_json(json).unwrap();
  15. let md = markdown_encoder(&delta);
  16. assert_eq!(md, "# header 1\n");
  17. }
  18. #[test]
  19. fn markdown_encoder_header_2_test() {
  20. let json = r#"[{"insert":"header 2"},{"insert":"\n","attributes":{"header":2}}]"#;
  21. let delta = TextDelta::from_json(json).unwrap();
  22. let md = markdown_encoder(&delta);
  23. assert_eq!(md, "## header 2\n");
  24. }
  25. #[test]
  26. fn markdown_encoder_header_3_test() {
  27. let json = r#"[{"insert":"header 3"},{"insert":"\n","attributes":{"header":3}}]"#;
  28. let delta = TextDelta::from_json(json).unwrap();
  29. let md = markdown_encoder(&delta);
  30. assert_eq!(md, "### header 3\n");
  31. }
  32. #[test]
  33. fn markdown_encoder_bold_italics_underlined_test() {
  34. let json = r#"[{"insert":"bold","attributes":{"bold":true}},{"insert":" "},{"insert":"italics","attributes":{"italic":true}},{"insert":" "},{"insert":"underlined","attributes":{"underline":true}},{"insert":" "},{"insert":"\n","attributes":{"header":3}}]"#;
  35. let delta = TextDelta::from_json(json).unwrap();
  36. let md = markdown_encoder(&delta);
  37. assert_eq!(md, "### **bold** _italics_ <u>underlined</u> \n");
  38. }
  39. #[test]
  40. fn markdown_encoder_strikethrough_highlight_test() {
  41. let json = r##"[{"insert":"strikethrough","attributes":{"strike":true}},{"insert":" "},{"insert":"highlighted","attributes":{"background":"#ffefe3"}},{"insert":"\n"}]"##;
  42. let delta = TextDelta::from_json(json).unwrap();
  43. let md = markdown_encoder(&delta);
  44. assert_eq!(md, "~~strikethrough~~ <mark>highlighted</mark>\n");
  45. }
  46. #[test]
  47. fn markdown_encoder_numbered_list_test() {
  48. let json = r#"[{"insert":"numbered list\nitem 1"},{"insert":"\n","attributes":{"list":"ordered"}},{"insert":"item 2"},{"insert":"\n","attributes":{"list":"ordered"}},{"insert":"item3"},{"insert":"\n","attributes":{"list":"ordered"}}]"#;
  49. let delta = TextDelta::from_json(json).unwrap();
  50. let md = markdown_encoder(&delta);
  51. assert_eq!(md, "numbered list\n\n1. item 1\n1. item 2\n1. item3\n");
  52. }
  53. #[test]
  54. fn markdown_encoder_bullet_list_test() {
  55. let json = r#"[{"insert":"bullet list\nitem1"},{"insert":"\n","attributes":{"list":"bullet"}}]"#;
  56. let delta = TextDelta::from_json(json).unwrap();
  57. let md = markdown_encoder(&delta);
  58. assert_eq!(md, "bullet list\n\n* item1\n");
  59. }
  60. #[test]
  61. fn markdown_encoder_check_list_test() {
  62. let json = r#"[{"insert":"check list\nchecked"},{"insert":"\n","attributes":{"list":"checked"}},{"insert":"unchecked"},{"insert":"\n","attributes":{"list":"unchecked"}}]"#;
  63. let delta = TextDelta::from_json(json).unwrap();
  64. let md = markdown_encoder(&delta);
  65. assert_eq!(md, "check list\n\n- [x] checked\n\n- [ ] unchecked\n");
  66. }
  67. #[test]
  68. fn markdown_encoder_code_test() {
  69. let json = r#"[{"insert":"code this "},{"insert":"print(\"hello world\")","attributes":{"code":true}},{"insert":"\n"}]"#;
  70. let delta = TextDelta::from_json(json).unwrap();
  71. let md = markdown_encoder(&delta);
  72. assert_eq!(md, "code this `print(\"hello world\")`\n");
  73. }
  74. #[test]
  75. fn markdown_encoder_quote_block_test() {
  76. let json = r#"[{"insert":"this is a quote block"},{"insert":"\n","attributes":{"blockquote":true}}]"#;
  77. let delta = TextDelta::from_json(json).unwrap();
  78. let md = markdown_encoder(&delta);
  79. assert_eq!(md, "> this is a quote block\n");
  80. }
  81. #[test]
  82. fn markdown_encoder_link_test() {
  83. let json = r#"[{"insert":"appflowy","attributes":{"link":"https://www.appflowy.io/"}},{"insert":"\n"}]"#;
  84. let delta = TextDelta::from_json(json).unwrap();
  85. let md = markdown_encoder(&delta);
  86. assert_eq!(md, "[appflowy](https://www.appflowy.io/)\n");
  87. }
  88. }
/// A single attribute entry: a key together with the value stored under it.
///
/// `markdown_encoder` keeps one of these (wrapped in `Option`) as the block style of the
/// lines currently being accumulated.
struct Attribute {
    /// Key of the attribute.
    key: AttributeKey,
    /// Value associated with `key`.
    value: AttributeValue,
}
  93. pub fn markdown_encoder(delta: &Operations<Attributes>) -> String {
  94. let mut markdown_buffer = String::new();
  95. let mut line_buffer = String::new();
  96. let mut current_inline_style = Attributes::default();
  97. let mut current_block_lines: Vec<String> = Vec::new();
  98. let mut iterator = OperationIterator::new(delta);
  99. let mut current_block_style: Option<Attribute> = None;
  100. while iterator.has_next() {
  101. let operation = iterator.next().unwrap();
  102. let operation_data = operation.get_data();
  103. if !operation_data.contains('\n') {
  104. handle_inline(
  105. &mut current_inline_style,
  106. &mut line_buffer,
  107. String::from(operation_data),
  108. operation.get_attributes(),
  109. )
  110. } else {
  111. handle_line(
  112. &mut line_buffer,
  113. &mut markdown_buffer,
  114. String::from(operation_data),
  115. operation.get_attributes(),
  116. &mut current_block_style,
  117. &mut current_block_lines,
  118. &mut current_inline_style,
  119. )
  120. }
  121. }
  122. handle_block(&mut current_block_style, &mut current_block_lines, &mut markdown_buffer);
  123. markdown_buffer
  124. }
  125. fn handle_inline(current_inline_style: &mut Attributes, buffer: &mut String, mut text: String, attributes: Attributes) {
  126. let mut marked_for_removal: HashMap<AttributeKey, AttributeValue> = HashMap::new();
  127. for key in current_inline_style
  128. .clone()
  129. .keys()
  130. .collect::<Vec<&AttributeKey>>()
  131. .into_iter()
  132. .rev()
  133. {
  134. if let Some(attribute) = TextAttributeKey::from_str(key) {
  135. if is_block(&attribute) {
  136. continue;
  137. }
  138. }
  139. if attributes.contains_key(key) {
  140. continue;
  141. }
  142. let padding = trim_right(buffer);
  143. write_attribute(buffer, key, current_inline_style.get(key.as_ref()).unwrap(), true);
  144. if !padding.is_empty() {
  145. buffer.push_str(&padding)
  146. }
  147. marked_for_removal.insert(key.clone(), current_inline_style.get(key).unwrap().clone());
  148. }
  149. for (marked_for_removal_key, marked_for_removal_value) in &marked_for_removal {
  150. current_inline_style.retain(|inline_style_key, inline_style_value| {
  151. inline_style_key != marked_for_removal_key && inline_style_value != marked_for_removal_value
  152. })
  153. }
  154. for (key, value) in attributes.iter() {
  155. if let Some(attribute) = TextAttributeKey::from_str(key) {
  156. if is_block(&attribute) {
  157. continue;
  158. }
  159. }
  160. if current_inline_style.contains_key(key) {
  161. continue;
  162. }
  163. let original_text = text.clone();
  164. text = text.trim_start().to_string();
  165. let padding = " ".repeat(original_text.len() - text.len());
  166. if !padding.is_empty() {
  167. buffer.push_str(&padding)
  168. }
  169. write_attribute(buffer, key, value, false)
  170. }
  171. buffer.push_str(&text);
  172. *current_inline_style = attributes;
  173. }
  174. fn trim_right(buffer: &mut String) -> String {
  175. let text = buffer.clone();
  176. if !text.ends_with(' ') {
  177. return String::from("");
  178. }
  179. let result = text.trim_end();
  180. buffer.clear();
  181. buffer.push_str(result);
  182. " ".repeat(text.len() - result.len())
  183. }
  184. fn write_attribute(buffer: &mut String, key: &AttributeKey, value: &AttributeValue, close: bool) {
  185. let key = TextAttributeKey::from_str(key);
  186. match key {
  187. TextAttributeKey::Bold => buffer.push_str("**"),
  188. TextAttributeKey::Italic => buffer.push('_'),
  189. TextAttributeKey::Underline => {
  190. if close {
  191. buffer.push_str("</u>")
  192. } else {
  193. buffer.push_str("<u>")
  194. }
  195. }
  196. TextAttributeKey::StrikeThrough => buffer.push_str("~~"),
  197. TextAttributeKey::Link => {
  198. if close {
  199. buffer.push_str(format!("]({})", value.0.as_ref().unwrap()).as_str())
  200. } else {
  201. buffer.push('[')
  202. }
  203. }
  204. TextAttributeKey::Background => {
  205. if close {
  206. buffer.push_str("</mark>")
  207. } else {
  208. buffer.push_str("<mark>")
  209. }
  210. }
  211. TextAttributeKey::CodeBlock => {
  212. if close {
  213. buffer.push_str("\n```")
  214. } else {
  215. buffer.push_str("```\n")
  216. }
  217. }
  218. TextAttributeKey::InlineCode => buffer.push('`'),
  219. _ => {}
  220. }
  221. }
  222. fn handle_line(
  223. buffer: &mut String,
  224. markdown_buffer: &mut String,
  225. data: String,
  226. attributes: Attributes,
  227. current_block_style: &mut Option<Attribute>,
  228. current_block_lines: &mut Vec<String>,
  229. current_inline_style: &mut Attributes,
  230. ) {
  231. let mut span = String::new();
  232. for c in data.chars() {
  233. if (c as i32) == LINEFEEDASCIICODE {
  234. if !span.is_empty() {
  235. handle_inline(current_inline_style, buffer, span.clone(), attributes.clone());
  236. }
  237. handle_inline(current_inline_style, buffer, String::from(""), Attributes::default());
  238. let line_block_key = attributes.keys().find(|key| {
  239. if let Some(attribute) = TextAttributeKey::from_str(key) {
  240. is_block(&attribute)
  241. } else {
  242. false
  243. }
  244. });
  245. match (line_block_key, &current_block_style) {
  246. (Some(line_block_key), Some(current_block_style))
  247. if *line_block_key == current_block_style.key
  248. && *attributes.get(line_block_key).unwrap() == current_block_style.value =>
  249. {
  250. current_block_lines.push(buffer.clone());
  251. }
  252. (None, None) => {
  253. current_block_lines.push(buffer.clone());
  254. }
  255. _ => {
  256. handle_block(current_block_style, current_block_lines, markdown_buffer);
  257. current_block_lines.clear();
  258. current_block_lines.push(buffer.clone());
  259. match line_block_key {
  260. None => *current_block_style = None,
  261. Some(line_block_key) => {
  262. *current_block_style = Some(Attribute {
  263. key: line_block_key.clone(),
  264. value: attributes.get(line_block_key).unwrap().clone(),
  265. })
  266. }
  267. }
  268. }
  269. }
  270. buffer.clear();
  271. span.clear();
  272. } else {
  273. span.push(c);
  274. }
  275. }
  276. if !span.is_empty() {
  277. handle_inline(current_inline_style, buffer, span.clone(), attributes)
  278. }
  279. }
  280. fn handle_block(
  281. block_style: &mut Option<Attribute>,
  282. current_block_lines: &mut Vec<String>,
  283. markdown_buffer: &mut String,
  284. ) {
  285. if current_block_lines.is_empty() {
  286. return;
  287. }
  288. if !markdown_buffer.is_empty() {
  289. markdown_buffer.push('\n')
  290. }
  291. match block_style {
  292. None => {
  293. markdown_buffer.push_str(&current_block_lines.join("\n"));
  294. markdown_buffer.push('\n');
  295. }
  296. Some(block_style) if block_style.key == AttributeKey::CodeBlock => {
  297. write_attribute(markdown_buffer, &block_style.key, &block_style.value, false);
  298. markdown_buffer.push_str(&current_block_lines.join("\n"));
  299. write_attribute(markdown_buffer, &block_style.key, &block_style.value, true);
  300. markdown_buffer.push('\n');
  301. }
  302. Some(block_style) => {
  303. for line in current_block_lines {
  304. write_block_tag(markdown_buffer, block_style, false);
  305. markdown_buffer.push_str(line);
  306. markdown_buffer.push('\n');
  307. }
  308. }
  309. }
  310. }
  311. fn write_block_tag(buffer: &mut String, block: &Attribute, close: bool) {
  312. if close {
  313. return;
  314. }
  315. if block.key == AttributeKey::BlockQuote {
  316. buffer.push_str("> ");
  317. } else if block.key == AttributeKey::List {
  318. if block.value.0.as_ref().unwrap().eq("bullet") {
  319. buffer.push_str("* ");
  320. } else if block.value.0.as_ref().unwrap().eq("checked") {
  321. buffer.push_str("- [x] ");
  322. } else if block.value.0.as_ref().unwrap().eq("unchecked") {
  323. buffer.push_str("- [ ] ");
  324. } else if block.value.0.as_ref().unwrap().eq("ordered") {
  325. buffer.push_str("1. ");
  326. } else {
  327. buffer.push_str("* ");
  328. }
  329. } else if block.key == AttributeKey::Header {
  330. if block.value.0.as_ref().unwrap().eq("1") {
  331. buffer.push_str("# ");
  332. } else if block.value.0.as_ref().unwrap().eq("2") {
  333. buffer.push_str("## ");
  334. } else if block.value.0.as_ref().unwrap().eq("3") {
  335. buffer.push_str("### ");
  336. } else if block.key == AttributeKey::List {
  337. }
  338. }
  339. }