rev_persistence.rs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. use crate::cache::{
  2. disk::{RevisionChangeset, RevisionDiskCache},
  3. memory::RevisionMemoryCacheDelegate,
  4. };
  5. use crate::disk::{RevisionState, SyncRecord};
  6. use crate::memory::RevisionMemoryCache;
  7. use crate::RevisionMergeable;
  8. use flowy_error::{internal_error, FlowyError, FlowyResult};
  9. use flowy_http_model::revision::{Revision, RevisionRange};
  10. use std::collections::VecDeque;
  11. use std::{borrow::Cow, sync::Arc};
  12. use tokio::sync::RwLock;
  13. use tokio::task::spawn_blocking;
  14. pub const REVISION_WRITE_INTERVAL_IN_MILLIS: u64 = 600;
  15. #[derive(Clone)]
  16. pub struct RevisionPersistenceConfiguration {
  17. merge_threshold: usize,
  18. merge_lagging: bool,
  19. }
  20. impl RevisionPersistenceConfiguration {
  21. pub fn new(merge_threshold: usize, merge_lagging: bool) -> Self {
  22. debug_assert!(merge_threshold > 1);
  23. if merge_threshold > 1 {
  24. Self {
  25. merge_threshold,
  26. merge_lagging,
  27. }
  28. } else {
  29. Self {
  30. merge_threshold: 100,
  31. merge_lagging,
  32. }
  33. }
  34. }
  35. }
  36. impl std::default::Default for RevisionPersistenceConfiguration {
  37. fn default() -> Self {
  38. Self {
  39. merge_threshold: 100,
  40. merge_lagging: false,
  41. }
  42. }
  43. }
  44. pub struct RevisionPersistence<Connection> {
  45. user_id: String,
  46. object_id: String,
  47. disk_cache: Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>,
  48. memory_cache: Arc<RevisionMemoryCache>,
  49. sync_seq: RwLock<DeferSyncSequence>,
  50. configuration: RevisionPersistenceConfiguration,
  51. }
  52. impl<Connection> RevisionPersistence<Connection>
  53. where
  54. Connection: 'static,
  55. {
  56. pub fn new<C>(
  57. user_id: &str,
  58. object_id: &str,
  59. disk_cache: C,
  60. configuration: RevisionPersistenceConfiguration,
  61. ) -> RevisionPersistence<Connection>
  62. where
  63. C: 'static + RevisionDiskCache<Connection, Error = FlowyError>,
  64. {
  65. let disk_cache = Arc::new(disk_cache) as Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>;
  66. Self::from_disk_cache(user_id, object_id, disk_cache, configuration)
  67. }
  68. pub fn from_disk_cache(
  69. user_id: &str,
  70. object_id: &str,
  71. disk_cache: Arc<dyn RevisionDiskCache<Connection, Error = FlowyError>>,
  72. configuration: RevisionPersistenceConfiguration,
  73. ) -> RevisionPersistence<Connection> {
  74. let object_id = object_id.to_owned();
  75. let user_id = user_id.to_owned();
  76. let sync_seq = RwLock::new(DeferSyncSequence::new());
  77. let memory_cache = Arc::new(RevisionMemoryCache::new(&object_id, Arc::new(disk_cache.clone())));
  78. Self {
  79. user_id,
  80. object_id,
  81. disk_cache,
  82. memory_cache,
  83. sync_seq,
  84. configuration,
  85. }
  86. }
  87. /// Save the revision that comes from remote to disk.
  88. #[tracing::instrument(level = "trace", skip(self, revision), fields(rev_id, object_id=%self.object_id), err)]
  89. pub(crate) async fn add_ack_revision(&self, revision: &Revision) -> FlowyResult<()> {
  90. tracing::Span::current().record("rev_id", &revision.rev_id);
  91. self.add(revision.clone(), RevisionState::Ack, true).await
  92. }
  93. /// Append the revision that already existed in the local DB state to sync sequence
  94. #[tracing::instrument(level = "trace", skip(self), fields(rev_id, object_id=%self.object_id), err)]
  95. pub(crate) async fn sync_revision(&self, revision: &Revision) -> FlowyResult<()> {
  96. tracing::Span::current().record("rev_id", &revision.rev_id);
  97. self.add(revision.clone(), RevisionState::Sync, false).await?;
  98. self.sync_seq.write().await.recv(revision.rev_id)?;
  99. Ok(())
  100. }
  101. #[tracing::instrument(level = "trace", skip_all, err)]
  102. pub async fn compact_lagging_revisions<'a>(
  103. &'a self,
  104. rev_compress: &Arc<dyn RevisionMergeable + 'a>,
  105. ) -> FlowyResult<()> {
  106. if !self.configuration.merge_lagging {
  107. return Ok(());
  108. }
  109. let mut sync_seq = self.sync_seq.write().await;
  110. let compact_seq = sync_seq.compact();
  111. if !compact_seq.is_empty() {
  112. let range = RevisionRange {
  113. start: *compact_seq.front().unwrap(),
  114. end: *compact_seq.back().unwrap(),
  115. };
  116. let revisions = self.revisions_in_range(&range).await?;
  117. debug_assert_eq!(range.len() as usize, revisions.len());
  118. // compact multiple revisions into one
  119. let merged_revision = rev_compress.merge_revisions(&self.user_id, &self.object_id, revisions)?;
  120. tracing::Span::current().record("rev_id", &merged_revision.rev_id);
  121. let _ = sync_seq.recv(merged_revision.rev_id)?;
  122. // replace the revisions in range with compact revision
  123. self.compact(&range, merged_revision).await?;
  124. }
  125. Ok(())
  126. }
  127. /// Save the revision to disk and append it to the end of the sync sequence.
  128. #[tracing::instrument(level = "trace", skip_all, fields(rev_id, compact_range, object_id=%self.object_id), err)]
  129. pub(crate) async fn add_sync_revision<'a>(
  130. &'a self,
  131. new_revision: &'a Revision,
  132. rev_compress: &Arc<dyn RevisionMergeable + 'a>,
  133. ) -> FlowyResult<i64> {
  134. let mut sync_seq = self.sync_seq.write().await;
  135. let compact_length = sync_seq.compact_length;
  136. // Before the new_revision is pushed into the sync_seq, we check if the current `compact_length` of the
  137. // sync_seq is less equal to or greater than the merge threshold. If yes, it's needs to merged
  138. // with the new_revision into one revision.
  139. let mut compact_seq = VecDeque::default();
  140. // tracing::info!("{}", compact_seq)
  141. if compact_length >= self.configuration.merge_threshold - 1 {
  142. compact_seq.extend(sync_seq.compact());
  143. }
  144. if !compact_seq.is_empty() {
  145. let range = RevisionRange {
  146. start: *compact_seq.front().unwrap(),
  147. end: *compact_seq.back().unwrap(),
  148. };
  149. tracing::Span::current().record("compact_range", &format!("{}", range).as_str());
  150. let mut revisions = self.revisions_in_range(&range).await?;
  151. debug_assert_eq!(range.len() as usize, revisions.len());
  152. // append the new revision
  153. revisions.push(new_revision.clone());
  154. // compact multiple revisions into one
  155. let merged_revision = rev_compress.merge_revisions(&self.user_id, &self.object_id, revisions)?;
  156. let rev_id = merged_revision.rev_id;
  157. tracing::Span::current().record("rev_id", &merged_revision.rev_id);
  158. let _ = sync_seq.recv(merged_revision.rev_id)?;
  159. // replace the revisions in range with compact revision
  160. self.compact(&range, merged_revision).await?;
  161. Ok(rev_id)
  162. } else {
  163. tracing::Span::current().record("rev_id", &new_revision.rev_id);
  164. self.add(new_revision.clone(), RevisionState::Sync, true).await?;
  165. sync_seq.merge_recv(new_revision.rev_id)?;
  166. Ok(new_revision.rev_id)
  167. }
  168. }
  169. /// Remove the revision with rev_id from the sync sequence.
  170. pub(crate) async fn ack_revision(&self, rev_id: i64) -> FlowyResult<()> {
  171. if self.sync_seq.write().await.ack(&rev_id).is_ok() {
  172. self.memory_cache.ack(&rev_id).await;
  173. }
  174. Ok(())
  175. }
  176. pub(crate) async fn next_sync_revision(&self) -> FlowyResult<Option<Revision>> {
  177. match self.sync_seq.read().await.next_rev_id() {
  178. None => Ok(None),
  179. Some(rev_id) => Ok(self.get(rev_id).await.map(|record| record.revision)),
  180. }
  181. }
  182. pub(crate) async fn next_sync_rev_id(&self) -> Option<i64> {
  183. self.sync_seq.read().await.next_rev_id()
  184. }
  185. pub(crate) fn number_of_sync_records(&self) -> usize {
  186. self.memory_cache.number_of_sync_records()
  187. }
  188. pub(crate) fn number_of_records_in_disk(&self) -> usize {
  189. match self.disk_cache.read_revision_records(&self.object_id, None) {
  190. Ok(records) => records.len(),
  191. Err(e) => {
  192. tracing::error!("Read revision records failed: {:?}", e);
  193. 0
  194. }
  195. }
  196. }
  197. /// The cache gets reset while it conflicts with the remote revisions.
  198. #[tracing::instrument(level = "trace", skip(self, revisions), err)]
  199. pub(crate) async fn reset(&self, revisions: Vec<Revision>) -> FlowyResult<()> {
  200. let records = revisions
  201. .into_iter()
  202. .map(|revision| SyncRecord {
  203. revision,
  204. state: RevisionState::Sync,
  205. write_to_disk: false,
  206. })
  207. .collect::<Vec<_>>();
  208. let _ = self
  209. .disk_cache
  210. .delete_and_insert_records(&self.object_id, None, records.clone())?;
  211. let _ = self.memory_cache.reset_with_revisions(records).await;
  212. self.sync_seq.write().await.clear();
  213. Ok(())
  214. }
  215. async fn add(&self, revision: Revision, state: RevisionState, write_to_disk: bool) -> FlowyResult<()> {
  216. if self.memory_cache.contains(&revision.rev_id) {
  217. tracing::warn!("Duplicate revision: {}:{}-{:?}", self.object_id, revision.rev_id, state);
  218. return Ok(());
  219. }
  220. let record = SyncRecord {
  221. revision,
  222. state,
  223. write_to_disk,
  224. };
  225. self.memory_cache.add(Cow::Owned(record)).await;
  226. Ok(())
  227. }
  228. async fn compact(&self, range: &RevisionRange, new_revision: Revision) -> FlowyResult<()> {
  229. self.memory_cache.remove_with_range(range);
  230. let rev_ids = range.to_rev_ids();
  231. let _ = self
  232. .disk_cache
  233. .delete_revision_records(&self.object_id, Some(rev_ids))?;
  234. self.add(new_revision, RevisionState::Sync, true).await?;
  235. Ok(())
  236. }
  237. pub async fn get(&self, rev_id: i64) -> Option<SyncRecord> {
  238. match self.memory_cache.get(&rev_id).await {
  239. None => match self
  240. .disk_cache
  241. .read_revision_records(&self.object_id, Some(vec![rev_id]))
  242. {
  243. Ok(mut records) => {
  244. let record = records.pop()?;
  245. assert!(records.is_empty());
  246. Some(record)
  247. }
  248. Err(e) => {
  249. tracing::error!("{}", e);
  250. None
  251. }
  252. },
  253. Some(revision) => Some(revision),
  254. }
  255. }
  256. pub fn load_all_records(&self, object_id: &str) -> FlowyResult<Vec<SyncRecord>> {
  257. self.disk_cache.read_revision_records(object_id, None)
  258. }
  259. // Read the revision which rev_id >= range.start && rev_id <= range.end
  260. pub async fn revisions_in_range(&self, range: &RevisionRange) -> FlowyResult<Vec<Revision>> {
  261. let range = range.clone();
  262. let mut records = self.memory_cache.get_with_range(&range).await?;
  263. let range_len = range.len() as usize;
  264. if records.len() != range_len {
  265. let disk_cache = self.disk_cache.clone();
  266. let object_id = self.object_id.clone();
  267. records = spawn_blocking(move || disk_cache.read_revision_records_with_range(&object_id, &range))
  268. .await
  269. .map_err(internal_error)??;
  270. if records.len() != range_len {
  271. // #[cfg(debug_assertions)]
  272. // records.iter().for_each(|record| {
  273. // let delta = PlainDelta::from_bytes(&record.revision.delta_data).unwrap();
  274. // tracing::trace!("{}", delta.to_string());
  275. // });
  276. tracing::error!("Expect revision len {},but receive {}", range_len, records.len());
  277. }
  278. }
  279. Ok(records
  280. .into_iter()
  281. .map(|record| record.revision)
  282. .collect::<Vec<Revision>>())
  283. }
  284. }
  285. impl<C> RevisionMemoryCacheDelegate for Arc<dyn RevisionDiskCache<C, Error = FlowyError>> {
  286. fn send_sync(&self, mut records: Vec<SyncRecord>) -> FlowyResult<()> {
  287. records.retain(|record| record.write_to_disk);
  288. if !records.is_empty() {
  289. tracing::Span::current().record(
  290. "checkpoint_result",
  291. &format!("{} records were saved", records.len()).as_str(),
  292. );
  293. let _ = self.create_revision_records(records)?;
  294. }
  295. Ok(())
  296. }
  297. fn receive_ack(&self, object_id: &str, rev_id: i64) {
  298. let changeset = RevisionChangeset {
  299. object_id: object_id.to_string(),
  300. rev_id: rev_id.into(),
  301. state: RevisionState::Ack,
  302. };
  303. match self.update_revision_record(vec![changeset]) {
  304. Ok(_) => {}
  305. Err(e) => tracing::error!("{}", e),
  306. }
  307. }
  308. }
  309. #[derive(Default)]
  310. struct DeferSyncSequence {
  311. rev_ids: VecDeque<i64>,
  312. compact_index: Option<usize>,
  313. compact_length: usize,
  314. }
  315. impl DeferSyncSequence {
  316. fn new() -> Self {
  317. DeferSyncSequence::default()
  318. }
  319. /// Pushes the new_rev_id to the end of the list and marks this new_rev_id is mergeable.
  320. ///
  321. /// When calling `compact` method, it will return a list of revision ids started from
  322. /// the `compact_start_pos`, and ends with the `compact_length`.
  323. fn merge_recv(&mut self, new_rev_id: i64) -> FlowyResult<()> {
  324. let _ = self.recv(new_rev_id)?;
  325. self.compact_length += 1;
  326. if self.compact_index.is_none() && !self.rev_ids.is_empty() {
  327. self.compact_index = Some(self.rev_ids.len() - 1);
  328. }
  329. Ok(())
  330. }
  331. /// Pushes the new_rev_id to the end of the list.
  332. fn recv(&mut self, new_rev_id: i64) -> FlowyResult<()> {
  333. // The last revision's rev_id must be greater than the new one.
  334. if let Some(rev_id) = self.rev_ids.back() {
  335. if *rev_id >= new_rev_id {
  336. return Err(
  337. FlowyError::internal().context(format!("The new revision's id must be greater than {}", rev_id))
  338. );
  339. }
  340. }
  341. self.rev_ids.push_back(new_rev_id);
  342. Ok(())
  343. }
  344. /// Removes the rev_id from the list
  345. fn ack(&mut self, rev_id: &i64) -> FlowyResult<()> {
  346. let cur_rev_id = self.rev_ids.front().cloned();
  347. if let Some(pop_rev_id) = cur_rev_id {
  348. if &pop_rev_id != rev_id {
  349. let desc = format!(
  350. "The ack rev_id:{} is not equal to the current rev_id:{}",
  351. rev_id, pop_rev_id
  352. );
  353. return Err(FlowyError::internal().context(desc));
  354. }
  355. let mut compact_rev_id = None;
  356. if let Some(compact_index) = self.compact_index {
  357. compact_rev_id = self.rev_ids.get(compact_index).cloned();
  358. }
  359. let pop_rev_id = self.rev_ids.pop_front();
  360. if let (Some(compact_rev_id), Some(pop_rev_id)) = (compact_rev_id, pop_rev_id) {
  361. if compact_rev_id <= pop_rev_id && self.compact_length > 0 {
  362. self.compact_length -= 1;
  363. }
  364. }
  365. }
  366. Ok(())
  367. }
  368. fn next_rev_id(&self) -> Option<i64> {
  369. self.rev_ids.front().cloned()
  370. }
  371. fn clear(&mut self) {
  372. self.compact_index = None;
  373. self.compact_length = 0;
  374. self.rev_ids.clear();
  375. }
  376. // Compact the rev_ids into one except the current synchronizing rev_id.
  377. fn compact(&mut self) -> VecDeque<i64> {
  378. let mut compact_seq = VecDeque::with_capacity(self.rev_ids.len());
  379. if let Some(start) = self.compact_index {
  380. if start < self.rev_ids.len() {
  381. let seq = self.rev_ids.split_off(start);
  382. compact_seq.extend(seq);
  383. }
  384. }
  385. self.compact_index = None;
  386. self.compact_length = 0;
  387. compact_seq
  388. }
  389. }