Add HTTP and data caching for speed

This commit is contained in:
Leonora Tindall 2022-11-02 14:11:40 -05:00
parent caa0723c1c
commit 63a61c1355
Signed by: nora
GPG Key ID: 7A8B52EC67E09AAF
7 changed files with 803 additions and 85 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
/target /target
mastodon-data.toml mastodon-data.toml
http-cacache

758
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -18,3 +18,6 @@ chrono = { version = "0.4.22", features = [ "serde" ] }
rss = { path = "./rss/", version = "2.0.1-atom-link-fix", features = [ "builders", "atom", "chrono" ] } rss = { path = "./rss/", version = "2.0.1-atom-link-fix", features = [ "builders", "atom", "chrono" ] }
pulldown-cmark = "0.9.2" pulldown-cmark = "0.9.2"
atom_syndication = "0.11.0" atom_syndication = "0.11.0"
http-cache-reqwest = "0.5.0"
reqwest-middleware = "0.1.6"
cached = "0.40.0"

View File

@ -19,7 +19,9 @@ ports to use for development and deployment.
- [ ] More robust parsing (defaults for all!) - [ ] More robust parsing (defaults for all!)
- [ ] RSS feeds for tags - [ ] RSS feeds for tags
- [x] Atom Extension pagination support - [x] Atom Extension pagination support
- [x] Disable pagination and just go for it lmao - [x] Disable pagination
- [x] HTTP Caching
- [x] Data caching
- [ ] Read More support - [ ] Read More support
- [ ] Dublin Core support - [ ] Dublin Core support
- [ ] Media Envelope support - [ ] Media Envelope support

View File

@ -3,7 +3,7 @@ use serde::Deserialize;
/// The API URL from whence Cohost serves JSON project definitions /// The API URL from whence Cohost serves JSON project definitions
pub const COHOST_ACCOUNT_API_URL: &str = "https://cohost.org/api/v1/project/"; pub const COHOST_ACCOUNT_API_URL: &str = "https://cohost.org/api/v1/project/";
#[derive(Debug, Deserialize, PartialEq, Eq)] #[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
pub struct CohostAccount { pub struct CohostAccount {
#[serde(rename = "projectId")] #[serde(rename = "projectId")]
pub project_id: u64, pub project_id: u64,

View File

@ -11,7 +11,7 @@ pub fn cohost_posts_api_url(project: impl AsRef<str>, page: u64) -> String {
// Cohost doesn't give us Next links ("rel: next") for further pages, so we'll have to ALWAYS populate the rel=next field // Cohost doesn't give us Next links ("rel: next") for further pages, so we'll have to ALWAYS populate the rel=next field
#[derive(Debug, Deserialize)] #[derive(Debug, Clone, Deserialize)]
pub struct CohostPostsPage { pub struct CohostPostsPage {
#[serde(rename = "nItems")] #[serde(rename = "nItems")]
pub number_items: usize, pub number_items: usize,
@ -22,7 +22,7 @@ pub struct CohostPostsPage {
pub links: Vec<CohostPostLink>, pub links: Vec<CohostPostLink>,
} }
#[derive(Debug, Deserialize)] #[derive(Debug, Clone, Deserialize)]
pub struct CohostPost { pub struct CohostPost {
#[serde(rename = "postId")] #[serde(rename = "postId")]
pub id: u64, pub id: u64,
@ -50,7 +50,7 @@ pub struct CohostPost {
pub share_tree: Vec<CohostPost>, pub share_tree: Vec<CohostPost>,
} }
#[derive(Debug, Deserialize)] #[derive(Debug, Clone, Deserialize)]
pub struct CohostPostingProject { pub struct CohostPostingProject {
#[serde(rename = "projectId")] #[serde(rename = "projectId")]
pub id: u64, pub id: u64,
@ -70,7 +70,7 @@ pub struct CohostPostingProject {
pub pronouns: String, pub pronouns: String,
} }
#[derive(Debug, Deserialize)] #[derive(Debug, Clone, Deserialize)]
pub struct CohostPostLink { pub struct CohostPostLink {
#[serde(deserialize_with = "deserialize_null_default", default)] #[serde(deserialize_with = "deserialize_null_default", default)]
pub href: String, pub href: String,

View File

@ -3,7 +3,10 @@ use std::collections::HashMap;
use std::error::Error; use std::error::Error;
#[macro_use] #[macro_use]
extern crate rocket; extern crate rocket;
use reqwest::{Client, StatusCode}; use cached::proc_macro::cached;
use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache};
use reqwest::StatusCode;
use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use rocket::response::content::RawHtml; use rocket::response::content::RawHtml;
use rocket::serde::json::Json; use rocket::serde::json::Json;
@ -40,6 +43,20 @@ fn user_agent() -> String {
} }
static ARGS: once_cell::sync::Lazy<Args> = once_cell::sync::Lazy::new(|| Args::parse()); static ARGS: once_cell::sync::Lazy<Args> = once_cell::sync::Lazy::new(|| Args::parse());
static CLIENT: once_cell::sync::Lazy<ClientWithMiddleware> = once_cell::sync::Lazy::new(|| {
ClientBuilder::new(
reqwest::Client::builder()
.user_agent(user_agent())
.build()
.unwrap(),
)
.with(Cache(HttpCache {
mode: CacheMode::Default,
manager: CACacheManager::default(),
options: None,
}))
.build()
});
#[get("/")] #[get("/")]
fn index() -> RawHtml<&'static str> { fn index() -> RawHtml<&'static str> {
@ -52,13 +69,13 @@ struct MdResponse {
inner: String, inner: String,
} }
#[derive(Responder)] #[derive(Debug, Clone, Responder)]
#[response(content_type = "application/rss+xml")] #[response(content_type = "application/rss+xml")]
struct RssResponse { struct RssResponse {
inner: String, inner: String,
} }
#[derive(Responder)] #[derive(Debug, Responder)]
#[response(content_type = "text/plain")] #[response(content_type = "text/plain")]
enum ErrorResponse { enum ErrorResponse {
#[response(status = 404)] #[response(status = 404)]
@ -67,14 +84,11 @@ enum ErrorResponse {
InternalError(String), InternalError(String),
} }
async fn get_post_from_page( #[cached(time = 60, result)]
client: &mut Client, async fn get_post_from_page(project_id: String, post_id: u64) -> Result<CohostPost, ErrorResponse> {
project_id: &str,
post_id: u64,
) -> Result<CohostPost, ErrorResponse> {
let mut page = 0; let mut page = 0;
loop { loop {
let new_page = get_page_data(client, project_id, page).await?; let new_page = get_page_data(project_id.clone(), page).await?;
if new_page.items.is_empty() { if new_page.items.is_empty() {
// Once there are no posts, we're done. // Once there are no posts, we're done.
return Err(ErrorResponse::NotFound( return Err(ErrorResponse::NotFound(
@ -89,14 +103,12 @@ async fn get_post_from_page(
} }
} }
async fn get_full_post_data( #[cached(time = 120, result)]
client: &mut Client, async fn get_full_post_data(project_id: String) -> Result<CohostPostsPage, ErrorResponse> {
project_id: &str,
) -> Result<CohostPostsPage, ErrorResponse> {
let mut page = 0; let mut page = 0;
let mut merged_page = get_page_data(client, project_id, page).await?; let mut merged_page = get_page_data(project_id.clone(), page).await?;
loop { loop {
let mut new_page = get_page_data(client, project_id, page).await?; let mut new_page = get_page_data(project_id.clone(), page).await?;
if new_page.items.is_empty() { if new_page.items.is_empty() {
// Once there are no posts, we're done. // Once there are no posts, we're done.
break; break;
@ -109,14 +121,11 @@ async fn get_full_post_data(
Ok(merged_page) Ok(merged_page)
} }
async fn get_page_data( // Not cached because it's never used individually.
client: &mut Client, async fn get_page_data(project_id: String, page: u64) -> Result<CohostPostsPage, ErrorResponse> {
project_id: &str, let posts_url = cohost_posts_api_url(&project_id, page);
page: u64,
) -> Result<CohostPostsPage, ErrorResponse> {
let posts_url = cohost_posts_api_url(project_id, page);
eprintln!("making request to {}", posts_url); eprintln!("making request to {}", posts_url);
match client.get(posts_url).send().await { match CLIENT.get(posts_url).send().await {
Ok(v) => match v.status() { Ok(v) => match v.status() {
StatusCode::OK => match v.json::<CohostPostsPage>().await { StatusCode::OK => match v.json::<CohostPostsPage>().await {
Ok(page_data) => Ok(page_data), Ok(page_data) => Ok(page_data),
@ -147,35 +156,11 @@ async fn get_page_data(
} }
} }
#[get("/<project>/feed.rss")] #[cached(time = 60, result)]
async fn syndication_rss_route(project: &str) -> Result<RssResponse, ErrorResponse> { async fn get_project_data(project_id: String) -> Result<CohostAccount, ErrorResponse> {
let mut client = get_client()?;
let project_data = get_project_data(&mut client, project).await?;
let page_data = get_full_post_data(&mut client, project).await?;
Ok(RssResponse {
inner: syndication::channel_for_posts_page(project, project_data, page_data).to_string(),
})
}
#[get("/<project>/<id>")]
async fn post_md_route(project: &str, id: u64) -> Result<MdResponse, ErrorResponse> {
let mut client = get_client()?;
let _project_data = get_project_data(&mut client, project).await?;
let post_data = get_post_from_page(&mut client, project, id).await?;
Ok(MdResponse {
inner: post_data.plain_body,
})
}
async fn get_project_data(
client: &mut Client,
project_id: &str,
) -> Result<CohostAccount, ErrorResponse> {
let project_url = format!("{}{}", COHOST_ACCOUNT_API_URL, project_id); let project_url = format!("{}{}", COHOST_ACCOUNT_API_URL, project_id);
eprintln!("making request to {}", project_url); eprintln!("making request to {}", project_url);
match client.get(project_url).send().await { match CLIENT.get(project_url).send().await {
Ok(v) => match v.status() { Ok(v) => match v.status() {
StatusCode::OK => match v.json::<CohostAccount>().await { StatusCode::OK => match v.json::<CohostAccount>().await {
Ok(a) => Ok(a), Ok(a) => Ok(a),
@ -209,15 +194,23 @@ async fn get_project_data(
} }
} }
fn get_client() -> Result<Client, ErrorResponse> { #[get("/<project>/feed.rss")]
match Client::builder().user_agent(user_agent()).build() { async fn syndication_rss_route(project: String) -> Result<RssResponse, ErrorResponse> {
Ok(v) => Ok(v), let project_data = get_project_data(project.clone()).await?;
Err(e) => { let page_data = get_full_post_data(project.clone()).await?;
let err = format!("Couldn't build a reqwest client: {:?}", e); Ok(RssResponse {
eprintln!("{}", err); inner: syndication::channel_for_posts_page(project.clone(), project_data, page_data)
Err(ErrorResponse::InternalError(err)) .to_string(),
} })
} }
#[get("/<project>/<id>")]
async fn post_md_route(project: String, id: u64) -> Result<MdResponse, ErrorResponse> {
let _project_data = get_project_data(project.clone()).await?;
let post_data = get_post_from_page(project.clone(), id).await?;
Ok(MdResponse {
inner: post_data.plain_body,
})
} }
#[get("/.well-known/webfinger?<params..>")] #[get("/.well-known/webfinger?<params..>")]
@ -232,9 +225,8 @@ async fn webfinger_route(
eprintln!("{}", err); eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err)); return Err(ErrorResponse::InternalError(err));
} }
let mut client = get_client()?;
if let Some(param) = params.iter().next() { if let Some(param) = params.iter().next() {
let _project_data = get_project_data(&mut client, param.0.as_str()).await?; let _project_data = get_project_data(param.0.clone()).await?;
Ok(Json(CohostWebfingerResource::new( Ok(Json(CohostWebfingerResource::new(
param.0.as_str(), param.0.as_str(),
&ARGS.domain, &ARGS.domain,