Add HTTP and data caching for speed

This commit is contained in:
Leonora Tindall 2022-11-02 14:11:40 -05:00
parent caa0723c1c
commit 63a61c1355
Signed by: nora
GPG Key ID: 7A8B52EC67E09AAF
7 changed files with 803 additions and 85 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
/target
mastodon-data.toml
http-cacache

758
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -18,3 +18,6 @@ chrono = { version = "0.4.22", features = [ "serde" ] }
rss = { path = "./rss/", version = "2.0.1-atom-link-fix", features = [ "builders", "atom", "chrono" ] }
pulldown-cmark = "0.9.2"
atom_syndication = "0.11.0"
http-cache-reqwest = "0.5.0"
reqwest-middleware = "0.1.6"
cached = "0.40.0"

View File

@ -19,7 +19,9 @@ ports to use for development and deployment.
- [ ] More robust parsing (defaults for all!)
- [ ] RSS feeds for tags
- [x] Atom Extension pagination support
- [x] Disable pagination and just go for it lmao
- [x] Disable pagination
- [x] HTTP Caching
- [x] Data caching
- [ ] Read More support
- [ ] Dublin Core support
- [ ] Media Envelope support

View File

@ -3,7 +3,7 @@ use serde::Deserialize;
/// The API URL from whence Cohost serves JSON project definitions
pub const COHOST_ACCOUNT_API_URL: &str = "https://cohost.org/api/v1/project/";
#[derive(Debug, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
pub struct CohostAccount {
#[serde(rename = "projectId")]
pub project_id: u64,

View File

@ -11,7 +11,7 @@ pub fn cohost_posts_api_url(project: impl AsRef<str>, page: u64) -> String {
// Cohost doesn't give us Next links ("rel: next") for further pages, so we'll have to ALWAYS populate the rel=next field
#[derive(Debug, Deserialize)]
#[derive(Debug, Clone, Deserialize)]
pub struct CohostPostsPage {
#[serde(rename = "nItems")]
pub number_items: usize,
@ -22,7 +22,7 @@ pub struct CohostPostsPage {
pub links: Vec<CohostPostLink>,
}
#[derive(Debug, Deserialize)]
#[derive(Debug, Clone, Deserialize)]
pub struct CohostPost {
#[serde(rename = "postId")]
pub id: u64,
@ -50,7 +50,7 @@ pub struct CohostPost {
pub share_tree: Vec<CohostPost>,
}
#[derive(Debug, Deserialize)]
#[derive(Debug, Clone, Deserialize)]
pub struct CohostPostingProject {
#[serde(rename = "projectId")]
pub id: u64,
@ -70,7 +70,7 @@ pub struct CohostPostingProject {
pub pronouns: String,
}
#[derive(Debug, Deserialize)]
#[derive(Debug, Clone, Deserialize)]
pub struct CohostPostLink {
#[serde(deserialize_with = "deserialize_null_default", default)]
pub href: String,

View File

@ -3,7 +3,10 @@ use std::collections::HashMap;
use std::error::Error;
#[macro_use]
extern crate rocket;
use reqwest::{Client, StatusCode};
use cached::proc_macro::cached;
use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache};
use reqwest::StatusCode;
use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use rocket::response::content::RawHtml;
use rocket::serde::json::Json;
@ -40,6 +43,20 @@ fn user_agent() -> String {
}
static ARGS: once_cell::sync::Lazy<Args> = once_cell::sync::Lazy::new(|| Args::parse());
/// Shared HTTP client, constructed lazily the first time `CLIENT` is dereferenced.
///
/// Built from a `reqwest::Client` that sends the `user_agent()` string, wrapped
/// by `reqwest_middleware::ClientBuilder` with an `http_cache_reqwest` cache
/// layer (`CacheMode::Default`, `CACacheManager::default()`, no extra options).
///
/// # Panics
///
/// Panics on first use if the underlying `reqwest` client cannot be built,
/// because the build result is unwrapped.
static CLIENT: once_cell::sync::Lazy<ClientWithMiddleware> = once_cell::sync::Lazy::new(|| {
ClientBuilder::new(
reqwest::Client::builder()
.user_agent(user_agent())
.build()
.unwrap(),
)
// NOTE(review): the default manager presumably persists entries on disk (this
// commit also git-ignores `http-cacache`) — confirm against the crate docs.
.with(Cache(HttpCache {
mode: CacheMode::Default,
manager: CACacheManager::default(),
options: None,
}))
.build()
});
#[get("/")]
fn index() -> RawHtml<&'static str> {
@ -52,13 +69,13 @@ struct MdResponse {
inner: String,
}
#[derive(Responder)]
#[derive(Debug, Clone, Responder)]
#[response(content_type = "application/rss+xml")]
struct RssResponse {
inner: String,
}
#[derive(Responder)]
#[derive(Debug, Responder)]
#[response(content_type = "text/plain")]
enum ErrorResponse {
#[response(status = 404)]
@ -67,14 +84,11 @@ enum ErrorResponse {
InternalError(String),
}
async fn get_post_from_page(
client: &mut Client,
project_id: &str,
post_id: u64,
) -> Result<CohostPost, ErrorResponse> {
#[cached(time = 60, result)]
async fn get_post_from_page(project_id: String, post_id: u64) -> Result<CohostPost, ErrorResponse> {
let mut page = 0;
loop {
let new_page = get_page_data(client, project_id, page).await?;
let new_page = get_page_data(project_id.clone(), page).await?;
if new_page.items.is_empty() {
// Once there are no posts, we're done.
return Err(ErrorResponse::NotFound(
@ -89,14 +103,12 @@ async fn get_post_from_page(
}
}
async fn get_full_post_data(
client: &mut Client,
project_id: &str,
) -> Result<CohostPostsPage, ErrorResponse> {
#[cached(time = 120, result)]
async fn get_full_post_data(project_id: String) -> Result<CohostPostsPage, ErrorResponse> {
let mut page = 0;
let mut merged_page = get_page_data(client, project_id, page).await?;
let mut merged_page = get_page_data(project_id.clone(), page).await?;
loop {
let mut new_page = get_page_data(client, project_id, page).await?;
let mut new_page = get_page_data(project_id.clone(), page).await?;
if new_page.items.is_empty() {
// Once there are no posts, we're done.
break;
@ -109,14 +121,11 @@ async fn get_full_post_data(
Ok(merged_page)
}
async fn get_page_data(
client: &mut Client,
project_id: &str,
page: u64,
) -> Result<CohostPostsPage, ErrorResponse> {
let posts_url = cohost_posts_api_url(project_id, page);
// Not cached because it's never used individually.
async fn get_page_data(project_id: String, page: u64) -> Result<CohostPostsPage, ErrorResponse> {
let posts_url = cohost_posts_api_url(&project_id, page);
eprintln!("making request to {}", posts_url);
match client.get(posts_url).send().await {
match CLIENT.get(posts_url).send().await {
Ok(v) => match v.status() {
StatusCode::OK => match v.json::<CohostPostsPage>().await {
Ok(page_data) => Ok(page_data),
@ -147,35 +156,11 @@ async fn get_page_data(
}
}
#[get("/<project>/feed.rss")]
async fn syndication_rss_route(project: &str) -> Result<RssResponse, ErrorResponse> {
let mut client = get_client()?;
let project_data = get_project_data(&mut client, project).await?;
let page_data = get_full_post_data(&mut client, project).await?;
Ok(RssResponse {
inner: syndication::channel_for_posts_page(project, project_data, page_data).to_string(),
})
}
#[get("/<project>/<id>")]
async fn post_md_route(project: &str, id: u64) -> Result<MdResponse, ErrorResponse> {
let mut client = get_client()?;
let _project_data = get_project_data(&mut client, project).await?;
let post_data = get_post_from_page(&mut client, project, id).await?;
Ok(MdResponse {
inner: post_data.plain_body,
})
}
async fn get_project_data(
client: &mut Client,
project_id: &str,
) -> Result<CohostAccount, ErrorResponse> {
#[cached(time = 60, result)]
async fn get_project_data(project_id: String) -> Result<CohostAccount, ErrorResponse> {
let project_url = format!("{}{}", COHOST_ACCOUNT_API_URL, project_id);
eprintln!("making request to {}", project_url);
match client.get(project_url).send().await {
match CLIENT.get(project_url).send().await {
Ok(v) => match v.status() {
StatusCode::OK => match v.json::<CohostAccount>().await {
Ok(a) => Ok(a),
@ -209,15 +194,23 @@ async fn get_project_data(
}
}
fn get_client() -> Result<Client, ErrorResponse> {
match Client::builder().user_agent(user_agent()).build() {
Ok(v) => Ok(v),
Err(e) => {
let err = format!("Couldn't build a reqwest client: {:?}", e);
eprintln!("{}", err);
Err(ErrorResponse::InternalError(err))
}
}
/// Route handler for `GET /<project>/feed.rss`.
///
/// Fetches the project record with `get_project_data` and the project's posts
/// with `get_full_post_data`, then passes both to
/// `syndication::channel_for_posts_page` and returns the stringified channel
/// as the body of an `RssResponse`.
///
/// # Errors
///
/// Returns the `ErrorResponse` from whichever lookup fails first; `?`
/// propagates it unchanged.
#[get("/<project>/feed.rss")]
async fn syndication_rss_route(project: String) -> Result<RssResponse, ErrorResponse> {
// Both lookups take an owned `String`, hence the clones.
let project_data = get_project_data(project.clone()).await?;
let page_data = get_full_post_data(project.clone()).await?;
Ok(RssResponse {
inner: syndication::channel_for_posts_page(project.clone(), project_data, page_data)
.to_string(),
})
}
/// Route handler for `GET /<project>/<id>`.
///
/// Looks up post `id` for `project` via `get_post_from_page` and returns its
/// `plain_body` as the body of an `MdResponse`.
///
/// # Errors
///
/// Returns the `ErrorResponse` from `get_project_data` or `get_post_from_page`,
/// whichever fails first; `?` propagates it unchanged.
#[get("/<project>/<id>")]
async fn post_md_route(project: String, id: u64) -> Result<MdResponse, ErrorResponse> {
// The project record itself is unused; the call is kept for its error path.
// NOTE(review): presumably this rejects unknown projects before pages are scanned — confirm.
let _project_data = get_project_data(project.clone()).await?;
let post_data = get_post_from_page(project.clone(), id).await?;
Ok(MdResponse {
inner: post_data.plain_body,
})
}
#[get("/.well-known/webfinger?<params..>")]
@ -232,9 +225,8 @@ async fn webfinger_route(
eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err));
}
let mut client = get_client()?;
if let Some(param) = params.iter().next() {
let _project_data = get_project_data(&mut client, param.0.as_str()).await?;
let _project_data = get_project_data(param.0.clone()).await?;
Ok(Json(CohostWebfingerResource::new(
param.0.as_str(),
&ARGS.domain,