Compare commits

..

8 Commits

9 changed files with 3585 additions and 133 deletions

2
Cargo.lock generated
View File

@ -293,7 +293,7 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]] [[package]]
name = "corobel" name = "corobel"
version = "0.2.2" version = "0.4.0"
dependencies = [ dependencies = [
"atom_syndication", "atom_syndication",
"chrono", "chrono",

View File

@ -1,6 +1,6 @@
[package] [package]
name = "corobel" name = "corobel"
version = "0.2.2" version = "0.4.0"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -16,8 +16,10 @@ ports to use for development and deployment.
- [ ] Handle redirects - [ ] Handle redirects
- [x] RSS feeds for projects - [x] RSS feeds for projects
- [x] Index page explaining what's going on - [x] Index page explaining what's going on
- [ ] More robust parsing (defaults for all!)
- [ ] RSS feeds for tags - [ ] RSS feeds for tags
- [x] Atom Extension pagination support - [x] Atom Extension pagination support
- [x] Disable pagination and just go for it lmao
- [ ] Read More support - [ ] Read More support
- [ ] Dublin Core support - [ ] Dublin Core support
- [ ] Media Envelope support - [ ] Media Envelope support

View File

@ -0,0 +1 @@
{"nItems":0,"nPages":0,"items":[],"_links":[{"href":"/api/v1/project/vogon","rel":"project","type":"GET"},{"href":"/api/v1/project/vogon/posts?page=998","rel":"prev","type":"GET"}]}

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,5 @@
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use serde::Deserialize; use serde::{Deserialize, Deserializer};
pub fn cohost_posts_api_url(project: impl AsRef<str>, page: u64) -> String { pub fn cohost_posts_api_url(project: impl AsRef<str>, page: u64) -> String {
format!( format!(
@ -11,7 +11,7 @@ pub fn cohost_posts_api_url(project: impl AsRef<str>, page: u64) -> String {
// Cohost doesn't give us Next links ("rel: next") for further pages, so we'll have to ALWAYS populate the rel=next field // Cohost doesn't give us Next links ("rel: next") for further pages, so we'll have to ALWAYS populate the rel=next field
#[derive(Deserialize)] #[derive(Debug, Deserialize)]
pub struct CohostPostsPage { pub struct CohostPostsPage {
#[serde(rename = "nItems")] #[serde(rename = "nItems")]
pub number_items: usize, pub number_items: usize,
@ -22,18 +22,27 @@ pub struct CohostPostsPage {
pub links: Vec<CohostPostLink>, pub links: Vec<CohostPostLink>,
} }
#[derive(Deserialize)] #[derive(Debug, Deserialize)]
pub struct CohostPost { pub struct CohostPost {
#[serde(rename = "postId")] #[serde(rename = "postId")]
pub id: u64, pub id: u64,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub headline: String, pub headline: String,
#[serde(rename = "publishedAt")] #[serde(rename = "publishedAt")]
pub published_at: DateTime<Utc>, pub published_at: DateTime<Utc>,
pub cws: Vec<String>, pub cws: Vec<String>,
pub tags: Vec<String>, pub tags: Vec<String>,
#[serde(rename = "plainTextBody")] #[serde(
rename = "plainTextBody",
deserialize_with = "deserialize_null_default",
default
)]
pub plain_body: String, pub plain_body: String,
#[serde(rename = "singlePostPageUrl")] #[serde(
rename = "singlePostPageUrl",
deserialize_with = "deserialize_null_default",
default
)]
pub url: String, pub url: String,
#[serde(rename = "postingProject")] #[serde(rename = "postingProject")]
pub poster: CohostPostingProject, pub poster: CohostPostingProject,
@ -41,26 +50,49 @@ pub struct CohostPost {
pub share_tree: Vec<CohostPost>, pub share_tree: Vec<CohostPost>,
} }
#[derive(Deserialize)] #[derive(Debug, Deserialize)]
pub struct CohostPostingProject { pub struct CohostPostingProject {
#[serde(rename = "projectId")] #[serde(rename = "projectId")]
pub id: u64, pub id: u64,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub handle: String, pub handle: String,
#[serde(rename = "displayName")] #[serde(
rename = "displayName",
deserialize_with = "deserialize_null_default",
default
)]
pub display_name: String, pub display_name: String,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub dek: String, pub dek: String,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub description: String, pub description: String,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub pronouns: String, pub pronouns: String,
} }
#[derive(Deserialize)] #[derive(Debug, Deserialize)]
pub struct CohostPostLink { pub struct CohostPostLink {
#[serde(deserialize_with = "deserialize_null_default", default)]
pub href: String, pub href: String,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub rel: String, pub rel: String,
#[serde(rename = "type")] #[serde(
rename = "type",
deserialize_with = "deserialize_null_default",
default
)]
pub t_type: String, pub t_type: String,
} }
/// Serde `deserialize_with` helper that substitutes `T::default()` for a missing value.
///
/// The input is read as an `Option<T>`: a present value is returned unchanged,
/// while a `None` (e.g. a JSON `null`) is replaced by the type's default.
/// Errors from the underlying deserializer are passed through untouched.
fn deserialize_null_default<'de, D, T>(deserializer: D) -> Result<T, D::Error>
where
    T: Default + Deserialize<'de>,
    D: Deserializer<'de>,
{
    Option::<T>::deserialize(deserializer).map(Option::unwrap_or_default)
}
#[test] #[test]
fn test_deserialize() -> Result<(), Box<dyn std::error::Error>> { fn test_deserialize() -> Result<(), Box<dyn std::error::Error>> {
let post_page_json = include_str!("../samples/cohost/api/v1/project_posts.json"); let post_page_json = include_str!("../samples/cohost/api/v1/project_posts.json");
@ -71,3 +103,19 @@ fn test_deserialize() -> Result<(), Box<dyn std::error::Error>> {
assert_eq!(post.poster.id, 32693); assert_eq!(post.poster.id, 32693);
Ok(()) Ok(())
} }
/// Checks that the pathological sample page parses into a `CohostPostsPage` without error.
#[test]
fn test_deserialize_weird() -> Result<(), Box<dyn std::error::Error>> {
    let raw_json = include_str!("../samples/cohost/api/v1/vogon_pathological.json");
    // Only successful deserialization matters here; the parsed value is discarded.
    serde_json::from_str::<CohostPostsPage>(raw_json)?;
    Ok(())
}
/// Checks that the empty sample page parses and contains no items.
#[test]
fn test_deserialize_empty() -> Result<(), Box<dyn std::error::Error>> {
    let raw_json = include_str!("../samples/cohost/api/v1/empty_posts_age.json");
    let parsed = serde_json::from_str::<CohostPostsPage>(raw_json)?;
    // Dump the parsed page to aid debugging.
    println!("{:?}", parsed);
    assert!(parsed.items.is_empty());
    Ok(())
}

View File

@ -12,7 +12,7 @@ mod cohost_posts;
mod syndication; mod syndication;
mod webfinger; mod webfinger;
use cohost_account::{CohostAccount, COHOST_ACCOUNT_API_URL}; use cohost_account::{CohostAccount, COHOST_ACCOUNT_API_URL};
use cohost_posts::{cohost_posts_api_url, CohostPostsPage}; use cohost_posts::{cohost_posts_api_url, CohostPost, CohostPostsPage};
use webfinger::CohostWebfingerResource; use webfinger::CohostWebfingerResource;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
@ -46,6 +46,12 @@ fn index() -> RawHtml<&'static str> {
RawHtml(include_str!("../static/index.html")) RawHtml(include_str!("../static/index.html"))
} }
/// Responder wrapper that serves its inner string with the
/// `text/markdown` content type.
#[derive(Responder)]
#[response(content_type = "text/markdown")]
struct MdResponse {
// Body text sent back to the client.
inner: String,
}
#[derive(Responder)] #[derive(Responder)]
#[response(content_type = "application/rss+xml")] #[response(content_type = "application/rss+xml")]
struct RssResponse { struct RssResponse {
@ -61,74 +67,63 @@ enum ErrorResponse {
InternalError(String), InternalError(String),
} }
#[get("/<project>/feed.rss?<page>")] async fn get_post_from_page(
async fn syndication_rss_route( client: &mut Client,
project: &str, project_id: &str,
page: Option<u64>, post_id: u64,
) -> Result<RssResponse, ErrorResponse> { ) -> Result<CohostPost, ErrorResponse> {
let page = page.unwrap_or(0); let mut page = 0;
let project_url = format!("{}{}", COHOST_ACCOUNT_API_URL, project); loop {
let posts_url = cohost_posts_api_url(project, page); let new_page = get_page_data(client, project_id, page).await?;
if new_page.items.is_empty() {
// Once there are no posts, we're done.
return Err(ErrorResponse::NotFound(
"End of posts reached, ID not found.".into(),
));
} else {
page += 1;
if let Some(post) = new_page.items.into_iter().find(|post| post.id == post_id) {
return Ok(post);
}
}
}
}
let client = match Client::builder().user_agent(user_agent()).build() { async fn get_full_post_data(
Ok(v) => v, client: &mut Client,
Err(e) => { project_id: &str,
let err = format!("Couldn't build a reqwest client: {:?}", e); ) -> Result<CohostPostsPage, ErrorResponse> {
eprintln!("{}", err); let mut page = 0;
return Err(ErrorResponse::InternalError(err)); let mut merged_page = get_page_data(client, project_id, page).await?;
loop {
let mut new_page = get_page_data(client, project_id, page).await?;
if new_page.items.is_empty() {
// Once there are no posts, we're done.
break;
} else {
page += 1;
merged_page.number_items += new_page.number_items;
merged_page.items.append(&mut new_page.items);
} }
}; }
Ok(merged_page)
eprintln!("making request to {}", project_url); }
let project_data: CohostAccount = match client.get(project_url).send().await {
Ok(v) => match v.status() {
StatusCode::OK => match v.json::<CohostAccount>().await {
Ok(a) => a,
Err(e) => {
let err = format!("Couldn't deserialize Cohost project '{}': {:?}", project, e);
eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err));
}
},
// TODO NORA: Handle possible redirects
s => {
let err = format!(
"Didn't receive status code 200 for Cohost project '{}'; got {:?} instead.",
project, s
);
eprintln!("{}", err);
return Err(ErrorResponse::NotFound(err));
}
},
Err(e) => {
let err = format!(
"Error making request to Cohost for project '{}': {:?}",
project, e
);
eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err));
}
};
async fn get_page_data(
client: &mut Client,
project_id: &str,
page: u64,
) -> Result<CohostPostsPage, ErrorResponse> {
let posts_url = cohost_posts_api_url(project_id, page);
eprintln!("making request to {}", posts_url); eprintln!("making request to {}", posts_url);
match client.get(posts_url).send().await { match client.get(posts_url).send().await {
Ok(v) => match v.status() { Ok(v) => match v.status() {
StatusCode::OK => match v.json::<CohostPostsPage>().await { StatusCode::OK => match v.json::<CohostPostsPage>().await {
Ok(page_data) => { Ok(page_data) => Ok(page_data),
return Ok(RssResponse {
inner: syndication::channel_for_posts_page(
project,
page,
project_data,
page_data,
)
.to_string(),
});
}
Err(e) => { Err(e) => {
let err = format!( let err = format!(
"Couldn't deserialize Cohost posts page for '{}': {:?}", "Couldn't deserialize Cohost posts page for '{}': {:?}",
project, e project_id, e
); );
eprintln!("{}", err); eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err)); return Err(ErrorResponse::InternalError(err));
@ -144,66 +139,110 @@ async fn syndication_rss_route(
Err(e) => { Err(e) => {
let err = format!( let err = format!(
"Error making request to Cohost for posts for project '{}': {:?}", "Error making request to Cohost for posts for project '{}': {:?}",
project, e project_id, e
); );
eprintln!("{}", err); eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err)); return Err(ErrorResponse::InternalError(err));
} }
}; }
} }
#[get("/.well-known/webfinger?<params..>")] #[get("/<project>/feed.rss")]
async fn webfinger_route(params: HashMap<String, String>) -> Option<Json<CohostWebfingerResource>> { async fn syndication_rss_route(project: &str) -> Result<RssResponse, ErrorResponse> {
if params.len() != 1 { let mut client = get_client()?;
eprintln!(
"Too may or too few parameters. Expected 1, got {}", let project_data = get_project_data(&mut client, project).await?;
params.len() let page_data = get_full_post_data(&mut client, project).await?;
); Ok(RssResponse {
return None; inner: syndication::channel_for_posts_page(project, project_data, page_data).to_string(),
} })
let client = match Client::builder().user_agent(user_agent()).build() { }
Ok(v) => v,
Err(e) => { #[get("/<project>/<id>")]
let err = format!("Couldn't build a reqwest client: {:?}", e); async fn post_md_route(project: &str, id: u64) -> Result<MdResponse, ErrorResponse> {
eprintln!("{}", err); let mut client = get_client()?;
return None;
} let _project_data = get_project_data(&mut client, project).await?;
}; let post_data = get_post_from_page(&mut client, project, id).await?;
if let Some(param) = params.iter().next() { Ok(MdResponse {
let url = format!("{}{}", COHOST_ACCOUNT_API_URL, param.0); inner: post_data.plain_body,
eprintln!("making request to {}", url); })
match client.get(url).send().await { }
Ok(v) => {
match v.status() { async fn get_project_data(
client: &mut Client,
project_id: &str,
) -> Result<CohostAccount, ErrorResponse> {
let project_url = format!("{}{}", COHOST_ACCOUNT_API_URL, project_id);
eprintln!("making request to {}", project_url);
match client.get(project_url).send().await {
Ok(v) => match v.status() {
StatusCode::OK => match v.json::<CohostAccount>().await { StatusCode::OK => match v.json::<CohostAccount>().await {
Ok(_v) => { Ok(a) => Ok(a),
return Some(Json(CohostWebfingerResource::new(
param.0.as_str(),
&ARGS.domain,
&ARGS.base_url,
)));
}
Err(e) => { Err(e) => {
eprintln!("Couldn't deserialize Cohost project '{}': {:?}", param.0, e); let err = format!(
"Couldn't deserialize Cohost project '{}': {:?}",
project_id, e
);
eprintln!("{}", err);
Err(ErrorResponse::InternalError(err))
} }
}, },
// TODO NORA: Handle possible redirects // TODO NORA: Handle possible redirects
s => { s => {
eprintln!("Didn't receive status code 200 for Cohost project '{}'; got {:?} instead.", param.0, s); let err = format!(
return None; "Didn't receive status code 200 for Cohost project '{}'; got {:?} instead.",
} project_id, s
}
}
Err(e) => {
eprintln!(
"Error making request to Cohost for project '{}': {:?}",
param.0, e
); );
return None; eprintln!("{}", err);
Err(ErrorResponse::NotFound(err))
} }
}; },
Err(e) => {
let err = format!(
"Error making request to Cohost for project '{}': {:?}",
project_id, e
);
eprintln!("{}", err);
Err(ErrorResponse::InternalError(err))
}
}
}
/// Builds the HTTP client for outgoing requests, using the user agent from `user_agent()`.
///
/// If the builder fails, the error is logged to stderr and returned as
/// `ErrorResponse::InternalError` carrying the same message.
fn get_client() -> Result<Client, ErrorResponse> {
    Client::builder()
        .user_agent(user_agent())
        .build()
        .map_err(|build_error| {
            let message = format!("Couldn't build a reqwest client: {:?}", build_error);
            eprintln!("{}", message);
            ErrorResponse::InternalError(message)
        })
}
#[get("/.well-known/webfinger?<params..>")]
async fn webfinger_route(
params: HashMap<String, String>,
) -> Result<Json<CohostWebfingerResource>, ErrorResponse> {
if params.len() != 1 {
let err = format!(
"Too may or too few parameters. Expected 1, got {}",
params.len()
);
eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err));
}
let mut client = get_client()?;
if let Some(param) = params.iter().next() {
let _project_data = get_project_data(&mut client, param.0.as_str()).await?;
Ok(Json(CohostWebfingerResource::new(
param.0.as_str(),
&ARGS.domain,
&ARGS.base_url,
)))
} else {
Err(ErrorResponse::NotFound("No project ID provided.".into()))
} }
None
} }
#[rocket::main] #[rocket::main]
@ -213,7 +252,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
let _rocket = rocket::build() let _rocket = rocket::build()
.mount( .mount(
&ARGS.base_url, &ARGS.base_url,
routes![index, webfinger_route, syndication_rss_route], routes![index, webfinger_route, syndication_rss_route, post_md_route],
) )
.ignite() .ignite()
.await? .await?

View File

@ -20,7 +20,6 @@ fn rel_link_for(rel: &str, project_name: &str, page_number: u64) -> Link {
pub fn channel_for_posts_page( pub fn channel_for_posts_page(
project_name: impl AsRef<str>, project_name: impl AsRef<str>,
page_number: u64,
project: CohostAccount, project: CohostAccount,
mut page: CohostPostsPage, mut page: CohostPostsPage,
) -> Channel { ) -> Channel {
@ -34,23 +33,13 @@ pub fn channel_for_posts_page(
env!("CARGO_CRATE_NAME"), env!("CARGO_CRATE_NAME"),
env!("CARGO_PKG_VERSION") env!("CARGO_PKG_VERSION")
))) )))
.link(format!( .link(format!("https://cohost.org/{}", project_name,));
"https://cohost.org/{}?page={}",
project_name, page_number
));
let mut atom = AtomExtensionBuilder::default(); let mut atom = AtomExtensionBuilder::default();
let mut links = vec![ let links = vec![
rel_link_for("self", project_name, page_number), rel_link_for("self", project_name, 0),
rel_link_for("first", project_name, 0), rel_link_for("first", project_name, 0),
]; ];
if page_number > 0 {
links.push(rel_link_for("previous", project_name, page_number - 1));
}
if page.number_items > 0 {
// Cohost API is wrong about the pagination so we can only guess. If there are posts, there might be more posts.
links.push(rel_link_for("next", project_name, page_number + 1));
}
atom.links(links); atom.links(links);
builder.atom_ext(Some(atom.build())); builder.atom_ext(Some(atom.build()));

View File

@ -30,6 +30,11 @@
Go to <code>/project_name/feed.rss</code> to get a feed for a project. Go to <code>/project_name/feed.rss</code> to get a feed for a project.
For example, <a href="/noracodes/feed.rss"><code>/noracodes/feed.rss</code></a> will give you the feed for my page. For example, <a href="/noracodes/feed.rss"><code>/noracodes/feed.rss</code></a> will give you the feed for my page.
</p> </p>
<p>
You can also get a particular post's original plain-text body at <code>/project_name/post_id/</code>, such as
<a href="/noracodes/169186/"><code>/noracodes/169186/</code></a>. (In a Cohost post URL, the ID is the numerical part after <code>/post/</code>.
For instance, in <code>https://cohost.org/noracodes/post/169186-october-update</code>, the ID is "169186".)
</p>
<p> <p>
Webfinger resources for accounts are provided at the Webfinger well-known URL <code>/.well-known/webfinger?project_name</code>. Webfinger resources for accounts are provided at the Webfinger well-known URL <code>/.well-known/webfinger?project_name</code>.
</p> </p>