Compare commits

...

8 Commits

9 changed files with 3585 additions and 133 deletions

2
Cargo.lock generated
View File

@ -293,7 +293,7 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "corobel"
version = "0.2.2"
version = "0.4.0"
dependencies = [
"atom_syndication",
"chrono",

View File

@ -1,6 +1,6 @@
[package]
name = "corobel"
version = "0.2.2"
version = "0.4.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -16,8 +16,10 @@ ports to use for development and deployment.
- [ ] Handle redirects
- [x] RSS feeds for projects
- [x] Index page explaining what's going on
- [ ] More robust parsing (defaults for all!)
- [ ] RSS feeds for tags
- [x] Atom Extension pagination support
- [x] Disable pagination and just go for it lmao
- [ ] Read More support
- [ ] Dublin Core support
- [ ] Media Envelope support

View File

@ -0,0 +1 @@
{"nItems":0,"nPages":0,"items":[],"_links":[{"href":"/api/v1/project/vogon","rel":"project","type":"GET"},{"href":"/api/v1/project/vogon/posts?page=998","rel":"prev","type":"GET"}]}

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,5 @@
use chrono::{DateTime, Utc};
use serde::Deserialize;
use serde::{Deserialize, Deserializer};
pub fn cohost_posts_api_url(project: impl AsRef<str>, page: u64) -> String {
format!(
@ -11,7 +11,7 @@ pub fn cohost_posts_api_url(project: impl AsRef<str>, page: u64) -> String {
// Cohost doesn't give us Next links ("rel: next") for further pages, so we'll have to ALWAYS populate the rel=next field
#[derive(Deserialize)]
#[derive(Debug, Deserialize)]
pub struct CohostPostsPage {
#[serde(rename = "nItems")]
pub number_items: usize,
@ -22,18 +22,27 @@ pub struct CohostPostsPage {
pub links: Vec<CohostPostLink>,
}
#[derive(Deserialize)]
#[derive(Debug, Deserialize)]
pub struct CohostPost {
#[serde(rename = "postId")]
pub id: u64,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub headline: String,
#[serde(rename = "publishedAt")]
pub published_at: DateTime<Utc>,
pub cws: Vec<String>,
pub tags: Vec<String>,
#[serde(rename = "plainTextBody")]
#[serde(
rename = "plainTextBody",
deserialize_with = "deserialize_null_default",
default
)]
pub plain_body: String,
#[serde(rename = "singlePostPageUrl")]
#[serde(
rename = "singlePostPageUrl",
deserialize_with = "deserialize_null_default",
default
)]
pub url: String,
#[serde(rename = "postingProject")]
pub poster: CohostPostingProject,
@ -41,26 +50,49 @@ pub struct CohostPost {
pub share_tree: Vec<CohostPost>,
}
#[derive(Deserialize)]
#[derive(Debug, Deserialize)]
pub struct CohostPostingProject {
#[serde(rename = "projectId")]
pub id: u64,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub handle: String,
#[serde(rename = "displayName")]
#[serde(
rename = "displayName",
deserialize_with = "deserialize_null_default",
default
)]
pub display_name: String,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub dek: String,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub description: String,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub pronouns: String,
}
#[derive(Deserialize)]
#[derive(Debug, Deserialize)]
pub struct CohostPostLink {
#[serde(deserialize_with = "deserialize_null_default", default)]
pub href: String,
#[serde(deserialize_with = "deserialize_null_default", default)]
pub rel: String,
#[serde(rename = "type")]
#[serde(
rename = "type",
deserialize_with = "deserialize_null_default",
default
)]
pub t_type: String,
}
/// Serde `deserialize_with` helper that maps an explicit `null` to `T::default()`.
///
/// The incoming value is read as an `Option<T>`; `Some(value)` is passed through
/// unchanged and `None` is replaced with the type's default. Fields in this file
/// pair it with `#[serde(default)]`.
///
/// # Errors
///
/// Returns the deserializer's error if the value is neither `null` nor a valid `T`.
fn deserialize_null_default<'de, D, T>(deserializer: D) -> Result<T, D::Error>
where
    D: Deserializer<'de>,
    T: Default + Deserialize<'de>,
{
    Option::<T>::deserialize(deserializer).map(|maybe_value| maybe_value.unwrap_or_default())
}
#[test]
fn test_deserialize() -> Result<(), Box<dyn std::error::Error>> {
let post_page_json = include_str!("../samples/cohost/api/v1/project_posts.json");
@ -71,3 +103,19 @@ fn test_deserialize() -> Result<(), Box<dyn std::error::Error>> {
assert_eq!(post.poster.id, 32693);
Ok(())
}
/// Smoke test: the "pathological" sample page must deserialize without error.
/// Nothing about the parsed contents is asserted.
#[test]
fn test_deserialize_weird() -> Result<(), Box<dyn std::error::Error>> {
    let raw = include_str!("../samples/cohost/api/v1/vogon_pathological.json");
    serde_json::from_str::<CohostPostsPage>(raw)?;
    Ok(())
}
/// A posts page with no items must deserialize and expose an empty `items` list.
// NOTE(review): the sample is referenced as `empty_posts_age.json`; confirm that
// this matches the file name on disk (it may have been meant as `..._page.json`).
#[test]
fn test_deserialize_empty() -> Result<(), Box<dyn std::error::Error>> {
    let raw = include_str!("../samples/cohost/api/v1/empty_posts_age.json");
    let parsed = serde_json::from_str::<CohostPostsPage>(raw)?;
    // Dump the parsed page so it is visible when the test is run with `--nocapture`.
    println!("{:?}", parsed);
    assert!(parsed.items.is_empty());
    Ok(())
}

View File

@ -12,7 +12,7 @@ mod cohost_posts;
mod syndication;
mod webfinger;
use cohost_account::{CohostAccount, COHOST_ACCOUNT_API_URL};
use cohost_posts::{cohost_posts_api_url, CohostPostsPage};
use cohost_posts::{cohost_posts_api_url, CohostPost, CohostPostsPage};
use webfinger::CohostWebfingerResource;
#[derive(Parser, Debug)]
@ -46,6 +46,12 @@ fn index() -> RawHtml<&'static str> {
RawHtml(include_str!("../static/index.html"))
}
/// Responder wrapper that is served with a `text/markdown` content type.
#[derive(Responder)]
#[response(content_type = "text/markdown")]
struct MdResponse {
    /// Response payload; `post_md_route` fills this with a post's `plain_body`.
    inner: String,
}
#[derive(Responder)]
#[response(content_type = "application/rss+xml")]
struct RssResponse {
@ -61,74 +67,63 @@ enum ErrorResponse {
InternalError(String),
}
#[get("/<project>/feed.rss?<page>")]
async fn syndication_rss_route(
project: &str,
page: Option<u64>,
) -> Result<RssResponse, ErrorResponse> {
let page = page.unwrap_or(0);
let project_url = format!("{}{}", COHOST_ACCOUNT_API_URL, project);
let posts_url = cohost_posts_api_url(project, page);
let client = match Client::builder().user_agent(user_agent()).build() {
Ok(v) => v,
Err(e) => {
let err = format!("Couldn't build a reqwest client: {:?}", e);
eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err));
}
};
eprintln!("making request to {}", project_url);
let project_data: CohostAccount = match client.get(project_url).send().await {
Ok(v) => match v.status() {
StatusCode::OK => match v.json::<CohostAccount>().await {
Ok(a) => a,
Err(e) => {
let err = format!("Couldn't deserialize Cohost project '{}': {:?}", project, e);
eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err));
}
},
// TODO NORA: Handle possible redirects
s => {
let err = format!(
"Didn't receive status code 200 for Cohost project '{}'; got {:?} instead.",
project, s
);
eprintln!("{}", err);
return Err(ErrorResponse::NotFound(err));
async fn get_post_from_page(
client: &mut Client,
project_id: &str,
post_id: u64,
) -> Result<CohostPost, ErrorResponse> {
let mut page = 0;
loop {
let new_page = get_page_data(client, project_id, page).await?;
if new_page.items.is_empty() {
// Once there are no posts, we're done.
return Err(ErrorResponse::NotFound(
"End of posts reached, ID not found.".into(),
));
} else {
page += 1;
if let Some(post) = new_page.items.into_iter().find(|post| post.id == post_id) {
return Ok(post);
}
},
Err(e) => {
let err = format!(
"Error making request to Cohost for project '{}': {:?}",
project, e
);
eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err));
}
};
}
}
/// Fetches every posts page for `project_id` and merges them into a single
/// `CohostPostsPage`.
///
/// Page 0 seeds the result (so its non-item fields are the ones kept); the items
/// and `number_items` of each following page are folded into it. Fetching stops
/// at the first page that comes back with no items.
///
/// # Errors
///
/// Propagates any `ErrorResponse` produced by `get_page_data`.
async fn get_full_post_data(
    client: &mut Client,
    project_id: &str,
) -> Result<CohostPostsPage, ErrorResponse> {
    let mut merged_page = get_page_data(client, project_id, 0).await?;
    if merged_page.items.is_empty() {
        // Nothing on the first page, so there is nothing further to merge.
        return Ok(merged_page);
    }
    // Page 0 is already in `merged_page`; continue from page 1 so the first
    // page's posts are not fetched and appended a second time.
    let mut page = 1;
    loop {
        let mut new_page = get_page_data(client, project_id, page).await?;
        if new_page.items.is_empty() {
            // Once there are no posts, we're done.
            break;
        }
        page += 1;
        merged_page.number_items += new_page.number_items;
        merged_page.items.append(&mut new_page.items);
    }
    Ok(merged_page)
}
async fn get_page_data(
client: &mut Client,
project_id: &str,
page: u64,
) -> Result<CohostPostsPage, ErrorResponse> {
let posts_url = cohost_posts_api_url(project_id, page);
eprintln!("making request to {}", posts_url);
match client.get(posts_url).send().await {
Ok(v) => match v.status() {
StatusCode::OK => match v.json::<CohostPostsPage>().await {
Ok(page_data) => {
return Ok(RssResponse {
inner: syndication::channel_for_posts_page(
project,
page,
project_data,
page_data,
)
.to_string(),
});
}
Ok(page_data) => Ok(page_data),
Err(e) => {
let err = format!(
"Couldn't deserialize Cohost posts page for '{}': {:?}",
project, e
project_id, e
);
eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err));
@ -144,66 +139,110 @@ async fn syndication_rss_route(
Err(e) => {
let err = format!(
"Error making request to Cohost for posts for project '{}': {:?}",
project, e
project_id, e
);
eprintln!("{}", err);
return Err(ErrorResponse::InternalError(err));
}
};
}
}
#[get("/.well-known/webfinger?<params..>")]
async fn webfinger_route(params: HashMap<String, String>) -> Option<Json<CohostWebfingerResource>> {
if params.len() != 1 {
eprintln!(
"Too may or too few parameters. Expected 1, got {}",
params.len()
);
return None;
/// Serves the RSS feed for a project.
///
/// Looks up the project, collects all of its posts via `get_full_post_data`,
/// and renders them through `syndication::channel_for_posts_page`.
///
/// # Errors
///
/// Returns the `ErrorResponse` from building the client or from either lookup.
#[get("/<project>/feed.rss")]
async fn syndication_rss_route(project: &str) -> Result<RssResponse, ErrorResponse> {
    let mut client = get_client()?;
    let account = get_project_data(&mut client, project).await?;
    let posts = get_full_post_data(&mut client, project).await?;
    let channel = syndication::channel_for_posts_page(project, account, posts);
    Ok(RssResponse {
        inner: channel.to_string(),
    })
}
/// Serves the `plain_body` of a single post, looked up by its numeric ID.
///
/// # Errors
///
/// Returns the `ErrorResponse` from building the client, from the project
/// lookup, or from `get_post_from_page` when the post cannot be found.
#[get("/<project>/<id>")]
async fn post_md_route(project: &str, id: u64) -> Result<MdResponse, ErrorResponse> {
    let mut client = get_client()?;
    // The account data itself is unused; the lookup runs so that a failing
    // project request ends the handler before any posts are fetched.
    let _account = get_project_data(&mut client, project).await?;
    let post = get_post_from_page(&mut client, project, id).await?;
    Ok(MdResponse {
        inner: post.plain_body,
    })
}
async fn get_project_data(
client: &mut Client,
project_id: &str,
) -> Result<CohostAccount, ErrorResponse> {
let project_url = format!("{}{}", COHOST_ACCOUNT_API_URL, project_id);
eprintln!("making request to {}", project_url);
match client.get(project_url).send().await {
Ok(v) => match v.status() {
StatusCode::OK => match v.json::<CohostAccount>().await {
Ok(a) => Ok(a),
Err(e) => {
let err = format!(
"Couldn't deserialize Cohost project '{}': {:?}",
project_id, e
);
eprintln!("{}", err);
Err(ErrorResponse::InternalError(err))
}
},
// TODO NORA: Handle possible redirects
s => {
let err = format!(
"Didn't receive status code 200 for Cohost project '{}'; got {:?} instead.",
project_id, s
);
eprintln!("{}", err);
Err(ErrorResponse::NotFound(err))
}
},
Err(e) => {
let err = format!(
"Error making request to Cohost for project '{}': {:?}",
project_id, e
);
eprintln!("{}", err);
Err(ErrorResponse::InternalError(err))
}
}
let client = match Client::builder().user_agent(user_agent()).build() {
Ok(v) => v,
}
fn get_client() -> Result<Client, ErrorResponse> {
match Client::builder().user_agent(user_agent()).build() {
Ok(v) => Ok(v),
Err(e) => {
let err = format!("Couldn't build a reqwest client: {:?}", e);
eprintln!("{}", err);
return None;
Err(ErrorResponse::InternalError(err))
}
};
if let Some(param) = params.iter().next() {
let url = format!("{}{}", COHOST_ACCOUNT_API_URL, param.0);
eprintln!("making request to {}", url);
match client.get(url).send().await {
Ok(v) => {
match v.status() {
StatusCode::OK => match v.json::<CohostAccount>().await {
Ok(_v) => {
return Some(Json(CohostWebfingerResource::new(
param.0.as_str(),
&ARGS.domain,
&ARGS.base_url,
)));
}
Err(e) => {
eprintln!("Couldn't deserialize Cohost project '{}': {:?}", param.0, e);
}
},
// TODO NORA: Handle possible redirects
s => {
eprintln!("Didn't receive status code 200 for Cohost project '{}'; got {:?} instead.", param.0, s);
return None;
}
}
}
Err(e) => {
eprintln!(
"Error making request to Cohost for project '{}': {:?}",
param.0, e
);
return None;
}
};
}
None
}
/// Webfinger endpoint for a Cohost project.
///
/// Expects exactly one query parameter, whose key is used as the project name.
/// The project is looked up through `get_project_data` before the resource is
/// returned; the account data itself is not used.
///
/// # Errors
///
/// * `ErrorResponse::InternalError` when the number of query parameters is not 1.
/// * Any `ErrorResponse` from `get_client` or `get_project_data`.
#[get("/.well-known/webfinger?<params..>")]
async fn webfinger_route(
    params: HashMap<String, String>,
) -> Result<Json<CohostWebfingerResource>, ErrorResponse> {
    if params.len() != 1 {
        let err = format!(
            "Too many or too few parameters. Expected 1, got {}",
            params.len()
        );
        eprintln!("{}", err);
        return Err(ErrorResponse::InternalError(err));
    }
    // The length check above guarantees one entry; the `None` arm is kept only
    // as a defensive fallback with the original error message.
    let project = match params.keys().next() {
        Some(name) => name.as_str(),
        None => return Err(ErrorResponse::NotFound("No project ID provided.".into())),
    };
    let mut client = get_client()?;
    let _project_data = get_project_data(&mut client, project).await?;
    Ok(Json(CohostWebfingerResource::new(
        project,
        &ARGS.domain,
        &ARGS.base_url,
    )))
}
#[rocket::main]
@ -213,7 +252,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
let _rocket = rocket::build()
.mount(
&ARGS.base_url,
routes![index, webfinger_route, syndication_rss_route],
routes![index, webfinger_route, syndication_rss_route, post_md_route],
)
.ignite()
.await?

View File

@ -20,7 +20,6 @@ fn rel_link_for(rel: &str, project_name: &str, page_number: u64) -> Link {
pub fn channel_for_posts_page(
project_name: impl AsRef<str>,
page_number: u64,
project: CohostAccount,
mut page: CohostPostsPage,
) -> Channel {
@ -34,23 +33,13 @@ pub fn channel_for_posts_page(
env!("CARGO_CRATE_NAME"),
env!("CARGO_PKG_VERSION")
)))
.link(format!(
"https://cohost.org/{}?page={}",
project_name, page_number
));
.link(format!("https://cohost.org/{}", project_name,));
let mut atom = AtomExtensionBuilder::default();
let mut links = vec![
rel_link_for("self", project_name, page_number),
let links = vec![
rel_link_for("self", project_name, 0),
rel_link_for("first", project_name, 0),
];
if page_number > 0 {
links.push(rel_link_for("previous", project_name, page_number - 1));
}
if page.number_items > 0 {
// Cohost API is wrong about the pagination so we can only guess. If there are posts, there might be more posts.
links.push(rel_link_for("next", project_name, page_number + 1));
}
atom.links(links);
builder.atom_ext(Some(atom.build()));

View File

@ -30,6 +30,11 @@
Go to <code>/project_name/feed.rss</code> to get a feed for a project.
For example, <a href="/noracodes/feed.rss"><code>/noracodes/feed.rss</code></a> will give you the feed for my page.
</p>
<p>
You can also get a particular post's original plain-text body at <code>/project_name/post_id/</code>, such as
<a href="/noracodes/169186/"><code>/noracodes/169186/</code></a>. (In a Cohost post URL, the ID is the numerical part after <code>/post/</code>.
For instance, in <code>https://cohost.org/noracodes/post/169186-october-update</code>, the ID is "169186".)
</p>
<p>
Webfinger resources for accounts are provided at the Webfinger well-known URL <code>/.well-known/webfinger?project_name</code>.
</p>