Compare commits
No commits in common. "6e782a72f7aefc12c9530c7ebda46292ee7f15aa" and "caa0723c1ce36daebf954970b40502862b9cccfa" have entirely different histories.
6e782a72f7
...
caa0723c1c
|
@ -1,3 +1,2 @@
|
|||
/target
|
||||
mastodon-data.toml
|
||||
http-cacache
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,12 +1,13 @@
|
|||
[package]
|
||||
name = "corobel"
|
||||
version = "0.5.0"
|
||||
version = "0.4.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.0.18", features = [ "derive" ] }
|
||||
eggbug = { version = "0.1.2", features = [ "tokio" ] }
|
||||
reqwest = "0.11.12"
|
||||
rocket = { version = "0.5.0-rc.2", features = [ "json" ] }
|
||||
serde = { version = "1.0.147", features = [ "derive" ] }
|
||||
|
@ -17,8 +18,3 @@ chrono = { version = "0.4.22", features = [ "serde" ] }
|
|||
rss = { path = "./rss/", version = "2.0.1-atom-link-fix", features = [ "builders", "atom", "chrono" ] }
|
||||
pulldown-cmark = "0.9.2"
|
||||
atom_syndication = "0.11.0"
|
||||
http-cache-reqwest = "0.5.0"
|
||||
reqwest-middleware = "0.1.6"
|
||||
cached = "0.40.0"
|
||||
mime = "0.3.16"
|
||||
mime_guess = "2.0.4"
|
||||
|
|
10
README.md
10
README.md
|
@ -16,15 +16,11 @@ ports to use for development and deployment.
|
|||
- [ ] Handle redirects
|
||||
- [x] RSS feeds for projects
|
||||
- [x] Index page explaining what's going on
|
||||
- [x] Better support for transparent shares
|
||||
- [x] Add feed without shares
|
||||
- [ ] More robust parsing (defaults for all!)
|
||||
- [ ] RSS feeds for tags
|
||||
- [x] Atom Extension pagination support
|
||||
- [x] Disable pagination
|
||||
- [x] HTTP Caching
|
||||
- [x] Data caching
|
||||
- [x] Nicer theme
|
||||
- [x] Disable pagination and just go for it lmao
|
||||
- [ ] Read More support
|
||||
- [ ] Dublin Core support
|
||||
- [x] Media Envelope support
|
||||
- [ ] Media Envelope support
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -3,7 +3,7 @@ use serde::Deserialize;
|
|||
/// The API URL from which Cohost serves JSON project definitions
|
||||
pub const COHOST_ACCOUNT_API_URL: &str = "https://cohost.org/api/v1/project/";
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
|
||||
#[derive(Debug, Deserialize, PartialEq, Eq)]
|
||||
pub struct CohostAccount {
|
||||
#[serde(rename = "projectId")]
|
||||
pub project_id: u64,
|
||||
|
|
|
@ -11,7 +11,7 @@ pub fn cohost_posts_api_url(project: impl AsRef<str>, page: u64) -> String {
|
|||
|
||||
// Cohost doesn't give us Next links ("rel: next") for further pages, so we'll have to ALWAYS populate the rel=next field
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct CohostPostsPage {
|
||||
#[serde(rename = "nItems")]
|
||||
pub number_items: usize,
|
||||
|
@ -22,7 +22,7 @@ pub struct CohostPostsPage {
|
|||
pub links: Vec<CohostPostLink>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct CohostPost {
|
||||
#[serde(rename = "postId")]
|
||||
pub id: u64,
|
||||
|
@ -44,17 +44,13 @@ pub struct CohostPost {
|
|||
default
|
||||
)]
|
||||
pub url: String,
|
||||
#[serde(deserialize_with = "deserialize_null_default", default)]
|
||||
pub blocks: Vec<CohostPostBlock>,
|
||||
#[serde(rename = "transparentShareOfPostId")]
|
||||
pub transparent_share_of_post_id: Option<u64>,
|
||||
#[serde(rename = "postingProject")]
|
||||
pub poster: CohostPostingProject,
|
||||
#[serde(rename = "shareTree")]
|
||||
pub share_tree: Vec<CohostPost>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct CohostPostingProject {
|
||||
#[serde(rename = "projectId")]
|
||||
pub id: u64,
|
||||
|
@ -74,7 +70,7 @@ pub struct CohostPostingProject {
|
|||
pub pronouns: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct CohostPostLink {
|
||||
#[serde(deserialize_with = "deserialize_null_default", default)]
|
||||
pub href: String,
|
||||
|
@ -88,21 +84,6 @@ pub struct CohostPostLink {
|
|||
pub t_type: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct CohostPostBlock {
|
||||
pub attachment: Option<CohostPostAttachment>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct CohostPostAttachment {
|
||||
#[serde(
|
||||
rename = "fileURL",
|
||||
deserialize_with = "deserialize_null_default",
|
||||
default
|
||||
)]
|
||||
pub file_url: String,
|
||||
}
|
||||
|
||||
fn deserialize_null_default<'de, D, T>(deserializer: D) -> Result<T, D::Error>
|
||||
where
|
||||
T: Default + Deserialize<'de>,
|
||||
|
|
130
src/main.rs
130
src/main.rs
|
@ -3,10 +3,7 @@ use std::collections::HashMap;
|
|||
use std::error::Error;
|
||||
#[macro_use]
|
||||
extern crate rocket;
|
||||
use cached::proc_macro::cached;
|
||||
use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache};
|
||||
use reqwest::StatusCode;
|
||||
use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
|
||||
use reqwest::{Client, StatusCode};
|
||||
use rocket::response::content::RawHtml;
|
||||
use rocket::serde::json::Json;
|
||||
|
||||
|
@ -43,20 +40,6 @@ fn user_agent() -> String {
|
|||
}
|
||||
|
||||
static ARGS: once_cell::sync::Lazy<Args> = once_cell::sync::Lazy::new(|| Args::parse());
|
||||
static CLIENT: once_cell::sync::Lazy<ClientWithMiddleware> = once_cell::sync::Lazy::new(|| {
|
||||
ClientBuilder::new(
|
||||
reqwest::Client::builder()
|
||||
.user_agent(user_agent())
|
||||
.build()
|
||||
.unwrap(),
|
||||
)
|
||||
.with(Cache(HttpCache {
|
||||
mode: CacheMode::Default,
|
||||
manager: CACacheManager::default(),
|
||||
options: None,
|
||||
}))
|
||||
.build()
|
||||
});
|
||||
|
||||
#[get("/")]
|
||||
fn index() -> RawHtml<&'static str> {
|
||||
|
@ -69,13 +52,13 @@ struct MdResponse {
|
|||
inner: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Responder)]
|
||||
#[derive(Responder)]
|
||||
#[response(content_type = "application/rss+xml")]
|
||||
struct RssResponse {
|
||||
inner: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Responder)]
|
||||
#[derive(Responder)]
|
||||
#[response(content_type = "text/plain")]
|
||||
enum ErrorResponse {
|
||||
#[response(status = 404)]
|
||||
|
@ -84,11 +67,14 @@ enum ErrorResponse {
|
|||
InternalError(String),
|
||||
}
|
||||
|
||||
#[cached(time = 60, result)]
|
||||
async fn get_post_from_page(project_id: String, post_id: u64) -> Result<CohostPost, ErrorResponse> {
|
||||
async fn get_post_from_page(
|
||||
client: &mut Client,
|
||||
project_id: &str,
|
||||
post_id: u64,
|
||||
) -> Result<CohostPost, ErrorResponse> {
|
||||
let mut page = 0;
|
||||
loop {
|
||||
let new_page = get_page_data(project_id.clone(), page).await?;
|
||||
let new_page = get_page_data(client, project_id, page).await?;
|
||||
if new_page.items.is_empty() {
|
||||
// Once there are no posts, we're done.
|
||||
return Err(ErrorResponse::NotFound(
|
||||
|
@ -103,12 +89,14 @@ async fn get_post_from_page(project_id: String, post_id: u64) -> Result<CohostPo
|
|||
}
|
||||
}
|
||||
|
||||
#[cached(time = 120, result)]
|
||||
async fn get_full_post_data(project_id: String) -> Result<CohostPostsPage, ErrorResponse> {
|
||||
async fn get_full_post_data(
|
||||
client: &mut Client,
|
||||
project_id: &str,
|
||||
) -> Result<CohostPostsPage, ErrorResponse> {
|
||||
let mut page = 0;
|
||||
let mut merged_page = get_page_data(project_id.clone(), page).await?;
|
||||
let mut merged_page = get_page_data(client, project_id, page).await?;
|
||||
loop {
|
||||
let mut new_page = get_page_data(project_id.clone(), page).await?;
|
||||
let mut new_page = get_page_data(client, project_id, page).await?;
|
||||
if new_page.items.is_empty() {
|
||||
// Once there are no posts, we're done.
|
||||
break;
|
||||
|
@ -121,11 +109,14 @@ async fn get_full_post_data(project_id: String) -> Result<CohostPostsPage, Error
|
|||
Ok(merged_page)
|
||||
}
|
||||
|
||||
// Not cached because it's never used individually.
|
||||
async fn get_page_data(project_id: String, page: u64) -> Result<CohostPostsPage, ErrorResponse> {
|
||||
let posts_url = cohost_posts_api_url(&project_id, page);
|
||||
async fn get_page_data(
|
||||
client: &mut Client,
|
||||
project_id: &str,
|
||||
page: u64,
|
||||
) -> Result<CohostPostsPage, ErrorResponse> {
|
||||
let posts_url = cohost_posts_api_url(project_id, page);
|
||||
eprintln!("making request to {}", posts_url);
|
||||
match CLIENT.get(posts_url).send().await {
|
||||
match client.get(posts_url).send().await {
|
||||
Ok(v) => match v.status() {
|
||||
StatusCode::OK => match v.json::<CohostPostsPage>().await {
|
||||
Ok(page_data) => Ok(page_data),
|
||||
|
@ -156,11 +147,35 @@ async fn get_page_data(project_id: String, page: u64) -> Result<CohostPostsPage,
|
|||
}
|
||||
}
|
||||
|
||||
#[cached(time = 60, result)]
|
||||
async fn get_project_data(project_id: String) -> Result<CohostAccount, ErrorResponse> {
|
||||
#[get("/<project>/feed.rss")]
|
||||
async fn syndication_rss_route(project: &str) -> Result<RssResponse, ErrorResponse> {
|
||||
let mut client = get_client()?;
|
||||
|
||||
let project_data = get_project_data(&mut client, project).await?;
|
||||
let page_data = get_full_post_data(&mut client, project).await?;
|
||||
Ok(RssResponse {
|
||||
inner: syndication::channel_for_posts_page(project, project_data, page_data).to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
#[get("/<project>/<id>")]
|
||||
async fn post_md_route(project: &str, id: u64) -> Result<MdResponse, ErrorResponse> {
|
||||
let mut client = get_client()?;
|
||||
|
||||
let _project_data = get_project_data(&mut client, project).await?;
|
||||
let post_data = get_post_from_page(&mut client, project, id).await?;
|
||||
Ok(MdResponse {
|
||||
inner: post_data.plain_body,
|
||||
})
|
||||
}
|
||||
|
||||
async fn get_project_data(
|
||||
client: &mut Client,
|
||||
project_id: &str,
|
||||
) -> Result<CohostAccount, ErrorResponse> {
|
||||
let project_url = format!("{}{}", COHOST_ACCOUNT_API_URL, project_id);
|
||||
eprintln!("making request to {}", project_url);
|
||||
match CLIENT.get(project_url).send().await {
|
||||
match client.get(project_url).send().await {
|
||||
Ok(v) => match v.status() {
|
||||
StatusCode::OK => match v.json::<CohostAccount>().await {
|
||||
Ok(a) => Ok(a),
|
||||
|
@ -194,33 +209,15 @@ async fn get_project_data(project_id: String) -> Result<CohostAccount, ErrorResp
|
|||
}
|
||||
}
|
||||
|
||||
#[get("/<project>/originals.rss")]
|
||||
async fn syndication_originals_rss_route(project: String) -> Result<RssResponse, ErrorResponse> {
|
||||
let project_data = get_project_data(project.clone()).await?;
|
||||
let page_data = get_full_post_data(project.clone()).await?;
|
||||
Ok(RssResponse {
|
||||
inner: syndication::channel_for_posts_page(project.clone(), project_data, page_data, true)
|
||||
.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
#[get("/<project>/feed.rss")]
|
||||
async fn syndication_rss_route(project: String) -> Result<RssResponse, ErrorResponse> {
|
||||
let project_data = get_project_data(project.clone()).await?;
|
||||
let page_data = get_full_post_data(project.clone()).await?;
|
||||
Ok(RssResponse {
|
||||
inner: syndication::channel_for_posts_page(project.clone(), project_data, page_data, false)
|
||||
.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
#[get("/<project>/<id>")]
|
||||
async fn post_md_route(project: String, id: u64) -> Result<MdResponse, ErrorResponse> {
|
||||
let _project_data = get_project_data(project.clone()).await?;
|
||||
let post_data = get_post_from_page(project.clone(), id).await?;
|
||||
Ok(MdResponse {
|
||||
inner: post_data.plain_body,
|
||||
})
|
||||
fn get_client() -> Result<Client, ErrorResponse> {
|
||||
match Client::builder().user_agent(user_agent()).build() {
|
||||
Ok(v) => Ok(v),
|
||||
Err(e) => {
|
||||
let err = format!("Couldn't build a reqwest client: {:?}", e);
|
||||
eprintln!("{}", err);
|
||||
Err(ErrorResponse::InternalError(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[get("/.well-known/webfinger?<params..>")]
|
||||
|
@ -235,8 +232,9 @@ async fn webfinger_route(
|
|||
eprintln!("{}", err);
|
||||
return Err(ErrorResponse::InternalError(err));
|
||||
}
|
||||
let mut client = get_client()?;
|
||||
if let Some(param) = params.iter().next() {
|
||||
let _project_data = get_project_data(param.0.clone()).await?;
|
||||
let _project_data = get_project_data(&mut client, param.0.as_str()).await?;
|
||||
Ok(Json(CohostWebfingerResource::new(
|
||||
param.0.as_str(),
|
||||
&ARGS.domain,
|
||||
|
@ -254,13 +252,7 @@ async fn main() -> Result<(), Box<dyn Error>> {
|
|||
let _rocket = rocket::build()
|
||||
.mount(
|
||||
&ARGS.base_url,
|
||||
routes![
|
||||
index,
|
||||
webfinger_route,
|
||||
syndication_rss_route,
|
||||
syndication_originals_rss_route,
|
||||
post_md_route
|
||||
],
|
||||
routes![index, webfinger_route, syndication_rss_route, post_md_route],
|
||||
)
|
||||
.ignite()
|
||||
.await?
|
||||
|
|
|
@ -22,16 +22,11 @@ pub fn channel_for_posts_page(
|
|||
project_name: impl AsRef<str>,
|
||||
project: CohostAccount,
|
||||
mut page: CohostPostsPage,
|
||||
originals_only: bool,
|
||||
) -> Channel {
|
||||
let project_name = project_name.as_ref().clone();
|
||||
let mut builder = rss::ChannelBuilder::default();
|
||||
builder
|
||||
.title(format!(
|
||||
"{} Cohost Posts{}",
|
||||
project.display_name,
|
||||
if originals_only { "" } else { " and Shares" }
|
||||
))
|
||||
.title(format!("{} Cohost Posts", project.display_name))
|
||||
.description(project.description)
|
||||
.generator(Some(format!(
|
||||
"{} {}",
|
||||
|
@ -77,15 +72,7 @@ pub fn channel_for_posts_page(
|
|||
|
||||
let mut body_text = String::new();
|
||||
|
||||
if let Some(shared_post_id) = item.transparent_share_of_post_id {
|
||||
if originals_only {
|
||||
continue;
|
||||
}
|
||||
body_text.push_str(&format!(
|
||||
"(share of post {} without any commentary)\n\n---\n\n",
|
||||
shared_post_id
|
||||
));
|
||||
} else if item.share_tree.len() == 1 {
|
||||
if item.share_tree.len() == 1 {
|
||||
body_text.push_str("(in reply to another post)\n\n---\n\n")
|
||||
} else if item.share_tree.len() > 1 {
|
||||
body_text.push_str(&format!(
|
||||
|
@ -121,21 +108,8 @@ pub fn channel_for_posts_page(
|
|||
let parser = pulldown_cmark::Parser::new_ext(&body_text, options);
|
||||
let mut html_output = String::new();
|
||||
pulldown_cmark::html::push_html(&mut html_output, parser);
|
||||
item_builder.content(html_output);
|
||||
|
||||
for attachment in item.blocks.into_iter().filter_map(|block| block.attachment) {
|
||||
use mime_guess::from_path as guess_mime_from_path;
|
||||
use rss::EnclosureBuilder;
|
||||
let enclosure = EnclosureBuilder::default()
|
||||
.mime_type(
|
||||
guess_mime_from_path(&attachment.file_url)
|
||||
.first_or_octet_stream()
|
||||
.to_string(),
|
||||
)
|
||||
.url(attachment.file_url)
|
||||
.build();
|
||||
item_builder.enclosure(enclosure);
|
||||
}
|
||||
item_builder.content(html_output);
|
||||
|
||||
items.push(item_builder.build());
|
||||
}
|
||||
|
|
|
@ -20,67 +20,28 @@
|
|||
line-height: 1.75;
|
||||
font-size: 1.25em;
|
||||
}
|
||||
h1,h2,h3,h4,h5,h6 {
|
||||
font-family: sans-serif;
|
||||
}
|
||||
h1 {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
code {
|
||||
font-family: monospace;
|
||||
background-color: black;
|
||||
color: white;
|
||||
display: inline-block;
|
||||
padding: 0px 4px;
|
||||
border-radius: 4px;
|
||||
}
|
||||
a code {
|
||||
color: white;
|
||||
background-color: darkblue;
|
||||
}
|
||||
a:hover code {
|
||||
color: darkblue;
|
||||
background-color: white;
|
||||
}
|
||||
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>corobel</h1>
|
||||
<h2>Standard Data from Cohost Posts and Projects</h2>
|
||||
<h2>RSS feeds from Cohost pages</h2>
|
||||
<p>
|
||||
<h3>Project RSS Feeds</h3>
|
||||
Go to <code>/project_name/feed.rss</code> to get a feed for a project, or <code>/project_name/originals.rss</code> for just original posts (including shared posts with commentary).
|
||||
For example, <a href="/noracodes/feed.rss"><code>/noracodes/feed.rss</code></a> will give you the feed for my page,
|
||||
or <a href="/noracodes/originals.rss"><code>/noracodes/originals.rss</code></a> for just my original posts.
|
||||
Go to <code>/project_name/feed.rss</code> to get a feed for a project.
|
||||
For example, <a href="/noracodes/feed.rss"><code>/noracodes/feed.rss</code></a> will give you the feed for my page.
|
||||
</p>
|
||||
<p>
|
||||
<h3>Markdown Extraction</h3>
|
||||
You can also get a particular post's original plain-text body at <code>/project_name/post_id/</code>, such as
|
||||
<a href="/noracodes/169186/"><code>/noracodes/169186/</code></a>. (In a Cohost post URL, the ID is the numerical part after <code>/post/</code>.
|
||||
For instance, in <code>https://cohost.org/noracodes/post/169186-october-update</code>, the ID is "169186".)
|
||||
|
||||
Or, drag this bookmarklet: <a href="javascript:(function(){const regex = /^https:\/\/cohost.org\/([a-zA-Z_\-0-9]*)\/post\/([0-9]*)-.*/;const new_loc = window.location.href.replace(regex, 'https://corobel.nora.codes/$1/$2');window.open(new_loc);})()">
|
||||
Or, drag this bookmarklet: <a href="javascript:(function(){const regex = /^https:\/\/cohost.org\/([a-zA-Z_0-9]*)\/post\/([0-9]*)-.*/;const new_loc = window.location.href.replace(regex, 'https://corobel.nora.codes/$1/$2');window.open(new_loc);})()">
|
||||
Cohost: Extract Source
|
||||
</a> to your bookmarks bar and then click on it when you're on a Cohost individual post page to download that post's source.
|
||||
</p>
|
||||
<p>
|
||||
<h3>Webfinger Resources</h3>
|
||||
Webfinger resources for accounts are provided at the Webfinger well-known URL <code>/.well-known/webfinger?project_name</code>.
|
||||
</p>
|
||||
<p>
|
||||
<h3>Technical Details</h3>
|
||||
Since 0.5.0, Corobel caches various responses to provide better service.
|
||||
<ul>
|
||||
<li>Project/account data for <b>60 seconds</b></li>
|
||||
<li>Individual posts for <b>60 seconds</b></li>
|
||||
<li>Whole RSS feeds for <b>120 seconds</b></li>
|
||||
<li>Internal HTTP responses <b>according to Cohost's settings</b></li>
|
||||
</ul>
|
||||
This means that if you update a post and then immediately request its source, you might get the old source. Just wait a few seconds.
|
||||
</p>
|
||||
<p>
|
||||
Brought to you by <a href="https://nora.codes">Leonora Tindall</a>, written in Rust with Rocket. Code is <a href="https://git.nora.codes/nora/corobel">online</a>, bug reports should go to my email nora@nora.codes.
|
||||
</p>
|
||||
|
|
Loading…
Reference in New Issue