This commit is contained in:
암냥 2026-04-15 18:19:38 +09:00
commit b12ebb725d
No known key found for this signature in database
7 changed files with 222 additions and 198 deletions

View file

@ -1,206 +1,17 @@
import { Elysia, t } from "elysia";
import config from "../config.toml";
import { S3Client } from "bun";
import {
HeadObjectCommand,
PutObjectCommand,
S3Client as AwsS3Client,
} from "@aws-sdk/client-s3";
import * as mongoose from "mongoose";
import openapi from "@elysiajs/openapi";
import { MediaUpload } from "@/models/media";
import { Tag } from "@/models/tag";
import { checkTweetData, fetchTweetData } from "./lib/tweet";
import { makeS3FileName, s3Client, uploadToS3 } from "./lib/s3";
import { normalizeQueryTags, normalizeTags } from "./lib/tag";
// Connect to MongoDB using the URI from the TOML config. This is a top-level
// await, so evaluation of this module pauses until the connection resolves.
await mongoose.connect(config.mongodb.uri);
// Shape of a stored media record (one document per tweet media item).
const mediaUploadSchema = new mongoose.Schema({
// Numeric status id; checkTweetData extracts it from the tweet URL.
tweetId: { type: String, required: true },
// Tweet payload stored as a free-form object (no nested validation).
tweet: { type: Object, required: true },
// Position of the media item within the tweet.
mediaIndex: { type: Number, required: true },
mediaUrl: { type: String, required: true },
// presumably the object key produced by makeS3FileName — TODO confirm
s3Key: { type: String, required: true },
tags: { type: [String], default: [] },
author: { type: String, required: true },
}, {
// Let Mongoose maintain createdAt / updatedAt.
timestamps: true,
});
// Tag bookkeeping: one document per unique tag name.
const tagSchema = new mongoose.Schema({
name: { type: String, required: true, unique: true },
usageCount: { type: Number, default: 0 },
// Date.now is passed as a function, so the default is computed per document.
lastUsedAt: { type: Date, default: Date.now },
}, {
// Let Mongoose maintain createdAt / updatedAt.
timestamps: true,
});
// Reuse a model that is already registered under the same name; only compile
// a new one when none exists yet.
const MediaUpload = mongoose.models.MediaUpload || mongoose.model("MediaUpload", mediaUploadSchema);
const Tag = mongoose.models.Tag || mongoose.model("Tag", tagSchema);
// Set of string keys; its use is outside this view — presumably the keys of
// uploads currently in progress (see buildUploadKey). TODO confirm.
const inFlightUploads = new Set<string>();
// Bun's built-in S3 client, bound to the configured bucket and endpoint.
// uploadToS3WithRetry tries this client first.
const client = new S3Client({
accessKeyId: config.s3.access_key,
secretAccessKey: config.s3.secret_key,
bucket: config.s3.bucket,
endpoint: config.s3.endpoint,
});
// AWS SDK client for the same endpoint and credentials. uploadToS3WithRetry
// falls back to it when a call on the Bun client throws. No bucket is bound
// here; callers pass `Bucket` on every command.
const awsClient = new AwsS3Client({
region: "auto",
endpoint: config.s3.endpoint,
// Address the bucket in the URL path instead of as a subdomain.
forcePathStyle: true,
credentials: {
accessKeyId: config.s3.access_key,
secretAccessKey: config.s3.secret_key,
},
});
/**
 * Reports whether any of the selected media items of a tweet is already stored.
 *
 * @param url - Tweet URL containing a `/status/<digits>` segment.
 * @param selected - Flags per media position; `true` marks a selected item.
 * @returns `true` when a matching record exists, otherwise `null`.
 * @throws Error("Invalid tweet URL") when no status id can be extracted.
 */
async function checkTweetData(url: string, selected: Array<boolean>) {
  const statusMatch = /\/status\/(\d+)/.exec(url);
  if (statusMatch === null) {
    throw new Error("Invalid tweet URL");
  }
  const tweetId = statusMatch[1];

  // Collect the positions that were ticked by the caller.
  const selectedIndexes = selected.flatMap((isSelected, position) =>
    isSelected ? [position] : [],
  );

  const alreadyStored = await MediaUpload.findOne({
    tweetId,
    mediaIndex: { $in: selectedIndexes },
  });
  return alreadyStored ? true : null;
}
/**
 * Fetches tweet metadata from the FxTwitter API for a tweet link.
 *
 * The scheme and host of known Twitter front-ends are stripped and the rest
 * of the URL is appended to `https://api.fxtwitter.com/`. A URL whose host is
 * not in the list is appended unchanged.
 *
 * @param url - Tweet link, e.g. `https://x.com/<user>/status/<id>`.
 * @returns The parsed JSON body of the API response.
 * @throws Error when the API answers with a non-2xx status.
 */
async function fetchTweetData(url: string) {
  // `vxtwitter.com` is the actual mirror domain; the earlier `vxwitter.com`
  // spelling is kept so inputs that used to match still do.
  const tweetPath = url.replace(
    /^https?:\/\/(www\.)?(x\.com|twitter\.com|fxtwitter\.com|fixupx\.com|vxtwitter\.com|vxwitter\.com)\//,
    "",
  );
  const response = await fetch(`https://api.fxtwitter.com/${tweetPath}`);
  if (!response.ok) {
    throw new Error(`Failed to fetch tweet data: ${response.status} ${response.statusText}`);
  }
  return await response.json();
}
/**
 * Builds the S3 object key for one media item of a tweet.
 *
 * The key is `twitter/<authorId>/<tweetId>/<name>`, where `<name>` is the last
 * path segment of `mediaUrl` with every character outside `[a-zA-Z0-9._-]`
 * replaced by `_`. When the URL has no usable last segment, a generated
 * `media_<timestamp>_<index>` name is used instead.
 *
 * @param authorId - Id of the tweet author.
 * @param tweetId - Id of the tweet.
 * @param mediaUrl - Source URL of the media item.
 * @param index - Position of the media item; only used in the fallback name.
 * @returns The object key.
 */
function makeS3FileName(authorId: string, tweetId: string, mediaUrl: string, index: number) {
  const fallbackName = `media_${Date.now()}_${index}`;
  // Drop the query string and fragment BEFORE looking for the last "/", so a
  // "/" inside them (e.g. `?next=/a/b`) cannot be mistaken for a path separator.
  const pathOnly = mediaUrl.split(/[?#]/, 1)[0] ?? "";
  const lastSegment = pathOnly.split("/").pop() ?? "";
  const safeName = lastSegment.replace(/[^a-zA-Z0-9._-]/g, "_");
  return `twitter/${authorId}/${tweetId}/${safeName || fallbackName}`;
}
/**
 * Downloads `mediaUrl` and stores the bytes in S3 under `fileName`.
 *
 * Every attempt downloads the media again and writes it with Bun's S3 client,
 * falling back to the AWS SDK client when that write throws. A failed attempt
 * is followed by a delay of 800 ms × attempt number before the next one.
 * Because some providers report an error although the object was stored, the
 * function probes for the object before it gives up.
 *
 * @param fileName - Object key to write.
 * @param mediaUrl - URL the media is downloaded from.
 * @param maxRetry - Maximum number of download-and-write attempts (default 3).
 * @throws The error of the last failed attempt, or an `Error` when no attempt
 *   ran (`maxRetry` < 1) and the object is not present.
 */
async function uploadToS3WithRetry(fileName: string, mediaUrl: string, maxRetry = 3) {
  /** Existence check via Bun's client; HEAD through the AWS SDK if that throws. */
  async function existsInS3(key: string) {
    try {
      return await client.exists(key);
    } catch {
      try {
        await awsClient.send(new HeadObjectCommand({
          Bucket: config.s3.bucket,
          Key: key,
        }));
        return true;
      } catch {
        // Any HEAD failure is treated as "not there" (best effort).
        return false;
      }
    }
  }

  /** Writes the body with Bun's client; PUT through the AWS SDK if that throws. */
  async function writeToS3(key: string, body: Uint8Array, mediaType?: string | null) {
    try {
      // Forward the content type here as well, so the stored metadata does not
      // depend on which of the two clients ended up performing the write.
      await client.write(key, body, mediaType ? { type: mediaType } : undefined);
      return;
    } catch (bunWriteError) {
      console.warn(`[S3 bun write failed, fallback to aws-sdk] key=${key}`, bunWriteError);
      await awsClient.send(new PutObjectCommand({
        Bucket: config.s3.bucket,
        Key: key,
        Body: body,
        ContentType: mediaType ?? undefined,
      }));
    }
  }

  /** Probes up to four times (600 ms × probe delay) whether the object exists. */
  async function recoverByPollingExists(reason: string) {
    for (let probe = 1; probe <= 4; probe++) {
      await Bun.sleep(probe * 600);
      try {
        if (await existsInS3(fileName)) {
          console.warn(`[S3 upload recovered-${reason}] key=${fileName} probe=${probe}`);
          return true;
        }
      } catch (existsError) {
        console.error(`[S3 exists probe failed] key=${fileName} probe=${probe}`, existsError);
      }
    }
    return false;
  }

  let lastError: unknown;
  for (let attempt = 1; attempt <= maxRetry; attempt++) {
    try {
      const response = await fetch(mediaUrl);
      if (!response.ok) {
        throw new Error(`Failed to fetch media from ${mediaUrl}: ${response.status} ${response.statusText}`);
      }
      const arrayBuffer = await response.arrayBuffer();
      const buffer = Buffer.from(arrayBuffer);
      await writeToS3(fileName, buffer, response.headers.get("content-type"));
      return;
    } catch (error) {
      lastError = error;
      console.error(`[S3 upload attempt ${attempt}/${maxRetry}] key=${fileName} url=${mediaUrl}`, error);
      const errorCode =
        typeof error === "object" && error !== null && "code" in error
          ? String((error as { code?: unknown }).code)
          : "";
      // Some S3 providers return UnknownError even when the object is eventually persisted.
      if (errorCode === "UnknownError") {
        if (await recoverByPollingExists("unknown")) {
          return;
        }
      }
      if (attempt < maxRetry) {
        await Bun.sleep(attempt * 800);
      }
    }
  }

  // Final guard: do one last exists check before surfacing failure.
  if (await recoverByPollingExists("final")) {
    return;
  }
  // With maxRetry < 1 the loop never ran and there is no captured error;
  // raise a real Error instead of throwing `undefined`.
  throw lastError ?? new Error(`S3 upload was not attempted: key=${fileName} maxRetry=${maxRetry}`);
}
/**
 * Trims tags, discards blank ones and removes duplicates (first occurrence
 * wins, order preserved).
 *
 * @param tags - Raw tag strings.
 * @returns The cleaned tags, or `["미분류"]` when nothing remains.
 */
function normalizeTags(tags: string[]) {
  const cleaned = new Set<string>();
  for (const rawTag of tags) {
    const trimmed = rawTag.trim();
    if (trimmed.length > 0) {
      cleaned.add(trimmed);
    }
  }
  // Fall back to the default bucket tag when no usable tag was supplied.
  return cleaned.size > 0 ? Array.from(cleaned) : ["미분류"];
}
/**
 * Normalizes a `tags` query parameter that may be absent, a single string or
 * a list of strings.
 *
 * @param tags - Raw query value.
 * @returns `[]` when the value is missing or an empty string; otherwise the
 *   result of `normalizeTags` on the value(s).
 */
function normalizeQueryTags(tags?: string | string[]) {
  // A single value is wrapped so that both shapes go through the same path.
  return tags ? normalizeTags(Array.isArray(tags) ? tags : [tags]) : [];
}
function buildUploadKey(url: string, selected: boolean[]) {
const match = url.match(/\/status\/(\d+)/);
const tweetId = match?.[1] ?? url;
@ -315,10 +126,10 @@ const app = new Elysia()
const fileName = makeS3FileName(tweetData.tweet.author.id, tweetData.tweet.id, url, index);
try {
if (await client.exists(fileName)) {
if (await s3Client.exists(fileName)) {
console.log(`File ${fileName} already exists in S3, skipping upload.`);
} else {
await uploadToS3WithRetry(fileName, url);
await uploadToS3(fileName, url);
console.log(`Uploaded ${fileName} to S3`);
}
@ -327,7 +138,6 @@ const app = new Elysia()
await MediaUpload.create({
type: "twitter",
tweetId: tweetData.tweet.id,
tweet: tweetWithoutMedia,
mediaIndex: index,
mediaUrl: `${config.s3.endpoint}/${config.s3.bucket}/${fileName}`,

124
apps/backend/src/lib/s3.ts Normal file
View file

@ -0,0 +1,124 @@
import { S3Client } from "bun";
import {
HeadObjectCommand,
PutObjectCommand,
S3Client as AwsS3Client,
} from "@aws-sdk/client-s3";
import config from "@/../config.toml";
// Bun's built-in S3 client, bound to the configured bucket and endpoint.
// uploadToS3 tries this client first; it is also exported as `s3Client`.
const client = new S3Client({
accessKeyId: config.s3.access_key,
secretAccessKey: config.s3.secret_key,
bucket: config.s3.bucket,
endpoint: config.s3.endpoint,
});
// AWS SDK client for the same endpoint and credentials. uploadToS3 falls back
// to it when a call on the Bun client throws. No bucket is bound here; callers
// pass `Bucket` on every command.
const awsClient = new AwsS3Client({
region: "auto",
endpoint: config.s3.endpoint,
// Address the bucket in the URL path instead of as a subdomain.
forcePathStyle: true,
credentials: {
accessKeyId: config.s3.access_key,
secretAccessKey: config.s3.secret_key,
},
});
/**
 * Builds the S3 object key for one media item of a tweet.
 *
 * The key is `twitter/<authorId>/<tweetId>/<name>`, where `<name>` is the last
 * path segment of `mediaUrl` with every character outside `[a-zA-Z0-9._-]`
 * replaced by `_`. When the URL has no usable last segment, a generated
 * `media_<timestamp>_<index>` name is used instead.
 *
 * @param authorId - Id of the tweet author.
 * @param tweetId - Id of the tweet.
 * @param mediaUrl - Source URL of the media item.
 * @param index - Position of the media item; only used in the fallback name.
 * @returns The object key.
 */
function makeS3FileName(authorId: string, tweetId: string, mediaUrl: string, index: number) {
  const fallbackName = `media_${Date.now()}_${index}`;
  // Drop the query string and fragment BEFORE looking for the last "/", so a
  // "/" inside them (e.g. `?next=/a/b`) cannot be mistaken for a path separator.
  const pathOnly = mediaUrl.split(/[?#]/, 1)[0] ?? "";
  const lastSegment = pathOnly.split("/").pop() ?? "";
  const safeName = lastSegment.replace(/[^a-zA-Z0-9._-]/g, "_");
  return `twitter/${authorId}/${tweetId}/${safeName || fallbackName}`;
}
/**
 * Downloads `mediaUrl` and stores the bytes in S3 under `fileName`.
 *
 * Every attempt downloads the media again and writes it with Bun's S3 client,
 * falling back to the AWS SDK client when that write throws. A failed attempt
 * is followed by a delay of 800 ms × attempt number before the next one.
 * Because some providers report an error although the object was stored, the
 * function probes for the object before it gives up.
 *
 * @param fileName - Object key to write.
 * @param mediaUrl - URL the media is downloaded from.
 * @param maxRetry - Maximum number of download-and-write attempts (default 3).
 * @throws The error of the last failed attempt, or an `Error` when no attempt
 *   ran (`maxRetry` < 1) and the object is not present.
 */
async function uploadToS3(fileName: string, mediaUrl: string, maxRetry = 3) {
  /** Existence check via Bun's client; HEAD through the AWS SDK if that throws. */
  async function existsInS3(key: string) {
    try {
      return await client.exists(key);
    } catch {
      try {
        await awsClient.send(new HeadObjectCommand({
          Bucket: config.s3.bucket,
          Key: key,
        }));
        return true;
      } catch {
        // Any HEAD failure is treated as "not there" (best effort).
        return false;
      }
    }
  }

  /** Writes the body with Bun's client; PUT through the AWS SDK if that throws. */
  async function writeToS3(key: string, body: Uint8Array, mediaType?: string | null) {
    try {
      // Forward the content type here as well, so the stored metadata does not
      // depend on which of the two clients ended up performing the write.
      await client.write(key, body, mediaType ? { type: mediaType } : undefined);
      return;
    } catch (bunWriteError) {
      console.warn(`[S3 bun write failed, fallback to aws-sdk] key=${key}`, bunWriteError);
      await awsClient.send(new PutObjectCommand({
        Bucket: config.s3.bucket,
        Key: key,
        Body: body,
        ContentType: mediaType ?? undefined,
      }));
    }
  }

  /** Probes up to four times (600 ms × probe delay) whether the object exists. */
  async function recoverByPollingExists(reason: string) {
    for (let probe = 1; probe <= 4; probe++) {
      await Bun.sleep(probe * 600);
      try {
        if (await existsInS3(fileName)) {
          console.warn(`[S3 upload recovered-${reason}] key=${fileName} probe=${probe}`);
          return true;
        }
      } catch (existsError) {
        console.error(`[S3 exists probe failed] key=${fileName} probe=${probe}`, existsError);
      }
    }
    return false;
  }

  let lastError: unknown;
  for (let attempt = 1; attempt <= maxRetry; attempt++) {
    try {
      const response = await fetch(mediaUrl);
      if (!response.ok) {
        throw new Error(`Failed to fetch media from ${mediaUrl}: ${response.status} ${response.statusText}`);
      }
      const arrayBuffer = await response.arrayBuffer();
      const buffer = Buffer.from(arrayBuffer);
      await writeToS3(fileName, buffer, response.headers.get("content-type"));
      return;
    } catch (error) {
      lastError = error;
      console.error(`[S3 upload attempt ${attempt}/${maxRetry}] key=${fileName} url=${mediaUrl}`, error);
      const errorCode =
        typeof error === "object" && error !== null && "code" in error
          ? String((error as { code?: unknown }).code)
          : "";
      // Some S3 providers return UnknownError even when the object is eventually persisted.
      if (errorCode === "UnknownError") {
        if (await recoverByPollingExists("unknown")) {
          return;
        }
      }
      if (attempt < maxRetry) {
        await Bun.sleep(attempt * 800);
      }
    }
  }

  // Final guard: do one last exists check before surfacing failure.
  if (await recoverByPollingExists("final")) {
    return;
  }
  // With maxRetry < 1 the loop never ran and there is no captured error;
  // raise a real Error instead of throwing `undefined`.
  throw lastError ?? new Error(`S3 upload was not attempted: key=${fileName} maxRetry=${maxRetry}`);
}
export { makeS3FileName, uploadToS3, client as s3Client };

View file

@ -0,0 +1,25 @@
/**
 * Trims tags, discards blank ones and removes duplicates (first occurrence
 * wins, order preserved).
 *
 * @param tags - Raw tag strings.
 * @returns The cleaned tags, or `["미분류"]` when nothing remains.
 */
function normalizeTags(tags: string[]) {
  const cleaned = new Set<string>();
  for (const rawTag of tags) {
    const trimmed = rawTag.trim();
    if (trimmed.length > 0) {
      cleaned.add(trimmed);
    }
  }
  // Fall back to the default bucket tag when no usable tag was supplied.
  return cleaned.size > 0 ? Array.from(cleaned) : ["미분류"];
}
/**
 * Normalizes a `tags` query parameter that may be absent, a single string or
 * a list of strings.
 *
 * @param tags - Raw query value.
 * @returns `[]` when the value is missing or an empty string; otherwise the
 *   result of `normalizeTags` on the value(s).
 */
function normalizeQueryTags(tags?: string | string[]) {
  // A single value is wrapped so that both shapes go through the same path.
  return tags ? normalizeTags(Array.isArray(tags) ? tags : [tags]) : [];
}
export { normalizeTags, normalizeQueryTags };

View file

@ -0,0 +1,31 @@
import { MediaUpload } from "@/models/media";
/**
 * Reports whether any of the selected media items of a tweet is already stored.
 *
 * @param url - Tweet URL containing a `/status/<digits>` segment.
 * @param selected - Flags per media position; `true` marks a selected item.
 * @returns `true` when a matching record exists, otherwise `null`.
 * @throws Error("Invalid tweet URL") when no status id can be extracted.
 */
async function checkTweetData(url: string, selected: Array<boolean>) {
  // get tweet id from url
  const match = url.match(/\/status\/(\d+)/);
  if (!match) {
    throw new Error("Invalid tweet URL");
  }
  const tweetId = match[1];

  // Positions of the media items the caller selected.
  const selectedIndexes = selected.flatMap((isSelected, index) => (isSelected ? [index] : []));

  // The tweet payload lives under the `tweet` path of a MediaUpload document;
  // there is no `tweetData` path, so the id has to be matched as `tweet.id`.
  // NOTE(review): assumes the stored `tweet` object keeps its `id` — confirm
  // against the code that builds the payload passed to MediaUpload.create.
  const existing = await MediaUpload.findOne({
    "tweet.id": tweetId,
    mediaIndex: { $in: selectedIndexes },
  });
  if (existing) {
    return true;
  }
  return null;
}
/**
 * Fetches tweet metadata from the FxTwitter API for a tweet link.
 *
 * The scheme and host of known Twitter front-ends are stripped and the rest
 * of the URL is appended to `https://api.fxtwitter.com/`. A URL whose host is
 * not in the list is appended unchanged.
 *
 * @param url - Tweet link, e.g. `https://x.com/<user>/status/<id>`.
 * @returns The parsed JSON body of the API response.
 * @throws Error when the API answers with a non-2xx status.
 */
async function fetchTweetData(url: string) {
  // `vxtwitter.com` is the actual mirror domain; the earlier `vxwitter.com`
  // spelling is kept so inputs that used to match still do.
  const tweetPath = url.replace(
    /^https?:\/\/(www\.)?(x\.com|twitter\.com|fxtwitter\.com|fixupx\.com|vxtwitter\.com|vxwitter\.com)\//,
    "",
  );
  const response = await fetch(`https://api.fxtwitter.com/${tweetPath}`);
  if (!response.ok) {
    throw new Error(`Failed to fetch tweet data: ${response.status} ${response.statusText}`);
  }
  return await response.json();
}
export { checkTweetData, fetchTweetData };

View file

@ -0,0 +1,20 @@
import * as mongoose from "mongoose";
// Shape of a stored media record.
// NOTE(review): no `tweetId` path is declared here; confirm that callers which
// still pass or query a tweet id use a path this schema actually has.
const mediaUploadSchema = new mongoose.Schema({
// Source kind of the record; the value "twitter" makes `tweet` mandatory.
type: { type: String, required: true },
// Tweet payload stored as a free-form object (no nested validation).
tweet: {
type: Object,
// Conditional requirement: evaluated against the document being validated.
required: function (this: { type?: string }) {
return this.type === "twitter";
},
},
// Position of the media item within its source post.
mediaIndex: { type: Number, required: true },
mediaUrl: { type: String, required: true },
// presumably the object key produced by makeS3FileName — TODO confirm
s3Key: { type: String, required: true },
tags: { type: [String], default: [] },
author: { type: String, required: true },
}, {
// Let Mongoose maintain createdAt / updatedAt.
timestamps: true,
});
// NOTE(review): mongoose.model() throws OverwriteModelError when the same
// model name is compiled twice in one process — confirm this module is never
// re-evaluated (e.g. by hot reloading).
export const MediaUpload = mongoose.model("MediaUpload", mediaUploadSchema);

View file

@ -0,0 +1,11 @@
import * as mongoose from "mongoose";
// Tag bookkeeping: one document per unique tag name.
const tagSchema = new mongoose.Schema({
name: { type: String, required: true, unique: true },
usageCount: { type: Number, default: 0 },
// Date.now is passed as a function, so the default is computed per document.
lastUsedAt: { type: Date, default: Date.now },
}, {
// Let Mongoose maintain createdAt / updatedAt.
timestamps: true,
});
// NOTE(review): mongoose.model() throws OverwriteModelError when the same
// model name is compiled twice in one process — confirm this module is never
// re-evaluated (e.g. by hot reloading).
export const Tag = mongoose.model("Tag", tagSchema);

View file

@ -28,8 +28,11 @@
"module": "ES2022", /* Specify what module code is generated. */
// "rootDir": "./", /* Specify the root folder within your source files. */
"moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
"baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
"paths": {
"@/*": ["./src/*"]
},
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
"types": ["bun-types"], /* Specify type package names to be included without being referenced in a source file. */