diff --git a/apps/backend/src/index.ts b/apps/backend/src/index.ts
index 446051a..c1238bf 100644
--- a/apps/backend/src/index.ts
+++ b/apps/backend/src/index.ts
@@ -1,206 +1,17 @@
 import { Elysia, t } from "elysia";
 import config from "../config.toml";
-import { S3Client } from "bun";
-import {
-  HeadObjectCommand,
-  PutObjectCommand,
-  S3Client as AwsS3Client,
-} from "@aws-sdk/client-s3";
 import * as mongoose from "mongoose";
 import openapi from "@elysiajs/openapi";
+import { MediaUpload } from "@/models/media";
+import { Tag } from "@/models/tag";
+import { checkTweetData, fetchTweetData } from "./lib/tweet";
+import { makeS3FileName, s3Client, uploadToS3 } from "./lib/s3";
+import { normalizeQueryTags, normalizeTags } from "./lib/tag";
 
 await mongoose.connect(config.mongodb.uri);
 
-const mediaUploadSchema = new mongoose.Schema({
-  tweetId: { type: String, required: true },
-  tweet: { type: Object, required: true },
-  mediaIndex: { type: Number, required: true },
-  mediaUrl: { type: String, required: true },
-  s3Key: { type: String, required: true },
-  tags: { type: [String], default: [] },
-  author: { type: String, required: true },
-}, {
-  timestamps: true,
-});
-
-const tagSchema = new mongoose.Schema({
-  name: { type: String, required: true, unique: true },
-  usageCount: { type: Number, default: 0 },
-  lastUsedAt: { type: Date, default: Date.now },
-}, {
-  timestamps: true,
-});
-
-const MediaUpload = mongoose.models.MediaUpload || mongoose.model("MediaUpload", mediaUploadSchema);
-const Tag = mongoose.models.Tag || mongoose.model("Tag", tagSchema);
 const inFlightUploads = new Set();
 
-const client = new S3Client({
-  accessKeyId: config.s3.access_key,
-  secretAccessKey: config.s3.secret_key,
-  bucket: config.s3.bucket,
-  endpoint: config.s3.endpoint,
-});
-
-const awsClient = new AwsS3Client({
-  region: "auto",
-  endpoint: config.s3.endpoint,
-  forcePathStyle: true,
-  credentials: {
-    accessKeyId: config.s3.access_key,
-    secretAccessKey: config.s3.secret_key,
-  },
-});
-
-async function checkTweetData(url: string, selected: Array) {
-  // get tweet id from url
-  const match = url.match(/\/status\/(\d+)/);
-  if (!match) {
-    throw new Error("Invalid tweet URL");
-  }
-  const tweetId = match[1];
-  // find in mongodb if there is already a record with the same tweet id and media index in selected
-  const existing = await MediaUpload.findOne({ tweetId, mediaIndex: { $in: selected.map((s, i) => s ? i : -1).filter(i => i >= 0) } });
-  if (existing) {
-    return true;
-  }
-  return null;
-}
-
-async function fetchTweetData(url: string) {
-  const apiUrl = `https://api.fxtwitter.com/${url.replace(/^https?:\/\/(www\.)?(x\.com|twitter\.com|fxtwitter\.com|fixupx\.com|vxwitter\.com)\//, "")}`;
-  const response = await fetch(apiUrl);
-  if (response.ok) {
-    // const dataText = await response.text();
-    // console.log("Raw API response:", dataText);
-    const data = await response.json();
-    return data;
-  } else {
-    throw new Error(`Failed to fetch tweet data: ${response.status} ${response.statusText}`);
-  }
-}
-
-function makeS3FileName(authorId: string, tweetId: string, mediaUrl: string, index: number) {
-  const rawName = mediaUrl.split("/").pop() || `media_${Date.now()}_${index}`;
-  const withoutQuery = rawName.split("?")[0]?.split("#")[0] || `media_${Date.now()}_${index}`;
-  const safeName = withoutQuery.replace(/[^a-zA-Z0-9._-]/g, "_");
-  return `twitter/${authorId}/${tweetId}/${safeName || `media_${Date.now()}_${index}`}`;
-}
-
-async function uploadToS3WithRetry(fileName: string, mediaUrl: string, maxRetry = 3) {
-  async function existsInS3(key: string) {
-    try {
-      return await client.exists(key);
-    } catch {
-      try {
-        await awsClient.send(new HeadObjectCommand({
-          Bucket: config.s3.bucket,
-          Key: key,
-        }));
-        return true;
-      } catch {
-        return false;
-      }
-    }
-  }
-
-  async function writeToS3(key: string, body: Uint8Array, mediaType?: string | null) {
-    try {
-      await client.write(key, body);
-      return;
-    } catch (bunWriteError) {
-      console.warn(`[S3 bun write failed, fallback to aws-sdk] key=${key}`, bunWriteError);
-      await awsClient.send(new PutObjectCommand({
-        Bucket: config.s3.bucket,
-        Key: key,
-        Body: body,
-        ContentType: mediaType ?? undefined,
-      }));
-    }
-  }
-
-  async function recoverByPollingExists(reason: string) {
-    for (let probe = 1; probe <= 4; probe++) {
-      await Bun.sleep(probe * 600);
-      try {
-        if (await existsInS3(fileName)) {
-          console.warn(`[S3 upload recovered-${reason}] key=${fileName} probe=${probe}`);
-          return true;
-        }
-      } catch (existsError) {
-        console.error(`[S3 exists probe failed] key=${fileName} probe=${probe}`, existsError);
-      }
-    }
-
-    return false;
-  }
-
-  let lastError: unknown;
-
-  for (let attempt = 1; attempt <= maxRetry; attempt++) {
-    try {
-      const response = await fetch(mediaUrl);
-      if (!response.ok) {
-        throw new Error(`Failed to fetch media from ${mediaUrl}: ${response.status} ${response.statusText}`);
-      }
-
-      const arrayBuffer = await response.arrayBuffer();
-      const buffer = Buffer.from(arrayBuffer);
-      await writeToS3(fileName, buffer, response.headers.get("content-type"));
-      return;
-    } catch (error) {
-      lastError = error;
-      console.error(`[S3 upload attempt ${attempt}/${maxRetry}] key=${fileName} url=${mediaUrl}`, error);
-
-      const errorCode =
-        typeof error === "object" && error !== null && "code" in error
-          ? String((error as { code?: unknown }).code)
-          : "";
-
-      // Some S3 providers return UnknownError even when the object is eventually persisted.
-      if (errorCode === "UnknownError") {
-        if (await recoverByPollingExists("unknown")) {
-          return;
-        }
-      }
-
-      if (attempt < maxRetry) {
-        await Bun.sleep(attempt * 800);
-      }
-    }
-  }
-
-  // Final guard: do one last exists check before surfacing failure.
-  if (await recoverByPollingExists("final")) {
-    return;
-  }
-
-  throw lastError;
-}
-
-function normalizeTags(tags: string[]) {
-  const unique = new Set(
-    tags
-      .map((tag) => tag.trim())
-      .filter((tag) => tag.length > 0),
-  );
-
-  if (unique.size === 0) {
-    return ["미분류"];
-  }
-
-  return Array.from(unique);
-}
-
-function normalizeQueryTags(tags?: string | string[]) {
-  if (!tags) {
-    return [];
-  }
-
-  const rawTags = Array.isArray(tags) ? tags : [tags];
-  return normalizeTags(rawTags);
-}
-
 function buildUploadKey(url: string, selected: boolean[]) {
   const match = url.match(/\/status\/(\d+)/);
   const tweetId = match?.[1] ?? url;
@@ -315,10 +126,10 @@ const app = new Elysia()
         const fileName = makeS3FileName(tweetData.tweet.author.id, tweetData.tweet.id, url, index);
 
         try {
-          if (await client.exists(fileName)) {
+          if (await s3Client.exists(fileName)) {
             console.log(`File ${fileName} already exists in S3, skipping upload.`);
           } else {
-            await uploadToS3WithRetry(fileName, url);
+            await uploadToS3(fileName, url);
             console.log(`Uploaded ${fileName} to S3`);
           }
 
@@ -327,7 +138,6 @@ const app = new Elysia()
 
           await MediaUpload.create({
             type: "twitter",
-            tweetId: tweetData.tweet.id,
             tweet: tweetWithoutMedia,
             mediaIndex: index,
             mediaUrl: `${config.s3.endpoint}/${config.s3.bucket}/${fileName}`,
diff --git a/apps/backend/src/lib/s3.ts b/apps/backend/src/lib/s3.ts
new file mode 100644
index 0000000..6fbc088
--- /dev/null
+++ b/apps/backend/src/lib/s3.ts
@@ -0,0 +1,139 @@
+import { S3Client } from "bun";
+import {
+  HeadObjectCommand,
+  PutObjectCommand,
+  S3Client as AwsS3Client,
+} from "@aws-sdk/client-s3";
+import config from "@/../config.toml";
+
+const client = new S3Client({
+  accessKeyId: config.s3.access_key,
+  secretAccessKey: config.s3.secret_key,
+  bucket: config.s3.bucket,
+  endpoint: config.s3.endpoint,
+});
+
+const awsClient = new AwsS3Client({
+  region: "auto",
+  endpoint: config.s3.endpoint,
+  forcePathStyle: true,
+  credentials: {
+    accessKeyId: config.s3.access_key,
+    secretAccessKey: config.s3.secret_key,
+  },
+});
+
+/**
+ * Builds the S3 object key `twitter/<authorId>/<tweetId>/<file>` for a media URL.
+ * The file name is the URL's last path segment with any query/fragment removed
+ * and characters outside `[a-zA-Z0-9._-]` replaced by `_`; a timestamped
+ * `media_<now>_<index>` name is used when no segment can be extracted.
+ */
+function makeS3FileName(authorId: string, tweetId: string, mediaUrl: string, index: number) {
+  const rawName = mediaUrl.split("/").pop() || `media_${Date.now()}_${index}`;
+  const withoutQuery = rawName.split("?")[0]?.split("#")[0] || `media_${Date.now()}_${index}`;
+  const safeName = withoutQuery.replace(/[^a-zA-Z0-9._-]/g, "_");
+  return `twitter/${authorId}/${tweetId}/${safeName || `media_${Date.now()}_${index}`}`;
+}
+
+/**
+ * Downloads `mediaUrl` and stores it under the key `fileName`. Writes go through
+ * Bun's S3 client first and fall back to the AWS SDK client when that throws.
+ * Makes up to `maxRetry` attempts with a growing pause between them, and polls
+ * for the object before giving up, because some providers report an error even
+ * though the object ends up persisted.
+ *
+ * @throws the last error seen when every attempt fails and the object is still absent.
+ */
+async function uploadToS3(fileName: string, mediaUrl: string, maxRetry = 3) {
+  async function existsInS3(key: string) {
+    try {
+      return await client.exists(key);
+    } catch {
+      try {
+        await awsClient.send(new HeadObjectCommand({
+          Bucket: config.s3.bucket,
+          Key: key,
+        }));
+        return true;
+      } catch {
+        return false;
+      }
+    }
+  }
+
+  async function writeToS3(key: string, body: Uint8Array, mediaType?: string | null) {
+    try {
+      await client.write(key, body);
+      return;
+    } catch (bunWriteError) {
+      console.warn(`[S3 bun write failed, fallback to aws-sdk] key=${key}`, bunWriteError);
+      await awsClient.send(new PutObjectCommand({
+        Bucket: config.s3.bucket,
+        Key: key,
+        Body: body,
+        ContentType: mediaType ?? undefined,
+      }));
+    }
+  }
+
+  async function recoverByPollingExists(reason: string) {
+    for (let probe = 1; probe <= 4; probe++) {
+      await Bun.sleep(probe * 600);
+      try {
+        if (await existsInS3(fileName)) {
+          console.warn(`[S3 upload recovered-${reason}] key=${fileName} probe=${probe}`);
+          return true;
+        }
+      } catch (existsError) {
+        console.error(`[S3 exists probe failed] key=${fileName} probe=${probe}`, existsError);
+      }
+    }
+
+    return false;
+  }
+
+  let lastError: unknown;
+
+  for (let attempt = 1; attempt <= maxRetry; attempt++) {
+    try {
+      const response = await fetch(mediaUrl);
+      if (!response.ok) {
+        throw new Error(`Failed to fetch media from ${mediaUrl}: ${response.status} ${response.statusText}`);
+      }
+
+      const arrayBuffer = await response.arrayBuffer();
+      const buffer = Buffer.from(arrayBuffer);
+      await writeToS3(fileName, buffer, response.headers.get("content-type"));
+      return;
+    } catch (error) {
+      lastError = error;
+      console.error(`[S3 upload attempt ${attempt}/${maxRetry}] key=${fileName} url=${mediaUrl}`, error);
+
+      const errorCode =
+        typeof error === "object" && error !== null && "code" in error
+          ? String((error as { code?: unknown }).code)
+          : "";
+
+      // Some S3 providers return UnknownError even when the object is eventually persisted.
+      if (errorCode === "UnknownError") {
+        if (await recoverByPollingExists("unknown")) {
+          return;
+        }
+      }
+
+      if (attempt < maxRetry) {
+        await Bun.sleep(attempt * 800);
+      }
+    }
+  }
+
+  // Final guard: do one last exists check before surfacing failure.
+  if (await recoverByPollingExists("final")) {
+    return;
+  }
+
+  throw lastError;
+}
+
+export { makeS3FileName, uploadToS3, client as s3Client };
\ No newline at end of file
diff --git a/apps/backend/src/lib/tag.ts b/apps/backend/src/lib/tag.ts
new file mode 100644
index 0000000..9c6f02c
--- /dev/null
+++ b/apps/backend/src/lib/tag.ts
@@ -0,0 +1,33 @@
+/**
+ * Trims every tag, drops the ones that end up empty and removes duplicates.
+ * Returns a single fallback tag when nothing is left.
+ */
+function normalizeTags(tags: string[]) {
+  const unique = new Set(
+    tags
+      .map((tag) => tag.trim())
+      .filter((tag) => tag.length > 0),
+  );
+
+  if (unique.size === 0) {
+    return ["미분류"];
+  }
+
+  return Array.from(unique);
+}
+
+/**
+ * Normalizes a `tags` query value that may be missing, a single string or an array.
+ * A missing or empty-string value yields `[]`; anything else goes through `normalizeTags`.
+ */
+function normalizeQueryTags(tags?: string | string[]) {
+  if (!tags) {
+    return [];
+  }
+
+  const rawTags = Array.isArray(tags) ? tags : [tags];
+  return normalizeTags(rawTags);
+}
+
+
+export { normalizeTags, normalizeQueryTags };
\ No newline at end of file
diff --git a/apps/backend/src/lib/tweet.ts b/apps/backend/src/lib/tweet.ts
new file mode 100644
index 0000000..baee3ab
--- /dev/null
+++ b/apps/backend/src/lib/tweet.ts
@@ -0,0 +1,45 @@
+import { MediaUpload } from "@/models/media";
+
+/**
+ * Checks whether any of the selected media of a tweet has already been stored.
+ *
+ * @param url tweet URL containing `/status/<id>`.
+ * @param selected per-media flags; indexes holding a truthy value are looked up.
+ * @returns `true` when a matching `MediaUpload` document exists, otherwise `null`.
+ * @throws Error when no tweet id can be extracted from `url`.
+ */
+async function checkTweetData(url: string, selected: boolean[]) {
+  // get tweet id from url
+  const match = url.match(/\/status\/(\d+)/);
+  if (!match) {
+    throw new Error("Invalid tweet URL");
+  }
+  const tweetId = match[1];
+  // documents keep the tweet under `tweet`, so the id is matched at `tweet.id`, limited to the selected media indexes
+  const existing = await MediaUpload.findOne({ "tweet.id": tweetId, mediaIndex: { $in: selected.map((s, i) => s ? i : -1).filter(i => i >= 0) } });
+  if (existing) {
+    return true;
+  }
+  return null;
+}
+
+/**
+ * Fetches tweet data from the fxtwitter API for a supported tweet URL.
+ *
+ * @returns the parsed JSON response body.
+ * @throws Error when the API responds with a non-2xx status.
+ */
+async function fetchTweetData(url: string) {
+  const apiUrl = `https://api.fxtwitter.com/${url.replace(/^https?:\/\/(www\.)?(x\.com|twitter\.com|fxtwitter\.com|fixupx\.com|vxwitter\.com)\//, "")}`;
+  const response = await fetch(apiUrl);
+  if (response.ok) {
+    // const dataText = await response.text();
+    // console.log("Raw API response:", dataText);
+    const data = await response.json();
+    return data;
+  } else {
+    throw new Error(`Failed to fetch tweet data: ${response.status} ${response.statusText}`);
+  }
+}
+
+export { checkTweetData, fetchTweetData };
\ No newline at end of file
diff --git a/apps/backend/src/models/media.ts b/apps/backend/src/models/media.ts
new file mode 100644
index 0000000..cf735cf
--- /dev/null
+++ b/apps/backend/src/models/media.ts
@@ -0,0 +1,20 @@
+import * as mongoose from "mongoose";
+
+const mediaUploadSchema = new mongoose.Schema({
+  type: { type: String, required: true },
+  tweet: {
+    type: Object,
+    required: function (this: { type?: string }) {
+      return this.type === "twitter";
+    },
+  },
+  mediaIndex: { type: Number, required: true },
+  mediaUrl: { type: String, required: true },
+  s3Key: { type: String, required: true },
+  tags: { type: [String], default: [] },
+  author: { type: String, required: true },
+}, {
+  timestamps: true,
+});
+
+export const MediaUpload = mongoose.model("MediaUpload", mediaUploadSchema);
\ No newline at end of file
diff --git a/apps/backend/src/models/tag.ts b/apps/backend/src/models/tag.ts
new file mode 100644
index 0000000..e6d2280
--- /dev/null
+++ b/apps/backend/src/models/tag.ts
@@ -0,0 +1,11 @@
+import * as mongoose from "mongoose";
+
+const tagSchema = new mongoose.Schema({
+  name: { type: String, required: true, unique: true },
+  usageCount: { type: Number, default: 0 },
+  lastUsedAt: { type: Date, default: Date.now },
+}, {
+  timestamps: true,
+});
+
+export const Tag = mongoose.model("Tag", tagSchema);
\ No newline at end of file
diff --git a/apps/backend/tsconfig.json b/apps/backend/tsconfig.json
index 1ca2350..987cde1 100644
--- a/apps/backend/tsconfig.json
+++ b/apps/backend/tsconfig.json
@@ -28,8 +28,11 @@
     "module": "ES2022",                                  /* Specify what module code is generated. */
     // "rootDir": "./",                                  /* Specify the root folder within your source files. */
     "moduleResolution": "node",                          /* Specify how TypeScript looks up a file from a given module specifier. */
-    // "baseUrl": "./",                                  /* Specify the base directory to resolve non-relative module names. */
+    "baseUrl": "./",                                     /* Specify the base directory to resolve non-relative module names. */
     // "paths": {},                                      /* Specify a set of entries that re-map imports to additional lookup locations. */
+    "paths": {
+      "@/*": ["./src/*"]
+    },
     // "rootDirs": [],                                   /* Allow multiple folders to be treated as one when resolving modules. */
     // "typeRoots": [],                                  /* Specify multiple folders that act like './node_modules/@types'. */
     "types": ["bun-types"],                              /* Specify type package names to be included without being referenced in a source file. */