Automagically fix your WP Imported Post Broken Images!
What's the big idea?
I wanted to migrate away from WordPress and try out Ghost but my migration went poorly. My old plugins had generated a mix of image tags, images with srcset, videos, weird embeddings, etc. Ghost should have handled them cleanly but it dropped the ball on nearly all of them. So I decided some scripting was necessary to fix the broken images.
One major flaw with the Ghost 6.xx import process is that it failed to find the images on my old WordPress site. That may be my fault as I had already taken it down and hosted it on my personal computer. It was available to the web and the images showed on the temporary instance, but something about that didn't work well with the Ghost import process.
My solution was to copy all of the content files from my WordPress install into the content folder of my Ghost install.
After that I ran the script below to migrate all of the various forms of images into something that works with the Lexical representation in Ghost.
This post includes a production-ready Node.js script that fixes media URLs after a WordPress → Ghost migration. It:
- Repairs image and video URLs inside Lexical HTML blocks and legacy HTML
- Strips WordPress `srcset` attributes so browsers use the corrected `src`
- Fixes featured images and auto-assigns the first image as the feature image when missing
- Generates a placeholder image when no images exist
- Replaces missing referenced files with a broken-image placeholder and adds `data-old-url` so the original filename is preserved
How to use this script
- Install prerequisites on the machine that can reach your Ghost Admin API:
  `npm install axios glob jsonwebtoken form-data`
- Configure this file:
  - Set `SITE_URL` to your Ghost site (including https://).
  - Set `GHOST_ADMIN_KEY` to your Admin API key (format `id:secret`).
- Run the fixer (the embedded script) separately on a machine that has access to your Ghost server and the local `/var/www/html/content/images` folder (or adjust `CONTENT_ROOT` accordingly).
Security note: keep your Admin API key secret. Do not commit it to public repositories.
The full fixer script is embedded below. Copy it into a file named ghost-media-fixer.js and run it after configuring the constants at the top.
/**
* Ghost-Safe Media URL Fixer (Lexical + HTML + Feature Images + Auto-Assign + Broken Placeholder)
* ----------------------------------------------------------------------------------------------
* Adds behavior: when a referenced filename cannot be found in content/images,
* replace the src with a temporary "broken" image (⛓️💥) and add a data-old-url
* attribute so the original filename/URL is preserved in the post content.
*
* All previous features retained:
* - Fixes lexical image nodes and html blocks
* - Fixes legacy post.html
* - Fixes and auto-assigns feature_image (absolute URL)
* - Strips srcset and <source srcset>
* - Generates placeholder images (with retry and graceful fallback)
*
* Requirements:
* npm install axios glob jsonwebtoken form-data
*/
const fs = require("fs");
const path = require("path");
const axios = require("axios");
const glob = require("glob");
const jwt = require("jsonwebtoken");
const FormData = require("form-data");
// -----------------------------
// CONFIGURATION
// -----------------------------
// Base URL of the Ghost site. Used to build GHOST_API_URL and, in toAbsolute(),
// as the prefix that turns site-relative paths into absolute URLs.
const SITE_URL = "https://www.yoursitenamehere.com";
// Root of the Ghost Admin API; endpoint paths such as /posts/ and /images/upload/ are appended to it.
const GHOST_API_URL = `${SITE_URL}/ghost/api/admin`;
// Admin API key in "id:secret" form. ghostAuth() splits it on ":" and decodes the
// secret part as hex before signing the JWT. Replace the placeholder before running.
const GHOST_ADMIN_KEY = "yourghost:adminkey";
// Local filesystem path of the Ghost content folder. It is stripped from the paths of
// located files so the remainder can be turned into a /content/... URL.
const CONTENT_ROOT = "/var/www/html/content/";
// Directory that is searched recursively for media files by name.
const IMAGE_DIR = path.join(CONTENT_ROOT, "images");
// File extensions (lowercase, with leading dot) that isMediaFile() accepts as media.
const MEDIA_EXTENSIONS = [
".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg",
".mp4", ".mov", ".webm",
".mp3", ".wav", ".ogg"
];
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Build the axios request config carrying a short-lived Ghost Admin API token.
 *
 * GHOST_ADMIN_KEY is split on ":" into a key id and a secret. The secret is decoded
 * as hex and used to sign an HS256 JWT that expires five minutes from now, with the
 * audience "/admin/" and the key id in the token header.
 *
 * @returns {{headers: {Authorization: string}}} Config object to pass to axios calls.
 * @throws {Error} When GHOST_ADMIN_KEY is not in "id:secret" form with a hex secret.
 */
function ghostAuth() {
  const [id, secret] = typeof GHOST_ADMIN_KEY === "string" ? GHOST_ADMIN_KEY.split(":") : [];

  // Fail early with a readable message: a non-hex secret would otherwise decode to a
  // truncated/empty buffer and surface as an opaque signing error.
  const isHexSecret = typeof secret === "string" && /^(?:[0-9a-f]{2})+$/i.test(secret);
  if (!id || !isHexSecret) {
    throw new Error(
      "GHOST_ADMIN_KEY must be in the form id:secret, where secret is a hex string"
    );
  }

  const token = jwt.sign(
    {
      exp: Math.floor(Date.now() / 1000) + 5 * 60,
      aud: "/admin/"
    },
    Buffer.from(secret, "hex"),
    {
      keyid: id,
      algorithm: "HS256"
    }
  );

  return {
    headers: {
      Authorization: `Ghost ${token}`
    }
  };
}
/**
 * Locate a file by name anywhere under IMAGE_DIR (case-insensitive).
 *
 * @param {string} filename - Bare file name (no directory part) to look for.
 * @returns {string|null} The first matching path reported by glob, or null when the
 *   name is empty or nothing matches.
 */
function findMediaAnywhere(filename) {
  if (!filename) return null;

  // The name is data, not a pattern: escape glob metacharacters so that names such as
  // "photo[1].jpg" or "scan(2).png" are matched literally instead of as glob syntax.
  const literalName = String(filename).replace(/[\\*?[\]{}()!]/g, "\\$&");

  const pattern = path.join(IMAGE_DIR, "**", literalName);
  const matches = glob.sync(pattern, { nocase: true });
  return matches.length > 0 ? matches[0] : null;
}
/**
 * Reduce a media URL or path to its bare file name.
 *
 * Only the last path segment is returned, so whatever host or directory prefix the
 * old site used does not matter. The query string and fragment are dropped first so
 * that "photo.jpg?w=300" yields "photo.jpg".
 *
 * @param {string} src - Absolute, protocol-relative or site-relative URL.
 * @returns {string|null} The file name, or null when src is empty, not a string,
 *   a data: URL, or has no path segment left.
 */
function extractFilenameFromSrc(src) {
  if (!src || typeof src !== "string") return null;

  // Inline data URLs have no file behind them.
  if (/^data:/i.test(src)) return null;

  // Drop ?query and #fragment before looking at the path.
  let cleaned = src.split(/[?#]/, 1)[0];

  // Strip scheme + host (or a protocol-relative //host) BEFORE collapsing slashes;
  // collapsing first would turn "https://" into "https:/" and defeat this match.
  cleaned = cleaned.replace(/^(?:https?:)?\/\/[^/]+/i, "");
  cleaned = cleaned.replace(/\/{2,}/g, "/");

  const name = path.basename(cleaned);
  return name || null;
}
/**
 * Tell whether a file name carries one of the extensions listed in MEDIA_EXTENSIONS.
 * The comparison ignores the case of the extension.
 *
 * @param {string} filename - File name to inspect.
 * @returns {boolean} True when the lowercased extension is in MEDIA_EXTENSIONS.
 */
function isMediaFile(filename) {
  const suffix = path.extname(filename).toLowerCase();
  return MEDIA_EXTENSIONS.some((known) => known === suffix);
}
/**
 * Ensure a URL is absolute (used for feature_image values).
 *
 * - Falsy input is returned unchanged.
 * - Anything that already has a scheme (http:, https:, data:, ...) is returned as is.
 * - Protocol-relative URLs ("//host/path") get the scheme of SITE_URL.
 * - Everything else is treated as a site path and prefixed with SITE_URL.
 *
 * @param {string} url - URL or site-relative path.
 * @returns {string} Absolute URL (or the original falsy value).
 */
function toAbsolute(url) {
  if (!url) return url;

  const schemePattern = /^[a-z][a-z0-9+.-]*:/i;

  // Already absolute. This also keeps inline data: URLs intact instead of gluing
  // SITE_URL in front of them.
  if (schemePattern.test(url)) return url;

  if (url.startsWith("//")) {
    const siteScheme = SITE_URL.match(schemePattern);
    return `${siteScheme ? siteScheme[0] : "https:"}${url}`;
  }

  const rooted = url.startsWith("/") ? url : `/${url}`;
  return `${SITE_URL}${rooted}`;
}
/**
 * Escape a value so it can be placed inside a quoted HTML attribute.
 *
 * The ampersand is replaced first so the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param {*} s - Value to escape; it is converted with String().
 * @returns {string} Escaped text, or "" for any falsy input.
 */
function escapeHtmlAttr(s) {
  if (!s) return "";
  return String(s)
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}
/**
 * Resolve one media URL (a Lexical image src or a feature_image) against the local
 * image directory.
 *
 * @param {string} url - URL as currently stored in the post.
 * @param {string} postTitle - Post title, used only in log output.
 * @returns {{url: string, changed: boolean, found: boolean}}
 *   `found` is true when a file with that name exists locally; `changed` is true
 *   when `url` in the result differs from the input and should be written back.
 */
function fixUrlString(url, postTitle) {
  const keepAsIs = (found) => ({ url, changed: false, found });

  const filename = extractFilenameFromSrc(url);
  const isCandidate = Boolean(filename) && isMediaFile(filename);
  if (!isCandidate) {
    return keepAsIs(false);
  }

  const foundPath = findMediaAnywhere(filename);
  if (!foundPath) {
    console.log(`❌ Missing: ${filename} in post "${postTitle}"`);
    return keepAsIs(false);
  }
  console.log(`🔎 Found ${filename} for post "${postTitle}" at: ${foundPath}`);

  // Turn the filesystem path into a site path: drop the content root, normalise
  // separators to forward slashes, and trim any leading slashes.
  const belowRoot = foundPath.replace(CONTENT_ROOT, "");
  const withForwardSlashes = belowRoot.replace(/\\/g, "/");
  const relative = withForwardSlashes.replace(/^\/+/, "");
  const newUrl = `/content/${relative}`;

  if (newUrl === url) {
    return keepAsIs(true);
  }
  console.log(` → Rewriting URL to: ${newUrl}`);
  return { url: newUrl, changed: true, found: true };
}
/**
 * Repair media references inside an HTML fragment.
 *
 * Steps, in order:
 *  1. Remove <source ... srcset=...> tags. This must run before step 2, otherwise
 *     the srcset attribute it looks for has already been stripped and the tags stay.
 *  2. Strip any remaining srcset attributes so the (corrected) src is what loads.
 *  3. Rewrite double-quoted src attributes on img/video/audio/source tags:
 *     - not a recognised media file name: left untouched;
 *     - file found locally: src becomes its /content/... path;
 *     - file missing: src becomes the broken placeholder (when one is given) and the
 *       original URL is kept in a data-old-url attribute.
 *
 * @param {string} html - HTML fragment; falsy input is treated as "".
 * @param {string} postTitle - Post title, used only in log output.
 * @param {string} brokenPlaceholderUrl - URL shown in place of missing files; may be empty.
 * @returns {Promise<{html: string, changed: boolean}>} The fixed fragment and whether
 *   anything was modified.
 */
async function fixHtmlFragment(html, postTitle, brokenPlaceholderUrl) {
  let changed = false;
  let out = html || "";

  // 1) Remove <source srcset> tags while the srcset attribute is still present.
  const withoutSources = out
    .replace(/<source[^>]*srcset="[^"]*"[^>]*>/gi, "")
    .replace(/<source[^>]*srcset='[^']*'[^>]*>/gi, "");
  if (withoutSources !== out) {
    console.log(` → Removed <source srcset> tags in "${postTitle}"`);
    changed = true;
    out = withoutSources;
  }

  // 2) Strip srcset attributes left on other tags (typically <img>).
  const withoutSrcset = out
    .replace(/\s+srcset="[^"]*"/gi, "")
    .replace(/\s+srcset='[^']*'/gi, "");
  if (withoutSrcset !== out) {
    console.log(` → Stripped srcset attributes in "${postTitle}"`);
    changed = true;
    out = withoutSrcset;
  }

  // 3) Fix src="..." URLs. `prefix` is everything from "<tag" up to the whitespace
  //    before src, so all other attributes are preserved as written.
  const srcRegex = /(<\s*(?:img|video|audio|source)[^>]*\s)src="([^"]+)"/gi;
  out = out.replace(srcRegex, (full, prefix, url) => {
    const filename = extractFilenameFromSrc(url);
    if (!filename || !isMediaFile(filename)) {
      return full;
    }

    const foundPath = findMediaAnywhere(filename);
    if (foundPath) {
      const relative = foundPath
        .replace(CONTENT_ROOT, "")
        .replace(/\\/g, "/")
        .replace(/^\/+/, "");
      const newUrl = `/content/${relative}`;
      if (newUrl === url) {
        return full;
      }
      console.log(`🔎 Found ${filename} for post "${postTitle}" at: ${foundPath}`);
      console.log(` → Rewriting URL to: ${newUrl}`);
      changed = true;
      return `${prefix}src="${newUrl}"`;
    }

    // Missing file: show the broken placeholder and remember the original URL.
    console.log(` → File missing for ${filename}, inserting broken placeholder for "${postTitle}"`);
    changed = true;
    const escapedOld = escapeHtmlAttr(url);
    // Without a placeholder there is no visual fix; the original src is kept.
    const shownSrc = brokenPlaceholderUrl || url;
    return `${prefix}src="${shownSrc}" data-old-url="${escapedOld}"`;
  });

  return { html: out, changed };
}
/**
 * Return the src of the first image found in a parsed Lexical document.
 *
 * Nodes are visited depth-first in document order. A node yields a result when it is
 * an image node with a string src, or an html node whose markup contains an <img>
 * with a double-quoted src.
 *
 * @param {object} lexicalObj - Parsed Lexical document ({ root: { children: [...] } }).
 * @returns {string|null} The first image URL, or null when there is none or the
 *   document does not have the expected shape.
 */
function findFirstImageInLexical(lexicalObj) {
  const topLevel = lexicalObj && lexicalObj.root ? lexicalObj.root.children : undefined;
  if (!Array.isArray(topLevel)) {
    return null;
  }

  // Explicit stack instead of recursion; children are pushed in reverse so that
  // they are popped in their original order.
  const pending = [...topLevel].reverse();
  while (pending.length > 0) {
    const node = pending.pop();
    if (!node || typeof node !== "object") continue;

    // 1. Direct Lexical image node
    if (node.type === "image" && typeof node.src === "string") {
      if (node.src) return node.src;
      // An empty src counts as "nothing here": move on without descending further.
      continue;
    }

    // 2. HTML node containing <img>
    if (node.type === "html" && typeof node.html === "string") {
      const imgMatch = node.html.match(/<img[^>]*src="([^"]+)"/i);
      if (imgMatch) return imgMatch[1];
    }

    // 3. Queue children
    if (Array.isArray(node.children)) {
      for (let i = node.children.length - 1; i >= 0; i -= 1) {
        pending.push(node.children[i]);
      }
    }
  }
  return null;
}
/**
 * Create the generic feature-image placeholder (pencil on a light grey card) and
 * upload it through the Ghost Admin API images endpoint.
 *
 * The upload is attempted up to three times, but only an ECONNRESET error triggers a
 * retry (after a 1.5 s pause); any other failure is logged and ends the attempts.
 * The function never throws on upload failure: if no upload succeeds, the same SVG
 * is returned as an inline data: URL.
 *
 * @returns {Promise<string>} URL reported by Ghost for the uploaded image, or a
 *   data:image/svg+xml URL of the same placeholder when uploading failed.
 */
async function generatePlaceholderImage() {
  const svg = [
    "",
    '<svg xmlns="http://www.w3.org/2000/svg" width="1200" height="630">',
    '<rect width="100%" height="100%" fill="#f3f3f3"/>',
    '<text x="50%" y="50%" font-size="120" text-anchor="middle" dy=".35em">✏️</text>',
    "</svg>",
    ""
  ].join("\n");
  const buffer = Buffer.from(svg, "utf8");
  const maxRetries = 3;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const form = new FormData();
      form.append("file", buffer, {
        filename: "placeholder.svg",
        contentType: "image/svg+xml"
      });
      const res = await axios.post(
        `${GHOST_API_URL}/images/upload/`,
        form,
        {
          headers: {
            ...form.getHeaders(),
            Authorization: ghostAuth().headers.Authorization
          }
        }
      );
      return res.data.images[0].url;
    } catch (err) {
      const code = err.code || err.cause?.code;
      console.error(`❌ Failed to upload placeholder image (attempt ${attempt})`, code || "");
      if (code === "ECONNRESET" && attempt < maxRetries) {
        console.log(" → ECONNRESET during placeholder upload, retrying after delay…");
        await sleep(1500);
        continue;
      }
      if (err.response?.data) {
        console.error(" → Ghost response (placeholder upload):", JSON.stringify(err.response.data, null, 2));
      } else {
        console.error(err);
      }
      break;
    }
  }

  // Fallback: inline the SAME placeholder so a post without images does not end up
  // with the "broken image" artwork as its feature image.
  return `data:image/svg+xml;utf8,${encodeURIComponent(svg)}`;
}
/**
 * Create the "broken media" placeholder SVG and upload it through the Ghost Admin
 * API images endpoint.
 *
 * Up to three attempts are made; only an ECONNRESET error is retried (after 1.5 s).
 * Any other failure is logged and stops the attempts. Upload failures are not
 * thrown: the SVG is then returned as an inline data: URL instead.
 *
 * @returns {Promise<string>} URL reported by Ghost for the uploaded image, or a
 *   data:image/svg+xml URL when no upload succeeded.
 */
async function generateBrokenPlaceholderImage() {
  const markup = [
    "",
    '<svg xmlns="http://www.w3.org/2000/svg" width="1200" height="630">',
    '<rect width="100%" height="100%" fill="#fff0f0"/>',
    '<text x="50%" y="50%" font-size="120" text-anchor="middle" dy=".35em">⛓️💥</text>',
    "</svg>",
    ""
  ].join("\n");
  const payload = Buffer.from(markup, "utf8");
  const attemptLimit = 3;

  let attempt = 1;
  while (attempt <= attemptLimit) {
    try {
      const upload = new FormData();
      upload.append("file", payload, {
        filename: "broken-placeholder.svg",
        contentType: "image/svg+xml"
      });
      const endpoint = `${GHOST_API_URL}/images/upload/`;
      const requestHeaders = {
        ...upload.getHeaders(),
        Authorization: ghostAuth().headers.Authorization
      };
      const response = await axios.post(endpoint, upload, { headers: requestHeaders });
      return response.data.images[0].url;
    } catch (err) {
      const code = err.code || err.cause?.code;
      console.error(`❌ Failed to upload broken placeholder (attempt ${attempt})`, code || "");

      const canRetry = code === "ECONNRESET" && attempt < attemptLimit;
      if (!canRetry) {
        if (err.response?.data) {
          console.error(" → Ghost response (broken placeholder upload):", JSON.stringify(err.response.data, null, 2));
        } else {
          console.error(err);
        }
        break;
      }
      console.log(" → ECONNRESET during broken placeholder upload, retrying after delay…");
      await sleep(1500);
    }
    attempt += 1;
  }

  // Fallback inline data URL (same artwork as the upload)
  return `data:image/svg+xml;utf8,${encodeURIComponent(markup)}`;
}
/**
 * Walk a parsed Lexical document and repair media references in place.
 *
 * - image nodes whose src names a recognised media file are resolved with
 *   fixUrlString(): a located file gets its new URL; a missing file causes the node
 *   to be replaced by an html node showing the broken placeholder, with the original
 *   URL kept in data-old-url.
 * - image nodes whose src is NOT a recognised media file name (for example remote
 *   images addressed by an extension-less URL or one with a query string) are left
 *   untouched; they are not local files and must not be flagged as broken.
 * - html nodes are passed through fixHtmlFragment().
 *
 * @param {object} lexicalObj - Parsed Lexical document; it is mutated.
 * @param {string} postTitle - Post title, used only in log output.
 * @param {string} brokenPlaceholderUrl - URL shown in place of missing files; may be empty.
 * @returns {Promise<{lexical: object, changed: boolean}>} The same object that was
 *   passed in and whether anything in it was modified.
 */
async function fixLexicalTreeAsync(lexicalObj, postTitle, brokenPlaceholderUrl) {
  if (!lexicalObj || !lexicalObj.root || !Array.isArray(lexicalObj.root.children)) {
    return { lexical: lexicalObj, changed: false };
  }

  let changed = false;

  async function walk(node, parentArray, index) {
    if (!node || typeof node !== "object") return;

    // Image node with src
    if (node.type === "image" && typeof node.src === "string") {
      const filename = extractFilenameFromSrc(node.src);
      const isLocalMediaCandidate = Boolean(filename) && isMediaFile(filename);

      if (isLocalMediaCandidate) {
        const { url, changed: urlChanged, found } = fixUrlString(node.src, postTitle);
        if (found) {
          if (urlChanged) {
            node.src = url;
            changed = true;
          }
        } else if (Array.isArray(parentArray) && typeof index === "number") {
          // Missing file: swap the node for an html node with placeholder + data-old-url.
          const escapedOld = escapeHtmlAttr(node.src);
          const alt = escapeHtmlAttr(node.alt || "");
          // Without a placeholder the original src is kept (no visual fix).
          const shownSrc = brokenPlaceholderUrl || escapedOld;
          parentArray[index] = {
            type: "html",
            html: `<img src="${shownSrc}" data-old-url="${escapedOld}" alt="${alt}">`
          };
          changed = true;
          // The image node is no longer part of the tree; nothing more to visit.
          return;
        }
      }
    }

    // HTML node with html string
    if (node.type === "html" && typeof node.html === "string") {
      const { html, changed: htmlChanged } = await fixHtmlFragment(node.html, postTitle, brokenPlaceholderUrl);
      if (htmlChanged) {
        node.html = html;
        changed = true;
      }
    }

    // Recurse into children if present
    if (Array.isArray(node.children)) {
      for (let i = 0; i < node.children.length; i++) {
        await walk(node.children[i], node.children, i);
      }
    }
  }

  const topLevel = lexicalObj.root.children;
  for (let i = 0; i < topLevel.length; i++) {
    await walk(topLevel[i], topLevel, i);
  }

  return { lexical: lexicalObj, changed };
}
// -----------------------------
// FIX MEDIA IN A SINGLE POST
// -----------------------------
// Pending/finished upload of the broken-image placeholder. It is created on the first
// call to fixPostMedia() and shared by all later calls, so the placeholder is uploaded
// once per run instead of once per post.
let brokenPlaceholderUrlPromise = null;

/**
 * Repair the media references of one post and save the result to Ghost.
 *
 * Steps:
 *  1. Fix the Lexical document (image and html nodes).
 *  2. Fix the legacy HTML.
 *  3. Fix feature_image when one is set.
 *  4. When no feature_image is set, use the first image found in the Lexical
 *     document, or a generated placeholder if the post has no image.
 *  5. If anything changed, PUT the changed fields (plus id and updated_at) to the
 *     Admin API. Only ECONNRESET errors are retried, up to three attempts in total.
 *
 * @param {object} post - Post as returned by the Admin API; reads id, title, html,
 *   lexical, feature_image and updated_at.
 * @returns {Promise<boolean>} True when the post was updated, false when nothing
 *   needed changing or the update failed.
 */
async function fixPostMedia(post) {
  let lexicalChanged = false;
  let htmlChanged = false;
  let featureImageChanged = false;

  if (!brokenPlaceholderUrlPromise) {
    brokenPlaceholderUrlPromise = generateBrokenPlaceholderImage();
  }
  const brokenPlaceholderUrl = await brokenPlaceholderUrlPromise;

  // 1) Fix Lexical (async walker)
  let newLexicalString = post.lexical;
  let parsedLexical = null;
  if (post.lexical) {
    try {
      parsedLexical = JSON.parse(post.lexical);
      const { lexical, changed } = await fixLexicalTreeAsync(parsedLexical, post.title, brokenPlaceholderUrl);
      if (changed) {
        newLexicalString = JSON.stringify(lexical);
        lexicalChanged = true;
      }
    } catch (e) {
      console.error(`⚠ Failed to parse lexical for "${post.title}", skipping lexical changes`);
    }
  }

  // 2) Fix legacy HTML
  let newHtml = post.html || "";
  if (newHtml) {
    const { html, changed } = await fixHtmlFragment(newHtml, post.title, brokenPlaceholderUrl);
    if (changed) {
      newHtml = html;
      htmlChanged = true;
    }
  }

  // 3) Fix feature_image if present
  let newFeatureImage = post.feature_image;
  if (post.feature_image) {
    const { url: fixedFeature, changed } = fixUrlString(post.feature_image, post.title);
    if (changed) {
      newFeatureImage = toAbsolute(fixedFeature);
      featureImageChanged = true;
      console.log(` → Fixed feature_image for "${post.title}"`);
    }
  }

  // 4) Auto-assign featured image if missing
  if (!post.feature_image) {
    console.log(` → No feature_image for "${post.title}", searching for first image…`);

    // parsedLexical is null when the post has no Lexical content or it failed to
    // parse above; parsing it a second time could not succeed either.
    const firstImage = parsedLexical ? findFirstImageInLexical(parsedLexical) : null;

    if (firstImage) {
      console.log(` → Found first image: ${firstImage}`);
      const { url: fixed, changed } = fixUrlString(firstImage, post.title);
      const finalUrl = changed ? fixed : firstImage;
      newFeatureImage = toAbsolute(finalUrl);
      featureImageChanged = true;
    } else {
      console.log(` → No images found, generating placeholder…`);
      const placeholderUrl = await generatePlaceholderImage();
      if (placeholderUrl) {
        newFeatureImage = placeholderUrl; // already absolute from Ghost or data URL fallback
        featureImageChanged = true;
      }
    }
  }

  if (!lexicalChanged && !htmlChanged && !featureImageChanged) {
    return false;
  }

  // -----------------------------
  // PUT with retry logic + updated_at
  // -----------------------------
  const maxRetries = 3;
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const payloadPost = {
        id: post.id,
        updated_at: post.updated_at
      };
      if (lexicalChanged) payloadPost.lexical = newLexicalString;
      if (htmlChanged) payloadPost.html = newHtml;
      if (featureImageChanged && newFeatureImage) payloadPost.feature_image = newFeatureImage;

      await axios.put(
        `${GHOST_API_URL}/posts/${post.id}/`,
        { posts: [payloadPost] },
        ghostAuth()
      );
      console.log(`✔ Updated post: ${post.title}`);
      return true;
    } catch (err) {
      const code = err.code || err.cause?.code;
      console.error(`❌ Failed to update post ${post.title} (attempt ${attempt})`, code || "");
      if (code === "ECONNRESET" && attempt < maxRetries) {
        console.log(" → ECONNRESET, retrying after delay…");
        await sleep(1500);
        continue;
      }
      if (err.response?.data) {
        console.error(" → Ghost response:", JSON.stringify(err.response.data, null, 2));
      } else {
        console.error(err);
      }
      return false;
    }
  }
  return false;
}
// -----------------------------
// MAIN
// -----------------------------
/**
 * Fetch every post from the Admin API, one page of 100 at a time.
 *
 * Pages are requested explicitly instead of with limit=all so that sites with more
 * than one page of posts are processed completely even when the server caps the
 * page size.
 *
 * @returns {Promise<object[]>} All posts, with the fields fixPostMedia() reads.
 */
async function fetchAllPosts() {
  const posts = [];
  let page = 1;
  while (page) {
    const res = await axios.get(
      `${GHOST_API_URL}/posts/?limit=100&page=${page}&fields=id,title,html,lexical,feature_image,updated_at&formats=html,lexical`,
      ghostAuth()
    );
    posts.push(...(res.data.posts || []));

    // meta.pagination.next is the next page number, or null on the last page.
    // Requiring it to move forward guards against looping on a malformed response.
    const next = res.data.meta?.pagination?.next;
    page = typeof next === "number" && next > page ? next : null;
  }
  return posts;
}

/**
 * Entry point: fetch all posts and fix them one after another, pausing 200 ms
 * between posts. An unexpected error while fixing one post is logged and does not
 * stop the remaining posts from being processed.
 *
 * @returns {Promise<void>}
 */
async function run() {
  console.log("Fetching posts…");
  const posts = await fetchAllPosts();
  console.log(`Found ${posts.length} posts`);

  for (const post of posts) {
    try {
      await fixPostMedia(post);
    } catch (err) {
      console.error(`❌ Unexpected error while fixing post ${post.title}`, err);
    }
    await sleep(200);
  }
  console.log("Done.");
}

run().catch(console.error);
Member discussion