9 min read

Automagically fix your WP Imported Post Broken Images!

Automagically fix your WP Imported Post Broken Images!
Photo by Dollar Gill / Unsplash

What's the big idea?

I wanted to migrate away from WordPress and try out Ghost, but my migration went poorly. My old plugins had generated a mix of image tags, images with srcset, videos, weird embeds, etc. Ghost should have handled them cleanly, but it dropped the ball on nearly all of them. So I decided some scripting was necessary to fix the broken images.

One major flaw with the Ghost 6.x import process is that it failed to find the images on my old WordPress site. That may be my fault, as I had already taken the site down and was hosting it on my personal computer. It was reachable from the web and the images displayed on the temporary instance, but something about that setup didn't work well with the Ghost import process.

My solution was to copy all of the content files from my WordPress install into the content folder of my Ghost install.

After that I ran the script below to migrate all of the various forms of images into something that works with the Lexical representation in Ghost.


This post includes a production-ready Node.js script that fixes media URLs after a WordPress → Ghost migration. It:

  • Repairs image and video URLs inside Lexical HTML blocks and legacy HTML
  • Strips WordPress srcset attributes so browsers use the corrected src
  • Fixes featured images and auto-assigns the first image as the feature image when missing
  • Generates a placeholder image when no images exist
  • Replaces missing referenced files with a broken-image placeholder and adds data-old-url so the original filename is preserved

How to use this script

  1. Install prerequisites on the machine that can reach your Ghost Admin API:
    npm install axios glob jsonwebtoken form-data
  2. Configure the constants at the top of the script:
    • Set SITE_URL to your Ghost site (including https://).
    • Set GHOST_ADMIN_KEY to your Admin API key (format id:secret).
  3. Run the fixer with `node ghost-media-fixer.js` on a machine that can reach your Ghost server and has local access to the /var/www/html/content/images folder (or adjust CONTENT_ROOT to match your install).

Security note: keep your Admin API key secret. Do not commit it to public repositories.

The full fixer script is embedded below. Copy it into a file named ghost-media-fixer.js and run it after configuring the constants at the top.

/**
 * Ghost-Safe Media URL Fixer (Lexical + HTML + Feature Images + Auto-Assign + Broken Placeholder)
 * ----------------------------------------------------------------------------------------------
 * Adds behavior: when a referenced filename cannot be found in content/images,
 * replace the src with a temporary "broken" image (⛓️‍💥) and add a data-old-url
 * attribute so the original filename/URL is preserved in the post content.
 *
 * All previous features retained:
 *  - Fixes lexical image nodes and html blocks
 *  - Fixes legacy post.html
 *  - Fixes and auto-assigns feature_image (absolute URL)
 *  - Strips srcset and <source srcset>
 *  - Generates placeholder images (with retry and graceful fallback)
 *
 * Requirements:
 *   npm install axios glob jsonwebtoken form-data
 */

const fs = require("fs");
const path = require("path");
const axios = require("axios");
const glob = require("glob");
const jwt = require("jsonwebtoken");
const FormData = require("form-data");

// -----------------------------
// CONFIGURATION
// -----------------------------
// Every setting can be edited in place or supplied through an environment
// variable. Using the environment keeps the Admin API key out of this file
// (and therefore out of version control).

// Base URL of the Ghost site, including the scheme (GHOST_SITE_URL).
// Trailing slashes are removed so URLs built from it never contain "//".
const SITE_URL = (process.env.GHOST_SITE_URL || "https://www.yoursitenamehere.com").replace(/\/+$/, "");
const GHOST_API_URL = `${SITE_URL}/ghost/api/admin`;

// Admin API key in "id:secret" form (GHOST_ADMIN_KEY).
const GHOST_ADMIN_KEY = process.env.GHOST_ADMIN_KEY || "yourghost:adminkey";

// Local Ghost content folder (GHOST_CONTENT_ROOT); media is searched under
// its "images" sub-folder.
const CONTENT_ROOT = process.env.GHOST_CONTENT_ROOT || "/var/www/html/content/";
const IMAGE_DIR = path.join(CONTENT_ROOT, "images");

// Lower-case file extensions that are treated as media.
const MEDIA_EXTENSIONS = [
  ".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg",
  ".mp4", ".mov", ".webm",
  ".mp3", ".wav", ".ogg"
];

// Returns a promise that settles (with no value) once `ms` milliseconds have passed.
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}

/**
 * Builds the axios request config that authenticates against the Ghost Admin API.
 *
 * GHOST_ADMIN_KEY is split into its key id and hex-encoded secret, and a
 * five-minute HS256 token is signed with the decoded secret.
 *
 * @returns {{headers: {Authorization: string}}} Config carrying the "Ghost <token>" header.
 * @throws {Error} If GHOST_ADMIN_KEY is not in "id:secret" form with a hex secret.
 */
function ghostAuth() {
  const [id, secret] = String(GHOST_ADMIN_KEY).split(":");

  // Buffer.from(..., "hex") silently truncates at the first non-hex character,
  // so a malformed (or still unconfigured) key would otherwise sign a useless
  // token and only surface later as an opaque 401 from Ghost.
  if (!id || !secret || !/^(?:[0-9a-f]{2})+$/i.test(secret)) {
    throw new Error('GHOST_ADMIN_KEY must be in the form "id:secret", where secret is a hex string');
  }

  const token = jwt.sign(
    {
      exp: Math.floor(Date.now() / 1000) + 5 * 60,
      aud: "/admin/"
    },
    Buffer.from(secret, "hex"),
    {
      keyid: id,
      algorithm: "HS256"
    }
  );

  return {
    headers: {
      Authorization: `Ghost ${token}`
    }
  };
}

/**
 * Searches IMAGE_DIR recursively (case-insensitively) for a file with the given name.
 *
 * @param {string} filename - Bare file name to look for.
 * @returns {string|null} The first matching path reported by glob, or null when nothing matches.
 */
function findMediaAnywhere(filename) {
  if (!filename) return null;

  // The name is matched literally: characters such as [ ] ( ) { } * ? would
  // otherwise be interpreted as glob syntax (e.g. "photo[1].jpg").
  const literalName = filename.replace(/[\\*?[\]{}()]/g, "\\$&");
  const pattern = path.join(IMAGE_DIR, "**", literalName);
  const matches = glob.sync(pattern, { nocase: true });
  return matches.length > 0 ? matches[0] : null;
}

/**
 * Extracts the bare file name from a media URL or path.
 *
 * Only the final path segment is returned, so the host and any site-specific
 * path prefixes need no special handling. The query string and fragment are
 * removed first so that "photo.jpg?w=300" yields "photo.jpg".
 *
 * @param {string} src - URL or path taken from a src / feature_image value.
 * @returns {string|null} The file name, or null when `src` is empty.
 */
function extractFilenameFromSrc(src) {
  if (!src) return null;

  const withoutQuery = String(src).replace(/[?#][\s\S]*$/, "");
  return path.basename(withoutQuery);
}

// True when the file name's extension (compared in lower case) is listed in MEDIA_EXTENSIONS.
function isMediaFile(filename) {
  const suffix = path.extname(filename).toLowerCase();
  return MEDIA_EXTENSIONS.some((known) => known === suffix);
}

/**
 * Ensures a URL is absolute, as required for feature_image.
 *
 * - Empty values are returned unchanged.
 * - URLs that already carry a scheme (http:, https:, data:, ...) are returned
 *   unchanged; this keeps inline data: placeholders intact.
 * - Protocol-relative URLs ("//host/path") receive SITE_URL's scheme.
 * - Anything else is treated as a site-relative path and prefixed with SITE_URL.
 *
 * @param {string} url
 * @returns {string} The absolute URL (or the original falsy value).
 */
function toAbsolute(url) {
  if (!url) return url;
  if (/^[a-z][a-z0-9+.-]*:/i.test(url)) return url;
  if (url.startsWith("//")) return `${new URL(SITE_URL).protocol}${url}`;

  const rooted = url.startsWith("/") ? url : `/${url}`;
  return `${SITE_URL}${rooted}`;
}

// Makes a value safe to place inside an HTML attribute by replacing
// & " ' < > with their entities. Falsy input yields an empty string.
function escapeHtmlAttr(s) {
  if (!s) return "";

  const entityFor = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&#39;",
    "<": "&lt;",
    ">": "&gt;"
  };

  return String(s).replace(/[&"'<>]/g, (ch) => entityFor[ch]);
}

// Resolve one URL (a Lexical image.src or a feature_image) against the local images folder.
// Result shape: { url, changed, found }.
//   - found   : a file with that name exists under the images folder
//   - changed : `url` in the result differs from the input
// Non-media or unparseable URLs come back untouched with found = false.
function fixUrlString(url, postTitle) {
  const untouched = (found) => ({ url, changed: false, found });

  const filename = extractFilenameFromSrc(url);
  if (!filename || !isMediaFile(filename)) {
    return untouched(false);
  }

  const foundPath = findMediaAnywhere(filename);
  if (!foundPath) {
    console.log(`❌ Missing: ${filename} in post "${postTitle}"`);
    return untouched(false);
  }

  console.log(`🔎 Found ${filename} for post "${postTitle}" at: ${foundPath}`);

  // Turn the on-disk path into a path relative to the content root, using
  // forward slashes and no leading slash.
  const belowRoot = foundPath.replace(CONTENT_ROOT, "");
  const relative = belowRoot.replace(/\\/g, "/").replace(/^\/+/, "");
  const newUrl = `/content/${relative}`;

  if (newUrl === url) {
    return untouched(true);
  }

  console.log(`   → Rewriting URL to: ${newUrl}`);
  return { url: newUrl, changed: true, found: true };
}

/**
 * Repairs media references inside an HTML fragment.
 *
 * Steps, in order:
 *  1. Remove whole <source ... srcset="..."> tags.
 *  2. Strip any remaining srcset attributes so browsers fall back to src.
 *  3. Rewrite double-quoted src="..." on <img>/<video>/<audio>/<source> to the
 *     file's location under /content/ when a file with the same name exists
 *     locally. When it does not, swap in the broken placeholder and record the
 *     original URL in a data-old-url attribute.
 *
 * Step 1 must run before step 2: once the srcset attribute has been stripped,
 * a <source> tag can no longer be recognised as a srcset source.
 *
 * @param {string} html - Fragment to repair; falsy values are treated as "".
 * @param {string} postTitle - Used in log output only.
 * @param {string} brokenPlaceholderUrl - Replacement src for missing files; when
 *   empty, the original src is kept and only data-old-url is added.
 * @returns {Promise<{html: string, changed: boolean}>}
 */
async function fixHtmlFragment(html, postTitle, brokenPlaceholderUrl) {
  let changed = false;
  let out = html || "";

  // 1) Remove <source srcset> tags
  const beforeSourceStrip = out;
  out = out.replace(/<source[^>]*srcset="[^"]*"[^>]*>/gi, "");
  out = out.replace(/<source[^>]*srcset='[^']*'[^>]*>/gi, "");
  if (out !== beforeSourceStrip) {
    console.log(`   → Removed <source srcset> tags in "${postTitle}"`);
    changed = true;
  }

  // 2) Strip remaining srcset attributes
  const beforeStrip = out;
  out = out.replace(/\s+srcset="[^"]*"/gi, "");
  out = out.replace(/\s+srcset='[^']*'/gi, "");
  if (out !== beforeStrip) {
    console.log(`   → Stripped srcset attributes in "${postTitle}"`);
    changed = true;
  }

  // 3) Fix src="..." URLs. `prefix` is everything from "<tag" up to the
  // whitespace before src, so the other attributes are preserved verbatim.
  const srcRegex = /(<\s*(?:img|video|audio|source)[^>]*\s)src="([^"]+)"/gi;
  out = out.replace(srcRegex, (full, prefix, url) => {
    const filename = extractFilenameFromSrc(url);
    if (!filename || !isMediaFile(filename)) {
      return full;
    }

    const foundPath = findMediaAnywhere(filename);
    if (foundPath) {
      const relative = foundPath
        .replace(CONTENT_ROOT, "")
        .replace(/\\/g, "/")
        .replace(/^\/+/, "");
      const newUrl = `/content/${relative}`;
      if (newUrl === url) {
        return full;
      }
      console.log(`🔎 Found ${filename} for post "${postTitle}" at: ${foundPath}`);
      console.log(`   → Rewriting URL to: ${newUrl}`);
      changed = true;
      return `${prefix}src="${newUrl}"`;
    }

    // Missing file: show the broken placeholder (when there is one) and keep
    // the original URL in data-old-url so it can be recovered later.
    console.log(`   → File missing for ${filename}, inserting broken placeholder for "${postTitle}"`);
    changed = true;
    const escapedOld = escapeHtmlAttr(url);
    const shownSrc = brokenPlaceholderUrl || url;
    return `${prefix}src="${shownSrc}" data-old-url="${escapedOld}"`;
  });

  // NOTE: an earlier "fix escaped quotes" pass used the pattern /\"/g, which
  // matches a plain double quote and replaced it with itself. It never
  // changed anything, so it has been dropped.

  return { html: out, changed };
}

// Returns the src of the first image found in a parsed Lexical document, or
// null when there is none. Nodes are visited depth-first in document order;
// a node counts as an image when it is a Lexical "image" node with a string
// src, or an "html" node whose markup contains <img ... src="...">.
function findFirstImageInLexical(lexicalObj) {
  const topLevel = lexicalObj?.root?.children;
  if (!Array.isArray(topLevel)) {
    return null;
  }

  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped in their original order.
  const pending = [...topLevel].reverse();

  while (pending.length > 0) {
    const current = pending.pop();
    if (!current || typeof current !== "object") continue;

    if (current.type === "image" && typeof current.src === "string") {
      if (current.src) return current.src;
      // An image node with an empty src ends the search of this subtree.
      continue;
    }

    if (current.type === "html" && typeof current.html === "string") {
      const hit = /<img[^>]*src="([^"]+)"/i.exec(current.html);
      if (hit) return hit[1];
    }

    if (Array.isArray(current.children)) {
      for (let i = current.children.length - 1; i >= 0; i--) {
        pending.push(current.children[i]);
      }
    }
  }

  return null;
}

/**
 * Generates the ✏️ placeholder SVG and uploads it to Ghost.
 *
 * The upload is attempted up to three times, but only an ECONNRESET failure is
 * retried; any other failure is logged and ends the attempts. Failure is
 * non-fatal: the same SVG is then returned as an inline data: URL.
 *
 * @returns {Promise<string>} URL of the uploaded image, or a data: URL fallback.
 */
async function generatePlaceholderImage() {
  const svg = `
    <svg xmlns="http://www.w3.org/2000/svg" width="1200" height="630">
      <rect width="100%" height="100%" fill="#f3f3f3"/>
      <text x="50%" y="50%" font-size="120" text-anchor="middle" dy=".35em">✏️</text>
    </svg>
  `;

  const buffer = Buffer.from(svg, "utf8");
  const maxRetries = 3;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const form = new FormData();
      form.append("file", buffer, {
        filename: "placeholder.svg",
        contentType: "image/svg+xml"
      });

      const res = await axios.post(
        `${GHOST_API_URL}/images/upload/`,
        form,
        {
          headers: {
            ...form.getHeaders(),
            Authorization: ghostAuth().headers.Authorization
          }
        }
      );

      return res.data.images[0].url;
    } catch (err) {
      const code = err.code || err.cause?.code;
      console.error(`❌ Failed to upload placeholder image (attempt ${attempt})`, code || "");

      if (code === "ECONNRESET" && attempt < maxRetries) {
        console.log("   → ECONNRESET during placeholder upload, retrying after delay…");
        await sleep(1500);
        continue;
      }

      if (err.response?.data) {
        console.error("   → Ghost response (placeholder upload):", JSON.stringify(err.response.data, null, 2));
      } else {
        console.error(err);
      }

      break;
    }
  }

  // Fallback: inline the same ✏️ SVG so there is always something to show.
  // (This previously inlined the "broken image" artwork by mistake.)
  return `data:image/svg+xml;utf8,${encodeURIComponent(svg)}`;
}

// Builds the "broken image" SVG (⛓️‍💥) and tries to upload it to Ghost.
// At most three attempts are made and only ECONNRESET is retried. Failure is
// non-fatal: the same SVG is returned as an inline data: URL instead.
async function generateBrokenPlaceholderImage() {
  const svg = `
    <svg xmlns="http://www.w3.org/2000/svg" width="1200" height="630">
      <rect width="100%" height="100%" fill="#fff0f0"/>
      <text x="50%" y="50%" font-size="120" text-anchor="middle" dy=".35em">⛓️‍💥</text>
    </svg>
  `;

  const attemptLimit = 3;
  let attempt = 0;

  while (attempt < attemptLimit) {
    attempt += 1;

    try {
      const upload = new FormData();
      upload.append("file", Buffer.from(svg, "utf8"), {
        filename: "broken-placeholder.svg",
        contentType: "image/svg+xml"
      });

      const headers = {
        ...upload.getHeaders(),
        Authorization: ghostAuth().headers.Authorization
      };
      const { data } = await axios.post(`${GHOST_API_URL}/images/upload/`, upload, { headers });

      return data.images[0].url;
    } catch (err) {
      const code = err.code || err.cause?.code;
      console.error(`❌ Failed to upload broken placeholder (attempt ${attempt})`, code || "");

      const canRetry = code === "ECONNRESET" && attempt < attemptLimit;
      if (!canRetry) {
        if (err.response?.data) {
          console.error("   → Ghost response (broken placeholder upload):", JSON.stringify(err.response.data, null, 2));
        } else {
          console.error(err);
        }
        break;
      }

      console.log("   → ECONNRESET during broken placeholder upload, retrying after delay…");
      await sleep(1500);
    }
  }

  // Upload failed: hand back the same artwork inline.
  return `data:image/svg+xml;utf8,${encodeURIComponent(svg)}`;
}

/**
 * Walks a parsed Lexical document and repairs its image and html nodes in place.
 *
 * - image nodes whose file exists locally get their src rewritten.
 * - image nodes that name a media file which cannot be found locally are
 *   replaced by an html node showing the broken placeholder, with the original
 *   URL kept in data-old-url.
 * - image nodes whose src does not name a media file (for example an external
 *   URL without a file extension) are left untouched: such a URL was never
 *   looked up locally, so nothing is known to be missing.
 * - html nodes are passed through fixHtmlFragment.
 *
 * @param {object} lexicalObj - Parsed Lexical document; mutated in place.
 * @param {string} postTitle - Used in log output only.
 * @param {string} brokenPlaceholderUrl - Replacement src for missing files.
 * @returns {Promise<{lexical: object, changed: boolean}>}
 */
async function fixLexicalTreeAsync(lexicalObj, postTitle, brokenPlaceholderUrl) {
  if (!lexicalObj || !lexicalObj.root || !Array.isArray(lexicalObj.root.children)) {
    return { lexical: lexicalObj, changed: false };
  }

  let changed = false;

  async function walk(node, parentArray, index) {
    if (!node || typeof node !== "object") return;

    // Image node with src
    if (node.type === "image" && typeof node.src === "string") {
      const filename = extractFilenameFromSrc(node.src);
      const namesMediaFile = Boolean(filename) && isMediaFile(filename);

      if (namesMediaFile) {
        const { url, changed: urlChanged, found } = fixUrlString(node.src, postTitle);

        if (found) {
          if (urlChanged) {
            node.src = url;
            changed = true;
          }
        } else if (Array.isArray(parentArray) && typeof index === "number") {
          // Missing file: swap the node for an html node carrying the
          // placeholder (or the original src when there is none).
          const escapedOld = escapeHtmlAttr(node.src);
          const alt = escapeHtmlAttr(node.alt || "");
          const shownSrc = brokenPlaceholderUrl || escapedOld;
          parentArray[index] = {
            type: "html",
            html: `<img src="${shownSrc}" data-old-url="${escapedOld}" alt="${alt}">`
          };
          changed = true;
          return;
        }
      }
    }

    // HTML node with html string
    if (node.type === "html" && typeof node.html === "string") {
      const { html, changed: htmlChanged } = await fixHtmlFragment(node.html, postTitle, brokenPlaceholderUrl);
      if (htmlChanged) {
        node.html = html;
        changed = true;
      }
    }

    // Recurse into children if present
    if (Array.isArray(node.children)) {
      for (let i = 0; i < node.children.length; i++) {
        await walk(node.children[i], node.children, i);
      }
    }
  }

  for (let i = 0; i < lexicalObj.root.children.length; i++) {
    await walk(lexicalObj.root.children[i], lexicalObj.root.children, i);
  }

  return { lexical: lexicalObj, changed };
}

// -----------------------------
// FIX MEDIA IN A SINGLE POST
// -----------------------------

// Placeholder uploads are cached as promises so each image is uploaded to
// Ghost at most once per run instead of once per post. A failed upload
// resolves to the inline data: URL fallback, which is then reused as well.
let brokenPlaceholderUrlPromise = null;
let featurePlaceholderUrlPromise = null;

/**
 * Repairs the media references of one post and saves the result to Ghost.
 *
 * Steps: fix the Lexical document, fix the legacy HTML, fix an existing
 * feature_image, and, when the post has no feature_image, assign the first
 * image found in the Lexical document (or a generated placeholder when there
 * is none). The post is only sent to Ghost when something changed.
 *
 * @param {object} post - Post as returned by the Admin API; reads id, title,
 *   html, lexical, feature_image and updated_at.
 * @returns {Promise<boolean>} true when the post was updated, false when nothing
 *   changed or the update failed.
 */
async function fixPostMedia(post) {
  let lexicalChanged = false;
  let htmlChanged = false;
  let featureImageChanged = false;

  if (!brokenPlaceholderUrlPromise) {
    brokenPlaceholderUrlPromise = generateBrokenPlaceholderImage();
  }
  const brokenPlaceholderUrl = await brokenPlaceholderUrlPromise;

  // 1) Fix Lexical (async walker)
  let newLexicalString = post.lexical;
  let parsedLexical = null;

  if (post.lexical) {
    try {
      parsedLexical = JSON.parse(post.lexical);
      const { lexical, changed } = await fixLexicalTreeAsync(parsedLexical, post.title, brokenPlaceholderUrl);
      if (changed) {
        newLexicalString = JSON.stringify(lexical);
        lexicalChanged = true;
      }
    } catch (e) {
      console.error(`⚠ Failed to parse lexical for "${post.title}", skipping lexical changes`);
    }
  }

  // 2) Fix legacy HTML
  let newHtml = post.html || "";
  if (newHtml) {
    const { html, changed } = await fixHtmlFragment(newHtml, post.title, brokenPlaceholderUrl);
    if (changed) {
      newHtml = html;
      htmlChanged = true;
    }
  }

  // 3) Fix feature_image if present
  let newFeatureImage = post.feature_image;
  if (post.feature_image) {
    const { url: fixedFeature, changed } = fixUrlString(post.feature_image, post.title);
    if (changed) {
      newFeatureImage = toAbsolute(fixedFeature);
      featureImageChanged = true;
      console.log(`   → Fixed feature_image for "${post.title}"`);
    }
  }

  // 4) Auto-assign featured image if missing
  if (!post.feature_image) {
    console.log(`   → No feature_image for "${post.title}", searching for first image…`);

    // Only the already-parsed document is searched: if parsing failed above,
    // parsing the same string again cannot succeed.
    const firstImage = parsedLexical ? findFirstImageInLexical(parsedLexical) : null;

    if (firstImage) {
      console.log(`   → Found first image: ${firstImage}`);
      const { url: fixed, changed } = fixUrlString(firstImage, post.title);
      const finalUrl = changed ? fixed : firstImage;
      newFeatureImage = toAbsolute(finalUrl);
      featureImageChanged = true;
    } else {
      console.log(`   → No images found, generating placeholder…`);
      if (!featurePlaceholderUrlPromise) {
        featurePlaceholderUrlPromise = generatePlaceholderImage();
      }
      const placeholderUrl = await featurePlaceholderUrlPromise;
      if (placeholderUrl) {
        newFeatureImage = placeholderUrl; // already absolute from Ghost or data URL fallback
        featureImageChanged = true;
      }
    }
  }

  if (!lexicalChanged && !htmlChanged && !featureImageChanged) {
    return false;
  }

  // -----------------------------
  // PUT with retry logic + updated_at
  // -----------------------------
  // updated_at is sent back unchanged alongside the modified fields.
  const payloadPost = {
    id: post.id,
    updated_at: post.updated_at
  };

  if (lexicalChanged) payloadPost.lexical = newLexicalString;
  if (htmlChanged) payloadPost.html = newHtml;
  if (featureImageChanged && newFeatureImage) payloadPost.feature_image = newFeatureImage;

  const maxRetries = 3;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      await axios.put(
        `${GHOST_API_URL}/posts/${post.id}/`,
        { posts: [payloadPost] },
        ghostAuth()
      );

      console.log(`✔ Updated post: ${post.title}`);
      return true;

    } catch (err) {
      const code = err.code || err.cause?.code;
      console.error(`❌ Failed to update post ${post.title} (attempt ${attempt})`, code || "");

      // Only a dropped connection is worth retrying.
      if (code === "ECONNRESET" && attempt < maxRetries) {
        console.log("   → ECONNRESET, retrying after delay…");
        await sleep(1500);
        continue;
      }

      if (err.response?.data) {
        console.error("   → Ghost response:", JSON.stringify(err.response.data, null, 2));
      } else {
        console.error(err);
      }

      return false;
    }
  }

  return false;
}

// -----------------------------
// MAIN
// -----------------------------

/**
 * Fetches every post from the Ghost Admin API and runs fixPostMedia on each,
 * one at a time with a short pause in between.
 *
 * Posts are fetched page by page (100 per request) by following
 * meta.pagination.next, rather than relying on a single "limit=all" request,
 * so the whole site is processed even when the server limits the page size.
 * All pages are collected before any post is updated.
 *
 * @returns {Promise<void>}
 */
async function run() {
  console.log("Fetching posts…");

  const posts = [];
  let page = 1;

  while (page) {
    const res = await axios.get(
      `${GHOST_API_URL}/posts/?limit=100&page=${page}&fields=id,title,html,lexical,feature_image,updated_at&formats=html,lexical`,
      ghostAuth()
    );

    posts.push(...res.data.posts);
    // `next` is null on the last page, which ends the loop.
    page = res.data.meta?.pagination?.next || null;
  }

  console.log(`Found ${posts.length} posts`);

  for (const post of posts) {
    await fixPostMedia(post);
    await sleep(200);
  }

  console.log("Done.");
}

run().catch(console.error);