Set up a mostly-default project with BackstopJS
Signed-off-by: Danila Fedorin <danila.fedorin@gmail.com>
This commit is contained in:
commit
6f29015422
59
backstop.js
Normal file
59
backstop.js
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
const fs = require('node:fs');
|
||||||
|
// pages.json is expected to contain a JSON array of page URLs (one scenario
// is generated per entry below). readFileSync with an encoding returns a
// string, so it has to be parsed before `.map` can be called on it.
const list = JSON.parse(fs.readFileSync('./pages.json', 'utf8'));
|
||||||
|
|
||||||
|
/**
 * Build the BackstopJS scenario for a single page.
 *
 * @param {string} file - Page location; used as both the scenario label and its URL.
 * @returns {object} A scenario object carrying the default settings below.
 */
function scenarioForFile(file) {
  // Identity of the page under test.
  const scenario = {
    label: file,
    cookiePath: 'backstop_data/engine_scripts/cookies.json',
    url: file,
  };

  // Default capture/compare settings. The arrays are created per call so
  // scenarios never share a mutable list.
  Object.assign(scenario, {
    referenceUrl: '',
    readyEvent: '',
    readySelector: '',
    delay: 0,
    hideSelectors: [],
    removeSelectors: [],
    hoverSelector: '',
    clickSelector: '',
    postInteractionWait: 0,
    selectors: [],
    selectorExpansion: true,
    expect: 0,
    misMatchThreshold: 0.1,
    requireSameDimensions: true,
  });

  return scenario;
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
"id": "blog_regression",
|
||||||
|
"viewports": [
|
||||||
|
{
|
||||||
|
"label": "phone",
|
||||||
|
"width": 320,
|
||||||
|
"height": 480
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"label": "tablet",
|
||||||
|
"width": 1024,
|
||||||
|
"height": 768
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"onBeforeScript": "puppet/onBefore.js",
|
||||||
|
"onReadyScript": "puppet/onReady.js",
|
||||||
|
"scenarios": list.map(scenarioForFile),
|
||||||
|
"paths": {
|
||||||
|
"bitmaps_reference": "backstop_data/bitmaps_reference",
|
||||||
|
"bitmaps_test": "backstop_data/bitmaps_test",
|
||||||
|
"engine_scripts": "backstop_data/engine_scripts",
|
||||||
|
"html_report": "backstop_data/html_report",
|
||||||
|
"ci_report": "backstop_data/ci_report"
|
||||||
|
},
|
||||||
|
"report": ["browser"],
|
||||||
|
"engine": "puppeteer",
|
||||||
|
"engineOptions": {
|
||||||
|
"args": ["--no-sandbox"]
|
||||||
|
},
|
||||||
|
"asyncCaptureLimit": 5,
|
||||||
|
"asyncCompareLimit": 50,
|
||||||
|
"debug": false,
|
||||||
|
"debugWindow": false
|
||||||
|
}
|
14
backstop_data/engine_scripts/cookies.json
Normal file
14
backstop_data/engine_scripts/cookies.json
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"domain": ".www.yourdomain.com",
|
||||||
|
"path": "/",
|
||||||
|
"name": "yourCookieName",
|
||||||
|
"value": "yourCookieValue",
|
||||||
|
"expirationDate": 1798790400,
|
||||||
|
"hostOnly": false,
|
||||||
|
"httpOnly": false,
|
||||||
|
"secure": false,
|
||||||
|
"session": false,
|
||||||
|
"sameSite": "Lax"
|
||||||
|
}
|
||||||
|
]
|
BIN
backstop_data/engine_scripts/imageStub.jpg
Normal file
BIN
backstop_data/engine_scripts/imageStub.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.8 KiB |
@ -0,0 +1,43 @@
|
|||||||
|
module.exports = async (page, scenario) => {
|
||||||
|
const hoverSelector = scenario.hoverSelectors || scenario.hoverSelector;
|
||||||
|
const clickSelector = scenario.clickSelectors || scenario.clickSelector;
|
||||||
|
const keyPressSelector = scenario.keyPressSelectors || scenario.keyPressSelector;
|
||||||
|
const scrollToSelector = scenario.scrollToSelector;
|
||||||
|
const postInteractionWait = scenario.postInteractionWait; // selector [str] | ms [int]
|
||||||
|
|
||||||
|
if (keyPressSelector) {
|
||||||
|
for (const keyPressSelectorItem of [].concat(keyPressSelector)) {
|
||||||
|
await page.waitForSelector(keyPressSelectorItem.selector);
|
||||||
|
await page.type(keyPressSelectorItem.selector, keyPressSelectorItem.keyPress);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hoverSelector) {
|
||||||
|
for (const hoverSelectorIndex of [].concat(hoverSelector)) {
|
||||||
|
await page.waitForSelector(hoverSelectorIndex);
|
||||||
|
await page.hover(hoverSelectorIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (clickSelector) {
|
||||||
|
for (const clickSelectorIndex of [].concat(clickSelector)) {
|
||||||
|
await page.waitForSelector(clickSelectorIndex);
|
||||||
|
await page.click(clickSelectorIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (postInteractionWait) {
|
||||||
|
if (parseInt(postInteractionWait) > 0) {
|
||||||
|
await page.waitForTimeout(postInteractionWait);
|
||||||
|
} else {
|
||||||
|
await page.waitForSelector(postInteractionWait);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (scrollToSelector) {
|
||||||
|
await page.waitForSelector(scrollToSelector);
|
||||||
|
await page.evaluate(scrollToSelector => {
|
||||||
|
document.querySelector(scrollToSelector).scrollIntoView();
|
||||||
|
}, scrollToSelector);
|
||||||
|
}
|
||||||
|
};
|
31
backstop_data/engine_scripts/playwright/interceptImages.js
Normal file
31
backstop_data/engine_scripts/playwright/interceptImages.js
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/**
|
||||||
|
* INTERCEPT IMAGES
|
||||||
|
* Listen to all requests. If a request matches IMAGE_URL_RE
|
||||||
|
* then stub the image with data from IMAGE_STUB_URL
|
||||||
|
*
|
||||||
|
* Use this in an onBefore script E.G.
|
||||||
|
```
|
||||||
|
module.exports = async function(page, scenario) {
|
||||||
|
require('./interceptImages')(page, scenario);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
const IMAGE_URL_RE = /\.gif|\.jpg|\.png/i;
|
||||||
|
const IMAGE_STUB_URL = path.resolve(__dirname, '../../imageStub.jpg');
|
||||||
|
const IMAGE_DATA_BUFFER = fs.readFileSync(IMAGE_STUB_URL);
|
||||||
|
const HEADERS_STUB = {};
|
||||||
|
|
||||||
|
module.exports = async function (page, scenario) {
|
||||||
|
page.route(IMAGE_URL_RE, route => {
|
||||||
|
route.fulfill({
|
||||||
|
body: IMAGE_DATA_BUFFER,
|
||||||
|
headers: HEADERS_STUB,
|
||||||
|
status: 200
|
||||||
|
});
|
||||||
|
});
|
||||||
|
};
|
16
backstop_data/engine_scripts/playwright/loadCookies.js
Normal file
16
backstop_data/engine_scripts/playwright/loadCookies.js
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
|
||||||
|
module.exports = async (browserContext, scenario) => {
|
||||||
|
let cookies = [];
|
||||||
|
const cookiePath = scenario.cookiePath;
|
||||||
|
|
||||||
|
// Read Cookies from File, if exists
|
||||||
|
if (fs.existsSync(cookiePath)) {
|
||||||
|
cookies = JSON.parse(fs.readFileSync(cookiePath));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add cookies to browser
|
||||||
|
browserContext.addCookies(cookies);
|
||||||
|
|
||||||
|
console.log('Cookie state restored with:', JSON.stringify(cookies, null, 2));
|
||||||
|
};
|
3
backstop_data/engine_scripts/playwright/onBefore.js
Normal file
3
backstop_data/engine_scripts/playwright/onBefore.js
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
module.exports = async (page, scenario, viewport, isReference, browserContext) => {
|
||||||
|
await require('./loadCookies')(browserContext, scenario);
|
||||||
|
};
|
6
backstop_data/engine_scripts/playwright/onReady.js
Normal file
6
backstop_data/engine_scripts/playwright/onReady.js
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
module.exports = async (page, scenario, viewport, isReference, browserContext) => {
|
||||||
|
console.log('SCENARIO > ' + scenario.label);
|
||||||
|
await require('./clickAndHoverHelper')(page, scenario);
|
||||||
|
|
||||||
|
// add more ready handlers here...
|
||||||
|
};
|
27
backstop_data/engine_scripts/playwright/overrideCSS.js
Normal file
27
backstop_data/engine_scripts/playwright/overrideCSS.js
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
/**
|
||||||
|
* OVERRIDE CSS
|
||||||
|
* Apply this CSS to the loaded page, as a way to override styles.
|
||||||
|
*
|
||||||
|
* Use this in an onReady script E.G.
|
||||||
|
```
|
||||||
|
module.exports = async function(page, scenario) {
|
||||||
|
await require('./overrideCSS')(page, scenario);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
const BACKSTOP_TEST_CSS_OVERRIDE = `
|
||||||
|
html {
|
||||||
|
background-image: none;
|
||||||
|
}
|
||||||
|
`;
|
||||||
|
|
||||||
|
module.exports = async (page, scenario) => {
|
||||||
|
// inject arbitrary css to override styles
|
||||||
|
await page.addStyleTag({
|
||||||
|
content: BACKSTOP_TEST_CSS_OVERRIDE
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log('BACKSTOP_TEST_CSS_OVERRIDE injected for: ' + scenario.label);
|
||||||
|
};
|
41
backstop_data/engine_scripts/puppet/clickAndHoverHelper.js
Normal file
41
backstop_data/engine_scripts/puppet/clickAndHoverHelper.js
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
module.exports = async (page, scenario) => {
|
||||||
|
const hoverSelector = scenario.hoverSelectors || scenario.hoverSelector;
|
||||||
|
const clickSelector = scenario.clickSelectors || scenario.clickSelector;
|
||||||
|
const keyPressSelector = scenario.keyPressSelectors || scenario.keyPressSelector;
|
||||||
|
const scrollToSelector = scenario.scrollToSelector;
|
||||||
|
const postInteractionWait = scenario.postInteractionWait; // selector [str] | ms [int]
|
||||||
|
|
||||||
|
if (keyPressSelector) {
|
||||||
|
for (const keyPressSelectorItem of [].concat(keyPressSelector)) {
|
||||||
|
await page.waitForSelector(keyPressSelectorItem.selector);
|
||||||
|
await page.type(keyPressSelectorItem.selector, keyPressSelectorItem.keyPress);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hoverSelector) {
|
||||||
|
for (const hoverSelectorIndex of [].concat(hoverSelector)) {
|
||||||
|
await page.waitForSelector(hoverSelectorIndex);
|
||||||
|
await page.hover(hoverSelectorIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (clickSelector) {
|
||||||
|
for (const clickSelectorIndex of [].concat(clickSelector)) {
|
||||||
|
await page.waitForSelector(clickSelectorIndex);
|
||||||
|
await page.click(clickSelectorIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (postInteractionWait) {
|
||||||
|
await new Promise(resolve => {
|
||||||
|
setTimeout(resolve, postInteractionWait);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (scrollToSelector) {
|
||||||
|
await page.waitForSelector(scrollToSelector);
|
||||||
|
await page.evaluate(scrollToSelector => {
|
||||||
|
document.querySelector(scrollToSelector).scrollIntoView();
|
||||||
|
}, scrollToSelector);
|
||||||
|
}
|
||||||
|
};
|
65
backstop_data/engine_scripts/puppet/ignoreCSP.js
Normal file
65
backstop_data/engine_scripts/puppet/ignoreCSP.js
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
/**
|
||||||
|
* IGNORE CSP HEADERS
|
||||||
|
* Listen to all requests. If a request matches scenario.url
|
||||||
|
* then fetch the request again manually, strip out CSP headers
|
||||||
|
* and respond to the original request without CSP headers.
|
||||||
|
* Allows `ignoreHTTPSErrors: true` BUT... requires `debugWindow: true`
|
||||||
|
*
|
||||||
|
* see https://github.com/GoogleChrome/puppeteer/issues/1229#issuecomment-380133332
|
||||||
|
* this is the workaround until Page.setBypassCSP lands... https://github.com/GoogleChrome/puppeteer/pull/2324
|
||||||
|
*
|
||||||
|
* @param {REQUEST} request
|
||||||
|
* @return {VOID}
|
||||||
|
*
|
||||||
|
* Use this in an onBefore script E.G.
|
||||||
|
```
|
||||||
|
module.exports = async function(page, scenario) {
|
||||||
|
require('./ignoreCSP')(page, scenario);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
const fetch = require('node-fetch');
|
||||||
|
const https = require('https');
|
||||||
|
const agent = new https.Agent({
|
||||||
|
rejectUnauthorized: false
|
||||||
|
});
|
||||||
|
|
||||||
|
module.exports = async function (page, scenario) {
|
||||||
|
const intercept = async (request, targetUrl) => {
|
||||||
|
const requestUrl = request.url();
|
||||||
|
|
||||||
|
// FIND TARGET URL REQUEST
|
||||||
|
if (requestUrl === targetUrl) {
|
||||||
|
const cookiesList = await page.cookies(requestUrl);
|
||||||
|
const cookies = cookiesList.map(cookie => `${cookie.name}=${cookie.value}`).join('; ');
|
||||||
|
const headers = Object.assign(request.headers(), { cookie: cookies });
|
||||||
|
const options = {
|
||||||
|
headers,
|
||||||
|
body: request.postData(),
|
||||||
|
method: request.method(),
|
||||||
|
follow: 20,
|
||||||
|
agent
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = await fetch(requestUrl, options);
|
||||||
|
|
||||||
|
const buffer = await result.buffer();
|
||||||
|
const cleanedHeaders = result.headers._headers || {};
|
||||||
|
cleanedHeaders['content-security-policy'] = '';
|
||||||
|
await request.respond({
|
||||||
|
body: buffer,
|
||||||
|
headers: cleanedHeaders,
|
||||||
|
status: result.status
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
request.continue();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', req => {
|
||||||
|
intercept(req, scenario.url);
|
||||||
|
});
|
||||||
|
};
|
37
backstop_data/engine_scripts/puppet/interceptImages.js
Normal file
37
backstop_data/engine_scripts/puppet/interceptImages.js
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
/**
|
||||||
|
* INTERCEPT IMAGES
|
||||||
|
* Listen to all requests. If a request matches IMAGE_URL_RE
|
||||||
|
* then stub the image with data from IMAGE_STUB_URL
|
||||||
|
*
|
||||||
|
* Use this in an onBefore script E.G.
|
||||||
|
```
|
||||||
|
module.exports = async function(page, scenario) {
|
||||||
|
require('./interceptImages')(page, scenario);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
|
||||||
|
const IMAGE_URL_RE = /\.gif|\.jpg|\.png/i;
|
||||||
|
const IMAGE_STUB_URL = path.resolve(__dirname, '../imageStub.jpg');
|
||||||
|
const IMAGE_DATA_BUFFER = fs.readFileSync(IMAGE_STUB_URL);
|
||||||
|
const HEADERS_STUB = {};
|
||||||
|
|
||||||
|
module.exports = async function (page, scenario) {
|
||||||
|
const intercept = async (request, targetUrl) => {
|
||||||
|
if (IMAGE_URL_RE.test(request.url())) {
|
||||||
|
await request.respond({
|
||||||
|
body: IMAGE_DATA_BUFFER,
|
||||||
|
headers: HEADERS_STUB,
|
||||||
|
status: 200
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
request.continue();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', intercept);
|
||||||
|
};
|
33
backstop_data/engine_scripts/puppet/loadCookies.js
Normal file
33
backstop_data/engine_scripts/puppet/loadCookies.js
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
|
||||||
|
module.exports = async (page, scenario) => {
|
||||||
|
let cookies = [];
|
||||||
|
const cookiePath = scenario.cookiePath;
|
||||||
|
|
||||||
|
// READ COOKIES FROM FILE IF EXISTS
|
||||||
|
if (fs.existsSync(cookiePath)) {
|
||||||
|
cookies = JSON.parse(fs.readFileSync(cookiePath));
|
||||||
|
}
|
||||||
|
|
||||||
|
// MUNGE COOKIE DOMAIN
|
||||||
|
cookies = cookies.map(cookie => {
|
||||||
|
if (cookie.domain.startsWith('http://') || cookie.domain.startsWith('https://')) {
|
||||||
|
cookie.url = cookie.domain;
|
||||||
|
} else {
|
||||||
|
cookie.url = 'https://' + cookie.domain;
|
||||||
|
}
|
||||||
|
delete cookie.domain;
|
||||||
|
return cookie;
|
||||||
|
});
|
||||||
|
|
||||||
|
// SET COOKIES
|
||||||
|
const setCookies = async () => {
|
||||||
|
return Promise.all(
|
||||||
|
cookies.map(async (cookie) => {
|
||||||
|
await page.setCookie(cookie);
|
||||||
|
})
|
||||||
|
);
|
||||||
|
};
|
||||||
|
await setCookies();
|
||||||
|
console.log('Cookie state restored with:', JSON.stringify(cookies, null, 2));
|
||||||
|
};
|
3
backstop_data/engine_scripts/puppet/onBefore.js
Normal file
3
backstop_data/engine_scripts/puppet/onBefore.js
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
module.exports = async (page, scenario, vp) => {
|
||||||
|
await require('./loadCookies')(page, scenario);
|
||||||
|
};
|
6
backstop_data/engine_scripts/puppet/onReady.js
Normal file
6
backstop_data/engine_scripts/puppet/onReady.js
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
module.exports = async (page, scenario, vp) => {
|
||||||
|
console.log('SCENARIO > ' + scenario.label);
|
||||||
|
await require('./clickAndHoverHelper')(page, scenario);
|
||||||
|
|
||||||
|
// add more ready handlers here...
|
||||||
|
};
|
15
backstop_data/engine_scripts/puppet/overrideCSS.js
Normal file
15
backstop_data/engine_scripts/puppet/overrideCSS.js
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
const BACKSTOP_TEST_CSS_OVERRIDE = 'html {background-image: none;}';
|
||||||
|
|
||||||
|
module.exports = async (page, scenario) => {
|
||||||
|
// inject arbitrary css to override styles
|
||||||
|
await page.evaluate(`window._styleData = '${BACKSTOP_TEST_CSS_OVERRIDE}'`);
|
||||||
|
await page.evaluate(() => {
|
||||||
|
const style = document.createElement('style');
|
||||||
|
style.type = 'text/css';
|
||||||
|
const styleNode = document.createTextNode(window._styleData);
|
||||||
|
style.appendChild(styleNode);
|
||||||
|
document.head.appendChild(style);
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log('BACKSTOP_TEST_CSS_OVERRIDE injected for: ' + scenario.label);
|
||||||
|
};
|
74
chatgpt-fix-root-URLs.py
Normal file
74
chatgpt-fix-root-URLs.py
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
import os
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
# Domains considered part of your site.
|
||||||
|
SITE_ROOT_URLS = ["https://danilafe.com/", "http://danilafe.com/"]
# The project root is the current working directory.
PROJECT_ROOT = os.getcwd()
HTML_EXTENSIONS = {".html", ".htm"}


def convert_to_relative(url: str, base_filepath: str) -> str:
    """
    Convert a site-absolute URL into a path relative to an HTML file.

    URLs that start with one of SITE_ROOT_URLS, or with a single "/", are
    resolved against PROJECT_ROOT and re-expressed relative to the directory
    of ``base_filepath``. Any other URL (already relative, another host,
    protocol-relative ``//host/...``) is returned unchanged.

    Args:
        url: The URL found in the document.
        base_filepath: Path of the HTML file that contains the URL.

    Returns:
        The rewritten URL, using "/" separators, with any query string and
        fragment preserved verbatim.
    """
    # Protocol-relative URLs name another host; they only look like
    # domain-relative paths because they start with "/".
    if url.startswith("//"):
        return url

    # If the URL is already relative (or points elsewhere), return it unchanged.
    if not (url.startswith("/") or any(url.startswith(root) for root in SITE_ROOT_URLS)):
        return url

    # If it's an absolute URL on the site, strip the domain.
    for root_url in SITE_ROOT_URLS:
        if url.startswith(root_url):
            url = url[len(root_url):]
            break

    # Split off the query/fragment so filesystem normalisation below cannot
    # rewrite them (e.g. collapse "//" or ".." inside a query value).
    path, fragment_sep, fragment = url.partition("#")
    path, query_sep, query = path.partition("?")

    # For domain-relative URLs (starting with "/"), remove the leading slash.
    path = path.lstrip("/")

    # Build the full filesystem path for the target resource.
    target_path = os.path.normpath(os.path.join(PROJECT_ROOT, path))
    base_dir = os.path.dirname(base_filepath)
    # Compute the relative path from the HTML file's directory to the target.
    relative_path = os.path.relpath(target_path, start=base_dir)
    relative_url = relative_path.replace(os.path.sep, "/")
    return relative_url + query_sep + query + fragment_sep + fragment
|
||||||
|
|
||||||
|
def process_html_file(filepath):
    """Rewrite one HTML file in place.

    Site-absolute links become relative, ``<link rel="preload">`` elements are
    removed, and ``<noscript>`` wrappers are unwrapped so their contents stay
    in the document.
    """
    with open(filepath, "r", encoding="utf-8") as source:
        soup = BeautifulSoup(source, "lxml")

    # Which attribute holds the URL for each tag we rewrite.
    url_attribute = {"a": "href", "link": "href", "script": "src", "img": "src"}
    for element in soup.find_all(list(url_attribute)):
        attribute = url_attribute[element.name]
        if not element.has_attr(attribute):
            continue
        element[attribute] = convert_to_relative(element[attribute], filepath)

    # Links nested inside <noscript> blocks get the same treatment.
    for fallback in soup.find_all("noscript"):
        for stylesheet in fallback.find_all("link"):
            if not stylesheet.has_attr("href"):
                continue
            stylesheet["href"] = convert_to_relative(stylesheet["href"], filepath)

    # Drop every <link> element with rel="preload".
    for preload_link in soup.find_all("link", rel="preload"):
        preload_link.decompose()

    # "Partially evaluate" noscript: keep the children, remove the wrapper.
    for fallback in soup.find_all("noscript"):
        fallback.unwrap()

    rendered = str(soup)
    with open(filepath, "w", encoding="utf-8") as destination:
        destination.write(rendered)
|
||||||
|
|
||||||
|
def process_directory(directory):
    """Walk ``directory`` recursively and process every HTML file found."""
    for current_dir, _subdirs, filenames in os.walk(directory):
        for filename in filenames:
            _stem, extension = os.path.splitext(filename)
            # Extension match is case-insensitive.
            if extension.lower() not in HTML_EXTENSIONS:
                continue
            process_html_file(os.path.join(current_dir, filename))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
process_directory(".")
|
3
functions.bash
Normal file
3
functions.bash
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Run the URL-fixing script inside every "result-*" entry found under the
# current directory. Each run happens in a subshell so the caller's working
# directory is left alone.
function cleanurls {
    local result
    while IFS= read -r result; do
        # "&&" ensures the script never runs (and rewrites HTML) in the wrong
        # directory when the cd fails.
        (cd "$result" && python3 ../chatgpt-fix-root-URLs.py)
    done < <(find . -name "result-*")
}
|
||||||
|
# List the HTML files under result-$1, as paths relative to that directory,
# one per line and sorted. Sorting the final lines (instead of relying on
# find's BSD-only "-s" traversal order) gives the ordering comm(1) expects
# and works with GNU find as well.
function htmls {
    find "result-$1" -name "*.html" | sed "s|^result-$1/||" | sort
}
|
||||||
|
# Print the HTML paths present in both result-$1 and result-$2.
# Arguments are quoted so names containing spaces or glob characters are
# passed through intact.
function commonhtmls {
    comm -12 <(htmls "$1") <(htmls "$2")
}
|
2613
package-lock.json
generated
Normal file
2613
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
6
package.json
Normal file
6
package.json
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"dependencies": {
|
||||||
|
"backstop": "^1.0.1",
|
||||||
|
"backstopjs": "^6.3.25"
|
||||||
|
}
|
||||||
|
}
|
4
required.txt
Normal file
4
required.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
beautifulsoup4==4.13.3
|
||||||
|
lxml==5.3.1
|
||||||
|
soupsieve==2.6
|
||||||
|
typing_extensions==4.13.0
|
Loading…
Reference in New Issue
Block a user