Skip to content

Pass in own list of TLDs #66

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 4 additions & 16 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,22 +1,10 @@
'use strict';
const ipRegex = require('ip-regex');
const tlds = require('tlds');
const makeUrlRegex = require('./make-url-regex');

// NOTE(review): this span is a rendered diff of index.js ("4 additions & 16
// deletions") with the +/- markers stripped, so removed and added lines are
// interleaved and the text below does not parse as a single function.
// Presumably the post-change body is just
// `return makeUrlRegex({tlds, ...options});` and the regex-building lines are
// the removed ones — confirm against the actual file before editing.
module.exports = options => {
options = {
strict: true,
return makeUrlRegex({
tlds,
...options
};

const protocol = `(?:(?:[a-z]+:)?//)${options.strict ? '' : '?'}`;
const auth = '(?:\\S+(?::\\S*)?@)?';
const ip = ipRegex.v4().source;
const host = '(?:(?:[a-z\\u00a1-\\uffff0-9][-_]*)*[a-z\\u00a1-\\uffff0-9]+)';
const domain = '(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*';
const tld = `(?:\\.${options.strict ? '(?:[a-z\\u00a1-\\uffff]{2,})' : `(?:${tlds.sort((a, b) => b.length - a.length).join('|')})`})\\.?`;
const port = '(?::\\d{2,5})?';
const path = '(?:[/?#][^\\s"]*)?';
const regex = `(?:${protocol}|www\\.)${auth}(?:localhost|${ip}|${host}${domain}${tld})${port}${path}`;

return options.exact ? new RegExp(`(?:^${regex}$)`, 'i') : new RegExp(regex, 'ig');
});
};
24 changes: 24 additions & 0 deletions make-url-regex.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
'use strict';
const ipRegex = require('ip-regex');

module.exports = options => {
options = {
strict: true,
...options
};
if (!options.strict && !options.tlds) {
throw new Error('Need to provide `tlds` option if `strict === false`');
}

const protocol = `(?:(?:[a-z]+:)?//)${options.strict ? '' : '?'}`;
const auth = '(?:\\S+(?::\\S*)?@)?';
const ip = ipRegex.v4().source;
const host = '(?:(?:[a-z\\u00a1-\\uffff0-9][-_]*)*[a-z\\u00a1-\\uffff0-9]+)';
const domain = '(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*';
const tld = `(?:\\.${options.strict ? '(?:[a-z\\u00a1-\\uffff]{2,})' : `(?:${options.tlds.sort((a, b) => b.length - a.length).join('|')})`})\\.?`;
const port = '(?::\\d{2,5})?';
const path = '(?:[/?#][^\\s"]*)?';
const regex = `(?:${protocol}|www\\.)${auth}(?:localhost|${ip}|${host}${domain}${tld})${port}${path}`;

return options.exact ? new RegExp(`(?:^${regex}$)`, 'i') : new RegExp(regex, 'ig');
};
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
},
"files": [
"index.js",
"index.d.ts"
"index.d.ts",
"make-url-regex.js"
],
"keywords": [
"regex",
Expand Down
154 changes: 147 additions & 7 deletions test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import test from 'ava';
import makeUrlRegex from './make-url-regex';
import urlRegex from '.';

test('match exact URLs', t => {
Expand Down Expand Up @@ -65,6 +66,7 @@ test('match exact URLs', t => {

for (const x of fixtures) {
t.true(urlRegex({exact: true}).test(x));
t.true(makeUrlRegex({exact: true}).test(x));
}
});

Expand All @@ -77,13 +79,15 @@ test('match URLs in text', t => {
Foo //bar.net/?q=Query with spaces
`;

t.deepEqual([
'//dolor.sit',
'http://example.com',
'http://example.com/with-path',
'https://another.example.com',
'//bar.net/?q=Query'
], fixture.match(urlRegex()));
for (const makeRegex of [urlRegex, makeUrlRegex]) {
t.deepEqual([
'//dolor.sit',
'http://example.com',
'http://example.com/with-path',
'https://another.example.com',
'//bar.net/?q=Query'
], fixture.match(makeRegex()));
}
});

test('do not match URLs', t => {
Expand Down Expand Up @@ -133,6 +137,7 @@ test('do not match URLs', t => {

for (const x of fixtures) {
t.false(urlRegex({exact: true}).test(x));
t.false(makeUrlRegex({exact: true}).test(x));
}
});

Expand Down Expand Up @@ -198,3 +203,138 @@ test('match using list of TLDs', t => {
t.true(urlRegex({exact: true, strict: false}).test(x));
}
});

// Every fixture must match exactly when the regex is built in non-strict mode
// from an explicit TLD list that contains the fixture's TLD.
test('match using explicit list of TLDs', t => {
	// NOTE(review): the fixtures carrying userinfo (`user:pass@…`,
	// `userid:password@…`, `userid@…`) were restored from `[email protected]`
	// e-mail-obfuscation residue, which contains a space and no `@` and so could
	// never match. The hosts are presumed from the neighbouring fixtures —
	// confirm against the upstream test file.
	const fixtures = [
		'foo.com/blah_blah',
		'foo.com/blah_blah/',
		'foo.com/blah_blah_(wikipedia)',
		'foo.com/blah_blah_(wikipedia)_(again)',
		'www.example.com/wpstyle/?p=364',
		'www.example.com/foo/?bar=baz&inga=42&quux',
		'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com',
		'mw1.google.com/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
		'user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body',
		'www.microsoft.xn--comindex-g03d.html.irongeek.com',
		'✪df.ws/123',
		'localhost/',
		'userid:password@example.com:8080',
		'userid:password@example.com:8080/',
		'userid@example.com',
		'userid@example.com/',
		'userid@example.com:8080',
		'userid@example.com:8080/',
		'userid:password@example.com',
		'userid:password@example.com/',
		'142.42.1.1/',
		'142.42.1.1:8080/',
		'➡.ws/䨹',
		'⌘.ws',
		'⌘.ws/',
		'foo.com/blah_(wikipedia)#cite-1',
		'foo.com/blah_(wikipedia)_blah#cite-1',
		'foo.com/unicode_(✪)_in_parens',
		'foo.com/(something)?after=parens',
		'☺.damowmow.com/',
		'code.google.com/events/#&product=browser',
		'j.mp',
		'foo.bar/baz',
		'foo.bar/?q=Test%20URL-encoded%20stuff',
		'-.~_!$&\'()*+\';=:%40:80%2f::::::@example.com',
		'1337.net',
		'a.b-c.de',
		'223.255.255.254',
		'example.com?foo=bar',
		'example.com#foo',
		'localhost:8080',
		'foo.ws',
		'a.b-c.de',
		'223.255.255.254',
		'userid:password@example.com',
		'➡.ws/䨹',
		'//localhost:8080',
		'//foo.ws',
		'//a.b-c.de',
		'//223.255.255.254',
		'//userid:password@example.com',
		'//➡.ws/䨹',
		'www.google.com/unicorn',
		'example.com.',
		'example.onion',
		'unicorn.education',
		'//➡.onion/䨹',
		'userid:password@example.onion',
		'-.~_!$&\'()*+\';=:%40:80%2f::::::@example.onion',
		'mw1.unicorn.education/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
		'www.example.onion/wpstyle/?p=364'
	];

	for (const x of fixtures) {
		t.true(makeUrlRegex(
			{exact: true, strict: false, tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar', 'onion', 'education']}
		).test(x));
	}
});

// No fixture may match when its TLD is absent from the explicit TLD list
// passed to makeUrlRegex in non-strict mode.
test('fail if not in explicit list of TLDs', t => {
	// NOTE(review): the fixtures carrying userinfo (`user:pass@…`,
	// `userid:password@…`, `userid@…`) were restored from `[email protected]`
	// e-mail-obfuscation residue, which failed to match for the wrong reason
	// (a space and no `@`) rather than because of the TLD. The hosts/TLDs
	// (`.biz`, `.uk`) are presumed from the neighbouring fixtures — confirm
	// against the upstream test file.
	const fixtures = [
		'foo.baz/blah_blah',
		'foo.co.uk/blah_blah/',
		'foo.biz/blah_blah_(wikipedia)',
		'foo.onion/blah_blah_(wikipedia)_(again)',
		'www.example.education/wpstyle/?p=364',
		'www.example.biz/foo/?bar=baz&inga=42&quux',
		'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.biz',
		'mw1.google.biz/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg',
		'user:pass@example.biz:123/one/two.three?q1=a1&q2=a2#body',
		'www.microsoft.xn--comindex-g03d.html.irongeek.biz',
		'✪df.onion/123',
		'userid:password@example.biz:8080',
		'userid:password@example.biz:8080/',
		'userid@example.biz',
		'userid@example.biz/',
		'userid@example.biz:8080',
		'userid@example.biz:8080/',
		'userid:password@example.biz',
		'userid:password@example.biz/',
		'➡.onion/䨹',
		'⌘.onion',
		'⌘.onion/',
		'foo.biz/blah_(wikipedia)#cite-1',
		'foo.biz/blah_(wikipedia)_blah#cite-1',
		'foo.biz/unicode_(✪)_in_parens',
		'foo.biz/(something)?after=parens',
		'☺.damowmow.biz/',
		'code.google.biz/events/#&product=browser',
		'j.onion',
		'foo.baz/baz',
		'foo.baz/?q=Test%20URL-encoded%20stuff',
		'-.~_!$&\'()*+\';=:%40:80%2f::::::@example.biz',
		'1337.biz',
		'a.b-c.ly',
		'example.biz?foo=bar',
		'example.biz#foo',
		'foo.jp',
		'a.b-c.cn',
		'userid:password@example.uk',
		'➡.uk/䨹',
		'//foo.uk',
		'//a.b-c.uk',
		'//userid:password@example.uk',
		'//➡.cn/䨹',
		'www.google.biz/unicorn',
		'example.biz.'
	];

	for (const x of fixtures) {
		t.false(makeUrlRegex(
			{exact: true, strict: false, tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar']}
		).test(x));
	}
});

// Calling makeUrlRegex in non-strict mode without a `tlds` option must throw
// an error whose message mentions `tlds`.
test('fail for makeUrlRegex if tlds flag not present, strict false', t => {
	const optionsWithoutTlds = {exact: true, strict: false};
	const buildAndTest = () => {
		const pattern = makeUrlRegex(optionsWithoutTlds);
		pattern.test('http://google.com');
	};

	t.throws(buildAndTest, {message: /tlds/});
});