Skip to content

Instantly share code, notes, and snippets.

@geedew
Forked from dperini/regex-weburl.js
Created November 5, 2013 20:26

Revisions

  1. @dperini dperini revised this gist Nov 5, 2013. 1 changed file with 24 additions and 0 deletions.
    24 changes: 24 additions & 0 deletions regex-weburl.js
    Original file line number Diff line number Diff line change
    @@ -3,6 +3,30 @@
    //
    // Author: Diego Perini
    // Updated: 2010/12/05
    // License: MIT
    //
    // Copyright (c) 2010-2013 Diego Perini (http://www.iport.it)
    //
    // Permission is hereby granted, free of charge, to any person
    // obtaining a copy of this software and associated documentation
    // files (the "Software"), to deal in the Software without
    // restriction, including without limitation the rights to use,
    // copy, modify, merge, publish, distribute, sublicense, and/or sell
    // copies of the Software, and to permit persons to whom the
    // Software is furnished to do so, subject to the following
    // conditions:
    //
    // The above copyright notice and this permission notice shall be
    // included in all copies or substantial portions of the Software.
    //
    // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    // OTHER DEALINGS IN THE SOFTWARE.
    //
    // the regular expression composed & commented
    // could be easily tweaked for RFC compliance,
  2. @dperini dperini revised this gist Dec 9, 2010. 1 changed file with 15 additions and 2 deletions.
    17 changes: 15 additions & 2 deletions regex-weburl.js
    Original file line number Diff line number Diff line change
    @@ -23,15 +23,17 @@
    // first and last IP address of each class is considered invalid
    // (since they are broadcast/network addresses)
    //
    // - Added exclusion of private, reserved and/or local networks ranges
    //
    // Compressed one-line versions:
    //
    // Javascript version
    //
    // /^(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@)?(?:(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:\/[^\s]*)?$/i
    // /^(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:\/[^\s]*)?$/i
    //
    // PHP version
    //
    // _^(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?$_iuS
    // _^(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?$_iuS
    //
    var re_weburl = new RegExp(
    "^" +
    @@ -40,7 +42,18 @@ var re_weburl = new RegExp(
    // user:pass authentication
    "(?:\\S+(?::\\S*)?@)?" +
    "(?:" +
    // IP address exclusion
    // private & local networks
    "(?!10(?:\\.\\d{1,3}){3})" +
    "(?!127(?:\\.\\d{1,3}){3})" +
    "(?!169\\.254(?:\\.\\d{1,3}){2})" +
    "(?!192\\.168(?:\\.\\d{1,3}){2})" +
    "(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})" +
    // IP address dotted notation octets
    // excludes the "this network" block 0.0.0.0/8 (first octet 0)
    // excludes reserved space >= 224.0.0.0
    // excludes network & broadcast addresses
    // (first & last IP address of each class)
    "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])" +
    "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}" +
    "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))" +
  3. @dperini dperini revised this gist Dec 6, 2010. 1 changed file with 13 additions and 13 deletions.
    26 changes: 13 additions & 13 deletions regex-weburl.js
    Original file line number Diff line number Diff line change
    @@ -27,34 +27,34 @@
    //
    // Javascript version
    //
    // /^(https?|ftp):\/\/(\S+(:\S*)?@)?(([1-9]|[1-9]\d|1\d\d|2[0-1]\d|22[0-3])(\.([0-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])){2}(\.([1-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-4]))|(([a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)(\.([a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)*(\.([a-z\u00a1-\uffff]{2,})))(:\d{2,5})?(\/[^\s]*)?$/i
    // /^(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@)?(?:(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,})))(?::\d{2,5})?(?:\/[^\s]*)?$/i
    //
    // PHP version
    //
    // _^(https?|ftp)://(\S+(:\S*)?@)?(([1-9]|[1-9]\d|1\d\d|2[0-1]\d|22[0-3])(\.([0-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])){2}(\.([1-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-4]))|(([a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(\.([a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(\.([a-z\x{00a1}-\x{ffff}]{2,})))(:\d{2,5})?(/[^\s]*)?$_iuS
    // _^(?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?$_iuS
    //
    var re_weburl = new RegExp(
    "^" +
    // protocol identifier
    "(https?|ftp)://" +
    "(?:(?:https?|ftp)://)" +
    // user:pass authentication
    "(\\S+(:\\S*)?@)?" +
    "(" +
    "(?:\\S+(?::\\S*)?@)?" +
    "(?:" +
    // IP address dotted notation octets
    "([1-9]|[1-9]\\d|1\\d\\d|2[0-1]\\d|22[0-3])" +
    "(\\.([0-9]|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])){2}" +
    "(\\.([1-9]|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-4]))" +
    "(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])" +
    "(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}" +
    "(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))" +
    "|" +
    // host name
    "(([a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)" +
    "(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)" +
    // domain name
    "(\\.([a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*" +
    "(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*" +
    // TLD identifier
    "(\\.([a-z\\u00a1-\\uffff]{2,}))" +
    "(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))" +
    ")" +
    // port number
    "(:\\d{2,5})?" +
    "(?::\\d{2,5})?" +
    // resource path
    "(/[^\\s]*)?" +
    "(?:/[^\\s]*)?" +
    "$", "i"
    );
  4. @dperini dperini revised this gist Dec 6, 2010. 1 changed file with 3 additions and 2 deletions.
    5 changes: 3 additions & 2 deletions regex-weburl.js
    Original file line number Diff line number Diff line change
    @@ -19,8 +19,9 @@
    //
    // Changes:
    //
    // - IP address dotted notation validation 1.0.0.0 - 223.255.255.255
    // first and last IP of each class is invalid (broadcast addresses)
    // - IP address dotted notation validation, range: 1.0.0.0 - 223.255.255.255
    // first and last IP address of each class is considered invalid
    // (since they are broadcast/network addresses)
    //
    // Compressed one-line versions:
    //
  5. @dperini dperini revised this gist Dec 6, 2010. 1 changed file with 7 additions and 2 deletions.
    9 changes: 7 additions & 2 deletions regex-weburl.js
    Original file line number Diff line number Diff line change
    @@ -17,7 +17,12 @@
    // - TLDs have been made mandatory so single names like "localhost" fail
    // - protocols have been restricted to ftp, http and https only as requested
    //
    // Compressed one-line version (319 characters):
    // Changes:
    //
    // - IP address dotted notation validation 1.0.0.0 - 223.255.255.255
    // first and last IP of each class is invalid (broadcast addresses)
    //
    // Compressed one-line versions:
    //
    // Javascript version
    //
    @@ -34,7 +39,7 @@ var re_weburl = new RegExp(
    // user:pass authentication
    "(\\S+(:\\S*)?@)?" +
    "(" +
    // IP address dotted octets notation
    // IP address dotted notation octets
    "([1-9]|[1-9]\\d|1\\d\\d|2[0-1]\\d|22[0-3])" +
    "(\\.([0-9]|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])){2}" +
    "(\\.([1-9]|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-4]))" +
  6. @dperini dperini revised this gist Dec 6, 2010. 1 changed file with 25 additions and 16 deletions.
    41 changes: 25 additions & 16 deletions regex-weburl.js
    Original file line number Diff line number Diff line change
    @@ -17,29 +17,38 @@
    // - TLDs have been made mandatory so single names like "localhost" fail
    // - protocols have been restricted to ftp, http and https only as requested
    //
    // Compressed one-line version (230 characters):
    // Compressed one-line version (319 characters):
    //
    // var re_weburl = /^(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\u00a1-\uffff]+-?)*[a-z\d\u00a1-\uffff]+)(?:\.(?:[a-z\d\u00a1-\uffff]+-?)*[a-z\d\u00a1-\uffff]+)*(?:\.[a-z\u00a1-\uffff]{2,6}))(?::\d+)?(?:\/[^\s]*)?$/i;
    // Javascript version
    //
    // /^(https?|ftp):\/\/(\S+(:\S*)?@)?(([1-9]|[1-9]\d|1\d\d|2[0-1]\d|22[0-3])(\.([0-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])){2}(\.([1-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-4]))|(([a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)(\.([a-z\u00a1-\uffff0-9]+-?)*[a-z\u00a1-\uffff0-9]+)*(\.([a-z\u00a1-\uffff]{2,})))(:\d{2,5})?(\/[^\s]*)?$/i
    //
    // PHP version
    //
    // _^(https?|ftp)://(\S+(:\S*)?@)?(([1-9]|[1-9]\d|1\d\d|2[0-1]\d|22[0-3])(\.([0-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])){2}(\.([1-9]|[1-9]\d|1\d\d|2[0-4]\d|25[0-4]))|(([a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(\.([a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(\.([a-z\x{00a1}-\x{ffff}]{2,})))(:\d{2,5})?(/[^\s]*)?$_iuS
    //
    var re_weburl = new RegExp(
    "^" +
    // protocol identifier
    "(?:(?:https?|ftp)://)" +
    "(?:" +
    // user:pass@ authentication
    "\\S+(?::\\S*)?@|" +
    // IP dotted octets notation
    "\\d{1,3}(?:\\.\\d{1,3}){3}|" +
    // host name part
    "(?:(?:[a-z\\d\\u00a1-\\uffff]+-?)*[a-z\\d\\u00a1-\\uffff]+)" +
    // domain name part
    "(?:\\.(?:[a-z\\d\\u00a1-\\uffff]+-?)*[a-z\\d\\u00a1-\\uffff]+)*" +
    // TLD identifier part
    "(?:\\.[a-z\\u00a1-\\uffff]{2,6})" +
    "(https?|ftp)://" +
    // user:pass authentication
    "(\\S+(:\\S*)?@)?" +
    "(" +
    // IP address dotted octets notation
    "([1-9]|[1-9]\\d|1\\d\\d|2[0-1]\\d|22[0-3])" +
    "(\\.([0-9]|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-5])){2}" +
    "(\\.([1-9]|[1-9]\\d|1\\d\\d|2[0-4]\\d|25[0-4]))" +
    "|" +
    // host name
    "(([a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)" +
    // domain name
    "(\\.([a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*" +
    // TLD identifier
    "(\\.([a-z\\u00a1-\\uffff]{2,}))" +
    ")" +
    // port number
    "(?::\\d+)?" +
    "(:\\d{2,5})?" +
    // resource path
    "(?:/[^\\s]*)?" +
    "(/[^\\s]*)?" +
    "$", "i"
    );
  7. @dperini dperini revised this gist Dec 5, 2010. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions regex-weburl.js
    Original file line number Diff line number Diff line change
    @@ -17,9 +17,9 @@
    // - TLDs have been made mandatory so single names like "localhost" fail
    // - protocols have been restricted to ftp, http and https only as requested
    //
    // Compressed one-line version (228 characters):
    // Compressed one-line version (230 characters):
    //
    // var re_weburl = /^(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\u00a1-\uffff]+-?)*[a-z\d\u00a1-\uffff]+)(?:\.(?:[a-z\d\u00a1-\uffff]+-?)*[a-z\d\u00a1-\uffff]+)*(?:\.[a-z\u00a1-\uffff]{2,6}))(?::\d+)?(?:[^\s]*)?$/i;
    // var re_weburl = /^(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\u00a1-\uffff]+-?)*[a-z\d\u00a1-\uffff]+)(?:\.(?:[a-z\d\u00a1-\uffff]+-?)*[a-z\d\u00a1-\uffff]+)*(?:\.[a-z\u00a1-\uffff]{2,6}))(?::\d+)?(?:\/[^\s]*)?$/i;
    //
    var re_weburl = new RegExp(
    "^" +
  8. @dperini dperini revised this gist Dec 5, 2010. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion regex-weburl.js
    Original file line number Diff line number Diff line change
    @@ -40,6 +40,6 @@ var re_weburl = new RegExp(
    // port number
    "(?::\\d+)?" +
    // resource path
    "(?:[^\\s]*)?" +
    "(?:/[^\\s]*)?" +
    "$", "i"
    );
  9. @dperini dperini created this gist Dec 5, 2010.
    45 changes: 45 additions & 0 deletions regex-weburl.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,45 @@
    //
    // Regular Expression for URL validation
    //
    // Author: Diego Perini
    // Updated: 2010/12/05
    //
    // the regular expression composed & commented
    // could be easily tweaked for RFC compliance,
    // it was expressly modified to fit & satisfy
    // these tests for a URL shortener:
    //
    // http://mathiasbynens.be/demo/url-regex
    //
    // Notes on possible differences from a standard/generic validation:
    //
    // - utf-8 char class takes into consideration the full Unicode range
    // - TLDs have been made mandatory so single names like "localhost" fail
    // - protocols have been restricted to ftp, http and https only as requested
    //
    // Compressed one-line version (228 characters):
    //
    // var re_weburl = /^(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\u00a1-\uffff]+-?)*[a-z\d\u00a1-\uffff]+)(?:\.(?:[a-z\d\u00a1-\uffff]+-?)*[a-z\d\u00a1-\uffff]+)*(?:\.[a-z\u00a1-\uffff]{2,6}))(?::\d+)?(?:[^\s]*)?$/i;
    //
    // Case-insensitive ("i" flag) pattern for validating web URLs. It is
    // anchored at both ends ("^" ... "$"), so the whole input string must
    // match. The pattern is assembled from string pieces so each part can
    // carry its own comment; backslashes are doubled because the pieces are
    // string literals handed to the RegExp constructor.
    var re_weburl = new RegExp(
    "^" +
    // protocol identifier: "http://", "https://" or "ftp://"
    "(?:(?:https?|ftp)://)" +
    // a single group holding three alternatives: userinfo | IP | host name
    "(?:" +
    // user:pass@ authentication (password part optional); as written this
    // is an alternative to the IP/host branches, not a prefix before them
    "\\S+(?::\\S*)?@|" +
    // IP dotted octets notation: four groups of 1-3 digits separated by
    // dots; the numeric range of each group is not checked
    "\\d{1,3}(?:\\.\\d{1,3}){3}|" +
    // host name part: a-z, digits and U+00A1-U+FFFF; a hyphen may only
    // appear singly between such characters (not leading, trailing or doubled)
    "(?:(?:[a-z\\d\\u00a1-\\uffff]+-?)*[a-z\\d\\u00a1-\\uffff]+)" +
    // domain name part: zero or more further ".label" pieces, same label rules
    "(?:\\.(?:[a-z\\d\\u00a1-\\uffff]+-?)*[a-z\\d\\u00a1-\\uffff]+)*" +
    // TLD identifier part: mandatory "." plus 2-6 characters from a-z or
    // U+00A1-U+FFFF (digits and hyphens are not accepted here)
    "(?:\\.[a-z\\u00a1-\\uffff]{2,6})" +
    ")" +
    // port number: optional ":" followed by one or more digits (no upper bound)
    "(?::\\d+)?" +
    // resource path: any run of non-whitespace characters, possibly empty;
    // no leading "/" is required by this piece
    "(?:[^\\s]*)?" +
    "$", "i"
    );