Spaces:
Runtime error
Runtime error
| import re | |
# Compiled pattern that finds URLs embedded in free text.
#
# A match must start with "http://", "https://", "ftp://" or a "www."-style
# prefix ("www", up to three digits, then a dot), followed by optional
# "user:pass@" credentials, a host (public dotted-quad IP, a domain name with
# a TLD of at least two letters, or "localhost"), an optional port and an
# optional path.
#
# NOTE: the non-ASCII range is written as ``\u00a1-\uffff`` (single backslash).
# Inside a raw string the doubled form ``\\u00a1-\\uffff`` is read by the
# regex engine as a literal backslash, the letters "u", "0", "a", "f" and the
# range "1" to "\" -- so internationalized hosts never matched, while
# characters such as "?", "@" and ":" (and digits in the TLD) were accepted.
URL_REGEX = re.compile(
    # only start at the beginning of the text or right after a character
    # that is not a word character, "/" or "."
    r"(?:^|(?<![\w\/\.]))"
    # protocol identifier (or a bare "www." / "www1." style prefix)
    r"(?:(?:https?:\/\/|ftp:\/\/|www\d{0,3}\.))"
    # user:pass authentication
    r"(?:\S+(?::\S*)?@)?"
    r"(?:"
    # IP address exclusion
    # private & local networks
    r"(?!(?:10|127)(?:\.\d{1,3}){3})"
    r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})"
    r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})"
    # IP address dotted notation octets
    # first octet 1-223: excludes 0.x.x.x and reserved space >= 224.0.0.0
    # last octet 1-254: excludes network & broadcast addresses
    r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
    r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
    r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
    r"|"
    # host name
    r"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)"
    # domain name
    r"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*"
    # TLD identifier (letters only, at least two)
    r"(?:\.(?:[a-z\u00a1-\uffff]{2,}))"
    r"|"
    # literal "localhost" (kept as capturing group 1, as in the original)
    r"(?:(localhost))"
    r")"
    # port number
    r"(?::\d{2,5})?"
    # resource path; stops at whitespace and at ")", "]" or "}" so that a
    # closing bracket following a URL in running text is not swallowed.
    # No trailing boundary assertion is applied after the path.
    r"(?:\/[^\)\]\}\s]*)?",
    flags=re.UNICODE | re.IGNORECASE,
)