URL detection regex 判断是否为URL的正则表达式


import java.util.regex.Pattern;

import junit.framework.Assert;

import org.junit.Test;

/**
 * Tests for a regular expression that locates URLs inside arbitrary text.
 *
 * <p>Each test wraps every detected URL in newline characters and compares the
 * result with the expected string.
 *
 * <p>Capture groups of the pattern:
 * <ol>
 *   <li>the whole URL;</li>
 *   <li>optional scheme and user info, host, and optional port;</li>
 *   <li>the scheme name (http, https, Http, Https, rtsp or Rtsp), when present;</li>
 *   <li>the host: a dotted name ending in one of the listed top-level domains,
 *       or a dotted-quad IPv4 address;</li>
 *   <li>the optional path starting with {@code /}.</li>
 * </ol>
 */
public class URLDetectorTest {
	/** Replacement template: capture group 1 (the whole URL) surrounded by newlines. */
	private static final String URL_ON_OWN_LINE = "\n$1\n";

	/** Source text of the URL-detection regular expression. */
	private static final String URL_REGEX =
			// optional scheme followed by optional "user[:password]@" (user <= 64, password <= 25 units)
			"(((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
			+ "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
			+ "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
			+ "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+"   // named host: one or more dot-terminated labels
			+ "(?:"   // followed by a top-level domain from this fixed list
			+ "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
			+ "|(?:biz|b[abdefghijmnorstvwyz])"
			+ "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
			+ "|d[ejkmoz]"
			+ "|(?:edu|e[cegrstu])"
			+ "|f[ijkmor]"
			+ "|(?:gov|g[abdefghilmnpqrstuwy])"
			+ "|h[kmnrtu]"
			+ "|(?:info|int|i[delmnoqrst])"
			+ "|(?:jobs|j[emop])"
			+ "|k[eghimnrwyz]"
			+ "|l[abcikrstuvy]"
			+ "|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
			+ "|(?:name|net|n[acefgilopruz])"
			+ "|(?:org|om)"
			+ "|(?:pro|p[aefghklmnrstwy])"
			+ "|qa"
			+ "|r[eouw]"
			+ "|s[abcdeghijklmnortuvyz]"
			+ "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
			+ "|u[agkmsyz]"
			+ "|v[aceginu]"
			+ "|w[fs]"
			+ "|y[etu]"
			+ "|z[amw]))"
			+ "|(?:(?:25[0-5]|2[0-4]" // or a dotted-quad IPv4 address (first octet may not be a lone 0)
			+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
			+ "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
			+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
			+ "|[1-9][0-9]|[0-9])))"
			+ "(?:\\:\\d{1,5})?)" // plus optional port number (1 to 5 digits)
			+ "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\@\\&\\=\\#\\~"  // plus optional path; '?', '&', '=' and '#' are allowed in it
			+ "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
			+ "(?:\\b|$))"; // the match must end at a word boundary or at end of input

	/**
	 * The regex compiled once for the whole class; {@code String.replaceAll}
	 * would otherwise recompile this large expression on every call.
	 */
	private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX);

	/**
	 * Surrounds every URL found in {@code text} with newline characters.
	 *
	 * @param text the text to scan
	 * @return {@code text} with each detected URL preceded and followed by a newline
	 */
	private static String putUrlsOnOwnLines(String text) {
		return URL_PATTERN.matcher(text).replaceAll(URL_ON_OWN_LINE);
	}

	/** An empty input contains no URL and is returned unchanged. */
	@Test
	public void test1(){
		Assert.assertEquals("", putUrlsOnOwnLines(""));
	}

	/** A URL with an explicit http scheme inside an HTML attribute is detected in full. */
	@Test
	public void test2(){
		String query = "<link type=\"text/css\" rel=\"stylesheet\" href=\"http://baidu.com/style/201404141151/css/common.css\">";
		Assert.assertEquals("<link type=\"text/css\" rel=\"stylesheet\" href=\"\nhttp://baidu.com/style/201404141151/css/common.css\n\">", putUrlsOnOwnLines(query));
	}

	/** A scheme-less URL (host and path only) inside an HTML attribute is also detected. */
	@Test
	public void test3(){
		String query = "<link type=\"text/css\" rel=\"stylesheet\" href=\"baidu.com/style/201404141151/css/common.css\">";
		Assert.assertEquals("<link type=\"text/css\" rel=\"stylesheet\" href=\"\nbaidu.com/style/201404141151/css/common.css\n\">", putUrlsOnOwnLines(query));
	}
}


http://www.regexguru.com/2008/11/detecting-urls-in-a-block-of-text/

发表评论

电子邮件地址不会被公开。 必填项已用 * 标注

您可以使用这些 HTML 标签和属性: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>