/* [The "BSD licence"] Copyright (c) 2003 Terence Parr, jGuru.com All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.foo.lib.html; import org.foo.support.Utils; import java.io.*; public class HTMLUtils { /** Strip all tags and char escapes from text. */ public static String stripHTML(String text) { return stripHTML(text, false); } public static String stripHTML(String text, boolean stripPRE) { StringBuffer buf = new StringBuffer(text.length()); int i = 0; while ( i < text.length() ) { if ( text.charAt(i)=='<' ) { i++; // toss out tag StringBuffer tagName = new StringBuffer(20); while ( i' if ( buf.length()>0 && !Character.isWhitespace(buf.charAt(buf.length()-1)) ) { // make sure we don't glom words together buf.append(' '); // add whitespace if none prior } // System.out.println("tagName finished at "+(i-1)+": "+tagName.toString()); String name = tagName.toString().toUpperCase(); if ( name.startsWith("SCRIPT") ) { // kill til end tag // System.err.println("Found ", i); if ( endScript>=i ) { // System.err.println("found end tag at "+endScript); i = endScript+"".length(); // System.err.println("i reset to "+i); } } else if ( name.startsWith("A") && name.indexOf("HREF=")>0 ) { int endref = Utils.indexOfTextIgnoreCase(text, "", i); if ( endref>=0 ) { // is there an end tag anywhere int nextLink = Utils.indexOfTextIgnoreCase(text, "=0 && nextLink // just scarf the tag and keep going. i = nextLink; } else if ( (endref-i)<=200 ) { // don't look too far ahead; handle missing buf.append(text.substring(i,endref)); i = endref+"".length(); } } } else if ( name.startsWith("FORM") ) { int endScript = Utils.indexOfTextIgnoreCase(text, "", i); if ( endScript!=-1 ) { // scarf til end of form if present. i = endScript+"".length(); } } else if ( name.startsWith("HEAD") ) { int endScript = text.indexOf("", i); int endScript2 = text.indexOf("", i); if ( endScript>=i ) { i = endScript+"".length(); } else if ( endScript2>=i ) { i = endScript2+"".length(); } } else if ( name.startsWith("STYLE") ) { int endScript = text.indexOf("", i); int endScript2 = text.indexOf("", i); if ( endScript>=i ) { i = endScript+"".length(); } else if ( endScript2>=i ) { i = endScript2+"".length(); } } } else if ( (text.charAt(i)=='&'&&(i+5)0 && !Character.isWhitespace(buf.charAt(buf.length()-1)) ) { buf.append(' '); // add whitespace if none prior } } else { buf.append( text.charAt(i) ); i++; } } return buf.toString(); } /** if there is a , otherwise, return the string */ public static String stripCData(String data) { int cdIndex = data.indexOf(""); if(cdEndIndex == -1) { return data.substring(cdIndex+9, data.length()); } else { return data.substring(cdIndex+9, cdEndIndex); } } } /** Textarea HTML entities need to have the & char escaped to * & so they appear correctly. HTML tags seem to stay ok. */ public static String escapeAmpersands(String text) { if ( text==null || text.length()==0 ) { return text; } StringBuffer buf = new StringBuffer(500); for (int i=0; i 0) { String entity = s.substring(i, semiIndex+1); entity = entity.toLowerCase(); boolean b = false; if(!escapeCharEntities) { b = ( (entity.compareTo("<")==0) || (entity.compareTo(">")==0) || (entity.compareTo("&")==0) || (entity.compareTo(""")==0) || (entity.compareTo("'")==0) || (entity.startsWith("&#")) ); } if (b) { i += semiIndex - i; result.append( entity ); } else { // result.append("&"); result.append('&'); } } else { //result.append("&"); result.append('&'); } } else if (ch=='<') { result.append("<"); } else if (ch=='>') { result.append(">"); } /* else if (ch=='\"') { result.append("""); } */ else { result.append(ch); } } return result.toString(); } /** return a string suitable for inclusion as an html attribute * between quotes. */ public static String escapeForHTMLTagAttributeUse(String s) { return Utils.replace(s, "\"", """); } }