java - للصف - مشاريع الفيجوال بيسك جاهزة
كيف أطبع XML بتنسيق جميل (pretty print) في جافا؟ (20)

إذا كان استخدام مكتبة XML من جهة خارجية أمرًا مقبولًا ، فيمكنك الاكتفاء بحل أبسط بكثير مما تقترحه الإجابات الأعلى تصويتًا حاليًا.

ذُكر أن المدخلات والمخرجات يجب أن تكون كلتاهما سلاسل نصية (String) ، لذا إليك دالة مساعدة تقوم بذلك تحديدًا ، منفَّذة باستخدام مكتبة XOM :

import nu.xom.*;
import java.io.*;

[...]

/**
 * Pretty-prints an XML string with XOM: the text is parsed into a document and
 * written back through a {@code Serializer} configured with a 4-space indent.
 *
 * @param xml the XML text to format
 * @return the indented XML, decoded from the serializer's bytes as UTF-8
 * @throws ParsingException if XOM cannot parse {@code xml}
 * @throws IOException if parsing or serializing fails at the I/O level
 */
public static String format(String xml) throws ParsingException, IOException {
  // An empty base URI is passed because the input is an in-memory string.
  final Document parsed = new Builder().build(xml, "");

  final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  final Serializer writer = new Serializer(buffer);
  writer.setIndent(4); // indentation width; change to taste
  writer.write(parsed);

  // NOTE(review): assumes the Serializer emits UTF-8 when no encoding is given — confirm.
  return buffer.toString("UTF-8");
}

اختبرت أنه يعمل ، والنتائج لا تعتمد على إصدار JRE الخاص بك أو أي شيء من هذا القبيل. لمعرفة كيفية تخصيص تنسيق الإخراج حسب رغبتك ، ألق نظرة على واجهة برمجة تطبيقات Serializer .

جاء هذا في الواقع أطول مما كنت أتوقع - فقد لزمت بعض الأسطر الإضافية لأن Serializer يحتاج إلى OutputStream ليكتب فيه. ولكن لاحظ أن الشيفرة التي تتعامل فعليًا مع XML هنا قليلة جدًا.

(تمثل هذه الإجابة جزءًا من تقييمي لـ XOM ، التي اقتُرحت كأحد الخيارات في سؤالي حول أفضل مكتبة XML لاستبدال dom4j. وللعلم ، يمكنك مع dom4j تحقيق ذلك بسهولة مماثلة باستخدام XMLWriter و OutputFormat ، كما هو موضح في إجابة mlo55 .)

لدي سلسلة نصية في Java تحتوي على XML ، بدون فواصل أسطر أو مسافات بادئة. أود تحويلها إلى سلسلة تحتوي على XML منسق بشكل جيد. كيف أفعل ذلك؟

// Input: XML held in a String, all on one line with no indentation.
String unformattedXml = "<tag><nested>hello</nested></tag>";
// [UnknownClass] is a placeholder for whatever class ends up providing format(String).
String formattedXml = new [UnknownClass]().format(unformattedXml);

ملاحظة: مدخلاتي هي سلسلة . مخرجي عبارة عن سلسلة .


إذا كنت متأكدًا من أن لديك XML صالحًا ، فهذا الحل بسيط ويتجنب بناء أشجار XML DOM. ربما يحتوي على بعض الأخطاء ، فاترك تعليقًا إذا لاحظت أي شيء

/**
 * Pretty-prints XML with a lightweight, line-based pass that never builds a DOM tree.
 *
 * <p>Every tag and every run of text is placed on its own line, indented by two
 * spaces per nesting level. The input is not validated: a {@code '<'} or
 * {@code '>'} inside text, attribute values, comments or CDATA sections is treated
 * as a tag boundary.
 *
 * <ul>
 *   <li>{@code <?...?>} rows (XML declaration, processing instructions) are never indented.</li>
 *   <li>Closing tags reduce the depth before they are printed; the depth never drops below zero.</li>
 *   <li>Self-closing tags ({@code <a/>}) and {@code <!...>} rows (comments, DOCTYPE, CDATA)
 *       are printed at the current depth and leave it unchanged.</li>
 * </ul>
 *
 * @param xml the XML text; may be {@code null}
 * @return the indented XML without leading or trailing whitespace, or {@code ""} when
 *         {@code xml} is {@code null} or blank
 */
public String prettyPrint(String xml) {
    if (xml == null || xml.trim().length() == 0) return "";

    final String indentUnit = "  ";
    int depth = 0;
    StringBuilder pretty = new StringBuilder();
    // Force a line break after every '>' and before every '<' so that each tag and
    // each text run ends up in its own row (plain replace: no regex is needed).
    String[] rows = xml.trim().replace(">", ">\n").replace("<", "\n<").split("\n");

    for (String rawRow : rows) {
        String row = rawRow.trim();
        if (row.isEmpty()) continue;

        int level; // indentation level used for this row
        if (row.startsWith("<?")) {
            // xml declaration / processing instruction
            level = 0;
        } else if (row.startsWith("</")) {
            // closing tag: step out first; clamp so unbalanced input cannot go negative
            depth = Math.max(0, depth - 1);
            level = depth;
        } else if (row.startsWith("<!") || (row.startsWith("<") && row.endsWith("/>"))) {
            // comment, DOCTYPE, CDATA or self-closing tag: nothing is opened
            level = depth;
        } else if (row.startsWith("<")) {
            // opening tag: print at the current depth, then step in
            level = depth++;
        } else {
            // tag data
            level = depth;
        }

        for (int i = 0; i < level; i++) {
            pretty.append(indentUnit);
        }
        pretty.append(row).append('\n');
    }

    return pretty.toString().trim();
}

الآن عام 2012 وجافا يمكن أن تفعل أكثر مما كانت عليه مع XML ، أود أن أضيف بديلاً إلى جوابي المقبول. هذا ليس له تبعيات خارج Java 6.

import org.w3c.dom.Node;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.InputSource;

import javax.xml.parsers.DocumentBuilderFactory;
import java.io.StringReader;

/**
 * Pretty-prints XML supplied as a string, using the DOM Load/Save serializer that
 * is looked up through {@code DOMImplementationRegistry}.
 * <p/>
 * Example:
 * <code>
 * String formattedXml = new XmlFormatter().format("<tag><nested>hello</nested></tag>");
 * </code>
 */
public class XmlFormatter {

  /**
   * Parses {@code xml} and serializes its document element with pretty-printing enabled.
   * An XML declaration is written only when the input text itself starts with {@code <?xml}.
   *
   * @param xml the XML text to format
   * @return the serialized, pretty-printed document element
   * @throws RuntimeException wrapping any exception raised while parsing or serializing
   */
  public String format(String xml) {
    try {
      final Node root = DocumentBuilderFactory.newInstance()
          .newDocumentBuilder()
          .parse(new InputSource(new StringReader(xml)))
          .getDocumentElement();
      final boolean hasDeclaration = xml.startsWith("<?xml");

      // If no "LS" implementation is found, this may be needed first:
      // System.setProperty(DOMImplementationRegistry.PROPERTY,"com.sun.org.apache.xerces.internal.dom.DOMImplementationSourceImpl");
      final DOMImplementationLS loadSave =
          (DOMImplementationLS) DOMImplementationRegistry.newInstance().getDOMImplementation("LS");
      final LSSerializer serializer = loadSave.createLSSerializer();

      // Turn on indentation.
      serializer.getDomConfig().setParameter("format-pretty-print", Boolean.TRUE);
      // Emit the declaration only if the caller's text had one.
      serializer.getDomConfig().setParameter("xml-declaration", Boolean.valueOf(hasDeclaration));

      return serializer.writeToString(root);
    } catch (Exception failure) {
      throw new RuntimeException(failure);
    }
  }

  /** Demo: formats a sample document containing irregular whitespace and prints the result. */
  public static void main(String[] args) {
    final String sample =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?><QueryMessage\n"
            + "    xmlns=\"http://www.SDMX.org/resources/SDMXML/schemas/v2_0/message\"\n"
            + "    xmlns:query=\"http://www.SDMX.org/resources/SDMXML/schemas/v2_0/query\">\n"
            + "  <Query>\n"
            + "    <query:CategorySchemeWhere>\n"
            + "  \t\t\t\t\t     <query:AgencyID>ECB\n\n\n\n</query:AgencyID>\n"
            + "    </query:CategorySchemeWhere>\n"
            + "  </Query>\n\n\n\n\n"
            + "</QueryMessage>";

    final XmlFormatter formatter = new XmlFormatter();
    System.out.println(formatter.format(sample));
  }
}

باستخدام scala:

import xml._
// Parse the XML text into a node tree.
val xml = XML.loadString("<tag><nested>hello</nested></tag>")
// PrettyPrinter(150, 2): maximum line width 150, indentation step 2.
val formatted = new PrettyPrinter(150, 2).format(xml)
println(formatted)

يمكنك القيام بذلك في Java أيضًا ، إذا كنت تعتمد على scala-library.jar. تبدو هكذا:

import scala.xml.*;

/** Demo of calling Scala's {@code PrettyPrinter} from Java (scala-library on the classpath). */
public class FormatXML {
  /** Formats a small sample document and prints it to standard output. */
  public static void main(String[] args) {
    // Arguments: maximum line width, then indentation step.
    final PrettyPrinter printer = new PrettyPrinter(150, 3);
    final String source = "<tag><nested>hello</nested></tag>";
    // TopScope$.MODULE$ is how Java refers to the Scala singleton object TopScope.
    System.out.println(printer.format(XML.loadString(source), TopScope$.MODULE$));
  }
}

يُنشأ الكائن PrettyPrinter بمعاملين: الأول هو الطول الأقصى للسطر ، والثاني هو خطوة المسافة البادئة.


فقط للإشارة إلى أن الإجابة الأعلى تقييمًا تتطلب استخدام xerces.

إذا كنت لا ترغب في إضافة هذه التبعية الخارجية ، يمكنك ببساطة استخدام مكتبات jdk القياسية (التي يتم إنشاؤها فعليًا باستخدام xerces داخليًا).

ملاحظة: كان هناك خطأ في الإصدار 1.5 من jdk راجع http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6296446 ولكن تم حلها الآن.

(لاحظ حدوث خطأ ، سيؤدي هذا إلى إرجاع النص الأصلي)

package com.test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.stream.StreamResult;

import org.xml.sax.InputSource;

/**
 * Pretty-prints XML with the JDK's identity {@code Transformer}; no external
 * dependencies are required.
 */
public class XmlTest {
  /** Demo: formats a small document and prints the result. */
  public static void main(String[] args) {
    XmlTest t = new XmlTest();
    System.out.println(t.formatXml("<a><b><c/><d>text D</d><e value='0'/></b></a>"));
  }

  /**
   * Re-serializes {@code xml} with indentation enabled (indent amount 2).
   *
   * <p>The text is read and written through character streams, so no byte
   * encoding or decoding takes place and the result does not depend on the
   * platform default charset.
   *
   * @param xml the XML text to format
   * @return the indented XML; on any failure (for example malformed input) the
   *         original {@code xml} is returned unchanged — a deliberate best-effort contract
   */
  public String formatXml(String xml){
    try{
      Transformer serializer= SAXTransformerFactory.newInstance().newTransformer();
      serializer.setOutputProperty(OutputKeys.INDENT, "yes");
      // To drop the XML declaration from the output, also set:
      //serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
      serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
      // Fully qualified names are used so this snippet needs no additional imports.
      Source xmlSource=new SAXSource(new InputSource(new java.io.StringReader(xml)));
      java.io.StringWriter out = new java.io.StringWriter();
      serializer.transform(xmlSource, new StreamResult(out));
      return out.toString();
    }catch(Exception e){
      //TODO log error
      return xml;
    }
  }

}

فقط للرجوع إليها في المستقبل ، إليك الحل الذي عمل من أجلي (بفضل تعليق نشرهGeorge Hawkins في أحد الإجابات):

// Serializes `document` (a DOM node obtained elsewhere) to an indented XML string
// with the DOM Load/Save serializer.
DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");
LSSerializer writer = impl.createLSSerializer();
writer.getDomConfig().setParameter("format-pretty-print", Boolean.TRUE);
LSOutput output = impl.createLSOutput();
// Pin the byte encoding so the decoding step below always matches what was written,
// instead of depending on the platform default charset.
output.setEncoding("UTF-8");
ByteArrayOutputStream out = new ByteArrayOutputStream();
output.setByteStream(out);
writer.write(document, output);
String xmlStr = new String(out.toByteArray(), java.nio.charset.Charset.forName("UTF-8"));

قال Kevin Hakanson: "ومع ذلك ، إذا كنت تعرف أن سلسلة XML الخاصة بك صالحة ، ولا ترغب في تحمّل عبء الذاكرة الناتج عن تحليل السلسلة إلى DOM ثم تشغيل تحويل على DOM للحصول على سلسلة من جديد ، فيمكنك ببساطة إجراء تحليل تقليدي حرفًا بحرف: أدرج سطرًا جديدًا ومسافات بعد كل وسم ، واحتفظ بعدّاد للمسافة البادئة (لتحديد عدد المسافات) تزيده عند كل وسم فتح <...> وتنقصه عند كل وسم إغلاق </...> تراه."

متفق عليه. مثل هذا النهج هو أسرع بكثير وله تبعية أقل بكثير.

مثال على الحل:

/**
 * XML utils, including formatting.
 *
 * <p>The formatter is a single character scan: it never parses the XML into a tree
 * and does not validate it. It is meant for input that is well-formed and not yet
 * indented.
 */
public class XmlUtils
{
 private static final XmlFormatter formatter = new XmlFormatter(2, 80);

 /**
  * Formats the XML starting at indentation 0.
  *
  * @param s the XML text; {@code null} is treated like an empty string
  * @return the indented XML
  */
 public static String formatXml(String s)
 {
  return formatter.format(s, 0);
 }

 /**
  * Formats the XML with every line shifted right by {@code initialIndent} characters.
  *
  * @param s the XML text; {@code null} is treated like an empty string
  * @param initialIndent number of spaces placed before top-level tags
  * @return the indented XML
  */
 public static String formatXml(String s, int initialIndent)
 {
  return formatter.format(s, initialIndent);
 }

 /** Character-scanning formatter. Holds only immutable configuration. */
 private static class XmlFormatter
 {
  private final int indentNumChars;
  private final int lineLength;

  /**
   * @param indentNumChars spaces added per nesting level
   * @param lineLength maximum length of wrapped text lines; text up to half this
   *        length stays on the same line as its tags
   */
  public XmlFormatter(int indentNumChars, int lineLength)
  {
   this.indentNumChars = indentNumChars;
   this.lineLength = lineLength;
  }

  /**
   * Scans {@code s} once, writing indentation before each tag and a line break
   * after each tag, and keeping short text on one line with its enclosing tags.
   *
   * @param s the XML text; {@code null} or empty yields {@code ""}
   * @param initialIndent indentation (in characters) of the outermost level
   * @return the formatted text
   */
  public String format(String s, int initialIndent)
  {
   if (s == null || s.isEmpty())
    return "";

   int indent = initialIndent;
   // True right after short text was written inline, so that the closing tag
   // that follows is appended to the same line instead of being indented.
   boolean singleLine = false;
   StringBuilder sb = new StringBuilder();
   for (int i = 0; i < s.length(); i++)
   {
    char currentChar = s.charAt(i);
    if (currentChar == '<')
    {
     // A '<' that is the very last character has no follower; use NUL as a sentinel.
     char nextChar = (i + 1 < s.length()) ? s.charAt(i + 1) : '\0';
     if (nextChar == '/')
      indent -= indentNumChars;
     if (!singleLine)  // Don't indent before closing element if we're creating opening and closing elements on a single line.
      sb.append(buildWhitespace(indent));
     // Declarations (<?), comments/DOCTYPE (<!) and closing tags open no new level.
     if (nextChar != '?' && nextChar != '!' && nextChar != '/')
      indent += indentNumChars;
     singleLine = false; // Reset flag.
    }
    sb.append(currentChar);
    if (currentChar == '>')
    {
     if (i > 0 && s.charAt(i - 1) == '/')
     {
      // Self-closing tag: undo the level added when its '<' was seen.
      indent -= indentNumChars;
      sb.append("\n");
     }
     else
     {
      int nextStartElementPos = s.indexOf('<', i);
      if (nextStartElementPos > i + 1)
      {
       String textBetweenElements = s.substring(i + 1, nextStartElementPos);

       // If the space between elements is solely newlines, let them through to preserve additional newlines in source document.
       if (textBetweenElements.replaceAll("\n", "").length() == 0)
       {
        sb.append(textBetweenElements + "\n");
       }
       // Put tags and text on a single line if the text is short.
       else if (textBetweenElements.length() <= lineLength * 0.5)
       {
        sb.append(textBetweenElements);
        singleLine = true;
       }
       // For larger amounts of text, wrap lines to a maximum line length.
       else
       {
        sb.append("\n" + lineWrap(textBetweenElements, lineLength, indent, null) + "\n");
       }
       // Skip the text just handled; the loop increment lands on the next '<'.
       i = nextStartElementPos - 1;
      }
      else
      {
       sb.append("\n");
      }
     }
    }
   }
   return sb.toString();
  }
 }

 /** Returns a string of {@code numChars} spaces (empty when {@code numChars <= 0}). */
 private static String buildWhitespace(int numChars)
 {
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < numChars; i++)
   sb.append(" ");
  return sb.toString();
 }

 /**
  * Wraps the supplied text to the specified line length. Lines are broken after
  * the last space or tab that fits; a run longer than the line length that
  * contains no whitespace is cut at exactly {@code lineLength} characters.
  *
  * @param s the text to wrap
  * @param lineLength the maximum length of each line in the returned string (not
  *        including indent if specified); values below 1 are treated as 1
  * @param indent optional number of whitespace characters to prepend to each line
  *        before the text; {@code null} means none
  * @param linePrefix optional string to append to the indent (before the text)
  * @return the supplied text wrapped so that no line exceeds the specified line
  *         length + indent, optionally with indent and prefix applied to each
  *         line; {@code null} if {@code s} is {@code null}
  */
 private static String lineWrap(String s, int lineLength, Integer indent, String linePrefix)
 {
  if (s == null)
   return null;

  int width = Math.max(1, lineLength); // a non-positive width would never advance
  String margin = buildWhitespace(indent == null ? 0 : indent.intValue());

  StringBuilder sb = new StringBuilder();
  int lineStartPos = 0;
  boolean firstLine = true;
  while (lineStartPos < s.length())
  {
   if (!firstLine)
    sb.append("\n");
   else
    firstLine = false;

   int lineEndPos;
   if (lineStartPos + width >= s.length())
   {
    // Everything that is left fits on this line.
    lineEndPos = s.length() - 1;
   }
   else
   {
    int hardEnd = lineStartPos + width - 1;
    int scan = hardEnd;
    while (scan > lineStartPos && (s.charAt(scan) != ' ' && s.charAt(scan) != '\t'))
     scan--;
    // No whitespace to break on: cut at the full width rather than after one character.
    lineEndPos = (scan > lineStartPos) ? scan : hardEnd;
   }
   sb.append(margin);
   if (linePrefix != null)
    sb.append(linePrefix);

   sb.append(s.substring(lineStartPos, lineEndPos + 1));
   lineStartPos = lineEndPos + 1;
  }
  return sb.toString();
 }

 // other utils removed for brevity
}

لقد قمت بطباعة جميلة في الماضي باستخدام طريقة org.dom4j.io.OutputFormat.createPrettyPrint ()

/**
 * Pretty-prints XML text using dom4j's predefined "pretty print" output format.
 *
 * @param xml the XML text to format; must not be blank
 * @return the formatted XML
 * @throws RuntimeException if {@code xml} is blank, or if parsing or writing fails
 *         (the original exception is attached as the cause)
 */
public String prettyPrint(final String xml){

  if (StringUtils.isBlank(xml)) {
    throw new RuntimeException("xml was null or blank in prettyPrint()");
  }

  final StringWriter buffer = new StringWriter();
  try {
    final org.dom4j.Document parsed = DocumentHelper.parseText(xml);
    new XMLWriter(buffer, OutputFormat.createPrettyPrint()).write(parsed);
  }
  catch (Exception e) {
    throw new RuntimeException("Error pretty printing xml:\n" + xml, e);
  }
  return buffer.toString();
}

مجرد حل آخر يعمل لدينا

import java.io.StringWriter;
import org.dom4j.DocumentHelper;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

**
 * Pretty Print XML String
 * 
 * @param inputXmlString
 * @return
 */
public static String prettyPrintXml(String xml) {

  final StringWriter sw;

  try {
    final OutputFormat format = OutputFormat.createPrettyPrint();
    final org.dom4j.Document document = DocumentHelper.parseText(xml);
    sw = new StringWriter();
    final XMLWriter writer = new XMLWriter(sw, format);
    writer.write(document);
  }
  catch (Exception e) {
    throw new RuntimeException("Error pretty printing xml:\n" + xml, e);
  }
  return sw.toString();
}

نسخة محسنة قليلا من milosmns ...

/**
 * Pretty-prints XML with a line-based pass (no DOM): every tag and text run gets
 * its own line, indented by one space per nesting level. The input is not
 * validated; a {@code '<'} or {@code '>'} inside text, attributes, comments or
 * CDATA is treated as a tag boundary.
 *
 * <ul>
 *   <li>{@code <?...?>} rows are never indented.</li>
 *   <li>Closing tags step out before printing; the depth never drops below zero.</li>
 *   <li>Self-closing tags, {@code <!...>} rows (comments, DOCTYPE, CDATA) and rows
 *       ending in {@code ]]>} are printed at the current depth and leave it unchanged.</li>
 * </ul>
 *
 * @param xml the XML text; may be {@code null}
 * @return the indented XML without leading or trailing whitespace, or {@code ""}
 *         when {@code xml} is {@code null} or blank
 */
public static String getPrettyXml(String xml) {
  if (xml == null || xml.trim().length() == 0) return "";

  int stack = 0;
  StringBuilder pretty = new StringBuilder();
  // Break after every '>' and before every '<' so each tag / text run is one row.
  String[] rows = xml.trim().replace(">", ">\n").replace("<", "\n<").split("\n");

  for (String rawRow : rows) {
    String row = rawRow.trim();
    if (row.isEmpty()) continue;

    if (row.startsWith("<?")) {
      pretty.append(row).append('\n');
      continue;
    }

    int level;
    if (row.startsWith("</")) {
      // Clamp so that unbalanced input cannot drive the depth negative.
      stack = Math.max(0, stack - 1);
      level = stack;
    } else if (opensElement(row)) {
      level = stack++;
    } else {
      level = stack;
    }
    pretty.append(repeatString(level)).append(row).append('\n');
  }

  return pretty.toString().trim();
}

/** Tells whether {@code row} is a start tag that opens a new nesting level. */
private static boolean opensElement(String row) {
  return row.startsWith("<")
      && !row.startsWith("<!")   // comment, DOCTYPE, CDATA start
      && !row.endsWith("/>")     // self-closing element
      && !row.endsWith("]]>");   // tail of a CDATA section that was split on '<'
}

/** Returns {@code stack} spaces (an empty string when {@code stack <= 0}). */
private static String repeatString(int stack) {
  StringBuilder indent = new StringBuilder();
  for (int i = 0; i < stack; i++) {
    indent.append(" ");
  }
  return indent.toString();
}

هممم ... واجهتُ شيئًا من هذا القبيل ، وهو خطأ (bug) معروف ... ما عليك سوى إضافة خاصية الإخراج (OutputProperty) هذه ..

// Sets the indent amount to 8 on an existing `transformer`.
// NOTE(review): OutputPropertiesFactory is presumably Xalan's org.apache.xml.serializer class — confirm the import.
transformer.setOutputProperty(OutputPropertiesFactory.S_KEY_INDENT_AMOUNT, "8");

أتمنى أن يساعدك هذا ...


وهنا إجابة عن سؤالي الخاص. جمعتُ الإجابات من النتائج المختلفة لكتابة صنف (class) يطبع XML بتنسيق جميل.

لا توجد ضمانات حول كيفية استجابتها باستخدام XML غير صحيح أو مستندات كبيرة.

package ecb.sdw.pretty;

import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;

/**
 * Pretty-prints xml, supplied as a string, using the Xerces
 * {@code XMLSerializer} / {@code OutputFormat} classes.
 * <p/>
 * eg.
 * <code>
 * String formattedXml = new XmlFormatter().format("<tag><nested>hello</nested></tag>");
 * </code>
 */
public class XmlFormatter {

  public XmlFormatter() {
  }

  /**
   * Parses the text into a DOM document and serializes it with a 2-space indent
   * and a line width of 65.
   *
   * @param unformattedXml the XML text to format
   * @return the formatted XML
   * @throws RuntimeException wrapping any parser or I/O failure
   */
  public String format(String unformattedXml) {
    try {
      final Document document = parseXmlFile(unformattedXml);

      OutputFormat format = new OutputFormat(document);
      // setIndenting(true) must come first: it resets both the indent and the line
      // width to their defaults, so the explicit values have to be applied afterwards.
      format.setIndenting(true);
      format.setIndent(2);
      format.setLineWidth(65);
      Writer out = new StringWriter();
      XMLSerializer serializer = new XMLSerializer(out, format);
      serializer.serialize(document);

      return out.toString();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Parses XML text into a DOM {@code Document}.
   *
   * @param in the XML text
   * @return the parsed document
   * @throws RuntimeException wrapping configuration, parse or I/O errors
   */
  private Document parseXmlFile(String in) {
    try {
      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
      DocumentBuilder db = dbf.newDocumentBuilder();
      InputSource is = new InputSource(new StringReader(in));
      return db.parse(is);
    } catch (ParserConfigurationException e) {
      throw new RuntimeException(e);
    } catch (SAXException e) {
      throw new RuntimeException(e);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /** Demo: formats a sample document containing irregular whitespace and prints the result. */
  public static void main(String[] args) {
    String unformattedXml =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?><QueryMessage\n" +
            "    xmlns=\"http://www.SDMX.org/resources/SDMXML/schemas/v2_0/message\"\n" +
            "    xmlns:query=\"http://www.SDMX.org/resources/SDMXML/schemas/v2_0/query\">\n" +
            "  <Query>\n" +
            "    <query:CategorySchemeWhere>\n" +
            "  \t\t\t\t\t     <query:AgencyID>ECB\n\n\n\n</query:AgencyID>\n" +
            "    </query:CategorySchemeWhere>\n" +
            "  </Query>\n\n\n\n\n" +
            "</QueryMessage>";

    System.out.println(new XmlFormatter().format(unformattedXml));
  }

}

As an alternative to the answers from max, David Easley and others, have a look at my lightweight, high-performance pretty-printer library: xml-formatter

// construct lightweight, threadsafe, instance
PrettyPrinter prettyPrinter = PrettyPrinterBuilder.newPrettyPrinter().build();

// receives the pretty-printed output
StringBuilder buffer = new StringBuilder();
String xml = ..; // also works with char[] or Reader

// process(...) reports whether the input was valid XML; only then does the buffer hold usable output
if(prettyPrinter.process(xml, buffer)) {
   // valid XML, print buffer
} else {
   // invalid XML, print xml
}

Sometimes, like when running mocked SOAP services directly from file, it is good to have a pretty-printer which also handles already pretty-printed XML:

// ignoreWhitespace(): variant for input that may already be pretty-printed
PrettyPrinter prettyPrinter = PrettyPrinterBuilder.newPrettyPrinter().ignoreWhitespace().build();

As some have commented, pretty-printing is just a way of presenting XML in a more human-readable form - whitespace strictly does not belong in your XML data.

The library is intended for pretty-printing for logging purposes, and also includes functions for filtering (subtree removal / anonymization) and pretty-printing of XML in CDATA and Text nodes.


For those searching for a quick and dirty solution - which doesn't need the XML to be 100% valid. eg in case of REST / SOAP logging (you never know what the others send ;-))

I found a code snippet online and improved it; I think this approach is still missing here as a valid option:

/**
 * Quick-and-dirty, regex-based pretty-printer for logging. The input does not
 * have to be well-formed XML: nothing is parsed, tags are only recognized textually.
 *
 * <p>All line breaks are removed first. The text is then consumed in groups of
 * "optional open tag, text, optional close tag, optional self-closing tag".
 * Each group is written on its own line, indented with one tab per nesting level;
 * a trailing self-closing tag gets a line of its own. An element whose open tag,
 * text and close tag fall into one group stays on a single line.
 *
 * <ul>
 *   <li>Self-closing tags, {@code <?...?>} and {@code <!...>} never change the depth.</li>
 *   <li>A close tag without a matching open tag is printed at depth 0 instead of failing.</li>
 * </ul>
 *
 * @param xmlString the XML-like text; may be {@code null}
 * @return the indented text, each line terminated by {@code "\n"}; {@code ""} for {@code null}
 */
public static String prettyPrintXMLAsString(String xmlString) {
  final String LINE_BREAK = "\n";
  if (xmlString == null) {
    return "";
  }
  /* Remove new lines (including the carriage returns of Windows line endings) */
  String flat = xmlString.replace("\r", "").replace(LINE_BREAK, "");
  StringBuilder prettyPrintXml = new StringBuilder();
  /*
   * Group the xml tags:
   *   1: open tag   - '*' (not '+') so one-letter names like <a> match; the look-behind
   *                   keeps self-closing tags out of this group
   *   2: text
   *   3: close tag
   *   4: self-closing tag
   */
  Pattern pattern = Pattern.compile("(<[^/][^>]*(?<!/)>)?([^<]*)(</[^>]+>)?(<[^/][^>]*/>)?");
  Matcher matcher = pattern.matcher(flat);
  int tabCount = 0;
  while (matcher.find()) {
    if (matcher.group().trim().isEmpty()) {
      continue; // empty or whitespace-only match: nothing to print
    }
    String openTag = matcher.group(1) == null ? "" : matcher.group(1);
    String text = matcher.group(2) == null ? "" : matcher.group(2);
    String closeTag = matcher.group(3) == null ? "" : matcher.group(3);
    String selfClosingTag = matcher.group(4) == null ? "" : matcher.group(4);

    boolean hasLeadingPart = !openTag.isEmpty() || !closeTag.isEmpty() || !text.trim().isEmpty();
    if (hasLeadingPart) {
      boolean declaration = openTag.startsWith("<?") || openTag.startsWith("<!");
      boolean opens = !openTag.isEmpty() && closeTag.isEmpty() && !declaration;
      boolean closes = openTag.isEmpty() && !closeTag.isEmpty();

      if (closes) {
        // Step out before printing; clamp so a stray close tag cannot go negative.
        tabCount = Math.max(0, tabCount - 1);
      }
      printTabs(tabCount, prettyPrintXml);
      if (opens) {
        ++tabCount;
      }
      prettyPrintXml.append(openTag).append(text).append(closeTag).append(LINE_BREAK);
    }
    if (!selfClosingTag.isEmpty()) {
      printTabs(tabCount, prettyPrintXml);
      prettyPrintXml.append(selfClosingTag).append(LINE_BREAK);
    }
  }
  return prettyPrintXml.toString();
}

/** Appends {@code count} tab characters to {@code out} (nothing when {@code count <= 0}). */
private static void printTabs(int count, StringBuilder out) {
  for (int i = 0; i < count; i++) {
    out.append("\t");
  }
}

/** Demo: pretty-prints a sample SOAP fault message to standard output. */
public static void main(String[] args) {
  // A plain literal is enough here; wrapping it in new String(...) only creates a needless copy.
  String x =
      "<soap:Envelope xmlns:soap=\"http://schemas.xmlsoap.org/soap/envelope/\"><soap:Body><soap:Fault><faultcode>soap:Client</faultcode><faultstring>INVALID_MESSAGE</faultstring><detail><ns3:XcbSoapFault xmlns=\"\" xmlns:ns3=\"http://www.someapp.eu/xcb/types/xcb/v1\"><CauseCode>20007</CauseCode><CauseText>INVALID_MESSAGE</CauseText><DebugInfo>Problems creating SAAJ object model</DebugInfo></ns3:XcbSoapFault></detail></soap:Fault></soap:Body></soap:Envelope>";
  System.out.println(prettyPrintXMLAsString(x));
}

here is the output:

<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
 <soap:Body>
  <soap:Fault>
    <faultcode>soap:Client</faultcode>
    <faultstring>INVALID_MESSAGE</faultstring>
    <detail>
      <ns3:XcbSoapFault xmlns="" xmlns:ns3="http://www.someapp.eu/xcb/types/xcb/v1">
        <CauseCode>20007</CauseCode>
        <CauseText>INVALID_MESSAGE</CauseText>
        <DebugInfo>Problems creating SAAJ object model</DebugInfo>
      </ns3:XcbSoapFault>
    </detail>
  </soap:Fault>
 </soap:Body>
</soap:Envelope>

I have found that in Java 1.6.0_32 the normal method to pretty print an XML string (using a Transformer with a null or identity xslt) does not behave as I would like if tags are merely separated by whitespace, as opposed to having no separating text. I tried using <xsl:strip-space elements="*"/> in my template to no avail. The simplest solution I found was to strip the space the way I wanted using a SAXSource and XML filter. Since my solution was for logging I also extended this to work with incomplete XML fragments. Note the normal method seems to work fine if you use a DOMSource but I did not want to use this because of the incompleteness and memory overhead.

/**
 * SAX filter that removes whitespace-only content before it reaches the
 * downstream handler: ignorable-whitespace events are discarded, and character
 * events are forwarded only when they contain something other than whitespace.
 */
public static class WhitespaceIgnoreFilter extends XMLFilterImpl
{

  /** Discards the event; nothing is passed downstream. */
  @Override
  public void ignorableWhitespace(char[] buffer, int offset, int count) throws SAXException
  {
    // intentionally empty
  }

  /** Forwards the chunk unchanged unless it consists solely of whitespace. */
  @Override
  public void characters(char[] buffer, int offset, int count) throws SAXException
  {
    final String chunk = new String(buffer, offset, count);
    if (chunk.trim().isEmpty())
    {
      return;
    }
    super.characters(buffer, offset, count);
  }
}

/**
 * Pretty-prints a log message containing XML by streaming it through an identity
 * transformer, with whitespace-only content removed by {@code WhitespaceIgnoreFilter}.
 *
 * @param logMsg the XML text
 * @param allowBadlyFormedFragments when {@code true}, transformer errors are not
 *        reported through the error listener, and a parse failure whose message says
 *        the document structures do not start and end within the same entity is
 *        tolerated: whatever was written so far is returned
 * @return the formatted text written before the end of input or the tolerated failure
 * @throws TransformerException for every failure that is not tolerated as described above
 */
public static String prettyXML(String logMsg, boolean allowBadlyFormedFragments) throws SAXException, IOException, TransformerException
{
  final TransformerFactory factory = TransformerFactory.newInstance();
  factory.setAttribute("indent-number", Integer.valueOf(2));

  final Transformer identity = factory.newTransformer();
  identity.setOutputProperty(OutputKeys.INDENT, "yes");
  identity.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

  final StringWriter result = new StringWriter();

  // Chain: SAX parser -> whitespace filter -> transformer.
  final XMLReader saxParser = SAXHelper.getSAXParser(true);
  final XMLFilter whitespaceFilter = new WhitespaceIgnoreFilter();
  whitespaceFilter.setParent(saxParser);

  if (allowBadlyFormedFragments)
  {
    // Silence the transformer's own error reporting; failures still surface below.
    identity.setErrorListener(new ErrorListener()
    {
      @Override
      public void warning(TransformerException exception) throws TransformerException
      {
      }

      @Override
      public void error(TransformerException exception) throws TransformerException
      {
      }

      @Override
      public void fatalError(TransformerException exception) throws TransformerException
      {
      }
    });
  }

  final SAXSource source = new SAXSource(whitespaceFilter, new InputSource(new StringReader(logMsg)));
  try
  {
    identity.transform(source, new StreamResult(result));
  }
  catch (TransformerException e)
  {
    // Only one failure is tolerated: an incomplete fragment, and only when the caller asked for it.
    final boolean incompleteFragment =
        e.getCause() instanceof SAXParseException
            && allowBadlyFormedFragments
            && "XML document structures must start and end within the same entity.".equals(e.getCause().getMessage());
    if (!incompleteFragment)
    {
      throw e;
    }
  }
  result.flush();
  return result.toString();
}

I saw one answer using Scala , so here is another one in Groovy , just in case someone finds it interesting. The default indentation is 2 steps, XmlNodePrinter constructor can be passed another value as well.

// Parse the XML text into a node tree, print it through XmlNodePrinter into a
// StringWriter, then output the collected text.
def xml = "<tag><nested>hello</nested></tag>"
def stringWriter = new StringWriter()
def node = new XmlParser().parseText(xml);
new XmlNodePrinter(new PrintWriter(stringWriter)).print(node)
println stringWriter.toString()

Usage from Java if groovy jar is in classpath

 // Same steps as the Groovy version, written in Java: parse the text, print the
 // node tree into a StringWriter, then output the collected text.
 String xml = "<tag><nested>hello</nested></tag>";
 StringWriter stringWriter = new StringWriter();
 Node node = new XmlParser().parseText(xml);
 new XmlNodePrinter(new PrintWriter(stringWriter)).print(node);
 System.out.println(stringWriter.toString());

The solutions I have found here for Java 1.6+ do not reformat the code if it is already formatted. The one that worked for me (and re-formatted already formatted code) was the following.

import org.apache.xml.security.c14n.CanonicalizationException;
import org.apache.xml.security.c14n.Canonicalizer;
import org.apache.xml.security.c14n.InvalidCanonicalizerException;
import org.w3c.dom.Element;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import java.io.IOException;
import java.io.StringReader;

/**
 * Helpers for normalizing XML text: canonicalization (Apache XML Security) and
 * pretty-printing (DOM Load/Save). Combining both yields a stable textual form
 * that is suitable for whole-string comparison.
 */
public class XmlUtils {
  /**
   * Canonicalizes the XML using the "C14N, omit comments" algorithm.
   *
   * <p>Canonical XML is UTF-8 encoded, so the bytes are encoded and decoded
   * explicitly as UTF-8 instead of relying on the platform default charset.
   *
   * <p>NOTE(review): Apache XML Security usually has to be initialized (e.g. via
   * {@code org.apache.xml.security.Init.init()}) before a {@code Canonicalizer}
   * can be obtained — confirm this happens elsewhere.
   *
   * @param xml the XML text
   * @return the canonical form of {@code xml}
   */
  public static String toCanonicalXml(String xml) throws InvalidCanonicalizerException, ParserConfigurationException, SAXException, CanonicalizationException, IOException {
    Canonicalizer canon = Canonicalizer.getInstance(Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS);
    // getBytes(String) can throw UnsupportedEncodingException, an IOException already declared above.
    byte[] canonXmlBytes = canon.canonicalize(xml.getBytes("UTF-8"));
    return new String(canonXmlBytes, "UTF-8");
  }

  /**
   * Pretty-prints the document element of {@code input} with the DOM Load/Save
   * serializer. The XML declaration is written only if {@code input} starts with one.
   *
   * @param input the XML text
   * @return the pretty-printed XML
   */
  public static String prettyFormat(String input) throws TransformerException, ParserConfigurationException, IOException, SAXException, InstantiationException, IllegalAccessException, ClassNotFoundException {
    InputSource src = new InputSource(new StringReader(input));
    Element document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(src).getDocumentElement();
    Boolean keepDeclaration = input.startsWith("<?xml");
    DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
    DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");
    LSSerializer writer = impl.createLSSerializer();
    writer.getDomConfig().setParameter("format-pretty-print", Boolean.TRUE);
    writer.getDomConfig().setParameter("xml-declaration", keepDeclaration);
    return writer.writeToString(document);
  }
}

It is a good tool to use in your unit tests for full-string xml comparison.

/**
 * Asserts that two XML strings are equal once both have been canonicalized and
 * pretty-printed, so differences in formatting alone do not fail the comparison.
 *
 * @param expected the expected XML
 * @param actual the XML under test
 */
private void assertXMLEqual(String expected, String actual) throws ParserConfigurationException, IOException, SAXException, CanonicalizationException, InvalidCanonicalizerException, TransformerException, IllegalAccessException, ClassNotFoundException, InstantiationException {
  // Normalize the expected value completely before touching the actual one.
  final String normalizedExpected = prettyFormat(toCanonicalXml(expected));
  final String normalizedActual = prettyFormat(toCanonicalXml(actual));
  assertEquals(normalizedExpected, normalizedActual);
}

The code below works perfectly:

import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

// Example call of the one-argument overload.
String formattedXml1 = prettyFormat("<root><child>aaa</child><child/></root>");

/**
 * Pretty-prints XML with an indent amount of 2.
 *
 * @param input the XML text
 * @return the indented XML with {@code "\n"} line endings
 * @throws RuntimeException wrapping any failure of the transformation
 */
public static String prettyFormat(String input) {
  final String defaultIndent = "2";
  return prettyFormat(input, defaultIndent);
}

/**
 * Pretty-prints XML by running it through an identity {@code Transformer} with
 * indentation enabled.
 *
 * @param input the XML text
 * @param indent the indent amount, given as a decimal string such as {@code "2"}
 * @return the indented XML; Windows line endings are normalized to {@code "\n"}
 * @throws RuntimeException wrapping any failure while configuring or running the transformer
 */
public static String prettyFormat(String input, String indent) {
  final Source source = new StreamSource(new StringReader(input));
  final StringWriter sink = new StringWriter();
  try {
    final Transformer identity = TransformerFactory.newInstance().newTransformer();
    identity.setOutputProperty(OutputKeys.INDENT, "yes");
    identity.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", indent);
    identity.transform(source, new StreamResult(sink));

    // Normalize line endings so the result is the same on every platform.
    return sink.toString().replace("\r\n", "\n");
  } catch (Exception failure) {
    throw new RuntimeException(failure);
  }
}


كان عليّ أن أبحث عن هذه الصفحة أولاً قبل أن أبدأ بكتابة حلّي الخاص! على أي حال ، يستخدم حلّي العودية (recursion) في جافا لتحليل نص XML. هذا الكود مكتفٍ بذاته ولا يعتمد على مكتبات خارجية. وأيضًا .. فإنه يستخدم العودية!

/**
 * Pretty-prints XML text to {@code System.out}, starting at nesting level 0.
 *
 * @param text the XML text; {@code null} or empty prints nothing
 */
public static void prettyPrint(String text){
  prettyPrint(text, 0);
}

/**
 * Recursively pretty-prints XML text to {@code System.out} using one tab per
 * nesting level. No XML library is used and the input is not validated.
 *
 * <p>The method takes the first tag in {@code xmlText} and:
 * <ul>
 *   <li>prints it alone on a line when it is standalone (self-closing,
 *       {@code <?...?>}, {@code <!...>}, a stray closing tag, or an element whose
 *       matching end tag cannot be found), then continues with the remaining text
 *       at the same level;</li>
 *   <li>otherwise prints start tag, content and end tag — on one line when the
 *       content holds no further tags, else with the content formatted one level
 *       deeper — and then continues after the end tag at the same level.</li>
 * </ul>
 * Non-blank text preceding the tag is printed on its own line at the current
 * level. Text that contains no complete tag is printed verbatim. The matching end
 * tag is found by counting nested elements of the same name.
 *
 * <p>Limitation: a {@code '>'} inside an attribute value, comment or CDATA section
 * is taken as the end of the tag.
 *
 * @param xmlText the XML text still to be printed; {@code null} or empty prints nothing
 * @param index the nesting level, i.e. the number of tabs printed before each tag
 */
public static void prettyPrint(String xmlText, int index){
  if(xmlText == null || xmlText.isEmpty()){
    return;
  }
  int tagStart = xmlText.indexOf('<');
  int tagEnd = (tagStart < 0) ? -1 : xmlText.indexOf('>', tagStart);
  if(tagEnd < 0){
    // No complete tag left (plain text, or a '<' that is never closed): print
    // verbatim and stop, so the recursion always terminates.
    System.out.print(xmlText);
    return;
  }

  String leadingText = xmlText.substring(0, tagStart).trim();
  if(!leadingText.isEmpty()){
    printTabs(index);
    System.out.println(leadingText);
  }

  String startTag = xmlText.substring(tagStart, tagEnd + 1);
  String rest = xmlText.substring(tagEnd + 1);
  String name = tagName(startTag);
  String endTag = "</" + name + ">"; // <startTag attr1=1 attr2=2> => </startTag>
  int endIndex = isStandalone(startTag, name) ? -1 : findMatchingEnd(rest, name, endTag);

  printTabs(index);
  if(endIndex < 0){
    // standalone tag: the next tag is on the same level as this one
    System.out.println(startTag);
    prettyPrint(rest, index);
    return;
  }

  String content = rest.substring(0, endIndex);
  if(content.contains("<")){ // ie: <tag1><tag2>stuff</tag2></tag1>
    System.out.println(startTag);
    prettyPrint(content, index + 1); // "index+1" because "content" is nested
    printTabs(index);
  } else {
    System.out.print(startTag); // ie: <tag1>stuff</tag1> or <tag1></tag1>
    System.out.print(content);
  }
  System.out.println(endTag);
  // if there are more tags on this level, continue
  prettyPrint(rest.substring(endIndex + endTag.length()), index);
}

/** Extracts the element name: the characters after {@code '<'} up to whitespace, {@code '/'} or {@code '>'}. */
private static String tagName(String tag){
  int end = 1;
  while(end < tag.length()){
    char c = tag.charAt(end);
    if(c == '>' || c == '/' || Character.isWhitespace(c)){
      break;
    }
    end++;
  }
  return tag.substring(1, end);
}

/** Tells whether the tag can never have a matching end tag (declaration, comment, self-closing, stray close). */
private static boolean isStandalone(String tag, String name){
  return name.isEmpty() || tag.startsWith("<?") || tag.startsWith("<!") || tag.endsWith("/>");
}

/**
 * Finds the end tag that closes an element whose start tag has just been consumed.
 *
 * @return the index in {@code text} of the matching {@code endTag}, or -1 if there is none
 */
private static int findMatchingEnd(String text, String name, String endTag){
  String opener = "<" + name;
  int depth = 0; // same-name elements opened inside the current one
  int pos = 0;
  while(true){
    int nextClose = text.indexOf(endTag, pos);
    if(nextClose < 0){
      return -1;
    }
    int nextOpen = indexOfOpener(text, opener, pos);
    if(nextOpen >= 0 && nextOpen < nextClose){
      int gt = text.indexOf('>', nextOpen);
      if(gt < 0){
        return -1;
      }
      if(text.charAt(gt - 1) != '/'){ // self-closing elements open nothing
        depth++;
      }
      pos = gt + 1;
    } else if(depth == 0){
      return nextClose;
    } else {
      depth--;
      pos = nextClose + endTag.length();
    }
  }
}

/** Finds the next start tag with exactly this name (so {@code <ab>} is not mistaken for {@code <a>}), or -1. */
private static int indexOfOpener(String text, String opener, int from){
  int at = text.indexOf(opener, from);
  while(at >= 0){
    int after = at + opener.length();
    if(after < text.length()){
      char c = text.charAt(after);
      if(c == '>' || c == '/' || Character.isWhitespace(c)){
        return at;
      }
    }
    at = text.indexOf(opener, at + 1);
  }
  return -1;
}

/** Prints {@code counter} tab characters to {@code System.out}. */
private static void printTabs(int counter){
  while(counter-- > 0){
    System.out.print("\t");
  }
}
pretty-print