Transform.cs 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. #region Apache License
  2. //
  3. // Licensed to the Apache Software Foundation (ASF) under one or more
  4. // contributor license agreements. See the NOTICE file distributed with
  5. // this work for additional information regarding copyright ownership.
  6. // The ASF licenses this file to you under the Apache License, Version 2.0
  7. // (the "License"); you may not use this file except in compliance with
  8. // the License. You may obtain a copy of the License at
  9. //
  10. // http://www.apache.org/licenses/LICENSE-2.0
  11. //
  12. // Unless required by applicable law or agreed to in writing, software
  13. // distributed under the License is distributed on an "AS IS" BASIS,
  14. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. // See the License for the specific language governing permissions and
  16. // limitations under the License.
  17. //
  18. #endregion
  19. using System;
  20. using System.Text;
  21. using System.Xml;
  22. using System.Text.RegularExpressions;
  namespace log4net.Util
  {
      /// <summary>
      /// Utility class for transforming strings.
      /// </summary>
      /// <remarks>
      /// <para>
      /// Provides static helpers for masking characters that are not legal in
      /// XML 1.0 and for writing arbitrary text to an <see cref="XmlWriter"/>
      /// using whichever of entity escaping or CDATA sections is estimated to
      /// be shorter.
      /// </para>
      /// </remarks>
      /// <author>Nicko Cadell</author>
      /// <author>Gert Driesen</author>
      public sealed class Transform
      {
          #region Private Instance Constructors

          /// <summary>
          /// Initializes a new instance of the <see cref="Transform" /> class.
          /// </summary>
          /// <remarks>
          /// <para>
          /// Uses a private access modifier to prevent instantiation of this class.
          /// </para>
          /// </remarks>
          private Transform()
          {
          }

          #endregion Private Instance Constructors

          #region XML String Methods

          /// <summary>
          /// Write a string to an <see cref="XmlWriter"/>
          /// </summary>
          /// <param name="writer">the writer to write to</param>
          /// <param name="textData">the string to write</param>
          /// <param name="invalidCharReplacement">The string to replace non XML compliant chars with</param>
          /// <exception cref="ArgumentNullException"><paramref name="writer"/> is <c>null</c>.</exception>
          /// <remarks>
          /// <para>
          /// The text is first passed through <see cref="MaskXmlInvalidCharacters"/>
          /// and is then written either using XML escape entities or using
          /// CDATA sections, whichever is estimated to produce less overhead.
          /// </para>
          /// <para>
          /// When CDATA sections are used, every <c>]]&gt;</c> sequence in the
          /// text is split so that it never appears inside a single CDATA section.
          /// </para>
          /// </remarks>
          public static void WriteEscapedXmlString(XmlWriter writer, string textData, string invalidCharReplacement)
          {
              if (writer == null)
              {
                  throw new ArgumentNullException("writer");
              }

              string stringData = MaskXmlInvalidCharacters(textData, invalidCharReplacement);

              // Estimate the overhead of each encoding. The weights match the extra
              // characters each form adds: 12 for the "<![CDATA[" + "]]>" wrapper of
              // one section (one more section per embedded terminator), 3 for
              // "&lt;" / "&gt;" and 4 for "&amp;".
              int weightCData = 12 * (1 + CountSubstrings(stringData, CDATA_END));
              int weightStringEscapes = 3 * (CountSubstrings(stringData, "<") + CountSubstrings(stringData, ">"))
                  + 4 * CountSubstrings(stringData, "&");

              if (weightStringEscapes <= weightCData)
              {
                  // Write string using string escapes
                  writer.WriteString(stringData);
                  return;
              }

              // Write string using CDATA section(s)
              WriteCDataSections(writer, stringData);
          }

          /// <summary>
          /// Replace invalid XML characters in text string
          /// </summary>
          /// <param name="textData">the XML text input string</param>
          /// <param name="mask">the string to use in place of invalid characters</param>
          /// <returns>A string that does not contain invalid XML characters.</returns>
          /// <remarks>
          /// <para>
          /// Certain Unicode code points are not allowed in the XML InfoSet, for
          /// details see: <a href="http://www.w3.org/TR/REC-xml/#charsets">http://www.w3.org/TR/REC-xml/#charsets</a>.
          /// </para>
          /// <para>
          /// This method replaces any illegal characters in the input string
          /// with the mask string specified. Well-formed UTF-16 surrogate pairs
          /// are left untouched; a surrogate code unit without its partner is
          /// replaced.
          /// </para>
          /// </remarks>
          public static string MaskXmlInvalidCharacters(string textData, string mask)
          {
              return INVALIDCHARS.Replace(textData, mask);
          }

          #endregion XML String Methods

          #region Private Helper Methods

          /// <summary>
          /// Writes the text as one or more CDATA sections, splitting around
          /// every <c>]]&gt;</c> sequence.
          /// </summary>
          /// <param name="writer">the writer to write to</param>
          /// <param name="stringData">the already masked text to write</param>
          private static void WriteCDataSections(XmlWriter writer, string stringData)
          {
              // Ordinal search: the terminator is a fixed code-unit sequence, and the
              // offsets used below assume the match is exactly CDATA_END.Length long.
              int end = stringData.IndexOf(CDATA_END, StringComparison.Ordinal);
              if (end < 0)
              {
                  writer.WriteCData(stringData);
                  return;
              }

              int start = 0;
              while (end > -1)
              {
                  writer.WriteCData(stringData.Substring(start, end - start));

                  if (end == stringData.Length - CDATA_END.Length)
                  {
                      // The terminator is the tail of the text: emit it as ordinary
                      // (escaped) text and finish.
                      start = stringData.Length;
                      writer.WriteString(CDATA_END);
                      break;
                  }

                  // Emit "]]" as ordinary text and let the ">" open the next CDATA
                  // section, so the terminator never appears inside one section.
                  writer.WriteString(CDATA_UNESCAPABLE_TOKEN);
                  start = end + CDATA_UNESCAPABLE_TOKEN.Length;
                  end = stringData.IndexOf(CDATA_END, start, StringComparison.Ordinal);
              }

              if (start < stringData.Length)
              {
                  writer.WriteCData(stringData.Substring(start));
              }
          }

          /// <summary>
          /// Count the number of times that the substring occurs in the text
          /// </summary>
          /// <param name="text">the text to search</param>
          /// <param name="substring">the substring to find</param>
          /// <returns>the number of times the substring occurs in the text</returns>
          /// <remarks>
          /// <para>
          /// The substring is assumed to be non repeating within itself.
          /// Occurrences are counted without overlap, using an ordinal
          /// (code-unit) comparison.
          /// </para>
          /// </remarks>
          private static int CountSubstrings(string text, string substring)
          {
              int length = text.Length;
              int substringLength = substring.Length;

              if (length == 0 || substringLength == 0)
              {
                  return 0;
              }

              int count = 0;
              int offset = 0;
              while (offset < length)
              {
                  int index = text.IndexOf(substring, offset, StringComparison.Ordinal);
                  if (index == -1)
                  {
                      break;
                  }

                  count++;
                  // Resume the search immediately after this occurrence.
                  offset = index + substringLength;
              }

              return count;
          }

          #endregion

          #region Private Static Fields

          private const string CDATA_END = "]]>";
          private const string CDATA_UNESCAPABLE_TOKEN = "]]";

          /// <summary>
          /// Characters illegal in XML 1.0
          /// </summary>
          /// <remarks>
          /// <para>
          /// Matches the control characters other than tab, line feed and
          /// carriage return, the code points U+FFFE and U+FFFF, and any
          /// surrogate code unit that is not part of a high/low pair. Paired
          /// surrogates encode characters in the range #x10000-#x10FFFF, which
          /// XML 1.0 allows.
          /// </para>
          /// </remarks>
          private static readonly Regex INVALIDCHARS = new Regex(
              @"[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]",
              RegexOptions.Compiled);

          #endregion Private Static Fields
      }
  }