root/apps/outlook/branches/1.5/DotTelSystem/Punycode/IDNA.cs
@
580
| Revision 580, 7.6 kB (checked in by jonmaycock, 10 months ago) |
|---|
| Line | |
|---|---|
| 1 | /// <summary> |
| 2 | /// * |
| 3 | /// Author: Alexander Gnauck AG-Software, mailto:gnauck@ag-software.de |
| 4 | /// * |
| 5 | /// This file is part of GNU Libidn. |
| 6 | /// * |
| 7 | /// This library is free software; you can redistribute it and/or |
| 8 | /// modify it under the terms of the GNU Lesser General Public License |
| 9 | /// as published by the Free Software Foundation; either version 2.1 of |
| 10 | /// the License, or (at your option) any later version. |
| 11 | /// * |
| 12 | /// This library is distributed in the hope that it will be useful, but |
| 13 | /// WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | /// Lesser General Public License for more details. |
| 16 | /// * |
| 17 | /// You should have received a copy of the GNU Lesser General Public |
| 18 | /// License along with this library; if not, write to the Free Software |
| 19 | /// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 |
| 20 | /// USA |
| 21 | /// </summary> |
| 22 | |
| 23 | using System; |
| 24 | using System.Text; |
| 25 | |
| 26 | namespace Gnu.Inet.Encoding |
| 27 | { |
| 28 | public class IDNA |
| 29 | { |
| 30 | public const string ACE_PREFIX = "xn--"; |
| 31 | |
/// <summary>
/// Encodes a Unicode domain name, one label at a time, with the ToASCII
/// operation of RFC 3490 section 4.1. Each label is converted with
/// unassigned code points disallowed and STD3 ASCII rules enforced.
/// </summary>
/// <param name="input">
/// Unicode string. It may contain label separators: '.', U+3002, U+FF0E or
/// U+FF61. Every separator is written to the result as a plain '.'.
/// </param>
/// <returns>The encoded labels joined with '.'.</returns>
public static string ToASCII(string input)
{
    StringBuilder encoded = new StringBuilder();
    StringBuilder label = new StringBuilder();

    foreach (char ch in input)
    {
        switch (ch)
        {
            // Any of the four recognised dot code points terminates the current label.
            case '.':
            case '\u3002':
            case '\uff0e':
            case '\uff61':
                encoded.Append(ToASCII(label.ToString(), false, true));
                encoded.Append('.');
                label.Length = 0;
                break;

            default:
                label.Append(ch);
                break;
        }
    }

    // The text after the last separator (or the whole input) is the final label.
    encoded.Append(ToASCII(label.ToString(), false, true));
    return encoded.ToString();
}
| 62 | |
/// <summary>
/// Converts a single Unicode label to ASCII using the ToASCII procedure in
/// RFC 3490 section 4.1.
/// </summary>
/// <param name="input">Unicode label to encode.</param>
/// <param name="allowUnassigned">
/// Forwarded to <c>Stringprep.NamePrep</c>; whether unassigned code points are allowed.
/// </param>
/// <param name="useSTD3ASCIIRules">
/// When true, the label is rejected if it contains a non-LDH ASCII code point
/// or starts or ends with a hyphen-minus.
/// </param>
/// <returns>
/// The label unchanged (after any Nameprep step) when it is pure ASCII,
/// otherwise the ACE prefix followed by the Punycode encoding.
/// </returns>
/// <exception cref="IDNAException">
/// Nameprep or Punycode encoding failed, an STD3 rule was violated, a
/// non-ASCII label already starts with the ACE prefix, or the result is not
/// 1..63 characters long (reported as <c>IDNAException.TOO_LONG</c> in both
/// the empty and the over-long case).
/// </exception>
public static string ToASCII(string input, bool allowUnassigned, bool useSTD3ASCIIRules)
{
    // Steps 1-2: Perform the Nameprep operation, but only when the label
    //            contains code points outside the ASCII range 0..0x7F.

    if (ContainsNonAscii(input))
    {
        try
        {
            input = Stringprep.NamePrep(input, allowUnassigned);
        }
        catch (StringprepException e)
        {
            throw new IDNAException(e);
        }
    }

    // Step 3: - Verify the absence of non-LDH ASCII code points
    //           0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
    //         - Verify the absence of leading and trailing hyphen-minus

    if (useSTD3ASCIIRules)
    {
        for (int i = 0; i < input.Length; i++)
        {
            int c = input[i];
            if ((c <= 0x2c) || (c >= 0x2e && c <= 0x2f) || (c >= 0x3a && c <= 0x40) || (c >= 0x5b && c <= 0x60) || (c >= 0x7b && c <= 0x7f))
            {
                throw new IDNAException(IDNAException.CONTAINS_NON_LDH);
            }
        }

        // Ordinal: this is a code-point test, not a linguistic one.
        if (input.StartsWith("-", StringComparison.Ordinal) || input.EndsWith("-", StringComparison.Ordinal))
        {
            throw new IDNAException(IDNAException.CONTAINS_HYPHEN);
        }
    }

    // Step 4: If all code points are inside 0..0x7F, skip to step 8.
    //         (Re-scanned because Nameprep may have rewritten the label.)

    string output = input;

    if (ContainsNonAscii(input))
    {
        // Step 5: Verify that the sequence does not begin with the ACE prefix.
        //         Ordinal comparison so that culture-specific collation rules
        //         cannot make the check disagree with the raw code points.

        if (input.StartsWith(ACE_PREFIX, StringComparison.Ordinal))
        {
            throw new IDNAException(IDNAException.CONTAINS_ACE_PREFIX);
        }

        // Step 6: Punycode

        try
        {
            output = Punycode.Encode(input);
        }
        catch (PunycodeException e)
        {
            throw new IDNAException(e);
        }

        // Step 7: Prepend the ACE prefix.

        output = ACE_PREFIX + output;
    }

    // Step 8: Check that the length is inside 1..63.

    if (output.Length < 1 || output.Length > 63)
    {
        throw new IDNAException(IDNAException.TOO_LONG);
    }

    return output;
}

/// <summary>
/// Returns true when the string contains at least one UTF-16 code unit above 0x7F.
/// </summary>
private static bool ContainsNonAscii(string s)
{
    for (int i = 0; i < s.Length; i++)
    {
        if (s[i] > 0x7f)
        {
            return true;
        }
    }

    return false;
}
| 179 | |
/// <summary>
/// Converts an ASCII-encoded domain name to Unicode, one label at a time.
/// Each label is decoded with unassigned code points disallowed and STD3
/// ASCII rules enforced.
/// </summary>
/// <param name="input">
/// Input string. It may contain label separators: '.', U+3002, U+FF0E or
/// U+FF61. Separators are copied to the result unchanged.
/// </param>
/// <returns>
/// The decoded name. The whole input is lower-cased before it is split, so
/// labels that are returned undecoded are still lower-cased.
/// </returns>
public static string ToUnicode(string input)
{
    // Invariant lower-casing: the current culture must not influence how a
    // domain name is folded (under tr-TR, ToLower() turns ASCII 'I' into
    // U+0131, which corrupts ASCII labels and ACE strings).
    input = input.ToLowerInvariant();

    StringBuilder o = new StringBuilder();
    StringBuilder h = new StringBuilder();

    for (int i = 0; i < input.Length; i++)
    {
        char c = input[i];
        if (c == '.' || c == '\u3002' || c == '\uff0e' || c == '\uff61')
        {
            // A separator closes the current label; keep the separator as written.
            o.Append(ToUnicode(h.ToString(), false, true));
            o.Append(c);
            h = new StringBuilder();
        }
        else
        {
            h.Append(c);
        }
    }

    // The text after the last separator (or the whole input) is the final label.
    o.Append(ToUnicode(h.ToString(), false, true));
    return o.ToString();
}
| 210 | |
/// <summary>
/// Converts a single ASCII-encoded label to Unicode using the ToUnicode
/// procedure of RFC 3490 section 4.2. The operation never fails: whenever a
/// step cannot be completed, the original input is returned unchanged.
/// </summary>
/// <param name="input">Label to decode.</param>
/// <param name="allowUnassigned">Allow unassigned Unicode characters.</param>
/// <param name="useSTD3ASCIIRules">Check that the output conforms to STD3.</param>
/// <returns>
/// The decoded Unicode label, or <paramref name="input"/> itself when it does
/// not carry the ACE prefix, cannot be decoded, or does not round-trip
/// through ToASCII.
/// </returns>
public static string ToUnicode(string input, bool allowUnassigned, bool useSTD3ASCIIRules)
{
    string original = input;
    bool nonASCII = false;

    // Step 1: If all code points are inside 0..0x7f, skip to step 3.

    for (int i = 0; i < input.Length; i++)
    {
        int c = input[i];
        if (c > 0x7f)
        {
            nonASCII = true;
            break;
        }
    }

    // Step 2: Perform the Nameprep operation.

    if (nonASCII)
    {
        try
        {
            input = Stringprep.NamePrep(input, allowUnassigned);
        }
        catch (StringprepException)
        {
            // ToUnicode never fails!
            return original;
        }
    }

    // Step 3: Verify the sequence starts with the ACE prefix. The prefix is
    //         matched by code point and in any capitalization ("xn--",
    //         "XN--", ...), independent of the current culture.

    if (!input.StartsWith(ACE_PREFIX, StringComparison.OrdinalIgnoreCase))
    {
        // ToUnicode never fails!
        return original;
    }

    // Keep the prefixed form for the round-trip comparison in step 7.
    string stored = input;

    // Step 4: Remove the ACE prefix.

    input = input.Substring(ACE_PREFIX.Length);

    // Step 5: Decode using punycode

    string output;

    try
    {
        output = Punycode.Decode(input);
    }
    catch (PunycodeException)
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 6: Apply toASCII

    string ascii;

    try
    {
        ascii = ToASCII(output, allowUnassigned, useSTD3ASCIIRules);
    }
    catch (IDNAException)
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 7: Compare case-insensitively (ordinal, so the result does not
    //         depend on the current culture's casing rules).

    if (!string.Equals(ascii, stored, StringComparison.OrdinalIgnoreCase))
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 8: Return the result.

    return output;
}
| 304 | } |
| 305 | } |
Note: See TracBrowser
for help on using the browser.








