1: <?php
2:
3: /*
4: * This file is part of the Symfony package.
5: * (c) Fabien Potencier <fabien@symfony.com>
6: *
7: * For the full copyright and license information, please view the LICENSE
8: * file that was distributed with this source code.
9: */
10:
11: namespace Symfony\Component\Yaml;
12:
13: /**
14: * Unescaper encapsulates unescaping rules for single and double-quoted
15: * YAML strings.
16: *
17: * @author Matthew Lewinski <matthew@lewinski.org>
18: */
class Unescaper
{
    /**
     * Encoding that decoded escape sequences are converted to.
     *
     * Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
     * must be converted to that encoding.
     */
    const ENCODING = 'UTF-8';

    /**
     * Regex fragment (without delimiters) that matches one escaped character
     * in a double quoted string: a backslash followed by either a single
     * character escape, "x" + 2 hex digits, "u" + 4 hex digits or
     * "U" + 8 hex digits.
     */
    const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";

    /**
     * Unescapes a single quoted string.
     *
     * The only escape handled here is the doubled single quote (''), which
     * is replaced by one single quote.
     *
     * @param string $value A single quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeSingleQuotedString($value)
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
     * result of unescapeCharacter(); all other text is left untouched.
     *
     * @param string $value A double quoted string.
     *
     * @return string|null The unescaped string; preg_replace_callback()
     *                     returns null on a PCRE error (for example when
     *                     $value is not valid UTF-8, which the "u" modifier
     *                     requires).
     */
    public function unescapeDoubleQuotedString($value)
    {
        // $this is passed explicitly so the closure also works on PHP
        // versions that do not bind $this inside closures.
        $self = $this;
        $callback = function ($match) use ($self) {
            // $match[0] is the whole escape sequence, backslash included.
            return $self->unescapeCharacter($match[0]);
        };

        // evaluate the string
        return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
    }

    /**
     * Unescapes a character that was found in a double-quoted string.
     *
     * @param string $value An escaped character: the complete escape
     *                      sequence including its leading backslash
     *
     * @return string|null The unescaped character, or null when the character
     *                     following the backslash is not a known escape
     *
     * @throws \RuntimeException if a Unicode escape must be converted and
     *                           neither mbstring nor iconv is available
     */
    public function unescapeCharacter($value)
    {
        // $value[0] is the backslash; $value[1] selects the escape.
        // Square brackets are used because the curly brace offset syntax
        // ($value{1}) is no longer accepted by PHP 8.
        switch ($value[1]) {
            case '0':
                return "\x0";
            case 'a':
                return "\x7";
            case 'b':
                return "\x8";
            case 't':
                return "\t";
            case "\t":
                // a backslash followed by a literal tab is also a tab
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                return "\xb";
            case 'f':
                return "\xc";
            case 'r':
                return "\xd";
            case 'e':
                return "\x1b";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return $this->convertEncoding("\x00\x85", self::ENCODING, 'UCS-2BE');
            case '_':
                // U+00A0 NO-BREAK SPACE
                return $this->convertEncoding("\x00\xA0", self::ENCODING, 'UCS-2BE');
            case 'L':
                // U+2028 LINE SEPARATOR
                return $this->convertEncoding("\x20\x28", self::ENCODING, 'UCS-2BE');
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return $this->convertEncoding("\x20\x29", self::ENCODING, 'UCS-2BE');
            case 'x':
                // \xHH: two hex digits, packed as a 16-bit big-endian code unit
                $char = pack('n', hexdec(substr($value, 2, 2)));

                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
            case 'u':
                // \uHHHH: four hex digits, packed as a 16-bit big-endian code unit
                $char = pack('n', hexdec(substr($value, 2, 4)));

                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
            case 'U':
                // \UHHHHHHHH: eight hex digits, packed as a 32-bit big-endian code unit
                $char = pack('N', hexdec(substr($value, 2, 8)));

                return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE');
        }

        // Not a known escape. REGEX_ESCAPED_CHARACTER never produces such a
        // value, but this method is public and may be called directly.
        return null;
    }

    /**
     * Convert a string from one encoding to another.
     *
     * mbstring is preferred; iconv is used as a fallback.
     *
     * @param string $value The string to convert
     * @param string $to    The output encoding
     * @param string $from  The input encoding
     *
     * @return string The string with the new encoding
     *
     * @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring)
     */
    private function convertEncoding($value, $to, $from)
    {
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($value, $to, $from);
        } elseif (function_exists('iconv')) {
            return iconv($from, $to, $value);
        }

        // Fully qualified: an unqualified name would be resolved relative to
        // the file's namespace instead of PHP's built-in exception class.
        throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).');
    }
}
146: