1: <?php
2:
3: 4: 5: 6: 7: 8: 9:
10:
11: namespace Symfony\Component\Yaml;
12:
13: use Symfony\Component\Yaml\Exception\ParseException;
14:
15: 16: 17: 18: 19:
/**
 * Line-oriented YAML parser.
 *
 * The parser splits the input into lines and walks over them with a cursor
 * ($currentLineNb / $currentLine). Nested blocks are handled by extracting
 * the embedded block as a string and handing it to a fresh Parser instance
 * that shares the same reference (anchor) table.
 */
class Parser
{
    /**
     * Number of lines that precede the first line of $lines in the original
     * document; added to the cursor position when reporting line numbers.
     */
    private $offset = 0;

    /** Lines of the document currently being parsed. */
    private $lines = array();

    /** Zero-based index of the current line in $lines (-1 before the first line). */
    private $currentLineNb = -1;

    /** Content of the current line. */
    private $currentLine = '';

    /** Values registered through "&name" anchors, indexed by anchor name. */
    private $refs = array();

    /**
     * Constructor.
     *
     * @param int $offset Line offset added to every reported line number
     *                    (used by the sub-parsers created for embedded blocks)
     */
    public function __construct($offset = 0)
    {
        $this->offset = $offset;
    }

    /**
     * Parses a YAML string to a PHP value.
     *
     * @param string $value                  A YAML string
     * @param bool   $exceptionOnInvalidType Forwarded unchanged to Inline::parse() and to nested parsers
     * @param bool   $objectSupport          Forwarded unchanged to Inline::parse() and to nested parsers
     *
     * @return mixed The parsed value, or null when the document yields no data
     *
     * @throws ParseException If the YAML is not valid
     */
    public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false)
    {
        $this->currentLineNb = -1;
        $this->currentLine = '';
        $this->lines = explode("\n", $this->cleanup($value));

        if (function_exists('mb_detect_encoding') && false === mb_detect_encoding($value, 'UTF-8', true)) {
            throw new ParseException('The YAML value does not appear to be valid UTF-8.');
        }

        // When mbstring overloads the string functions (bit 2), force UTF-8 for
        // the duration of the parsing and remember the encoding to restore it.
        if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
            $mbEncoding = mb_internal_encoding();
            mb_internal_encoding('UTF-8');
        }

        $data = array();
        $context = null;
        while ($this->moveToNextLine()) {
            if ($this->isCurrentLineEmpty()) {
                continue;
            }

            // tab?
            if ("\t" === $this->currentLine[0]) {
                throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
            }

            $isRef = $isInPlace = $isProcessed = false;
            if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
                // sequence item ("- ...")
                if ($context && 'mapping' == $context) {
                    throw new ParseException('You cannot define a sequence item when in a mapping');
                }
                $context = 'sequence';

                // anchor definition ("&name value")
                if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
                    $isRef = $matches['ref'];
                    $values['value'] = $matches['value'];
                }

                // no value (or only a comment) on the line: the item is the following embedded block
                if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
                    $c = $this->getRealCurrentLineNb() + 1;
                    $parser = new Parser($c);
                    $parser->refs =& $this->refs;
                    $data[] = $parser->parse($this->getNextEmbedBlock(), $exceptionOnInvalidType, $objectSupport);
                } else {
                    if (isset($values['leadspaces'])
                        && ' ' == $values['leadspaces']
                        && preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches)
                    ) {
                        // compact notation ("- key: value"): prepend the rest of
                        // the line to the following block and parse it as a whole
                        $c = $this->getRealCurrentLineNb();
                        $parser = new Parser($c);
                        $parser->refs =& $this->refs;

                        $block = $values['value'];
                        // isNextLineIndented() is true when the next line is NOT
                        // deeper than the current one, hence the negation here.
                        if (!$this->isNextLineIndented()) {
                            $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + 2);
                        }

                        $data[] = $parser->parse($block, $exceptionOnInvalidType, $objectSupport);
                    } else {
                        $data[] = $this->parseValue($values['value'], $exceptionOnInvalidType, $objectSupport);
                    }
                }
            } elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
                // mapping item ("key: ...")
                if ($context && 'sequence' == $context) {
                    throw new ParseException('You cannot define a mapping item when in a sequence');
                }
                $context = 'mapping';

                // force correct settings
                Inline::parse(null, $exceptionOnInvalidType, $objectSupport);
                try {
                    $key = Inline::parseScalar($values['key']);
                } catch (ParseException $e) {
                    $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                    $e->setSnippet($this->currentLine);

                    throw $e;
                }

                if ('<<' === $key) {
                    // merge key
                    if (isset($values['value']) && 0 === strpos($values['value'], '*')) {
                        $isInPlace = substr($values['value'], 1);
                        if (!array_key_exists($isInPlace, $this->refs)) {
                            throw new ParseException(sprintf('Reference "%s" does not exist.', $isInPlace), $this->getRealCurrentLineNb() + 1, $this->currentLine);
                        }
                    } else {
                        if (isset($values['value']) && $values['value'] !== '') {
                            $value = $values['value'];
                        } else {
                            $value = $this->getNextEmbedBlock();
                        }
                        $c = $this->getRealCurrentLineNb() + 1;
                        $parser = new Parser($c);
                        $parser->refs =& $this->refs;
                        $parsed = $parser->parse($value, $exceptionOnInvalidType, $objectSupport);

                        $merged = array();
                        if (!is_array($parsed)) {
                            throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
                        } elseif (isset($parsed[0])) {
                            // numeric array: merge the individual elements,
                            // walking backwards so that earlier items win
                            foreach (array_reverse($parsed) as $parsedItem) {
                                if (!is_array($parsedItem)) {
                                    throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem);
                                }
                                $merged = array_merge($parsedItem, $merged);
                            }
                        } else {
                            // associative array: merge it as is
                            $merged = array_merge($merged, $parsed);
                        }

                        $isProcessed = $merged;
                    }
                } elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
                    // anchor definition ("key: &name value")
                    $isRef = $matches['ref'];
                    $values['value'] = $matches['value'];
                }

                if ($isProcessed) {
                    // merge keys
                    $data = $isProcessed;
                } elseif (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
                    // no value on the line: if the next line is not deeper than
                    // the current one (and is not an unindented "- " item), the
                    // value is null; otherwise it is the following embedded block
                    if ($this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
                        $data[$key] = null;
                    } else {
                        $c = $this->getRealCurrentLineNb() + 1;
                        $parser = new Parser($c);
                        $parser->refs =& $this->refs;
                        $data[$key] = $parser->parse($this->getNextEmbedBlock(), $exceptionOnInvalidType, $objectSupport);
                    }
                } else {
                    if ($isInPlace) {
                        $data = $this->refs[$isInPlace];
                    } else {
                        $data[$key] = $this->parseValue($values['value'], $exceptionOnInvalidType, $objectSupport);
                    }
                }
            } else {
                // neither a sequence item nor a mapping item: a one-line
                // document is handed to the inline parser
                $lineCount = count($this->lines);
                if (1 === $lineCount || (2 === $lineCount && empty($this->lines[1]))) {
                    try {
                        $value = Inline::parse($this->lines[0], $exceptionOnInvalidType, $objectSupport);
                    } catch (ParseException $e) {
                        $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                        $e->setSnippet($this->currentLine);

                        throw $e;
                    }

                    if (is_array($value)) {
                        $first = reset($value);
                        if (is_string($first) && 0 === strpos($first, '*')) {
                            // list of aliases: replace each one by the referenced value
                            $data = array();
                            foreach ($value as $alias) {
                                $data[] = $this->refs[substr($alias, 1)];
                            }
                            $value = $data;
                        }
                    }

                    if (isset($mbEncoding)) {
                        mb_internal_encoding($mbEncoding);
                    }

                    return $value;
                }

                // the line could not be matched: report the PCRE failure, if any
                switch (preg_last_error()) {
                    case PREG_INTERNAL_ERROR:
                        $error = 'Internal PCRE error.';
                        break;
                    case PREG_BACKTRACK_LIMIT_ERROR:
                        $error = 'pcre.backtrack_limit reached.';
                        break;
                    case PREG_RECURSION_LIMIT_ERROR:
                        $error = 'pcre.recursion_limit reached.';
                        break;
                    case PREG_BAD_UTF8_ERROR:
                        $error = 'Malformed UTF-8 data.';
                        break;
                    case PREG_BAD_UTF8_OFFSET_ERROR:
                        $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
                        break;
                    default:
                        $error = 'Unable to parse.';
                }

                throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
            }

            if ($isRef) {
                // register the value that was just appended under its anchor name
                $this->refs[$isRef] = end($data);
            }
        }

        if (isset($mbEncoding)) {
            mb_internal_encoding($mbEncoding);
        }

        return empty($data) ? null : $data;
    }

    /**
     * Returns the current line number, offset included.
     *
     * @return int The cursor position plus the parser offset (zero-based)
     */
    private function getRealCurrentLineNb()
    {
        return $this->currentLineNb + $this->offset;
    }

    /**
     * Returns the indentation of the current line.
     *
     * @return int The number of leading spaces of the current line
     */
    private function getCurrentLineIndentation()
    {
        return strlen($this->currentLine) - strlen(ltrim($this->currentLine, ' '));
    }

    /**
     * Returns the next embedded block of YAML and moves the cursor past it.
     *
     * @param int|null $indentation The indentation to strip from every line of
     *                              the block, or null to use the indentation of
     *                              the first line of the block
     *
     * @return string The block, with $indentation characters removed from each line
     *
     * @throws ParseException When an indentation problem is detected
     */
    private function getNextEmbedBlock($indentation = null)
    {
        $this->moveToNextLine();

        if (null === $indentation) {
            $newIndent = $this->getCurrentLineIndentation();

            $unindentedEmbedBlock = $this->isStringUnIndentedCollectionItem();

            if (!$this->isCurrentLineEmpty() && 0 === $newIndent && !$unindentedEmbedBlock) {
                throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
            }
        } else {
            $newIndent = $indentation;
        }

        $data = array(substr($this->currentLine, $newIndent));

        $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem();

        while ($this->moveToNextLine()) {
            // an unindented collection ends at the first line that is not a "- " item
            if ($isItUnindentedCollection && !$this->isStringUnIndentedCollectionItem()) {
                $this->moveToPreviousLine();
                break;
            }

            if ($this->isCurrentLineEmpty()) {
                // blank lines are kept in the block, comment lines are dropped
                if ($this->isCurrentLineBlank()) {
                    $data[] = substr($this->currentLine, $newIndent);
                }

                continue;
            }

            $indent = $this->getCurrentLineIndentation();

            if (preg_match('#^(?P<text> *)$#', $this->currentLine, $match)) {
                // empty line
                $data[] = $match['text'];
            } elseif ($indent >= $newIndent) {
                $data[] = substr($this->currentLine, $newIndent);
            } elseif (0 == $indent) {
                // back at the top level: the block is over
                $this->moveToPreviousLine();

                break;
            } else {
                throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
            }
        }

        return implode("\n", $data);
    }

    /**
     * Moves the cursor to the next line.
     *
     * @return bool False when the cursor is already on the last line, true otherwise
     */
    private function moveToNextLine()
    {
        if ($this->currentLineNb >= count($this->lines) - 1) {
            return false;
        }

        $this->currentLine = $this->lines[++$this->currentLineNb];

        return true;
    }

    /**
     * Moves the cursor to the previous line.
     */
    private function moveToPreviousLine()
    {
        $this->currentLine = $this->lines[--$this->currentLineNb];
    }

    /**
     * Parses the value part of a sequence or mapping item.
     *
     * Handles aliases ("*name"), block scalars ("|" and ">" with their
     * modifiers) and delegates everything else to the inline parser.
     *
     * @param string $value                  A YAML value
     * @param bool   $exceptionOnInvalidType Forwarded unchanged to Inline::parse()
     * @param bool   $objectSupport          Forwarded unchanged to Inline::parse()
     *
     * @return mixed A PHP value
     *
     * @throws ParseException When an alias is unknown or the value is invalid
     */
    private function parseValue($value, $exceptionOnInvalidType, $objectSupport)
    {
        if (0 === strpos($value, '*')) {
            // alias, possibly followed by a comment
            if (false !== $pos = strpos($value, '#')) {
                $value = substr($value, 1, $pos - 2);
            } else {
                $value = substr($value, 1);
            }

            if (!array_key_exists($value, $this->refs)) {
                // Pass (message, line, snippet) like every other throw in this
                // class; the snippet used to be passed in the line-number slot.
                throw new ParseException(sprintf('Reference "%s" does not exist.', $value), $this->getRealCurrentLineNb() + 1, $this->currentLine);
            }

            return $this->refs[$value];
        }

        if (preg_match('/^(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?$/', $value, $matches)) {
            $modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';

            // Cast before abs(): the modifiers may be '', '+' or '-', which are
            // not numeric and make abs() throw a TypeError on PHP 8.
            return $this->parseFoldedScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), abs((int) $modifiers));
        }

        try {
            return Inline::parse($value, $exceptionOnInvalidType, $objectSupport);
        } catch (ParseException $e) {
            $e->setParsedLine($this->getRealCurrentLineNb() + 1);
            $e->setSnippet($this->currentLine);

            throw $e;
        }
    }

    /**
     * Parses a block scalar (literal "|" or folded ">").
     *
     * @param string $separator   The separator that introduced the scalar ("|" or ">")
     * @param string $indicator   The chomping indicator ("", "+" or "-")
     * @param int    $indentation The explicit indentation of the text, or 0 to
     *                            detect it from the first line
     *
     * @return string The text value
     */
    private function parseFoldedScalar($separator, $indicator = '', $indentation = 0)
    {
        $notEOF = $this->moveToNextLine();
        if (!$notEOF) {
            return '';
        }

        // determine indentation if not specified
        if (0 === $indentation) {
            if (preg_match('/^ +/', $this->currentLine, $matches)) {
                $indentation = strlen($matches[0]);
            }
        }

        $text = '';
        if ($indentation > 0) {
            $pattern = sprintf('/^ {%d}(.*)$/', $indentation);

            $isCurrentLineBlank = $this->isCurrentLineBlank();
            while (
                $notEOF && (
                    $isCurrentLineBlank ||
                    preg_match($pattern, $this->currentLine, $matches)
                )
            ) {
                if ($isCurrentLineBlank) {
                    $text .= substr($this->currentLine, $indentation);
                } else {
                    $text .= $matches[1];
                }

                // newline only if not EOF
                if ($notEOF = $this->moveToNextLine()) {
                    $text .= "\n";
                    $isCurrentLineBlank = $this->isCurrentLineBlank();
                }
            }
        } elseif ($notEOF) {
            $text .= "\n";
        }

        // the line that stopped the loop does not belong to the scalar
        if ($notEOF) {
            $this->moveToPreviousLine();
        }

        // folded style: replace single line breaks by spaces, keep trailing ones
        if ('>' === $separator) {
            preg_match('/(\n*)$/', $text, $matches);
            $text = preg_replace('/(?<!\n)\n(?!\n)/', ' ', rtrim($text, "\n"));
            $text .= $matches[1];
        }

        // deal with trailing newlines as indicated ("+" keeps them untouched)
        if ('' === $indicator) {
            $text = preg_replace('/\n+$/s', "\n", $text);
        } elseif ('-' === $indicator) {
            $text = preg_replace('/\n+$/s', '', $text);
        }

        return $text;
    }

    /**
     * Compares the indentation of the next non-empty line with the current one.
     *
     * Despite its name, this method returns true when the next non-empty line
     * is indented at most as much as the current line, i.e. when it is NOT
     * more deeply indented. Callers rely on that meaning.
     *
     * After looking ahead the cursor is moved back by a single line only.
     *
     * @return bool True if the next non-empty line is not deeper than the
     *              current line, false otherwise or when the end is reached
     */
    private function isNextLineIndented()
    {
        $currentIndentation = $this->getCurrentLineIndentation();
        $notEOF = $this->moveToNextLine();

        while ($notEOF && $this->isCurrentLineEmpty()) {
            $notEOF = $this->moveToNextLine();
        }

        if (false === $notEOF) {
            return false;
        }

        $ret = false;
        if ($this->getCurrentLineIndentation() <= $currentIndentation) {
            $ret = true;
        }

        $this->moveToPreviousLine();

        return $ret;
    }

    /**
     * Returns true if the current line is blank or if it is a comment line.
     *
     * @return bool True if the current line is empty or a comment, false otherwise
     */
    private function isCurrentLineEmpty()
    {
        return $this->isCurrentLineBlank() || $this->isCurrentLineComment();
    }

    /**
     * Returns true if the current line is blank.
     *
     * @return bool True if the current line contains nothing but spaces, false otherwise
     */
    private function isCurrentLineBlank()
    {
        return '' == trim($this->currentLine, ' ');
    }

    /**
     * Returns true if the current line is a comment line.
     *
     * @return bool True if the first non-space character of the current line is "#", false otherwise
     */
    private function isCurrentLineComment()
    {
        // checking explicitly the first char of the trim is faster than loops or strpos
        $ltrimmedLine = ltrim($this->currentLine, ' ');

        // guard against a blank line so that offset 0 is never read on ''
        return '' !== $ltrimmedLine && $ltrimmedLine[0] === '#';
    }

    /**
     * Cleans up the stream before parsing.
     *
     * Normalizes line endings, then strips the "%YAML" directive, the leading
     * comment lines and the document markers ("---" and "..."). The parser
     * offset is increased by the number of lines removed from the beginning.
     *
     * @param string $value The input YAML string
     *
     * @return string The cleaned up YAML string
     */
    private function cleanup($value)
    {
        $value = str_replace(array("\r\n", "\r"), "\n", $value);

        // strip YAML header; no "s" modifier here, otherwise the greedy ".*\n"
        // would run up to the last newline and swallow the document itself
        $count = 0;
        $value = preg_replace('#^\%YAML[: ][\d\.]+.*\n#u', '', $value, -1, $count);
        $this->offset += $count;

        // remove leading comments
        $trimmedValue = preg_replace('#^(\#.*?\n)+#s', '', $value, -1, $count);
        if ($count == 1) {
            // items have been removed, update the offset
            $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
            $value = $trimmedValue;
        }

        // remove start of the document marker (---)
        $trimmedValue = preg_replace('#^\-\-\-.*?\n#s', '', $value, -1, $count);
        if ($count == 1) {
            // items have been removed, update the offset
            $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
            $value = $trimmedValue;

            // remove end of the document marker (...)
            $value = preg_replace('#\.\.\.\s*$#s', '', $value);
        }

        return $value;
    }

    /**
     * Returns true if the next non-empty line starts an unindented collection.
     *
     * After looking ahead the cursor is moved back by a single line only.
     *
     * @return bool True if the next non-empty line has the same indentation as
     *              the current one and starts with "- ", false otherwise
     */
    private function isNextLineUnIndentedCollection()
    {
        $currentIndentation = $this->getCurrentLineIndentation();
        $notEOF = $this->moveToNextLine();

        while ($notEOF && $this->isCurrentLineEmpty()) {
            $notEOF = $this->moveToNextLine();
        }

        if (false === $notEOF) {
            return false;
        }

        $ret = false;
        if (
            $this->getCurrentLineIndentation() == $currentIndentation
            &&
            $this->isStringUnIndentedCollectionItem()
        ) {
            $ret = true;
        }

        $this->moveToPreviousLine();

        return $ret;
    }

    /**
     * Returns true if the current line is an unindented collection item.
     *
     * The method takes no parameter: it always inspects the current line.
     *
     * @return bool True if the current line starts with "- ", false otherwise
     */
    private function isStringUnIndentedCollectionItem()
    {
        return (0 === strpos($this->currentLine, '- '));
    }
}
619: