001 /*
002 * Copyright 2007-2016 UnboundID Corp.
003 * All Rights Reserved.
004 */
005 /*
006 * Copyright (C) 2008-2016 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021 package com.unboundid.ldif;
022
023
024
025 import java.io.BufferedReader;
026 import java.io.BufferedWriter;
027 import java.io.File;
028 import java.io.FileInputStream;
029 import java.io.FileWriter;
030 import java.io.InputStream;
031 import java.io.InputStreamReader;
032 import java.io.IOException;
033 import java.text.ParseException;
034 import java.util.ArrayList;
035 import java.util.Collection;
036 import java.util.Iterator;
037 import java.util.HashSet;
038 import java.util.LinkedHashMap;
039 import java.util.List;
040 import java.util.Set;
041 import java.util.concurrent.BlockingQueue;
042 import java.util.concurrent.ArrayBlockingQueue;
043 import java.util.concurrent.TimeUnit;
044 import java.util.concurrent.atomic.AtomicBoolean;
045 import java.nio.charset.Charset;
046
047 import com.unboundid.asn1.ASN1OctetString;
048 import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule;
049 import com.unboundid.ldap.matchingrules.MatchingRule;
050 import com.unboundid.ldap.sdk.Attribute;
051 import com.unboundid.ldap.sdk.Control;
052 import com.unboundid.ldap.sdk.Entry;
053 import com.unboundid.ldap.sdk.Modification;
054 import com.unboundid.ldap.sdk.ModificationType;
055 import com.unboundid.ldap.sdk.LDAPException;
056 import com.unboundid.ldap.sdk.schema.AttributeTypeDefinition;
057 import com.unboundid.ldap.sdk.schema.Schema;
058 import com.unboundid.util.AggregateInputStream;
059 import com.unboundid.util.Base64;
060 import com.unboundid.util.LDAPSDKThreadFactory;
061 import com.unboundid.util.ThreadSafety;
062 import com.unboundid.util.ThreadSafetyLevel;
063 import com.unboundid.util.parallel.AsynchronousParallelProcessor;
064 import com.unboundid.util.parallel.Result;
065 import com.unboundid.util.parallel.ParallelProcessor;
066 import com.unboundid.util.parallel.Processor;
067
068 import static com.unboundid.ldif.LDIFMessages.*;
069 import static com.unboundid.util.Debug.*;
070 import static com.unboundid.util.StaticUtils.*;
071 import static com.unboundid.util.Validator.*;
072
073 /**
074 * This class provides an LDIF reader, which can be used to read and decode
075 * entries and change records from a data source using the LDAP Data Interchange
076 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>.
077 * <BR>
078 * This class is not synchronized. If multiple threads read from the
079 * LDIFReader, they must be synchronized externally.
080 * <BR><BR>
081 * <H2>Example</H2>
082 * The following example iterates through all entries contained in an LDIF file
083 * and attempts to add them to a directory server:
084 * <PRE>
085 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile);
086 *
087 * int entriesRead = 0;
088 * int entriesAdded = 0;
089 * int errorsEncountered = 0;
090 * while (true)
091 * {
092 * Entry entry;
093 * try
094 * {
095 * entry = ldifReader.readEntry();
096 * if (entry == null)
097 * {
098 * // All entries have been read.
099 * break;
100 * }
101 *
102 * entriesRead++;
103 * }
104 * catch (LDIFException le)
105 * {
106 * errorsEncountered++;
107 * if (le.mayContinueReading())
108 * {
109 * // A recoverable error occurred while attempting to read an entry,
110 * // at or near line number le.getLineNumber()
111 * // The entry will be skipped, but we'll try to keep reading from the
112 * // LDIF file.
113 * continue;
114 * }
115 * else
116 * {
117 * // An unrecoverable error occurred while attempting to read an entry
118 * // at or near line number le.getLineNumber()
119 * // No further LDIF processing will be performed.
120 * break;
121 * }
122 * }
123 * catch (IOException ioe)
124 * {
125 * // An I/O error occurred while attempting to read from the LDIF file.
126 * // No further LDIF processing will be performed.
127 * errorsEncountered++;
128 * break;
129 * }
130 *
131 * LDAPResult addResult;
132 * try
133 * {
134 * addResult = connection.add(entry);
135 * // If we got here, then the change should have been processed
136 * // successfully.
137 * entriesAdded++;
138 * }
139 * catch (LDAPException le)
140 * {
141 * // If we got here, then the change attempt failed.
142 * addResult = le.toLDAPResult();
143 * errorsEncountered++;
144 * }
145 * }
146 *
147 * ldifReader.close();
148 * </PRE>
149 */
150 @ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
151 public final class LDIFReader
152 {
/**
 * The default buffer size (128KB) that will be used when reading from the
 * data source.  The stream-based constructors pass this value as the size of
 * the {@code BufferedReader} that they create.
 */
public static final int DEFAULT_BUFFER_SIZE = 128 * 1024;



/**
 * When processing asynchronously, this value is supplied to the
 * {@code ParallelProcessor} that parses records.  It determines how many of
 * the allocated worker threads are used to parse each batch of read entries
 * (presumably as the minimum number of records given to each parsing
 * thread -- confirm against {@code ParallelProcessor}).
 */
private static final int ASYNC_MIN_PER_PARSING_THREAD = 3;



/**
 * When processing asynchronously, this specifies the capacity of the pending
 * (not yet parsed) queue.  The completed (parsed) queue is sized from this
 * value as well, at a little more than twice this capacity.
 */
private static final int ASYNC_QUEUE_SIZE = 500;



/**
 * Sentinel entry used internally to indicate that the
 * LDIFReaderEntryTranslator has asked for a read entry to be skipped by
 * returning {@code null}.  A distinct marker is needed because {@code null}
 * normally implies EOF.
 */
private static final Entry SKIP_ENTRY = new Entry("cn=skipped");
183
184
185
186 /**
187 * The default base path that will be prepended to relative paths. It will
188 * end with a trailing slash.
189 */
private static final String DEFAULT_RELATIVE_BASE_PATH;
static
{
  // Relative paths are resolved against the JVM's working directory.  If the
  // user.dir property is unavailable, fall back to ".".
  final String workingDirProperty = System.getProperty("user.dir");
  final File workingDir =
       new File((workingDirProperty == null) ? "." : workingDirProperty);

  // The base path must always end with a separator so that a relative path
  // can simply be appended to it.
  final String absolutePath = workingDir.getAbsolutePath();
  DEFAULT_RELATIVE_BASE_PATH = absolutePath.endsWith(File.separator)
       ? absolutePath
       : (absolutePath + File.separator);
}
214
215
216
// The buffered reader that will be used to read LDIF data.  It is supplied
// to (or created on behalf of) the constructor and is closed by close().
private final BufferedReader reader;

// The behavior that should be exhibited when encountering duplicate attribute
// values.  The constructor initializes this to DuplicateValueBehavior.STRIP.
private volatile DuplicateValueBehavior duplicateValueBehavior;

// A line number counter.
private long lineNumberCounter = 0;

// The change record translator to use, if any.  May be null, in which case
// change records are returned unaltered.
private final LDIFReaderChangeRecordTranslator changeRecordTranslator;

// The entry translator to use, if any.  May be null, in which case entries
// are returned unaltered.
private final LDIFReaderEntryTranslator entryTranslator;

// The schema that will be used when processing, if applicable.
private Schema schema;

// Specifies the base path that will be prepended to relative paths for file
// URLs.  The constructor initializes this to DEFAULT_RELATIVE_BASE_PATH.
private volatile String relativeBasePath;

// The behavior that should be exhibited with regard to illegal trailing
// spaces in attribute values.  The constructor initializes this to
// TrailingSpaceBehavior.REJECT.
private volatile TrailingSpaceBehavior trailingSpaceBehavior;

// True iff we are processing asynchronously (i.e., the reader was created
// with a non-zero number of parse threads).
private final boolean isAsync;

//
// The following only apply to asynchronous processing.  They are all null
// when the reader operates synchronously.
//

// Parses entries asynchronously.
private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>
     asyncParser;

// Set to true when the end of the input is reached.  It starts out false.
private final AtomicBoolean asyncParsingComplete;

// The records that have been read and parsed.  This is the output queue of
// asyncParser.
private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>
     asyncParsedRecords;
261
262
263
264 /**
265 * Creates a new LDIF reader that will read data from the specified file.
266 *
267 * @param path The path to the file from which the data is to be read. It
268 * must not be {@code null}.
269 *
270 * @throws IOException If a problem occurs while opening the file for
271 * reading.
272 */
public LDIFReader(final String path) throws IOException
{
  // Hand off to the File-based constructor, which opens the file for reading
  // and parses it synchronously.
  this(new File(path));
}
278
279
280
281 /**
282 * Creates a new LDIF reader that will read data from the specified file
283 * and parses the LDIF records asynchronously using the specified number of
284 * threads.
285 *
286 * @param path The path to the file from which the data is to be read. It
287 * must not be {@code null}.
288 * @param numParseThreads If this value is greater than zero, then the
289 * specified number of threads will be used to
290 * asynchronously read and parse the LDIF file.
291 *
292 * @throws IOException If a problem occurs while opening the file for
293 * reading.
294 *
295 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
296 * constructor for more details about asynchronous processing.
297 */
public LDIFReader(final String path, final int numParseThreads)
       throws IOException
{
  // Hand off to the File-based constructor, which opens the file for reading
  // with the requested number of parse threads.
  this(new File(path), numParseThreads);
}
303
304
305
306 /**
307 * Creates a new LDIF reader that will read data from the specified file.
308 *
309 * @param file The file from which the data is to be read. It must not be
310 * {@code null}.
311 *
312 * @throws IOException If a problem occurs while opening the file for
313 * reading.
314 */
public LDIFReader(final File file) throws IOException
{
  // Open the file as a byte stream.  The stream-based constructor decodes it
  // as UTF-8 and selects synchronous parsing.
  this(new FileInputStream(file));
}
320
321
322
323 /**
324 * Creates a new LDIF reader that will read data from the specified file
325 * and optionally parses the LDIF records asynchronously using the specified
326 * number of threads.
327 *
328 * @param file The file from which the data is to be read. It
329 * must not be {@code null}.
330 * @param numParseThreads If this value is greater than zero, then the
331 * specified number of threads will be used to
332 * asynchronously read and parse the LDIF file.
333 *
334 * @throws IOException If a problem occurs while opening the file for
335 * reading.
336 */
public LDIFReader(final File file, final int numParseThreads)
       throws IOException
{
  // Open the file as a byte stream.  The stream-based constructor decodes it
  // as UTF-8.
  this(new FileInputStream(file), numParseThreads);
}
342
343
344
345 /**
346 * Creates a new LDIF reader that will read data from the specified files in
347 * the order in which they are provided and optionally parses the LDIF records
348 * asynchronously using the specified number of threads.
349 *
350 * @param files The files from which the data is to be read. It
351 * must not be {@code null} or empty.
352 * @param numParseThreads If this value is greater than zero, then the
353 * specified number of threads will be used to
354 * asynchronously read and parse the LDIF file.
355 * @param entryTranslator The LDIFReaderEntryTranslator to apply to entries
356 * before they are returned. This is normally
357 * {@code null}, which causes entries to be returned
358 * unaltered. This is particularly useful when
359 * parsing the input file in parallel because the
360 * entry translation is also done in parallel.
361 *
362 * @throws IOException If a problem occurs while opening the file for
363 * reading.
364 */
public LDIFReader(final File[] files,
                  final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
       throws IOException
{
  // No change record translator is used with this variant.
  this(files, numParseThreads, entryTranslator, null);
}
371
372
373
374 /**
375 * Creates a new LDIF reader that will read data from the specified files in
376 * the order in which they are provided and optionally parses the LDIF records
377 * asynchronously using the specified number of threads.
378 *
379 * @param files The files from which the data is to be
380 * read. It must not be {@code null} or
381 * empty.
382 * @param numParseThreads If this value is greater than zero, then
383 * the specified number of threads will be
384 * used to asynchronously read and parse the
385 * LDIF file.
386 * @param entryTranslator The LDIFReaderEntryTranslator to apply to
387 * entries before they are returned. This is
388 * normally {@code null}, which causes entries
389 * to be returned unaltered. This is
390 * particularly useful when parsing the input
391 * file in parallel because the entry
392 * translation is also done in parallel.
393 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
394 * apply to change records before they are
395 * returned. This is normally {@code null},
396 * which causes change records to be returned
397 * unaltered. This is particularly useful
398 * when parsing the input file in parallel
399 * because the change record translation is
400 * also done in parallel.
401 *
402 * @throws IOException If a problem occurs while opening the file for
403 * reading.
404 */
public LDIFReader(
     final File[] files,
     final int numParseThreads,
     final LDIFReaderEntryTranslator entryTranslator,
     final LDIFReaderChangeRecordTranslator changeRecordTranslator)
     throws IOException
{
  // Present all of the files as one continuous stream, then read from that
  // stream like any other input stream.
  this(createAggregateInputStream(files),
       numParseThreads,
       entryTranslator,
       changeRecordTranslator);
}
413
414
415
416 /**
417 * Creates a new aggregate input stream that will read data from the specified
418 * files. If there are multiple files, then a "padding" file will be inserted
419 * between them to ensure that there is at least one blank line between the
420 * end of one file and the beginning of another.
421 *
422 * @param files The files from which the data is to be read. It must not be
423 * {@code null} or empty.
424 *
425 * @return The input stream to use to read data from the provided files.
426 *
427 * @throws IOException If a problem is encountered while attempting to
428 * create the input stream.
429 */
430 private static InputStream createAggregateInputStream(final File... files)
431 throws IOException
432 {
433 if (files.length == 0)
434 {
435 throw new IOException(ERR_READ_NO_LDIF_FILES.get());
436 }
437 else if (files.length == 1)
438 {
439 return new FileInputStream(files[0]);
440 }
441 else
442 {
443 final File spacerFile =
444 File.createTempFile("ldif-reader-spacer", ".ldif");
445 spacerFile.deleteOnExit();
446
447 final BufferedWriter spacerWriter =
448 new BufferedWriter(new FileWriter(spacerFile));
449 try
450 {
451 spacerWriter.newLine();
452 spacerWriter.newLine();
453 }
454 finally
455 {
456 spacerWriter.close();
457 }
458
459 final File[] returnArray = new File[(files.length * 2) - 1];
460 returnArray[0] = files[0];
461
462 int pos = 1;
463 for (int i=1; i < files.length; i++)
464 {
465 returnArray[pos++] = spacerFile;
466 returnArray[pos++] = files[i];
467 }
468
469 return new AggregateInputStream(returnArray);
470 }
471 }
472
473
474
475 /**
476 * Creates a new LDIF reader that will read data from the provided input
477 * stream.
478 *
479 * @param inputStream The input stream from which the data is to be read.
480 * It must not be {@code null}.
481 */
public LDIFReader(final InputStream inputStream)
{
  // Zero parse threads selects synchronous reading and parsing.
  this(inputStream, 0);
}
486
487
488
489 /**
490 * Creates a new LDIF reader that will read data from the specified stream
491 * and parses the LDIF records asynchronously using the specified number of
492 * threads.
493 *
494 * @param inputStream The input stream from which the data is to be read.
495 * It must not be {@code null}.
496 * @param numParseThreads If this value is greater than zero, then the
497 * specified number of threads will be used to
498 * asynchronously read and parse the LDIF file.
499 *
500 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
501 * constructor for more details about asynchronous processing.
502 */
public LDIFReader(final InputStream inputStream, final int numParseThreads)
{
  // The four-argument stream constructor wraps the stream in a UTF-8
  // (required by RFC 2849) BufferedReader of DEFAULT_BUFFER_SIZE.  Neither
  // translator is used with this variant.
  this(inputStream, numParseThreads, null, null);
}
511
512
513
514 /**
515 * Creates a new LDIF reader that will read data from the specified stream
516 * and parses the LDIF records asynchronously using the specified number of
517 * threads.
518 *
519 * @param inputStream The input stream from which the data is to be read.
520 * It must not be {@code null}.
521 * @param numParseThreads If this value is greater than zero, then the
522 * specified number of threads will be used to
523 * asynchronously read and parse the LDIF file.
524 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read
525 * entries before they are returned. This is normally
526 * {@code null}, which causes entries to be returned
527 * unaltered. This is particularly useful when parsing
528 * the input file in parallel because the entry
529 * translation is also done in parallel.
530 *
531 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
532 * constructor for more details about asynchronous processing.
533 */
public LDIFReader(final InputStream inputStream,
                  final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
{
  // No change record translator is used with this variant.
  this(inputStream, numParseThreads, entryTranslator, null);
}
539
540
541
542 /**
543 * Creates a new LDIF reader that will read data from the specified stream
544 * and parses the LDIF records asynchronously using the specified number of
545 * threads.
546 *
547 * @param inputStream The input stream from which the data is to
548 * be read. It must not be {@code null}.
549 * @param numParseThreads If this value is greater than zero, then
550 * the specified number of threads will be
551 * used to asynchronously read and parse the
552 * LDIF file.
553 * @param entryTranslator The LDIFReaderEntryTranslator to apply to
554 * entries before they are returned. This is
555 * normally {@code null}, which causes entries
556 * to be returned unaltered. This is
557 * particularly useful when parsing the input
558 * file in parallel because the entry
559 * translation is also done in parallel.
560 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
561 * apply to change records before they are
562 * returned. This is normally {@code null},
563 * which causes change records to be returned
564 * unaltered. This is particularly useful
565 * when parsing the input file in parallel
566 * because the change record translation is
567 * also done in parallel.
568 *
569 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
570 * constructor for more details about asynchronous processing.
571 */
public LDIFReader(
     final InputStream inputStream,
     final int numParseThreads,
     final LDIFReaderEntryTranslator entryTranslator,
     final LDIFReaderChangeRecordTranslator changeRecordTranslator)
{
  // RFC 2849 requires UTF-8, and Java guarantees that charset is always
  // available, so the lookup below cannot fail.
  this(
       new BufferedReader(
            new InputStreamReader(inputStream, Charset.forName("UTF-8")),
            DEFAULT_BUFFER_SIZE),
       numParseThreads,
       entryTranslator,
       changeRecordTranslator);
}
582
583
584
585 /**
586 * Creates a new LDIF reader that will use the provided buffered reader to
587 * read the LDIF data. The encoding of the underlying Reader must be set to
588 * "UTF-8" as required by RFC 2849.
589 *
590 * @param reader The buffered reader that will be used to read the LDIF
591 * data. It must not be {@code null}.
592 */
public LDIFReader(final BufferedReader reader)
{
  // Zero parse threads selects synchronous reading and parsing.
  this(reader, 0);
}
597
598
599
600 /**
601 * Creates a new LDIF reader that will read data from the specified buffered
602 * reader and parses the LDIF records asynchronously using the specified
603 * number of threads. The encoding of the underlying Reader must be set to
604 * "UTF-8" as required by RFC 2849.
605 *
606 * @param reader The buffered reader that will be used to read the LDIF data.
607 * It must not be {@code null}.
608 * @param numParseThreads If this value is greater than zero, then the
609 * specified number of threads will be used to
610 * asynchronously read and parse the LDIF file.
611 *
612 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
613 * constructor for more details about asynchronous processing.
614 */
public LDIFReader(final BufferedReader reader,
                  final int numParseThreads)
{
  // No entry translator is used with this variant.
  this(reader, numParseThreads, null);
}
619
620
621
622 /**
623 * Creates a new LDIF reader that will read data from the specified buffered
624 * reader and parses the LDIF records asynchronously using the specified
625 * number of threads. The encoding of the underlying Reader must be set to
626 * "UTF-8" as required by RFC 2849.
627 *
628 * @param reader The buffered reader that will be used to read the LDIF data.
629 * It must not be {@code null}.
630 * @param numParseThreads If this value is greater than zero, then the
631 * specified number of threads will be used to
632 * asynchronously read and parse the LDIF file.
633 * This should only be set to greater than zero when
634 * performance analysis has demonstrated that reading
635 * and parsing the LDIF is a bottleneck. The default
636 * synchronous processing is normally fast enough.
637 * There is little benefit in passing in a value
638 * greater than four (unless there is an
639 * LDIFReaderEntryTranslator that does time-consuming
640 * processing). A value of zero implies the
641 * default behavior of reading and parsing LDIF
642 * records synchronously when one of the read
643 * methods is called.
644 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read
645 * entries before they are returned. This is normally
646 * {@code null}, which causes entries to be returned
647 * unaltered. This is particularly useful when parsing
648 * the input file in parallel because the entry
649 * translation is also done in parallel.
650 */
public LDIFReader(final BufferedReader reader, final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
{
  // No change record translator is used with this variant.
  this(reader, numParseThreads, entryTranslator, null);
}
657
658
659
660 /**
661 * Creates a new LDIF reader that will read data from the specified buffered
662 * reader and parses the LDIF records asynchronously using the specified
663 * number of threads. The encoding of the underlying Reader must be set to
664 * "UTF-8" as required by RFC 2849.
665 *
666 * @param reader The buffered reader that will be used to
667 * read the LDIF data. It must not be
668 * {@code null}.
669 * @param numParseThreads If this value is greater than zero, then
670 * the specified number of threads will be
671 * used to asynchronously read and parse the
672 * LDIF file.
673 * @param entryTranslator The LDIFReaderEntryTranslator to apply to
674 * entries before they are returned. This is
675 * normally {@code null}, which causes entries
676 * to be returned unaltered. This is
677 * particularly useful when parsing the input
678 * file in parallel because the entry
679 * translation is also done in parallel.
680 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to
681 * apply to change records before they are
682 * returned. This is normally {@code null},
683 * which causes change records to be returned
684 * unaltered. This is particularly useful
685 * when parsing the input file in parallel
686 * because the change record translation is
687 * also done in parallel.
688 */
public LDIFReader(
     final BufferedReader reader,
     final int numParseThreads,
     final LDIFReaderEntryTranslator entryTranslator,
     final LDIFReaderChangeRecordTranslator changeRecordTranslator)
{
  // Reject unusable arguments before any state is set up.
  ensureNotNull(reader);
  ensureTrue(numParseThreads >= 0,
       "LDIFReader.numParseThreads must not be negative.");

  this.reader                 = reader;
  this.entryTranslator        = entryTranslator;
  this.changeRecordTranslator = changeRecordTranslator;

  // Default parsing behavior.
  duplicateValueBehavior = DuplicateValueBehavior.STRIP;
  trailingSpaceBehavior  = TrailingSpaceBehavior.REJECT;
  relativeBasePath       = DEFAULT_RELATIVE_BASE_PATH;

  if (numParseThreads != 0)
  {
    // Asynchronous mode:  records are decoded in parallel by worker threads.
    isAsync = true;
    asyncParsingComplete = new AtomicBoolean(false);

    final LDAPSDKThreadFactory workerThreadFactory =
         new LDAPSDKThreadFactory("LDIFReader Worker", true, null);
    final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> recordDecoder =
         new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>(
              new RecordParser(), workerThreadFactory, numParseThreads,
              ASYNC_MIN_PER_PARSING_THREAD);

    final BlockingQueue<UnparsedLDIFRecord> unparsedRecords =
         new ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE);

    // The output queue must be a little more than twice as big as the input
    // queue to more easily handle being shutdown in the middle of processing
    // when the queues are full and threads are blocked.
    final int parsedQueueCapacity = 2 * ASYNC_QUEUE_SIZE + 100;
    asyncParsedRecords =
         new ArrayBlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>(
              parsedQueueCapacity);

    asyncParser =
         new AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>(
              unparsedRecords, recordDecoder, asyncParsedRecords);

    // Start the line reader last, once every field has been assigned.
    new LineReaderThread().start();
  }
  else
  {
    // Synchronous mode:  none of the asynchronous machinery is needed.
    isAsync = false;
    asyncParser = null;
    asyncParsingComplete = null;
    asyncParsedRecords = null;
  }
}
743
744
745
746 /**
747 * Reads entries from the LDIF file with the specified path and returns them
748 * as a {@code List}. This is a convenience method that should only be used
749 * for data sets that are small enough so that running out of memory isn't a
750 * concern.
751 *
752 * @param path The path to the LDIF file containing the entries to be read.
753 *
754 * @return A list of the entries read from the given LDIF file.
755 *
756 * @throws IOException If a problem occurs while attempting to read data
757 * from the specified file.
758 *
759 * @throws LDIFException If a problem is encountered while attempting to
760 * decode data read as LDIF.
761 */
762 public static List<Entry> readEntries(final String path)
763 throws IOException, LDIFException
764 {
765 return readEntries(new LDIFReader(path));
766 }
767
768
769
770 /**
771 * Reads entries from the specified LDIF file and returns them as a
772 * {@code List}. This is a convenience method that should only be used for
773 * data sets that are small enough so that running out of memory isn't a
774 * concern.
775 *
776 * @param file A reference to the LDIF file containing the entries to be
777 * read.
778 *
779 * @return A list of the entries read from the given LDIF file.
780 *
781 * @throws IOException If a problem occurs while attempting to read data
782 * from the specified file.
783 *
784 * @throws LDIFException If a problem is encountered while attempting to
785 * decode data read as LDIF.
786 */
787 public static List<Entry> readEntries(final File file)
788 throws IOException, LDIFException
789 {
790 return readEntries(new LDIFReader(file));
791 }
792
793
794
795 /**
796 * Reads and decodes LDIF entries from the provided input stream and
797 * returns them as a {@code List}. This is a convenience method that should
798 * only be used for data sets that are small enough so that running out of
799 * memory isn't a concern.
800 *
801 * @param inputStream The input stream from which the entries should be
802 * read. The input stream will be closed before
803 * returning.
804 *
805 * @return A list of the entries read from the given input stream.
806 *
807 * @throws IOException If a problem occurs while attempting to read data
808 * from the input stream.
809 *
810 * @throws LDIFException If a problem is encountered while attempting to
811 * decode data read as LDIF.
812 */
813 public static List<Entry> readEntries(final InputStream inputStream)
814 throws IOException, LDIFException
815 {
816 return readEntries(new LDIFReader(inputStream));
817 }
818
819
820
821 /**
822 * Reads entries from the provided LDIF reader and returns them as a list.
823 *
824 * @param reader The reader from which the entries should be read. It will
825 * be closed before returning.
826 *
827 * @return A list of the entries read from the provided reader.
828 *
829 * @throws IOException If a problem was encountered while attempting to read
830 * data from the LDIF data source.
831 *
832 * @throws LDIFException If a problem is encountered while attempting to
833 * decode data read as LDIF.
834 */
835 private static List<Entry> readEntries(final LDIFReader reader)
836 throws IOException, LDIFException
837 {
838 try
839 {
840 final ArrayList<Entry> entries = new ArrayList<Entry>(10);
841 while (true)
842 {
843 final Entry e = reader.readEntry();
844 if (e == null)
845 {
846 break;
847 }
848
849 entries.add(e);
850 }
851
852 return entries;
853 }
854 finally
855 {
856 reader.close();
857 }
858 }
859
860
861
862 /**
863 * Closes this LDIF reader and the underlying LDIF source.
864 *
865 * @throws IOException If a problem occurs while closing the underlying LDIF
866 * source.
867 */
868 public void close()
869 throws IOException
870 {
871 reader.close();
872
873 if (isAsync())
874 {
875 // Closing the reader will trigger the LineReaderThread to complete, but
876 // not if it's blocked submitting the next UnparsedLDIFRecord. To avoid
877 // this, we clear out the completed output queue, which is larger than
878 // the input queue, so the LineReaderThread will stop reading and
879 // shutdown the asyncParser.
880 asyncParsedRecords.clear();
881 }
882 }
883
884
885
886 /**
887 * Indicates whether to ignore any duplicate values encountered while reading
888 * LDIF records.
889 *
890 * @return {@code true} if duplicate values should be ignored, or
891 * {@code false} if any LDIF records containing duplicate values
892 * should be rejected.
893 *
894 * @deprecated Use the {@link #getDuplicateValueBehavior} method instead.
895 */
896 @Deprecated()
897 public boolean ignoreDuplicateValues()
898 {
899 return (duplicateValueBehavior == DuplicateValueBehavior.STRIP);
900 }
901
902
903
904 /**
905 * Specifies whether to ignore any duplicate values encountered while reading
906 * LDIF records.
907 *
908 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
909 * attribute values encountered while reading
910 * LDIF records.
911 *
912 * @deprecated Use the {@link #setDuplicateValueBehavior} method instead.
913 */
914 @Deprecated()
915 public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues)
916 {
917 if (ignoreDuplicateValues)
918 {
919 duplicateValueBehavior = DuplicateValueBehavior.STRIP;
920 }
921 else
922 {
923 duplicateValueBehavior = DuplicateValueBehavior.REJECT;
924 }
925 }
926
927
928
929 /**
930 * Retrieves the behavior that should be exhibited if the LDIF reader
931 * encounters an entry with duplicate values.
932 *
933 * @return The behavior that should be exhibited if the LDIF reader
934 * encounters an entry with duplicate values.
935 */
936 public DuplicateValueBehavior getDuplicateValueBehavior()
937 {
938 return duplicateValueBehavior;
939 }
940
941
942
943 /**
944 * Specifies the behavior that should be exhibited if the LDIF reader
945 * encounters an entry with duplicate values.
946 *
947 * @param duplicateValueBehavior The behavior that should be exhibited if
948 * the LDIF reader encounters an entry with
949 * duplicate values.
950 */
951 public void setDuplicateValueBehavior(
952 final DuplicateValueBehavior duplicateValueBehavior)
953 {
954 this.duplicateValueBehavior = duplicateValueBehavior;
955 }
956
957
958
959 /**
960 * Indicates whether to strip off any illegal trailing spaces that may appear
961 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF
962 * specification strongly recommends that any value which legitimately
963 * contains trailing spaces be base64-encoded, and any spaces which appear
964 * after the end of non-base64-encoded values may therefore be considered
965 * invalid. If any such trailing spaces are encountered in an LDIF record and
966 * they are not to be stripped, then an {@link LDIFException} will be thrown
967 * for that record.
968 * <BR><BR>
969 * Note that this applies only to spaces after the end of a value, and not to
970 * spaces which may appear at the end of a line for a value that is wrapped
971 * and continued on the next line.
972 *
973 * @return {@code true} if illegal trailing spaces should be stripped off, or
974 * {@code false} if LDIF records containing illegal trailing spaces
975 * should be rejected.
976 *
977 * @deprecated Use the {@link #getTrailingSpaceBehavior} method instead.
978 */
979 @Deprecated()
980 public boolean stripTrailingSpaces()
981 {
982 return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP);
983 }
984
985
986
987 /**
988 * Specifies whether to strip off any illegal trailing spaces that may appear
989 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF
990 * specification strongly recommends that any value which legitimately
991 * contains trailing spaces be base64-encoded, and any spaces which appear
992 * after the end of non-base64-encoded values may therefore be considered
993 * invalid. If any such trailing spaces are encountered in an LDIF record and
994 * they are not to be stripped, then an {@link LDIFException} will be thrown
995 * for that record.
996 * <BR><BR>
997 * Note that this applies only to spaces after the end of a value, and not to
998 * spaces which may appear at the end of a line for a value that is wrapped
999 * and continued on the next line.
1000 *
1001 * @param stripTrailingSpaces Indicates whether to strip off any illegal
1002 * trailing spaces, or {@code false} if LDIF
1003 * records containing them should be rejected.
1004 *
1005 * @deprecated Use the {@link #setTrailingSpaceBehavior} method instead.
1006 */
1007 @Deprecated()
1008 public void setStripTrailingSpaces(final boolean stripTrailingSpaces)
1009 {
1010 trailingSpaceBehavior = stripTrailingSpaces
1011 ? TrailingSpaceBehavior.STRIP
1012 : TrailingSpaceBehavior.REJECT;
1013 }
1014
1015
1016
1017 /**
1018 * Retrieves the behavior that should be exhibited when encountering attribute
1019 * values which are not base64-encoded but contain trailing spaces. The LDIF
1020 * specification strongly recommends that any value which legitimately
1021 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
1022 * may be configured to automatically strip these spaces, to preserve them, or
1023 * to reject any entry or change record containing them.
1024 *
1025 * @return The behavior that should be exhibited when encountering attribute
1026 * values which are not base64-encoded but contain trailing spaces.
1027 */
1028 public TrailingSpaceBehavior getTrailingSpaceBehavior()
1029 {
1030 return trailingSpaceBehavior;
1031 }
1032
1033
1034
1035 /**
1036 * Specifies the behavior that should be exhibited when encountering attribute
1037 * values which are not base64-encoded but contain trailing spaces. The LDIF
1038 * specification strongly recommends that any value which legitimately
1039 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
1040 * may be configured to automatically strip these spaces, to preserve them, or
1041 * to reject any entry or change record containing them.
1042 *
1043 * @param trailingSpaceBehavior The behavior that should be exhibited when
1044 * encountering attribute values which are not
1045 * base64-encoded but contain trailing spaces.
1046 */
1047 public void setTrailingSpaceBehavior(
1048 final TrailingSpaceBehavior trailingSpaceBehavior)
1049 {
1050 this.trailingSpaceBehavior = trailingSpaceBehavior;
1051 }
1052
1053
1054
1055 /**
1056 * Retrieves the base path that will be prepended to relative paths in order
1057 * to obtain an absolute path. This will only be used for "file:" URLs that
1058 * have paths which do not begin with a slash.
1059 *
1060 * @return The base path that will be prepended to relative paths in order to
1061 * obtain an absolute path.
1062 */
1063 public String getRelativeBasePath()
1064 {
1065 return relativeBasePath;
1066 }
1067
1068
1069
1070 /**
1071 * Specifies the base path that will be prepended to relative paths in order
1072 * to obtain an absolute path. This will only be used for "file:" URLs that
1073 * have paths which do not begin with a space.
1074 *
1075 * @param relativeBasePath The base path that will be prepended to relative
1076 * paths in order to obtain an absolute path.
1077 */
1078 public void setRelativeBasePath(final String relativeBasePath)
1079 {
1080 setRelativeBasePath(new File(relativeBasePath));
1081 }
1082
1083
1084
1085 /**
1086 * Specifies the base path that will be prepended to relative paths in order
1087 * to obtain an absolute path. This will only be used for "file:" URLs that
1088 * have paths which do not begin with a space.
1089 *
1090 * @param relativeBasePath The base path that will be prepended to relative
1091 * paths in order to obtain an absolute path.
1092 */
1093 public void setRelativeBasePath(final File relativeBasePath)
1094 {
1095 final String path = relativeBasePath.getAbsolutePath();
1096 if (path.endsWith(File.separator))
1097 {
1098 this.relativeBasePath = path;
1099 }
1100 else
1101 {
1102 this.relativeBasePath = path + File.separator;
1103 }
1104 }
1105
1106
1107
1108 /**
1109 * Retrieves the schema that will be used when reading LDIF records, if
1110 * defined.
1111 *
1112 * @return The schema that will be used when reading LDIF records, or
1113 * {@code null} if no schema should be used and all attributes should
1114 * be treated as case-insensitive strings.
1115 */
1116 public Schema getSchema()
1117 {
1118 return schema;
1119 }
1120
1121
1122
1123 /**
1124 * Specifies the schema that should be used when reading LDIF records.
1125 *
1126 * @param schema The schema that should be used when reading LDIF records,
1127 * or {@code null} if no schema should be used and all
1128 * attributes should be treated as case-insensitive strings.
1129 */
1130 public void setSchema(final Schema schema)
1131 {
1132 this.schema = schema;
1133 }
1134
1135
1136
1137 /**
1138 * Reads a record from the LDIF source. It may be either an entry or an LDIF
1139 * change record.
1140 *
1141 * @return The record read from the LDIF source, or {@code null} if there are
1142 * no more entries to be read.
1143 *
1144 * @throws IOException If a problem occurs while trying to read from the
1145 * LDIF source.
1146 *
1147 * @throws LDIFException If the data read could not be parsed as an entry or
1148 * an LDIF change record.
1149 */
1150 public LDIFRecord readLDIFRecord()
1151 throws IOException, LDIFException
1152 {
1153 if (isAsync())
1154 {
1155 return readLDIFRecordAsync();
1156 }
1157 else
1158 {
1159 return readLDIFRecordInternal();
1160 }
1161 }
1162
1163
1164
1165 /**
1166 * Reads an entry from the LDIF source.
1167 *
1168 * @return The entry read from the LDIF source, or {@code null} if there are
1169 * no more entries to be read.
1170 *
1171 * @throws IOException If a problem occurs while attempting to read from the
1172 * LDIF source.
1173 *
1174 * @throws LDIFException If the data read could not be parsed as an entry.
1175 */
1176 public Entry readEntry()
1177 throws IOException, LDIFException
1178 {
1179 if (isAsync())
1180 {
1181 return readEntryAsync();
1182 }
1183 else
1184 {
1185 return readEntryInternal();
1186 }
1187 }
1188
1189
1190
1191 /**
1192 * Reads an LDIF change record from the LDIF source. The LDIF record must
1193 * have a changetype.
1194 *
1195 * @return The change record read from the LDIF source, or {@code null} if
1196 * there are no more records to be read.
1197 *
1198 * @throws IOException If a problem occurs while attempting to read from the
1199 * LDIF source.
1200 *
1201 * @throws LDIFException If the data read could not be parsed as an LDIF
1202 * change record.
1203 */
1204 public LDIFChangeRecord readChangeRecord()
1205 throws IOException, LDIFException
1206 {
1207 return readChangeRecord(false);
1208 }
1209
1210
1211
1212 /**
1213 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF
1214 * record does not have a changetype, then it may be assumed to be an add
1215 * change record.
1216 *
1217 * @param defaultAdd Indicates whether an LDIF record not containing a
1218 * changetype should be retrieved as an add change record.
1219 * If this is {@code false} and the record read does not
1220 * include a changetype, then an {@link LDIFException}
1221 * will be thrown.
1222 *
1223 * @return The change record read from the LDIF source, or {@code null} if
1224 * there are no more records to be read.
1225 *
1226 * @throws IOException If a problem occurs while attempting to read from the
1227 * LDIF source.
1228 *
1229 * @throws LDIFException If the data read could not be parsed as an LDIF
1230 * change record.
1231 */
1232 public LDIFChangeRecord readChangeRecord(final boolean defaultAdd)
1233 throws IOException, LDIFException
1234 {
1235 if (isAsync())
1236 {
1237 return readChangeRecordAsync(defaultAdd);
1238 }
1239 else
1240 {
1241 return readChangeRecordInternal(defaultAdd);
1242 }
1243 }
1244
1245
1246
1247 /**
1248 * Reads the next {@code LDIFRecord}, which was read and parsed by a different
1249 * thread.
1250 *
1251 * @return The next parsed record or {@code null} if there are no more
1252 * records to read.
1253 *
1254 * @throws IOException If IOException was thrown when reading or parsing
1255 * the record.
1256 *
1257 * @throws LDIFException If LDIFException was thrown parsing the record.
1258 */
1259 private LDIFRecord readLDIFRecordAsync()
1260 throws IOException, LDIFException
1261 {
1262 final Result<UnparsedLDIFRecord, LDIFRecord> result =
1263 readLDIFRecordResultAsync();
1264 if (result == null)
1265 {
1266 return null;
1267 }
1268 else
1269 {
1270 return result.getOutput();
1271 }
1272 }
1273
1274
1275
1276 /**
1277 * Reads an entry asynchronously from the LDIF source.
1278 *
1279 * @return The entry read from the LDIF source, or {@code null} if there are
1280 * no more entries to be read.
1281 *
1282 * @throws IOException If a problem occurs while attempting to read from the
1283 * LDIF source.
1284 * @throws LDIFException If the data read could not be parsed as an entry.
1285 */
1286 private Entry readEntryAsync()
1287 throws IOException, LDIFException
1288 {
1289 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1290 LDIFRecord record = null;
1291 while (record == null)
1292 {
1293 result = readLDIFRecordResultAsync();
1294 if (result == null)
1295 {
1296 return null;
1297 }
1298
1299 record = result.getOutput();
1300
1301 // This is a special value that means we should skip this Entry. We have
1302 // to use something different than null because null means EOF.
1303 if (record == SKIP_ENTRY)
1304 {
1305 record = null;
1306 }
1307 }
1308
1309 if (record instanceof Entry)
1310 {
1311 return (Entry) record;
1312 }
1313 else if (record instanceof LDIFChangeRecord)
1314 {
1315 try
1316 {
1317 // Some LDIFChangeRecord can be converted to an Entry. This is really
1318 // an edge case though.
1319 return ((LDIFChangeRecord)record).toEntry();
1320 }
1321 catch (LDIFException e)
1322 {
1323 debugException(e);
1324 final long firstLineNumber = result.getInput().getFirstLineNumber();
1325 throw new LDIFException(e.getExceptionMessage(),
1326 firstLineNumber, true, e);
1327 }
1328 }
1329
1330 throw new AssertionError("LDIFRecords must either be an Entry or an " +
1331 "LDIFChangeRecord");
1332 }
1333
1334
1335
1336 /**
1337 * Reads an LDIF change record from the LDIF source asynchronously.
1338 * Optionally, if the LDIF record does not have a changetype, then it may be
1339 * assumed to be an add change record.
1340 *
1341 * @param defaultAdd Indicates whether an LDIF record not containing a
1342 * changetype should be retrieved as an add change record.
1343 * If this is {@code false} and the record read does not
1344 * include a changetype, then an {@link LDIFException} will
1345 * be thrown.
1346 *
1347 * @return The change record read from the LDIF source, or {@code null} if
1348 * there are no more records to be read.
1349 *
1350 * @throws IOException If a problem occurs while attempting to read from the
1351 * LDIF source.
1352 * @throws LDIFException If the data read could not be parsed as an LDIF
1353 * change record.
1354 */
1355 private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd)
1356 throws IOException, LDIFException
1357 {
1358 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1359 LDIFRecord record = null;
1360 while (record == null)
1361 {
1362 result = readLDIFRecordResultAsync();
1363 if (result == null)
1364 {
1365 return null;
1366 }
1367
1368 record = result.getOutput();
1369
1370 // This is a special value that means we should skip this Entry. We have
1371 // to use something different than null because null means EOF.
1372 if (record == SKIP_ENTRY)
1373 {
1374 record = null;
1375 }
1376 }
1377
1378 if (record instanceof LDIFChangeRecord)
1379 {
1380 return (LDIFChangeRecord) record;
1381 }
1382 else if (record instanceof Entry)
1383 {
1384 if (defaultAdd)
1385 {
1386 return new LDIFAddChangeRecord((Entry) record);
1387 }
1388 else
1389 {
1390 final long firstLineNumber = result.getInput().getFirstLineNumber();
1391 throw new LDIFException(
1392 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber,
1393 true);
1394 }
1395 }
1396
1397 throw new AssertionError("LDIFRecords must either be an Entry or an " +
1398 "LDIFChangeRecord");
1399 }
1400
1401
1402
1403 /**
1404 * Reads the next LDIF record, which was read and parsed asynchronously by
1405 * separate threads.
1406 *
1407 * @return The next LDIF record or {@code null} if there are no more records.
1408 *
1409 * @throws IOException If a problem occurs while attempting to read from the
1410 * LDIF source.
1411 *
1412 * @throws LDIFException If the data read could not be parsed as an entry.
1413 */
1414 private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync()
1415 throws IOException, LDIFException
1416 {
1417 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1418
1419 // If the asynchronous reading and parsing is complete, then we don't have
1420 // to block waiting for the next record to show up on the queue. If there
1421 // isn't a record there, then return null (EOF) right away.
1422 if (asyncParsingComplete.get())
1423 {
1424 result = asyncParsedRecords.poll();
1425 }
1426 else
1427 {
1428 try
1429 {
1430 // We probably could just do a asyncParsedRecords.take() here, but
1431 // there are some edge case error scenarios where
1432 // asyncParsingComplete might be set without a special EOF sentinel
1433 // Result enqueued. So to guard against this, we have a very cautious
1434 // polling interval of 1 second. During normal processing, we never
1435 // have to wait for this to expire, when there is something to do
1436 // (like shutdown).
1437 while ((result == null) && (!asyncParsingComplete.get()))
1438 {
1439 result = asyncParsedRecords.poll(1, TimeUnit.SECONDS);
1440 }
1441
1442 // There's a very small chance that we missed the value, so double-check
1443 if (result == null)
1444 {
1445 result = asyncParsedRecords.poll();
1446 }
1447 }
1448 catch (InterruptedException e)
1449 {
1450 debugException(e);
1451 throw createIOExceptionWithCause(null, e);
1452 }
1453 }
1454 if (result == null)
1455 {
1456 return null;
1457 }
1458
1459 rethrow(result.getFailureCause());
1460
1461 // Check if we reached the end of the input
1462 final UnparsedLDIFRecord unparsedRecord = result.getInput();
1463 if (unparsedRecord.isEOF())
1464 {
1465 // This might have been set already by the LineReaderThread, but
1466 // just in case it hasn't gotten to it yet, do so here.
1467 asyncParsingComplete.set(true);
1468
1469 // Enqueue this EOF result again for any other thread that might be
1470 // blocked in asyncParsedRecords.take() even though having multiple
1471 // threads call this method concurrently breaks the contract of this
1472 // class.
1473 try
1474 {
1475 asyncParsedRecords.put(result);
1476 }
1477 catch (InterruptedException e)
1478 {
1479 // We shouldn't ever get interrupted because the put won't ever block.
1480 // Once we are done reading, this is the only item left in the queue,
1481 // so we should always be able to re-enqueue it.
1482 debugException(e);
1483 }
1484 return null;
1485 }
1486
1487 return result;
1488 }
1489
1490
1491
1492 /**
1493 * Indicates whether this LDIF reader was constructed to perform asynchronous
1494 * processing.
1495 *
1496 * @return {@code true} if this LDIFReader was constructed to perform
1497 * asynchronous processing, or {@code false} if not.
1498 */
1499 private boolean isAsync()
1500 {
1501 return isAsync;
1502 }
1503
1504
1505
1506 /**
1507 * If not {@code null}, rethrows the specified Throwable as either an
1508 * IOException or LDIFException.
1509 *
1510 * @param t The exception to rethrow. If it's {@code null}, then nothing
1511 * is thrown.
1512 *
1513 * @throws IOException If t is an IOException or a checked Exception that
1514 * is not an LDIFException.
1515 * @throws LDIFException If t is an LDIFException.
1516 */
1517 static void rethrow(final Throwable t)
1518 throws IOException, LDIFException
1519 {
1520 if (t == null)
1521 {
1522 return;
1523 }
1524
1525 if (t instanceof IOException)
1526 {
1527 throw (IOException) t;
1528 }
1529 else if (t instanceof LDIFException)
1530 {
1531 throw (LDIFException) t;
1532 }
1533 else if (t instanceof RuntimeException)
1534 {
1535 throw (RuntimeException) t;
1536 }
1537 else if (t instanceof Error)
1538 {
1539 throw (Error) t;
1540 }
1541 else
1542 {
1543 throw createIOExceptionWithCause(null, t);
1544 }
1545 }
1546
1547
1548
1549 /**
1550 * Reads a record from the LDIF source. It may be either an entry or an LDIF
1551 * change record.
1552 *
1553 * @return The record read from the LDIF source, or {@code null} if there are
1554 * no more entries to be read.
1555 *
1556 * @throws IOException If a problem occurs while trying to read from the
1557 * LDIF source.
1558 * @throws LDIFException If the data read could not be parsed as an entry or
1559 * an LDIF change record.
1560 */
1561 private LDIFRecord readLDIFRecordInternal()
1562 throws IOException, LDIFException
1563 {
1564 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1565 return decodeRecord(unparsedRecord, relativeBasePath, schema);
1566 }
1567
1568
1569
1570 /**
1571 * Reads an entry from the LDIF source.
1572 *
1573 * @return The entry read from the LDIF source, or {@code null} if there are
1574 * no more entries to be read.
1575 *
1576 * @throws IOException If a problem occurs while attempting to read from the
1577 * LDIF source.
1578 * @throws LDIFException If the data read could not be parsed as an entry.
1579 */
1580 private Entry readEntryInternal()
1581 throws IOException, LDIFException
1582 {
1583 Entry e = null;
1584 while (e == null)
1585 {
1586 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1587 if (unparsedRecord.isEOF())
1588 {
1589 return null;
1590 }
1591
1592 e = decodeEntry(unparsedRecord, relativeBasePath);
1593 debugLDIFRead(e);
1594
1595 if (entryTranslator != null)
1596 {
1597 e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber());
1598 }
1599 }
1600 return e;
1601 }
1602
1603
1604
1605 /**
1606 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF
1607 * record does not have a changetype, then it may be assumed to be an add
1608 * change record.
1609 *
1610 * @param defaultAdd Indicates whether an LDIF record not containing a
1611 * changetype should be retrieved as an add change record.
1612 * If this is {@code false} and the record read does not
1613 * include a changetype, then an {@link LDIFException} will
1614 * be thrown.
1615 *
1616 * @return The change record read from the LDIF source, or {@code null} if
1617 * there are no more records to be read.
1618 *
1619 * @throws IOException If a problem occurs while attempting to read from the
1620 * LDIF source.
1621 * @throws LDIFException If the data read could not be parsed as an LDIF
1622 * change record.
1623 */
1624 private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd)
1625 throws IOException, LDIFException
1626 {
1627 LDIFChangeRecord r = null;
1628 while (r == null)
1629 {
1630 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1631 if (unparsedRecord.isEOF())
1632 {
1633 return null;
1634 }
1635
1636 r = decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd,
1637 schema);
1638 debugLDIFRead(r);
1639
1640 if (changeRecordTranslator != null)
1641 {
1642 r = changeRecordTranslator.translate(r,
1643 unparsedRecord.getFirstLineNumber());
1644 }
1645 }
1646 return r;
1647 }
1648
1649
1650
/**
 * Reads the lines of the next record (either an entry or a change record)
 * from the LDIF source and collects them in a line list.  Continuation lines
 * (those starting with a space) are appended to the preceding line with the
 * leading space removed.  Comment lines, and a "version:" line seen before
 * any record data, are not added to the list.  Blank lines before the record
 * are skipped, and a blank line after record data ends the record.
 *
 * @return  An {@code UnparsedLDIFRecord} created from the collected lines,
 *          the configured duplicate value and trailing space behaviors, the
 *          configured schema, and the first line number computed for the
 *          record.  If the end of the LDIF source is reached before any
 *          record data is read, the returned record has an empty line list
 *          and a first line number of -1.
 *
 * @throws  IOException  If a problem occurs while attempting to read from
 *                       the LDIF source.
 *
 * @throws  LDIFException  If a line starting with a space is encountered
 *                         when there is no preceding record line or comment
 *                         for it to continue.
 */
private UnparsedLDIFRecord readUnparsedRecord()
     throws IOException, LDIFException
{
  final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20);
  boolean lastWasComment = false;
  long firstLineNumber = lineNumberCounter + 1;
  while (true)
  {
    final String line = reader.readLine();
    lineNumberCounter++;

    if (line == null)
    {
      // We've hit the end of the LDIF source.  If we haven't read any entry
      // data, then return an empty record with a first line number of -1
      // (presumably what callers detect through isEOF()).  Otherwise, the
      // last entry wasn't followed by a blank line, which is OK, and we
      // should decode that entry.
      if (lineList.isEmpty())
      {
        return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0),
             duplicateValueBehavior, trailingSpaceBehavior, schema, -1);
      }
      else
      {
        break;
      }
    }

    if (line.length() == 0)
    {
      // It's a blank line.  If we have read entry data, then this signals the
      // end of the entry.  Otherwise, it's an extra space between entries,
      // which is OK.
      lastWasComment = false;
      if (lineList.isEmpty())
      {
        // The record has not started yet, so its first line is at least one
        // line further down.
        firstLineNumber++;
        continue;
      }
      else
      {
        break;
      }
    }

    if (line.charAt(0) == ' ')
    {
      // The line starts with a space, which means that it must be a
      // continuation of the previous line.  This is true even if the last
      // line was a comment.
      if (lastWasComment)
      {
        // What we've read is part of a comment, so we don't care about its
        // content.
      }
      else if (lineList.isEmpty())
      {
        // There is nothing for this line to continue.
        throw new LDIFException(
             ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter),
             lineNumberCounter, false);
      }
      else
      {
        // Unfold the wrapped line by dropping the leading space and
        // appending the remainder to the previous line.
        lineList.get(lineList.size() - 1).append(line.substring(1));
        lastWasComment = false;
      }
    }
    else if (line.charAt(0) == '#')
    {
      lastWasComment = true;
    }
    else
    {
      // We want to make sure that we skip over the "version:" line if it
      // exists, but that should only occur at the beginning of an entry where
      // it can't be confused with a possible "version" attribute.
      if (lineList.isEmpty() && line.startsWith("version:"))
      {
        // Treat it like a comment so that any continuation lines are also
        // ignored.
        lastWasComment = true;
      }
      else
      {
        lineList.add(new StringBuilder(line));
        lastWasComment = false;
      }
    }
  }

  return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
       trailingSpaceBehavior, schema, firstLineNumber);
}
1753
1754
1755
1756 /**
1757 * Decodes the provided set of LDIF lines as an entry. The provided set of
1758 * lines must contain exactly one entry. Long lines may be wrapped as per the
1759 * LDIF specification, and it is acceptable to have one or more blank lines
1760 * following the entry. A default trailing space behavior of
1761 * {@link TrailingSpaceBehavior#REJECT} will be used.
1762 *
1763 * @param ldifLines The set of lines that comprise the LDIF representation
1764 * of the entry. It must not be {@code null} or empty.
1765 *
1766 * @return The entry read from LDIF.
1767 *
1768 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1769 * entry.
1770 */
1771 public static Entry decodeEntry(final String... ldifLines)
1772 throws LDIFException
1773 {
1774 final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP,
1775 TrailingSpaceBehavior.REJECT, null, ldifLines),
1776 DEFAULT_RELATIVE_BASE_PATH);
1777 debugLDIFRead(e);
1778 return e;
1779 }
1780
1781
1782
1783 /**
1784 * Decodes the provided set of LDIF lines as an entry. The provided set of
1785 * lines must contain exactly one entry. Long lines may be wrapped as per the
1786 * LDIF specification, and it is acceptable to have one or more blank lines
1787 * following the entry. A default trailing space behavior of
1788 * {@link TrailingSpaceBehavior#REJECT} will be used.
1789 *
1790 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1791 * attribute values encountered while parsing.
1792 * @param schema The schema to use when parsing the record,
1793 * if applicable.
1794 * @param ldifLines The set of lines that comprise the LDIF
1795 * representation of the entry. It must not be
1796 * {@code null} or empty.
1797 *
1798 * @return The entry read from LDIF.
1799 *
1800 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1801 * entry.
1802 */
1803 public static Entry decodeEntry(final boolean ignoreDuplicateValues,
1804 final Schema schema,
1805 final String... ldifLines)
1806 throws LDIFException
1807 {
1808 return decodeEntry(ignoreDuplicateValues, TrailingSpaceBehavior.REJECT,
1809 schema, ldifLines);
1810 }
1811
1812
1813
1814 /**
1815 * Decodes the provided set of LDIF lines as an entry. The provided set of
1816 * lines must contain exactly one entry. Long lines may be wrapped as per the
1817 * LDIF specification, and it is acceptable to have one or more blank lines
1818 * following the entry.
1819 *
1820 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1821 * attribute values encountered while parsing.
1822 * @param trailingSpaceBehavior The behavior that should be exhibited when
1823 * encountering attribute values which are not
1824 * base64-encoded but contain trailing spaces.
1825 * It must not be {@code null}.
1826 * @param schema The schema to use when parsing the record,
1827 * if applicable.
1828 * @param ldifLines The set of lines that comprise the LDIF
1829 * representation of the entry. It must not be
1830 * {@code null} or empty.
1831 *
1832 * @return The entry read from LDIF.
1833 *
1834 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1835 * entry.
1836 */
1837 public static Entry decodeEntry(
1838 final boolean ignoreDuplicateValues,
1839 final TrailingSpaceBehavior trailingSpaceBehavior,
1840 final Schema schema,
1841 final String... ldifLines) throws LDIFException
1842 {
1843 final Entry e = decodeEntry(prepareRecord(
1844 (ignoreDuplicateValues
1845 ? DuplicateValueBehavior.STRIP
1846 : DuplicateValueBehavior.REJECT),
1847 trailingSpaceBehavior, schema, ldifLines),
1848 DEFAULT_RELATIVE_BASE_PATH);
1849 debugLDIFRead(e);
1850 return e;
1851 }
1852
1853
1854
1855 /**
1856 * Decodes the provided set of LDIF lines as an LDIF change record. The
1857 * provided set of lines must contain exactly one change record and it must
1858 * include a changetype. Long lines may be wrapped as per the LDIF
1859 * specification, and it is acceptable to have one or more blank lines
1860 * following the entry.
1861 *
1862 * @param ldifLines The set of lines that comprise the LDIF representation
1863 * of the change record. It must not be {@code null} or
1864 * empty.
1865 *
1866 * @return The change record read from LDIF.
1867 *
1868 * @throws LDIFException If the provided LDIF data cannot be decoded as a
1869 * change record.
1870 */
1871 public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines)
1872 throws LDIFException
1873 {
1874 return decodeChangeRecord(false, ldifLines);
1875 }
1876
1877
1878
1879 /**
1880 * Decodes the provided set of LDIF lines as an LDIF change record. The
1881 * provided set of lines must contain exactly one change record. Long lines
1882 * may be wrapped as per the LDIF specification, and it is acceptable to have
1883 * one or more blank lines following the entry.
1884 *
1885 * @param defaultAdd Indicates whether an LDIF record not containing a
1886 * changetype should be retrieved as an add change record.
1887 * If this is {@code false} and the record read does not
1888 * include a changetype, then an {@link LDIFException}
1889 * will be thrown.
1890 * @param ldifLines The set of lines that comprise the LDIF representation
1891 * of the change record. It must not be {@code null} or
1892 * empty.
1893 *
1894 * @return The change record read from LDIF.
1895 *
1896 * @throws LDIFException If the provided LDIF data cannot be decoded as a
1897 * change record.
1898 */
1899 public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd,
1900 final String... ldifLines)
1901 throws LDIFException
1902 {
1903 final LDIFChangeRecord r =
1904 decodeChangeRecord(
1905 prepareRecord(DuplicateValueBehavior.STRIP,
1906 TrailingSpaceBehavior.REJECT, null, ldifLines),
1907 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null);
1908 debugLDIFRead(r);
1909 return r;
1910 }
1911
1912
1913
1914 /**
1915 * Decodes the provided set of LDIF lines as an LDIF change record. The
1916 * provided set of lines must contain exactly one change record. Long lines
1917 * may be wrapped as per the LDIF specification, and it is acceptable to have
1918 * one or more blank lines following the entry.
1919 *
1920 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1921 * attribute values encountered while parsing.
1922 * @param schema The schema to use when processing the change
1923 * record, or {@code null} if no schema should
1924 * be used and all values should be treated as
1925 * case-insensitive strings.
1926 * @param defaultAdd Indicates whether an LDIF record not
1927 * containing a changetype should be retrieved
1928 * as an add change record. If this is
1929 * {@code false} and the record read does not
1930 * include a changetype, then an
1931 * {@link LDIFException} will be thrown.
1932 * @param ldifLines The set of lines that comprise the LDIF
1933 * representation of the change record. It
1934 * must not be {@code null} or empty.
1935 *
1936 * @return The change record read from LDIF.
1937 *
1938 * @throws LDIFException If the provided LDIF data cannot be decoded as a
1939 * change record.
1940 */
1941 public static LDIFChangeRecord decodeChangeRecord(
1942 final boolean ignoreDuplicateValues,
1943 final Schema schema,
1944 final boolean defaultAdd,
1945 final String... ldifLines)
1946 throws LDIFException
1947 {
1948 return decodeChangeRecord(ignoreDuplicateValues,
1949 TrailingSpaceBehavior.REJECT, schema, defaultAdd, ldifLines);
1950 }
1951
1952
1953
1954 /**
1955 * Decodes the provided set of LDIF lines as an LDIF change record. The
1956 * provided set of lines must contain exactly one change record. Long lines
1957 * may be wrapped as per the LDIF specification, and it is acceptable to have
1958 * one or more blank lines following the entry.
1959 *
1960 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1961 * attribute values encountered while parsing.
1962 * @param trailingSpaceBehavior The behavior that should be exhibited when
1963 * encountering attribute values which are not
1964 * base64-encoded but contain trailing spaces.
1965 * It must not be {@code null}.
1966 * @param schema The schema to use when processing the change
1967 * record, or {@code null} if no schema should
1968 * be used and all values should be treated as
1969 * case-insensitive strings.
1970 * @param defaultAdd Indicates whether an LDIF record not
1971 * containing a changetype should be retrieved
1972 * as an add change record. If this is
1973 * {@code false} and the record read does not
1974 * include a changetype, then an
1975 * {@link LDIFException} will be thrown.
1976 * @param ldifLines The set of lines that comprise the LDIF
1977 * representation of the change record. It
1978 * must not be {@code null} or empty.
1979 *
1980 * @return The change record read from LDIF.
1981 *
1982 * @throws LDIFException If the provided LDIF data cannot be decoded as a
1983 * change record.
1984 */
1985 public static LDIFChangeRecord decodeChangeRecord(
1986 final boolean ignoreDuplicateValues,
1987 final TrailingSpaceBehavior trailingSpaceBehavior,
1988 final Schema schema,
1989 final boolean defaultAdd,
1990 final String... ldifLines)
1991 throws LDIFException
1992 {
1993 final LDIFChangeRecord r = decodeChangeRecord(
1994 prepareRecord(
1995 (ignoreDuplicateValues
1996 ? DuplicateValueBehavior.STRIP
1997 : DuplicateValueBehavior.REJECT),
1998 trailingSpaceBehavior, schema, ldifLines),
1999 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null);
2000 debugLDIFRead(r);
2001 return r;
2002 }
2003
2004
2005
2006 /**
2007 * Parses the provided set of lines into a list of {@code StringBuilder}
2008 * objects suitable for decoding into an entry or LDIF change record.
2009 * Comments will be ignored and wrapped lines will be unwrapped.
2010 *
2011 * @param duplicateValueBehavior The behavior that should be exhibited if
2012 * the LDIF reader encounters an entry with
2013 * duplicate values.
2014 * @param trailingSpaceBehavior The behavior that should be exhibited when
2015 * encountering attribute values which are not
2016 * base64-encoded but contain trailing spaces.
2017 * @param schema The schema to use when parsing the record,
2018 * if applicable.
2019 * @param ldifLines The set of lines that comprise the record
2020 * to decode. It must not be {@code null} or
2021 * empty.
2022 *
2023 * @return The prepared list of {@code StringBuilder} objects ready to be
2024 * decoded.
2025 *
2026 * @throws LDIFException If the provided lines do not contain valid LDIF
2027 * content.
2028 */
2029 private static UnparsedLDIFRecord prepareRecord(
2030 final DuplicateValueBehavior duplicateValueBehavior,
2031 final TrailingSpaceBehavior trailingSpaceBehavior,
2032 final Schema schema, final String... ldifLines)
2033 throws LDIFException
2034 {
2035 ensureNotNull(ldifLines);
2036 ensureFalse(ldifLines.length == 0,
2037 "LDIFReader.prepareRecord.ldifLines must not be empty.");
2038
2039 boolean lastWasComment = false;
2040 final ArrayList<StringBuilder> lineList =
2041 new ArrayList<StringBuilder>(ldifLines.length);
2042 for (int i=0; i < ldifLines.length; i++)
2043 {
2044 final String line = ldifLines[i];
2045 if (line.length() == 0)
2046 {
2047 // This is only acceptable if there are no more non-empty lines in the
2048 // array.
2049 for (int j=i+1; j < ldifLines.length; j++)
2050 {
2051 if (ldifLines[j].length() > 0)
2052 {
2053 throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true,
2054 ldifLines, null);
2055 }
2056
2057 // If we've gotten here, then we know that we're at the end of the
2058 // entry. If we have read data, then we can decode it as an entry.
2059 // Otherwise, there was no real data in the provided LDIF lines.
2060 if (lineList.isEmpty())
2061 {
2062 throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true,
2063 ldifLines, null);
2064 }
2065 else
2066 {
2067 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
2068 trailingSpaceBehavior, schema, 0);
2069 }
2070 }
2071 }
2072
2073 if (line.charAt(0) == ' ')
2074 {
2075 if (i > 0)
2076 {
2077 if (! lastWasComment)
2078 {
2079 lineList.get(lineList.size() - 1).append(line.substring(1));
2080 }
2081 }
2082 else
2083 {
2084 throw new LDIFException(
2085 ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0,
2086 true, ldifLines, null);
2087 }
2088 }
2089 else if (line.charAt(0) == '#')
2090 {
2091 lastWasComment = true;
2092 }
2093 else
2094 {
2095 lineList.add(new StringBuilder(line));
2096 lastWasComment = false;
2097 }
2098 }
2099
2100 if (lineList.isEmpty())
2101 {
2102 throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null);
2103 }
2104 else
2105 {
2106 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
2107 trailingSpaceBehavior, schema, 0);
2108 }
2109 }
2110
2111
2112
2113 /**
2114 * Decodes the unparsed record that was read from the LDIF source. It may be
2115 * either an entry or an LDIF change record.
2116 *
2117 * @param unparsedRecord The unparsed LDIF record that was read from the
2118 * input. It must not be {@code null} or empty.
2119 * @param relativeBasePath The base path that will be prepended to relative
2120 * paths in order to obtain an absolute path.
2121 * @param schema The schema to use when parsing.
2122 *
2123 * @return The parsed record, or {@code null} if there are no more entries to
2124 * be read.
2125 *
2126 * @throws LDIFException If the data read could not be parsed as an entry or
2127 * an LDIF change record.
2128 */
2129 private static LDIFRecord decodeRecord(
2130 final UnparsedLDIFRecord unparsedRecord,
2131 final String relativeBasePath,
2132 final Schema schema)
2133 throws LDIFException
2134 {
2135 // If there was an error reading from the input, then we rethrow it here.
2136 final Exception readError = unparsedRecord.getFailureCause();
2137 if (readError != null)
2138 {
2139 if (readError instanceof LDIFException)
2140 {
2141 // If the error was an LDIFException, which will normally be the case,
2142 // then rethrow it with all of the same state. We could just
2143 // throw (LDIFException) readError;
2144 // but that's considered bad form.
2145 final LDIFException ldifEx = (LDIFException) readError;
2146 throw new LDIFException(ldifEx.getMessage(),
2147 ldifEx.getLineNumber(),
2148 ldifEx.mayContinueReading(),
2149 ldifEx.getDataLines(),
2150 ldifEx.getCause());
2151 }
2152 else
2153 {
2154 throw new LDIFException(getExceptionMessage(readError),
2155 -1, true, readError);
2156 }
2157 }
2158
2159 if (unparsedRecord.isEOF())
2160 {
2161 return null;
2162 }
2163
2164 final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList();
2165 if (unparsedRecord.getLineList() == null)
2166 {
2167 return null; // We can get here if there was an error reading the lines.
2168 }
2169
2170 final LDIFRecord r;
2171 if (lineList.size() == 1)
2172 {
2173 r = decodeEntry(unparsedRecord, relativeBasePath);
2174 }
2175 else
2176 {
2177 final String lowerSecondLine = toLowerCase(lineList.get(1).toString());
2178 if (lowerSecondLine.startsWith("control:") ||
2179 lowerSecondLine.startsWith("changetype:"))
2180 {
2181 r = decodeChangeRecord(unparsedRecord, relativeBasePath, true, schema);
2182 }
2183 else
2184 {
2185 r = decodeEntry(unparsedRecord, relativeBasePath);
2186 }
2187 }
2188
2189 debugLDIFRead(r);
2190 return r;
2191 }
2192
2193
2194
2195 /**
2196 * Decodes the provided set of LDIF lines as an entry. The provided list must
2197 * not contain any blank lines or comments, and lines are not allowed to be
2198 * wrapped.
2199 *
2200 * @param unparsedRecord The unparsed LDIF record that was read from the
2201 * input. It must not be {@code null} or empty.
2202 * @param relativeBasePath The base path that will be prepended to relative
2203 * paths in order to obtain an absolute path.
2204 *
2205 * @return The entry read from LDIF.
2206 *
2207 * @throws LDIFException If the provided LDIF data cannot be read as an
2208 * entry.
2209 */
2210 private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord,
2211 final String relativeBasePath)
2212 throws LDIFException
2213 {
2214 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
2215 final long firstLineNumber = unparsedRecord.getFirstLineNumber();
2216
2217 final Iterator<StringBuilder> iterator = ldifLines.iterator();
2218
2219 // The first line must start with either "version:" or "dn:". If the first
2220 // line starts with "version:" then the second must start with "dn:".
2221 StringBuilder line = iterator.next();
2222 handleTrailingSpaces(line, null, firstLineNumber,
2223 unparsedRecord.getTrailingSpaceBehavior());
2224 int colonPos = line.indexOf(":");
2225 if ((colonPos > 0) &&
2226 line.substring(0, colonPos).equalsIgnoreCase("version"))
2227 {
2228 // The first line is "version:". Under most conditions, this will be
2229 // handled by the LDIF reader, but this can happen if you call
2230 // decodeEntry with a set of data that includes a version. At any rate,
2231 // read the next line, which must specify the DN.
2232 line = iterator.next();
2233 handleTrailingSpaces(line, null, firstLineNumber,
2234 unparsedRecord.getTrailingSpaceBehavior());
2235 }
2236
2237 colonPos = line.indexOf(":");
2238 if ((colonPos < 0) ||
2239 (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
2240 {
2241 throw new LDIFException(
2242 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
2243 firstLineNumber, true, ldifLines, null);
2244 }
2245
2246 final String dn;
2247 final int length = line.length();
2248 if (length == (colonPos+1))
2249 {
2250 // The colon was the last character on the line. This is acceptable and
2251 // indicates that the entry has the null DN.
2252 dn = "";
2253 }
2254 else if (line.charAt(colonPos+1) == ':')
2255 {
2256 // Skip over any spaces leading up to the value, and then the rest of the
2257 // string is the base64-encoded DN.
2258 int pos = colonPos+2;
2259 while ((pos < length) && (line.charAt(pos) == ' '))
2260 {
2261 pos++;
2262 }
2263
2264 try
2265 {
2266 final byte[] dnBytes = Base64.decode(line.substring(pos));
2267 dn = new String(dnBytes, "UTF-8");
2268 }
2269 catch (final ParseException pe)
2270 {
2271 debugException(pe);
2272 throw new LDIFException(
2273 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2274 pe.getMessage()),
2275 firstLineNumber, true, ldifLines, pe);
2276 }
2277 catch (final Exception e)
2278 {
2279 debugException(e);
2280 throw new LDIFException(
2281 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e),
2282 firstLineNumber, true, ldifLines, e);
2283 }
2284 }
2285 else
2286 {
2287 // Skip over any spaces leading up to the value, and then the rest of the
2288 // string is the DN.
2289 int pos = colonPos+1;
2290 while ((pos < length) && (line.charAt(pos) == ' '))
2291 {
2292 pos++;
2293 }
2294
2295 dn = line.substring(pos);
2296 }
2297
2298
2299 // The remaining lines must be the attributes for the entry. However, we
2300 // will allow the case in which an entry does not have any attributes, to be
2301 // able to support reading search result entries in which no attributes were
2302 // returned.
2303 if (! iterator.hasNext())
2304 {
2305 return new Entry(dn, unparsedRecord.getSchema());
2306 }
2307
2308 return new Entry(dn, unparsedRecord.getSchema(),
2309 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2310 unparsedRecord.getTrailingSpaceBehavior(),
2311 unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath,
2312 firstLineNumber));
2313 }
2314
2315
2316
2317 /**
2318 * Decodes the provided set of LDIF lines as a change record. The provided
2319 * list must not contain any blank lines or comments, and lines are not
2320 * allowed to be wrapped.
2321 *
2322 * @param unparsedRecord The unparsed LDIF record that was read from the
2323 * input. It must not be {@code null} or empty.
2324 * @param relativeBasePath The base path that will be prepended to relative
2325 * paths in order to obtain an absolute path.
2326 * @param defaultAdd Indicates whether an LDIF record not containing a
2327 * changetype should be retrieved as an add change
2328 * record. If this is {@code false} and the record
2329 * read does not include a changetype, then an
2330 * {@link LDIFException} will be thrown.
2331 * @param schema The schema to use in parsing.
2332 *
2333 * @return The change record read from LDIF.
2334 *
2335 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2336 * change record.
2337 */
2338 private static LDIFChangeRecord decodeChangeRecord(
2339 final UnparsedLDIFRecord unparsedRecord,
2340 final String relativeBasePath,
2341 final boolean defaultAdd,
2342 final Schema schema)
2343 throws LDIFException
2344 {
2345 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
2346 final long firstLineNumber = unparsedRecord.getFirstLineNumber();
2347
2348 Iterator<StringBuilder> iterator = ldifLines.iterator();
2349
2350 // The first line must start with either "version:" or "dn:". If the first
2351 // line starts with "version:" then the second must start with "dn:".
2352 StringBuilder line = iterator.next();
2353 handleTrailingSpaces(line, null, firstLineNumber,
2354 unparsedRecord.getTrailingSpaceBehavior());
2355 int colonPos = line.indexOf(":");
2356 int linesRead = 1;
2357 if ((colonPos > 0) &&
2358 line.substring(0, colonPos).equalsIgnoreCase("version"))
2359 {
2360 // The first line is "version:". Under most conditions, this will be
2361 // handled by the LDIF reader, but this can happen if you call
2362 // decodeEntry with a set of data that includes a version. At any rate,
2363 // read the next line, which must specify the DN.
2364 line = iterator.next();
2365 linesRead++;
2366 handleTrailingSpaces(line, null, firstLineNumber,
2367 unparsedRecord.getTrailingSpaceBehavior());
2368 }
2369
2370 colonPos = line.indexOf(":");
2371 if ((colonPos < 0) ||
2372 (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
2373 {
2374 throw new LDIFException(
2375 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
2376 firstLineNumber, true, ldifLines, null);
2377 }
2378
2379 final String dn;
2380 int length = line.length();
2381 if (length == (colonPos+1))
2382 {
2383 // The colon was the last character on the line. This is acceptable and
2384 // indicates that the entry has the null DN.
2385 dn = "";
2386 }
2387 else if (line.charAt(colonPos+1) == ':')
2388 {
2389 // Skip over any spaces leading up to the value, and then the rest of the
2390 // string is the base64-encoded DN.
2391 int pos = colonPos+2;
2392 while ((pos < length) && (line.charAt(pos) == ' '))
2393 {
2394 pos++;
2395 }
2396
2397 try
2398 {
2399 final byte[] dnBytes = Base64.decode(line.substring(pos));
2400 dn = new String(dnBytes, "UTF-8");
2401 }
2402 catch (final ParseException pe)
2403 {
2404 debugException(pe);
2405 throw new LDIFException(
2406 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2407 pe.getMessage()),
2408 firstLineNumber, true, ldifLines, pe);
2409 }
2410 catch (final Exception e)
2411 {
2412 debugException(e);
2413 throw new LDIFException(
2414 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2415 e),
2416 firstLineNumber, true, ldifLines, e);
2417 }
2418 }
2419 else
2420 {
2421 // Skip over any spaces leading up to the value, and then the rest of the
2422 // string is the DN.
2423 int pos = colonPos+1;
2424 while ((pos < length) && (line.charAt(pos) == ' '))
2425 {
2426 pos++;
2427 }
2428
2429 dn = line.substring(pos);
2430 }
2431
2432
2433 // An LDIF change record may contain zero or more controls, with the end of
2434 // the controls signified by the changetype. The changetype element must be
2435 // present, unless defaultAdd is true in which case the first thing that is
2436 // neither control or changetype will trigger the start of add attribute
2437 // parsing.
2438 if (! iterator.hasNext())
2439 {
2440 throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber),
2441 firstLineNumber, true, ldifLines, null);
2442 }
2443
2444 String changeType = null;
2445 ArrayList<Control> controls = null;
2446 while (true)
2447 {
2448 line = iterator.next();
2449 handleTrailingSpaces(line, dn, firstLineNumber,
2450 unparsedRecord.getTrailingSpaceBehavior());
2451 colonPos = line.indexOf(":");
2452 if (colonPos < 0)
2453 {
2454 throw new LDIFException(
2455 ERR_READ_CR_SECOND_LINE_MISSING_COLON.get(firstLineNumber),
2456 firstLineNumber, true, ldifLines, null);
2457 }
2458
2459 final String token = toLowerCase(line.substring(0, colonPos));
2460 if (token.equals("control"))
2461 {
2462 if (controls == null)
2463 {
2464 controls = new ArrayList<Control>(5);
2465 }
2466
2467 controls.add(decodeControl(line, colonPos, firstLineNumber, ldifLines,
2468 relativeBasePath));
2469 }
2470 else if (token.equals("changetype"))
2471 {
2472 changeType =
2473 decodeChangeType(line, colonPos, firstLineNumber, ldifLines);
2474 break;
2475 }
2476 else if (defaultAdd)
2477 {
2478 // The line we read wasn't a control or changetype declaration, so we'll
2479 // assume it's an attribute in an add record. However, we're not ready
2480 // for that yet, and since we can't rewind an iterator we'll create a
2481 // new one that hasn't yet gotten to this line.
2482 changeType = "add";
2483 iterator = ldifLines.iterator();
2484 for (int i=0; i < linesRead; i++)
2485 {
2486 iterator.next();
2487 }
2488 break;
2489 }
2490 else
2491 {
2492 throw new LDIFException(
2493 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CONTROL_OR_CT.get(
2494 firstLineNumber),
2495 firstLineNumber, true, ldifLines, null);
2496 }
2497
2498 linesRead++;
2499 }
2500
2501
2502 // Make sure that the change type is acceptable and then decode the rest of
2503 // the change record accordingly.
2504 final String lowerChangeType = toLowerCase(changeType);
2505 if (lowerChangeType.equals("add"))
2506 {
2507 // There must be at least one more line. If not, then that's an error.
2508 // Otherwise, parse the rest of the data as attribute-value pairs.
2509 if (iterator.hasNext())
2510 {
2511 final Collection<Attribute> attrs =
2512 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2513 unparsedRecord.getTrailingSpaceBehavior(),
2514 unparsedRecord.getSchema(), ldifLines, iterator,
2515 relativeBasePath, firstLineNumber);
2516 final Attribute[] attributes = new Attribute[attrs.size()];
2517 final Iterator<Attribute> attrIterator = attrs.iterator();
2518 for (int i=0; i < attributes.length; i++)
2519 {
2520 attributes[i] = attrIterator.next();
2521 }
2522
2523 return new LDIFAddChangeRecord(dn, attributes, controls);
2524 }
2525 else
2526 {
2527 throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber),
2528 firstLineNumber, true, ldifLines, null);
2529 }
2530 }
2531 else if (lowerChangeType.equals("delete"))
2532 {
2533 // There shouldn't be any more data. If there is, then that's an error.
2534 // Otherwise, we can just return the delete change record with what we
2535 // already know.
2536 if (iterator.hasNext())
2537 {
2538 throw new LDIFException(
2539 ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber),
2540 firstLineNumber, true, ldifLines, null);
2541 }
2542 else
2543 {
2544 return new LDIFDeleteChangeRecord(dn, controls);
2545 }
2546 }
2547 else if (lowerChangeType.equals("modify"))
2548 {
2549 // There must be at least one more line. If not, then that's an error.
2550 // Otherwise, parse the rest of the data as a set of modifications.
2551 if (iterator.hasNext())
2552 {
2553 final Modification[] mods = parseModifications(dn,
2554 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator,
2555 firstLineNumber, schema);
2556 return new LDIFModifyChangeRecord(dn, mods, controls);
2557 }
2558 else
2559 {
2560 throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber),
2561 firstLineNumber, true, ldifLines, null);
2562 }
2563 }
2564 else if (lowerChangeType.equals("moddn") ||
2565 lowerChangeType.equals("modrdn"))
2566 {
2567 // There must be at least one more line. If not, then that's an error.
2568 // Otherwise, parse the rest of the data as a set of modifications.
2569 if (iterator.hasNext())
2570 {
2571 return parseModifyDNChangeRecord(ldifLines, iterator, dn, controls,
2572 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber);
2573 }
2574 else
2575 {
2576 throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber),
2577 firstLineNumber, true, ldifLines, null);
2578 }
2579 }
2580 else
2581 {
2582 throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType,
2583 firstLineNumber),
2584 firstLineNumber, true, ldifLines, null);
2585 }
2586 }
2587
2588
2589
2590 /**
2591 * Decodes information about a control from the provided line.
2592 *
2593 * @param line The line to process.
2594 * @param colonPos The position of the colon that separates the
2595 * control token string from tbe encoded control.
2596 * @param firstLineNumber The line number for the start of the record.
2597 * @param ldifLines The lines that comprise the LDIF representation
2598 * of the full record being parsed.
2599 * @param relativeBasePath The base path that will be prepended to relative
2600 * paths in order to obtain an absolute path.
2601 *
2602 * @return The decoded control.
2603 *
2604 * @throws LDIFException If a problem is encountered while trying to decode
2605 * the changetype.
2606 */
2607 private static Control decodeControl(final StringBuilder line,
2608 final int colonPos,
2609 final long firstLineNumber,
2610 final ArrayList<StringBuilder> ldifLines,
2611 final String relativeBasePath)
2612 throws LDIFException
2613 {
2614 final String controlString;
2615 int length = line.length();
2616 if (length == (colonPos+1))
2617 {
2618 // The colon was the last character on the line. This is not
2619 // acceptable.
2620 throw new LDIFException(
2621 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber),
2622 firstLineNumber, true, ldifLines, null);
2623 }
2624 else if (line.charAt(colonPos+1) == ':')
2625 {
2626 // Skip over any spaces leading up to the value, and then the rest of
2627 // the string is the base64-encoded control representation. This is
2628 // unusual and unnecessary, but is nevertheless acceptable.
2629 int pos = colonPos+2;
2630 while ((pos < length) && (line.charAt(pos) == ' '))
2631 {
2632 pos++;
2633 }
2634
2635 try
2636 {
2637 final byte[] controlBytes = Base64.decode(line.substring(pos));
2638 controlString = new String(controlBytes, "UTF-8");
2639 }
2640 catch (final ParseException pe)
2641 {
2642 debugException(pe);
2643 throw new LDIFException(
2644 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(
2645 firstLineNumber, pe.getMessage()),
2646 firstLineNumber, true, ldifLines, pe);
2647 }
2648 catch (final Exception e)
2649 {
2650 debugException(e);
2651 throw new LDIFException(
2652 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(firstLineNumber, e),
2653 firstLineNumber, true, ldifLines, e);
2654 }
2655 }
2656 else
2657 {
2658 // Skip over any spaces leading up to the value, and then the rest of
2659 // the string is the encoded control.
2660 int pos = colonPos+1;
2661 while ((pos < length) && (line.charAt(pos) == ' '))
2662 {
2663 pos++;
2664 }
2665
2666 controlString = line.substring(pos);
2667 }
2668
2669 // If the resulting control definition is empty, then that's invalid.
2670 if (controlString.length() == 0)
2671 {
2672 throw new LDIFException(
2673 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber),
2674 firstLineNumber, true, ldifLines, null);
2675 }
2676
2677
2678 // The first element of the control must be the OID, and it must be followed
2679 // by a space (to separate it from the criticality), a colon (to separate it
2680 // from the value and indicate a default criticality of false), or the end
2681 // of the line (to indicate a default criticality of false and no value).
2682 String oid = null;
2683 boolean hasCriticality = false;
2684 boolean hasValue = false;
2685 int pos = 0;
2686 length = controlString.length();
2687 while (pos < length)
2688 {
2689 final char c = controlString.charAt(pos);
2690 if (c == ':')
2691 {
2692 // This indicates that there is no criticality and that the value
2693 // immediately follows the OID.
2694 oid = controlString.substring(0, pos++);
2695 hasValue = true;
2696 break;
2697 }
2698 else if (c == ' ')
2699 {
2700 // This indicates that there is a criticality. We don't know anything
2701 // about the presence of a value yet.
2702 oid = controlString.substring(0, pos++);
2703 hasCriticality = true;
2704 break;
2705 }
2706 else
2707 {
2708 pos++;
2709 }
2710 }
2711
2712 if (oid == null)
2713 {
2714 // This indicates that the string representation of the control is only
2715 // the OID.
2716 return new Control(controlString, false);
2717 }
2718
2719
2720 // See if we need to read the criticality. If so, then do so now.
2721 // Otherwise, assume a default criticality of false.
2722 final boolean isCritical;
2723 if (hasCriticality)
2724 {
2725 // Skip over any spaces before the criticality.
2726 while (controlString.charAt(pos) == ' ')
2727 {
2728 pos++;
2729 }
2730
2731 // Read until we find a colon or the end of the string.
2732 final int criticalityStartPos = pos;
2733 while (pos < length)
2734 {
2735 final char c = controlString.charAt(pos);
2736 if (c == ':')
2737 {
2738 hasValue = true;
2739 break;
2740 }
2741 else
2742 {
2743 pos++;
2744 }
2745 }
2746
2747 final String criticalityString =
2748 toLowerCase(controlString.substring(criticalityStartPos, pos));
2749 if (criticalityString.equals("true"))
2750 {
2751 isCritical = true;
2752 }
2753 else if (criticalityString.equals("false"))
2754 {
2755 isCritical = false;
2756 }
2757 else
2758 {
2759 throw new LDIFException(
2760 ERR_READ_CONTROL_LINE_INVALID_CRITICALITY.get(criticalityString,
2761 firstLineNumber),
2762 firstLineNumber, true, ldifLines, null);
2763 }
2764
2765 if (hasValue)
2766 {
2767 pos++;
2768 }
2769 }
2770 else
2771 {
2772 isCritical = false;
2773 }
2774
2775 // See if we need to read the value. If so, then do so now. It may be
2776 // a string, or it may be base64-encoded. It could conceivably even be read
2777 // from a URL.
2778 final ASN1OctetString value;
2779 if (hasValue)
2780 {
2781 // The character immediately after the colon that precedes the value may
2782 // be one of the following:
2783 // - A second colon (optionally followed by a single space) to indicate
2784 // that the value is base64-encoded.
2785 // - A less-than symbol to indicate that the value should be read from a
2786 // location specified by a URL.
2787 // - A single space that precedes the non-base64-encoded value.
2788 // - The first character of the non-base64-encoded value.
2789 switch (controlString.charAt(pos))
2790 {
2791 case ':':
2792 try
2793 {
2794 if (controlString.length() == (pos+1))
2795 {
2796 value = new ASN1OctetString();
2797 }
2798 else if (controlString.charAt(pos+1) == ' ')
2799 {
2800 value = new ASN1OctetString(
2801 Base64.decode(controlString.substring(pos+2)));
2802 }
2803 else
2804 {
2805 value = new ASN1OctetString(
2806 Base64.decode(controlString.substring(pos+1)));
2807 }
2808 }
2809 catch (final Exception e)
2810 {
2811 debugException(e);
2812 throw new LDIFException(
2813 ERR_READ_CONTROL_LINE_CANNOT_BASE64_DECODE_VALUE.get(
2814 firstLineNumber, getExceptionMessage(e)),
2815 firstLineNumber, true, ldifLines, e);
2816 }
2817 break;
2818 case '<':
2819 try
2820 {
2821 final String urlString;
2822 if (controlString.charAt(pos+1) == ' ')
2823 {
2824 urlString = controlString.substring(pos+2);
2825 }
2826 else
2827 {
2828 urlString = controlString.substring(pos+1);
2829 }
2830 value = new ASN1OctetString(retrieveURLBytes(urlString,
2831 relativeBasePath, firstLineNumber));
2832 }
2833 catch (final Exception e)
2834 {
2835 debugException(e);
2836 throw new LDIFException(
2837 ERR_READ_CONTROL_LINE_CANNOT_RETRIEVE_VALUE_FROM_URL.get(
2838 firstLineNumber, getExceptionMessage(e)),
2839 firstLineNumber, true, ldifLines, e);
2840 }
2841 break;
2842 case ' ':
2843 value = new ASN1OctetString(controlString.substring(pos+1));
2844 break;
2845 default:
2846 value = new ASN1OctetString(controlString.substring(pos));
2847 break;
2848 }
2849 }
2850 else
2851 {
2852 value = null;
2853 }
2854
2855 return new Control(oid, isCritical, value);
2856 }
2857
2858
2859
2860 /**
2861 * Decodes the changetype element from the provided line.
2862 *
2863 * @param line The line to process.
2864 * @param colonPos The position of the colon that separates the
2865 * changetype string from its value.
2866 * @param firstLineNumber The line number for the start of the record.
2867 * @param ldifLines The lines that comprise the LDIF representation of
2868 * the full record being parsed.
2869 *
2870 * @return The decoded changetype string.
2871 *
2872 * @throws LDIFException If a problem is encountered while trying to decode
2873 * the changetype.
2874 */
2875 private static String decodeChangeType(final StringBuilder line,
2876 final int colonPos, final long firstLineNumber,
2877 final ArrayList<StringBuilder> ldifLines)
2878 throws LDIFException
2879 {
2880 final int length = line.length();
2881 if (length == (colonPos+1))
2882 {
2883 // The colon was the last character on the line. This is not
2884 // acceptable.
2885 throw new LDIFException(
2886 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber,
2887 true, ldifLines, null);
2888 }
2889 else if (line.charAt(colonPos+1) == ':')
2890 {
2891 // Skip over any spaces leading up to the value, and then the rest of
2892 // the string is the base64-encoded changetype. This is unusual and
2893 // unnecessary, but is nevertheless acceptable.
2894 int pos = colonPos+2;
2895 while ((pos < length) && (line.charAt(pos) == ' '))
2896 {
2897 pos++;
2898 }
2899
2900 try
2901 {
2902 final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
2903 return new String(changeTypeBytes, "UTF-8");
2904 }
2905 catch (final ParseException pe)
2906 {
2907 debugException(pe);
2908 throw new LDIFException(
2909 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber,
2910 pe.getMessage()),
2911 firstLineNumber, true, ldifLines, pe);
2912 }
2913 catch (final Exception e)
2914 {
2915 debugException(e);
2916 throw new LDIFException(
2917 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e),
2918 firstLineNumber, true, ldifLines, e);
2919 }
2920 }
2921 else
2922 {
2923 // Skip over any spaces leading up to the value, and then the rest of
2924 // the string is the changetype.
2925 int pos = colonPos+1;
2926 while ((pos < length) && (line.charAt(pos) == ' '))
2927 {
2928 pos++;
2929 }
2930
2931 return line.substring(pos);
2932 }
2933 }
2934
2935
2936
2937 /**
2938 * Parses the data available through the provided iterator as a collection of
2939 * attributes suitable for use in an entry or an add change record.
2940 *
2941 * @param dn The DN of the record being read.
2942 * @param duplicateValueBehavior The behavior that should be exhibited if
2943 * the LDIF reader encounters an entry with
2944 * duplicate values.
2945 * @param trailingSpaceBehavior The behavior that should be exhibited when
2946 * encountering attribute values which are not
2947 * base64-encoded but contain trailing spaces.
2948 * @param schema The schema to use when parsing the
2949 * attributes, or {@code null} if none is
2950 * needed.
2951 * @param ldifLines The lines that comprise the LDIF
2952 * representation of the full record being
2953 * parsed.
2954 * @param iterator The iterator to use to access the attribute
2955 * lines.
2956 * @param relativeBasePath The base path that will be prepended to
2957 * relative paths in order to obtain an
2958 * absolute path.
2959 * @param firstLineNumber The line number for the start of the
2960 * record.
2961 *
2962 * @return The collection of attributes that were read.
2963 *
2964 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2965 * set of attributes.
2966 */
2967 private static ArrayList<Attribute> parseAttributes(final String dn,
2968 final DuplicateValueBehavior duplicateValueBehavior,
2969 final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema,
2970 final ArrayList<StringBuilder> ldifLines,
2971 final Iterator<StringBuilder> iterator, final String relativeBasePath,
2972 final long firstLineNumber)
2973 throws LDIFException
2974 {
2975 final LinkedHashMap<String,Object> attributes =
2976 new LinkedHashMap<String,Object>(ldifLines.size());
2977 while (iterator.hasNext())
2978 {
2979 final StringBuilder line = iterator.next();
2980 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
2981 final int colonPos = line.indexOf(":");
2982 if (colonPos <= 0)
2983 {
2984 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
2985 firstLineNumber, true, ldifLines, null);
2986 }
2987
2988 final String attributeName = line.substring(0, colonPos);
2989 final String lowerName = toLowerCase(attributeName);
2990
2991 final MatchingRule matchingRule;
2992 if (schema == null)
2993 {
2994 matchingRule = CaseIgnoreStringMatchingRule.getInstance();
2995 }
2996 else
2997 {
2998 matchingRule =
2999 MatchingRule.selectEqualityMatchingRule(attributeName, schema);
3000 }
3001
3002 Attribute attr;
3003 final LDIFAttribute ldifAttr;
3004 final Object attrObject = attributes.get(lowerName);
3005 if (attrObject == null)
3006 {
3007 attr = null;
3008 ldifAttr = null;
3009 }
3010 else
3011 {
3012 if (attrObject instanceof Attribute)
3013 {
3014 attr = (Attribute) attrObject;
3015 ldifAttr = new LDIFAttribute(attr.getName(), matchingRule,
3016 attr.getRawValues()[0]);
3017 attributes.put(lowerName, ldifAttr);
3018 }
3019 else
3020 {
3021 attr = null;
3022 ldifAttr = (LDIFAttribute) attrObject;
3023 }
3024 }
3025
3026 final int length = line.length();
3027 if (length == (colonPos+1))
3028 {
3029 // This means that the attribute has a zero-length value, which is
3030 // acceptable.
3031 if (attrObject == null)
3032 {
3033 attr = new Attribute(attributeName, matchingRule, "");
3034 attributes.put(lowerName, attr);
3035 }
3036 else
3037 {
3038 try
3039 {
3040 if (! ldifAttr.addValue(new ASN1OctetString(),
3041 duplicateValueBehavior))
3042 {
3043 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3044 {
3045 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3046 firstLineNumber, attributeName), firstLineNumber, true,
3047 ldifLines, null);
3048 }
3049 }
3050 }
3051 catch (LDAPException le)
3052 {
3053 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3054 firstLineNumber, attributeName, getExceptionMessage(le)),
3055 firstLineNumber, true, ldifLines, le);
3056 }
3057 }
3058 }
3059 else if (line.charAt(colonPos+1) == ':')
3060 {
3061 // Skip over any spaces leading up to the value, and then the rest of
3062 // the string is the base64-encoded attribute value.
3063 int pos = colonPos+2;
3064 while ((pos < length) && (line.charAt(pos) == ' '))
3065 {
3066 pos++;
3067 }
3068
3069 try
3070 {
3071 final byte[] valueBytes = Base64.decode(line.substring(pos));
3072 if (attrObject == null)
3073 {
3074 attr = new Attribute(attributeName, matchingRule, valueBytes);
3075 attributes.put(lowerName, attr);
3076 }
3077 else
3078 {
3079 try
3080 {
3081 if (! ldifAttr.addValue(new ASN1OctetString(valueBytes),
3082 duplicateValueBehavior))
3083 {
3084 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3085 {
3086 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3087 firstLineNumber, attributeName), firstLineNumber, true,
3088 ldifLines, null);
3089 }
3090 }
3091 }
3092 catch (LDAPException le)
3093 {
3094 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3095 firstLineNumber, attributeName, getExceptionMessage(le)),
3096 firstLineNumber, true, ldifLines, le);
3097 }
3098 }
3099 }
3100 catch (final ParseException pe)
3101 {
3102 debugException(pe);
3103 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3104 attributeName, firstLineNumber,
3105 pe.getMessage()),
3106 firstLineNumber, true, ldifLines, pe);
3107 }
3108 }
3109 else if (line.charAt(colonPos+1) == '<')
3110 {
3111 // Skip over any spaces leading up to the value, and then the rest of
3112 // the string is a URL that indicates where to get the real content.
3113 // At the present time, we'll only support the file URLs.
3114 int pos = colonPos+2;
3115 while ((pos < length) && (line.charAt(pos) == ' '))
3116 {
3117 pos++;
3118 }
3119
3120 final byte[] urlBytes;
3121 final String urlString = line.substring(pos);
3122 try
3123 {
3124 urlBytes =
3125 retrieveURLBytes(urlString, relativeBasePath, firstLineNumber);
3126 }
3127 catch (final Exception e)
3128 {
3129 debugException(e);
3130 throw new LDIFException(
3131 ERR_READ_URL_EXCEPTION.get(attributeName, urlString,
3132 firstLineNumber, e),
3133 firstLineNumber, true, ldifLines, e);
3134 }
3135
3136 if (attrObject == null)
3137 {
3138 attr = new Attribute(attributeName, matchingRule, urlBytes);
3139 attributes.put(lowerName, attr);
3140 }
3141 else
3142 {
3143 try
3144 {
3145 if (! ldifAttr.addValue(new ASN1OctetString(urlBytes),
3146 duplicateValueBehavior))
3147 {
3148 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3149 {
3150 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3151 firstLineNumber, attributeName), firstLineNumber, true,
3152 ldifLines, null);
3153 }
3154 }
3155 }
3156 catch (final LDIFException le)
3157 {
3158 debugException(le);
3159 throw le;
3160 }
3161 catch (final Exception e)
3162 {
3163 debugException(e);
3164 throw new LDIFException(
3165 ERR_READ_URL_EXCEPTION.get(attributeName, urlString,
3166 firstLineNumber, e),
3167 firstLineNumber, true, ldifLines, e);
3168 }
3169 }
3170 }
3171 else
3172 {
3173 // Skip over any spaces leading up to the value, and then the rest of
3174 // the string is the value.
3175 int pos = colonPos+1;
3176 while ((pos < length) && (line.charAt(pos) == ' '))
3177 {
3178 pos++;
3179 }
3180
3181 final String valueString = line.substring(pos);
3182 if (attrObject == null)
3183 {
3184 attr = new Attribute(attributeName, matchingRule, valueString);
3185 attributes.put(lowerName, attr);
3186 }
3187 else
3188 {
3189 try
3190 {
3191 if (! ldifAttr.addValue(new ASN1OctetString(valueString),
3192 duplicateValueBehavior))
3193 {
3194 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3195 {
3196 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3197 firstLineNumber, attributeName), firstLineNumber, true,
3198 ldifLines, null);
3199 }
3200 }
3201 }
3202 catch (LDAPException le)
3203 {
3204 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3205 firstLineNumber, attributeName, getExceptionMessage(le)),
3206 firstLineNumber, true, ldifLines, le);
3207 }
3208 }
3209 }
3210 }
3211
3212 final ArrayList<Attribute> attrList =
3213 new ArrayList<Attribute>(attributes.size());
3214 for (final Object o : attributes.values())
3215 {
3216 if (o instanceof Attribute)
3217 {
3218 attrList.add((Attribute) o);
3219 }
3220 else
3221 {
3222 attrList.add(((LDIFAttribute) o).toAttribute());
3223 }
3224 }
3225
3226 return attrList;
3227 }
3228
3229
3230
3231 /**
3232 * Retrieves the bytes that make up the file referenced by the given URL.
3233 *
3234 * @param urlString The string representation of the URL to retrieve.
3235 * @param relativeBasePath The base path that will be prepended to relative
3236 * paths in order to obtain an absolute path.
3237 * @param firstLineNumber The line number for the start of the record.
3238 *
3239 * @return The bytes contained in the specified file, or an empty array if
3240 * the specified file is empty.
3241 *
3242 * @throws LDIFException If the provided URL is malformed or references a
3243 * nonexistent file.
3244 *
3245 * @throws IOException If a problem is encountered while attempting to read
3246 * from the target file.
3247 */
3248 private static byte[] retrieveURLBytes(final String urlString,
3249 final String relativeBasePath,
3250 final long firstLineNumber)
3251 throws LDIFException, IOException
3252 {
3253 int pos;
3254 String path;
3255 final String lowerURLString = toLowerCase(urlString);
3256 if (lowerURLString.startsWith("file:/"))
3257 {
3258 pos = 6;
3259 while ((pos < urlString.length()) && (urlString.charAt(pos) == '/'))
3260 {
3261 pos++;
3262 }
3263
3264 path = urlString.substring(pos-1);
3265 }
3266 else if (lowerURLString.startsWith("file:"))
3267 {
3268 // A file: URL that doesn't include a slash will be interpreted as a
3269 // relative path.
3270 path = relativeBasePath + urlString.substring(5);
3271 }
3272 else
3273 {
3274 throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(urlString),
3275 firstLineNumber, true);
3276 }
3277
3278 final File f = new File(path);
3279 if (! f.exists())
3280 {
3281 throw new LDIFException(
3282 ERR_READ_URL_NO_SUCH_FILE.get(urlString, f.getAbsolutePath()),
3283 firstLineNumber, true);
3284 }
3285
3286 // In order to conserve memory, we'll only allow values to be read from
3287 // files no larger than 10 megabytes.
3288 final long fileSize = f.length();
3289 if (fileSize > (10 * 1024 * 1024))
3290 {
3291 throw new LDIFException(
3292 ERR_READ_URL_FILE_TOO_LARGE.get(urlString, f.getAbsolutePath(),
3293 (10*1024*1024)),
3294 firstLineNumber, true);
3295 }
3296
3297 int fileBytesRemaining = (int) fileSize;
3298 final byte[] fileData = new byte[(int) fileSize];
3299 final FileInputStream fis = new FileInputStream(f);
3300 try
3301 {
3302 int fileBytesRead = 0;
3303 while (fileBytesRead < fileSize)
3304 {
3305 final int bytesRead =
3306 fis.read(fileData, fileBytesRead, fileBytesRemaining);
3307 if (bytesRead < 0)
3308 {
3309 // We hit the end of the file before we expected to. This shouldn't
3310 // happen unless the file size changed since we first looked at it,
3311 // which we won't allow.
3312 throw new LDIFException(
3313 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString,
3314 f.getAbsolutePath()),
3315 firstLineNumber, true);
3316 }
3317
3318 fileBytesRead += bytesRead;
3319 fileBytesRemaining -= bytesRead;
3320 }
3321
3322 if (fis.read() != -1)
3323 {
3324 // There is still more data to read. This shouldn't happen unless the
3325 // file size changed since we first looked at it, which we won't allow.
3326 throw new LDIFException(
3327 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, f.getAbsolutePath()),
3328 firstLineNumber, true);
3329 }
3330 }
3331 finally
3332 {
3333 fis.close();
3334 }
3335
3336 return fileData;
3337 }
3338
3339
3340
3341 /**
3342 * Parses the data available through the provided iterator into an array of
3343 * modifications suitable for use in a modify change record.
3344 *
3345 * @param dn The DN of the entry being parsed.
3346 * @param trailingSpaceBehavior The behavior that should be exhibited when
3347 * encountering attribute values which are not
3348 * base64-encoded but contain trailing spaces.
3349 * @param ldifLines The lines that comprise the LDIF
3350 * representation of the full record being
3351 * parsed.
3352 * @param iterator The iterator to use to access the
3353 * modification data.
3354 * @param firstLineNumber The line number for the start of the record.
3355 * @param schema The schema to use in processing.
3356 *
3357 * @return An array containing the modifications that were read.
3358 *
3359 * @throws LDIFException If the provided LDIF data cannot be decoded as a
3360 * set of modifications.
3361 */
3362 private static Modification[] parseModifications(final String dn,
3363 final TrailingSpaceBehavior trailingSpaceBehavior,
3364 final ArrayList<StringBuilder> ldifLines,
3365 final Iterator<StringBuilder> iterator,
3366 final long firstLineNumber, final Schema schema)
3367 throws LDIFException
3368 {
3369 final ArrayList<Modification> modList =
3370 new ArrayList<Modification>(ldifLines.size());
3371
3372 while (iterator.hasNext())
3373 {
3374 // The first line must start with "add:", "delete:", "replace:", or
3375 // "increment:" followed by an attribute name.
3376 StringBuilder line = iterator.next();
3377 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3378 int colonPos = line.indexOf(":");
3379 if (colonPos < 0)
3380 {
3381 throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber),
3382 firstLineNumber, true, ldifLines, null);
3383 }
3384
3385 final ModificationType modType;
3386 final String modTypeStr = toLowerCase(line.substring(0, colonPos));
3387 if (modTypeStr.equals("add"))
3388 {
3389 modType = ModificationType.ADD;
3390 }
3391 else if (modTypeStr.equals("delete"))
3392 {
3393 modType = ModificationType.DELETE;
3394 }
3395 else if (modTypeStr.equals("replace"))
3396 {
3397 modType = ModificationType.REPLACE;
3398 }
3399 else if (modTypeStr.equals("increment"))
3400 {
3401 modType = ModificationType.INCREMENT;
3402 }
3403 else
3404 {
3405 throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr,
3406 firstLineNumber),
3407 firstLineNumber, true, ldifLines, null);
3408 }
3409
3410 String attributeName;
3411 int length = line.length();
3412 if (length == (colonPos+1))
3413 {
3414 // The colon was the last character on the line. This is not
3415 // acceptable.
3416 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
3417 firstLineNumber),
3418 firstLineNumber, true, ldifLines, null);
3419 }
3420 else if (line.charAt(colonPos+1) == ':')
3421 {
3422 // Skip over any spaces leading up to the value, and then the rest of
3423 // the string is the base64-encoded attribute name.
3424 int pos = colonPos+2;
3425 while ((pos < length) && (line.charAt(pos) == ' '))
3426 {
3427 pos++;
3428 }
3429
3430 try
3431 {
3432 final byte[] dnBytes = Base64.decode(line.substring(pos));
3433 attributeName = new String(dnBytes, "UTF-8");
3434 }
3435 catch (final ParseException pe)
3436 {
3437 debugException(pe);
3438 throw new LDIFException(
3439 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
3440 firstLineNumber, pe.getMessage()),
3441 firstLineNumber, true, ldifLines, pe);
3442 }
3443 catch (final Exception e)
3444 {
3445 debugException(e);
3446 throw new LDIFException(
3447 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
3448 firstLineNumber, e),
3449 firstLineNumber, true, ldifLines, e);
3450 }
3451 }
3452 else
3453 {
3454 // Skip over any spaces leading up to the value, and then the rest of
3455 // the string is the attribute name.
3456 int pos = colonPos+1;
3457 while ((pos < length) && (line.charAt(pos) == ' '))
3458 {
3459 pos++;
3460 }
3461
3462 attributeName = line.substring(pos);
3463 }
3464
3465 if (attributeName.length() == 0)
3466 {
3467 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
3468 firstLineNumber),
3469 firstLineNumber, true, ldifLines, null);
3470 }
3471
3472
3473 // The next zero or more lines may be the set of attribute values. Keep
3474 // reading until we reach the end of the iterator or until we find a line
3475 // with just a "-".
3476 final ArrayList<ASN1OctetString> valueList =
3477 new ArrayList<ASN1OctetString>(ldifLines.size());
3478 while (iterator.hasNext())
3479 {
3480 line = iterator.next();
3481 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3482 if (line.toString().equals("-"))
3483 {
3484 break;
3485 }
3486
3487 colonPos = line.indexOf(":");
3488 if (colonPos < 0)
3489 {
3490 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
3491 firstLineNumber, true, ldifLines, null);
3492 }
3493 else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName))
3494 {
3495 // There are a couple of cases in which this might be acceptable:
3496 // - If the two names are logically equivalent, but have an alternate
3497 // name (or OID) for the target attribute type, or if there are
3498 // attribute options and the options are just in a different order.
3499 // - If this is the first value for the target attribute and the
3500 // alternate name includes a "binary" option that the original
3501 // attribute name did not have. In this case, all subsequent values
3502 // will also be required to have the binary option.
3503 final String alternateName = line.substring(0, colonPos);
3504
3505
3506 // Check to see if the base names are equivalent.
3507 boolean baseNameEquivalent = false;
3508 final String expectedBaseName = Attribute.getBaseName(attributeName);
3509 final String alternateBaseName = Attribute.getBaseName(alternateName);
3510 if (alternateBaseName.equalsIgnoreCase(expectedBaseName))
3511 {
3512 baseNameEquivalent = true;
3513 }
3514 else
3515 {
3516 if (schema != null)
3517 {
3518 final AttributeTypeDefinition expectedAT =
3519 schema.getAttributeType(expectedBaseName);
3520 final AttributeTypeDefinition alternateAT =
3521 schema.getAttributeType(alternateBaseName);
3522 if ((expectedAT != null) && (alternateAT != null) &&
3523 expectedAT.equals(alternateAT))
3524 {
3525 baseNameEquivalent = true;
3526 }
3527 }
3528 }
3529
3530
3531 // Check to see if the attribute options are equivalent.
3532 final Set<String> expectedOptions =
3533 Attribute.getOptions(attributeName);
3534 final Set<String> lowerExpectedOptions =
3535 new HashSet<String>(expectedOptions.size());
3536 for (final String s : expectedOptions)
3537 {
3538 lowerExpectedOptions.add(toLowerCase(s));
3539 }
3540
3541 final Set<String> alternateOptions =
3542 Attribute.getOptions(alternateName);
3543 final Set<String> lowerAlternateOptions =
3544 new HashSet<String>(alternateOptions.size());
3545 for (final String s : alternateOptions)
3546 {
3547 lowerAlternateOptions.add(toLowerCase(s));
3548 }
3549
3550 final boolean optionsEquivalent =
3551 lowerAlternateOptions.equals(lowerExpectedOptions);
3552
3553
3554 if (baseNameEquivalent && optionsEquivalent)
3555 {
3556 // This is fine. The two attribute descriptions are logically
3557 // equivalent. We'll continue using the attribute description that
3558 // was provided first.
3559 }
3560 else if (valueList.isEmpty() && baseNameEquivalent &&
3561 lowerAlternateOptions.remove("binary") &&
3562 lowerAlternateOptions.equals(lowerExpectedOptions))
3563 {
3564 // This means that the provided value is the first value for the
3565 // attribute, and that the only significant difference is that the
3566 // provided attribute description included an unexpected "binary"
3567 // option. We'll accept this, but will require any additional
3568 // values for this modification to also include the binary option,
3569 // and we'll use the binary option in the attribute that is
3570 // eventually created.
3571 attributeName = alternateName;
3572 }
3573 else
3574 {
3575 // This means that either the base names are different or the sets
3576 // of options are incompatible. This is not acceptable.
3577 throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get(
3578 firstLineNumber,
3579 line.substring(0, colonPos),
3580 attributeName),
3581 firstLineNumber, true, ldifLines, null);
3582 }
3583 }
3584
3585 length = line.length();
3586 final ASN1OctetString value;
3587 if (length == (colonPos+1))
3588 {
3589 // The colon was the last character on the line. This is fine.
3590 value = new ASN1OctetString();
3591 }
3592 else if (line.charAt(colonPos+1) == ':')
3593 {
3594 // Skip over any spaces leading up to the value, and then the rest of
3595 // the string is the base64-encoded value. This is unusual and
3596 // unnecessary, but is nevertheless acceptable.
3597 int pos = colonPos+2;
3598 while ((pos < length) && (line.charAt(pos) == ' '))
3599 {
3600 pos++;
3601 }
3602
3603 try
3604 {
3605 value = new ASN1OctetString(Base64.decode(line.substring(pos)));
3606 }
3607 catch (final ParseException pe)
3608 {
3609 debugException(pe);
3610 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3611 attributeName, firstLineNumber, pe.getMessage()),
3612 firstLineNumber, true, ldifLines, pe);
3613 }
3614 catch (final Exception e)
3615 {
3616 debugException(e);
3617 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3618 firstLineNumber, e),
3619 firstLineNumber, true, ldifLines, e);
3620 }
3621 }
3622 else
3623 {
3624 // Skip over any spaces leading up to the value, and then the rest of
3625 // the string is the value.
3626 int pos = colonPos+1;
3627 while ((pos < length) && (line.charAt(pos) == ' '))
3628 {
3629 pos++;
3630 }
3631
3632 value = new ASN1OctetString(line.substring(pos));
3633 }
3634
3635 valueList.add(value);
3636 }
3637
3638 final ASN1OctetString[] values = new ASN1OctetString[valueList.size()];
3639 valueList.toArray(values);
3640
3641 // If it's an add modification type, then there must be at least one
3642 // value.
3643 if ((modType.intValue() == ModificationType.ADD.intValue()) &&
3644 (values.length == 0))
3645 {
3646 throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName,
3647 firstLineNumber),
3648 firstLineNumber, true, ldifLines, null);
3649 }
3650
3651 // If it's an increment modification type, then there must be exactly one
3652 // value.
3653 if ((modType.intValue() == ModificationType.INCREMENT.intValue()) &&
3654 (values.length != 1))
3655 {
3656 throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get(
3657 firstLineNumber, attributeName),
3658 firstLineNumber, true, ldifLines, null);
3659 }
3660
3661 modList.add(new Modification(modType, attributeName, values));
3662 }
3663
3664 final Modification[] mods = new Modification[modList.size()];
3665 modList.toArray(mods);
3666 return mods;
3667 }
3668
3669
3670
3671 /**
3672 * Parses the data available through the provided iterator as the body of a
3673 * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional
3674 * newsuperior lines).
3675 *
3676 * @param ldifLines The lines that comprise the LDIF
3677 * representation of the full record being
3678 * parsed.
3679 * @param iterator The iterator to use to access the modify DN
3680 * data.
3681 * @param dn The current DN of the entry.
3682 * @param controls The set of controls to include in the change
3683 * record.
3684 * @param trailingSpaceBehavior The behavior that should be exhibited when
3685 * encountering attribute values which are not
3686 * base64-encoded but contain trailing spaces.
3687 * @param firstLineNumber The line number for the start of the record.
3688 *
3689 * @return The decoded modify DN change record.
3690 *
3691 * @throws LDIFException If the provided LDIF data cannot be decoded as a
3692 * modify DN change record.
3693 */
3694 private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord(
3695 final ArrayList<StringBuilder> ldifLines,
3696 final Iterator<StringBuilder> iterator, final String dn,
3697 final List<Control> controls,
3698 final TrailingSpaceBehavior trailingSpaceBehavior,
3699 final long firstLineNumber)
3700 throws LDIFException
3701 {
3702 // The next line must be the new RDN, and it must start with "newrdn:".
3703 StringBuilder line = iterator.next();
3704 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3705 int colonPos = line.indexOf(":");
3706 if ((colonPos < 0) ||
3707 (! line.substring(0, colonPos).equalsIgnoreCase("newrdn")))
3708 {
3709 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get(
3710 firstLineNumber),
3711 firstLineNumber, true, ldifLines, null);
3712 }
3713
3714 final String newRDN;
3715 int length = line.length();
3716 if (length == (colonPos+1))
3717 {
3718 // The colon was the last character on the line. This is not acceptable.
3719 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
3720 firstLineNumber),
3721 firstLineNumber, true, ldifLines, null);
3722 }
3723 else if (line.charAt(colonPos+1) == ':')
3724 {
3725 // Skip over any spaces leading up to the value, and then the rest of the
3726 // string is the base64-encoded new RDN.
3727 int pos = colonPos+2;
3728 while ((pos < length) && (line.charAt(pos) == ' '))
3729 {
3730 pos++;
3731 }
3732
3733 try
3734 {
3735 final byte[] dnBytes = Base64.decode(line.substring(pos));
3736 newRDN = new String(dnBytes, "UTF-8");
3737 }
3738 catch (final ParseException pe)
3739 {
3740 debugException(pe);
3741 throw new LDIFException(
3742 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3743 pe.getMessage()),
3744 firstLineNumber, true, ldifLines, pe);
3745 }
3746 catch (final Exception e)
3747 {
3748 debugException(e);
3749 throw new LDIFException(
3750 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3751 e),
3752 firstLineNumber, true, ldifLines, e);
3753 }
3754 }
3755 else
3756 {
3757 // Skip over any spaces leading up to the value, and then the rest of the
3758 // string is the new RDN.
3759 int pos = colonPos+1;
3760 while ((pos < length) && (line.charAt(pos) == ' '))
3761 {
3762 pos++;
3763 }
3764
3765 newRDN = line.substring(pos);
3766 }
3767
3768 if (newRDN.length() == 0)
3769 {
3770 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
3771 firstLineNumber),
3772 firstLineNumber, true, ldifLines, null);
3773 }
3774
3775
3776 // The next line must be the deleteOldRDN flag, and it must start with
3777 // 'deleteoldrdn:'.
3778 if (! iterator.hasNext())
3779 {
3780 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3781 firstLineNumber),
3782 firstLineNumber, true, ldifLines, null);
3783 }
3784
3785 line = iterator.next();
3786 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3787 colonPos = line.indexOf(":");
3788 if ((colonPos < 0) ||
3789 (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn")))
3790 {
3791 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3792 firstLineNumber),
3793 firstLineNumber, true, ldifLines, null);
3794 }
3795
3796 final String deleteOldRDNStr;
3797 length = line.length();
3798 if (length == (colonPos+1))
3799 {
3800 // The colon was the last character on the line. This is not acceptable.
3801 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get(
3802 firstLineNumber),
3803 firstLineNumber, true, ldifLines, null);
3804 }
3805 else if (line.charAt(colonPos+1) == ':')
3806 {
3807 // Skip over any spaces leading up to the value, and then the rest of the
3808 // string is the base64-encoded value. This is unusual and
3809 // unnecessary, but is nevertheless acceptable.
3810 int pos = colonPos+2;
3811 while ((pos < length) && (line.charAt(pos) == ' '))
3812 {
3813 pos++;
3814 }
3815
3816 try
3817 {
3818 final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
3819 deleteOldRDNStr = new String(changeTypeBytes, "UTF-8");
3820 }
3821 catch (final ParseException pe)
3822 {
3823 debugException(pe);
3824 throw new LDIFException(
3825 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3826 firstLineNumber, pe.getMessage()),
3827 firstLineNumber, true, ldifLines, pe);
3828 }
3829 catch (final Exception e)
3830 {
3831 debugException(e);
3832 throw new LDIFException(
3833 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3834 firstLineNumber, e),
3835 firstLineNumber, true, ldifLines, e);
3836 }
3837 }
3838 else
3839 {
3840 // Skip over any spaces leading up to the value, and then the rest of the
3841 // string is the value.
3842 int pos = colonPos+1;
3843 while ((pos < length) && (line.charAt(pos) == ' '))
3844 {
3845 pos++;
3846 }
3847
3848 deleteOldRDNStr = line.substring(pos);
3849 }
3850
3851 final boolean deleteOldRDN;
3852 if (deleteOldRDNStr.equals("0"))
3853 {
3854 deleteOldRDN = false;
3855 }
3856 else if (deleteOldRDNStr.equals("1"))
3857 {
3858 deleteOldRDN = true;
3859 }
3860 else if (deleteOldRDNStr.equalsIgnoreCase("false") ||
3861 deleteOldRDNStr.equalsIgnoreCase("no"))
3862 {
3863 // This is technically illegal, but we'll allow it.
3864 deleteOldRDN = false;
3865 }
3866 else if (deleteOldRDNStr.equalsIgnoreCase("true") ||
3867 deleteOldRDNStr.equalsIgnoreCase("yes"))
3868 {
3869 // This is also technically illegal, but we'll allow it.
3870 deleteOldRDN = false;
3871 }
3872 else
3873 {
3874 throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get(
3875 deleteOldRDNStr, firstLineNumber),
3876 firstLineNumber, true, ldifLines, null);
3877 }
3878
3879
3880 // If there is another line, then it must be the new superior DN and it must
3881 // start with "newsuperior:". If this is absent, then it's fine.
3882 final String newSuperiorDN;
3883 if (iterator.hasNext())
3884 {
3885 line = iterator.next();
3886 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3887 colonPos = line.indexOf(":");
3888 if ((colonPos < 0) ||
3889 (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior")))
3890 {
3891 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get(
3892 firstLineNumber),
3893 firstLineNumber, true, ldifLines, null);
3894 }
3895
3896 length = line.length();
3897 if (length == (colonPos+1))
3898 {
3899 // The colon was the last character on the line. This is fine.
3900 newSuperiorDN = "";
3901 }
3902 else if (line.charAt(colonPos+1) == ':')
3903 {
3904 // Skip over any spaces leading up to the value, and then the rest of
3905 // the string is the base64-encoded new superior DN.
3906 int pos = colonPos+2;
3907 while ((pos < length) && (line.charAt(pos) == ' '))
3908 {
3909 pos++;
3910 }
3911
3912 try
3913 {
3914 final byte[] dnBytes = Base64.decode(line.substring(pos));
3915 newSuperiorDN = new String(dnBytes, "UTF-8");
3916 }
3917 catch (final ParseException pe)
3918 {
3919 debugException(pe);
3920 throw new LDIFException(
3921 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
3922 firstLineNumber, pe.getMessage()),
3923 firstLineNumber, true, ldifLines, pe);
3924 }
3925 catch (final Exception e)
3926 {
3927 debugException(e);
3928 throw new LDIFException(
3929 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
3930 firstLineNumber, e),
3931 firstLineNumber, true, ldifLines, e);
3932 }
3933 }
3934 else
3935 {
3936 // Skip over any spaces leading up to the value, and then the rest of
3937 // the string is the new superior DN.
3938 int pos = colonPos+1;
3939 while ((pos < length) && (line.charAt(pos) == ' '))
3940 {
3941 pos++;
3942 }
3943
3944 newSuperiorDN = line.substring(pos);
3945 }
3946 }
3947 else
3948 {
3949 newSuperiorDN = null;
3950 }
3951
3952
3953 // There must not be any more lines.
3954 if (iterator.hasNext())
3955 {
3956 throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber),
3957 firstLineNumber, true, ldifLines, null);
3958 }
3959
3960 return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN,
3961 newSuperiorDN, controls);
3962 }
3963
3964
3965
3966 /**
3967 * Examines the line contained in the provided buffer to determine whether it
3968 * may contain one or more illegal trailing spaces. If it does, then those
3969 * spaces will either be stripped out or an exception will be thrown to
3970 * indicate that they are illegal.
3971 *
3972 * @param buffer The buffer to be examined.
3973 * @param dn The DN of the LDIF record being parsed. It
3974 * may be {@code null} if the DN is not yet
3975 * known (e.g., because the provided line is
3976 * expected to contain that DN).
3977 * @param firstLineNumber The approximate line number in the LDIF
3978 * source on which the LDIF record begins.
3979 * @param trailingSpaceBehavior The behavior that should be exhibited when
3980 * encountering attribute values which are not
3981 * base64-encoded but contain trailing spaces.
3982 *
3983 * @throws LDIFException If the line contained in the provided buffer ends
3984 * with one or more illegal trailing spaces and
3985 * {@code stripTrailingSpaces} was provided with a
3986 * value of {@code false}.
3987 */
3988 private static void handleTrailingSpaces(final StringBuilder buffer,
3989 final String dn, final long firstLineNumber,
3990 final TrailingSpaceBehavior trailingSpaceBehavior)
3991 throws LDIFException
3992 {
3993 int pos = buffer.length() - 1;
3994 boolean trailingFound = false;
3995 while ((pos >= 0) && (buffer.charAt(pos) == ' '))
3996 {
3997 trailingFound = true;
3998 pos--;
3999 }
4000
4001 if (trailingFound && (buffer.charAt(pos) != ':'))
4002 {
4003 switch (trailingSpaceBehavior)
4004 {
4005 case STRIP:
4006 buffer.setLength(pos+1);
4007 break;
4008
4009 case REJECT:
4010 if (dn == null)
4011 {
4012 throw new LDIFException(
4013 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber,
4014 buffer.toString()),
4015 firstLineNumber, true);
4016 }
4017 else
4018 {
4019 throw new LDIFException(
4020 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn,
4021 firstLineNumber, buffer.toString()),
4022 firstLineNumber, true);
4023 }
4024
4025 case RETAIN:
4026 default:
4027 // No action will be taken.
4028 break;
4029 }
4030 }
4031 }
4032
4033
4034
4035 /**
4036 * This represents an unparsed LDIFRecord. It stores the line number of the
4037 * first line of the record and each line of the record.
4038 */
4039 private static final class UnparsedLDIFRecord
4040 {
4041 private final ArrayList<StringBuilder> lineList;
4042 private final long firstLineNumber;
4043 private final Exception failureCause;
4044 private final boolean isEOF;
4045 private final DuplicateValueBehavior duplicateValueBehavior;
4046 private final Schema schema;
4047 private final TrailingSpaceBehavior trailingSpaceBehavior;
4048
4049
4050
4051 /**
4052 * Constructor.
4053 *
4054 * @param lineList The lines that comprise the LDIF record.
4055 * @param duplicateValueBehavior The behavior to exhibit if the entry
4056 * contains duplicate attribute values.
4057 * @param trailingSpaceBehavior Specifies the behavior to exhibit when
4058 * encountering trailing spaces in
4059 * non-base64-encoded attribute values.
4060 * @param schema The schema to use when parsing, if
4061 * applicable.
4062 * @param firstLineNumber The first line number of the LDIF record.
4063 */
4064 private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList,
4065 final DuplicateValueBehavior duplicateValueBehavior,
4066 final TrailingSpaceBehavior trailingSpaceBehavior,
4067 final Schema schema, final long firstLineNumber)
4068 {
4069 this.lineList = lineList;
4070 this.firstLineNumber = firstLineNumber;
4071 this.duplicateValueBehavior = duplicateValueBehavior;
4072 this.trailingSpaceBehavior = trailingSpaceBehavior;
4073 this.schema = schema;
4074
4075 failureCause = null;
4076 isEOF =
4077 (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty());
4078 }
4079
4080
4081
4082 /**
4083 * Constructor.
4084 *
4085 * @param failureCause The Exception thrown when reading from the input.
4086 */
4087 private UnparsedLDIFRecord(final Exception failureCause)
4088 {
4089 this.failureCause = failureCause;
4090
4091 lineList = null;
4092 firstLineNumber = 0;
4093 duplicateValueBehavior = DuplicateValueBehavior.REJECT;
4094 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT;
4095 schema = null;
4096 isEOF = false;
4097 }
4098
4099
4100
4101 /**
4102 * Return the lines that comprise the LDIF record.
4103 *
4104 * @return The lines that comprise the LDIF record.
4105 */
4106 private ArrayList<StringBuilder> getLineList()
4107 {
4108 return lineList;
4109 }
4110
4111
4112
4113 /**
4114 * Retrieves the behavior to exhibit when encountering duplicate attribute
4115 * values.
4116 *
4117 * @return The behavior to exhibit when encountering duplicate attribute
4118 * values.
4119 */
4120 private DuplicateValueBehavior getDuplicateValueBehavior()
4121 {
4122 return duplicateValueBehavior;
4123 }
4124
4125
4126
4127 /**
4128 * Retrieves the behavior that should be exhibited when encountering
4129 * attribute values which are not base64-encoded but contain trailing
4130 * spaces. The LDIF specification strongly recommends that any value which
4131 * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK
4132 * LDIF parser may be configured to automatically strip these spaces, to
4133 * preserve them, or to reject any entry or change record containing them.
4134 *
4135 * @return The behavior that should be exhibited when encountering
4136 * attribute values which are not base64-encoded but contain
4137 * trailing spaces.
4138 */
4139 private TrailingSpaceBehavior getTrailingSpaceBehavior()
4140 {
4141 return trailingSpaceBehavior;
4142 }
4143
4144
4145
4146 /**
4147 * Retrieves the schema that should be used when parsing the record, if
4148 * applicable.
4149 *
4150 * @return The schema that should be used when parsing the record, or
4151 * {@code null} if none should be used.
4152 */
4153 private Schema getSchema()
4154 {
4155 return schema;
4156 }
4157
4158
4159
4160 /**
4161 * Return the first line number of the LDIF record.
4162 *
4163 * @return The first line number of the LDIF record.
4164 */
4165 private long getFirstLineNumber()
4166 {
4167 return firstLineNumber;
4168 }
4169
4170
4171
4172 /**
4173 * Return {@code true} iff the end of the input was reached.
4174 *
4175 * @return {@code true} iff the end of the input was reached.
4176 */
4177 private boolean isEOF()
4178 {
4179 return isEOF;
4180 }
4181
4182
4183
4184 /**
4185 * Returns the reason that reading the record lines failed. This normally
4186 * is only non-null if something bad happened to the input stream (like
4187 * a disk read error).
4188 *
4189 * @return The reason that reading the record lines failed.
4190 */
4191 private Exception getFailureCause()
4192 {
4193 return failureCause;
4194 }
4195 }
4196
4197
4198 /**
4199 * When processing in asynchronous mode, this thread is responsible for
4200 * reading the raw unparsed records from the input and submitting them for
4201 * processing.
4202 */
4203 private final class LineReaderThread
4204 extends Thread
4205 {
4206 /**
4207 * Constructor.
4208 */
4209 private LineReaderThread()
4210 {
4211 super("Asynchronous LDIF line reader");
4212 setDaemon(true);
4213 }
4214
4215
4216
4217 /**
4218 * Reads raw, unparsed records from the input and submits them for
4219 * processing until the input is finished or closed.
4220 */
4221 @Override()
4222 public void run()
4223 {
4224 try
4225 {
4226 boolean stopProcessing = false;
4227 while (!stopProcessing)
4228 {
4229 UnparsedLDIFRecord unparsedRecord = null;
4230 try
4231 {
4232 unparsedRecord = readUnparsedRecord();
4233 }
4234 catch (IOException e)
4235 {
4236 debugException(e);
4237 unparsedRecord = new UnparsedLDIFRecord(e);
4238 stopProcessing = true;
4239 }
4240 catch (Exception e)
4241 {
4242 debugException(e);
4243 unparsedRecord = new UnparsedLDIFRecord(e);
4244 }
4245
4246 try
4247 {
4248 asyncParser.submit(unparsedRecord);
4249 }
4250 catch (InterruptedException e)
4251 {
4252 debugException(e);
4253 // If this thread is interrupted, then someone wants us to stop
4254 // processing, so that's what we'll do.
4255 stopProcessing = true;
4256 }
4257
4258 if ((unparsedRecord == null) || (unparsedRecord.isEOF()))
4259 {
4260 stopProcessing = true;
4261 }
4262 }
4263 }
4264 finally
4265 {
4266 try
4267 {
4268 asyncParser.shutdown();
4269 }
4270 catch (InterruptedException e)
4271 {
4272 debugException(e);
4273 }
4274 finally
4275 {
4276 asyncParsingComplete.set(true);
4277 }
4278 }
4279 }
4280 }
4281
4282
4283
4284 /**
4285 * Used to parse Records asynchronously.
4286 */
4287 private final class RecordParser implements Processor<UnparsedLDIFRecord,
4288 LDIFRecord>
4289 {
4290 /**
4291 * {@inheritDoc}
4292 */
4293 public LDIFRecord process(final UnparsedLDIFRecord input)
4294 throws LDIFException
4295 {
4296 LDIFRecord record = decodeRecord(input, relativeBasePath, schema);
4297
4298 if ((record instanceof Entry) && (entryTranslator != null))
4299 {
4300 record = entryTranslator.translate((Entry) record,
4301 input.getFirstLineNumber());
4302
4303 if (record == null)
4304 {
4305 record = SKIP_ENTRY;
4306 }
4307 }
4308 if ((record instanceof LDIFChangeRecord) &&
4309 (changeRecordTranslator != null))
4310 {
4311 record = changeRecordTranslator.translate((LDIFChangeRecord) record,
4312 input.getFirstLineNumber());
4313
4314 if (record == null)
4315 {
4316 record = SKIP_ENTRY;
4317 }
4318 }
4319 return record;
4320 }
4321 }
4322 }